Bug 1011369 - Restrict some line-break behaviors to Chinese and Japanese contexts, as per spec. r=m_kato
author Jonathan Kew <jkew@mozilla.com>
Mon, 20 May 2019 20:47:09 +0000
changeset 474598 2607e7773ed5c4616d09f9ea2645c9204568b34a
parent 474597 436e3199c386c6a7a5e70cc8685dd1247827cb90
child 474599 b695489a68c4178622bf1efed9d8cf38f2cd175a
push id 36042
push user dvarga@mozilla.com
push date Tue, 21 May 2019 04:19:40 +0000
treeherder mozilla-central@ca560ff55451 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers m_kato
bugs 1011369
milestone 69.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1011369 - Restrict some line-break behaviors to Chinese and Japanese contexts, as per spec. r=m_kato

Differential Revision: https://phabricator.services.mozilla.com/D30788
dom/base/nsLineBreaker.cpp
dom/base/nsLineBreaker.h
intl/lwbrk/LineBreaker.cpp
intl/lwbrk/LineBreaker.h
--- a/dom/base/nsLineBreaker.cpp
+++ b/dom/base/nsLineBreaker.cpp
@@ -6,23 +6,26 @@
 
 #include "nsLineBreaker.h"
 #include "nsContentUtils.h"
 #include "gfxTextRun.h"  // for the gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_* values
 #include "nsHyphenationManager.h"
 #include "nsHyphenator.h"
 #include "mozilla/gfx/2D.h"
 #include "mozilla/intl/LineBreaker.h"
+#include "mozilla/intl/MozLocale.h"
 
 using mozilla::intl::LineBreaker;
+using mozilla::intl::Locale;
 
 nsLineBreaker::nsLineBreaker()
     : mCurrentWordLanguage(nullptr),
       mCurrentWordContainsMixedLang(false),
       mCurrentWordContainsComplexChar(false),
+      mScriptIsChineseOrJapanese(false),
       mAfterBreakableSpace(false),
       mBreakHere(false),
       mWordBreak(LineBreaker::WordBreak::Normal),
       mStrictness(LineBreaker::Strictness::Auto) {}
 
 nsLineBreaker::~nsLineBreaker() {
   NS_ASSERTION(mCurrentWord.Length() == 0,
                "Should have Reset() before destruction!");
@@ -74,17 +77,17 @@ nsresult nsLineBreaker::FlushCurrentWord
     memset(breakState.Elements(),
            mWordBreak == LineBreaker::WordBreak::BreakAll
                ? gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL
                : gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE,
            length * sizeof(uint8_t));
   } else {
     nsContentUtils::LineBreaker()->GetJISx4051Breaks(
         mCurrentWord.Elements(), length, mWordBreak, mStrictness,
-        breakState.Elements());
+        mScriptIsChineseOrJapanese, breakState.Elements());
   }
 
   bool autoHyphenate = mCurrentWordLanguage && !mCurrentWordContainsMixedLang;
   uint32_t i;
   for (i = 0; autoHyphenate && i < mTextItems.Length(); ++i) {
     TextItem* ti = &mTextItems[i];
     if (!(ti->mFlags & BREAK_USE_AUTO_HYPHENATION)) {
       autoHyphenate = false;
@@ -245,17 +248,17 @@ nsresult nsLineBreaker::AppendText(nsAto
       if (offset > wordStart && aSink) {
         if (!(aFlags & BREAK_SUPPRESS_INSIDE)) {
           if (wordHasComplexChar) {
             // Save current start-of-word state because GetJISx4051Breaks will
             // set it to false
             uint8_t currentStart = breakState[wordStart];
             nsContentUtils::LineBreaker()->GetJISx4051Breaks(
                 aText + wordStart, offset - wordStart, mWordBreak, mStrictness,
-                breakState.Elements() + wordStart);
+                mScriptIsChineseOrJapanese, breakState.Elements() + wordStart);
             breakState[wordStart] = currentStart;
           }
           if (hyphenator) {
             FindHyphenationPoints(hyphenator, aText + wordStart, aText + offset,
                                   breakState.Elements() + wordStart);
           }
         }
         if (!noCapitalizationNeeded) {
@@ -403,17 +406,17 @@ nsresult nsLineBreaker::AppendText(nsAto
     if (isSpace) {
       if (offset > wordStart && wordHasComplexChar) {
         if (aSink && !(aFlags & BREAK_SUPPRESS_INSIDE)) {
           // Save current start-of-word state because GetJISx4051Breaks will
           // set it to false
           uint8_t currentStart = breakState[wordStart];
           nsContentUtils::LineBreaker()->GetJISx4051Breaks(
               aText + wordStart, offset - wordStart, mWordBreak, mStrictness,
-              breakState.Elements() + wordStart);
+              mScriptIsChineseOrJapanese, breakState.Elements() + wordStart);
           breakState[wordStart] = currentStart;
         }
         wordHasComplexChar = false;
       }
 
       ++offset;
       if (offset >= aLength) break;
       wordStart = offset;
@@ -444,17 +447,28 @@ nsresult nsLineBreaker::AppendText(nsAto
     aSink->SetBreaks(start, offset - start, breakState.Elements() + start);
   }
   return NS_OK;
 }
 
 void nsLineBreaker::UpdateCurrentWordLanguage(nsAtom* aHyphenationLanguage) {
   if (mCurrentWordLanguage && mCurrentWordLanguage != aHyphenationLanguage) {
     mCurrentWordContainsMixedLang = true;
+    mScriptIsChineseOrJapanese = false;
   } else {
+    if (aHyphenationLanguage && !mCurrentWordLanguage) {
+      Locale loc = Locale(nsAtomCString(aHyphenationLanguage));
+      if (loc.GetScript().IsEmpty()) {
+        loc.AddLikelySubtags();
+      }
+      const nsCString& script = loc.GetScript();
+      mScriptIsChineseOrJapanese =
+          script.EqualsLiteral("Hans") || script.EqualsLiteral("Hant") ||
+          script.EqualsLiteral("Jpan") || script.EqualsLiteral("Hrkt");
+    }
     mCurrentWordLanguage = aHyphenationLanguage;
   }
 }
 
 nsresult nsLineBreaker::AppendInvisibleWhitespace(uint32_t aFlags) {
   nsresult rv = FlushCurrentWord();
   if (NS_FAILED(rv)) return rv;
 
--- a/dom/base/nsLineBreaker.h
+++ b/dom/base/nsLineBreaker.h
@@ -246,16 +246,17 @@ class nsLineBreaker {
                              const char16_t* aTextLimit, uint8_t* aBreakState);
 
   AutoTArray<char16_t, 100> mCurrentWord;
   // All the items that contribute to mCurrentWord
   AutoTArray<TextItem, 2> mTextItems;
   nsAtom* mCurrentWordLanguage;
   bool mCurrentWordContainsMixedLang;
   bool mCurrentWordContainsComplexChar;
+  bool mScriptIsChineseOrJapanese;
 
   // True if the previous character was breakable whitespace
   bool mAfterBreakableSpace;
   // True if a break must be allowed at the current position because
   // a run of breakable whitespace ends here
   bool mBreakHere;
   // line break mode by "word-break" style
   mozilla::intl::LineBreaker::WordBreak mWordBreak;
--- a/intl/lwbrk/LineBreaker.cpp
+++ b/intl/lwbrk/LineBreaker.cpp
@@ -110,29 +110,32 @@ already_AddRefed<LineBreaker> LineBreake
        18        X  X  X  X  X                    X     X
 
    3. Simplified by merged classes
 
    After the 2 simplification, the pair table have some duplication
    a. class 2, 3, 4, 5, 6,  are the same- we can merged them
    b. class 10, 11, 12, 17  are the same- we can merged them
 
+   We introduce an extra non-breaking pair at [b]/7 to better match
+   the expectations of CSS line-breaking as tested by WPT tests.
+   This added entry is marked as * in the tables below.
 
    Class of
    Leading    Class of Trailing Char Class
    Char
 
               1 [a] 7  8  9 [b]15 18
 
         1     X  X  X  X  X  X  X  X
       [a]        X
         7        X  X
         8        X              X
         9        X
-      [b]        X  x
+      [b]        X  *
        15        X        X     X  X
        18        X              X  X
 
 
    4. We add COMPLEX characters and make it breakable w/ all ther class
       except after class 1 and before class [a]
 
    Class of
@@ -141,17 +144,17 @@ already_AddRefed<LineBreaker> LineBreake
 
               1 [a] 7  8  9 [b]15 18 COMPLEX
 
         1     X  X  X  X  X  X  X  X  X
       [a]        X
         7        X  X
         8        X              X
         9        X
-      [b]        X  x
+      [b]        X  *
        15        X        X     X  X
        18        X              X  X
   COMPLEX        X                    T
 
      T : need special handling
 
 
    5. However, we need two special class for some punctuations/parentheses,
@@ -169,17 +172,17 @@ already_AddRefed<LineBreaker> LineBreake
 
               1 [a] 7  8  9 [b]15 18 COMPLEX [c] [d]
 
         1     X  X  X  X  X  X  X  X  X       X    X
       [a]        X                            X    X
         7        X  X
         8        X              X
         9        X
-      [b]        X  x                              X
+      [b]        X  *                              X
        15        X        X     X  X          X    X
        18        X              X  X          X    X
   COMPLEX        X                    T
       [c]     X  X  X  X  X  X  X  X  X       X    X
       [d]        X              X  X               X
 
 
    6. And Unicode has "NON-BREAK" characters. The lines should be broken around
@@ -191,26 +194,26 @@ already_AddRefed<LineBreaker> LineBreake
 
               1 [a] 7  8  9 [b]15 18 COMPLEX [c] [d] [e]
 
         1     X  X  X  X  X  X  X  X  X       X    X   X
       [a]        X                                 X   X
         7        X  X                                  X
         8        X              X                      X
         9        X                                     X
-      [b]        X  x                              X   X
+      [b]        X  *                              X   X
        15        X        X     X  X          X    X   X
        18        X              X  X          X    X   X
   COMPLEX        X                    T                X
       [c]     X  X  X  X  X  X  X  X  X       X    X   X
       [d]        X              X  X               X   X
       [e]     X  X  X  X  X  X  X  X  X       X    X   X
 
 
-   7. Now we use one bit to encode weather it is breakable, and use 2 bytes
+   7. Now we use one bit to encode whether it is breakable, and use 2 bytes
       for one row, then the bit table will look like:
 
                  18    <-   1
 
        1  0000 1111 1111 1111  = 0x0FFF
       [a] 0000 1100 0000 0010  = 0x0C02
        7  0000 1000 0000 0110  = 0x0806
        8  0000 1000 0100 0010  = 0x0842
@@ -243,17 +246,17 @@ static const uint16_t gPair[MAX_CLASSES]
 
               1 [a] 7  8  9 [b]15 18 COMPLEX [c] [d] [e]
 
         1     X  X  X  X  X  X  X  X  X       X    X   X
       [a]        X              X  X          X    X   X
         7        X  X           X  X          X    X   X
         8        X              X  X          X    X   X
         9        X              X  X          X    X   X
-      [b]        X  x                              X   X
+      [b]        X  *                              X   X
        15     X  X  X  X  X     X  X  X       X    X   X
        18     X  X  X  X  X     X  X  X       X    X   X
   COMPLEX        X              X  X  T       X    X   X
       [c]     X  X  X  X  X  X  X  X  X       X    X   X
       [d]     X  X  X  X  X     X  X  X       X    X   X
       [e]     X  X  X  X  X  X  X  X  X       X    X   X
 
                  18    <-   1
@@ -365,17 +368,18 @@ static inline bool IS_NONBREAKABLE_SPACE
 
 static inline bool IS_HYPHEN(char16_t u) {
   return (u == U_HYPHEN || u == 0x058A ||  // ARMENIAN HYPHEN
           u == 0x2010 ||                   // HYPHEN
           u == 0x2012 ||                   // FIGURE DASH
           u == 0x2013);                    // EN DASH
 }
 
-static int8_t GetClass(uint32_t u, LineBreaker::Strictness aLevel) {
+static int8_t GetClass(uint32_t u, LineBreaker::Strictness aLevel,
+                       bool aIsChineseOrJapanese) {
   // Mapping for Unicode LineBreak.txt classes to the (simplified) set of
   // character classes used here.
   // XXX The mappings here were derived by comparing the Unicode LineBreak
   //     values of BMP characters to the classes our existing GetClass returns
   //     for the same codepoints; in cases where characters with the same
   //     LineBreak class mapped to various classes here, I picked what seemed
   //     the most prevalent equivalence.
   //     Some of these are unclear to me, but currently they are ONLY used
@@ -447,71 +451,77 @@ static int8_t GetClass(uint32_t u, LineB
       }
       if (cls == U_LB_INSEPARABLE) {
         return CLASS_NON_BREAKABLE_BETWEEN_SAME_CLASS;
       }
       if (u == 0x3005 || u == 0x303B || u == 0x309D || u == 0x309E ||
           u == 0x30FD || u == 0x30FE) {
         return CLASS_CLOSE_LIKE_CHARACTER;
       }
-      if (cls == U_LB_POSTFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
-        return CLASS_CLOSE_LIKE_CHARACTER;
-      }
-      if (cls == U_LB_PREFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
-        return CLASS_OPEN_LIKE_CHARACTER;
-      }
-      if (u == 0x2010 || u == 0x2013 || u == 0x301C || u == 0x30A0) {
-        return CLASS_CLOSE_LIKE_CHARACTER;
+      if (aIsChineseOrJapanese) {
+        if (cls == U_LB_POSTFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
+          return CLASS_CLOSE_LIKE_CHARACTER;
+        }
+        if (cls == U_LB_PREFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
+          return CLASS_OPEN_LIKE_CHARACTER;
+        }
+        if (u == 0x2010 || u == 0x2013 || u == 0x301C || u == 0x30A0) {
+          return CLASS_CLOSE_LIKE_CHARACTER;
+        }
       }
       break;
     case LineBreaker::Strictness::Normal:
       if (cls == U_LB_CONDITIONAL_JAPANESE_STARTER) {
         return CLASS_BREAKABLE;
       }
       if (cls == U_LB_INSEPARABLE) {
         return CLASS_NON_BREAKABLE_BETWEEN_SAME_CLASS;
       }
       if (u == 0x3005 || u == 0x303B || u == 0x309D || u == 0x309E ||
           u == 0x30FD || u == 0x30FE) {
         return CLASS_CLOSE_LIKE_CHARACTER;
       }
-      if (cls == U_LB_POSTFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
-        return CLASS_CLOSE_LIKE_CHARACTER;
-      }
-      if (cls == U_LB_PREFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
-        return CLASS_OPEN_LIKE_CHARACTER;
-      }
-      if (u == 0x2010 || u == 0x2013 || u == 0x301C || u == 0x30A0) {
-        return CLASS_BREAKABLE;
+      if (aIsChineseOrJapanese) {
+        if (cls == U_LB_POSTFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
+          return CLASS_CLOSE_LIKE_CHARACTER;
+        }
+        if (cls == U_LB_PREFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
+          return CLASS_OPEN_LIKE_CHARACTER;
+        }
+        if (u == 0x2010 || u == 0x2013 || u == 0x301C || u == 0x30A0) {
+          return CLASS_BREAKABLE;
+        }
       }
       break;
     case LineBreaker::Strictness::Loose:
       if (cls == U_LB_CONDITIONAL_JAPANESE_STARTER) {
         return CLASS_BREAKABLE;
       }
       if (u == 0x3005 || u == 0x303B || u == 0x309D || u == 0x309E ||
           u == 0x30FD || u == 0x30FE) {
         return CLASS_BREAKABLE;
       }
       if (cls == U_LB_INSEPARABLE) {
         return CLASS_BREAKABLE;
       }
-      if (u == 0x30FB || u == 0xFF1A || u == 0xFF1B || u == 0xFF65 ||
-          u == 0x203C || u == 0x2047 || u == 0x2048 || u == 0x2049 ||
-          u == 0xFF01 || u == 0xFF1F) {
-        return CLASS_BREAKABLE;
-      }
-      if (cls == U_LB_POSTFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
-        return CLASS_BREAKABLE;
-      }
-      if (cls == U_LB_PREFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
-        return CLASS_BREAKABLE;
-      }
-      if (u == 0x2010 || u == 0x2013 || u == 0x301C || u == 0x30A0) {
-        return CLASS_BREAKABLE;
+      if (aIsChineseOrJapanese) {
+        if (u == 0x30FB || u == 0xFF1A || u == 0xFF1B || u == 0xFF65 ||
+            u == 0x203C || u == 0x2047 || u == 0x2048 || u == 0x2049 ||
+            u == 0xFF01 || u == 0xFF1F) {
+          return CLASS_BREAKABLE;
+        }
+        if (cls == U_LB_POSTFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
+          return CLASS_BREAKABLE;
+        }
+        if (cls == U_LB_PREFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
+          return CLASS_BREAKABLE;
+        }
+        if (u == 0x2010 || u == 0x2013 || u == 0x301C || u == 0x30A0) {
+          return CLASS_BREAKABLE;
+        }
       }
       break;
     case LineBreaker::Strictness::Anywhere:
       MOZ_ASSERT_UNREACHABLE("should have been handled already");
       break;
   }
 
   if (u < 0x10000) {
@@ -542,44 +552,44 @@ static int8_t GetClass(uint32_t u, LineB
     }
     if (0xff00 == h) {
       if (l < 0x0060) {  // Fullwidth ASCII variant
         return GETCLASSFROMTABLE(gLBClass00, (l + 0x20));
       }
       if (l < 0x00a0) {  // Halfwidth Katakana variants
         switch (l) {
           case 0x61:
-            return GetClass(0x3002, aLevel);
+            return GetClass(0x3002, aLevel, aIsChineseOrJapanese);
           case 0x62:
-            return GetClass(0x300c, aLevel);
+            return GetClass(0x300c, aLevel, aIsChineseOrJapanese);
           case 0x63:
-            return GetClass(0x300d, aLevel);
+            return GetClass(0x300d, aLevel, aIsChineseOrJapanese);
           case 0x64:
-            return GetClass(0x3001, aLevel);
+            return GetClass(0x3001, aLevel, aIsChineseOrJapanese);
           case 0x65:
-            return GetClass(0x30fb, aLevel);
+            return GetClass(0x30fb, aLevel, aIsChineseOrJapanese);
           case 0x9e:
-            return GetClass(0x309b, aLevel);
+            return GetClass(0x309b, aLevel, aIsChineseOrJapanese);
           case 0x9f:
-            return GetClass(0x309c, aLevel);
+            return GetClass(0x309c, aLevel, aIsChineseOrJapanese);
           default:
             if (IS_HALFWIDTH_IN_JISx4051_CLASS3(u)) {
               return CLASS_CLOSE;  // jis x4051 class 3
             }
             return CLASS_BREAKABLE;  // jis x4051 class 11
         }
       }
       if (l < 0x00e0) {
         return CLASS_CHARACTER;  // Halfwidth Hangul variants
       }
       if (l < 0x00f0) {
         static char16_t NarrowFFEx[16] = {
             0x00A2, 0x00A3, 0x00AC, 0x00AF, 0x00A6, 0x00A5, 0x20A9, 0x0000,
             0x2502, 0x2190, 0x2191, 0x2192, 0x2193, 0x25A0, 0x25CB, 0x0000};
-        return GetClass(NarrowFFEx[l - 0x00e0], aLevel);
+        return GetClass(NarrowFFEx[l - 0x00e0], aLevel, aIsChineseOrJapanese);
       }
     } else if (0x3100 == h) {
       if (l <= 0xbf) {  // Hangul Compatibility Jamo, Bopomofo, Kanbun
                         // XXX: This is per UAX #14, but UAX #14 may change
                         // the line breaking rules about Kanbun and Bopomofo.
         return CLASS_BREAKABLE;
       }
       if (l >= 0xf0) {  // Katakana small letters for Ainu
@@ -793,34 +803,35 @@ class ContextState {
                                 // this is true.
   bool mHasPreviousEqualsSign;  // True if we have seen a U_EQUAL
   bool mHasPreviousSlash;       // True if we have seen a U_SLASH
   bool mHasPreviousBackslash;   // True if we have seen a U_BACKSLASH
 };
 
 static int8_t ContextualAnalysis(char32_t prev, char32_t cur, char32_t next,
                                  ContextState& aState,
-                                 LineBreaker::Strictness aLevel) {
+                                 LineBreaker::Strictness aLevel,
+                                 bool aIsChineseOrJapanese) {
   // Don't return CLASS_OPEN/CLASS_CLOSE if aState.UseJISX4051 is FALSE.
 
   if (IS_HYPHEN(cur)) {
     // If next character is hyphen, we don't need to break between them.
     if (IS_HYPHEN(next)) return CLASS_CHARACTER;
     // If prev and next characters are numeric, it may be in Math context.
     // So, we should not break here.
     bool prevIsNum = IS_ASCII_DIGIT(prev);
     bool nextIsNum = IS_ASCII_DIGIT(next);
     if (prevIsNum && nextIsNum) return CLASS_NUMERIC;
     // If one side is numeric and the other is a character, or if both sides are
     // characters, the hyphen should be breakable.
     if (!aState.UseConservativeBreaking(1)) {
       char32_t prevOfHyphen = aState.GetPreviousNonHyphenCharacter();
       if (prevOfHyphen && next) {
-        int8_t prevClass = GetClass(prevOfHyphen, aLevel);
-        int8_t nextClass = GetClass(next, aLevel);
+        int8_t prevClass = GetClass(prevOfHyphen, aLevel, aIsChineseOrJapanese);
+        int8_t nextClass = GetClass(next, aLevel, aIsChineseOrJapanese);
         bool prevIsNumOrCharOrClose =
             prevIsNum ||
             (prevClass == CLASS_CHARACTER &&
              !NEED_CONTEXTUAL_ANALYSIS(prevOfHyphen)) ||
             prevClass == CLASS_CLOSE || prevClass == CLASS_CLOSE_LIKE_CHARACTER;
         bool nextIsNumOrCharOrOpen =
             nextIsNum ||
             (nextClass == CLASS_CHARACTER && !NEED_CONTEXTUAL_ANALYSIS(next)) ||
@@ -871,17 +882,17 @@ static int8_t ContextualAnalysis(char32_
       // in various Western languages varies too much; see bug #450088
       // discussion.
       if (!aState.UseConservativeBreaking() && IS_CJK_CHAR(next))
         return CLASS_OPEN;
     } else {
       NS_ERROR("Forgot to handle the current character!");
     }
   }
-  return GetClass(cur, aLevel);
+  return GetClass(cur, aLevel, aIsChineseOrJapanese);
 }
 
 int32_t LineBreaker::WordMove(const char16_t* aText, uint32_t aLen,
                               uint32_t aPos, int8_t aDirection) {
   bool textNeedsJISx4051 = false;
   int32_t begin, end;
 
   for (begin = aPos; begin > 0 && !NS_IsSpace(aText[begin - 1]); --begin) {
@@ -904,17 +915,17 @@ int32_t LineBreaker::WordMove(const char
     // Also fall back to this when out of memory.
     if (aDirection < 0) {
       ret = (begin == int32_t(aPos)) ? begin - 1 : begin;
     } else {
       ret = end;
     }
   } else {
     GetJISx4051Breaks(aText + begin, end - begin, WordBreak::Normal,
-                      Strictness::Auto, breakState.Elements());
+                      Strictness::Auto, false, breakState.Elements());
 
     ret = aPos;
     do {
       ret += aDirection;
     } while (begin < ret && ret < end && !breakState[ret - begin]);
   }
 
   return ret;
@@ -935,16 +946,17 @@ int32_t LineBreaker::Prev(const char16_t
                "Bad position passed to nsJISx4051LineBreaker::Prev");
 
   int32_t prevPos = WordMove(aText, aLen, aPos, -1);
   return prevPos > 0 ? prevPos : NS_LINEBREAKER_NEED_MORE_TEXT;
 }
 
 void LineBreaker::GetJISx4051Breaks(const char16_t* aChars, uint32_t aLength,
                                     WordBreak aWordBreak, Strictness aLevel,
+                                    bool aIsChineseOrJapanese,
                                     uint8_t* aBreakBefore) {
   uint32_t cur;
   int8_t lastClass = CLASS_NONE;
   ContextState state(aChars, aLength);
 
   for (cur = 0; cur < aLength; ++cur, state.AdvanceIndex()) {
     char32_t ch = state.GetUnicodeCharAt(cur);
     uint32_t chLen = ch > 0xFFFFu ? 2 : 1;
@@ -963,21 +975,22 @@ void LineBreaker::GetJISx4051Breaks(cons
       } else {
         prev = 0;
       }
       if (cur + chLen < aLength) {
         next = state.GetUnicodeCharAt(cur + chLen);
       } else {
         next = 0;
       }
-      cl = ContextualAnalysis(prev, ch, next, state, aLevel);
+      cl = ContextualAnalysis(prev, ch, next, state, aLevel,
+                              aIsChineseOrJapanese);
     } else {
       if (ch == U_EQUAL) state.NotifySeenEqualsSign();
       state.NotifyNonHyphenCharacter(ch);
-      cl = GetClass(ch, aLevel);
+      cl = GetClass(ch, aLevel, aIsChineseOrJapanese);
     }
 
     // To implement word-break:break-all, we overwrite the line-break class of
     // alphanumeric characters so they are treated the same as ideographic.
     // The relevant characters will have been assigned CLASS_CHARACTER or
     // CLASS_CLOSE by GetClass(), but those classes also include others that
     // we don't want to touch here, so we re-check the Unicode line-break class
     // to determine which ones to modify.
@@ -1009,17 +1022,17 @@ void LineBreaker::GetJISx4051Breaks(cons
     aBreakBefore[cur] = allowBreak;
     if (allowBreak) state.NotifyBreakBefore();
     lastClass = cl;
     if (CLASS_COMPLEX == cl) {
       uint32_t end = cur + chLen;
 
       while (end < aLength) {
         char32_t c = state.GetUnicodeCharAt(end);
-        if (CLASS_COMPLEX != GetClass(c, aLevel)) {
+        if (CLASS_COMPLEX != GetClass(c, aLevel, false)) {
           break;
         }
         ++end;
         if (c > 0xFFFFU) {  // it was a surrogate pair
           ++end;
         }
       }
 
@@ -1048,33 +1061,34 @@ void LineBreaker::GetJISx4051Breaks(cons
       aBreakBefore[cur] = false;
       state.AdvanceIndex();
     }
   }
 }
 
 void LineBreaker::GetJISx4051Breaks(const uint8_t* aChars, uint32_t aLength,
                                     WordBreak aWordBreak, Strictness aLevel,
+                                    bool aIsChineseOrJapanese,
                                     uint8_t* aBreakBefore) {
   uint32_t cur;
   int8_t lastClass = CLASS_NONE;
   ContextState state(aChars, aLength);
 
   for (cur = 0; cur < aLength; ++cur, state.AdvanceIndex()) {
     char32_t ch = aChars[cur];
     int8_t cl;
 
     if (NEED_CONTEXTUAL_ANALYSIS(ch)) {
       cl = ContextualAnalysis(cur > 0 ? aChars[cur - 1] : U_NULL, ch,
                               cur + 1 < aLength ? aChars[cur + 1] : U_NULL,
-                              state, aLevel);
+                              state, aLevel, aIsChineseOrJapanese);
     } else {
       if (ch == U_EQUAL) state.NotifySeenEqualsSign();
       state.NotifyNonHyphenCharacter(ch);
-      cl = GetClass(ch, aLevel);
+      cl = GetClass(ch, aLevel, aIsChineseOrJapanese);
     }
     if (aWordBreak == WordBreak::BreakAll &&
         (cl == CLASS_CHARACTER || cl == CLASS_CLOSE)) {
       auto cls = GetLineBreakClass(ch);
       // Don't need to check additional Japanese/Korean classes in 8-bit
       if (cls == U_LB_ALPHABETIC || cls == U_LB_NUMERIC ||
           cls == U_LB_COMPLEX_CONTEXT) {
         cl = CLASS_BREAKABLE;
--- a/intl/lwbrk/LineBreaker.h
+++ b/intl/lwbrk/LineBreaker.h
@@ -40,20 +40,20 @@ class LineBreaker {
   // Call this on a word with whitespace at either end. We will apply JISx4051
   // rules to find breaks inside the word. aBreakBefore is set to the break-
   // before status of each character; aBreakBefore[0] will always be false
   // because we never return a break before the first character.
   // aLength is the length of the aText array and also the length of the
   // aBreakBefore output array.
   void GetJISx4051Breaks(const char16_t* aText, uint32_t aLength,
                          WordBreak aWordBreak, Strictness aLevel,
-                         uint8_t* aBreakBefore);
+                         bool aIsChineseOrJapanese, uint8_t* aBreakBefore);
   void GetJISx4051Breaks(const uint8_t* aText, uint32_t aLength,
                          WordBreak aWordBreak, Strictness aLevel,
-                         uint8_t* aBreakBefore);
+                         bool aIsChineseOrJapanese, uint8_t* aBreakBefore);
 
  private:
   ~LineBreaker() {}
 
   int32_t WordMove(const char16_t* aText, uint32_t aLen, uint32_t aPos,
                    int8_t aDirection);
 };