Bug 1011369 - Implement loose|normal|strict line-break values by remapping the linebreak class of affected characters. r=m_kato
authorJonathan Kew <jkew@mozilla.com>
Mon, 20 May 2019 20:46:57 +0000
changeset 474597 436e3199c386c6a7a5e70cc8685dd1247827cb90
parent 474596 f339909acbc30e4b1380e45318ccbe187a0d49ec
child 474598 2607e7773ed5c4616d09f9ea2645c9204568b34a
push id36042
push userdvarga@mozilla.com
push dateTue, 21 May 2019 04:19:40 +0000
treeherdermozilla-central@ca560ff55451 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersm_kato
bugs1011369
milestone69.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1011369 - Implement loose|normal|strict line-break values by remapping the linebreak class of affected characters. r=m_kato Differential Revision: https://phabricator.services.mozilla.com/D30787
intl/lwbrk/LineBreaker.cpp
intl/unicharutil/util/nsUnicodeProperties.h
--- a/intl/lwbrk/LineBreaker.cpp
+++ b/intl/lwbrk/LineBreaker.cpp
@@ -122,17 +122,17 @@ already_AddRefed<LineBreaker> LineBreake
 
               1 [a] 7  8  9 [b]15 18
 
         1     X  X  X  X  X  X  X  X
       [a]        X
         7        X  X
         8        X              X
         9        X
-      [b]        X
+      [b]        X  x
        15        X        X     X  X
        18        X              X  X
 
 
    4. We add COMPLEX characters and make it breakable w/ all ther class
       except after class 1 and before class [a]
 
    Class of
@@ -141,17 +141,17 @@ already_AddRefed<LineBreaker> LineBreake
 
               1 [a] 7  8  9 [b]15 18 COMPLEX
 
         1     X  X  X  X  X  X  X  X  X
       [a]        X
         7        X  X
         8        X              X
         9        X
-      [b]        X
+      [b]        X  x
        15        X        X     X  X
        18        X              X  X
   COMPLEX        X                    T
 
      T : need special handling
 
 
    5. However, we need two special class for some punctuations/parentheses,
@@ -169,17 +169,17 @@ already_AddRefed<LineBreaker> LineBreake
 
               1 [a] 7  8  9 [b]15 18 COMPLEX [c] [d]
 
         1     X  X  X  X  X  X  X  X  X       X    X
       [a]        X                            X    X
         7        X  X
         8        X              X
         9        X
-      [b]        X                                 X
+      [b]        X  x                              X
        15        X        X     X  X          X    X
        18        X              X  X          X    X
   COMPLEX        X                    T
       [c]     X  X  X  X  X  X  X  X  X       X    X
       [d]        X              X  X               X
 
 
    6. And Unicode has "NON-BREAK" characters. The lines should be broken around
@@ -191,17 +191,17 @@ already_AddRefed<LineBreaker> LineBreake
 
               1 [a] 7  8  9 [b]15 18 COMPLEX [c] [d] [e]
 
         1     X  X  X  X  X  X  X  X  X       X    X   X
       [a]        X                                 X   X
         7        X  X                                  X
         8        X              X                      X
         9        X                                     X
-      [b]        X                                 X   X
+      [b]        X  x                              X   X
        15        X        X     X  X          X    X   X
        18        X              X  X          X    X   X
   COMPLEX        X                    T                X
       [c]     X  X  X  X  X  X  X  X  X       X    X   X
       [d]        X              X  X               X   X
       [e]     X  X  X  X  X  X  X  X  X       X    X   X
 
 
@@ -210,29 +210,29 @@ already_AddRefed<LineBreaker> LineBreake
 
                  18    <-   1
 
        1  0000 1111 1111 1111  = 0x0FFF
       [a] 0000 1100 0000 0010  = 0x0C02
        7  0000 1000 0000 0110  = 0x0806
        8  0000 1000 0100 0010  = 0x0842
        9  0000 1000 0000 0010  = 0x0802
-      [b] 0000 1100 0000 0010  = 0x0C02
+      [b] 0000 1100 0000 0110  = 0x0C06
       15  0000 1110 1101 0010  = 0x0ED2
       18  0000 1110 1100 0010  = 0x0EC2
  COMPLEX  0000 1001 0000 0010  = 0x0902
       [c] 0000 1111 1111 1111  = 0x0FFF
       [d] 0000 1100 1100 0010  = 0x0CC2
       [e] 0000 1111 1111 1111  = 0x0FFF
 */
 
 #define MAX_CLASSES 12
 
 static const uint16_t gPair[MAX_CLASSES] = {0x0FFF, 0x0C02, 0x0806, 0x0842,
-                                            0x0802, 0x0C02, 0x0ED2, 0x0EC2,
+                                            0x0802, 0x0C06, 0x0ED2, 0x0EC2,
                                             0x0902, 0x0FFF, 0x0CC2, 0x0FFF};
 
 /*
 
    8. And if the character is not enough far from word start, word end and
       another break point, we should not break in non-CJK languages.
       I.e., Don't break around 15, 18, [c] and [d], but don't change
       that if they are related to [b].
@@ -243,42 +243,42 @@ static const uint16_t gPair[MAX_CLASSES]
 
               1 [a] 7  8  9 [b]15 18 COMPLEX [c] [d] [e]
 
         1     X  X  X  X  X  X  X  X  X       X    X   X
       [a]        X              X  X          X    X   X
         7        X  X           X  X          X    X   X
         8        X              X  X          X    X   X
         9        X              X  X          X    X   X
-      [b]        X                                 X   X
+      [b]        X  x                              X   X
        15     X  X  X  X  X     X  X  X       X    X   X
        18     X  X  X  X  X     X  X  X       X    X   X
   COMPLEX        X              X  X  T       X    X   X
       [c]     X  X  X  X  X  X  X  X  X       X    X   X
       [d]     X  X  X  X  X     X  X  X       X    X   X
       [e]     X  X  X  X  X  X  X  X  X       X    X   X
 
                  18    <-   1
 
        1  0000 1111 1111 1111  = 0x0FFF
       [a] 0000 1110 1100 0010  = 0x0EC2
        7  0000 1110 1100 0110  = 0x0EC6
        8  0000 1110 1100 0010  = 0x0EC2
        9  0000 1110 1100 0010  = 0x0EC2
-      [b] 0000 1100 0000 0010  = 0x0C02
+      [b] 0000 1100 0000 0110  = 0x0C06
       15  0000 1111 1101 1111  = 0x0FDF
       18  0000 1111 1101 1111  = 0x0FDF
  COMPLEX  0000 1111 1100 0010  = 0x0FC2
       [c] 0000 1111 1111 1111  = 0x0FFF
       [d] 0000 1111 1101 1111  = 0x0FDF
       [e] 0000 1111 1111 1111  = 0x0FFF
 */
 
 static const uint16_t gPairConservative[MAX_CLASSES] = {
-    0x0FFF, 0x0EC2, 0x0EC6, 0x0EC2, 0x0EC2, 0x0C02,
+    0x0FFF, 0x0EC2, 0x0EC6, 0x0EC2, 0x0EC2, 0x0C06,
     0x0FDF, 0x0FDF, 0x0FC2, 0x0FFF, 0x0FDF, 0x0FFF};
 
 /*
 
    9. Now we map the class to number
 
       0: 1
       1: [a]- 2, 3, 4, 5, 6
@@ -365,123 +365,26 @@ static inline bool IS_NONBREAKABLE_SPACE
 
 static inline bool IS_HYPHEN(char16_t u) {
   return (u == U_HYPHEN || u == 0x058A ||  // ARMENIAN HYPHEN
           u == 0x2010 ||                   // HYPHEN
           u == 0x2012 ||                   // FIGURE DASH
           u == 0x2013);                    // EN DASH
 }
 
-static int8_t GetClass(uint32_t u) {
-  if (u < 0x10000) {
-    uint16_t h = u & 0xFF00;
-    uint16_t l = u & 0x00ff;
-
-    // Handle 3 range table first
-    if (0x0000 == h) {
-      return GETCLASSFROMTABLE(gLBClass00, l);
-    }
-    if (0x1700 == h) {
-      return GETCLASSFROMTABLE(gLBClass17, l);
-    }
-    if (NS_NeedsPlatformNativeHandling(u)) {
-      return CLASS_COMPLEX;
-    }
-    if (0x0E00 == h) {
-      return GETCLASSFROMTABLE(gLBClass0E, l);
-    }
-    if (0x2000 == h) {
-      return GETCLASSFROMTABLE(gLBClass20, l);
-    }
-    if (0x2100 == h) {
-      return GETCLASSFROMTABLE(gLBClass21, l);
-    }
-    if (0x3000 == h) {
-      return GETCLASSFROMTABLE(gLBClass30, l);
-    }
-    if (0xff00 == h) {
-      if (l < 0x0060) {  // Fullwidth ASCII variant
-        return GETCLASSFROMTABLE(gLBClass00, (l + 0x20));
-      }
-      if (l < 0x00a0) {  // Halfwidth Katakana variants
-        switch (l) {
-          case 0x61:
-            return GetClass(0x3002);
-          case 0x62:
-            return GetClass(0x300c);
-          case 0x63:
-            return GetClass(0x300d);
-          case 0x64:
-            return GetClass(0x3001);
-          case 0x65:
-            return GetClass(0x30fb);
-          case 0x9e:
-            return GetClass(0x309b);
-          case 0x9f:
-            return GetClass(0x309c);
-          default:
-            if (IS_HALFWIDTH_IN_JISx4051_CLASS3(u)) {
-              return CLASS_CLOSE;  // jis x4051 class 3
-            }
-            return CLASS_BREAKABLE;  // jis x4051 class 11
-        }
-      }
-      if (l < 0x00e0) {
-        return CLASS_CHARACTER;  // Halfwidth Hangul variants
-      }
-      if (l < 0x00f0) {
-        static char16_t NarrowFFEx[16] = {
-            0x00A2, 0x00A3, 0x00AC, 0x00AF, 0x00A6, 0x00A5, 0x20A9, 0x0000,
-            0x2502, 0x2190, 0x2191, 0x2192, 0x2193, 0x25A0, 0x25CB, 0x0000};
-        return GetClass(NarrowFFEx[l - 0x00e0]);
-      }
-    } else if (0x3100 == h) {
-      if (l <= 0xbf) {  // Hangul Compatibility Jamo, Bopomofo, Kanbun
-                        // XXX: This is per UAX #14, but UAX #14 may change
-                        // the line breaking rules about Kanbun and Bopomofo.
-        return CLASS_BREAKABLE;
-      }
-      if (l >= 0xf0) {  // Katakana small letters for Ainu
-        return CLASS_CLOSE;
-      }
-    } else if (0x0300 == h) {
-      if (0x4F == l || (0x5C <= l && l <= 0x62)) {
-        return CLASS_NON_BREAKABLE;
-      }
-    } else if (0x0500 == h) {
-      // ARMENIAN HYPHEN (for "Breaking Hyphens" of UAX#14)
-      if (l == 0x8A) {
-        return GETCLASSFROMTABLE(gLBClass00, uint16_t(U_HYPHEN));
-      }
-    } else if (0x0F00 == h) {
-      if (0x08 == l || 0x0C == l || 0x12 == l) {
-        return CLASS_NON_BREAKABLE;
-      }
-    } else if (0x1800 == h) {
-      if (0x0E == l) {
-        return CLASS_NON_BREAKABLE;
-      }
-    } else if (0x1600 == h) {
-      if (0x80 == l) {  // U+1680 OGHAM SPACE MARK
-        return CLASS_BREAKABLE;
-      }
-    } else if (u == 0xfeff) {
-      return CLASS_NON_BREAKABLE;
-    }
-  }
-
+static int8_t GetClass(uint32_t u, LineBreaker::Strictness aLevel) {
   // Mapping for Unicode LineBreak.txt classes to the (simplified) set of
   // character classes used here.
   // XXX The mappings here were derived by comparing the Unicode LineBreak
   //     values of BMP characters to the classes our existing GetClass returns
   //     for the same codepoints; in cases where characters with the same
   //     LineBreak class mapped to various classes here, I picked what seemed
   //     the most prevalent equivalence.
   //     Some of these are unclear to me, but currently they are ONLY used
-  //     for characters not handled by the old code above, so all the JISx405
+  //     for characters not handled by the old code below, so all the JISx4051
   //     special cases should already be accounted for.
   static const int8_t sUnicodeLineBreakToClass[] = {
       /* UNKNOWN = 0,                       [XX] */ CLASS_CHARACTER,
       /* AMBIGUOUS = 1,                     [AI] */ CLASS_CHARACTER,
       /* ALPHABETIC = 2,                    [AL] */ CLASS_CHARACTER,
       /* BREAK_BOTH = 3,                    [B2] */ CLASS_CHARACTER,
       /* BREAK_AFTER = 4,                   [BA] */ CLASS_CHARACTER,
       /* BREAK_BEFORE = 5,                  [BB] */ CLASS_OPEN_LIKE_CHARACTER,
@@ -523,16 +426,196 @@ static int8_t GetClass(uint32_t u) {
       /* E_MODIFIER = 41,                   [EM] */ CLASS_CHARACTER,
       /* ZWJ = 42,                          [ZWJ]*/ CLASS_CHARACTER};
 
   static_assert(U_LB_COUNT == mozilla::ArrayLength(sUnicodeLineBreakToClass),
                 "Gecko vs ICU LineBreak class mismatch");
 
   auto cls = GetLineBreakClass(u);
   MOZ_ASSERT(cls < mozilla::ArrayLength(sUnicodeLineBreakToClass));
+
+  // Overrides based on rules for the different line-break values given in
+  // https://drafts.csswg.org/css-text-3/#line-break-property
+  switch (aLevel) {
+    case LineBreaker::Strictness::Auto:
+      // For now, just use legacy Gecko behavior.
+      // XXX Possible enhancement - vary strictness according to line width
+      // or other criteria.
+      break;
+    case LineBreaker::Strictness::Strict:
+      if (cls == U_LB_CONDITIONAL_JAPANESE_STARTER ||
+          (u == 0x3095 || u == 0x3096 || u == 0x30f5 || u == 0x30f6)) {
+        return CLASS_CLOSE;
+      }
+      if (cls == U_LB_INSEPARABLE) {
+        return CLASS_NON_BREAKABLE_BETWEEN_SAME_CLASS;
+      }
+      if (u == 0x3005 || u == 0x303B || u == 0x309D || u == 0x309E ||
+          u == 0x30FD || u == 0x30FE) {
+        return CLASS_CLOSE_LIKE_CHARACTER;
+      }
+      if (cls == U_LB_POSTFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
+        return CLASS_CLOSE_LIKE_CHARACTER;
+      }
+      if (cls == U_LB_PREFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
+        return CLASS_OPEN_LIKE_CHARACTER;
+      }
+      if (u == 0x2010 || u == 0x2013 || u == 0x301C || u == 0x30A0) {
+        return CLASS_CLOSE_LIKE_CHARACTER;
+      }
+      break;
+    case LineBreaker::Strictness::Normal:
+      if (cls == U_LB_CONDITIONAL_JAPANESE_STARTER) {
+        return CLASS_BREAKABLE;
+      }
+      if (cls == U_LB_INSEPARABLE) {
+        return CLASS_NON_BREAKABLE_BETWEEN_SAME_CLASS;
+      }
+      if (u == 0x3005 || u == 0x303B || u == 0x309D || u == 0x309E ||
+          u == 0x30FD || u == 0x30FE) {
+        return CLASS_CLOSE_LIKE_CHARACTER;
+      }
+      if (cls == U_LB_POSTFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
+        return CLASS_CLOSE_LIKE_CHARACTER;
+      }
+      if (cls == U_LB_PREFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
+        return CLASS_OPEN_LIKE_CHARACTER;
+      }
+      if (u == 0x2010 || u == 0x2013 || u == 0x301C || u == 0x30A0) {
+        return CLASS_BREAKABLE;
+      }
+      break;
+    case LineBreaker::Strictness::Loose:
+      if (cls == U_LB_CONDITIONAL_JAPANESE_STARTER) {
+        return CLASS_BREAKABLE;
+      }
+      if (u == 0x3005 || u == 0x303B || u == 0x309D || u == 0x309E ||
+          u == 0x30FD || u == 0x30FE) {
+        return CLASS_BREAKABLE;
+      }
+      if (cls == U_LB_INSEPARABLE) {
+        return CLASS_BREAKABLE;
+      }
+      if (u == 0x30FB || u == 0xFF1A || u == 0xFF1B || u == 0xFF65 ||
+          u == 0x203C || u == 0x2047 || u == 0x2048 || u == 0x2049 ||
+          u == 0xFF01 || u == 0xFF1F) {
+        return CLASS_BREAKABLE;
+      }
+      if (cls == U_LB_POSTFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
+        return CLASS_BREAKABLE;
+      }
+      if (cls == U_LB_PREFIX_NUMERIC && IsEastAsianWidthAFW(u)) {
+        return CLASS_BREAKABLE;
+      }
+      if (u == 0x2010 || u == 0x2013 || u == 0x301C || u == 0x30A0) {
+        return CLASS_BREAKABLE;
+      }
+      break;
+    case LineBreaker::Strictness::Anywhere:
+      MOZ_ASSERT_UNREACHABLE("should have been handled already");
+      break;
+  }
+
+  if (u < 0x10000) {
+    uint16_t h = u & 0xFF00;
+    uint16_t l = u & 0x00ff;
+
+    // Handle 3 range table first
+    if (0x0000 == h) {
+      return GETCLASSFROMTABLE(gLBClass00, l);
+    }
+    if (0x1700 == h) {
+      return GETCLASSFROMTABLE(gLBClass17, l);
+    }
+    if (NS_NeedsPlatformNativeHandling(u)) {
+      return CLASS_COMPLEX;
+    }
+    if (0x0E00 == h) {
+      return GETCLASSFROMTABLE(gLBClass0E, l);
+    }
+    if (0x2000 == h) {
+      return GETCLASSFROMTABLE(gLBClass20, l);
+    }
+    if (0x2100 == h) {
+      return GETCLASSFROMTABLE(gLBClass21, l);
+    }
+    if (0x3000 == h) {
+      return GETCLASSFROMTABLE(gLBClass30, l);
+    }
+    if (0xff00 == h) {
+      if (l < 0x0060) {  // Fullwidth ASCII variant
+        return GETCLASSFROMTABLE(gLBClass00, (l + 0x20));
+      }
+      if (l < 0x00a0) {  // Halfwidth Katakana variants
+        switch (l) {
+          case 0x61:
+            return GetClass(0x3002, aLevel);
+          case 0x62:
+            return GetClass(0x300c, aLevel);
+          case 0x63:
+            return GetClass(0x300d, aLevel);
+          case 0x64:
+            return GetClass(0x3001, aLevel);
+          case 0x65:
+            return GetClass(0x30fb, aLevel);
+          case 0x9e:
+            return GetClass(0x309b, aLevel);
+          case 0x9f:
+            return GetClass(0x309c, aLevel);
+          default:
+            if (IS_HALFWIDTH_IN_JISx4051_CLASS3(u)) {
+              return CLASS_CLOSE;  // jis x4051 class 3
+            }
+            return CLASS_BREAKABLE;  // jis x4051 class 11
+        }
+      }
+      if (l < 0x00e0) {
+        return CLASS_CHARACTER;  // Halfwidth Hangul variants
+      }
+      if (l < 0x00f0) {
+        static char16_t NarrowFFEx[16] = {
+            0x00A2, 0x00A3, 0x00AC, 0x00AF, 0x00A6, 0x00A5, 0x20A9, 0x0000,
+            0x2502, 0x2190, 0x2191, 0x2192, 0x2193, 0x25A0, 0x25CB, 0x0000};
+        return GetClass(NarrowFFEx[l - 0x00e0], aLevel);
+      }
+    } else if (0x3100 == h) {
+      if (l <= 0xbf) {  // Hangul Compatibility Jamo, Bopomofo, Kanbun
+                        // XXX: This is per UAX #14, but UAX #14 may change
+                        // the line breaking rules about Kanbun and Bopomofo.
+        return CLASS_BREAKABLE;
+      }
+      if (l >= 0xf0) {  // Katakana small letters for Ainu
+        return CLASS_CLOSE;
+      }
+    } else if (0x0300 == h) {
+      if (0x4F == l || (0x5C <= l && l <= 0x62)) {
+        return CLASS_NON_BREAKABLE;
+      }
+    } else if (0x0500 == h) {
+      // ARMENIAN HYPHEN (for "Breaking Hyphens" of UAX#14)
+      if (l == 0x8A) {
+        return GETCLASSFROMTABLE(gLBClass00, uint16_t(U_HYPHEN));
+      }
+    } else if (0x0F00 == h) {
+      if (0x08 == l || 0x0C == l || 0x12 == l) {
+        return CLASS_NON_BREAKABLE;
+      }
+    } else if (0x1800 == h) {
+      if (0x0E == l) {
+        return CLASS_NON_BREAKABLE;
+      }
+    } else if (0x1600 == h) {
+      if (0x80 == l) {  // U+1680 OGHAM SPACE MARK
+        return CLASS_BREAKABLE;
+      }
+    } else if (u == 0xfeff) {
+      return CLASS_NON_BREAKABLE;
+    }
+  }
+
   return sUnicodeLineBreakToClass[cls];
 }
 
 static bool GetPair(int8_t c1, int8_t c2) {
   NS_ASSERTION(c1 < MAX_CLASSES, "illegal classes 1");
   NS_ASSERTION(c2 < MAX_CLASSES, "illegal classes 2");
 
   return (0 == ((gPair[c1] >> c2) & 0x0001));
@@ -709,34 +792,35 @@ class ContextState {
   bool mHasNonbreakableSpace;   // if the text has no-breakable space,
                                 // this is true.
   bool mHasPreviousEqualsSign;  // True if we have seen a U_EQUAL
   bool mHasPreviousSlash;       // True if we have seen a U_SLASH
   bool mHasPreviousBackslash;   // True if we have seen a U_BACKSLASH
 };
 
 static int8_t ContextualAnalysis(char32_t prev, char32_t cur, char32_t next,
-                                 ContextState& aState) {
+                                 ContextState& aState,
+                                 LineBreaker::Strictness aLevel) {
   // Don't return CLASS_OPEN/CLASS_CLOSE if aState.UseJISX4051 is FALSE.
 
   if (IS_HYPHEN(cur)) {
     // If next character is hyphen, we don't need to break between them.
     if (IS_HYPHEN(next)) return CLASS_CHARACTER;
     // If prev and next characters are numeric, it may be in Math context.
     // So, we should not break here.
     bool prevIsNum = IS_ASCII_DIGIT(prev);
     bool nextIsNum = IS_ASCII_DIGIT(next);
     if (prevIsNum && nextIsNum) return CLASS_NUMERIC;
     // If one side is numeric and the other is a character, or if both sides are
     // characters, the hyphen should be breakable.
     if (!aState.UseConservativeBreaking(1)) {
       char32_t prevOfHyphen = aState.GetPreviousNonHyphenCharacter();
       if (prevOfHyphen && next) {
-        int8_t prevClass = GetClass(prevOfHyphen);
-        int8_t nextClass = GetClass(next);
+        int8_t prevClass = GetClass(prevOfHyphen, aLevel);
+        int8_t nextClass = GetClass(next, aLevel);
         bool prevIsNumOrCharOrClose =
             prevIsNum ||
             (prevClass == CLASS_CHARACTER &&
              !NEED_CONTEXTUAL_ANALYSIS(prevOfHyphen)) ||
             prevClass == CLASS_CLOSE || prevClass == CLASS_CLOSE_LIKE_CHARACTER;
         bool nextIsNumOrCharOrOpen =
             nextIsNum ||
             (nextClass == CLASS_CHARACTER && !NEED_CONTEXTUAL_ANALYSIS(next)) ||
@@ -787,17 +871,17 @@ static int8_t ContextualAnalysis(char32_
       // in various Western languages varies too much; see bug #450088
       // discussion.
       if (!aState.UseConservativeBreaking() && IS_CJK_CHAR(next))
         return CLASS_OPEN;
     } else {
       NS_ERROR("Forgot to handle the current character!");
     }
   }
-  return GetClass(cur);
+  return GetClass(cur, aLevel);
 }
 
 int32_t LineBreaker::WordMove(const char16_t* aText, uint32_t aLen,
                               uint32_t aPos, int8_t aDirection) {
   bool textNeedsJISx4051 = false;
   int32_t begin, end;
 
   for (begin = aPos; begin > 0 && !NS_IsSpace(aText[begin - 1]); --begin) {
@@ -879,21 +963,21 @@ void LineBreaker::GetJISx4051Breaks(cons
       } else {
         prev = 0;
       }
       if (cur + chLen < aLength) {
         next = state.GetUnicodeCharAt(cur + chLen);
       } else {
         next = 0;
       }
-      cl = ContextualAnalysis(prev, ch, next, state);
+      cl = ContextualAnalysis(prev, ch, next, state, aLevel);
     } else {
       if (ch == U_EQUAL) state.NotifySeenEqualsSign();
       state.NotifyNonHyphenCharacter(ch);
-      cl = GetClass(ch);
+      cl = GetClass(ch, aLevel);
     }
 
     // To implement word-break:break-all, we overwrite the line-break class of
     // alphanumeric characters so they are treated the same as ideographic.
     // The relevant characters will have been assigned CLASS_CHARACTER or
     // CLASS_CLOSE by GetClass(), but those classes also include others that
     // we don't want to touch here, so we re-check the Unicode line-break class
     // to determine which ones to modify.
@@ -925,17 +1009,17 @@ void LineBreaker::GetJISx4051Breaks(cons
     aBreakBefore[cur] = allowBreak;
     if (allowBreak) state.NotifyBreakBefore();
     lastClass = cl;
     if (CLASS_COMPLEX == cl) {
       uint32_t end = cur + chLen;
 
       while (end < aLength) {
         char32_t c = state.GetUnicodeCharAt(end);
-        if (CLASS_COMPLEX != GetClass(c)) {
+        if (CLASS_COMPLEX != GetClass(c, aLevel)) {
           break;
         }
         ++end;
         if (c > 0xFFFFU) {  // it was a surrogate pair
           ++end;
         }
       }
 
@@ -976,21 +1060,21 @@ void LineBreaker::GetJISx4051Breaks(cons
 
   for (cur = 0; cur < aLength; ++cur, state.AdvanceIndex()) {
     char32_t ch = aChars[cur];
     int8_t cl;
 
     if (NEED_CONTEXTUAL_ANALYSIS(ch)) {
       cl = ContextualAnalysis(cur > 0 ? aChars[cur - 1] : U_NULL, ch,
                               cur + 1 < aLength ? aChars[cur + 1] : U_NULL,
-                              state);
+                              state, aLevel);
     } else {
       if (ch == U_EQUAL) state.NotifySeenEqualsSign();
       state.NotifyNonHyphenCharacter(ch);
-      cl = GetClass(ch);
+      cl = GetClass(ch, aLevel);
     }
     if (aWordBreak == WordBreak::BreakAll &&
         (cl == CLASS_CHARACTER || cl == CLASS_CLOSE)) {
       auto cls = GetLineBreakClass(ch);
       // Don't need to check additional Japanese/Korean classes in 8-bit
       if (cls == U_LB_ALPHABETIC || cls == U_LB_NUMERIC ||
           cls == U_LB_COMPLEX_CONTEXT) {
         cl = CLASS_BREAKABLE;
--- a/intl/unicharutil/util/nsUnicodeProperties.h
+++ b/intl/unicharutil/util/nsUnicodeProperties.h
@@ -133,16 +133,30 @@ inline bool IsEastAsianWidthFWH(uint32_t
     case U_EA_AMBIGUOUS:
     case U_EA_NARROW:
     case U_EA_NEUTRAL:
       return false;
   }
   return false;
 }
 
+inline bool IsEastAsianWidthAFW(uint32_t aCh) {  // East Asian Width test
+  switch (u_getIntPropertyValue(aCh, UCHAR_EAST_ASIAN_WIDTH)) {
+    case U_EA_AMBIGUOUS:
+    case U_EA_FULLWIDTH:
+    case U_EA_WIDE:
+      return true;  // Ambiguous, Fullwidth or Wide (the "AFW" in the name)
+    case U_EA_HALFWIDTH:
+    case U_EA_NARROW:
+    case U_EA_NEUTRAL:
+      return false;  // Halfwidth, Narrow or Neutral
+  }
+  return false;  // any property value not listed in the switch
+}
+
 inline bool IsDefaultIgnorable(uint32_t aCh) {
   return u_hasBinaryProperty(aCh, UCHAR_DEFAULT_IGNORABLE_CODE_POINT);
 }
 
 inline EmojiPresentation GetEmojiPresentation(uint32_t aCh) {
   if (!u_hasBinaryProperty(aCh, UCHAR_EMOJI)) {
     return TextOnly;
   }