Bug 1554998 - Fix the handling of numeric characters by word-break:break-all. r=masayuki a=jcristau
author: Jonathan Kew <jkew@mozilla.com>
Wed, 29 May 2019 12:05:20 +0000
changeset 536601 f787bc6258149666dd364f2d559c032a4b1a07e2
parent 536600 4e5980a74c9c5023f5f64741a4bee1c1055af24c
child 536602 44ec76478317fdc6fba90c55771f6514c32e388b
push id: 2082
push user: ffxbld-merge
push date: Mon, 01 Jul 2019 08:34:18 +0000
treeherder: mozilla-release@2fb19d0466d2 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: masayuki, jcristau
bugs: 1554998
milestone: 68.0
Bug 1554998 - Fix the handling of numeric characters by word-break:break-all. r=masayuki a=jcristau Differential Revision: https://phabricator.services.mozilla.com/D32969
intl/lwbrk/LineBreaker.cpp
--- a/intl/lwbrk/LineBreaker.cpp
+++ b/intl/lwbrk/LineBreaker.cpp
@@ -493,17 +493,17 @@ static int8_t GetClass(uint32_t u) {
       /* EXCLAMATION = 11,                  [EX] */ CLASS_CHARACTER,
       /* GLUE = 12,                         [GL] */ CLASS_NON_BREAKABLE,
       /* HYPHEN = 13,                       [HY] */ CLASS_CHARACTER,
       /* IDEOGRAPHIC = 14,                  [ID] */ CLASS_BREAKABLE,
       /* INSEPARABLE = 15,                  [IN] */ CLASS_CLOSE_LIKE_CHARACTER,
       /* INFIX_NUMERIC = 16,                [IS] */ CLASS_CHARACTER,
       /* LINE_FEED = 17,                    [LF] */ CLASS_BREAKABLE,
       /* NONSTARTER = 18,                   [NS] */ CLASS_CLOSE_LIKE_CHARACTER,
-      /* NUMERIC = 19,                      [NU] */ CLASS_CHARACTER,
+      /* NUMERIC = 19,                      [NU] */ CLASS_NUMERIC,
       /* OPEN_PUNCTUATION = 20,             [OP] */ CLASS_CHARACTER,
       /* POSTFIX_NUMERIC = 21,              [PO] */ CLASS_CHARACTER,
       /* PREFIX_NUMERIC = 22,               [PR] */ CLASS_CHARACTER,
       /* QUOTATION = 23,                    [QU] */ CLASS_CHARACTER,
       /* COMPLEX_CONTEXT = 24,              [SA] */ CLASS_CHARACTER,
       /* SURROGATE = 25,                    [SG] */ CLASS_CHARACTER,
       /* SPACE = 26,                        [SP] */ CLASS_BREAKABLE,
       /* BREAK_SYMBOLS = 27,                [SY] */ CLASS_CHARACTER,
@@ -887,22 +887,22 @@ void LineBreaker::GetJISx4051Breaks(cons
     } else {
       if (ch == U_EQUAL) state.NotifySeenEqualsSign();
       state.NotifyNonHyphenCharacter(ch);
       cl = GetClass(ch);
     }
 
     // To implement word-break:break-all, we overwrite the line-break class of
     // alphanumeric characters so they are treated the same as ideographic.
-    // The relevant characters will have been assigned CLASS_CHARACTER or
-    // CLASS_CLOSE by GetClass(), but those classes also include others that
+    // The relevant characters will have been assigned CLASS_CHARACTER, _CLOSE,
+    // or _NUMERIC by GetClass(), but those classes also include others that
     // we don't want to touch here, so we re-check the Unicode line-break class
     // to determine which ones to modify.
     if (aWordBreak == LineBreaker::kWordBreak_BreakAll &&
-        (cl == CLASS_CHARACTER || cl == CLASS_CLOSE)) {
+        (cl == CLASS_CHARACTER || cl == CLASS_CLOSE || cl == CLASS_NUMERIC)) {
       auto cls = GetLineBreakClass(ch);
       if (cls == U_LB_ALPHABETIC || cls == U_LB_NUMERIC ||
           cls == U_LB_AMBIGUOUS || cls == U_LB_COMPLEX_CONTEXT ||
           /* Additional Japanese and Korean LB classes; CSS Text spec doesn't
              explicitly mention these, but this appears to give expected
              behavior (spec issue?) */
           cls == U_LB_CONDITIONAL_JAPANESE_STARTER ||
           (cls >= U_LB_H2 && cls <= U_LB_JV)) {
@@ -980,18 +980,18 @@ void LineBreaker::GetJISx4051Breaks(cons
       cl = ContextualAnalysis(cur > 0 ? aChars[cur - 1] : U_NULL, ch,
                               cur + 1 < aLength ? aChars[cur + 1] : U_NULL,
                               state);
     } else {
       if (ch == U_EQUAL) state.NotifySeenEqualsSign();
       state.NotifyNonHyphenCharacter(ch);
       cl = GetClass(ch);
     }
     if (aWordBreak == LineBreaker::kWordBreak_BreakAll &&
-        (cl == CLASS_CHARACTER || cl == CLASS_CLOSE)) {
+        (cl == CLASS_CHARACTER || cl == CLASS_CLOSE || cl == CLASS_NUMERIC)) {
       auto cls = GetLineBreakClass(ch);
       // Don't need to check additional Japanese/Korean classes in 8-bit
       if (cls == U_LB_ALPHABETIC || cls == U_LB_NUMERIC ||
           cls == U_LB_COMPLEX_CONTEXT) {
         cl = CLASS_BREAKABLE;
       }
     }