Bug 1595645 - Add support for characters with CLASS_CLOSE_LIKE_CHARACTER missed in implementation of word-break:break-all. r=m_kato
author: Jonathan Kew <jkew@mozilla.com>
Thu, 14 Nov 2019 10:34:54 +0000
changeset 501920 1ff83b67e45381297b75de8acff22971492ccaf8
parent 501919 c1e90bf96e148837a484347c9e4cacbe9cdd028c
child 501921 d4136e8170903ad9ad35cf0fff0fae8365d21b03
push id: 114172
push user: dluca@mozilla.com
push date: Tue, 19 Nov 2019 11:31:10 +0000
treeherder: mozilla-inbound@b5c5ba07d3db [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: m_kato
bugs: 1595645
milestone: 72.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1595645 - Add support for characters with CLASS_CLOSE_LIKE_CHARACTER missed in implementation of word-break:break-all. r=m_kato

Differential Revision: https://phabricator.services.mozilla.com/D52674
intl/lwbrk/LineBreaker.cpp
--- a/intl/lwbrk/LineBreaker.cpp
+++ b/intl/lwbrk/LineBreaker.cpp
@@ -984,21 +984,22 @@ void LineBreaker::GetJISx4051Breaks(cons
       if (ch == U_EQUAL) state.NotifySeenEqualsSign();
       state.NotifyNonHyphenCharacter(ch);
       cl = GetClass(ch, aLevel, aIsChineseOrJapanese);
     }
 
     // To implement word-break:break-all, we overwrite the line-break class of
     // alphanumeric characters so they are treated the same as ideographic.
     // The relevant characters will have been assigned CLASS_CHARACTER, _CLOSE,
-    // or _NUMERIC by GetClass(), but those classes also include others that
-    // we don't want to touch here, so we re-check the Unicode line-break class
-    // to determine which ones to modify.
+    // _CLOSE_LIKE_CHARACTER, or _NUMERIC by GetClass(), but those classes also
+    // include others that we don't want to touch here, so we re-check the
+    // Unicode line-break class to determine which ones to modify.
     if (aWordBreak == WordBreak::BreakAll &&
-        (cl == CLASS_CHARACTER || cl == CLASS_CLOSE || cl == CLASS_NUMERIC)) {
+        (cl == CLASS_CHARACTER || cl == CLASS_CLOSE ||
+         cl == CLASS_CLOSE_LIKE_CHARACTER || cl == CLASS_NUMERIC)) {
       auto cls = GetLineBreakClass(ch);
       if (cls == U_LB_ALPHABETIC || cls == U_LB_NUMERIC ||
           cls == U_LB_AMBIGUOUS || cls == U_LB_COMPLEX_CONTEXT ||
           /* Additional Japanese and Korean LB classes; CSS Text spec doesn't
              explicitly mention these, but this appears to give expected
              behavior (spec issue?) */
           cls == U_LB_CONDITIONAL_JAPANESE_STARTER ||
           (cls >= U_LB_H2 && cls <= U_LB_JV)) {
@@ -1079,17 +1080,18 @@ void LineBreaker::GetJISx4051Breaks(cons
                               cur + 1 < aLength ? aChars[cur + 1] : U_NULL,
                               state, aLevel, aIsChineseOrJapanese);
     } else {
       if (ch == U_EQUAL) state.NotifySeenEqualsSign();
       state.NotifyNonHyphenCharacter(ch);
       cl = GetClass(ch, aLevel, aIsChineseOrJapanese);
     }
     if (aWordBreak == WordBreak::BreakAll &&
-        (cl == CLASS_CHARACTER || cl == CLASS_CLOSE || cl == CLASS_NUMERIC)) {
+        (cl == CLASS_CHARACTER || cl == CLASS_CLOSE ||
+         cl == CLASS_CLOSE_LIKE_CHARACTER || cl == CLASS_NUMERIC)) {
       auto cls = GetLineBreakClass(ch);
       // Don't need to check additional Japanese/Korean classes in 8-bit
       if (cls == U_LB_ALPHABETIC || cls == U_LB_NUMERIC ||
           cls == U_LB_COMPLEX_CONTEXT) {
         cl = CLASS_BREAKABLE;
       }
     }