Bug 1640408 - Check Unicode general category to identify punctuation marks in word-breaker. r=m_kato
author Jonathan Kew <jkew@mozilla.com>
Wed, 03 Jun 2020 15:24:29 +0000
changeset 533724 430ea4182cefb1b2365d00c8423c9c3390cd9a62
parent 533723 702cb5d63666fc667d12265e0fb1028230bfdc1c
child 533725 1b82750c2f903f8924cc4539eb0b41db72a7ed2e
push id 37477
push user dluca@mozilla.com
push date Thu, 04 Jun 2020 04:19:27 +0000
treeherder mozilla-central@6fd265574f27 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers m_kato
bugs 1640408
milestone 79.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1640408 - Check Unicode general category to identify punctuation marks in word-breaker. r=m_kato

Differential Revision: https://phabricator.services.mozilla.com/D77655
intl/lwbrk/WordBreaker.cpp
--- a/intl/lwbrk/WordBreaker.cpp
+++ b/intl/lwbrk/WordBreaker.cpp
@@ -89,16 +89,19 @@ WordBreakClass WordBreaker::GetClass(cha
           (c == '_' && !sStopAtUnderscore)) {
         return kWbClassAlphaLetter;
       }
       return kWbClassPunct;
     }
     if (c == 0x00A0 /*NBSP*/) {
       return kWbClassSpace;
     }
+    if (GetGenCategory(c) == nsUGenCategory::kPunctuation) {
+      return kWbClassPunct;
+    }
     if (IsScriptioContinua(c)) {
       return kWbClassScriptioContinua;
     }
     return kWbClassAlphaLetter;
   }
   if (IS_HAN(c)) {
     return kWbClassHanLetter;
   }
@@ -106,16 +109,19 @@ WordBreakClass WordBreaker::GetClass(cha
     return kWbClassKatakanaLetter;
   }
   if (IS_HIRAGANA(c)) {
     return kWbClassHiraganaLetter;
   }
   if (IS_HALFWIDTHKATAKANA(c)) {
     return kWbClassHWKatakanaLetter;
   }
+  if (GetGenCategory(c) == nsUGenCategory::kPunctuation) {
+    return kWbClassPunct;
+  }
   if (IsScriptioContinua(c)) {
     return kWbClassScriptioContinua;
   }
   return kWbClassAlphaLetter;
 }
 
 WordRange WordBreaker::FindWord(const char16_t* aText, uint32_t aTextLen,
                                 uint32_t aOffset) {