Bug 1331339 - Don't start a new script run when the current script appears in the next character's ScriptExtensions property, or next char is a cluster-extender. r=jrmuizel
author Jonathan Kew <jkew@mozilla.com>
Wed, 18 Jan 2017 20:38:05 +0000
changeset 375046 782f2a6bf284974ae03d85ce269b9a802589b045
parent 375045 60eddecfd12de63fc795515d24977b43e5148619
child 375047 5d4a0370b0a7b30b73342cf53f4cf2374d34cfb4
push id 6996
push user jlorenzo@mozilla.com
push date Mon, 06 Mar 2017 20:48:21 +0000
treeherder mozilla-beta@d89512dab048 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers jrmuizel
bugs 1331339
milestone 53.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1331339 - Don't start a new script run when the current script appears in the next character's ScriptExtensions property, or next char is a cluster-extender. r=jrmuizel
gfx/thebes/gfxScriptItemizer.cpp
intl/unicharutil/util/nsUnicodeProperties.h
--- a/gfx/thebes/gfxScriptItemizer.cpp
+++ b/gfx/thebes/gfxScriptItemizer.cpp
@@ -102,22 +102,27 @@ gfxScriptItemizer::fixup(Script newScrip
     int32_t fixupSP = DEC(parenSP, fixupCount);
 
     while (fixupCount-- > 0) {
         fixupSP = INC1(fixupSP);
         parenStack[fixupSP].scriptCode = newScriptCode;
     }
 }
 
+// We regard the current char as having the same script as the in-progress run
+// if either script code is Common or Inherited, or if the run script appears
+// in the character's ScriptExtensions, or if the char is a cluster extender.
 static inline bool
-SameScript(Script runScript, Script currCharScript)
+SameScript(Script runScript, Script currCharScript, uint32_t aCurrCh)
 {
     return runScript <= Script::INHERITED ||
            currCharScript <= Script::INHERITED ||
-           currCharScript == runScript;
+           currCharScript == runScript ||
+           IsClusterExtender(aCurrCh) ||
+           HasScript(aCurrCh, runScript);
 }
 
 gfxScriptItemizer::gfxScriptItemizer(const char16_t *src, uint32_t length)
     : textPtr(src), textLength(length)
 {
     reset();
 }
 
@@ -189,17 +194,17 @@ gfxScriptItemizer::Next(uint32_t& aRunSt
                 }
 
                 if (STACK_IS_NOT_EMPTY()) {
                     sc = TOP().scriptCode;
                 }
             }
         }
 
-        if (SameScript(scriptCode, sc)) {
+        if (SameScript(scriptCode, sc, ch)) {
             if (scriptCode <= Script::INHERITED &&
                 sc > Script::INHERITED)
             {
                 scriptCode = sc;
                 fixup(scriptCode);
             }
 
             /*
--- a/intl/unicharutil/util/nsUnicodeProperties.h
+++ b/intl/unicharutil/util/nsUnicodeProperties.h
@@ -100,16 +100,22 @@ GetLineBreakClass(uint32_t aCh)
 
 inline Script
 GetScriptCode(uint32_t aCh)
 {
   UErrorCode err = U_ZERO_ERROR;
   return Script(uscript_getScript(aCh, &err));
 }
 
+inline bool
+HasScript(uint32_t aCh, Script aScript)
+{
+  return uscript_hasScript(aCh, UScriptCode(aScript));
+}
+
 inline uint32_t
 GetScriptTagForCode(Script aScriptCode)
 {
   const char* tag = uscript_getShortName(UScriptCode(aScriptCode));
   return HB_TAG(tag[0], tag[1], tag[2], tag[3]);
 }
 
 inline PairedBracketType
@@ -184,16 +190,26 @@ uint8_t GetCombiningClass(uint32_t aCh);
 uint8_t GetGeneralCategory(uint32_t aCh);
 
 nsCharType GetBidiCat(uint32_t aCh);
 
 uint8_t GetLineBreakClass(uint32_t aCh);
 
 Script GetScriptCode(uint32_t aCh);
 
+// We don't support ScriptExtensions.txt data when building without ICU.
+// The most important cases will still be handled in gfxScriptItemizer
+// by checking IsClusterExtender to avoid breaking script runs within
+// a cluster.
+inline bool
+HasScript(uint32_t aCh, Script aScript)
+{
+  return false;
+}
+
 uint32_t GetScriptTagForCode(Script aScriptCode);
 
 PairedBracketType GetPairedBracketType(uint32_t aCh);
 uint32_t GetPairedBracket(uint32_t aCh);
 
 /**
  * Return the numeric value of the character. The value returned is the value
  * of the Numeric_Value in field 7 of the UCD, or -1 if field 7 is empty.