bug 1018805 - implement custom lowercasing behavior for Irish. r=smontagu
author Jonathan Kew <jkew@mozilla.com>
Mon, 02 Jun 2014 12:48:01 +0100
changeset 205375 c4ca7aeb2eb43c8a694c6071a5a4b8125872eb15
parent 205374 52e25aa9d032e0b153728e32193c8b971f32c705
child 205376 b706a4689f738da154524f8a067b45abacd3c394
push id 3741
push user asasaki@mozilla.com
push date Mon, 21 Jul 2014 20:25:18 +0000
treeherder mozilla-beta@4d6f46f5af68 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers smontagu
bugs 1018805
milestone 32.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
bug 1018805 - implement custom lowercasing behavior for Irish. r=smontagu
intl/unicharutil/util/IrishCasing.cpp
intl/unicharutil/util/IrishCasing.h
layout/generic/nsTextRunTransformations.cpp
--- a/intl/unicharutil/util/IrishCasing.cpp
+++ b/intl/unicharutil/util/IrishCasing.cpp
@@ -203,43 +203,46 @@ const uint8_t IrishCasing::sUcClasses[26
   kClass_Vowel, kClass_B, kClass_cC, kClass_DG, kClass_Vowel,
   kClass_fF, kClass_DG, kClass_letter, kClass_Vowel, kClass_letter,
   kClass_letter, kClass_lLNrR, kClass_letter, kClass_lLNrR, kClass_Vowel,
   kClass_pP, kClass_letter, kClass_lLNrR, kClass_sS, kClass_T,
   kClass_Vowel, kClass_letter, kClass_letter, kClass_letter, kClass_letter,
   kClass_letter
 };
 
+uint8_t
+IrishCasing::GetClass(uint32_t aCh)
+{
+  using mozilla::unicode::GetGenCategory;
+  if (aCh >= 'a' && aCh <= 'z') {
+    return sLcClasses[aCh - 'a'];
+  } else if (aCh >= 'A' && aCh <= 'Z') {
+    return sUcClasses[aCh - 'A'];
+  } else if (GetGenCategory(aCh) == nsIUGenCategory::kLetter) {
+    if (aCh == a_ACUTE || aCh == e_ACUTE || aCh == i_ACUTE ||
+        aCh == o_ACUTE || aCh == u_ACUTE) {
+      return kClass_vowel;
+    } else if (aCh == A_ACUTE || aCh == E_ACUTE || aCh == I_ACUTE ||
+               aCh == O_ACUTE || aCh == U_ACUTE) {
+      return kClass_Vowel;
+    } else {
+      return kClass_letter;
+    }
+  } else if (aCh == '-' || aCh == HYPHEN || aCh == NO_BREAK_HYPHEN) {
+    return kClass_hyph;
+  } else {
+    return kClass_other;
+  }
+}
+
 uint32_t
 IrishCasing::UpperCase(uint32_t aCh, State& aState,
                        bool& aMarkPos, uint8_t& aAction)
 {
-  using mozilla::unicode::GetGenCategory;
-  uint8_t cls;
-
-  if (aCh >= 'a' && aCh <= 'z') {
-    cls = sLcClasses[aCh - 'a'];
-  } else if (aCh >= 'A' && aCh <= 'Z') {
-    cls = sUcClasses[aCh - 'A'];
-  } else if (GetGenCategory(aCh) == nsIUGenCategory::kLetter) {
-    if (aCh == a_ACUTE || aCh == e_ACUTE || aCh == i_ACUTE ||
-        aCh == o_ACUTE || aCh == u_ACUTE) {
-      cls = kClass_vowel;
-    } else if (aCh == A_ACUTE || aCh == E_ACUTE || aCh == I_ACUTE ||
-               aCh == O_ACUTE || aCh == U_ACUTE) {
-      cls = kClass_Vowel;
-    } else {
-      cls = kClass_letter;
-    }
-  } else if (aCh == '-' || aCh == HYPHEN || aCh == NO_BREAK_HYPHEN) {
-    cls = kClass_hyph;
-  } else {
-    cls = kClass_other;
-  }
-
+  uint8_t cls = GetClass(aCh);
   uint8_t stateEntry = sUppercaseStateTable[cls][aState];
   aMarkPos = !!(stateEntry & kMarkPositionFlag);
   aAction = (stateEntry & kActionMask) >> kActionShift;
   aState = (stateEntry & kNextStateMask);
 
   return ToUpperCase(aCh);
 }
 
--- a/intl/unicharutil/util/IrishCasing.h
+++ b/intl/unicharutil/util/IrishCasing.h
@@ -96,13 +96,21 @@ public:
   };
 
   static const uint8_t sUppercaseStateTable[kNumClasses][kNumStates];
   static const uint8_t sLcClasses[26];
   static const uint8_t sUcClasses[26];
 
   static uint32_t UpperCase(uint32_t aCh, State& aState,
                             bool& aMarkPos, uint8_t& aAction);
+
+  static bool IsUpperVowel(uint32_t aCh)
+  {
+    return GetClass(aCh) == kClass_Vowel;
+  }
+
+private:
+  static uint8_t GetClass(uint32_t aCh);
 };
 
 } // namespace mozilla
 
 #endif
--- a/layout/generic/nsTextRunTransformations.cpp
+++ b/layout/generic/nsTextRunTransformations.cpp
@@ -271,16 +271,18 @@ nsCaseTransformTextRunFactory::Transform
 
   uint32_t length = aString.Length();
   const char16_t* str = aString.BeginReading();
 
   bool mergeNeeded = false;
 
   bool capitalizeDutchIJ = false;
   bool prevIsLetter = false;
+  bool ntPrefix = false; // true immediately after a word-initial 'n' or 't'
+                         // when doing Irish lowercasing
   uint32_t sigmaIndex = uint32_t(-1);
   nsIUGenCategory::nsUGenCategory cat;
 
   uint8_t style = aAllUppercase ? NS_STYLE_TEXT_TRANSFORM_UPPERCASE : 0;
   const nsIAtom* lang = aLanguage;
 
   LanguageSpecificCasingBehavior languageSpecificCasing = GetCasingFor(lang);
   mozilla::GreekCasing::State greekState;
@@ -326,16 +328,34 @@ nsCaseTransformTextRunFactory::Transform
         if (ch == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
           ch = 'i';
           prevIsLetter = true;
           sigmaIndex = uint32_t(-1);
           break;
         }
       }
 
+      cat = mozilla::unicode::GetGenCategory(ch);
+
+      if (languageSpecificCasing == eLSCB_Irish &&
+          cat == nsIUGenCategory::kLetter) {
+        // See bug 1018805 for Irish lowercasing requirements
+        if (!prevIsLetter && (ch == 'n' || ch == 't')) {
+          ntPrefix = true;
+        } else {
+          if (ntPrefix && mozilla::IrishCasing::IsUpperVowel(ch)) {
+            aConvertedString.Append('-');
+            ++extraChars;
+          }
+          ntPrefix = false;
+        }
+      } else {
+        ntPrefix = false;
+      }
+
       // Special lowercasing behavior for Greek Sigma: note that this is listed
       // as context-sensitive in Unicode's SpecialCasing.txt, but is *not* a
       // language-specific mapping; it applies regardless of the language of
       // the element.
       //
       // The lowercase mapping for CAPITAL SIGMA should be to SMALL SIGMA (i.e.
       // the non-final form) whenever there is a following letter, or when the
       // CAPITAL SIGMA occurs in isolation (neither preceded nor followed by a
@@ -344,18 +364,16 @@ nsCaseTransformTextRunFactory::Transform
       //
       // To implement the context-sensitive nature of this mapping, we keep
       // track of whether the previous character was a letter. If not, CAPITAL
       // SIGMA will map directly to SMALL SIGMA. If the previous character
       // was a letter, CAPITAL SIGMA maps to FINAL SIGMA and we record the
       // position in the converted string; if we then encounter another letter,
       // that FINAL SIGMA is replaced with a standard SMALL SIGMA.
 
-      cat = mozilla::unicode::GetGenCategory(ch);
-
       // If sigmaIndex is not -1, it marks where we have provisionally mapped
       // a CAPITAL SIGMA to FINAL SIGMA; if we now find another letter, we
       // need to change it to SMALL SIGMA.
       if (sigmaIndex != uint32_t(-1)) {
         if (cat == nsIUGenCategory::kLetter) {
           aConvertedString.SetCharAt(GREEK_SMALL_LETTER_SIGMA, sigmaIndex);
         }
       }