bug 307039 - implement Greek-specific uppercasing for text-transform & small-caps. r=past
author: Jonathan Kew <jkew@mozilla.com>
Thu, 03 May 2012 08:57:18 +0100
changeset: 92952 0de4cbfe2217
parent: 92951 47bf5e590a0c
child: 92953 5faf400155a4
push id: 8913
push user: jkew@mozilla.com
push date: Thu, 03 May 2012 07:59:26 +0000
treeherder: mozilla-inbound@a6a335cd2c94 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: past
bugs: 307039
milestone: 15.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
bug 307039 - implement Greek-specific uppercasing for text-transform & small-caps. r=past
content/base/src/nsGkAtomList.h
layout/generic/nsTextRunTransformations.cpp
--- a/content/base/src/nsGkAtomList.h
+++ b/content/base/src/nsGkAtomList.h
@@ -1906,16 +1906,17 @@ GK_ATOM(x_symbol, "x-symbol")
 // referenced in all.js
 GK_ATOM(x_user_def, "x-user-def")
 
 // additional languages that have special case transformations
 GK_ATOM(az, "az")
 GK_ATOM(ba, "ba")
 GK_ATOM(crh, "crh")
 GK_ATOM(nl, "nl")
+GK_ATOM(el, "el")
 
 // Names for editor transactions
 GK_ATOM(TypingTxnName, "Typing")
 GK_ATOM(IMETxnName, "IME")
 GK_ATOM(DeleteTxnName, "Deleting")
 
 // IPC stuff
 GK_ATOM(Remote, "remote")
--- a/layout/generic/nsTextRunTransformations.cpp
+++ b/layout/generic/nsTextRunTransformations.cpp
@@ -15,16 +15,17 @@
  * The Original Code is Novell code.
  *
  * The Initial Developer of the Original Code is Novell Corporation.
  * Portions created by the Initial Developer are Copyright (C) 2006
  * the Initial Developer. All Rights Reserved.
  *
  * Contributor(s):
  *   robert@ocallahan.org
+ *   Jonathan Kew <jfkthame@gmail.com>
  *
  * Alternatively, the contents of this file may be used under the terms of
  * either the GNU General Public License Version 2 or later (the "GPL"), or
  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  * in which case the provisions of the GPL or the LGPL are applicable instead
  * of those above. If you wish to allow use of your version of this file only
  * under the terms of either the GPL or the LGPL, and not to allow others to
  * use your version of this file under the terms of the MPL, indicate your
@@ -51,19 +52,301 @@
 
 // Unicode characters needing special casing treatment in tr/az languages
 #define LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE  0x0130
 #define LATIN_SMALL_LETTER_DOTLESS_I           0x0131
 
 // Greek sigma needs custom handling for the lowercase transform; for details
 // see comments under "case NS_STYLE_TEXT_TRANSFORM_LOWERCASE" within
 // nsCaseTransformTextRunFactory::RebuildTextRun(), and bug 740120.
-#define GREEK_CAPITAL_LETTER_SIGMA      0x03A3
-#define GREEK_SMALL_LETTER_FINAL_SIGMA  0x03C2
-#define GREEK_SMALL_LETTER_SIGMA        0x03C3
+#define GREEK_CAPITAL_LETTER_SIGMA             0x03A3
+#define GREEK_SMALL_LETTER_FINAL_SIGMA         0x03C2
+#define GREEK_SMALL_LETTER_SIGMA               0x03C3
+
+// Custom uppercase mapping for Greek; see bug 307039 for details. Code points consumed by GreekUpperCase().
+#define GREEK_LOWER_ALPHA                      0x03B1
+#define GREEK_LOWER_ALPHA_TONOS                0x03AC // *_TONOS: accented precomposed form with a U+03xx code point
+#define GREEK_LOWER_ALPHA_OXIA                 0x1F71 // *_OXIA: accented precomposed form with a U+1Fxx code point; handled identically to the *_TONOS form
+#define GREEK_LOWER_EPSILON                    0x03B5
+#define GREEK_LOWER_EPSILON_TONOS              0x03AD
+#define GREEK_LOWER_EPSILON_OXIA               0x1F73
+#define GREEK_LOWER_ETA                        0x03B7
+#define GREEK_LOWER_ETA_TONOS                  0x03AE
+#define GREEK_LOWER_ETA_OXIA                   0x1F75
+#define GREEK_LOWER_IOTA                       0x03B9
+#define GREEK_LOWER_IOTA_TONOS                 0x03AF
+#define GREEK_LOWER_IOTA_OXIA                  0x1F77
+#define GREEK_LOWER_IOTA_DIALYTIKA             0x03CA
+#define GREEK_LOWER_IOTA_DIALYTIKA_TONOS       0x0390
+#define GREEK_LOWER_IOTA_DIALYTIKA_OXIA        0x1FD3
+#define GREEK_LOWER_OMICRON                    0x03BF
+#define GREEK_LOWER_OMICRON_TONOS              0x03CC
+#define GREEK_LOWER_OMICRON_OXIA               0x1F79
+#define GREEK_LOWER_UPSILON                    0x03C5
+#define GREEK_LOWER_UPSILON_TONOS              0x03CD
+#define GREEK_LOWER_UPSILON_OXIA               0x1F7B
+#define GREEK_LOWER_UPSILON_DIALYTIKA          0x03CB
+#define GREEK_LOWER_UPSILON_DIALYTIKA_TONOS    0x03B0
+#define GREEK_LOWER_UPSILON_DIALYTIKA_OXIA     0x1FE3
+#define GREEK_LOWER_OMEGA                      0x03C9
+#define GREEK_LOWER_OMEGA_TONOS                0x03CE
+#define GREEK_LOWER_OMEGA_OXIA                 0x1F7D
+#define GREEK_UPPER_ALPHA                      0x0391 // unaccented capitals: the values GreekUpperCase() returns for vowels
+#define GREEK_UPPER_EPSILON                    0x0395
+#define GREEK_UPPER_ETA                        0x0397
+#define GREEK_UPPER_IOTA                       0x0399
+#define GREEK_UPPER_IOTA_DIALYTIKA             0x03AA
+#define GREEK_UPPER_OMICRON                    0x039F
+#define GREEK_UPPER_UPSILON                    0x03A5
+#define GREEK_UPPER_UPSILON_DIALYTIKA          0x03AB
+#define GREEK_UPPER_OMEGA                      0x03A9
+#define GREEK_UPPER_ALPHA_TONOS                0x0386 // accented capitals: accepted as input and mapped to the unaccented capital
+#define GREEK_UPPER_ALPHA_OXIA                 0x1FBB
+#define GREEK_UPPER_EPSILON_TONOS              0x0388
+#define GREEK_UPPER_EPSILON_OXIA               0x1FC9
+#define GREEK_UPPER_ETA_TONOS                  0x0389
+#define GREEK_UPPER_ETA_OXIA                   0x1FCB
+#define GREEK_UPPER_IOTA_TONOS                 0x038A
+#define GREEK_UPPER_IOTA_OXIA                  0x1FDB
+#define GREEK_UPPER_OMICRON_TONOS              0x038C
+#define GREEK_UPPER_OMICRON_OXIA               0x1FF9
+#define GREEK_UPPER_UPSILON_TONOS              0x038E
+#define GREEK_UPPER_UPSILON_OXIA               0x1FEB
+#define GREEK_UPPER_OMEGA_TONOS                0x038F
+#define GREEK_UPPER_OMEGA_OXIA                 0x1FFB
+#define COMBINING_ACUTE_ACCENT                 0x0301 // dropped by GreekUpperCase() when it follows a vowel or diaeresis
+#define COMBINING_DIAERESIS                    0x0308
+#define COMBINING_ACUTE_TONE_MARK              0x0341 // handled exactly like COMBINING_ACUTE_ACCENT
+#define COMBINING_GREEK_DIALYTIKA_TONOS        0x0344 // replaced by COMBINING_DIAERESIS (accent part dropped)
+
+// When doing an Uppercase transform in Greek, we need to keep track of the
+// current state while iterating through the string, to recognize and process
+// diphthongs correctly. For clarity, we define a state for each vowel and
+// each vowel with accent, although a few of these do not actually need any
+// special treatment and could be folded into kStart.
+enum GreekCasingState {
+  kStart, // no vowel context pending; initial state, and set by every character without special handling
+  kAlpha, // kAlpha..kOmega: the previous character was that vowel, unaccented (either case)
+  kEpsilon,
+  kEta,
+  kIota,
+  kOmicron, // also checked when an upsilon follows, to track the omicron+upsilon pair
+  kUpsilon,
+  kOmega,
+  kAlphaAcc, // k*Acc: that vowel was seen with a tonos/oxia or a combining acute, and the accent was stripped
+  kEpsilonAcc,
+  kEtaAcc,
+  kIotaAcc,
+  kOmicronAcc,
+  kUpsilonAcc,
+  kOmegaAcc,
+  kOmicronUpsilon, // unaccented upsilon seen directly after unaccented omicron
+  kDiaeresis // previous character was iota/upsilon with dialytika (no accent) or a combining diaeresis
+};
+
+static PRUint32 // returns the Greek-specific uppercase of aCh, or PRUint32(-1) when aCh is to be dropped
+GreekUpperCase(PRUint32 aCh, GreekCasingState* aState) // *aState is in/out: context from the previous call, updated for the next
+{
+  switch (aCh) {
+  case GREEK_UPPER_ALPHA: // unaccented vowels: remember which vowel was seen and return its capital
+  case GREEK_LOWER_ALPHA:
+    *aState = kAlpha;
+    return GREEK_UPPER_ALPHA;
+
+  case GREEK_UPPER_EPSILON:
+  case GREEK_LOWER_EPSILON:
+    *aState = kEpsilon;
+    return GREEK_UPPER_EPSILON;
+
+  case GREEK_UPPER_ETA:
+  case GREEK_LOWER_ETA:
+    *aState = kEta;
+    return GREEK_UPPER_ETA;
+
+  case GREEK_UPPER_IOTA: // capital iota never gains a dialytika here; only lowercase iota (below) checks the state
+    *aState = kIota;
+    return GREEK_UPPER_IOTA;
+
+  case GREEK_UPPER_OMICRON:
+  case GREEK_LOWER_OMICRON:
+    *aState = kOmicron;
+    return GREEK_UPPER_OMICRON;
+
+  case GREEK_UPPER_UPSILON: // capital upsilon: no dialytika is added, but omicron+upsilon is still tracked
+    switch (*aState) {
+    case kOmicron:
+      *aState = kOmicronUpsilon; // a combining acute after this pair resets to kStart rather than kUpsilonAcc
+      break;
+    default:
+      *aState = kUpsilon;
+      break;
+    }
+    return GREEK_UPPER_UPSILON;
+
+  case GREEK_UPPER_OMEGA:
+  case GREEK_LOWER_OMEGA:
+    *aState = kOmega;
+    return GREEK_UPPER_OMEGA;
+
+  // iota and upsilon may be the second vowel of a diphthong
+  case GREEK_LOWER_IOTA:
+    switch (*aState) {
+    case kAlphaAcc: // previous vowel had its accent stripped: return iota with dialytika
+    case kEpsilonAcc:
+    case kOmicronAcc:
+    case kUpsilonAcc:
+      *aState = kStart;
+      return GREEK_UPPER_IOTA_DIALYTIKA;
+    default:
+      break;
+    }
+    *aState = kIota;
+    return GREEK_UPPER_IOTA;
+
+  case GREEK_LOWER_UPSILON:
+    switch (*aState) {
+    case kAlphaAcc: // previous vowel had its accent stripped: return upsilon with dialytika
+    case kEpsilonAcc:
+    case kEtaAcc:
+    case kOmicronAcc:
+      *aState = kStart;
+      return GREEK_UPPER_UPSILON_DIALYTIKA;
+    case kOmicron:
+      *aState = kOmicronUpsilon; // a combining acute after this pair resets to kStart rather than kUpsilonAcc
+      break;
+    default:
+      *aState = kUpsilon;
+      break;
+    }
+    return GREEK_UPPER_UPSILON;
+
+  case GREEK_UPPER_IOTA_DIALYTIKA: // characters that already carry a diaeresis (and no accent)
+  case GREEK_LOWER_IOTA_DIALYTIKA:
+  case GREEK_UPPER_UPSILON_DIALYTIKA:
+  case GREEK_LOWER_UPSILON_DIALYTIKA:
+  case COMBINING_DIAERESIS:
+    *aState = kDiaeresis; // lets a following combining acute be recognized and dropped
+    return ToUpperCase(aCh); // standard mapping; nothing is stripped here
+
+  // remove accent if it follows a vowel or diaeresis,
+  // and set appropriate state for diphthong detection
+  case COMBINING_ACUTE_ACCENT:
+  case COMBINING_ACUTE_TONE_MARK:
+    switch (*aState) {
+    case kAlpha:
+      *aState = kAlphaAcc;
+      return PRUint32(-1); // omit this char from result string
+    case kEpsilon:
+      *aState = kEpsilonAcc;
+      return PRUint32(-1);
+    case kEta:
+      *aState = kEtaAcc;
+      return PRUint32(-1);
+    case kIota:
+      *aState = kIotaAcc;
+      return PRUint32(-1);
+    case kOmicron:
+      *aState = kOmicronAcc;
+      return PRUint32(-1);
+    case kUpsilon:
+      *aState = kUpsilonAcc;
+      return PRUint32(-1);
+    case kOmicronUpsilon:
+      *aState = kStart; // this completed a diphthong
+      return PRUint32(-1);
+    case kOmega:
+      *aState = kOmegaAcc;
+      return PRUint32(-1);
+    case kDiaeresis:
+      *aState = kStart;
+      return PRUint32(-1);
+    default: // accent in any other context is not dropped
+      break;
+    }
+    break; // leaves the outer switch: state is reset and the standard mapping at the end is used
+
+  // combinations with diaeresis+accent just strip the accent,
+  // and reset to start state (don't form diphthong with following vowel)
+  case GREEK_LOWER_IOTA_DIALYTIKA_TONOS:
+  case GREEK_LOWER_IOTA_DIALYTIKA_OXIA:
+    *aState = kStart;
+    return GREEK_UPPER_IOTA_DIALYTIKA;
+
+  case GREEK_LOWER_UPSILON_DIALYTIKA_TONOS:
+  case GREEK_LOWER_UPSILON_DIALYTIKA_OXIA:
+    *aState = kStart;
+    return GREEK_UPPER_UPSILON_DIALYTIKA;
+
+  case COMBINING_GREEK_DIALYTIKA_TONOS:
+    *aState = kStart;
+    return COMBINING_DIAERESIS; // U+0344 becomes U+0308: the diaeresis stays, the accent goes
+
+  // strip accents from vowels, and note the vowel seen so that we can detect
+  // diphthongs where diaeresis needs to be added
+  case GREEK_LOWER_ALPHA_TONOS:
+  case GREEK_LOWER_ALPHA_OXIA:
+  case GREEK_UPPER_ALPHA_TONOS:
+  case GREEK_UPPER_ALPHA_OXIA:
+    *aState = kAlphaAcc;
+    return GREEK_UPPER_ALPHA;
+
+  case GREEK_LOWER_EPSILON_TONOS:
+  case GREEK_LOWER_EPSILON_OXIA:
+  case GREEK_UPPER_EPSILON_TONOS:
+  case GREEK_UPPER_EPSILON_OXIA:
+    *aState = kEpsilonAcc;
+    return GREEK_UPPER_EPSILON;
+
+  case GREEK_LOWER_ETA_TONOS:
+  case GREEK_LOWER_ETA_OXIA:
+  case GREEK_UPPER_ETA_TONOS:
+  case GREEK_UPPER_ETA_OXIA:
+    *aState = kEtaAcc;
+    return GREEK_UPPER_ETA;
+
+  case GREEK_LOWER_IOTA_TONOS:
+  case GREEK_LOWER_IOTA_OXIA:
+  case GREEK_UPPER_IOTA_TONOS:
+  case GREEK_UPPER_IOTA_OXIA:
+    *aState = kIotaAcc;
+    return GREEK_UPPER_IOTA;
+
+  case GREEK_LOWER_OMICRON_TONOS:
+  case GREEK_LOWER_OMICRON_OXIA:
+  case GREEK_UPPER_OMICRON_TONOS:
+  case GREEK_UPPER_OMICRON_OXIA:
+    *aState = kOmicronAcc;
+    return GREEK_UPPER_OMICRON;
+
+  case GREEK_LOWER_UPSILON_TONOS: // accented upsilon: same omicron check as the unaccented upsilon cases above
+  case GREEK_LOWER_UPSILON_OXIA:
+  case GREEK_UPPER_UPSILON_TONOS:
+  case GREEK_UPPER_UPSILON_OXIA:
+    switch (*aState) {
+    case kOmicron:
+      *aState = kStart; // this completed a diphthong
+      break;
+    default:
+      *aState = kUpsilonAcc;
+      break;
+    }
+    return GREEK_UPPER_UPSILON;
+
+  case GREEK_LOWER_OMEGA_TONOS:
+  case GREEK_LOWER_OMEGA_OXIA:
+  case GREEK_UPPER_OMEGA_TONOS:
+  case GREEK_UPPER_OMEGA_OXIA:
+    *aState = kOmegaAcc;
+    return GREEK_UPPER_OMEGA;
+  }
+
+  // all other characters just reset the state, and use standard mappings
+  *aState = kStart;
+  return ToUpperCase(aCh);
+}
 
 nsTransformedTextRun *
 nsTransformedTextRun::Create(const gfxTextRunFactory::Parameters* aParams,
                              nsTransformingTextRunFactory* aFactory,
                              gfxFontGroup* aFontGroup,
                              const PRUnichar* aString, PRUint32 aLength,
                              const PRUint32 aFlags, nsStyleContext** aStyles,
                              bool aOwnsFactory)
@@ -153,37 +436,41 @@ nsTransformingTextRunFactory::MakeTextRu
 /**
  * Copy a given textrun, but merge certain characters into a single logical
  * character. Glyphs for a character are added to the glyph list for the previous
  * character and then the merged character is eliminated. Visually the results
  * are identical.
  * 
  * This is used for text-transform:uppercase when we encounter a SZLIG,
  * whose uppercase form is "SS", or other ligature or precomposed form
- * that expands to multiple codepoints during case transformation.
+ * that expands to multiple codepoints during case transformation,
+ * and for Greek text when combining diacritics have been deleted.
  * 
  * This function is unable to merge characters when they occur in different
  * glyph runs. This only happens in tricky edge cases where a character was
  * decomposed by case-mapping (e.g. there's no precomposed uppercase version
  * of an accented lowercase letter), and then font-matching caused the
  * diacritics to be assigned to a different font than the base character.
  * In this situation, the diacritic(s) get discarded, which is less than
  * ideal, but they probably weren't going to render very well anyway.
  * Bug 543200 will improve this by making font-matching operate on entire
  * clusters instead of individual codepoints.
  * 
  * For simplicity, this produces a textrun containing all DetailedGlyphs,
  * no simple glyphs. So don't call it unless you really have merging to do.
  * 
- * @param aCharsToMerge when aCharsToMerge[i] is true, this character is
- * merged into the previous character
+ * @param aCharsToMerge when aCharsToMerge[i] is true, this character in aSrc
+ * is merged into the previous character
+ *
+ * @param aDeletedChars when aDeletedChars[i] is true, the character at this
+ * position in aDest was deleted (has no corresponding char in aSrc)
  */
 static void
 MergeCharactersInTextRun(gfxTextRun* aDest, gfxTextRun* aSrc,
-                         bool* aCharsToMerge)
+                         const bool* aCharsToMerge, const bool* aDeletedChars)
 {
   aDest->ResetGlyphRuns();
 
   gfxTextRun::GlyphRunIterator iter(aSrc, 0, aSrc->GetLength());
   PRUint32 offset = 0;
   nsAutoTArray<gfxTextRun::DetailedGlyph,2> glyphs;
   while (iter.NextRun()) {
     gfxTextRun::GlyphRun* run = iter.GetGlyphRun();
@@ -238,16 +525,20 @@ MergeCharactersInTextRun(gfxTextRun* aDe
           mergedGlyph.SetMissing(glyphs.Length());
         } else {
           mergedGlyph.SetComplex(mergedGlyph.IsClusterStart(),
                                  mergedGlyph.IsLigatureGroupStart(),
                                  glyphs.Length());
         }
         aDest->SetGlyphs(offset, mergedGlyph, glyphs.Elements());
         ++offset;
+
+        while (offset < aDest->GetLength() && aDeletedChars[offset]) {
+          aDest->SetGlyphs(offset++, gfxTextRun::CompressedGlyph(), nsnull);
+        }
       }
 
       glyphs.Clear();
       anyMissing = false;
       mergeRunStart = k + 1;
       if (mergeRunStart < stringEnd) {
         mergedGlyph = srcGlyphs[mergeRunStart];
       }
@@ -293,56 +584,87 @@ nsFontVariantTextRunFactory::RebuildText
   if (!inner.get())
     return;
 
   nsCaseTransformTextRunFactory uppercaseFactory(nsnull, true);
 
   aTextRun->ResetGlyphRuns();
 
   PRUint32 runStart = 0;
-  bool runIsLowercase = false;
   nsAutoTArray<nsStyleContext*,50> styleArray;
   nsAutoTArray<PRUint8,50> canBreakBeforeArray;
 
+  enum RunCaseState {
+    kUpperOrCaseless, // will be untouched by font-variant:small-caps
+    kLowercase,       // will be uppercased and reduced
+    kSpecialUpper     // specials: don't shrink, but apply uppercase mapping
+  };
+  RunCaseState runCase = kUpperOrCaseless;
+
   PRUint32 i;
   for (i = 0; i <= length; ++i) {
-    bool isLowercase = false;
+    RunCaseState chCase = kUpperOrCaseless;
+    nsStyleContext* styleContext = styles[i];
     if (i < length) {
       // Characters that aren't the start of a cluster are ignored here. They
       // get added to whatever lowercase/non-lowercase run we're in.
       if (!inner->IsClusterStart(i)) {
-        isLowercase = runIsLowercase;
+        chCase = runCase;
       } else {
-        if (styles[i]->GetStyleFont()->mFont.variant == NS_STYLE_FONT_VARIANT_SMALL_CAPS) {
+        if (styleContext->GetStyleFont()->mFont.variant == NS_STYLE_FONT_VARIANT_SMALL_CAPS) {
           PRUint32 ch = str[i];
           if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 && NS_IS_LOW_SURROGATE(str[i + 1])) {
             ch = SURROGATE_TO_UCS4(ch, str[i + 1]);
           }
           PRUint32 ch2 = ToUpperCase(ch);
-          isLowercase = ch != ch2 || mozilla::unicode::SpecialUpper(ch);
+          if (ch != ch2 || mozilla::unicode::SpecialUpper(ch)) {
+            chCase = kLowercase;
+          } else if (styleContext->GetStyleFont()->mLanguage == nsGkAtoms::el) {
+            // In Greek, check for characters that will be modified by the
+            // GreekUpperCase mapping - this catches accented capitals where
+            // the accent is to be removed (bug 307039). These are handled by
+            // a transformed child run using the full-size font.
+            GreekCasingState state = kStart; // don't need exact context here
+            ch2 = GreekUpperCase(ch, &state);
+            if (ch != ch2) {
+              chCase = kSpecialUpper;
+            }
+          }
         } else {
           // Don't transform the character! I.e., pretend that it's not lowercase
         }
       }
     }
 
-    if ((i == length || runIsLowercase != isLowercase) && runStart < i) {
+    if ((i == length || runCase != chCase) && runStart < i) {
       nsAutoPtr<nsTransformedTextRun> transformedChild;
       nsAutoPtr<gfxTextRun> cachedChild;
       gfxTextRun* child;
 
-      if (runIsLowercase) {
-        transformedChild = uppercaseFactory.MakeTextRun(str + runStart, i - runStart,
-            &innerParams, smallFont, flags, styleArray.Elements(), false);
+      switch (runCase) {
+      case kUpperOrCaseless:
+        cachedChild =
+          fontGroup->MakeTextRun(str + runStart, i - runStart, &innerParams,
+                                 flags);
+        child = cachedChild.get();
+        break;
+      case kLowercase:
+        transformedChild =
+          uppercaseFactory.MakeTextRun(str + runStart, i - runStart,
+                                       &innerParams, smallFont, flags,
+                                       styleArray.Elements(), false);
         child = transformedChild;
-      } else {
-        cachedChild =
-          fontGroup->MakeTextRun(str + runStart, i - runStart,
-                                 &innerParams, flags);
-        child = cachedChild.get();
+        break;
+      case kSpecialUpper:
+        transformedChild =
+          uppercaseFactory.MakeTextRun(str + runStart, i - runStart,
+                                       &innerParams, fontGroup, flags,
+                                       styleArray.Elements(), false);
+        child = transformedChild;
+        break;
       }
       if (!child)
         return;
       // Copy potential linebreaks into child so they're preserved
       // (and also child will be shaped appropriately)
       NS_ASSERTION(canBreakBeforeArray.Length() == i - runStart,
                    "lost some break-before values?");
       child->SetPotentialLineBreaks(0, canBreakBeforeArray.Length(),
@@ -353,62 +675,61 @@ nsFontVariantTextRunFactory::RebuildText
       aTextRun->CopyGlyphDataFrom(child, 0, child->GetLength(), runStart);
 
       runStart = i;
       styleArray.Clear();
       canBreakBeforeArray.Clear();
     }
 
     if (i < length) {
-      runIsLowercase = isLowercase;
-      styleArray.AppendElement(styles[i]);
+      runCase = chCase;
+      styleArray.AppendElement(styleContext);
       canBreakBeforeArray.AppendElement(aTextRun->CanBreakLineBefore(i));
     }
   }
 }
 
 void
 nsCaseTransformTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun,
     gfxContext* aRefContext)
 {
   PRUint32 length = aTextRun->GetLength();
   const PRUnichar* str = aTextRun->mString.BeginReading();
   nsRefPtr<nsStyleContext>* styles = aTextRun->mStyles.Elements();
 
   nsAutoString convertedString;
   nsAutoTArray<bool,50> charsToMergeArray;
+  nsAutoTArray<bool,50> deletedCharsArray;
   nsAutoTArray<nsStyleContext*,50> styleArray;
   nsAutoTArray<PRUint8,50> canBreakBeforeArray;
-  PRUint32 extraCharsCount = 0;
+  bool mergeNeeded = false;
 
   // Some languages have special casing conventions that differ from the
   // default Unicode mappings.
   // The enum values here are named for well-known exemplar languages that
   // exhibit the behavior in question; multiple lang tags may map to the
   // same setting here, if the behavior is shared by other languages.
   enum {
     eNone,    // default non-lang-specific behavior
     eTurkish, // preserve dotted/dotless-i distinction in uppercase
-    eDutch    // treat "ij" digraph as a unit for capitalization
+    eDutch,   // treat "ij" digraph as a unit for capitalization
+    eGreek    // strip accent when uppercasing Greek vowels
   } languageSpecificCasing = eNone;
 
   const nsIAtom* lang = nsnull;
   bool capitalizeDutchIJ = false;
   bool prevIsLetter = false;
   PRUint32 sigmaIndex = PRUint32(-1);
   nsIUGenCategory::nsUGenCategory cat;
+  GreekCasingState greekState = kStart;
   PRUint32 i;
   for (i = 0; i < length; ++i) {
     PRUint32 ch = str[i];
     nsStyleContext* styleContext = styles[i];
 
-    charsToMergeArray.AppendElement(false);
-    styleArray.AppendElement(styleContext);
-    canBreakBeforeArray.AppendElement(aTextRun->CanBreakLineBefore(i));
-
     PRUint8 style = mAllUppercase ? NS_STYLE_TEXT_TRANSFORM_UPPERCASE
       : styleContext->GetStyleText()->mTextTransform;
     int extraChars = 0;
     const mozilla::unicode::MultiCharMapping *mcm;
 
     if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 && NS_IS_LOW_SURROGATE(str[i + 1])) {
       ch = SURROGATE_TO_UCS4(ch, str[i + 1]);
     }
@@ -416,16 +737,19 @@ nsCaseTransformTextRunFactory::RebuildTe
     if (lang != styleContext->GetStyleFont()->mLanguage) {
       lang = styleContext->GetStyleFont()->mLanguage;
       if (lang == nsGkAtoms::tr || lang == nsGkAtoms::az ||
           lang == nsGkAtoms::ba || lang == nsGkAtoms::crh ||
           lang == nsGkAtoms::tt) {
         languageSpecificCasing = eTurkish;
       } else if (lang == nsGkAtoms::nl) {
         languageSpecificCasing = eDutch;
+      } else if (lang == nsGkAtoms::el) {
+        languageSpecificCasing = eGreek;
+        greekState = kStart;
       } else {
         languageSpecificCasing = eNone;
       }
     }
 
     switch (style) {
     case NS_STYLE_TEXT_TRANSFORM_LOWERCASE:
       if (languageSpecificCasing == eTurkish) {
@@ -513,16 +837,21 @@ nsCaseTransformTextRunFactory::RebuildTe
       break;
 
     case NS_STYLE_TEXT_TRANSFORM_UPPERCASE:
       if (languageSpecificCasing == eTurkish && ch == 'i') {
         ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
         break;
       }
 
+      if (languageSpecificCasing == eGreek) {
+        ch = GreekUpperCase(ch, &greekState);
+        break;
+      }
+
       mcm = mozilla::unicode::SpecialUpper(ch);
       if (mcm) {
         int j = 0;
         while (j < 2 && mcm->mMappedChars[j + 1]) {
           convertedString.Append(mcm->mMappedChars[j]);
           ++extraChars;
           ++j;
         }
@@ -566,33 +895,42 @@ nsCaseTransformTextRunFactory::RebuildTe
         ch = ToTitleCase(ch);
       }
       break;
 
     default:
       break;
     }
 
-    if (IS_IN_BMP(ch)) {
-      convertedString.Append(ch);
+    if (ch == PRUint32(-1)) {
+      deletedCharsArray.AppendElement(true);
+      mergeNeeded = true;
     } else {
-      convertedString.Append(H_SURROGATE(ch));
-      convertedString.Append(L_SURROGATE(ch));
-      i++;
+      deletedCharsArray.AppendElement(false);
       charsToMergeArray.AppendElement(false);
       styleArray.AppendElement(styleContext);
-      canBreakBeforeArray.AppendElement(false);
-    }
+      canBreakBeforeArray.AppendElement(aTextRun->CanBreakLineBefore(i));
 
-    while (extraChars > 0) {
-      ++extraCharsCount;
-      charsToMergeArray.AppendElement(true);
-      styleArray.AppendElement(styleContext);
-      canBreakBeforeArray.AppendElement(false);
-      --extraChars;
+      if (IS_IN_BMP(ch)) {
+        convertedString.Append(ch);
+      } else {
+        convertedString.Append(H_SURROGATE(ch));
+        convertedString.Append(L_SURROGATE(ch));
+        ++i;
+        deletedCharsArray.AppendElement(true); // not exactly deleted, but the
+                                               // trailing surrogate is skipped
+        ++extraChars;
+      }
+
+      while (extraChars-- > 0) {
+        mergeNeeded = true;
+        charsToMergeArray.AppendElement(true);
+        styleArray.AppendElement(styleContext);
+        canBreakBeforeArray.AppendElement(false);
+      }
     }
   }
 
   PRUint32 flags;
   gfxTextRunFactory::Parameters innerParams =
       GetParametersForInner(aTextRun, &flags, aRefContext);
   gfxFontGroup* fontGroup = aTextRun->GetFontGroup();
 
@@ -618,19 +956,25 @@ nsCaseTransformTextRunFactory::RebuildTe
   NS_ASSERTION(convertedString.Length() == canBreakBeforeArray.Length(),
                "Dropped characters or break-before values somewhere!");
   child->SetPotentialLineBreaks(0, canBreakBeforeArray.Length(),
       canBreakBeforeArray.Elements(), aRefContext);
   if (transformedChild) {
     transformedChild->FinishSettingProperties(aRefContext);
   }
 
-  if (extraCharsCount > 0) {
+  if (mergeNeeded) {
     // Now merge multiple characters into one multi-glyph character as required
-    MergeCharactersInTextRun(aTextRun, child, charsToMergeArray.Elements());
+    // and deal with skipping deleted accent chars
+    NS_ASSERTION(charsToMergeArray.Length() == child->GetLength(),
+                 "source length mismatch");
+    NS_ASSERTION(deletedCharsArray.Length() == aTextRun->GetLength(),
+                 "destination length mismatch");
+    MergeCharactersInTextRun(aTextRun, child, charsToMergeArray.Elements(),
+                             deletedCharsArray.Elements());
   } else {
     // No merging to do, so just copy; this produces a more optimized textrun.
     // We can't steal the data because the child may be cached and stealing
     // the data would break the cache.
     aTextRun->ResetGlyphRuns();
     aTextRun->CopyGlyphDataFrom(child, 0, child->GetLength(), 0);
   }
 }