bug 731536 - text-transform:capitalize should affect the first alphanumeric char in a word, not the first non-punctuation char. r=smontagu
author: Jonathan Kew <jfkthame@gmail.com>
Wed, 14 Mar 2012 06:47:14 +0000
changeset 89369 6ac8b89ae5f05537238d71aab696cce9a3ab5000
parent 89368 6913a9ff86e1290334679ec96754edb2b1b0c91f
child 89370 0362bad5569c7ffe70e8d7571b6d3ccd7fd14d24
push id: 22251
push user: bmo@edmorley.co.uk
push date: Thu, 15 Mar 2012 12:49:51 +0000
treeherder: mozilla-central@082d016c341f [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: smontagu
bugs: 731536
milestone: 14.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
bug 731536 - text-transform:capitalize should affect the first alphanumeric char in a word, not the first non-punctuation char. r=smontagu
content/base/src/nsLineBreaker.cpp
layout/reftests/text-transform/all-title-ref.html
layout/reftests/text-transform/all-title.html
--- a/content/base/src/nsLineBreaker.cpp
+++ b/content/base/src/nsLineBreaker.cpp
@@ -55,26 +55,36 @@ nsLineBreaker::~nsLineBreaker()
 {
   NS_ASSERTION(mCurrentWord.Length() == 0, "Should have Reset() before destruction!");
 }
 
 static void
 SetupCapitalization(const PRUnichar* aWord, PRUint32 aLength,
                     bool* aCapitalization)
 {
-  // Capitalize the first non-punctuation character after a space or start
+  // Capitalize the first alphanumeric character after a space or start
   // of the word.
   // The only space character a word can contain is NBSP.
   bool capitalizeNextChar = true;
   for (PRUint32 i = 0; i < aLength; ++i) {
-    if (capitalizeNextChar && !nsContentUtils::IsFirstLetterPunctuation(aWord[i])) {
-      aCapitalization[i] = true;
-      capitalizeNextChar = false;
+    PRUint32 ch = aWord[i];
+    if (capitalizeNextChar) {
+      if (NS_IS_HIGH_SURROGATE(ch) && i + 1 < aLength &&
+          NS_IS_LOW_SURROGATE(aWord[i + 1])) {
+        ch = SURROGATE_TO_UCS4(ch, aWord[i + 1]);
+      }
+      if (nsContentUtils::IsAlphanumeric(ch)) {
+        aCapitalization[i] = true;
+        capitalizeNextChar = false;
+      }
+      if (!IS_IN_BMP(ch)) {
+        ++i;
+      }
     }
-    if (aWord[i] == 0xA0 /*NBSP*/) {
+    if (ch == 0xA0 /*NBSP*/) {
       capitalizeNextChar = true;
     }
   }
 }
 
 nsresult
 nsLineBreaker::FlushCurrentWord()
 {
--- a/layout/reftests/text-transform/all-title-ref.html
+++ b/layout/reftests/text-transform/all-title-ref.html
@@ -240,17 +240,17 @@
 &#x01A6;x
 &#x01A9;x
 &#x01AE;x
 &#x0244;x
 &#x01B1;x
 &#x01B2;x
 &#x0245;x
 &#x01B7;x
-&#x0399;x
+&#x0345;X<!-- combining Greek ypogegrammeni - category Mn, doesn't get titlecased -->
 &#x0370;x
 &#x0372;x
 &#x0376;x
 &#x03FD;x
 &#x03FE;x
 &#x03FF;x
 &#x0386;x
 &#x0388;x
@@ -727,42 +727,42 @@
 &#x2169;x
 &#x216A;x
 &#x216B;x
 &#x216C;x
 &#x216D;x
 &#x216E;x
 &#x216F;x
 &#x2183;x
-&#x24B6;x
-&#x24B7;x
-&#x24B8;x
-&#x24B9;x
-&#x24BA;x
-&#x24BB;x
-&#x24BC;x
-&#x24BD;x
-&#x24BE;x
-&#x24BF;x
-&#x24C0;x
-&#x24C1;x
-&#x24C2;x
-&#x24C3;x
-&#x24C4;x
-&#x24C5;x
-&#x24C6;x
-&#x24C7;x
-&#x24C8;x
-&#x24C9;x
-&#x24CA;x
-&#x24CB;x
-&#x24CC;x
-&#x24CD;x
-&#x24CE;x
-&#x24CF;x
+&#x24D0;X<!-- circled Latin small letters - category So - not titlecased -->
+&#x24D1;X
+&#x24D2;X
+&#x24D3;X
+&#x24D4;X
+&#x24D5;X
+&#x24D6;X
+&#x24D7;X
+&#x24D8;X
+&#x24D9;X
+&#x24DA;X
+&#x24DB;X
+&#x24DC;X
+&#x24DD;X
+&#x24DE;X
+&#x24DF;X
+&#x24E0;X
+&#x24E1;X
+&#x24E2;X
+&#x24E3;X
+&#x24E4;X
+&#x24E5;X
+&#x24E6;X
+&#x24E7;X
+&#x24E8;X
+&#x24E9;X<!-- end of circled Latin small letters -->
 &#x2C00;x
 &#x2C01;x
 &#x2C02;x
 &#x2C03;x
 &#x2C04;x
 &#x2C05;x
 &#x2C06;x
 &#x2C07;x
--- a/layout/reftests/text-transform/all-title.html
+++ b/layout/reftests/text-transform/all-title.html
@@ -243,17 +243,17 @@
 &#x0280;x
 &#x0283;x
 &#x0288;x
 &#x0289;x
 &#x028A;x
 &#x028B;x
 &#x028C;x
 &#x0292;x
-&#x0345;x
+&#x0345;x<!-- combining Greek ypogegrammeni - category Mn, doesn't get titlecased -->
 &#x0371;x
 &#x0373;x
 &#x0377;x
 &#x037B;x
 &#x037C;x
 &#x037D;x
 &#x03AC;x
 &#x03AD;x
@@ -730,17 +730,17 @@
 &#x2179;x
 &#x217A;x
 &#x217B;x
 &#x217C;x
 &#x217D;x
 &#x217E;x
 &#x217F;x
 &#x2184;x
-&#x24D0;x
+&#x24D0;x<!-- circled Latin small letters - category So - not titlecased -->
 &#x24D1;x
 &#x24D2;x
 &#x24D3;x
 &#x24D4;x
 &#x24D5;x
 &#x24D6;x
 &#x24D7;x
 &#x24D8;x
@@ -755,17 +755,17 @@
 &#x24E1;x
 &#x24E2;x
 &#x24E3;x
 &#x24E4;x
 &#x24E5;x
 &#x24E6;x
 &#x24E7;x
 &#x24E8;x
-&#x24E9;x
+&#x24E9;x<!-- end of circled Latin small letters -->
 &#x2C30;x
 &#x2C31;x
 &#x2C32;x
 &#x2C33;x
 &#x2C34;x
 &#x2C35;x
 &#x2C36;x
 &#x2C37;x