bug 732443 - handle surrogate pairs correctly in ClusterIterator. r=emk
author Jonathan Kew <jfkthame@gmail.com>
Sat, 03 Mar 2012 09:16:45 +0000
changeset 88402 bfa4df617fcab4a43d2fdaa21f8c0483a8834377
parent 88399 180aae8f8d2d1f65db819066f8c125457ff327b4
child 88403 42cf37973665b9df8b9056145febc350a2293c58
push id 157
push user Ms2ger@gmail.com
push date Wed, 07 Mar 2012 19:27:10 +0000
reviewers emk
bugs 732443
milestone 13.0a1
bug 732443 - handle surrogate pairs correctly in ClusterIterator. r=emk
intl/unicharutil/src/nsUnicodeProperties.cpp
--- a/intl/unicharutil/src/nsUnicodeProperties.cpp
+++ b/intl/unicharutil/src/nsUnicodeProperties.cpp
@@ -287,20 +287,23 @@ ClusterIterator::Next()
 {
     if (AtEnd()) {
         NS_WARNING("ClusterIterator has already reached the end");
         return;
     }
 
     PRUint32 ch = *mPos++;
 
-    // Handle conjoining Jamo that make Hangul syllables
-    if ((ch & ~0xff) == 0x1100 ||
+    if (NS_IS_HIGH_SURROGATE(ch) && mPos < mLimit &&
+        NS_IS_LOW_SURROGATE(*mPos)) {
+        ch = SURROGATE_TO_UCS4(ch, *mPos++);
+    } else if ((ch & ~0xff) == 0x1100 ||
         (ch >= 0xa960 && ch <= 0xa97f) ||
         (ch >= 0xac00 && ch <= 0xd7ff)) {
+        // Handle conjoining Jamo that make Hangul syllables
         HSType hangulState = GetHangulSyllableType(ch);
         while (mPos < mLimit) {
             ch = *mPos;
             HSType hangulType = GetHangulSyllableType(ch);
             switch (hangulType) {
             case HST_L:
             case HST_LV:
             case HST_LVT:
@@ -332,24 +335,27 @@ ClusterIterator::Next()
     }
 
     while (mPos < mLimit) {
         ch = *mPos;
 
         // Check for surrogate pairs; note that isolated surrogates will just
         // be treated as generic (non-cluster-extending) characters here,
         // which is fine for cluster-iterating purposes
-        if (NS_IS_LOW_SURROGATE(ch) &&
-            NS_IS_HIGH_SURROGATE(*(mPos - 1))) {
-            ch = SURROGATE_TO_UCS4(*(mPos - 1), *mPos);
-            mPos++;
+        if (NS_IS_HIGH_SURROGATE(ch) && mPos < mLimit - 1 &&
+            NS_IS_LOW_SURROGATE(*(mPos + 1))) {
+            ch = SURROGATE_TO_UCS4(ch, *(mPos + 1));
         }
 
         if (!IsClusterExtender(ch)) {
             break;
         }
+
         mPos++;
+        if (!IS_IN_BMP(ch)) {
+            mPos++;
+        }
     }
 }
 
 } // end namespace unicode
 
 } // end namespace mozilla