Bug 1157277 - Part 3: Update String.prototype.{toLowerCase,toUpperCase,toLocaleLowerCase,toLocaleUpperCase} to work on code points. r=till
author: Tooru Fujisawa <arai_a@mac.com>
Wed, 20 Jul 2016 14:11:35 +0900
changeset 330851 4e952735e78954dffc362bc6b8e5b3b4f6147313
parent 330850 de1cf380b1d55c91ce5bd7c07f917510fa98a55e
child 330852 4ddabcdd2919946d54a1b5b06aa1de3d58105723
push id: 9858
push user: jlund@mozilla.com
push date: Mon, 01 Aug 2016 14:37:10 +0000
treeherder: mozilla-aurora@203106ef6cb6 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: till
bugs: 1157277
milestone: 50.0a1
Bug 1157277 - Part 3: Update String.prototype.{toLowerCase,toUpperCase,toLocaleLowerCase,toLocaleUpperCase} to work on code points. r=till
js/src/jsstr.cpp
js/src/tests/ecma_6/String/string-code-point-upper-lower-mapping.js
js/src/vm/make_unicode.py
--- a/js/src/jsstr.cpp
+++ b/js/src/jsstr.cpp
@@ -608,32 +608,58 @@ ToLowerCase(JSContext* cx, JSLinearStrin
     {
         AutoCheckCannotGC nogc;
         const CharT* chars = str->chars<CharT>(nogc);
 
         // Look for the first upper case character.
         size_t i = 0;
         for (; i < length; i++) {
             char16_t c = chars[i];
+            if (!IsSame<CharT, Latin1Char>::value) {
+                if (unicode::IsLeadSurrogate(c) && i + 1 < length) {
+                    char16_t trail = chars[i + 1];
+                    if (unicode::IsTrailSurrogate(trail)) {
+                        if (unicode::CanLowerCaseNonBMP(c, trail))
+                            break;
+
+                        i++;
+                        continue;
+                    }
+                }
+            }
             if (unicode::CanLowerCase(c))
                 break;
         }
 
         // If all characters are lower case, return the input string.
         if (i == length)
             return str;
 
         newChars = cx->make_pod_array<CharT>(length + 1);
         if (!newChars)
             return nullptr;
 
         PodCopy(newChars.get(), chars, i);
 
         for (; i < length; i++) {
-            char16_t c = unicode::ToLowerCase(chars[i]);
+            char16_t c = chars[i];
+            if (!IsSame<CharT, Latin1Char>::value) {
+                if (unicode::IsLeadSurrogate(c) && i + 1 < length) {
+                    char16_t trail = chars[i + 1];
+                    if (unicode::IsTrailSurrogate(trail)) {
+                        trail = unicode::ToLowerCaseNonBMPTrail(c, trail);
+                        newChars[i] = c;
+                        newChars[i + 1] = trail;
+                        i++;
+                        continue;
+                    }
+                }
+            }
+
+            c = unicode::ToLowerCase(c);
             MOZ_ASSERT_IF((IsSame<CharT, Latin1Char>::value), c <= JSString::MAX_LATIN1_CHAR);
             newChars[i] = c;
         }
 
         newChars[length] = 0;
     }
 
     JSString* res = NewStringDontDeflate<CanGC>(cx, newChars.get(), length);
@@ -702,17 +728,30 @@ static void
 ToUpperCaseImpl(DestChar* destChars, const SrcChar* srcChars, size_t firstLowerCase, size_t length)
 {
     MOZ_ASSERT(firstLowerCase < length);
 
     for (size_t i = 0; i < firstLowerCase; i++)
         destChars[i] = srcChars[i];
 
     for (size_t i = firstLowerCase; i < length; i++) {
-        char16_t c = unicode::ToUpperCase(srcChars[i]);
+        char16_t c = srcChars[i];
+        if (!IsSame<DestChar, Latin1Char>::value) {
+            if (unicode::IsLeadSurrogate(c) && i + 1 < length) {
+                char16_t trail = srcChars[i + 1];
+                if (unicode::IsTrailSurrogate(trail)) {
+                    trail = unicode::ToUpperCaseNonBMPTrail(c, trail);
+                    destChars[i] = c;
+                    destChars[i + 1] = trail;
+                    i++;
+                    continue;
+                }
+            }
+        }
+        c = unicode::ToUpperCase(c);
         MOZ_ASSERT_IF((IsSame<DestChar, Latin1Char>::value), c <= JSString::MAX_LATIN1_CHAR);
         destChars[i] = c;
     }
 
     destChars[length] = '\0';
 }
 
 template <typename CharT>
@@ -727,16 +766,28 @@ ToUpperCase(JSContext* cx, JSLinearStrin
     {
         AutoCheckCannotGC nogc;
         const CharT* chars = str->chars<CharT>(nogc);
 
         // Look for the first lower case character.
         size_t i = 0;
         for (; i < length; i++) {
             char16_t c = chars[i];
+            if (!IsSame<CharT, Latin1Char>::value) {
+                if (unicode::IsLeadSurrogate(c) && i + 1 < length) {
+                    char16_t trail = chars[i + 1];
+                    if (unicode::IsTrailSurrogate(trail)) {
+                        if (unicode::CanUpperCaseNonBMP(c, trail))
+                            break;
+
+                        i++;
+                        continue;
+                    }
+                }
+            }
             if (unicode::CanUpperCase(c))
                 break;
         }
 
         // If all characters are upper case, return the input string.
         if (i == length)
             return str;
 
new file mode 100644
--- /dev/null
+++ b/js/src/tests/ecma_6/String/string-code-point-upper-lower-mapping.js
@@ -0,0 +1,89 @@
+/* Generated by make_unicode.py DO NOT MODIFY */
+
+/*
+ * Any copyright is dedicated to the Public Domain.
+ * http://creativecommons.org/licenses/publicdomain/
+ */
+assertEq(String.fromCodePoint(0x10428).toUpperCase().codePointAt(0), 0x10400);
+assertEq(String.fromCodePoint(0x10429).toUpperCase().codePointAt(0), 0x10401);
+assertEq(String.fromCodePoint(0x1042a).toUpperCase().codePointAt(0), 0x10402);
+assertEq(String.fromCodePoint(0x1042b).toUpperCase().codePointAt(0), 0x10403);
+assertEq(String.fromCodePoint(0x1042c).toUpperCase().codePointAt(0), 0x10404);
+assertEq(String.fromCodePoint(0x1042d).toUpperCase().codePointAt(0), 0x10405);
+assertEq(String.fromCodePoint(0x1042e).toUpperCase().codePointAt(0), 0x10406);
+assertEq(String.fromCodePoint(0x1042f).toUpperCase().codePointAt(0), 0x10407);
+assertEq(String.fromCodePoint(0x10430).toUpperCase().codePointAt(0), 0x10408);
+assertEq(String.fromCodePoint(0x10431).toUpperCase().codePointAt(0), 0x10409);
+assertEq(String.fromCodePoint(0x10432).toUpperCase().codePointAt(0), 0x1040a);
+assertEq(String.fromCodePoint(0x10433).toUpperCase().codePointAt(0), 0x1040b);
+assertEq(String.fromCodePoint(0x10434).toUpperCase().codePointAt(0), 0x1040c);
+assertEq(String.fromCodePoint(0x10435).toUpperCase().codePointAt(0), 0x1040d);
+assertEq(String.fromCodePoint(0x10436).toUpperCase().codePointAt(0), 0x1040e);
+assertEq(String.fromCodePoint(0x10437).toUpperCase().codePointAt(0), 0x1040f);
+assertEq(String.fromCodePoint(0x10438).toUpperCase().codePointAt(0), 0x10410);
+assertEq(String.fromCodePoint(0x10439).toUpperCase().codePointAt(0), 0x10411);
+assertEq(String.fromCodePoint(0x1043a).toUpperCase().codePointAt(0), 0x10412);
+assertEq(String.fromCodePoint(0x1043b).toUpperCase().codePointAt(0), 0x10413);
+assertEq(String.fromCodePoint(0x1043c).toUpperCase().codePointAt(0), 0x10414);
+assertEq(String.fromCodePoint(0x1043d).toUpperCase().codePointAt(0), 0x10415);
+assertEq(String.fromCodePoint(0x1043e).toUpperCase().codePointAt(0), 0x10416);
+assertEq(String.fromCodePoint(0x1043f).toUpperCase().codePointAt(0), 0x10417);
+assertEq(String.fromCodePoint(0x10440).toUpperCase().codePointAt(0), 0x10418);
+assertEq(String.fromCodePoint(0x10441).toUpperCase().codePointAt(0), 0x10419);
+assertEq(String.fromCodePoint(0x10442).toUpperCase().codePointAt(0), 0x1041a);
+assertEq(String.fromCodePoint(0x10443).toUpperCase().codePointAt(0), 0x1041b);
+assertEq(String.fromCodePoint(0x10444).toUpperCase().codePointAt(0), 0x1041c);
+assertEq(String.fromCodePoint(0x10445).toUpperCase().codePointAt(0), 0x1041d);
+assertEq(String.fromCodePoint(0x10446).toUpperCase().codePointAt(0), 0x1041e);
+assertEq(String.fromCodePoint(0x10447).toUpperCase().codePointAt(0), 0x1041f);
+assertEq(String.fromCodePoint(0x10448).toUpperCase().codePointAt(0), 0x10420);
+assertEq(String.fromCodePoint(0x10449).toUpperCase().codePointAt(0), 0x10421);
+assertEq(String.fromCodePoint(0x1044a).toUpperCase().codePointAt(0), 0x10422);
+assertEq(String.fromCodePoint(0x1044b).toUpperCase().codePointAt(0), 0x10423);
+assertEq(String.fromCodePoint(0x1044c).toUpperCase().codePointAt(0), 0x10424);
+assertEq(String.fromCodePoint(0x1044d).toUpperCase().codePointAt(0), 0x10425);
+assertEq(String.fromCodePoint(0x1044e).toUpperCase().codePointAt(0), 0x10426);
+assertEq(String.fromCodePoint(0x1044f).toUpperCase().codePointAt(0), 0x10427);
+assertEq(String.fromCodePoint(0x10400).toLowerCase().codePointAt(0), 0x10428);
+assertEq(String.fromCodePoint(0x10401).toLowerCase().codePointAt(0), 0x10429);
+assertEq(String.fromCodePoint(0x10402).toLowerCase().codePointAt(0), 0x1042a);
+assertEq(String.fromCodePoint(0x10403).toLowerCase().codePointAt(0), 0x1042b);
+assertEq(String.fromCodePoint(0x10404).toLowerCase().codePointAt(0), 0x1042c);
+assertEq(String.fromCodePoint(0x10405).toLowerCase().codePointAt(0), 0x1042d);
+assertEq(String.fromCodePoint(0x10406).toLowerCase().codePointAt(0), 0x1042e);
+assertEq(String.fromCodePoint(0x10407).toLowerCase().codePointAt(0), 0x1042f);
+assertEq(String.fromCodePoint(0x10408).toLowerCase().codePointAt(0), 0x10430);
+assertEq(String.fromCodePoint(0x10409).toLowerCase().codePointAt(0), 0x10431);
+assertEq(String.fromCodePoint(0x1040a).toLowerCase().codePointAt(0), 0x10432);
+assertEq(String.fromCodePoint(0x1040b).toLowerCase().codePointAt(0), 0x10433);
+assertEq(String.fromCodePoint(0x1040c).toLowerCase().codePointAt(0), 0x10434);
+assertEq(String.fromCodePoint(0x1040d).toLowerCase().codePointAt(0), 0x10435);
+assertEq(String.fromCodePoint(0x1040e).toLowerCase().codePointAt(0), 0x10436);
+assertEq(String.fromCodePoint(0x1040f).toLowerCase().codePointAt(0), 0x10437);
+assertEq(String.fromCodePoint(0x10410).toLowerCase().codePointAt(0), 0x10438);
+assertEq(String.fromCodePoint(0x10411).toLowerCase().codePointAt(0), 0x10439);
+assertEq(String.fromCodePoint(0x10412).toLowerCase().codePointAt(0), 0x1043a);
+assertEq(String.fromCodePoint(0x10413).toLowerCase().codePointAt(0), 0x1043b);
+assertEq(String.fromCodePoint(0x10414).toLowerCase().codePointAt(0), 0x1043c);
+assertEq(String.fromCodePoint(0x10415).toLowerCase().codePointAt(0), 0x1043d);
+assertEq(String.fromCodePoint(0x10416).toLowerCase().codePointAt(0), 0x1043e);
+assertEq(String.fromCodePoint(0x10417).toLowerCase().codePointAt(0), 0x1043f);
+assertEq(String.fromCodePoint(0x10418).toLowerCase().codePointAt(0), 0x10440);
+assertEq(String.fromCodePoint(0x10419).toLowerCase().codePointAt(0), 0x10441);
+assertEq(String.fromCodePoint(0x1041a).toLowerCase().codePointAt(0), 0x10442);
+assertEq(String.fromCodePoint(0x1041b).toLowerCase().codePointAt(0), 0x10443);
+assertEq(String.fromCodePoint(0x1041c).toLowerCase().codePointAt(0), 0x10444);
+assertEq(String.fromCodePoint(0x1041d).toLowerCase().codePointAt(0), 0x10445);
+assertEq(String.fromCodePoint(0x1041e).toLowerCase().codePointAt(0), 0x10446);
+assertEq(String.fromCodePoint(0x1041f).toLowerCase().codePointAt(0), 0x10447);
+assertEq(String.fromCodePoint(0x10420).toLowerCase().codePointAt(0), 0x10448);
+assertEq(String.fromCodePoint(0x10421).toLowerCase().codePointAt(0), 0x10449);
+assertEq(String.fromCodePoint(0x10422).toLowerCase().codePointAt(0), 0x1044a);
+assertEq(String.fromCodePoint(0x10423).toLowerCase().codePointAt(0), 0x1044b);
+assertEq(String.fromCodePoint(0x10424).toLowerCase().codePointAt(0), 0x1044c);
+assertEq(String.fromCodePoint(0x10425).toLowerCase().codePointAt(0), 0x1044d);
+assertEq(String.fromCodePoint(0x10426).toLowerCase().codePointAt(0), 0x1044e);
+assertEq(String.fromCodePoint(0x10427).toLowerCase().codePointAt(0), 0x1044f);
+
+if (typeof reportCompare === "function")
+    reportCompare(true, true);
--- a/js/src/vm/make_unicode.py
+++ b/js/src/vm/make_unicode.py
@@ -134,17 +134,18 @@ def make_non_bmp_convert_macro(out_file,
             from_code, to_code, from_lead, from_trail, to_trail, diff))
 
     out_file.write('#define FOR_EACH_NON_BMP_{}(macro) \\\n'.format(name))
     out_file.write(' \\\n'.join(lines))
     out_file.write('\n')
 
 def generate_unicode_stuff(unicode_data, case_folding,
                            data_file, non_bmp_file,
-                           test_mapping, test_space, test_icase):
+                           test_mapping, test_non_bmp_mapping,
+                           test_space, test_icase):
     dummy = (0, 0, 0)
     table = [dummy]
     cache = {dummy: 0}
     index = [0] * (MAX + 1)
     folding_map = {}
     rev_folding_map = {}
     folding_dummy = (0, 0, 0, 0)
     folding_table = [folding_dummy]
@@ -324,16 +325,33 @@ for (var i = 0; i <= 0xffff; i++) {
     assertEq(char.toUpperCase().charCodeAt(0), info[0]);
     assertEq(char.toLowerCase().charCodeAt(0), info[1]);
 }
 
 if (typeof reportCompare === "function")
     reportCompare(true, true);
 """)
 
+    test_non_bmp_mapping.write('/* Generated by make_unicode.py DO NOT MODIFY */\n')
+    test_non_bmp_mapping.write(public_domain)
+    for code in sorted(non_bmp_upper_map.keys()):
+        test_non_bmp_mapping.write("""\
+assertEq(String.fromCodePoint(0x{:x}).toUpperCase().codePointAt(0), 0x{:x});
+""".format(code, non_bmp_upper_map[code]))
+    for code in sorted(non_bmp_lower_map.keys()):
+        test_non_bmp_mapping.write("""\
+assertEq(String.fromCodePoint(0x{:x}).toLowerCase().codePointAt(0), 0x{:x});
+""".format(code, non_bmp_lower_map[code]))
+
+    test_non_bmp_mapping.write("""
+if (typeof reportCompare === "function")
+    reportCompare(true, true);
+""")
+
+
     test_space.write('/* Generated by make_unicode.py DO NOT MODIFY */\n')
     test_space.write(public_domain)
     test_space.write('var onlySpace = String.fromCharCode(' +
                      ', '.join(map(lambda c: hex(c), test_space_table)) + ');\n')
     test_space.write("""
 assertEq(onlySpace.trim(), "");
 assertEq((onlySpace + 'aaaa').trim(), 'aaaa');
 assertEq(('aaaa' + onlySpace).trim(), 'aaaa');
@@ -590,10 +608,11 @@ if __name__ == '__main__':
         case_folding.write(data)
         case_folding.seek(0)
 
     print('Generating...')
     generate_unicode_stuff(unicode_data, case_folding,
         open('Unicode.cpp', 'w'),
         open('UnicodeNonBMP.h', 'w'),
         open('../tests/ecma_5/String/string-upper-lower-mapping.js', 'w'),
+        open('../tests/ecma_6/String/string-code-point-upper-lower-mapping.js', 'w'),
         open('../tests/ecma_5/String/string-space-trim.js', 'w'),
         open('../tests/ecma_6/RegExp/unicode-ignoreCase.js', 'w'))