Bug 1157277 - Part 3: Update String.prototype.{toLowerCase,toUpperCase,toLocaleLowerCase,toLocaleUpperCase} to work on code points. r=till
author: Tooru Fujisawa <arai_a@mac.com>
Wed, 20 Jul 2016 14:11:35 +0900
changeset 345817 4e952735e78954dffc362bc6b8e5b3b4f6147313
parent 345816 de1cf380b1d55c91ce5bd7c07f917510fa98a55e
child 345818 4ddabcdd2919946d54a1b5b06aa1de3d58105723
push id: 6389
push user: raliiev@mozilla.com
push date: Mon, 19 Sep 2016 13:38:22 +0000
treeherder: mozilla-beta@01d67bfe6c81 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: till
bugs: 1157277
milestone: 50.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1157277 - Part 3: Update String.prototype.{toLowerCase,toUpperCase,toLocaleLowerCase,toLocaleUpperCase} to work on code points. r=till
js/src/jsstr.cpp
js/src/tests/ecma_6/String/string-code-point-upper-lower-mapping.js
js/src/vm/make_unicode.py
--- a/js/src/jsstr.cpp
+++ b/js/src/jsstr.cpp
@@ -608,32 +608,58 @@ ToLowerCase(JSContext* cx, JSLinearStrin
     {
         AutoCheckCannotGC nogc;
         const CharT* chars = str->chars<CharT>(nogc);
 
         // Look for the first upper case character.
         size_t i = 0;
         for (; i < length; i++) {
             char16_t c = chars[i];
+            if (!IsSame<CharT, Latin1Char>::value) {
+                if (unicode::IsLeadSurrogate(c) && i + 1 < length) {
+                    char16_t trail = chars[i + 1];
+                    if (unicode::IsTrailSurrogate(trail)) {
+                        if (unicode::CanLowerCaseNonBMP(c, trail))
+                            break;
+
+                        i++;
+                        continue;
+                    }
+                }
+            }
             if (unicode::CanLowerCase(c))
                 break;
         }
 
         // If all characters are lower case, return the input string.
         if (i == length)
             return str;
 
         newChars = cx->make_pod_array<CharT>(length + 1);
         if (!newChars)
             return nullptr;
 
         PodCopy(newChars.get(), chars, i);
 
         for (; i < length; i++) {
-            char16_t c = unicode::ToLowerCase(chars[i]);
+            char16_t c = chars[i];
+            if (!IsSame<CharT, Latin1Char>::value) {
+                if (unicode::IsLeadSurrogate(c) && i + 1 < length) {
+                    char16_t trail = chars[i + 1];
+                    if (unicode::IsTrailSurrogate(trail)) {
+                        trail = unicode::ToLowerCaseNonBMPTrail(c, trail);
+                        newChars[i] = c;
+                        newChars[i + 1] = trail;
+                        i++;
+                        continue;
+                    }
+                }
+            }
+
+            c = unicode::ToLowerCase(c);
             MOZ_ASSERT_IF((IsSame<CharT, Latin1Char>::value), c <= JSString::MAX_LATIN1_CHAR);
             newChars[i] = c;
         }
 
         newChars[length] = 0;
     }
 
     JSString* res = NewStringDontDeflate<CanGC>(cx, newChars.get(), length);
@@ -702,17 +728,30 @@ static void
 ToUpperCaseImpl(DestChar* destChars, const SrcChar* srcChars, size_t firstLowerCase, size_t length)
 {
     MOZ_ASSERT(firstLowerCase < length);
 
     for (size_t i = 0; i < firstLowerCase; i++)
         destChars[i] = srcChars[i];
 
     for (size_t i = firstLowerCase; i < length; i++) {
-        char16_t c = unicode::ToUpperCase(srcChars[i]);
+        char16_t c = srcChars[i];
+        if (!IsSame<DestChar, Latin1Char>::value) {
+            if (unicode::IsLeadSurrogate(c) && i + 1 < length) {
+                char16_t trail = srcChars[i + 1];
+                if (unicode::IsTrailSurrogate(trail)) {
+                    trail = unicode::ToUpperCaseNonBMPTrail(c, trail);
+                    destChars[i] = c;
+                    destChars[i + 1] = trail;
+                    i++;
+                    continue;
+                }
+            }
+        }
+        c = unicode::ToUpperCase(c);
         MOZ_ASSERT_IF((IsSame<DestChar, Latin1Char>::value), c <= JSString::MAX_LATIN1_CHAR);
         destChars[i] = c;
     }
 
     destChars[length] = '\0';
 }
 
 template <typename CharT>
@@ -727,16 +766,28 @@ ToUpperCase(JSContext* cx, JSLinearStrin
     {
         AutoCheckCannotGC nogc;
         const CharT* chars = str->chars<CharT>(nogc);
 
         // Look for the first lower case character.
         size_t i = 0;
         for (; i < length; i++) {
             char16_t c = chars[i];
+            if (!IsSame<CharT, Latin1Char>::value) {
+                if (unicode::IsLeadSurrogate(c) && i + 1 < length) {
+                    char16_t trail = chars[i + 1];
+                    if (unicode::IsTrailSurrogate(trail)) {
+                        if (unicode::CanUpperCaseNonBMP(c, trail))
+                            break;
+
+                        i++;
+                        continue;
+                    }
+                }
+            }
             if (unicode::CanUpperCase(c))
                 break;
         }
 
         // If all characters are upper case, return the input string.
         if (i == length)
             return str;
 
new file mode 100644
--- /dev/null
+++ b/js/src/tests/ecma_6/String/string-code-point-upper-lower-mapping.js
@@ -0,0 +1,89 @@
+/* Generated by make_unicode.py DO NOT MODIFY */
+
+/*
+ * Any copyright is dedicated to the Public Domain.
+ * http://creativecommons.org/licenses/publicdomain/
+ */
+assertEq(String.fromCodePoint(0x10428).toUpperCase().codePointAt(0), 0x10400);
+assertEq(String.fromCodePoint(0x10429).toUpperCase().codePointAt(0), 0x10401);
+assertEq(String.fromCodePoint(0x1042a).toUpperCase().codePointAt(0), 0x10402);
+assertEq(String.fromCodePoint(0x1042b).toUpperCase().codePointAt(0), 0x10403);
+assertEq(String.fromCodePoint(0x1042c).toUpperCase().codePointAt(0), 0x10404);
+assertEq(String.fromCodePoint(0x1042d).toUpperCase().codePointAt(0), 0x10405);
+assertEq(String.fromCodePoint(0x1042e).toUpperCase().codePointAt(0), 0x10406);
+assertEq(String.fromCodePoint(0x1042f).toUpperCase().codePointAt(0), 0x10407);
+assertEq(String.fromCodePoint(0x10430).toUpperCase().codePointAt(0), 0x10408);
+assertEq(String.fromCodePoint(0x10431).toUpperCase().codePointAt(0), 0x10409);
+assertEq(String.fromCodePoint(0x10432).toUpperCase().codePointAt(0), 0x1040a);
+assertEq(String.fromCodePoint(0x10433).toUpperCase().codePointAt(0), 0x1040b);
+assertEq(String.fromCodePoint(0x10434).toUpperCase().codePointAt(0), 0x1040c);
+assertEq(String.fromCodePoint(0x10435).toUpperCase().codePointAt(0), 0x1040d);
+assertEq(String.fromCodePoint(0x10436).toUpperCase().codePointAt(0), 0x1040e);
+assertEq(String.fromCodePoint(0x10437).toUpperCase().codePointAt(0), 0x1040f);
+assertEq(String.fromCodePoint(0x10438).toUpperCase().codePointAt(0), 0x10410);
+assertEq(String.fromCodePoint(0x10439).toUpperCase().codePointAt(0), 0x10411);
+assertEq(String.fromCodePoint(0x1043a).toUpperCase().codePointAt(0), 0x10412);
+assertEq(String.fromCodePoint(0x1043b).toUpperCase().codePointAt(0), 0x10413);
+assertEq(String.fromCodePoint(0x1043c).toUpperCase().codePointAt(0), 0x10414);
+assertEq(String.fromCodePoint(0x1043d).toUpperCase().codePointAt(0), 0x10415);
+assertEq(String.fromCodePoint(0x1043e).toUpperCase().codePointAt(0), 0x10416);
+assertEq(String.fromCodePoint(0x1043f).toUpperCase().codePointAt(0), 0x10417);
+assertEq(String.fromCodePoint(0x10440).toUpperCase().codePointAt(0), 0x10418);
+assertEq(String.fromCodePoint(0x10441).toUpperCase().codePointAt(0), 0x10419);
+assertEq(String.fromCodePoint(0x10442).toUpperCase().codePointAt(0), 0x1041a);
+assertEq(String.fromCodePoint(0x10443).toUpperCase().codePointAt(0), 0x1041b);
+assertEq(String.fromCodePoint(0x10444).toUpperCase().codePointAt(0), 0x1041c);
+assertEq(String.fromCodePoint(0x10445).toUpperCase().codePointAt(0), 0x1041d);
+assertEq(String.fromCodePoint(0x10446).toUpperCase().codePointAt(0), 0x1041e);
+assertEq(String.fromCodePoint(0x10447).toUpperCase().codePointAt(0), 0x1041f);
+assertEq(String.fromCodePoint(0x10448).toUpperCase().codePointAt(0), 0x10420);
+assertEq(String.fromCodePoint(0x10449).toUpperCase().codePointAt(0), 0x10421);
+assertEq(String.fromCodePoint(0x1044a).toUpperCase().codePointAt(0), 0x10422);
+assertEq(String.fromCodePoint(0x1044b).toUpperCase().codePointAt(0), 0x10423);
+assertEq(String.fromCodePoint(0x1044c).toUpperCase().codePointAt(0), 0x10424);
+assertEq(String.fromCodePoint(0x1044d).toUpperCase().codePointAt(0), 0x10425);
+assertEq(String.fromCodePoint(0x1044e).toUpperCase().codePointAt(0), 0x10426);
+assertEq(String.fromCodePoint(0x1044f).toUpperCase().codePointAt(0), 0x10427);
+assertEq(String.fromCodePoint(0x10400).toLowerCase().codePointAt(0), 0x10428);
+assertEq(String.fromCodePoint(0x10401).toLowerCase().codePointAt(0), 0x10429);
+assertEq(String.fromCodePoint(0x10402).toLowerCase().codePointAt(0), 0x1042a);
+assertEq(String.fromCodePoint(0x10403).toLowerCase().codePointAt(0), 0x1042b);
+assertEq(String.fromCodePoint(0x10404).toLowerCase().codePointAt(0), 0x1042c);
+assertEq(String.fromCodePoint(0x10405).toLowerCase().codePointAt(0), 0x1042d);
+assertEq(String.fromCodePoint(0x10406).toLowerCase().codePointAt(0), 0x1042e);
+assertEq(String.fromCodePoint(0x10407).toLowerCase().codePointAt(0), 0x1042f);
+assertEq(String.fromCodePoint(0x10408).toLowerCase().codePointAt(0), 0x10430);
+assertEq(String.fromCodePoint(0x10409).toLowerCase().codePointAt(0), 0x10431);
+assertEq(String.fromCodePoint(0x1040a).toLowerCase().codePointAt(0), 0x10432);
+assertEq(String.fromCodePoint(0x1040b).toLowerCase().codePointAt(0), 0x10433);
+assertEq(String.fromCodePoint(0x1040c).toLowerCase().codePointAt(0), 0x10434);
+assertEq(String.fromCodePoint(0x1040d).toLowerCase().codePointAt(0), 0x10435);
+assertEq(String.fromCodePoint(0x1040e).toLowerCase().codePointAt(0), 0x10436);
+assertEq(String.fromCodePoint(0x1040f).toLowerCase().codePointAt(0), 0x10437);
+assertEq(String.fromCodePoint(0x10410).toLowerCase().codePointAt(0), 0x10438);
+assertEq(String.fromCodePoint(0x10411).toLowerCase().codePointAt(0), 0x10439);
+assertEq(String.fromCodePoint(0x10412).toLowerCase().codePointAt(0), 0x1043a);
+assertEq(String.fromCodePoint(0x10413).toLowerCase().codePointAt(0), 0x1043b);
+assertEq(String.fromCodePoint(0x10414).toLowerCase().codePointAt(0), 0x1043c);
+assertEq(String.fromCodePoint(0x10415).toLowerCase().codePointAt(0), 0x1043d);
+assertEq(String.fromCodePoint(0x10416).toLowerCase().codePointAt(0), 0x1043e);
+assertEq(String.fromCodePoint(0x10417).toLowerCase().codePointAt(0), 0x1043f);
+assertEq(String.fromCodePoint(0x10418).toLowerCase().codePointAt(0), 0x10440);
+assertEq(String.fromCodePoint(0x10419).toLowerCase().codePointAt(0), 0x10441);
+assertEq(String.fromCodePoint(0x1041a).toLowerCase().codePointAt(0), 0x10442);
+assertEq(String.fromCodePoint(0x1041b).toLowerCase().codePointAt(0), 0x10443);
+assertEq(String.fromCodePoint(0x1041c).toLowerCase().codePointAt(0), 0x10444);
+assertEq(String.fromCodePoint(0x1041d).toLowerCase().codePointAt(0), 0x10445);
+assertEq(String.fromCodePoint(0x1041e).toLowerCase().codePointAt(0), 0x10446);
+assertEq(String.fromCodePoint(0x1041f).toLowerCase().codePointAt(0), 0x10447);
+assertEq(String.fromCodePoint(0x10420).toLowerCase().codePointAt(0), 0x10448);
+assertEq(String.fromCodePoint(0x10421).toLowerCase().codePointAt(0), 0x10449);
+assertEq(String.fromCodePoint(0x10422).toLowerCase().codePointAt(0), 0x1044a);
+assertEq(String.fromCodePoint(0x10423).toLowerCase().codePointAt(0), 0x1044b);
+assertEq(String.fromCodePoint(0x10424).toLowerCase().codePointAt(0), 0x1044c);
+assertEq(String.fromCodePoint(0x10425).toLowerCase().codePointAt(0), 0x1044d);
+assertEq(String.fromCodePoint(0x10426).toLowerCase().codePointAt(0), 0x1044e);
+assertEq(String.fromCodePoint(0x10427).toLowerCase().codePointAt(0), 0x1044f);
+
+if (typeof reportCompare === "function")
+    reportCompare(true, true);
--- a/js/src/vm/make_unicode.py
+++ b/js/src/vm/make_unicode.py
@@ -134,17 +134,18 @@ def make_non_bmp_convert_macro(out_file,
             from_code, to_code, from_lead, from_trail, to_trail, diff))
 
     out_file.write('#define FOR_EACH_NON_BMP_{}(macro) \\\n'.format(name))
     out_file.write(' \\\n'.join(lines))
     out_file.write('\n')
 
 def generate_unicode_stuff(unicode_data, case_folding,
                            data_file, non_bmp_file,
-                           test_mapping, test_space, test_icase):
+                           test_mapping, test_non_bmp_mapping,
+                           test_space, test_icase):
     dummy = (0, 0, 0)
     table = [dummy]
     cache = {dummy: 0}
     index = [0] * (MAX + 1)
     folding_map = {}
     rev_folding_map = {}
     folding_dummy = (0, 0, 0, 0)
     folding_table = [folding_dummy]
@@ -324,16 +325,33 @@ for (var i = 0; i <= 0xffff; i++) {
     assertEq(char.toUpperCase().charCodeAt(0), info[0]);
     assertEq(char.toLowerCase().charCodeAt(0), info[1]);
 }
 
 if (typeof reportCompare === "function")
     reportCompare(true, true);
 """)
 
+    test_non_bmp_mapping.write('/* Generated by make_unicode.py DO NOT MODIFY */\n')
+    test_non_bmp_mapping.write(public_domain)
+    for code in sorted(non_bmp_upper_map.keys()):
+        test_non_bmp_mapping.write("""\
+assertEq(String.fromCodePoint(0x{:x}).toUpperCase().codePointAt(0), 0x{:x});
+""".format(code, non_bmp_upper_map[code]))
+    for code in sorted(non_bmp_lower_map.keys()):
+        test_non_bmp_mapping.write("""\
+assertEq(String.fromCodePoint(0x{:x}).toLowerCase().codePointAt(0), 0x{:x});
+""".format(code, non_bmp_lower_map[code]))
+
+    test_non_bmp_mapping.write("""
+if (typeof reportCompare === "function")
+    reportCompare(true, true);
+""")
+
+
     test_space.write('/* Generated by make_unicode.py DO NOT MODIFY */\n')
     test_space.write(public_domain)
     test_space.write('var onlySpace = String.fromCharCode(' +
                      ', '.join(map(lambda c: hex(c), test_space_table)) + ');\n')
     test_space.write("""
 assertEq(onlySpace.trim(), "");
 assertEq((onlySpace + 'aaaa').trim(), 'aaaa');
 assertEq(('aaaa' + onlySpace).trim(), 'aaaa');
@@ -590,10 +608,11 @@ if __name__ == '__main__':
         case_folding.write(data)
         case_folding.seek(0)
 
     print('Generating...')
     generate_unicode_stuff(unicode_data, case_folding,
         open('Unicode.cpp', 'w'),
         open('UnicodeNonBMP.h', 'w'),
         open('../tests/ecma_5/String/string-upper-lower-mapping.js', 'w'),
+        open('../tests/ecma_6/String/string-code-point-upper-lower-mapping.js', 'w'),
         open('../tests/ecma_5/String/string-space-trim.js', 'w'),
         open('../tests/ecma_6/RegExp/unicode-ignoreCase.js', 'w'))