Bug 1019543 - Fix toLowerCase/toUpperCase to return the original string if possible. r=luke
author Jan de Mooij <jdemooij@mozilla.com>
Wed, 30 Jul 2014 14:38:28 +0200
changeset 196850 56861f2edc5d3814d5341d49845eb98fc4669d45
parent 196849 db9349e9fe14c02044bcbede9bcabbb3ff50c7cb
child 196851 6bc1a62eb4439c5805d55ef57e5433e23cb28528
push id 1
push user root
push date Mon, 20 Oct 2014 17:29:22 +0000
reviewers luke
bugs 1019543
milestone 34.0a1
Bug 1019543 - Fix toLowerCase/toUpperCase to return the original string if possible. r=luke
js/src/jit-test/tests/latin1/toLowerCase-toUpperCase.js
js/src/jsstr.cpp
--- a/js/src/jit-test/tests/latin1/toLowerCase-toUpperCase.js
+++ b/js/src/jit-test/tests/latin1/toLowerCase-toUpperCase.js
@@ -27,19 +27,21 @@ function testToLowerCase() {
 testToLowerCase();
 
 function testToUpperCase() {
     var s1 = "abcdefgABCDEFGH 12345";
     assertEq(isLatin1(s1), true);
 
     // Latin1
     var s2 = s1.toUpperCase();
+    assertEq(isLatin1(s2), true);
     assertEq(s2, "ABCDEFGABCDEFGH 12345");
 
     s2 = s1.toLocaleUpperCase();
+    assertEq(isLatin1(s2), true);
     assertEq(s2, "ABCDEFGABCDEFGH 12345");
 
     // TwoByte
     s2 = "abcdefg\u1200ABCDEFGH 12345\u1E0F".toUpperCase();
     assertEq(s2, "ABCDEFG\u1200ABCDEFGH 12345\u1E0E");
 
     s2 = "abcdefg\u1200ABCDEFGH 12345\u1E0F".toLocaleUpperCase();
     assertEq(s2, "ABCDEFG\u1200ABCDEFGH 12345\u1E0E");
--- a/js/src/jsstr.cpp
+++ b/js/src/jsstr.cpp
@@ -698,37 +698,54 @@ str_substring(JSContext *cx, unsigned ar
 }
 
 template <typename CharT>
 static JSString *
 ToLowerCase(JSContext *cx, JSLinearString *str)
 {
     // Unlike toUpperCase, toLowerCase has the nice invariant that if the input
     // is a Latin1 string, the output is also a Latin1 string.
+    UniquePtr<CharT[], JS::FreePolicy> newChars;
     size_t length = str->length();
-    ScopedJSFreePtr<CharT> newChars(cx->pod_malloc<CharT>(length + 1));
-    if (!newChars)
-        return nullptr;
-
     {
         AutoCheckCannotGC nogc;
         const CharT *chars = str->chars<CharT>(nogc);
-        for (size_t i = 0; i < length; i++) {
+
+        // Look for the first upper case character.
+        size_t i = 0;
+        for (; i < length; i++) {
+            jschar c = chars[i];
+            if (unicode::ToLowerCase(c) != c)
+                break;
+        }
+
+        // If all characters are lower case, return the input string.
+        if (i == length)
+            return str;
+
+        newChars = cx->make_pod_array<CharT>(length + 1);
+        if (!newChars)
+            return nullptr;
+
+        PodCopy(newChars.get(), chars, i);
+
+        for (; i < length; i++) {
             jschar c = unicode::ToLowerCase(chars[i]);
-            MOZ_ASSERT_IF((IsSame<CharT, Latin1Char>::value), c <= 0xff);
+            MOZ_ASSERT_IF((IsSame<CharT, Latin1Char>::value), c <= JSString::MAX_LATIN1_CHAR);
             newChars[i] = c;
         }
+
         newChars[length] = 0;
     }
 
     JSString *res = NewStringDontDeflate<CanGC>(cx, newChars.get(), length);
     if (!res)
         return nullptr;
 
-    newChars.forget();
+    newChars.release();
     return res;
 }
 
 static inline bool
 ToLowerCaseHelper(JSContext *cx, CallReceiver call)
 {
     RootedString str(cx, ThisToStringForStringProto(cx, call));
     if (!str)
@@ -775,40 +792,111 @@ str_toLocaleLowerCase(JSContext *cx, uns
 
         args.rval().set(result);
         return true;
     }
 
     return ToLowerCaseHelper(cx, args);
 }
 
+template <typename DestChar, typename SrcChar>
+static void
+ToUpperCaseImpl(DestChar *destChars, const SrcChar *srcChars, size_t firstLowerCase, size_t length)
+{
+    MOZ_ASSERT(firstLowerCase < length);
+
+    for (size_t i = 0; i < firstLowerCase; i++)
+        destChars[i] = srcChars[i];
+
+    for (size_t i = firstLowerCase; i < length; i++) {
+        jschar c = unicode::ToUpperCase(srcChars[i]);
+        MOZ_ASSERT_IF((IsSame<DestChar, Latin1Char>::value), c <= JSString::MAX_LATIN1_CHAR);
+        destChars[i] = c;
+    }
+
+    destChars[length] = '\0';
+}
+
 template <typename CharT>
 static JSString *
 ToUpperCase(JSContext *cx, JSLinearString *str)
 {
-    // toUpperCase on a Latin1 string can yield a non-Latin1 string. For now,
-    // we use a TwoByte string for the result.
+    typedef UniquePtr<Latin1Char[], JS::FreePolicy> Latin1CharPtr;
+    typedef UniquePtr<jschar[], JS::FreePolicy> TwoByteCharPtr;
+
+    mozilla::MaybeOneOf<Latin1CharPtr, TwoByteCharPtr> newChars;
     size_t length = str->length();
-    ScopedJSFreePtr<jschar> newChars(cx->pod_malloc<jschar>(length + 1));
-    if (!newChars)
-        return nullptr;
-
     {
         AutoCheckCannotGC nogc;
         const CharT *chars = str->chars<CharT>(nogc);
-        for (size_t i = 0; i < length; i++)
-            newChars[i] = unicode::ToUpperCase(chars[i]);
-        newChars[length] = 0;
+
+        // Look for the first lower case character.
+        size_t i = 0;
+        for (; i < length; i++) {
+            jschar c = chars[i];
+            if (unicode::ToUpperCase(c) != c)
+                break;
+        }
+
+        // If all characters are upper case, return the input string.
+        if (i == length)
+            return str;
+
+        // If the string is Latin1, check if it contains the MICRO SIGN (0xb5)
+        // or SMALL LETTER Y WITH DIAERESIS (0xff) character. The corresponding
+        // upper case characters are not in the Latin1 range.
+        bool resultIsLatin1;
+        if (IsSame<CharT, Latin1Char>::value) {
+            resultIsLatin1 = true;
+            for (size_t j = i; j < length; j++) {
+                Latin1Char c = chars[j];
+                if (c == 0xb5 || c == 0xff) {
+                    MOZ_ASSERT(unicode::ToUpperCase(c) > JSString::MAX_LATIN1_CHAR);
+                    resultIsLatin1 = false;
+                    break;
+                } else {
+                    MOZ_ASSERT(unicode::ToUpperCase(c) <= JSString::MAX_LATIN1_CHAR);
+                }
+            }
+        } else {
+            resultIsLatin1 = false;
+        }
+
+        if (resultIsLatin1) {
+            Latin1CharPtr buf = cx->make_pod_array<Latin1Char>(length + 1);
+            if (!buf)
+                return nullptr;
+
+            ToUpperCaseImpl(buf.get(), chars, i, length);
+            newChars.construct<Latin1CharPtr>(buf);
+        } else {
+            TwoByteCharPtr buf = cx->make_pod_array<jschar>(length + 1);
+            if (!buf)
+                return nullptr;
+
+            ToUpperCaseImpl(buf.get(), chars, i, length);
+            newChars.construct<TwoByteCharPtr>(buf);
+        }
     }
 
-    JSString *res = NewString<CanGC>(cx, newChars.get(), length);
-    if (!res)
-        return nullptr;
-
-    newChars.forget();
+    JSString *res;
+    if (newChars.constructed<Latin1CharPtr>()) {
+        res = NewStringDontDeflate<CanGC>(cx, newChars.ref<Latin1CharPtr>().get(), length);
+        if (!res)
+            return nullptr;
+
+        newChars.ref<Latin1CharPtr>().release();
+    } else {
+        res = NewStringDontDeflate<CanGC>(cx, newChars.ref<TwoByteCharPtr>().get(), length);
+        if (!res)
+            return nullptr;
+
+        newChars.ref<TwoByteCharPtr>().release();
+    }
+
     return res;
 }
 
 static bool
 ToUpperCaseHelper(JSContext *cx, CallReceiver call)
 {
     RootedString str(cx, ThisToStringForStringProto(cx, call));
     if (!str)