Backed out changeset 1bbdfed5b149 (bug 1289003)
author Tooru Fujisawa <arai_a@mac.com>
Fri, 02 Sep 2016 21:23:02 +0900
changeset 312424 6135cb7ef5bda300672ce7290ccf905b2cab68ff
parent 312423 e1942a9b87424c15245bea5f86f7fcdc14c0df42
child 312425 d18edfa7a8f9d0d9ed6b48c6038039f149c46d1d
push id 20447
push user kwierso@gmail.com
push date Fri, 02 Sep 2016 20:36:44 +0000
treeherder fx-team@969397f22187 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
bugs 1289003
milestone 51.0a1
backs out 1bbdfed5b149b231a27692542b6cee5b9f5138a8
Backed out changeset 1bbdfed5b149 (bug 1289003)
js/public/CharacterEncoding.h
js/src/vm/CharacterEncoding.cpp
--- a/js/public/CharacterEncoding.h
+++ b/js/public/CharacterEncoding.h
@@ -284,34 +284,16 @@ GetDeflatedUTF8StringLength(JSFlatString
  * than the length of the string, if the buffer is exhausted before the string
  * is fully encoded).
  */
 JS_PUBLIC_API(void)
 DeflateStringToUTF8Buffer(JSFlatString* src, mozilla::RangedPtr<char> dst,
                           size_t* dstlenp = nullptr, size_t* numcharsp = nullptr);
 
 /*
- * The smallest character encoding capable of fully representing a particular
- * string.
- */
-enum class SmallestEncoding {
-    ASCII,
-    Latin1,
-    UTF16
-};
-
-/*
- * Returns the smallest encoding possible for the given string: if all
- * codepoints are <128 then ASCII, otherwise if all codepoints are <256
- * Latin-1, else UTF16.
- */
-JS_PUBLIC_API(SmallestEncoding)
-FindSmallestEncoding(UTF8Chars utf8);
-
-/*
   * Return a null-terminated Latin-1 string copied from the input string,
   * storing its length (excluding null terminator) in |*outlen|.  Fail and
   * report an error if the string contains non-Latin-1 codepoints.  Returns
   * Latin1CharsZ() on failure.
  */
 extern Latin1CharsZ
 UTF8CharsToNewLatin1CharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen);
 
--- a/js/src/vm/CharacterEncoding.cpp
+++ b/js/src/vm/CharacterEncoding.cpp
@@ -3,17 +3,16 @@
  * This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "js/CharacterEncoding.h"
 
 #include "mozilla/Range.h"
 
-#include <algorithm>
 #include <type_traits>
 
 #include "jscntxt.h"
 #include "jsprf.h"
 
 using namespace js;
 
 Latin1CharsZ
@@ -248,50 +247,47 @@ ReportTooBigCharacter(JSContext* cx, uin
     JS_ReportErrorFlagsAndNumber(cx, JSREPORT_ERROR, GetErrorMessage, nullptr,
                                  JSMSG_UTF8_CHAR_TOO_LARGE, buffer);
 }
 
 enum InflateUTF8Action {
     CountAndReportInvalids,
     CountAndIgnoreInvalids,
     AssertNoInvalids,
-    Copy,
-    FindEncoding
+    Copy
 };
 
 static const char16_t REPLACE_UTF8 = 0xFFFD;
 static const Latin1Char REPLACE_UTF8_LATIN1 = '?';
 
 // If making changes to this algorithm, make sure to also update
 // LossyConvertUTF8toUTF16() in dom/wifi/WifiUtils.cpp
 template <InflateUTF8Action Action, typename CharT>
 static bool
 InflateUTF8StringToBuffer(JSContext* cx, const UTF8Chars src, CharT* dst, size_t* dstlenp,
-                          JS::SmallestEncoding *smallestEncoding)
+                          bool* isAsciip)
 {
-    auto RequireLatin1 = [&smallestEncoding]{
-        *smallestEncoding = std::max(JS::SmallestEncoding::Latin1, *smallestEncoding);
-    };
-    auto RequireUTF16 = [&smallestEncoding]{
-        *smallestEncoding = JS::SmallestEncoding::UTF16;
-    };
+    if (Action != AssertNoInvalids)
+        *isAsciip = true;
 
     // Count how many code units need to be in the inflated string.
     // |i| is the index into |src|, and |j| is the the index into |dst|.
     size_t srclen = src.length();
     uint32_t j = 0;
     for (uint32_t i = 0; i < srclen; i++, j++) {
         uint32_t v = uint32_t(src[i]);
         if (!(v & 0x80)) {
             // ASCII code unit.  Simple copy.
             if (Action == Copy)
                 dst[j] = CharT(v);
 
         } else {
             // Non-ASCII code unit.  Determine its length in bytes (n).
+            if (Action != AssertNoInvalids)
+                *isAsciip = false;
             uint32_t n = 1;
             while (v & (0x80 >> n))
                 n++;
 
         #define INVALID(report, arg, n2)                                \
             do {                                                        \
                 if (Action == CountAndReportInvalids) {                 \
                     report(cx, arg);                                    \
@@ -300,18 +296,17 @@ InflateUTF8StringToBuffer(JSContext* cx,
                     MOZ_CRASH("invalid UTF-8 string: " # report);       \
                 } else {                                                \
                     if (Action == Copy) {                               \
                         if (std::is_same<decltype(dst[0]), Latin1Char>::value) \
                             dst[j] = CharT(REPLACE_UTF8_LATIN1);        \
                         else                                            \
                             dst[j] = CharT(REPLACE_UTF8);               \
                     } else {                                            \
-                        MOZ_ASSERT(Action == CountAndIgnoreInvalids ||  \
-                                   Action == FindEncoding);             \
+                        MOZ_ASSERT(Action == CountAndIgnoreInvalids);   \
                     }                                                   \
                     n = n2;                                             \
                     goto invalidMultiByteCodeUnit;                      \
                 }                                                       \
             } while (0)
 
             // Check the leading byte.
             if (n < 2 || n > 4)
@@ -327,34 +322,22 @@ InflateUTF8StringToBuffer(JSContext* cx,
                 (v == 0xED && ((uint8_t)src[i + 1] & 0xE0) != 0x80) ||  // ED 80~9F
                 (v == 0xF0 && ((uint8_t)src[i + 1] & 0xF0) == 0x80) ||  // F0 90~BF
                 (v == 0xF4 && ((uint8_t)src[i + 1] & 0xF0) != 0x80))    // F4 80~8F
             {
                 INVALID(ReportInvalidCharacter, i, 1);
             }
 
             // Check the continuation bytes.
-            for (uint32_t m = 1; m < n; m++) {
+            for (uint32_t m = 1; m < n; m++)
                 if ((src[i + m] & 0xC0) != 0x80)
                     INVALID(ReportInvalidCharacter, i, m);
-            }
 
             // Determine the code unit's length in CharT and act accordingly.
             v = JS::Utf8ToOneUcs4Char((uint8_t*)&src[i], n);
-            if (Action != AssertNoInvalids) {
-                if (v > 0xff) {
-                    RequireUTF16();
-                    if (Action == FindEncoding) {
-                        MOZ_ASSERT(dst == nullptr);
-                        return true;
-                    }
-                } else {
-                    RequireLatin1();
-                }
-            }
             if (v < 0x10000) {
                 // The n-byte UTF8 code unit will fit in a single CharT.
                 if (Action == Copy)
                     dst[j] = CharT(v);
             } else {
                 v -= 0x10000;
                 if (v <= 0xFFFFF) {
                     // The n-byte UTF8 code unit will fit in two CharT units.
@@ -373,46 +356,46 @@ InflateUTF8StringToBuffer(JSContext* cx,
           invalidMultiByteCodeUnit:
             // Move i to the last byte of the multi-byte code unit;  the loop
             // header will do the final i++ to move to the start of the next
             // code unit.
             i += n - 1;
         }
     }
 
-    if (Action != AssertNoInvalids || Action != FindEncoding)
+    if (Action != AssertNoInvalids)
         *dstlenp = j;
 
     return true;
 }
 
 template <InflateUTF8Action Action, typename CharsT>
 static CharsT
 InflateUTF8StringHelper(JSContext* cx, const UTF8Chars src, size_t* outlen)
 {
     using CharT = typename CharsT::CharT;
     *outlen = 0;
 
-    JS::SmallestEncoding encoding;
-    if (!InflateUTF8StringToBuffer<Action, CharT>(cx, src, /* dst = */ nullptr, outlen, &encoding))
+    bool isAscii;
+    if (!InflateUTF8StringToBuffer<Action, CharT>(cx, src, /* dst = */ nullptr, outlen, &isAscii))
         return CharsT();
 
     CharT* dst = cx->pod_malloc<CharT>(*outlen + 1);  // +1 for NUL
     if (!dst) {
         ReportOutOfMemory(cx);
         return CharsT();
     }
 
-    if (encoding == JS::SmallestEncoding::ASCII) {
+    if (isAscii) {
         size_t srclen = src.length();
         MOZ_ASSERT(*outlen == srclen);
         for (uint32_t i = 0; i < srclen; i++)
             dst[i] = CharT(src[i]);
     } else {
-        MOZ_ALWAYS_TRUE((InflateUTF8StringToBuffer<Copy, CharT>(cx, src, dst, outlen, &encoding)));
+        JS_ALWAYS_TRUE((InflateUTF8StringToBuffer<Copy, CharT>(cx, src, dst, outlen, &isAscii)));
     }
 
     dst[*outlen] = 0;    // NUL char
 
     return CharsT(dst, *outlen);
 }
 
 TwoByteCharsZ
@@ -436,29 +419,16 @@ JS::LossyUTF8CharsToNewTwoByteCharsZ(JSC
 
 TwoByteCharsZ
 JS::LossyUTF8CharsToNewTwoByteCharsZ(JSContext* cx, const ConstUTF8CharsZ& utf8, size_t* outlen)
 {
     UTF8Chars chars(utf8.c_str(), strlen(utf8.c_str()));
     return InflateUTF8StringHelper<CountAndIgnoreInvalids, TwoByteCharsZ>(cx, chars, outlen);
 }
 
-JS::SmallestEncoding
-JS::FindSmallestEncoding(UTF8Chars utf8)
-{
-    JS::SmallestEncoding encoding;
-    MOZ_ALWAYS_TRUE((InflateUTF8StringToBuffer<FindEncoding, char16_t>(
-                         /* cx = */ nullptr,
-                         utf8,
-                         /* dst = */ nullptr,
-                         /* dstlen = */ nullptr,
-                         &encoding)));
-    return encoding;
-}
-
 Latin1CharsZ
 JS::UTF8CharsToNewLatin1CharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen)
 {
     return InflateUTF8StringHelper<CountAndReportInvalids, Latin1CharsZ>(cx, utf8, outlen);
 }
 
 Latin1CharsZ
 JS::LossyUTF8CharsToNewLatin1CharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen)