Bug 1495571 - Part 7: Make the output a lambda. r=efaust
author Jason Orendorff <jorendorff@mozilla.com>
Tue, 02 Oct 2018 14:26:01 +0000
changeset 494939 b999f2758e3ea42c804b504dc45ac322bcd7fa80
parent 494938 a82ed21f664cba5d170a9268885326d8f0e42247
child 494940 995cd5fca351d5bf3c00e745f3cc22a2fab6fd72
push id 9984
push user ffxbld-merge
push date Mon, 15 Oct 2018 21:07:35 +0000
treeherder mozilla-beta@183d27ea8570 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers efaust
bugs 1495571
milestone 64.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1495571 - Part 7: Make the output a lambda. r=efaust

Depends on D7375

Differential Revision: https://phabricator.services.mozilla.com/D7376
js/src/vm/CharacterEncoding.cpp
--- a/js/src/vm/CharacterEncoding.cpp
+++ b/js/src/vm/CharacterEncoding.cpp
@@ -275,51 +275,41 @@ enum class OnUTF8Error {
 
 // The Unicode REPLACEMENT CHARACTER, rendered as a diamond with a question
 // mark, meaning "someone screwed up here but it wasn't me".
 static const char16_t REPLACEMENT_CHARACTER = 0xFFFD;
 
 // If making changes to this algorithm, make sure to also update
 // LossyConvertUTF8toUTF16() in dom/wifi/WifiUtils.cpp
 //
-// Scan UTF8 input and (internally, at least) convert it to a series of
-// UTF-16 code units. But you can also do odd things like pass
-// CharT=Latin1Char, in which case each output code unit is silently truncated
-// to 8 bits; or Action=Count, in which case the output is discarded entirely
-// because we're just counting how many UTF-16 code units of output there are.
-template <InflateUTF8Action Action, OnUTF8Error ErrorAction, typename CharT>
+// Scan UTF8 input and (internally, at least) convert it to a series of UTF-16
+// code units. But you can also do odd things like pass an empty lambda for
+// `dst`, in which case the output is discarded entirely--the only effect of
+// calling the template that way is error-checking.
+template <InflateUTF8Action Action, OnUTF8Error ErrorAction, typename OutputFn>
 static bool
-InflateUTF8ToUTF16(JSContext* cx, const UTF8Chars src, CharT* dst, size_t* dstlenp,
+InflateUTF8ToUTF16(JSContext* cx, const UTF8Chars src, OutputFn dst,
                    JS::SmallestEncoding *smallestEncoding)
 {
-    static_assert(std::is_same<CharT, char16_t>::value ||
-                  std::is_same<CharT, Latin1Char>::value,
-                  "bad CharT");
-
     if (Action != Nop) {
         *smallestEncoding = JS::SmallestEncoding::ASCII;
     }
     auto RequireLatin1 = [&smallestEncoding]{
         *smallestEncoding = std::max(JS::SmallestEncoding::Latin1, *smallestEncoding);
     };
     auto RequireUTF16 = [&smallestEncoding]{
         *smallestEncoding = JS::SmallestEncoding::UTF16;
     };
 
-    // Count how many code units need to be in the inflated string.
-    // |i| is the index into |src|, and |j| is the the index into |dst|.
     size_t srclen = src.length();
-    uint32_t j = 0;
-    for (uint32_t i = 0; i < srclen; i++, j++) {
+    for (uint32_t i = 0; i < srclen; i++) {
         uint32_t v = uint32_t(src[i]);
         if (!(v & 0x80)) {
             // ASCII code unit.  Simple copy.
-            if (Action == Copy) {
-                dst[j] = CharT(v);
-            }
+            dst(uint16_t(v));
 
         } else {
             // Non-ASCII code unit.  Determine its length in bytes (n).
             uint32_t n = 1;
             while (v & (0x80 >> n)) {
                 n++;
             }
 
@@ -333,19 +323,17 @@ InflateUTF8ToUTF16(JSContext* cx, const 
                 } else {                                                \
                     char16_t replacement;                               \
                     if (ErrorAction == OnUTF8Error::InsertReplacementCharacter) { \
                         replacement = REPLACEMENT_CHARACTER;            \
                     } else {                                            \
                         MOZ_ASSERT(ErrorAction == OnUTF8Error::InsertQuestionMark); \
                         replacement = '?';                              \
                     }                                                   \
-                    if (Action == Copy) {                               \
-                        dst[j] = CharT(replacement);                    \
-                    }                                                   \
+                    dst(replacement);                                   \
                     n = n2;                                             \
                     goto invalidMultiByteCodeUnit;                      \
                 }                                                       \
             } while (0)
 
             // Check the leading byte.
             if (n < 2 || n > 4) {
                 INVALID(ReportInvalidCharacter, i, 1);
@@ -374,40 +362,31 @@ InflateUTF8ToUTF16(JSContext* cx, const 
             }
 
             // Determine the code unit's length in CharT and act accordingly.
             v = JS::Utf8ToOneUcs4Char((uint8_t*)&src[i], n);
             if (Action != Nop) {
                 if (v > 0xff) {
                     RequireUTF16();
                     if (Action == FindEncoding) {
-                        MOZ_ASSERT(dst == nullptr);
                         return true;
                     }
                 } else {
                     RequireLatin1();
                 }
             }
             if (v < 0x10000) {
                 // The n-byte UTF8 code unit will fit in a single CharT.
-                if (Action == Copy) {
-                    dst[j] = CharT(v);
-                }
+                dst(char16_t(v));
             } else {
                 v -= 0x10000;
                 if (v <= 0xFFFFF) {
                     // The n-byte UTF8 code unit will fit in two CharT units.
-                    if (Action == Copy) {
-                        dst[j] = CharT((v >> 10) + 0xD800);
-                    }
-                    j++;
-                    if (Action == Copy) {
-                        dst[j] = CharT((v & 0x3FF) + 0xDC00);
-                    }
-
+                    dst(char16_t((v >> 10) + 0xD800));
+                    dst(char16_t((v & 0x3FF) + 0xDC00));
                 } else {
                     // The n-byte UTF8 code unit won't fit in two CharT units.
                     INVALID(ReportTooBigCharacter, v, 1);
                 }
             }
 
           invalidMultiByteCodeUnit:
             // Move i to the last byte of the multi-byte code unit;  the loop
@@ -415,53 +394,59 @@ InflateUTF8ToUTF16(JSContext* cx, const 
             // code unit.
             i += n - 1;
             if (Action != Nop) {
                 RequireUTF16();
             }
         }
     }
 
-    if (Action != Nop && Action != FindEncoding) {
-        *dstlenp = j;
-    }
-
     return true;
 }
 
 template <OnUTF8Error ErrorAction, typename CharsT>
 static CharsT
 InflateUTF8StringHelper(JSContext* cx, const UTF8Chars src, size_t* outlen)
 {
     using CharT = typename CharsT::CharT;
+    static_assert(std::is_same<CharT, char16_t>::value ||
+                  std::is_same<CharT, Latin1Char>::value,
+                  "bad CharT");
+
     *outlen = 0;
 
     JS::SmallestEncoding encoding;
-    if (!InflateUTF8ToUTF16<Count, ErrorAction, CharT>(cx, src, /* dst = */ nullptr, outlen, &encoding)) {
+    size_t len = 0;
+    auto count = [&](char16_t) { len++; };
+    if (!InflateUTF8ToUTF16<Count, ErrorAction>(cx, src, count, &encoding)) {
         return CharsT();
     }
+    *outlen = len;
 
     CharT* dst = cx->template pod_malloc<CharT>(*outlen + 1);  // +1 for NUL
     if (!dst) {
         ReportOutOfMemory(cx);
         return CharsT();
     }
 
     if (encoding == JS::SmallestEncoding::ASCII) {
         size_t srclen = src.length();
         MOZ_ASSERT(*outlen == srclen);
         for (uint32_t i = 0; i < srclen; i++) {
             dst[i] = CharT(src[i]);
         }
-    } else if (std::is_same<decltype(dst[0]), Latin1Char>::value) {
-        MOZ_ALWAYS_TRUE((InflateUTF8ToUTF16<Copy, OnUTF8Error::InsertQuestionMark, CharT>(cx, src, dst, outlen, &encoding)));
     } else {
-        MOZ_ALWAYS_TRUE((InflateUTF8ToUTF16<Copy, OnUTF8Error::InsertReplacementCharacter, CharT>(cx, src, dst, outlen, &encoding)));
+        constexpr OnUTF8Error errorMode = std::is_same<CharT, Latin1Char>::value
+            ? OnUTF8Error::InsertQuestionMark
+            : OnUTF8Error::InsertReplacementCharacter;
+        size_t j = 0;
+        auto push = [&](char16_t c) { dst[j++] = CharT(c); };
+        MOZ_ALWAYS_TRUE((InflateUTF8ToUTF16<Copy, errorMode>(cx, src, push, &encoding)));
+        MOZ_ASSERT(j == len);
     }
-
     dst[*outlen] = 0;    // NUL char
 
     return CharsT(dst, *outlen);
 }
 
 TwoByteCharsZ
 JS::UTF8CharsToNewTwoByteCharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen)
 {
@@ -487,21 +472,20 @@ JS::LossyUTF8CharsToNewTwoByteCharsZ(JSC
     UTF8Chars chars(utf8.c_str(), strlen(utf8.c_str()));
     return InflateUTF8StringHelper<OnUTF8Error::InsertReplacementCharacter, TwoByteCharsZ>(cx, chars, outlen);
 }
 
 JS::SmallestEncoding
 JS::FindSmallestEncoding(UTF8Chars utf8)
 {
     JS::SmallestEncoding encoding;
-    MOZ_ALWAYS_TRUE((InflateUTF8ToUTF16<FindEncoding, OnUTF8Error::InsertReplacementCharacter, char16_t>(
+    MOZ_ALWAYS_TRUE((InflateUTF8ToUTF16<FindEncoding, OnUTF8Error::InsertReplacementCharacter>(
                          /* cx = */ nullptr,
                          utf8,
-                         /* dst = */ nullptr,
-                         /* dstlen = */ nullptr,
+                         [](char16_t) {},
                          &encoding)));
     return encoding;
 }
 
 Latin1CharsZ
 JS::UTF8CharsToNewLatin1CharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen)
 {
     return InflateUTF8StringHelper<OnUTF8Error::Throw, Latin1CharsZ>(cx, utf8, outlen);
@@ -514,21 +498,20 @@ JS::LossyUTF8CharsToNewLatin1CharsZ(JSCo
 }
 
 #ifdef DEBUG
 void
 JS::ConstUTF8CharsZ::validate(size_t aLength)
 {
     MOZ_ASSERT(data_);
     UTF8Chars chars(data_, aLength);
-    InflateUTF8ToUTF16<Nop, OnUTF8Error::Crash, char16_t>(
+    InflateUTF8ToUTF16<Nop, OnUTF8Error::Crash>(
         /* cx = */ nullptr,
         chars,
-        /* dst = */ nullptr,
-        /* dstlen = */ nullptr,
+        [](char16_t) {},
         /* smallestEncoding = */ nullptr);
 }
 #endif
 
 bool
 JS::StringIsASCII(const char* s)
 {
     while (*s) {