Bug 1495571 - Part 2: Introduce OnUTF8Error. r=efaust
author Jason Orendorff <jorendorff@mozilla.com>
Tue, 02 Oct 2018 15:19:33 +0000
changeset 494934 4775742238180accb463cc2ff53463ddcafa4f59
parent 494933 5a66c886df15268b57449f869c0fa654b2c2eb0a
child 494935 0f95c1e8fca93909844336d21612f4a25fe9c158
push id 9984
push user ffxbld-merge
push date Mon, 15 Oct 2018 21:07:35 +0000
treeherder mozilla-beta@183d27ea8570 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers efaust
bugs 1495571
milestone 64.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1495571 - Part 2: Introduce OnUTF8Error. r=efaust

Depends on D7370

Differential Revision: https://phabricator.services.mozilla.com/D7371
js/src/vm/CharacterEncoding.cpp
--- a/js/src/vm/CharacterEncoding.cpp
+++ b/js/src/vm/CharacterEncoding.cpp
@@ -262,22 +262,28 @@ ReportTooBigCharacter(JSContext* cx, uin
 enum InflateUTF8Action {
     CountAndReportInvalids,
     CountAndIgnoreInvalids,
     AssertNoInvalids,
     Copy,
     FindEncoding
 };
 
+enum class OnUTF8Error {
+    InsertReplacementCharacter,
+    Throw,
+    Crash,
+};
+
 static const char16_t REPLACE_UTF8 = 0xFFFD;
 static const Latin1Char REPLACE_UTF8_LATIN1 = '?';
 
 // If making changes to this algorithm, make sure to also update
 // LossyConvertUTF8toUTF16() in dom/wifi/WifiUtils.cpp
-template <InflateUTF8Action Action, typename CharT>
+template <InflateUTF8Action Action, OnUTF8Error ErrorAction, typename CharT>
 static bool
 InflateUTF8StringToBuffer(JSContext* cx, const UTF8Chars src, CharT* dst, size_t* dstlenp,
                           JS::SmallestEncoding *smallestEncoding)
 {
     if (Action != AssertNoInvalids) {
         *smallestEncoding = JS::SmallestEncoding::ASCII;
     }
     auto RequireLatin1 = [&smallestEncoding]{
@@ -303,30 +309,28 @@ InflateUTF8StringToBuffer(JSContext* cx,
             // Non-ASCII code unit.  Determine its length in bytes (n).
             uint32_t n = 1;
             while (v & (0x80 >> n)) {
                 n++;
             }
 
         #define INVALID(report, arg, n2)                                \
             do {                                                        \
-                if (Action == CountAndReportInvalids) {                 \
+                if (ErrorAction == OnUTF8Error::Throw) {                \
                     report(cx, arg);                                    \
                     return false;                                       \
-                } else if (Action == AssertNoInvalids) {                \
+                } else if (ErrorAction == OnUTF8Error::Crash) {         \
                     MOZ_CRASH("invalid UTF-8 string: " # report);       \
                 } else {                                                \
+                    MOZ_ASSERT(ErrorAction == OnUTF8Error::InsertReplacementCharacter); \
                     if (Action == Copy) {                               \
                         if (std::is_same<decltype(dst[0]), Latin1Char>::value) \
                             dst[j] = CharT(REPLACE_UTF8_LATIN1);        \
                         else                                            \
                             dst[j] = CharT(REPLACE_UTF8);               \
-                    } else {                                            \
-                        MOZ_ASSERT(Action == CountAndIgnoreInvalids ||  \
-                                   Action == FindEncoding);             \
                     }                                                   \
                     n = n2;                                             \
                     goto invalidMultiByteCodeUnit;                      \
                 }                                                       \
             } while (0)
 
             // Check the leading byte.
             if (n < 2 || n > 4) {
@@ -404,107 +408,107 @@ InflateUTF8StringToBuffer(JSContext* cx,
 
     if (Action != AssertNoInvalids && Action != FindEncoding) {
         *dstlenp = j;
     }
 
     return true;
 }
 
-template <InflateUTF8Action Action, typename CharsT>
+template <InflateUTF8Action Action, OnUTF8Error ErrorAction, typename CharsT>
 static CharsT
 InflateUTF8StringHelper(JSContext* cx, const UTF8Chars src, size_t* outlen)
 {
     using CharT = typename CharsT::CharT;
     *outlen = 0;
 
     JS::SmallestEncoding encoding;
-    if (!InflateUTF8StringToBuffer<Action, CharT>(cx, src, /* dst = */ nullptr, outlen, &encoding)) {
+    if (!InflateUTF8StringToBuffer<Action, ErrorAction, CharT>(cx, src, /* dst = */ nullptr, outlen, &encoding)) {
         return CharsT();
     }
 
     CharT* dst = cx->template pod_malloc<CharT>(*outlen + 1);  // +1 for NUL
     if (!dst) {
         ReportOutOfMemory(cx);
         return CharsT();
     }
 
     if (encoding == JS::SmallestEncoding::ASCII) {
         size_t srclen = src.length();
         MOZ_ASSERT(*outlen == srclen);
         for (uint32_t i = 0; i < srclen; i++) {
             dst[i] = CharT(src[i]);
         }
     } else {
-        MOZ_ALWAYS_TRUE((InflateUTF8StringToBuffer<Copy, CharT>(cx, src, dst, outlen, &encoding)));
+        MOZ_ALWAYS_TRUE((InflateUTF8StringToBuffer<Copy, OnUTF8Error::InsertReplacementCharacter, CharT>(cx, src, dst, outlen, &encoding)));
     }
 
     dst[*outlen] = 0;    // NUL char
 
     return CharsT(dst, *outlen);
 }
 
 TwoByteCharsZ
 JS::UTF8CharsToNewTwoByteCharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen)
 {
-    return InflateUTF8StringHelper<CountAndReportInvalids, TwoByteCharsZ>(cx, utf8, outlen);
+    return InflateUTF8StringHelper<CountAndReportInvalids, OnUTF8Error::Throw, TwoByteCharsZ>(cx, utf8, outlen);
 }
 
 TwoByteCharsZ
 JS::UTF8CharsToNewTwoByteCharsZ(JSContext* cx, const ConstUTF8CharsZ& utf8, size_t* outlen)
 {
     UTF8Chars chars(utf8.c_str(), strlen(utf8.c_str()));
-    return InflateUTF8StringHelper<CountAndReportInvalids, TwoByteCharsZ>(cx, chars, outlen);
+    return InflateUTF8StringHelper<CountAndReportInvalids, OnUTF8Error::Throw, TwoByteCharsZ>(cx, chars, outlen);
 }
 
 TwoByteCharsZ
 JS::LossyUTF8CharsToNewTwoByteCharsZ(JSContext* cx, const JS::UTF8Chars utf8, size_t* outlen)
 {
-    return InflateUTF8StringHelper<CountAndIgnoreInvalids, TwoByteCharsZ>(cx, utf8, outlen);
+    return InflateUTF8StringHelper<CountAndIgnoreInvalids, OnUTF8Error::InsertReplacementCharacter, TwoByteCharsZ>(cx, utf8, outlen);
 }
 
 TwoByteCharsZ
 JS::LossyUTF8CharsToNewTwoByteCharsZ(JSContext* cx, const JS::ConstUTF8CharsZ& utf8, size_t* outlen)
 {
     UTF8Chars chars(utf8.c_str(), strlen(utf8.c_str()));
-    return InflateUTF8StringHelper<CountAndIgnoreInvalids, TwoByteCharsZ>(cx, chars, outlen);
+    return InflateUTF8StringHelper<CountAndIgnoreInvalids, OnUTF8Error::InsertReplacementCharacter, TwoByteCharsZ>(cx, chars, outlen);
 }
 
 JS::SmallestEncoding
 JS::FindSmallestEncoding(UTF8Chars utf8)
 {
     JS::SmallestEncoding encoding;
-    MOZ_ALWAYS_TRUE((InflateUTF8StringToBuffer<FindEncoding, char16_t>(
+    MOZ_ALWAYS_TRUE((InflateUTF8StringToBuffer<FindEncoding, OnUTF8Error::InsertReplacementCharacter, char16_t>(
                          /* cx = */ nullptr,
                          utf8,
                          /* dst = */ nullptr,
                          /* dstlen = */ nullptr,
                          &encoding)));
     return encoding;
 }
 
 Latin1CharsZ
 JS::UTF8CharsToNewLatin1CharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen)
 {
-    return InflateUTF8StringHelper<CountAndReportInvalids, Latin1CharsZ>(cx, utf8, outlen);
+    return InflateUTF8StringHelper<CountAndReportInvalids, OnUTF8Error::Throw, Latin1CharsZ>(cx, utf8, outlen);
 }
 
 Latin1CharsZ
 JS::LossyUTF8CharsToNewLatin1CharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen)
 {
-    return InflateUTF8StringHelper<CountAndIgnoreInvalids, Latin1CharsZ>(cx, utf8, outlen);
+    return InflateUTF8StringHelper<CountAndIgnoreInvalids, OnUTF8Error::InsertReplacementCharacter, Latin1CharsZ>(cx, utf8, outlen);
 }
 
 #ifdef DEBUG
 void
 JS::ConstUTF8CharsZ::validate(size_t aLength)
 {
     MOZ_ASSERT(data_);
     UTF8Chars chars(data_, aLength);
-    InflateUTF8StringToBuffer<AssertNoInvalids, char16_t>(
+    InflateUTF8StringToBuffer<AssertNoInvalids, OnUTF8Error::Crash, char16_t>(
         /* cx = */ nullptr,
         chars,
         /* dst = */ nullptr,
         /* dstlen = */ nullptr,
         /* smallestEncoding = */ nullptr);
 }
 #endif