Bug 1495571 - Part 6: Specify the replacement character explicitly, not as a function of CharT. r=efaust
author Jason Orendorff <jorendorff@mozilla.com>
Tue, 02 Oct 2018 15:17:59 +0000
changeset 439202 a82ed21f664cba5d170a9268885326d8f0e42247
parent 439201 a35af6689749d7321ea8d3c63656cd23f1f332d8
child 439203 b999f2758e3ea42c804b504dc45ac322bcd7fa80
push id 34758
push user dvarga@mozilla.com
push date Tue, 02 Oct 2018 21:45:21 +0000
treeherder mozilla-central@4392b5198fb7 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers efaust
bugs 1495571
milestone 64.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1495571 - Part 6: Specify the replacement character explicitly, not as a function of CharT. r=efaust. This is not great, but we're getting rid of CharT. Depends on D7374. Differential Revision: https://phabricator.services.mozilla.com/D7375
js/src/vm/CharacterEncoding.cpp
--- a/js/src/vm/CharacterEncoding.cpp
+++ b/js/src/vm/CharacterEncoding.cpp
@@ -263,22 +263,24 @@ enum InflateUTF8Action {
     Count,
     Nop,
     Copy,
     FindEncoding
 };
 
 enum class OnUTF8Error {
     InsertReplacementCharacter,
+    InsertQuestionMark,
     Throw,
     Crash,
 };
 
-static const char16_t REPLACE_UTF8 = 0xFFFD;
-static const Latin1Char REPLACE_UTF8_LATIN1 = '?';
+// The Unicode REPLACEMENT CHARACTER, rendered as a diamond with a question
+// mark, meaning "someone screwed up here but it wasn't me".
+static const char16_t REPLACEMENT_CHARACTER = 0xFFFD;
 
 // If making changes to this algorithm, make sure to also update
 // LossyConvertUTF8toUTF16() in dom/wifi/WifiUtils.cpp
 //
 // Scan UTF8 input and (internally, at least) convert it to a series of
 // UTF-16 code units. But you can also do odd things like pass
 // CharT=Latin1Char, in which case each output code unit is silently truncated
 // to 8 bits; or Action=Count, in which case the output is discarded entirely
@@ -324,22 +326,25 @@ InflateUTF8ToUTF16(JSContext* cx, const 
         #define INVALID(report, arg, n2)                                \
             do {                                                        \
                 if (ErrorAction == OnUTF8Error::Throw) {                \
                     report(cx, arg);                                    \
                     return false;                                       \
                 } else if (ErrorAction == OnUTF8Error::Crash) {         \
                     MOZ_CRASH("invalid UTF-8 string: " # report);       \
                 } else {                                                \
-                    MOZ_ASSERT(ErrorAction == OnUTF8Error::InsertReplacementCharacter); \
+                    char16_t replacement;                               \
+                    if (ErrorAction == OnUTF8Error::InsertReplacementCharacter) { \
+                        replacement = REPLACEMENT_CHARACTER;            \
+                    } else {                                            \
+                        MOZ_ASSERT(ErrorAction == OnUTF8Error::InsertQuestionMark); \
+                        replacement = '?';                              \
+                    }                                                   \
                     if (Action == Copy) {                               \
-                        if (std::is_same<decltype(dst[0]), Latin1Char>::value) \
-                            dst[j] = CharT(REPLACE_UTF8_LATIN1);        \
-                        else                                            \
-                            dst[j] = CharT(REPLACE_UTF8);               \
+                        dst[j] = CharT(replacement);                    \
                     }                                                   \
                     n = n2;                                             \
                     goto invalidMultiByteCodeUnit;                      \
                 }                                                       \
             } while (0)
 
             // Check the leading byte.
             if (n < 2 || n > 4) {
@@ -441,16 +446,18 @@ InflateUTF8StringHelper(JSContext* cx, c
     }
 
     if (encoding == JS::SmallestEncoding::ASCII) {
         size_t srclen = src.length();
         MOZ_ASSERT(*outlen == srclen);
         for (uint32_t i = 0; i < srclen; i++) {
             dst[i] = CharT(src[i]);
         }
+    } else if (std::is_same<decltype(dst[0]), Latin1Char>::value) {
+        MOZ_ALWAYS_TRUE((InflateUTF8ToUTF16<Copy, OnUTF8Error::InsertQuestionMark, CharT>(cx, src, dst, outlen, &encoding)));
     } else {
         MOZ_ALWAYS_TRUE((InflateUTF8ToUTF16<Copy, OnUTF8Error::InsertReplacementCharacter, CharT>(cx, src, dst, outlen, &encoding)));
     }
 
     dst[*outlen] = 0;    // NUL char
 
     return CharsT(dst, *outlen);
 }
@@ -498,17 +505,17 @@ Latin1CharsZ
 JS::UTF8CharsToNewLatin1CharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen)
 {
     return InflateUTF8StringHelper<OnUTF8Error::Throw, Latin1CharsZ>(cx, utf8, outlen);
 }
 
 Latin1CharsZ
 JS::LossyUTF8CharsToNewLatin1CharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen)
 {
-    return InflateUTF8StringHelper<OnUTF8Error::InsertReplacementCharacter, Latin1CharsZ>(cx, utf8, outlen);
+    return InflateUTF8StringHelper<OnUTF8Error::InsertQuestionMark, Latin1CharsZ>(cx, utf8, outlen);
 }
 
 #ifdef DEBUG
 void
 JS::ConstUTF8CharsZ::validate(size_t aLength)
 {
     MOZ_ASSERT(data_);
     UTF8Chars chars(data_, aLength);