Bug 1496863 - Add a constexpr char16_t unicode::REPLACEMENT_CHARACTER, and change the two (!) separate const char16_t variables of the same value in CharacterEncoding.cpp with it. r=evilpie
author Jeff Walden <jwalden@mit.edu>
Thu, 04 Oct 2018 13:31:49 -0400
changeset 489353 ed81d578d775d533862151e376c098df7c951a6f
parent 489352 c35df5c273d64a2f495ca5159a91dbc9a5feec32
child 489354 da12058b40be74451c19a1e4f37436c6bab190d8
push id 247
push user fmarier@mozilla.com
push date Sat, 27 Oct 2018 01:06:44 +0000
reviewers evilpie
bugs 1496863
milestone 64.0a1
Bug 1496863 - Add a constexpr char16_t unicode::REPLACEMENT_CHARACTER, and change the two (!) separate const char16_t variables of the same value in CharacterEncoding.cpp with it. r=evilpie
js/src/util/Unicode.h
js/src/vm/CharacterEncoding.cpp
--- a/js/src/util/Unicode.h
+++ b/js/src/util/Unicode.h
@@ -74,16 +74,17 @@ constexpr char16_t DIVISION_SIGN = 0x00F
 constexpr char16_t LATIN_SMALL_LETTER_Y_WITH_DIAERESIS = 0x00FF;
 constexpr char16_t LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE = 0x0130;
 constexpr char16_t COMBINING_DOT_ABOVE = 0x0307;
 constexpr char16_t GREEK_CAPITAL_LETTER_SIGMA = 0x03A3;
 constexpr char16_t GREEK_SMALL_LETTER_FINAL_SIGMA = 0x03C2;
 constexpr char16_t GREEK_SMALL_LETTER_SIGMA = 0x03C3;
 constexpr char16_t LINE_SEPARATOR = 0x2028;
 constexpr char16_t PARA_SEPARATOR = 0x2029;
+constexpr char16_t REPLACEMENT_CHARACTER = 0xFFFD;
 constexpr char16_t BYTE_ORDER_MARK2 = 0xFFFE;
 
 const char16_t LeadSurrogateMin = 0xD800;
 const char16_t LeadSurrogateMax = 0xDBFF;
 const char16_t TrailSurrogateMin = 0xDC00;
 const char16_t TrailSurrogateMax = 0xDFFF;
 
 const uint32_t UTF16Max = 0xFFFF;
--- a/js/src/vm/CharacterEncoding.cpp
+++ b/js/src/vm/CharacterEncoding.cpp
@@ -7,16 +7,17 @@
 #include "js/CharacterEncoding.h"
 
 #include "mozilla/Range.h"
 #include "mozilla/Sprintf.h"
 
 #include <algorithm>
 #include <type_traits>
 
+#include "util/Unicode.h" // unicode::REPLACEMENT_CHARACTER
 #include "vm/JSContext.h"
 
 using namespace js;
 
 Latin1CharsZ
 JS::LossyTwoByteCharsToNewLatin1CharsZ(JSContext* cx,
                                        const mozilla::Range<const char16_t> tbchars)
 {
@@ -75,18 +76,16 @@ JS_PUBLIC_API(size_t)
 JS::GetDeflatedUTF8StringLength(JSFlatString* s)
 {
     JS::AutoCheckCannotGC nogc;
     return s->hasLatin1Chars()
            ? ::GetDeflatedUTF8StringLength(s->latin1Chars(nogc), s->length())
            : ::GetDeflatedUTF8StringLength(s->twoByteChars(nogc), s->length());
 }
 
-static const char16_t UTF8_REPLACEMENT_CHAR = 0xFFFD;
-
 template <typename CharT>
 static void
 DeflateStringToUTF8Buffer(const CharT* src, size_t srclen, mozilla::RangedPtr<char> dst,
                           size_t* dstlenp = nullptr, size_t* numcharsp = nullptr)
 {
     size_t capacity = 0;
     if (dstlenp) {
         capacity = *dstlenp;
@@ -96,26 +95,26 @@ DeflateStringToUTF8Buffer(const CharT* s
         *numcharsp = 0;
     }
 
     while (srclen) {
         uint32_t v;
         char16_t c = *src++;
         srclen--;
         if (c >= 0xDC00 && c <= 0xDFFF) {
-            v = UTF8_REPLACEMENT_CHAR;
+            v = unicode::REPLACEMENT_CHARACTER;
         } else if (c < 0xD800 || c > 0xDBFF) {
             v = c;
         } else {
             if (srclen < 1) {
-                v = UTF8_REPLACEMENT_CHAR;
+                v = unicode::REPLACEMENT_CHARACTER;
             } else {
                 char16_t c2 = *src;
                 if (c2 < 0xDC00 || c2 > 0xDFFF) {
-                    v = UTF8_REPLACEMENT_CHAR;
+                    v = unicode::REPLACEMENT_CHARACTER;
                 } else {
                     src++;
                     srclen--;
                     v = ((c - 0xD800) << 10) + (c2 - 0xDC00) + 0x10000;
                 }
             }
         }
 
@@ -266,20 +265,16 @@ enum class LoopDisposition {
 
 enum class OnUTF8Error {
     InsertReplacementCharacter,
     InsertQuestionMark,
     Throw,
     Crash,
 };
 
-// The Unicode REPLACEMENT CHARACTER, rendered as a diamond with a question
-// mark, meaning "someone screwed up here but it wasn't me".
-static const char16_t REPLACEMENT_CHARACTER = 0xFFFD;
-
 // If making changes to this algorithm, make sure to also update
 // LossyConvertUTF8toUTF16() in dom/wifi/WifiUtils.cpp
 //
 // Scan UTF8 input and (internally, at least) convert it to a series of UTF-16
 // code units. But you can also do odd things like pass an empty lambda for
 // `dst`, in which case the output is discarded entirely--the only effect of
 // calling the template that way is error-checking.
 template <OnUTF8Error ErrorAction, typename OutputFn>
@@ -306,17 +301,17 @@ InflateUTF8ToUTF16(JSContext* cx, const 
                 if (ErrorAction == OnUTF8Error::Throw) {                \
                     report(cx, arg);                                    \
                     return false;                                       \
                 } else if (ErrorAction == OnUTF8Error::Crash) {         \
                     MOZ_CRASH("invalid UTF-8 string: " # report);       \
                 } else {                                                \
                     char16_t replacement;                               \
                     if (ErrorAction == OnUTF8Error::InsertReplacementCharacter) { \
-                        replacement = REPLACEMENT_CHARACTER;            \
+                        replacement = unicode::REPLACEMENT_CHARACTER;   \
                     } else {                                            \
                         MOZ_ASSERT(ErrorAction == OnUTF8Error::InsertQuestionMark); \
                         replacement = '?';                              \
                     }                                                   \
                     if (dst(replacement) == LoopDisposition::Break) {   \
                         break;                                          \
                     }                                                   \
                     n = n2;                                             \