Bug 1501155 - Part 1: Add AtomizeWTF8Chars. r=jwalden
author: Tooru Fujisawa <arai_a@mac.com>
Wed, 28 Nov 2018 14:16:29 +0900
changeset 507673 4ba2f019ce6d420275d088749482cf77f35d4bf4
parent 507672 e9a8d57b5c3ebfab4222042d258d793db527c2e1
child 507674 b1a6e2052ea19b8fbf7fde7e3aa25629ff5163be
push id: 1905
push user: ffxbld-merge
push date: Mon, 21 Jan 2019 12:33:13 +0000
treeherder: mozilla-release@c2fca1944d8c [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: jwalden
bugs: 1501155
milestone: 65.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1501155 - Part 1: Add AtomizeWTF8Chars. r=jwalden
js/public/CharacterEncoding.h
js/src/NamespaceImports.h
js/src/vm/CharacterEncoding.cpp
js/src/vm/JSAtom.cpp
js/src/vm/JSAtom.h
--- a/js/public/CharacterEncoding.h
+++ b/js/public/CharacterEncoding.h
@@ -86,16 +86,36 @@ class UTF8Chars : public mozilla::Range<
       : UTF8Chars(reinterpret_cast<char*>(aUnits), aLength)
     {}
     UTF8Chars(const mozilla::Utf8Unit* aUnits, size_t aLength)
       : UTF8Chars(reinterpret_cast<const char*>(aUnits), aLength)
     {}
 };
 
 /*
+ * Similar to UTF8Chars, but contains WTF-8.
+ * https://simonsapin.github.io/wtf-8/
+ */
+class WTF8Chars : public mozilla::Range<unsigned char>
+{
+    typedef mozilla::Range<unsigned char> Base;
+
+  public:
+    using CharT = unsigned char;
+
+    WTF8Chars() : Base() {}
+    WTF8Chars(char* aBytes, size_t aLength)
+      : Base(reinterpret_cast<unsigned char*>(aBytes), aLength)
+    {}
+    WTF8Chars(const char* aBytes, size_t aLength)
+      : Base(reinterpret_cast<unsigned char*>(const_cast<char*>(aBytes)), aLength)
+    {}
+};
+
+/*
  * SpiderMonkey also deals directly with UTF-8 encoded text in some places.
  */
 class UTF8CharsZ : public mozilla::RangedPtr<unsigned char>
 {
     typedef mozilla::RangedPtr<unsigned char> Base;
 
   public:
     using CharT = unsigned char;
@@ -250,16 +270,22 @@ Utf8ToOneUcs4Char(const uint8_t* utf8Buf
  * - On error, returns an empty TwoByteCharsZ.
  * - On success, returns a malloc'd TwoByteCharsZ, and updates |outlen| to hold
  *   its length;  the length value excludes the trailing null.
  */
 extern JS_PUBLIC_API TwoByteCharsZ
 UTF8CharsToNewTwoByteCharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen);
 
 /*
+ * Like UTF8CharsToNewTwoByteCharsZ, but for WTF8Chars.
+ */
+extern JS_PUBLIC_API TwoByteCharsZ
+WTF8CharsToNewTwoByteCharsZ(JSContext* cx, const WTF8Chars wtf8, size_t* outlen);
+
+/*
  * Like UTF8CharsToNewTwoByteCharsZ, but for ConstUTF8CharsZ.
  */
 extern JS_PUBLIC_API TwoByteCharsZ
 UTF8CharsToNewTwoByteCharsZ(JSContext* cx, const ConstUTF8CharsZ& utf8, size_t* outlen);
 
 /*
  * The same as UTF8CharsToNewTwoByteCharsZ(), except that any malformed UTF-8 characters
  * will be replaced by \uFFFD. No exception will be thrown for malformed UTF-8
--- a/js/src/NamespaceImports.h
+++ b/js/src/NamespaceImports.h
@@ -23,16 +23,17 @@
 namespace JS {
 
 class Latin1Chars;
 class Latin1CharsZ;
 class ConstTwoByteChars;
 class TwoByteChars;
 class TwoByteCharsZ;
 class UTF8Chars;
+class WTF8Chars;
 class UTF8CharsZ;
 
 using AutoValueVector = AutoVector<Value>;
 using AutoIdVector = AutoVector<jsid>;
 using AutoObjectVector = AutoVector<JSObject*>;
 
 using ValueVector = JS::GCVector<JS::Value>;
 using IdVector = JS::GCVector<jsid>;
@@ -70,16 +71,17 @@ using JS::UndefinedValue;
 
 using JS::Latin1Char;
 using JS::Latin1Chars;
 using JS::Latin1CharsZ;
 using JS::ConstTwoByteChars;
 using JS::TwoByteChars;
 using JS::TwoByteCharsZ;
 using JS::UTF8Chars;
+using JS::WTF8Chars;
 using JS::UTF8CharsZ;
 using JS::UniqueChars;
 using JS::UniqueTwoByteChars;
 
 using JS::Result;
 using JS::Ok;
 using JS::OOM;
 
--- a/js/src/vm/CharacterEncoding.cpp
+++ b/js/src/vm/CharacterEncoding.cpp
@@ -204,23 +204,28 @@ JS::CharsToNewUTF8CharsZ(JSContext* mayb
 
 template UTF8CharsZ
 JS::CharsToNewUTF8CharsZ(JSContext* maybeCx,
                          const mozilla::Range<const char16_t> chars);
 
 static const uint32_t INVALID_UTF8 = UINT32_MAX;
 
 /*
- * Convert a utf8 character sequence into a UCS-4 character and return that
- * character.  It is assumed that the caller already checked that the sequence
- * is valid.
+ * Convert a UTF-8 or WTF-8 (depending on InputCharsT, which is either
+ * UTF8Chars or WTF8Chars) character sequence into a UCS-4 character and return
+ * that character.  It is assumed that the caller already checked that the
+ * sequence is valid.
  */
-uint32_t
-JS::Utf8ToOneUcs4Char(const uint8_t* utf8Buffer, int utf8Length)
+template <class InputCharsT>
+static uint32_t
+Utf8ToOneUcs4CharImpl(const uint8_t* utf8Buffer, int utf8Length)
 {
+    static_assert(std::is_same<InputCharsT, UTF8Chars>::value ||
+                  std::is_same<InputCharsT, WTF8Chars>::value,
+                  "must be either UTF-8 or WTF-8");
     MOZ_ASSERT(1 <= utf8Length && utf8Length <= 4);
 
     if (utf8Length == 1) {
         MOZ_ASSERT(!(*utf8Buffer & 0x80));
         return *utf8Buffer;
     }
 
     /* from Unicode 3.1, non-shortest form is illegal */
@@ -230,23 +235,36 @@ JS::Utf8ToOneUcs4Char(const uint8_t* utf
                (0x100 - (1 << (8 - utf8Length))));
     uint32_t ucs4Char = *utf8Buffer++ & ((1 << (7 - utf8Length)) - 1);
     uint32_t minucs4Char = minucs4Table[utf8Length - 2];
     while (--utf8Length) {
         MOZ_ASSERT((*utf8Buffer & 0xC0) == 0x80);
         ucs4Char = (ucs4Char << 6) | (*utf8Buffer++ & 0x3F);
     }
 
-    if (MOZ_UNLIKELY(ucs4Char < minucs4Char || (ucs4Char >= 0xD800 && ucs4Char <= 0xDFFF))) {
+    if (MOZ_UNLIKELY(ucs4Char < minucs4Char)) {
+        return INVALID_UTF8;
+    }
+
+    // WTF-8 allows lone surrogate.
+    if (std::is_same<InputCharsT, UTF8Chars>::value &&
+        MOZ_UNLIKELY(ucs4Char >= 0xD800 && ucs4Char <= 0xDFFF))
+    {
         return INVALID_UTF8;
     }
 
     return ucs4Char;
 }
 
+uint32_t
+JS::Utf8ToOneUcs4Char(const uint8_t* utf8Buffer, int utf8Length)
+{
+    return Utf8ToOneUcs4CharImpl<UTF8Chars>(utf8Buffer, utf8Length);
+}
+
 static void
 ReportInvalidCharacter(JSContext* cx, uint32_t offset)
 {
     char buffer[10];
     SprintfLiteral(buffer, "%u", offset);
     JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_MALFORMED_UTF8_CHAR, buffer);
 }
 
@@ -271,23 +289,23 @@ enum class LoopDisposition {
 
 enum class OnUTF8Error {
     InsertReplacementCharacter,
     InsertQuestionMark,
     Throw,
     Crash,
 };
 
-// Scan UTF8 input and (internally, at least) convert it to a series of UTF-16
-// code units. But you can also do odd things like pass an empty lambda for
-// `dst`, in which case the output is discarded entirely--the only effect of
-// calling the template that way is error-checking.
-template <OnUTF8Error ErrorAction, typename OutputFn>
+// Scan UTF-8 or WTF-8 input and (internally, at least) convert it to a series
+// of UTF-16 code units. But you can also do odd things like pass an empty
+// lambda for `dst`, in which case the output is discarded entirely--the only
+// effect of calling the template that way is error-checking.
+template <OnUTF8Error ErrorAction, typename OutputFn, class InputCharsT>
 static bool
-InflateUTF8ToUTF16(JSContext* cx, const UTF8Chars src, OutputFn dst)
+InflateUTF8ToUTF16(JSContext* cx, const InputCharsT src, OutputFn dst)
 {
     size_t srclen = src.length();
     for (uint32_t i = 0; i < srclen; i++) {
         uint32_t v = uint32_t(src[i]);
         if (!(v & 0x80)) {
             // ASCII code unit.  Simple copy.
             if (dst(uint16_t(v)) == LoopDisposition::Break) {
                 break;
@@ -334,28 +352,36 @@ InflateUTF8ToUTF16(JSContext* cx, const 
 
             // Check the second byte.  From Unicode Standard v6.2, Table 3-7
             // Well-Formed UTF-8 Byte Sequences.
             if ((v == 0xE0 && ((uint8_t)src[i + 1] & 0xE0) != 0xA0) ||  // E0 A0~BF
                 (v == 0xED && ((uint8_t)src[i + 1] & 0xE0) != 0x80) ||  // ED 80~9F
                 (v == 0xF0 && ((uint8_t)src[i + 1] & 0xF0) == 0x80) ||  // F0 90~BF
                 (v == 0xF4 && ((uint8_t)src[i + 1] & 0xF0) != 0x80))    // F4 80~8F
             {
-                INVALID(ReportInvalidCharacter, i, 1);
+                if (std::is_same<InputCharsT, UTF8Chars>::value) {
+                    INVALID(ReportInvalidCharacter, i, 1);
+                } else {
+                    // WTF-8 allows lone surrogate as ED A0~BF 80~BF.
+                    MOZ_ASSERT((std::is_same<InputCharsT, WTF8Chars>::value));
+                    if (v == 0xED && ((uint8_t)src[i + 1] & 0xE0) != 0xA0) { // ED A0~BF
+                        INVALID(ReportInvalidCharacter, i, 1);
+                    }
+                }
             }
 
             // Check the continuation bytes.
             for (uint32_t m = 1; m < n; m++) {
                 if ((src[i + m] & 0xC0) != 0x80) {
                     INVALID(ReportInvalidCharacter, i, m);
                 }
             }
 
             // Determine the code unit's length in CharT and act accordingly.
-            v = JS::Utf8ToOneUcs4Char((uint8_t*)&src[i], n);
+            v = Utf8ToOneUcs4CharImpl<InputCharsT>((uint8_t*)&src[i], n);
             if (v < 0x10000) {
                 // The n-byte UTF8 code unit will fit in a single CharT.
                 if (dst(char16_t(v)) == LoopDisposition::Break) {
                     break;
                 }
             } else {
                 v -= 0x10000;
                 if (v <= 0xFFFFF) {
@@ -378,19 +404,20 @@ InflateUTF8ToUTF16(JSContext* cx, const 
             // code unit.
             i += n - 1;
         }
     }
 
     return true;
 }
 
-template <OnUTF8Error ErrorAction, typename CharT>
+template <OnUTF8Error ErrorAction, typename CharT, class InputCharsT>
 static void
-CopyAndInflateUTF8IntoBuffer(JSContext* cx, const UTF8Chars src, CharT *dst, size_t outlen, bool allASCII)
+CopyAndInflateUTF8IntoBuffer(JSContext* cx, const InputCharsT src, CharT* dst, size_t outlen,
+                             bool allASCII)
 {
     if (allASCII) {
         size_t srclen = src.length();
         MOZ_ASSERT(outlen == srclen);
         for (uint32_t i = 0; i < srclen; i++) {
             dst[i] = CharT(src[i]);
         }
     } else {
@@ -400,19 +427,19 @@ CopyAndInflateUTF8IntoBuffer(JSContext* 
             return LoopDisposition::Continue;
         };
         MOZ_ALWAYS_TRUE((InflateUTF8ToUTF16<ErrorAction>(cx, src, push)));
         MOZ_ASSERT(j == outlen);
     }
     dst[outlen] = CharT('\0');    // NUL char
 }
 
-template <OnUTF8Error ErrorAction, typename CharsT>
+template <OnUTF8Error ErrorAction, typename CharsT, class InputCharsT>
 static CharsT
-InflateUTF8StringHelper(JSContext* cx, const UTF8Chars src, size_t* outlen)
+InflateUTF8StringHelper(JSContext* cx, const InputCharsT src, size_t* outlen)
 {
     using CharT = typename CharsT::CharT;
     static_assert(std::is_same<CharT, char16_t>::value ||
                   std::is_same<CharT, Latin1Char>::value,
                   "bad CharT");
 
     *outlen = 0;
 
@@ -444,16 +471,22 @@ InflateUTF8StringHelper(JSContext* cx, c
 
 TwoByteCharsZ
 JS::UTF8CharsToNewTwoByteCharsZ(JSContext* cx, const UTF8Chars utf8, size_t* outlen)
 {
     return InflateUTF8StringHelper<OnUTF8Error::Throw, TwoByteCharsZ>(cx, utf8, outlen);
 }
 
 TwoByteCharsZ
+JS::WTF8CharsToNewTwoByteCharsZ(JSContext* cx, const WTF8Chars wtf8, size_t* outlen)
+{
+    return InflateUTF8StringHelper<OnUTF8Error::Throw, TwoByteCharsZ>(cx, wtf8, outlen);
+}
+
+TwoByteCharsZ
 JS::UTF8CharsToNewTwoByteCharsZ(JSContext* cx, const ConstUTF8CharsZ& utf8, size_t* outlen)
 {
     UTF8Chars chars(utf8.c_str(), strlen(utf8.c_str()));
     return InflateUTF8StringHelper<OnUTF8Error::Throw, TwoByteCharsZ>(cx, chars, outlen);
 }
 
 TwoByteCharsZ
 JS::LossyUTF8CharsToNewTwoByteCharsZ(JSContext* cx, const JS::UTF8Chars utf8, size_t* outlen)
@@ -513,18 +546,19 @@ JS::LossyUTF8CharsToNewLatin1CharsZ(JSCo
 
 /**
  * Atomization Helpers.
  *
  * These functions are extremely single-use, and are not intended for general
  * consumption.
  */
 
+template <class InputCharsT>
 bool
-GetUTF8AtomizationData(JSContext* cx, const JS::UTF8Chars utf8, size_t* outlen,
+GetUTF8AtomizationData(JSContext* cx, const InputCharsT utf8, size_t* outlen,
                        JS::SmallestEncoding* encoding, HashNumber* hashNum)
 {
     *outlen = 0;
     *encoding = JS::SmallestEncoding::ASCII;
     *hashNum = 0;
 
     auto getMetadata = [outlen, encoding, hashNum](char16_t c) -> LoopDisposition {
         (*outlen)++;
@@ -534,16 +568,25 @@ GetUTF8AtomizationData(JSContext* cx, co
     };
     if (!InflateUTF8ToUTF16<OnUTF8Error::Throw>(cx, utf8, getMetadata)) {
         return false;
     }
 
     return true;
 }
 
+template
+bool
+GetUTF8AtomizationData<JS::UTF8Chars>(JSContext* cx, const JS::UTF8Chars utf8, size_t* outlen,
+                                      JS::SmallestEncoding* encoding, HashNumber* hashNum);
+template
+bool
+GetUTF8AtomizationData<JS::WTF8Chars>(JSContext* cx, const JS::WTF8Chars utf8, size_t* outlen,
+                                      JS::SmallestEncoding* encoding, HashNumber* hashNum);
+
 template <typename CharT>
 bool
 UTF8EqualsChars(const JS::UTF8Chars utfChars, const CharT* chars)
 {
     size_t ind = 0;
     bool isEqual = true;
 
     auto checkEqual = [&isEqual, &ind, chars](char16_t c) -> LoopDisposition {
@@ -570,31 +613,37 @@ UTF8EqualsChars(const JS::UTF8Chars utfC
     InflateUTF8ToUTF16<OnUTF8Error::Crash>(/* cx = */ nullptr, utfChars, checkEqual);
 
     return isEqual;
 }
 
 template bool UTF8EqualsChars<char16_t>(const JS::UTF8Chars, const char16_t*);
 template bool UTF8EqualsChars<JS::Latin1Char>(const JS::UTF8Chars, const JS::Latin1Char*);
 
-template <typename CharT>
+template <typename CharT, class InputCharsT>
 void
-InflateUTF8CharsToBufferAndTerminate(const UTF8Chars src, CharT* dst, size_t dstLen,
+InflateUTF8CharsToBufferAndTerminate(const InputCharsT src, CharT* dst, size_t dstLen,
                                      JS::SmallestEncoding encoding)
 {
     CopyAndInflateUTF8IntoBuffer<OnUTF8Error::Crash>(/* cx = */ nullptr, src, dst, dstLen,
                                                      encoding == JS::SmallestEncoding::ASCII);
 }
 
 template void
 InflateUTF8CharsToBufferAndTerminate<char16_t>(const UTF8Chars src, char16_t* dst, size_t dstLen,
                                                JS::SmallestEncoding encoding);
 template void
 InflateUTF8CharsToBufferAndTerminate<JS::Latin1Char>(const UTF8Chars src, JS::Latin1Char* dst,
                                                      size_t dstLen, JS::SmallestEncoding encoding);
+template void
+InflateUTF8CharsToBufferAndTerminate<char16_t>(const WTF8Chars src, char16_t* dst, size_t dstLen,
+                                               JS::SmallestEncoding encoding);
+template void
+InflateUTF8CharsToBufferAndTerminate<JS::Latin1Char>(const WTF8Chars src, JS::Latin1Char* dst,
+                                                     size_t dstLen, JS::SmallestEncoding encoding);
 
 #ifdef DEBUG
 void
 JS::ConstUTF8CharsZ::validate(size_t aLength)
 {
     MOZ_ASSERT(data_);
     UTF8Chars chars(data_, aLength);
     auto nop = [](char16_t) -> LoopDisposition { return LoopDisposition::Continue; };
--- a/js/src/vm/JSAtom.cpp
+++ b/js/src/vm/JSAtom.cpp
@@ -37,26 +37,27 @@
 using namespace js;
 
 using mozilla::ArrayEnd;
 using mozilla::ArrayLength;
 using mozilla::Maybe;
 using mozilla::Nothing;
 using mozilla::RangedPtr;
 
-template <typename CharT>
-extern void InflateUTF8CharsToBufferAndTerminate(const UTF8Chars src, CharT* dst, size_t dstLen,
+template <typename CharT, typename InputCharsT>
+extern void InflateUTF8CharsToBufferAndTerminate(const InputCharsT src, CharT* dst, size_t dstLen,
                                                  JS::SmallestEncoding encoding);
 
 template <typename CharT>
 extern bool UTF8EqualsChars(const JS::UTF8Chars utf8, const CharT* chars);
 
+template <typename InputCharsT>
 extern bool
-GetUTF8AtomizationData(JSContext* cx, const JS::UTF8Chars utf8, size_t* outlen, JS::SmallestEncoding* encoding,
-                       HashNumber* hashNum);
+GetUTF8AtomizationData(JSContext* cx, const InputCharsT utf8, size_t* outlen,
+                       JS::SmallestEncoding* encoding, HashNumber* hashNum);
 
 struct js::AtomHasher::Lookup
 {
     union {
         const JS::Latin1Char* latin1Chars;
         const char16_t* twoByteChars;
         const char* utf8Bytes;
     };
@@ -861,38 +862,41 @@ PermanentlyAtomizeAndCopyChars(JSContext
     {
         ReportOutOfMemory(cx);
         return nullptr;
     }
 
     return atom;
 }
 
-struct AtomizeUTF8CharsWrapper
+template <typename CharsT>
+struct AtomizeUTF8OrWTF8CharsWrapper
 {
-    JS::UTF8Chars utf8;
+    CharsT utf8;
     JS::SmallestEncoding encoding;
 
-    AtomizeUTF8CharsWrapper(const JS::UTF8Chars& chars, JS::SmallestEncoding minEncode)
+    AtomizeUTF8OrWTF8CharsWrapper(const CharsT& chars, JS::SmallestEncoding minEncode)
       : utf8(chars), encoding(minEncode)
     { }
 };
 
+// MakeFlatStringForAtomization has 4 variants.
+// This is used by Latin1Char and char16_t.
 template <typename CharT>
 MOZ_ALWAYS_INLINE
 static JSFlatString*
 MakeFlatStringForAtomization(JSContext* cx, const CharT* tbchars, size_t length)
 {
     return NewStringCopyN<NoGC>(cx, tbchars, length);
 }
 
-template<typename CharT>
+template<typename CharT, typename WrapperT>
 MOZ_ALWAYS_INLINE
 static JSFlatString*
-MakeUTF8AtomHelper(JSContext* cx, const AtomizeUTF8CharsWrapper* chars, size_t length)
+MakeUTF8AtomHelper(JSContext* cx, const WrapperT* chars, size_t length)
 {
     if (JSInlineString::lengthFits<CharT>(length)) {
         CharT* storage;
         JSInlineString* str = AllocateInlineString<NoGC>(cx, length, &storage);
         if (!str) {
             return nullptr;
         }
 
@@ -915,20 +919,24 @@ MakeUTF8AtomHelper(JSContext* cx, const 
     if (!str) {
         return nullptr;
     }
 
     mozilla::Unused << newStr.release();
     return str;
 }
 
-template<>
+// Another 2 variants of MakeFlatStringForAtomization.
+// This is used by AtomizeUTF8OrWTF8CharsWrapper with UTF8Chars or WTF8Chars.
+template<typename InputCharsT>
 MOZ_ALWAYS_INLINE
 /* static */ JSFlatString*
-MakeFlatStringForAtomization(JSContext* cx, const AtomizeUTF8CharsWrapper* chars, size_t length)
+MakeFlatStringForAtomization(JSContext* cx,
+                             const AtomizeUTF8OrWTF8CharsWrapper<InputCharsT>* chars,
+                             size_t length)
 {
     if (length == 0) {
         return cx->emptyString();
     }
 
     if (chars->encoding == JS::SmallestEncoding::UTF16) {
         return MakeUTF8AtomHelper<char16_t>(cx, chars, length);
     }
@@ -1036,37 +1044,50 @@ js::AtomizeChars(JSContext* cx, const Ch
 }
 
 template JSAtom*
 js::AtomizeChars(JSContext* cx, const Latin1Char* chars, size_t length, PinningBehavior pin);
 
 template JSAtom*
 js::AtomizeChars(JSContext* cx, const char16_t* chars, size_t length, PinningBehavior pin);
 
+template <typename CharsT>
 JSAtom*
-js::AtomizeUTF8Chars(JSContext* cx, const char* utf8Chars, size_t utf8ByteLength)
+AtomizeUTF8OrWTF8Chars(JSContext* cx, const char* utf8Chars, size_t utf8ByteLength)
 {
     // Since the static strings are all ascii, we can check them before trying anything else.
     if (JSAtom* s = cx->staticStrings().lookup(utf8Chars, utf8ByteLength)) {
         return s;
     }
 
     size_t length;
     HashNumber hash;
     JS::SmallestEncoding forCopy;
-    UTF8Chars utf8(utf8Chars, utf8ByteLength);
+    CharsT utf8(utf8Chars, utf8ByteLength);
     if (!GetUTF8AtomizationData(cx, utf8, &length, &forCopy, &hash)) {
         return nullptr;
     }
 
-    AtomizeUTF8CharsWrapper chars(utf8, forCopy);
+    AtomizeUTF8OrWTF8CharsWrapper<CharsT> chars(utf8, forCopy);
     AtomHasher::Lookup lookup(utf8Chars, utf8ByteLength, length, hash);
     return AtomizeAndCopyCharsFromLookup(cx, &chars, length, lookup, DoNotPinAtom, Nothing());
 }
 
+JSAtom*
+js::AtomizeUTF8Chars(JSContext* cx, const char* utf8Chars, size_t utf8ByteLength)
+{
+    return AtomizeUTF8OrWTF8Chars<UTF8Chars>(cx, utf8Chars, utf8ByteLength);
+}
+
+JSAtom*
+js::AtomizeWTF8Chars(JSContext* cx, const char* wtf8Chars, size_t wtf8ByteLength)
+{
+    return AtomizeUTF8OrWTF8Chars<WTF8Chars>(cx, wtf8Chars, wtf8ByteLength);
+}
+
 bool
 js::IndexToIdSlow(JSContext* cx, uint32_t index, MutableHandleId idp)
 {
     MOZ_ASSERT(index > JSID_INT_MAX);
 
     char16_t buf[UINT32_CHAR_BUFFER_LENGTH];
     RangedPtr<char16_t> end(ArrayEnd(buf), buf, ArrayEnd(buf));
     RangedPtr<char16_t> start = BackfillIndexInCharBuffer(index, end);
--- a/js/src/vm/JSAtom.h
+++ b/js/src/vm/JSAtom.h
@@ -65,16 +65,19 @@ template <typename CharT>
 extern JSAtom*
 AtomizeChars(JSContext* cx, const CharT* chars, size_t length,
              js::PinningBehavior pin = js::DoNotPinAtom);
 
 extern JSAtom*
 AtomizeUTF8Chars(JSContext* cx, const char* utf8Chars, size_t utf8ByteLength);
 
 extern JSAtom*
+AtomizeWTF8Chars(JSContext* cx, const char* wtf8Chars, size_t wtf8ByteLength);
+
+extern JSAtom*
 AtomizeString(JSContext* cx, JSString* str, js::PinningBehavior pin = js::DoNotPinAtom);
 
 template <AllowGC allowGC>
 extern JSAtom*
 ToAtom(JSContext* cx, typename MaybeRooted<JS::Value, allowGC>::HandleType v);
 
 // These functions are declared in vm/Xdr.h
 //