Bug 1476866 - Move fillCharBufferWithTemplateStringContents into TokenStreamCharsBase so that a UTF-8 specialization can eventually be defined for it. r=arai
author Jeff Walden <jwalden@mit.edu>
Fri, 29 Jun 2018 13:46:09 -0700
changeset 427439 7d07cfa666bf8bb2ec9aef3c7f88654f66e9a433
parent 427438 4bd4c0e74bc6ce7ef610ef902085a3609b9517f8
child 427440 8258ce540165d59bd100a953e89ea316fe5962ac
push id 34304
push user toros@mozilla.com
push date Fri, 20 Jul 2018 09:57:23 +0000
treeherder mozilla-central@4f12d77b4f9b [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers arai
bugs 1476866
milestone 63.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1476866 - Move fillCharBufferWithTemplateStringContents into TokenStreamCharsBase so that a UTF-8 specialization can eventually be defined for it. r=arai
js/src/frontend/TokenStream.cpp
js/src/frontend/TokenStream.h
--- a/js/src/frontend/TokenStream.cpp
+++ b/js/src/frontend/TokenStream.cpp
@@ -435,16 +435,42 @@ TokenStreamAnyChars::TokenStreamAnyChars
 
 template<typename CharT>
 TokenStreamCharsBase<CharT>::TokenStreamCharsBase(JSContext* cx, const CharT* chars, size_t length,
                                                   size_t startOffset)
   : TokenStreamCharsShared(cx),
     sourceUnits(chars, length, startOffset)
 {}
 
+template<>
+MOZ_MUST_USE bool
+TokenStreamCharsBase<char16_t>::fillCharBufferWithTemplateStringContents(const char16_t* cur,
+                                                                         const char16_t* end)
+{
+    MOZ_ASSERT(this->charBuffer.length() == 0);
+
+    while (cur < end) {
+        // Template literals normalize only '\r' and "\r\n" to '\n'.  The
+        // Unicode separators need no special handling here.
+        // https://tc39.github.io/ecma262/#sec-static-semantics-tv-and-trv
+        char16_t ch = *cur++;
+        if (ch == '\r') {
+            ch = '\n';
+            if (cur < end && *cur == '\n')
+                cur++;
+        }
+
+        if (!this->charBuffer.append(ch))
+            return false;
+    }
+
+    MOZ_ASSERT(cur == end);
+    return true;
+}
+
 template<typename CharT, class AnyCharsAccess>
 TokenStreamSpecific<CharT, AnyCharsAccess>::TokenStreamSpecific(JSContext* cx,
                                                                 const ReadOnlyCompileOptions& options,
                                                                 const CharT* base, size_t length)
   : TokenStreamChars<CharT, AnyCharsAccess>(cx, base, length, options.scriptSourceOffset)
 {}
 
 bool
--- a/js/src/frontend/TokenStream.h
+++ b/js/src/frontend/TokenStream.h
@@ -1284,18 +1284,23 @@ class TokenStreamCharsBase
     inline void consumeKnownCodeUnit(int32_t unit);
 
     // Forbid accidental calls to consumeKnownCodeUnit *not* with the single
     // unit-or-EOF type.  CharT should use SourceUnits::consumeKnownCodeUnit;
     // CodeUnitValue() results should go through toCharT(), or better yet just
     // use the original CharT.
     template<typename T> inline void consumeKnownCodeUnit(T) = delete;
 
-    MOZ_MUST_USE inline bool
-    fillCharBufferWithTemplateStringContents(const CharT* cur, const CharT* end);
+    /**
+     * Accumulate the provided range of already-validated (i.e. valid UTF-8, or
+     * anything if CharT is char16_t because JS permits lone and mispaired
+     * surrogates) raw template literal text (i.e. containing no escapes or
+     * substitutions) into |charBuffer|.
+     */
+    MOZ_MUST_USE bool fillCharBufferWithTemplateStringContents(const CharT* cur, const CharT* end);
 
   protected:
     /** Code units in the source code being tokenized. */
     SourceUnits sourceUnits;
 };
 
 template<>
 inline char16_t
@@ -1323,42 +1328,16 @@ TokenStreamCharsBase<CharT>::consumeKnow
 template<>
 /* static */ MOZ_ALWAYS_INLINE JSAtom*
 TokenStreamCharsBase<char16_t>::atomizeSourceChars(JSContext* cx, const char16_t* chars,
                                                    size_t length)
 {
     return AtomizeChars(cx, chars, length);
 }
 
-template<>
-MOZ_MUST_USE inline bool
-TokenStreamCharsBase<char16_t>::fillCharBufferWithTemplateStringContents(const char16_t* cur,
-                                                                         const char16_t* end)
-{
-    MOZ_ASSERT(this->charBuffer.length() == 0);
-
-    while (cur < end) {
-        // U+2028 LINE SEPARATOR and U+2029 PARAGRAPH SEPARATOR are
-        // interpreted literally inside template literal contents; only
-        // literal CRLF sequences are normalized to '\n'.  See
-        // <https://tc39.github.io/ecma262/#sec-static-semantics-tv-and-trv>.
-        char16_t ch = *cur++;
-        if (ch == '\r') {
-            ch = '\n';
-            if (cur < end && *cur == '\n')
-                cur++;
-        }
-
-        if (!this->charBuffer.append(ch))
-            return false;
-    }
-
-    return true;
-}
-
 template<typename CharT>
 class SpecializedTokenStreamCharsBase;
 
 template<>
 class SpecializedTokenStreamCharsBase<char16_t>
   : public TokenStreamCharsBase<char16_t>
 {
     using CharsBase = TokenStreamCharsBase<char16_t>;
@@ -1476,16 +1455,19 @@ class GeneralTokenStreamChars
 
         return token;
     }
 
     uint32_t matchUnicodeEscape(uint32_t* codePoint);
     uint32_t matchExtendedUnicodeEscape(uint32_t* codePoint);
 
   protected:
+    using TokenStreamCharsShared::drainCharBufferIntoAtom;
+    using CharsBase::fillCharBufferWithTemplateStringContents;
+
     using typename CharsBase::SourceUnits;
 
   protected:
     using SpecializedCharsBase::SpecializedCharsBase;
 
     TokenStreamAnyChars& anyCharsAccess() {
         return AnyCharsAccess::anyChars(this);
     }
@@ -1567,16 +1549,38 @@ class GeneralTokenStreamChars
     }
 
     MOZ_MUST_USE MOZ_ALWAYS_INLINE bool updateLineInfoForEOL() {
         return anyCharsAccess().internalUpdateLineInfoForEOL(this->sourceUnits.offset());
     }
 
     uint32_t matchUnicodeEscapeIdStart(uint32_t* codePoint);
     bool matchUnicodeEscapeIdent(uint32_t* codePoint);
+
+  public:
+    JSAtom* getRawTemplateStringAtom() {
+        TokenStreamAnyChars& anyChars = anyCharsAccess();
+
+        MOZ_ASSERT(anyChars.currentToken().type == TokenKind::TemplateHead ||
+                   anyChars.currentToken().type == TokenKind::NoSubsTemplate);
+        const CharT* cur = this->sourceUnits.codeUnitPtrAt(anyChars.currentToken().pos.begin + 1);
+        const CharT* end;
+        if (anyChars.currentToken().type == TokenKind::TemplateHead) {
+            // Of the form    |`...${|   or   |}...${|
+            end = this->sourceUnits.codeUnitPtrAt(anyChars.currentToken().pos.end - 2);
+        } else {
+            // NO_SUBS_TEMPLATE is of the form   |`...`|   or   |}...`|
+            end = this->sourceUnits.codeUnitPtrAt(anyChars.currentToken().pos.end - 1);
+        }
+
+        if (!fillCharBufferWithTemplateStringContents(cur, end))
+            return nullptr;
+
+        return drainCharBufferIntoAtom(anyChars.cx);
+    }
 };
 
 template<typename CharT, class AnyCharsAccess> class TokenStreamChars;
 
 template<class AnyCharsAccess>
 class TokenStreamChars<char16_t, AnyCharsAccess>
   : public GeneralTokenStreamChars<char16_t, AnyCharsAccess>
 {
@@ -1900,37 +1904,16 @@ class MOZ_STACK_CLASS TokenStreamSpecifi
     // These functions take a |va_list*| parameter, not a |va_list| parameter,
     // to hack around bug 1363116.  (Longer-term, the right fix is of course to
     // not use ellipsis functions or |va_list| at all in error reporting.)
     bool reportStrictModeErrorNumberVA(UniquePtr<JSErrorNotes> notes, uint32_t offset,
                                        bool strictMode, unsigned errorNumber, va_list* args);
     bool reportExtraWarningErrorNumberVA(UniquePtr<JSErrorNotes> notes, uint32_t offset,
                                          unsigned errorNumber, va_list* args);
 
-    JSAtom* getRawTemplateStringAtom() {
-        TokenStreamAnyChars& anyChars = anyCharsAccess();
-
-        MOZ_ASSERT(anyChars.currentToken().type == TokenKind::TemplateHead ||
-                   anyChars.currentToken().type == TokenKind::NoSubsTemplate);
-        const CharT* cur = this->sourceUnits.codeUnitPtrAt(anyChars.currentToken().pos.begin + 1);
-        const CharT* end;
-        if (anyChars.currentToken().type == TokenKind::TemplateHead) {
-            // Of the form    |`...${|   or   |}...${|
-            end = this->sourceUnits.codeUnitPtrAt(anyChars.currentToken().pos.end - 2);
-        } else {
-            // NO_SUBS_TEMPLATE is of the form   |`...`|   or   |}...`|
-            end = this->sourceUnits.codeUnitPtrAt(anyChars.currentToken().pos.end - 1);
-        }
-
-        if (!fillCharBufferWithTemplateStringContents(cur, end))
-            return nullptr;
-
-        return drainCharBufferIntoAtom(anyChars.cx);
-    }
-
   private:
     // This is private because it should only be called by the tokenizer while
     // tokenizing not by, for example, BytecodeEmitter.
     bool reportStrictModeError(unsigned errorNumber, ...);
 
     void reportInvalidEscapeError(uint32_t offset, InvalidEscapeType type) {
         switch (type) {
             case InvalidEscapeType::None: