Bug 1351107 - Split out all source-text-character-type-agnostic functionality in TokenStream into a TokenStreamBase class. r=arai
authorJeff Walden <jwalden@mit.edu>
Mon, 20 Mar 2017 13:35:07 -0700
changeset 351989 e83b7192f1535583576b3ae25dc2b4d3395f0dcb
parent 351988 36652248ec274ad0febac23927906db40a2b1c75
child 351990 1cbd82417e3bcfc13809f36385db49b3b8dc78e0
push id31624
push userarchaeopteryx@coole-files.de
push dateSat, 08 Apr 2017 20:49:20 +0000
treeherdermozilla-central@2a3ecdb7d1ea [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersarai
bugs1351107
milestone55.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1351107 - Split out all source-text-character-type-agnostic functionality in TokenStream into a TokenStreamBase class. r=arai
js/src/frontend/TokenStream.cpp
js/src/frontend/TokenStream.h
--- a/js/src/frontend/TokenStream.cpp
+++ b/js/src/frontend/TokenStream.cpp
@@ -234,17 +234,17 @@ frontend::ReservedWordToCharZ(PropertyNa
 #undef EMIT_CASE
       default:
         MOZ_ASSERT_UNREACHABLE("Not a reserved word PropertyName.");
     }
     return nullptr;
 }
 
 PropertyName*
-TokenStream::reservedWordToPropertyName(TokenKind tt) const
+TokenStreamBase::reservedWordToPropertyName(TokenKind tt) const
 {
     MOZ_ASSERT(tt != TOK_NAME);
     switch (tt) {
 #define EMIT_CASE(word, name, type) case type: return cx->names().name;
       FOR_EACH_JAVASCRIPT_RESERVED_WORD(EMIT_CASE)
 #undef EMIT_CASE
       default:
         MOZ_ASSERT_UNREACHABLE("Not a reserved word TokenKind.");
@@ -401,36 +401,42 @@ TokenStream::SourceCoords::lineNumAndCol
     *columnIndex = offset - lineStartOffset;
 }
 
 #ifdef _MSC_VER
 #pragma warning(push)
 #pragma warning(disable:4351)
 #endif
 
-TokenStream::TokenStream(JSContext* cx, const ReadOnlyCompileOptions& options,
-                         const char16_t* base, size_t length, StrictModeGetter* smg)
+TokenStreamBase::TokenStreamBase(JSContext* cx, const ReadOnlyCompileOptions& options,
+                                 StrictModeGetter* smg)
   : srcCoords(cx, options.lineno),
     options_(options),
     tokens(),
     cursor(),
     lookahead(),
     lineno(options.lineno),
     flags(),
     linebase(0),
     prevLinebase(size_t(-1)),
-    userbuf(cx, base, length, options.column),
     filename(options.filename()),
     displayURL_(nullptr),
     sourceMapURL_(nullptr),
-    tokenbuf(cx),
     cx(cx),
     mutedErrors(options.mutedErrors()),
     strictModeGetter(smg)
 {
+}
+
+TokenStream::TokenStream(JSContext* cx, const ReadOnlyCompileOptions& options,
+                         const char16_t* base, size_t length, StrictModeGetter* smg)
+  : TokenStreamBase(cx, options, smg),
+    userbuf(cx, base, length, options.column),
+    tokenbuf(cx)
+{
     // Nb: the following tables could be static, but initializing them here is
     // much easier.  Don't worry, the time to initialize them for each
     // TokenStream is trivial.  See bug 639420.
 
     // See Parser::assignExpr() for an explanation of isExprEnding[].
     memset(isExprEnding, 0, sizeof(isExprEnding));
     isExprEnding[TOK_COMMA] = 1;
     isExprEnding[TOK_SEMI]  = 1;
@@ -452,20 +458,16 @@ TokenStream::checkOptions()
     if (options().column >= mozilla::MaxValue<int32_t>::value / 2 + 1) {
         reportErrorNoOffset(JSMSG_BAD_COLUMN_NUMBER);
         return false;
     }
 
     return true;
 }
 
-TokenStream::~TokenStream()
-{
-}
-
 // Use the fastest available getc.
 #if defined(HAVE_GETC_UNLOCKED)
 # define fast_getc getc_unlocked
 #elif defined(HAVE__GETC_NOLOCK)
 # define fast_getc _getc_nolock
 #else
 # define fast_getc getc
 #endif
@@ -475,17 +477,17 @@ TokenStream::updateLineInfoForEOL()
 {
     prevLinebase = linebase;
     linebase = userbuf.offset();
     lineno++;
     return srcCoords.add(lineno, linebase);
 }
 
 MOZ_ALWAYS_INLINE void
-TokenStream::updateFlagsForEOL()
+TokenStreamBase::updateFlagsForEOL()
 {
     flags.isDirtyLine = false;
 }
 
 // This gets the next char, normalizing all EOL sequences to '\n' as it goes.
 bool
 TokenStream::getChar(int32_t* cp)
 {
--- a/js/src/frontend/TokenStream.h
+++ b/js/src/frontend/TokenStream.h
@@ -88,17 +88,17 @@ enum class InvalidEscapeType {
     Unicode,
     // An otherwise well-formed \u escape which represents a
     // codepoint > 10FFFF.
     UnicodeOverflow,
     // An octal escape in a template token.
     Octal
 };
 
-class TokenStream;
+class TokenStreamBase;
 
 struct Token
 {
   private:
     // Sometimes the parser needs to inform the tokenizer to interpret
     // subsequent text in a particular manner: for example, to tokenize a
     // keyword as an identifier, not as the actual keyword, on the right-hand
     // side of a dotted property access.  Such information is communicated to
@@ -156,17 +156,17 @@ struct Token
         // operand before try getting colon/comma/semicolon.
         // See also the comment in Parser::assignExpr().
         NoneIsOperand,
 
         // If a semicolon is inserted automatically, the next token is already
         // gotten with None, but we expect Operand.
         OperandIsNone,
     };
-    friend class TokenStream;
+    friend class TokenStreamBase;
 
   public:
     TokenKind           type;           // char value or above enumerator
     TokenPos            pos;            // token position in file
     union {
       private:
         friend struct Token;
         PropertyName*   name;          // non-numeric atom
@@ -264,91 +264,45 @@ IsStrictReservedWord(JSLinearString* str
 // This class is a tiny back-channel from TokenStream to the strict mode flag
 // that avoids exposing the rest of SharedContext to TokenStream.
 //
 class StrictModeGetter {
   public:
     virtual bool strictMode() = 0;
 };
 
-// TokenStream is the lexical scanner for Javascript source text.
-//
-// It takes a buffer of char16_t characters and linearly scans it into |Token|s.
-// Internally the class uses a four element circular buffer |tokens| of
-// |Token|s. As an index for |tokens|, the member |cursor| points to the
-// current token.
-// Calls to getToken() increase |cursor| by one and return the new current
-// token. If a TokenStream was just created, the current token is initialized
-// with random data (i.e. not initialized). It is therefore important that
-// one of the first four member functions listed below is called first.
-// The circular buffer lets us go back up to two tokens from the last
-// scanned token. Internally, the relative number of backward steps that were
-// taken (via ungetToken()) after the last token was scanned is stored in
-// |lookahead|.
-//
-// The following table lists in which situations it is safe to call each listed
-// function. No checks are made by the functions in non-debug builds.
-//
-// Function Name     | Precondition; changes to |lookahead|
-// ------------------+---------------------------------------------------------
-// getToken          | none; if |lookahead > 0| then |lookahead--|
-// peekToken         | none; if |lookahead == 0| then |lookahead == 1|
-// peekTokenSameLine | none; if |lookahead == 0| then |lookahead == 1|
-// matchToken        | none; if |lookahead > 0| and the match succeeds then
-//                   |       |lookahead--|
-// consumeKnownToken | none; if |lookahead > 0| then |lookahead--|
-// ungetToken        | 0 <= |lookahead| <= |maxLookahead - 1|; |lookahead++|
-//
-// The behavior of the token scanning process (see getTokenInternal()) can be
-// modified by calling one of the first four above listed member functions with
-// an optional argument of type Modifier.  However, the modifier will be
-// ignored unless |lookahead == 0| holds.  Due to constraints of the grammar,
-// this turns out not to be a problem in practice. See the
-// mozilla.dev.tech.js-engine.internals thread entitled 'Bug in the scanner?'
-// for more details:
-// https://groups.google.com/forum/?fromgroups=#!topic/mozilla.dev.tech.js-engine.internals/2JLH5jRcr7E).
-//
-// The methods seek() and tell() allow to rescan from a previous visited
-// location of the buffer.
-//
-class MOZ_STACK_CLASS TokenStream
+class TokenStreamBase
 {
+  protected:
+    TokenStreamBase(JSContext* cx, const ReadOnlyCompileOptions& options, StrictModeGetter* smg);
+
     // Unicode separators that are treated as line terminators, in addition to \n, \r.
     enum {
         LINE_SEPARATOR = 0x2028,
         PARA_SEPARATOR = 0x2029
     };
 
     static const size_t ntokens = 4;                // 1 current + 2 lookahead, rounded
                                                     // to power of 2 to avoid divmod by 3
     static const unsigned maxLookahead = 2;
     static const unsigned ntokensMask = ntokens - 1;
 
   public:
-    typedef Vector<char16_t, 32> CharBuffer;
-
-    TokenStream(JSContext* cx, const ReadOnlyCompileOptions& options,
-                const char16_t* base, size_t length, StrictModeGetter* smg);
-
-    ~TokenStream();
-
-    MOZ_MUST_USE bool checkOptions();
-
     // Accessors.
     const Token& currentToken() const { return tokens[cursor]; }
     bool isCurrentTokenType(TokenKind type) const {
         return currentToken().type == type;
     }
-    const CharBuffer& getTokenbuf() const { return tokenbuf; }
+
     const char* getFilename() const { return filename; }
     bool getMutedErrors() const { return mutedErrors; }
     JSVersion versionNumber() const { return VersionNumber(options().version); }
     JSVersion versionWithFlags() const { return options().version; }
 
-  private:
+  protected:
     PropertyName* reservedWordToPropertyName(TokenKind tt) const;
 
   public:
     PropertyName* currentName() const {
         if (isCurrentTokenType(TOK_NAME))
             return currentToken().name();
 
         MOZ_ASSERT(TokenKindIsPossibleIdentifierName(currentToken().type));
@@ -375,134 +329,48 @@ class MOZ_STACK_CLASS TokenStream
 
     bool hasInvalidTemplateEscape() const {
         return invalidTemplateEscapeType != InvalidEscapeType::None;
     }
     void clearInvalidTemplateEscape() {
         invalidTemplateEscapeType = InvalidEscapeType::None;
     }
 
-    // If there is an invalid escape in a template, report it and return false,
-    // otherwise return true.
-    bool checkForInvalidTemplateEscapeError() {
-        if (invalidTemplateEscapeType == InvalidEscapeType::None)
-            return true;
-
-        reportInvalidEscapeError(invalidTemplateEscapeOffset, invalidTemplateEscapeType);
-        return false;
-    }
-
-    // TokenStream-specific error reporters.
-    bool reportError(unsigned errorNumber, ...);
-    bool reportErrorNoOffset(unsigned errorNumber, ...);
-
-    // Report the given error at the current offset.
-    void error(unsigned errorNumber, ...);
-
-    // Report the given error at the given offset.
-    void errorAt(uint32_t offset, unsigned errorNumber, ...);
-
-    // Warn at the current offset.
-    MOZ_MUST_USE bool warning(unsigned errorNumber, ...);
-
     static const uint32_t NoOffset = UINT32_MAX;
 
-    // General-purpose error reporters.  You should avoid calling these
-    // directly, and instead use the more succinct alternatives (error(),
-    // warning(), &c.) in TokenStream, Parser, and BytecodeEmitter.
-    bool reportCompileErrorNumberVA(UniquePtr<JSErrorNotes> notes, uint32_t offset, unsigned flags,
-                                    unsigned errorNumber, va_list args);
-    bool reportStrictModeErrorNumberVA(UniquePtr<JSErrorNotes> notes, uint32_t offset,
-                                       bool strictMode, unsigned errorNumber, va_list args);
-    bool reportExtraWarningErrorNumberVA(UniquePtr<JSErrorNotes> notes, uint32_t offset,
-                                         unsigned errorNumber, va_list args);
-
-    // asm.js reporter
-    void reportAsmJSError(uint32_t offset, unsigned errorNumber, ...);
-
-    JSAtom* getRawTemplateStringAtom() {
-        MOZ_ASSERT(currentToken().type == TOK_TEMPLATE_HEAD ||
-                   currentToken().type == TOK_NO_SUBS_TEMPLATE);
-        const char16_t* cur = userbuf.rawCharPtrAt(currentToken().pos.begin + 1);
-        const char16_t* end;
-        if (currentToken().type == TOK_TEMPLATE_HEAD) {
-            // Of the form    |`...${|   or   |}...${|
-            end = userbuf.rawCharPtrAt(currentToken().pos.end - 2);
-        } else {
-            // NO_SUBS_TEMPLATE is of the form   |`...`|   or   |}...`|
-            end = userbuf.rawCharPtrAt(currentToken().pos.end - 1);
-        }
-
-        CharBuffer charbuf(cx);
-        while (cur < end) {
-            int32_t ch = *cur;
-            if (ch == '\r') {
-                ch = '\n';
-                if ((cur + 1 < end) && (*(cur + 1) == '\n'))
-                    cur++;
-            }
-            if (!charbuf.append(ch))
-                return nullptr;
-            cur++;
-        }
-        return AtomizeChars(cx, charbuf.begin(), charbuf.length());
-    }
-
-  private:
-    // These are private because they should only be called by the tokenizer
+  protected:
+    // This is protected because it should only be called by the tokenizer
     // while tokenizing not by, for example, BytecodeEmitter.
-    bool reportStrictModeError(unsigned errorNumber, ...);
     bool strictMode() const { return strictModeGetter && strictModeGetter->strictMode(); }
 
     void setInvalidTemplateEscape(uint32_t offset, InvalidEscapeType type) {
         MOZ_ASSERT(type != InvalidEscapeType::None);
         if (invalidTemplateEscapeType != InvalidEscapeType::None)
             return;
         invalidTemplateEscapeOffset = offset;
         invalidTemplateEscapeType = type;
     }
-    void reportInvalidEscapeError(uint32_t offset, InvalidEscapeType type) {
-        switch (type) {
-            case InvalidEscapeType::None:
-                MOZ_ASSERT_UNREACHABLE("unexpected InvalidEscapeType");
-                return;
-            case InvalidEscapeType::Hexadecimal:
-                errorAt(offset, JSMSG_MALFORMED_ESCAPE, "hexadecimal");
-                return;
-            case InvalidEscapeType::Unicode:
-                errorAt(offset, JSMSG_MALFORMED_ESCAPE, "Unicode");
-                return;
-            case InvalidEscapeType::UnicodeOverflow:
-                errorAt(offset, JSMSG_UNICODE_OVERFLOW, "escape sequence");
-                return;
-            case InvalidEscapeType::Octal:
-                errorAt(offset, JSMSG_DEPRECATED_OCTAL);
-                return;
-        }
-    }
 
-    static JSAtom* atomize(JSContext* cx, CharBuffer& cb);
-    MOZ_MUST_USE bool putIdentInTokenbuf(const char16_t* identStart);
+    uint32_t invalidTemplateEscapeOffset = 0;
+    InvalidEscapeType invalidTemplateEscapeType = InvalidEscapeType::None;
 
+  protected:
     struct Flags
     {
         bool isEOF:1;           // Hit end of file.
         bool isDirtyLine:1;     // Non-whitespace since start of line.
         bool sawOctalEscape:1;  // Saw an octal character escape.
         bool hadError:1;        // Hit a syntax error, at start or during a
                                 // token.
 
         Flags()
           : isEOF(), isDirtyLine(), sawOctalEscape(), hadError()
         {}
     };
 
-    uint32_t invalidTemplateEscapeOffset = 0;
-    InvalidEscapeType invalidTemplateEscapeType = InvalidEscapeType::None;
-
   public:
     typedef Token::Modifier Modifier;
     static constexpr Modifier None = Token::None;
     static constexpr Modifier Operand = Token::Operand;
     static constexpr Modifier TemplateTail = Token::TemplateTail;
 
     typedef Token::ModifierException ModifierException;
     static constexpr ModifierException NoException = Token::NoException;
@@ -563,16 +431,296 @@ class MOZ_STACK_CLASS TokenStream
         }
 
         MOZ_ASSERT_UNREACHABLE("this token was previously looked up with a "
                                "different modifier, potentially making "
                                "tokenization non-deterministic");
 #endif
     }
 
+#ifdef DEBUG
+    inline bool debugHasNoLookahead() const {
+        return lookahead == 0;
+    }
+#endif
+
+    bool hasDisplayURL() const {
+        return displayURL_ != nullptr;
+    }
+
+    char16_t* displayURL() {
+        return displayURL_.get();
+    }
+
+    bool hasSourceMapURL() const {
+        return sourceMapURL_ != nullptr;
+    }
+
+    char16_t* sourceMapURL() {
+        return sourceMapURL_.get();
+    }
+
+    // This class maps a userbuf offset (which is 0-indexed) to a line number
+    // (which is 1-indexed) and a column index (which is 0-indexed).
+    class SourceCoords
+    {
+        // For a given buffer holding source code, |lineStartOffsets_| has one
+        // element per line of source code, plus one sentinel element.  Each
+        // non-sentinel element holds the buffer offset for the start of the
+        // corresponding line of source code.  For this example script:
+        //
+        // 1  // xyz            [line starts at offset 0]
+        // 2  var x;            [line starts at offset 7]
+        // 3                    [line starts at offset 14]
+        // 4  var y;            [line starts at offset 15]
+        //
+        // |lineStartOffsets_| is:
+        //
+        //   [0, 7, 14, 15, MAX_PTR]
+        //
+        // To convert a "line number" to a "line index" (i.e. an index into
+        // |lineStartOffsets_|), subtract |initialLineNum_|.  E.g. line 3's
+        // line index is (3 - initialLineNum_), which is 2.  Therefore
+        // lineStartOffsets_[2] holds the buffer offset for the start of line 3,
+        // which is 14.  (Note that |initialLineNum_| is often 1, but not
+        // always.)
+        //
+        // The first element is always 0, and the last element is always the
+        // MAX_PTR sentinel.
+        //
+        // offset-to-line/column lookups are O(log n) in the worst case (binary
+        // search), but in practice they're heavily clustered and we do better
+        // than that by using the previous lookup's result (lastLineIndex_) as
+        // a starting point.
+        //
+        // Checking if an offset lies within a particular line number
+        // (isOnThisLine()) is O(1).
+        //
+        Vector<uint32_t, 128> lineStartOffsets_;
+        uint32_t            initialLineNum_;
+
+        // This is mutable because it's modified on every search, but that fact
+        // isn't visible outside this class.
+        mutable uint32_t    lastLineIndex_;
+
+        uint32_t lineIndexOf(uint32_t offset) const;
+
+        static const uint32_t MAX_PTR = UINT32_MAX;
+
+        uint32_t lineIndexToNum(uint32_t lineIndex) const { return lineIndex + initialLineNum_; }
+        uint32_t lineNumToIndex(uint32_t lineNum)   const { return lineNum   - initialLineNum_; }
+
+      public:
+        SourceCoords(JSContext* cx, uint32_t ln);
+
+        MOZ_MUST_USE bool add(uint32_t lineNum, uint32_t lineStartOffset);
+        MOZ_MUST_USE bool fill(const SourceCoords& other);
+
+        bool isOnThisLine(uint32_t offset, uint32_t lineNum, bool* onThisLine) const {
+            uint32_t lineIndex = lineNumToIndex(lineNum);
+            if (lineIndex + 1 >= lineStartOffsets_.length()) // +1 due to sentinel
+                return false;
+            *onThisLine = lineStartOffsets_[lineIndex] <= offset &&
+                          offset < lineStartOffsets_[lineIndex + 1];
+            return true;
+        }
+
+        uint32_t lineNum(uint32_t offset) const;
+        uint32_t columnIndex(uint32_t offset) const;
+        void lineNumAndColumnIndex(uint32_t offset, uint32_t* lineNum, uint32_t* columnIndex) const;
+    };
+
+    SourceCoords srcCoords;
+
+    JSAtomState& names() const {
+        return cx->names();
+    }
+
+    JSContext* context() const {
+        return cx;
+    }
+
+    const ReadOnlyCompileOptions& options() const {
+        return options_;
+    }
+
+    void updateFlagsForEOL();
+
+    const Token& nextToken() const {
+        MOZ_ASSERT(hasLookahead());
+        return tokens[(cursor + 1) & ntokensMask];
+    }
+
+    bool hasLookahead() const { return lookahead > 0; }
+
+  protected:
+    // Options used for parsing/tokenizing.
+    const ReadOnlyCompileOptions& options_;
+
+    Token               tokens[ntokens];    // circular token buffer
+    unsigned            cursor;             // index of last parsed token
+    unsigned            lookahead;          // count of lookahead tokens
+    unsigned            lineno;             // current line number
+    Flags               flags;              // flags -- see above
+    size_t              linebase;           // start of current line
+    size_t              prevLinebase;       // start of previous line;  size_t(-1) if on the first line
+    const char*         filename;           // input filename or null
+    UniqueTwoByteChars  displayURL_;        // the user's requested source URL or null
+    UniqueTwoByteChars  sourceMapURL_;      // source map's filename or null
+    uint8_t             isExprEnding[TOK_LIMIT];// which tokens definitely terminate exprs?
+    JSContext* const    cx;
+    bool                mutedErrors;
+    StrictModeGetter*   strictModeGetter;  // used to test for strict mode
+};
+
+// TokenStream is the lexical scanner for Javascript source text.
+//
+// It takes a buffer of char16_t characters and linearly scans it into |Token|s.
+// Internally the class uses a four element circular buffer |tokens| of
+// |Token|s. As an index for |tokens|, the member |cursor| points to the
+// current token.
+// Calls to getToken() increase |cursor| by one and return the new current
+// token. If a TokenStream was just created, the current token is initialized
+// with random data (i.e. not initialized). It is therefore important that
+// one of the first four member functions listed below is called first.
+// The circular buffer lets us go back up to two tokens from the last
+// scanned token. Internally, the relative number of backward steps that were
+// taken (via ungetToken()) after the last token was scanned is stored in
+// |lookahead|.
+//
+// The following table lists in which situations it is safe to call each listed
+// function. No checks are made by the functions in non-debug builds.
+//
+// Function Name     | Precondition; changes to |lookahead|
+// ------------------+---------------------------------------------------------
+// getToken          | none; if |lookahead > 0| then |lookahead--|
+// peekToken         | none; if |lookahead == 0| then |lookahead == 1|
+// peekTokenSameLine | none; if |lookahead == 0| then |lookahead == 1|
+// matchToken        | none; if |lookahead > 0| and the match succeeds then
+//                   |       |lookahead--|
+// consumeKnownToken | none; if |lookahead > 0| then |lookahead--|
+// ungetToken        | 0 <= |lookahead| <= |maxLookahead - 1|; |lookahead++|
+//
+// The behavior of the token scanning process (see getTokenInternal()) can be
+// modified by calling one of the first four above listed member functions with
+// an optional argument of type Modifier.  However, the modifier will be
+// ignored unless |lookahead == 0| holds.  Due to constraints of the grammar,
+// this turns out not to be a problem in practice. See the
+// mozilla.dev.tech.js-engine.internals thread entitled 'Bug in the scanner?'
+// for more details:
+// https://groups.google.com/forum/?fromgroups=#!topic/mozilla.dev.tech.js-engine.internals/2JLH5jRcr7E).
+//
+// The methods seek() and tell() allow to rescan from a previous visited
+// location of the buffer.
+//
+class MOZ_STACK_CLASS TokenStream final : public TokenStreamBase
+{
+  public:
+    using CharBuffer = Vector<char16_t, 32>;
+
+    TokenStream(JSContext* cx, const ReadOnlyCompileOptions& options,
+                const char16_t* base, size_t length, StrictModeGetter* smg);
+
+    MOZ_MUST_USE bool checkOptions();
+
+    const CharBuffer& getTokenbuf() const { return tokenbuf; }
+
+    // If there is an invalid escape in a template, report it and return false,
+    // otherwise return true.
+    bool checkForInvalidTemplateEscapeError() {
+        if (invalidTemplateEscapeType == InvalidEscapeType::None)
+            return true;
+
+        reportInvalidEscapeError(invalidTemplateEscapeOffset, invalidTemplateEscapeType);
+        return false;
+    }
+
+    // TokenStream-specific error reporters.
+    bool reportError(unsigned errorNumber, ...);
+    bool reportErrorNoOffset(unsigned errorNumber, ...);
+
+    // Report the given error at the current offset.
+    void error(unsigned errorNumber, ...);
+
+    // Report the given error at the given offset.
+    void errorAt(uint32_t offset, unsigned errorNumber, ...);
+
+    // Warn at the current offset.
+    MOZ_MUST_USE bool warning(unsigned errorNumber, ...);
+
+    // General-purpose error reporters.  You should avoid calling these
+    // directly, and instead use the more succinct alternatives (error(),
+    // warning(), &c.) in TokenStream, Parser, and BytecodeEmitter.
+    bool reportCompileErrorNumberVA(UniquePtr<JSErrorNotes> notes, uint32_t offset, unsigned flags,
+                                    unsigned errorNumber, va_list args);
+    bool reportStrictModeErrorNumberVA(UniquePtr<JSErrorNotes> notes, uint32_t offset,
+                                       bool strictMode, unsigned errorNumber, va_list args);
+    bool reportExtraWarningErrorNumberVA(UniquePtr<JSErrorNotes> notes, uint32_t offset,
+                                         unsigned errorNumber, va_list args);
+
+    // asm.js reporter
+    void reportAsmJSError(uint32_t offset, unsigned errorNumber, ...);
+
+    JSAtom* getRawTemplateStringAtom() {
+        MOZ_ASSERT(currentToken().type == TOK_TEMPLATE_HEAD ||
+                   currentToken().type == TOK_NO_SUBS_TEMPLATE);
+        const char16_t* cur = userbuf.rawCharPtrAt(currentToken().pos.begin + 1);
+        const char16_t* end;
+        if (currentToken().type == TOK_TEMPLATE_HEAD) {
+            // Of the form    |`...${|   or   |}...${|
+            end = userbuf.rawCharPtrAt(currentToken().pos.end - 2);
+        } else {
+            // NO_SUBS_TEMPLATE is of the form   |`...`|   or   |}...`|
+            end = userbuf.rawCharPtrAt(currentToken().pos.end - 1);
+        }
+
+        CharBuffer charbuf(cx);
+        while (cur < end) {
+            int32_t ch = *cur;
+            if (ch == '\r') {
+                ch = '\n';
+                if ((cur + 1 < end) && (*(cur + 1) == '\n'))
+                    cur++;
+            }
+            if (!charbuf.append(ch))
+                return nullptr;
+            cur++;
+        }
+        return AtomizeChars(cx, charbuf.begin(), charbuf.length());
+    }
+
+  private:
+    // This is private because it should only be called by the tokenizer while
+    // tokenizing not by, for example, BytecodeEmitter.
+    bool reportStrictModeError(unsigned errorNumber, ...);
+
+    void reportInvalidEscapeError(uint32_t offset, InvalidEscapeType type) {
+        switch (type) {
+            case InvalidEscapeType::None:
+                MOZ_ASSERT_UNREACHABLE("unexpected InvalidEscapeType");
+                return;
+            case InvalidEscapeType::Hexadecimal:
+                errorAt(offset, JSMSG_MALFORMED_ESCAPE, "hexadecimal");
+                return;
+            case InvalidEscapeType::Unicode:
+                errorAt(offset, JSMSG_MALFORMED_ESCAPE, "Unicode");
+                return;
+            case InvalidEscapeType::UnicodeOverflow:
+                errorAt(offset, JSMSG_UNICODE_OVERFLOW, "escape sequence");
+                return;
+            case InvalidEscapeType::Octal:
+                errorAt(offset, JSMSG_DEPRECATED_OCTAL);
+                return;
+        }
+    }
+
+    static JSAtom* atomize(JSContext* cx, CharBuffer& cb);
+    MOZ_MUST_USE bool putIdentInTokenbuf(const char16_t* identStart);
+
+  public:
     // Advance to the next token.  If the token stream encountered an error,
     // return false.  Otherwise return true and store the token kind in |*ttp|.
     MOZ_MUST_USE bool getToken(TokenKind* ttp, Modifier modifier = None) {
         // Check for a pushed-back token resulting from mismatching lookahead.
         if (lookahead != 0) {
             MOZ_ASSERT(!flags.hadError);
             lookahead--;
             cursor = (cursor + 1) & ntokensMask;
@@ -726,130 +874,25 @@ class MOZ_STACK_CLASS TokenStream
         unsigned lookahead;
         Token lookaheadTokens[maxLookahead];
     };
 
     MOZ_MUST_USE bool advance(size_t position);
     void tell(Position*);
     void seek(const Position& pos);
     MOZ_MUST_USE bool seek(const Position& pos, const TokenStream& other);
-#ifdef DEBUG
-    inline bool debugHasNoLookahead() const {
-        return lookahead == 0;
-    }
-#endif
 
     const char16_t* rawCharPtrAt(size_t offset) const {
         return userbuf.rawCharPtrAt(offset);
     }
 
     const char16_t* rawLimit() const {
         return userbuf.limit();
     }
 
-    bool hasDisplayURL() const {
-        return displayURL_ != nullptr;
-    }
-
-    char16_t* displayURL() {
-        return displayURL_.get();
-    }
-
-    bool hasSourceMapURL() const {
-        return sourceMapURL_ != nullptr;
-    }
-
-    char16_t* sourceMapURL() {
-        return sourceMapURL_.get();
-    }
-
-    // This class maps a userbuf offset (which is 0-indexed) to a line number
-    // (which is 1-indexed) and a column index (which is 0-indexed).
-    class SourceCoords
-    {
-        // For a given buffer holding source code, |lineStartOffsets_| has one
-        // element per line of source code, plus one sentinel element.  Each
-        // non-sentinel element holds the buffer offset for the start of the
-        // corresponding line of source code.  For this example script:
-        //
-        // 1  // xyz            [line starts at offset 0]
-        // 2  var x;            [line starts at offset 7]
-        // 3                    [line starts at offset 14]
-        // 4  var y;            [line starts at offset 15]
-        //
-        // |lineStartOffsets_| is:
-        //
-        //   [0, 7, 14, 15, MAX_PTR]
-        //
-        // To convert a "line number" to a "line index" (i.e. an index into
-        // |lineStartOffsets_|), subtract |initialLineNum_|.  E.g. line 3's
-        // line index is (3 - initialLineNum_), which is 2.  Therefore
-        // lineStartOffsets_[2] holds the buffer offset for the start of line 3,
-        // which is 14.  (Note that |initialLineNum_| is often 1, but not
-        // always.)
-        //
-        // The first element is always 0, and the last element is always the
-        // MAX_PTR sentinel.
-        //
-        // offset-to-line/column lookups are O(log n) in the worst case (binary
-        // search), but in practice they're heavily clustered and we do better
-        // than that by using the previous lookup's result (lastLineIndex_) as
-        // a starting point.
-        //
-        // Checking if an offset lies within a particular line number
-        // (isOnThisLine()) is O(1).
-        //
-        Vector<uint32_t, 128> lineStartOffsets_;
-        uint32_t            initialLineNum_;
-
-        // This is mutable because it's modified on every search, but that fact
-        // isn't visible outside this class.
-        mutable uint32_t    lastLineIndex_;
-
-        uint32_t lineIndexOf(uint32_t offset) const;
-
-        static const uint32_t MAX_PTR = UINT32_MAX;
-
-        uint32_t lineIndexToNum(uint32_t lineIndex) const { return lineIndex + initialLineNum_; }
-        uint32_t lineNumToIndex(uint32_t lineNum)   const { return lineNum   - initialLineNum_; }
-
-      public:
-        SourceCoords(JSContext* cx, uint32_t ln);
-
-        MOZ_MUST_USE bool add(uint32_t lineNum, uint32_t lineStartOffset);
-        MOZ_MUST_USE bool fill(const SourceCoords& other);
-
-        bool isOnThisLine(uint32_t offset, uint32_t lineNum, bool* onThisLine) const {
-            uint32_t lineIndex = lineNumToIndex(lineNum);
-            if (lineIndex + 1 >= lineStartOffsets_.length()) // +1 due to sentinel
-                return false;
-            *onThisLine = lineStartOffsets_[lineIndex] <= offset &&
-                          offset < lineStartOffsets_[lineIndex + 1];
-            return true;
-        }
-
-        uint32_t lineNum(uint32_t offset) const;
-        uint32_t columnIndex(uint32_t offset) const;
-        void lineNumAndColumnIndex(uint32_t offset, uint32_t* lineNum, uint32_t* columnIndex) const;
-    };
-
-    SourceCoords srcCoords;
-
-    JSAtomState& names() const {
-        return cx->names();
-    }
-
-    JSContext* context() const {
-        return cx;
-    }
-
-    const ReadOnlyCompileOptions& options() const {
-        return options_;
-    }
-
   private:
     // This is the low-level interface to the JS source code buffer.  It just
     // gets raw chars, basically.  TokenStreams functions are layered on top
     // and do some extra stuff like converting all EOL sequences to '\n',
     // tracking the line number, and setting |flags.isEOF|.  (The "raw" in "raw
     // chars" refers to the lack of EOL sequence normalization.)
     //
     // buf[0..length-1] often represents a substring of some larger source,
@@ -1014,44 +1057,19 @@ class MOZ_STACK_CLASS TokenStream
     void skipCharsIgnoreEOL(uint8_t n) {
         while (n-- > 0) {
             MOZ_ASSERT(userbuf.hasRawChars());
             getCharIgnoreEOL();
         }
     }
 
     MOZ_MUST_USE MOZ_ALWAYS_INLINE bool updateLineInfoForEOL();
-    void updateFlagsForEOL();
 
-    const Token& nextToken() const {
-        MOZ_ASSERT(hasLookahead());
-        return tokens[(cursor + 1) & ntokensMask];
-    }
-
-    bool hasLookahead() const { return lookahead > 0; }
-
-    // Options used for parsing/tokenizing.
-    const ReadOnlyCompileOptions& options_;
-
-    Token               tokens[ntokens];    // circular token buffer
-    unsigned            cursor;             // index of last parsed token
-    unsigned            lookahead;          // count of lookahead tokens
-    unsigned            lineno;             // current line number
-    Flags               flags;              // flags -- see above
-    size_t              linebase;           // start of current line
-    size_t              prevLinebase;       // start of previous line;  size_t(-1) if on the first line
     TokenBuf            userbuf;            // user input buffer
-    const char*         filename;           // input filename or null
-    UniqueTwoByteChars  displayURL_;        // the user's requested source URL or null
-    UniqueTwoByteChars  sourceMapURL_;      // source map's filename or null
     CharBuffer          tokenbuf;           // current token string buffer
-    uint8_t             isExprEnding[TOK_LIMIT];// which tokens definitely terminate exprs?
-    JSContext* const    cx;
-    bool                mutedErrors;
-    StrictModeGetter*   strictModeGetter;  // used to test for strict mode
 };
 
 extern const char*
 TokenKindToDesc(TokenKind tt);
 
 } // namespace frontend
 } // namespace js