Bug 1456296 - Move IdentifierName parsing into a separate function from TSS::getTokenInternal to simplify some control flow. r=arai
author: Jeff Walden <jwalden@mit.edu>
Mon, 23 Apr 2018 13:37:39 -0700
changeset 415692 3010f9088dc76ba9d1918ab6fd5a5aaf24436a42
parent 415691 595101eb4739f648080e23c5561ff27eb812a4cd
child 415693 822e81707327954a2c4e7d8060c5d91ae528bbf3
push id: 33901
push user: apavel@mozilla.com
push date: Thu, 26 Apr 2018 06:05:37 +0000
treeherder: mozilla-central@b62ad926cf2a [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: arai
bugs: 1456296
milestone: 61.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1456296 - Move IdentifierName parsing into a separate function from TSS::getTokenInternal to simplify some control flow. r=arai * * * [mq]: more
js/src/frontend/TokenStream.cpp
js/src/frontend/TokenStream.h
--- a/js/src/frontend/TokenStream.cpp
+++ b/js/src/frontend/TokenStream.cpp
@@ -1327,16 +1327,77 @@ TokenStreamSpecific<CharT, AnyCharsAcces
 
         if (!appendCodePointToTokenbuf(codePoint))
             return false;
     }
 
     return true;
 }
 
+template<typename CharT, class AnyCharsAccess>
+MOZ_MUST_USE bool
+TokenStreamSpecific<CharT, AnyCharsAccess>::identifierName(Token* token, const CharT* identStart,
+                                                           IdentifierEscapes escaping)
+{
+    int c;
+    while (true) {
+        c = getCharIgnoreEOL();
+        if (c == EOF)
+            break;
+
+        uint32_t codePoint;
+        if (!matchMultiUnitCodePoint(c, &codePoint))
+            return false;
+        if (codePoint) {
+            if (!unicode::IsIdentifierPart(codePoint))
+                break;
+
+            continue;
+        }
+
+        if (!unicode::IsIdentifierPart(char16_t(c))) {
+            uint32_t qc;
+            if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
+                break;
+            escaping = IdentifierEscapes::SawUnicodeEscape;
+        }
+    }
+    ungetCharIgnoreEOL(c);
+
+    const CharT* chars;
+    size_t length;
+    if (escaping == IdentifierEscapes::SawUnicodeEscape) {
+        // Identifiers containing Unicode escapes have to be converted into
+        // tokenbuf before atomizing.
+        if (!putIdentInTokenbuf(identStart))
+            return false;
+
+        chars = tokenbuf.begin();
+        length = tokenbuf.length();
+    } else {
+        // Escape-free identifiers can be created directly from userbuf.
+        chars = identStart;
+        length = userbuf.addressOfNextRawChar() - identStart;
+
+        // Represent reserved words lacking escapes as reserved word tokens.
+        if (const ReservedWordInfo* rw = FindReservedWord(chars, length)) {
+            token->type = rw->tokentype;
+            return true;
+        }
+    }
+
+    JSAtom* atom = atomizeChars(anyCharsAccess().cx, chars, length);
+    if (!atom)
+        return false;
+
+    token->type = TokenKind::Name;
+    token->setName(atom->asPropertyName());
+    return true;
+}
+
 enum FirstCharKind {
     // A char16_t has the 'OneChar' kind if it, by itself, constitutes a valid
     // token that cannot also be a prefix of a longer token.  E.g. ';' has the
     // OneChar kind, but '+' does not, because '++' and '+=' are valid longer tokens
     // that begin with '+'.
     //
     // The few token kinds satisfying these properties cover roughly 35--45%
     // of the tokens seen in practice.
@@ -1415,24 +1476,21 @@ static const uint8_t firstCharKinds[] = 
 static_assert(LastCharKind < (1 << (sizeof(firstCharKinds[0]) * 8)),
               "Elements of firstCharKinds[] are too small");
 
 template<typename CharT, class AnyCharsAccess>
 MOZ_MUST_USE bool
 TokenStreamSpecific<CharT, AnyCharsAccess>::getTokenInternal(TokenKind* ttp, Modifier modifier)
 {
     int c;
-    uint32_t qc;
     Token* tp;
     FirstCharKind c1kind;
     const CharT* numStart;
     bool hasExp;
     DecimalPoint decimalPoint;
-    const CharT* identStart;
-    bool hadUnicodeEscape;
 
     // Check if in the middle of a template string. Have to get this out of
     // the way first.
     if (MOZ_UNLIKELY(modifier == TemplateTail)) {
         if (!getStringOrTemplateToken('`', &tp))
             goto error;
         goto out;
     }
@@ -1462,35 +1520,39 @@ TokenStreamSpecific<CharT, AnyCharsAcces
             goto retry;
         }
 
         tp = newToken(-1);
 
         // If the first codepoint is really the start of an identifier, the
         // identifier starts at the previous raw char.  If it isn't, it's a bad
         // char and this assignment won't be examined anyway.
-        identStart = userbuf.addressOfNextRawChar() - 1;
+        const CharT* identStart = userbuf.addressOfNextRawChar() - 1;
 
         static_assert('$' < 128,
                       "IdentifierStart contains '$', but as !IsUnicodeIDStart('$'), "
                       "ensure that '$' is never handled here");
         static_assert('_' < 128,
                       "IdentifierStart contains '_', but as !IsUnicodeIDStart('_'), "
                       "ensure that '_' is never handled here");
         if (unicode::IsUnicodeIDStart(char16_t(c))) {
-            hadUnicodeEscape = false;
-            goto identifier;
+            if (!identifierName(tp, identStart, IdentifierEscapes::None))
+                goto error;
+
+            goto out;
         }
 
         uint32_t codePoint = c;
         if (!matchMultiUnitCodePoint(c, &codePoint))
             goto error;
         if (codePoint && unicode::IsUnicodeIDStart(codePoint)) {
-            hadUnicodeEscape = false;
-            goto identifier;
+            if (!identifierName(tp, identStart, IdentifierEscapes::None))
+                goto error;
+
+            goto out;
         }
 
         ungetCodePointIgnoreEOL(codePoint);
         error(JSMSG_ILLEGAL_CHARACTER);
         goto error;
     }
 
     // Get the token kind, based on the first char.  The ordering of c1kind
@@ -1527,72 +1589,20 @@ TokenStreamSpecific<CharT, AnyCharsAcces
     //
     if (c1kind == Space)
         goto retry;
 
     // Look for an identifier.
     //
     if (c1kind == Ident) {
         tp = newToken(-1);
-        identStart = userbuf.addressOfNextRawChar() - 1;
-        hadUnicodeEscape = false;
-
-      identifier:
-        for (;;) {
-            c = getCharIgnoreEOL();
-            if (c == EOF)
-                break;
-
-            uint32_t codePoint;
-            if (!matchMultiUnitCodePoint(c, &codePoint))
-                goto error;
-            if (codePoint) {
-                if (!unicode::IsIdentifierPart(codePoint))
-                    break;
-
-                continue;
-            }
-
-            if (!unicode::IsIdentifierPart(char16_t(c))) {
-                if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
-                    break;
-                hadUnicodeEscape = true;
-            }
-        }
-        ungetCharIgnoreEOL(c);
 
-        // Identifiers containing no Unicode escapes can be processed directly
-        // from userbuf.  The rest must use the escapes converted via tokenbuf
-        // before atomizing.
-        const CharT* chars;
-        size_t length;
-        if (hadUnicodeEscape) {
-            if (!putIdentInTokenbuf(identStart))
-                goto error;
+        if (!identifierName(tp, userbuf.addressOfNextRawChar() - 1, IdentifierEscapes::None))
+            goto error;
 
-            chars = tokenbuf.begin();
-            length = tokenbuf.length();
-        } else {
-            chars = identStart;
-            length = userbuf.addressOfNextRawChar() - identStart;
-        }
-
-        // Represent reserved words as reserved word tokens.
-        if (!hadUnicodeEscape) {
-            if (const ReservedWordInfo* rw = FindReservedWord(chars, length)) {
-                tp->type = rw->tokentype;
-                goto out;
-            }
-        }
-
-        JSAtom* atom = atomizeChars(anyCharsAccess().cx, chars, length);
-        if (!atom)
-            goto error;
-        tp->type = TokenKind::Name;
-        tp->setName(atom->asPropertyName());
         goto out;
     }
 
     // Look for a decimal number.
     //
     if (c1kind == Dec) {
         tp = newToken(-1);
         numStart = userbuf.addressOfNextRawChar() - 1;
@@ -1814,21 +1824,25 @@ TokenStreamSpecific<CharT, AnyCharsAcces
       case '+':
         if (matchChar('+'))
             tp->type = TokenKind::Inc;
         else
             tp->type = matchChar('=') ? TokenKind::AddAssign : TokenKind::Add;
         goto out;
 
       case '\\': {
-        uint32_t escapeLength = matchUnicodeEscapeIdStart(&qc);
-        if (escapeLength > 0) {
-            identStart = userbuf.addressOfNextRawChar() - escapeLength - 1;
-            hadUnicodeEscape = true;
-            goto identifier;
+        uint32_t qc;
+        if (uint32_t escapeLength = matchUnicodeEscapeIdStart(&qc)) {
+            if (!identifierName(tp, userbuf.addressOfNextRawChar() - escapeLength - 1,
+                                IdentifierEscapes::SawUnicodeEscape))
+            {
+                goto error;
+            }
+
+            goto out;
         }
 
         // We could point "into" a mistyped escape, e.g. for "\u{41H}" we could
         // point at the 'H'.  But we don't do that now, so the character after
         // the '\' isn't necessarily bad, so just point at the start of
         // the actually-invalid escape.
         ungetCharIgnoreEOL('\\');
         error(JSMSG_BAD_ESCAPE);
--- a/js/src/frontend/TokenStream.h
+++ b/js/src/frontend/TokenStream.h
@@ -244,16 +244,19 @@ enum class InvalidEscapeType {
     Unicode,
     // An otherwise well-formed \u escape which represents a
     // codepoint > 10FFFF.
     UnicodeOverflow,
     // An octal escape in a template token.
     Octal
 };
 
+// The only escapes found in IdentifierName are of the Unicode flavor.
+enum class IdentifierEscapes { None, SawUnicodeEscape };
+
 class TokenStreamShared;
 
 struct Token
 {
   private:
     // Sometimes the parser needs to inform the tokenizer to interpret
     // subsequent text in a particular manner: for example, to tokenize a
     // keyword as an identifier, not as the actual keyword, on the right-hand
@@ -1440,16 +1443,19 @@ class MOZ_STACK_CLASS TokenStreamSpecifi
     const CharT* rawCharPtrAt(size_t offset) const {
         return userbuf.rawCharPtrAt(offset);
     }
 
     const CharT* rawLimit() const {
         return userbuf.limit();
     }
 
+    MOZ_MUST_USE bool identifierName(Token* token, const CharT* identStart,
+                                     IdentifierEscapes escaping);
+
     MOZ_MUST_USE bool getTokenInternal(TokenKind* ttp, Modifier modifier);
 
     MOZ_MUST_USE bool getStringOrTemplateToken(char untilChar, Token** tp);
 
     // Try to get the next character, normalizing '\r', '\r\n', and '\n' into
     // '\n'.  Also updates internal line-counter state.  Return true on success
     // and store the character in |*c|.  Return false and leave |*c| undefined
     // on failure.