Bug 645598 - Trim last bits of fat from the scanner, part 2 of 3. r=jwalden.
author Nicholas Nethercote <nnethercote@mozilla.com>
Tue, 07 Jun 2011 13:48:21 +1000
changeset 70979 3bae9deeb9b4a00a1495de94f431e7b9d60c6dc3
parent 70978 0fd30fdb7a6bc67ea0a216d82b0b9551e773d78a
child 70980 b71f5419e37a43fb3b04e156fc830e5dc5665dc3
push id 1
push user root
push date Mon, 20 Oct 2014 17:29:22 +0000
reviewers jwalden
bugs 645598
milestone 7.0a1
Bug 645598 - Trim last bits of fat from the scanner, part 2 of 3. r=jwalden.
js/src/jsscan.cpp
--- a/js/src/jsscan.cpp
+++ b/js/src/jsscan.cpp
@@ -1366,17 +1366,22 @@ TokenStream::getTokenInternal()
             hadUnicodeEscape = false;
             goto identifier;
         }
 
         goto badchar;
     }
 
     /*
-     * Get the token class, based on the first char.
+     * Get the token kind, based on the first char.  The ordering of c1kind
+     * comparison is based on the frequency of tokens in real code.  Minified
+     * and non-minified code have different characteristics, mostly in that
+     * whitespace occurs much less in minified code.  Token kinds that fall in
+     * the 'Other' category typically account for less than 2% of all tokens,
+     * so their order doesn't matter much.
      */
     c1kind = FirstCharKind(firstCharKinds[c]);
 
     /*
      * Skip over whitespace chars;  update line state on EOLs.  Even though
      * whitespace isn't very common in minified code we have to handle it first
      * (and jump back to 'retry') before calling newToken().
      */
@@ -1504,16 +1509,115 @@ TokenStream::getTokenInternal()
         } else {
             tp->t_op = JSOP_NOP;
             tt = TOK_ASSIGN;
         }
         goto out;
     }
 
     /*
+     * Look for a string.
+     */
+    if (c1kind == String) {
+        qc = c;
+        tokenbuf.clear();
+        while (true) {
+            /*
+             * We need to detect any of these chars:  " or ', \n (or its
+             * equivalents), \\, EOF.  We use maybeStrSpecial[] in a manner
+             * similar to maybeEOL[], see above.  Because we detect EOL
+             * sequences here and put them back immediately, we can use
+             * getCharIgnoreEOL().
+             */
+            c = getCharIgnoreEOL();
+            if (maybeStrSpecial[c & 0xff]) {
+                if (c == qc)
+                    break;
+                if (c == '\\') {
+                    switch (c = getChar()) {
+                      case 'b': c = '\b'; break;
+                      case 'f': c = '\f'; break;
+                      case 'n': c = '\n'; break;
+                      case 'r': c = '\r'; break;
+                      case 't': c = '\t'; break;
+                      case 'v': c = '\v'; break;
+
+                      default:
+                        if ('0' <= c && c < '8') {
+                            int32 val = JS7_UNDEC(c);
+
+                            c = peekChar();
+                            /* Strict mode code allows only \0, then a non-digit. */
+                            if (val != 0 || JS7_ISDEC(c)) {
+                                if (!ReportStrictModeError(cx, this, NULL, NULL,
+                                                           JSMSG_DEPRECATED_OCTAL)) {
+                                    goto error;
+                                }
+                                setOctalCharacterEscape();
+                            }
+                            if ('0' <= c && c < '8') {
+                                val = 8 * val + JS7_UNDEC(c);
+                                getChar();
+                                c = peekChar();
+                                if ('0' <= c && c < '8') {
+                                    int32 save = val;
+                                    val = 8 * val + JS7_UNDEC(c);
+                                    if (val <= 0377)
+                                        getChar();
+                                    else
+                                        val = save;
+                                }
+                            }
+
+                            c = (jschar)val;
+                        } else if (c == 'u') {
+                            jschar cp[4];
+                            if (peekChars(4, cp) &&
+                                JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) &&
+                                JS7_ISHEX(cp[2]) && JS7_ISHEX(cp[3])) {
+                                c = (((((JS7_UNHEX(cp[0]) << 4)
+                                        + JS7_UNHEX(cp[1])) << 4)
+                                      + JS7_UNHEX(cp[2])) << 4)
+                                    + JS7_UNHEX(cp[3]);
+                                skipChars(4);
+                            }
+                        } else if (c == 'x') {
+                            jschar cp[2];
+                            if (peekChars(2, cp) &&
+                                JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1])) {
+                                c = (JS7_UNHEX(cp[0]) << 4) + JS7_UNHEX(cp[1]);
+                                skipChars(2);
+                            }
+                        } else if (c == '\n') {
+                            /* ECMA follows C by removing escaped newlines. */
+                            continue;
+                        }
+                        break;
+                    }
+                } else if (TokenBuf::isRawEOLChar(c) || c == EOF) {
+                    ungetCharIgnoreEOL(c);
+                    ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
+                                             JSMSG_UNTERMINATED_STRING);
+                    goto error;
+                }
+            }
+            if (!tokenbuf.append(c))
+                goto error;
+        }
+        JSAtom *atom = atomize(cx, tokenbuf);
+        if (!atom)
+            goto error;
+        tp->pos.end.lineno = lineno;
+        tp->t_op = JSOP_STRING;
+        tp->t_atom = atom;
+        tt = TOK_STRING;
+        goto out;
+    }
+
+    /*
      * Look for a decimal number.
      */
     if (c1kind == Dec) {
         numStart = userbuf.addressOfNextRawChar() - 1;
 
       decimal:
         hasFracOrExp = false;
         while (JS7_ISDEC(c))
@@ -1649,115 +1753,16 @@ TokenStream::getTokenInternal()
         if (!GetPrefixInteger(cx, numStart, userbuf.addressOfNextRawChar(), radix, &dummy, &dval))
             goto error;
         tp->t_dval = dval;
         tt = TOK_NUMBER;
         goto out;
     }
 
     /*
-     * Look for a string.
-     */
-    if (c1kind == String) {
-        qc = c;
-        tokenbuf.clear();
-        while (true) {
-            /*
-             * We need to detect any of these chars:  " or ', \n (or its
-             * equivalents), \\, EOF.  We use maybeStrSpecial[] in a manner
-             * similar to maybeEOL[], see above.  Because we detect EOL
-             * sequences here and put them back immediately, we can use
-             * getCharIgnoreEOL().
-             */
-            c = getCharIgnoreEOL();
-            if (maybeStrSpecial[c & 0xff]) {
-                if (c == qc)
-                    break;
-                if (c == '\\') {
-                    switch (c = getChar()) {
-                      case 'b': c = '\b'; break;
-                      case 'f': c = '\f'; break;
-                      case 'n': c = '\n'; break;
-                      case 'r': c = '\r'; break;
-                      case 't': c = '\t'; break;
-                      case 'v': c = '\v'; break;
-
-                      default:
-                        if ('0' <= c && c < '8') {
-                            int32 val = JS7_UNDEC(c);
-
-                            c = peekChar();
-                            /* Strict mode code allows only \0, then a non-digit. */
-                            if (val != 0 || JS7_ISDEC(c)) {
-                                if (!ReportStrictModeError(cx, this, NULL, NULL,
-                                                           JSMSG_DEPRECATED_OCTAL)) {
-                                    goto error;
-                                }
-                                setOctalCharacterEscape();
-                            }
-                            if ('0' <= c && c < '8') {
-                                val = 8 * val + JS7_UNDEC(c);
-                                getChar();
-                                c = peekChar();
-                                if ('0' <= c && c < '8') {
-                                    int32 save = val;
-                                    val = 8 * val + JS7_UNDEC(c);
-                                    if (val <= 0377)
-                                        getChar();
-                                    else
-                                        val = save;
-                                }
-                            }
-
-                            c = (jschar)val;
-                        } else if (c == 'u') {
-                            jschar cp[4];
-                            if (peekChars(4, cp) &&
-                                JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) &&
-                                JS7_ISHEX(cp[2]) && JS7_ISHEX(cp[3])) {
-                                c = (((((JS7_UNHEX(cp[0]) << 4)
-                                        + JS7_UNHEX(cp[1])) << 4)
-                                      + JS7_UNHEX(cp[2])) << 4)
-                                    + JS7_UNHEX(cp[3]);
-                                skipChars(4);
-                            }
-                        } else if (c == 'x') {
-                            jschar cp[2];
-                            if (peekChars(2, cp) &&
-                                JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1])) {
-                                c = (JS7_UNHEX(cp[0]) << 4) + JS7_UNHEX(cp[1]);
-                                skipChars(2);
-                            }
-                        } else if (c == '\n') {
-                            /* ECMA follows C by removing escaped newlines. */
-                            continue;
-                        }
-                        break;
-                    }
-                } else if (TokenBuf::isRawEOLChar(c) || c == EOF) {
-                    ungetCharIgnoreEOL(c);
-                    ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
-                                             JSMSG_UNTERMINATED_STRING);
-                    goto error;
-                }
-            }
-            if (!tokenbuf.append(c))
-                goto error;
-        }
-        JSAtom *atom = atomize(cx, tokenbuf);
-        if (!atom)
-            goto error;
-        tp->pos.end.lineno = lineno;
-        tp->t_op = JSOP_STRING;
-        tp->t_atom = atom;
-        tt = TOK_STRING;
-        goto out;
-    }
-
-    /*
      * This handles everything else.
      */
     JS_ASSERT(c1kind == Other);
     switch (c) {
       case '\\':
         hadUnicodeEscape = matchUnicodeEscapeIdStart(&qc);
         if (hadUnicodeEscape) {
             c = qc;