Bug 645598 - Trim last bits of fat from the scanner, part 2 of 3. r=jwalden.
author Nicholas Nethercote <nnethercote@mozilla.com>
Tue, 07 Jun 2011 13:48:21 +1000
changeset 70979 3bae9deeb9b4a00a1495de94f431e7b9d60c6dc3
parent 70978 0fd30fdb7a6bc67ea0a216d82b0b9551e773d78a
child 70980 b71f5419e37a43fb3b04e156fc830e5dc5665dc3
push id 20463
push user cleary@mozilla.com
push date Mon, 13 Jun 2011 17:53:39 +0000
treeherder mozilla-central@872ae1cef345 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers jwalden
bugs 645598
milestone 7.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 645598 - Trim last bits of fat from the scanner, part 2 of 3. r=jwalden.
js/src/jsscan.cpp
--- a/js/src/jsscan.cpp
+++ b/js/src/jsscan.cpp
@@ -1366,17 +1366,22 @@ TokenStream::getTokenInternal()
             hadUnicodeEscape = false;
             goto identifier;
         }
 
         goto badchar;
     }
 
     /*
-     * Get the token class, based on the first char.
+     * Get the token kind, based on the first char.  The ordering of c1kind
+     * comparison is based on the frequency of tokens in real code.  Minified
+     * and non-minified code have different characteristics, mostly in that
+     * whitespace occurs much less in minified code.  Token kinds that fall in
+     * the 'Other' category typically account for less than 2% of all tokens,
+     * so their order doesn't matter much.
      */
     c1kind = FirstCharKind(firstCharKinds[c]);
 
     /*
      * Skip over whitespace chars;  update line state on EOLs.  Even though
      * whitespace isn't very common in minified code we have to handle it first
      * (and jump back to 'retry') before calling newToken().
      */
@@ -1504,16 +1509,115 @@ TokenStream::getTokenInternal()
         } else {
             tp->t_op = JSOP_NOP;
             tt = TOK_ASSIGN;
         }
         goto out;
     }
 
     /*
+     * Look for a string.
+     */
+    if (c1kind == String) {
+        qc = c;
+        tokenbuf.clear();
+        while (true) {
+            /*
+             * We need to detect any of these chars:  " or ', \n (or its
+             * equivalents), \\, EOF.  We use maybeStrSpecial[] in a manner
+             * similar to maybeEOL[], see above.  Because we detect EOL
+             * sequences here and put them back immediately, we can use
+             * getCharIgnoreEOL().
+             */
+            c = getCharIgnoreEOL();
+            if (maybeStrSpecial[c & 0xff]) {
+                if (c == qc)
+                    break;
+                if (c == '\\') {
+                    switch (c = getChar()) {
+                      case 'b': c = '\b'; break;
+                      case 'f': c = '\f'; break;
+                      case 'n': c = '\n'; break;
+                      case 'r': c = '\r'; break;
+                      case 't': c = '\t'; break;
+                      case 'v': c = '\v'; break;
+
+                      default:
+                        if ('0' <= c && c < '8') {
+                            int32 val = JS7_UNDEC(c);
+
+                            c = peekChar();
+                            /* Strict mode code allows only \0, then a non-digit. */
+                            if (val != 0 || JS7_ISDEC(c)) {
+                                if (!ReportStrictModeError(cx, this, NULL, NULL,
+                                                           JSMSG_DEPRECATED_OCTAL)) {
+                                    goto error;
+                                }
+                                setOctalCharacterEscape();
+                            }
+                            if ('0' <= c && c < '8') {
+                                val = 8 * val + JS7_UNDEC(c);
+                                getChar();
+                                c = peekChar();
+                                if ('0' <= c && c < '8') {
+                                    int32 save = val;
+                                    val = 8 * val + JS7_UNDEC(c);
+                                    if (val <= 0377)
+                                        getChar();
+                                    else
+                                        val = save;
+                                }
+                            }
+
+                            c = (jschar)val;
+                        } else if (c == 'u') {
+                            jschar cp[4];
+                            if (peekChars(4, cp) &&
+                                JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) &&
+                                JS7_ISHEX(cp[2]) && JS7_ISHEX(cp[3])) {
+                                c = (((((JS7_UNHEX(cp[0]) << 4)
+                                        + JS7_UNHEX(cp[1])) << 4)
+                                      + JS7_UNHEX(cp[2])) << 4)
+                                    + JS7_UNHEX(cp[3]);
+                                skipChars(4);
+                            }
+                        } else if (c == 'x') {
+                            jschar cp[2];
+                            if (peekChars(2, cp) &&
+                                JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1])) {
+                                c = (JS7_UNHEX(cp[0]) << 4) + JS7_UNHEX(cp[1]);
+                                skipChars(2);
+                            }
+                        } else if (c == '\n') {
+                            /* ECMA follows C by removing escaped newlines. */
+                            continue;
+                        }
+                        break;
+                    }
+                } else if (TokenBuf::isRawEOLChar(c) || c == EOF) {
+                    ungetCharIgnoreEOL(c);
+                    ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
+                                             JSMSG_UNTERMINATED_STRING);
+                    goto error;
+                }
+            }
+            if (!tokenbuf.append(c))
+                goto error;
+        }
+        JSAtom *atom = atomize(cx, tokenbuf);
+        if (!atom)
+            goto error;
+        tp->pos.end.lineno = lineno;
+        tp->t_op = JSOP_STRING;
+        tp->t_atom = atom;
+        tt = TOK_STRING;
+        goto out;
+    }
+
+    /*
      * Look for a decimal number.
      */
     if (c1kind == Dec) {
         numStart = userbuf.addressOfNextRawChar() - 1;
 
       decimal:
         hasFracOrExp = false;
         while (JS7_ISDEC(c))
@@ -1649,115 +1753,16 @@ TokenStream::getTokenInternal()
         if (!GetPrefixInteger(cx, numStart, userbuf.addressOfNextRawChar(), radix, &dummy, &dval))
             goto error;
         tp->t_dval = dval;
         tt = TOK_NUMBER;
         goto out;
     }
 
     /*
-     * Look for a string.
-     */
-    if (c1kind == String) {
-        qc = c;
-        tokenbuf.clear();
-        while (true) {
-            /*
-             * We need to detect any of these chars:  " or ', \n (or its
-             * equivalents), \\, EOF.  We use maybeStrSpecial[] in a manner
-             * similar to maybeEOL[], see above.  Because we detect EOL
-             * sequences here and put them back immediately, we can use
-             * getCharIgnoreEOL().
-             */
-            c = getCharIgnoreEOL();
-            if (maybeStrSpecial[c & 0xff]) {
-                if (c == qc)
-                    break;
-                if (c == '\\') {
-                    switch (c = getChar()) {
-                      case 'b': c = '\b'; break;
-                      case 'f': c = '\f'; break;
-                      case 'n': c = '\n'; break;
-                      case 'r': c = '\r'; break;
-                      case 't': c = '\t'; break;
-                      case 'v': c = '\v'; break;
-
-                      default:
-                        if ('0' <= c && c < '8') {
-                            int32 val = JS7_UNDEC(c);
-
-                            c = peekChar();
-                            /* Strict mode code allows only \0, then a non-digit. */
-                            if (val != 0 || JS7_ISDEC(c)) {
-                                if (!ReportStrictModeError(cx, this, NULL, NULL,
-                                                           JSMSG_DEPRECATED_OCTAL)) {
-                                    goto error;
-                                }
-                                setOctalCharacterEscape();
-                            }
-                            if ('0' <= c && c < '8') {
-                                val = 8 * val + JS7_UNDEC(c);
-                                getChar();
-                                c = peekChar();
-                                if ('0' <= c && c < '8') {
-                                    int32 save = val;
-                                    val = 8 * val + JS7_UNDEC(c);
-                                    if (val <= 0377)
-                                        getChar();
-                                    else
-                                        val = save;
-                                }
-                            }
-
-                            c = (jschar)val;
-                        } else if (c == 'u') {
-                            jschar cp[4];
-                            if (peekChars(4, cp) &&
-                                JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) &&
-                                JS7_ISHEX(cp[2]) && JS7_ISHEX(cp[3])) {
-                                c = (((((JS7_UNHEX(cp[0]) << 4)
-                                        + JS7_UNHEX(cp[1])) << 4)
-                                      + JS7_UNHEX(cp[2])) << 4)
-                                    + JS7_UNHEX(cp[3]);
-                                skipChars(4);
-                            }
-                        } else if (c == 'x') {
-                            jschar cp[2];
-                            if (peekChars(2, cp) &&
-                                JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1])) {
-                                c = (JS7_UNHEX(cp[0]) << 4) + JS7_UNHEX(cp[1]);
-                                skipChars(2);
-                            }
-                        } else if (c == '\n') {
-                            /* ECMA follows C by removing escaped newlines. */
-                            continue;
-                        }
-                        break;
-                    }
-                } else if (TokenBuf::isRawEOLChar(c) || c == EOF) {
-                    ungetCharIgnoreEOL(c);
-                    ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
-                                             JSMSG_UNTERMINATED_STRING);
-                    goto error;
-                }
-            }
-            if (!tokenbuf.append(c))
-                goto error;
-        }
-        JSAtom *atom = atomize(cx, tokenbuf);
-        if (!atom)
-            goto error;
-        tp->pos.end.lineno = lineno;
-        tp->t_op = JSOP_STRING;
-        tp->t_atom = atom;
-        tt = TOK_STRING;
-        goto out;
-    }
-
-    /*
      * This handles everything else.
      */
     JS_ASSERT(c1kind == Other);
     switch (c) {
       case '\\':
         hadUnicodeEscape = matchUnicodeEscapeIdStart(&qc);
         if (hadUnicodeEscape) {
             c = qc;