Bug 636654 - Extract large cold chunks (e4x, @lines) from getTokenInternal(). r=brendan.
author: Nicholas Nethercote <nnethercote@mozilla.com>
Mon, 04 Apr 2011 18:54:48 -0700
changeset 67917 37e5a7fe49efa374d2259280cee36b7851aedda7
parent 67916 84e734e6e8ab4d4c07b90b2ab0e33d01639fb7da
child 67918 408c30d4a038cf8b221556f73efff5e5e1967c50
push id: 1
push user: root
push date: Tue, 26 Apr 2011 22:38:44 +0000
treeherder: mozilla-beta@bfdb6e623a36 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: brendan
bugs: 636654
milestone: 2.2a1pre
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 636654 - Extract large cold chunks (e4x, @lines) from getTokenInternal(). r=brendan.
js/src/jsscan.cpp
js/src/jsscan.h
--- a/js/src/jsscan.cpp
+++ b/js/src/jsscan.cpp
@@ -370,17 +370,17 @@ TokenStream::ungetCharIgnoreEOL(int32 c)
     userbuf.ungetRawChar();
 }
 
 /*
  * Peek n chars ahead into ts.  Return true if n chars were read, false if
  * there weren't enough characters in the input stream.  This function cannot
  * be used to peek into or past a newline.
  */
-JSBool
+bool
 TokenStream::peekChars(intN n, jschar *cp)
 {
     intN i, j;
     int32 c;
 
     for (i = 0; i < n; i++) {
         c = getChar();
         if (c == EOF)
@@ -428,17 +428,17 @@ TokenStream::reportCompileErrorNumberVA(
     jschar *linechars;
     char *linebytes;
     bool warning;
     JSBool ok;
     const TokenPos *tp;
     uintN i;
 
     if (JSREPORT_IS_STRICT(flags) && !cx->hasStrictOption())
-        return JS_TRUE;
+        return true;
 
     warning = JSREPORT_IS_WARNING(flags);
     if (warning && cx->hasWErrorOption()) {
         flags &= ~JSREPORT_WARNING;
         warning = false;
     }
 
     PodZero(&report);
@@ -590,46 +590,46 @@ js::ReportCompileErrorNumber(JSContext *
     bool result = ts->reportCompileErrorNumberVA(pn, flags, errorNumber, ap);
     va_end(ap);
 
     return result;
 }
 
 #if JS_HAS_XML_SUPPORT
 
-JSBool
+bool
 TokenStream::getXMLEntity()
 {
     ptrdiff_t offset, length, i;
     int c, d;
     JSBool ispair;
     jschar *bp, digit;
     char *bytes;
     JSErrNum msg;
 
     CharBuffer &tb = tokenbuf;
 
     /* Put the entity, including the '&' already scanned, in tokenbuf. */
     offset = tb.length();
     if (!tb.append('&'))
-        return JS_FALSE;
+        return false;
     while ((c = getChar()) != ';') {
         if (c == EOF || c == '\n') {
             ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_END_OF_XML_ENTITY);
-            return JS_FALSE;
+            return false;
         }
         if (!tb.append(c))
-            return JS_FALSE;
+            return false;
     }
 
     /* Let length be the number of jschars after the '&', including the ';'. */
     length = tb.length() - offset;
     bp = tb.begin() + offset;
     c = d = 0;
-    ispair = JS_FALSE;
+    ispair = false;
     if (length > 2 && bp[1] == '#') {
         /* Match a well-formed XML Character Reference. */
         i = 2;
         if (length > 3 && JS_TOLOWER(bp[i]) == 'x') {
             if (length > 9)     /* at most 6 hex digits allowed */
                 goto badncr;
             while (++i < length) {
                 digit = bp[i];
@@ -647,17 +647,17 @@ TokenStream::getXMLEntity()
                     goto badncr;
             }
         }
 
         if (0x10000 <= c && c <= 0x10FFFF) {
             /* Form a surrogate pair (c, d) -- c is the high surrogate. */
             d = 0xDC00 + (c & 0x3FF);
             c = 0xD7C0 + (c >> 10);
-            ispair = JS_TRUE;
+            ispair = true;
         } else {
             /* Enforce the http://www.w3.org/TR/REC-xml/#wf-Legalchar WFC. */
             if (c != 0x9 && c != 0xA && c != 0xD &&
                 !(0x20 <= c && c <= 0xD7FF) &&
                 !(0xE000 <= c && c <= 0xFFFD)) {
                 goto badncr;
             }
         }
@@ -691,167 +691,39 @@ TokenStream::getXMLEntity()
         }
     }
 
     /* If we matched, retract tokenbuf and store the entity's value. */
     *bp++ = (jschar) c;
     if (ispair)
         *bp++ = (jschar) d;
     tb.shrinkBy(tb.end() - bp);
-    return JS_TRUE;
+    return true;
 
   badncr:
     msg = JSMSG_BAD_XML_NCR;
   bad:
     /* No match: throw a TypeError per ECMA-357 10.3.2.1 step 8(a). */
     JS_ASSERT((tb.end() - bp) >= 1);
     bytes = js_DeflateString(cx, bp + 1, (tb.end() - bp) - 1);
     if (bytes) {
         ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, msg, bytes);
         cx->free_(bytes);
     }
-    return JS_FALSE;
-}
-
-#endif /* JS_HAS_XML_SUPPORT */
-
-/*
- * We have encountered a '\': check for a Unicode escape sequence after it.
- * Return 'true' and the character code value (by value) if we found a
- * Unicode escape sequence.  Otherwise, return 'false'.  In both cases, do not
- * advance along the buffer.
- */
-bool
-TokenStream::peekUnicodeEscape(int *result)
-{
-    jschar cp[5];
-
-    if (peekChars(5, cp) && cp[0] == 'u' &&
-        JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) &&
-        JS7_ISHEX(cp[3]) && JS7_ISHEX(cp[4]))
-    {
-        *result = (((((JS7_UNHEX(cp[1]) << 4)
-                + JS7_UNHEX(cp[2])) << 4)
-              + JS7_UNHEX(cp[3])) << 4)
-            + JS7_UNHEX(cp[4]);
-        return true;
-    }
-    return false;
-}
-
-bool
-TokenStream::matchUnicodeEscapeIdStart(int32 *cp)
-{
-    if (peekUnicodeEscape(cp) && JS_ISIDSTART(*cp)) {
-        skipChars(5);
-        return true;
-    }
     return false;
 }
 
 bool
-TokenStream::matchUnicodeEscapeIdent(int32 *cp)
-{
-    if (peekUnicodeEscape(cp) && JS_ISIDENT(*cp)) {
-        skipChars(5);
-        return true;
-    }
-    return false;
-}
-
-Token *
-TokenStream::newToken(ptrdiff_t adjust)
-{
-    cursor = (cursor + 1) & ntokensMask;
-    Token *tp = &tokens[cursor];
-    tp->ptr = userbuf.addressOfNextRawChar() + adjust;
-    tp->pos.begin.index = tp->ptr - linebase;
-    tp->pos.begin.lineno = tp->pos.end.lineno = lineno;
-    return tp;
-}
-
-JS_ALWAYS_INLINE JSAtom *
-TokenStream::atomize(JSContext *cx, CharBuffer &cb)
-{
-    return js_AtomizeChars(cx, cb.begin(), cb.length(), 0);
-}
-
-#ifdef DEBUG
-bool
-IsTokenSane(Token *tp)
-{
-    /*
-     * Nb: TOK_EOL should never be used in an actual Token;  it should only be
-     * returned as a TokenKind from peekTokenSameLine().
-     */
-    if (tp->type < TOK_ERROR || tp->type >= TOK_LIMIT || tp->type == TOK_EOL)
-        return false;
-
-    if (tp->pos.begin.lineno == tp->pos.end.lineno) {
-        if (tp->pos.begin.index > tp->pos.end.index)
-            return false;
-    } else {
-        /* Only certain token kinds can be multi-line. */
-        switch (tp->type) {
-          case TOK_STRING:
-          case TOK_XMLATTR:
-          case TOK_XMLSPACE:
-          case TOK_XMLTEXT:
-          case TOK_XMLCOMMENT:
-          case TOK_XMLCDATA:
-          case TOK_XMLPI:
-            break;
-          default:
-            return false;
-        }
-    }
-    return true;
-}
-#endif
-
-bool
-TokenStream::putIdentInTokenbuf(const jschar *identStart)
-{
-    int32 c, qc;
-    const jschar *tmp = userbuf.addressOfNextRawChar(); 
-    userbuf.setAddressOfNextRawChar(identStart);
-
-    tokenbuf.clear();
-    for (;;) {
-        c = getCharIgnoreEOL();
-        if (!JS_ISIDENT(c)) {
-            if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
-                break;
-            c = qc;
-        }
-        if (!tokenbuf.append(c)) {
-            userbuf.setAddressOfNextRawChar(tmp);
-            return false;
-        }
-    }
-    userbuf.setAddressOfNextRawChar(tmp);
-    return true;
-}
-
-TokenKind
-TokenStream::getTokenInternal()
+TokenStream::getXMLTextOrTag(TokenKind *ttp, Token **tpp)
 {
     TokenKind tt;
     int c, qc;
     Token *tp;
     JSAtom *atom;
-    bool hadUnicodeEscape;
-    const jschar *numStart;
-#if JS_HAS_XML_SUPPORT
-    JSBool inTarget;
-    size_t targetLength;
-    ptrdiff_t contentIndex;
-#endif
 
-#if JS_HAS_XML_SUPPORT
     /*
      * Look for XML text.
      */
     if (flags & TSF_XMLTEXTMODE) {
         tt = TOK_XMLSPACE;      /* veto if non-space, return TOK_XMLTEXT */
         tp = newToken(0);
         tokenbuf.clear();
         qc = (flags & TSF_XMLONLYMODE) ? '<' : '{';
@@ -880,19 +752,20 @@ TokenStream::getTokenInternal()
         }
         tp->pos.end.lineno = lineno;
         tp->t_op = JSOP_STRING;
         tp->t_atom = atom;
         goto out;
     }
 
     /*
-     * Look for XML tags.
+     * XML tags.
      */
-    if (flags & TSF_XMLTAGMODE) {
+    else {
+        JS_ASSERT(flags & TSF_XMLTAGMODE);
         tp = newToken(0);
         c = getChar();
         if (JS_ISXMLSPACE(c)) {
             do {
                 c = getChar();
             } while (JS_ISXMLSPACE(c));
             ungetChar(c);
             tp->pos.end.lineno = lineno;
@@ -1003,20 +876,399 @@ TokenStream::getTokenInternal()
             }
             /* FALL THROUGH */
 
           bad_xml_char:
           default:
             ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_BAD_XML_CHARACTER);
             goto error;
         }
-        /* NOTREACHED */
+        JS_NOT_REACHED("getXMLTextOrTag 1");
+    }
+    JS_NOT_REACHED("getXMLTextOrTag 2");
+
+  out:
+    *ttp = tt;
+    *tpp = tp;
+    return true;
+
+  error:
+    *ttp = TOK_ERROR;
+    *tpp = tp;
+    return false;
+}
+
+/*
+ * After much testing, it's clear that Postel's advice to protocol designers
+ * ("be liberal in what you accept, and conservative in what you send") invites
+ * a natural-law repercussion for JS as "protocol":
+ *
+ * "If you are liberal in what you accept, others will utterly fail to be
+ *  conservative in what they send."
+ *
+ * Which means you will get <!-- comments to end of line in the middle of .js
+ * files, and after if conditions whose then statements are on the next line,
+ * and other wonders.  See at least the following bugs:
+ * - https://bugzilla.mozilla.org/show_bug.cgi?id=309242
+ * - https://bugzilla.mozilla.org/show_bug.cgi?id=309712
+ * - https://bugzilla.mozilla.org/show_bug.cgi?id=310993
+ *
+ * So without JSOPTION_XML, we changed around Firefox 1.5 never to scan an XML
+ * comment or CDATA literal.  Instead, we always scan <! as the start of an
+ * HTML comment hack to end of line, used since Netscape 2 to hide script tag
+ * content from script-unaware browsers.
+ *
+ * But this still leaves XML resources with certain internal structure
+ * vulnerable to being loaded as script cross-origin, and some internal data
+ * stolen, so for Firefox 3.5 and beyond, we reject programs whose source
+ * consists only of XML literals. See:
+ *
+ * https://bugzilla.mozilla.org/show_bug.cgi?id=336551
+ *
+ * The check for this is in jsparse.cpp, Compiler::compileScript.
+ */
+bool
+TokenStream::getXMLMarkup(TokenKind *ttp, Token **tpp)
+{
+    TokenKind tt;
+    int c;
+    Token *tp = *tpp;
+    JSAtom *atom;
+    JSBool inTarget;
+    size_t targetLength;
+    ptrdiff_t contentIndex;
+
+    /* Check for XML comment or CDATA section. */
+    if (matchChar('!')) {
+        tokenbuf.clear();
+
+        /* Scan XML comment. */
+        if (matchChar('-')) {
+            if (!matchChar('-'))
+                goto bad_xml_markup;
+            while ((c = getChar()) != '-' || !matchChar('-')) {
+                if (c == EOF)
+                    goto bad_xml_markup;
+                if (!tokenbuf.append(c))
+                    goto error;
+            }
+            tt = TOK_XMLCOMMENT;
+            tp->t_op = JSOP_XMLCOMMENT;
+            goto finish_xml_markup;
+        }
+
+        /* Scan CDATA section. */
+        if (matchChar('[')) {
+            jschar cp[6];
+            if (peekChars(6, cp) &&
+                cp[0] == 'C' &&
+                cp[1] == 'D' &&
+                cp[2] == 'A' &&
+                cp[3] == 'T' &&
+                cp[4] == 'A' &&
+                cp[5] == '[') {
+                skipChars(6);
+                while ((c = getChar()) != ']' ||
+                       !peekChars(2, cp) ||
+                       cp[0] != ']' ||
+                       cp[1] != '>') {
+                    if (c == EOF)
+                        goto bad_xml_markup;
+                    if (!tokenbuf.append(c))
+                        goto error;
+                }
+                getChar();            /* discard ] but not > */
+                tt = TOK_XMLCDATA;
+                tp->t_op = JSOP_XMLCDATA;
+                goto finish_xml_markup;
+            }
+            goto bad_xml_markup;
+        }
+    }
+
+    /* Check for processing instruction. */
+    if (matchChar('?')) {
+        inTarget = JS_TRUE;
+        targetLength = 0;
+        contentIndex = -1;
+
+        tokenbuf.clear();
+        while ((c = getChar()) != '?' || peekChar() != '>') {
+            if (c == EOF)
+                goto bad_xml_markup;
+            if (inTarget) {
+                if (JS_ISXMLSPACE(c)) {
+                    if (tokenbuf.empty())
+                        goto bad_xml_markup;
+                    inTarget = JS_FALSE;
+                } else {
+                    if (!(tokenbuf.empty()
+                          ? JS_ISXMLNSSTART(c)
+                          : JS_ISXMLNS(c))) {
+                        goto bad_xml_markup;
+                    }
+                    ++targetLength;
+                }
+            } else {
+                if (contentIndex < 0 && !JS_ISXMLSPACE(c))
+                    contentIndex = tokenbuf.length();
+            }
+            if (!tokenbuf.append(c))
+                goto error;
+        }
+        if (targetLength == 0)
+            goto bad_xml_markup;
+        if (contentIndex < 0) {
+            atom = cx->runtime->atomState.emptyAtom;
+        } else {
+            atom = js_AtomizeChars(cx, tokenbuf.begin() + contentIndex,
+                                   tokenbuf.length() - contentIndex, 0);
+            if (!atom)
+                goto error;
+        }
+        tokenbuf.shrinkBy(tokenbuf.length() - targetLength);
+        tp->t_atom2 = atom;
+        tt = TOK_XMLPI;
+
+  finish_xml_markup:
+        if (!matchChar('>'))
+            goto bad_xml_markup;
+        atom = atomize(cx, tokenbuf);
+        if (!atom)
+            goto error;
+        tp->t_atom = atom;
+        tp->pos.end.lineno = lineno;
+        goto out;
     }
+
+    /* An XML start-of-tag character. */
+    tt = matchChar('/') ? TOK_XMLETAGO : TOK_XMLSTAGO;
+
+  out:
+    *ttp = tt;
+    *tpp = tp;
+    return true;
+
+  bad_xml_markup:
+    ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_BAD_XML_MARKUP);
+  error:
+    *ttp = TOK_ERROR;
+    *tpp = tp;
+    return false;
+}
 #endif /* JS_HAS_XML_SUPPORT */
 
+/*
+ * We have encountered a '\': check for a Unicode escape sequence after it.
+ * Return 'true' and the character code value (by value) if we found a
+ * Unicode escape sequence.  Otherwise, return 'false'.  In both cases, do not
+ * advance along the buffer.
+ */
+bool
+TokenStream::peekUnicodeEscape(int *result)
+{
+    jschar cp[5];
+
+    if (peekChars(5, cp) && cp[0] == 'u' &&
+        JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) &&
+        JS7_ISHEX(cp[3]) && JS7_ISHEX(cp[4]))
+    {
+        *result = (((((JS7_UNHEX(cp[1]) << 4)
+                + JS7_UNHEX(cp[2])) << 4)
+              + JS7_UNHEX(cp[3])) << 4)
+            + JS7_UNHEX(cp[4]);
+        return true;
+    }
+    return false;
+}
+
+bool
+TokenStream::matchUnicodeEscapeIdStart(int32 *cp)
+{
+    if (peekUnicodeEscape(cp) && JS_ISIDSTART(*cp)) {
+        skipChars(5);
+        return true;
+    }
+    return false;
+}
+
+bool
+TokenStream::matchUnicodeEscapeIdent(int32 *cp)
+{
+    if (peekUnicodeEscape(cp) && JS_ISIDENT(*cp)) {
+        skipChars(5);
+        return true;
+    }
+    return false;
+}
+
+bool
+TokenStream::getAtLine()
+{
+    int c;
+    jschar cp[5];
+    uintN i, line, temp;
+    char filenameBuf[1024];
+
+    /*
+     * Hack for source filters such as the Mozilla XUL preprocessor:
+     * "//@line 123\n" sets the number of the *next* line after the
+     * comment to 123.  If we reach here, we've already seen "//".
+     */
+    if (peekChars(5, cp) &&
+        cp[0] == '@' &&
+        cp[1] == 'l' &&
+        cp[2] == 'i' &&
+        cp[3] == 'n' &&
+        cp[4] == 'e') {
+        skipChars(5);
+        while ((c = getChar()) != '\n' && JS_ISSPACE_OR_BOM((jschar)c))
+            continue;
+        if (JS7_ISDEC(c)) {
+            line = JS7_UNDEC(c);
+            while ((c = getChar()) != EOF && JS7_ISDEC(c)) {
+                temp = 10 * line + JS7_UNDEC(c);
+                if (temp < line) {
+                    /* Ignore overlarge line numbers. */
+                    return true;
+                }
+                line = temp;
+            }
+            while (c != '\n' && JS_ISSPACE_OR_BOM((jschar)c))
+                c = getChar();
+            i = 0;
+            if (c == '"') {
+                while ((c = getChar()) != EOF && c != '"') {
+                    if (c == '\n') {
+                        ungetChar(c);
+                        return true;
+                    }
+                    if ((c >> 8) != 0 || i >= sizeof filenameBuf - 1)
+                        return true;
+                    filenameBuf[i++] = (char) c;
+                }
+                if (c == '"') {
+                    while ((c = getChar()) != '\n' &&
+                           JS_ISSPACE_OR_BOM((jschar)c)) {
+                        continue;
+                    }
+                }
+            }
+            filenameBuf[i] = '\0';
+            if (c == EOF || c == '\n') {
+                if (i > 0) {
+                    if (flags & TSF_OWNFILENAME)
+                        cx->free_((void *) filename);
+                    filename = JS_strdup(cx, filenameBuf);
+                    if (!filename)
+                        return false;
+                    flags |= TSF_OWNFILENAME;
+                }
+                lineno = line;
+            }
+        }
+        ungetChar(c);
+    }
+    return true;
+}
+
+Token *
+TokenStream::newToken(ptrdiff_t adjust)
+{
+    cursor = (cursor + 1) & ntokensMask;
+    Token *tp = &tokens[cursor];
+    tp->ptr = userbuf.addressOfNextRawChar() + adjust;
+    tp->pos.begin.index = tp->ptr - linebase;
+    tp->pos.begin.lineno = tp->pos.end.lineno = lineno;
+    return tp;
+}
+
+JS_ALWAYS_INLINE JSAtom *
+TokenStream::atomize(JSContext *cx, CharBuffer &cb)
+{
+    return js_AtomizeChars(cx, cb.begin(), cb.length(), 0);
+}
+
+#ifdef DEBUG
+bool
+IsTokenSane(Token *tp)
+{
+    /*
+     * Nb: TOK_EOL should never be used in an actual Token;  it should only be
+     * returned as a TokenKind from peekTokenSameLine().
+     */
+    if (tp->type < TOK_ERROR || tp->type >= TOK_LIMIT || tp->type == TOK_EOL)
+        return false;
+
+    if (tp->pos.begin.lineno == tp->pos.end.lineno) {
+        if (tp->pos.begin.index > tp->pos.end.index)
+            return false;
+    } else {
+        /* Only certain token kinds can be multi-line. */
+        switch (tp->type) {
+          case TOK_STRING:
+          case TOK_XMLATTR:
+          case TOK_XMLSPACE:
+          case TOK_XMLTEXT:
+          case TOK_XMLCOMMENT:
+          case TOK_XMLCDATA:
+          case TOK_XMLPI:
+            break;
+          default:
+            return false;
+        }
+    }
+    return true;
+}
+#endif
+
+bool
+TokenStream::putIdentInTokenbuf(const jschar *identStart)
+{
+    int32 c, qc;
+    const jschar *tmp = userbuf.addressOfNextRawChar(); 
+    userbuf.setAddressOfNextRawChar(identStart);
+
+    tokenbuf.clear();
+    for (;;) {
+        c = getCharIgnoreEOL();
+        if (!JS_ISIDENT(c)) {
+            if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
+                break;
+            c = qc;
+        }
+        if (!tokenbuf.append(c)) {
+            userbuf.setAddressOfNextRawChar(tmp);
+            return false;
+        }
+    }
+    userbuf.setAddressOfNextRawChar(tmp);
+    return true;
+}
+
+TokenKind
+TokenStream::getTokenInternal()
+{
+    TokenKind tt;
+    int c, qc;
+    Token *tp;
+    bool hadUnicodeEscape;
+    const jschar *numStart;
+
+#if JS_HAS_XML_SUPPORT
+    /*
+     * Look for XML text and tags.
+     */
+    if (flags & (TSF_XMLTEXTMODE|TSF_XMLTAGMODE)) {
+        if (!getXMLTextOrTag(&tt, &tp))
+            goto error;
+        goto out;
+    }
+#endif
+
   retry:
     /*
      * This gets the next non-space char and starts the token.
      */
     do {
         c = getChar();
         if (c == '\n') {
             flags &= ~TSF_DIRTYLINE;
@@ -1104,16 +1356,17 @@ TokenStream::getTokenInternal()
             }
         }
 
         /* 
          * Identifiers containing no Unicode escapes can be atomized directly
          * from userbuf.  The rest must have the escapes converted via
          * tokenbuf before atomizing.
          */
+        JSAtom *atom;
         if (!hadUnicodeEscape)
             atom = js_AtomizeChars(cx, identStart, userbuf.addressOfNextRawChar() - identStart, 0);
         else if (putIdentInTokenbuf(identStart))
             atom = atomize(cx, tokenbuf);
         else
             atom = NULL;
         if (!atom)
             goto error;
@@ -1327,17 +1580,17 @@ TokenStream::getTokenInternal()
                     ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
                                              JSMSG_UNTERMINATED_STRING);
                     goto error;
                 }
             }
             if (!tokenbuf.append(c))
                 goto error;
         }
-        atom = atomize(cx, tokenbuf);
+        JSAtom *atom = atomize(cx, tokenbuf);
         if (!atom)
             goto error;
         tp->pos.end.lineno = lineno;
         tp->t_op = JSOP_STRING;
         tp->t_atom = atom;
         tt = TOK_STRING;
         goto out;
     }
@@ -1424,158 +1677,22 @@ TokenStream::getTokenInternal()
 #if JS_HAS_XML_SUPPORT
       case '@':
         tt = TOK_AT;
         break;
 #endif
 
       case '<':
 #if JS_HAS_XML_SUPPORT
-        /*
-         * After much testing, it's clear that Postel's advice to protocol
-         * designers ("be liberal in what you accept, and conservative in what
-         * you send") invites a natural-law repercussion for JS as "protocol":
-         *
-         * "If you are liberal in what you accept, others will utterly fail to
-         *  be conservative in what they send."
-         *
-         * Which means you will get <!-- comments to end of line in the middle
-         * of .js files, and after if conditions whose then statements are on
-         * the next line, and other wonders.  See at least the following bugs:
-         * https://bugzilla.mozilla.org/show_bug.cgi?id=309242
-         * https://bugzilla.mozilla.org/show_bug.cgi?id=309712
-         * https://bugzilla.mozilla.org/show_bug.cgi?id=310993
-         *
-         * So without JSOPTION_XML, we changed around Firefox 1.5 never to scan
-         * an XML comment or CDATA literal.  Instead, we always scan <! as the
-         * start of an HTML comment hack to end of line, used since Netscape 2
-         * to hide script tag content from script-unaware browsers.
-         *
-         * But this still leaves XML resources with certain internal structure
-         * vulnerable to being loaded as script cross-origin, and some internal
-         * data stolen, so for Firefox 3.5 and beyond, we reject programs whose
-         * source consists only of XML literals. See:
-         *
-         * https://bugzilla.mozilla.org/show_bug.cgi?id=336551
-         *
-         * The check for this is in jsparse.cpp, Compiler::compileScript.
-         */
         if ((flags & TSF_OPERAND) && (hasXML() || peekChar() != '!')) {
-            /* Check for XML comment or CDATA section. */
-            if (matchChar('!')) {
-                tokenbuf.clear();
-
-                /* Scan XML comment. */
-                if (matchChar('-')) {
-                    if (!matchChar('-'))
-                        goto bad_xml_markup;
-                    while ((c = getChar()) != '-' || !matchChar('-')) {
-                        if (c == EOF)
-                            goto bad_xml_markup;
-                        if (!tokenbuf.append(c))
-                            goto error;
-                    }
-                    tt = TOK_XMLCOMMENT;
-                    tp->t_op = JSOP_XMLCOMMENT;
-                    goto finish_xml_markup;
-                }
-
-                /* Scan CDATA section. */
-                if (matchChar('[')) {
-                    jschar cp[6];
-                    if (peekChars(6, cp) &&
-                        cp[0] == 'C' &&
-                        cp[1] == 'D' &&
-                        cp[2] == 'A' &&
-                        cp[3] == 'T' &&
-                        cp[4] == 'A' &&
-                        cp[5] == '[') {
-                        skipChars(6);
-                        while ((c = getChar()) != ']' ||
-                               !peekChars(2, cp) ||
-                               cp[0] != ']' ||
-                               cp[1] != '>') {
-                            if (c == EOF)
-                                goto bad_xml_markup;
-                            if (!tokenbuf.append(c))
-                                goto error;
-                        }
-                        getChar();            /* discard ] but not > */
-                        tt = TOK_XMLCDATA;
-                        tp->t_op = JSOP_XMLCDATA;
-                        goto finish_xml_markup;
-                    }
-                    goto bad_xml_markup;
-                }
-            }
-
-            /* Check for processing instruction. */
-            if (matchChar('?')) {
-                inTarget = JS_TRUE;
-                targetLength = 0;
-                contentIndex = -1;
-
-                tokenbuf.clear();
-                while ((c = getChar()) != '?' || peekChar() != '>') {
-                    if (c == EOF)
-                        goto bad_xml_markup;
-                    if (inTarget) {
-                        if (JS_ISXMLSPACE(c)) {
-                            if (tokenbuf.empty())
-                                goto bad_xml_markup;
-                            inTarget = JS_FALSE;
-                        } else {
-                            if (!(tokenbuf.empty()
-                                  ? JS_ISXMLNSSTART(c)
-                                  : JS_ISXMLNS(c))) {
-                                goto bad_xml_markup;
-                            }
-                            ++targetLength;
-                        }
-                    } else {
-                        if (contentIndex < 0 && !JS_ISXMLSPACE(c))
-                            contentIndex = tokenbuf.length();
-                    }
-                    if (!tokenbuf.append(c))
-                        goto error;
-                }
-                if (targetLength == 0)
-                    goto bad_xml_markup;
-                if (contentIndex < 0) {
-                    atom = cx->runtime->atomState.emptyAtom;
-                } else {
-                    atom = js_AtomizeChars(cx, tokenbuf.begin() + contentIndex,
-                                           tokenbuf.length() - contentIndex, 0);
-                    if (!atom)
-                        goto error;
-                }
-                tokenbuf.shrinkBy(tokenbuf.length() - targetLength);
-                tp->t_atom2 = atom;
-                tt = TOK_XMLPI;
-
-        finish_xml_markup:
-                if (!matchChar('>'))
-                    goto bad_xml_markup;
-                atom = atomize(cx, tokenbuf);
-                if (!atom)
-                    goto error;
-                tp->t_atom = atom;
-                tp->pos.end.lineno = lineno;
-                goto out;
-            }
-
-            /* An XML start-of-tag character. */
-            tt = matchChar('/') ? TOK_XMLETAGO : TOK_XMLSTAGO;
+            if (!getXMLMarkup(&tt, &tp))
+                goto error;
             goto out;
-
-        bad_xml_markup:
-            ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_BAD_XML_MARKUP);
-            goto error;
         }
-#endif /* JS_HAS_XML_SUPPORT */
+#endif
 
         /* NB: treat HTML begin-comment as comment-till-end-of-line */
         if (matchChar('!')) {
             if (matchChar('-')) {
                 if (matchChar('-')) {
                     flags |= TSF_IN_HTML_COMMENT;
                     goto skipline;
                 }
@@ -1603,82 +1720,22 @@ TokenStream::getTokenInternal()
         break;
 
       case '*':
         tp->t_op = JSOP_MUL;
         tt = matchChar('=') ? TOK_ASSIGN : TOK_STAR;
         break;
 
       case '/':
+        /*
+         * Look for a single-line comment.
+         */
         if (matchChar('/')) {
-            /*
-             * Hack for source filters such as the Mozilla XUL preprocessor:
-             * "//@line 123\n" sets the number of the *next* line after the
-             * comment to 123.
-             */
-            if (cx->hasAtLineOption()) {
-                jschar cp[5];
-                uintN i, line, temp;
-                char filenameBuf[1024];
-
-                if (peekChars(5, cp) &&
-                    cp[0] == '@' &&
-                    cp[1] == 'l' &&
-                    cp[2] == 'i' &&
-                    cp[3] == 'n' &&
-                    cp[4] == 'e') {
-                    skipChars(5);
-                    while ((c = getChar()) != '\n' && JS_ISSPACE_OR_BOM((jschar)c))
-                        continue;
-                    if (JS7_ISDEC(c)) {
-                        line = JS7_UNDEC(c);
-                        while ((c = getChar()) != EOF && JS7_ISDEC(c)) {
-                            temp = 10 * line + JS7_UNDEC(c);
-                            if (temp < line) {
-                                /* Ignore overlarge line numbers. */
-                                goto skipline;
-                            }
-                            line = temp;
-                        }
-                        while (c != '\n' && JS_ISSPACE_OR_BOM((jschar)c))
-                            c = getChar();
-                        i = 0;
-                        if (c == '"') {
-                            while ((c = getChar()) != EOF && c != '"') {
-                                if (c == '\n') {
-                                    ungetChar(c);
-                                    goto skipline;
-                                }
-                                if ((c >> 8) != 0 || i >= sizeof filenameBuf - 1)
-                                    goto skipline;
-                                filenameBuf[i++] = (char) c;
-                            }
-                            if (c == '"') {
-                                while ((c = getChar()) != '\n' &&
-                                       JS_ISSPACE_OR_BOM((jschar)c)) {
-                                    continue;
-                                }
-                            }
-                        }
-                        filenameBuf[i] = '\0';
-                        if (c == EOF || c == '\n') {
-                            if (i > 0) {
-                                if (flags & TSF_OWNFILENAME)
-                                    cx->free_((void *) filename);
-                                filename = JS_strdup(cx, filenameBuf);
-                                if (!filename)
-                                    goto error;
-                                flags |= TSF_OWNFILENAME;
-                            }
-                            lineno = line;
-                        }
-                    }
-                    ungetChar(c);
-                }
-            }
+            if (cx->hasAtLineOption() && !getAtLine())
+                goto error;
 
   skipline:
             /* Optimize line skipping if we are not in an HTML comment. */
             if (flags & TSF_IN_HTML_COMMENT) {
                 while ((c = getChar()) != EOF && c != '\n') {
                     if (c == '-' && matchChar('-') && matchChar('>'))
                         flags &= ~TSF_IN_HTML_COMMENT;
                 }
@@ -1686,16 +1743,19 @@ TokenStream::getTokenInternal()
                 while ((c = getChar()) != EOF && c != '\n')
                     continue;
             }
             ungetChar(c);
             cursor = (cursor - 1) & ntokensMask;
             goto retry;
         }
 
+        /*
+         * Look for a multi-line comment.
+         */
         if (matchChar('*')) {
             uintN linenoBefore = lineno;
             while ((c = getChar()) != EOF &&
                    !(c == '*' && matchChar('/'))) {
                 /* Ignore all characters until comment close. */
             }
             if (c == EOF) {
                 ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
@@ -1705,16 +1765,19 @@ TokenStream::getTokenInternal()
             if (linenoBefore != lineno) {
                 flags &= ~TSF_DIRTYLINE;
                 flags |= TSF_EOL;
             }
             cursor = (cursor - 1) & ntokensMask;
             goto retry;
         }
 
+        /*
+         * Look for a regexp; only attempted when TSF_OPERAND is set in flags.
+         */
         if (flags & TSF_OPERAND) {
             uintN reflags, length;
             JSBool inCharClass = JS_FALSE;
 
             tokenbuf.clear();
             for (;;) {
                 c = getChar();
                 if (c == '\\') {
@@ -1843,22 +1906,20 @@ TokenStream::getTokenInternal()
         if (c == '=')
             tt = TOK_DEFSHARP;
         else if (c == '#')
             tt = TOK_USESHARP;
         else
             goto badchar;
         break;
       }
+
+      badchar:
 #endif /* JS_HAS_SHARP_VARS */
 
-#if JS_HAS_SHARP_VARS || JS_HAS_XML_SUPPORT
-      badchar:
-#endif
-
       default:
         ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_ILLEGAL_CHARACTER);
         goto error;
     }
 
   out:
     flags |= TSF_DIRTYLINE;
     tp->pos.end.index = userbuf.addressOfNextRawChar() - linebase;
--- a/js/src/jsscan.h
+++ b/js/src/jsscan.h
@@ -465,22 +465,22 @@ class TokenStream
         }
         ungetToken();
         return tt;
     }
 
     /*
      * Get the next token from the stream if its kind is |tt|.
      */
-    JSBool matchToken(TokenKind tt, uintN withFlags = 0) {
+    bool matchToken(TokenKind tt, uintN withFlags = 0) {
         Flagger flagger(this, withFlags);
         if (getToken() == tt)
-            return JS_TRUE;
+            return true;
         ungetToken();
-        return JS_FALSE;
+        return false;
     }
 
   private:
     /*
      * This is the low-level interface to the JS source code buffer.  It just
      * gets raw chars, basically.  TokenStreams functions are layered on top
      * and do some extra stuff like converting all EOL sequences to '\n',
      * tracking the line number, and setting the TSF_EOF flag.  (The "raw" in
@@ -574,25 +574,29 @@ class TokenStream
     int32 getChar();
     int32 getCharIgnoreEOL();
     void ungetChar(int32 c);
     void ungetCharIgnoreEOL(int32 c);
     Token *newToken(ptrdiff_t adjust);
     bool peekUnicodeEscape(int32 *c);
     bool matchUnicodeEscapeIdStart(int32 *c);
     bool matchUnicodeEscapeIdent(int32 *c);
-    JSBool peekChars(intN n, jschar *cp);
-    JSBool getXMLEntity();
+    bool peekChars(intN n, jschar *cp);
+    bool getAtLine();
 
-    JSBool matchChar(int32 expect) {
+    bool getXMLEntity();
+    bool getXMLTextOrTag(TokenKind *ttp, Token **tpp);
+    bool getXMLMarkup(TokenKind *ttp, Token **tpp);
+
+    bool matchChar(int32 expect) {
         int32 c = getChar();
         if (c == expect)
-            return JS_TRUE;
+            return true;
         ungetChar(c);
-        return JS_FALSE;
+        return false;
     }
 
     int32 peekChar() {
         int32 c = getChar();
         ungetChar(c);
         return c;
     }