Bug 551344 part 5 - Address more sicking's review comments in the Java parts of the HTML5 parser. r=jonas.
author Henri Sivonen <hsivonen@iki.fi>
Fri, 16 Apr 2010 13:52:06 +0300
changeset 40985 46be92d24873be598cf7fd6a5e0191ae167bb8ef
parent 40984 715e63b786d41f0c55f6ef65cb9ea2f50cd5f162
child 40986 a0b7fc3ed2ef1a9b455db3f3c379d0b4d398b95a
push id 1
push user root
push date Tue, 26 Apr 2011 22:38:44 +0000
treeherder mozilla-beta@bfdb6e623a36 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers jonas
bugs 551344
milestone 1.9.3a5pre
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
Bug 551344 part 5 - Address more sicking's review comments in the Java parts of the HTML5 parser. r=jonas.
parser/html/javasrc/Tokenizer.java
parser/html/javasrc/TreeBuilder.java
parser/html/nsHtml5Tokenizer.cpp
parser/html/nsHtml5Tokenizer.h
parser/html/nsHtml5TreeBuilder.cpp
--- a/parser/html/javasrc/Tokenizer.java
+++ b/parser/html/javasrc/Tokenizer.java
@@ -61,17 +61,19 @@ import org.xml.sax.SAXParseException;
  * can be configured to treat these conditions as fatal or to coerce the infoset
  * to something that XML 1.0 allows.
  * 
  * @version $Id$
  * @author hsivonen
  */
 public class Tokenizer implements Locator {
 
-    public static final int DATA = 0;
+    private static final int DATA_AND_RCDATA_MASK = ~1;
+
+	   public static final int DATA = 0;
 
     public static final int RCDATA = 1;
 
     public static final int SCRIPT_DATA = 2;
 
     public static final int PLAINTEXT = 3;
 
     private static final int TAG_OPEN = 4;
@@ -406,19 +408,19 @@ public class Tokenizer implements Locato
     /**
      * Buffer for expanding astral NCRs.
      */
     private final char[] astralChar;
 
     /**
      * The element whose end tag closes the current CDATA or RCDATA element.
      */
-    protected ElementName contentModelElement = null;
-
-    private char[] contentModelElementNameAsArray;
+    protected ElementName endTagExpectation = null;
+
+    private char[] endTagExpectationAsArray;
 
     /**
      * <code>true</code> if tokenizing an end tag
      */
     protected boolean endTag;
 
     /**
      * The current tag token name.
@@ -654,86 +656,89 @@ public class Tokenizer implements Locato
             boolean html4ModeCompatibleWithXhtml1Schemata) {
         this.html4ModeCompatibleWithXhtml1Schemata = html4ModeCompatibleWithXhtml1Schemata;
     }
 
     // ]NOCPP]
 
     // For the token handler to call
     /**
-     * Sets the content model flag and the associated element name.
+     * Sets the tokenizer state and the associated element name. This should 
+     * only ever be used to put the tokenizer into one of the states that have
+     * a special end tag expectation.
      * 
-     * @param contentModelFlag
-     *            the flag
-     * @param contentModelElement
-     *            the element causing the flag to be set
+     * @param specialTokenizerState
+     *            the tokenizer state to set
+     * @param endTagExpectation
+     *            the expected end tag for transitioning back to normal
      */
-    public void setContentModelFlag(int contentModelFlag,
-            @Local String contentModelElement) {
-        this.stateSave = contentModelFlag;
-        if (contentModelFlag == Tokenizer.DATA) {
+    public void setStateAndEndTagExpectation(int specialTokenizerState,
+            @Local String endTagExpectation) {
+        this.stateSave = specialTokenizerState;
+        if (specialTokenizerState == Tokenizer.DATA) {
             return;
         }
-        // XXX does this make any sense?
-        char[] asArray = Portability.newCharArrayFromLocal(contentModelElement);
-        this.contentModelElement = ElementName.elementNameByBuffer(asArray, 0,
+        char[] asArray = Portability.newCharArrayFromLocal(endTagExpectation);
+        this.endTagExpectation = ElementName.elementNameByBuffer(asArray, 0,
                 asArray.length, interner);
         Portability.releaseArray(asArray);
-        contentModelElementToArray();
+        endTagExpectationToArray();
     }
 
     /**
-     * Sets the content model flag and the associated element name.
+     * Sets the tokenizer state and the associated element name. This should 
+     * only ever be used to put the tokenizer into one of the states that have
+     * a special end tag expectation.
      * 
-     * @param contentModelFlag
-     *            the flag
-     * @param contentModelElement
-     *            the element causing the flag to be set
+     * @param specialTokenizerState
+     *            the tokenizer state to set
+     * @param endTagExpectation
+     *            the expected end tag for transitioning back to normal
      */
-    public void setContentModelFlag(int contentModelFlag,
-            ElementName contentModelElement) {
-        this.stateSave = contentModelFlag;
-        this.contentModelElement = contentModelElement;
-        contentModelElementToArray();
-    }
-
-    private void contentModelElementToArray() {
-        switch (contentModelElement.group) {
+    public void setStateAndEndTagExpectation(int specialTokenizerState,
+            ElementName endTagExpectation) {
+        this.stateSave = specialTokenizerState;
+        this.endTagExpectation = endTagExpectation;
+        endTagExpectationToArray();
+    }
+
+    private void endTagExpectationToArray() {
+        switch (endTagExpectation.group) {
             case TreeBuilder.TITLE:
-                contentModelElementNameAsArray = TITLE_ARR;
+                endTagExpectationAsArray = TITLE_ARR;
                 return;
             case TreeBuilder.SCRIPT:
-                contentModelElementNameAsArray = SCRIPT_ARR;
+                endTagExpectationAsArray = SCRIPT_ARR;
                 return;
             case TreeBuilder.STYLE:
-                contentModelElementNameAsArray = STYLE_ARR;
+                endTagExpectationAsArray = STYLE_ARR;
                 return;
             case TreeBuilder.PLAINTEXT:
-                contentModelElementNameAsArray = PLAINTEXT_ARR;
+                endTagExpectationAsArray = PLAINTEXT_ARR;
                 return;
             case TreeBuilder.XMP:
-                contentModelElementNameAsArray = XMP_ARR;
+                endTagExpectationAsArray = XMP_ARR;
                 return;
             case TreeBuilder.TEXTAREA:
-                contentModelElementNameAsArray = TEXTAREA_ARR;
+                endTagExpectationAsArray = TEXTAREA_ARR;
                 return;
             case TreeBuilder.IFRAME:
-                contentModelElementNameAsArray = IFRAME_ARR;
+                endTagExpectationAsArray = IFRAME_ARR;
                 return;
             case TreeBuilder.NOEMBED:
-                contentModelElementNameAsArray = NOEMBED_ARR;
+                endTagExpectationAsArray = NOEMBED_ARR;
                 return;
             case TreeBuilder.NOSCRIPT:
-                contentModelElementNameAsArray = NOSCRIPT_ARR;
+                endTagExpectationAsArray = NOSCRIPT_ARR;
                 return;
             case TreeBuilder.NOFRAMES:
-                contentModelElementNameAsArray = NOFRAMES_ARR;
+                endTagExpectationAsArray = NOFRAMES_ARR;
                 return;
             default:
-                assert false;
+                assert false: "Bad end tag expectation.";
                 return;
         }
     }
 
     /**
      * For C++ use only.
      */
     public void setLineNumber(int line) {
@@ -793,70 +798,51 @@ public class Tokenizer implements Locato
         } else {
             // ]NOCPP]
             return HtmlAttributes.EMPTY_ATTRIBUTES;
             // [NOCPP[
         }
         // ]NOCPP]
     }
 
-    private void clearStrBufAndAppendCurrentC(char c) {
+    @Inline private void clearStrBufAndAppend(char c) {
         strBuf[0] = c;
-
         strBufLen = 1;
-        // strBufOffset = pos;
-    }
-
-    private void clearStrBufAndAppendForceWrite(char c) {
-        strBuf[0] = c; // test
-
-        strBufLen = 1;
-        // strBufOffset = pos;
-        // buf[pos] = c;
-    }
-
-    private void clearStrBufForNextState() {
+    }
+
+    @Inline private void clearStrBuf() {
         strBufLen = 0;
-        // strBufOffset = pos + 1;
     }
 
     /**
      * Appends to the smaller buffer.
      * 
      * @param c
      *            the UTF-16 code unit to append
      */
     private void appendStrBuf(char c) {
-        // if (strBufOffset != -1) {
-        // strBufLen++;
-        // } else {
         if (strBufLen == strBuf.length) {
             char[] newBuf = new char[strBuf.length + Tokenizer.BUFFER_GROW_BY];
             System.arraycopy(strBuf, 0, newBuf, 0, strBuf.length);
             Portability.releaseArray(strBuf);
             strBuf = newBuf;
         }
         strBuf[strBufLen++] = c;
-        // }
     }
 
     /**
      * The smaller buffer as a String. Currently only used for error reporting.
      * 
      * <p>
      * C++ memory note: The return value must be released.
      * 
      * @return the smaller buffer as a string
      */
     protected String strBufToString() {
-        // if (strBufOffset != -1) {
-        // return Portability.newStringFromBuffer(buf, strBufOffset, strBufLen);
-        // } else {
         return Portability.newStringFromBuffer(strBuf, 0, strBufLen);
-        // }
     }
 
     /**
      * Returns the short buffer as a local name. The return value is released in
      * emitDoctypeToken().
      * 
      * @return the smaller buffer as local name
      */
@@ -868,67 +854,46 @@ public class Tokenizer implements Locato
     /**
      * Emits the smaller buffer as character tokens.
      * 
      * @throws SAXException
      *             if the token handler threw
      */
     private void emitStrBuf() throws SAXException {
         if (strBufLen > 0) {
-            // if (strBufOffset != -1) {
-            // tokenHandler.characters(buf, strBufOffset, strBufLen);
-            // } else {
             tokenHandler.characters(strBuf, 0, strBufLen);
-            // }
         }
     }
 
-    private void clearLongStrBufForNextState() {
-        // longStrBufOffset = pos + 1;
+    @Inline private void clearLongStrBuf() {
         longStrBufLen = 0;
     }
 
-    private void clearLongStrBuf() {
-        // longStrBufOffset = pos;
-        longStrBufLen = 0;
-    }
-
-    private void clearLongStrBufAndAppendCurrentC(char c) {
+    @Inline private void clearLongStrBufAndAppend(char c) {
         longStrBuf[0] = c;
         longStrBufLen = 1;
-        // longStrBufOffset = pos;
-    }
-
-    private void clearLongStrBufAndAppendToComment(char c) {
-        longStrBuf[0] = c;
-        // longStrBufOffset = pos;
-        longStrBufLen = 1;
     }
 
     /**
      * Appends to the larger buffer.
      * 
      * @param c
      *            the UTF-16 code unit to append
      */
     private void appendLongStrBuf(char c) {
-        // if (longStrBufOffset != -1) {
-        // longStrBufLen++;
-        // } else {
         if (longStrBufLen == longStrBuf.length) {
             char[] newBuf = new char[longStrBufLen + (longStrBufLen >> 1)];
             System.arraycopy(longStrBuf, 0, newBuf, 0, longStrBuf.length);
             Portability.releaseArray(longStrBuf);
             longStrBuf = newBuf;
         }
         longStrBuf[longStrBufLen++] = c;
-        // }
-    }
-
-    private void appendSecondHyphenToBogusComment() throws SAXException {
+    }
+
+    @Inline private void appendSecondHyphenToBogusComment() throws SAXException {
         // [NOCPP[
         switch (commentPolicy) {
             case ALTER_INFOSET:
                 // detachLongStrBuf();
                 appendLongStrBuf(' ');
                 // FALLTHROUGH
             case ALLOW:
                 warn("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment.");
@@ -956,17 +921,17 @@ public class Tokenizer implements Locato
             case FATAL:
                 fatal("The document is not mappable to XML 1.0 due to a trailing hyphen in a comment.");
                 break;
         }
     }
 
     // ]NOCPP]
 
-    private void adjustDoubleHyphenAndAppendToLongStrBufAndErr(char c)
+    @Inline private void adjustDoubleHyphenAndAppendToLongStrBufAndErr(char c)
             throws SAXException {
         errConsecutiveHyphens();
         // [NOCPP[
         switch (commentPolicy) {
             case ALTER_INFOSET:
                 // detachLongStrBuf();
                 longStrBufLen--;
                 appendLongStrBuf(' ');
@@ -993,53 +958,32 @@ public class Tokenizer implements Locato
             Portability.releaseArray(longStrBuf);
             longStrBuf = newBuf;
         }
         System.arraycopy(buffer, offset, longStrBuf, longStrBufLen, length);
         longStrBufLen = reqLen;
     }
 
     /**
-     * Appends to the larger buffer.
-     * 
-     * @param arr
-     *            the UTF-16 code units to append
-     */
-    private void appendLongStrBuf(char[] arr) {
-        // assert longStrBufOffset == -1;
-        appendLongStrBuf(arr, 0, arr.length);
-    }
-
-    /**
      * Append the contents of the smaller buffer to the larger one.
      */
-    private void appendStrBufToLongStrBuf() {
-        // assert longStrBufOffset == -1;
-        // if (strBufOffset != -1) {
-        // appendLongStrBuf(buf, strBufOffset, strBufLen);
-        // } else {
+    @Inline private void appendStrBufToLongStrBuf() {
         appendLongStrBuf(strBuf, 0, strBufLen);
-        // }
     }
 
     /**
      * The larger buffer as a string.
      * 
      * <p>
      * C++ memory note: The return value must be released.
      * 
      * @return the larger buffer as a string
      */
     private String longStrBufToString() {
-        // if (longStrBufOffset != -1) {
-        // return Portability.newStringFromBuffer(buf, longStrBufOffset,
-        // longStrBufLen);
-        // } else {
         return Portability.newStringFromBuffer(longStrBuf, 0, longStrBufLen);
-        // }
     }
 
     /**
      * Emits the current comment token.
      * 
      * @param pos
      *            TODO
      * 
@@ -1073,17 +1017,17 @@ public class Tokenizer implements Locato
      * 
      * @throws SAXException
      */
     protected void flushChars(@NoLength char[] buf, int pos)
             throws SAXException {
         if (pos > cstart) {
             tokenHandler.characters(buf, cstart, pos - cstart);
         }
-        cstart = 0x7fffffff;
+        cstart = Integer.MAX_VALUE;
     }
 
     /**
      * Reports an condition that would make the infoset incompatible with XML
      * 1.0 as fatal.
      * 
      * @param message
      *            the message
@@ -1184,16 +1128,20 @@ public class Tokenizer implements Locato
             tokenHandler.endTag(tagName);
             Portability.delete(attributes);
         } else {
             tokenHandler.startTag(tagName, attrs, selfClosing);
         }
         tagName.release();
         tagName = null;
         resetAttributes();
+        /*
+         * The token handler may have called setStateAndEndTagExpectation
+         * and changed stateSave since the start of this method.
+         */
         return stateSave;
     }
 
     private void attributeNameComplete() throws SAXException {
         // if (strBufOffset != -1) {
         // attributeName = AttributeName.nameByBuffer(buf, strBufOffset,
         // strBufLen, namePolicy != XmlViolationPolicy.ALLOW);
         // } else {
@@ -1307,22 +1255,22 @@ public class Tokenizer implements Locato
             if (c >= 'A' && c <= 'Z') {
                 c += 0x20;
             }
             buf[i] = c;
         }
         return new String(buf);
     }
 
+    protected void startErrorReporting() throws SAXException {
+
+    }
+
     // ]NOCPP]
-
-    protected void startErrorReporting() throws SAXException {
-
-    }
-
+    
     public void start() throws SAXException {
         initializeWithoutStarting();
         tokenHandler.startTokenization(this);
         // [NOCPP[
         startErrorReporting();
         // ]NOCPP]
     }
 
@@ -1473,18 +1421,18 @@ public class Tokenizer implements Locato
                         }
                         switch (c) {
                             case '&':
                                 /*
                                  * U+0026 AMPERSAND (&) Switch to the character
                                  * reference in data state.
                                  */
                                 flushChars(buf, pos);
-                                clearStrBufAndAppendCurrentC(c);
-                                rememberAmpersandLocation('\u0000');
+                                clearStrBufAndAppend(c);
+                                setAdditionalAndRememberAmpersandLocation('\u0000');
                                 returnState = state;
                                 state = Tokenizer.CONSUME_CHARACTER_REFERENCE;
                                 continue stateloop;
                             case '<':
                                 /*
                                  * U+003C LESS-THAN SIGN (<) Switch to the tag
                                  * open state.
                                  */
@@ -1533,17 +1481,17 @@ public class Tokenizer implements Locato
                              * token,
                              */
                             endTag = false;
                             /*
                              * set its tag name to the lowercase version of the
                              * input character (add 0x0020 to the character's
                              * code point),
                              */
-                            clearStrBufAndAppendForceWrite((char) (c + 0x20));
+                            clearStrBufAndAppend((char) (c + 0x20));
                             /* then switch to the tag name state. */
                             state = Tokenizer.TAG_NAME;
                             /*
                              * (Don't emit the token yet; further details will
                              * be filled in before it is emitted.)
                              */
                             break tagopenloop;
                             // continue stateloop;
@@ -1552,17 +1500,17 @@ public class Tokenizer implements Locato
                              * U+0061 LATIN SMALL LETTER A through to U+007A
                              * LATIN SMALL LETTER Z Create a new start tag
                              * token,
                              */
                             endTag = false;
                             /*
                              * set its tag name to the input character,
                              */
-                            clearStrBufAndAppendCurrentC(c);
+                            clearStrBufAndAppend(c);
                             /* then switch to the tag name state. */
                             state = Tokenizer.TAG_NAME;
                             /*
                              * (Don't emit the token yet; further details will
                              * be filled in before it is emitted.)
                              */
                             break tagopenloop;
                             // continue stateloop;
@@ -1585,17 +1533,17 @@ public class Tokenizer implements Locato
                             case '?':
                                 /*
                                  * U+003F QUESTION MARK (?) Parse error.
                                  */
                                 errProcessingInstruction();
                                 /*
                                  * Switch to the bogus comment state.
                                  */
-                                clearLongStrBufAndAppendToComment(c);
+                                clearLongStrBufAndAppend(c);
                                 state = Tokenizer.BOGUS_COMMENT;
                                 continue stateloop;
                             case '>':
                                 /*
                                  * U+003E GREATER-THAN SIGN (>) Parse error.
                                  */
                                 errLtGt();
                                 /*
@@ -1787,17 +1735,17 @@ public class Tokenizer implements Locato
                                      * 0x0020 to the character's code point)
                                      */
                                     c += 0x20;
                                 }
                                 /*
                                  * Set that attribute's name to the current
                                  * input character,
                                  */
-                                clearStrBufAndAppendCurrentC(c);
+                                clearStrBufAndAppend(c);
                                 /*
                                  * and its value to the empty string.
                                  */
                                 // Will do later.
                                 /*
                                  * Switch to the attribute name state.
                                  */
                                 state = Tokenizer.ATTRIBUTE_NAME;
@@ -1932,17 +1880,17 @@ public class Tokenizer implements Locato
                                  * in the before attribute value state.
                                  */
                                 continue;
                             case '"':
                                 /*
                                  * U+0022 QUOTATION MARK (") Switch to the
                                  * attribute value (double-quoted) state.
                                  */
-                                clearLongStrBufForNextState();
+                                clearLongStrBuf();
                                 state = Tokenizer.ATTRIBUTE_VALUE_DOUBLE_QUOTED;
                                 break beforeattributevalueloop;
                             // continue stateloop;
                             case '&':
                                 /*
                                  * U+0026 AMPERSAND (&) Switch to the attribute
                                  * value (unquoted) state and reconsume this
                                  * input character.
@@ -1952,17 +1900,17 @@ public class Tokenizer implements Locato
                                 noteUnquotedAttributeValue();
                                 reconsume = true;
                                 continue stateloop;
                             case '\'':
                                 /*
                                  * U+0027 APOSTROPHE (') Switch to the attribute
                                  * value (single-quoted) state.
                                  */
-                                clearLongStrBufForNextState();
+                                clearLongStrBuf();
                                 state = Tokenizer.ATTRIBUTE_VALUE_SINGLE_QUOTED;
                                 continue stateloop;
                             case '>':
                                 /*
                                  * U+003E GREATER-THAN SIGN (>) Parse error.
                                  */
                                 errAttributeValueMissing();
                                 /*
@@ -1995,17 +1943,17 @@ public class Tokenizer implements Locato
                             default:
                                 // [NOCPP[
                                 errHtml4NonNameInUnquotedAttribute(c);
                                 // ]NOCPP]
                                 /*
                                  * Anything else Append the current input
                                  * character to the current attribute's value.
                                  */
-                                clearLongStrBufAndAppendCurrentC(c);
+                                clearLongStrBufAndAppend(c);
                                 /*
                                  * Switch to the attribute value (unquoted)
                                  * state.
                                  */
 
                                 state = Tokenizer.ATTRIBUTE_VALUE_UNQUOTED;
                                 noteUnquotedAttributeValue();
                                 continue stateloop;
@@ -2038,18 +1986,18 @@ public class Tokenizer implements Locato
                             // continue stateloop;
                             case '&':
                                 /*
                                  * U+0026 AMPERSAND (&) Switch to the character
                                  * reference in attribute value state, with the
                                  * additional allowed character being U+0022
                                  * QUOTATION MARK (").
                                  */
-                                clearStrBufAndAppendCurrentC(c);
-                                rememberAmpersandLocation('\"');
+                                clearStrBufAndAppend(c);
+                                setAdditionalAndRememberAmpersandLocation('\"');
                                 returnState = state;
                                 state = Tokenizer.CONSUME_CHARACTER_REFERENCE;
                                 continue stateloop;
                             case '\r':
                                 appendLongStrBufCarriageReturn();
                                 break stateloop;
                             case '\n':
                                 appendLongStrBufLineFeed();
@@ -2207,18 +2155,18 @@ public class Tokenizer implements Locato
                                 continue stateloop;
                             case '&':
                                 /*
                                  * U+0026 AMPERSAND (&) Switch to the character
                                  * reference in attribute value state, with the
                                  * additional allowed character being U+003E
                                  * GREATER-THAN SIGN (>)
                                  */
-                                clearStrBufAndAppendCurrentC(c);
-                                rememberAmpersandLocation('>');
+                                clearStrBufAndAppend(c);
+                                setAdditionalAndRememberAmpersandLocation('>');
                                 returnState = state;
                                 state = Tokenizer.CONSUME_CHARACTER_REFERENCE;
                                 continue stateloop;
                             case '>':
                                 /*
                                  * U+003E GREATER-THAN SIGN (>) Emit the current
                                  * tag token.
                                  */
@@ -2346,17 +2294,17 @@ public class Tokenizer implements Locato
                                      * 0x0020 to the character's code point)
                                      */
                                     c += 0x20;
                                 }
                                 /*
                                  * Set that attribute's name to the current
                                  * input character,
                                  */
-                                clearStrBufAndAppendCurrentC(c);
+                                clearStrBufAndAppend(c);
                                 /*
                                  * and its value to the empty string.
                                  */
                                 // Will do later.
                                 /*
                                  * Switch to the attribute name state.
                                  */
                                 state = Tokenizer.ATTRIBUTE_NAME;
@@ -2479,29 +2427,29 @@ public class Tokenizer implements Locato
                          * 
                          * Otherwise, is is a parse error. Switch to the bogus
                          * comment state. The next character that is consumed,
                          * if any, is the first character that will be in the
                          * comment.
                          */
                         switch (c) {
                             case '-':
-                                clearLongStrBufAndAppendToComment(c);
+                                clearLongStrBufAndAppend(c);
                                 state = Tokenizer.MARKUP_DECLARATION_HYPHEN;
                                 break markupdeclarationopenloop;
                             // continue stateloop;
                             case 'd':
                             case 'D':
-                                clearLongStrBufAndAppendToComment(c);
+                                clearLongStrBufAndAppend(c);
                                 index = 0;
                                 state = Tokenizer.MARKUP_DECLARATION_OCTYPE;
                                 continue stateloop;
                             case '[':
                                 if (tokenHandler.inForeign()) {
-                                    clearLongStrBufAndAppendToComment(c);
+                                    clearLongStrBufAndAppend(c);
                                     index = 0;
                                     state = Tokenizer.CDATA_START;
                                     continue stateloop;
                                 } else {
                                     // fall through
                                 }
                             default:
                                 errBogusComment();
@@ -2517,17 +2465,17 @@ public class Tokenizer implements Locato
                         if (++pos == endPos) {
                             break stateloop;
                         }
                         c = checkChar(buf, pos);
                         switch (c) {
                             case '\u0000':
                                 break stateloop;
                             case '-':
-                                clearLongStrBufForNextState();
+                                clearLongStrBuf();
                                 state = Tokenizer.COMMENT_START;
                                 break markupdeclarationhyphenloop;
                             // continue stateloop;
                             default:
                                 errBogusComment();
                                 state = Tokenizer.BOGUS_COMMENT;
                                 reconsume = true;
                                 continue stateloop;
@@ -3082,17 +3030,17 @@ public class Tokenizer implements Locato
                                      */
                                     c += 0x20;
                                 }
                                 /* Anything else Create a new DOCTYPE token. */
                                 /*
                                  * Set the token's name name to the current
                                  * input character.
                                  */
-                                clearStrBufAndAppendCurrentC(c);
+                                clearStrBufAndAppend(c);
                                 /*
                                  * Switch to the DOCTYPE name state.
                                  */
                                 state = Tokenizer.DOCTYPE_NAME;
                                 break beforedoctypenameloop;
                             // continue stateloop;
                         }
                     }
@@ -3303,33 +3251,33 @@ public class Tokenizer implements Locato
                                 /*
                                  * U+0022 QUOTATION MARK (") Parse Error.
                                  */
                                 errNoSpaceBetweenDoctypePublicKeywordAndQuote();
                                 /*
                                  * Set the DOCTYPE token's public identifier to
                                  * the empty string (not missing),
                                  */
-                                clearLongStrBufForNextState();
+                                clearLongStrBuf();
                                 /*
                                  * then switch to the DOCTYPE public identifier
                                  * (double-quoted) state.
                                  */
                                 state = Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED;
                                 continue stateloop;
                             case '\'':
                                 /*
                                  * U+0027 APOSTROPHE (') Parse Error.
                                  */
                                 errNoSpaceBetweenDoctypePublicKeywordAndQuote();
                                 /*
                                  * Set the DOCTYPE token's public identifier to
                                  * the empty string (not missing),
                                  */
-                                clearLongStrBufForNextState();
+                                clearLongStrBuf();
                                 /*
                                  * then switch to the DOCTYPE public identifier
                                  * (single-quoted) state.
                                  */
                                 state = Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED;
                                 continue stateloop;
                             case '>':
                                 /* U+003E GREATER-THAN SIGN (>) Parse error. */
@@ -3390,31 +3338,31 @@ public class Tokenizer implements Locato
                                  */
                                 continue;
                             case '"':
                                 /*
                                  * U+0022 QUOTATION MARK (") Set the DOCTYPE
                                  * token's public identifier to the empty string
                                  * (not missing),
                                  */
-                                clearLongStrBufForNextState();
+                                clearLongStrBuf();
                                 /*
                                  * then switch to the DOCTYPE public identifier
                                  * (double-quoted) state.
                                  */
                                 state = Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED;
                                 break beforedoctypepublicidentifierloop;
                             // continue stateloop;
                             case '\'':
                                 /*
                                  * U+0027 APOSTROPHE (') Set the DOCTYPE token's
                                  * public identifier to the empty string (not
                                  * missing),
                                  */
-                                clearLongStrBufForNextState();
+                                clearLongStrBuf();
                                 /*
                                  * then switch to the DOCTYPE public identifier
                                  * (single-quoted) state.
                                  */
                                 state = Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED;
                                 continue stateloop;
                             case '>':
                                 /* U+003E GREATER-THAN SIGN (>) Parse error. */
@@ -3555,33 +3503,33 @@ public class Tokenizer implements Locato
                                 /*
                                  * U+0022 QUOTATION MARK (") Parse error.
                                  */
                                 errNoSpaceBetweenPublicAndSystemIds();
                                 /*
                                  * Set the DOCTYPE token's system identifier to
                                  * the empty string (not missing),
                                  */
-                                clearLongStrBufForNextState();
+                                clearLongStrBuf();
                                 /*
                                  * then switch to the DOCTYPE system identifier
                                  * (double-quoted) state.
                                  */
                                 state = Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
                                 continue stateloop;
                             case '\'':
                                 /*
                                  * U+0027 APOSTROPHE (') Parse error.
                                  */
                                 errNoSpaceBetweenPublicAndSystemIds();
                                 /*
                                  * Set the DOCTYPE token's system identifier to
                                  * the empty string (not missing),
                                  */
-                                clearLongStrBufForNextState();
+                                clearLongStrBuf();
                                 /*
                                  * then switch to the DOCTYPE system identifier
                                  * (single-quoted) state.
                                  */
                                 state = Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
                                 continue stateloop;
                             default:
                                 bogusDoctype();
@@ -3636,31 +3584,31 @@ public class Tokenizer implements Locato
                                 state = Tokenizer.DATA;
                                 continue stateloop;
                             case '"':
                                 /*
                                  * U+0022 QUOTATION MARK (") Set the DOCTYPE
                                  * token's system identifier to the empty string
                                  * (not missing),
                                  */
-                                clearLongStrBufForNextState();
+                                clearLongStrBuf();
                                 /*
                                  * then switch to the DOCTYPE system identifier
                                  * (double-quoted) state.
                                  */
                                 state = Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
                                 break betweendoctypepublicandsystemidentifiersloop;
                             // continue stateloop;
                             case '\'':
                                 /*
                                  * U+0027 APOSTROPHE (') Set the DOCTYPE token's
                                  * system identifier to the empty string (not
                                  * missing),
                                  */
-                                clearLongStrBufForNextState();
+                                clearLongStrBuf();
                                 /*
                                  * then switch to the DOCTYPE system identifier
                                  * (single-quoted) state.
                                  */
                                 state = Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
                                 continue stateloop;
                             default:
                                 bogusDoctype();
@@ -3899,33 +3847,33 @@ public class Tokenizer implements Locato
                                 /*
                                  * U+0022 QUOTATION MARK (") Parse Error.
                                  */
                                 errNoSpaceBetweenDoctypeSystemKeywordAndQuote();
                                 /*
                                  * Set the DOCTYPE token's system identifier to
                                  * the empty string (not missing),
                                  */
-                                clearLongStrBufForNextState();
+                                clearLongStrBuf();
                                 /*
                                  * then switch to the DOCTYPE system identifier
                                  * (double-quoted) state.
                                  */
                                 state = Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
                                 continue stateloop;
                             case '\'':
                                 /*
                                  * U+0027 APOSTROPHE (') Parse Error.
                                  */
                                 errNoSpaceBetweenDoctypeSystemKeywordAndQuote();
                                 /*
                                  * Set the DOCTYPE token's system identifier to
                                  * the empty string (not missing),
                                  */
-                                clearLongStrBufForNextState();
+                                clearLongStrBuf();
                                 /*
                                  * then switch to the DOCTYPE system identifier
                                  * (single-quoted) state.
                                  */
                                 state = Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
                                 continue stateloop;
                             case '>':
                                 /* U+003E GREATER-THAN SIGN (>) Parse error. */
@@ -3986,30 +3934,30 @@ public class Tokenizer implements Locato
                                  */
                                 continue;
                             case '"':
                                 /*
                                  * U+0022 QUOTATION MARK (") Set the DOCTYPE
                                  * token's system identifier to the empty string
                                  * (not missing),
                                  */
-                                clearLongStrBufForNextState();
+                                clearLongStrBuf();
                                 /*
                                  * then switch to the DOCTYPE system identifier
                                  * (double-quoted) state.
                                  */
                                 state = Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
                                 continue stateloop;
                             case '\'':
                                 /*
                                  * U+0027 APOSTROPHE (') Set the DOCTYPE token's
                                  * system identifier to the empty string (not
                                  * missing),
                                  */
-                                clearLongStrBufForNextState();
+                                clearLongStrBuf();
                                 /*
                                  * then switch to the DOCTYPE system identifier
                                  * (single-quoted) state.
                                  */
                                 state = Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
                                 break beforedoctypesystemidentifierloop;
                             // continue stateloop;
                             case '>':
@@ -4280,18 +4228,18 @@ public class Tokenizer implements Locato
                                 continue stateloop;
                             case '&':
                                 /*
                                  * U+0026 AMPERSAND (&) Switch to the character
                                  * reference in attribute value state, with the
                                  * additional allowed character being U+0027
                                  * APOSTROPHE (').
                                  */
-                                clearStrBufAndAppendCurrentC(c);
-                                rememberAmpersandLocation('\'');
+                                clearStrBufAndAppend(c);
+                                setAdditionalAndRememberAmpersandLocation('\'');
                                 returnState = state;
                                 state = Tokenizer.CONSUME_CHARACTER_REFERENCE;
                                 break attributevaluesinglequotedloop;
                             // continue stateloop;
                             case '\r':
                                 appendLongStrBufCarriageReturn();
                                 break stateloop;
                             case '\n':
@@ -4343,17 +4291,17 @@ public class Tokenizer implements Locato
                         case ' ':
                         case '\t':
                         case '\n':
                         case '\r': // we'll reconsume!
                         case '\u000C':
                         case '<':
                         case '&':
                             emitOrAppendStrBuf(returnState);
-                            if ((returnState & (~1)) == 0) {
+                            if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
                                 cstart = pos;
                             }
                             state = returnState;
                             reconsume = true;
                             continue stateloop;
                         case '#':
                             /*
                              * U+0023 NUMBER SIGN (#) Consume the U+0023 NUMBER
@@ -4376,17 +4324,17 @@ public class Tokenizer implements Locato
                             } else {
                                 // No match
                                 /*
                                  * If no match can be made, then this is a parse
                                  * error.
                                  */
                                 errNoNamedCharacterMatch();
                                 emitOrAppendStrBuf(returnState);
-                                if ((returnState & (~1)) == 0) {
+                                if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
                                     cstart = pos;
                                 }
                                 state = returnState;
                                 reconsume = true;
                                 continue stateloop;
                             }
                             // Didn't fail yet
                             appendStrBuf(c);
@@ -4450,17 +4398,17 @@ public class Tokenizer implements Locato
                         }
                         if (hilo == 0) {
                             /*
                              * If no match can be made, then this is a parse
                              * error.
                              */
                             errNoNamedCharacterMatch();
                             emitOrAppendStrBuf(returnState);
-                            if ((returnState & (~1)) == 0) {
+                            if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
                                 cstart = pos;
                             }
                             state = returnState;
                             reconsume = true;
                             continue stateloop;
                         }
                         // Didn't fail yet
                         appendStrBuf(c);
@@ -4532,32 +4480,32 @@ public class Tokenizer implements Locato
                     // TODO warn about apos (IE) and TRADE (Opera)
                     if (candidate == -1) {
                         // reconsume deals with CR, LF or nul
                         /*
                          * If no match can be made, then this is a parse error.
                          */
                         errNoNamedCharacterMatch();
                         emitOrAppendStrBuf(returnState);
-                        if ((returnState & (~1)) == 0) {
+                        if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
                             cstart = pos;
                         }
                         state = returnState;
                         reconsume = true;
                         continue stateloop;
                     } else {
                         // c can't be CR, LF or nul if we got here
                         byte[] candidateArr = NamedCharacters.NAMES[candidate];
                         if (candidateArr.length == 0
                                 || candidateArr[candidateArr.length - 1] != ';') {
                             /*
                              * If the last character matched is not a U+003B
                              * SEMICOLON (;), there is a parse error.
                              */
-                            if ((returnState & (~1)) != 0) {
+                            if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
                                 /*
                                  * If the entity is being consumed as part of an
                                  * attribute, and the last character matched is
                                  * not a U+003B SEMICOLON (;),
                                  */
                                 char ch;
                                 if (strBufMark == strBufLen) {
                                     ch = c;
@@ -4584,17 +4532,17 @@ public class Tokenizer implements Locato
                                      */
                                     errNoNamedCharacterMatch();
                                     appendStrBufToLongStrBuf();
                                     state = returnState;
                                     reconsume = true;
                                     continue stateloop;
                                 }
                             }
-                            if ((returnState & (~1)) != 0) {
+                            if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
                                 errUnescapedAmpersandInterpretedAsCharacterReference();
                             } else {
                                 errNotSemicolonTerminated();
                             }
                         }
 
                         /*
                          * Otherwise, return a character token for the character
@@ -4617,27 +4565,27 @@ public class Tokenizer implements Locato
                             // appendLongStrBuf(buf[strBufOffset + i]);
                             // }
                             // } else {
                             // tokenHandler.characters(buf, strBufOffset
                             // + strBufMark, strBufLen
                             // - strBufMark);
                             // }
                             // } else {
-                            if ((returnState & (~1)) != 0) {
+                            if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
                                 for (int i = strBufMark; i < strBufLen; i++) {
                                     appendLongStrBuf(strBuf[i]);
                                 }
                             } else {
                                 tokenHandler.characters(strBuf, strBufMark,
                                         strBufLen - strBufMark);
                             }
                             // }
                         }
-                        if ((returnState & (~1)) == 0) {
+                        if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
                             cstart = pos;
                         }
                         state = returnState;
                         reconsume = true;
                         continue stateloop;
                         /*
                          * If the markup contains I'm &notit; I tell you, the
                          * entity is parsed as "not", as in, I'm ¬it; I tell
@@ -4717,27 +4665,27 @@ public class Tokenizer implements Locato
                          */
                         if (c >= '0' && c <= '9') {
                             seenDigits = true;
                             value *= 10;
                             value += c - '0';
                             continue;
                         } else if (c == ';') {
                             if (seenDigits) {
-                                if ((returnState & (~1)) == 0) {
+                                if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
                                     cstart = pos + 1;
                                 }
                                 state = Tokenizer.HANDLE_NCR_VALUE;
                                 // FALL THROUGH continue stateloop;
                                 break decimalloop;
                             } else {
                                 errNoDigitsInNCR();
                                 appendStrBuf(';');
                                 emitOrAppendStrBuf(returnState);
-                                if ((returnState & (~1)) == 0) {
+                                if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
                                     cstart = pos + 1;
                                 }
                                 state = returnState;
                                 continue stateloop;
                             }
                         } else {
                             /*
                              * If no characters match the range, then don't
@@ -4748,25 +4696,25 @@ public class Tokenizer implements Locato
                              * 
                              * Otherwise, if the next character is a U+003B
                              * SEMICOLON, consume that too. If it isn't, there
                              * is a parse error.
                              */
                             if (!seenDigits) {
                                 errNoDigitsInNCR();
                                 emitOrAppendStrBuf(returnState);
-                                if ((returnState & (~1)) == 0) {
+                                if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
                                     cstart = pos;
                                 }
                                 state = returnState;
                                 reconsume = true;
                                 continue stateloop;
                             } else {
                                 errCharRefLacksSemicolon();
-                                if ((returnState & (~1)) == 0) {
+                                if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
                                     cstart = pos;
                                 }
                                 state = Tokenizer.HANDLE_NCR_VALUE;
                                 reconsume = true;
                                 // FALL THROUGH continue stateloop;
                                 break decimalloop;
                             }
                         }
@@ -4808,26 +4756,26 @@ public class Tokenizer implements Locato
                             continue;
                         } else if (c >= 'a' && c <= 'f') {
                             seenDigits = true;
                             value *= 16;
                             value += c - 'a' + 10;
                             continue;
                         } else if (c == ';') {
                             if (seenDigits) {
-                                if ((returnState & (~1)) == 0) {
+                                if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
                                     cstart = pos + 1;
                                 }
                                 state = Tokenizer.HANDLE_NCR_VALUE;
                                 continue stateloop;
                             } else {
                                 errNoDigitsInNCR();
                                 appendStrBuf(';');
                                 emitOrAppendStrBuf(returnState);
-                                if ((returnState & (~1)) == 0) {
+                                if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
                                     cstart = pos + 1;
                                 }
                                 state = returnState;
                                 continue stateloop;
                             }
                         } else {
                             /*
                              * If no characters match the range, then don't
@@ -4838,25 +4786,25 @@ public class Tokenizer implements Locato
                              * 
                              * Otherwise, if the next character is a U+003B
                              * SEMICOLON, consume that too. If it isn't, there
                              * is a parse error.
                              */
                             if (!seenDigits) {
                                 errNoDigitsInNCR();
                                 emitOrAppendStrBuf(returnState);
-                                if ((returnState & (~1)) == 0) {
+                                if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
                                     cstart = pos;
                                 }
                                 state = returnState;
                                 reconsume = true;
                                 continue stateloop;
                             } else {
                                 errCharRefLacksSemicolon();
-                                if ((returnState & (~1)) == 0) {
+                                if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
                                     cstart = pos;
                                 }
                                 state = Tokenizer.HANDLE_NCR_VALUE;
                                 reconsume = true;
                                 continue stateloop;
                             }
                         }
                     }
@@ -4938,17 +4886,17 @@ public class Tokenizer implements Locato
                         switch (c) {
                             case '/':
                                 /*
                                  * U+002F SOLIDUS (/) Set the temporary buffer
                                  * to the empty string. Switch to the script
                                  * data end tag open state.
                                  */
                                 index = 0;
-                                clearStrBufForNextState();
+                                clearStrBuf();
                                 state = Tokenizer.NON_DATA_END_TAG_NAME;
                                 continue stateloop;
                             case '!':
                                 tokenHandler.characters(Tokenizer.LT_GT, 0, 1);
                                 cstart = pos;
                                 state = Tokenizer.SCRIPT_DATA_ESCAPE_START;
                                 break scriptdatalessthansignloop; // FALL THRU
                             // continue
@@ -5196,17 +5144,17 @@ public class Tokenizer implements Locato
                         switch (c) {
                             case '/':
                                 /*
                                  * U+002F SOLIDUS (/) Set the temporary buffer
                                  * to the empty string. Switch to the script
                                  * data escaped end tag open state.
                                  */
                                 index = 0;
-                                clearStrBufForNextState();
+                                clearStrBuf();
                                 returnState = Tokenizer.SCRIPT_DATA_ESCAPED;
                                 state = Tokenizer.NON_DATA_END_TAG_NAME;
                                 continue stateloop;
                             case 'S':
                             case 's':
                                 /*
                                  * U+0041 LATIN CAPITAL LETTER A through to
                                  * U+005A LATIN CAPITAL LETTER Z Emit a U+003C
@@ -5559,27 +5507,27 @@ public class Tokenizer implements Locato
                             continue stateloop;
                         case '\r':
                             silentCarriageReturn();
                             /* Anything else Parse error. */
                             errGarbageAfterLtSlash();
                             /*
                              * Switch to the bogus comment state.
                              */
-                            clearLongStrBufAndAppendToComment('\n');
+                            clearLongStrBufAndAppend('\n');
                             state = Tokenizer.BOGUS_COMMENT;
                             break stateloop;
                         case '\n':
                             silentLineFeed();
                             /* Anything else Parse error. */
                             errGarbageAfterLtSlash();
                             /*
                              * Switch to the bogus comment state.
                              */
-                            clearLongStrBufAndAppendToComment('\n');
+                            clearLongStrBufAndAppend('\n');
                             state = Tokenizer.BOGUS_COMMENT;
                             continue stateloop;
                         case '\u0000':
                             c = '\uFFFD';
                             // fall thru
                         default:
                             if (c >= 'A' && c <= 'Z') {
                                 c += 0x20;
@@ -5589,31 +5537,31 @@ public class Tokenizer implements Locato
                                  * U+0061 LATIN SMALL LETTER A through to U+007A
                                  * LATIN SMALL LETTER Z Create a new end tag
                                  * token,
                                  */
                                 endTag = true;
                                 /*
                                  * set its tag name to the input character,
                                  */
-                                clearStrBufAndAppendCurrentC(c);
+                                clearStrBufAndAppend(c);
                                 /*
                                  * then switch to the tag name state. (Don't
                                  * emit the token yet; further details will be
                                  * filled in before it is emitted.)
                                  */
                                 state = Tokenizer.TAG_NAME;
                                 continue stateloop;
                             } else {
                                 /* Anything else Parse error. */
                                 errGarbageAfterLtSlash();
                                 /*
                                  * Switch to the bogus comment state.
                                  */
-                                clearLongStrBufAndAppendToComment(c);
+                                clearLongStrBufAndAppend(c);
                                 state = Tokenizer.BOGUS_COMMENT;
                                 continue stateloop;
                             }
                     }
                     // XXX reorder point
                 case RCDATA:
                     rcdataloop: for (;;) {
                         if (reconsume) {
@@ -5626,17 +5574,17 @@ public class Tokenizer implements Locato
                         }
                         switch (c) {
                             case '&':
                                 /*
                                  * U+0026 AMPERSAND (&) Switch to the character
                                  * reference in RCDATA state.
                                  */
                                 flushChars(buf, pos);
-                                clearStrBufAndAppendCurrentC(c);
+                                clearStrBufAndAppend(c);
                                 additional = '\u0000';
                                 returnState = state;
                                 state = Tokenizer.CONSUME_CHARACTER_REFERENCE;
                                 continue stateloop;
                             case '<':
                                 /*
                                  * U+003C LESS-THAN SIGN (<) Switch to the
                                  * RCDATA less-than sign state.
@@ -5711,17 +5659,17 @@ public class Tokenizer implements Locato
                         switch (c) {
                             case '/':
                                 /*
                                  * U+002F SOLIDUS (/) Set the temporary buffer
                                  * to the empty string. Switch to the script
                                  * data end tag open state.
                                  */
                                 index = 0;
-                                clearStrBufForNextState();
+                                clearStrBuf();
                                 state = Tokenizer.NON_DATA_END_TAG_NAME;
                                 break rawtextrcdatalessthansignloop;
                             // FALL THRU continue stateloop;
                             default:
                                 /*
                                  * Otherwise, emit a U+003C LESS-THAN SIGN
                                  * character token
                                  */
@@ -5744,18 +5692,18 @@ public class Tokenizer implements Locato
                         }
                         c = checkChar(buf, pos);
                         /*
                          * ASSERT! when entering this state, set index to 0 and
                          * call clearStrBuf() assert (contentModelElement !=
                          * null); Let's implement the above without lookahead.
                          * strBuf is the 'temporary buffer'.
                          */
-                        if (index < contentModelElementNameAsArray.length) {
-                            char e = contentModelElementNameAsArray[index];
+                        if (index < endTagExpectationAsArray.length) {
+                            char e = endTagExpectationAsArray[index];
                             char folded = c;
                             if (c >= 'A' && c <= 'Z') {
                                 folded += 0x20;
                             }
                             if (folded != e) {
                                 // [NOCPP[
                                 errHtml4LtSlashInRcdata(folded);
                                 // ]NOCPP]
@@ -5769,17 +5717,17 @@ public class Tokenizer implements Locato
                             }
                             appendStrBuf(c);
                             index++;
                             continue;
                         } else {
                             endTag = true;
                             // XXX replace contentModelElement with different
                             // type
-                            tagName = contentModelElement;
+                            tagName = endTagExpectation;
                             switch (c) {
                                 case '\r':
                                     silentCarriageReturn();
                                     state = Tokenizer.BEFORE_ATTRIBUTE_NAME;
                                     break stateloop;
                                 case '\n':
                                     silentLineFeed();
                                     // fall thru
@@ -5911,17 +5859,17 @@ public class Tokenizer implements Locato
     private void emitReplacementCharacter(@NoLength char[] buf, int pos)
             throws SAXException {
         silentCarriageReturn();
         flushChars(buf, pos);
         tokenHandler.characters(Tokenizer.REPLACEMENT_CHARACTER, 0, 1);
         cstart = Integer.MAX_VALUE;
     }
 
-    private void rememberAmpersandLocation(char add) {
+    private void setAdditionalAndRememberAmpersandLocation(char add) {
         additional = add;
         // [NOCPP[
         ampersandLocation = new LocatorImpl(this);
         // ]NOCPP]
     }
 
     private void bogusDoctype() throws SAXException {
         errBogusDoctype();
@@ -5929,17 +5877,17 @@ public class Tokenizer implements Locato
     }
 
     private void bogusDoctypeWithoutQuirks() throws SAXException {
         errBogusDoctype();
         forceQuirks = false;
     }
 
     private void emitOrAppendStrBuf(int returnState) throws SAXException {
-        if ((returnState & (~1)) != 0) {
+        if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
             appendStrBufToLongStrBuf();
         } else {
             emitStrBuf();
         }
     }
 
     private void handleNcrValue(int returnState) throws SAXException {
         /*
@@ -6434,17 +6382,17 @@ public class Tokenizer implements Locato
                     } else {
                         byte[] candidateArr = NamedCharacters.NAMES[candidate];
                         if (candidateArr.length == 0
                                 || candidateArr[candidateArr.length - 1] != ';') {
                             /*
                              * If the last character matched is not a U+003B
                              * SEMICOLON (;), there is a parse error.
                              */
-                            if ((returnState & (~1)) != 0) {
+                            if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
                                 /*
                                  * If the entity is being consumed as part of an
                                  * attribute, and the last character matched is
                                  * not a U+003B SEMICOLON (;),
                                  */
                                 char ch;
                                 if (strBufMark == strBufLen) {
                                     ch = '\u0000';
@@ -6466,17 +6414,17 @@ public class Tokenizer implements Locato
                                      * unconsumed, and nothing is returned.
                                      */
                                     errNoNamedCharacterMatch();
                                     appendStrBufToLongStrBuf();
                                     state = returnState;
                                     continue eofloop;
                                 }
                             }
-                            if ((returnState & (~1)) != 0) {
+                            if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
                                 errUnescapedAmpersandInterpretedAsCharacterReference();
                             } else {
                                 errNotSemicolonTerminated();
                             }
                         }
 
                         /*
                          * Otherwise, return a character token for the character
@@ -6488,17 +6436,17 @@ public class Tokenizer implements Locato
                         // See if the first slot holds a high surrogate
                         if ((val[0] & 0xFC00) == 0xD800) {
                             emitOrAppendTwo(val, returnState);
                         } else {
                             emitOrAppendOne(val, returnState);
                         }
                         // this is so complicated!
                         if (strBufMark < strBufLen) {
-                            if ((returnState & (~1)) != 0) {
+                            if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
                                 for (int i = strBufMark; i < strBufLen; i++) {
                                     appendLongStrBuf(strBuf[i]);
                                 }
                             } else {
                                 tokenHandler.characters(strBuf, strBufMark,
                                         strBufLen - strBufMark);
                             }
                         }
@@ -6590,27 +6538,27 @@ public class Tokenizer implements Locato
     }
 
     /**
      * @param val
      * @throws SAXException
      */
     private void emitOrAppendTwo(@Const @NoLength char[] val, int returnState)
             throws SAXException {
-        if ((returnState & (~1)) != 0) {
+        if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
             appendLongStrBuf(val[0]);
             appendLongStrBuf(val[1]);
         } else {
             tokenHandler.characters(val, 0, 2);
         }
     }
 
     private void emitOrAppendOne(@Const @NoLength char[] val, int returnState)
             throws SAXException {
-        if ((returnState & (~1)) != 0) {
+        if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
             appendLongStrBuf(val[0]);
         } else {
             tokenHandler.characters(val, 0, 1);
         }
     }
 
     public void end() throws SAXException {
         Portability.releaseArray(strBuf);
@@ -6642,16 +6590,18 @@ public class Tokenizer implements Locato
             attributes = null;
         }
     }
 
     public void requestSuspension() {
         shouldSuspend = true;
     }
 
+    // [NOCPP[
+    
     public void becomeConfident() {
         confident = true;
     }
 
     /**
      * Returns the nextCharOnNewLine.
      * 
      * @return the nextCharOnNewLine
@@ -6677,16 +6627,18 @@ public class Tokenizer implements Locato
      * Returns the col.
      * 
      * @return the col
      */
     public int getCol() {
         return -1;
     }
 
+    // ]NOCPP]
+    
     public boolean isInDataState() {
         return (stateSave == DATA);
     }
 
     public void resetToDataState() {
         strBufLen = 0;
         longStrBufLen = 0;
         stateSave = Tokenizer.DATA;
@@ -6739,18 +6691,18 @@ public class Tokenizer implements Locato
         if (longStrBufLen > longStrBuf.length) {
             Portability.releaseArray(longStrBuf);
             longStrBuf = new char[longStrBufLen];
         }
         System.arraycopy(other.longStrBuf, 0, longStrBuf, 0, longStrBufLen);
 
         stateSave = other.stateSave;
         returnStateSave = other.returnStateSave;
-        contentModelElement = other.contentModelElement;
-        contentModelElementNameAsArray = other.contentModelElementNameAsArray;
+        endTagExpectation = other.endTagExpectation;
+        endTagExpectationAsArray = other.endTagExpectationAsArray;
         // line = 1; XXX line numbers
         lastCR = other.lastCR;
         index = other.index;
         forceQuirks = other.forceQuirks;
         additional = other.additional;
         entCol = other.entCol;
         firstCharKey = other.firstCharKey;
         lo = other.lo;
--- a/parser/html/javasrc/TreeBuilder.java
+++ b/parser/html/javasrc/TreeBuilder.java
@@ -515,29 +515,29 @@ public abstract class TreeBuilder<T> imp
                 elt = createHtmlElementSetAsRoot(tokenizer.emptyAttributes());
             }
             StackNode<T> node = new StackNode<T>(
                     "http://www.w3.org/1999/xhtml", ElementName.HTML, elt);
             currentPtr++;
             stack[currentPtr] = node;
             resetTheInsertionMode();
             if ("title" == contextName || "textarea" == contextName) {
-                tokenizer.setContentModelFlag(Tokenizer.RCDATA, contextName);
+                tokenizer.setStateAndEndTagExpectation(Tokenizer.RCDATA, contextName);
             } else if ("style" == contextName || "xmp" == contextName
                     || "iframe" == contextName || "noembed" == contextName
                     || "noframes" == contextName
                     || (scriptingEnabled && "noscript" == contextName)) {
-                tokenizer.setContentModelFlag(Tokenizer.RAWTEXT, contextName);
+                tokenizer.setStateAndEndTagExpectation(Tokenizer.RAWTEXT, contextName);
             } else if ("plaintext" == contextName) {
-                tokenizer.setContentModelFlag(Tokenizer.PLAINTEXT, contextName);
+                tokenizer.setStateAndEndTagExpectation(Tokenizer.PLAINTEXT, contextName);
             } else if ("script" == contextName) {
-                tokenizer.setContentModelFlag(Tokenizer.SCRIPT_DATA,
+                tokenizer.setStateAndEndTagExpectation(Tokenizer.SCRIPT_DATA,
                         contextName);
             } else {
-                tokenizer.setContentModelFlag(Tokenizer.DATA, contextName);
+                tokenizer.setStateAndEndTagExpectation(Tokenizer.DATA, contextName);
             }
             Portability.releaseLocal(contextName);
             contextName = null;
             Portability.releaseElement(contextNode);
             contextNode = null;
             Portability.releaseElement(elt);
         } else {
             mode = INITIAL;
@@ -789,46 +789,41 @@ public abstract class TreeBuilder<T> imp
     public final void comment(@NoLength char[] buf, int start, int length)
             throws SAXException {
         needToDropLF = false;
         // [NOCPP[
         if (!wantingComments) {
             return;
         }
         // ]NOCPP]
-        commentloop: for (;;) {
-            switch (foreignFlag) {
-                case IN_FOREIGN:
-                    break commentloop;
+        if (foreignFlag != IN_FOREIGN) {
+            switch (mode) {
+                case INITIAL:
+                case BEFORE_HTML:
+                case AFTER_AFTER_BODY:
+                case AFTER_AFTER_FRAMESET:
+                    /*
+                     * A comment token Append a Comment node to the Document
+                     * object with the data attribute set to the data given in
+                     * the comment token.
+                     */
+                    appendCommentToDocument(buf, start, length);
+                    return;
+                case AFTER_BODY:
+                    /*
+                     * A comment token Append a Comment node to the first
+                     * element in the stack of open elements (the html element),
+                     * with the data attribute set to the data given in the
+                     * comment token.
+                     */
+                    flushCharacters();
+                    appendComment(stack[0].node, buf, start, length);
+                    return;
                 default:
-                    switch (mode) {
-                        case INITIAL:
-                        case BEFORE_HTML:
-                        case AFTER_AFTER_BODY:
-                        case AFTER_AFTER_FRAMESET:
-                            /*
-                             * A comment token Append a Comment node to the
-                             * Document object with the data attribute set to
-                             * the data given in the comment token.
-                             */
-                            appendCommentToDocument(buf, start, length);
-                            return;
-                        case AFTER_BODY:
-                            /*
-                             * A comment token Append a Comment node to the
-                             * first element in the stack of open elements (the
-                             * html element), with the data attribute set to the
-                             * data given in the comment token.
-                             */
-                            flushCharacters();
-                            appendComment(stack[0].node, buf, start, length);
-                            return;
-                        default:
-                            break commentloop;
-                    }
+                    break;
             }
         }
         /*
          * A comment token Append a Comment node to the current node with the
          * data attribute set to the data given in the comment token.
          */
         flushCharacters();
         appendComment(stack[currentPtr].node, buf, start, length);
@@ -1216,25 +1211,22 @@ public abstract class TreeBuilder<T> imp
                 if (start < end) {
                     accumulateCharacters(buf, start, end - start);
                 }
         }
     }
 
     public final void eof() throws SAXException {
         flushCharacters();
-        switch (foreignFlag) {
-            case IN_FOREIGN:
-                err("End of file in a foreign namespace context.");
-                while (stack[currentPtr].ns != "http://www.w3.org/1999/xhtml") {
-                    popOnEof();
-                }
-                foreignFlag = TreeBuilder.NOT_IN_FOREIGN;
-            default:
-                // fall through
+        if (foreignFlag == IN_FOREIGN) {
+            err("End of file in a foreign namespace context.");
+            while (stack[currentPtr].ns != "http://www.w3.org/1999/xhtml") {
+                popOnEof();
+            }
+            foreignFlag = TreeBuilder.NOT_IN_FOREIGN;
         }
         eofloop: for (;;) {
             switch (mode) {
                 case INITIAL:
                     /*
                      * Parse error.
                      */
                     // [NOCPP[
@@ -1658,27 +1650,27 @@ public abstract class TreeBuilder<T> imp
                                         // here if
                                         // supporting
                                         // document.write()
                                         appendToCurrentNodeAndPushElement(
                                                 "http://www.w3.org/1999/xhtml",
                                                 elementName, attributes);
                                         originalMode = mode;
                                         mode = TEXT;
-                                        tokenizer.setContentModelFlag(
+                                        tokenizer.setStateAndEndTagExpectation(
                                                 Tokenizer.SCRIPT_DATA, elementName);
                                         attributes = null; // CPP
                                         break starttagloop;
                                     case STYLE:
                                         appendToCurrentNodeAndPushElement(
                                                 "http://www.w3.org/1999/xhtml",
                                                 elementName, attributes);
                                         originalMode = mode;
                                         mode = TEXT;
-                                        tokenizer.setContentModelFlag(
+                                        tokenizer.setStateAndEndTagExpectation(
                                                 Tokenizer.RAWTEXT, elementName);
                                         attributes = null; // CPP
                                         break starttagloop;
                                     case INPUT:
                                         if (!Portability.lowerCaseLiteralEqualsIgnoreAsciiCaseString(
                                                 "hidden",
                                                 attributes.getValue(AttributeName.TYPE))) {
                                             break intableloop;
@@ -1905,17 +1897,17 @@ public abstract class TreeBuilder<T> imp
                                                 elementName, attributes);
                                         attributes = null; // CPP
                                         break starttagloop;
                                     case PLAINTEXT:
                                         implicitlyCloseP();
                                         appendToCurrentNodeAndPushElementMayFoster(
                                                 "http://www.w3.org/1999/xhtml",
                                                 elementName, attributes);
-                                        tokenizer.setContentModelFlag(
+                                        tokenizer.setStateAndEndTagExpectation(
                                                 Tokenizer.PLAINTEXT,
                                                 elementName);
                                         attributes = null; // CPP
                                         break starttagloop;
                                     case A:
                                         int activeAPos = findInListOfActiveFormattingElementsContainsBetweenEndAndLastMarker("a");
                                         if (activeAPos != -1) {
                                             err("An \u201Ca\u201D start tag seen with already an active \u201Ca\u201D element.");
@@ -2123,32 +2115,32 @@ public abstract class TreeBuilder<T> imp
                                         // Portability.delete(inputAttributes);
                                         // Don't delete attributes, they are deleted later
                                         break starttagloop;
                                     case TEXTAREA:
                                         appendToCurrentNodeAndPushElementMayFoster(
                                                 "http://www.w3.org/1999/xhtml",
                                                 elementName, attributes,
                                                 formPointer);
-                                        tokenizer.setContentModelFlag(
+                                        tokenizer.setStateAndEndTagExpectation(
                                                 Tokenizer.RCDATA, elementName);
                                         originalMode = mode;
                                         mode = TEXT;
                                         needToDropLF = true;
                                         attributes = null; // CPP
                                         break starttagloop;
                                     case XMP:
                                         implicitlyCloseP();
                                         reconstructTheActiveFormattingElements();
                                         appendToCurrentNodeAndPushElementMayFoster(
                                                 "http://www.w3.org/1999/xhtml",
                                                 elementName, attributes);
                                         originalMode = mode;
                                         mode = TEXT;
-                                        tokenizer.setContentModelFlag(
+                                        tokenizer.setStateAndEndTagExpectation(
                                                 Tokenizer.RAWTEXT, elementName);
                                         attributes = null; // CPP
                                         break starttagloop;
                                     case NOSCRIPT:
                                         if (!scriptingEnabled) {
                                             reconstructTheActiveFormattingElements();
                                             appendToCurrentNodeAndPushElementMayFoster(
                                                     "http://www.w3.org/1999/xhtml",
@@ -2161,17 +2153,17 @@ public abstract class TreeBuilder<T> imp
                                     case NOFRAMES:
                                     case IFRAME:
                                     case NOEMBED:
                                         appendToCurrentNodeAndPushElementMayFoster(
                                                 "http://www.w3.org/1999/xhtml",
                                                 elementName, attributes);
                                         originalMode = mode;
                                         mode = TEXT;
-                                        tokenizer.setContentModelFlag(
+                                        tokenizer.setStateAndEndTagExpectation(
                                                 Tokenizer.RAWTEXT, elementName);
                                         attributes = null; // CPP
                                         break starttagloop;
                                     case SELECT:
                                         reconstructTheActiveFormattingElements();
                                         appendToCurrentNodeAndPushElementMayFoster(
                                                 "http://www.w3.org/1999/xhtml",
                                                 elementName, attributes,
@@ -2348,28 +2340,28 @@ public abstract class TreeBuilder<T> imp
                                         // Fall through to IN_HEAD_NOSCRIPT
                                         break inheadloop;
                                     case TITLE:
                                         appendToCurrentNodeAndPushElementMayFoster(
                                                 "http://www.w3.org/1999/xhtml",
                                                 elementName, attributes);
                                         originalMode = mode;
                                         mode = TEXT;
-                                        tokenizer.setContentModelFlag(
+                                        tokenizer.setStateAndEndTagExpectation(
                                                 Tokenizer.RCDATA, elementName);
                                         attributes = null; // CPP
                                         break starttagloop;
                                     case NOSCRIPT:
                                         if (scriptingEnabled) {
                                             appendToCurrentNodeAndPushElement(
                                                     "http://www.w3.org/1999/xhtml",
                                                     elementName, attributes);
                                             originalMode = mode;
                                             mode = TEXT;
-                                            tokenizer.setContentModelFlag(
+                                            tokenizer.setStateAndEndTagExpectation(
                                                     Tokenizer.RAWTEXT,
                                                     elementName);
                                         } else {
                                             appendToCurrentNodeAndPushElementMayFoster(
                                                     "http://www.w3.org/1999/xhtml",
                                                     elementName, attributes);
                                             mode = IN_HEAD_NOSCRIPT;
                                         }
@@ -2380,28 +2372,28 @@ public abstract class TreeBuilder<T> imp
                                         // here if
                                         // supporting
                                         // document.write()
                                         appendToCurrentNodeAndPushElementMayFoster(
                                                 "http://www.w3.org/1999/xhtml",
                                                 elementName, attributes);
                                         originalMode = mode;
                                         mode = TEXT;
-                                        tokenizer.setContentModelFlag(
+                                        tokenizer.setStateAndEndTagExpectation(
                                                 Tokenizer.SCRIPT_DATA, elementName);
                                         attributes = null; // CPP
                                         break starttagloop;
                                     case STYLE:
                                     case NOFRAMES:
                                         appendToCurrentNodeAndPushElementMayFoster(
                                                 "http://www.w3.org/1999/xhtml",
                                                 elementName, attributes);
                                         originalMode = mode;
                                         mode = TEXT;
-                                        tokenizer.setContentModelFlag(
+                                        tokenizer.setStateAndEndTagExpectation(
                                                 Tokenizer.RAWTEXT, elementName);
                                         attributes = null; // CPP
                                         break starttagloop;
                                     case HEAD:
                                         /* Parse error. */
                                         err("Start tag for \u201Chead\u201D seen when \u201Chead\u201D was already open.");
                                         /* Ignore the token. */
                                         break starttagloop;
@@ -2437,17 +2429,17 @@ public abstract class TreeBuilder<T> imp
                                     break starttagloop;
                                 case STYLE:
                                 case NOFRAMES:
                                     appendToCurrentNodeAndPushElement(
                                             "http://www.w3.org/1999/xhtml",
                                             elementName, attributes);
                                     originalMode = mode;
                                     mode = TEXT;
-                                    tokenizer.setContentModelFlag(
+                                    tokenizer.setStateAndEndTagExpectation(
                                             Tokenizer.RAWTEXT, elementName);
                                     attributes = null; // CPP
                                     break starttagloop;
                                 case HEAD:
                                     err("Start tag for \u201Chead\u201D seen when \u201Chead\u201D was already open.");
                                     break starttagloop;
                                 case NOSCRIPT:
                                     err("Start tag for \u201Cnoscript\u201D seen when \u201Cnoscript\u201D was already open.");
@@ -2569,17 +2561,17 @@ public abstract class TreeBuilder<T> imp
                                     // here if
                                     // supporting
                                     // document.write()
                                     appendToCurrentNodeAndPushElementMayFoster(
                                             "http://www.w3.org/1999/xhtml",
                                             elementName, attributes);
                                     originalMode = mode;
                                     mode = TEXT;
-                                    tokenizer.setContentModelFlag(
+                                    tokenizer.setStateAndEndTagExpectation(
                                             Tokenizer.SCRIPT_DATA, elementName);
                                     attributes = null; // CPP
                                     break starttagloop;
                                 default:
                                     err("Stray \u201C" + name
                                             + "\u201D start tag.");
                                     break starttagloop;
                             }
@@ -2622,17 +2614,17 @@ public abstract class TreeBuilder<T> imp
                                     attributes = null; // CPP
                                     break starttagloop;
                                 case NOFRAMES:
                                     appendToCurrentNodeAndPushElement(
                                             "http://www.w3.org/1999/xhtml",
                                             elementName, attributes);
                                     originalMode = mode;
                                     mode = TEXT;
-                                    tokenizer.setContentModelFlag(
+                                    tokenizer.setStateAndEndTagExpectation(
                                             Tokenizer.RAWTEXT, elementName);
                                     attributes = null; // CPP
                                     break starttagloop;
                                 default:
                                     err("Stray \u201C" + name
                                             + "\u201D start tag.");
                                     break starttagloop;
                             }
@@ -2818,44 +2810,44 @@ public abstract class TreeBuilder<T> imp
                                 case SCRIPT:
                                     err("\u201Cscript\u201D element between \u201Chead\u201D and \u201Cbody\u201D.");
                                     pushHeadPointerOntoStack();
                                     appendToCurrentNodeAndPushElement(
                                             "http://www.w3.org/1999/xhtml",
                                             elementName, attributes);
                                     originalMode = mode;
                                     mode = TEXT;
-                                    tokenizer.setContentModelFlag(
+                                    tokenizer.setStateAndEndTagExpectation(
                                             Tokenizer.SCRIPT_DATA, elementName);
                                     attributes = null; // CPP
                                     break starttagloop;
                                 case STYLE:
                                 case NOFRAMES:
                                     err("\u201C"
                                             + name
                                             + "\u201D element between \u201Chead\u201D and \u201Cbody\u201D.");
                                     pushHeadPointerOntoStack();
                                     appendToCurrentNodeAndPushElement(
                                             "http://www.w3.org/1999/xhtml",
                                             elementName, attributes);
                                     originalMode = mode;
                                     mode = TEXT;
-                                    tokenizer.setContentModelFlag(
+                                    tokenizer.setStateAndEndTagExpectation(
                                             Tokenizer.RAWTEXT, elementName);
                                     attributes = null; // CPP
                                     break starttagloop;
                                 case TITLE:
                                     err("\u201Ctitle\u201D element outside \u201Chead\u201D.");
                                     pushHeadPointerOntoStack();
                                     appendToCurrentNodeAndPushElement(
                                             "http://www.w3.org/1999/xhtml",
                                             elementName, attributes);
                                     originalMode = mode;
                                     mode = TEXT;
-                                    tokenizer.setContentModelFlag(
+                                    tokenizer.setStateAndEndTagExpectation(
                                             Tokenizer.RCDATA, elementName);
                                     attributes = null; // CPP
                                     break starttagloop;
                                 case HEAD:
                                     err("Stray start tag \u201Chead\u201D.");
                                     break starttagloop;
                                 default:
                                     appendToCurrentNodeAndPushBodyElement();
@@ -2879,17 +2871,17 @@ public abstract class TreeBuilder<T> imp
                         case AFTER_AFTER_FRAMESET:
                             switch (group) {
                                 case NOFRAMES:
                                     appendToCurrentNodeAndPushElementMayFoster(
                                             "http://www.w3.org/1999/xhtml",
                                             elementName, attributes);
                                     originalMode = mode;
                                     mode = TEXT;
-                                    tokenizer.setContentModelFlag(
+                                    tokenizer.setStateAndEndTagExpectation(
                                             Tokenizer.SCRIPT_DATA, elementName);
                                     attributes = null; // CPP
                                     break starttagloop;
                                 default:
                                     err("Stray \u201C" + name
                                             + "\u201D start tag.");
                                     break starttagloop;
                             }
--- a/parser/html/nsHtml5Tokenizer.cpp
+++ b/parser/html/nsHtml5Tokenizer.cpp
@@ -103,78 +103,78 @@ nsHtml5Tokenizer::initLocation(nsString*
 // Destructor: reports the destruction through MOZ_COUNT_DTOR and releases
 // the bmpChar and astralChar character buffers held by the tokenizer.
 nsHtml5Tokenizer::~nsHtml5Tokenizer()
 {
   MOZ_COUNT_DTOR(nsHtml5Tokenizer);
   bmpChar.release();
   astralChar.release();
 }
 
 // Stores the requested tokenizer state in stateSave and, unless that state
 // is NS_HTML5TOKENIZER_DATA, resolves the given atom to an element name and
 // records it as the end tag the tokenizer should expect.
 // (Renamed by this patch from setContentModelFlag; the parameters and the
 // member fields were renamed to match.)
 void 
-nsHtml5Tokenizer::setContentModelFlag(PRInt32 contentModelFlag, nsIAtom* contentModelElement)
+nsHtml5Tokenizer::setStateAndEndTagExpectation(PRInt32 specialTokenizerState, nsIAtom* endTagExpectation)
 {
-  this->stateSave = contentModelFlag;
-  if (contentModelFlag == NS_HTML5TOKENIZER_DATA) {
+  this->stateSave = specialTokenizerState;
+  if (specialTokenizerState == NS_HTML5TOKENIZER_DATA) {
     // Early return: in the DATA case the previously stored expectation is
     // left as it was and the atom argument is not consulted.
     return;
   }
   // Copy the atom's characters into a temporary array so the element name
   // can be looked up by buffer; the temporary is released right after.
-  jArray<PRUnichar,PRInt32> asArray = nsHtml5Portability::newCharArrayFromLocal(contentModelElement);
-  this->contentModelElement = nsHtml5ElementName::elementNameByBuffer(asArray, 0, asArray.length, interner);
+  jArray<PRUnichar,PRInt32> asArray = nsHtml5Portability::newCharArrayFromLocal(endTagExpectation);
+  this->endTagExpectation = nsHtml5ElementName::elementNameByBuffer(asArray, 0, asArray.length, interner);
   asArray.release();
   // Refresh the cached array form of the expected end tag name.
-  contentModelElementToArray();
+  endTagExpectationToArray();
 }
 
 // Overload taking an already-resolved element name: stores the state in
 // stateSave, stores the element name as the expected end tag and refreshes
 // the cached array form. Unlike the nsIAtom* overload there is no early
 // return for NS_HTML5TOKENIZER_DATA here.
 void 
-nsHtml5Tokenizer::setContentModelFlag(PRInt32 contentModelFlag, nsHtml5ElementName* contentModelElement)
+nsHtml5Tokenizer::setStateAndEndTagExpectation(PRInt32 specialTokenizerState, nsHtml5ElementName* endTagExpectation)
 {
-  this->stateSave = contentModelFlag;
-  this->contentModelElement = contentModelElement;
-  contentModelElementToArray();
+  this->stateSave = specialTokenizerState;
+  this->endTagExpectation = endTagExpectation;
+  endTagExpectationToArray();
 }
 
 // Selects the constant character array that corresponds to the group of the
 // currently expected end tag (title, script, style, plaintext, xmp,
 // textarea, iframe, noembed, noscript, noframes) and stores it in
 // endTagExpectationAsArray (contentModelElementNameAsArray before this
 // patch). For any other group the stored array is left unchanged.
 void 
-nsHtml5Tokenizer::contentModelElementToArray()
+nsHtml5Tokenizer::endTagExpectationToArray()
 {
-  switch(contentModelElement->group) {
+  switch(endTagExpectation->group) {
     case NS_HTML5TREE_BUILDER_TITLE: {
-      contentModelElementNameAsArray = TITLE_ARR;
+      endTagExpectationAsArray = TITLE_ARR;
       return;
     }
     case NS_HTML5TREE_BUILDER_SCRIPT: {
-      contentModelElementNameAsArray = SCRIPT_ARR;
+      endTagExpectationAsArray = SCRIPT_ARR;
       return;
     }
     case NS_HTML5TREE_BUILDER_STYLE: {
-      contentModelElementNameAsArray = STYLE_ARR;
+      endTagExpectationAsArray = STYLE_ARR;
       return;
     }
     case NS_HTML5TREE_BUILDER_PLAINTEXT: {
-      contentModelElementNameAsArray = PLAINTEXT_ARR;
+      endTagExpectationAsArray = PLAINTEXT_ARR;
       return;
     }
     case NS_HTML5TREE_BUILDER_XMP: {
-      contentModelElementNameAsArray = XMP_ARR;
+      endTagExpectationAsArray = XMP_ARR;
       return;
     }
     case NS_HTML5TREE_BUILDER_TEXTAREA: {
-      contentModelElementNameAsArray = TEXTAREA_ARR;
+      endTagExpectationAsArray = TEXTAREA_ARR;
       return;
     }
     case NS_HTML5TREE_BUILDER_IFRAME: {
-      contentModelElementNameAsArray = IFRAME_ARR;
+      endTagExpectationAsArray = IFRAME_ARR;
       return;
     }
     case NS_HTML5TREE_BUILDER_NOEMBED: {
-      contentModelElementNameAsArray = NOEMBED_ARR;
+      endTagExpectationAsArray = NOEMBED_ARR;
       return;
     }
     case NS_HTML5TREE_BUILDER_NOSCRIPT: {
-      contentModelElementNameAsArray = NOSCRIPT_ARR;
+      endTagExpectationAsArray = NOSCRIPT_ARR;
       return;
     }
     case NS_HTML5TREE_BUILDER_NOFRAMES: {
-      contentModelElementNameAsArray = NOFRAMES_ARR;
+      endTagExpectationAsArray = NOFRAMES_ARR;
       return;
     }
     default: {
       // No array is assigned for other groups; whatever was stored by an
       // earlier call stays in place.
       // NOTE(review): presumably callers never pass other groups - confirm.
 
       return;
     }
   }
 }
@@ -187,36 +187,16 @@ nsHtml5Tokenizer::setLineNumber(PRInt32 
 
 // Returns the static nsHtml5HtmlAttributes::EMPTY_ATTRIBUTES object; no new
 // attribute holder is allocated by this call.
 nsHtml5HtmlAttributes* 
 nsHtml5Tokenizer::emptyAttributes()
 {
   return nsHtml5HtmlAttributes::EMPTY_ATTRIBUTES;
 }
 
 void 
-nsHtml5Tokenizer::clearStrBufAndAppendCurrentC(PRUnichar c)
-{
-  strBuf[0] = c;
-  strBufLen = 1;
-}
-
-void 
-nsHtml5Tokenizer::clearStrBufAndAppendForceWrite(PRUnichar c)
-{
-  strBuf[0] = c;
-  strBufLen = 1;
-}
-
-void 
-nsHtml5Tokenizer::clearStrBufForNextState()
-{
-  strBufLen = 0;
-}
-
-void 
 nsHtml5Tokenizer::appendStrBuf(PRUnichar c)
 {
   if (strBufLen == strBuf.length) {
     jArray<PRUnichar,PRInt32> newBuf = jArray<PRUnichar,PRInt32>(strBuf.length + NS_HTML5TOKENIZER_BUFFER_GROW_BY);
     nsHtml5ArrayCopy::arraycopy(strBuf, newBuf, strBuf.length);
     strBuf.release();
     strBuf = newBuf;
   }
@@ -239,92 +219,41 @@ void
 nsHtml5Tokenizer::emitStrBuf()
 {
   // Forward the first strBufLen characters of strBuf to the token handler
   // as character data; nothing is emitted when the buffer is empty.
   if (strBufLen > 0) {
     tokenHandler->characters(strBuf, 0, strBufLen);
   }
 }
 
 void 
-nsHtml5Tokenizer::clearLongStrBufForNextState()
-{
-  longStrBufLen = 0;
-}
-
-void 
-nsHtml5Tokenizer::clearLongStrBuf()
-{
-  longStrBufLen = 0;
-}
-
-void 
-nsHtml5Tokenizer::clearLongStrBufAndAppendCurrentC(PRUnichar c)
-{
-  longStrBuf[0] = c;
-  longStrBufLen = 1;
-}
-
-void 
-nsHtml5Tokenizer::clearLongStrBufAndAppendToComment(PRUnichar c)
-{
-  longStrBuf[0] = c;
-  longStrBufLen = 1;
-}
-
-void 
 nsHtml5Tokenizer::appendLongStrBuf(PRUnichar c)
 {
   // Appends one character to longStrBuf. When the buffer is full it is
   // replaced by a new one of 1.5x the current length, the old contents are
   // copied over and the old buffer is released.
   // NOTE(review): for a buffer length of 0 or 1 the expression
   // longStrBufLen + (longStrBufLen >> 1) yields the same length, so no
   // growth would happen before the write below - presumably the initial
   // buffer is always larger; confirm against the initialization code.
   if (longStrBufLen == longStrBuf.length) {
     jArray<PRUnichar,PRInt32> newBuf = jArray<PRUnichar,PRInt32>(longStrBufLen + (longStrBufLen >> 1));
     nsHtml5ArrayCopy::arraycopy(longStrBuf, newBuf, longStrBuf.length);
     longStrBuf.release();
     longStrBuf = newBuf;
   }
   longStrBuf[longStrBufLen++] = c;
 }
 
 void 
-nsHtml5Tokenizer::appendSecondHyphenToBogusComment()
-{
-  appendLongStrBuf('-');
-}
-
-void 
-nsHtml5Tokenizer::adjustDoubleHyphenAndAppendToLongStrBufAndErr(PRUnichar c)
-{
-
-  appendLongStrBuf(c);
-}
-
-void 
 nsHtml5Tokenizer::appendLongStrBuf(jArray<PRUnichar,PRInt32> buffer, PRInt32 offset, PRInt32 length)
 {
   // Appends `length` characters of `buffer`, starting at `offset`, to
   // longStrBuf. reqLen is the logical length after the append.
   PRInt32 reqLen = longStrBufLen + length;
   if (longStrBuf.length < reqLen) {
     // Not enough room: allocate 1.5x the required length, carry over the
     // existing contents and release the old buffer.
     jArray<PRUnichar,PRInt32> newBuf = jArray<PRUnichar,PRInt32>(reqLen + (reqLen >> 1));
     nsHtml5ArrayCopy::arraycopy(longStrBuf, newBuf, longStrBuf.length);
     longStrBuf.release();
     longStrBuf = newBuf;
   }
   // Copy the new characters in after the current end, then update the length.
   nsHtml5ArrayCopy::arraycopy(buffer, offset, longStrBuf, longStrBufLen, length);
   longStrBufLen = reqLen;
 }
 
-void 
-nsHtml5Tokenizer::appendLongStrBuf(jArray<PRUnichar,PRInt32> arr)
-{
-  appendLongStrBuf(arr, 0, arr.length);
-}
-
-void 
-nsHtml5Tokenizer::appendStrBufToLongStrBuf()
-{
-  appendLongStrBuf(strBuf, 0, strBufLen);
-}
-
 // Builds a string from the first longStrBufLen characters of longStrBuf
 // via nsHtml5Portability::newStringFromBuffer and returns it.
 // NOTE(review): the name suggests a freshly allocated string whose
 // ownership passes to the caller - confirm in nsHtml5Portability.
 nsString* 
 nsHtml5Tokenizer::longStrBufToString()
 {
   return nsHtml5Portability::newStringFromBuffer(longStrBuf, 0, longStrBufLen);
 }
 
 void 
 nsHtml5Tokenizer::emitComment(PRInt32 provisionalHyphens, PRInt32 pos)
@@ -334,17 +263,17 @@ nsHtml5Tokenizer::emitComment(PRInt32 pr
 }
 
 // Emits the pending run of characters buf[cstart, pos) to the token handler
 // when the run is non-empty, then sets cstart to the largest 32-bit signed
 // value so that `pos > cstart` stays false until cstart is assigned again.
 // This patch only replaces the literal 0x7fffffff with PR_INT32_MAX.
 void 
 nsHtml5Tokenizer::flushChars(PRUnichar* buf, PRInt32 pos)
 {
   if (pos > cstart) {
     tokenHandler->characters(buf, cstart, pos - cstart);
   }
-  cstart = 0x7fffffff;
+  cstart = PR_INT32_MAX;
 }
 
 // Clears the tokenizer's current attribute holder pointer by setting it to
 // nsnull. Nothing is deleted or released here.
 // NOTE(review): presumably ownership has already moved elsewhere - confirm.
 void 
 nsHtml5Tokenizer::resetAttributes()
 {
   attributes = nsnull;
 }
 
@@ -404,21 +333,16 @@ nsHtml5Tokenizer::addAttributeWithValue(
   if (!!attributeName) {
     nsString* val = longStrBufToString();
     attributes->addAttribute(attributeName, val);
     attributeName = nsnull;
   }
 }
 
 void 
-nsHtml5Tokenizer::startErrorReporting()
-{
-}
-
-void 
 nsHtml5Tokenizer::start()
 {
   // Initialize the tokenizer's own state first, then tell the token handler
   // that tokenization is starting, passing this tokenizer to it.
   initializeWithoutStarting();
   tokenHandler->startTokenization(this);
 }
 
 PRBool 
 nsHtml5Tokenizer::tokenizeBuffer(nsHtml5UTF16Buffer* buffer)
@@ -478,18 +402,18 @@ nsHtml5Tokenizer::stateLoop(PRInt32 stat
             if (++pos == endPos) {
               goto stateloop_end;
             }
             c = checkChar(buf, pos);
           }
           switch(c) {
             case '&': {
               flushChars(buf, pos);
-              clearStrBufAndAppendCurrentC(c);
-              rememberAmpersandLocation('\0');
+              clearStrBufAndAppend(c);
+              setAdditionalAndRememberAmpersandLocation('\0');
               returnState = state;
               state = NS_HTML5TOKENIZER_CONSUME_CHARACTER_REFERENCE;
               goto stateloop;
             }
             case '<': {
               flushChars(buf, pos);
               state = NS_HTML5TOKENIZER_TAG_OPEN;
               goto dataloop_end;
@@ -515,37 +439,37 @@ nsHtml5Tokenizer::stateLoop(PRInt32 stat
       case NS_HTML5TOKENIZER_TAG_OPEN: {
         for (; ; ) {
           if (++pos == endPos) {
             goto stateloop_end;
           }
           c = checkChar(buf, pos);
           if (c >= 'A' && c <= 'Z') {
             endTag = PR_FALSE;
-            clearStrBufAndAppendForceWrite((PRUnichar) (c + 0x20));
+            clearStrBufAndAppend((PRUnichar) (c + 0x20));
             state = NS_HTML5TOKENIZER_TAG_NAME;
             goto tagopenloop_end;
           } else if (c >= 'a' && c <= 'z') {
             endTag = PR_FALSE;
-            clearStrBufAndAppendCurrentC(c);
+            clearStrBufAndAppend(c);
             state = NS_HTML5TOKENIZER_TAG_NAME;
             goto tagopenloop_end;
           }
           switch(c) {
             case '!': {
               state = NS_HTML5TOKENIZER_MARKUP_DECLARATION_OPEN;
               goto stateloop;
             }
             case '/': {
               state = NS_HTML5TOKENIZER_CLOSE_TAG_OPEN;
               goto stateloop;
             }
             case '\?': {
 
-              clearLongStrBufAndAppendToComment(c);
+              clearLongStrBufAndAppend(c);
               state = NS_HTML5TOKENIZER_BOGUS_COMMENT;
               goto stateloop;
             }
             case '>': {
 
               tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 2);
               cstart = pos + 1;
               state = NS_HTML5TOKENIZER_DATA;
@@ -653,17 +577,17 @@ nsHtml5Tokenizer::stateLoop(PRInt32 stat
             case '\"':
             case '\'':
             case '<':
             case '=':
             default: {
               if (c >= 'A' && c <= 'Z') {
                 c += 0x20;
               }
-              clearStrBufAndAppendCurrentC(c);
+              clearStrBufAndAppend(c);
               state = NS_HTML5TOKENIZER_ATTRIBUTE_NAME;
               goto beforeattributenameloop_end;
             }
           }
         }
         beforeattributenameloop_end: ;
       }
       case NS_HTML5TOKENIZER_ATTRIBUTE_NAME: {
@@ -741,29 +665,29 @@ nsHtml5Tokenizer::stateLoop(PRInt32 stat
               silentLineFeed();
             }
             case ' ':
             case '\t':
             case '\f': {
               continue;
             }
             case '\"': {
-              clearLongStrBufForNextState();
+              clearLongStrBuf();
               state = NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_DOUBLE_QUOTED;
               goto beforeattributevalueloop_end;
             }
             case '&': {
               clearLongStrBuf();
               state = NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_UNQUOTED;
 
               reconsume = PR_TRUE;
               goto stateloop;
             }
             case '\'': {
-              clearLongStrBufForNextState();
+              clearLongStrBuf();
               state = NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_SINGLE_QUOTED;
               goto stateloop;
             }
             case '>': {
 
               addAttributeWithoutValue();
               state = emitCurrentTagToken(PR_FALSE, pos);
               if (shouldSuspend) {
@@ -773,17 +697,17 @@ nsHtml5Tokenizer::stateLoop(PRInt32 stat
             }
             case '\0': {
               c = 0xfffd;
             }
             case '<':
             case '=':
             case '`':
             default: {
-              clearLongStrBufAndAppendCurrentC(c);
+              clearLongStrBufAndAppend(c);
               state = NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_UNQUOTED;
 
               goto stateloop;
             }
           }
         }
         beforeattributevalueloop_end: ;
       }
@@ -799,18 +723,18 @@ nsHtml5Tokenizer::stateLoop(PRInt32 stat
           }
           switch(c) {
             case '\"': {
               addAttributeWithValue();
               state = NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_VALUE_QUOTED;
               goto attributevaluedoublequotedloop_end;
             }
             case '&': {
-              clearStrBufAndAppendCurrentC(c);
-              rememberAmpersandLocation('\"');
+              clearStrBufAndAppend(c);
+              setAdditionalAndRememberAmpersandLocation('\"');
               returnState = state;
               state = NS_HTML5TOKENIZER_CONSUME_CHARACTER_REFERENCE;
               goto stateloop;
             }
             case '\r': {
               appendLongStrBufCarriageReturn();
               goto stateloop_end;
             }
@@ -915,18 +839,18 @@ nsHtml5Tokenizer::stateLoop(PRInt32 stat
             case ' ':
             case '\t':
             case '\f': {
               addAttributeWithValue();
               state = NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_NAME;
               goto stateloop;
             }
             case '&': {
-              clearStrBufAndAppendCurrentC(c);
-              rememberAmpersandLocation('>');
+              clearStrBufAndAppend(c);
+              setAdditionalAndRememberAmpersandLocation('>');
               returnState = state;
               state = NS_HTML5TOKENIZER_CONSUME_CHARACTER_REFERENCE;
               goto stateloop;
             }
             case '>': {
               addAttributeWithValue();
               state = emitCurrentTagToken(PR_FALSE, pos);
               if (shouldSuspend) {
@@ -992,17 +916,17 @@ nsHtml5Tokenizer::stateLoop(PRInt32 stat
             case '\"':
             case '\'':
             case '<':
             default: {
               addAttributeWithoutValue();
               if (c >= 'A' && c <= 'Z') {
                 c += 0x20;
               }
-              clearStrBufAndAppendCurrentC(c);
+              clearStrBufAndAppend(c);
               state = NS_HTML5TOKENIZER_ATTRIBUTE_NAME;
               goto stateloop;
             }
           }
         }
       }
       case NS_HTML5TOKENIZER_BOGUS_COMMENT: {
         for (; ; ) {
@@ -1085,30 +1009,30 @@ nsHtml5Tokenizer::stateLoop(PRInt32 stat
       case NS_HTML5TOKENIZER_MARKUP_DECLARATION_OPEN: {
         for (; ; ) {
           if (++pos == endPos) {
             goto stateloop_end;
           }
           c = checkChar(buf, pos);
           switch(c) {
             case '-': {
-              clearLongStrBufAndAppendToComment(c);
+              clearLongStrBufAndAppend(c);
               state = NS_HTML5TOKENIZER_MARKUP_DECLARATION_HYPHEN;
               goto markupdeclarationopenloop_end;
             }
             case 'd':
             case 'D': {
-              clearLongStrBufAndAppendToComment(c);
+              clearLongStrBufAndAppend(c);
               index = 0;
               state = NS_HTML5TOKENIZER_MARKUP_DECLARATION_OCTYPE;
               goto stateloop;
             }
             case '[': {
               if (tokenHandler->inForeign()) {
-                clearLongStrBufAndAppendToComment(c);
+                clearLongStrBufAndAppend(c);
                 index = 0;
                 state = NS_HTML5TOKENIZER_CDATA_START;
                 goto stateloop;
               } else {
               }
             }
             default: {
 
@@ -1127,17 +1051,17 @@ nsHtml5Tokenizer::stateLoop(PRInt32 stat
             goto stateloop_end;
           }
           c = checkChar(buf, pos);
           switch(c) {
             case '\0': {
               goto stateloop_end;
             }
             case '-': {
-              clearLongStrBufForNextState();
+              clearLongStrBuf();
               state = NS_HTML5TOKENIZER_COMMENT_START;
               goto markupdeclarationhyphenloop_end;
             }
             default: {
 
               state = NS_HTML5TOKENIZER_BOGUS_COMMENT;
               reconsume = PR_TRUE;
               goto stateloop;
@@ -1514,17 +1438,17 @@ nsHtml5Tokenizer::stateLoop(PRInt32 stat
             }
             case '\0': {
               c = 0xfffd;
             }
             default: {
               if (c >= 'A' && c <= 'Z') {
                 c += 0x20;
               }
-              clearStrBufAndAppendCurrentC(c);
+              clearStrBufAndAppend(c);
               state = NS_HTML5TOKENIZER_DOCTYPE_NAME;
               goto beforedoctypenameloop_end;
             }
           }
         }
         beforedoctypenameloop_end: ;
       }
       case NS_HTML5TOKENIZER_DOCTYPE_NAME: {
@@ -1664,23 +1588,23 @@ nsHtml5Tokenizer::stateLoop(PRInt32 stat
             case ' ':
             case '\t':
             case '\f': {
               state = NS_HTML5TOKENIZER_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER;
               goto afterdoctypepublickeywordloop_end;
             }
             case '\"': {
 
-              clearLongStrBufForNextState();
+              clearLongStrBuf();
               state = NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED;
               goto stateloop;
             }
             case '\'': {
 
-              clearLongStrBufForNextState();
+              clearLongStrBuf();
               state = NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED;
               goto stateloop;
             }
             case '>': {
 
               forceQuirks = PR_TRUE;
               emitDoctypeToken(pos);
               state = NS_HTML5TOKENIZER_DATA;
@@ -1710,22 +1634,22 @@ nsHtml5Tokenizer::stateLoop(PRInt32 stat
               silentLineFeed();
             }
             case ' ':
             case '\t':
             case '\f': {
               continue;
             }
             case '\"': {
-              clearLongStrBufForNextState();
+              clearLongStrBuf();
               state = NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED;
               goto beforedoctypepublicidentifierloop_end;
             }
             case '\'': {
-              clearLongStrBufForNextState();
+              clearLongStrBuf();
               state = NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED;
               goto stateloop;
             }
             case '>': {
 
               forceQuirks = PR_TRUE;
               emitDoctypeToken(pos);
               state = NS_HTML5TOKENIZER_DATA;
@@ -1802,23 +1726,23 @@ nsHtml5Tokenizer::stateLoop(PRInt32 stat
             }
             case '>': {
               emitDoctypeToken(pos);
               state = NS_HTML5TOKENIZER_DATA;
               goto stateloop;
             }
             case '\"': {
 
-              clearLongStrBufForNextState();
+              clearLongStrBuf();
               state = NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
               goto stateloop;
             }
             case '\'': {
 
-              clearLongStrBufForNextState();
+              clearLongStrBuf();
               state = NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
               goto stateloop;
             }
             default: {
               bogusDoctype();
               state = NS_HTML5TOKENIZER_BOGUS_DOCTYPE;
               goto stateloop;
             }
@@ -1846,22 +1770,22 @@ nsHtml5Tokenizer::stateLoop(PRInt32 stat
               continue;
             }
             case '>': {
               emitDoctypeToken(pos);
               state = NS_HTML5TOKENIZER_DATA;
               goto stateloop;
             }
             case '\"': {
-              clearLongStrBufForNextState();
+              clearLongStrBuf();
               state = NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
               goto betweendoctypepublicandsystemidentifiersloop_end;
             }
             case '\'': {
-              clearLongStrBufForNextState();
+              clearLongStrBuf();
               state = NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
               goto stateloop;
             }
             default: {
               bogusDoctype();
               state = NS_HTML5TOKENIZER_BOGUS_DOCTYPE;
               goto stateloop;
             }
@@ -2019,23 +1943,23 @@ nsHtml5Tokenizer::stateLoop(PRInt32 stat
             case ' ':
             case '\t':
             case '\f': {
               state = NS_HTML5TOKENIZER_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER;
               goto afterdoctypesystemkeywordloop_end;
             }
             case '\"': {
 
-              clearLongStrBufForNextState();
+              clearLongStrBuf();
               state = NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
               goto stateloop;
             }
             case '\'': {
 
-              clearLongStrBufForNextState();
+              clearLongStrBuf();
               state = NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
               goto stateloop;
             }
             case '>': {
 
               forceQuirks = PR_TRUE;
               emitDoctypeToken(pos);
               state = NS_HTML5TOKENIZER_DATA;
@@ -2065,22 +1989,22 @@ nsHtml5Tokenizer::stateLoop(PRInt32 stat
               silentLineFeed();
             }
             case ' ':
             case '\t':
             case '\f': {
               continue;
             }
             case '\"': {
-              clearLongStrBufForNextState();
+              clearLongStrBuf();
               state = NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
               goto stateloop;
             }
             case '\'': {
-              clearLongStrBufForNextState();
+              clearLongStrBuf();
               state = NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
               goto beforedoctypesystemidentifierloop_end;
             }
             case '>': {
 
               forceQuirks = PR_TRUE;
               emitDoctypeToken(pos);
               state = NS_HTML5TOKENIZER_DATA;
@@ -2284,18 +2208,18 @@ nsHtml5Tokenizer::stateLoop(PRInt32 stat
           }
           switch(c) {
             case '\'': {
               addAttributeWithValue();
               state = NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_VALUE_QUOTED;
               goto stateloop;
             }
             case '&': {
-              clearStrBufAndAppendCurrentC(c);
-              rememberAmpersandLocation('\'');
+              clearStrBufAndAppend(c);
+              setAdditionalAndRememberAmpersandLocation('\'');
               returnState = state;
               state = NS_HTML5TOKENIZER_CONSUME_CHARACTER_REFERENCE;
               goto attributevaluesinglequotedloop_end;
             }
             case '\r': {
               appendLongStrBufCarriageReturn();
               goto stateloop_end;
             }
@@ -2326,17 +2250,17 @@ nsHtml5Tokenizer::stateLoop(PRInt32 stat
           case ' ':
           case '\t':
           case '\n':
           case '\r':
           case '\f':
           case '<':
           case '&': {
             emitOrAppendStrBuf(returnState);
-            if (!(returnState & (~1))) {
+            if (!(returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) {
               cstart = pos;
             }
             state = returnState;
             reconsume = PR_TRUE;
             goto stateloop;
           }
           case '#': {
             appendStrBuf('#');
@@ -2352,17 +2276,17 @@ nsHtml5Tokenizer::stateLoop(PRInt32 stat
             }
             if (c >= 'a' && c <= 'z') {
               firstCharKey = c - 'a' + 26;
             } else if (c >= 'A' && c <= 'Z') {
               firstCharKey = c - 'A';
             } else {
 
               emitOrAppendStrBuf(returnState);
-              if (!(returnState & (~1))) {
+              if (!(returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) {
                 cstart = pos;
               }
               state = returnState;
               reconsume = PR_TRUE;
               goto stateloop;
             }
             appendStrBuf(c);
             state = NS_HTML5TOKENIZER_CHARACTER_REFERENCE_HILO_LOOKUP;
@@ -2383,17 +2307,17 @@ nsHtml5Tokenizer::stateLoop(PRInt32 stat
             const PRInt32* row = nsHtml5NamedCharacters::HILO_ACCEL[c];
             if (!!row) {
               hilo = row[firstCharKey];
             }
           }
           if (!hilo) {
 
             emitOrAppendStrBuf(returnState);
-            if (!(returnState & (~1))) {
+            if (!(returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) {
               cstart = pos;
             }
             state = returnState;
             reconsume = PR_TRUE;
             goto stateloop;
           }
           appendStrBuf(c);
           lo = hilo & 0xFFFF;
@@ -2452,62 +2376,62 @@ nsHtml5Tokenizer::stateLoop(PRInt32 stat
           }
           appendStrBuf(c);
           continue;
         }
         outer_end: ;
         if (candidate == -1) {
 
           emitOrAppendStrBuf(returnState);
-          if (!(returnState & (~1))) {
+          if (!(returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) {
             cstart = pos;
           }
           state = returnState;
           reconsume = PR_TRUE;
           goto stateloop;
         } else {
           jArray<PRInt8,PRInt32> candidateArr = nsHtml5NamedCharacters::NAMES[candidate];
           if (!candidateArr.length || candidateArr[candidateArr.length - 1] != ';') {
-            if ((returnState & (~1))) {
+            if ((returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) {
               PRUnichar ch;
               if (strBufMark == strBufLen) {
                 ch = c;
               } else {
                 ch = strBuf[strBufMark];
               }
               if ((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')) {
 
                 appendStrBufToLongStrBuf();
                 state = returnState;
                 reconsume = PR_TRUE;
                 goto stateloop;
               }
             }
-            if ((returnState & (~1))) {
+            if ((returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) {
 
             } else {
 
             }
           }
           const PRUnichar* val = nsHtml5NamedCharacters::VALUES[candidate];
           if ((val[0] & 0xFC00) == 0xD800) {
             emitOrAppendTwo(val, returnState);
           } else {
             emitOrAppendOne(val, returnState);
           }
           if (strBufMark < strBufLen) {
-            if ((returnState & (~1))) {
+            if ((returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) {
               for (PRInt32 i = strBufMark; i < strBufLen; i++) {
                 appendLongStrBuf(strBuf[i]);
               }
             } else {
               tokenHandler->characters(strBuf, strBufMark, strBufLen - strBufMark);
             }
           }
-          if (!(returnState & (~1))) {
+          if (!(returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) {
             cstart = pos;
           }
           state = returnState;
           reconsume = PR_TRUE;
           goto stateloop;
         }
       }
       case NS_HTML5TOKENIZER_CONSUME_NCR: {
@@ -2547,44 +2471,44 @@ nsHtml5Tokenizer::stateLoop(PRInt32 stat
           prevValue = value;
           if (c >= '0' && c <= '9') {
             seenDigits = PR_TRUE;
             value *= 10;
             value += c - '0';
             continue;
           } else if (c == ';') {
             if (seenDigits) {
-              if (!(returnState & (~1))) {
+              if (!(returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) {
                 cstart = pos + 1;
               }
               state = NS_HTML5TOKENIZER_HANDLE_NCR_VALUE;
               goto decimalloop_end;
             } else {
 
               appendStrBuf(';');
               emitOrAppendStrBuf(returnState);
-              if (!(returnState & (~1))) {
+              if (!(returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) {
                 cstart = pos + 1;
               }
               state = returnState;
               goto stateloop;
             }
           } else {
             if (!seenDigits) {
 
               emitOrAppendStrBuf(returnState);
-              if (!(returnState & (~1))) {
+              if (!(returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) {
                 cstart = pos;
               }
               state = returnState;
               reconsume = PR_TRUE;
               goto stateloop;
             } else {
 
-              if (!(returnState & (~1))) {
+              if (!(returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) {
                 cstart = pos;
               }
               state = NS_HTML5TOKENIZER_HANDLE_NCR_VALUE;
               reconsume = PR_TRUE;
               goto decimalloop_end;
             }
           }
         }
@@ -2617,44 +2541,44 @@ nsHtml5Tokenizer::stateLoop(PRInt32 stat
             continue;
           } else if (c >= 'a' && c <= 'f') {
             seenDigits = PR_TRUE;
             value *= 16;
             value += c - 'a' + 10;
             continue;
           } else if (c == ';') {
             if (seenDigits) {
-              if (!(returnState & (~1))) {
+              if (!(returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) {
                 cstart = pos + 1;
               }
               state = NS_HTML5TOKENIZER_HANDLE_NCR_VALUE;
               goto stateloop;
             } else {
 
               appendStrBuf(';');
               emitOrAppendStrBuf(returnState);
-              if (!(returnState & (~1))) {
+              if (!(returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) {
                 cstart = pos + 1;
               }
               state = returnState;
               goto stateloop;
             }
           } else {
             if (!seenDigits) {
 
               emitOrAppendStrBuf(returnState);
-              if (!(returnState & (~1))) {
+              if (!(returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) {
                 cstart = pos;
               }
               state = returnState;
               reconsume = PR_TRUE;
               goto stateloop;
             } else {
 
-              if (!(returnState & (~1))) {
+              if (!(returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) {
                 cstart = pos;
               }
               state = NS_HTML5TOKENIZER_HANDLE_NCR_VALUE;
               reconsume = PR_TRUE;
               goto stateloop;
             }
           }
         }
@@ -2727,17 +2651,17 @@ nsHtml5Tokenizer::stateLoop(PRInt32 stat
         for (; ; ) {
           if (++pos == endPos) {
             goto stateloop_end;
           }
           c = checkChar(buf, pos);
           switch(c) {
             case '/': {
               index = 0;
-              clearStrBufForNextState();
+              clearStrBuf();
               state = NS_HTML5TOKENIZER_NON_DATA_END_TAG_NAME;
               goto stateloop;
             }
             case '!': {
               tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1);
               cstart = pos;
               state = NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPE_START;
               goto scriptdatalessthansignloop_end;
@@ -2912,17 +2836,17 @@ nsHtml5Tokenizer::stateLoop(PRInt32 stat
         for (; ; ) {
           if (++pos == endPos) {
             goto stateloop_end;
           }
           c = checkChar(buf, pos);
           switch(c) {
             case '/': {
               index = 0;
-              clearStrBufForNextState();
+              clearStrBuf();
               returnState = NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED;
               state = NS_HTML5TOKENIZER_NON_DATA_END_TAG_NAME;
               goto stateloop;
             }
             case 'S':
             case 's': {
               tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1);
               cstart = pos;
@@ -3175,42 +3099,42 @@ nsHtml5Tokenizer::stateLoop(PRInt32 stat
 
             cstart = pos + 1;
             state = NS_HTML5TOKENIZER_DATA;
             goto stateloop;
           }
           case '\r': {
             silentCarriageReturn();
 
-            clearLongStrBufAndAppendToComment('\n');
+            clearLongStrBufAndAppend('\n');
             state = NS_HTML5TOKENIZER_BOGUS_COMMENT;
             goto stateloop_end;
           }
           case '\n': {
             silentLineFeed();
 
-            clearLongStrBufAndAppendToComment('\n');
+            clearLongStrBufAndAppend('\n');
             state = NS_HTML5TOKENIZER_BOGUS_COMMENT;
             goto stateloop;
           }
           case '\0': {
             c = 0xfffd;
           }
           default: {
             if (c >= 'A' && c <= 'Z') {
               c += 0x20;
             }
             if (c >= 'a' && c <= 'z') {
               endTag = PR_TRUE;
-              clearStrBufAndAppendCurrentC(c);
+              clearStrBufAndAppend(c);
               state = NS_HTML5TOKENIZER_TAG_NAME;
               goto stateloop;
             } else {
 
-              clearLongStrBufAndAppendToComment(c);
+              clearLongStrBufAndAppend(c);
               state = NS_HTML5TOKENIZER_BOGUS_COMMENT;
               goto stateloop;
             }
           }
         }
       }
       case NS_HTML5TOKENIZER_RCDATA: {
         for (; ; ) {
@@ -3220,17 +3144,17 @@ nsHtml5Tokenizer::stateLoop(PRInt32 stat
             if (++pos == endPos) {
               goto stateloop_end;
             }
             c = checkChar(buf, pos);
           }
           switch(c) {
             case '&': {
               flushChars(buf, pos);
-              clearStrBufAndAppendCurrentC(c);
+              clearStrBufAndAppend(c);
               additional = '\0';
               returnState = state;
               state = NS_HTML5TOKENIZER_CONSUME_CHARACTER_REFERENCE;
               goto stateloop;
             }
             case '<': {
               flushChars(buf, pos);
               returnState = state;
@@ -3294,17 +3218,17 @@ nsHtml5Tokenizer::stateLoop(PRInt32 stat
         for (; ; ) {
           if (++pos == endPos) {
             goto stateloop_end;
           }
           c = checkChar(buf, pos);
           switch(c) {
             case '/': {
               index = 0;
-              clearStrBufForNextState();
+              clearStrBuf();
               state = NS_HTML5TOKENIZER_NON_DATA_END_TAG_NAME;
               goto rawtextrcdatalessthansignloop_end;
             }
             default: {
               tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 1);
               cstart = pos;
               state = returnState;
               reconsume = PR_TRUE;
@@ -3315,18 +3239,18 @@ nsHtml5Tokenizer::stateLoop(PRInt32 stat
         rawtextrcdatalessthansignloop_end: ;
       }
       case NS_HTML5TOKENIZER_NON_DATA_END_TAG_NAME: {
         for (; ; ) {
           if (++pos == endPos) {
             goto stateloop_end;
           }
           c = checkChar(buf, pos);
-          if (index < contentModelElementNameAsArray.length) {
-            PRUnichar e = contentModelElementNameAsArray[index];
+          if (index < endTagExpectationAsArray.length) {
+            PRUnichar e = endTagExpectationAsArray[index];
             PRUnichar folded = c;
             if (c >= 'A' && c <= 'Z') {
               folded += 0x20;
             }
             if (folded != e) {
               tokenHandler->characters(nsHtml5Tokenizer::LT_SOLIDUS, 0, 2);
               emitStrBuf();
               cstart = pos;
@@ -3334,17 +3258,17 @@ nsHtml5Tokenizer::stateLoop(PRInt32 stat
               reconsume = PR_TRUE;
               goto stateloop;
             }
             appendStrBuf(c);
             index++;
             continue;
           } else {
             endTag = PR_TRUE;
-            tagName = contentModelElement;
+            tagName = endTagExpectation;
             switch(c) {
               case '\r': {
                 silentCarriageReturn();
                 state = NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_NAME;
                 goto stateloop_end;
               }
               case '\n': {
                 silentLineFeed();
@@ -3420,17 +3344,17 @@ nsHtml5Tokenizer::emitReplacementCharact
 {
   silentCarriageReturn();
   flushChars(buf, pos);
   tokenHandler->characters(nsHtml5Tokenizer::REPLACEMENT_CHARACTER, 0, 1);
   cstart = PR_INT32_MAX;
 }
 
 void 
-nsHtml5Tokenizer::rememberAmpersandLocation(PRUnichar add)
+nsHtml5Tokenizer::setAdditionalAndRememberAmpersandLocation(PRUnichar add)
 {
   additional = add;
 }
 
 void 
 nsHtml5Tokenizer::bogusDoctype()
 {
 
@@ -3442,17 +3366,17 @@ nsHtml5Tokenizer::bogusDoctypeWithoutQui
 {
 
   forceQuirks = PR_FALSE;
 }
 
 void 
 nsHtml5Tokenizer::emitOrAppendStrBuf(PRInt32 returnState)
 {
-  if ((returnState & (~1))) {
+  if ((returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) {
     appendStrBufToLongStrBuf();
   } else {
     emitStrBuf();
   }
 }
 
 void 
 nsHtml5Tokenizer::handleNcrValue(PRInt32 returnState)
@@ -3719,44 +3643,44 @@ nsHtml5Tokenizer::eof()
         if (candidate == -1) {
 
           emitOrAppendStrBuf(returnState);
           state = returnState;
           goto eofloop;
         } else {
           jArray<PRInt8,PRInt32> candidateArr = nsHtml5NamedCharacters::NAMES[candidate];
           if (!candidateArr.length || candidateArr[candidateArr.length - 1] != ';') {
-            if ((returnState & (~1))) {
+            if ((returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) {
               PRUnichar ch;
               if (strBufMark == strBufLen) {
                 ch = '\0';
               } else {
                 ch = strBuf[strBufMark];
               }
               if ((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')) {
 
                 appendStrBufToLongStrBuf();
                 state = returnState;
                 goto eofloop;
               }
             }
-            if ((returnState & (~1))) {
+            if ((returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) {
 
             } else {
 
             }
           }
           const PRUnichar* val = nsHtml5NamedCharacters::VALUES[candidate];
           if ((val[0] & 0xFC00) == 0xD800) {
             emitOrAppendTwo(val, returnState);
           } else {
             emitOrAppendOne(val, returnState);
           }
           if (strBufMark < strBufLen) {
-            if ((returnState & (~1))) {
+            if ((returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) {
               for (PRInt32 i = strBufMark; i < strBufLen; i++) {
                 appendLongStrBuf(strBuf[i]);
               }
             } else {
               tokenHandler->characters(strBuf, strBufMark, strBufLen - strBufMark);
             }
           }
           state = returnState;
@@ -3808,28 +3732,28 @@ nsHtml5Tokenizer::internalEncodingDeclar
   if (!!encodingDeclarationHandler) {
     encodingDeclarationHandler->internalEncodingDeclaration(internalCharset);
   }
 }
 
 void 
 nsHtml5Tokenizer::emitOrAppendTwo(const PRUnichar* val, PRInt32 returnState)
 {
-  if ((returnState & (~1))) {
+  if ((returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) {
     appendLongStrBuf(val[0]);
     appendLongStrBuf(val[1]);
   } else {
     tokenHandler->characters(val, 0, 2);
   }
 }
 
 void 
 nsHtml5Tokenizer::emitOrAppendOne(const PRUnichar* val, PRInt32 returnState)
 {
-  if ((returnState & (~1))) {
+  if ((returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) {
     appendLongStrBuf(val[0]);
   } else {
     tokenHandler->characters(val, 0, 1);
   }
 }
 
 void 
 nsHtml5Tokenizer::end()
@@ -3865,46 +3789,16 @@ nsHtml5Tokenizer::end()
 }
 
 void 
 nsHtml5Tokenizer::requestSuspension()
 {
   shouldSuspend = PR_TRUE;
 }
 
-void 
-nsHtml5Tokenizer::becomeConfident()
-{
-  confident = PR_TRUE;
-}
-
-PRBool 
-nsHtml5Tokenizer::isNextCharOnNewLine()
-{
-  return PR_FALSE;
-}
-
-PRBool 
-nsHtml5Tokenizer::isPrevCR()
-{
-  return lastCR;
-}
-
-PRInt32 
-nsHtml5Tokenizer::getLine()
-{
-  return -1;
-}
-
-PRInt32 
-nsHtml5Tokenizer::getCol()
-{
-  return -1;
-}
-
 PRBool 
 nsHtml5Tokenizer::isInDataState()
 {
   return (stateSave == NS_HTML5TOKENIZER_DATA);
 }
 
 void 
 nsHtml5Tokenizer::resetToDataState()
@@ -3954,18 +3848,18 @@ nsHtml5Tokenizer::loadState(nsHtml5Token
   longStrBufLen = other->longStrBufLen;
   if (longStrBufLen > longStrBuf.length) {
     longStrBuf.release();
     longStrBuf = jArray<PRUnichar,PRInt32>(longStrBufLen);
   }
   nsHtml5ArrayCopy::arraycopy(other->longStrBuf, longStrBuf, longStrBufLen);
   stateSave = other->stateSave;
   returnStateSave = other->returnStateSave;
-  contentModelElement = other->contentModelElement;
-  contentModelElementNameAsArray = other->contentModelElementNameAsArray;
+  endTagExpectation = other->endTagExpectation;
+  endTagExpectationAsArray = other->endTagExpectationAsArray;
   lastCR = other->lastCR;
   index = other->index;
   forceQuirks = other->forceQuirks;
   additional = other->additional;
   entCol = other->entCol;
   firstCharKey = other->firstCharKey;
   lo = other->lo;
   hi = other->hi;
--- a/parser/html/nsHtml5Tokenizer.h
+++ b/parser/html/nsHtml5Tokenizer.h
@@ -113,19 +113,19 @@ class nsHtml5Tokenizer
     nsString* systemId;
     jArray<PRUnichar,PRInt32> strBuf;
     PRInt32 strBufLen;
     jArray<PRUnichar,PRInt32> longStrBuf;
     PRInt32 longStrBufLen;
     jArray<PRUnichar,PRInt32> bmpChar;
     jArray<PRUnichar,PRInt32> astralChar;
   protected:
-    nsHtml5ElementName* contentModelElement;
+    nsHtml5ElementName* endTagExpectation;
   private:
-    jArray<PRUnichar,PRInt32> contentModelElementNameAsArray;
+    jArray<PRUnichar,PRInt32> endTagExpectationAsArray;
   protected:
     PRBool endTag;
   private:
     nsHtml5ElementName* tagName;
   protected:
     nsHtml5AttributeName* attributeName;
   private:
     nsIAtom* doctypeName;
@@ -139,61 +139,86 @@ class nsHtml5Tokenizer
   private:
     PRInt32 line;
     nsHtml5AtomTable* interner;
   public:
     nsHtml5Tokenizer(nsHtml5TreeBuilder* tokenHandler);
     void setInterner(nsHtml5AtomTable* interner);
     void initLocation(nsString* newPublicId, nsString* newSystemId);
     ~nsHtml5Tokenizer();
-    void setContentModelFlag(PRInt32 contentModelFlag, nsIAtom* contentModelElement);
-    void setContentModelFlag(PRInt32 contentModelFlag, nsHtml5ElementName* contentModelElement);
+    void setStateAndEndTagExpectation(PRInt32 specialTokenizerState, nsIAtom* endTagExpectation);
+    void setStateAndEndTagExpectation(PRInt32 specialTokenizerState, nsHtml5ElementName* endTagExpectation);
   private:
-    void contentModelElementToArray();
+    void endTagExpectationToArray();
   public:
     void setLineNumber(PRInt32 line);
     inline PRInt32 getLineNumber()
     {
       return line;
     }
 
     nsHtml5HtmlAttributes* emptyAttributes();
   private:
-    void clearStrBufAndAppendCurrentC(PRUnichar c);
-    void clearStrBufAndAppendForceWrite(PRUnichar c);
-    void clearStrBufForNextState();
+    inline void clearStrBufAndAppend(PRUnichar c)
+    {
+      strBuf[0] = c;
+      strBufLen = 1;
+    }
+
+    inline void clearStrBuf()
+    {
+      strBufLen = 0;
+    }
+
     void appendStrBuf(PRUnichar c);
   protected:
     nsString* strBufToString();
   private:
     void strBufToDoctypeName();
     void emitStrBuf();
-    void clearLongStrBufForNextState();
-    void clearLongStrBuf();
-    void clearLongStrBufAndAppendCurrentC(PRUnichar c);
-    void clearLongStrBufAndAppendToComment(PRUnichar c);
+    inline void clearLongStrBuf()
+    {
+      longStrBufLen = 0;
+    }
+
+    inline void clearLongStrBufAndAppend(PRUnichar c)
+    {
+      longStrBuf[0] = c;
+      longStrBufLen = 1;
+    }
+
     void appendLongStrBuf(PRUnichar c);
-    void appendSecondHyphenToBogusComment();
-    void adjustDoubleHyphenAndAppendToLongStrBufAndErr(PRUnichar c);
+    inline void appendSecondHyphenToBogusComment()
+    {
+      appendLongStrBuf('-');
+    }
+
+    inline void adjustDoubleHyphenAndAppendToLongStrBufAndErr(PRUnichar c)
+    {
+
+      appendLongStrBuf(c);
+    }
+
     void appendLongStrBuf(jArray<PRUnichar,PRInt32> buffer, PRInt32 offset, PRInt32 length);
-    void appendLongStrBuf(jArray<PRUnichar,PRInt32> arr);
-    void appendStrBufToLongStrBuf();
+    inline void appendStrBufToLongStrBuf()
+    {
+      appendLongStrBuf(strBuf, 0, strBufLen);
+    }
+
     nsString* longStrBufToString();
     void emitComment(PRInt32 provisionalHyphens, PRInt32 pos);
   protected:
     void flushChars(PRUnichar* buf, PRInt32 pos);
   private:
     void resetAttributes();
     void strBufToElementNameString();
     PRInt32 emitCurrentTagToken(PRBool selfClosing, PRInt32 pos);
     void attributeNameComplete();
     void addAttributeWithoutValue();
     void addAttributeWithValue();
-  protected:
-    void startErrorReporting();
   public:
     void start();
     PRBool tokenizeBuffer(nsHtml5UTF16Buffer* buffer);
   private:
     PRInt32 stateLoop(PRInt32 state, PRUnichar c, PRInt32 pos, PRUnichar* buf, PRBool reconsume, PRInt32 returnState, PRInt32 endPos);
     void initDoctypeFields();
     inline void adjustDoubleHyphenAndAppendToLongStrBufCarriageReturn()
     {
@@ -229,17 +254,17 @@ class nsHtml5Tokenizer
     inline void silentLineFeed()
     {
       ++line;
     }
 
   private:
     void emitCarriageReturn(PRUnichar* buf, PRInt32 pos);
     void emitReplacementCharacter(PRUnichar* buf, PRInt32 pos);
-    void rememberAmpersandLocation(PRUnichar add);
+    void setAdditionalAndRememberAmpersandLocation(PRUnichar add);
     void bogusDoctype();
     void bogusDoctypeWithoutQuirks();
     void emitOrAppendStrBuf(PRInt32 returnState);
     void handleNcrValue(PRInt32 returnState);
   public:
     void eof();
   private:
     void emitDoctypeToken(PRInt32 pos);
@@ -252,21 +277,16 @@ class nsHtml5Tokenizer
   public:
     void internalEncodingDeclaration(nsString* internalCharset);
   private:
     void emitOrAppendTwo(const PRUnichar* val, PRInt32 returnState);
     void emitOrAppendOne(const PRUnichar* val, PRInt32 returnState);
   public:
     void end();
     void requestSuspension();
-    void becomeConfident();
-    PRBool isNextCharOnNewLine();
-    PRBool isPrevCR();
-    PRInt32 getLine();
-    PRInt32 getCol();
     PRBool isInDataState();
     void resetToDataState();
     void loadState(nsHtml5Tokenizer* other);
     void initializeWithoutStarting();
     void setEncodingDeclarationHandler(nsAHtml5EncodingDeclarationHandler* encodingDeclarationHandler);
     static void initializeStatics();
     static void releaseStatics();
 };
@@ -288,16 +308,17 @@ jArray<PRUnichar,PRInt32> nsHtml5Tokeniz
 jArray<PRUnichar,PRInt32> nsHtml5Tokenizer::XMP_ARR = 0;
 jArray<PRUnichar,PRInt32> nsHtml5Tokenizer::TEXTAREA_ARR = 0;
 jArray<PRUnichar,PRInt32> nsHtml5Tokenizer::IFRAME_ARR = 0;
 jArray<PRUnichar,PRInt32> nsHtml5Tokenizer::NOEMBED_ARR = 0;
 jArray<PRUnichar,PRInt32> nsHtml5Tokenizer::NOSCRIPT_ARR = 0;
 jArray<PRUnichar,PRInt32> nsHtml5Tokenizer::NOFRAMES_ARR = 0;
 #endif
 
+#define NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK ~1
 #define NS_HTML5TOKENIZER_DATA 0
 #define NS_HTML5TOKENIZER_RCDATA 1
 #define NS_HTML5TOKENIZER_SCRIPT_DATA 2
 #define NS_HTML5TOKENIZER_PLAINTEXT 3
 #define NS_HTML5TOKENIZER_TAG_OPEN 4
 #define NS_HTML5TOKENIZER_CLOSE_TAG_OPEN 5
 #define NS_HTML5TOKENIZER_TAG_NAME 6
 #define NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_NAME 7
--- a/parser/html/nsHtml5TreeBuilder.cpp
+++ b/parser/html/nsHtml5TreeBuilder.cpp
@@ -92,25 +92,25 @@ nsHtml5TreeBuilder::startTokenization(ns
     } else {
       elt = createHtmlElementSetAsRoot(tokenizer->emptyAttributes());
     }
     nsHtml5StackNode* node = new nsHtml5StackNode(kNameSpaceID_XHTML, nsHtml5ElementName::ELT_HTML, elt);
     currentPtr++;
     stack[currentPtr] = node;
     resetTheInsertionMode();
     if (nsHtml5Atoms::title == contextName || nsHtml5Atoms::textarea == contextName) {
-      tokenizer->setContentModelFlag(NS_HTML5TOKENIZER_RCDATA, contextName);
+      tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_RCDATA, contextName);
     } else if (nsHtml5Atoms::style == contextName || nsHtml5Atoms::xmp == contextName || nsHtml5Atoms::iframe == contextName || nsHtml5Atoms::noembed == contextName || nsHtml5Atoms::noframes == contextName || (scriptingEnabled && nsHtml5Atoms::noscript == contextName)) {
-      tokenizer->setContentModelFlag(NS_HTML5TOKENIZER_RAWTEXT, contextName);
+      tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_RAWTEXT, contextName);
     } else if (nsHtml5Atoms::plaintext == contextName) {
-      tokenizer->setContentModelFlag(NS_HTML5TOKENIZER_PLAINTEXT, contextName);
+      tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_PLAINTEXT, contextName);
     } else if (nsHtml5Atoms::script == contextName) {
-      tokenizer->setContentModelFlag(NS_HTML5TOKENIZER_SCRIPT_DATA, contextName);
+      tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_SCRIPT_DATA, contextName);
     } else {
-      tokenizer->setContentModelFlag(NS_HTML5TOKENIZER_DATA, contextName);
+      tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_DATA, contextName);
     }
     nsHtml5Portability::releaseLocal(contextName);
     contextName = nsnull;
     ;
     contextNode = nsnull;
     ;
   } else {
     mode = NS_HTML5TREE_BUILDER_INITIAL;
@@ -156,43 +156,35 @@ nsHtml5TreeBuilder::doctype(nsIAtom* nam
 
   return;
 }
 
 void 
 nsHtml5TreeBuilder::comment(PRUnichar* buf, PRInt32 start, PRInt32 length)
 {
   needToDropLF = PR_FALSE;
-  for (; ; ) {
-    switch(foreignFlag) {
-      case NS_HTML5TREE_BUILDER_IN_FOREIGN: {
-        goto commentloop_end;
+  if (foreignFlag != NS_HTML5TREE_BUILDER_IN_FOREIGN) {
+    switch(mode) {
+      case NS_HTML5TREE_BUILDER_INITIAL:
+      case NS_HTML5TREE_BUILDER_BEFORE_HTML:
+      case NS_HTML5TREE_BUILDER_AFTER_AFTER_BODY:
+      case NS_HTML5TREE_BUILDER_AFTER_AFTER_FRAMESET: {
+        appendCommentToDocument(buf, start, length);
+        return;
+      }
+      case NS_HTML5TREE_BUILDER_AFTER_BODY: {
+        flushCharacters();
+        appendComment(stack[0]->node, buf, start, length);
+        return;
       }
       default: {
-        switch(mode) {
-          case NS_HTML5TREE_BUILDER_INITIAL:
-          case NS_HTML5TREE_BUILDER_BEFORE_HTML:
-          case NS_HTML5TREE_BUILDER_AFTER_AFTER_BODY:
-          case NS_HTML5TREE_BUILDER_AFTER_AFTER_FRAMESET: {
-            appendCommentToDocument(buf, start, length);
-            return;
-          }
-          case NS_HTML5TREE_BUILDER_AFTER_BODY: {
-            flushCharacters();
-            appendComment(stack[0]->node, buf, start, length);
-            return;
-          }
-          default: {
-            goto commentloop_end;
-          }
-        }
+        break;
       }
     }
   }
-  commentloop_end: ;
   flushCharacters();
   appendComment(stack[currentPtr]->node, buf, start, length);
   return;
 }
 
 void 
 nsHtml5TreeBuilder::characters(const PRUnichar* buf, PRInt32 start, PRInt32 length)
 {
@@ -428,26 +420,22 @@ nsHtml5TreeBuilder::characters(const PRU
     }
   }
 }
 
 void 
 nsHtml5TreeBuilder::eof()
 {
   flushCharacters();
-  switch(foreignFlag) {
-    case NS_HTML5TREE_BUILDER_IN_FOREIGN: {
-
-      while (stack[currentPtr]->ns != kNameSpaceID_XHTML) {
-        popOnEof();
-      }
-      foreignFlag = NS_HTML5TREE_BUILDER_NOT_IN_FOREIGN;
+  if (foreignFlag == NS_HTML5TREE_BUILDER_IN_FOREIGN) {
+
+    while (stack[currentPtr]->ns != kNameSpaceID_XHTML) {
+      popOnEof();
     }
-    default:
-      ; // fall through
+    foreignFlag = NS_HTML5TREE_BUILDER_NOT_IN_FOREIGN;
   }
   for (; ; ) {
     switch(mode) {
       case NS_HTML5TREE_BUILDER_INITIAL: {
         documentModeInternal(QUIRKS_MODE, nsnull, nsnull, PR_FALSE);
         mode = NS_HTML5TREE_BUILDER_BEFORE_HTML;
         continue;
       }
@@ -769,25 +757,25 @@ nsHtml5TreeBuilder::startTag(nsHtml5Elem
                   }
                   resetTheInsertionMode();
                   goto starttagloop;
                 }
                 case NS_HTML5TREE_BUILDER_SCRIPT: {
                   appendToCurrentNodeAndPushElement(kNameSpaceID_XHTML, elementName, attributes);
                   originalMode = mode;
                   mode = NS_HTML5TREE_BUILDER_TEXT;
-                  tokenizer->setContentModelFlag(NS_HTML5TOKENIZER_SCRIPT_DATA, elementName);
+                  tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_SCRIPT_DATA, elementName);
                   attributes = nsnull;
                   goto starttagloop_end;
                 }
                 case NS_HTML5TREE_BUILDER_STYLE: {
                   appendToCurrentNodeAndPushElement(kNameSpaceID_XHTML, elementName, attributes);
                   originalMode = mode;
                   mode = NS_HTML5TREE_BUILDER_TEXT;
-                  tokenizer->setContentModelFlag(NS_HTML5TOKENIZER_RAWTEXT, elementName);
+                  tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_RAWTEXT, elementName);
                   attributes = nsnull;
                   goto starttagloop_end;
                 }
                 case NS_HTML5TREE_BUILDER_INPUT: {
                   if (!nsHtml5Portability::lowerCaseLiteralEqualsIgnoreAsciiCaseString("hidden", attributes->getValue(nsHtml5AttributeName::ATTR_TYPE))) {
                     goto intableloop_end;
                   }
                   appendVoidElementToCurrent(kNameSpaceID_XHTML, name, attributes, formPointer);
@@ -1002,17 +990,17 @@ nsHtml5TreeBuilder::startTag(nsHtml5Elem
                   implicitlyCloseP();
                   appendToCurrentNodeAndPushElementMayFoster(kNameSpaceID_XHTML, elementName, attributes);
                   attributes = nsnull;
                   goto starttagloop_end;
                 }
                 case NS_HTML5TREE_BUILDER_PLAINTEXT: {
                   implicitlyCloseP();
                   appendToCurrentNodeAndPushElementMayFoster(kNameSpaceID_XHTML, elementName, attributes);
-                  tokenizer->setContentModelFlag(NS_HTML5TOKENIZER_PLAINTEXT, elementName);
+                  tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_PLAINTEXT, elementName);
                   attributes = nsnull;
                   goto starttagloop_end;
                 }
                 case NS_HTML5TREE_BUILDER_A: {
                   PRInt32 activeAPos = findInListOfActiveFormattingElementsContainsBetweenEndAndLastMarker(nsHtml5Atoms::a);
                   if (activeAPos != -1) {
 
                     nsHtml5StackNode* activeA = listOfActiveFormattingElements[activeAPos];
@@ -1157,30 +1145,30 @@ nsHtml5TreeBuilder::startTag(nsHtml5Elem
                   pop();
                   appendVoidElementToCurrentMayFoster(kNameSpaceID_XHTML, nsHtml5ElementName::ELT_HR, nsHtml5HtmlAttributes::EMPTY_ATTRIBUTES);
                   pop();
                   selfClosing = PR_FALSE;
                   goto starttagloop_end;
                 }
                 case NS_HTML5TREE_BUILDER_TEXTAREA: {
                   appendToCurrentNodeAndPushElementMayFoster(kNameSpaceID_XHTML, elementName, attributes, formPointer);
-                  tokenizer->setContentModelFlag(NS_HTML5TOKENIZER_RCDATA, elementName);
+                  tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_RCDATA, elementName);
                   originalMode = mode;
                   mode = NS_HTML5TREE_BUILDER_TEXT;
                   needToDropLF = PR_TRUE;
                   attributes = nsnull;
                   goto starttagloop_end;
                 }
                 case NS_HTML5TREE_BUILDER_XMP: {
                   implicitlyCloseP();
                   reconstructTheActiveFormattingElements();
                   appendToCurrentNodeAndPushElementMayFoster(kNameSpaceID_XHTML, elementName, attributes);
                   originalMode = mode;
                   mode = NS_HTML5TREE_BUILDER_TEXT;
-                  tokenizer->setContentModelFlag(NS_HTML5TOKENIZER_RAWTEXT, elementName);
+                  tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_RAWTEXT, elementName);
                   attributes = nsnull;
                   goto starttagloop_end;
                 }
                 case NS_HTML5TREE_BUILDER_NOSCRIPT: {
                   if (!scriptingEnabled) {
                     reconstructTheActiveFormattingElements();
                     appendToCurrentNodeAndPushElementMayFoster(kNameSpaceID_XHTML, elementName, attributes);
                     attributes = nsnull;
@@ -1189,17 +1177,17 @@ nsHtml5TreeBuilder::startTag(nsHtml5Elem
                   }
                 }
                 case NS_HTML5TREE_BUILDER_NOFRAMES:
                 case NS_HTML5TREE_BUILDER_IFRAME:
                 case NS_HTML5TREE_BUILDER_NOEMBED: {
                   appendToCurrentNodeAndPushElementMayFoster(kNameSpaceID_XHTML, elementName, attributes);
                   originalMode = mode;
                   mode = NS_HTML5TREE_BUILDER_TEXT;
-                  tokenizer->setContentModelFlag(NS_HTML5TOKENIZER_RAWTEXT, elementName);
+                  tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_RAWTEXT, elementName);
                   attributes = nsnull;
                   goto starttagloop_end;
                 }
                 case NS_HTML5TREE_BUILDER_SELECT: {
                   reconstructTheActiveFormattingElements();
                   appendToCurrentNodeAndPushElementMayFoster(kNameSpaceID_XHTML, elementName, attributes, formPointer);
                   switch(mode) {
                     case NS_HTML5TREE_BUILDER_IN_TABLE:
@@ -1337,47 +1325,47 @@ nsHtml5TreeBuilder::startTag(nsHtml5Elem
                 case NS_HTML5TREE_BUILDER_META:
                 case NS_HTML5TREE_BUILDER_LINK: {
                   goto inheadloop_end;
                 }
                 case NS_HTML5TREE_BUILDER_TITLE: {
                   appendToCurrentNodeAndPushElementMayFoster(kNameSpaceID_XHTML, elementName, attributes);
                   originalMode = mode;
                   mode = NS_HTML5TREE_BUILDER_TEXT;
-                  tokenizer->setContentModelFlag(NS_HTML5TOKENIZER_RCDATA, elementName);
+                  tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_RCDATA, elementName);
                   attributes = nsnull;
                   goto starttagloop_end;
                 }
                 case NS_HTML5TREE_BUILDER_NOSCRIPT: {
                   if (scriptingEnabled) {
                     appendToCurrentNodeAndPushElement(kNameSpaceID_XHTML, elementName, attributes);
                     originalMode = mode;
                     mode = NS_HTML5TREE_BUILDER_TEXT;
-                    tokenizer->setContentModelFlag(NS_HTML5TOKENIZER_RAWTEXT, elementName);
+                    tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_RAWTEXT, elementName);
                   } else {
                     appendToCurrentNodeAndPushElementMayFoster(kNameSpaceID_XHTML, elementName, attributes);
                     mode = NS_HTML5TREE_BUILDER_IN_HEAD_NOSCRIPT;
                   }
                   attributes = nsnull;
                   goto starttagloop_end;
                 }
                 case NS_HTML5TREE_BUILDER_SCRIPT: {
                   appendToCurrentNodeAndPushElementMayFoster(kNameSpaceID_XHTML, elementName, attributes);
                   originalMode = mode;
                   mode = NS_HTML5TREE_BUILDER_TEXT;
-                  tokenizer->setContentModelFlag(NS_HTML5TOKENIZER_SCRIPT_DATA, elementName);
+                  tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_SCRIPT_DATA, elementName);
                   attributes = nsnull;
                   goto starttagloop_end;
                 }
                 case NS_HTML5TREE_BUILDER_STYLE:
                 case NS_HTML5TREE_BUILDER_NOFRAMES: {
                   appendToCurrentNodeAndPushElementMayFoster(kNameSpaceID_XHTML, elementName, attributes);
                   originalMode = mode;
                   mode = NS_HTML5TREE_BUILDER_TEXT;
-                  tokenizer->setContentModelFlag(NS_HTML5TOKENIZER_RAWTEXT, elementName);
+                  tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_RAWTEXT, elementName);
                   attributes = nsnull;
                   goto starttagloop_end;
                 }
                 case NS_HTML5TREE_BUILDER_HEAD: {
 
                   goto starttagloop_end;
                 }
                 default: {
@@ -1410,17 +1398,17 @@ nsHtml5TreeBuilder::startTag(nsHtml5Elem
                 attributes = nsnull;
                 goto starttagloop_end;
               }
               case NS_HTML5TREE_BUILDER_STYLE:
               case NS_HTML5TREE_BUILDER_NOFRAMES: {
                 appendToCurrentNodeAndPushElement(kNameSpaceID_XHTML, elementName, attributes);
                 originalMode = mode;
                 mode = NS_HTML5TREE_BUILDER_TEXT;
-                tokenizer->setContentModelFlag(NS_HTML5TOKENIZER_RAWTEXT, elementName);
+                tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_RAWTEXT, elementName);
                 attributes = nsnull;
                 goto starttagloop_end;
               }
               case NS_HTML5TREE_BUILDER_HEAD: {
 
                 goto starttagloop_end;
               }
               case NS_HTML5TREE_BUILDER_NOSCRIPT: {
@@ -1540,17 +1528,17 @@ nsHtml5TreeBuilder::startTag(nsHtml5Elem
                 }
                 resetTheInsertionMode();
                 continue;
               }
               case NS_HTML5TREE_BUILDER_SCRIPT: {
                 appendToCurrentNodeAndPushElementMayFoster(kNameSpaceID_XHTML, elementName, attributes);
                 originalMode = mode;
                 mode = NS_HTML5TREE_BUILDER_TEXT;
-                tokenizer->setContentModelFlag(NS_HTML5TOKENIZER_SCRIPT_DATA, elementName);
+                tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_SCRIPT_DATA, elementName);
                 attributes = nsnull;
                 goto starttagloop_end;
               }
               default: {
 
                 goto starttagloop_end;
               }
             }
@@ -1594,17 +1582,17 @@ nsHtml5TreeBuilder::startTag(nsHtml5Elem
                 addAttributesToHtml(attributes);
                 attributes = nsnull;
                 goto starttagloop_end;
               }
               case NS_HTML5TREE_BUILDER_NOFRAMES: {
                 appendToCurrentNodeAndPushElement(kNameSpaceID_XHTML, elementName, attributes);
                 originalMode = mode;
                 mode = NS_HTML5TREE_BUILDER_TEXT;
-                tokenizer->setContentModelFlag(NS_HTML5TOKENIZER_RAWTEXT, elementName);
+                tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_RAWTEXT, elementName);
                 attributes = nsnull;
                 goto starttagloop_end;
               }
               default: {
 
                 goto starttagloop_end;
               }
             }
@@ -1708,38 +1696,38 @@ nsHtml5TreeBuilder::startTag(nsHtml5Elem
                 goto starttagloop_end;
               }
               case NS_HTML5TREE_BUILDER_SCRIPT: {
 
                 pushHeadPointerOntoStack();
                 appendToCurrentNodeAndPushElement(kNameSpaceID_XHTML, elementName, attributes);
                 originalMode = mode;
                 mode = NS_HTML5TREE_BUILDER_TEXT;
-                tokenizer->setContentModelFlag(NS_HTML5TOKENIZER_SCRIPT_DATA, elementName);
+                tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_SCRIPT_DATA, elementName);
                 attributes = nsnull;
                 goto starttagloop_end;
               }
               case NS_HTML5TREE_BUILDER_STYLE:
               case NS_HTML5TREE_BUILDER_NOFRAMES: {
 
                 pushHeadPointerOntoStack();
                 appendToCurrentNodeAndPushElement(kNameSpaceID_XHTML, elementName, attributes);
                 originalMode = mode;
                 mode = NS_HTML5TREE_BUILDER_TEXT;
-                tokenizer->setContentModelFlag(NS_HTML5TOKENIZER_RAWTEXT, elementName);
+                tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_RAWTEXT, elementName);
                 attributes = nsnull;
                 goto starttagloop_end;
               }
               case NS_HTML5TREE_BUILDER_TITLE: {
 
                 pushHeadPointerOntoStack();
                 appendToCurrentNodeAndPushElement(kNameSpaceID_XHTML, elementName, attributes);
                 originalMode = mode;
                 mode = NS_HTML5TREE_BUILDER_TEXT;
-                tokenizer->setContentModelFlag(NS_HTML5TOKENIZER_RCDATA, elementName);
+                tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_RCDATA, elementName);
                 attributes = nsnull;
                 goto starttagloop_end;
               }
               case NS_HTML5TREE_BUILDER_HEAD: {
 
                 goto starttagloop_end;
               }
               default: {
@@ -1766,17 +1754,17 @@ nsHtml5TreeBuilder::startTag(nsHtml5Elem
             }
           }
           case NS_HTML5TREE_BUILDER_AFTER_AFTER_FRAMESET: {
             switch(group) {
               case NS_HTML5TREE_BUILDER_NOFRAMES: {
                 appendToCurrentNodeAndPushElementMayFoster(kNameSpaceID_XHTML, elementName, attributes);
                 originalMode = mode;
                 mode = NS_HTML5TREE_BUILDER_TEXT;
-                tokenizer->setContentModelFlag(NS_HTML5TOKENIZER_SCRIPT_DATA, elementName);
+                tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_SCRIPT_DATA, elementName);
                 attributes = nsnull;
                 goto starttagloop_end;
               }
               default: {
 
                 goto starttagloop_end;
               }
             }