Bug 559303 - Consolidate strBuf and longStrBuf in the tokenizer. r=wchen.
author Henri Sivonen <hsivonen@hsivonen.fi>
Tue, 25 Aug 2015 18:05:45 +0300
changeset 259276 7b3c5dd7ad30fde4bf6dad4a850ebf90a6f2511e
parent 259275 8e716c7b5571ee8b97ed1e2e6bea8c3a99c7bf95
child 259277 bb56d50195c4835545282c518ce7571807ee5883
push id 29275
push user ryanvm@gmail.com
push date Tue, 25 Aug 2015 20:52:52 +0000
treeherder mozilla-central@c46370eea81a [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers wchen
bugs 559303
milestone 43.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 559303 - Consolidate strBuf and longStrBuf in the tokenizer. r=wchen.
parser/html/javasrc/Tokenizer.java
parser/html/nsHtml5Tokenizer.cpp
parser/html/nsHtml5Tokenizer.h
--- a/parser/html/javasrc/Tokenizer.java
+++ b/parser/html/javasrc/Tokenizer.java
@@ -350,17 +350,17 @@ public class Tokenizer implements Locato
     private int firstCharKey;
 
     private int lo;
 
     private int hi;
 
     private int candidate;
 
-    private int strBufMark;
+    private int charRefBufMark;
 
     private int prevValue;
 
     protected int value;
 
     private boolean seenDigits;
 
     protected int cstart;
@@ -373,45 +373,36 @@ public class Tokenizer implements Locato
 
     /**
      * The SAX system id for the resource being tokenized. (Only passed to back
      * as part of locator data.)
      */
     private String systemId;
 
     /**
-     * Buffer for short identifiers.
+     * Buffer for bufferable things other than those that fit the description
+     * of <code>charRefBuf</code>.
      */
     private @Auto char[] strBuf;
 
     /**
      * Number of significant <code>char</code>s in <code>strBuf</code>.
      */
     private int strBufLen;
 
     /**
-     * <code>-1</code> to indicate that <code>strBuf</code> is used or otherwise
-     * an offset to the main buffer.
+     * Buffer for characters that might form a character reference but may
+     * end up not forming one.
      */
-    // private int strBufOffset = -1;
-    /**
-     * Buffer for long strings.
-     */
-    private @Auto char[] longStrBuf;
+    private @Auto char[] charRefBuf;
 
     /**
-     * Number of significant <code>char</code>s in <code>longStrBuf</code>.
+     * Number of significant <code>char</code>s in <code>charRefBuf</code>.
      */
-    private int longStrBufLen;
-
-    /**
-     * <code>-1</code> to indicate that <code>longStrBuf</code> is used or
-     * otherwise an offset to the main buffer.
-     */
-    // private int longStrBufOffset = -1;
+    private int charRefBufLen;
 
     /**
      * Buffer for expanding NCRs falling into the Basic Multilingual Plane.
      */
     private final @Auto char[] bmpChar;
 
     /**
      * Buffer for expanding astral NCRs.
@@ -813,16 +804,40 @@ public class Tokenizer implements Locato
         } else {
             // ]NOCPP]
             return HtmlAttributes.EMPTY_ATTRIBUTES;
             // [NOCPP[
         }
         // ]NOCPP]
     }
 
+    @Inline private void appendCharRefBuf(char c) {
+        if (charRefBufLen == charRefBuf.length) {
+            char[] newBuf = new char[charRefBuf.length + Tokenizer.BUFFER_GROW_BY];
+            System.arraycopy(charRefBuf, 0, newBuf, 0, charRefBuf.length);
+            charRefBuf = newBuf;
+        }
+        charRefBuf[charRefBufLen++] = c;
+    }
+
+    @Inline private void clearCharRefBufAndAppend(char c) {
+        charRefBuf[0] = c;
+        charRefBufLen = 1;
+    }
+
+    private void emitOrAppendCharRefBuf(int returnState) throws SAXException {
+        if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
+            appendCharRefBufToStrBuf();
+        } else {
+            if (charRefBufLen > 0) {
+                tokenHandler.characters(charRefBuf, 0, charRefBufLen);
+            }
+        }
+    }
+
     @Inline private void clearStrBufAndAppend(char c) {
         strBuf[0] = c;
         strBufLen = 1;
     }
 
     @Inline private void clearStrBuf() {
         strBufLen = 0;
     }
@@ -872,152 +887,108 @@ public class Tokenizer implements Locato
      *             if the token handler threw
      */
     private void emitStrBuf() throws SAXException {
         if (strBufLen > 0) {
             tokenHandler.characters(strBuf, 0, strBufLen);
         }
     }
 
-    @Inline private void clearLongStrBuf() {
-        longStrBufLen = 0;
-    }
-
-    @Inline private void clearLongStrBufAndAppend(char c) {
-        longStrBuf[0] = c;
-        longStrBufLen = 1;
-    }
-
-    /**
-     * Appends to the larger buffer.
-     * 
-     * @param c
-     *            the UTF-16 code unit to append
-     */
-    private void appendLongStrBuf(char c) {
-        if (longStrBufLen == longStrBuf.length) {
-            char[] newBuf = new char[longStrBufLen + (longStrBufLen >> 1)];
-            System.arraycopy(longStrBuf, 0, newBuf, 0, longStrBuf.length);
-            longStrBuf = newBuf;
-        }
-        longStrBuf[longStrBufLen++] = c;
-    }
-
     @Inline private void appendSecondHyphenToBogusComment() throws SAXException {
         // [NOCPP[
         switch (commentPolicy) {
             case ALTER_INFOSET:
-                // detachLongStrBuf();
-                appendLongStrBuf(' ');
+                appendStrBuf(' ');
                 // FALLTHROUGH
             case ALLOW:
                 warn("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment.");
                 // ]NOCPP]
-                appendLongStrBuf('-');
+                appendStrBuf('-');
                 // [NOCPP[
                 break;
             case FATAL:
                 fatal("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment.");
                 break;
         }
         // ]NOCPP]
     }
 
     // [NOCPP[
     private void maybeAppendSpaceToBogusComment() throws SAXException {
         switch (commentPolicy) {
             case ALTER_INFOSET:
-                // detachLongStrBuf();
-                appendLongStrBuf(' ');
+                appendStrBuf(' ');
                 // FALLTHROUGH
             case ALLOW:
                 warn("The document is not mappable to XML 1.0 due to a trailing hyphen in a comment.");
                 break;
             case FATAL:
                 fatal("The document is not mappable to XML 1.0 due to a trailing hyphen in a comment.");
                 break;
         }
     }
 
     // ]NOCPP]
 
-    @Inline private void adjustDoubleHyphenAndAppendToLongStrBufAndErr(char c)
+    @Inline private void adjustDoubleHyphenAndAppendToStrBufAndErr(char c)
             throws SAXException {
         errConsecutiveHyphens();
         // [NOCPP[
         switch (commentPolicy) {
             case ALTER_INFOSET:
-                // detachLongStrBuf();
-                longStrBufLen--;
-                appendLongStrBuf(' ');
-                appendLongStrBuf('-');
+                strBufLen--;
+                appendStrBuf(' ');
+                appendStrBuf('-');
                 // FALLTHROUGH
             case ALLOW:
                 warn("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment.");
                 // ]NOCPP]
-                appendLongStrBuf(c);
+                appendStrBuf(c);
                 // [NOCPP[
                 break;
             case FATAL:
                 fatal("The document is not mappable to XML 1.0 due to two consecutive hyphens in a comment.");
                 break;
         }
         // ]NOCPP]
     }
 
-    private void appendLongStrBuf(@NoLength char[] buffer, int offset, int length) {
-        int reqLen = longStrBufLen + length;
-        if (longStrBuf.length < reqLen) {
+    private void appendStrBuf(@NoLength char[] buffer, int offset, int length) {
+        int reqLen = strBufLen + length;
+        if (strBuf.length < reqLen) {
             char[] newBuf = new char[reqLen + (reqLen >> 1)];
-            System.arraycopy(longStrBuf, 0, newBuf, 0, longStrBuf.length);
-            longStrBuf = newBuf;
+            System.arraycopy(strBuf, 0, newBuf, 0, strBuf.length);
+            strBuf = newBuf;
         }
-        System.arraycopy(buffer, offset, longStrBuf, longStrBufLen, length);
-        longStrBufLen = reqLen;
+        System.arraycopy(buffer, offset, strBuf, strBufLen, length);
+        strBufLen = reqLen;
     }
 
     /**
      * Append the contents of the smaller buffer to the larger one.
      */
-    @Inline private void appendStrBufToLongStrBuf() {
-        appendLongStrBuf(strBuf, 0, strBufLen);
-    }
-
-    /**
-     * The larger buffer as a string.
-     * 
-     * <p>
-     * C++ memory note: The return value must be released.
-     * 
-     * @return the larger buffer as a string
-     */
-    private String longStrBufToString() {
-        return Portability.newStringFromBuffer(longStrBuf, 0, longStrBufLen);
+    @Inline private void appendCharRefBufToStrBuf() {
+        appendStrBuf(charRefBuf, 0, charRefBufLen);
     }
 
     /**
      * Emits the current comment token.
      * 
      * @param pos
      *            TODO
      * 
      * @throws SAXException
      */
     private void emitComment(int provisionalHyphens, int pos)
             throws SAXException {
         // [NOCPP[
         if (wantsComments) {
             // ]NOCPP]
-            // if (longStrBufOffset != -1) {
-            // tokenHandler.comment(buf, longStrBufOffset, longStrBufLen
-            // - provisionalHyphens);
-            // } else {
-            tokenHandler.comment(longStrBuf, 0, longStrBufLen
+            tokenHandler.comment(strBuf, 0, strBufLen
                     - provisionalHyphens);
-            // }
             // [NOCPP[
         }
         // ]NOCPP]
         cstart = pos + 1;
     }
 
     /**
      * Flushes coalesced character tokens.
@@ -1096,22 +1067,18 @@ public class Tokenizer implements Locato
         if (errorHandler == null) {
             return;
         }
         SAXParseException spe = new SAXParseException(message, this);
         errorHandler.warning(spe);
     }
 
     private void strBufToElementNameString() {
-        // if (strBufOffset != -1) {
-        // return ElementName.elementNameByBuffer(buf, strBufOffset, strBufLen);
-        // } else {
         tagName = ElementName.elementNameByBuffer(strBuf, 0, strBufLen,
                 interner);
-        // }
     }
 
     private int emitCurrentTagToken(boolean selfClosing, int pos)
             throws SAXException {
         cstart = pos + 1;
         maybeErrSlashInEndTag(selfClosing);
         stateSave = Tokenizer.DATA;
         HtmlAttributes attrs = (attributes == null ? HtmlAttributes.EMPTY_ATTRIBUTES
@@ -1148,26 +1115,21 @@ public class Tokenizer implements Locato
         /*
          * The token handler may have called setStateAndEndTagExpectation
          * and changed stateSave since the start of this method.
          */
         return stateSave;
     }
 
     private void attributeNameComplete() throws SAXException {
-        // if (strBufOffset != -1) {
-        // attributeName = AttributeName.nameByBuffer(buf, strBufOffset,
-        // strBufLen, namePolicy != XmlViolationPolicy.ALLOW);
-        // } else {
         attributeName = AttributeName.nameByBuffer(strBuf, 0, strBufLen
         // [NOCPP[
                 , namePolicy != XmlViolationPolicy.ALLOW
                 // ]NOCPP]
                 , interner);
-        // }
 
         if (attributes == null) {
             attributes = new HtmlAttributes(mappingLangToXmlLang);
         }
 
         /*
          * When the user agent leaves the attribute name state (and before
          * emitting the tag token, if appropriate), the complete attribute's
@@ -1234,17 +1196,17 @@ public class Tokenizer implements Locato
     private void addAttributeWithValue() throws SAXException {
         // [NOCPP[
         if (metaBoundaryPassed && ElementName.META == tagName
                 && AttributeName.CHARSET == attributeName) {
             err("A \u201Ccharset\u201D attribute on a \u201Cmeta\u201D element found after the first 512 bytes.");
         }
         // ]NOCPP]
         if (attributeName != null) {
-            String val = longStrBufToString(); // Ownership transferred to
+            String val = strBufToString(); // Ownership transferred to
             // HtmlAttributes
             // CPPONLY: if (mViewSource) {
             // CPPONLY:   mViewSource.MaybeLinkifyAttributeValue(attributeName, val);
             // CPPONLY: }
             // [NOCPP[
             if (!endTag && html4 && html4ModeCompatibleWithXhtml1Schemata
                     && attributeName.isCaseFolded()) {
                 val = newAsciiLowerCaseStringFromString(val);
@@ -1445,17 +1407,17 @@ public class Tokenizer implements Locato
                         }
                         switch (c) {
                             case '&':
                                 /*
                                  * U+0026 AMPERSAND (&) Switch to the character
                                  * reference in data state.
                                  */
                                 flushChars(buf, pos);
-                                clearStrBufAndAppend(c);
+                                clearCharRefBufAndAppend(c);
                                 setAdditionalAndRememberAmpersandLocation('\u0000');
                                 returnState = state;
                                 state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
                                 continue stateloop;
                             case '<':
                                 /*
                                  * U+003C LESS-THAN SIGN (<) Switch to the tag
                                  * open state.
@@ -1564,17 +1526,17 @@ public class Tokenizer implements Locato
                                 // CPPONLY: }
                                 /*
                                  * U+003F QUESTION MARK (?) Parse error.
                                  */
                                 errProcessingInstruction();
                                 /*
                                  * Switch to the bogus comment state.
                                  */
-                                clearLongStrBufAndAppend(c);
+                                clearStrBufAndAppend(c);
                                 state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
                                 continue stateloop;
                             case '>':
                                 /*
                                  * U+003E GREATER-THAN SIGN (>) Parse error.
                                  */
                                 errLtGt();
                                 /*
@@ -1911,37 +1873,37 @@ public class Tokenizer implements Locato
                                  * in the before attribute value state.
                                  */
                                 continue;
                             case '"':
                                 /*
                                  * U+0022 QUOTATION MARK (") Switch to the
                                  * attribute value (double-quoted) state.
                                  */
-                                clearLongStrBuf();
+                                clearStrBuf();
                                 state = transition(state, Tokenizer.ATTRIBUTE_VALUE_DOUBLE_QUOTED, reconsume, pos);
                                 break beforeattributevalueloop;
                             // continue stateloop;
                             case '&':
                                 /*
                                  * U+0026 AMPERSAND (&) Switch to the attribute
                                  * value (unquoted) state and reconsume this
                                  * input character.
                                  */
-                                clearLongStrBuf();
+                                clearStrBuf();
                                 reconsume = true;
                                 state = transition(state, Tokenizer.ATTRIBUTE_VALUE_UNQUOTED, reconsume, pos);
                                 noteUnquotedAttributeValue();
                                 continue stateloop;
                             case '\'':
                                 /*
                                  * U+0027 APOSTROPHE (') Switch to the attribute
                                  * value (single-quoted) state.
                                  */
-                                clearLongStrBuf();
+                                clearStrBuf();
                                 state = transition(state, Tokenizer.ATTRIBUTE_VALUE_SINGLE_QUOTED, reconsume, pos);
                                 continue stateloop;
                             case '>':
                                 /*
                                  * U+003E GREATER-THAN SIGN (>) Parse error.
                                  */
                                 errAttributeValueMissing();
                                 /*
@@ -1974,17 +1936,17 @@ public class Tokenizer implements Locato
                             default:
                                 // [NOCPP[
                                 errHtml4NonNameInUnquotedAttribute(c);
                                 // ]NOCPP]
                                 /*
                                  * Anything else Append the current input
                                  * character to the current attribute's value.
                                  */
-                                clearLongStrBufAndAppend(c);
+                                clearStrBufAndAppend(c);
                                 /*
                                  * Switch to the attribute value (unquoted)
                                  * state.
                                  */
 
                                 state = transition(state, Tokenizer.ATTRIBUTE_VALUE_UNQUOTED, reconsume, pos);
                                 noteUnquotedAttributeValue();
                                 continue stateloop;
@@ -2017,36 +1979,36 @@ public class Tokenizer implements Locato
                             // continue stateloop;
                             case '&':
                                 /*
                                  * U+0026 AMPERSAND (&) Switch to the character
                                  * reference in attribute value state, with the
                                  * additional allowed character being U+0022
                                  * QUOTATION MARK (").
                                  */
-                                clearStrBufAndAppend(c);
+                                clearCharRefBufAndAppend(c);
                                 setAdditionalAndRememberAmpersandLocation('\"');
                                 returnState = state;
                                 state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
                                 continue stateloop;
                             case '\r':
-                                appendLongStrBufCarriageReturn();
+                                appendStrBufCarriageReturn();
                                 break stateloop;
                             case '\n':
-                                appendLongStrBufLineFeed();
+                                appendStrBufLineFeed();
                                 continue;
                             case '\u0000':
                                 c = '\uFFFD';
                                 // fall thru
                             default:
                                 /*
                                  * Anything else Append the current input
                                  * character to the current attribute's value.
                                  */
-                                appendLongStrBuf(c);
+                                appendStrBuf(c);
                                 /*
                                  * Stay in the attribute value (double-quoted)
                                  * state.
                                  */
                                 continue;
                         }
                     }
                     // FALLTHRU DON'T REORDER
@@ -2186,17 +2148,17 @@ public class Tokenizer implements Locato
                                 continue stateloop;
                             case '&':
                                 /*
                                  * U+0026 AMPERSAND (&) Switch to the character
                                  * reference in attribute value state, with the
                                  * additional allowed character being U+003E
                                  * GREATER-THAN SIGN (>)
                                  */
-                                clearStrBufAndAppend(c);
+                                clearCharRefBufAndAppend(c);
                                 setAdditionalAndRememberAmpersandLocation('>');
                                 returnState = state;
                                 state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
                                 continue stateloop;
                             case '>':
                                 /*
                                  * U+003E GREATER-THAN SIGN (>) Emit the current
                                  * tag token.
@@ -2232,17 +2194,17 @@ public class Tokenizer implements Locato
                             default:
                                 // [NOCPP]
                                 errHtml4NonNameInUnquotedAttribute(c);
                                 // ]NOCPP]
                                 /*
                                  * Anything else Append the current input
                                  * character to the current attribute's value.
                                  */
-                                appendLongStrBuf(c);
+                                appendStrBuf(c);
                                 /*
                                  * Stay in the attribute value (unquoted) state.
                                  */
                                 continue;
                         }
                     }
                     // XXX reorder point
                 case AFTER_ATTRIBUTE_NAME:
@@ -2371,54 +2333,54 @@ public class Tokenizer implements Locato
                          * 
                          * Otherwise, is is a parse error. Switch to the bogus
                          * comment state. The next character that is consumed,
                          * if any, is the first character that will be in the
                          * comment.
                          */
                         switch (c) {
                             case '-':
-                                clearLongStrBufAndAppend(c);
+                                clearStrBufAndAppend(c);
                                 state = transition(state, Tokenizer.MARKUP_DECLARATION_HYPHEN, reconsume, pos);
                                 break markupdeclarationopenloop;
                             // continue stateloop;
                             case 'd':
                             case 'D':
-                                clearLongStrBufAndAppend(c);
+                                clearStrBufAndAppend(c);
                                 index = 0;
                                 state = transition(state, Tokenizer.MARKUP_DECLARATION_OCTYPE, reconsume, pos);
                                 continue stateloop;
                             case '[':
                                 if (tokenHandler.cdataSectionAllowed()) {
-                                    clearLongStrBufAndAppend(c);
+                                    clearStrBufAndAppend(c);
                                     index = 0;
                                     state = transition(state, Tokenizer.CDATA_START, reconsume, pos);
                                     continue stateloop;
                                 }
                                 // else fall through
                             default:
                                 errBogusComment();
-                                clearLongStrBuf();
+                                clearStrBuf();
                                 reconsume = true;
                                 state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
                                 continue stateloop;
                         }
                     }
                     // FALLTHRU DON'T REORDER
                 case MARKUP_DECLARATION_HYPHEN:
                     markupdeclarationhyphenloop: for (;;) {
                         if (++pos == endPos) {
                             break stateloop;
                         }
                         c = checkChar(buf, pos);
                         switch (c) {
                             case '\u0000':
                                 break stateloop;
                             case '-':
-                                clearLongStrBuf();
+                                clearStrBuf();
                                 state = transition(state, Tokenizer.COMMENT_START, reconsume, pos);
                                 break markupdeclarationhyphenloop;
                             // continue stateloop;
                             default:
                                 errBogusComment();
                                 reconsume = true;
                                 state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
                                 continue stateloop;
@@ -2438,48 +2400,48 @@ public class Tokenizer implements Locato
                          * Consume the next input character:
                          */
                         switch (c) {
                             case '-':
                                 /*
                                  * U+002D HYPHEN-MINUS (-) Switch to the comment
                                  * start dash state.
                                  */
-                                appendLongStrBuf(c);
+                                appendStrBuf(c);
                                 state = transition(state, Tokenizer.COMMENT_START_DASH, reconsume, pos);
                                 continue stateloop;
                             case '>':
                                 /*
                                  * U+003E GREATER-THAN SIGN (>) Parse error.
                                  */
                                 errPrematureEndOfComment();
                                 /* Emit the comment token. */
                                 emitComment(0, pos);
                                 /*
                                  * Switch to the data state.
                                  */
                                 state = transition(state, Tokenizer.DATA, reconsume, pos);
                                 continue stateloop;
                             case '\r':
-                                appendLongStrBufCarriageReturn();
+                                appendStrBufCarriageReturn();
                                 state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                                 break stateloop;
                             case '\n':
-                                appendLongStrBufLineFeed();
+                                appendStrBufLineFeed();
                                 state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                                 break commentstartloop;
                             case '\u0000':
                                 c = '\uFFFD';
                                 // fall thru
                             default:
                                 /*
                                  * Anything else Append the input character to
                                  * the comment token's data.
                                  */
-                                appendLongStrBuf(c);
+                                appendStrBuf(c);
                                 /*
                                  * Switch to the comment state.
                                  */
                                 state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                                 break commentstartloop;
                             // continue stateloop;
                         }
                     }
@@ -2494,35 +2456,35 @@ public class Tokenizer implements Locato
                          * Comment state Consume the next input character:
                          */
                         switch (c) {
                             case '-':
                                 /*
                                  * U+002D HYPHEN-MINUS (-) Switch to the comment
                                  * end dash state
                                  */
-                                appendLongStrBuf(c);
+                                appendStrBuf(c);
                                 state = transition(state, Tokenizer.COMMENT_END_DASH, reconsume, pos);
                                 break commentloop;
                             // continue stateloop;
                             case '\r':
-                                appendLongStrBufCarriageReturn();
+                                appendStrBufCarriageReturn();
                                 break stateloop;
                             case '\n':
-                                appendLongStrBufLineFeed();
+                                appendStrBufLineFeed();
                                 continue;
                             case '\u0000':
                                 c = '\uFFFD';
                                 // fall thru
                             default:
                                 /*
                                  * Anything else Append the input character to
                                  * the comment token's data.
                                  */
-                                appendLongStrBuf(c);
+                                appendStrBuf(c);
                                 /*
                                  * Stay in the comment state.
                                  */
                                 continue;
                         }
                     }
                     // FALLTHRU DON'T REORDER
                 case COMMENT_END_DASH:
@@ -2536,38 +2498,38 @@ public class Tokenizer implements Locato
                          * character:
                          */
                         switch (c) {
                             case '-':
                                 /*
                                  * U+002D HYPHEN-MINUS (-) Switch to the comment
                                  * end state
                                  */
-                                appendLongStrBuf(c);
+                                appendStrBuf(c);
                                 state = transition(state, Tokenizer.COMMENT_END, reconsume, pos);
                                 break commentenddashloop;
                             // continue stateloop;
                             case '\r':
-                                appendLongStrBufCarriageReturn();
+                                appendStrBufCarriageReturn();
                                 state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                                 break stateloop;
                             case '\n':
-                                appendLongStrBufLineFeed();
+                                appendStrBufLineFeed();
                                 state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                                 continue stateloop;
                             case '\u0000':
                                 c = '\uFFFD';
                                 // fall thru
                             default:
                                 /*
                                  * Anything else Append a U+002D HYPHEN-MINUS
                                  * (-) character and the input character to the
                                  * comment token's data.
                                  */
-                                appendLongStrBuf(c);
+                                appendStrBuf(c);
                                 /*
                                  * Switch to the comment state.
                                  */
                                 state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                                 continue stateloop;
                         }
                     }
                     // FALLTHRU DON'T REORDER
@@ -2594,44 +2556,44 @@ public class Tokenizer implements Locato
                                 state = transition(state, Tokenizer.DATA, reconsume, pos);
                                 continue stateloop;
                             case '-':
                                 /* U+002D HYPHEN-MINUS (-) Parse error. */
                                 /*
                                  * Append a U+002D HYPHEN-MINUS (-) character to
                                  * the comment token's data.
                                  */
-                                adjustDoubleHyphenAndAppendToLongStrBufAndErr(c);
+                                adjustDoubleHyphenAndAppendToStrBufAndErr(c);
                                 /*
                                  * Stay in the comment end state.
                                  */
                                 continue;
                             case '\r':
-                                adjustDoubleHyphenAndAppendToLongStrBufCarriageReturn();
+                                adjustDoubleHyphenAndAppendToStrBufCarriageReturn();
                                 state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                                 break stateloop;
                             case '\n':
-                                adjustDoubleHyphenAndAppendToLongStrBufLineFeed();
+                                adjustDoubleHyphenAndAppendToStrBufLineFeed();
                                 state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                                 continue stateloop;
                             case '!':
                                 errHyphenHyphenBang();
-                                appendLongStrBuf(c);
+                                appendStrBuf(c);
                                 state = transition(state, Tokenizer.COMMENT_END_BANG, reconsume, pos);
                                 continue stateloop;
                             case '\u0000':
                                 c = '\uFFFD';
                                 // fall thru
                             default:
                                 /*
                                  * Append two U+002D HYPHEN-MINUS (-) characters
                                  * and the input character to the comment
                                  * token's data.
                                  */
-                                adjustDoubleHyphenAndAppendToLongStrBufAndErr(c);
+                                adjustDoubleHyphenAndAppendToStrBufAndErr(c);
                                 /*
                                  * Switch to the comment state.
                                  */
                                 state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                                 continue stateloop;
                         }
                     }
                     // XXX reorder point
@@ -2659,40 +2621,40 @@ public class Tokenizer implements Locato
                                 state = transition(state, Tokenizer.DATA, reconsume, pos);
                                 continue stateloop;
                             case '-':
                                 /*
                                  * Append two U+002D HYPHEN-MINUS (-) characters
                                  * and a U+0021 EXCLAMATION MARK (!) character
                                  * to the comment token's data.
                                  */
-                                appendLongStrBuf(c);
+                                appendStrBuf(c);
                                 /*
                                  * Switch to the comment end dash state.
                                  */
                                 state = transition(state, Tokenizer.COMMENT_END_DASH, reconsume, pos);
                                 continue stateloop;
                             case '\r':
-                                appendLongStrBufCarriageReturn();
+                                appendStrBufCarriageReturn();
                                 break stateloop;
                             case '\n':
-                                appendLongStrBufLineFeed();
+                                appendStrBufLineFeed();
                                 continue;
                             case '\u0000':
                                 c = '\uFFFD';
                                 // fall thru
                             default:
                                 /*
                                  * Anything else Append two U+002D HYPHEN-MINUS
                                  * (-) characters, a U+0021 EXCLAMATION MARK (!)
                                  * character, and the input character to the
                                  * comment token's data. Switch to the comment
                                  * state.
                                  */
-                                appendLongStrBuf(c);
+                                appendStrBuf(c);
                                 /*
                                  * Switch to the comment state.
                                  */
                                 state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                                 continue stateloop;
                         }
                     }
                     // XXX reorder point
@@ -2707,62 +2669,62 @@ public class Tokenizer implements Locato
                      * Consume the next input character:
                      */
                     switch (c) {
                         case '-':
                             /*
                              * U+002D HYPHEN-MINUS (-) Switch to the comment end
                              * state
                              */
-                            appendLongStrBuf(c);
+                            appendStrBuf(c);
                             state = transition(state, Tokenizer.COMMENT_END, reconsume, pos);
                             continue stateloop;
                         case '>':
                             errPrematureEndOfComment();
                             /* Emit the comment token. */
                             emitComment(1, pos);
                             /*
                              * Switch to the data state.
                              */
                             state = transition(state, Tokenizer.DATA, reconsume, pos);
                             continue stateloop;
                         case '\r':
-                            appendLongStrBufCarriageReturn();
+                            appendStrBufCarriageReturn();
                             state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                             break stateloop;
                         case '\n':
-                            appendLongStrBufLineFeed();
+                            appendStrBufLineFeed();
                             state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                             continue stateloop;
                         case '\u0000':
                             c = '\uFFFD';
                             // fall thru
                         default:
                             /*
                              * Append a U+002D HYPHEN-MINUS character (-) and
                              * the current input character to the comment
                              * token's data.
                              */
-                            appendLongStrBuf(c);
+                            appendStrBuf(c);
                             /*
                              * Switch to the comment state.
                              */
                             state = transition(state, Tokenizer.COMMENT, reconsume, pos);
                             continue stateloop;
                     }
                     // XXX reorder point
                 case CDATA_START:
                     for (;;) {
                         if (++pos == endPos) {
                             break stateloop;
                         }
                         c = checkChar(buf, pos);
                         if (index < 6) { // CDATA_LSQB.length
                             if (c == Tokenizer.CDATA_LSQB[index]) {
-                                appendLongStrBuf(c);
+                                appendStrBuf(c);
                             } else {
                                 errBogusComment();
                                 reconsume = true;
                                 state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
                                 continue stateloop;
                             }
                             index++;
                             continue;
@@ -2875,37 +2837,37 @@ public class Tokenizer implements Locato
                                 continue stateloop;
                             case '&':
                                 /*
                                  * U+0026 AMPERSAND (&) Switch to the character
                                  * reference in attribute value state, with the
                                  * + additional allowed character being U+0027
                                  * APOSTROPHE (').
                                  */
-                                clearStrBufAndAppend(c);
+                                clearCharRefBufAndAppend(c);
                                 setAdditionalAndRememberAmpersandLocation('\'');
                                 returnState = state;
                                 state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
                                 break attributevaluesinglequotedloop;
                             // continue stateloop;
                             case '\r':
-                                appendLongStrBufCarriageReturn();
+                                appendStrBufCarriageReturn();
                                 break stateloop;
                             case '\n':
-                                appendLongStrBufLineFeed();
+                                appendStrBufLineFeed();
                                 continue;
                             case '\u0000':
                                 c = '\uFFFD';
                                 // fall thru
                             default:
                                 /*
                                  * Anything else Append the current input
                                  * character to the current attribute's value.
                                  */
-                                appendLongStrBuf(c);
+                                appendStrBuf(c);
                                 /*
                                  * Stay in the attribute value (double-quoted)
                                  * state.
                                  */
                                 continue;
                         }
                     }
                     // FALLTHRU DON'T REORDER
@@ -2937,59 +2899,59 @@ public class Tokenizer implements Locato
                     switch (c) {
                         case ' ':
                         case '\t':
                         case '\n':
                         case '\r': // we'll reconsume!
                         case '\u000C':
                         case '<':
                         case '&':
-                            emitOrAppendStrBuf(returnState);
+                            emitOrAppendCharRefBuf(returnState);
                             if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
                                 cstart = pos;
                             }
                             reconsume = true;
                             state = transition(state, returnState, reconsume, pos);
                             continue stateloop;
                         case '#':
                             /*
                              * U+0023 NUMBER SIGN (#) Consume the U+0023 NUMBER
                              * SIGN.
                              */
-                            appendStrBuf('#');
+                            appendCharRefBuf('#');
                             state = transition(state, Tokenizer.CONSUME_NCR, reconsume, pos);
                             continue stateloop;
                         default:
                             if (c == additional) {
-                                emitOrAppendStrBuf(returnState);
+                                emitOrAppendCharRefBuf(returnState);
                                 reconsume = true;
                                 state = transition(state, returnState, reconsume, pos);
                                 continue stateloop;
                             }
                             if (c >= 'a' && c <= 'z') {
                                 firstCharKey = c - 'a' + 26;
                             } else if (c >= 'A' && c <= 'Z') {
                                 firstCharKey = c - 'A';
                             } else {
                                 // No match
                                 /*
                                  * If no match can be made, then this is a parse
                                  * error.
                                  */
                                 errNoNamedCharacterMatch();
-                                emitOrAppendStrBuf(returnState);
+                                emitOrAppendCharRefBuf(returnState);
                                 if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
                                     cstart = pos;
                                 }
                                 reconsume = true;
                                 state = transition(state, returnState, reconsume, pos);
                                 continue stateloop;
                             }
                             // Didn't fail yet
-                            appendStrBuf(c);
+                            appendCharRefBuf(c);
                             state = transition(state, Tokenizer.CHARACTER_REFERENCE_HILO_LOOKUP, reconsume, pos);
                             // FALL THROUGH continue stateloop;
                     }
                     // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
                 case CHARACTER_REFERENCE_HILO_LOOKUP:
                     {
                         if (++pos == endPos) {
                             break stateloop;
@@ -3044,31 +3006,31 @@ public class Tokenizer implements Locato
                             }
                         }
                         if (hilo == 0) {
                             /*
                              * If no match can be made, then this is a parse
                              * error.
                              */
                             errNoNamedCharacterMatch();
-                            emitOrAppendStrBuf(returnState);
+                            emitOrAppendCharRefBuf(returnState);
                             if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
                                 cstart = pos;
                             }
                             reconsume = true;
                             state = transition(state, returnState, reconsume, pos);
                             continue stateloop;
                         }
                         // Didn't fail yet
-                        appendStrBuf(c);
+                        appendCharRefBuf(c);
                         lo = hilo & 0xFFFF;
                         hi = hilo >> 16;
                         entCol = -1;
                         candidate = -1;
-                        strBufMark = 0;
+                        charRefBufMark = 0;
                         state = transition(state, Tokenizer.CHARACTER_REFERENCE_TAIL, reconsume, pos);
                         // FALL THROUGH continue stateloop;
                     }
                 case CHARACTER_REFERENCE_TAIL:
                     outer: for (;;) {
                         if (++pos == endPos) {
                             break stateloop;
                         }
@@ -3085,17 +3047,17 @@ public class Tokenizer implements Locato
                          * manner).
                          */
                         loloop: for (;;) {
                             if (hi < lo) {
                                 break outer;
                             }
                             if (entCol == NamedCharacters.NAMES[lo].length()) {
                                 candidate = lo;
-                                strBufMark = strBufLen;
+                                charRefBufMark = charRefBufLen;
                                 lo++;
                             } else if (entCol > NamedCharacters.NAMES[lo].length()) {
                                 break outer;
                             } else if (c > NamedCharacters.NAMES[lo].charAt(entCol)) {
                                 lo++;
                             } else {
                                 break loloop;
                             }
@@ -3120,35 +3082,35 @@ public class Tokenizer implements Locato
                         if (c == ';') {
                             // If we see a semicolon, there cannot be a 
                             // longer match. Break the loop. However, before
                             // breaking, take the longest match so far as the 
                             // candidate, if we are just about to complete a 
                             // match.
                             if (entCol + 1 == NamedCharacters.NAMES[lo].length()) {
                                 candidate = lo;
-                                strBufMark = strBufLen;
+                                charRefBufMark = charRefBufLen;
                             }                            
                             break outer;
                         }
                         
                         if (hi < lo) {
                             break outer;
                         }
-                        appendStrBuf(c);
+                        appendCharRefBuf(c);
                         continue;
                     }
 
                     if (candidate == -1) {
                         // reconsume deals with CR, LF or nul
                         /*
                          * If no match can be made, then this is a parse error.
                          */
                         errNoNamedCharacterMatch();
-                        emitOrAppendStrBuf(returnState);
+                        emitOrAppendCharRefBuf(returnState);
                         if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
                             cstart = pos;
                         }
                         reconsume = true;
                         state = transition(state, returnState, reconsume, pos);
                         continue stateloop;
                     } else {
                         // c can't be CR, LF or nul if we got here
@@ -3161,24 +3123,20 @@ public class Tokenizer implements Locato
                              */
                             if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
                                 /*
                                  * If the entity is being consumed as part of an
                                  * attribute, and the last character matched is
                                  * not a U+003B SEMICOLON (;),
                                  */
                                 char ch;
-                                if (strBufMark == strBufLen) {
+                                if (charRefBufMark == charRefBufLen) {
                                     ch = c;
                                 } else {
-                                    // if (strBufOffset != -1) {
-                                    // ch = buf[strBufOffset + strBufMark];
-                                    // } else {
-                                    ch = strBuf[strBufMark];
-                                    // }
+                                    ch = charRefBuf[charRefBufMark];
                                 }
                                 if (ch == '=' || (ch >= '0' && ch <= '9')
                                         || (ch >= 'A' && ch <= 'Z')
                                         || (ch >= 'a' && ch <= 'z')) {
                                     /*
                                      * and the next character is either a U+003D
                                      * EQUALS SIGN character (=) or in the range
                                      * U+0030 DIGIT ZERO to U+0039 DIGIT NINE,
@@ -3186,17 +3144,17 @@ public class Tokenizer implements Locato
                                      * LATIN CAPITAL LETTER Z, or U+0061 LATIN
                                      * SMALL LETTER A to U+007A LATIN SMALL
                                      * LETTER Z, then, for historical reasons,
                                      * all the characters that were matched
                                      * after the U+0026 AMPERSAND (&) must be
                                      * unconsumed, and nothing is returned.
                                      */
                                     errNoNamedCharacterMatch();
-                                    appendStrBufToLongStrBuf();
+                                    appendCharRefBufToStrBuf();
                                     reconsume = true;
                                     state = transition(state, returnState, reconsume, pos);
                                     continue stateloop;
                                 }
                             }
                             if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
                                 errUnescapedAmpersandInterpretedAsCharacterReference();
                             } else {
@@ -3218,33 +3176,32 @@ public class Tokenizer implements Locato
                         // ]NOCPP]
                         // CPPONLY: val[1] == 0
                         ) {
                             emitOrAppendOne(val, returnState);
                         } else {
                             emitOrAppendTwo(val, returnState);
                         }
                         // this is so complicated!
-                        if (strBufMark < strBufLen) {
+                        if (charRefBufMark < charRefBufLen) {
                             if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
-                                for (int i = strBufMark; i < strBufLen; i++) {
-                                    appendLongStrBuf(strBuf[i]);
-                                }
+                                appendStrBuf(charRefBuf, charRefBufMark,
+                                        charRefBufLen - charRefBufMark);
                             } else {
-                                tokenHandler.characters(strBuf, strBufMark,
-                                        strBufLen - strBufMark);
+                                tokenHandler.characters(charRefBuf, charRefBufMark,
+                                        charRefBufLen - charRefBufMark);
                             }
                         }
                         // Check if we broke out early with c being the last
                         // character that matched as opposed to being the
                         // first one that didn't match. In the case of an 
                         // early break, the next run on text should start
                         // *after* the current character and the current 
                         // character shouldn't be reconsumed.
-                        boolean earlyBreak = (c == ';' && strBufMark == strBufLen);
+                        boolean earlyBreak = (c == ';' && charRefBufMark == charRefBufLen);
                         if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
                             cstart = earlyBreak ? pos + 1 : pos;
                         }
                         reconsume = !earlyBreak;
                         state = transition(state, returnState, reconsume, pos);
                         continue stateloop;
                         /*
                          * If the markup contains I'm &notit; I tell you, the
@@ -3280,17 +3237,17 @@ public class Tokenizer implements Locato
                              * DIGIT NINE, U+0061 LATIN SMALL LETTER A through
                              * to U+0066 LATIN SMALL LETTER F, and U+0041 LATIN
                              * CAPITAL LETTER A, through to U+0046 LATIN CAPITAL
                              * LETTER F (in other words, 0-9, A-F, a-f).
                              * 
                              * When it comes to interpreting the number,
                              * interpret it as a hexadecimal number.
                              */
-                            appendStrBuf(c);
+                            appendCharRefBuf(c);
                             state = transition(state, Tokenizer.HEX_NCR_LOOP, reconsume, pos);
                             continue stateloop;
                         default:
                             /*
                              * Anything else Follow the steps below, but using
                              * the range of characters U+0030 DIGIT ZERO through
                              * to U+0039 DIGIT NINE (i.e. just 0-9).
                              * 
@@ -3333,18 +3290,18 @@ public class Tokenizer implements Locato
                                 if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
                                     cstart = pos + 1;
                                 }
                                 state = transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos);
                                 // FALL THROUGH continue stateloop;
                                 break decimalloop;
                             } else {
                                 errNoDigitsInNCR();
-                                appendStrBuf(';');
-                                emitOrAppendStrBuf(returnState);
+                                appendCharRefBuf(';');
+                                emitOrAppendCharRefBuf(returnState);
                                 if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
                                     cstart = pos + 1;
                                 }
                                 state = transition(state, returnState, reconsume, pos);
                                 continue stateloop;
                             }
                         } else {
                             /*
@@ -3355,17 +3312,17 @@ public class Tokenizer implements Locato
                              * returned.
                              * 
                              * Otherwise, if the next character is a U+003B
                              * SEMICOLON, consume that too. If it isn't, there
                              * is a parse error.
                              */
                             if (!seenDigits) {
                                 errNoDigitsInNCR();
-                                emitOrAppendStrBuf(returnState);
+                                emitOrAppendCharRefBuf(returnState);
                                 if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
                                     cstart = pos;
                                 }
                                 reconsume = true;
                                 state = transition(state, returnState, reconsume, pos);
                                 continue stateloop;
                             } else {
                                 errCharRefLacksSemicolon();
@@ -3423,18 +3380,18 @@ public class Tokenizer implements Locato
                             if (seenDigits) {
                                 if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
                                     cstart = pos + 1;
                                 }
                                 state = transition(state, Tokenizer.HANDLE_NCR_VALUE, reconsume, pos);
                                 continue stateloop;
                             } else {
                                 errNoDigitsInNCR();
-                                appendStrBuf(';');
-                                emitOrAppendStrBuf(returnState);
+                                appendCharRefBuf(';');
+                                emitOrAppendCharRefBuf(returnState);
                                 if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
                                     cstart = pos + 1;
                                 }
                                 state = transition(state, returnState, reconsume, pos);
                                 continue stateloop;
                             }
                         } else {
                             /*
@@ -3445,17 +3402,17 @@ public class Tokenizer implements Locato
                              * returned.
                              * 
                              * Otherwise, if the next character is a U+003B
                              * SEMICOLON, consume that too. If it isn't, there
                              * is a parse error.
                              */
                             if (!seenDigits) {
                                 errNoDigitsInNCR();
-                                emitOrAppendStrBuf(returnState);
+                                emitOrAppendCharRefBuf(returnState);
                                 if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
                                     cstart = pos;
                                 }
                                 reconsume = true;
                                 state = transition(state, returnState, reconsume, pos);
                                 continue stateloop;
                             } else {
                                 errCharRefLacksSemicolon();
@@ -3520,27 +3477,27 @@ public class Tokenizer implements Locato
                             continue stateloop;
                         case '\r':
                             silentCarriageReturn();
                             /* Anything else Parse error. */
                             errGarbageAfterLtSlash();
                             /*
                              * Switch to the bogus comment state.
                              */
-                            clearLongStrBufAndAppend('\n');
+                            clearStrBufAndAppend('\n');
                             state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
                             break stateloop;
                         case '\n':
                             silentLineFeed();
                             /* Anything else Parse error. */
                             errGarbageAfterLtSlash();
                             /*
                              * Switch to the bogus comment state.
                              */
-                            clearLongStrBufAndAppend('\n');
+                            clearStrBufAndAppend('\n');
                             state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
                             continue stateloop;
                         case '\u0000':
                             c = '\uFFFD';
                             // fall thru
                         default:
                             if (c >= 'A' && c <= 'Z') {
                                 c += 0x20;
@@ -3564,17 +3521,17 @@ public class Tokenizer implements Locato
                                 state = transition(state, Tokenizer.TAG_NAME, reconsume, pos);
                                 continue stateloop;
                             } else {
                                 /* Anything else Parse error. */
                                 errGarbageAfterLtSlash();
                                 /*
                                  * Switch to the bogus comment state.
                                  */
-                                clearLongStrBufAndAppend(c);
+                                clearStrBufAndAppend(c);
                                 state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
                                 continue stateloop;
                             }
                     }
                     // XXX reorder point
                 case RCDATA:
                     rcdataloop: for (;;) {
                         if (reconsume) {
@@ -3587,17 +3544,17 @@ public class Tokenizer implements Locato
                         }
                         switch (c) {
                             case '&':
                                 /*
                                  * U+0026 AMPERSAND (&) Switch to the character
                                  * reference in RCDATA state.
                                  */
                                 flushChars(buf, pos);
-                                clearStrBufAndAppend(c);
+                                clearCharRefBufAndAppend(c);
                                 setAdditionalAndRememberAmpersandLocation('\u0000');
                                 returnState = state;
                                 state = transition(state, Tokenizer.CONSUME_CHARACTER_REFERENCE, reconsume, pos);
                                 continue stateloop;
                             case '<':
                                 /*
                                  * U+003C LESS-THAN SIGN (<) Switch to the
                                  * RCDATA less-than sign state.
@@ -3835,30 +3792,30 @@ public class Tokenizer implements Locato
                          * character.
                          */
                         switch (c) {
                             case '>':
                                 emitComment(0, pos);
                                 state = transition(state, Tokenizer.DATA, reconsume, pos);
                                 continue stateloop;
                             case '-':
-                                appendLongStrBuf(c);
+                                appendStrBuf(c);
                                 state = transition(state, Tokenizer.BOGUS_COMMENT_HYPHEN, reconsume, pos);
                                 break boguscommentloop;
                             case '\r':
-                                appendLongStrBufCarriageReturn();
+                                appendStrBufCarriageReturn();
                                 break stateloop;
                             case '\n':
-                                appendLongStrBufLineFeed();
+                                appendStrBufLineFeed();
                                 continue;
                             case '\u0000':
                                 c = '\uFFFD';
                                 // fall thru
                             default:
-                                appendLongStrBuf(c);
+                                appendStrBuf(c);
                                 continue;
                         }
                     }
                     // FALLTHRU DON'T REORDER
                 case BOGUS_COMMENT_HYPHEN:
                     boguscommenthyphenloop: for (;;) {
                         if (++pos == endPos) {
                             break stateloop;
@@ -3871,28 +3828,28 @@ public class Tokenizer implements Locato
                                 // ]NOCPP]
                                 emitComment(0, pos);
                                 state = transition(state, Tokenizer.DATA, reconsume, pos);
                                 continue stateloop;
                             case '-':
                                 appendSecondHyphenToBogusComment();
                                 continue boguscommenthyphenloop;
                             case '\r':
-                                appendLongStrBufCarriageReturn();
+                                appendStrBufCarriageReturn();
                                 state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
                                 break stateloop;
                             case '\n':
-                                appendLongStrBufLineFeed();
+                                appendStrBufLineFeed();
                                 state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
                                 continue stateloop;
                             case '\u0000':
                                 c = '\uFFFD';
                                 // fall thru
                             default:
-                                appendLongStrBuf(c);
+                                appendStrBuf(c);
                                 state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
                                 continue stateloop;
                         }
                     }
                     // XXX reorder point
                 case SCRIPT_DATA:
                     scriptdataloop: for (;;) {
                         if (reconsume) {
@@ -4547,17 +4504,17 @@ public class Tokenizer implements Locato
                         }
                         c = checkChar(buf, pos);
                         if (index < 6) { // OCTYPE.length
                             char folded = c;
                             if (c >= 'A' && c <= 'Z') {
                                 folded += 0x20;
                             }
                             if (folded == Tokenizer.OCTYPE[index]) {
-                                appendLongStrBuf(c);
+                                appendStrBuf(c);
                             } else {
                                 errBogusComment();
                                 reconsume = true;
                                 state = transition(state, Tokenizer.BOGUS_COMMENT, reconsume, pos);
                                 continue stateloop;
                             }
                             index++;
                             continue;
@@ -4902,33 +4859,33 @@ public class Tokenizer implements Locato
                                 /*
                                  * U+0022 QUOTATION MARK (") Parse Error.
                                  */
                                 errNoSpaceBetweenDoctypePublicKeywordAndQuote();
                                 /*
                                  * Set the DOCTYPE token's public identifier to
                                  * the empty string (not missing),
                                  */
-                                clearLongStrBuf();
+                                clearStrBuf();
                                 /*
                                  * then switch to the DOCTYPE public identifier
                                  * (double-quoted) state.
                                  */
                                 state = transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
                                 continue stateloop;
                             case '\'':
                                 /*
                                  * U+0027 APOSTROPHE (') Parse Error.
                                  */
                                 errNoSpaceBetweenDoctypePublicKeywordAndQuote();
                                 /*
                                  * Set the DOCTYPE token's public identifier to
                                  * the empty string (not missing),
                                  */
-                                clearLongStrBuf();
+                                clearStrBuf();
                                 /*
                                  * then switch to the DOCTYPE public identifier
                                  * (single-quoted) state.
                                  */
                                 state = transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
                                 continue stateloop;
                             case '>':
                                 /* U+003E GREATER-THAN SIGN (>) Parse error. */
@@ -4989,31 +4946,31 @@ public class Tokenizer implements Locato
                                  */
                                 continue;
                             case '"':
                                 /*
                                  * U+0022 QUOTATION MARK (") Set the DOCTYPE
                                  * token's public identifier to the empty string
                                  * (not missing),
                                  */
-                                clearLongStrBuf();
+                                clearStrBuf();
                                 /*
                                  * then switch to the DOCTYPE public identifier
                                  * (double-quoted) state.
                                  */
                                 state = transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
                                 break beforedoctypepublicidentifierloop;
                             // continue stateloop;
                             case '\'':
                                 /*
                                  * U+0027 APOSTROPHE (') Set the DOCTYPE token's
                                  * public identifier to the empty string (not
                                  * missing),
                                  */
-                                clearLongStrBuf();
+                                clearStrBuf();
                                 /*
                                  * then switch to the DOCTYPE public identifier
                                  * (single-quoted) state.
                                  */
                                 state = transition(state, Tokenizer.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
                                 continue stateloop;
                             case '>':
                                 /* U+003E GREATER-THAN SIGN (>) Parse error. */
@@ -5057,56 +5014,56 @@ public class Tokenizer implements Locato
                          * Consume the next input character:
                          */
                         switch (c) {
                             case '"':
                                 /*
                                  * U+0022 QUOTATION MARK (") Switch to the after
                                  * DOCTYPE public identifier state.
                                  */
-                                publicIdentifier = longStrBufToString();
+                                publicIdentifier = strBufToString();
                                 state = transition(state, Tokenizer.AFTER_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos);
                                 break doctypepublicidentifierdoublequotedloop;
                             // continue stateloop;
                             case '>':
                                 /*
                                  * U+003E GREATER-THAN SIGN (>) Parse error.
                                  */
                                 errGtInPublicId();
                                 /*
                                  * Set the DOCTYPE token's force-quirks flag to
                                  * on.
                                  */
                                 forceQuirks = true;
                                 /*
                                  * Emit that DOCTYPE token.
                                  */
-                                publicIdentifier = longStrBufToString();
+                                publicIdentifier = strBufToString();
                                 emitDoctypeToken(pos);
                                 /*
                                  * Switch to the data state.
                                  */
                                 state = transition(state, Tokenizer.DATA, reconsume, pos);
                                 continue stateloop;
                             case '\r':
-                                appendLongStrBufCarriageReturn();
+                                appendStrBufCarriageReturn();
                                 break stateloop;
                             case '\n':
-                                appendLongStrBufLineFeed();
+                                appendStrBufLineFeed();
                                 continue;
                             case '\u0000':
                                 c = '\uFFFD';
                                 // fall thru
                             default:
                                 /*
                                  * Anything else Append the current input
                                  * character to the current DOCTYPE token's
                                  * public identifier.
                                  */
-                                appendLongStrBuf(c);
+                                appendStrBuf(c);
                                 /*
                                  * Stay in the DOCTYPE public identifier
                                  * (double-quoted) state.
                                  */
                                 continue;
                         }
                     }
                     // FALLTHRU DON'T REORDER
@@ -5154,33 +5111,33 @@ public class Tokenizer implements Locato
                                 /*
                                  * U+0022 QUOTATION MARK (") Parse error.
                                  */
                                 errNoSpaceBetweenPublicAndSystemIds();
                                 /*
                                  * Set the DOCTYPE token's system identifier to
                                  * the empty string (not missing),
                                  */
-                                clearLongStrBuf();
+                                clearStrBuf();
                                 /*
                                  * then switch to the DOCTYPE system identifier
                                  * (double-quoted) state.
                                  */
                                 state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
                                 continue stateloop;
                             case '\'':
                                 /*
                                  * U+0027 APOSTROPHE (') Parse error.
                                  */
                                 errNoSpaceBetweenPublicAndSystemIds();
                                 /*
                                  * Set the DOCTYPE token's system identifier to
                                  * the empty string (not missing),
                                  */
-                                clearLongStrBuf();
+                                clearStrBuf();
                                 /*
                                  * then switch to the DOCTYPE system identifier
                                  * (single-quoted) state.
                                  */
                                 state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
                                 continue stateloop;
                             default:
                                 bogusDoctype();
@@ -5235,31 +5192,31 @@ public class Tokenizer implements Locato
                                 state = transition(state, Tokenizer.DATA, reconsume, pos);
                                 continue stateloop;
                             case '"':
                                 /*
                                  * U+0022 QUOTATION MARK (") Set the DOCTYPE
                                  * token's system identifier to the empty string
                                  * (not missing),
                                  */
-                                clearLongStrBuf();
+                                clearStrBuf();
                                 /*
                                  * then switch to the DOCTYPE system identifier
                                  * (double-quoted) state.
                                  */
                                 state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
                                 break betweendoctypepublicandsystemidentifiersloop;
                             // continue stateloop;
                             case '\'':
                                 /*
                                  * U+0027 APOSTROPHE (') Set the DOCTYPE token's
                                  * system identifier to the empty string (not
                                  * missing),
                                  */
-                                clearLongStrBuf();
+                                clearStrBuf();
                                 /*
                                  * then switch to the DOCTYPE system identifier
                                  * (single-quoted) state.
                                  */
                                 state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
                                 continue stateloop;
                             default:
                                 bogusDoctype();
@@ -5286,55 +5243,55 @@ public class Tokenizer implements Locato
                          * Consume the next input character:
                          */
                         switch (c) {
                             case '"':
                                 /*
                                  * U+0022 QUOTATION MARK (") Switch to the after
                                  * DOCTYPE system identifier state.
                                  */
-                                systemIdentifier = longStrBufToString();
+                                systemIdentifier = strBufToString();
                                 state = transition(state, Tokenizer.AFTER_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos);
                                 continue stateloop;
                             case '>':
                                 /*
                                  * U+003E GREATER-THAN SIGN (>) Parse error.
                                  */
                                 errGtInSystemId();
                                 /*
                                  * Set the DOCTYPE token's force-quirks flag to
                                  * on.
                                  */
                                 forceQuirks = true;
                                 /*
                                  * Emit that DOCTYPE token.
                                  */
-                                systemIdentifier = longStrBufToString();
+                                systemIdentifier = strBufToString();
                                 emitDoctypeToken(pos);
                                 /*
                                  * Switch to the data state.
                                  */
                                 state = transition(state, Tokenizer.DATA, reconsume, pos);
                                 continue stateloop;
                             case '\r':
-                                appendLongStrBufCarriageReturn();
+                                appendStrBufCarriageReturn();
                                 break stateloop;
                             case '\n':
-                                appendLongStrBufLineFeed();
+                                appendStrBufLineFeed();
                                 continue;
                             case '\u0000':
                                 c = '\uFFFD';
                                 // fall thru
                             default:
                                 /*
                                  * Anything else Append the current input
                                  * character to the current DOCTYPE token's
                                  * system identifier.
                                  */
-                                appendLongStrBuf(c);
+                                appendStrBuf(c);
                                 /*
                                  * Stay in the DOCTYPE system identifier
                                  * (double-quoted) state.
                                  */
                                 continue;
                         }
                     }
                     // FALLTHRU DON'T REORDER
@@ -5498,33 +5455,33 @@ public class Tokenizer implements Locato
                                 /*
                                  * U+0022 QUOTATION MARK (") Parse Error.
                                  */
                                 errNoSpaceBetweenDoctypeSystemKeywordAndQuote();
                                 /*
                                  * Set the DOCTYPE token's system identifier to
                                  * the empty string (not missing),
                                  */
-                                clearLongStrBuf();
+                                clearStrBuf();
                                 /*
                                  * then switch to the DOCTYPE system identifier
                                  * (double-quoted) state.
                                  */
                                 state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
                                 continue stateloop;
                             case '\'':
                                 /*
                                  * U+0027 APOSTROPHE (') Parse Error.
                                  */
                                 errNoSpaceBetweenDoctypeSystemKeywordAndQuote();
                                 /*
                                  * Set the DOCTYPE token's system identifier to
                                  * the empty string (not missing),
                                  */
-                                clearLongStrBuf();
+                                clearStrBuf();
                                 /*
                                  * then switch to the DOCTYPE system identifier
                                  * (single-quoted) state.
                                  */
                                 state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
                                 continue stateloop;
                             case '>':
                                 /* U+003E GREATER-THAN SIGN (>) Parse error. */
@@ -5585,30 +5542,30 @@ public class Tokenizer implements Locato
                                  */
                                 continue;
                             case '"':
                                 /*
                                  * U+0022 QUOTATION MARK (") Set the DOCTYPE
                                  * token's system identifier to the empty string
                                  * (not missing),
                                  */
-                                clearLongStrBuf();
+                                clearStrBuf();
                                 /*
                                  * then switch to the DOCTYPE system identifier
                                  * (double-quoted) state.
                                  */
                                 state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
                                 continue stateloop;
                             case '\'':
                                 /*
                                  * U+0027 APOSTROPHE (') Set the DOCTYPE token's
                                  * system identifier to the empty string (not
                                  * missing),
                                  */
-                                clearLongStrBuf();
+                                clearStrBuf();
                                 /*
                                  * then switch to the DOCTYPE system identifier
                                  * (single-quoted) state.
                                  */
                                 state = transition(state, Tokenizer.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
                                 break beforedoctypesystemidentifierloop;
                             // continue stateloop;
                             case '>':
@@ -5653,52 +5610,52 @@ public class Tokenizer implements Locato
                          * Consume the next input character:
                          */
                         switch (c) {
                             case '\'':
                                 /*
                                  * U+0027 APOSTROPHE (') Switch to the after
                                  * DOCTYPE system identifier state.
                                  */
-                                systemIdentifier = longStrBufToString();
+                                systemIdentifier = strBufToString();
                                 state = transition(state, Tokenizer.AFTER_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos);
                                 continue stateloop;
                             case '>':
                                 errGtInSystemId();
                                 /*
                                  * Set the DOCTYPE token's force-quirks flag to
                                  * on.
                                  */
                                 forceQuirks = true;
                                 /*
                                  * Emit that DOCTYPE token.
                                  */
-                                systemIdentifier = longStrBufToString();
+                                systemIdentifier = strBufToString();
                                 emitDoctypeToken(pos);
                                 /*
                                  * Switch to the data state.
                                  */
                                 state = transition(state, Tokenizer.DATA, reconsume, pos);
                                 continue stateloop;
                             case '\r':
-                                appendLongStrBufCarriageReturn();
+                                appendStrBufCarriageReturn();
                                 break stateloop;
                             case '\n':
-                                appendLongStrBufLineFeed();
+                                appendStrBufLineFeed();
                                 continue;
                             case '\u0000':
                                 c = '\uFFFD';
                                 // fall thru
                             default:
                                 /*
                                  * Anything else Append the current input
                                  * character to the current DOCTYPE token's
                                  * system identifier.
                                  */
-                                appendLongStrBuf(c);
+                                appendStrBuf(c);
                                 /*
                                  * Stay in the DOCTYPE system identifier
                                  * (single-quoted) state.
                                  */
                                 continue;
                         }
                     }
                     // XXX reorder point
@@ -5712,52 +5669,52 @@ public class Tokenizer implements Locato
                          * Consume the next input character:
                          */
                         switch (c) {
                             case '\'':
                                 /*
                                  * U+0027 APOSTROPHE (') Switch to the after
                                  * DOCTYPE public identifier state.
                                  */
-                                publicIdentifier = longStrBufToString();
+                                publicIdentifier = strBufToString();
                                 state = transition(state, Tokenizer.AFTER_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos);
                                 continue stateloop;
                             case '>':
                                 errGtInPublicId();
                                 /*
                                  * Set the DOCTYPE token's force-quirks flag to
                                  * on.
                                  */
                                 forceQuirks = true;
                                 /*
                                  * Emit that DOCTYPE token.
                                  */
-                                publicIdentifier = longStrBufToString();
+                                publicIdentifier = strBufToString();
                                 emitDoctypeToken(pos);
                                 /*
                                  * Switch to the data state.
                                  */
                                 state = transition(state, Tokenizer.DATA, reconsume, pos);
                                 continue stateloop;
                             case '\r':
-                                appendLongStrBufCarriageReturn();
+                                appendStrBufCarriageReturn();
                                 break stateloop;
                             case '\n':
-                                appendLongStrBufLineFeed();
+                                appendStrBufLineFeed();
                                 continue;
                             case '\u0000':
                                 c = '\uFFFD';
                                 // fall thru
                             default:
                                 /*
                                  * Anything else Append the current input
                                  * character to the current DOCTYPE token's
                                  * public identifier.
                                  */
-                                appendLongStrBuf(c);
+                                appendStrBuf(c);
                                 /*
                                  * Stay in the DOCTYPE public identifier
                                  * (single-quoted) state.
                                  */
                                 continue;
                         }
                     }
                     // XXX reorder point
@@ -5826,36 +5783,36 @@ public class Tokenizer implements Locato
         }
         if (publicIdentifier != null) {
             Portability.releaseString(publicIdentifier);
             publicIdentifier = null;
         }
         forceQuirks = false;
     }
 
-    @Inline private void adjustDoubleHyphenAndAppendToLongStrBufCarriageReturn()
+    @Inline private void adjustDoubleHyphenAndAppendToStrBufCarriageReturn()
             throws SAXException {
         silentCarriageReturn();
-        adjustDoubleHyphenAndAppendToLongStrBufAndErr('\n');
-    }
-
-    @Inline private void adjustDoubleHyphenAndAppendToLongStrBufLineFeed()
+        adjustDoubleHyphenAndAppendToStrBufAndErr('\n');
+    }
+
+    @Inline private void adjustDoubleHyphenAndAppendToStrBufLineFeed()
             throws SAXException {
         silentLineFeed();
-        adjustDoubleHyphenAndAppendToLongStrBufAndErr('\n');
-    }
-
-    @Inline private void appendLongStrBufLineFeed() {
+        adjustDoubleHyphenAndAppendToStrBufAndErr('\n');
+    }
+
+    @Inline private void appendStrBufLineFeed() {
         silentLineFeed();
-        appendLongStrBuf('\n');
-    }
-
-    @Inline private void appendLongStrBufCarriageReturn() {
+        appendStrBuf('\n');
+    }
+
+    @Inline private void appendStrBufCarriageReturn() {
         silentCarriageReturn();
-        appendLongStrBuf('\n');
+        appendStrBuf('\n');
     }
 
     @Inline protected void silentCarriageReturn() {
         ++line;
         lastCR = true;
     }
 
     @Inline protected void silentLineFeed() {
@@ -5896,24 +5853,16 @@ public class Tokenizer implements Locato
         forceQuirks = true;
     }
 
     private void bogusDoctypeWithoutQuirks() throws SAXException {
         errBogusDoctype();
         forceQuirks = false;
     }
 
-    private void emitOrAppendStrBuf(int returnState) throws SAXException {
-        if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
-            appendStrBufToLongStrBuf();
-        } else {
-            emitStrBuf();
-        }
-    }
-
     private void handleNcrValue(int returnState) throws SAXException {
         /*
          * If one or more characters match the range, then take them all and
          * interpret the string of characters as a number (either hexadecimal or
          * decimal as appropriate).
          */
         if (value <= 0xFFFF) {
             if (value >= 0x80 && value <= 0x9f) {
@@ -6109,17 +6058,17 @@ public class Tokenizer implements Locato
                 case BOGUS_COMMENT_HYPHEN:
                     // [NOCPP[
                     maybeAppendSpaceToBogusComment();
                     // ]NOCPP]
                     emitComment(0, 0);
                     break eofloop;
                 case MARKUP_DECLARATION_OPEN:
                     errBogusComment();
-                    clearLongStrBuf();
+                    clearStrBuf();
                     emitComment(0, 0);
                     break eofloop;
                 case MARKUP_DECLARATION_HYPHEN:
                     errBogusComment();
                     emitComment(0, 0);
                     break eofloop;
                 case MARKUP_DECLARATION_OCTYPE:
                     if (index < 6) {
@@ -6245,17 +6194,17 @@ public class Tokenizer implements Locato
                     errEofInPublicId();
                     /*
                      * Set the DOCTYPE token's force-quirks flag to on.
                      */
                     forceQuirks = true;
                     /*
                      * Emit that DOCTYPE token.
                      */
-                    publicIdentifier = longStrBufToString();
+                    publicIdentifier = strBufToString();
                     emitDoctypeToken(0);
                     /*
                      * Reconsume the EOF character in the data state.
                      */
                     break eofloop;
                 case AFTER_DOCTYPE_PUBLIC_IDENTIFIER:
                 case BEFORE_DOCTYPE_SYSTEM_IDENTIFIER:
                 case BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS:
@@ -6278,17 +6227,17 @@ public class Tokenizer implements Locato
                     errEofInSystemId();
                     /*
                      * Set the DOCTYPE token's force-quirks flag to on.
                      */
                     forceQuirks = true;
                     /*
                      * Emit that DOCTYPE token.
                      */
-                    systemIdentifier = longStrBufToString();
+                    systemIdentifier = strBufToString();
                     emitDoctypeToken(0);
                     /*
                      * Reconsume the EOF character in the data state.
                      */
                     break eofloop;
                 case AFTER_DOCTYPE_SYSTEM_IDENTIFIER:
                     errEofInDoctype();
                     /*
@@ -6326,22 +6275,22 @@ public class Tokenizer implements Locato
                      * definition is used when parsing entities in text and in
                      * attributes.
                      * 
                      * The behavior depends on the identity of the next
                      * character (the one immediately after the U+0026 AMPERSAND
                      * character):
                      */
 
-                    emitOrAppendStrBuf(returnState);
+                    emitOrAppendCharRefBuf(returnState);
                     state = returnState;
                     continue;
                 case CHARACTER_REFERENCE_HILO_LOOKUP:
                     errNoNamedCharacterMatch();
-                    emitOrAppendStrBuf(returnState);
+                    emitOrAppendCharRefBuf(returnState);
                     state = returnState;
                     continue;
                 case CHARACTER_REFERENCE_TAIL:
                     outer: for (;;) {
                         char c = '\u0000';
                         entCol++;
                         /*
                          * Consume the maximum number of characters possible,
@@ -6367,17 +6316,17 @@ public class Tokenizer implements Locato
                         }
 
                         loloop: for (;;) {
                             if (hi < lo) {
                                 break outer;
                             }
                             if (entCol == NamedCharacters.NAMES[lo].length()) {
                                 candidate = lo;
-                                strBufMark = strBufLen;
+                                charRefBufMark = charRefBufLen;
                                 lo++;
                             } else if (entCol > NamedCharacters.NAMES[lo].length()) {
                                 break outer;
                             } else if (c > NamedCharacters.NAMES[lo].charAt(entCol)) {
                                 lo++;
                             } else {
                                 break loloop;
                             }
@@ -6388,17 +6337,17 @@ public class Tokenizer implements Locato
                         continue;
                     }
 
                     if (candidate == -1) {
                         /*
                          * If no match can be made, then this is a parse error.
                          */
                         errNoNamedCharacterMatch();
-                        emitOrAppendStrBuf(returnState);
+                        emitOrAppendCharRefBuf(returnState);
                         state = returnState;
                         continue eofloop;
                     } else {
                         @Const @CharacterName String candidateName = NamedCharacters.NAMES[candidate];
                         if (candidateName.length() == 0
                                 || candidateName.charAt(candidateName.length() - 1) != ';') {
                             /*
                              * If the last character matched is not a U+003B
@@ -6406,37 +6355,37 @@ public class Tokenizer implements Locato
                              */
                             if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
                                 /*
                                  * If the entity is being consumed as part of an
                                  * attribute, and the last character matched is
                                  * not a U+003B SEMICOLON (;),
                                  */
                                 char ch;
-                                if (strBufMark == strBufLen) {
+                                if (charRefBufMark == charRefBufLen) {
                                     ch = '\u0000';
                                 } else {
-                                    ch = strBuf[strBufMark];
+                                    ch = charRefBuf[charRefBufMark];
                                 }
                                 if ((ch >= '0' && ch <= '9')
                                         || (ch >= 'A' && ch <= 'Z')
                                         || (ch >= 'a' && ch <= 'z')) {
                                     /*
                                      * and the next character is in the range
                                      * U+0030 DIGIT ZERO to U+0039 DIGIT NINE,
                                      * U+0041 LATIN CAPITAL LETTER A to U+005A
                                      * LATIN CAPITAL LETTER Z, or U+0061 LATIN
                                      * SMALL LETTER A to U+007A LATIN SMALL
                                      * LETTER Z, then, for historical reasons,
                                      * all the characters that were matched
                                      * after the U+0026 AMPERSAND (&) must be
                                      * unconsumed, and nothing is returned.
                                      */
                                     errNoNamedCharacterMatch();
-                                    appendStrBufToLongStrBuf();
+                                    appendCharRefBufToStrBuf();
                                     state = returnState;
                                     continue eofloop;
                                 }
                             }
                             if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
                                 errUnescapedAmpersandInterpretedAsCharacterReference();
                             } else {
                                 errNotSemicolonTerminated();
@@ -6456,24 +6405,23 @@ public class Tokenizer implements Locato
                         // ]NOCPP]
                         // CPPONLY: val[1] == 0
                         ) {
                             emitOrAppendOne(val, returnState);
                         } else {
                             emitOrAppendTwo(val, returnState);
                         }
                         // this is so complicated!
-                        if (strBufMark < strBufLen) {
+                        if (charRefBufMark < charRefBufLen) {
                             if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
-                                for (int i = strBufMark; i < strBufLen; i++) {
-                                    appendLongStrBuf(strBuf[i]);
-                                }
+                                appendStrBuf(charRefBuf, charRefBufMark,
+                                        charRefBufLen - charRefBufMark);
                             } else {
-                                tokenHandler.characters(strBuf, strBufMark,
-                                        strBufLen - strBufMark);
+                                tokenHandler.characters(charRefBuf, charRefBufMark,
+                                        charRefBufLen - charRefBufMark);
                             }
                         }
                         state = returnState;
                         continue eofloop;
                         /*
                          * If the markup contains I'm &notit; I tell you, the
                          * entity is parsed as "not", as in, I'm ¬it; I tell
                          * you. But if the markup was I'm &notin; I tell you,
@@ -6490,17 +6438,17 @@ public class Tokenizer implements Locato
                      * character and, if appropriate, the X character). This is
                      * a parse error; nothing is returned.
                      * 
                      * Otherwise, if the next character is a U+003B SEMICOLON,
                      * consume that too. If it isn't, there is a parse error.
                      */
                     if (!seenDigits) {
                         errNoDigitsInNCR();
-                        emitOrAppendStrBuf(returnState);
+                        emitOrAppendCharRefBuf(returnState);
                         state = returnState;
                         continue;
                     } else {
                         errCharRefLacksSemicolon();
                     }
                     // WARNING previous state sets reconsume
                     handleNcrValue(returnState);
                     state = returnState;
@@ -6553,35 +6501,35 @@ public class Tokenizer implements Locato
 
     /**
      * @param val
      * @throws SAXException
      */
     private void emitOrAppendTwo(@Const @NoLength char[] val, int returnState)
             throws SAXException {
         if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
-            appendLongStrBuf(val[0]);
-            appendLongStrBuf(val[1]);
+            appendStrBuf(val[0]);
+            appendStrBuf(val[1]);
         } else {
             tokenHandler.characters(val, 0, 2);
         }
     }
 
     private void emitOrAppendOne(@Const @NoLength char[] val, int returnState)
             throws SAXException {
         if ((returnState & DATA_AND_RCDATA_MASK) != 0) {
-            appendLongStrBuf(val[0]);
+            appendStrBuf(val[0]);
         } else {
             tokenHandler.characters(val, 0, 1);
         }
     }
 
     public void end() throws SAXException {
         strBuf = null;
-        longStrBuf = null;
+        charRefBuf = null;
         doctypeName = null;
         if (systemIdentifier != null) {
             Portability.releaseString(systemIdentifier);
             systemIdentifier = null;
         }
         if (publicIdentifier != null) {
             Portability.releaseString(publicIdentifier);
             publicIdentifier = null;
@@ -6647,29 +6595,29 @@ public class Tokenizer implements Locato
     // ]NOCPP]
     
     public boolean isInDataState() {
         return (stateSave == DATA);
     }
 
     public void resetToDataState() {
         strBufLen = 0;
-        longStrBufLen = 0;
+        charRefBufLen = 0;
         stateSave = Tokenizer.DATA;
         // line = 1; XXX line numbers
         lastCR = false;
         index = 0;
         forceQuirks = false;
         additional = '\u0000';
         entCol = -1;
         firstCharKey = -1;
         lo = 0;
         hi = 0; // will always be overwritten before use anyway
         candidate = -1;
-        strBufMark = 0;
+        charRefBufMark = 0;
         prevValue = -1;
         value = 0;
         seenDigits = false;
         endTag = false;
         shouldSuspend = false;
         initDoctypeFields();
         if (tagName != null) {
             tagName.release();
@@ -6689,37 +6637,37 @@ public class Tokenizer implements Locato
 
     public void loadState(Tokenizer other) throws SAXException {
         strBufLen = other.strBufLen;
         if (strBufLen > strBuf.length) {
             strBuf = new char[strBufLen];
         }
         System.arraycopy(other.strBuf, 0, strBuf, 0, strBufLen);
 
-        longStrBufLen = other.longStrBufLen;
-        if (longStrBufLen > longStrBuf.length) {
-            longStrBuf = new char[longStrBufLen];
+        charRefBufLen = other.charRefBufLen;
+        if (charRefBufLen > charRefBuf.length) {
+            charRefBuf = new char[charRefBufLen];
         }
-        System.arraycopy(other.longStrBuf, 0, longStrBuf, 0, longStrBufLen);
+        System.arraycopy(other.charRefBuf, 0, charRefBuf, 0, charRefBufLen);
 
         stateSave = other.stateSave;
         returnStateSave = other.returnStateSave;
         endTagExpectation = other.endTagExpectation;
         endTagExpectationAsArray = other.endTagExpectationAsArray;
         // line = 1; XXX line numbers
         lastCR = other.lastCR;
         index = other.index;
         forceQuirks = other.forceQuirks;
         additional = other.additional;
         entCol = other.entCol;
         firstCharKey = other.firstCharKey;
         lo = other.lo;
         hi = other.hi;
         candidate = other.candidate;
-        strBufMark = other.strBufMark;
+        charRefBufMark = other.charRefBufMark;
         prevValue = other.prevValue;
         value = other.value;
         seenDigits = other.seenDigits;
         endTag = other.endTag;
         shouldSuspend = false;
 
         if (other.doctypeName == null) {
             doctypeName = null;
@@ -6765,18 +6713,18 @@ public class Tokenizer implements Locato
             attributes = null;
         } else {
             attributes = other.attributes.cloneAttributes(interner);
         }
     }
 
     public void initializeWithoutStarting() throws SAXException {
         confident = false;
-        strBuf = new char[64];
-        longStrBuf = new char[1024];
+        strBuf = new char[1024];
+        charRefBuf = new char[64];
         line = 1;
         // [NOCPP[
         html4 = false;
         metaBoundaryPassed = false;
         wantsComments = tokenHandler.wantsComments();
         if (!newAttributesEachTime) {
             attributes = new HtmlAttributes(mappingLangToXmlLang);
         }
--- a/parser/html/nsHtml5Tokenizer.cpp
+++ b/parser/html/nsHtml5Tokenizer.cpp
@@ -203,16 +203,28 @@ nsHtml5Tokenizer::setLineNumber(int32_t 
 
 nsHtml5HtmlAttributes* 
 nsHtml5Tokenizer::emptyAttributes()
 {
   return nsHtml5HtmlAttributes::EMPTY_ATTRIBUTES;
 }
 
 void 
+nsHtml5Tokenizer::emitOrAppendCharRefBuf(int32_t returnState)
+{
+  if ((returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) {
+    appendCharRefBufToStrBuf();
+  } else {
+    if (charRefBufLen > 0) {
+      tokenHandler->characters(charRefBuf, 0, charRefBufLen);
+    }
+  }
+}
+
+void 
 nsHtml5Tokenizer::appendStrBuf(char16_t c)
 {
   if (strBufLen == strBuf.length) {
     jArray<char16_t,int32_t> newBuf = jArray<char16_t,int32_t>::newJArray(strBuf.length + NS_HTML5TOKENIZER_BUFFER_GROW_BY);
     nsHtml5ArrayCopy::arraycopy(strBuf, newBuf, strBuf.length);
     strBuf = newBuf;
   }
   strBuf[strBufLen++] = c;
@@ -234,49 +246,32 @@ void
 nsHtml5Tokenizer::emitStrBuf()
 {
   if (strBufLen > 0) {
     tokenHandler->characters(strBuf, 0, strBufLen);
   }
 }
 
 void 
-nsHtml5Tokenizer::appendLongStrBuf(char16_t c)
-{
-  if (longStrBufLen == longStrBuf.length) {
-    jArray<char16_t,int32_t> newBuf = jArray<char16_t,int32_t>::newJArray(longStrBufLen + (longStrBufLen >> 1));
-    nsHtml5ArrayCopy::arraycopy(longStrBuf, newBuf, longStrBuf.length);
-    longStrBuf = newBuf;
-  }
-  longStrBuf[longStrBufLen++] = c;
-}
-
-void 
-nsHtml5Tokenizer::appendLongStrBuf(char16_t* buffer, int32_t offset, int32_t length)
+nsHtml5Tokenizer::appendStrBuf(char16_t* buffer, int32_t offset, int32_t length)
 {
-  int32_t reqLen = longStrBufLen + length;
-  if (longStrBuf.length < reqLen) {
+  int32_t reqLen = strBufLen + length;
+  if (strBuf.length < reqLen) {
     jArray<char16_t,int32_t> newBuf = jArray<char16_t,int32_t>::newJArray(reqLen + (reqLen >> 1));
-    nsHtml5ArrayCopy::arraycopy(longStrBuf, newBuf, longStrBuf.length);
-    longStrBuf = newBuf;
+    nsHtml5ArrayCopy::arraycopy(strBuf, newBuf, strBuf.length);
+    strBuf = newBuf;
   }
-  nsHtml5ArrayCopy::arraycopy(buffer, offset, longStrBuf, longStrBufLen, length);
-  longStrBufLen = reqLen;
-}
-
-nsString* 
-nsHtml5Tokenizer::longStrBufToString()
-{
-  return nsHtml5Portability::newStringFromBuffer(longStrBuf, 0, longStrBufLen);
+  nsHtml5ArrayCopy::arraycopy(buffer, offset, strBuf, strBufLen, length);
+  strBufLen = reqLen;
 }
 
 void 
 nsHtml5Tokenizer::emitComment(int32_t provisionalHyphens, int32_t pos)
 {
-  tokenHandler->comment(longStrBuf, 0, longStrBufLen - provisionalHyphens);
+  tokenHandler->comment(strBuf, 0, strBufLen - provisionalHyphens);
   cstart = pos + 1;
 }
 
 void 
 nsHtml5Tokenizer::flushChars(char16_t* buf, int32_t pos)
 {
   if (pos > cstart) {
     tokenHandler->characters(buf, cstart, pos - cstart);
@@ -348,17 +343,17 @@ nsHtml5Tokenizer::addAttributeWithoutVal
     attributeName = nullptr;
   }
 }
 
 void 
 nsHtml5Tokenizer::addAttributeWithValue()
 {
   if (attributeName) {
-    nsString* val = longStrBufToString();
+    nsString* val = strBufToString();
     if (mViewSource) {
       mViewSource->MaybeLinkifyAttributeValue(attributeName, val);
     }
     attributes->addAttribute(attributeName, val);
     attributeName = nullptr;
   }
 }
 
@@ -434,17 +429,17 @@ nsHtml5Tokenizer::stateLoop(int32_t stat
             if (++pos == endPos) {
               NS_HTML5_BREAK(stateloop);
             }
             c = checkChar(buf, pos);
           }
           switch(c) {
             case '&': {
               flushChars(buf, pos);
-              clearStrBufAndAppend(c);
+              clearCharRefBufAndAppend(c);
               setAdditionalAndRememberAmpersandLocation('\0');
               returnState = state;
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_CONSUME_CHARACTER_REFERENCE, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
             case '<': {
               flushChars(buf, pos);
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_TAG_OPEN, reconsume, pos);
@@ -497,17 +492,17 @@ nsHtml5Tokenizer::stateLoop(int32_t stat
             case '\?': {
               if (viewingXmlSource) {
                 state = P::transition(mViewSource, NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION, reconsume, pos);
                 NS_HTML5_CONTINUE(stateloop);
               }
               if (P::reportErrors) {
                 errProcessingInstruction();
               }
-              clearLongStrBufAndAppend(c);
+              clearStrBufAndAppend(c);
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_BOGUS_COMMENT, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
             case '>': {
               if (P::reportErrors) {
                 errLtGt();
               }
               tokenHandler->characters(nsHtml5Tokenizer::LT_GT, 0, 2);
@@ -715,29 +710,29 @@ nsHtml5Tokenizer::stateLoop(int32_t stat
               silentLineFeed();
             }
             case ' ':
             case '\t':
             case '\f': {
               continue;
             }
             case '\"': {
-              clearLongStrBuf();
+              clearStrBuf();
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_DOUBLE_QUOTED, reconsume, pos);
               NS_HTML5_BREAK(beforeattributevalueloop);
             }
             case '&': {
-              clearLongStrBuf();
+              clearStrBuf();
               reconsume = true;
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_UNQUOTED, reconsume, pos);
 
               NS_HTML5_CONTINUE(stateloop);
             }
             case '\'': {
-              clearLongStrBuf();
+              clearStrBuf();
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_SINGLE_QUOTED, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
             case '>': {
               if (P::reportErrors) {
                 errAttributeValueMissing();
               }
               addAttributeWithoutValue();
@@ -753,17 +748,17 @@ nsHtml5Tokenizer::stateLoop(int32_t stat
             case '<':
             case '=':
             case '`': {
               if (P::reportErrors) {
                 errLtOrEqualsOrGraveInUnquotedAttributeOrNull(c);
               }
             }
             default: {
-              clearLongStrBufAndAppend(c);
+              clearStrBufAndAppend(c);
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_UNQUOTED, reconsume, pos);
 
               NS_HTML5_CONTINUE(stateloop);
             }
           }
         }
         beforeattributevalueloop_end: ;
       }
@@ -779,35 +774,35 @@ nsHtml5Tokenizer::stateLoop(int32_t stat
           }
           switch(c) {
             case '\"': {
               addAttributeWithValue();
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_VALUE_QUOTED, reconsume, pos);
               NS_HTML5_BREAK(attributevaluedoublequotedloop);
             }
             case '&': {
-              clearStrBufAndAppend(c);
+              clearCharRefBufAndAppend(c);
               setAdditionalAndRememberAmpersandLocation('\"');
               returnState = state;
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_CONSUME_CHARACTER_REFERENCE, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
             case '\r': {
-              appendLongStrBufCarriageReturn();
+              appendStrBufCarriageReturn();
               NS_HTML5_BREAK(stateloop);
             }
             case '\n': {
-              appendLongStrBufLineFeed();
+              appendStrBufLineFeed();
               continue;
             }
             case '\0': {
               c = 0xfffd;
             }
             default: {
-              appendLongStrBuf(c);
+              appendStrBuf(c);
               continue;
             }
           }
         }
         attributevaluedoublequotedloop_end: ;
       }
       case NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_VALUE_QUOTED: {
         for (; ; ) {
@@ -899,17 +894,17 @@ nsHtml5Tokenizer::stateLoop(int32_t stat
             case ' ':
             case '\t':
             case '\f': {
               addAttributeWithValue();
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_NAME, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
             case '&': {
-              clearStrBufAndAppend(c);
+              clearCharRefBufAndAppend(c);
               setAdditionalAndRememberAmpersandLocation('>');
               returnState = state;
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_CONSUME_CHARACTER_REFERENCE, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
             case '>': {
               addAttributeWithValue();
               state = P::transition(mViewSource, emitCurrentTagToken(false, pos), reconsume, pos);
@@ -927,17 +922,17 @@ nsHtml5Tokenizer::stateLoop(int32_t stat
             case '=':
             case '`': {
               if (P::reportErrors) {
                 errUnquotedAttributeValOrNull(c);
               }
             }
             default: {
 
-              appendLongStrBuf(c);
+              appendStrBuf(c);
               continue;
             }
           }
         }
       }
       case NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_NAME: {
         for (; ; ) {
           if (++pos == endPos) {
@@ -999,40 +994,40 @@ nsHtml5Tokenizer::stateLoop(int32_t stat
       case NS_HTML5TOKENIZER_MARKUP_DECLARATION_OPEN: {
         for (; ; ) {
           if (++pos == endPos) {
             NS_HTML5_BREAK(stateloop);
           }
           c = checkChar(buf, pos);
           switch(c) {
             case '-': {
-              clearLongStrBufAndAppend(c);
+              clearStrBufAndAppend(c);
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_MARKUP_DECLARATION_HYPHEN, reconsume, pos);
               NS_HTML5_BREAK(markupdeclarationopenloop);
             }
             case 'd':
             case 'D': {
-              clearLongStrBufAndAppend(c);
+              clearStrBufAndAppend(c);
               index = 0;
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_MARKUP_DECLARATION_OCTYPE, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
             case '[': {
               if (tokenHandler->cdataSectionAllowed()) {
-                clearLongStrBufAndAppend(c);
+                clearStrBufAndAppend(c);
                 index = 0;
                 state = P::transition(mViewSource, NS_HTML5TOKENIZER_CDATA_START, reconsume, pos);
                 NS_HTML5_CONTINUE(stateloop);
               }
             }
             default: {
               if (P::reportErrors) {
                 errBogusComment();
               }
-              clearLongStrBuf();
+              clearStrBuf();
               reconsume = true;
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_BOGUS_COMMENT, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
           }
         }
         markupdeclarationopenloop_end: ;
       }
@@ -1042,17 +1037,17 @@ nsHtml5Tokenizer::stateLoop(int32_t stat
             NS_HTML5_BREAK(stateloop);
           }
           c = checkChar(buf, pos);
           switch(c) {
             case '\0': {
               NS_HTML5_BREAK(stateloop);
             }
             case '-': {
-              clearLongStrBuf();
+              clearStrBuf();
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_COMMENT_START, reconsume, pos);
               NS_HTML5_BREAK(markupdeclarationhyphenloop);
             }
             default: {
               if (P::reportErrors) {
                 errBogusComment();
               }
               reconsume = true;
@@ -1066,108 +1061,108 @@ nsHtml5Tokenizer::stateLoop(int32_t stat
       case NS_HTML5TOKENIZER_COMMENT_START: {
         for (; ; ) {
           if (++pos == endPos) {
             NS_HTML5_BREAK(stateloop);
           }
           c = checkChar(buf, pos);
           switch(c) {
             case '-': {
-              appendLongStrBuf(c);
+              appendStrBuf(c);
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_COMMENT_START_DASH, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
             case '>': {
               if (P::reportErrors) {
                 errPrematureEndOfComment();
               }
               emitComment(0, pos);
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_DATA, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
             case '\r': {
-              appendLongStrBufCarriageReturn();
+              appendStrBufCarriageReturn();
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_COMMENT, reconsume, pos);
               NS_HTML5_BREAK(stateloop);
             }
             case '\n': {
-              appendLongStrBufLineFeed();
+              appendStrBufLineFeed();
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_COMMENT, reconsume, pos);
               NS_HTML5_BREAK(commentstartloop);
             }
             case '\0': {
               c = 0xfffd;
             }
             default: {
-              appendLongStrBuf(c);
+              appendStrBuf(c);
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_COMMENT, reconsume, pos);
               NS_HTML5_BREAK(commentstartloop);
             }
           }
         }
         commentstartloop_end: ;
       }
       case NS_HTML5TOKENIZER_COMMENT: {
         for (; ; ) {
           if (++pos == endPos) {
             NS_HTML5_BREAK(stateloop);
           }
           c = checkChar(buf, pos);
           switch(c) {
             case '-': {
-              appendLongStrBuf(c);
+              appendStrBuf(c);
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_COMMENT_END_DASH, reconsume, pos);
               NS_HTML5_BREAK(commentloop);
             }
             case '\r': {
-              appendLongStrBufCarriageReturn();
+              appendStrBufCarriageReturn();
               NS_HTML5_BREAK(stateloop);
             }
             case '\n': {
-              appendLongStrBufLineFeed();
+              appendStrBufLineFeed();
               continue;
             }
             case '\0': {
               c = 0xfffd;
             }
             default: {
-              appendLongStrBuf(c);
+              appendStrBuf(c);
               continue;
             }
           }
         }
         commentloop_end: ;
       }
       case NS_HTML5TOKENIZER_COMMENT_END_DASH: {
         for (; ; ) {
           if (++pos == endPos) {
             NS_HTML5_BREAK(stateloop);
           }
           c = checkChar(buf, pos);
           switch(c) {
             case '-': {
-              appendLongStrBuf(c);
+              appendStrBuf(c);
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_COMMENT_END, reconsume, pos);
               NS_HTML5_BREAK(commentenddashloop);
             }
             case '\r': {
-              appendLongStrBufCarriageReturn();
+              appendStrBufCarriageReturn();
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_COMMENT, reconsume, pos);
               NS_HTML5_BREAK(stateloop);
             }
             case '\n': {
-              appendLongStrBufLineFeed();
+              appendStrBufLineFeed();
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_COMMENT, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
             case '\0': {
               c = 0xfffd;
             }
             default: {
-              appendLongStrBuf(c);
+              appendStrBuf(c);
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_COMMENT, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
           }
         }
         commentenddashloop_end: ;
       }
       case NS_HTML5TOKENIZER_COMMENT_END: {
@@ -1178,42 +1173,42 @@ nsHtml5Tokenizer::stateLoop(int32_t stat
           c = checkChar(buf, pos);
           switch(c) {
             case '>': {
               emitComment(2, pos);
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_DATA, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
             case '-': {
-              adjustDoubleHyphenAndAppendToLongStrBufAndErr(c);
+              adjustDoubleHyphenAndAppendToStrBufAndErr(c);
               continue;
             }
             case '\r': {
-              adjustDoubleHyphenAndAppendToLongStrBufCarriageReturn();
+              adjustDoubleHyphenAndAppendToStrBufCarriageReturn();
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_COMMENT, reconsume, pos);
               NS_HTML5_BREAK(stateloop);
             }
             case '\n': {
-              adjustDoubleHyphenAndAppendToLongStrBufLineFeed();
+              adjustDoubleHyphenAndAppendToStrBufLineFeed();
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_COMMENT, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
             case '!': {
               if (P::reportErrors) {
                 errHyphenHyphenBang();
               }
-              appendLongStrBuf(c);
+              appendStrBuf(c);
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_COMMENT_END_BANG, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
             case '\0': {
               c = 0xfffd;
             }
             default: {
-              adjustDoubleHyphenAndAppendToLongStrBufAndErr(c);
+              adjustDoubleHyphenAndAppendToStrBufAndErr(c);
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_COMMENT, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
           }
         }
 
       }
       case NS_HTML5TOKENIZER_COMMENT_END_BANG: {
@@ -1224,87 +1219,87 @@ nsHtml5Tokenizer::stateLoop(int32_t stat
           c = checkChar(buf, pos);
           switch(c) {
             case '>': {
               emitComment(3, pos);
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_DATA, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
             case '-': {
-              appendLongStrBuf(c);
+              appendStrBuf(c);
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_COMMENT_END_DASH, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
             case '\r': {
-              appendLongStrBufCarriageReturn();
+              appendStrBufCarriageReturn();
               NS_HTML5_BREAK(stateloop);
             }
             case '\n': {
-              appendLongStrBufLineFeed();
+              appendStrBufLineFeed();
               continue;
             }
             case '\0': {
               c = 0xfffd;
             }
             default: {
-              appendLongStrBuf(c);
+              appendStrBuf(c);
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_COMMENT, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
           }
         }
       }
       case NS_HTML5TOKENIZER_COMMENT_START_DASH: {
         if (++pos == endPos) {
           NS_HTML5_BREAK(stateloop);
         }
         c = checkChar(buf, pos);
         switch(c) {
           case '-': {
-            appendLongStrBuf(c);
+            appendStrBuf(c);
             state = P::transition(mViewSource, NS_HTML5TOKENIZER_COMMENT_END, reconsume, pos);
             NS_HTML5_CONTINUE(stateloop);
           }
           case '>': {
             if (P::reportErrors) {
               errPrematureEndOfComment();
             }
             emitComment(1, pos);
             state = P::transition(mViewSource, NS_HTML5TOKENIZER_DATA, reconsume, pos);
             NS_HTML5_CONTINUE(stateloop);
           }
           case '\r': {
-            appendLongStrBufCarriageReturn();
+            appendStrBufCarriageReturn();
             state = P::transition(mViewSource, NS_HTML5TOKENIZER_COMMENT, reconsume, pos);
             NS_HTML5_BREAK(stateloop);
           }
           case '\n': {
-            appendLongStrBufLineFeed();
+            appendStrBufLineFeed();
             state = P::transition(mViewSource, NS_HTML5TOKENIZER_COMMENT, reconsume, pos);
             NS_HTML5_CONTINUE(stateloop);
           }
           case '\0': {
             c = 0xfffd;
           }
           default: {
-            appendLongStrBuf(c);
+            appendStrBuf(c);
             state = P::transition(mViewSource, NS_HTML5TOKENIZER_COMMENT, reconsume, pos);
             NS_HTML5_CONTINUE(stateloop);
           }
         }
       }
       case NS_HTML5TOKENIZER_CDATA_START: {
         for (; ; ) {
           if (++pos == endPos) {
             NS_HTML5_BREAK(stateloop);
           }
           c = checkChar(buf, pos);
           if (index < 6) {
             if (c == nsHtml5Tokenizer::CDATA_LSQB[index]) {
-              appendLongStrBuf(c);
+              appendStrBuf(c);
             } else {
               if (P::reportErrors) {
                 errBogusComment();
               }
               reconsume = true;
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_BOGUS_COMMENT, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
@@ -1413,35 +1408,35 @@ nsHtml5Tokenizer::stateLoop(int32_t stat
           }
           switch(c) {
             case '\'': {
               addAttributeWithValue();
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_VALUE_QUOTED, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
             case '&': {
-              clearStrBufAndAppend(c);
+              clearCharRefBufAndAppend(c);
               setAdditionalAndRememberAmpersandLocation('\'');
               returnState = state;
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_CONSUME_CHARACTER_REFERENCE, reconsume, pos);
               NS_HTML5_BREAK(attributevaluesinglequotedloop);
             }
             case '\r': {
-              appendLongStrBufCarriageReturn();
+              appendStrBufCarriageReturn();
               NS_HTML5_BREAK(stateloop);
             }
             case '\n': {
-              appendLongStrBufLineFeed();
+              appendStrBufLineFeed();
               continue;
             }
             case '\0': {
               c = 0xfffd;
             }
             default: {
-              appendLongStrBuf(c);
+              appendStrBuf(c);
               continue;
             }
           }
         }
         attributevaluesinglequotedloop_end: ;
       }
       case NS_HTML5TOKENIZER_CONSUME_CHARACTER_REFERENCE: {
         if (++pos == endPos) {
@@ -1454,53 +1449,53 @@ nsHtml5Tokenizer::stateLoop(int32_t stat
         switch(c) {
           case ' ':
           case '\t':
           case '\n':
           case '\r':
           case '\f':
           case '<':
           case '&': {
-            emitOrAppendStrBuf(returnState);
+            emitOrAppendCharRefBuf(returnState);
             if (!(returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) {
               cstart = pos;
             }
             reconsume = true;
             state = P::transition(mViewSource, returnState, reconsume, pos);
             NS_HTML5_CONTINUE(stateloop);
           }
           case '#': {
-            appendStrBuf('#');
+            appendCharRefBuf('#');
             state = P::transition(mViewSource, NS_HTML5TOKENIZER_CONSUME_NCR, reconsume, pos);
             NS_HTML5_CONTINUE(stateloop);
           }
           default: {
             if (c == additional) {
-              emitOrAppendStrBuf(returnState);
+              emitOrAppendCharRefBuf(returnState);
               reconsume = true;
               state = P::transition(mViewSource, returnState, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
             if (c >= 'a' && c <= 'z') {
               firstCharKey = c - 'a' + 26;
             } else if (c >= 'A' && c <= 'Z') {
               firstCharKey = c - 'A';
             } else {
               if (P::reportErrors) {
                 errNoNamedCharacterMatch();
               }
-              emitOrAppendStrBuf(returnState);
+              emitOrAppendCharRefBuf(returnState);
               if (!(returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) {
                 cstart = pos;
               }
               reconsume = true;
               state = P::transition(mViewSource, returnState, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
-            appendStrBuf(c);
+            appendCharRefBuf(c);
             state = P::transition(mViewSource, NS_HTML5TOKENIZER_CHARACTER_REFERENCE_HILO_LOOKUP, reconsume, pos);
           }
         }
       }
       case NS_HTML5TOKENIZER_CHARACTER_REFERENCE_HILO_LOOKUP: {
         {
           if (++pos == endPos) {
             NS_HTML5_BREAK(stateloop);
@@ -1515,30 +1510,30 @@ nsHtml5Tokenizer::stateLoop(int32_t stat
             if (row) {
               hilo = row[firstCharKey];
             }
           }
           if (!hilo) {
             if (P::reportErrors) {
               errNoNamedCharacterMatch();
             }
-            emitOrAppendStrBuf(returnState);
+            emitOrAppendCharRefBuf(returnState);
             if (!(returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) {
               cstart = pos;
             }
             reconsume = true;
             state = P::transition(mViewSource, returnState, reconsume, pos);
             NS_HTML5_CONTINUE(stateloop);
           }
-          appendStrBuf(c);
+          appendCharRefBuf(c);
           lo = hilo & 0xFFFF;
           hi = hilo >> 16;
           entCol = -1;
           candidate = -1;
-          strBufMark = 0;
+          charRefBufMark = 0;
           state = P::transition(mViewSource, NS_HTML5TOKENIZER_CHARACTER_REFERENCE_TAIL, reconsume, pos);
         }
       }
       case NS_HTML5TOKENIZER_CHARACTER_REFERENCE_TAIL: {
         for (; ; ) {
           if (++pos == endPos) {
             NS_HTML5_BREAK(stateloop);
           }
@@ -1548,17 +1543,17 @@ nsHtml5Tokenizer::stateLoop(int32_t stat
           }
           entCol++;
           for (; ; ) {
             if (hi < lo) {
               NS_HTML5_BREAK(outer);
             }
             if (entCol == nsHtml5NamedCharacters::NAMES[lo].length()) {
               candidate = lo;
-              strBufMark = strBufLen;
+              charRefBufMark = charRefBufLen;
               lo++;
             } else if (entCol > nsHtml5NamedCharacters::NAMES[lo].length()) {
               NS_HTML5_BREAK(outer);
             } else if (c > nsHtml5NamedCharacters::NAMES[lo].charAt(entCol)) {
               lo++;
             } else {
               NS_HTML5_BREAK(loloop);
             }
@@ -1578,53 +1573,53 @@ nsHtml5Tokenizer::stateLoop(int32_t stat
             } else {
               NS_HTML5_BREAK(hiloop);
             }
           }
           hiloop_end: ;
           if (c == ';') {
             if (entCol + 1 == nsHtml5NamedCharacters::NAMES[lo].length()) {
               candidate = lo;
-              strBufMark = strBufLen;
+              charRefBufMark = charRefBufLen;
             }
             NS_HTML5_BREAK(outer);
           }
           if (hi < lo) {
             NS_HTML5_BREAK(outer);
           }
-          appendStrBuf(c);
+          appendCharRefBuf(c);
           continue;
         }
         outer_end: ;
         if (candidate == -1) {
           if (P::reportErrors) {
             errNoNamedCharacterMatch();
           }
-          emitOrAppendStrBuf(returnState);
+          emitOrAppendCharRefBuf(returnState);
           if (!(returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) {
             cstart = pos;
           }
           reconsume = true;
           state = P::transition(mViewSource, returnState, reconsume, pos);
           NS_HTML5_CONTINUE(stateloop);
         } else {
           const nsHtml5CharacterName& candidateName = nsHtml5NamedCharacters::NAMES[candidate];
           if (!candidateName.length() || candidateName.charAt(candidateName.length() - 1) != ';') {
             if ((returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) {
               char16_t ch;
-              if (strBufMark == strBufLen) {
+              if (charRefBufMark == charRefBufLen) {
                 ch = c;
               } else {
-                ch = strBuf[strBufMark];
+                ch = charRefBuf[charRefBufMark];
               }
               if (ch == '=' || (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')) {
                 if (P::reportErrors) {
                   errNoNamedCharacterMatch();
                 }
-                appendStrBufToLongStrBuf();
+                appendCharRefBufToStrBuf();
                 reconsume = true;
                 state = P::transition(mViewSource, returnState, reconsume, pos);
                 NS_HTML5_CONTINUE(stateloop);
               }
             }
             if ((returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) {
               if (P::reportErrors) {
                 errUnescapedAmpersandInterpretedAsCharacterReference();
@@ -1637,26 +1632,24 @@ nsHtml5Tokenizer::stateLoop(int32_t stat
           }
           P::completedNamedCharacterReference(mViewSource);
           const char16_t* val = nsHtml5NamedCharacters::VALUES[candidate];
           if (!val[1]) {
             emitOrAppendOne(val, returnState);
           } else {
             emitOrAppendTwo(val, returnState);
           }
-          if (strBufMark < strBufLen) {
+          if (charRefBufMark < charRefBufLen) {
             if ((returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) {
-              for (int32_t i = strBufMark; i < strBufLen; i++) {
-                appendLongStrBuf(strBuf[i]);
-              }
+              appendStrBuf(charRefBuf, charRefBufMark, charRefBufLen - charRefBufMark);
             } else {
-              tokenHandler->characters(strBuf, strBufMark, strBufLen - strBufMark);
+              tokenHandler->characters(charRefBuf, charRefBufMark, charRefBufLen - charRefBufMark);
             }
           }
-          bool earlyBreak = (c == ';' && strBufMark == strBufLen);
+          bool earlyBreak = (c == ';' && charRefBufMark == charRefBufLen);
           if (!(returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) {
             cstart = earlyBreak ? pos + 1 : pos;
           }
           reconsume = !earlyBreak;
           state = P::transition(mViewSource, returnState, reconsume, pos);
           NS_HTML5_CONTINUE(stateloop);
         }
       }
@@ -1666,17 +1659,17 @@ nsHtml5Tokenizer::stateLoop(int32_t stat
         }
         c = checkChar(buf, pos);
         prevValue = -1;
         value = 0;
         seenDigits = false;
         switch(c) {
           case 'x':
           case 'X': {
-            appendStrBuf(c);
+            appendCharRefBuf(c);
             state = P::transition(mViewSource, NS_HTML5TOKENIZER_HEX_NCR_LOOP, reconsume, pos);
             NS_HTML5_CONTINUE(stateloop);
           }
           default: {
             reconsume = true;
             state = P::transition(mViewSource, NS_HTML5TOKENIZER_DECIMAL_NRC_LOOP, reconsume, pos);
           }
         }
@@ -1706,30 +1699,30 @@ nsHtml5Tokenizer::stateLoop(int32_t stat
                 cstart = pos + 1;
               }
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_HANDLE_NCR_VALUE, reconsume, pos);
               NS_HTML5_BREAK(decimalloop);
             } else {
               if (P::reportErrors) {
                 errNoDigitsInNCR();
               }
-              appendStrBuf(';');
-              emitOrAppendStrBuf(returnState);
+              appendCharRefBuf(';');
+              emitOrAppendCharRefBuf(returnState);
               if (!(returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) {
                 cstart = pos + 1;
               }
               state = P::transition(mViewSource, returnState, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
           } else {
             if (!seenDigits) {
               if (P::reportErrors) {
                 errNoDigitsInNCR();
               }
-              emitOrAppendStrBuf(returnState);
+              emitOrAppendCharRefBuf(returnState);
               if (!(returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) {
                 cstart = pos;
               }
               reconsume = true;
               state = P::transition(mViewSource, returnState, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             } else {
               if (P::reportErrors) {
@@ -1782,30 +1775,30 @@ nsHtml5Tokenizer::stateLoop(int32_t stat
                 cstart = pos + 1;
               }
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_HANDLE_NCR_VALUE, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             } else {
               if (P::reportErrors) {
                 errNoDigitsInNCR();
               }
-              appendStrBuf(';');
-              emitOrAppendStrBuf(returnState);
+              appendCharRefBuf(';');
+              emitOrAppendCharRefBuf(returnState);
               if (!(returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) {
                 cstart = pos + 1;
               }
               state = P::transition(mViewSource, returnState, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
           } else {
             if (!seenDigits) {
               if (P::reportErrors) {
                 errNoDigitsInNCR();
               }
-              emitOrAppendStrBuf(returnState);
+              emitOrAppendCharRefBuf(returnState);
               if (!(returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) {
                 cstart = pos;
               }
               reconsume = true;
               state = P::transition(mViewSource, returnState, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             } else {
               if (P::reportErrors) {
@@ -1864,26 +1857,26 @@ nsHtml5Tokenizer::stateLoop(int32_t stat
             state = P::transition(mViewSource, NS_HTML5TOKENIZER_DATA, reconsume, pos);
             NS_HTML5_CONTINUE(stateloop);
           }
           case '\r': {
             silentCarriageReturn();
             if (P::reportErrors) {
               errGarbageAfterLtSlash();
             }
-            clearLongStrBufAndAppend('\n');
+            clearStrBufAndAppend('\n');
             state = P::transition(mViewSource, NS_HTML5TOKENIZER_BOGUS_COMMENT, reconsume, pos);
             NS_HTML5_BREAK(stateloop);
           }
           case '\n': {
             silentLineFeed();
             if (P::reportErrors) {
               errGarbageAfterLtSlash();
             }
-            clearLongStrBufAndAppend('\n');
+            clearStrBufAndAppend('\n');
             state = P::transition(mViewSource, NS_HTML5TOKENIZER_BOGUS_COMMENT, reconsume, pos);
             NS_HTML5_CONTINUE(stateloop);
           }
           case '\0': {
             c = 0xfffd;
           }
           default: {
             if (c >= 'A' && c <= 'Z') {
@@ -1893,17 +1886,17 @@ nsHtml5Tokenizer::stateLoop(int32_t stat
               endTag = true;
               clearStrBufAndAppend(c);
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_TAG_NAME, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             } else {
               if (P::reportErrors) {
                 errGarbageAfterLtSlash();
               }
-              clearLongStrBufAndAppend(c);
+              clearStrBufAndAppend(c);
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_BOGUS_COMMENT, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
           }
         }
       }
       case NS_HTML5TOKENIZER_RCDATA: {
         for (; ; ) {
@@ -1913,17 +1906,17 @@ nsHtml5Tokenizer::stateLoop(int32_t stat
             if (++pos == endPos) {
               NS_HTML5_BREAK(stateloop);
             }
             c = checkChar(buf, pos);
           }
           switch(c) {
             case '&': {
               flushChars(buf, pos);
-              clearStrBufAndAppend(c);
+              clearCharRefBufAndAppend(c);
               setAdditionalAndRememberAmpersandLocation('\0');
               returnState = state;
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_CONSUME_CHARACTER_REFERENCE, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
             case '<': {
               flushChars(buf, pos);
               returnState = state;
@@ -2086,33 +2079,33 @@ nsHtml5Tokenizer::stateLoop(int32_t stat
           }
           switch(c) {
             case '>': {
               emitComment(0, pos);
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_DATA, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
             case '-': {
-              appendLongStrBuf(c);
+              appendStrBuf(c);
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_BOGUS_COMMENT_HYPHEN, reconsume, pos);
               NS_HTML5_BREAK(boguscommentloop);
             }
             case '\r': {
-              appendLongStrBufCarriageReturn();
+              appendStrBufCarriageReturn();
               NS_HTML5_BREAK(stateloop);
             }
             case '\n': {
-              appendLongStrBufLineFeed();
+              appendStrBufLineFeed();
               continue;
             }
             case '\0': {
               c = 0xfffd;
             }
             default: {
-              appendLongStrBuf(c);
+              appendStrBuf(c);
               continue;
             }
           }
         }
         boguscommentloop_end: ;
       }
       case NS_HTML5TOKENIZER_BOGUS_COMMENT_HYPHEN: {
         boguscommenthyphenloop: for (; ; ) {
@@ -2126,30 +2119,30 @@ nsHtml5Tokenizer::stateLoop(int32_t stat
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_DATA, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
             case '-': {
               appendSecondHyphenToBogusComment();
               NS_HTML5_CONTINUE(boguscommenthyphenloop);
             }
             case '\r': {
-              appendLongStrBufCarriageReturn();
+              appendStrBufCarriageReturn();
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_BOGUS_COMMENT, reconsume, pos);
               NS_HTML5_BREAK(stateloop);
             }
             case '\n': {
-              appendLongStrBufLineFeed();
+              appendStrBufLineFeed();
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_BOGUS_COMMENT, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
             case '\0': {
               c = 0xfffd;
             }
             default: {
-              appendLongStrBuf(c);
+              appendStrBuf(c);
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_BOGUS_COMMENT, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
           }
         }
 
       }
       case NS_HTML5TOKENIZER_SCRIPT_DATA: {
@@ -2636,17 +2629,17 @@ nsHtml5Tokenizer::stateLoop(int32_t stat
           }
           c = checkChar(buf, pos);
           if (index < 6) {
             char16_t folded = c;
             if (c >= 'A' && c <= 'Z') {
               folded += 0x20;
             }
             if (folded == nsHtml5Tokenizer::OCTYPE[index]) {
-              appendLongStrBuf(c);
+              appendStrBuf(c);
             } else {
               if (P::reportErrors) {
                 errBogusComment();
               }
               reconsume = true;
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_BOGUS_COMMENT, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
@@ -2884,25 +2877,25 @@ nsHtml5Tokenizer::stateLoop(int32_t stat
             case '\f': {
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos);
               NS_HTML5_BREAK(afterdoctypepublickeywordloop);
             }
             case '\"': {
               if (P::reportErrors) {
                 errNoSpaceBetweenDoctypePublicKeywordAndQuote();
               }
-              clearLongStrBuf();
+              clearStrBuf();
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
             case '\'': {
               if (P::reportErrors) {
                 errNoSpaceBetweenDoctypePublicKeywordAndQuote();
               }
-              clearLongStrBuf();
+              clearStrBuf();
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
             case '>': {
               if (P::reportErrors) {
                 errExpectedPublicId();
               }
               forceQuirks = true;
@@ -2934,22 +2927,22 @@ nsHtml5Tokenizer::stateLoop(int32_t stat
               silentLineFeed();
             }
             case ' ':
             case '\t':
             case '\f': {
               continue;
             }
             case '\"': {
-              clearLongStrBuf();
+              clearStrBuf();
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
               NS_HTML5_BREAK(beforedoctypepublicidentifierloop);
             }
             case '\'': {
-              clearLongStrBuf();
+              clearStrBuf();
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
             case '>': {
               if (P::reportErrors) {
                 errExpectedPublicId();
               }
               forceQuirks = true;
@@ -2969,43 +2962,43 @@ nsHtml5Tokenizer::stateLoop(int32_t stat
       case NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED: {
         for (; ; ) {
           if (++pos == endPos) {
             NS_HTML5_BREAK(stateloop);
           }
           c = checkChar(buf, pos);
           switch(c) {
             case '\"': {
-              publicIdentifier = longStrBufToString();
+              publicIdentifier = strBufToString();
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos);
               NS_HTML5_BREAK(doctypepublicidentifierdoublequotedloop);
             }
             case '>': {
               if (P::reportErrors) {
                 errGtInPublicId();
               }
               forceQuirks = true;
-              publicIdentifier = longStrBufToString();
+              publicIdentifier = strBufToString();
               emitDoctypeToken(pos);
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_DATA, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
             case '\r': {
-              appendLongStrBufCarriageReturn();
+              appendStrBufCarriageReturn();
               NS_HTML5_BREAK(stateloop);
             }
             case '\n': {
-              appendLongStrBufLineFeed();
+              appendStrBufLineFeed();
               continue;
             }
             case '\0': {
               c = 0xfffd;
             }
             default: {
-              appendLongStrBuf(c);
+              appendStrBuf(c);
               continue;
             }
           }
         }
         doctypepublicidentifierdoublequotedloop_end: ;
       }
       case NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_IDENTIFIER: {
         for (; ; ) {
@@ -3032,25 +3025,25 @@ nsHtml5Tokenizer::stateLoop(int32_t stat
               emitDoctypeToken(pos);
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_DATA, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
             case '\"': {
               if (P::reportErrors) {
                 errNoSpaceBetweenPublicAndSystemIds();
               }
-              clearLongStrBuf();
+              clearStrBuf();
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
             case '\'': {
               if (P::reportErrors) {
                 errNoSpaceBetweenPublicAndSystemIds();
               }
-              clearLongStrBuf();
+              clearStrBuf();
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
             default: {
               bogusDoctype();
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_BOGUS_DOCTYPE, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
@@ -3078,22 +3071,22 @@ nsHtml5Tokenizer::stateLoop(int32_t stat
               continue;
             }
             case '>': {
               emitDoctypeToken(pos);
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_DATA, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
             case '\"': {
-              clearLongStrBuf();
+              clearStrBuf();
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
               NS_HTML5_BREAK(betweendoctypepublicandsystemidentifiersloop);
             }
             case '\'': {
-              clearLongStrBuf();
+              clearStrBuf();
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
             default: {
               bogusDoctype();
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_BOGUS_DOCTYPE, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
@@ -3104,43 +3097,43 @@ nsHtml5Tokenizer::stateLoop(int32_t stat
       case NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED: {
         for (; ; ) {
           if (++pos == endPos) {
             NS_HTML5_BREAK(stateloop);
           }
           c = checkChar(buf, pos);
           switch(c) {
             case '\"': {
-              systemIdentifier = longStrBufToString();
+              systemIdentifier = strBufToString();
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
             case '>': {
               if (P::reportErrors) {
                 errGtInSystemId();
               }
               forceQuirks = true;
-              systemIdentifier = longStrBufToString();
+              systemIdentifier = strBufToString();
               emitDoctypeToken(pos);
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_DATA, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
             case '\r': {
-              appendLongStrBufCarriageReturn();
+              appendStrBufCarriageReturn();
               NS_HTML5_BREAK(stateloop);
             }
             case '\n': {
-              appendLongStrBufLineFeed();
+              appendStrBufLineFeed();
               continue;
             }
             case '\0': {
               c = 0xfffd;
             }
             default: {
-              appendLongStrBuf(c);
+              appendStrBuf(c);
               continue;
             }
           }
         }
 
       }
       case NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER: {
         for (; ; ) {
@@ -3255,25 +3248,25 @@ nsHtml5Tokenizer::stateLoop(int32_t stat
             case '\f': {
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos);
               NS_HTML5_BREAK(afterdoctypesystemkeywordloop);
             }
             case '\"': {
               if (P::reportErrors) {
                 errNoSpaceBetweenDoctypeSystemKeywordAndQuote();
               }
-              clearLongStrBuf();
+              clearStrBuf();
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
             case '\'': {
               if (P::reportErrors) {
                 errNoSpaceBetweenDoctypeSystemKeywordAndQuote();
               }
-              clearLongStrBuf();
+              clearStrBuf();
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
             case '>': {
               if (P::reportErrors) {
                 errExpectedPublicId();
               }
               forceQuirks = true;
@@ -3305,22 +3298,22 @@ nsHtml5Tokenizer::stateLoop(int32_t stat
               silentLineFeed();
             }
             case ' ':
             case '\t':
             case '\f': {
               continue;
             }
             case '\"': {
-              clearLongStrBuf();
+              clearStrBuf();
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
             case '\'': {
-              clearLongStrBuf();
+              clearStrBuf();
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED, reconsume, pos);
               NS_HTML5_BREAK(beforedoctypesystemidentifierloop);
             }
             case '>': {
               if (P::reportErrors) {
                 errExpectedSystemId();
               }
               forceQuirks = true;
@@ -3340,83 +3333,83 @@ nsHtml5Tokenizer::stateLoop(int32_t stat
       case NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED: {
         for (; ; ) {
           if (++pos == endPos) {
             NS_HTML5_BREAK(stateloop);
           }
           c = checkChar(buf, pos);
           switch(c) {
             case '\'': {
-              systemIdentifier = longStrBufToString();
+              systemIdentifier = strBufToString();
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
             case '>': {
               if (P::reportErrors) {
                 errGtInSystemId();
               }
               forceQuirks = true;
-              systemIdentifier = longStrBufToString();
+              systemIdentifier = strBufToString();
               emitDoctypeToken(pos);
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_DATA, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
             case '\r': {
-              appendLongStrBufCarriageReturn();
+              appendStrBufCarriageReturn();
               NS_HTML5_BREAK(stateloop);
             }
             case '\n': {
-              appendLongStrBufLineFeed();
+              appendStrBufLineFeed();
               continue;
             }
             case '\0': {
               c = 0xfffd;
             }
             default: {
-              appendLongStrBuf(c);
+              appendStrBuf(c);
               continue;
             }
           }
         }
       }
       case NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED: {
         for (; ; ) {
           if (++pos == endPos) {
             NS_HTML5_BREAK(stateloop);
           }
           c = checkChar(buf, pos);
           switch(c) {
             case '\'': {
-              publicIdentifier = longStrBufToString();
+              publicIdentifier = strBufToString();
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_IDENTIFIER, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
             case '>': {
               if (P::reportErrors) {
                 errGtInPublicId();
               }
               forceQuirks = true;
-              publicIdentifier = longStrBufToString();
+              publicIdentifier = strBufToString();
               emitDoctypeToken(pos);
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_DATA, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
             case '\r': {
-              appendLongStrBufCarriageReturn();
+              appendStrBufCarriageReturn();
               NS_HTML5_BREAK(stateloop);
             }
             case '\n': {
-              appendLongStrBufLineFeed();
+              appendStrBufLineFeed();
               continue;
             }
             case '\0': {
               c = 0xfffd;
             }
             default: {
-              appendLongStrBuf(c);
+              appendStrBuf(c);
               continue;
             }
           }
         }
       }
       case NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION: {
         for (; ; ) {
           if (++pos == endPos) {
@@ -3516,26 +3509,16 @@ nsHtml5Tokenizer::bogusDoctype()
 void 
 nsHtml5Tokenizer::bogusDoctypeWithoutQuirks()
 {
   errBogusDoctype();
   forceQuirks = false;
 }
 
 void 
-nsHtml5Tokenizer::emitOrAppendStrBuf(int32_t returnState)
-{
-  if ((returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) {
-    appendStrBufToLongStrBuf();
-  } else {
-    emitStrBuf();
-  }
-}
-
-void 
 nsHtml5Tokenizer::handleNcrValue(int32_t returnState)
 {
   if (value <= 0xFFFF) {
     if (value >= 0x80 && value <= 0x9f) {
       errNcrInC1Range();
       char16_t* val = nsHtml5NamedCharacters::WINDOWS_1252[value - 0x80];
       emitOrAppendOne(val, returnState);
     } else if (value == 0x0) {
@@ -3620,17 +3603,17 @@ nsHtml5Tokenizer::eof()
         NS_HTML5_BREAK(eofloop);
       }
       case NS_HTML5TOKENIZER_BOGUS_COMMENT_HYPHEN: {
         emitComment(0, 0);
         NS_HTML5_BREAK(eofloop);
       }
       case NS_HTML5TOKENIZER_MARKUP_DECLARATION_OPEN: {
         errBogusComment();
-        clearLongStrBuf();
+        clearStrBuf();
         emitComment(0, 0);
         NS_HTML5_BREAK(eofloop);
       }
       case NS_HTML5TOKENIZER_MARKUP_DECLARATION_HYPHEN: {
         errBogusComment();
         emitComment(0, 0);
         NS_HTML5_BREAK(eofloop);
       }
@@ -3701,54 +3684,54 @@ nsHtml5Tokenizer::eof()
         forceQuirks = true;
         emitDoctypeToken(0);
         NS_HTML5_BREAK(eofloop);
       }
       case NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED:
       case NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED: {
         errEofInPublicId();
         forceQuirks = true;
-        publicIdentifier = longStrBufToString();
+        publicIdentifier = strBufToString();
         emitDoctypeToken(0);
         NS_HTML5_BREAK(eofloop);
       }
       case NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_IDENTIFIER:
       case NS_HTML5TOKENIZER_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER:
       case NS_HTML5TOKENIZER_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS: {
         errEofInDoctype();
         forceQuirks = true;
         emitDoctypeToken(0);
         NS_HTML5_BREAK(eofloop);
       }
       case NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED:
       case NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED: {
         errEofInSystemId();
         forceQuirks = true;
-        systemIdentifier = longStrBufToString();
+        systemIdentifier = strBufToString();
         emitDoctypeToken(0);
         NS_HTML5_BREAK(eofloop);
       }
       case NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER: {
         errEofInDoctype();
         forceQuirks = true;
         emitDoctypeToken(0);
         NS_HTML5_BREAK(eofloop);
       }
       case NS_HTML5TOKENIZER_BOGUS_DOCTYPE: {
         emitDoctypeToken(0);
         NS_HTML5_BREAK(eofloop);
       }
       case NS_HTML5TOKENIZER_CONSUME_CHARACTER_REFERENCE: {
-        emitOrAppendStrBuf(returnState);
+        emitOrAppendCharRefBuf(returnState);
         state = returnState;
         continue;
       }
       case NS_HTML5TOKENIZER_CHARACTER_REFERENCE_HILO_LOOKUP: {
         errNoNamedCharacterMatch();
-        emitOrAppendStrBuf(returnState);
+        emitOrAppendCharRefBuf(returnState);
         state = returnState;
         continue;
       }
       case NS_HTML5TOKENIZER_CHARACTER_REFERENCE_TAIL: {
         for (; ; ) {
           char16_t c = '\0';
           entCol++;
           for (; ; ) {
@@ -3768,17 +3751,17 @@ nsHtml5Tokenizer::eof()
           }
           hiloop_end: ;
           for (; ; ) {
             if (hi < lo) {
               NS_HTML5_BREAK(outer);
             }
             if (entCol == nsHtml5NamedCharacters::NAMES[lo].length()) {
               candidate = lo;
-              strBufMark = strBufLen;
+              charRefBufMark = charRefBufLen;
               lo++;
             } else if (entCol > nsHtml5NamedCharacters::NAMES[lo].length()) {
               NS_HTML5_BREAK(outer);
             } else if (c > nsHtml5NamedCharacters::NAMES[lo].charAt(entCol)) {
               lo++;
             } else {
               NS_HTML5_BREAK(loloop);
             }
@@ -3787,67 +3770,65 @@ nsHtml5Tokenizer::eof()
           if (hi < lo) {
             NS_HTML5_BREAK(outer);
           }
           continue;
         }
         outer_end: ;
         if (candidate == -1) {
           errNoNamedCharacterMatch();
-          emitOrAppendStrBuf(returnState);
+          emitOrAppendCharRefBuf(returnState);
           state = returnState;
           NS_HTML5_CONTINUE(eofloop);
         } else {
           const nsHtml5CharacterName& candidateName = nsHtml5NamedCharacters::NAMES[candidate];
           if (!candidateName.length() || candidateName.charAt(candidateName.length() - 1) != ';') {
             if ((returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) {
               char16_t ch;
-              if (strBufMark == strBufLen) {
+              if (charRefBufMark == charRefBufLen) {
                 ch = '\0';
               } else {
-                ch = strBuf[strBufMark];
+                ch = charRefBuf[charRefBufMark];
               }
               if ((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')) {
                 errNoNamedCharacterMatch();
-                appendStrBufToLongStrBuf();
+                appendCharRefBufToStrBuf();
                 state = returnState;
                 NS_HTML5_CONTINUE(eofloop);
               }
             }
             if ((returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) {
               errUnescapedAmpersandInterpretedAsCharacterReference();
             } else {
               errNotSemicolonTerminated();
             }
           }
           const char16_t* val = nsHtml5NamedCharacters::VALUES[candidate];
           if (!val[1]) {
             emitOrAppendOne(val, returnState);
           } else {
             emitOrAppendTwo(val, returnState);
           }
-          if (strBufMark < strBufLen) {
+          if (charRefBufMark < charRefBufLen) {
             if ((returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) {
-              for (int32_t i = strBufMark; i < strBufLen; i++) {
-                appendLongStrBuf(strBuf[i]);
-              }
+              appendStrBuf(charRefBuf, charRefBufMark, charRefBufLen - charRefBufMark);
             } else {
-              tokenHandler->characters(strBuf, strBufMark, strBufLen - strBufMark);
+              tokenHandler->characters(charRefBuf, charRefBufMark, charRefBufLen - charRefBufMark);
             }
           }
           state = returnState;
           NS_HTML5_CONTINUE(eofloop);
         }
       }
       case NS_HTML5TOKENIZER_CONSUME_NCR:
       case NS_HTML5TOKENIZER_DECIMAL_NRC_LOOP:
       case NS_HTML5TOKENIZER_HEX_NCR_LOOP: {
         if (!seenDigits) {
           errNoDigitsInNCR();
-          emitOrAppendStrBuf(returnState);
+          emitOrAppendCharRefBuf(returnState);
           state = returnState;
           continue;
         } else {
           errCharRefLacksSemicolon();
         }
         handleNcrValue(returnState);
         state = returnState;
         continue;
@@ -3891,38 +3872,38 @@ nsHtml5Tokenizer::internalEncodingDeclar
   }
   return false;
 }
 
 void 
 nsHtml5Tokenizer::emitOrAppendTwo(const char16_t* val, int32_t returnState)
 {
   if ((returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) {
-    appendLongStrBuf(val[0]);
-    appendLongStrBuf(val[1]);
+    appendStrBuf(val[0]);
+    appendStrBuf(val[1]);
   } else {
     tokenHandler->characters(val, 0, 2);
   }
 }
 
 void 
 nsHtml5Tokenizer::emitOrAppendOne(const char16_t* val, int32_t returnState)
 {
   if ((returnState & NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK)) {
-    appendLongStrBuf(val[0]);
+    appendStrBuf(val[0]);
   } else {
     tokenHandler->characters(val, 0, 1);
   }
 }
 
 void 
 nsHtml5Tokenizer::end()
 {
   strBuf = nullptr;
-  longStrBuf = nullptr;
+  charRefBuf = nullptr;
   doctypeName = nullptr;
   if (systemIdentifier) {
     nsHtml5Portability::releaseString(systemIdentifier);
     systemIdentifier = nullptr;
   }
   if (publicIdentifier) {
     nsHtml5Portability::releaseString(publicIdentifier);
     publicIdentifier = nullptr;
@@ -3952,28 +3933,28 @@ nsHtml5Tokenizer::isInDataState()
 {
   return (stateSave == NS_HTML5TOKENIZER_DATA);
 }
 
 void 
 nsHtml5Tokenizer::resetToDataState()
 {
   strBufLen = 0;
-  longStrBufLen = 0;
+  charRefBufLen = 0;
   stateSave = NS_HTML5TOKENIZER_DATA;
   lastCR = false;
   index = 0;
   forceQuirks = false;
   additional = '\0';
   entCol = -1;
   firstCharKey = -1;
   lo = 0;
   hi = 0;
   candidate = -1;
-  strBufMark = 0;
+  charRefBufMark = 0;
   prevValue = -1;
   value = 0;
   seenDigits = false;
   endTag = false;
   shouldSuspend = false;
   initDoctypeFields();
   if (tagName) {
     tagName->release();
@@ -3994,35 +3975,35 @@ nsHtml5Tokenizer::resetToDataState()
 void 
 nsHtml5Tokenizer::loadState(nsHtml5Tokenizer* other)
 {
   strBufLen = other->strBufLen;
   if (strBufLen > strBuf.length) {
     strBuf = jArray<char16_t,int32_t>::newJArray(strBufLen);
   }
   nsHtml5ArrayCopy::arraycopy(other->strBuf, strBuf, strBufLen);
-  longStrBufLen = other->longStrBufLen;
-  if (longStrBufLen > longStrBuf.length) {
-    longStrBuf = jArray<char16_t,int32_t>::newJArray(longStrBufLen);
+  charRefBufLen = other->charRefBufLen;
+  if (charRefBufLen > charRefBuf.length) {
+    charRefBuf = jArray<char16_t,int32_t>::newJArray(charRefBufLen);
   }
-  nsHtml5ArrayCopy::arraycopy(other->longStrBuf, longStrBuf, longStrBufLen);
+  nsHtml5ArrayCopy::arraycopy(other->charRefBuf, charRefBuf, charRefBufLen);
   stateSave = other->stateSave;
   returnStateSave = other->returnStateSave;
   endTagExpectation = other->endTagExpectation;
   endTagExpectationAsArray = other->endTagExpectationAsArray;
   lastCR = other->lastCR;
   index = other->index;
   forceQuirks = other->forceQuirks;
   additional = other->additional;
   entCol = other->entCol;
   firstCharKey = other->firstCharKey;
   lo = other->lo;
   hi = other->hi;
   candidate = other->candidate;
-  strBufMark = other->strBufMark;
+  charRefBufMark = other->charRefBufMark;
   prevValue = other->prevValue;
   value = other->value;
   seenDigits = other->seenDigits;
   endTag = other->endTag;
   shouldSuspend = false;
   if (!other->doctypeName) {
     doctypeName = nullptr;
   } else {
@@ -4063,18 +4044,18 @@ nsHtml5Tokenizer::loadState(nsHtml5Token
     attributes = other->attributes->cloneAttributes(interner);
   }
 }
 
 void 
 nsHtml5Tokenizer::initializeWithoutStarting()
 {
   confident = false;
-  strBuf = jArray<char16_t,int32_t>::newJArray(64);
-  longStrBuf = jArray<char16_t,int32_t>::newJArray(1024);
+  strBuf = jArray<char16_t,int32_t>::newJArray(1024);
+  charRefBuf = jArray<char16_t,int32_t>::newJArray(64);
   line = 1;
   resetToDataState();
 }
 
 void 
 nsHtml5Tokenizer::setEncodingDeclarationHandler(nsHtml5StreamParser* encodingDeclarationHandler)
 {
   this->encodingDeclarationHandler = encodingDeclarationHandler;
--- a/parser/html/nsHtml5Tokenizer.h
+++ b/parser/html/nsHtml5Tokenizer.h
@@ -93,31 +93,31 @@ class nsHtml5Tokenizer
   private:
     bool forceQuirks;
     char16_t additional;
     int32_t entCol;
     int32_t firstCharKey;
     int32_t lo;
     int32_t hi;
     int32_t candidate;
-    int32_t strBufMark;
+    int32_t charRefBufMark;
     int32_t prevValue;
   protected:
     int32_t value;
   private:
     bool seenDigits;
   protected:
     int32_t cstart;
   private:
     nsString* publicId;
     nsString* systemId;
     autoJArray<char16_t,int32_t> strBuf;
     int32_t strBufLen;
-    autoJArray<char16_t,int32_t> longStrBuf;
-    int32_t longStrBufLen;
+    autoJArray<char16_t,int32_t> charRefBuf;
+    int32_t charRefBufLen;
     autoJArray<char16_t,int32_t> bmpChar;
     autoJArray<char16_t,int32_t> astralChar;
   protected:
     nsHtml5ElementName* endTagExpectation;
   private:
     jArray<char16_t,int32_t> endTagExpectationAsArray;
   protected:
     bool endTag;
@@ -151,16 +151,33 @@ class nsHtml5Tokenizer
     void setLineNumber(int32_t line);
     inline int32_t getLineNumber()
     {
       return line;
     }
 
     nsHtml5HtmlAttributes* emptyAttributes();
   private:
+    inline void appendCharRefBuf(char16_t c)
+    {
+      if (charRefBufLen == charRefBuf.length) {
+        jArray<char16_t,int32_t> newBuf = jArray<char16_t,int32_t>::newJArray(charRefBuf.length + NS_HTML5TOKENIZER_BUFFER_GROW_BY);
+        nsHtml5ArrayCopy::arraycopy(charRefBuf, newBuf, charRefBuf.length);
+        charRefBuf = newBuf;
+      }
+      charRefBuf[charRefBufLen++] = c;
+    }
+
+    inline void clearCharRefBufAndAppend(char16_t c)
+    {
+      charRefBuf[0] = c;
+      charRefBufLen = 1;
+    }
+
+    void emitOrAppendCharRefBuf(int32_t returnState);
     inline void clearStrBufAndAppend(char16_t c)
     {
       strBuf[0] = c;
       strBufLen = 1;
     }
 
     inline void clearStrBuf()
     {
@@ -168,83 +185,70 @@ class nsHtml5Tokenizer
     }
 
     void appendStrBuf(char16_t c);
   protected:
     nsString* strBufToString();
   private:
     void strBufToDoctypeName();
     void emitStrBuf();
-    inline void clearLongStrBuf()
-    {
-      longStrBufLen = 0;
-    }
-
-    inline void clearLongStrBufAndAppend(char16_t c)
-    {
-      longStrBuf[0] = c;
-      longStrBufLen = 1;
-    }
-
-    void appendLongStrBuf(char16_t c);
     inline void appendSecondHyphenToBogusComment()
     {
-      appendLongStrBuf('-');
+      appendStrBuf('-');
     }
 
-    inline void adjustDoubleHyphenAndAppendToLongStrBufAndErr(char16_t c)
+    inline void adjustDoubleHyphenAndAppendToStrBufAndErr(char16_t c)
     {
       errConsecutiveHyphens();
-      appendLongStrBuf(c);
+      appendStrBuf(c);
     }
 
-    void appendLongStrBuf(char16_t* buffer, int32_t offset, int32_t length);
-    inline void appendStrBufToLongStrBuf()
+    void appendStrBuf(char16_t* buffer, int32_t offset, int32_t length);
+    inline void appendCharRefBufToStrBuf()
     {
-      appendLongStrBuf(strBuf, 0, strBufLen);
+      appendStrBuf(charRefBuf, 0, charRefBufLen);
     }
 
-    nsString* longStrBufToString();
     void emitComment(int32_t provisionalHyphens, int32_t pos);
   protected:
     void flushChars(char16_t* buf, int32_t pos);
   private:
     void strBufToElementNameString();
     int32_t emitCurrentTagToken(bool selfClosing, int32_t pos);
     void attributeNameComplete();
     void addAttributeWithoutValue();
     void addAttributeWithValue();
   public:
     void start();
     bool tokenizeBuffer(nsHtml5UTF16Buffer* buffer);
   private:
     template<class P> int32_t stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* buf, bool reconsume, int32_t returnState, int32_t endPos);
     void initDoctypeFields();
-    inline void adjustDoubleHyphenAndAppendToLongStrBufCarriageReturn()
+    inline void adjustDoubleHyphenAndAppendToStrBufCarriageReturn()
     {
       silentCarriageReturn();
-      adjustDoubleHyphenAndAppendToLongStrBufAndErr('\n');
+      adjustDoubleHyphenAndAppendToStrBufAndErr('\n');
     }
 
-    inline void adjustDoubleHyphenAndAppendToLongStrBufLineFeed()
+    inline void adjustDoubleHyphenAndAppendToStrBufLineFeed()
     {
       silentLineFeed();
-      adjustDoubleHyphenAndAppendToLongStrBufAndErr('\n');
+      adjustDoubleHyphenAndAppendToStrBufAndErr('\n');
     }
 
-    inline void appendLongStrBufLineFeed()
+    inline void appendStrBufLineFeed()
     {
       silentLineFeed();
-      appendLongStrBuf('\n');
+      appendStrBuf('\n');
     }
 
-    inline void appendLongStrBufCarriageReturn()
+    inline void appendStrBufCarriageReturn()
     {
       silentCarriageReturn();
-      appendLongStrBuf('\n');
+      appendStrBuf('\n');
     }
 
   protected:
     inline void silentCarriageReturn()
     {
       ++line;
       lastCR = true;
     }
@@ -256,17 +260,16 @@ class nsHtml5Tokenizer
 
   private:
     void emitCarriageReturn(char16_t* buf, int32_t pos);
     void emitReplacementCharacter(char16_t* buf, int32_t pos);
     void emitPlaintextReplacementCharacter(char16_t* buf, int32_t pos);
     void setAdditionalAndRememberAmpersandLocation(char16_t add);
     void bogusDoctype();
     void bogusDoctypeWithoutQuirks();
-    void emitOrAppendStrBuf(int32_t returnState);
     void handleNcrValue(int32_t returnState);
   public:
     void eof();
   private:
     void emitDoctypeToken(int32_t pos);
   protected:
     inline char16_t checkChar(char16_t* buf, int32_t pos)
     {