Bug 1584216 - Adjust tokenization of U+0000. r=alchen a=lizzard
Differential Revision: https://phabricator.services.mozilla.com/D47594
--- a/parser/html/javasrc/Tokenizer.java
+++ b/parser/html/javasrc/Tokenizer.java
@@ -2573,18 +2573,16 @@ public class Tokenizer implements Locato
// CPPONLY: MOZ_FALLTHROUGH;
case MARKUP_DECLARATION_HYPHEN:
markupdeclarationhyphenloop: for (;;) {
if (++pos == endPos) {
break stateloop;
}
c = checkChar(buf, pos);
switch (c) {
- case '\u0000':
- break stateloop;
case '-':
clearStrBufAfterOneHyphen();
state = transition(state, Tokenizer.COMMENT_START, reconsume, pos);
break markupdeclarationhyphenloop;
// continue stateloop;
default:
errBogusComment();
reconsume = true;
@@ -3077,19 +3075,16 @@ public class Tokenizer implements Locato
}
}
// CPPONLY: MOZ_FALLTHROUGH;
case CONSUME_CHARACTER_REFERENCE:
if (++pos == endPos) {
break stateloop;
}
c = checkChar(buf, pos);
- if (c == '\u0000') {
- break stateloop;
- }
/*
* Unlike the definition is the spec, this state does not
* return a value and never requires the caller to
* backtrack. This state takes care of emitting characters
* or appending to the current attribute value. It also
* takes care of that in the case when consuming the
* character reference fails.
*/
@@ -3105,16 +3100,17 @@ public class Tokenizer implements Locato
switch (c) {
case ' ':
case '\t':
case '\n':
case '\r': // we'll reconsume!
case '\u000C':
case '<':
case '&':
+ case '\u0000':
emitOrAppendCharRefBuf(returnState);
if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
cstart = pos;
}
reconsume = true;
state = transition(state, returnState, reconsume, pos);
continue stateloop;
case '#':
@@ -3158,19 +3154,16 @@ public class Tokenizer implements Locato
}
// CPPONLY: MOZ_FALLTHROUGH;
case CHARACTER_REFERENCE_HILO_LOOKUP:
{
if (++pos == endPos) {
break stateloop;
}
c = checkChar(buf, pos);
- if (c == '\u0000') {
- break stateloop;
- }
/*
* The data structure is as follows:
*
* HILO_ACCEL is a two-dimensional int array whose major
* index corresponds to the second character of the
* character reference (code point as index) and the
* minor index corresponds to the first character of the
* character reference (packed so that A-Z runs from 0
@@ -3237,19 +3230,16 @@ public class Tokenizer implements Locato
}
// CPPONLY: MOZ_FALLTHROUGH;
case CHARACTER_REFERENCE_TAIL:
outer: for (;;) {
if (++pos == endPos) {
break stateloop;
}
c = checkChar(buf, pos);
- if (c == '\u0000') {
- break stateloop;
- }
entCol++;
/*
* Consume the maximum number of characters possible,
* with the consumed characters matching one of the
* identifiers in the first column of the named
* character references table (in a case-sensitive
* manner).
*/
--- a/parser/html/nsHtml5Tokenizer.cpp
+++ b/parser/html/nsHtml5Tokenizer.cpp
@@ -1203,19 +1203,16 @@ stateloop:
}
case MARKUP_DECLARATION_HYPHEN: {
for (;;) {
if (++pos == endPos) {
NS_HTML5_BREAK(stateloop);
}
c = checkChar(buf, pos);
switch (c) {
- case '\0': {
- NS_HTML5_BREAK(stateloop);
- }
case '-': {
clearStrBufAfterOneHyphen();
state = P::transition(
mViewSource, nsHtml5Tokenizer::COMMENT_START, reconsume, pos);
NS_HTML5_BREAK(markupdeclarationhyphenloop);
}
default: {
if (P::reportErrors) {
@@ -1670,27 +1667,25 @@ stateloop:
attributevaluesinglequotedloop_end:;
MOZ_FALLTHROUGH;
}
case CONSUME_CHARACTER_REFERENCE: {
if (++pos == endPos) {
NS_HTML5_BREAK(stateloop);
}
c = checkChar(buf, pos);
- if (c == '\0') {
- NS_HTML5_BREAK(stateloop);
- }
switch (c) {
case ' ':
case '\t':
case '\n':
case '\r':
case '\f':
case '<':
- case '&': {
+ case '&':
+ case '\0': {
emitOrAppendCharRefBuf(returnState);
if (!(returnState & DATA_AND_RCDATA_MASK)) {
cstart = pos;
}
reconsume = true;
state = P::transition(mViewSource, returnState, reconsume, pos);
NS_HTML5_CONTINUE(stateloop);
}
@@ -1732,19 +1727,16 @@ stateloop:
MOZ_FALLTHROUGH;
}
case CHARACTER_REFERENCE_HILO_LOOKUP: {
{
if (++pos == endPos) {
NS_HTML5_BREAK(stateloop);
}
c = checkChar(buf, pos);
- if (c == '\0') {
- NS_HTML5_BREAK(stateloop);
- }
int32_t hilo = 0;
if (c <= 'z') {
const int32_t* row = nsHtml5NamedCharactersAccel::HILO_ACCEL[c];
if (row) {
hilo = row[firstCharKey];
}
}
if (!hilo) {
@@ -1772,19 +1764,16 @@ stateloop:
MOZ_FALLTHROUGH;
}
case CHARACTER_REFERENCE_TAIL: {
for (;;) {
if (++pos == endPos) {
NS_HTML5_BREAK(stateloop);
}
c = checkChar(buf, pos);
- if (c == '\0') {
- NS_HTML5_BREAK(stateloop);
- }
entCol++;
for (;;) {
if (hi < lo) {
NS_HTML5_BREAK(outer);
}
if (entCol == nsHtml5NamedCharacters::NAMES[lo].length()) {
candidate = lo;
charRefBufMark = charRefBufLen;