Bug 1584216 - Adjust tokenization of U+0000. r=alchen a=lizzard
author Henri Sivonen <hsivonen@hsivonen.fi>
Tue, 01 Oct 2019 12:26:55 +0000
changeset 555502 0b47d14dc42ff43dd27252fdf8831d4c7b22509f
parent 555501 90d8a6faebc4cff379ff91fb4356e30e7be362d8
child 555503 6d321e762d12fe61f383e744cedfe7f19610b8c2
push id 2165
push user ffxbld-merge
push date Mon, 14 Oct 2019 16:30:58 +0000
treeherder mozilla-release@0eae18af659f [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers alchen, lizzard
bugs 1584216
milestone 70.0
Bug 1584216 - Adjust tokenization of U+0000. r=alchen a=lizzard Differential Revision: https://phabricator.services.mozilla.com/D47594
parser/html/javasrc/Tokenizer.java
parser/html/nsHtml5Tokenizer.cpp
--- a/parser/html/javasrc/Tokenizer.java
+++ b/parser/html/javasrc/Tokenizer.java
@@ -2573,18 +2573,16 @@ public class Tokenizer implements Locato
                     // CPPONLY: MOZ_FALLTHROUGH;
                 case MARKUP_DECLARATION_HYPHEN:
                     markupdeclarationhyphenloop: for (;;) {
                         if (++pos == endPos) {
                             break stateloop;
                         }
                         c = checkChar(buf, pos);
                         switch (c) {
-                            case '\u0000':
-                                break stateloop;
                             case '-':
                                 clearStrBufAfterOneHyphen();
                                 state = transition(state, Tokenizer.COMMENT_START, reconsume, pos);
                                 break markupdeclarationhyphenloop;
                             // continue stateloop;
                             default:
                                 errBogusComment();
                                 reconsume = true;
@@ -3077,19 +3075,16 @@ public class Tokenizer implements Locato
                         }
                     }
                     // CPPONLY: MOZ_FALLTHROUGH;
                 case CONSUME_CHARACTER_REFERENCE:
                     if (++pos == endPos) {
                         break stateloop;
                     }
                     c = checkChar(buf, pos);
-                    if (c == '\u0000') {
-                        break stateloop;
-                    }
                     /*
                      * Unlike the definition is the spec, this state does not
                      * return a value and never requires the caller to
                      * backtrack. This state takes care of emitting characters
                      * or appending to the current attribute value. It also
                      * takes care of that in the case when consuming the
                      * character reference fails.
                      */
@@ -3105,16 +3100,17 @@ public class Tokenizer implements Locato
                     switch (c) {
                         case ' ':
                         case '\t':
                         case '\n':
                         case '\r': // we'll reconsume!
                         case '\u000C':
                         case '<':
                         case '&':
+                        case '\u0000':
                             emitOrAppendCharRefBuf(returnState);
                             if ((returnState & DATA_AND_RCDATA_MASK) == 0) {
                                 cstart = pos;
                             }
                             reconsume = true;
                             state = transition(state, returnState, reconsume, pos);
                             continue stateloop;
                         case '#':
@@ -3158,19 +3154,16 @@ public class Tokenizer implements Locato
                     }
                     // CPPONLY: MOZ_FALLTHROUGH;
                 case CHARACTER_REFERENCE_HILO_LOOKUP:
                     {
                         if (++pos == endPos) {
                             break stateloop;
                         }
                         c = checkChar(buf, pos);
-                        if (c == '\u0000') {
-                            break stateloop;
-                        }
                         /*
                          * The data structure is as follows:
                          *
                          * HILO_ACCEL is a two-dimensional int array whose major
                          * index corresponds to the second character of the
                          * character reference (code point as index) and the
                          * minor index corresponds to the first character of the
                          * character reference (packed so that A-Z runs from 0
@@ -3237,19 +3230,16 @@ public class Tokenizer implements Locato
                     }
                     // CPPONLY: MOZ_FALLTHROUGH;
                 case CHARACTER_REFERENCE_TAIL:
                     outer: for (;;) {
                         if (++pos == endPos) {
                             break stateloop;
                         }
                         c = checkChar(buf, pos);
-                        if (c == '\u0000') {
-                            break stateloop;
-                        }
                         entCol++;
                         /*
                          * Consume the maximum number of characters possible,
                          * with the consumed characters matching one of the
                          * identifiers in the first column of the named
                          * character references table (in a case-sensitive
                          * manner).
                          */
--- a/parser/html/nsHtml5Tokenizer.cpp
+++ b/parser/html/nsHtml5Tokenizer.cpp
@@ -1203,19 +1203,16 @@ stateloop:
       }
       case MARKUP_DECLARATION_HYPHEN: {
         for (;;) {
           if (++pos == endPos) {
             NS_HTML5_BREAK(stateloop);
           }
           c = checkChar(buf, pos);
           switch (c) {
-            case '\0': {
-              NS_HTML5_BREAK(stateloop);
-            }
             case '-': {
               clearStrBufAfterOneHyphen();
               state = P::transition(
                   mViewSource, nsHtml5Tokenizer::COMMENT_START, reconsume, pos);
               NS_HTML5_BREAK(markupdeclarationhyphenloop);
             }
             default: {
               if (P::reportErrors) {
@@ -1670,27 +1667,25 @@ stateloop:
       attributevaluesinglequotedloop_end:;
         MOZ_FALLTHROUGH;
       }
       case CONSUME_CHARACTER_REFERENCE: {
         if (++pos == endPos) {
           NS_HTML5_BREAK(stateloop);
         }
         c = checkChar(buf, pos);
-        if (c == '\0') {
-          NS_HTML5_BREAK(stateloop);
-        }
         switch (c) {
           case ' ':
           case '\t':
           case '\n':
           case '\r':
           case '\f':
           case '<':
-          case '&': {
+          case '&':
+          case '\0': {
             emitOrAppendCharRefBuf(returnState);
             if (!(returnState & DATA_AND_RCDATA_MASK)) {
               cstart = pos;
             }
             reconsume = true;
             state = P::transition(mViewSource, returnState, reconsume, pos);
             NS_HTML5_CONTINUE(stateloop);
           }
@@ -1732,19 +1727,16 @@ stateloop:
         MOZ_FALLTHROUGH;
       }
       case CHARACTER_REFERENCE_HILO_LOOKUP: {
         {
           if (++pos == endPos) {
             NS_HTML5_BREAK(stateloop);
           }
           c = checkChar(buf, pos);
-          if (c == '\0') {
-            NS_HTML5_BREAK(stateloop);
-          }
           int32_t hilo = 0;
           if (c <= 'z') {
             const int32_t* row = nsHtml5NamedCharactersAccel::HILO_ACCEL[c];
             if (row) {
               hilo = row[firstCharKey];
             }
           }
           if (!hilo) {
@@ -1772,19 +1764,16 @@ stateloop:
         MOZ_FALLTHROUGH;
       }
       case CHARACTER_REFERENCE_TAIL: {
         for (;;) {
           if (++pos == endPos) {
             NS_HTML5_BREAK(stateloop);
           }
           c = checkChar(buf, pos);
-          if (c == '\0') {
-            NS_HTML5_BREAK(stateloop);
-          }
           entCol++;
           for (;;) {
             if (hi < lo) {
               NS_HTML5_BREAK(outer);
             }
             if (entCol == nsHtml5NamedCharacters::NAMES[lo].length()) {
               candidate = lo;
               charRefBufMark = charRefBufLen;