Bug 809956 - Deal with an odd number of ] characters before ]]> in a CDATA section in the HTML parser. r=smaug.
author: Henri Sivonen <hsivonen@iki.fi>
Mon, 12 Nov 2012 15:42:23 +0200
changeset 120923 0a19ff77bee4434ca790c41635a82d2ef3459cc9
parent 120922 e960dece2a232b9a7d0c1f58486b69d93d85b726
child 120924 f76ba3850b0a599eded03eea5f471db32e7f80ef
push id: 1997
push user: akeybl@mozilla.com
push date: Mon, 07 Jan 2013 21:25:26 +0000
treeherder: mozilla-beta@4baf45cdcf21 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: smaug
bugs: 809956
milestone: 19.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 809956 - Deal with an odd number of ] characters before ]]> in a CDATA section in the HTML parser. r=smaug.
parser/html/javasrc/Tokenizer.java
parser/html/nsHtml5Tokenizer.cpp
parser/htmlparser/tests/mochitest/html5lib_tree_construction/adoption01.dat
parser/htmlparser/tests/mochitest/html5lib_tree_construction/tests21.dat
--- a/parser/html/javasrc/Tokenizer.java
+++ b/parser/html/javasrc/Tokenizer.java
@@ -2819,32 +2819,40 @@ public class Tokenizer implements Locato
                                 cstart = pos;
                                 reconsume = true;
                                 state = transition(state, Tokenizer.CDATA_SECTION, reconsume, pos);
                                 continue stateloop;
                         }
                     }
                     // WARNING FALLTHRU CASE TRANSITION: DON'T REORDER
                 case CDATA_RSQB_RSQB:
-                    if (++pos == endPos) {
-                        break stateloop;
-                    }
-                    c = checkChar(buf, pos);
-                    switch (c) {
-                        case '>':
-                            cstart = pos + 1;
-                            state = transition(state, Tokenizer.DATA, reconsume, pos);
-                            continue stateloop;
-                        default:
-                            tokenHandler.characters(Tokenizer.RSQB_RSQB, 0, 2);
-                            cstart = pos;
-                            reconsume = true;
-                            state = transition(state, Tokenizer.CDATA_SECTION, reconsume, pos);
-                            continue stateloop;
-
+                    cdatarsqbrsqb: for (;;) {
+                        if (++pos == endPos) {
+                            break stateloop;
+                        }
+                        c = checkChar(buf, pos);
+                        switch (c) {
+                            case ']':
+                                // Saw a third ]. Emit one ] (logically the 
+                                // first one) and stay in this state to 
+                                // remember that the last two characters seen
+                                // have been ]].
+                                tokenHandler.characters(Tokenizer.RSQB_RSQB, 0, 1);                                
+                                continue;
+                            case '>':
+                                cstart = pos + 1;
+                                state = transition(state, Tokenizer.DATA, reconsume, pos);
+                                continue stateloop;
+                            default:
+                                tokenHandler.characters(Tokenizer.RSQB_RSQB, 0, 2);
+                                cstart = pos;
+                                reconsume = true;
+                                state = transition(state, Tokenizer.CDATA_SECTION, reconsume, pos);
+                                continue stateloop;
+                        }
                     }
                     // XXX reorder point
                 case ATTRIBUTE_VALUE_SINGLE_QUOTED:
                     attributevaluesinglequotedloop: for (;;) {
                         if (reconsume) {
                             reconsume = false;
                         } else {
                             if (++pos == endPos) {
--- a/parser/html/nsHtml5Tokenizer.cpp
+++ b/parser/html/nsHtml5Tokenizer.cpp
@@ -1366,34 +1366,41 @@ nsHtml5Tokenizer::stateLoop(int32_t stat
               state = P::transition(mViewSource, NS_HTML5TOKENIZER_CDATA_SECTION, reconsume, pos);
               NS_HTML5_CONTINUE(stateloop);
             }
           }
         }
         cdatarsqb_end: ;
       }
       case NS_HTML5TOKENIZER_CDATA_RSQB_RSQB: {
-        if (++pos == endPos) {
-          NS_HTML5_BREAK(stateloop);
-        }
-        c = checkChar(buf, pos);
-        switch(c) {
-          case '>': {
-            cstart = pos + 1;
-            state = P::transition(mViewSource, NS_HTML5TOKENIZER_DATA, reconsume, pos);
-            NS_HTML5_CONTINUE(stateloop);
+        for (; ; ) {
+          if (++pos == endPos) {
+            NS_HTML5_BREAK(stateloop);
           }
-          default: {
-            tokenHandler->characters(nsHtml5Tokenizer::RSQB_RSQB, 0, 2);
-            cstart = pos;
-            reconsume = true;
-            state = P::transition(mViewSource, NS_HTML5TOKENIZER_CDATA_SECTION, reconsume, pos);
-            NS_HTML5_CONTINUE(stateloop);
+          c = checkChar(buf, pos);
+          switch(c) {
+            case ']': {
+              tokenHandler->characters(nsHtml5Tokenizer::RSQB_RSQB, 0, 1);
+              continue;
+            }
+            case '>': {
+              cstart = pos + 1;
+              state = P::transition(mViewSource, NS_HTML5TOKENIZER_DATA, reconsume, pos);
+              NS_HTML5_CONTINUE(stateloop);
+            }
+            default: {
+              tokenHandler->characters(nsHtml5Tokenizer::RSQB_RSQB, 0, 2);
+              cstart = pos;
+              reconsume = true;
+              state = P::transition(mViewSource, NS_HTML5TOKENIZER_CDATA_SECTION, reconsume, pos);
+              NS_HTML5_CONTINUE(stateloop);
+            }
           }
         }
+
       }
       case NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_SINGLE_QUOTED: {
         for (; ; ) {
           if (reconsume) {
             reconsume = false;
           } else {
             if (++pos == endPos) {
               NS_HTML5_BREAK(stateloop);
--- a/parser/htmlparser/tests/mochitest/html5lib_tree_construction/adoption01.dat
+++ b/parser/htmlparser/tests/mochitest/html5lib_tree_construction/adoption01.dat
@@ -187,8 +187,89 @@
 #document
 | <html>
 |   <head>
 |   <body>
 |     <a>
 |       <svg svg>
 |         <svg tr>
 |           <svg input>
+
+#data
+<div><a><b><div><div><div><div><div><div><div><div><div><div></a>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <a>
+|         <b>
+|       <b>
+|         <div>
+|           <a>
+|           <div>
+|             <a>
+|             <div>
+|               <a>
+|               <div>
+|                 <a>
+|                 <div>
+|                   <a>
+|                   <div>
+|                     <a>
+|                     <div>
+|                       <a>
+|                       <div>
+|                         <a>
+|                           <div>
+|                             <div>
+
+#data
+<div><a><b><u><i><code><div></a>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <a>
+|         <b>
+|           <u>
+|             <i>
+|               <code>
+|       <u>
+|         <i>
+|           <code>
+|             <div>
+|               <a>
+
+#data
+<b><b><b><b>x</b></b></b></b>y
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <b>
+|         <b>
+|           <b>
+|             "x"
+|     "y"
+
+#data
+<p><b><b><b><b><p>x
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <b>
+|         <b>
+|           <b>
+|             <b>
+|     <p>
+|       <b>
+|         <b>
+|           <b>
+|             "x"
--- a/parser/htmlparser/tests/mochitest/html5lib_tree_construction/tests21.dat
+++ b/parser/htmlparser/tests/mochitest/html5lib_tree_construction/tests21.dat
@@ -112,16 +112,52 @@
 #document
 | <html>
 |   <head>
 |   <body>
 |     <svg svg>
 |       "]>a"
 
 #data
+<!DOCTYPE html><svg><![CDATA[foo]]]>
+#errors
+36: End of file in a foreign namespace context.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "foo]"
+
+#data
+<!DOCTYPE html><svg><![CDATA[foo]]]]>
+#errors
+36: End of file in a foreign namespace context.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "foo]]"
+
+#data
+<!DOCTYPE html><svg><![CDATA[foo]]]]]>
+#errors
+36: End of file in a foreign namespace context.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "foo]]]"
+
+#data
 <svg><foreignObject><div><![CDATA[foo]]>
 #errors
 #document
 | <html>
 |   <head>
 |   <body>
 |     <svg svg>
 |       <svg foreignObject>