Bug 563526 - Ignore U+0000 in element content when the tree builder is not in the "text" mode or the "in foreign" mode. r=jonas.
author Henri Sivonen <hsivonen@iki.fi>
Wed, 09 Jun 2010 09:45:32 +0300
changeset 46394 ae259fec2443d88fefa678d73ea2b4d86628e685
parent 46393 071e01b400325e55b6ef915e7aa969203cdb2bc9
child 46395 6060488c29716eff5326c3301b972486c9eafe6c
push id 14162
push user hsivonen@iki.fi
push date Tue, 29 Jun 2010 07:31:20 +0000
treeherder mozilla-central@81ff228d9bd0 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers jonas
bugs 563526
milestone 2.0b2pre
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 563526 - Ignore U+0000 in element content when the tree builder is not in the "text" mode or the "in foreign" mode. r=jonas.
parser/html/javasrc/Tokenizer.java
parser/html/javasrc/TreeBuilder.java
parser/html/nsHtml5Tokenizer.cpp
parser/html/nsHtml5TreeBuilder.cpp
parser/html/nsHtml5TreeBuilder.h
--- a/parser/html/javasrc/Tokenizer.java
+++ b/parser/html/javasrc/Tokenizer.java
@@ -5855,17 +5855,17 @@ public class Tokenizer implements Locato
         flushChars(buf, pos);
         tokenHandler.characters(Tokenizer.LF, 0, 1);
         cstart = Integer.MAX_VALUE;
     }
 
     private void emitReplacementCharacter(@NoLength char[] buf, int pos)
             throws SAXException {
         flushChars(buf, pos);
-        tokenHandler.characters(Tokenizer.REPLACEMENT_CHARACTER, 0, 1);
+        tokenHandler.zeroOriginatingReplacementCharacter();
         cstart = pos + 1;
     }
 
     private void setAdditionalAndRememberAmpersandLocation(char add) {
         additional = add;
         // [NOCPP[
         ampersandLocation = new LocatorImpl(this);
         // ]NOCPP]
--- a/parser/html/javasrc/TreeBuilder.java
+++ b/parser/html/javasrc/TreeBuilder.java
@@ -55,16 +55,21 @@ import nu.validator.htmlparser.common.Xm
 
 import org.xml.sax.ErrorHandler;
 import org.xml.sax.SAXException;
 import org.xml.sax.SAXParseException;
 
 public abstract class TreeBuilder<T> implements TokenHandler,
         TreeBuilderState<T> {
     
+    /**
+     * Array version of U+FFFD.
+     */
+    private static final @NoLength char[] REPLACEMENT_CHARACTER = { '\uFFFD' };
+    
     // Start dispatch groups
 
     final static int OTHER = 0;
 
     final static int A = 1;
 
     final static int BASE = 2;
 
@@ -834,18 +839,21 @@ public abstract class TreeBuilder<T> imp
                 length--;
                 if (length == 0) {
                     return;
                 }
             }
             needToDropLF = false;
         }
 
+        if (inForeign) {
+            accumulateCharacters(buf, start, length);
+            return;
+        }
         // optimize the most common case
-        // XXX should there be an IN FOREIGN check here?
         switch (mode) {
             case IN_BODY:
             case IN_CELL:
             case IN_CAPTION:
                 reconstructTheActiveFormattingElements();
                 // fall through
             case TEXT:
                 accumulateCharacters(buf, start, length);
@@ -1201,16 +1209,26 @@ public abstract class TreeBuilder<T> imp
                     }
                 }
                 if (start < end) {
                     accumulateCharacters(buf, start, end - start);
                 }
         }
     }
 
+    /**
+     * @see nu.validator.htmlparser.common.TokenHandler#zeroOriginatingReplacementCharacter()
+     */
+    @Override public void zeroOriginatingReplacementCharacter()
+            throws SAXException {
+        if (inForeign || mode == TEXT) {
+            characters(REPLACEMENT_CHARACTER, 0, 1);
+        }
+    }
+
     public final void eof() throws SAXException {
         flushCharacters();
         if (inForeign) {
             err("End of file in a foreign namespace context.");
             while (stack[currentPtr].ns != "http://www.w3.org/1999/xhtml") {
                 popOnEof();
             }
             inForeign = false;
--- a/parser/html/nsHtml5Tokenizer.cpp
+++ b/parser/html/nsHtml5Tokenizer.cpp
@@ -3334,17 +3334,17 @@ nsHtml5Tokenizer::emitCarriageReturn(PRU
   tokenHandler->characters(nsHtml5Tokenizer::LF, 0, 1);
   cstart = PR_INT32_MAX;
 }
 
 void 
 nsHtml5Tokenizer::emitReplacementCharacter(PRUnichar* buf, PRInt32 pos)
 {
   flushChars(buf, pos);
-  tokenHandler->characters(nsHtml5Tokenizer::REPLACEMENT_CHARACTER, 0, 1);
+  tokenHandler->zeroOriginatingReplacementCharacter();
   cstart = pos + 1;
 }
 
 void 
 nsHtml5Tokenizer::setAdditionalAndRememberAmpersandLocation(PRUnichar add)
 {
   additional = add;
 }
--- a/parser/html/nsHtml5TreeBuilder.cpp
+++ b/parser/html/nsHtml5TreeBuilder.cpp
@@ -185,16 +185,20 @@ nsHtml5TreeBuilder::characters(const PRU
       start++;
       length--;
       if (!length) {
         return;
       }
     }
     needToDropLF = PR_FALSE;
   }
+  if (inForeign) {
+    accumulateCharacters(buf, start, length);
+    return;
+  }
   switch(mode) {
     case NS_HTML5TREE_BUILDER_IN_BODY:
     case NS_HTML5TREE_BUILDER_IN_CELL:
     case NS_HTML5TREE_BUILDER_IN_CAPTION: {
       reconstructTheActiveFormattingElements();
     }
     case NS_HTML5TREE_BUILDER_TEXT: {
       accumulateCharacters(buf, start, length);
@@ -409,16 +413,24 @@ nsHtml5TreeBuilder::characters(const PRU
       if (start < end) {
         accumulateCharacters(buf, start, end - start);
       }
     }
   }
 }
 
 void 
+nsHtml5TreeBuilder::zeroOriginatingReplacementCharacter()
+{
+  if (inForeign || mode == NS_HTML5TREE_BUILDER_TEXT) {
+    characters(REPLACEMENT_CHARACTER, 0, 1);
+  }
+}
+
+void 
 nsHtml5TreeBuilder::eof()
 {
   flushCharacters();
   if (inForeign) {
 
     while (stack[currentPtr]->ns != kNameSpaceID_XHTML) {
       popOnEof();
     }
--- a/parser/html/nsHtml5TreeBuilder.h
+++ b/parser/html/nsHtml5TreeBuilder.h
@@ -65,16 +65,17 @@ class nsHtml5HtmlAttributes;
 class nsHtml5UTF16Buffer;
 class nsHtml5StateSnapshot;
 class nsHtml5Portability;
 
 
 class nsHtml5TreeBuilder : public nsAHtml5TreeBuilderState
 {
   private:
+    static PRUnichar REPLACEMENT_CHARACTER[];
     static jArray<const char*,PRInt32> QUIRKY_PUBLIC_IDS;
     PRInt32 mode;
     PRInt32 originalMode;
     PRBool framesetOk;
     PRBool inForeign;
   protected:
     nsHtml5Tokenizer* tokenizer;
   private:
@@ -95,16 +96,17 @@ class nsHtml5TreeBuilder : public nsAHtm
     PRInt32 charBufferLen;
   private:
     PRBool quirks;
   public:
     void startTokenization(nsHtml5Tokenizer* self);
     void doctype(nsIAtom* name, nsString* publicIdentifier, nsString* systemIdentifier, PRBool forceQuirks);
     void comment(PRUnichar* buf, PRInt32 start, PRInt32 length);
     void characters(const PRUnichar* buf, PRInt32 start, PRInt32 length);
+    void zeroOriginatingReplacementCharacter();
     void eof();
     void endTokenization();
     void startTag(nsHtml5ElementName* elementName, nsHtml5HtmlAttributes* attributes, PRBool selfClosing);
     static nsString* extractCharsetFromContent(nsString* attributeValue);
   private:
     void checkMetaCharset(nsHtml5HtmlAttributes* attributes);
   public:
     void endTag(nsHtml5ElementName* elementName);
@@ -234,16 +236,17 @@ class nsHtml5TreeBuilder : public nsAHtm
     PRInt32 getStackLength();
     static void initializeStatics();
     static void releaseStatics();
 
 #include "nsHtml5TreeBuilderHSupplement.h"
 };
 
 #ifdef nsHtml5TreeBuilder_cpp__
+PRUnichar nsHtml5TreeBuilder::REPLACEMENT_CHARACTER[] = { 0xfffd };
 jArray<const char*,PRInt32> nsHtml5TreeBuilder::QUIRKY_PUBLIC_IDS = nsnull;
 #endif
 
 #define NS_HTML5TREE_BUILDER_OTHER 0
 #define NS_HTML5TREE_BUILDER_A 1
 #define NS_HTML5TREE_BUILDER_BASE 2
 #define NS_HTML5TREE_BUILDER_BODY 3
 #define NS_HTML5TREE_BUILDER_BR 4