Bug 500937 - Make the output of XMLSerializer more HTML-friendly. r=laurent, sr=jst.
author Henri Sivonen <hsivonen@iki.fi>
Mon, 28 Sep 2009 10:59:52 +0300
changeset 33238 e915fafc9ba5677a6f6cd06914111ec85d064dad
parent 33237 b6d32703aee2893bdcb0babe0bfccc2760b70366
child 33239 f2a06414948cef0a4396b4bc88bdd8b77f56e106
push id 1
push user root
push date Tue, 26 Apr 2011 22:38:44 +0000
treeherder mozilla-beta@bfdb6e623a36 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers laurent, jst
bugs 500937
milestone 1.9.3a1pre
Bug 500937 - Make the output of XMLSerializer more HTML-friendly. r=laurent, sr=jst.
content/base/public/nsIDocumentEncoder.idl
content/base/src/nsDOMSerializer.cpp
content/base/src/nsDocumentEncoder.cpp
content/base/src/nsHTMLContentSerializer.cpp
content/base/src/nsXHTMLContentSerializer.cpp
content/base/src/nsXHTMLContentSerializer.h
content/base/src/nsXMLContentSerializer.cpp
content/base/src/nsXMLContentSerializer.h
content/base/test/Makefile.in
content/base/test/test_bug500937.html
content/test/unit/test_xml_serializer.js
--- a/content/base/public/nsIDocumentEncoder.idl
+++ b/content/base/public/nsIDocumentEncoder.idl
@@ -39,17 +39,17 @@
 #include "nsISupports.idl"
 
 interface nsIDOMDocument;
 interface nsIDOMRange;
 interface nsISelection;
 interface nsIDOMNode;
 interface nsIOutputStream;
 
-[scriptable, uuid(3c556e41-0f73-4e1d-b724-1474884fe2e3)]
+[scriptable, uuid(c0da5b87-0ba7-4d7c-8cb3-fcb02af4253d)]
 interface nsIDocumentEncoderNodeFixup : nsISupports
 {
   /**
    * Create a fixed up version of a node. This method is called before
    * each node in a document is about to be persisted. The implementor
    * may return a new node with fixed up attributes or null. If null is
    * returned the node should be used as-is.
    * @param aNode Node to fixup.
@@ -202,16 +202,23 @@ interface nsIDocumentEncoder : nsISuppor
 
   /**
    * Normally &nbsp; is replaced with a space character when
    * encoding data as plain text, set this flag if that's
    * not desired.
    * Plaintext output only.
    */
   const unsigned long OutputPersistNBSP = (1 << 17);
+
+  /**
+   * Normally, when serializing a whole document with the HTML or XHTML
+   * serializer, the <meta http-equiv="content-type"> encoding declaration is
+   * rewritten to match the output charset. This flag suppresses that rewrite.
+   */
+  const unsigned long OutputDontRewriteEncodingDeclaration = (1 << 18);
   
   /**
    * Initialize with a pointer to the document and the mime type.
    * @param aDocument Document to encode.
    * @param aMimeType MimeType to use. May also be set by SetMimeType.
    * @param aFlags Flags to use while encoding. May also be set by SetFlags.
    */
   void init(in nsIDOMDocument aDocument,
--- a/content/base/src/nsDOMSerializer.cpp
+++ b/content/base/src/nsDOMSerializer.cpp
@@ -74,32 +74,34 @@ NS_IMPL_RELEASE(nsDOMSerializer)
 static nsresult
 SetUpEncoder(nsIDOMNode *aRoot, const nsACString& aCharset,
              nsIDocumentEncoder **aEncoder)
 {
   *aEncoder = nsnull;
    
   nsresult rv;
   nsCOMPtr<nsIDocumentEncoder> encoder =
-    do_CreateInstance(NS_DOC_ENCODER_CONTRACTID_BASE "text/xml", &rv);
+    do_CreateInstance(NS_DOC_ENCODER_CONTRACTID_BASE "application/xhtml+xml", &rv);
   if (NS_FAILED(rv))
     return rv;
 
   PRBool entireDocument = PR_TRUE;
   nsCOMPtr<nsIDOMDocument> domDoc(do_QueryInterface(aRoot));
   if (!domDoc) {
     entireDocument = PR_FALSE;
     rv = aRoot->GetOwnerDocument(getter_AddRefs(domDoc));
     if (NS_FAILED(rv))
       return rv;
   }
 
   // This method will fail if no document
-  rv = encoder->Init(domDoc, NS_LITERAL_STRING("text/xml"),
-                     nsIDocumentEncoder::OutputEncodeBasicEntities);
+  rv = encoder->Init(domDoc, NS_LITERAL_STRING("application/xhtml+xml"),
+                     nsIDocumentEncoder::OutputRaw |
+                     nsIDocumentEncoder::OutputDontRewriteEncodingDeclaration);
+
   if (NS_FAILED(rv))
     return rv;
 
   nsCAutoString charset(aCharset);
   if (charset.IsEmpty()) {
     nsCOMPtr<nsIDocument> doc = do_QueryInterface(domDoc);
     NS_ASSERTION(doc, "Need a document");
     charset = doc->GetDocumentCharacterSet();
--- a/content/base/src/nsDocumentEncoder.cpp
+++ b/content/base/src/nsDocumentEncoder.cpp
@@ -912,18 +912,18 @@ nsDocumentEncoder::EncodeToString(nsAStr
   nsCOMPtr<nsIAtom> charsetAtom;
   if (!mCharset.IsEmpty()) {
     if (!mCharsetConverterManager) {
       mCharsetConverterManager = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
       NS_ENSURE_SUCCESS(rv, rv);
     }
   }
   
-  PRBool isWholeDocument = !(mSelection || mRange || mNode);
-  mSerializer->Init(mFlags, mWrapColumn, mCharset.get(), mIsCopying, isWholeDocument);
+  PRBool rewriteEncodingDeclaration = !(mSelection || mRange || mNode) && !(mFlags & OutputDontRewriteEncodingDeclaration);
+  mSerializer->Init(mFlags, mWrapColumn, mCharset.get(), mIsCopying, rewriteEncodingDeclaration);
 
   if (mSelection) {
     nsCOMPtr<nsIDOMRange> range;
     PRInt32 i, count = 0;
 
     rv = mSelection->GetRangeCount(&count);
     NS_ENSURE_SUCCESS(rv, rv);
 
--- a/content/base/src/nsHTMLContentSerializer.cpp
+++ b/content/base/src/nsHTMLContentSerializer.cpp
@@ -190,17 +190,17 @@ nsHTMLContentSerializer::SerializeHTMLAt
         }
       }
       // Need to escape URI.
       nsAutoString tempURI(valueStr);
       if (!isJS && NS_FAILED(EscapeURI(aContent, tempURI, valueStr)))
         valueStr = tempURI;
     }
 
-    if (mIsWholeDocument && aTagName == nsGkAtoms::meta &&
+    if (mRewriteEncodingDeclaration && aTagName == nsGkAtoms::meta &&
         attrName == nsGkAtoms::content) {
       // If we're serializing a <meta http-equiv="content-type">,
       // use the proper value, rather than what's in the document.
       nsAutoString header;
       aContent->GetAttr(kNameSpaceID_None, nsGkAtoms::httpEquiv, header);
       if (header.LowerCaseEqualsLiteral("content-type")) {
         valueStr = NS_LITERAL_STRING("text/html; charset=") +
           NS_ConvertASCIItoUTF16(mCharset);
--- a/content/base/src/nsXHTMLContentSerializer.cpp
+++ b/content/base/src/nsXHTMLContentSerializer.cpp
@@ -94,34 +94,34 @@ nsXHTMLContentSerializer::nsXHTMLContent
 nsXHTMLContentSerializer::~nsXHTMLContentSerializer()
 {
   NS_ASSERTION(mOLStateStack.IsEmpty(), "Expected OL State stack to be empty");
 }
 
 NS_IMETHODIMP
 nsXHTMLContentSerializer::Init(PRUint32 aFlags, PRUint32 aWrapColumn,
                               const char* aCharSet, PRBool aIsCopying,
-                              PRBool aIsWholeDocument)
+                              PRBool aRewriteEncodingDeclaration)
 {
   // The previous version of the HTML serializer did implicit wrapping
   // when there is no flags, so we keep wrapping in order to keep
   // compatibility with the existing calling code
   // XXXLJ perhaps we should remove these two default settings later ?
   if (aFlags & nsIDocumentEncoder::OutputFormatted ) {
       aFlags = aFlags | nsIDocumentEncoder::OutputWrap;
   }
   else if (!(aFlags & nsIDocumentEncoder::OutputRaw)) {
       aFlags = aFlags | nsIDocumentEncoder::OutputWrap;
   }
 
   nsresult rv;
-  rv = nsXMLContentSerializer::Init(aFlags, aWrapColumn, aCharSet, aIsCopying, aIsWholeDocument);
+  rv = nsXMLContentSerializer::Init(aFlags, aWrapColumn, aCharSet, aIsCopying, aRewriteEncodingDeclaration);
   NS_ENSURE_SUCCESS(rv, rv);
 
-  mIsWholeDocument = aIsWholeDocument;
+  mRewriteEncodingDeclaration = aRewriteEncodingDeclaration;
   mIsCopying = aIsCopying;
   mIsFirstChildOfOL = PR_FALSE;
   mInBody = 0;
   mDisableEntityEncoding = 0;
   mBodyOnly = (mFlags & nsIDocumentEncoder::OutputBodyOnly) ? PR_TRUE
                                                             : PR_FALSE;
 
   // set up entity converter if we are going to need it
@@ -415,17 +415,17 @@ nsXHTMLContentSerializer::SerializeAttri
           }
         }
         // Need to escape URI.
         nsAutoString tempURI(valueStr);
         if (!isJS && NS_FAILED(EscapeURI(aContent, tempURI, valueStr)))
           valueStr = tempURI;
       }
 
-      if (mIsWholeDocument && aTagName == nsGkAtoms::meta &&
+      if (mRewriteEncodingDeclaration && aTagName == nsGkAtoms::meta &&
           attrName == nsGkAtoms::content) {
         // If we're serializing a <meta http-equiv="content-type">,
         // use the proper value, rather than what's in the document.
         nsAutoString header;
         aContent->GetAttr(kNameSpaceID_None, nsGkAtoms::httpEquiv, header);
         if (header.LowerCaseEqualsLiteral("content-type")) {
           valueStr = NS_LITERAL_STRING("text/html; charset=") +
             NS_ConvertASCIItoUTF16(mCharset);
@@ -488,17 +488,17 @@ nsXHTMLContentSerializer::AppendEndOfEle
 
 void
 nsXHTMLContentSerializer::AfterElementStart(nsIContent * aContent,
                                             nsIDOMElement *aOriginalElement,
                                             nsAString& aStr)
 {
   nsIAtom *name = aContent->Tag();
   if (aContent->GetNameSpaceID() == kNameSpaceID_XHTML &&
-      mIsWholeDocument &&
+      mRewriteEncodingDeclaration &&
       name == nsGkAtoms::head) {
 
     // Check if there already are any content-type meta children.
     // If there are, they will be modified to use the correct charset.
     // If there aren't, we'll insert one here.
     PRBool hasMeta = PR_FALSE;
     PRUint32 i, childCount = aContent->GetChildCount();
     for (i = 0; i < childCount; ++i) {
--- a/content/base/src/nsXHTMLContentSerializer.h
+++ b/content/base/src/nsXHTMLContentSerializer.h
@@ -55,17 +55,17 @@ class nsIAtom;
 
 class nsXHTMLContentSerializer : public nsXMLContentSerializer {
  public:
   nsXHTMLContentSerializer();
   virtual ~nsXHTMLContentSerializer();
 
   NS_IMETHOD Init(PRUint32 flags, PRUint32 aWrapColumn,
                   const char* aCharSet, PRBool aIsCopying,
-                  PRBool aIsWholeDocument);
+                  PRBool aRewriteEncodingDeclaration);
 
   NS_IMETHOD AppendText(nsIDOMText* aText,
                         PRInt32 aStartOffset,
                         PRInt32 aEndOffset,
                         nsAString& aStr);
 
   NS_IMETHOD AppendDocumentStart(nsIDOMDocument *aDocument,
                                  nsAString& aStr);
@@ -165,17 +165,17 @@ class nsXHTMLContentSerializer : public 
    * element is defined as CDATA in the DTD, it simply means we'll
    * output the content of the element without doing any entity encoding
    * what so ever.
    */
   PRInt32 mDisableEntityEncoding;
 
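-  // This is to ensure that we only do meta tag fixups when dealing with
-  // whole documents.
+  // Whether to do meta tag (encoding declaration) fixups. Set from Init();
+  // the document encoder requests this only for whole documents, and not
+  // when OutputDontRewriteEncodingDeclaration is set.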
-  PRPackedBool  mIsWholeDocument;
+  PRPackedBool  mRewriteEncodingDeclaration;
 
   // To keep track of First LI child of OL in selected range 
   PRPackedBool  mIsFirstChildOfOL;
 
   // To keep track of startvalue of OL and first list item for nested lists
   struct olState {
     olState(PRInt32 aStart, PRBool aIsFirst)
       : startVal(aStart),
--- a/content/base/src/nsXMLContentSerializer.cpp
+++ b/content/base/src/nsXMLContentSerializer.cpp
@@ -102,17 +102,17 @@ nsXMLContentSerializer::~nsXMLContentSer
 {
 }
 
 NS_IMPL_ISUPPORTS1(nsXMLContentSerializer, nsIContentSerializer)
 
 NS_IMETHODIMP 
 nsXMLContentSerializer::Init(PRUint32 aFlags, PRUint32 aWrapColumn,
                              const char* aCharSet, PRBool aIsCopying,
-                             PRBool aIsWholeDocument)
+                             PRBool aRewriteEncodingDeclaration)
 {
   mCharset = aCharSet;
   mFlags = aFlags;
 
   // Set the line break character:
   if ((mFlags & nsIDocumentEncoder::OutputCRLineBreak)
       && (mFlags & nsIDocumentEncoder::OutputLFLineBreak)) { // Windows
     mLineBreak.AssignLiteral("\r\n");
--- a/content/base/src/nsXMLContentSerializer.h
+++ b/content/base/src/nsXMLContentSerializer.h
@@ -63,17 +63,17 @@ class nsXMLContentSerializer : public ns
  public:
   nsXMLContentSerializer();
   virtual ~nsXMLContentSerializer();
 
   NS_DECL_ISUPPORTS
 
   NS_IMETHOD Init(PRUint32 flags, PRUint32 aWrapColumn,
                   const char* aCharSet, PRBool aIsCopying,
-                  PRBool aIsWholeDocument);
+                  PRBool aRewriteEncodingDeclaration);
 
   NS_IMETHOD AppendText(nsIDOMText* aText, PRInt32 aStartOffset,
                         PRInt32 aEndOffset, nsAString& aStr);
 
   NS_IMETHOD AppendCDATASection(nsIDOMCDATASection* aCDATASection,
                                 PRInt32 aStartOffset, PRInt32 aEndOffset,
                                 nsAString& aStr);
 
--- a/content/base/test/Makefile.in
+++ b/content/base/test/Makefile.in
@@ -272,16 +272,17 @@ include $(topsrcdir)/config/rules.mk
 		file_xhtmlserializer_1_sibling_body_only_body.xhtml \
 		file_xhtmlserializer_1_no_body.xhtml \
 		test_bug422403-2.xhtml \
 		file_xhtmlserializer_2.xhtml \
 		file_xhtmlserializer_2_basic.xhtml \
 		file_xhtmlserializer_2_enthtml.xhtml \
 		file_xhtmlserializer_2_entw3c.xhtml \
 		file_xhtmlserializer_2_latin1.xhtml \
+		test_bug500937.html \
 		test_htmlcopyencoder.html \
 		test_htmlcopyencoder.xhtml \
 		test_bug270145.xhtml \
 		test_elementTraversal.html \
 		test_w3element_traversal.html \
 		test_w3element_traversal.xhtml \
 		test_bug469020.html \
 		test_w3element_traversal_svg.html \
new file mode 100644
--- /dev/null
+++ b/content/base/test/test_bug500937.html
@@ -0,0 +1,55 @@
+<!DOCTYPE HTML>
+<html>
+<!--
+https://bugzilla.mozilla.org/show_bug.cgi?id=500937
+-->
+<head>
+  <title>Test for Bug 500937</title>
+  <script type="application/javascript" src="/MochiKit/packed.js"></script>
+  <script type="application/javascript" src="/tests/SimpleTest/SimpleTest.js"></script>
+  <link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/>
+</head>
+<body>
+<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=500937">Mozilla Bug 500937</a>
+<p id="display"></p>
+<div id="content" style="display: none">
+  <iframe id=iframe src="about:blank"></iframe>
+</div>
+<pre id="test">
+<script type="application/javascript">
+
+/** Test for Bug 500937 **/
+
+var d = document.implementation.createDocument("http://www.w3.org/1999/xhtml", "html", null);
+var h = d.documentElement;
+h.appendChild(d.createElementNS("http://www.w3.org/1999/xhtml", "head"));
+var b = d.createElementNS("http://www.w3.org/1999/xhtml", "body");
+h.appendChild(b);
+
+b.appendChild(d.createElementNS("http://www.w3.org/1999/xhtml", "div"));
+b.appendChild(d.createElementNS("http://www.w3.org/1999/xhtml", "script"));
+b.appendChild(d.createElementNS("http://www.w3.org/1999/xhtml", "br"));
+b.appendChild(d.createElementNS("http://www.w3.org/1999/xhtml", "source"));
+b.appendChild(d.createElementNS("http://www.w3.org/1999/xhtml", "param"));
+b.appendChild(d.createTextNode("\u00A0"));
+
+is(new XMLSerializer().serializeToString(d), 
+   '<html xmlns="http://www.w3.org/1999/xhtml"><head></head><body><div></div><script><\/script><br /><source /><param />\u00A0</body></html>', 
+   "XML DOM input to XMLSerializer");
+
+d = document.getElementById('iframe').contentWindow.document;
+
+while(d.documentElement.previousSibling) {
+  d.removeChild(d.documentElement.previousSibling);
+}
+
+d.replaceChild(h, d.documentElement);
+
+is(new XMLSerializer().serializeToString(d),  
+   '<html xmlns="http://www.w3.org/1999/xhtml"><head></head><body><div></div><script><\/script><br /><source /><param />\u00A0</body></html>', 
+   "HTML DOM input to XMLSerializer");
+
+</script>
+</pre>
+</body>
+</html>
--- a/content/test/unit/test_xml_serializer.js
+++ b/content/test/unit/test_xml_serializer.js
@@ -256,29 +256,29 @@ function test7() {
                         "http://www.w3.org/1999/xhtml");
   do_check_serialize(doc);
   do_check_eq(SerializeXML(doc),
               '<root xmlns="http://www.w3.org/1999/xhtml"><child1 xmlns="">' +
               '<child2/></child1></root>');
 
   doc = ParseXML('<root xmlns="http://www.w3.org/1999/xhtml">' +
                  '<child1 xmlns="">' +
-                 '<child2 xmlns="http://www.w3.org/1999/xhtml"/>' +
+                 '<child2 xmlns="http://www.w3.org/1999/xhtml"></child2>' +
                  '</child1></root>')
   root = doc.documentElement;
   // No interface flattening in xpcshell
   child1 = root.firstChild.QueryInterface(nsIDOMElement);
   var child2 = child1.firstChild.QueryInterface(nsIDOMElement);
   child1.setAttributeNS("http://www.w3.org/2000/xmlns/", "xmlns",
                         "http://www.w3.org/1999/xhtml");
   child2.setAttributeNS("http://www.w3.org/2000/xmlns/", "xmlns", "");
   do_check_serialize(doc);
   do_check_eq(SerializeXML(doc),
               '<root xmlns="http://www.w3.org/1999/xhtml"><child1 xmlns="">' +
-              '<a0:child2 xmlns:a0="http://www.w3.org/1999/xhtml" xmlns=""/></child1></root>');
+              '<a0:child2 xmlns:a0="http://www.w3.org/1999/xhtml" xmlns=""></a0:child2></child1></root>');
 }
 
 function test8() {
   // Test behavior of serializing with a given charset.
   var str1 = '<?xml version="1.0" encoding="ISO-8859-1"?>'+LB+'<root/>';
   var str2 = '<?xml version="1.0" encoding="UTF8"?>'+LB+'<root/>';
   var doc1 = ParseXML(str1);
   var doc2 = ParseXML(str2);