Bug 424359. Fix OutputBodyOnly to not output a stray </html> and fix newline stuff for doctypes, comments, and the entire XML serializer. Now with even better passing tests. Third time is the charm! r=smaug, sr=bzbarsky
author Laurent Jouanneau <laurent@xulfr.org>
Tue, 21 Oct 2008 16:30:09 -0400
changeset 20734 f19fc94f348b39f1350493120c967765c03b6523
parent 20733 f05dd60fa3edd37783ada9ad8be7dba02fbd9ed7
child 20735 037eb901b8b28cc985017ca9bb1c001174ef24d5
push id unknown
push user unknown
push date unknown
reviewers smaug, bzbarsky
bugs 424359
milestone 1.9.1b2pre
Bug 424359. Fix OutputBodyOnly to not output a stray </html> and fix newline stuff for doctypes, comments, and the entire XML serializer. Now with even better passing tests. Third time is the charm! r=smaug, sr=bzbarsky
content/base/public/nsIDocumentEncoder.idl
content/base/src/nsHTMLContentSerializer.cpp
content/base/src/nsHTMLContentSerializer.h
content/base/src/nsXMLContentSerializer.cpp
content/base/src/nsXMLContentSerializer.h
content/base/test/Makefile.in
content/test/unit/test_xml_serializer.js
--- a/content/base/public/nsIDocumentEncoder.idl
+++ b/content/base/public/nsIDocumentEncoder.idl
@@ -72,69 +72,74 @@ interface nsIDocumentEncoder : nsISuppor
    * Output only the selection (as opposed to the whole document).
    */
   const unsigned long OutputSelectionOnly = (1 << 0);
 
   /** Plaintext output: Convert html to plaintext that looks like the html.
     * Implies wrap (except inside <pre>), since html wraps.
     * HTML output: always do prettyprinting, ignoring existing formatting.
     * (Probably not well tested for HTML output.)
+    * XML output: unsupported
     */
   const unsigned long OutputFormatted     = (1 << 1);
 
   /** Don't do prettyprinting of HTML.  Don't do any wrapping that's not in
    * the existing HTML source.  This option overrides OutputFormatted if both
    * are set.
+   * Plaintext and HTML output only.
    * @note This option does not affect entity conversion.
    */
   const unsigned long OutputRaw           = (1 << 2);
 
   /** 
    * Do not print html head tags.
+   * HTML output only.
    */
   const unsigned long OutputBodyOnly      = (1 << 3);
 
   /**
-   * Wrap even if we're not doing formatted output (e.g. for text fields)
-   * XXXbz this doesn't seem to be used by all serializers... document?  How
-   * does this interact with
+   * Wrap even if we're not doing formatted output (e.g. for text fields).
+   * Plaintext output only.
+   * XXXbz How does this interact with
    * OutputFormatted/OutputRaw/OutputWrap/OutputFormatFlowed?
    */
   const unsigned long OutputPreformatted  = (1 << 4);
 
   /**
    * Output as though the content is preformatted
    * (e.g. maybe it's wrapped in a PRE or PRE_WRAP style tag)
-   * XXXbz this doesn't seem to be used by all serializers... document?  How
-   * does this interact with
+   * Plaintext output only.
+   * XXXbz How does this interact with
    * OutputFormatted/OutputRaw/OutputPreformatted/OutputFormatFlowed?
    */
   const unsigned long OutputWrap          = (1 << 5);
 
   /**
    * Output for format flowed (RFC 2646). This is used when converting
    * to text for mail sending. This differs just slightly
    * but in an important way from normal formatted, and that is that
    * lines are space stuffed. This can't (correctly) be done later.
-   * XXXbz this doesn't seem to be used by all serializers... document?  How
-   * does this interact with
+   * Plaintext output only.
+   * XXXbz How does this interact with
    * OutputFormatted/OutputRaw/OutputPreformatted/OutputWrap?
    */
   const unsigned long OutputFormatFlowed  = (1 << 6);
 
   /**
-   * Convert links, image src, and script src to absolute URLs when possible
+   * Convert links, image src, and script src to absolute URLs when possible.
+   * HTML output only.
    */
   const unsigned long OutputAbsoluteLinks = (1 << 7);
 
   /**
    * Attempt to encode entities standardized at W3C (HTML, MathML, etc).
    * This is a catch-all flag for documents with mixed contents. Beware of
    * interoperability issues. See below for other flags which might likely
    * do what you want.
+   * HTML output only.
    */
   const unsigned long OutputEncodeW3CEntities = (1 << 8);
 
   /** 
    * LineBreak processing: if this flag is set than CR line breaks will
    * be written. If neither this nor OutputLFLineBreak is set, then we
    * will use platform line breaks. The combination of the two flags will
    * cause CRLF line breaks to be written.
@@ -158,46 +163,50 @@ interface nsIDocumentEncoder : nsISuppor
   /**
    * Output the content of noframes elements (only for serializing
    * to plaintext).
    */
   const unsigned long OutputNoFramesContent = (1 << 12);
 
   /**
    * Don't allow any formatting nodes (e.g. <br>, <b>) inside a <pre>.
-   * This is used primarily by mail.
+   * This is used primarily by mail. HTML output only.
    */
   const unsigned long OutputNoFormattingInPre = (1 << 13);
 
   /**
    * Encode entities when outputting to a string.
    * E.g. If set, we'll output &nbsp; if clear, we'll output 0xa0.
    * The basic set is just &nbsp; &amp; &lt; &gt; &quot; for interoperability
    * with older products that don't support &alpha; and friends.
+   * HTML output only.
    */
   const unsigned long OutputEncodeBasicEntities = (1 << 14);
     
   /**
    * Encode entities when outputting to a string.
    * The Latin1 entity set additionally includes 8bit accented letters
    * between 128 and 255.
+   * HTML output only.
    */
   const unsigned long OutputEncodeLatin1Entities = (1 << 15);
   
   /**
    * Encode entities when outputting to a string.
    * The HTML entity set additionally includes accented letters, greek
    * letters, and other special markup symbols as defined in HTML4.
+   * HTML output only.
    */
   const unsigned long OutputEncodeHTMLEntities = (1 << 16);
 
   /**
    * Normally &nbsp; is replaced with a space character when
    * encoding data as plain text, set this flag if that's
    * not desired.
+   * Plaintext output only.
    */
   const unsigned long OutputPersistNBSP = (1 << 17);
   
   /**
    * Initialize with a pointer to the document and the mime type.
    * @param aDocument Document to encode.
    * @param aMimeType MimeType to use. May also be set by SetMimeType.
    * @param aFlags Flags to use while encoding. May also be set by SetFlags.
--- a/content/base/src/nsHTMLContentSerializer.cpp
+++ b/content/base/src/nsHTMLContentSerializer.cpp
@@ -17,16 +17,17 @@
  *
  * The Initial Developer of the Original Code is
  * Netscape Communications Corporation.
  * Portions created by the Initial Developer are Copyright (C) 1998
  * the Initial Developer. All Rights Reserved.
  *
  * Contributor(s):
  *   Ryan Jones <sciguyryan@gmail.com>
+ *   Laurent Jouanneau <laurent.jouanneau@disruptive-innovations.com>
  *
  * Alternatively, the contents of this file may be used under the terms of
  * either of the GNU General Public License Version 2 or later (the "GPL"),
  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  * in which case the provisions of the GPL or the LGPL are applicable instead
  * of those above. If you wish to allow use of your version of this file only
  * under the terms of either the GPL or the LGPL, and not to allow others to
  * use your version of this file under the terms of the MPL, indicate your
@@ -62,16 +63,17 @@
 #include "nsEscape.h"
 #include "nsITextToSubURI.h"
 #include "nsCRT.h"
 #include "nsIParserService.h"
 #include "nsContentUtils.h"
 #include "nsLWBrkCIID.h"
 #include "nsIScriptElement.h"
 #include "nsAttrName.h"
+#include "nsILineBreaker.h"
 
 #define kIndentStr NS_LITERAL_STRING("  ")
 #define kLessThan NS_LITERAL_STRING("<")
 #define kGreaterThan NS_LITERAL_STRING(">")
 #define kEndTag NS_LITERAL_STRING("</")
 
 static const char kMozStr[] = "moz";
 
@@ -84,23 +86,21 @@ nsresult NS_NewHTMLContentSerializer(nsI
     return NS_ERROR_OUT_OF_MEMORY;
   }
 
   return CallQueryInterface(it, aSerializer);
 }
 
 nsHTMLContentSerializer::nsHTMLContentSerializer()
 : mIndent(0),
-  mColPos(0),
-  mInBody(PR_FALSE),
+  mInBody(0),
   mAddSpace(PR_FALSE),
   mMayIgnoreLineBreakSequence(PR_FALSE),
   mIsWholeDocument(PR_FALSE),
-  mInCDATA(PR_FALSE),
-  mNeedLineBreaker(PR_TRUE)
+  mInCDATA(PR_FALSE)
 {
 }
 
 nsHTMLContentSerializer::~nsHTMLContentSerializer()
 {
   NS_ASSERTION(mOLStateStack.Count() == 0, "Expected OL State stack to be empty");
   if (mOLStateStack.Count() > 0){
     for (PRInt32 i = 0; i < mOLStateStack.Count(); i++){
@@ -111,74 +111,53 @@ nsHTMLContentSerializer::~nsHTMLContentS
   }
 }
 
 NS_IMETHODIMP 
 nsHTMLContentSerializer::Init(PRUint32 aFlags, PRUint32 aWrapColumn,
                               const char* aCharSet, PRBool aIsCopying,
                               PRBool aIsWholeDocument)
 {
-  mFlags = aFlags;
+  nsresult rv;
+  rv = nsXMLContentSerializer::Init(aFlags, aWrapColumn, aCharSet, aIsCopying, aIsWholeDocument);
+  NS_ENSURE_SUCCESS(rv, rv);
+
   if (!aWrapColumn) {
     mMaxColumn = 72;
   }
   else {
     mMaxColumn = aWrapColumn;
   }
 
   mIsWholeDocument = aIsWholeDocument;
   mIsCopying = aIsCopying;
   mIsFirstChildOfOL = PR_FALSE;
   mDoFormat = (mFlags & nsIDocumentEncoder::OutputFormatted) ? PR_TRUE
                                                              : PR_FALSE;
   mBodyOnly = (mFlags & nsIDocumentEncoder::OutputBodyOnly) ? PR_TRUE
                                                             : PR_FALSE;
-  // Set the line break character:
-  if ((mFlags & nsIDocumentEncoder::OutputCRLineBreak)
-      && (mFlags & nsIDocumentEncoder::OutputLFLineBreak)) { // Windows
-    mLineBreak.AssignLiteral("\r\n");
-  }
-  else if (mFlags & nsIDocumentEncoder::OutputCRLineBreak) { // Mac
-    mLineBreak.AssignLiteral("\r");
-  }
-  else if (mFlags & nsIDocumentEncoder::OutputLFLineBreak) { // Unix/DOM
-    mLineBreak.AssignLiteral("\n");
-  }
-  else {
-    mLineBreak.AssignLiteral(NS_LINEBREAK);         // Platform/default
-  }
 
   mPreLevel = 0;
 
-  mCharset = aCharSet;
-
   // set up entity converter if we are going to need it
   if (mFlags & nsIDocumentEncoder::OutputEncodeW3CEntities) {
     mEntityConverter = do_CreateInstance(NS_ENTITYCONVERTER_CONTRACTID);
   }
 
   return NS_OK;
 }
 
 NS_IMETHODIMP 
 nsHTMLContentSerializer::AppendText(nsIDOMText* aText, 
                                     PRInt32 aStartOffset,
                                     PRInt32 aEndOffset,
                                     nsAString& aStr)
 {
   NS_ENSURE_ARG(aText);
 
-  if (mNeedLineBreaker) {
-    mNeedLineBreaker = PR_FALSE;
-
-    nsCOMPtr<nsIDOMDocument> domDoc;
-    aText->GetOwnerDocument(getter_AddRefs(domDoc));
-    nsCOMPtr<nsIDocument> document = do_QueryInterface(domDoc);
-  }
-
   nsAutoString data;
 
   nsresult rv;
   rv = AppendTextData((nsIDOMNode*)aText, aStartOffset, 
                       aEndOffset, data, PR_TRUE, PR_FALSE);
   if (NS_FAILED(rv))
     return NS_ERROR_FAILURE;
 
@@ -653,17 +632,17 @@ nsHTMLContentSerializer::AppendElementSt
       && (mFlags & nsIDocumentEncoder::OutputNoFormattingInPre)) {
     AppendToString(mLineBreak, aStr);
     mMayIgnoreLineBreakSequence = PR_TRUE;
     mColPos = 0;
     return NS_OK;
   }
 
   if (name == nsGkAtoms::body) {
-    mInBody = PR_TRUE;
+    ++mInBody;
   }
 
   if (LineBreakBeforeOpen(name, hasDirtyAttr)) {
     AppendToString(mLineBreak, aStr);
     mMayIgnoreLineBreakSequence = PR_TRUE;
     mColPos = 0;
     mAddSpace = PR_FALSE;
   }
@@ -853,16 +832,20 @@ nsHTMLContentSerializer::AppendElementEn
     AppendToString(mLineBreak, aStr);
     mMayIgnoreLineBreakSequence = PR_TRUE;
     mColPos = 0;
   }
   else {
     MaybeFlagNewline(aElement);
   }
 
+  if (name == nsGkAtoms::body) {
+    --mInBody;
+  }
+
   mInCDATA = PR_FALSE;
 
   return NS_OK;
 }
 
 void
 nsHTMLContentSerializer::AppendToString(const PRUnichar* aStr,
                                         PRInt32 aLength,
@@ -1031,41 +1014,16 @@ nsHTMLContentSerializer::AppendToString(
     }
 
     return;
   }
 
   aOutputStr.Append(aStr);
 }
 
-void
-nsHTMLContentSerializer::AppendToStringConvertLF(const nsAString& aStr,
-                                                 nsAString& aOutputStr)
-{
-  // Convert line-endings to mLineBreak
-  PRUint32 start = 0;
-  PRUint32 theLen = aStr.Length();
-  while (start < theLen) {
-    PRInt32 eol = aStr.FindChar('\n', start);
-    if (eol == kNotFound) {
-      nsDependentSubstring dataSubstring(aStr, start, theLen - start);
-      AppendToString(dataSubstring, aOutputStr);
-      start = theLen;
-    }
-    else {
-      nsDependentSubstring dataSubstring(aStr, start, eol - start);
-      AppendToString(dataSubstring, aOutputStr);
-      AppendToString(mLineBreak, aOutputStr);
-      start = eol + 1;
-      if (start == theLen)
-        mColPos = 0;
-    }
-  }
-}
-
 PRBool
 nsHTMLContentSerializer::LineBreakBeforeOpen(nsIAtom* aName, 
                                              PRBool aHasDirtyAttr)
 {
   if ((!mDoFormat && !aHasDirtyAttr) || mPreLevel || !mColPos ||
       (mFlags & nsIDocumentEncoder::OutputRaw)) {
     return PR_FALSE;
   }
--- a/content/base/src/nsHTMLContentSerializer.h
+++ b/content/base/src/nsHTMLContentSerializer.h
@@ -15,16 +15,17 @@
  * The Original Code is mozilla.org code.
  *
  * The Initial Developer of the Original Code is
  * Netscape Communications Corporation.
  * Portions created by the Initial Developer are Copyright (C) 1998
  * the Initial Developer. All Rights Reserved.
  *
  * Contributor(s):
+ *   Laurent Jouanneau <laurent.jouanneau@disruptive-innovations.com>
  *
  * Alternatively, the contents of this file may be used under the terms of
  * either of the GNU General Public License Version 2 or later (the "GPL"),
  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  * in which case the provisions of the GPL or the LGPL are applicable instead
  * of those above. If you wish to allow use of your version of this file only
  * under the terms of either the GPL or the LGPL, and not to allow others to
  * use your version of this file under the terms of the MPL, indicate your
@@ -42,17 +43,16 @@
  */
 
 #ifndef nsHTMLContentSerializer_h__
 #define nsHTMLContentSerializer_h__
 
 #include "nsXMLContentSerializer.h"
 #include "nsIEntityConverter.h"
 #include "nsString.h"
-#include "nsILineBreaker.h"
 
 class nsIContent;
 class nsIAtom;
 
 class nsHTMLContentSerializer : public nsXMLContentSerializer {
  public:
   nsHTMLContentSerializer();
   virtual ~nsHTMLContentSerializer();
@@ -96,18 +96,17 @@ class nsHTMLContentSerializer : public n
                               PRInt32 aLength,
                               nsAString& aOutputStr);
   virtual void AppendToString(const PRUnichar aChar,
                               nsAString& aOutputStr);
   virtual void AppendToString(const nsAString& aStr,
                               nsAString& aOutputStr,
                               PRBool aTranslateEntities = PR_FALSE,
                               PRBool aIncrColumn = PR_TRUE);
-  virtual void AppendToStringConvertLF(const nsAString& aStr,
-                                       nsAString& aOutputStr);
+
   void AppendWrapped_WhitespaceSequence(
           nsASingleFragmentString::const_char_iterator &aPos,
           const nsASingleFragmentString::const_char_iterator aEnd,
           const nsASingleFragmentString::const_char_iterator aSequenceStart,
           PRBool &aMayIgnoreStartOfLineWhitespaceSequence,
           nsAString &aOutputStr);
   void AppendWrapped_NonWhitespaceSequence(
           nsASingleFragmentString::const_char_iterator &aPos,
@@ -120,19 +119,18 @@ class nsHTMLContentSerializer : public n
                                      PRBool aTranslateEntities);
   PRBool HasLongLines(const nsString& text, PRInt32& aLastNewlineOffset);
   nsresult EscapeURI(const nsAString& aURI, nsAString& aEscapedURI);
   PRBool IsJavaScript(nsIAtom* aAttrNameAtom, const nsAString& aAttrValueString);
 
   nsCOMPtr<nsIEntityConverter> mEntityConverter;
 
   PRInt32   mIndent;
-  PRInt32   mColPos;
-  PRUint32  mFlags;
-  PRPackedBool  mInBody;
+
+  PRUint32  mInBody;
 
   PRPackedBool  mDoFormat;
   PRPackedBool  mDoHeader;
   PRPackedBool  mBodyOnly;
   PRPackedBool  mIsCopying; // Set to PR_TRUE only while copying
 
   // Indicates that a space will be added if and only if content is
   // continued on the same line while serializing source.  Otherwise,
@@ -153,22 +151,18 @@ class nsHTMLContentSerializer : public n
    * the content of a element whose content is considerd CDATA by the
    * serializer (such elements are 'script', 'style', 'noscript' and
    * possibly others) This doesn't have anything to do with if the
    * element is defined as CDATA in the DTD, it simply means we'll
    * output the content of the element without doing any entity encoding
    * what so ever.
    */
   PRPackedBool mInCDATA;
-  PRPackedBool mNeedLineBreaker;
-
-  nsCOMPtr<nsILineBreaker> mLineBreaker;
 
   PRInt32   mMaxColumn;
-  nsString  mLineBreak;
 
   // To keep track of startvalue of OL and first list item for nested lists
   struct olState {
     olState(PRInt32 aStart, PRBool aIsFirst):startVal(aStart),isFirstListItem(aIsFirst)
     {
     }
     PRInt32 startVal;
     PRBool isFirstListItem;
--- a/content/base/src/nsXMLContentSerializer.cpp
+++ b/content/base/src/nsXMLContentSerializer.cpp
@@ -15,16 +15,17 @@
  * The Original Code is mozilla.org code.
  *
  * The Initial Developer of the Original Code is
  * Netscape Communications Corporation.
  * Portions created by the Initial Developer are Copyright (C) 1998
  * the Initial Developer. All Rights Reserved.
  *
  * Contributor(s):
+ *   Laurent Jouanneau <laurent.jouanneau@disruptive-innovations.com>
  *
  * Alternatively, the contents of this file may be used under the terms of
  * either of the GNU General Public License Version 2 or later (the "GPL"),
  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  * in which case the provisions of the GPL or the LGPL are applicable instead
  * of those above. If you wish to allow use of your version of this file only
  * under the terms of either the GPL or the LGPL, and not to allow others to
  * use your version of this file under the terms of the MPL, indicate your
@@ -48,16 +49,17 @@
 #include "nsIDOMCDATASection.h"
 #include "nsIDOMProcessingInstruction.h"
 #include "nsIDOMComment.h"
 #include "nsIDOMDocument.h"
 #include "nsIDOMDocumentType.h"
 #include "nsIDOMElement.h"
 #include "nsIContent.h"
 #include "nsIDocument.h"
+#include "nsIDocumentEncoder.h"
 #include "nsINameSpaceManager.h"
 #include "nsTextFragment.h"
 #include "nsString.h"
 #include "prprf.h"
 #include "nsUnicharUtils.h"
 #include "nsCRT.h"
 #include "nsContentUtils.h"
 #include "nsAttrName.h"
@@ -75,33 +77,50 @@ nsresult NS_NewXMLContentSerializer(nsIC
     return NS_ERROR_OUT_OF_MEMORY;
   }
 
   return CallQueryInterface(it, aSerializer);
 }
 
 nsXMLContentSerializer::nsXMLContentSerializer()
   : mPrefixIndex(0),
+    mColPos(0),
     mInAttribute(PR_FALSE),
     mAddNewline(PR_FALSE)
 {
 }
  
 nsXMLContentSerializer::~nsXMLContentSerializer()
 {
 }
 
 NS_IMPL_ISUPPORTS1(nsXMLContentSerializer, nsIContentSerializer)
 
 NS_IMETHODIMP 
-nsXMLContentSerializer::Init(PRUint32 flags, PRUint32 aWrapColumn,
+nsXMLContentSerializer::Init(PRUint32 aFlags, PRUint32 aWrapColumn,
                              const char* aCharSet, PRBool aIsCopying,
                              PRBool aIsWholeDocument)
 {
   mCharset = aCharSet;
+  mFlags = aFlags;
+
+  // Set the line break character:
+  if ((mFlags & nsIDocumentEncoder::OutputCRLineBreak)
+      && (mFlags & nsIDocumentEncoder::OutputLFLineBreak)) { // Windows
+    mLineBreak.AssignLiteral("\r\n");
+  }
+  else if (mFlags & nsIDocumentEncoder::OutputCRLineBreak) { // Mac
+    mLineBreak.AssignLiteral("\r");
+  }
+  else if (mFlags & nsIDocumentEncoder::OutputLFLineBreak) { // Unix/DOM
+    mLineBreak.AssignLiteral("\n");
+  }
+  else {
+    mLineBreak.AssignLiteral(NS_LINEBREAK);         // Platform/default
+  }
   return NS_OK;
 }
 
 nsresult
 nsXMLContentSerializer::AppendTextData(nsIDOMNode* aNode, 
                                        PRInt32 aStartOffset,
                                        PRInt32 aEndOffset,
                                        nsAString& aStr,
@@ -118,17 +137,16 @@ nsXMLContentSerializer::AppendTextData(n
   PRInt32 length = endoffset - aStartOffset;
 
   NS_ASSERTION(aStartOffset >= 0, "Negative start offset for text fragment!");
   NS_ASSERTION(aStartOffset <= endoffset, "A start offset is beyond the end of the text fragment!");
 
   if (length <= 0) {
     // XXX Zero is a legal value, maybe non-zero values should be an
     // error.
-
     return NS_OK;
   }
     
   if (frag->Is2b()) {
     const PRUnichar *strStart = frag->Get2b() + aStartOffset;
     AppendToString(Substring(strStart, strStart + length), aStr,
                    aTranslateEntities, aIncrColumn);
   }
@@ -143,17 +161,26 @@ nsXMLContentSerializer::AppendTextData(n
 NS_IMETHODIMP 
 nsXMLContentSerializer::AppendText(nsIDOMText* aText, 
                                    PRInt32 aStartOffset,
                                    PRInt32 aEndOffset,
                                    nsAString& aStr)
 {
   NS_ENSURE_ARG(aText);
 
-  return AppendTextData(aText, aStartOffset, aEndOffset, aStr, PR_TRUE, PR_TRUE);
+  nsAutoString data;
+  nsresult rv;
+
+  rv = AppendTextData(aText, aStartOffset, aEndOffset, data, PR_TRUE, PR_TRUE);
+  if (NS_FAILED(rv))
+    return NS_ERROR_FAILURE;
+
+  AppendToStringConvertLF(data, aStr);
+
+  return NS_OK;
 }
 
 NS_IMETHODIMP 
 nsXMLContentSerializer::AppendCDATASection(nsIDOMCDATASection* aCDATASection,
                                            PRInt32 aStartOffset,
                                            PRInt32 aEndOffset,
                                            nsAString& aStr)
 {
@@ -185,17 +212,17 @@ nsXMLContentSerializer::AppendProcessing
 
   rv = aPI->GetData(data);
   if (NS_FAILED(rv)) return NS_ERROR_FAILURE;
 
   AppendToString(NS_LITERAL_STRING("<?"), aStr);
   AppendToString(target, aStr);
   if (!data.IsEmpty()) {
     AppendToString(NS_LITERAL_STRING(" "), aStr);
-    AppendToString(data, aStr);
+    AppendToStringConvertLF(data, aStr);
   }
   AppendToString(NS_LITERAL_STRING("?>"), aStr);
   MaybeFlagNewline(aPI);
   
   return NS_OK;
 }
 
 NS_IMETHODIMP 
@@ -215,20 +242,20 @@ nsXMLContentSerializer::AppendComment(ns
 
   AppendToString(NS_LITERAL_STRING("<!--"), aStr);
   if (aStartOffset || (aEndOffset != -1)) {
     PRInt32 length = (aEndOffset == -1) ? data.Length() : aEndOffset;
     length -= aStartOffset;
 
     nsAutoString frag;
     data.Mid(frag, aStartOffset, length);
-    AppendToString(frag, aStr);
+    AppendToStringConvertLF(frag, aStr);
   }
   else {
-    AppendToString(data, aStr);
+    AppendToStringConvertLF(data, aStr);
   }
   AppendToString(NS_LITERAL_STRING("-->"), aStr);
   MaybeFlagNewline(aComment);
   
   return NS_OK;
 }
 
 NS_IMETHODIMP 
@@ -960,17 +987,17 @@ nsXMLContentSerializer::IsShorthandAttr(
 
   return PR_FALSE;
 }
 
 void
 nsXMLContentSerializer::MaybeAddNewline(nsAString& aStr)
 {
   if (mAddNewline) {
-    aStr.Append((PRUnichar)'\n');
+    aStr.Append(mLineBreak);
     mAddNewline = PR_FALSE;
   }
 }
 
 void
 nsXMLContentSerializer::MaybeFlagNewline(nsIDOMNode* aNode)
 {
   nsCOMPtr<nsIDOMNode> parent;
@@ -1019,8 +1046,33 @@ nsXMLContentSerializer::AppendDocumentSt
     aStr += NS_LITERAL_STRING(" standalone=\"") + standalone + endQuote;
   }
 
   aStr.AppendLiteral("?>");
   mAddNewline = PR_TRUE;
 
   return NS_OK;
 }
+
+void
+nsXMLContentSerializer::AppendToStringConvertLF(const nsAString& aStr,
+                                                 nsAString& aOutputStr)
+{
+  // Convert line-endings to mLineBreak
+  PRUint32 start = 0;
+  PRUint32 theLen = aStr.Length();
+  while (start < theLen) {
+    PRInt32 eol = aStr.FindChar('\n', start);
+    if (eol == kNotFound) {
+      nsDependentSubstring dataSubstring(aStr, start, theLen - start);
+      AppendToString(dataSubstring, aOutputStr);
+      start = theLen;
+    }
+    else {
+      nsDependentSubstring dataSubstring(aStr, start, eol - start);
+      AppendToString(dataSubstring, aOutputStr);
+      AppendToString(mLineBreak, aOutputStr);
+      start = eol + 1;
+      if (start == theLen)
+        mColPos = 0;
+    }
+  }
+}
--- a/content/base/src/nsXMLContentSerializer.h
+++ b/content/base/src/nsXMLContentSerializer.h
@@ -142,27 +142,39 @@ class nsXMLContentSerializer : public ns
   void SerializeAttr(const nsAString& aPrefix,
                      const nsAString& aName,
                      const nsAString& aValue,
                      nsAString& aStr,
                      PRBool aDoEscapeEntities);
   PRBool IsShorthandAttr(const nsIAtom* aAttrName,
                          const nsIAtom* aElementName);
 
+  virtual void AppendToStringConvertLF(const nsAString& aStr,
+                                       nsAString& aOutputStr);
+
   // Functions to check for newlines that needs to be added between nodes in
   // the root of a document.
   void MaybeAddNewline(nsAString& aStr);
   void MaybeFlagNewline(nsIDOMNode* aNode);
 
   PRInt32 mPrefixIndex;
   nsVoidArray mNameSpaceStack;
 
+  // nsIDocumentEncoder flags
+  PRUint32  mFlags;
+
+  // characters to use for line break
+  nsString  mLineBreak;
+
   // The charset that was passed to Init()
   nsCString mCharset;
   
+  // current column position
+  PRInt32   mColPos;
+
   PRPackedBool mInAttribute;
   PRPackedBool mAddNewline;
 };
 
 nsresult
 NS_NewXMLContentSerializer(nsIContentSerializer** aSerializer);
 
 #endif 
--- a/content/base/test/Makefile.in
+++ b/content/base/test/Makefile.in
@@ -42,16 +42,18 @@ VPATH		= @srcdir@
 relativesrcdir  = content/base/test
 
 include $(DEPTH)/config/autoconf.mk
 
 DIRS += \
   chrome \
   $(NULL)
 
+MODULE = content
+
 CPP_UNIT_TESTS += TestNativeXMLHttpRequest.cpp TestPlainTextSerializer.cpp
 
 LOCAL_INCLUDES += -I$(topsrcdir)/xpcom/tests
 
 REQUIRES += \
   caps \
   content \
   dom \
@@ -67,16 +69,20 @@ CPPSRCS += $(CPP_UNIT_TESTS)
 
 SIMPLE_PROGRAMS += $(CPP_UNIT_TESTS:.cpp=$(BIN_SUFFIX))
 
 LIBS += \
   $(XPCOM_GLUE_LDOPTS) \
   $(NSPR_LIBS) \
   $(NULL)
 
+XPCSHELL_TESTS = \
+               unit \
+               $(NULL)
+
 include $(topsrcdir)/config/rules.mk
 
 _TEST_FILES = 	test_bug5141.html \
 		test_bug51034.html \
 		test_bug199959.html \
 		test_bug218236.html \
 		file_bug218236_multipart.txt \
 		file_bug218236_multipart.txt^headers^ \
@@ -162,16 +168,35 @@ include $(topsrcdir)/config/rules.mk
 		test_bug417255.html \
 		test_bug417384.html \
 		test_bug418214.html \
 		test_bug419527.xhtml \
 		test_bug420609.xhtml \
 		test_bug420700.html \
 		test_bug421602.html \
 		test_bug422537.html \
+		test_bug424359-1.html \
+		file_htmlserializer_1.html \
+		file_htmlserializer_1_bodyonly.html \
+		file_htmlserializer_1_format.html \
+		file_htmlserializer_1_linebreak.html \
+		file_htmlserializer_1_links.html \
+		file_htmlserializer_1_noflag.html \
+		file_htmlserializer_1_noformatpre.html \
+		file_htmlserializer_1_raw.html \
+		file_htmlserializer_1_nested_body.html \
+		file_htmlserializer_1_sibling_body.html \
+		file_htmlserializer_1_sibling_body_only_body.html \
+		file_htmlserializer_1_no_body.html \
+		test_bug424359-2.html \
+		file_htmlserializer_2.html \
+		file_htmlserializer_2_basic.html \
+		file_htmlserializer_2_enthtml.html \
+		file_htmlserializer_2_entw3c.html \
+		file_htmlserializer_2_latin1.html \
 		test_bug424212.html \
 		test_bug425013.html \
 		bug426308-redirect.sjs \
 		test_bug426308.html \
 		test_bug426646.html \
 		file_bug426646-1.html \
 		file_bug426646-2.html \
 		test_bug429157.html \
--- a/content/test/unit/test_xml_serializer.js
+++ b/content/test/unit/test_xml_serializer.js
@@ -1,9 +1,22 @@
+
+// The XML serializer uses the platform's default line break,
+// so we need to know that default line break in order
+// to correctly build the reference strings for the tests.
+// This variable will contain this value.
+var LB;
+
 function run_test() {
+
+  if(("@mozilla.org/windows-registry-key;1" in C) || ("nsILocalFileOS2" in I))
+    LB = "\r\n";
+  else
+    LB = "\n";
+
   for (var i = 0; i < tests.length && tests[i]; ++i) {
     tests[i].call();
   }
 }
 
 var tests = [
   test1,
   test2,
@@ -260,18 +273,18 @@ function test7() {
   do_check_serialize(doc);
   do_check_eq(SerializeXML(doc),
               '<root xmlns="http://www.w3.org/1999/xhtml"><child1 xmlns="">' +
               '<a0:child2 xmlns:a0="http://www.w3.org/1999/xhtml" xmlns=""/></child1></root>');
 }
 
 function test8() {
   // Test behavior of serializing with a given charset.
-  var str1 = '<?xml version="1.0" encoding="ISO-8859-1"?>\n<root/>';
-  var str2 = '<?xml version="1.0" encoding="UTF8"?>\n<root/>';
+  var str1 = '<?xml version="1.0" encoding="ISO-8859-1"?>'+LB+'<root/>';
+  var str2 = '<?xml version="1.0" encoding="UTF8"?>'+LB+'<root/>';
   var doc1 = ParseXML(str1);
   var doc2 = ParseXML(str2);
 
   var p = Pipe();
   DOMSerializer().serializeToStream(doc1, p.outputStream, "ISO-8859-1");
   p.outputStream.close();
   do_check_eq(ScriptableInput(p).read(-1), str1);
 
@@ -292,19 +305,19 @@ function test8() {
 }
 
 function test9() {
   // Test behavior of serializing between given charsets, using
   // ISO-8859-1-representable text.
   var contents = '<root>' +
                    '\u00BD + \u00BE == \u00BD\u00B2 + \u00BC + \u00BE' +
                  '</root>';
-  var str1 = '<?xml version="1.0" encoding="ISO-8859-1"?>\n' + contents;
-  var str2 = '<?xml version="1.0" encoding="UTF8"?>\n' + contents;
-  var str3 = '<?xml version="1.0" encoding="UTF-16"?>\n' + contents;
+  var str1 = '<?xml version="1.0" encoding="ISO-8859-1"?>'+ LB + contents;
+  var str2 = '<?xml version="1.0" encoding="UTF8"?>'+ LB + contents;
+  var str3 = '<?xml version="1.0" encoding="UTF-16"?>'+ LB + contents;
   var doc1 = ParseXML(str1);
   var doc2 = ParseXML(str2);
   var doc3 = ParseXML(str3);
 
   checkSerialization(doc1, "ISO-8859-1", str1);
   checkSerialization(doc2, "ISO-8859-1", str1);
   checkSerialization(doc3, "ISO-8859-1", str1);
 
@@ -322,18 +335,18 @@ function test10() {
   // Unicode characters (XXX but only BMP ones because I don't know
   // how to create one with non-BMP characters, either with JS strings
   // or using DOM APIs).
   var contents = '<root>' +
                    'AZaz09 \u007F ' +               // U+000000 to U+00007F
                    '\u0080 \u0398 \u03BB \u0725 ' + // U+000080 to U+0007FF
                    '\u0964 \u0F5F \u20AC \uFFFB' +  // U+000800 to U+00FFFF
                  '</root>';
-  var str1 = '<?xml version="1.0" encoding="UTF8"?>\n' + contents;
-  var str2 = '<?xml version="1.0" encoding="UTF-16"?>\n' + contents;
+  var str1 = '<?xml version="1.0" encoding="UTF8"?>'+ LB + contents;
+  var str2 = '<?xml version="1.0" encoding="UTF-16"?>'+ LB + contents;
   var doc1 = ParseXML(str1);
   var doc2 = ParseXML(str2);
 
   checkSerialization(doc1, "UTF8", str1);
   checkSerialization(doc2, "UTF8", str1);
 
   checkSerialization(doc1, "UTF-16", str2);
   checkSerialization(doc2, "UTF-16", str2);