bug 564737 - plaintext serializer shouldn't take care of leading spaces in a block. tests by David :Bienvenu. r=laurentj sr+a=jst
author Jonathan Kamens <jik@kamens.brookline.ma.us>
Tue, 21 Sep 2010 15:02:55 +0200
changeset 54464 27df5529f2cf3e259ccf20e8d45820c2f19c2f8d
parent 54463 2ef97f492c59e65d192b13f5701617d7a3482989
child 54465 3ea256acb017288d9ce5359892c42e26bff4c912
push id 15892
push user laurent@xulfr.org
push date Tue, 21 Sep 2010 13:08:03 +0000
treeherder mozilla-central@27df5529f2cf [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers laurentj
bugs 564737
milestone 2.0b7pre
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
bug 564737 - plaintext serializer shouldn't take care of leading spaces in a block. tests by David :Bienvenu. r=laurentj sr+a=jst
content/base/src/nsPlainTextSerializer.cpp
content/base/test/TestPlainTextSerializer.cpp
content/base/test/test_copypaste.html
--- a/content/base/src/nsPlainTextSerializer.cpp
+++ b/content/base/src/nsPlainTextSerializer.cpp
@@ -114,17 +114,17 @@ nsPlainTextSerializer::nsPlainTextSerial
   }
 
   // Line breaker
   mWrapColumn = 72;     // XXX magic number, we expect someone to reset this
   mCurrentLineWidth = 0;
 
   // Flow
   mEmptyLines = 1; // The start of the document is an "empty line" in itself,
-  mInWhitespace = PR_TRUE;
+  mInWhitespace = PR_FALSE;
   mPreFormatted = PR_FALSE;
   mStartedOutput = PR_FALSE;
 
   // initialize the tag stack to zero:
   mTagStack = new nsHTMLTag[TagStackSize];
   mTagStackIndex = 0;
   mIgnoreAboveIndex = (PRUint32)kNotFound;
 
@@ -633,16 +633,18 @@ nsPlainTextSerializer::DoOpenContainer(c
 #ifdef DEBUG_preformatted
         printf("Set mPreFormatted based on style pre\n");
 #endif
         mPreFormatted = PR_TRUE;
         mWrapColumn = 0;
       }
     } 
     else {
+      /* See comment at end of function. */
+      mInWhitespace = PR_TRUE;
       mPreFormatted = PR_FALSE;
     }
 
     return NS_OK;
   }
 
   // Keep this in sync with DoCloseContainer!
   if (!DoOutput()) {
@@ -802,16 +804,23 @@ nsPlainTextSerializer::DoOpenContainer(c
   else if ((type == eHTMLTag_em || type == eHTMLTag_i)
            && mStructs && !currentNodeIsConverted) {
     Write(NS_LITERAL_STRING("/"));
   }
   else if (type == eHTMLTag_u && mStructs && !currentNodeIsConverted) {
     Write(NS_LITERAL_STRING("_"));
   }
 
+  /* Container elements are always block elements, so we shouldn't
+     output any whitespace immediately after the container tag even if
+     there's extra whitespace there because the HTML is pretty-printed
+     or something. To ensure that happens, tell the serializer we're
+     already in whitespace so it won't output more. */
+  mInWhitespace = PR_TRUE;
+
   return NS_OK;
 }
 
 nsresult
 nsPlainTextSerializer::DoCloseContainer(PRInt32 aTag)
 {
   if (mFlags & nsIDocumentEncoder::OutputRaw) {
     // Raw means raw.  Don't even think about doing anything fancy
@@ -1068,48 +1077,36 @@ nsPlainTextSerializer::DoAddLeaf(const n
     // Another egregious editor workaround, see bug 38194:
     // ignore the bogus br tags that the editor sticks here and there.
     nsAutoString typeAttr;
     if (NS_FAILED(GetAttributeValue(aNode, nsGkAtoms::type, typeAttr))
         || !typeAttr.EqualsLiteral("_moz")) {
       EnsureVerticalSpace(mEmptyLines+1);
     }
   }
-  else if (type == eHTMLTag_whitespace) {
+  else if (type == eHTMLTag_whitespace || type == eHTMLTag_newline) {
     // The only times we want to pass along whitespace from the original
     // html source are if we're forced into preformatted mode via flags,
     // or if we're prettyprinting and we're inside a <pre>.
     // Otherwise, either we're collapsing to minimal text, or we're
     // prettyprinting to mimic the html format, and in neither case
     // does the formatting of the html source help us.
-    // One exception: at the very beginning of a selection,
-    // we want to preserve whitespace.
     if (mFlags & nsIDocumentEncoder::OutputPreformatted ||
         (mPreFormatted && !mWrapColumn) ||
         IsInPre()) {
-      Write(aText);
+      if (type == eHTMLTag_newline)
+        EnsureVerticalSpace(mEmptyLines+1);
+      else  
+        Write(aText);
     }
-    else if(!mInWhitespace ||
-            (!mStartedOutput
-             && mFlags | nsIDocumentEncoder::OutputSelectionOnly)) {
-      mInWhitespace = PR_FALSE;
+    else if(!mInWhitespace) {
       Write(kSpace);
       mInWhitespace = PR_TRUE;
     }
   }
-  else if (type == eHTMLTag_newline) {
-    if (mFlags & nsIDocumentEncoder::OutputPreformatted ||
-        (mPreFormatted && !mWrapColumn) ||
-        IsInPre()) {
-      EnsureVerticalSpace(mEmptyLines+1);
-    }
-    else {
-      Write(kSpace);
-    }
-  }
   else if (type == eHTMLTag_hr &&
            (mFlags & nsIDocumentEncoder::OutputFormatted)) {
     EnsureVerticalSpace(0);
 
     // Make a line of dashes as wide as the wrap width
     // XXX honoring percentage would be nice
     nsAutoString line;
     PRUint32 width = (mWrapColumn > 0 ? mWrapColumn : 25);
@@ -1156,20 +1153,22 @@ nsPlainTextSerializer::DoAddLeaf(const n
 void
 nsPlainTextSerializer::EnsureVerticalSpace(PRInt32 noOfRows)
 {
   // If we have something in the indent we probably want to output
   // it and it's not included in the count for empty lines so we don't
   // realize that we should start a new line.
   if(noOfRows >= 0 && !mInIndentString.IsEmpty()) {
     EndLine(PR_FALSE);
+    mInWhitespace = PR_TRUE;
   }
 
   while(mEmptyLines < noOfRows) {
     EndLine(PR_FALSE);
+    mInWhitespace = PR_TRUE;
   }
   mLineBreakDue = PR_FALSE;
   mFloatingLines = -1;
 }
 
 /**
  * This empties the current line cache without adding a NEWLINE.
  * Should not be used if line wrapping is of importance since
--- a/content/base/test/TestPlainTextSerializer.cpp
+++ b/content/base/test/TestPlainTextSerializer.cpp
@@ -41,16 +41,17 @@
 #include "nsIHTMLToTextSink.h"
 #include "nsIParser.h"
 #include "nsIContentSink.h"
 #include "nsIParserService.h"
 #include "nsServiceManagerUtils.h"
 #include "nsStringGlue.h"
 #include "nsParserCIID.h"
 #include "nsIDocumentEncoder.h"
+#include "nsCRT.h"
 
 static NS_DEFINE_CID(kCParserCID, NS_PARSER_CID);
 
 void
 ConvertBufToPlainText(nsString &aConBuf, int aFlag)
 {
   nsCOMPtr<nsIParser> parser = do_CreateInstance(kCParserCID);
   if (parser) {
@@ -139,16 +140,37 @@ TestCJKWithFlowedDelSp()
   }
 
   passed("HTML to CJK text serialization with format=flowed; delsp=yes");
 
   return NS_OK;
 }
 
 nsresult
+TestPrettyPrintedHtml()
+{
+  nsString test;
+  test.AppendLiteral(
+    "<html>" NS_LINEBREAK
+    "<body>" NS_LINEBREAK
+    "  first<br>" NS_LINEBREAK
+    "  second<br>" NS_LINEBREAK
+    "</body>" NS_LINEBREAK "</html>");
+
+  ConvertBufToPlainText(test, 0);
+  if (!test.EqualsLiteral("first" NS_LINEBREAK "second" NS_LINEBREAK)) {
+    fail("Wrong prettyprinted html to text serialization");
+    return NS_ERROR_FAILURE;
+  }
+
+  passed("prettyprinted HTML to text serialization test");
+  return NS_OK;
+}
+
+nsresult
 TestPlainTextSerializer()
 {
   nsString test;
   test.AppendLiteral("<html><base>base</base><head><span>span</span></head>"
                      "<body>body</body></html>");
   ConvertBufToPlainText(test, 0);
   if (!test.EqualsLiteral("basespanbody")) {
     fail("Wrong html to text serialization");
@@ -158,16 +180,19 @@ TestPlainTextSerializer()
   passed("HTML to text serialization test");
 
   nsresult rv = TestASCIIWithFlowedDelSp();
   NS_ENSURE_SUCCESS(rv, rv);
 
   rv = TestCJKWithFlowedDelSp();
   NS_ENSURE_SUCCESS(rv, rv);
 
+  rv = TestPrettyPrintedHtml();
+  NS_ENSURE_SUCCESS(rv, rv);
+
   // Add new tests here...
   return NS_OK;
 }
 
 int main(int argc, char** argv)
 {
   ScopedXPCOM xpcom("PlainTextSerializer");
   if (xpcom.failed())
--- a/content/base/test/test_copypaste.html
+++ b/content/base/test/test_copypaste.html
@@ -107,46 +107,46 @@ function testCopyPaste () {
   testSelectionToString("This is a draggable bit of text.");
   testClipboardValue("text/unicode",
                      "This is a draggable bit of text.");
   testClipboardValue("text/html",
                      "<div id=\"draggable\" title=\"title to have a long HTML line\">This is a <em>draggable</em> bit of text.</div>");
   testPasteText("This is a draggable bit of text.");
 
   copyChildrenToClipboard("alist");
-  testSelectionToString("bla\n\n    foo\n    bar\n\n");
-  testClipboardValue("text/unicode", " bla\n\n    foo\n    bar\n\n");
+  testSelectionToString(" bla\n\n    foo\n    bar\n\n");
+  testClipboardValue("text/unicode", "bla\n\n    foo\n    bar\n\n");
   testClipboardValue("text/html", "<div id=\"alist\">\n    bla\n    <ul>\n      <li>foo</li>\n      \n      <li>bar</li>\n    </ul>\n  </div>");
-  testPasteText(" bla\n\n    foo\n    bar\n\n");
+  testPasteText("bla\n\n    foo\n    bar\n\n");
 
   copyChildrenToClipboard("blist");
-  testSelectionToString("mozilla\n\n    foo\n    bar\n\n");
-  testClipboardValue("text/unicode", " mozilla\n\n    foo\n    bar\n\n");
+  testSelectionToString(" mozilla\n\n    foo\n    bar\n\n");
+  testClipboardValue("text/unicode", "mozilla\n\n    foo\n    bar\n\n");
   testClipboardValue("text/html", "<div id=\"blist\">\n    mozilla\n    <ol>\n      <li>foo</li>\n      \n      <li>bar</li>\n    </ol>\n  </div>");
-  testPasteText(" mozilla\n\n    foo\n    bar\n\n");
+  testPasteText("mozilla\n\n    foo\n    bar\n\n");
 
   copyChildrenToClipboard("clist");
-  testSelectionToString("mzla\n\n    foo\n        bazzinga!\n    bar\n\n");
-  testClipboardValue("text/unicode", " mzla\n\n    foo\n        bazzinga!\n    bar\n\n");
+  testSelectionToString(" mzla\n\n    foo\n        bazzinga!\n    bar\n\n");
+  testClipboardValue("text/unicode", "mzla\n\n    foo\n        bazzinga!\n    bar\n\n");
   testClipboardValue("text/html", "<div id=\"clist\">\n    mzla\n    <ul>\n      <li>foo<ul>\n        <li>bazzinga!</li>\n      </ul></li>\n      \n      <li>bar</li>\n    </ul>\n  </div>");
-  testPasteText(" mzla\n\n    foo\n        bazzinga!\n    bar\n\n");
+  testPasteText("mzla\n\n    foo\n        bazzinga!\n    bar\n\n");
 
   copyChildrenToClipboard("div4");
-  testSelectionToString("Tt t t ");
-  testClipboardValue("text/unicode", " Tt t t ");
+  testSelectionToString(" Tt t t ");
+  testClipboardValue("text/unicode", "Tt t t ");
   testClipboardValue("text/html", "<div id=\"div4\">\n  T<textarea>t t t</textarea>\n</div>");
   testInnerHTML("div4", "\n  T<textarea>t t t</textarea>\n");
-  testPasteText(" Tt t t ");
+  testPasteText("Tt t t ");
 
   copyChildrenToClipboard("div5");
-  testSelectionToString("T ");
-  testClipboardValue("text/unicode", " T ");
+  testSelectionToString(" T ");
+  testClipboardValue("text/unicode", "T ");
   testClipboardValue("text/html", "<div id=\"div5\">\n  T<textarea>     </textarea>\n</div>");
   testInnerHTML("div5", "\n  T<textarea>     </textarea>\n");
-  testPasteText(" T ");
+  testPasteText("T ");
 
   copyRangeToClipboard($("div6").childNodes[0],0, $("div6").childNodes[1],1);
   testSelectionToString("");
 // START Disabled due to bug 564688
 if (false) {
   testClipboardValue("text/unicode", "");
   testClipboardValue("text/html", "");
 }