Bug 650784 part 1 - Introduce a new API for converting HTML to plain text. r=smaug.
author: Henri Sivonen <hsivonen@iki.fi>
Mon, 27 Feb 2012 13:57:48 +0200
changeset: 87838 429263e580904b6266149bcb6a31fc89356c7c49
parent: 87837 d40ba365b995634f5f1156c7b37c48b3333bc297
child: 87839 a423ca3c3ce1509175463275a170fb91bbf64d85
push id: 22160
push user: mbrubeck@mozilla.com
push date: Tue, 28 Feb 2012 17:21:33 +0000
treeherder: mozilla-central@dde4e0089a18 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: smaug
bugs: 650784
milestone: 13.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 650784 part 1 - Introduce a new API for converting HTML to plain text. r=smaug.
content/base/public/nsContentUtils.h
content/base/src/nsContentUtils.cpp
content/base/src/nsDOMParser.cpp
content/base/test/TestPlainTextSerializer.cpp
parser/html/Makefile.in
parser/html/nsHtml5StringParser.cpp
parser/html/nsHtml5StringParser.h
parser/html/nsIParserUtils.idl
toolkit/components/build/nsToolkitCompsCID.h
toolkit/components/build/nsToolkitCompsModule.cpp
toolkit/components/feeds/nsIScriptableUnescapeHTML.idl
toolkit/components/feeds/nsScriptableUnescapeHTML.cpp
toolkit/components/feeds/nsScriptableUnescapeHTML.h
widget/xpwidgets/nsHTMLFormatConverter.cpp
--- a/content/base/public/nsContentUtils.h
+++ b/content/base/public/nsContentUtils.h
@@ -1139,22 +1139,44 @@ public:
 
   /**
    * Parse a string into a document using the HTML parser.
    * Script elements are marked unexecutable.
    *
    * @param aSourceBuffer the string to parse as an HTML document
    * @param aTargetDocument the document object to parse into. Must not have
    *                        child nodes.
+   * @param aScriptingEnabledForNoscriptParsing whether <noscript> is parsed
+   *                                            as if scripting was enabled
    * @return NS_ERROR_DOM_INVALID_STATE_ERR if a re-entrant attempt to parse
    *         fragments is made, NS_ERROR_OUT_OF_MEMORY if aSourceBuffer is too
    *         long and NS_OK otherwise.
    */
   static nsresult ParseDocumentHTML(const nsAString& aSourceBuffer,
-                                    nsIDocument* aTargetDocument);
+                                    nsIDocument* aTargetDocument,
+                                    bool aScriptingEnabledForNoscriptParsing);
+
+  /**
+   * Converts HTML source to plain text by parsing the source and using the
+   * plain text serializer on the resulting tree.
+   *
+   * @param aSourceBuffer the string to parse as an HTML document
+   * @param aResultBuffer the string where the plain text result appears;
+   *                      may be the same string as aSourceBuffer
+   * @param aFlags Flags from nsIDocumentEncoder.
+   * @param aWrapCol Number of columns after which to line wrap; 0 for no
+   *                 auto-wrapping
+   * @return NS_ERROR_DOM_INVALID_STATE_ERR on a re-entrant parse attempt,
+   *         NS_ERROR_OUT_OF_MEMORY if aSourceBuffer is too long, any failure
+   *         from document creation or serialization, and NS_OK otherwise.
+   */
+  static nsresult ConvertToPlainText(const nsAString& aSourceBuffer,
+                                     nsAString& aResultBuffer,
+                                     PRUint32 aFlags,
+                                     PRUint32 aWrapCol);
 
   /**
    * Creates a new XML document, which is marked to be loaded as data.
    *
    * @param aNamespaceURI Namespace for the root element to create and insert in
    *                      the document. Only used if aQualifiedName is not
    *                      empty.
    * @param aQualifiedName Qualified name for the root element to create and
--- a/content/base/src/nsContentUtils.cpp
+++ b/content/base/src/nsContentUtils.cpp
@@ -3922,31 +3922,33 @@ nsContentUtils::ParseFragmentHTML(const 
                                        aQuirks,
                                        aPreventScriptExecution);
   return rv;
 }
 
 /* static */
 nsresult
 nsContentUtils::ParseDocumentHTML(const nsAString& aSourceBuffer,
-                                  nsIDocument* aTargetDocument)
+                                  nsIDocument* aTargetDocument,
+                                  bool aScriptingEnabledForNoscriptParsing)
 {
   if (nsContentUtils::sFragmentParsingActive) {
     NS_NOTREACHED("Re-entrant fragment parsing attempted.");
     return NS_ERROR_DOM_INVALID_STATE_ERR;
   }
   mozilla::AutoRestore<bool> guard(nsContentUtils::sFragmentParsingActive);
   nsContentUtils::sFragmentParsingActive = true;
   if (!sHTMLFragmentParser) {
     NS_ADDREF(sHTMLFragmentParser = new nsHtml5StringParser());
     // Now sHTMLFragmentParser owns the object
   }
   nsresult rv =
     sHTMLFragmentParser->ParseDocument(aSourceBuffer,
-                                       aTargetDocument);
+                                       aTargetDocument,
+                                       aScriptingEnabledForNoscriptParsing);
   return rv;
 }
 
 /* static */
 nsresult
 nsContentUtils::ParseFragmentXML(const nsAString& aSourceBuffer,
                                  nsIDocument* aDocument,
                                  nsTArray<nsString>& aTagStack,
@@ -3987,16 +3989,54 @@ nsContentUtils::ParseFragmentXML(const n
 
   rv = sXMLFragmentSink->FinishFragmentParsing(aReturn);
 
   sXMLFragmentParser->Reset();
 
   return rv;
 }
 
+/* static */
+nsresult
+nsContentUtils::ConvertToPlainText(const nsAString& aSourceBuffer,
+                                   nsAString& aResultBuffer,
+                                   PRUint32 aFlags,
+                                   PRUint32 aWrapCol)
+{
+  nsCOMPtr<nsIURI> uri;
+  NS_NewURI(getter_AddRefs(uri), "about:blank");
+  nsCOMPtr<nsIPrincipal> principal =
+    do_CreateInstance("@mozilla.org/nullprincipal;1");
+  nsCOMPtr<nsIDOMDocument> domDocument;
+  nsresult rv = nsContentUtils::CreateDocument(EmptyString(),
+                                               EmptyString(),
+                                               nsnull,
+                                               uri,
+                                               uri,
+                                               principal,
+                                               nsnull,
+                                               DocumentFlavorHTML,
+                                               getter_AddRefs(domDocument));
+  NS_ENSURE_SUCCESS(rv, rv);
+  // <noscript> is parsed as if scripting were enabled unless
+  nsCOMPtr<nsIDocument> document = do_QueryInterface(domDocument);
+  rv = nsContentUtils::ParseDocumentHTML(aSourceBuffer, document,
+    !(aFlags & nsIDocumentEncoder::OutputNoScriptContent)); // ...this is set.
+  NS_ENSURE_SUCCESS(rv, rv);
+  // Serialize the parsed document with the text/plain document encoder.
+  nsCOMPtr<nsIDocumentEncoder> encoder = do_CreateInstance(
+    "@mozilla.org/layout/documentEncoder;1?type=text/plain");
+  NS_ENSURE_TRUE(encoder, NS_ERROR_FAILURE); // instantiation can fail
+  rv = encoder->Init(domDocument, NS_LITERAL_STRING("text/plain"), aFlags);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  encoder->SetWrapColumn(aWrapCol);
+
+  return encoder->EncodeToString(aResultBuffer);
+}
 
 /* static */
 nsresult
 nsContentUtils::CreateDocument(const nsAString& aNamespaceURI, 
                                const nsAString& aQualifiedName, 
                                nsIDOMDocumentType* aDoctype,
                                nsIURI* aDocumentURI, nsIURI* aBaseURI,
                                nsIPrincipal* aPrincipal,
--- a/content/base/src/nsDOMParser.cpp
+++ b/content/base/src/nsDOMParser.cpp
@@ -97,17 +97,17 @@ nsDOMParser::ParseFromString(const PRUni
   nsresult rv;
 
   if (!nsCRT::strcmp(contentType, "text/html")) {
     nsCOMPtr<nsIDOMDocument> domDocument;
     rv = SetUpDocument(DocumentFlavorHTML, getter_AddRefs(domDocument));
     NS_ENSURE_SUCCESS(rv, rv);
     nsCOMPtr<nsIDocument> document = do_QueryInterface(domDocument);
     nsDependentString sourceBuffer(str);
-    rv = nsContentUtils::ParseDocumentHTML(sourceBuffer, document);
+    rv = nsContentUtils::ParseDocumentHTML(sourceBuffer, document, false);
     NS_ENSURE_SUCCESS(rv, rv);
 
     // Keep the XULXBL state, base URL and principal setting in sync with the
     // XML case
 
     if (nsContentUtils::IsSystemPrincipal(mOriginalPrincipal)) {
       document->ForceEnableXULXBL();
     }
--- a/content/base/test/TestPlainTextSerializer.cpp
+++ b/content/base/test/TestPlainTextSerializer.cpp
@@ -32,47 +32,29 @@
  * and other provisions required by the GPL or the LGPL. If you do not delete
  * the provisions above, a recipient may use your version of this file under
  * the terms of any one of the MPL, the GPL or the LGPL.
  *
  * ***** END LICENSE BLOCK ***** */
 
 #include "TestHarness.h"
 
-#include "nsIParser.h"
-#include "nsIHTMLToTextSink.h"
-#include "nsIParser.h"
-#include "nsIContentSink.h"
-#include "nsIParserService.h"
 #include "nsServiceManagerUtils.h"
 #include "nsStringGlue.h"
-#include "nsParserCIID.h"
 #include "nsIDocumentEncoder.h"
 #include "nsCRT.h"
-
-static NS_DEFINE_CID(kCParserCID, NS_PARSER_CID);
+#include "nsIParserUtils.h"
+#include "nsToolkitCompsCID.h"
 
 void
 ConvertBufToPlainText(nsString &aConBuf, int aFlag)
 {
-  nsCOMPtr<nsIParser> parser = do_CreateInstance(kCParserCID);
-  if (parser) {
-    nsCOMPtr<nsIContentSink> sink;
-    sink = do_CreateInstance(NS_PLAINTEXTSINK_CONTRACTID);
-    if (sink) {
-      nsCOMPtr<nsIHTMLToTextSink> textSink(do_QueryInterface(sink));
-      if (textSink) {
-        nsAutoString convertedText;
-        textSink->Initialize(&convertedText, aFlag, 72);
-        parser->SetContentSink(sink);
-        parser->Parse(aConBuf, 0, NS_LITERAL_CSTRING("text/html"), true);
-        aConBuf = convertedText;
-      }
-    }
-  }
+  nsCOMPtr<nsIParserUtils> utils =
+    do_GetService(NS_PARSERUTILS_CONTRACTID);
+  utils->ConvertToPlainText(aConBuf, aFlag, 72, aConBuf);
 }
 
 // Test for ASCII with format=flowed; delsp=yes
 nsresult
 TestASCIIWithFlowedDelSp()
 {
   nsString test;
   nsString result;
--- a/parser/html/Makefile.in
+++ b/parser/html/Makefile.in
@@ -40,16 +40,19 @@ srcdir    = @srcdir@
 VPATH     = @srcdir@
 
 include $(DEPTH)/config/autoconf.mk
 
 MODULE		= html5
 LIBRARY_NAME	= html5p_s
 LIBXUL_LIBRARY	= 1
 
+XPIDLSRCS = \
+  nsIParserUtils.idl \
+  $(NULL)
 
 EXPORTS		= \
 		jArray.h \
 		nsAHtml5TreeBuilderState.h \
 		nsHtml5ArrayCopy.h \
 		nsHtml5Atoms.h \
 		nsHtml5AtomList.h \
 		nsHtml5AtomTable.h \
--- a/parser/html/nsHtml5StringParser.cpp
+++ b/parser/html/nsHtml5StringParser.cpp
@@ -97,31 +97,32 @@ nsHtml5StringParser::ParseFragment(const
   mExecutor->EnableFragmentMode(aPreventScriptExecution);
 
   Tokenize(aSourceBuffer, doc, true);
   return NS_OK;
 }
 
 nsresult
 nsHtml5StringParser::ParseDocument(const nsAString& aSourceBuffer,
-                                   nsIDocument* aTargetDoc)
+                                   nsIDocument* aTargetDoc,
+                                   bool aScriptingEnabledForNoscriptParsing)
 {
   MOZ_ASSERT(!aTargetDoc->GetFirstChild());
 
   NS_ENSURE_TRUE(aSourceBuffer.Length() <= PR_INT32_MAX,
                  NS_ERROR_OUT_OF_MEMORY);
 
   mTreeBuilder->setFragmentContext(nsnull,
                                    kNameSpaceID_None,
                                    nsnull,
                                    false);
 
   mExecutor->PreventScriptExecution();
 
-  Tokenize(aSourceBuffer, aTargetDoc, false);
+  Tokenize(aSourceBuffer, aTargetDoc, aScriptingEnabledForNoscriptParsing);
   return NS_OK;
 }
 
 void
 nsHtml5StringParser::Tokenize(const nsAString& aSourceBuffer,
                               nsIDocument* aDocument,
                               bool aScriptingEnabledForNoscriptParsing) {
 
--- a/parser/html/nsHtml5StringParser.h
+++ b/parser/html/nsHtml5StringParser.h
@@ -81,17 +81,18 @@ class nsHtml5StringParser : public nsPar
                            bool aPreventScriptExecution);
 
     /**
      * Parse an entire HTML document from a source string.
      * DO NOT CALL from outside nsContentUtils.cpp.
      *
      */
     nsresult ParseDocument(const nsAString& aSourceBuffer,
-                           nsIDocument* aTargetDoc);
+                           nsIDocument* aTargetDoc,
+                           bool aScriptingEnabledForNoscriptParsing);
 
   private:
 
     void Tokenize(const nsAString& aSourceBuffer,
                   nsIDocument* aDocument,
                   bool aScriptingEnabledForNoscriptParsing);
 
     /**
new file mode 100644
--- /dev/null
+++ b/parser/html/nsIParserUtils.idl
@@ -0,0 +1,25 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsISupports.idl"
+
+/**
+ * Non-Web HTML parser functionality for Firefox extensions and XULRunner apps.
+ * Don't use this from within Gecko--use nsContentUtils directly instead.
+ */
+[scriptable, uuid(290f49bb-0619-4bda-8006-ab31bec7231a)]
+interface nsIParserUtils : nsISupports
+{
+  /**
+   * Convert HTML to plain text.
+   *
+   * @param src the HTML source to parse (C++ callers are allowed but not
+   *            required to use the same string for the return value.)
+   * @param flags conversion option flags defined in nsIDocumentEncoder
+   * @param wrapCol number of characters per line; 0 for no auto-wrapping
+   */
+  AString convertToPlainText(in AString src,
+                             in unsigned long flags,
+                             in unsigned long wrapCol);
+};
--- a/toolkit/components/build/nsToolkitCompsCID.h
+++ b/toolkit/components/build/nsToolkitCompsCID.h
@@ -91,16 +91,19 @@
     "@mozilla.org/url-classifier/streamupdater;1"
 
 #define NS_URLCLASSIFIERUTILS_CONTRACTID \
     "@mozilla.org/url-classifier/utils;1"
 
 #define NS_URLCLASSIFIERHASHCOMPLETER_CONTRACTID \
     "@mozilla.org/url-classifier/hashcompleter;1"
 
+#define NS_PARSERUTILS_CONTRACTID \
+    "@mozilla.org/parserutils;1"
+
 #define NS_SCRIPTABLEUNESCAPEHTML_CONTRACTID "@mozilla.org/feed-unescapehtml;1"
 
 #define NS_NAVHISTORYSERVICE_CONTRACTID \
   "@mozilla.org/browser/nav-history-service;1"
 
 #define NS_ANNOTATIONSERVICE_CONTRACTID \
   "@mozilla.org/browser/annotation-service;1"
 
@@ -174,16 +177,19 @@
 // {c2be6dc0-ef1e-4abd-86a2-4f864ddc57f6}
 #define NS_URLCLASSIFIERSTREAMUPDATER_CID \
 { 0xc2be6dc0, 0xef1e, 0x4abd, { 0x86, 0xa2, 0x4f, 0x86, 0x4d, 0xdc, 0x57, 0xf6} }
 
 // {b7b2ccec-7912-4ea6-a548-b038447004bd}
 #define NS_URLCLASSIFIERUTILS_CID \
 { 0xb7b2ccec, 0x7912, 0x4ea6, { 0xa5, 0x48, 0xb0, 0x38, 0x44, 0x70, 0x04, 0xbd} }
 
+#define NS_PARSERUTILS_CID  \
+{ 0xaf7b24cb, 0x893f, 0x41bb, { 0x96, 0x1f, 0x5a, 0x69, 0x38, 0x8e, 0x27, 0xc3 } }
+
 // {10f2f5f0-f103-4901-980f-ba11bd70d60d}
 #define NS_SCRIPTABLEUNESCAPEHTML_CID  \
 { 0x10f2f5f0, 0xf103, 0x4901, { 0x98, 0x0f, 0xba, 0x11, 0xbd, 0x70, 0xd6, 0x0d} }
 
 #define NS_NAVHISTORYSERVICE_CID \
 { 0x88cecbb7, 0x6c63, 0x4b3b, { 0x8c, 0xd4, 0x84, 0xf3, 0xb8, 0x22, 0x8c, 0x69 } }
 
 #define NS_NAVHISTORYRESULTTREEVIEWER_CID \
--- a/toolkit/components/build/nsToolkitCompsModule.cpp
+++ b/toolkit/components/build/nsToolkitCompsModule.cpp
@@ -135,16 +135,17 @@ NS_DEFINE_NAMED_CID(NS_FIND_SERVICE_CID)
 NS_DEFINE_NAMED_CID(NS_TYPEAHEADFIND_CID);
 #ifdef MOZ_URL_CLASSIFIER
 NS_DEFINE_NAMED_CID(NS_URLCLASSIFIERPREFIXSET_CID);
 NS_DEFINE_NAMED_CID(NS_URLCLASSIFIERDBSERVICE_CID);
 NS_DEFINE_NAMED_CID(NS_URLCLASSIFIERSTREAMUPDATER_CID);
 NS_DEFINE_NAMED_CID(NS_URLCLASSIFIERUTILS_CID);
 #endif
 #ifdef MOZ_FEEDS
+NS_DEFINE_NAMED_CID(NS_PARSERUTILS_CID);
 NS_DEFINE_NAMED_CID(NS_SCRIPTABLEUNESCAPEHTML_CID);
 #endif
 NS_DEFINE_NAMED_CID(NS_BROWSERSTATUSFILTER_CID);
 NS_DEFINE_NAMED_CID(NS_CHARSETMENU_CID);
 
 static const mozilla::Module::CIDEntry kToolkitCIDs[] = {
   { &kNS_TOOLKIT_APPSTARTUP_CID, false, NULL, nsAppStartupConstructor },
   { &kNS_USERINFO_CID, false, NULL, nsUserInfoConstructor },
@@ -160,16 +161,17 @@ static const mozilla::Module::CIDEntry k
   { &kNS_TYPEAHEADFIND_CID, false, NULL, nsTypeAheadFindConstructor },
 #ifdef MOZ_URL_CLASSIFIER
   { &kNS_URLCLASSIFIERPREFIXSET_CID, false, NULL, nsUrlClassifierPrefixSetConstructor },
   { &kNS_URLCLASSIFIERDBSERVICE_CID, false, NULL, nsUrlClassifierDBServiceConstructor },
   { &kNS_URLCLASSIFIERSTREAMUPDATER_CID, false, NULL, nsUrlClassifierStreamUpdaterConstructor },
   { &kNS_URLCLASSIFIERUTILS_CID, false, NULL, nsUrlClassifierUtilsConstructor },
 #endif
 #ifdef MOZ_FEEDS
+  { &kNS_PARSERUTILS_CID, false, NULL, nsScriptableUnescapeHTMLConstructor },
   { &kNS_SCRIPTABLEUNESCAPEHTML_CID, false, NULL, nsScriptableUnescapeHTMLConstructor },
 #endif
   { &kNS_BROWSERSTATUSFILTER_CID, false, NULL, nsBrowserStatusFilterConstructor },
   { &kNS_CHARSETMENU_CID, false, NULL, NS_NewCharsetMenu },
   { NULL }
 };
 
 static const mozilla::Module::ContractIDEntry kToolkitContracts[] = {
@@ -188,16 +190,17 @@ static const mozilla::Module::ContractID
 #ifdef MOZ_URL_CLASSIFIER
   { NS_URLCLASSIFIERPREFIXSET_CONTRACTID, &kNS_URLCLASSIFIERPREFIXSET_CID },
   { NS_URLCLASSIFIERDBSERVICE_CONTRACTID, &kNS_URLCLASSIFIERDBSERVICE_CID },
   { NS_URICLASSIFIERSERVICE_CONTRACTID, &kNS_URLCLASSIFIERDBSERVICE_CID },
   { NS_URLCLASSIFIERSTREAMUPDATER_CONTRACTID, &kNS_URLCLASSIFIERSTREAMUPDATER_CID },
   { NS_URLCLASSIFIERUTILS_CONTRACTID, &kNS_URLCLASSIFIERUTILS_CID },
 #endif
 #ifdef MOZ_FEEDS
+  { NS_PARSERUTILS_CONTRACTID, &kNS_PARSERUTILS_CID },
   { NS_SCRIPTABLEUNESCAPEHTML_CONTRACTID, &kNS_SCRIPTABLEUNESCAPEHTML_CID },
 #endif
   { NS_BROWSERSTATUSFILTER_CONTRACTID, &kNS_BROWSERSTATUSFILTER_CID },
   { NS_RDF_DATASOURCE_CONTRACTID_PREFIX NS_CHARSETMENU_PID, &kNS_CHARSETMENU_CID },
   { NULL }
 };
 
 static const mozilla::Module kToolkitModule = {
--- a/toolkit/components/feeds/nsIScriptableUnescapeHTML.idl
+++ b/toolkit/components/feeds/nsIScriptableUnescapeHTML.idl
@@ -36,28 +36,39 @@
 
 #include "nsISupports.idl"
 
 interface nsIDOMElement;
 interface nsIDOMDocumentFragment;
 interface nsIURI;
 
 /**
- * A utility class that unescapes HTML strings.
+ * A utility class for HTML parsing in the feed processor.
  */
 [scriptable, uuid(3ab244a9-f09d-44da-9e3f-ee4d67367f2d)]
 interface nsIScriptableUnescapeHTML : nsISupports 
 {
   /** 
-   * Converts all entities to Unicode.
+   * Converts HTML to plain text. This is equivalent to calling
+   * nsIParserUtils::convertToPlainText(src, 
+   *   nsIDocumentEncoder::OutputSelectionOnly |
+   *   nsIDocumentEncoder::OutputAbsoluteLinks, 0).
    *
-   * @param src The HTML string to escape.
+   * You should most likely call nsIParserUtils::convertToPlainText()
+   * instead of calling this method.
+   *
+   * @param src The HTML string to convert to plain text.
    */ 
   AString unescape(in AString src);
         
   /**
-   * Appends the text to the element.
+   * Parses markup into a sanitized document fragment.
+   *
+   * @param fragment the input markup
+   * @param isXML true if |fragment| is XML and false if HTML
+   * @param baseURI the base URL for this fragment
+   * @param element the context node for the fragment parsing algorithm
    */
   nsIDOMDocumentFragment parseFragment(in AString fragment,
                                        in boolean isXML,
                                        in nsIURI baseURI,
                                        in nsIDOMElement element);
 };
--- a/toolkit/components/feeds/nsScriptableUnescapeHTML.cpp
+++ b/toolkit/components/feeds/nsScriptableUnescapeHTML.cpp
@@ -44,17 +44,16 @@
 #include "nsEscape.h"
 #include "nsIParser.h"
 #include "nsIDTD.h"
 #include "nsNetCID.h"
 #include "nsNetUtil.h"
 #include "nsParserCIID.h"
 #include "nsContentUtils.h"
 #include "nsIContentSink.h"
-#include "nsIHTMLToTextSink.h"
 #include "nsIDocumentEncoder.h"
 #include "nsIDOMDocumentFragment.h"
 #include "nsIFragmentContentSink.h"
 #include "nsIDOMDocument.h"
 #include "nsIDOMNodeList.h"
 #include "nsIDOMNode.h"
 #include "nsIDOMElement.h"
 #include "nsIDocument.h"
@@ -65,52 +64,45 @@
 #include "nsIScriptableUnescapeHTML.h"
 #include "nsScriptableUnescapeHTML.h"
 #include "nsAutoPtr.h"
 #include "nsTreeSanitizer.h"
 #include "nsHtml5Module.h"
 
 #define XHTML_DIV_TAG "div xmlns=\"http://www.w3.org/1999/xhtml\""
 
-NS_IMPL_ISUPPORTS1(nsScriptableUnescapeHTML, nsIScriptableUnescapeHTML)
+NS_IMPL_ISUPPORTS2(nsScriptableUnescapeHTML,
+                   nsIScriptableUnescapeHTML,
+                   nsIParserUtils)
 
 static NS_DEFINE_CID(kCParserCID, NS_PARSER_CID);
 
-// From /widget/HTMLConverter
-//
-// Takes HTML and converts it to plain text but in unicode.
-//
+
+
 NS_IMETHODIMP
-nsScriptableUnescapeHTML::Unescape(const nsAString & aFromStr, 
+nsScriptableUnescapeHTML::ConvertToPlainText(const nsAString & aFromStr,
+                                             PRUint32 aFlags,
+                                             PRUint32 aWrapCol,
+                                             nsAString & aToStr)
+{
+  return nsContentUtils::ConvertToPlainText(aFromStr,
+    aToStr,
+    aFlags,
+    aWrapCol);
+}
+
+NS_IMETHODIMP
+nsScriptableUnescapeHTML::Unescape(const nsAString & aFromStr,
                                    nsAString & aToStr)
 {
-  // create the parser to do the conversion.
-  aToStr.SetLength(0);
-  nsresult rv;
-  nsCOMPtr<nsIParser> parser = do_CreateInstance(kCParserCID, &rv);
-  if (NS_FAILED(rv)) return rv;
-
-  // convert it!
-  nsCOMPtr<nsIContentSink> sink;
-
-  sink = do_CreateInstance(NS_PLAINTEXTSINK_CONTRACTID);
-  NS_ENSURE_TRUE(sink, NS_ERROR_FAILURE);
-
-  nsCOMPtr<nsIHTMLToTextSink> textSink(do_QueryInterface(sink));
-  NS_ENSURE_TRUE(textSink, NS_ERROR_FAILURE);
-
-  textSink->Initialize(&aToStr, nsIDocumentEncoder::OutputSelectionOnly
-                       | nsIDocumentEncoder::OutputAbsoluteLinks, 0);
-
-  parser->SetContentSink(sink);
-
-  parser->Parse(aFromStr, 0, NS_LITERAL_CSTRING("text/html"),
-                true, eDTDMode_fragment);
-
-  return NS_OK;
+  return nsContentUtils::ConvertToPlainText(aFromStr,
+    aToStr,
+    nsIDocumentEncoder::OutputSelectionOnly |
+    nsIDocumentEncoder::OutputAbsoluteLinks,
+    0);
 }
 
 // The feed version of nsContentUtils::CreateContextualFragment It
 // creates a fragment, but doesn't go to all the effort to preserve
 // context like innerHTML does, because feed DOMs shouldn't have that.
 NS_IMETHODIMP
 nsScriptableUnescapeHTML::ParseFragment(const nsAString &aFragment,
                                         bool aIsXML,
--- a/toolkit/components/feeds/nsScriptableUnescapeHTML.h
+++ b/toolkit/components/feeds/nsScriptableUnescapeHTML.h
@@ -33,17 +33,20 @@
  * the terms of any one of the MPL, the GPL or the LGPL.
  *
  * ***** END LICENSE BLOCK ***** */
 
 #ifndef nsScriptableHTMLUnescape_h__
 #define nsScriptableHTMLUnescape_h__
 
 #include "nsIScriptableUnescapeHTML.h"
+#include "nsIParserUtils.h"
 
-class nsScriptableUnescapeHTML : public nsIScriptableUnescapeHTML
+class nsScriptableUnescapeHTML : public nsIScriptableUnescapeHTML,
+                                 public nsIParserUtils
 {
 public:
   NS_DECL_ISUPPORTS
   NS_DECL_NSISCRIPTABLEUNESCAPEHTML
+  NS_DECL_NSIPARSERUTILS
 };
 
 #endif // nsScriptableHTMLUnescape_h__
--- a/widget/xpwidgets/nsHTMLFormatConverter.cpp
+++ b/widget/xpwidgets/nsHTMLFormatConverter.cpp
@@ -44,25 +44,19 @@
 #include "nsIComponentManager.h"
 #include "nsCOMPtr.h"
 #include "nsXPCOM.h"
 #include "nsISupportsPrimitives.h"
 
 #include "nsITransferable.h" // for mime defs, this is BAD
 
 // HTML convertor stuff
-#include "nsIParser.h"
-#include "nsIDTD.h"
-#include "nsParserCIID.h"
-#include "nsIContentSink.h"
 #include "nsPrimitiveHelpers.h"
 #include "nsIDocumentEncoder.h"
-#include "nsIHTMLToTextSink.h"
-
-static NS_DEFINE_CID(kCParserCID, NS_PARSER_CID);
+#include "nsContentUtils.h"
 
 nsHTMLFormatConverter::nsHTMLFormatConverter()
 {
 }
 
 nsHTMLFormatConverter::~nsHTMLFormatConverter()
 {
 }
@@ -267,46 +261,23 @@ nsHTMLFormatConverter::Convert(const cha
 //
 // ConvertFromHTMLToUnicode
 //
 // Takes HTML and converts it to plain text but in unicode.
 //
 NS_IMETHODIMP
 nsHTMLFormatConverter::ConvertFromHTMLToUnicode(const nsAutoString & aFromStr, nsAutoString & aToStr)
 {
-  // create the parser to do the conversion.
-  aToStr.SetLength(0);
-  nsresult rv;
-  nsCOMPtr<nsIParser> parser = do_CreateInstance(kCParserCID, &rv);
-  if ( !parser )
-    return rv;
-
-  // convert it!
-  nsCOMPtr<nsIContentSink> sink;
-
-  sink = do_CreateInstance(NS_PLAINTEXTSINK_CONTRACTID);
-  NS_ENSURE_TRUE(sink, NS_ERROR_FAILURE);
-
-  nsCOMPtr<nsIHTMLToTextSink> textSink(do_QueryInterface(sink));
-  NS_ENSURE_TRUE(textSink, NS_ERROR_FAILURE);
-
-  // We set OutputNoScriptContent and OutputNoFramesContent unconditionally
-  // here because |aFromStr| is already filtered based on user preferences.
-  PRUint32 flags =
+  return nsContentUtils::ConvertToPlainText(aFromStr,
+    aToStr,
     nsIDocumentEncoder::OutputSelectionOnly |
     nsIDocumentEncoder::OutputAbsoluteLinks |
     nsIDocumentEncoder::OutputNoScriptContent |
-    nsIDocumentEncoder::OutputNoFramesContent;
-  textSink->Initialize(&aToStr, flags, 0);
-
-  parser->SetContentSink(sink);
-
-  parser->Parse(aFromStr, 0, NS_LITERAL_CSTRING("text/html"), true, eDTDMode_fragment);
-  
-  return NS_OK;
+    nsIDocumentEncoder::OutputNoFramesContent,
+    0);
 } // ConvertFromHTMLToUnicode
 
 
 NS_IMETHODIMP
 nsHTMLFormatConverter::ConvertFromHTMLToAOLMail(const nsAutoString & aFromStr,
                                                 nsAutoString & aToStr)
 {
   aToStr.AssignLiteral("<HTML>");