Bug 650787 - Use nsIParserUtils for HTML to plain text conversion. r=dbienvenu.
author: Henri Sivonen <hsivonen@iki.fi>
Thu, 08 Mar 2012 16:22:49 +0200
changeset 11030 766214fa44ababc45d6cf6c17623fd65ceb2d4cf
parent 11029 b7630ce0ad582e2cf3f5437fff581e706b2b0256
child 11031 2a69aecfd64a14c510007e069caa2bcd1e8d901f
push id: 463
push user: bugzilla@standard8.plus.com
push date: Tue, 24 Apr 2012 17:34:51 +0000
treeherder: comm-beta@e53588e8f7b0 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: dbienvenu
bugs: 650787
Bug 650787 - Use nsIParserUtils for HTML to plain text conversion. r=dbienvenu.
mailnews/base/util/nsMsgDBFolder.cpp
mailnews/base/util/nsMsgUtils.cpp
mailnews/extensions/bayesian-spam-filter/src/nsBayesianFilter.cpp
mailnews/mime/src/mimemoz2.cpp
--- a/mailnews/base/util/nsMsgDBFolder.cpp
+++ b/mailnews/base/util/nsMsgDBFolder.cpp
@@ -68,22 +68,18 @@
 #include "nsIAbDirectory.h"
 #include "nsISpamSettings.h"
 #include "nsIMsgFilterPlugin.h"
 #include "nsIMsgMailSession.h"
 #include "nsIRDFService.h"
 #include "nsTextFormatter.h"
 #include "nsMsgDBCID.h"
 #include "nsReadLine.h"
-#include "nsParserCIID.h"
-#include "nsIParser.h"
-#include "nsIHTMLContentSink.h"
-#include "nsIContentSerializer.h"
 #include "nsLayoutCID.h"
-#include "nsIHTMLToTextSink.h"
+#include "nsIParserUtils.h"
 #include "nsIDocumentEncoder.h"
 #include "nsMsgI18N.h"
 #include "nsIMIMEHeaderParam.h"
 #include "plbase64.h"
 #include "nsArrayEnumerator.h"
 #include <time.h>
 #include "nsIMsgFolderNotificationService.h"
 #include "nsIMutableArray.h"
@@ -113,17 +109,16 @@ static PRTime gtimeOfLastPurgeCheck;    
 #define PREF_MAIL_PURGE_MIGRATED "mail.purge_threshold_migrated"
 #define PREF_MAIL_PURGE_ASK "mail.purge.ask"
 #define PREF_MAIL_WARN_FILTER_CHANGED "mail.warn_filter_changed"
 
 const char *kUseServerRetentionProp = "useServerRetention";
 
 static NS_DEFINE_CID(kRDFServiceCID, NS_RDFSERVICE_CID);
 static NS_DEFINE_CID(kCMailDB, NS_MAILDB_CID);
-static NS_DEFINE_CID(kParserCID, NS_PARSER_CID);
 
 nsICollation * nsMsgDBFolder::gCollationKeyGenerator = nsnull;
 
 PRUnichar *nsMsgDBFolder::kLocalizedInboxName;
 PRUnichar *nsMsgDBFolder::kLocalizedTrashName;
 PRUnichar *nsMsgDBFolder::kLocalizedSentName;
 PRUnichar *nsMsgDBFolder::kLocalizedDraftsName;
 PRUnichar *nsMsgDBFolder::kLocalizedTemplatesName;
@@ -5705,39 +5700,28 @@ void nsMsgDBFolder::compressQuotesInMsgS
       break;
     }
   }
 }
 
+// Converts the HTML in aMessageText to plain text (LF line breaks, no script
+// or frames content, body only, wrap column 80) and stores it in aOutText.
+// Returns NS_ERROR_FAILURE if the nsIParserUtils service cannot be obtained.
 NS_IMETHODIMP nsMsgDBFolder::ConvertMsgSnippetToPlainText(
     const nsAString& aMessageText, nsAString& aOutText)
 {
-  nsString bodyText;
-  nsresult rv = NS_OK;
-
-  // Create a parser
-  nsCOMPtr<nsIParser> parser = do_CreateInstance(kParserCID, &rv);
-  NS_ENSURE_SUCCESS(rv, rv);
-
-  // Create the appropriate output sink
-  nsCOMPtr<nsIContentSink> sink = do_CreateInstance(NS_PLAINTEXTSINK_CONTRACTID,&rv);
-  NS_ENSURE_SUCCESS(rv, rv);
-
-  nsCOMPtr<nsIHTMLToTextSink> textSink(do_QueryInterface(sink));
-  NS_ENSURE_TRUE(textSink, NS_ERROR_FAILURE);
   PRUint32 flags = nsIDocumentEncoder::OutputLFLineBreak
                    | nsIDocumentEncoder::OutputNoScriptContent
                    | nsIDocumentEncoder::OutputNoFramesContent
                    | nsIDocumentEncoder::OutputBodyOnly;
-
-  textSink->Initialize(&bodyText, flags, 80);
-  parser->SetContentSink(sink);
-  rv = parser->Parse(aMessageText, 0, NS_LITERAL_CSTRING("text/html"), true);
-  aOutText.Assign(bodyText);
-  return rv;
+  nsCOMPtr<nsIParserUtils> utils =
+    do_GetService(NS_PARSERUTILS_CONTRACTID);
+  // do_GetService() yields null on failure; don't dereference it blindly.
+  NS_ENSURE_TRUE(utils, NS_ERROR_FAILURE);
+  return utils->ConvertToPlainText(aMessageText, flags, 80, aOutText);
 }
 
 nsresult nsMsgDBFolder::GetMsgPreviewTextFromStream(nsIMsgDBHdr *msgHdr, nsIInputStream *stream)
 {
   nsCString msgBody;
   nsCAutoString charset;
   msgHdr->GetCharset(getter_Copies(charset));
   nsCAutoString contentType;
--- a/mailnews/base/util/nsMsgUtils.cpp
+++ b/mailnews/base/util/nsMsgUtils.cpp
@@ -93,25 +93,21 @@
 #include "nsTextFormatter.h"
 #include "nsIAtomService.h"
 #include "nsIStreamListener.h"
 #include "nsReadLine.h"
 #include "nsICharsetDetectionObserver.h"
 #include "nsICharsetDetector.h"
 #include "nsILineInputStream.h"
 #include "nsIPlatformCharset.h"
-#include "nsIParser.h"
-#include "nsParserCIID.h"
-#include "nsIHTMLToTextSink.h"
-#include "nsIContentSink.h"
+#include "nsIParserUtils.h"
 #include "nsICharsetConverterManager.h"
 #include "nsIDocumentEncoder.h"
 #include "mozilla/Services.h"
 
-static NS_DEFINE_CID(kCParserCID, NS_PARSER_CID);
 static NS_DEFINE_CID(kImapUrlCID, NS_IMAPURL_CID);
 static NS_DEFINE_CID(kCMailboxUrl, NS_MAILBOXURL_CID);
 static NS_DEFINE_CID(kCNntpUrlCID, NS_NNTPURL_CID);
 
 #define ILLEGAL_FOLDER_CHARS ";#"
 #define ILLEGAL_FOLDER_CHARS_AS_FIRST_LETTER "."
 #define ILLEGAL_FOLDER_CHARS_AS_LAST_LETTER  ".~ "
 
@@ -2355,21 +2351,16 @@ MsgDetectCharsetFromFile(nsILocalFile *a
  * unknown or deemed of no importance NULL could be passed.
  */
 NS_MSG_BASE nsresult
 ConvertBufToPlainText(nsString &aConBuf, bool formatFlowed /* = false */, bool formatOutput)
 {
   if (aConBuf.IsEmpty())
     return NS_OK;
 
-  nsresult rv;
-  nsCOMPtr<nsIParser> parser = do_CreateInstance(kCParserCID, &rv);
-  if (NS_FAILED(rv) || !parser)
-    return rv;
-
   PRInt32 wrapWidth = 72;
   nsCOMPtr<nsIPrefBranch> pPrefBranch(do_GetService(NS_PREFSERVICE_CONTRACTID));
 
   if (pPrefBranch)
   {
     pPrefBranch->GetIntPref("mailnews.wraplength", &wrapWidth);
     // Let sanity reign!
     if (wrapWidth == 0 || wrapWidth > 990)
@@ -2379,27 +2370,19 @@ ConvertBufToPlainText(nsString &aConBuf,
   }
 
   PRUint32 converterFlags = 0;
   if (formatOutput)
     converterFlags = nsIDocumentEncoder::OutputFormatted;
   if (formatFlowed)
     converterFlags |= nsIDocumentEncoder::OutputFormatFlowed;
 
-  nsCOMPtr<nsIContentSink> sink = do_CreateInstance(NS_PLAINTEXTSINK_CONTRACTID);
-  NS_ENSURE_TRUE(sink, NS_ERROR_FAILURE);
-
-  nsCOMPtr<nsIHTMLToTextSink> textSink(do_QueryInterface(sink));
-  NS_ENSURE_TRUE(textSink, NS_ERROR_FAILURE);
-
-  nsAutoString convertedText;
-  textSink->Initialize(&convertedText, converterFlags, wrapWidth);
-
-  parser->SetContentSink(sink);
-
-  parser->Parse(aConBuf, 0, NS_LITERAL_CSTRING("text/html"), true);
-
-  // Now if we get here, we need to get from ASCII text to
-  // UTF-8 format or there is a problem downstream...
-  aConBuf = convertedText;
-
-  return NS_OK;
+  nsCOMPtr<nsIParserUtils> utils =
+    do_GetService(NS_PARSERUTILS_CONTRACTID);
+  // do_GetService() yields null on failure; don't dereference it blindly.
+  NS_ENSURE_TRUE(utils, NS_ERROR_FAILURE);
+  // aConBuf is both the HTML source and the destination, so it is replaced
+  // in place by the converted text.
+  return utils->ConvertToPlainText(aConBuf,
+                                   converterFlags,
+                                   wrapWidth,
+                                   aConBuf);
 }
--- a/mailnews/extensions/bayesian-spam-filter/src/nsBayesianFilter.cpp
+++ b/mailnews/extensions/bayesian-spam-filter/src/nsBayesianFilter.cpp
@@ -77,17 +77,17 @@
 #include "nsIMsgHdr.h"
 
 // needed to strip html out of the body
 #include "nsParserCIID.h"
 #include "nsIParser.h"
 #include "nsIHTMLContentSink.h"
 #include "nsIContentSerializer.h"
 #include "nsLayoutCID.h"
-#include "nsIHTMLToTextSink.h"
+#include "nsIParserUtils.h"
 #include "nsIDocumentEncoder.h"
 
 #include "nsIncompleteGamma.h"
 #include <math.h>
 #include <prmem.h>
 #include "nsIMsgTraitService.h"
 #include "mozilla/Services.h"
 
@@ -803,37 +803,27 @@ void Tokenizer::tokenize_japanese_word(c
 
     cc = getCharClass(*p2);
     p1 = p2;
   }
 }
 
+// Strips HTML markup from inString, writing the plain text to outString.
+// Returns NS_ERROR_FAILURE if the nsIParserUtils service cannot be obtained.
 nsresult Tokenizer::stripHTML(const nsAString& inString, nsAString& outString)
 {
-  nsresult rv = NS_OK;
-  // Create a parser
-  nsCOMPtr<nsIParser> parser = do_CreateInstance(kParserCID, &rv);
-  NS_ENSURE_SUCCESS(rv, rv);
-
-  // Create the appropriate output sink
-  nsCOMPtr<nsIContentSink> sink = do_CreateInstance(NS_PLAINTEXTSINK_CONTRACTID,&rv);
-  NS_ENSURE_SUCCESS(rv, rv);
-
-  nsCOMPtr<nsIHTMLToTextSink> textSink(do_QueryInterface(sink));
-  NS_ENSURE_TRUE(textSink, NS_ERROR_FAILURE);
   PRUint32 flags = nsIDocumentEncoder::OutputLFLineBreak
                  | nsIDocumentEncoder::OutputNoScriptContent
                  | nsIDocumentEncoder::OutputNoFramesContent
                  | nsIDocumentEncoder::OutputBodyOnly;
-
-  textSink->Initialize(&outString, flags, 80);
-
-  parser->SetContentSink(sink);
-
-  return parser->Parse(inString, 0, NS_LITERAL_CSTRING("text/html"), true);
+  nsCOMPtr<nsIParserUtils> utils =
+    do_GetService(NS_PARSERUTILS_CONTRACTID);
+  // do_GetService() yields null on failure; don't dereference it blindly.
+  NS_ENSURE_TRUE(utils, NS_ERROR_FAILURE);
+  return utils->ConvertToPlainText(inString, flags, 80, outString);
 }
 
 void Tokenizer::tokenize(const char* aText)
 {
   PR_LOG(BayesianFilterLogModule, PR_LOG_DEBUG, ("tokenize: %s", aText));
 
   // strip out HTML tags before we begin processing
   // uggh but first we have to blow up our string into UCS2
--- a/mailnews/mime/src/mimemoz2.cpp
+++ b/mailnews/mime/src/mimemoz2.cpp
@@ -86,17 +86,17 @@
 // <for functions="HTML2Plaintext,HTMLSantinize">
 #include "nsXPCOM.h"
 #include "nsParserCIID.h"
 #include "nsIParser.h"
 #include "nsIHTMLContentSink.h"
 #include "nsIContentSerializer.h"
 #include "nsLayoutCID.h"
 #include "nsIComponentManager.h"
-#include "nsIHTMLToTextSink.h"
+#include "nsIParserUtils.h"
 #include "mozISanitizingSerializer.h"
 // </for>
 #include "mozilla/Services.h"
 
 // <for functions="HTML2Plaintext,HTMLSantinize">
 static NS_DEFINE_CID(kParserCID, NS_PARSER_CID);
 // </for>
 
@@ -2216,70 +2216,24 @@ nsresult GetMailNewsFont(MimeObject *obj
   return NS_OK;
 }
 
 /* This function syncronously converts an HTML document (as string)
    to plaintext (as string) using the Gecko converter.
 
    flags: see nsIDocumentEncoder.h
 */
-// TODO: |printf|s?
-/* <copy from="mozilla/parser/htmlparser/test/outsinks/Convert.cpp"
-         author="akk"
-         adapted-by="Ben Bucksch"
-         comment=" 'This code would not have been possible without akk.' ;-P.
-                   No, really. "
-   > */
 nsresult
 HTML2Plaintext(const nsString& inString, nsString& outString,
                PRUint32 flags, PRUint32 wrapCol)
 {
-  nsresult rv = NS_OK;
-
-#if DEBUG_BenB
-  printf("Converting HTML to plaintext\n");
-  char* charstar = ToNewUTF8String(inString);
-  printf("HTML source is:\n--------------------\n%s--------------------\n",
-         charstar);
-  delete[] charstar;
-#endif
-
-  // Create a parser
-  nsCOMPtr<nsIParser> parser = do_CreateInstance(kParserCID);
-  NS_ENSURE_TRUE(parser, NS_ERROR_FAILURE);
-
-  // Create the appropriate output sink
-  nsCOMPtr<nsIContentSink> sink =
-                               do_CreateInstance(NS_PLAINTEXTSINK_CONTRACTID);
-  NS_ENSURE_TRUE(sink, NS_ERROR_FAILURE);
-
-  nsCOMPtr<nsIHTMLToTextSink> textSink(do_QueryInterface(sink));
-  NS_ENSURE_TRUE(textSink, NS_ERROR_FAILURE);
-
-  textSink->Initialize(&outString, flags, wrapCol);
-
-  parser->SetContentSink(sink);
-
-  rv = parser->Parse(inString, 0, NS_LITERAL_CSTRING("text/html"), true);
-
-  // Aah! How can NS_ERROR and NS_ABORT_IF_FALSE be no-ops in release builds???
-  if (NS_FAILED(rv))
-  {
-    NS_ERROR("Parse() failed!");
-    return rv;
-  }
-
-#if DEBUG_BenB
-  charstar = ToNewUTF8String(outString);
-  printf("Plaintext is:\n--------------------\n%s--------------------\n",
-         charstar);
-  delete[] charstar;
-#endif
-
-  return rv;
+  nsCOMPtr<nsIParserUtils> utils =
+    do_GetService(NS_PARSERUTILS_CONTRACTID);
+  // do_GetService() yields null on failure; don't dereference it blindly.
+  NS_ENSURE_TRUE(utils, NS_ERROR_FAILURE);
+  return utils->ConvertToPlainText(inString, flags, wrapCol, outString);
 }
-// </copy>
 
 
 
 /* This function syncronously sanitizes an HTML document (string->string)
    using the Gecko ContentSink mozISanitizingHTMLSerializer.