Bug 1230815 - remove unused SetStripHtml() and accumulate HTML part to correct tag stripping. r=aceman
author Jorg K <jorgk@jorgk.com>
Tue, 23 Oct 2018 09:59:56 +0200
changeset 33539 8072f03fc22d28f8ecefd4dd0dc1b640c78419e0
parent 33538 95f2852bdd11a73e492c3b9d088730ff7865167b
child 33540 617e07ac0498edf0a03aad617447f7f89abef2a3
push id 388
push user clokep@gmail.com
push date Mon, 28 Jan 2019 20:54:56 +0000
reviewers aceman
bugs 1230815
Bug 1230815 - remove unused SetStripHtml() and accumulate HTML part to correct tag stripping. r=aceman
mailnews/base/search/public/nsMsgBodyHandler.h
mailnews/base/search/src/nsMsgBodyHandler.cpp
--- a/mailnews/base/search/public/nsMsgBodyHandler.h
+++ b/mailnews/base/search/public/nsMsgBodyHandler.h
@@ -33,17 +33,16 @@ public:
 
   virtual ~nsMsgBodyHandler();
 
   // Returns next message line in buf and the applicable charset, if found.
   // The return value is the length of 'buf' or -1 for EOF.
   int32_t GetNextLine(nsCString &buf, nsCString &charset);
 
   // Transformations
-  void SetStripHtml (bool strip) { m_stripHtml = strip; }
   void SetStripHeaders (bool strip) { m_stripHeaders = strip; }
 
 protected:
   void Initialize();  // common initialization code
 
   // filter related methods. For filtering we always use the headers
   // list instead of the database...
   bool m_Filtering;
@@ -85,17 +84,16 @@ protected:
 
 
   nsCOMPtr<nsIMsgDBHdr> m_msgHdr;
   nsCOMPtr<nsIMsgDatabase> m_db;
 
   // Transformations
   // With the exception of m_isMultipart, these all apply to the various parts
   bool m_stripHeaders;    // true if we're supposed to strip of message headers
-  bool m_stripHtml;       // true if we're supposed to strip off HTML tags
   bool m_pastMsgHeaders;  // true if we've already skipped over the message headers
   bool m_pastPartHeaders; // true if we've already skipped over the part headers
   bool m_partIsHtml;      // true if the Content-type header claims text/html
   bool m_base64part;      // true if the current part is in base64
   bool m_isMultipart;     // true if the message is a multipart/* message
   bool m_partIsText;      // true if the current part is text/*
   bool m_inMessageAttachment; // true if current part is message/*
 
--- a/mailnews/base/search/src/nsMsgBodyHandler.cpp
+++ b/mailnews/base/search/src/nsMsgBodyHandler.cpp
@@ -69,17 +69,16 @@ nsMsgBodyHandler::nsMsgBodyHandler(nsIMs
     OpenLocalFolder();  // if nothing else applies, then we must be a POP folder file
 }
 
 void nsMsgBodyHandler::Initialize()
 // common initialization code regardless of what body type we are handling...
 {
   // Default transformations for local message search and MAPI access
   m_stripHeaders = true;
-  m_stripHtml = true;
   m_partIsHtml = false;
   m_base64part = false;
   m_isMultipart = false;
   m_partIsText = true; // Default is text/plain, maybe proven otherwise later.
   m_pastMsgHeaders = false;
   m_pastPartHeaders = false;
   m_inMessageAttachment = false;
   m_headerBytesRead = 0;
@@ -125,16 +124,22 @@ int32_t nsMsgBodyHandler::GetNextLine (n
   if (!m_isMultipart && m_base64part)
   {
     Base64Decode(buf);
     m_base64part = false;
     // And reapply our transformations...
     outLength = ApplyTransformations(buf, buf.Length(), eatThisLine, buf);
   }
 
+  // Process aggregated HTML.
+  if (!m_isMultipart && m_partIsHtml) {
+    StripHtml(buf);
+    outLength = buf.Length();
+  }
+
   charset = m_partCharset;
   return outLength;
 }
 
 void nsMsgBodyHandler::OpenLocalFolder()
 {
   nsCOMPtr <nsIInputStream> inputStream;
   nsresult rv = m_scope->GetInputStream(m_msgHdr, getter_AddRefs(inputStream));
@@ -288,22 +293,27 @@ int32_t nsMsgBodyHandler::ApplyTransform
       {
         // It is wrong to call ApplyTransformations() here since this will
         // lead to the buffer being doubled-up at |buf.Append(line.get());| below.
         // ApplyTransformations(buf, buf.Length(), eatThisLine, buf);
         // Avoid spurious failures
         eatThisLine = false;
       }
     }
-    else
+    else if (!m_partIsHtml)
     {
       buf.Truncate();
       eatThisLine = true; // We have no content...
     }
 
+    if (m_partIsHtml)
+    {
+      StripHtml(buf);
+    }
+
     // Reset all assumed headers
     m_base64part = false;
     // Get ready to sniff new part headers, but do not reset m_pastMsgHeaders
     // since it will screw the body line count.
     m_pastPartHeaders = false;
     m_partIsHtml = false;
     // If we ever see a multipart message, each part needs to set 'm_partIsText',
     // so no more defaulting to 'true' when the part is done.
@@ -315,31 +325,27 @@ int32_t nsMsgBodyHandler::ApplyTransform
   if (!m_partIsText)
   {
     // Ignore non-text parts
     buf.Truncate();
     eatThisLine = true;
     return 0;
   }
 
-  if (m_base64part)
+  // Accumulate base64 parts and HTML parts for later decoding or tag stripping.
+  if (m_base64part || m_partIsHtml)
   {
-    // We need to keep track of all lines to parse base64encoded...
+    if (m_partIsHtml && ! m_base64part)  // Replace newline in HTML with a space.
+      buf.Append(' ');
     buf.Append(line.get());
     eatThisLine = true;
     return buf.Length();
   }
 
-  // ... but there's no point if we're not parsing base64.
   buf.Assign(line);
-  if (m_stripHtml && m_partIsHtml)
-  {
-    StripHtml (buf);
-  }
-
   return buf.Length();
 }
 
 void nsMsgBodyHandler::StripHtml (nsCString &pBufInOut)
 {
   char *pBuf = (char*) PR_Malloc (pBufInOut.Length() + 1);
   if (pBuf)
   {