Bug 1614796 - Body search: Strip soft line breaks in QP parts when assembling HTML body. r=benc a=jorgk
author Jorg K <jorgk@jorgk.com>
Sun, 23 Feb 2020 08:59:56 +0100
changeset 38184 aade030db497ef2b73acf128dbdbefa2799a732e
parent 38183 2aeba9647436e354e4e61bf3e37f492e0728dafa
child 38185 541c7a9fd7f3d6a8a5b69d4a697d0fb8c412bb3c
push id 398
push user clokep@gmail.com
push date Mon, 09 Mar 2020 19:10:28 +0000
reviewers benc, jorgk
bugs 1614796
Bug 1614796 - Body search: Strip soft line breaks in QP parts when assembling HTML body. r=benc a=jorgk
mailnews/base/search/src/nsMsgBodyHandler.cpp
mailnews/base/search/src/nsMsgSearchTerm.cpp
mailnews/base/test/unit/test_searchBody.js
mailnews/test/data/21-plaintext.eml
mailnews/test/data/23-HTML.eml
mailnews/test/data/24-HTML+attachment.eml
--- a/mailnews/base/search/src/nsMsgBodyHandler.cpp
+++ b/mailnews/base/search/src/nsMsgBodyHandler.cpp
@@ -269,17 +269,17 @@ int32_t nsMsgBodyHandler::ApplyTransform
     if (m_base64part && m_partIsText) {
       Base64Decode(buf);
       // Work on the parsed string
       if (!buf.Length()) {
         NS_WARNING("Trying to transform an empty buffer");
         eatThisLine = true;
       } else {
         // It is wrong to call ApplyTransformations() here since this will
-        // lead to the buffer being doubled-up at |buf.Append(line.get());|
+        // lead to the buffer being doubled-up at |buf.Append(line);|
         // below. ApplyTransformations(buf, buf.Length(), eatThisLine, buf);
         // Avoid spurious failures
         eatThisLine = false;
       }
     } else if (!m_partIsHtml) {
       buf.Truncate();
       eatThisLine = true;  // We have no content...
     }
@@ -308,19 +308,28 @@ int32_t nsMsgBodyHandler::ApplyTransform
     // Ignore non-text parts
     buf.Truncate();
     eatThisLine = true;
     return 0;
   }
 
   // Accumulate base64 parts and HTML parts for later decoding or tag stripping.
   if (m_base64part || m_partIsHtml) {
-    if (m_partIsHtml && !m_base64part)  // Replace newline in HTML with a space.
-      buf.Append(' ');
-    buf.Append(line.get());
+    if (m_partIsHtml && !m_base64part) {
+      size_t bufLength = buf.Length();
+      if (!m_partIsQP || bufLength == 0 ||
+          !StringEndsWith(buf, NS_LITERAL_CSTRING("="))) {
+        // Replace newline in HTML with a space.
+        buf.Append(' ');
+      } else {
+        // Strip the soft line break.
+        buf.SetLength(bufLength - 1);
+      }
+    }
+    buf.Append(line);
     eatThisLine = true;
     return buf.Length();
   }
 
   buf.Assign(line);
   return buf.Length();
 }
 
--- a/mailnews/base/search/src/nsMsgSearchTerm.cpp
+++ b/mailnews/base/search/src/nsMsgSearchTerm.cpp
@@ -854,17 +854,17 @@ nsresult nsMsgSearchTerm::MatchBody(nsIM
         MsgStripQuotedPrintable(buf);
         // If soft line break, chop off the last char as well.
         size_t bufLength = buf.Length();
         if ((bufLength > 0) && softLineBreak) buf.SetLength(bufLength - 1);
       }
       compare.Append(buf);
       // If this line ends with a soft line break, loop around
       // and get the next line before looking for the search string.
-      // This assumes the message can't end on a QP soft-line break.
+      // This assumes the message can't end on a QP soft line break.
       // That seems like a pretty safe assumption.
       if (softLineBreak) continue;
       if (!compare.IsEmpty()) {
         char startChar = (char)compare.CharAt(0);
         if (startChar != '\r' && startChar != '\n') {
           rv = MatchString(compare,
                            charset.IsEmpty() ? folderCharset : charset.get(),
                            &result);
--- a/mailnews/base/test/unit/test_searchBody.js
+++ b/mailnews/base/test/unit/test_searchBody.js
@@ -127,16 +127,19 @@ var Tests = [
   // Messages 16, 17, 18, 20 contain "hïhï" in the plaintext part.
   { value: "hïhï", op: Contains, count: 4 },
 
   // Messages 21 and 23 to 30 contain "höhö" once.
   { value: "höhö", op: Contains, count: 9 },
   // Message 22 contains Καλημέρα (good morning in Greek).
   { value: "Καλημέρα", op: Contains, count: 1 },
 
+  // Messages 21, 23 and 24 contain "softbreak" broken by a soft line break.
+  { value: "softbreak", op: Contains, count: 3 },
+
   // Messages 16, 17, 18, 20 contain "hähä" in the plaintext part.
   { value: "hähä", op: Contains, count: 4 },
 
   // The four messages with message/rfc822 attachment contain "bodyOfAttachedMessagePlain"
   // or "bodyOfAttachedMessagePläin" in the plaintext part and "bodyOfAttachedMessageHTML"
   // or "bodyOfAttachedMessägeHTML" in the HTML part.
   { value: "bodyOfAttachedMessagePlain", op: Contains, count: 2 },
   { value: "bodyOfAttachedMessagePläin", op: Contains, count: 2 },
--- a/mailnews/test/data/21-plaintext.eml
+++ b/mailnews/test/data/21-plaintext.eml
@@ -6,9 +6,11 @@ Date: Sat, 30 Dec 2017 19:12:38 +0100
 User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:59.0) Gecko/20100101
  Thunderbird/59.0a1
 MIME-Version: 1.0
 Content-Type: text/plain; charset="utf-8"; format=flowed
 Content-Transfer-Encoding: quoted-printable
 Content-Language: en-GB
 
 Search for h=C3=B6h=C3=B6
+Test that we ignore a soft=
+break correctly.
 
--- a/mailnews/test/data/23-HTML.eml
+++ b/mailnews/test/data/23-HTML.eml
@@ -5,10 +5,14 @@ Message-ID: <8259dd8e-2293-8765-e720-61d
 Date: Sat, 30 Dec 2017 19:12:38 +0100
 User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:59.0) Gecko/20100101
  Thunderbird/59.0a1
 MIME-Version: 1.0
 Content-Type: text/html; charset="utf-8"; format=flowed
 Content-Transfer-Encoding: quoted-printable
 Content-Language: en-GB
 
-<body>Search for <b>h=C3=B6h=C3=B6</b></body>
+<body>Search for <b>h=C3=B6h=C3=B6</b>
+Test that we assemble HTML bodies correctly when there is a
+<a href=3D"https://www.example.com">soft=
+break</a> involved.
+</body>
 
--- a/mailnews/test/data/24-HTML+attachment.eml
+++ b/mailnews/test/data/24-HTML+attachment.eml
@@ -10,17 +10,20 @@ Content-Type: multipart/mixed;
  boundary="------------BC006DD22051247571F398E0"
 Content-Language: en-GB
 
 This is a multi-part message in MIME format.
 --------------BC006DD22051247571F398E0
 Content-Type: text/html; charset="utf-8"; format=flowed
 Content-Transfer-Encoding: quoted-printable
 
-<body>Search for <b>h=C3=B6h=C3=B6</b></body>
+<body>Search for <b>h=C3=B6h=C3=B6</b>
+Test that we ignore a soft=
+break correctly.
+</body>
 
 --------------BC006DD22051247571F398E0
 Content-Type: image/png;
  name="attach.png"
 Content-Transfer-Encoding: base64
 Content-Disposition: attachment;
  filename="attach.png"