Bug 1427124 - fix body search for non-ASCII bodies (incl. 10 new test cases). r=aceman
author Jorg K <jorgk@jorgk.com>
Tue, 02 Jan 2018 23:33:59 +0100
changeset 22966 25e260b9958157daed1021916711bab304b73938
parent 22965 bb674bf2da143e8f074239cfe30cde7d8a760770
child 22967 cecc2f11dcca399c7c91bca688f7fa52c12f839c
push id 13902
push user mozilla@jorgk.com
push date Tue, 02 Jan 2018 22:36:07 +0000
treeherder comm-central@726cc3c0baf2 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers aceman
bugs 1427124
Bug 1427124 - fix body search for non-ASCII bodies (incl. 10 new test cases). r=aceman
mailnews/base/search/public/nsMsgBodyHandler.h
mailnews/base/search/src/nsMsgBodyHandler.cpp
mailnews/base/search/src/nsMsgSearchTerm.cpp
mailnews/base/test/unit/test_searchBody.js
mailnews/test/data/11-plaintext.eml
mailnews/test/data/12-plaintext+attachment.eml
mailnews/test/data/13-HTML.eml
mailnews/test/data/14-HTML+attachment.eml
mailnews/test/data/15-HTML+embedded-image.eml
mailnews/test/data/16-plaintext+HMTL.eml
mailnews/test/data/17-plaintext+(HTML+embedded-image).eml
mailnews/test/data/18-plaintext+HTML+attachment.eml
mailnews/test/data/19-(HTML+embedded-image)+attachment.eml
mailnews/test/data/20-plaintext+(HTML+embedded-image)+attachment.eml
--- a/mailnews/base/search/public/nsMsgBodyHandler.h
+++ b/mailnews/base/search/public/nsMsgBodyHandler.h
@@ -28,18 +28,19 @@ public:
   // headersSize is ignored!!!
   nsMsgBodyHandler (nsIMsgSearchScopeTerm *,
     uint32_t length, nsIMsgDBHdr * msg, nsIMsgDatabase * db,
     const char * headers /* NULL terminated list of headers */,
     uint32_t headersSize, bool ForFilters);
 
   virtual ~nsMsgBodyHandler();
 
-  // returns next message line in buf
-  int32_t GetNextLine(nsCString &buf);
+  // Returns next message line in buf and the applicable charset, if found.
+  // The return value is the length of 'buf' or -1 for EOF.
+  int32_t GetNextLine(nsCString &buf, nsCString &charset);
 
   // Transformations
   void SetStripHtml (bool strip) { m_stripHtml = strip; }
   void SetStripHeaders (bool strip) { m_stripHeaders = strip; }
 
 protected:
   void Initialize();  // common initialization code
 
@@ -93,16 +94,17 @@ protected:
   bool m_pastMsgHeaders;  // true if we've already skipped over the message headers
   bool m_pastPartHeaders; // true if we've already skipped over the part headers
   bool m_partIsHtml;      // true if the Content-type header claims text/html
   bool m_base64part;      // true if the current part is in base64
   bool m_isMultipart;     // true if the message is a multipart/* message
   bool m_partIsText;      // true if the current part is text/*
 
   nsTArray<nsCString> m_boundaries;  // The boundary strings to look for
+  nsCString m_partCharset; // The charset found in the part
 
   // See implementation for comments
   int32_t ApplyTransformations (const nsCString &line, int32_t length,
                                 bool &returnThisLine, nsCString &buf);
   void SniffPossibleMIMEHeader (const nsCString &line);
   static void StripHtml (nsCString &buf);
   static void Base64Decode (nsCString &buf);
 };
--- a/mailnews/base/search/src/nsMsgBodyHandler.cpp
+++ b/mailnews/base/search/src/nsMsgBodyHandler.cpp
@@ -83,17 +83,17 @@ void nsMsgBodyHandler::Initialize()
   m_pastPartHeaders = false;
   m_headerBytesRead = 0;
 }
 
 nsMsgBodyHandler::~nsMsgBodyHandler()
 {
 }
 
-int32_t nsMsgBodyHandler::GetNextLine (nsCString &buf)
+int32_t nsMsgBodyHandler::GetNextLine (nsCString &buf, nsCString &charset)
 {
   int32_t length = -1;          // length of incoming line or -1 eof
   int32_t outLength = -1;       // length of outgoing line or -1 eof
   bool eatThisLine = true;
   nsAutoCString nextLine;
 
   while (eatThisLine) {
     // first, handle the filtering case...this is easy....
@@ -124,16 +124,17 @@ int32_t nsMsgBodyHandler::GetNextLine (n
   if (!m_isMultipart && m_base64part)
   {
     Base64Decode(buf);
     m_base64part = false;
     // And reapply our transformations...
     outLength = ApplyTransformations(buf, buf.Length(), eatThisLine, buf);
   }
 
+  charset = m_partCharset;
   return outLength;
 }
 
 void nsMsgBodyHandler::OpenLocalFolder()
 {
   nsCOMPtr <nsIInputStream> inputStream;
   nsresult rv = m_scope->GetInputStream(m_msgHdr, getter_AddRefs(inputStream));
   // Warn and return if GetInputStream fails
@@ -222,17 +223,16 @@ int32_t nsMsgBodyHandler::GetNextLocalLi
  * @param buf         (inout) if m_base64part, the current part as needed for
  *                            decoding; else, it is treated as an out param (a
  *                            redundant version of line).
  * @return            the length of the line after applying transformations
  */
 int32_t nsMsgBodyHandler::ApplyTransformations (const nsCString &line, int32_t length,
                                                 bool &eatThisLine, nsCString &buf)
 {
-  int32_t newLength = length;
   eatThisLine = false;
 
   if (!m_pastPartHeaders)  // line is a line from the part headers
   {
     if (m_stripHeaders)
       eatThisLine = true;
 
     // We have already grabbed all worthwhile information from the headers,
@@ -318,20 +318,19 @@ int32_t nsMsgBodyHandler::ApplyTransform
     return buf.Length();
   }
 
   // ... but there's no point if we're not parsing base64.
   buf.Assign(line);
   if (m_stripHtml && m_partIsHtml)
   {
     StripHtml (buf);
-    newLength = buf.Length();
   }
 
-  return newLength;
+  return buf.Length();
 }
 
 void nsMsgBodyHandler::StripHtml (nsCString &pBufInOut)
 {
   char *pBuf = (char*) PR_Malloc (pBufInOut.Length() + 1);
   if (pBuf)
   {
     char *pWalk = pBuf;
@@ -396,44 +395,56 @@ void nsMsgBodyHandler::SniffPossibleMIME
       {
         // Nested multipart, get ready for new headers.
         m_base64part = false;
         m_pastPartHeaders = false;
         m_partIsHtml = false;
         m_partIsText = false;
       }
       m_isMultipart = true;
+      m_partCharset.Truncate();
     }
     else if (lowerCaseLine.Find("text/", /* ignoreCase = */ true) != -1)
       m_partIsText = true;
     else if (lowerCaseLine.Find("text/", /* ignoreCase = */ true) == -1)
       m_partIsText = false; // We have disproved our assumption
   }
 
+  int32_t start;
   if (m_isMultipart &&
-      lowerCaseLine.Find("boundary=", /* ignoreCase = */ true) != -1)
+      (start = lowerCaseLine.Find("boundary=", /* ignoreCase = */ true)) != -1)
   {
-    int32_t start = lowerCaseLine.Find("boundary=", /* ignoreCase = */ true);
-    start += 9;
+    start += 9;  // strlen("boundary=")
     if (line[start] == '\"')
       start++;
     int32_t end = line.RFindChar('\"');
     if (end == -1)
       end = line.Length();
 
     // Collect all boundaries. Since we only react to crossing a boundary,
     // we can simply collect the boundaries instead of forming a tree
     // structure from the message. Keep it simple ;-)
     nsCString boundary;
     boundary.AssignLiteral("--");
-    boundary.Append(Substring(line,start,end-start));
+    boundary.Append(Substring(line, start, end-start));
     if (!m_boundaries.Contains(boundary))
       m_boundaries.AppendElement(boundary);
   }
 
+  if (m_isMultipart &&
+      (start = lowerCaseLine.Find("charset=", /* ignoreCase = */ true)) != -1)
+  {
+    start += 8;  // strlen("charset=")
+    int32_t end = line.RFindChar(';');
+    if (end == -1)
+      end = line.Length();
+
+    m_partCharset.Assign(Substring(line, start, end-start));
+  }
+
   if (StringBeginsWith(lowerCaseLine,
                        NS_LITERAL_CSTRING("content-transfer-encoding:")) &&
       lowerCaseLine.Find(ENCODING_BASE64, /* ignoreCase = */ true) != kNotFound)
     m_base64part = true;
 }
 
 /**
  * Decodes the given base64 string.
--- a/mailnews/base/search/src/nsMsgSearchTerm.cpp
+++ b/mailnews/base/search/src/nsMsgSearchTerm.cpp
@@ -782,17 +782,18 @@ nsresult nsMsgSearchTerm::MatchArbitrary
   nsAutoCString curMsgHeader;
   bool searchingHeaders = true;
 
   // We will allow accumulation of received headers;
   bool isReceivedHeader = m_arbitraryHeader.EqualsLiteral("received");
 
   while (searchingHeaders)
   {
-    if (bodyHandler->GetNextLine(buf) < 0 || EMPTY_MESSAGE_LINE(buf))
+    nsCString charsetIgnored;
+    if (bodyHandler->GetNextLine(buf, charsetIgnored) < 0 || EMPTY_MESSAGE_LINE(buf))
       searchingHeaders = false;
     bool isContinuationHeader = searchingHeaders ? NS_IsAsciiWhitespace(buf.CharAt(0))
                                                    : false;
 
     // We try to match the header from the last time through the loop, which should now
     //  have accumulated over possible multiple lines. For all headers except received,
     //  we process a single accumulation, but process accumulated received at the end.
     if (!searchingHeaders || (!isContinuationHeader &&
@@ -952,19 +953,20 @@ nsresult nsMsgSearchTerm::MatchBody (nsI
   // side of too many hits rather than not enough, we'll assume in that
   // general direction. Blech. ### FIX ME
   // bug fix #314637: for stateful charsets like ISO-2022-JP, we don't
   // want to decode quoted printable since it contains '='.
   bool isQuotedPrintable = !nsMsgI18Nstateful_charset(folderCharset) &&
     (PL_strchr (m_value.string, '=') == nullptr);
 
   nsCString compare;
+  nsCString charset;
   while (!endOfFile && result == boolContinueLoop)
   {
-    if (bodyHan->GetNextLine(buf) >= 0)
+    if (bodyHan->GetNextLine(buf, charset) >= 0)
     {
       bool softLineBreak = false;
       // Do in-place decoding of quoted printable
       if (isQuotedPrintable)
       {
         softLineBreak = StringEndsWith(buf, NS_LITERAL_CSTRING("="));
         MsgStripQuotedPrintable(buf);
         // If soft line break, chop off the last char as well.
@@ -979,17 +981,19 @@ nsresult nsMsgSearchTerm::MatchBody (nsI
       // That seems like a pretty safe assumption.
       if (softLineBreak)
         continue;
       if (!compare.IsEmpty())
       {
         char startChar = (char) compare.CharAt(0);
         if (startChar != '\r' && startChar != '\n')
         {
-          rv = MatchString(compare, folderCharset, &result);
+          rv = MatchString(compare,
+                           charset.IsEmpty() ? folderCharset : charset.get(),
+                           &result);
           lines++;
         }
         compare.Truncate();
       }
     }
     else
       endOfFile = true;
   }
--- a/mailnews/base/test/unit/test_searchBody.js
+++ b/mailnews/base/test/unit/test_searchBody.js
@@ -31,26 +31,39 @@ var Body = nsMsgSearchAttrib.Body;
 
 var Files =
 [
   "../../../data/base64-1",
   "../../../data/basic1",
   "../../../data/multipart-base64-2",
   "../../../data/bug132340",
 
+  // Base64 encoded bodies.
   "../../../data/01-plaintext.eml",
   "../../../data/02-plaintext+attachment.eml",
   "../../../data/03-HTML.eml",
   "../../../data/04-HTML+attachment.eml",
   "../../../data/05-HTML+embedded-image.eml",
   "../../../data/06-plaintext+HMTL.eml",
   "../../../data/07-plaintext+(HTML+embedded-image).eml",
   "../../../data/08-plaintext+HTML+attachment.eml",
   "../../../data/09-(HTML+embedded-image)+attachment.eml",
-  "../../../data/10-plaintext+(HTML+embedded-image)+attachment.eml"
+  "../../../data/10-plaintext+(HTML+embedded-image)+attachment.eml",
+
+  // Bodies with non-ASCII characters in UTF-8 and other charsets.
+  "../../../data/11-plaintext.eml",
+  "../../../data/12-plaintext+attachment.eml",  // using ISO-8859-7 (Greek)
+  "../../../data/13-HTML.eml",
+  "../../../data/14-HTML+attachment.eml",
+  "../../../data/15-HTML+embedded-image.eml",
+  "../../../data/16-plaintext+HMTL.eml",                   // text part is base64 encoded
+  "../../../data/17-plaintext+(HTML+embedded-image).eml",  // HTML part is base64 encoded
+  "../../../data/18-plaintext+HTML+attachment.eml",
+  "../../../data/19-(HTML+embedded-image)+attachment.eml",
+  "../../../data/20-plaintext+(HTML+embedded-image)+attachment.eml"  // using windows-1252
 ]
 var Tests =
 [
   /* Translate Base64 messages */
   // "World!" is contained in three messages, but in bug132340 it's not in a text
   // part and should not be found.
   { value: "World!", op: Contains, count: 2 },
   /* Don't match the base64 text */
@@ -72,16 +85,24 @@ var Tests =
   { value: "iVBORw", op: Contains, count: 0 },
 
   // The base64 of attachments contains "wMA005J0z" and we don't want to find that.
   { value: "wMA005J0z", op: Contains, count: 0 },
 
   // The base64 of the plaintext and HTML parts contains "U2VhcmNoIGZ"
   // and we don't want to find that.
   { value: "U2VhcmNoIGZ", op: Contains, count: 0 },
+
+  // Messages 11 and 13 to 20 contain "hühü" once.
+  { value: "hühü", op: Contains, count: 9 },
+  // Message 12 contains Καλησπέρα (good evening in Greek).
+  { value: "Καλησπέρα", op: Contains, count: 1 },
+
+  // Messages 16, 17, 18, 20 contain "hïhï" in the plaintext part.
+  { value: "hïhï", op: Contains, count: 4 },
 ];
 
 function fixFile(file) {
   var fstream = Cc["@mozilla.org/network/file-input-stream;1"]
                   .createInstance(Ci.nsIFileInputStream);
   fstream.init(file, -1, -1, Ci.nsIFileInputStream.CLOSE_ON_EOF);
   var sstream = Cc["@mozilla.org/scriptableinputstream;1"]
                   .createInstance(Ci.nsIScriptableInputStream);
new file mode 100644
--- /dev/null
+++ b/mailnews/test/data/11-plaintext.eml
@@ -0,0 +1,14 @@
+To: test@example.com
+From: test@example.com
+Subject: 11 plaintext
+Message-ID: <8259dd8e-2293-8765-e720-61dfcd10a6f3@example.com>
+Date: Sat, 30 Dec 2017 19:12:38 +0100
+User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:59.0) Gecko/20100101
+ Thunderbird/59.0a1
+MIME-Version: 1.0
+Content-Type: text/plain; charset=utf-8; format=flowed
+Content-Transfer-Encoding: 8bit
+Content-Language: en-GB
+
+Search for hühü
+
new file mode 100644
--- /dev/null
+++ b/mailnews/test/data/12-plaintext+attachment.eml
@@ -0,0 +1,32 @@
+To: test@example.com
+From: test@example.com
+Subject: 12 plaintext + attachment
+Message-ID: <9ec4f4cb-b14b-aed6-a042-58897d12e4a9@example.com>
+Date: Sat, 30 Dec 2017 19:15:38 +0100
+User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:59.0) Gecko/20100101
+ Thunderbird/59.0a1
+MIME-Version: 1.0
+Content-Type: multipart/mixed;
+ boundary="------------BC006DD22051247571F398E0"
+Content-Language: en-GB
+
+This is a multi-part message in MIME format.
+--------------BC006DD22051247571F398E0
+Content-Type: text/plain; charset=ISO-8859-7; format=flowed
+Content-Transfer-Encoding: 8bit
+
+Search for Greek text Καλησπέρα
+
+--------------BC006DD22051247571F398E0
+Content-Type: image/png;
+ name="attach.png"
+Content-Transfer-Encoding: base64
+Content-Disposition: attachment;
+ filename="attach.png"
+
+iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAIAAACQkWg2AAAAAXNSR0IArs4c6QAAAARnQU1B
+AACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAASdEVYdFNvZnR3YXJlAEdyZWVuc2hv
+dF5VCAUAAABpSURBVDhP3dA7EoAgDEXR7Ew+bgdx/018BEYyiICtb27FcCig3Z7Im6gK3ZxN
+/RcQkb6aK8DjtuRMzMEAiNGvlFpgtyOdEjFz14xA10wA1pg5wLRZAthtVgEm5vGtA4DhvILa
+O8A+AuYLy0U5xUUpL8kAAAAASUVORK5CYII=
+--------------BC006DD22051247571F398E0--
new file mode 100644
--- /dev/null
+++ b/mailnews/test/data/13-HTML.eml
@@ -0,0 +1,14 @@
+To: test@example.com
+From: test@example.com
+Subject: 13 HTML
+Message-ID: <8259dd8e-2293-8765-e720-61dfcd10a6f3@example.com>
+Date: Sat, 30 Dec 2017 19:12:38 +0100
+User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:59.0) Gecko/20100101
+ Thunderbird/59.0a1
+MIME-Version: 1.0
+Content-Type: text/html; charset=utf-8; format=flowed
+Content-Transfer-Encoding: 8bit
+Content-Language: en-GB
+
+<body>Search for <b>hühü</b></body>
+
new file mode 100644
--- /dev/null
+++ b/mailnews/test/data/14-HTML+attachment.eml
@@ -0,0 +1,32 @@
+To: test@example.com
+From: test@example.com
+Subject: 14 HTML + attachment
+Message-ID: <9ec4f4cb-b14b-aed6-a042-58897d12e4a9@example.com>
+Date: Sat, 30 Dec 2017 19:15:38 +0100
+User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:59.0) Gecko/20100101
+ Thunderbird/59.0a1
+MIME-Version: 1.0
+Content-Type: multipart/mixed;
+ boundary="------------BC006DD22051247571F398E0"
+Content-Language: en-GB
+
+This is a multi-part message in MIME format.
+--------------BC006DD22051247571F398E0
+Content-Type: text/html; charset=utf-8; format=flowed
+Content-Transfer-Encoding: 8bit
+
+<body>Search for <b>hühü</b></body>
+
+--------------BC006DD22051247571F398E0
+Content-Type: image/png;
+ name="attach.png"
+Content-Transfer-Encoding: base64
+Content-Disposition: attachment;
+ filename="attach.png"
+
+iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAIAAACQkWg2AAAAAXNSR0IArs4c6QAAAARnQU1B
+AACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAASdEVYdFNvZnR3YXJlAEdyZWVuc2hv
+dF5VCAUAAABpSURBVDhP3dA7EoAgDEXR7Ew+bgdx/018BEYyiICtb27FcCig3Z7Im6gK3ZxN
+/RcQkb6aK8DjtuRMzMEAiNGvlFpgtyOdEjFz14xA10wA1pg5wLRZAthtVgEm5vGtA4DhvILa
+O8A+AuYLy0U5xUUpL8kAAAAASUVORK5CYII=
+--------------BC006DD22051247571F398E0--
new file mode 100644
--- /dev/null
+++ b/mailnews/test/data/15-HTML+embedded-image.eml
@@ -0,0 +1,42 @@
+To: test@example.com
+From: test@example.com
+Subject: 15 HTML + embedded image
+Message-ID: <c1ddcd5d-71c1-9c9d-1b81-e3b9abb99030@example.com>
+Date: Sat, 30 Dec 2017 19:26:23 +0100
+User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:59.0) Gecko/20100101
+ Thunderbird/59.0a1
+MIME-Version: 1.0
+Content-Type: multipart/related;
+ boundary="------------B2BBD36A919AB2B2F84E2469"
+Content-Language: en-GB
+
+This is a multi-part message in MIME format.
+--------------B2BBD36A919AB2B2F84E2469
+Content-Type: text/html; charset=utf-8
+Content-Transfer-Encoding: 8bit
+
+<html>
+  <head>
+
+    <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  </head>
+  <body text="#000000" bgcolor="#FFFFFF">
+    <p><tt>Search for hühü</tt></p>
+    <p><img src="cid:part1.8C5E6A81.D0C1B91A@example.com" alt=""></p>
+  </body>
+</html>
+
+--------------B2BBD36A919AB2B2F84E2469
+Content-Type: image/png;
+ name="kigaaldcbanejcbi.png"
+Content-Transfer-Encoding: base64
+Content-ID: <part1.8C5E6A81.D0C1B91A@example.com>
+Content-Disposition: inline;
+ filename="kigaaldcbanejcbi.png"
+
+iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAIAAACQkWg2AAAAAXNSR0IArs4c6QAAAARnQU1B
+AACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAASdEVYdFNvZnR3YXJlAEdyZWVuc2hv
+dF5VCAUAAABpSURBVDhP3dA7EoAgDEXR7Ew+bgdx/018BEYyiICtb27FcCig3Z7Im6gK3ZxN
+/RcQkb6aK8DjtuRMzMEAiNGvlFpgtyOdEjFz14xA10wA1pg5wLRZAthtVgEm5vGtA4DhvILa
+O8A+AuYLy0U5xUUpL8kAAAAASUVORK5CYII=
+--------------B2BBD36A919AB2B2F84E2469--
new file mode 100644
--- /dev/null
+++ b/mailnews/test/data/16-plaintext+HMTL.eml
@@ -0,0 +1,27 @@
+To: test@example.com
+From: test@example.com
+Subject: 16 plaintext + HMTL
+Message-ID: <a30f750d-d56c-8a52-971c-f95a131e8332@example.com>
+Date: Sat, 30 Dec 2017 19:31:21 +0100
+User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:59.0) Gecko/20100101
+ Thunderbird/59.0a1
+MIME-Version: 1.0
+Content-Type: multipart/alternative;
+ boundary="------------FAB286B8794CC63C0A0FD1BB"
+Content-Language: de-DE
+
+This is a multi-part message in MIME format.
+--------------FAB286B8794CC63C0A0FD1BB
+Content-Type: text/plain; charset=utf-8; format=flowed
+Content-Transfer-Encoding: base64
+
+U2VhcmNoIGZvciBow69ow68=
+
+
+--------------FAB286B8794CC63C0A0FD1BB
+Content-Type: text/html; charset=utf-8
+Content-Transfer-Encoding: 8bit
+
+<body>Search for <b>hühü</b></body>
+
+--------------FAB286B8794CC63C0A0FD1BB--
new file mode 100644
--- /dev/null
+++ b/mailnews/test/data/17-plaintext+(HTML+embedded-image).eml
@@ -0,0 +1,52 @@
+To: test@example.com
+From: test@example.com
+Subject: 17 plaintext + (HTML + embedded image)
+Message-ID: <fd7a5d4a-6a3a-8b9b-b3e4-9e9391c3c703@example.com>
+Date: Sat, 30 Dec 2017 19:36:00 +0100
+User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:59.0) Gecko/20100101
+ Thunderbird/59.0a1
+MIME-Version: 1.0
+Content-Type: multipart/alternative;
+ boundary="------------77E82F0826A0A90EABD21FC3"
+Content-Language: en-GB
+
+This is a multi-part message in MIME format.
+--------------77E82F0826A0A90EABD21FC3
+Content-Type: text/plain; charset=utf-8; format=flowed
+Content-Transfer-Encoding: 8bit
+
+Search for hïhï
+
+
+--------------77E82F0826A0A90EABD21FC3
+Content-Type: multipart/related;
+ boundary="------------D719681335F2A7D71D3761B1"
+
+
+--------------D719681335F2A7D71D3761B1
+Content-Type: text/html; charset=utf-8
+Content-Transfer-Encoding: base64
+
+PGh0bWw+DQogIDxoZWFkPg0KDQogICAgPG1ldGEgaHR0cC1lcXVpdj0iY29udGVudC10eXBl
+IiBjb250ZW50PSJ0ZXh0L2h0bWw7IGNoYXJzZXQ9dXRmLTgiPg0KICA8L2hlYWQ+DQogIDxi
+b2R5IHRleHQ9IiMwMDAwMDAiIGJnY29sb3I9IiNGRkZGRkYiPg0KICAgIDxwPjx0dD5TZWFy
+Y2ggZm9yIGjDvGjDvDwvdHQ+PC9wPg0KICAgIDxwPjxpbWcgc3JjPSJjaWQ6cGFydDEuOEM1
+RTZBODEuRDBDMUI5MUFAZXhhbXBsZS5jb20iIGFsdD0iIj48L3A+DQogIDwvYm9keT4NCjwv
+aHRtbD4=
+
+--------------D719681335F2A7D71D3761B1
+Content-Type: image/png;
+ name="kigaaldcbanejcbi.png"
+Content-Transfer-Encoding: base64
+Content-ID: <part1.8C5E6A81.D0C1B91A@example.com>
+Content-Disposition: inline;
+ filename="kigaaldcbanejcbi.png"
+
+iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAIAAACQkWg2AAAAAXNSR0IArs4c6QAAAARnQU1B
+AACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAASdEVYdFNvZnR3YXJlAEdyZWVuc2hv
+dF5VCAUAAABpSURBVDhP3dA7EoAgDEXR7Ew+bgdx/018BEYyiICtb27FcCig3Z7Im6gK3ZxN
+/RcQkb6aK8DjtuRMzMEAiNGvlFpgtyOdEjFz14xA10wA1pg5wLRZAthtVgEm5vGtA4DhvILa
+O8A+AuYLy0U5xUUpL8kAAAAASUVORK5CYII=
+--------------D719681335F2A7D71D3761B1--
+
+--------------77E82F0826A0A90EABD21FC3--
new file mode 100644
--- /dev/null
+++ b/mailnews/test/data/18-plaintext+HTML+attachment.eml
@@ -0,0 +1,46 @@
+To: test@example.com
+From: test@example.com
+Subject: 18 plaintext + HTML + attachment
+Message-ID: <b09c8682-a485-98ee-8f8e-edb89a1deec3@example.com>
+Date: Sat, 30 Dec 2017 19:58:40 +0100
+User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:59.0) Gecko/20100101
+ Thunderbird/59.0a1
+MIME-Version: 1.0
+Content-Type: multipart/mixed;
+ boundary="------------A1EC8071C6B86B871C9CB87F"
+Content-Language: en-GB
+
+This is a multi-part message in MIME format.
+--------------A1EC8071C6B86B871C9CB87F
+Content-Type: multipart/alternative;
+ boundary="------------9EC5D7C387C9839604A227BB"
+
+
+--------------9EC5D7C387C9839604A227BB
+Content-Type: text/plain; charset=utf-8; format=flowed
+Content-Transfer-Encoding: 8bit
+
+Search for hïhï
+
+
+--------------9EC5D7C387C9839604A227BB
+Content-Type: text/html; charset=utf-8
+Content-Transfer-Encoding: 8bit
+
+<body>Search for <b>hühü</b></body>
+
+--------------9EC5D7C387C9839604A227BB--
+
+--------------A1EC8071C6B86B871C9CB87F
+Content-Type: image/png;
+ name="attach.png"
+Content-Transfer-Encoding: base64
+Content-Disposition: attachment;
+ filename="attach.png"
+
+iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAIAAACQkWg2AAAAAXNSR0IArs4c6QAAAARnQU1B
+AACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAASdEVYdFNvZnR3YXJlAEdyZWVuc2hv
+dF5VCAUAAABpSURBVDhP3dA7EoAgDEXR7Ew+bgdx/018BEYyiICtb27FcCig3Z7Im6gK3ZxN
+/RcQkb6aK8DjtuRMzMEAiNGvlFpgtyOdEjFz14xA10wA1pg5wLRZAthtVgEm5vGtA4DhvILa
+O8A+AuYLy0U5xUUpL8kAAAAASUVORK5CYII=
+--------------A1EC8071C6B86B871C9CB87F--
new file mode 100644
--- /dev/null
+++ b/mailnews/test/data/19-(HTML+embedded-image)+attachment.eml
@@ -0,0 +1,59 @@
+To: test@example.com
+From: test@example.com
+Subject: 19 (HTML + embedded image) + attachment
+Message-ID: <cdf5bf44-03a4-4be1-c9c7-88cb4a5838ed@example.com>
+Date: Sat, 30 Dec 2017 20:19:46 +0100
+User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:59.0) Gecko/20100101
+ Thunderbird/59.0a1
+MIME-Version: 1.0
+Content-Type: multipart/mixed;
+ boundary="------------F5CEBCED9FC06ACB07B3D485"
+Content-Language: en-GB
+
+This is a multi-part message in MIME format.
+--------------F5CEBCED9FC06ACB07B3D485
+Content-Type: multipart/related;
+ boundary="------------1722706F2C203820A6CAA06F"
+
+
+--------------1722706F2C203820A6CAA06F
+Content-Type: text/html; charset=utf-8
+Content-Transfer-Encoding: 8bit
+
+<html>
+  <head>
+
+    <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  </head>
+  <body text="#000000" bgcolor="#FFFFFF">
+    <p><tt>Search for hühü</tt></p>
+    <p><img src="cid:part1.8C5E6A81.D0C1B91A@example.com" alt=""></p>
+  </body>
+</html>
+
+--------------1722706F2C203820A6CAA06F
+Content-Type: image/png;
+ name="kigaaldcbanejcbi.png"
+Content-Transfer-Encoding: base64
+Content-ID: <part1.8C5E6A81.D0C1B91A@example.com>
+Content-Disposition: inline;
+ filename="kigaaldcbanejcbi.png"
+
+iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAIAAACQkWg2AAAAAXNSR0IArs4c6QAAAARnQU1B
+AACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAASdEVYdFNvZnR3YXJlAEdyZWVuc2hv
+dF5VCAUAAABpSURBVDhP3dA7EoAgDEXR7Ew+bgdx/018BEYyiICtb27FcCig3Z7Im6gK3ZxN
+/RcQkb6aK8DjtuRMzMEAiNGvlFpgtyOdEjFz14xA10wA1pg5wLRZAthtVgEm5vGtA4DhvILa
+O8A+AuYLy0U5xUUpL8kAAAAASUVORK5CYII=
+--------------1722706F2C203820A6CAA06F--
+
+--------------F5CEBCED9FC06ACB07B3D485
+Content-Type: image/png;
+ name="attach2.png"
+Content-Transfer-Encoding: base64
+Content-Disposition: attachment;
+ filename="attach2.png"
+
+iVBORw0KGgoAAAANSUhEUgAAAA4AAAAOCAIAAACQKrqGAAAAAXNSR0IArs4c6QAAAARnQU1B
+AACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAASdEVYdFNvZnR3YXJlAEdyZWVuc2hv
+dF5VCAUAAAAVSURBVChTY/hPNBhVOqqUaKX//wMA005J0zvV0VsAAAAASUVORK5CYII=
+--------------F5CEBCED9FC06ACB07B3D485--
new file mode 100644
--- /dev/null
+++ b/mailnews/test/data/20-plaintext+(HTML+embedded-image)+attachment.eml
@@ -0,0 +1,73 @@
+To: test@example.com
+From: test@example.com
+Subject: 20 plaintext + (HTML + embedded image) + attachment
+Message-ID: <1e58c8f2-3a15-96e7-76b7-046cf6e1ce1e@example.com>
+Date: Sat, 30 Dec 2017 20:50:01 +0100
+User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:59.0) Gecko/20100101
+ Thunderbird/59.0a1
+MIME-Version: 1.0
+Content-Type: multipart/mixed;
+ boundary="------------B94553864BC0A4472640622E"
+Content-Language: en-GB
+
+This is a multi-part message in MIME format.
+--------------B94553864BC0A4472640622E
+Content-Type: multipart/alternative;
+ boundary="------------B24EA868A72E5E6144485481"
+
+
+--------------B24EA868A72E5E6144485481
+Content-Type: text/plain; charset=windows-1252; format=flowed
+Content-Transfer-Encoding: 8bit
+
+Search for hïhï
+
+
+--------------B24EA868A72E5E6144485481
+Content-Type: multipart/related;
+ boundary="------------D1360749D11EBC0C64444B6C"
+
+
+--------------D1360749D11EBC0C64444B6C
+Content-Type: text/html; charset=windows-1252
+Content-Transfer-Encoding: 8bit
+
+<html>
+  <head>
+
+    <meta http-equiv="content-type" content="text/html; charset=windows-1252">
+  </head>
+  <body text="#000000" bgcolor="#FFFFFF">
+    <p><tt>Search for hühü</tt></p>
+    <p><img src="cid:part1.8C5E6A81.D0C1B91A@example.com" alt=""></p>
+  </body>
+</html>
+
+--------------D1360749D11EBC0C64444B6C
+Content-Type: image/png;
+ name="kigaaldcbanejcbi.png"
+Content-Transfer-Encoding: base64
+Content-ID: <part1.8C5E6A81.D0C1B91A@example.com>
+Content-Disposition: inline;
+ filename="kigaaldcbanejcbi.png"
+
+iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAIAAACQkWg2AAAAAXNSR0IArs4c6QAAAARnQU1B
+AACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAASdEVYdFNvZnR3YXJlAEdyZWVuc2hv
+dF5VCAUAAABpSURBVDhP3dA7EoAgDEXR7Ew+bgdx/018BEYyiICtb27FcCig3Z7Im6gK3ZxN
+/RcQkb6aK8DjtuRMzMEAiNGvlFpgtyOdEjFz14xA10wA1pg5wLRZAthtVgEm5vGtA4DhvILa
+O8A+AuYLy0U5xUUpL8kAAAAASUVORK5CYII=
+--------------D1360749D11EBC0C64444B6C--
+
+--------------B24EA868A72E5E6144485481--
+
+--------------B94553864BC0A4472640622E
+Content-Type: image/png;
+ name="attach2.png"
+Content-Transfer-Encoding: base64
+Content-Disposition: attachment;
+ filename="attach2.png"
+
+iVBORw0KGgoAAAANSUhEUgAAAA4AAAAOCAIAAACQKrqGAAAAAXNSR0IArs4c6QAAAARnQU1B
+AACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAASdEVYdFNvZnR3YXJlAEdyZWVuc2hv
+dF5VCAUAAAAVSURBVChTY/hPNBhVOqqUaKX//wMA005J0zvV0VsAAAAASUVORK5CYII=
+--------------B94553864BC0A4472640622E--