Bug 1301989 - make base64 decoding more robust. r=A.Peters
author: Jorg K <jorgk@jorgk.com>
Sat, 08 Jul 2017 07:38:42 +0200
changeset 28415 23fafbba334ac5e12dd48bca5a8b10680265f6f1
parent 28414 aa2b50ea86596b917d59ba5f6f9385b2d0a35bf2
child 28416 4a0c88a88c3d919cf13960ceda498d7410429505
push id: 1986
push user: clokep@gmail.com
push date: Wed, 02 Aug 2017 14:43:31 +0000
treeherder: comm-beta@b51c9adf2c9e [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: A.Peters
bugs: 1301989
Bug 1301989 - make base64 decoding more robust. r=A.Peters
mailnews/mime/jsmime/jsmime.js
mailnews/mime/jsmime/test/test_header.js
--- a/mailnews/mime/jsmime/jsmime.js
+++ b/mailnews/mime/jsmime/jsmime.js
@@ -53,16 +53,18 @@ function decode_qp(buffer, more) {
  *                              false, we should flush all pending output.
  * @returns {Array(BinaryString, BinaryString)} The first element of the array
  *          is the decoded string. The second element contains the data that
  *          could not be decoded and needs to be retained for the next call.
  */
 function decode_base64(buffer, more) {
   // Drop all non-base64 characters
   let sanitize = buffer.replace(/[^A-Za-z0-9+\/=]/g,'');
+  // Remove harmful `=' chars in the middle.
+  sanitize = sanitize.replace(/=+([A-Za-z0-9+\/])/g, '$1');
   // We need to encode in groups of 4 chars. If we don't have enough, leave the
   // excess for later. If there aren't any more, drop enough to make it 4.
   let excess = sanitize.length % 4;
   if (excess != 0 && more)
     buffer = sanitize.slice(-excess);
   else
     buffer = '';
   sanitize = sanitize.substring(0, sanitize.length - excess);
@@ -630,22 +632,16 @@ function decodeRFC2047Words(headerValue)
 
     let buffer;
     if (encoding == 'B' || encoding == 'b') {
       // Decode base64. If there's any non-base64 data, treat the string as
       // an illegal token.
       if (/[^A-Za-z0-9+\/=]/.exec(text))
         return false;
 
-      // Base64 strings must be a length of multiple 4, but it seems that some
-      // mailers accidentally insert one too many `=' chars. Gracefully handle
-      // this case; see bug 227290 for more information.
-      if (text.length % 4 == 1 && text.charAt(text.length - 1) == '=')
-        text = text.slice(0, -1);
-
       // Decode the string
       buffer = mimeutils.decode_base64(text, false)[0];
     } else if (encoding == 'Q' || encoding == 'q') {
       // Q encoding here looks a lot like quoted-printable text. The differences
       // between quoted-printable and this are that quoted-printable allows you
       // to quote newlines (this doesn't), while this replaces spaces with _.
       // We can reuse the decode_qp code here, since newlines are already
       // stripped from the header. There is one edge case that could trigger a
--- a/mailnews/mime/jsmime/test/test_header.js
+++ b/mailnews/mime/jsmime/test/test_header.js
@@ -627,23 +627,30 @@ suite('headerparser', function () {
       // We don't decode unrecognized charsets (This one is actually UTF-8).
       ["=??B?Sy4gSC4gdm9uIFLDvGRlbg==?=", "=??B?Sy4gSC4gdm9uIFLDvGRlbg==?="],
 
       // Test for bug 1374149 with ISO-2022-JP where we shouldn't stream
       // if the first token ends in ESC(B.
       // GyRCJCIbKEI= is the base64 encoding of ESC$B$"ESC(B.
       ["=?ISO-2022-JP?B?GyRCJCIbKEI=?==?ISO-2022-JP?B?GyRCJCIbKEI=?=", "ああ"],
 
-      // Tolerate invalid split of character, € = 0xE2 0x82 0xAC in UTF-8.  
+      // Tolerate invalid split of character, € = 0xE2 0x82 0xAC in UTF-8.
       ["Split =?UTF-8?Q?=E2?= =?UTF-8?Q?=82=AC?= after first byte",
-        "Split € after first byte"], 
+        "Split € after first byte"],
       ["Split =?UTF-8?Q?=E2=82?= =?UTF-8?Q?=AC?= after second byte",
-        "Split € after second byte"], 
+        "Split € after second byte"],
       ["Byte missing =?UTF-8?Q?=E2=82?=",
         "Byte missing \ufffd"], // Replacement character for invalid input.
+
+      // Test for bug 1301989: Tolerate invalid base64 encoding.
+      ["=?us-ascii?B?YWJjZA==?=", "abcd"],    // correct
+      ["=?us-ascii?B?YWJjZA=?=",  "abc"],     // not a multiple of 4
+      ["=?us-ascii?B?Y=WJjZA==?=", "abcd"],   // invalid =
+      ["=?us-ascii?B?Y=WJj==ZA==?=", "abcd"], // invalid =
+      ["=?us-ascii?B?YWJjZA===?=", "abcd"],   // excess = at the end, see bug 227290.
     ];
     header_tests.forEach(function (data) {
       arrayTest(data, function () {
         assert.deepEqual(headerparser.decodeRFC2047Words(data[0]), data[1]);
       });
     });
   });
   suite('8-bit header processing', function () {