Bug 1069790 - Email addresses with parenthesis are not pretty-printed anymore. r=rkent, a=rkent
authorKarsten Düsterloh <mnyromyr@tprac.de>
Fri, 24 Apr 2015 17:07:00 +0200
changeset 22300 a74742812d519a6dc151f131b91ca1d90a54b5af
parent 22299 c3e582335241e9cf6c8c6d2769a5584fd40dcd75
child 22301 49615525b791c363d64e74e1e273296ff5638807
push idunknown
push userunknown
push dateunknown
reviewersrkent, rkent
bugs1069790
Bug 1069790 - Email addresses with parenthesis are not pretty-printed anymore. r=rkent, a=rkent
mailnews/mime/jsmime/jsmime.js
mailnews/mime/jsmime/test/test_header.js
--- a/mailnews/mime/jsmime/jsmime.js
+++ b/mailnews/mime/jsmime/jsmime.js
@@ -331,16 +331,21 @@ var headerparser = {};
  * This method does not properly tokenize 5322 in all corner cases; however,
  * this is equivalent in those corner cases to an older header parsing
  * algorithm, so the algorithm should be correct for all real-world cases. The
  * corner cases are as follows:
  * 1. Quoted-strings and domain literals are parsed even if they are within a
  *    comment block (we effectively treat ctext as containing qstring).
  * 2. WSP need not be between a qstring and an atom (a"b" produces two tokens,
  *    a and b). This is an error case, though.
+ * 3. Legacy comments as display names: We recognize address fields with
+ *    comments, and (a) either drop them if inside addr-spec or (b) preserve
+ *    them as part of the display-name if not. If the display-name is empty
+ *    while the last comment is not, we assume it's the legacy form above and
+ *    take the comment content as the display-name.
  *
  * @param {String} value      The header value, post charset conversion but
  *                            before RFC 2047 decoding, to be parsed.
  * @param {String} delimiters A set of delimiters to include as individual
  *                            tokens.
  * @param {Object} opts       A set of options selecting what to parse.
  * @param {Boolean} [opts.qstring]  If true, recognize quoted strings.
  * @param {Boolean} [opts.dliteral] If true, recognize domain literals.
@@ -476,28 +481,38 @@ function getHeaderTokens(value, delimite
       tokenIsStarting = true;
       endQuote = ch;
     } else if (opts.dliteral && ch == '[') {
       // Domain literals end the last token and start a new one.
       tokenIsEnding = true;
       tokenIsStarting = true;
       endQuote = ']';
     } else if (opts.comments && ch == '(') {
-      // Comments are nested (oh joy). They also end the prior token, and need
-      // to be output if the consumer requests it.
+      // Comments are nested (oh joy). We only really care for the outer
+      // delimiter, though, which also ends the prior token and needs to be
+      // output if the consumer requests it.
       commentDepth++;
-      tokenIsEnding = true;
-      isSpecial = true;
+      if (commentDepth == 1) {
+        tokenIsEnding = true;
+        isSpecial = true;
+      } else {
+        tokenIsStarting = true;
+      }
     } else if (opts.comments && ch == ')') {
-      // Comments are nested (oh joy). They also end the prior token, and need
-      // to be output if the consumer requests it.
+      // Comments are nested (oh joy). We only really care for the outer
+      // delimiter, though, which also ends the prior token and needs to be
+      // output if the consumer requests it.
       if (commentDepth > 0)
         commentDepth--;
-      tokenIsEnding = true;
-      isSpecial = true;
+      if (commentDepth == 0) {
+        tokenIsEnding = true;
+        isSpecial = true;
+      } else {
+        tokenIsStarting = true;
+      }
     } else {
       // Not a delimiter, whitespace, comment, domain literal, or quoted string.
       // Must be part of an atom then!
       tokenIsStarting = true;
     }
 
     // If our analysis concluded that we closed an open token, and there is an
     // open token, then yield that token.
@@ -731,105 +746,183 @@ function parseAddressingHeader(header, d
     doRFC2047 = true;
 
   // The final (top-level) results list to append to.
   let results = [];
   // Temporary results
   let addrlist = [];
 
   // Build up all of the values
-  var name = '', groupName = '', address = '';
+  let name = '', groupName = '', localPart = '', address = '', comment = '';
   // Indicators of current state
-  var inAngle = false, needsSpace = false;
+  let inAngle = false, inComment = false, needsSpace = false;
+  let preserveSpace = false;
+  let commentClosed = false;
+
+  // RFC 5322 §3.4 notes that legacy implementations exist which use a simple
+  // recipient form where the addr-spec appears without the angle brackets,
+  // but includes the name of the recipient in parentheses as a comment
+  // following the addr-spec. While we do not create this format, we still
+  // want to recognize it, though.
+  // Furthermore, despite allowing comments in addresses, RFC 5322 §3.4 notes
+  // that legacy implementations may interpret the comment, and thus it
+  // recommends not to use them. (Also, they may be illegal as per RFC 5321.)
+  // While we do not create address fields with comments, we recognize such
+  // comments during parsing and (a) either drop them if inside addr-spec or
+  // (b) preserve them as part of the display-name if not.
+  // If the display-name is empty while the last comment is not, we assume it's
+  // the legacy form above and take the comment content as the display-name.
+  //
+  // When parsing the address field, we at first do not know whether any
+  // strings belong to the display-name (which may include comments) or to the
+  // local-part of an addr-spec (where we ignore comments) until we find an
+  // '@' or an '<' token. Thus, we collect both variants until the fog lifts,
+  // plus the last comment seen.
+  let lastComment = '';
+
+  /**
+   * Add the parsed mailbox object to the address list.
+   * If it's in the legacy form above, correct the display-name.
+   * Also reset any faked flags.
+   * @param {String} displayName   display-name as per RFC 5322
+   * @param {String} addrSpec      addr-spec as per RFC 5322
+   */
+  function addToAddrList(displayName, addrSpec) {
+    if (displayName === '' && lastComment !== '') {
+      // Take last comment content as the display-name.
+      let offset = lastComment[0] === ' ' ? 2 : 1;
+      displayName = lastComment.substr(offset, lastComment.length - offset - 1);
+    }
+    if (displayName !== '' || addrSpec !== '')
+      addrlist.push({name: displayName, email: addrSpec});
+    // Clear pending flags and variables.
+    name = localPart = address = lastComment = '';
+    inAngle = inComment = needsSpace = false;
+  }
+
   // Main parsing loop
   for (let token of getHeaderTokens(header, ":,;<>@",
         {qstring: true, comments: true, dliteral: true, rfc2047: doRFC2047})) {
     if (token === ':') {
       groupName = name;
       name = '';
+      localPart = '';
       // If we had prior email address results, commit them to the top-level.
       if (addrlist.length > 0)
         results = results.concat(addrlist);
       addrlist = [];
     } else if (token === '<') {
       inAngle = true;
     } else if (token === '>') {
       inAngle = false;
+      // Forget addr-spec comments.
+      lastComment = '';
+    } else if (token === '(') {
+      inComment = true;
+      // The needsSpace flag may not always be set even if it should be,
+      // e.g. for a comment behind an angle-addr.
+      // Also, we need to restore the needsSpace flag if we ignore the comment.
+      preserveSpace = needsSpace;
+      if (!needsSpace)
+        needsSpace = name !== '' && name.substr(-1) !== ' ';
+      comment = needsSpace ? ' (' : '(';
+      commentClosed = false;
+    } else if (token === ')') {
+      inComment = false;
+      comment += ')';
+      lastComment = comment;
+      // The comment may be part of the name, but not of the local-part.
+      // Enforce a space behind the comment only when not ignoring it.
+      if (inAngle) {
+        needsSpace = preserveSpace;
+      } else {
+        name += comment;
+        needsSpace = true;
+      }
+      commentClosed = true;
+      continue;
     } else if (token === '@') {
       // An @ means we see an email address. If we're not within <> brackets,
       // then we just parsed an email address instead of a display name. Empty
       // out the display name for the current production.
       if (!inAngle) {
-        address = name;
+        address = localPart;
         name = '';
+        localPart = '';
+        // The remainder of this mailbox is part of an addr-spec.
+        inAngle = true;
       }
       // Keep the local-part quoted if it needs to be.
       if (/[ !()<>\[\]:;@\\,"]/.exec(address) !== null)
         address = '"' + address.replace(/([\\"])/g, "\\$1") + '"';
       address += '@';
     } else if (token === ',') {
       // A comma ends the current name. If we have something that's kind of a
       // name, add it to the result list. If we don't, then our input looks like
       // To: , , -> don't bother adding an empty entry.
-      if (name !== '' || address !== '')
-        addrlist.push({
-          name: name,
-          email: address
-        });
-      name = address = '';
+      addToAddrList(name, address);
     } else if (token === ';') {
       // Add pending name to the list
-      if (name !== '' || address !== '')
-        addrlist.push({name: name, email: address});
+      addToAddrList(name, address);
 
       // If no group name was found, treat the ';' as a ','. In any case, we
       // need to copy the results of addrlist into either a new group object or
       // the main list.
       if (groupName === '') {
         results = results.concat(addrlist);
       } else {
         results.push({
           name: groupName,
           group: addrlist
         });
       }
       // ... and reset every other variable.
       addrlist = [];
-      groupName = name = address = '';
+      groupName = '';
     } else {
-      // This is either the comment delimiters, a quoted-string, or some span of
+      // This is either comment content, a quoted-string, or some span of
       // dots and atoms.
 
       // Ignore the needs space if we're a "close" delimiter token.
-      if (needsSpace && token !== ')' && token.toString()[0] != '.')
-        token = ' ' + token;
+      let spacedToken = token;
+      if (needsSpace && token.toString()[0] != '.')
+        spacedToken = ' ' + spacedToken;
 
       // Which field do we add this data to?
-      if (inAngle || address !== '')
-        address += token;
-      else
-        name += token;
+      if (inComment) {
+        comment += spacedToken;
+      } else if (inAngle) {
+        address += spacedToken;
+      } else {
+        name += spacedToken;
+        // Never add a space to the local-part, if we just ignored a comment.
+        if (commentClosed) {
+          localPart += token;
+          commentClosed = false;
+        } else {
+          localPart += spacedToken;
+        }
+      }
 
       // We need space for the next token if we aren't some kind of comment or
       // . delimiter.
-      needsSpace = token !== '(' && token !== ' (' && token.toString()[0] != '.';
+      needsSpace = token.toString()[0] != '.';
       // The fall-through case after this resets needsSpace to false, and we
       // don't want that!
       continue;
     }
 
     // If we just parsed a delimiter, we don't need any space for the next
     // token.
     needsSpace = false;
   }
 
   // If we're missing the final ';' of a group, assume it was present. Also, add
   // in the details of any email/address that we previously saw.
-  if (name !== '' || address !== '')
-    addrlist.push({name: name, email: address});
+  addToAddrList(name, address);
   if (groupName !== '') {
     results.push({name: groupName, group: addrlist});
     addrlist = [];
   }
 
   // Add the current address list build-up to the list of addresses, and return
   // the whole array to the caller.
   return results.concat(addrlist);
--- a/mailnews/mime/jsmime/test/test_header.js
+++ b/mailnews/mime/jsmime/test/test_header.js
@@ -324,37 +324,55 @@ suite('headerparser', function () {
       ["a < <a@b.c>", [{name: "a", email: "a@b.c"}]],
       ["Name <incomplete@email", [{name: "Name", email: "incomplete@email"}]],
       ["Name <space here@email.invalid>",
         [{name: 'Name', email: '"space here"@email.invalid'}]],
       ["Name <not an email>", [{name: "Name", email: "not an email"}]],
       ["=?UTF-8?Q?Simple?= <a@b.c>",
         [{name: "=?UTF-8?Q?Simple?=", email: "a@b.c"}]],
       ["No email address", [{name: "No email address", email: ""}]],
+      // Handling of comments and legacy display-names as per RFC 5322 §3.4
+      ["(c1)n(c2) <(c3)a(c4)@(c5)b(c6).(c7)d(c8)> (c9(c10)c11)",
+        [{name: "(c1) n (c2) (c9(c10)c11)", email: "a@b.d"}]],
+      ["<(c3)a(c4)@(c5)b(c6).(c7)d(c8)> (c9(c10)c11)",
+        [{name: "(c9(c10)c11)", email: "a@b.d"}]],
+      ["(c3)a(c4)@(c5)b(c6).(c7)d(c8)(c9(c10)c11)",
+        [{name: "c9(c10)c11", email: "a@b.d"}]],
+      ["(c1)n(c2) <(c3)a(c4)@(c5)b(c6).(c7)d(c8)> (c9(c10)c11)(c12)",
+        [{name: "(c1) n (c2) (c9(c10)c11) (c12)", email: "a@b.d"}]],
+      ["<(c3)a(c4)@(c5)b(c6).(c7)d(c8)> (c9(c10)c11)(c12)",
+        [{name: "(c9(c10)c11) (c12)", email: "a@b.d"}]],
+      ["(c3)a(c4)@(c5)b(c6).(c7)d(c8)(c9(c10)c11)(c12)", [{name: "c12", email: "a@b.d"}]],
     ];
     header_tests.forEach(function (data) {
       arrayTest(data, function () {
         assert.deepEqual(headerparser.parseAddressingHeader(data[0], false),
           data[1]);
       });
     });
   });
   suite('parseAddressingHeader (RFC 2047 support)', function () {
     let header_tests = [
       ["Simple <a@b.c>", [{name: "Simple", email: "a@b.c"}]],
       ["=?UTF-8?Q?Simple?= <a@b.c>", [{name: "Simple", email: "a@b.c"}]],
       ["=?UTF-8?Q?=3C@b.c?= <a@b.c>", [{name: "<@b.c", email: "a@b.c"}]],
 
-      // RFC 2047 token should not interfer with lexical processing
+      // RFC 2047 tokens should not interfere with lexical processing
       ["=?UTF-8?Q?a@b.c,?= <b@b.c>", [{name: "a@b.c,", email: "b@b.c"}]],
       ["=?UTF-8?Q?a@b.c=2C?= <b@b.c>", [{name: "a@b.c,", email: "b@b.c"}]],
       ["=?UTF-8?Q?<?= <a@b.c>", [{name: "<", email: "a@b.c"}]],
       ["Simple =?UTF-8?Q?<?= a@b.c>",
         [{name: "", email: '"Simple < a"@b.c'}]],
       ["Tag <=?UTF-8?Q?email?=@b.c>", [{name: "Tag", email: "email@b.c"}]],
+      // handling of comments and legacy display-names as per RFC 5322 §3.4
+      ["jl1@b.c (=?ISO-8859-1?Q?Joe_L=F6we?=)", [{name: "Joe Löwe", email: "jl1@b.c"}]],
+      ["(=?ISO-8859-1?Q?Joe_L=F6we?=) jl2@b.c", [{name: "Joe Löwe", email: "jl2@b.c"}]],
+      ["(=?ISO-8859-1?Q?Joe_L=F6we?=) jl3@b.c (c2)", [{name: "c2", email: "jl3@b.c"}]],
+      ["=?ISO-8859-1?Q?Joe_L=F6we?= <jl3@b.c> (c2)", [{name: "Joe Löwe (c2)", email: "jl3@b.c"}]],
+      ["(=?ISO-8859-1?Q?Joe_L=F6we?=) <jl3@b.c> (c2)", [{name: "(Joe Löwe) (c2)", email: "jl3@b.c"}]],
     ];
     header_tests.forEach(function (data) {
       arrayTest(data, function () {
         assert.deepEqual(headerparser.parseAddressingHeader(data[0], true),
           data[1]);
       });
     });
   });