deleted file mode 100644
--- a/mailnews/mime/Makefile.in
+++ /dev/null
@@ -1,17 +0,0 @@
-#
-# This Source Code Form is subject to the terms of the Mozilla Public
-# License, v. 2.0. If a copy of the MPL was not distributed with this
-# file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-DEPTH = @DEPTH@
-topsrcdir = @top_srcdir@
-srcdir = @srcdir@
-VPATH = @srcdir@
-
-include $(DEPTH)/config/autoconf.mk
-
-libs:: jsmime/mimeParser.jsm
- $(call install_cmd,$(IFLAGS1) $^ $(FINAL_TARGET)/modules)
-
-include $(topsrcdir)/config/rules.mk
-
new file mode 100644
--- /dev/null
+++ b/mailnews/mime/jsmime/LICENSE
@@ -0,0 +1,19 @@
+Copyright (c) 2013 Joshua Cranmer
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
rename from mailnews/mime/jsmime/README
rename to mailnews/mime/jsmime/README.md
--- a/mailnews/mime/jsmime/README
+++ b/mailnews/mime/jsmime/README.md
@@ -1,89 +1,59 @@
-===============
-= Code Layout =
-===============
+Code Layout
+===========
-This directory consists of a MIME parser which is primarily implemented in JS
-and is designed to use either HTML 5 specifications or texts that are intendend
-to become HTML 5 specifications.
+JSMime is a MIME parsing and composition library that is written completely in
+JavaScript using ES6 functionality and WebAPIs (where such APIs exist). There
+are a few features for which a standardized WebAPI does not exist; for these,
+external JavaScript libraries are used.
The MIME parser consists of three logical phases of translation:
+
1. Build the MIME (and pseudo-MIME) tree.
-2. Convert the MIME tree into a body-and-attachments view.
-3. Use the result to drive the message view.
+2. Convert the MIME tree into a list of body parts and attachments.
+3. Use the result to drive a displayed version of the message.
-The first stage is located in mimeParserCore.js. The later stages are not yet
-implemented in JS. This file should not be included directly by consumers, who
-should instead use mimeParser.jsm, which contains easier-to-use APIs and also
-necessary glue components for use in JS modules or component contexts.
+The first stage is located in `mimeparser.js`. The latter stages have yet to be
+implemented.
-=============================
-= Underlying specifications =
-=============================
+Dependencies
+============
-The specification of MIME is complicated and is spread around a very large
-number of references. For a single guide to everywhere, what follows is a list
-of all references used in the course of this parser, structured roughly
-according to how they are used:
-[NOTE: specifications which are marked with an X instead are not integrated
- in this version of the code, but will be supported in a later version]
-Basic format of bodies:
-* RFC 2045 -- MIME Part 1, Format of Internet Message Bodies
-* RFC 2046 -- MIME Part 2, Media Types
+This code depends on the following ES6 features and Web APIs:
+* ES6 generators
+* ES6 Map and Set
+* ES6 @@iterator support (especially for Map and Set)
+* ES6 let
+* ES6 let-destructuring
+* ES6 const
+* Typed arrays (predominantly Uint8Array)
+* btoa, atob (found on the global Window or WorkerGlobalScope objects)
+* TextDecoder
-Structured header interpretation:
-X RFC 2047 -- MIME Part 3, Message Header Extensions for Non-ASCII Text
-X RFC 2231 -- MIME Parameter Value and Encoded Word Extensions
-* RFC 5322 -- Internet Message Format (see also RFC 2822, RFC 822)
-* RFC 5536 -- Netnews Article Format (see also RFC 1036)
-X RFC 6532 -- Internationalized Email Headers (see also RFC 5335)
-
-Body decoding:
-X <http://pubs.opengroup.org/onlinepubs/7908799/xcu/uuencode.html> -- Uuencode
-X <http://www.yenc.org/yenc-draft.1.3.txt> -- yEnc
-X RFC 1741 -- BinHex
-X <http://msdn.microsoft.com/en-us/library/cc425498%28v=exchg.80%29.aspx> --
- TNEF
-X RFC 3165 -- MIME security with PGP
-X RFC 4880 -- OpenPGP (see also RFC 2440)
-X RFC 5751 -- S/MIME (see also RFC 3851, RFC 2633)
+Versions and API stability
+==========================
-Other:
-X RFC 2387 -- multipart/related
-X RFC 2392 -- Content-ID and Message-ID URLs
-X RFC 2557 -- MIME-encapsulated aggegrate documents
-* RFC 3501 -- IMAPv4rev1 [partial basis for part numbering]
-X RFC 3676 -- text/plain format (format=flowed) (see also RFC 2646)
-X RFC 3798 -- Message delivery notification (see also RFC 5337 and RFC 6533)
-
-While the above is a fairly comprehensive list of specifications, it turns
-out that a somewhat different structure can be found in practice. Following
-the general principle of "Be liberal in what you accept and conservative in
-what you send," this parser will attempt to make some sense of anything
-passed into it. Some pathologically bad cases that the specification gives no
-guidance to (such as having nested multipart/* bodies with the same boundary
-permitted) are likely to provide inconsistent results with different parsers.
+As APIs require some use and experimentation to get a feel for what works best,
+the APIs may change between successive version updates as uses indicate
+substandard or error-prone APIs. Therefore, there will be no guarantee of API
+stability until version 1.0 is released.
-However, what follows is a list of modifications to the above specifications
-that are necessary to account for messages which have been observed to cause
-issues in the real world:
-* All three line conventions are treated as a CRLF (\r, \n, \r\n). In this
- parser, it is possible to use a mixture of line endings in the same file,
- although this is highly unlikely to come up in practice.
-* The input text need not be either ASCII or UTF-8 (permissible under the
- newer EAI specifications). Some tools that are insufficiently aware of i18n
- issues with respect to MIME may end up emitting non-ASCII (or non-UTF-8)
- data. In this parser, all header data is passed through as-is. Header names
- are canonicalized to lowercase using .toLowerCase(), which causes case
- conversion for non-ASCII charsets as well. However, even under EAI, header
- names are specified to be pure ASCII so this should not be an issue in
- practice. The body is left alone unless a charset is specified and recoding
- is explicitly requested.
-* CFWS is permitted in fewer places than the specifications require. This was
- done to match other parsers (including the one this replaced, among others).
- In particular, the Content-Type parameter needs to be a single run of text, so
- "multipart / mixed" would be treated as an invalid type.
-* If the first line of a headers block starts with the Berkely mailbox delimiter
- (From followed by a space), it is ignored.
-* A message/rfc822-like part may be encoded in quoted-printable or base64, while
- RFC 6532 only permits this for message/global.
-* XXX: RFC 2047 encoded words may contain embedded spaces.
+This code is being initially developed as an effort to replace the MIME library
+within Thunderbird. New versions will be released as needed to bring new support
+into the Thunderbird codebase; version 1.0 will correspond to the version where
+feature-parity with the old MIME library is reached. The features that will be
+added before 1.0 are the following:
+* S/MIME encryption and decryption
+* PGP encryption and decryption
+* IMAP parts-on-demand support
+* Support for text/plain to HTML conversion for display
+* Support for HTML downgrading and sanitization for display
+* Support for all major multipart types
+* Ability to convert HTML documents to text/plain and multipart/related
+* Support for building outgoing messages
+* Support for IDN and EAI
+* yEnc and uuencode decoding support
+* Support for date and Message-ID/References-like headers
+
+Features other than these may be added before version 1.0 is released (most
+notably TNEF decoding support), but they are not considered necessary for a
+version 1.0 release.
rename from mailnews/mime/jsmime/mimeParserCore.js
rename to mailnews/mime/jsmime/jsmime.js
--- a/mailnews/mime/jsmime/mimeParserCore.js
+++ b/mailnews/mime/jsmime/jsmime.js
@@ -1,36 +1,1202 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this file,
- * You can obtain one at http://mozilla.org/MPL/2.0/. */
+(function (root, fn) {
+ if (typeof define === 'function' && define.amd) {
+ define(fn);
+ } else if (typeof module !== 'undefined' && module.exports) {
+ module.exports = fn();
+ } else {
+ root.jsmime = fn();
+ }
+}(this, function() {
+ var mods = {};
+ function req(id) { // Module lookup: returns the mods entry for id, ignoring a leading "./".
+ return mods[id.replace(/^\.\//, '')];
+ }
+
+ function def(id, fn) { // Calls the factory fn once with req and stores its result in mods under id.
+ mods[id] = fn(req);
+ }
+def('mimeutils', function() {
+"use strict";
+
+/**
+ * Decode a quoted-printable buffer into a binary string. Each =XX escape is
+ * replaced by the character with hex code XX; soft line breaks are removed.
+ * @param buffer {BinaryString} The string to decode.
+ * @param more {Boolean} This argument is ignored.
+ * @returns {Array(BinaryString, BinaryString)} The first element of the array
+ * is the decoded string. The second element is always the empty
+ * string.
+ */
+function decode_qp(buffer, more) {
+ // Unlike base64, quoted-printable isn't stateful across multiple lines, so
+ // there is no need to buffer input, so we can always ignore more.
+ let decoded = buffer.replace(
+ // Match =<hex><hex>, or =<wsp> followed by CRLF, CR, LF, or end of input
+ /=([0-9A-F][0-9A-F]|[ \t]*(\r\n|[\r\n]|$))/gi,
+ function replace_chars(match, param) {
+ // If the captured text is only whitespace (or empty, at end of input), the
+ // match is a soft line break, so drop all of it.
+ if (param.trim().length == 0)
+ return '';
+ return String.fromCharCode(parseInt(param, 16));
+ });
+ return [decoded, ''];
+}
+
+/**
+ * Decode a base64 buffer into a binary string. Unlike window.atob, the buffer
+ * may contain non-base64 characters that will be ignored.
+ *
+ * @param buffer {BinaryString} The string to decode.
+ * @param more {Boolean} If true, we expect that this function could be
+ * called again and should retain extra data. If
+ * false, we should flush all pending output.
+ * @returns {Array(BinaryString, BinaryString)} The first element of the array
+ * is the decoded string. The second element contains the data that
+ * could not be decoded and needs to be retained for the next call.
+ */
+function decode_base64(buffer, more) {
+ // Drop all non-base64 characters
+ let sanitize = buffer.replace(/[^A-Za-z0-9+\/=]/g,'');
+ // We need to decode in groups of 4 chars. If we don't have enough, leave the
+ // excess for the next call. If no more input is coming, the excess is dropped.
+ let excess = sanitize.length % 4;
+ if (excess != 0 && more)
+ buffer = sanitize.slice(-excess);
+ else
+ buffer = '';
+ sanitize = sanitize.substring(0, sanitize.length - excess);
+ // Use the atob function we (ought to) have in global scope.
+ return [atob(sanitize), buffer];
+}
+
+/**
+ * Converts a binary string into a Uint8Array buffer.
+ *
+ * @param buffer {BinaryString} The string to convert.
+ * @returns {Uint8Array} One element per character, set from its char code.
+ */
+function stringToTypedArray(buffer) {
+ var typedarray = new Uint8Array(buffer.length);
+ for (var i = 0; i < buffer.length; i++)
+ typedarray[i] = buffer.charCodeAt(i);
+ return typedarray;
+}
+
+/**
+ * Converts a Uint8Array buffer to a binary string.
+ *
+ * @param buffer {Uint8Array} The bytes to convert.
+ * @returns {BinaryString} One character per element of the buffer.
+ */
+function typedArrayToString(buffer) {
+ var string = '';
+ for (var i = 0; i < buffer.length; i+= 100) // converted 100 elements per call
+ string += String.fromCharCode.apply(undefined, buffer.subarray(i, i + 100));
+ return string;
+}
+
+return {
+ decode_base64: decode_base64,
+ decode_qp: decode_qp,
+ stringToTypedArray: stringToTypedArray,
+ typedArrayToString: typedArrayToString,
+};
+});
+/**
+ * This file implements knowledge of how to encode or decode structured headers
+ * for several key headers. It is not meant to be used externally to jsmime.
+ */
+
+def('structuredHeaders', function (require) {
+"use strict";
+
+var structuredDecoders = new Map();
+var structuredEncoders = new Map();
+var preferredSpellings = new Map();
+
+function addHeader(name, decoder, encoder) {
+ var lowerName = name.toLowerCase();
+ structuredDecoders.set(lowerName, decoder);
+ structuredEncoders.set(lowerName, encoder);
+ preferredSpellings.set(lowerName, name);
+}
+
+
+// Addressing headers: We assume that they can be specified in 1* form (this is
+// false for From, but it's close enough to the truth that it shouldn't matter).
+// There is no need to specialize the results for the header, so just pun it
+// back to parseAddressingHeader.
+function parseAddress(value) {
+ let results = [];
+ let headerparser = this;
+ return value.reduce(function (results, header) {
+ return results.concat(headerparser.parseAddressingHeader(header, true));
+ }, []);
+}
+function writeAddress(value) {
+ // Make sure the input is an array (accept a single entry)
+ if (!Array.isArray(value))
+ value = [value];
+ this.addAddresses(value);
+}
+
+// Addressing headers from RFC 5322:
+addHeader("Bcc", parseAddress, writeAddress);
+addHeader("Cc", parseAddress, writeAddress);
+addHeader("From", parseAddress, writeAddress);
+addHeader("Reply-To", parseAddress, writeAddress);
+addHeader("Resent-Bcc", parseAddress, writeAddress);
+addHeader("Resent-Cc", parseAddress, writeAddress);
+addHeader("Resent-From", parseAddress, writeAddress);
+addHeader("Resent-Sender", parseAddress, writeAddress);
+addHeader("Resent-To", parseAddress, writeAddress);
+addHeader("Sender", parseAddress, writeAddress);
+addHeader("To", parseAddress, writeAddress);
+// From RFC 5536:
+addHeader("Approved", parseAddress, writeAddress);
+// From RFC 3798:
+addHeader("Disposition-Notification-To", parseAddress, writeAddress);
+// Non-standard headers:
+addHeader("Delivered-To", parseAddress, writeAddress);
+addHeader("Return-Receipt-To", parseAddress, writeAddress);
+
+
+// Parameter-based headers. Note that all parameters are slightly different, so
+// we use slightly different variants here.
+function parseParameterHeader(value, do2231, do2047) {
+ // Only use the first header for parameters; ignore subsequent redefinitions.
+ return this.parseParameterHeader(value[0], do2231, do2047);
+}
+
+// RFC 2045
+function parseContentType(value) {
+ let params = parseParameterHeader.call(this, value, false, false);
+ let origtype = params.preSemi;
+ let parts = origtype.split('/');
+ if (parts.length != 2) {
+ // Malformed. Return to text/plain. Evil, ain't it?
+ params = new Map();
+ parts = ["text", "plain"];
+ }
+ let mediatype = parts[0].toLowerCase();
+ let subtype = parts[1].toLowerCase();
+ let type = mediatype + '/' + subtype;
+ let structure = new Map();
+ structure.mediatype = mediatype;
+ structure.subtype = subtype;
+ structure.type = type;
+ params.forEach(function (value, name) {
+ structure.set(name.toLowerCase(), value);
+ });
+ return structure;
+}
+structuredDecoders.set("Content-Type", parseContentType);
+
+// Unstructured headers (just decode RFC 2047 for the first header value)
+function parseUnstructured(values) {
+ return this.decodeRFC2047Words(values[0]);
+}
+function writeUnstructured(value) {
+ this.addUnstructured(value);
+}
+
+// RFC 5322
+addHeader("Comments", parseUnstructured, writeUnstructured);
+addHeader("Keywords", parseUnstructured, writeUnstructured);
+addHeader("Subject", parseUnstructured, writeUnstructured);
+
+// RFC 2045
+addHeader("Content-Description", parseUnstructured, writeUnstructured);
+
+
+
+// Miscellaneous headers (those that don't fall under the above schemes):
+
+// RFC 2047
+structuredDecoders.set("Content-Transfer-Encoding", function (values) {
+ return values[0].toLowerCase();
+});
+structuredEncoders.set("Content-Transfer-Encoding", writeUnstructured);
+
+return Object.freeze({
+ decoders: structuredDecoders,
+ encoders: structuredEncoders,
+ spellings: preferredSpellings,
+});
+
+});
+def('headerparser', function(require) {
+/**
+ * This file implements the structured decoding of message header fields. It is
+ * part of the same system as found in mimeutils.js, and occasionally makes
+ * references to globals defined in that file or other dependencies thereof. See
+ * documentation in that file for more information about external dependencies.
+ */
+
+"use strict";
+var mimeutils = require('./mimeutils');
+
+/**
+ * This is the API that we ultimately return.
+ *
+ * We define it as a global here, because we need to pass it as a |this|
+ * argument to a few functions.
+ */
+var headerparser = {};
+
+/**
+ * Tokenizes a message header into a stream of tokens as a generator.
+ *
+ * The low-level tokens are meant to loosely correspond to the tokens as
+ * defined in RFC 5322. For reasons of saner error handling, however, the two
+ * definitions are not exactly equivalent. The tokens we emit are the following:
+ * 1. Special delimiters: Any char in the delimiters string is emitted as a
+ * string by itself. Parsing parameter headers, for example, would use ";="
+ * for the delimiter string.
+ * 2. Quoted-strings (if opts.qstring is true): A string which is surrounded by
+ * double quotes. Escapes in the string are omitted when returning.
+ * 3. Domain Literals (if opts.dliteral is true): A string which matches the
+ * dliteral construct in RFC 5322. Escapes here are NOT omitted.
+ * 4. Comments (if opts.comments is true): Comments are handled specially. In
+ * practice, decoding the comments in To headers appears to be necessary, so
+ * comments are not stripped in the output value. Instead, they are emitted
+ * as if they are a special delimiter. However, all delimiters found within a
+ * comment are returned as if they were a quoted string, so that consumers
+ * ignore delimiters within comments. If ignoring comment text completely is
+ * desired, upon seeing a "(" token, consumers should ignore all tokens until
+ * a matching ")" is found (note that comments can be nested).
+ * 5. RFC 2047 encoded-words (if opts.rfc2047 is true): These are strings which
+ * are the decoded contents of RFC 2047's =?UTF-8?Q?blah?=-style words.
+ * 6. Atoms: Atoms are defined not in the RFC 5322 sense, but rather as the
+ * longest sequence of characters that is neither whitespace nor any of the
+ * special characters above.
+ *
+ * The intended interpretation of the stream of output tokens is that they are
+ * the portions of text which can be safely wrapped in whitespace with no ill
+ * effect. The output tokens are either strings (which represent individual
+ * delimiter tokens) or instances of a class that has a customized .toString()
+ * for output (for quoted strings, atoms, domain literals, and encoded-words).
+ * Checking for a delimiter MUST use the strictly equals operator (===). For
+ * example, the proper way to call this method is as follows:
+ *
+ * for (let token of getHeaderTokens(rest, ";=", opts)) {
+ * if (token === ';') {
+ * // This represents a literal ';' in the string
+ * } else if (token === '=') {
+ * // This represents a literal '=' in the string
+ * } else {
+ * // If a ";" qstring was parsed, we fall through to here!
+ * token = token.toString();
+ * }
+ * }
+ *
+ * This method does not properly tokenize 5322 in all corner cases; however,
+ * this is equivalent in those corner cases to an older header parsing
+ * algorithm, so the algorithm should be correct for all real-world cases. The
+ * corner cases are as follows:
+ * 1. Quoted-strings and domain literals are parsed even if they are within a
+ * comment block (we effectively treat ctext as containing qstring).
+ * 2. WSP need not be between a qstring and an atom (a"b" produces two tokens,
+ * a and b). This is an error case, though.
+ *
+ * @param {String} value The header value, post charset conversion but
+ * before RFC 2047 decoding, to be parsed.
+ * @param {String} delimiters A set of delimiters to include as individual
+ * tokens.
+ * @param {Object} opts A set of options selecting what to parse.
+ * @param {Boolean} [opts.qstring] If true, recognize quoted strings.
+ * @param {Boolean} [opts.dliteral] If true, recognize domain literals.
+ * @param {Boolean} [opts.comments] If true, recognize comments.
+ * @param {Boolean} [opts.rfc2047] If true, parse and decode RFC 2047
+ * encoded-words.
+ * @returns {(Token|String)*} A sequence of Token objects (which have a
+ * toString method returning their value) or String
+ * objects (representing delimiters).
+ */
+function* getHeaderTokens(value, delimiters, opts) {
+ /// Represents a non-delimiter token
+ function Token(token) {
+ // Unescape all quoted pairs. Any trailing \ is deleted.
+ this.token = token.replace(/\\(.?)/g, "$1");
+ }
+ Token.prototype.toString = function () { return this.token; };
+
+ // The start of the current token (e.g., atoms, strings)
+ let tokenStart = undefined;
+ // The set of whitespace characters, as defined by RFC 5322
+ let wsp = " \t\r\n";
+ // If we are a domain literal ([]) or a quoted string ("), this is set to the
+ // character to look for at the end.
+ let endQuote = undefined;
+ // The current depth of comments, since they can be nested. A value 0 means we
+ // are not in a comment.
+ let commentDepth = 0;
+
+ // Iterate over every character one character at a time.
+ let length = value.length;
+ for (let i = 0; i < length; i++) {
+ let ch = value[i];
+ // If we see a \, no matter what context we are in, ignore the next
+ // character.
+ if (ch == '\\') {
+ i++;
+ continue;
+ }
+
+ // If we are in a qstring or a dliteral, process the character only if it is
+ // what we are looking for to end the quote.
+ if (endQuote !== undefined) {
+ if (ch == endQuote && ch == '"') {
+ // Quoted strings don't include their delimiters.
+ let text = value.slice(tokenStart + 1, i);
+
+ // If RFC 2047 is enabled, decode the qstring only if the entire string
+ // appears to be a 2047 token. Don't unquote just yet (this will better
+ // match people who incorrectly treat RFC 2047 decoding as a separate,
+ // earlier step).
+ if (opts.rfc2047 && text.startsWith("=?") && text.endsWith("?="))
+ text = decodeRFC2047Words(text);
+
+ yield new Token(text);
+ endQuote = undefined;
+ tokenStart = undefined;
+ } else if (ch == endQuote && ch == ']') {
+ // Domain literals include their delimiters.
+ yield new Token(value.slice(tokenStart, i + 1));
+ endQuote = undefined;
+ tokenStart = undefined;
+ }
+ // Avoid any further processing.
+ continue;
+ }
+
+ // If we can match the RFC 2047 encoded-word pattern, we need to decode the
+ // entire word or set of words.
+ if (opts.rfc2047 && ch == '=' && i + 1 < value.length && value[i + 1] == '?') {
+ // RFC 2047 tokens separated only by whitespace are conceptually part of
+ // the same output token, so we need to decode them all at once.
+ let encodedWordsRE = /([ \t\r\n]*=\?[^?]*\?[BbQq]\?[^?]*\?=)+/;
+ let result = encodedWordsRE.exec(value.slice(i));
+ if (result !== null) {
+ // If we were in the middle of a prior token (i.e., something like
+ // foobar=?UTF-8?Q?blah?=), yield the previous segment as a token.
+ if (tokenStart !== undefined) {
+ yield new Token(value.slice(tokenStart, i));
+ tokenStart = undefined;
+ }
+
+ // Find out how much we need to decode...
+ let encWordsLen = result[0].length;
+ let string = decodeRFC2047Words(value.slice(i, i + encWordsLen),
+ "UTF-8");
+ // Don't make a new Token variable, since we do not want to unescape the
+ // decoded string.
+ yield { toString: function() { return string; }};
+
+ // Skip everything we decoded. The -1 is because we don't want to
+ // include the starting character.
+ i += encWordsLen - 1;
+ continue;
+ }
+
+ // If we are here, then we failed to match the simple 2047 encoded-word
+ // regular expression, despite the fact that it matched the =? at the
+ // beginning. Fall through and treat the text as if we aren't trying to
+ // decode RFC 2047.
+ }
+
+ // If we reach this point, we're not inside of quoted strings, domain
+ // literals, or RFC 2047 encoded-words. This means that the characters we
+ // parse are potential delimiters (unless we're in comments, where
+ // everything starts to go really wonky). Several things could happen,
+ // depending on the kind of character we read and whether or not we were in
+ // the middle of a token. The three values here tell us what we could need
+ // to do at this point:
+ // tokenIsEnding: The current character is not able to be accumulated to an
+ // atom, so we need to flush the atom if there is one.
+ // tokenIsStarting: The current character could begin an atom (or
+ // anything that requires us to mark the starting point), so we need to save
+ // the location.
+ // isSpecial: The current character is a delimiter that needs to be output.
+ let tokenIsEnding = false, tokenIsStarting = false, isSpecial = false;
+ if (wsp.contains(ch)) {
+ // Whitespace ends current tokens, doesn't emit anything.
+ tokenIsEnding = true;
+ } else if (commentDepth == 0 && delimiters.contains(ch)) {
+ // Delimiters end the current token, and need to be output. They do not
+ // apply within comments.
+ tokenIsEnding = true;
+ isSpecial = true;
+ } else if (opts.qstring && ch == '"') {
+ // Quoted strings end the last token and start a new one.
+ tokenIsEnding = true;
+ tokenIsStarting = true;
+ endQuote = ch;
+ } else if (opts.dliteral && ch == '[') {
+ // Domain literals end the last token and start a new one.
+ tokenIsEnding = true;
+ tokenIsStarting = true;
+ endQuote = ']';
+ } else if (opts.comments && ch == '(') {
+ // Comments are nested (oh joy). They also end the prior token, and need
+ // to be output if the consumer requests it.
+ commentDepth++;
+ tokenIsEnding = true;
+ isSpecial = true;
+ } else if (opts.comments && ch == ')') {
+ // Comments are nested (oh joy). They also end the prior token, and need
+ // to be output if the consumer requests it.
+ if (commentDepth > 0)
+ commentDepth--;
+ tokenIsEnding = true;
+ isSpecial = true;
+ } else {
+ // Not a delimiter, whitespace, comment, domain literal, or quoted string.
+ // Must be part of an atom then!
+ tokenIsStarting = true;
+ }
+
+ // If our analysis concluded that we closed an open token, and there is an
+ // open token, then yield that token.
+ if (tokenIsEnding && tokenStart !== undefined) {
+ yield new Token(value.slice(tokenStart, i));
+ tokenStart = undefined;
+ }
+ // If we need to output a delimiter, do so.
+ if (isSpecial)
+ yield ch;
+ // If our analysis concluded that we could open a token, and no token is
+ // opened yet, then start the token.
+ if (tokenIsStarting && tokenStart === undefined) {
+ tokenStart = i;
+ }
+ }
+
+ // That concludes the loop! If there is a currently open token, close that
+ // token now.
+ if (tokenStart !== undefined) {
+ // Error case: a partially-open quoted string is assumed to have a trailing
+ // " character.
+ if (endQuote == '"')
+ yield new Token(value.slice(tokenStart + 1));
+ else
+ yield new Token(value.slice(tokenStart));
+ }
+}
+
+/**
+ * Convert a header value into UTF-16 strings by attempting to decode as UTF-8
+ * or another legacy charset. If the header is valid UTF-8, it will be decoded
+ * as UTF-8; if it is not, the fallbackCharset will be attempted instead.
+ * Values with no characters in the range \x80-\xff are returned unchanged.
+ * @param {String} headerValue The header (as a binary string) to attempt
+ * to convert to UTF-16.
+ * @param {String} [fallbackCharset] The optional charset to try if UTF-8
+ * doesn't work.
+ * @returns {String} The UTF-16 representation of the string above.
+ */
+function convert8BitHeader(headerValue, fallbackCharset) {
+ // Only attempt to convert the headerValue if it contains non-ASCII
+ // characters.
+ if (/[\x80-\xff]/.exec(headerValue)) {
+ // First convert the value to a typed-array for TextDecoder.
+ let typedarray = mimeutils.stringToTypedArray(headerValue);
+
+ // Don't try UTF-8 as fallback (redundant), and don't try UTF-16 or UTF-32
+ // either, since they radically change header interpretation.
+ // If we have a fallback charset, we want to know if decoding will fail;
+ // otherwise, we want to replace with substitution chars.
+ let hasFallback = fallbackCharset &&
+ !fallbackCharset.toLowerCase().startsWith("utf");
+ let utf8Decoder = new TextDecoder("utf-8", {fatal: hasFallback});
+ try {
+ headerValue = utf8Decoder.decode(typedarray);
+ } catch (e) {
+ // Strict UTF-8 decoding failed; retry with the fallback. NOTE(review): an unrecognized label presumably throws here -- confirm.
+ let decoder = new TextDecoder(fallbackCharset, {fatal: false});
+ headerValue = decoder.decode(typedarray);
+ }
+ }
+ return headerValue;
+}
+
+/**
+ * Decodes all RFC 2047 encoded-words in the input string. The string does not
+ * necessarily have to contain any such words. This is useful, for example, for
+ * parsing unstructured headers.
+ *
+ * @param {String} headerValue The header which may contain RFC 2047 encoded-
+ * words.
+ * @returns {String} A full UTF-16 string with all encoded words expanded.
+ */
+function decodeRFC2047Words(headerValue) {
+ // Unfortunately, many implementations of RFC 2047 encoding are actually wrong
+ // in that they split over-long encoded words without regard for whether or
+ // not the split point is in the middle of a multibyte character. Therefore,
+ // we need to be able to handle these situations gracefully. This is done by
+ // using the decoder in streaming mode so long as the next token is another
+ // 2047 token with the same charset.
+ let lastCharset = '', currentDecoder = undefined;
+
+  /**
+   * Decode one RFC 2047 token. Returns the decoded text, or false if the token
+   * is malformed or its charset is unknown. It is defined inline so that it can
+   * close over lastCharset/currentDecoder, which carry state between tokens.
+   */
+  function decode2047Token(token) {
+    let tokenParts = token.split("?");
+
+    // If it's obviously not a valid token, return false immediately.
+    if (tokenParts.length != 5 || tokenParts[4] != '=')
+      return false;
+
+    // The charset parameter is defined in RFC 2231 to be charset or
+    // charset*language. We only care about the charset here, so ignore any
+    // language parameter that gets passed in.
+    let charset = tokenParts[1].split('*', 1)[0];
+    let encoding = tokenParts[2], text = tokenParts[3];
+
+    let buffer;
+    if (encoding == 'B' || encoding == 'b') {
+      // Decode base64. If there's any non-base64 data, treat the string as
+      // an illegal token.
+      if (/[^A-Za-z0-9+\/=]/.exec(text))
+        return false;
+
+      // Base64 strings must be a length of multiple 4, but it seems that some
+      // mailers accidentally insert one too many `=' chars. Gracefully handle
+      // this case; see bug 227290 for more information.
+      if (text.length % 4 == 1 && text.charAt(text.length - 1) == '=')
+        text = text.slice(0, -1);
+
+      // Decode the string
+      buffer = mimeutils.decode_base64(text, false)[0];
+    } else if (encoding == 'Q' || encoding == 'q') {
+      // Q encoding here looks a lot like quoted-printable text. The differences
+      // between quoted-printable and this are that quoted-printable allows you
+      // to quote newlines (this doesn't), while this replaces spaces with _.
+      // We can reuse the decode_qp code here, since newlines are already
+      // stripped from the header. There is one edge case that could trigger a
+      // false positive, namely when you have a single = or an = followed by
+      // whitespace at the end of the string. Such an input string is already
+      // malformed to begin with, so stripping the = and following input in that
+      // case should not be an important loss.
+      buffer = mimeutils.decode_qp(text.replace(/_/g, ' '), false)[0];
+    } else {
+      return false;
+    }
+
+    // Make the buffer be a typed array for what follows
+    buffer = mimeutils.stringToTypedArray(buffer);
+
+    // If we cannot reuse the last decoder, flush out whatever remains.
+    var output = '';
+    if (charset != lastCharset && currentDecoder) {
+      output += currentDecoder.decode();
+      currentDecoder = null;
+    }
+
+    // Initialize the decoder for this token.
+    lastCharset = charset;
+    if (!currentDecoder) {
+      try {
+        currentDecoder = new TextDecoder(charset, {fatal: false});
+      } catch (e) {
+        // We don't recognize the charset, so give up.
+        return false;
+      }
+    }
+
+    // Convert this token with the buffer. Note the stream parameter--although
+    // RFC 2047 tokens aren't supposed to break in the middle of a multibyte
+    // character, a lot of software messes up and does so because it's hard not
+    // to (see headeremitter.js for exactly how hard!).
+    return output + currentDecoder.decode(buffer, {stream: true});
+  }
+
+ // The first step of decoding is to split the string into RFC 2047 and
+ // non-RFC 2047 tokens. RFC 2047 tokens look like the following:
+ // =?charset?c?text?=, where c is one of B, b, Q, and q. The split regex does
+ // some amount of semantic checking, so that malformed RFC 2047 tokens will
+ // get ignored earlier.
+ let components = headerValue.split(/(=\?[^?]*\?[BQbq]\?[^?]*\?=)/);
+ for (let i = 0; i < components.length; i++) {
+ if (components[i].substring(0, 2) == "=?") {
+ let decoded = decode2047Token(components[i]);
+ if (decoded !== false) {
+ // If 2047 decoding succeeded for this bit, rewrite the original value
+ // with the proper decoding.
+ components[i] = decoded;
+
+ // We're done processing, so continue to the next link.
+ continue;
+ }
+ } else if (/^[ \t\r\n]*$/.exec(components[i])) {
+ // Whitespace-only tokens get squashed into nothing, so 2047 tokens will
+ // be concatenated together.
+ components[i] = '';
+ continue;
+ }
+
+ // If there was stuff left over from decoding the last 2047 token, flush it
+ // out.
+ lastCharset = '';
+ if (currentDecoder) {
+ components[i] = currentDecoder.decode() + components[i];
+ currentDecoder = null;
+ }
+ }
+
+ // After the for loop, we'll have a set of decoded strings. Concatenate them
+ // together to make the return value.
+ return components.join('');
+}
+
+///////////////////////////////
+// Structured field decoders //
+///////////////////////////////
+
+/**
+ * Extract a list of addresses from a header which matches the RFC 5322
+ * address-list production, possibly doing RFC 2047 decoding along the way.
+ *
+ * The output of this method is an array of elements corresponding to the
+ * addresses and the groups in the input header. An address is represented by
+ * an object of the form:
+ * {
+ * name: The display name of the address
+ * email: The address of the object
+ * }
+ * while a group is represented by an object of the form:
+ * {
+ * name: The display name of the group
+ *   group: An array of address objects for members in the group.
+ * }
+ *
+ * @param {String} header The MIME header text to be parsed
+ * @param {Boolean} doRFC2047 If true, decode RFC 2047 parameters found in the
+ * header.
+ * @returns {(Address|Group)[]} An array of the addresses found in the header,
+ * where each element is of the form mentioned
+ * above.
+ */
+function parseAddressingHeader(header, doRFC2047) {
+  // doRFC2047 defaults to true when the caller omits it.
+  if (doRFC2047 === undefined)
+    doRFC2047 = true;
+
+  // The final (top-level) results list to append to.
+  let results = [];
+  // Addresses collected so far for the current group (or outside any group).
+  let addrlist = [];
+
+  // Build up all of the values
+  var name = '', groupName = '', address = '';
+  // Indicators of current state
+  var inAngle = false, needsSpace = false;
+  // Main parsing loop
+  for (let token of getHeaderTokens(header, ":,;<>@",
+       {qstring: true, comments: true, dliteral: true, rfc2047: doRFC2047})) {
+    if (token === ':') {
+      groupName = name;
+      name = '';
+      // If we had prior email address results, commit them to the top-level.
+      if (addrlist.length > 0)
+        results = results.concat(addrlist);
+      addrlist = [];
+    } else if (token === '<') {
+      inAngle = true;
+    } else if (token === '>') {
+      inAngle = false;
+    } else if (token === '@') {
+      // An @ means we see an email address. If we're not within <> brackets,
+      // then we just parsed an email address instead of a display name. Empty
+      // out the display name for the current production.
+      if (!inAngle) {
+        address = name;
+        name = '';
+      }
+      // Keep the local-part quoted if it needs to be.
+      if (/[ !()<>\[\]:;@\\,"]/.exec(address) !== null)
+        address = '"' + address.replace(/([\\"])/g, "\\$1") + '"';
+      address += '@';
+    } else if (token === ',') {
+      // A comma ends the current name. If we have something that's kind of a
+      // name, add it to the result list. If we don't, then our input looks like
+      // To: , , -> don't bother adding an empty entry.
+      if (name !== '' || address !== '')
+        addrlist.push({
+          name: name,
+          email: address
+        });
+      name = address = '';
+    } else if (token === ';') {
+      // Add pending name to the list
+      if (name !== '' || address !== '')
+        addrlist.push({name: name, email: address});
+
+      // If no group name was found, treat the ';' as a ','. In any case, we
+      // need to copy the results of addrlist into either a new group object or
+      // the main list.
+      if (groupName === '') {
+        results = results.concat(addrlist);
+      } else {
+        results.push({
+          name: groupName,
+          group: addrlist
+        });
+      }
+      // ... and reset every other variable.
+      addrlist = [];
+      groupName = name = address = '';
+    } else {
+      // This is either the comment delimiters, a quoted-string, or some span of
+      // dots and atoms.
+
+      // Skip the pending space before a ')' or a token that starts with '.'.
+      if (needsSpace && token !== ')' && token.toString()[0] != '.')
+        token = ' ' + token;
+
+      // Which field do we add this data to?
+      if (inAngle || address !== '')
+        address += token;
+      else
+        name += token;
+
+      // The next token needs a leading space unless this one opened a comment
+      // ('(' with or without the space added above) or starts with '.'.
+      needsSpace = token !== '(' && token !== ' (' && token.toString()[0] != '.';
+      // The fall-through case after this resets needsSpace to false, and we
+      // don't want that!
+      continue;
+    }
+
+    // If we just parsed a delimiter, we don't need any space for the next
+    // token.
+    needsSpace = false;
+  }
+
+  // If we're missing the final ';' of a group, assume it was present. Also, add
+  // in the details of any email/address that we previously saw.
+  if (name !== '' || address !== '')
+    addrlist.push({name: name, email: address});
+  if (groupName !== '') {
+    results.push({name: groupName, group: addrlist});
+    addrlist = [];
+  }
+
+  // Add the current address list build-up to the list of addresses, and return
+  // the whole array to the caller.
+  return results.concat(addrlist);
+}
+
+/**
+ * Extract parameters from a header which is a series of ;-separated
+ * attribute=value tokens.
+ *
+ * @param {String} headerValue The MIME header value to parse.
+ * @param {Boolean} doRFC2047 If true, decode RFC 2047 encoded-words.
+ * @param {Boolean} doRFC2231 If true, decode RFC 2231 encoded parameters.
+ * @return {Map(String -> String)} A map of parameter names to parameter values.
+ * The property preSemi is set to the token that
+ * precedes the first semicolon.
+ */
+function parseParameterHeader(headerValue, doRFC2047, doRFC2231) {
+  // The basic syntax of headerValue is token [; token = token-or-qstring]*
+  // Copying more or less liberally from nsMIMEHeaderParamImpl:
+  // The first token is the text to the first whitespace or semicolon.
+  var semi = headerValue.indexOf(";");
+  if (semi < 0) {
+    var start = headerValue;
+    var rest = '';
+  } else {
+    var start = headerValue.substring(0, semi);
+    var rest = headerValue.substring(semi); // Include the semicolon
+  }
+  // Reduce start to its first whitespace-delimited word (after trimming).
+  start = start.trim().split(/[ \t\r\n]/)[0];
+
+  // Decode the parameter tokens.
+  let opts = {qstring: true, rfc2047: doRFC2047};
+  // Name is the name of the parameter, inName is true iff we don't have a name
+  // yet.
+  let name = '', inName = true;
+  // Matches is a list of [name, value] pairs, where we found something that
+  // looks like name=value in the input string.
+  let matches = [];
+  for (let token of getHeaderTokens(rest, ";=", opts)) {
+    if (token === ';') {
+      // If we saw a name and an '=' but no value before this ';' (we have
+      // ...; name=; ...), push the name with an empty value instead.
+      if (name != '' && inName == false)
+        matches.push([name, '']);
+      name = '';
+      inName = true;
+    } else if (token === '=') {
+      inName = false;
+    } else if (inName && name == '') {
+      name = token.toString();
+    } else if (!inName && name != '') {
+      token = token.toString();
+      // RFC 2231 doesn't make it clear if %-encoding is supposed to happen
+      // within a quoted string, but this is very much required in practice. If
+      // it ends with a '*', then the string is an extended-value, which means
+      // that its value may be %-encoded.
+      if (doRFC2231 && name.endsWith('*')) {
+        token = token.replace(/%([0-9A-Fa-f]{2})/g,
+          function percent_deencode(match, hexchars) {
+            return String.fromCharCode(parseInt(hexchars, 16));
+        });
+      }
+      matches.push([name, token]);
+      // Clear the name, so we ignore anything afterwards.
+      name = '';
+    } else if (inName) {
+      // We have ...; tokenA tokenB ... -> ignore both tokens
+      name = ''; // Error recovery, ignore this one
+    }
+  }
+  // If the input ended with ...; name= (no value), push name with an empty value
+  if (name != '' && inName == false)
+    matches.push([name, '']);
+
+  // Now matches holds the parameters, so clean up for RFC 2231. There are three
+  // cases: param=val, param*=us-ascii'en-US'blah, and param*n= variants. The
+  // order of preference is to pick the middle, then the last, then the first.
+  // Note that we already unpacked %-encoded values.
+
+  // simpleValues is just a straight parameter -> value map.
+  // charsetValues is the parameter -> value map, although values are stored
+  // before charset decoding happens.
+  // continuationValues maps parameter -> array of values, with extra properties
+  // valid (if we decided we couldn't do anything anymore) and hasCharset (which
+  // records if we need to decode the charset parameter or not).
+  var simpleValues = new Map(), charsetValues = new Map(),
+      continuationValues = new Map();
+  for (let pair of matches) {
+    let name = pair[0];
+    let value = pair[1];
+    // Get first index, not last index, so we match param*0*= like param*0=.
+    let star = name.indexOf('*');
+    if (star == -1) {
+      // This is the case of param=val. Select the first value here, if there
+      // are multiple ones.
+      if (!simpleValues.has(name))
+        simpleValues.set(name, value);
+    } else if (star == name.length - 1) {
+      // This is the case of param*=us-ascii'en-US'blah.
+      name = name.substring(0, star);
+      // Again, select only the first value here.
+      if (!charsetValues.has(name))
+        charsetValues.set(name, value);
+    } else {
+      // This is the case of param*0= or param*0*=.
+      let param = name.substring(0, star);
+      let entry = continuationValues.get(param);
+      // Did we previously find this one to be bungled? Then ignore it.
+      if (continuationValues.has(param) && !entry.valid)
+        continue;
+
+      // If we haven't seen it yet, set up entry now. An entry is an array of
+      // values indexed by continuation number, plus valid/hasCharset properties.
+      if (!continuationValues.has(param)) {
+        entry = new Array();
+        entry.valid = true;
+        entry.hasCharset = undefined;
+        continuationValues.set(param, entry);
+      }
+
+      // When the string ends in *, we need to do charset decoding.
+      // Note that the star is only meaningful for the *0*= case.
+      let lastStar = name[name.length - 1] == '*';
+      let number = name.substring(star + 1, name.length - (lastStar ? 1 : 0));
+      if (number == '0')
+        entry.hasCharset = lastStar;
+
+      // Is the continuation number illegal?
+      else if ((number[0] == '0' && number != '0') ||
+          !(/^[0-9]+$/.test(number))) {
+        entry.valid = false;
+        continue;
+      }
+      // Normalize to an integer
+      number = parseInt(number, 10);
+
+      // Is this a repeat? If so, bail.
+      if (entry[number] !== undefined) {
+        entry.valid = false;
+        continue;
+      }
+
+      // Set the value for this continuation index. JS's magic array setter will
+      // expand the array if necessary.
+      entry[number] = value;
+    }
+  }
+
+  // Build the actual parameter array from the parsed values
+  var values = new Map();
+  // Simple values have lowest priority, so just add everything into the result
+  // now.
+  for (let pair of simpleValues) {
+    values.set(pair[0], pair[1]);
+  }
+
+  if (doRFC2231) {
+    // Continuation values come next
+    for (let pair of continuationValues) {
+      let name = pair[0];
+      let entry = pair[1];
+      // If we never saw a param*0= or param*0*= value, then we can't do any
+      // reasoning about what it looks like, so bail out now.
+      if (entry.hasCharset === undefined) continue;
+
+      // Use as many entries in the array as are valid--if we are missing an
+      // entry, stop there.
+      let valid = true;
+      for (var i = 0; valid && i < entry.length; i++)
+        if (entry[i] === undefined)
+          valid = false;
+
+      // Concatenate the leading run of present entries (a missing slot at the
+      // end joins as ''). If we need to decode the charset, do so now.
+      var value = entry.slice(0, i).join('');
+      if (entry.hasCharset) {
+        try {
+          value = decode2231Value(value);
+        } catch (e) {
+          // Bad charset, don't add anything.
+          continue;
+        }
+      }
+      // Finally, add this to the output array.
+      values.set(name, value);
+    }
+
+    // Highest priority is the charset conversion.
+    for (let pair of charsetValues) {
+      try {
+        values.set(pair[0], decode2231Value(pair[1]));
+      } catch (e) {
+        // Bad charset, don't add anything.
+      }
+    }
+  }
+
+  // Finally, return the values computed above.
+  values.preSemi = start;
+  return values;
+}
+
+/**
+ * Convert a RFC 2231-encoded string parameter into a Unicode version of the
+ * string. This assumes that percent-decoding has already been applied.
+ *
+ * @param {String} value The RFC 2231-encoded string to decode.
+ * @return The Unicode version of the string.
+ */
+function decode2231Value(value) {
+  let quote1 = value.indexOf("'");
+  let quote2 = quote1 >= 0 ? value.indexOf("'", quote1 + 1) : -1;
+
+  let charset = (quote1 >= 0 ? value.substring(0, quote1) : "");
+  // It turns out that the language isn't useful anywhere in our codebase for
+  // the present time, so we will safely ignore it.
+  //var language = (quote2 >= 0 ? value.substring(quote1 + 2, quote2) : "");
+  value = value.substring(Math.max(quote1, quote2) + 1);
+
+  // Convert the value into a typed array for decoding
+  let typedarray = mimeutils.stringToTypedArray(value);
+
+  // Decode the charset. There is no fallback here: because fatal is set, an
+  // unknown charset or undecodable data throws, and the caller must handle it.
+  return new TextDecoder(charset, {fatal: true})
+    .decode(typedarray, {stream: false});
+}
+
+////////////////////////////////////////
+// Structured header decoding support //
+////////////////////////////////////////
+
+// Load the default structured decoders, then forbid overriding each of them.
+var structuredDecoders = new Map();
+var structuredHeaders = require('./structuredHeaders');
+var preferredSpellings = structuredHeaders.spellings;
+var forbiddenHeaders = new Set();
+for (let pair of structuredHeaders.decoders) {
+  addStructuredDecoder(pair[0], pair[1]);
+  forbiddenHeaders.add(pair[0].toLowerCase());
+}
+
+/**
+ * Use an already-registered structured decoder to parse the value of the header
+ * into a structured representation.
+ *
+ * As this method is designed to be used for the internal MIME Parser to convert
+ * the raw header values to well-structured values, value is intended to be an
+ * array consisting of all occurrences of the header in order. However, for ease
+ * of use by other callers, it can also be treated as a string.
+ *
+ * If the decoder for the header is not found, an exception will be thrown.
+ *
+ * A large set of headers have pre-defined structured decoders; these decoders
+ * cannot be overridden with addStructuredDecoder, as doing so could prevent the
+ * MIME or message parsers from working properly. The pre-defined structured
+ * headers break down into five classes of results, plus some ad-hoc
+ * representations. They are:
+ *
+ * Addressing headers (results are the same as parseAddressingHeader):
+ * - Approved
+ * - Bcc
+ * - Cc
+ * - Delivered-To
+ * - Disposition-Notification-To
+ * - From
+ * - Reply-To
+ * - Resent-Bcc
+ * - Resent-Cc
+ * - Resent-From
+ * - Resent-Sender
+ * - Resent-To
+ * - Return-Receipt-To
+ * - Sender
+ * - To
+ *
+ * Date headers (results are the same as parseDateHeader):
+ * - (TODO: Parsing support for these headers is currently unsupported)
+ *
+ * References headers (results are the same as parseReferencesHeader):
+ * - (TODO: Parsing support for these headers is currently unsupported)
+ *
+ * Message-ID headers (results are the first entry of the result of
+ * parseReferencesHeader):
+ * - (TODO: Parsing support for these headers is currently unsupported)
+ *
+ * Unstructured headers (results are merely decoded according to RFC 2047):
+ * - Comments
+ * - Content-Description
+ * - Keywords
+ * - Subject
+ *
+ * The ad-hoc headers and their resulting formats are as follows:
+ * Content-Type: returns a JS Map of parameter names (in lower case) to their
+ * values, along with the following extra properties defined on the map:
+ * - mediatype: the type to the left of '/' (e.g., 'text', 'message')
+ * - subtype: the type to the right of '/' (e.g., 'plain', 'rfc822')
+ * - type: the full typename (e.g., 'text/plain')
+ * RFC 2047 and RFC 2231 decoding is applied where appropriate. The values of
+ * the type, mediatype, and subtype attributes are all normalized to lower-case,
+ * as are the names of all parameters.
+ *
+ * Content-Transfer-Encoding: the first value is converted to lower-case.
+ *
+ * @param {String} header The name of the header of the values.
+ * @param {String|Array} value The value(s) of the headers, after charset
+ * conversion (if any) has been applied. If it is
+ * an array, the headers are listed in the order
+ * they appear in the message.
+ * @returns {Object} A structured representation of the header values.
+ */
+function parseStructuredHeader(header, value) {
+  // Enforce that the parameter is an array. A string (primitive or String
+  // object) becomes a 1-element array.
+  if (typeof value === "string" || value instanceof String)
+    value = [value];
+  if (!Array.isArray(value))
+    throw new TypeError("Header value is not an array: " + value);
+
+  // Look up the header by its lower-cased name; if a decoder is registered,
+  // call it with headerparser as its this value.
+  let lowerHeader = header.toLowerCase();
+  if (structuredDecoders.has(lowerHeader)) {
+    return structuredDecoders.get(lowerHeader).call(headerparser, value);
+  }
+
+  // If not present, throw an exception.
+  throw new Error("Unknown structured header: " + header);
+}
+
+/**
+ * Add a custom structured MIME decoder to the set of known decoders. These
+ * decoders are used for {@link parseStructuredHeader} and similar functions to
+ * encode richer, more structured values instead of relying on string
+ * representations everywhere.
+ *
+ * Structured decoders are functions which take in a single parameter consisting
+ * of an array of the string values of the header, in order that they appear in
+ * the message. These headers have had the charset conversion (if necessary)
+ * applied to them already. The this parameter of the function is set to be the
+ * jsmime.headerparser module.
+ *
+ * There is a large set of structured decoders built-in to the jsmime library
+ * already. As these headers are fundamental to the workings of jsmime,
+ * attempting to replace them with a custom version will instead produce an
+ * exception.
+ *
+ * @param {String} header The header name (in any case)
+ * for which the decoder will be
+ * used.
+ * @param {Function(String[] -> Object)} decoder The structured decoder
+ * function.
+ */
+function addStructuredDecoder(header, decoder) {
+  let lowerHeader = header.toLowerCase(); // decoders are keyed by lower case
+  if (forbiddenHeaders.has(lowerHeader)) // built-in decoders stay locked
+    throw new Error("Cannot override header: " + header);
+  structuredDecoders.set(lowerHeader, decoder);
+  if (!preferredSpellings.has(lowerHeader)) // keep an existing spelling
+    preferredSpellings.set(lowerHeader, header);
+}
+
+headerparser.addStructuredDecoder = addStructuredDecoder;
+headerparser.convert8BitHeader = convert8BitHeader;
+headerparser.decodeRFC2047Words = decodeRFC2047Words;
+headerparser.getHeaderTokens = getHeaderTokens;
+headerparser.parseAddressingHeader = parseAddressingHeader;
+headerparser.parseParameterHeader = parseParameterHeader;
+headerparser.parseStructuredHeader = parseStructuredHeader;
+return Object.freeze(headerparser); // the exported API object is frozen
+
+});
+
////////////////////////////////////////////////////////////////////////////////
// JavaScript Raw MIME Parser //
////////////////////////////////////////////////////////////////////////////////
/**
* The parser implemented in this file produces a MIME part tree for a given
* input message via a streaming callback interface. It does not, by itself,
* understand concepts like attachments (hence the term 'Raw'); the consumer
* must translate output into such a format.
*
- * Dependencies:
- * This file requires some external code for processing. The following are
- * things that are presumed to exist in the global scope:
- * function atob(str): Converts a JS string to a JS binary string
- *
* Charsets:
* The MIME specifications permit a single message to contain multiple charsets
* (or perhaps none) as raw octets. As JavaScript strings are implicitly
* implemented in UTF-16, it is possible that some engines will attempt to
* convert these strings using an incorrect charset or simply fail to convert
* them at all. This parser assumes that its input is in the form of a "binary
* string", a string that uses only the first 256 characters of Unicode to
- * represent the individual octets. Even if this is the case, the data in the
- * strings are likely to be passed through unchanged no matter their charset
- * unless charset conversion happens on an 8-bit or binary string.
+ * represent the individual octets. To verify that charsets are not getting
+ * mangled elsewhere in the pipeline, the auxiliary test file test/data/charsets
+ * can be used.
+ *
+ * This parser attempts to hide the charset details from clients as much as
+ * possible. The resulting values of structured headers are always converted
+ * into proper Unicode strings before being exposed to clients; getting at the
+ * raw binary string data can only be done via getRawHeader. The .charset
+ * parameter on header objects, if changed, changes the fallback charset used
+ * for headers. It is initialized to the presumed charset of the corresponding
+ * part, taking into account the charset and force-charset options of the
+ * parser. Body parts are only converted into Unicode strings if the strformat
+ * option is set to Unicode. Even then, only the bodies of parts with a media
+ * type of text are converted to Unicode strings using available charset data;
+ * other parts are retained as Uint8Array objects.
*
* Part numbering:
* Since the output is a streaming format, individual parts are identified by a
* numbering scheme. The intent of the numbering scheme for parts is to comply
* with the part numbers as dictated by RFC 3501 as much possible; however,
* that scheme does have several edge cases which would, if strictly followed,
* make it impossible to refer to certain parts of the message. In addition, we
* wish to make it possible to refer to parts which are not discoverable in the
@@ -43,18 +1209,325 @@
* - The numbers of multipart/* parts are separated by `.' characters.
* - The outermost message is referred to by use of the empty string.
* --> The following segments are not accounted for by IMAP part numbering. <--
* - The body of any message/rfc822 or similar part is distinguished from the
* message part as a whole by appending a `$' character. This does not apply
* to the outermost message/rfc822 envelope.
*/
+def('mimeparser', function(require) {
"use strict";
+var mimeutils = require('./mimeutils');
+var headerparser = require('./headerparser');
+var spellings = require('./structuredHeaders').spellings;
+
+/**
+ * An object that represents the structured MIME headers for a message.
+ *
+ * This class is primarily used as the 'headers' parameter in the startPart
+ * callback on handlers for MimeParser. As such, it is designed to do the right
+ * thing in common cases as much as possible, with some advanced customization
+ * possible for clients that need such flexibility.
+ *
+ * In a nutshell, this class stores the raw headers as an internal Map. The
+ * structured headers are not computed until they are actually used, which means
+ * that potentially expensive structuring (e.g., doing manual DKIM validation)
+ * can be performed as a structured decoder without impeding performance for
+ * those who just want a few common headers.
+ *
+ * The outer API of this class is intended to be similar to a read-only Map
+ * object (complete with iterability support), with a few extra properties to
+ * represent things that are hard to determine properly from headers. The keys
+ * used are "preferred spellings" of the headers, although the get and has
+ * methods will accept header parameters of any case. Preferred spellings are
+ * derived from the name passed to addStructuredDecoder/addStructuredEncoder; if
+ * no structured decoder has been registered, then the name capitalizes the
+ * first letter of every word in the header name.
+ *
+ * Extra properties compared to a Map object are:
+ * - charset: This field represents the assumed charset of the associated MIME
+ * body. It is prefilled using a combination of the charset and force-charset
+ * options on the associated MimeParser instance as well as attempting to find
+ * a charset parameter in the Content-Type header.
+ *
+ * If the force-charset option is false, the charset is guessed first using
+ * the Content-Type header's charset parameter, falling back to the charset
+ * option if it is present. If the force-charset option is true, the charset
+ * is initially set to the charset option. This initial guessed value can be
+ * overridden at any time by simply setting the field on this object.
+ *
+ * The charset is better reflected as a parameter of the body rather than the
+ * headers; this is ultimately the charset parameter that will be used if a
+ * body part is being converted to a Unicode strformat. Headers are converted
+ * using headerparser.convert8BitHeader, and this field is used as the
+ * fallbackCharset parameter, which will always attempt to decode as UTF-8
+ * first (in accordance with RFC 6532) and will refuse to decode as UTF-16 or
+ * UTF-32, as ASCII is not a subset of those charsets.
+ *
+ * - rawHeaderText: This read-only field contains the original header text from
+ * which headers were parsed, preserving case and whitespace (including
+ * alternate line endings instead of CRLF) exactly. If the header text begins
+ * with the mbox delimiter (i.e., a line that begins with "From "), then that
+ * is excluded from the rawHeaderText value and is not reflected anywhere in
+ * this object.
+ *
+ * - contentType: This field contains the structured representation of the
+ * Content-Type header, if it is present. If it is not present, it is set to
+ * the structured representation of the default Content-Type for a part (as
+ * this data is not easily guessed given only MIME tree events).
+ *
+ * The constructor for these objects is not externally exported, and thus they
+ * can only be created via MimeParser.
+ *
+ * @param rawHeaderText {BinaryString} The contents of the MIME headers to be
+ * parsed.
+ * @param options {Object} Options for the header parser.
+ * @param options.stripcontinuations {Boolean} If true, elide CRLFs from the
+ * raw header output.
+ */
+function StructuredHeaders(rawHeaderText, options) {
+  // An individual header is terminated by a CRLF, except if the CRLF is
+  // followed by a SP or TAB. Use negative lookahead to capture the latter case,
+  // and don't capture the strings or else split results get nasty.
+  let values = rawHeaderText.split(/(?:\r\n|\n)(?![ \t])|\r(?![ \t\n])/);
+
+  // Ignore the first "header" if it begins with an mbox delimiter
+  if (values.length > 0 && values[0].substring(0, 5) == "From ") {
+    // Elide the mbox delimiter from rawHeaderText. Cut by the delimiter line's
+    // length (plus its line terminator) instead of searching for the text of
+    // the next header, which may be empty or also occur inside the delimiter.
+    let mboxLine = values.shift();
+    rawHeaderText = rawHeaderText.substring(mboxLine.length)
+                                 .replace(/^(?:\r\n|\n|\r)/, '');
+  }
+
+  let headers = new Map();
+  for (let i = 0; i < values.length; i++) {
+    // Look for a colon. If it's not present, this header line is malformed,
+    // perhaps by premature EOF or similar.
+    let colon = values[i].indexOf(":");
+    if (colon >= 0) {
+      var header = values[i].substring(0, colon);
+      var val = values[i].substring(colon + 1).trim();
+      if (options.stripcontinuations)
+        val = val.replace(/[\r\n]/g, '');
+    } else {
+      var header = values[i];
+      var val = '';
+    }
+
+    // Canonicalize the header in lower-case form.
+    header = header.trim().toLowerCase();
+    // Omit "empty" headers
+    if (header == '')
+      continue;
+
+    // We keep an array of values for each header, since a given header may be
+    // repeated multiple times.
+    if (headers.has(header)) {
+      headers.get(header).push(val);
+    } else {
+      headers.set(header, [val]);
+    }
+  }
+
+  /**
+   * A map of header names to arrays of raw values found in this header block.
+   * @private
+   */
+  this._rawHeaders = headers;
+  /**
+   * Cached results of structured header parsing.
+   * @private
+   */
+  this._cachedHeaders = new Map();
+  Object.defineProperty(this, "rawHeaderText",
+    {get: function () { return rawHeaderText; }});
+  Object.defineProperty(this, "size",
+    {get: function () { return this._rawHeaders.size; }});
+  Object.defineProperty(this, "charset", {
+    get: function () { return this._charset; },
+    set: function (value) {
+      this._charset = value;
+      // Clear the cached headers, since this could change their values
+      this._cachedHeaders.clear();
+    }
+  });
+
+  // Default to the charset, until the message parser overrides us.
+  if ('charset' in options)
+    this._charset = options.charset;
+  else
+    this._charset = null;
+
+  // If we have a Content-Type header, set contentType to return the structured
+  // representation. We don't set the value off the bat, since we want to let
+  // someone who changes the charset affect the values of 8-bit parameters.
+  Object.defineProperty(this, "contentType", {
+    configurable: true,
+    get: function () { return this.get('Content-Type'); }
+  });
+}
+
+/**
+ * Get a raw header.
+ *
+ * Raw headers are an array of the header values, listed in order that they were
+ * specified in the header block, and without any attempt to convert charsets or
+ * apply RFC 2047 decoding. For example, in the following message (where the
+ * <XX> is meant to represent binary-octets):
+ *
+ * X-Header: Value A
+ * X-Header: V<C3><A5>lue B
+ * Header2: Q
+ *
+ * the result of calling getRawHeader('X-Header') or getRawHeader('x-header')
+ * would be ['Value A', 'V\xC3\xA5lue B'] and the result of
+ * getRawHeader('Header2') would be ['Q'].
+ *
+ * @param headerName {String} The header name for which to get header values.
+ * @returns {BinaryString[]} The raw header values (no charset conversion is
+ *                           applied), or undefined if the header is absent.
+ */
+StructuredHeaders.prototype.getRawHeader = function (headerName) {
+  return this._rawHeaders.get(headerName.toLowerCase());
+};
+
+/**
+ * Retrieve a structured version of the header.
+ *
+ * If there is a registered structured decoder (registration happens via
+ * headerparser.addStructuredDecoder), then the result of calling that decoder
+ * on the charset-corrected version of the header is returned. Otherwise, the
+ * values are charset-corrected and RFC 2047 decoding is applied as if the
+ * header were an unstructured header.
+ *
+ * A substantial set of headers have pre-registered structured decoders, which,
+ * in some cases, are unable to be overridden due to their importance in the
+ * functioning of the parser code itself.
+ *
+ * @param headerName {String} The header name for which to get the header value.
+ * @returns The structured header value of the output.
+ */
+StructuredHeaders.prototype.get = function (headerName) {
+ // Normalize the header name to lower case
+ headerName = headerName.toLowerCase();
+
+ // First, check the cache for the header value
+ if (this._cachedHeaders.has(headerName))
+ return this._cachedHeaders.get(headerName);
+
+ // Not cached? Grab it [propagating lack of header to caller]
+ let headerValue = this._rawHeaders.get(headerName);
+ if (headerValue === undefined)
+ return headerValue;
+
+ // Convert the header to Unicode
+ let charset = this.charset;
+ headerValue = headerValue.map(function (value) {
+ return headerparser.convert8BitHeader(value, charset);
+ });
+
+ // If there is a structured decoder, use that; otherwise, assume that the
+ // header is unstructured and only do RFC 2047 conversion
+ let structured;
+ try {
+ structured = headerparser.parseStructuredHeader(headerName, headerValue);
+ } catch (e) {
+ structured = headerValue.map(function (value) {
+ return headerparser.decodeRFC2047Words(value);
+ });
+ }
+
+ // Cache the result and return it
+ this._cachedHeaders.set(headerName, structured);
+ return structured;
+};
+
+/**
+ * Check if the message has the given header.
+ *
+ * @param headerName {String} The header name whose presence is to be checked.
+ * @returns {Boolean} True if the header is present in this header block.
+ */
+StructuredHeaders.prototype.has = function (headerName) {
+ // Check for presence in the raw headers instead of cached headers.
+ return this._rawHeaders.has(headerName.toLowerCase());
+};
+
+// Make a custom iterator. Presently, support for Symbol isn't yet present in
+// SpiderMonkey (or V8 for that matter), so type-pun the name for now.
+if (typeof Symbol === "undefined") {
+ var Symbol = {iterator: "@@iterator"};
+}
+
+/**
+ * An equivalent of Map.@@iterator, applied to the structured header
+ * representations. This is the function that makes
+ * for (let [header, value] of headers) work properly.
+ */
+StructuredHeaders.prototype[Symbol.iterator] = function*() {
+ // Iterate over all the raw headers, and use the cached headers to retrieve
+ // them.
+ for (let headerName of this.keys()) {
+ yield [headerName, this.get(headerName)];
+ }
+};
+
+/**
+ * An equivalent of Map.forEach, applied to the structured header
+ * representations.
+ *
+ * @param callback {Function(value, name, headers)} The callback to call for
+ * each header/value combo.
+ * @param thisarg {Object} The parameter that will be
+ * the |this| of the callback.
+ */
+StructuredHeaders.prototype.forEach = function (callback, thisarg) {
+ for (let [header, value] of this) {
+ callback.call(thisarg, value, header, this);
+ }
+};
+
+/**
+ * An equivalent of Map.entries, applied to the structured header
+ * representations.
+ */
+StructuredHeaders.prototype.entries =
+ StructuredHeaders.prototype[Symbol.iterator];
+
+/// This function maps lower case names to a pseudo-preferred spelling.
+function capitalize(headerName) {
+ return headerName.replace(/\b[a-z]/g, function (match) {
+ return match.toUpperCase();
+ });
+}
+
+/**
+ * An equivalent of Map.keys, applied to the structured header representations.
+ */
+StructuredHeaders.prototype.keys = function*() {
+ for (let name of this._rawHeaders.keys()) {
+ yield spellings.get(name) || capitalize(name);
+ }
+};
+
+/**
+ * An equivalent of Map.values, applied to the structured header
+ * representations.
+ */
+StructuredHeaders.prototype.values = function* () {
+ for (let [, value] of this) {
+ yield value;
+ }
+};
+
+
/**
* A MIME parser.
*
* The inputs to the constructor consist of a callback object which receives
* information about the output data and an optional object containing the
* settings for the parser.
*
* The first parameter, emitter, is an object which contains several callbacks.
@@ -64,22 +1537,18 @@
* Called when the stream to be parsed has started delivering data. This
* will be called exactly once, before any other call.
* endMessage()
* Called after all data has been delivered and the message parsing has
* been completed. This will be called exactly once, after any other call.
* startPart(string partNum, object headers)
* Called after the headers for a body part (including the top-level
* message) have been parsed. The first parameter is the part number (see
- * the discussion on part numbering). The second parameter is a JS Map
- * object where the keys are lower-case header names and the values are
- * arrays of strings of the header values with newlines stripped. In
- * addition, the headers object also contains a property rawHeaderText
- * whose value is the text of all MIME headers, with whitespace and case
- * perfectly preserved.
+ * the discussion on part numbering). The second parameter is an instance
+ * of StructuredHeaders that represents all of the headers for the part.
* endPart(string partNum)
* Called after all of the data for a body part (including sub-parts) has
* been parsed. The first parameter is the part number.
* deliverPartData(string partNum, {string,typedarray} data)
* Called when some data for a body part has been delivered. The first
* parameter is the part number. The second parameter is the data which is
* being delivered; the exact type of this data depends on the options
* used. Note that data is only delivered for leaf body parts.
@@ -90,51 +1559,69 @@
* Treat the message as starting at the given part number, so that no parts
* above <string> are returned.
* bodyformat: one of {none, raw, nodecode, decode} [default=nodecode]
* How to return the bodies of parts:
* none: no part data is returned
* raw: the body of the part is passed through raw
* nodecode: the body is passed through without decoding QP/Base64
* decode: quoted-printable and base64 are fully decoded
+ * strformat: one of {binarystring, unicode, typedarray} [default=binarystring]
+ * How to treat output strings:
+ * binarystring: Data is a JS string with chars in the range [\x00-\xff]
+ * unicode: Data for text parts is converted to UTF-16; data for other
+ * parts is a typed array buffer, akin to typedarray.
+ * typedarray: Data is a JS typed array buffer
+ * charset: <string> [default=""]
+ * What charset to assume if no charset information is explicitly provided.
+ * This only matters if strformat is unicode. See above note on charsets
+ * for more details.
+ * force-charset: <boolean> [default=false]
+ * If true, this coerces all types to use the charset option, even if the
+ * message specifies a different charset.
* stripcontinuations: <boolean> [default=true]
* If true, then the newlines in headers are removed in the returned
* header objects.
* onerror: <function(thrown error)> [default = nop-function]
* An error function that is called if an emitter callback throws an error.
* By default, such errors are swallowed by the parser. If you want the
* parser itself to throw an error, rethrow it via the onerror function.
*/
-function Parser(emitter, options) {
+function MimeParser(emitter, options) {
/// The actual emitter
this._emitter = emitter;
/// Options for the parser (those listed here are defaults)
this._options = {
pruneat: "",
bodyformat: "nodecode",
strformat: "binarystring",
stripcontinuations: true,
+ charset: "",
+ "force-charset": false,
onerror: function swallow(error) {}
};
// Load the options as a copy here (prevents people from changing on the fly).
if (options)
for (var opt in options) {
this._options[opt] = options[opt];
}
// Ensure that the error function is in fact a function
if (typeof this._options.onerror != "function")
throw new Exception("onerror callback must be a function");
// Reset the parser
this.resetParser();
}
-/// Resets the parser to read a new message.
-Parser.prototype.resetParser = function Parser_resetParser() {
+/**
+ * Resets the parser to read a new message. This method need not be called
+ * immediately after construction.
+ */
+MimeParser.prototype.resetParser = function () {
/// Current parser state
this._state = PARSING_HEADERS;
/// Input data that needs to be held for buffer conditioning
this._holdData = '';
/// Complete collection of headers (also used to accumulate _headerData)
this._headerData = '';
/// Whether or not emitter.startMessage has been called
this._triggeredCall = false;
@@ -144,22 +1631,24 @@ Parser.prototype.resetParser = function
/// Subparsing
this._subparser = this._subPartNum = undefined;
/// Data that has yet to be consumed by _convertData
this._savedBuffer = '';
/// Convert data
this._convertData = undefined;
/// String decoder
this._decoder = undefined;
-}
+};
/**
* Deliver a buffer of data to the parser.
+ *
+ * @param buffer {BinaryString} The raw data to add to the message.
*/
-Parser.prototype.deliverData = function Parser_deliverData(buffer) {
+MimeParser.prototype.deliverData = function (buffer) {
// In ideal circumstances, we'd like to parse the message all at once. In
// reality, though, data will be coming to us in packets. To keep the amount
// of saved state low, we want to make basic guarantees about how packets get
// delivered. Our basic model is a twist on line-buffering, as the format of
// MIME and messages make it hard to not do so: we can handle multiple lines
// at once. To ensure this, we start by conditioning the packet by
// withholding data to make sure that the internal deliveries have the
// guarantees. This implies that we need to do the following steps:
@@ -194,86 +1683,99 @@ Parser.prototype.deliverData = function
this._callEmitter("startMessage");
this._triggeredCall = true;
}
// Finally, send it the internal parser.
this._dispatchData("", buffer, true);
}
-/// This function returns [string that ends in CRLF, rest of string]
+/**
+ * Ensure that a set of data always ends in an end-of-line character.
+ *
+ * @param buffer {BinaryString} The data with no guarantees about where it ends.
+ * @returns {BinaryString[]} An array of 2 binary strings where the first string
+ * ends in a newline and the last string contains the
+ * text in buffer following the first string.
+ */
function conditionToEndOnCRLF(buffer) {
// Find the last occurrence of '\r' or '\n' to split the string. However, we
// don't want to consider '\r' if it is the very last character, as we need
// the next packet to tell if the '\r' is the beginning of a CRLF or a line
// ending by itself.
let lastCR = buffer.lastIndexOf('\r', buffer.length - 2);
let lastLF = buffer.lastIndexOf('\n');
let end = lastLF > lastCR ? lastLF : lastCR;
return [buffer.substring(0, end + 1), buffer.substring(end + 1)];
-}
+};
/**
* Tell the parser that all of the data has been delivered.
*
* This will flush all of the internal state of the parser.
*/
-Parser.prototype.deliverEOF = function Parser_deliverEOF() {
+MimeParser.prototype.deliverEOF = function () {
// Start of input buffered too long? Call start message now.
if (!this._triggeredCall) {
this._triggeredCall = true;
this._callEmitter("startMessage");
}
// Force a flush of all of the data.
if (this._holdData)
this._dispatchData("", this._holdData, true);
this._dispatchEOF("");
// Signal to the emitter that we're done.
this._callEmitter("endMessage");
-}
+};
/**
* Calls a method on the emitter safely.
*
* This method ensures that errors in the emitter call won't cause the parser
* to exit with an error, unless the user wants it to.
+ *
+ * @param funcname {String} The function name to call on the emitter.
+ * @param args... Extra arguments to pass into the emitter callback.
*/
-Parser.prototype._callEmitter = function Parser_callEmitter(funcname) {
+MimeParser.prototype._callEmitter = function (funcname) {
if (this._emitter && funcname in this._emitter) {
let args = Array.prototype.splice.call(arguments, 1);
if (args.length > 0 && this._willIgnorePart(args[0])) {
// partNum is always the first argument, so check to make sure that it
// satisfies our emitter's pruneat requirement.
return;
}
try {
this._emitter[funcname].apply(this._emitter, args);
} catch (e) {
// We ensure that the onerror attribute in options is a function, so this
// is always safe.
this._options.onerror(e);
}
}
-}
+};
/**
* Helper function to decide if a part's output will never be seen.
+ *
+ * @param part {String} The number of the part.
+ * @returns {Boolean} True if the emitter is not interested in this part.
*/
-Parser.prototype._willIgnorePart = function Parser_willIgnorePart(part) {
+MimeParser.prototype._willIgnorePart = function (part) {
if (this._options["pruneat"]) {
let match = this._options["pruneat"];
let start = part.substr(0, match.length);
// It needs to start with and follow with a new part indicator
// (i.e., don't let 10 match with 1, but let 1.1 or 1$ do so)
if (start != match || (match.length < part.length &&
"$.".indexOf(part[match.length]) == -1))
return true;
}
return false;
-}
+};
//////////////////////
// MIME parser core //
//////////////////////
// This MIME parser is a stateful parser; handling of the MIME tree is mostly
// done by creating new parsers and feeding data to them manually. In parallel
// to the externally-visible deliverData and deliverEOF, the two methods
@@ -326,19 +1828,25 @@ const SEND_TO_SUBPARSER = 4;
/**
* Main dispatch for incoming packet data.
*
* The incoming data needs to have been sanitized so that each packet begins on
* a newline boundary. The part number for the current parser also needs to be
* passed in. The checkSplit parameter controls whether or not the data in
* buffer needs to be checked against _splitRegex; this is used internally for
* the mechanics of splitting and should otherwise always be true.
+ *
+ * @param partNum {String} The part number being currently parsed.
+ * @param buffer {BinaryString} The text (conditioned as mentioned above) to
+ * pass to the parser.
+ * @param checkSplit {Boolean} If true, split the text using _splitRegex.
+ * This is set to false internally to handle
+ * low-level splitting details.
*/
-Parser.prototype._dispatchData = function Parser_dispatchData(partNum, buffer,
- checkSplit) {
+MimeParser.prototype._dispatchData = function (partNum, buffer, checkSplit) {
// Are we parsing headers?
if (this._state == PARSING_HEADERS) {
this._headerData += buffer;
// Find the end of the headers--either it's a CRLF at the beginning (in
// which case we have no headers), or it's a pair of CRLFs.
let result = /(?:^(?:\r\n|[\r\n]))|(\r\n|[\r\n])\1/.exec(this._headerData);
if (result != null) {
// If we found the end of headers, split the data at this point and send
@@ -389,162 +1897,186 @@ Parser.prototype._dispatchData = functio
buffer = this._applyDataConversion(buffer, this._options["strformat"]);
if (buffer.length > 0)
this._callEmitter("deliverPartData", partNum, buffer);
} else if (this._state == SEND_TO_SUBPARSER) {
buffer = this._applyDataConversion(buffer, "binarystring");
if (buffer.length > 0)
this._subparser._dispatchData(this._subPartNum, buffer, true);
}
-}
+};
-/// Applies this._convertData(buffer, true) if necessary
-Parser.prototype._applyDataConversion = function Parser_convertData(buf, type) {
+/**
+ * Output data using the desired output format, saving data if data conversion
+ * needs extra data to be saved.
+ *
+ * @param buf {BinaryString} The data to be sent to the output.
+ * @param type {String} The type of the data to output. Valid values are
+ * the same as the strformat option.
+ * @returns Coerced and converted data that can be sent to the emitter or
+ * subparser.
+ */
+MimeParser.prototype._applyDataConversion = function (buf, type) {
// If we need to convert data, do so.
if (this._convertData) {
// Prepend leftover data from the last conversion.
buf = this._savedBuffer + buf;
[buf, this._savedBuffer] = this._convertData(buf, true);
}
return this._coerceData(buf, type, true);
-}
+};
+/**
+ * Coerce the input buffer into the given output type.
+ *
+ * @param buffer {BinaryString|Uint8Array} The data to be converted.
+ * @param type {String} The type to convert the data to.
+ * @param more {boolean} If true, more data will follow in later calls;
+ * if false, this is the final call for this data.
+ * @returns {BinaryString|String|Uint8Array} The desired output format.
+ */
/// Coerces the buffer (a string or typedarray) into a given type
-Parser.prototype._coerceData = function Parser_coerce(buffer, type, more) {
- // Note: This function is a placeholder for later code primarily relating to
- // charsets and strformat options.
- return buffer;
-}
+MimeParser.prototype._coerceData = function (buffer, type, more) {
+ if (typeof buffer == "string") {
+ // string -> binarystring is a nop
+ if (type == "binarystring")
+ return buffer;
+ // Either we're going to array or unicode. Both people need the array
+ var typedarray = mimeutils.stringToTypedArray(buffer);
+ // If it's unicode, do the coercion from the array
+ // If it's typedarray, just return the synthesized one
+ return type == "unicode" ? this._coerceData(typedarray, "unicode", more)
+ : typedarray;
+ } else if (type == "binarystring") {
+ // Doing array -> binarystring
+ return mimeutils.typedArrayToString(buffer);
+ } else if (type == "unicode") {
+ // Doing array-> unicode: Use the decoder set up earlier to convert
+ if (this._decoder)
+ return this._decoder.decode(buffer, {stream: more});
+ // If there is no charset, just return the typed array instead.
+ return buffer;
+ }
+ throw new Error("Invalid type: " + type);
+};
/**
* Signal that no more data will be dispatched to this parser.
+ *
+ * @param partNum {String} The part number being currently parsed.
*/
-Parser.prototype._dispatchEOF = function Parser_dispatchEOF(partNum) {
+MimeParser.prototype._dispatchEOF = function (partNum) {
if (this._state == PARSING_HEADERS) {
// Unexpected EOF in headers. Parse them now and call startPart/endPart
this._headers = this._parseHeaders();
this._callEmitter("startPart", partNum, this._headers);
} else if (this._state == SEND_TO_SUBPARSER) {
// Pass in any lingering data
if (this._convertData && this._savedBuffer)
this._subparser._dispatchData(this._subPartNum,
this._convertData(this._savedBuffer, false)[0], true);
this._subparser._dispatchEOF(this._subPartNum);
// Clean up after ourselves
this._subparser = null;
} else if (this._convertData && this._savedBuffer) {
// Convert lingering data
- [buffer, ] = this._convertData(this._savedBuffer, false);
+ let [buffer, ] = this._convertData(this._savedBuffer, false);
buffer = this._coerceData(buffer, this._options["strformat"], false);
if (buffer.length > 0)
this._callEmitter("deliverPartData", partNum, buffer);
}
// We've reached EOF for this part; tell the emitter
this._callEmitter("endPart", partNum);
-}
+};
/**
* Produce a dictionary of all headers as if they were unstructured fields.
+ *
+ * @returns {StructuredHeaders} The structured header objects for the header
+ * block.
*/
-Parser.prototype._parseHeaders = function Parser_parseHeaders() {
- // An individual header is terminated by a CRLF, except if the CRLF is
- // followed by a SP or TAB. Use negative lookahead to capture the latter case,
- // and don't capture the strings or else split results get nasty.
- let values = this._headerData.split(/(?:\r\n|\n)(?![ \t])|\r(?![ \t\n])/);
+MimeParser.prototype._parseHeaders = function () {
+ let headers = new StructuredHeaders(this._headerData, this._options);
- // Ignore the first "header" if it begins with an mbox delimiter
- if (values.length > 0 && values[0].substring(0, 5) == "From ") {
- values.shift();
- // Elide the mbox delimiter from this._headerData
- if (values.length == 0)
- this._headerData = '';
- else
- this._headerData = this._headerData.substring(
- this._headerData.indexOf(values[0]));
+ // Fill the headers.contentType parameter of headers.
+ let contentType = headers.get('Content-Type');
+ if (typeof contentType === "undefined") {
+ contentType = headerparser.parseStructuredHeader('Content-Type',
+ this._defaultContentType || 'text/plain');
+ Object.defineProperty(headers, "contentType", {
+ get: function () { return contentType; }
+ });
+ } else {
+ Object.defineProperty(headers, "contentType", { configurable: false });
}
- let headers = new Map();
- for (let i = 0; i < values.length; i++) {
- // Look for a colon. If it's not present, this header line is malformed,
- // perhaps by premature EOF or similar. The value is null in this case.
- let colon = values[i].indexOf(":");
- if (colon >= 0) {
- var header = values[i].substring(0, colon);
- var val = values[i].substring(colon + 1).trim();
- if (this._options.stripcontinuations)
- val = val.replace(/[\r\n]/g, '');
- } else {
- var header = values[i];
- var val = null;
- }
+ // Find the charset for the current part. If the user requested a forced
+ // conversion, use that first. Otherwise, check the content-type for one and
+ // fallback to a default if it is not present.
+ let charset = '';
+ if (this._options["force-charset"])
+ charset = this._options["charset"];
+ else if (contentType.has("charset"))
+ charset = contentType.get("charset");
+ else
+ charset = this._options["charset"];
+ headers.charset = charset;
- // Canonicalize the header in lower-case form.
- header = header.trim().toLowerCase();
- // Omit "empty" headers
- if (header == '')
- continue;
-
- // We keep an array of values for each header, since a given header may be
- // repeated multiple times.
- if (headers.has(header)) {
- headers.get(header).push(val);
- } else {
- headers.set(header, [val]);
- }
- }
-
- headers.rawHeaderText = this._headerData;
+ // Retain a copy of the charset so that users don't override our decision for
+ // decoding body parts.
+ this._charset = charset;
return headers;
-}
+};
/**
* Initialize the parser state for the body of this message.
+ *
+ * @param partNum {String} The part number being currently parsed.
*/
-Parser.prototype._startBody = function Parser_startBody(partNum) {
+MimeParser.prototype._startBody = function Parser_startBody(partNum) {
+ let contentType = this._headers.contentType;
+
// Should the bodyformat be raw, we just want to pass through all data without
// trying to interpret it.
if (this._options["bodyformat"] == "raw" &&
partNum == this._options["pruneat"]) {
this._state = SEND_TO_EMITTER;
return;
}
- // What do we assume if there's no content-type?
- let defaultContentType = this._defaultContentType || 'text/plain';
- let contentType = this._extractHeader('content-type', defaultContentType);
// The output depents on the content-type. Basic rule of thumb:
// 1. Discrete media types (text, video, audio, image, application) are passed
// through with no alterations beyond Content-Transfer-Encoding unpacking.
// 2. Everything with a media type of multipart is treated the same.
// 3. Any message/* type that acts like a mail message (rfc822, news, global)
// is parsed as a header/body pair again. Most of the other message/* types
// have similar structures, but they don't have cascading child subparts,
// so it's better to pass their entire contents to the emitter and let the
// consumer deal with them.
// 4. For untyped data, there needs to be no Content-Type header. This helps
// avoid false positives.
if (contentType.mediatype == 'multipart') {
// If there's no boundary type, everything will be part of the prologue of
// the multipart message, so just feed everything into a black hole.
- if (!('param-boundary' in contentType)) {
+ if (!contentType.has('boundary')) {
this._state = SEND_TO_BLACK_HOLE;
return;
}
// The boundary of a multipart message needs to start with -- and be at the
// beginning of the line. If -- is after the boundary, it represents the
// terminator of the multipart. After the line, there may be only whitespace
// and then the CRLF at the end. Since the CRLFs in here are necessary for
// distinguishing the parts, they are not included in the subparts, so we
// need to capture them in the regex as well to prevent them leaking out.
this._splitRegex = new RegExp('(\r\n|[\r\n]|^)--' +
- contentType['param-boundary'].replace(/[\\^$*+?.()|{}[\]]/g, '\\$&') +
+ contentType.get('boundary').replace(/[\\^$*+?.()|{}[\]]/g, '\\$&') +
'(--)?[ \t]*(?:\r\n|[\r\n]|$)');
this._handleSplit = this._whenMultipart;
- this._subparser = new Parser(this._emitter, this._options);
+ this._subparser = new MimeParser(this._emitter, this._options);
// multipart/digest defaults to message/rfc822 instead of text/plain
if (contentType.subtype == "digest")
this._subparser._defaultContentType = "message/rfc822";
// All text before the first boundary and after the closing boundary are
// supposed to be ignored ("must be ignored", according to RFC 2046 §5.1.1);
// in accordance with these wishes, ensure they don't get passed to any
// deliverPartData.
@@ -558,27 +2090,29 @@ Parser.prototype._startBody = function P
this._convertData = function mpart_no_leak_crlf(buffer, more) {
let splitPoint = buffer.length;
if (more) {
if (buffer.charAt(splitPoint - 1) == '\n')
splitPoint--;
if (splitPoint >= 0 && buffer.charAt(splitPoint - 1) == '\r')
splitPoint--;
}
- let [preLF, rest] = conditionToEndOnCRLF(buffer.substring(0, splitPoint));
+ let res = conditionToEndOnCRLF(buffer.substring(0, splitPoint));
+ let preLF = res[0];
+ let rest = res[1];
return [preLF, rest + buffer.substring(splitPoint)];
}
} else if (contentType.type == 'message/rfc822' ||
contentType.type == 'message/global' ||
contentType.type == 'message/news') {
// The subpart is just another header/body pair that goes to EOF, so just
// return the parse from that blob
this._state = SEND_TO_SUBPARSER;
this._subPartNum = partNum + "$";
- this._subparser = new Parser(this._emitter, this._options);
+ this._subparser = new MimeParser(this._emitter, this._options);
// So, RFC 6532 happily allows message/global types to have CTE applied.
// This means that subparts would need to be decoded to determine their
// contents properly. There seems to be some evidence that message/rfc822
// that is illegally-encoded exists in the wild, so be lenient and decode
// for any message/* type that gets here.
let cte = this._extractHeader('content-transfer-encoding', '');
if (cte in ContentDecoders)
@@ -588,20 +2122,48 @@ Parser.prototype._startBody = function P
this._state = SEND_TO_EMITTER;
if (this._options["bodyformat"] == "decode") {
// If we wish to decode, look it up in one of our decoders.
let cte = this._extractHeader('content-transfer-encoding', '');
if (cte in ContentDecoders)
this._convertData = ContentDecoders[cte];
}
}
-}
+
+ // Set up the decoder for charset conversions; only do this for text parts.
+ // Other parts are almost certainly binary, so no translation should be
+ // applied to them.
+ if (this._options["strformat"] == "unicode" &&
+ contentType.mediatype == "text") {
+ // If the charset is nonempty, initialize the decoder
+ if (this._charset !== "") {
+ this._decoder = new TextDecoder(this._charset);
+ } else {
+ // There's no charset we can use for decoding, so pass through as an
+ // identity decoder or otherwise this._coerceData will complain.
+ this._decoder = {
+ decode: function identity_decoder(buffer) {
+ return MimeParser.prototype._coerceData(buffer, "binarystring", true);
+ }
+ };
+ }
+ } else {
+ this._decoder = null;
+ }
+};
// Internal split handling for multipart messages.
-Parser.prototype._whenMultipart = function Parser_mpart(partNum, lastResult) {
+/**
+ * When a multipart boundary is found, handle the process of managing the
+ * subparser state. This is meant to be used as a value for this._handleSplit.
+ *
+ * @param partNum {String} The part number being currently parsed.
+ * @param lastResult {Array} The result of the regular expression match.
+ */
+MimeParser.prototype._whenMultipart = function (partNum, lastResult) {
// Fix up the part number (don't do '' -> '.4' and don't do '1' -> '14')
if (partNum != "") partNum += ".";
if (!this._subPartNum) {
// No count? This means that this is the first time we've seen the boundary,
// so do some initialization for later here.
this._count = 1;
} else {
// If we did not match a CRLF at the beginning of the line, strip CRLF from
@@ -632,145 +2194,744 @@ Parser.prototype._whenMultipart = functi
this._state = SEND_TO_SUBPARSER;
this._subPartNum = partNum + this._count;
this._count += 1;
} else {
// Ignore the epilogue
this._splitRegex = null;
this._state = SEND_TO_BLACK_HOLE;
}
+};
+
+/**
+ * Return the structured header from the current header block, or a default if
+ * it is not present.
+ *
+ * @param name {String} The header name to get.
+ * @param dflt {String} The default MIME value of the header.
+ * @returns The structured representation of the header.
+ */
+MimeParser.prototype._extractHeader = function (name, dflt) {
+ name = name.toLowerCase(); // Normalize name
+ return this._headers.has(name) ? this._headers.get(name) :
+ headerparser.parseStructuredHeader(name, [dflt]);
+};
+
+var ContentDecoders = {};
+ContentDecoders['quoted-printable'] = mimeutils.decode_qp;
+ContentDecoders['base64'] = mimeutils.decode_base64;
+
+return MimeParser;
+});
+def('headeremitter', function(require) {
+/**
+ * This module implements the code for emitting structured representations of
+ * MIME headers into their encoded forms. The code here is a companion to,
+ * but completely independent of, jsmime.headerparser: the structured
+ * representations that are used as input to the functions in this file are the
+ * same forms that would be parsed.
+ */
+
+"use strict";
+
+var mimeutils = require('./mimeutils');
+
+// Get the default structured encoders and add them to the map
+var structuredHeaders = require('structuredHeaders');
+var encoders = new Map();
+var preferredSpellings = structuredHeaders.spellings;
+for (let [header, encoder] of structuredHeaders.encoders) {
+ addStructuredEncoder(header, encoder);
}
-// Extract a header. This is for internal purposes.
-// This calls the structured decoder if it exists. If it does not, it just trims
-// the value and makes it lower case.
-Parser.prototype._extractHeader = function extractHeader(name, dflt) {
- let value = this._headers.has(name) ? this._headers.get(name)[0] : dflt;
- if (name in StructuredDecoders)
- return StructuredDecoders[name](value);
- // In lieu of anything else, just return lower-case version
- return value.trim().toLowerCase();
+/// Clamp a value in the range [min, max], defaulting to def if it is undefined.
+function clamp(value, min, max, def) {
+ if (value === undefined)
+ return def;
+ if (value < min)
+ return min;
+ if (value > max)
+ return max;
+ return value;
}
-// Content transfer decoders
-var ContentDecoders = {};
-ContentDecoders['quoted-printable'] = function decode_qp(buffer, more) {
- // Unlike base64, quoted-printable isn't stateful across multiple lines, so
- // there is no need to buffer input, so we can always ignore more.
- let decoded = buffer.replace(
- // Replace either =<hex><hex> or =<wsp>CRLF
- /=([0-9A-F][0-9A-F]|[ \t]*(\r\n|[\r\n]|$))/gi,
- function replace_chars(match, param) {
- // If trailing text matches [ \t]*CRLF, drop everything, since it's a
- // soft line break.
- if (param.trim().length == 0)
- return '';
- return String.fromCharCode(parseInt(param, 16));
- });
- return [decoded, ''];
-}
-ContentDecoders['base64'] = function decode_base64(buffer, more) {
- // Drop all non-base64 characters
- let sanitize = buffer.replace(/[^A-Za-z0-9+\/=]/g,'');
- // We need to encode in groups of 4 chars. If we don't have enough, leave the
- // excess for later. If there aren't any more, drop enough to make it 4.
- let excess = sanitize.length % 4;
- if (excess != 0 && more)
- buffer = sanitize.slice(-excess);
- else
- buffer = '';
- sanitize = sanitize.substring(0, sanitize.length - excess);
- // Use the atob function we (ought to) have in global scope.
- return [atob(sanitize), buffer];
+/**
+ * An object that can assemble structured header representations into their MIME
+ * representation.
+ *
+ * The character-counting portion of this class operates using individual JS
+ * characters as its representation of logical character, which is not the same
+ * as the number of octets used as UTF-8. If non-ASCII characters are to be
+ * included in headers without some form of encoding, then care should be taken
+ * to set the maximum line length to account for the mismatch between character
+ * counts and octet counts: the maximum line is 998 octets, which could be as
+ * few as 332 JS characters (non-BMP characters, although they take up 4 octets
+ * in UTF-8, count as 2 in JS strings).
+ *
+ * This code takes care to only insert line breaks at the higher-level breaking
+ * points in a header (as recommended by RFC 5322), but it may need to resort to
+ * including them more aggressively if this is not possible. If even aggressive
+ * line-breaking cannot allow a header to be emitted without violating line
+ * length restrictions, the methods will throw an exception to indicate this
+ * situation.
+ *
+ * In general, this code does not attempt to modify its input; for example, it
+ * does not attempt to change the case of any input characters, apply any
+ * Unicode normalization algorithms, or convert email addresses to ACE where
+ * applicable. The biggest exception to this rule is that most whitespace is
+ * collapsed to a single space, even in unstructured headers, while most leading
+ * and trailing whitespace is trimmed from inputs.
+ *
+ * @param {StreamHandler} handler The handler to which all output is sent.
+ * @param {Function(String)} handler.deliverData Receives encoded data.
+ * @param {Function()} handler.deliverEOF Sent when all text is sent.
+ * @param {Object} options Options for the emitter.
+ * @param [options.softMargin=78] {30 <= Integer <= 900}
+ * The ideal maximum number of logical characters to include in a line, not
+ * including the final CRLF pair. Lines may exceed this margin if parameters
+ * are excessively long.
+ * @param [options.hardMargin=332] {softMargin <= Integer <= 998}
+ * The maximum number of logical characters that can be included in a line,
+ * not including the final CRLF pair. If this count would be exceeded, then
+ * an error will be thrown and encoding will not be possible.
+ * @param [options.useASCII=true] {Boolean}
+ * If true, then RFC 2047 and RFC 2231 encoding of headers will be performed
+ * as needed to retain headers as ASCII.
+ */
+function HeaderEmitter(handler, options) {
+ /// The inferred value of options.useASCII
+ this._useASCII = options.useASCII === undefined ? true : options.useASCII;
+ /// The handler to use.
+ this._handler = handler;
+ /**
+ * The current line being built; note that we may insert a line break in the
+ * middle to keep under the maximum line length.
+ *
+ * @type String
+ * @private
+ */
+ this._currentLine = "";
+
+ // Our bounds for soft and hard margins are not completely arbitrary. The minimum
+ // amount we need to encode is 20 characters, which can encode a single
+ // non-BMP character with RFC 2047. The value of 30 is chosen to give some
+ // breathing room for delimiters or other unbreakable characters. The maximum
+ // length is 998 octets, per RFC 5322; soft margins are slightly lower to
+ // allow for breathing room as well. The default of 78 for the soft margin is
+ // recommended by RFC 5322; the default of 332 for the hard margin ensures
+ // that UTF-8 encoding the output never violates the 998 octet limit.
+ this._softMargin = clamp(options.softMargin, 30, 900, 78);
+ this._hardMargin = clamp(options.hardMargin, this._softMargin, 998, 332);
+
+ /**
+ * The index of the last preferred breakable position in the current line.
+ *
+ * @type Integer
+ * @private
+ */
+ this._preferredBreakpoint = 0;
}
-///////////////////////////////
-// Structured field decoders //
-///////////////////////////////
+
+///////////////////////
+// Low-level methods //
+///////////////////////
-// Structured decoders exist in two pieces. There are the basic methods, for
-// decoding headers based on their type rather than full semantic decomposition.
-// All of these methods take as their first parameter the string to be parsed.
-// In addition to these, we have specific structurers for individual headers
-// that are useful for the parser (e.g., Content-Type).
+// Explanation of the emitter internals:
+// RFC 5322 requires that we wrap our lines, ideally at 78 characters and at
+// least by 998 octets. We can't wrap in arbitrary places, but wherever CFWS is
+// valid... and ideally wherever clients are likely to expect it. In theory, we
+// can break between every token (this is how RFC 822 operates), but, in RFC
+// 5322, many of those breaks are relegated to obsolete productions, mostly
+// because it is common to not properly handle breaks in those locations.
+//
+// So how do we do line breaking? The algorithm we implement is greedy, to
+// simplify implementation. There are two margins: the soft margin, which we
+// want to keep within, and the hard margin, which we absolutely have to keep
+// within. There are also two kinds of break points: preferred and emergency.
+// As long as we keep the line within the hard margin, we will only break at
+// preferred breakpoints; emergency breakpoints are only used if we would
+// otherwise exceed the hard margin.
+//
+// For illustration, here is an example header and where these break points are
+// located:
+//
+// To: John "The Rock" Smith <jsmith@a.long.domain.invalid>
+// Preferred: ^ ^ ^
+// Emergency: ^ ^ ^ ^^ ^ ^ ^ ^ ^
+//
+// Preferred breakpoints are indicated by setting the mayBreakAfter parameter of
+// addText to true, while emergency breakpoints are set after every token passed
+// into addText. This is handled implicitly by only adding text to _currentLine
+// if it ends in an emergency breakpoint.
+//
+// Internally, the code keeps track of margins by use of two variables. The
+// _softMargin and _hardMargin variables encode the positions at which code must
+// absolutely break, and are set up from the initial options parameter. Breaking
+// happens when _currentLine.length approaches these values, as mentioned above.
+
+/**
+ * Send a header line consisting of the first N characters to the handler.
+ *
+ * If the count parameter is missing, then we presume that the current header
+ * value being emitted is done and therefore we should not send a continuation
+ * space. Otherwise, we presume that we're still working, so we will send the
+ * continuation space.
+ *
+ * @private
+ * @param [count] {Integer} The number of characters in the current line to
+ * include before wrapping.
+ */
+HeaderEmitter.prototype._commitLine = function (count) {
+ let isContinuing = typeof count !== "undefined";
+
+ // Split at the point, and lop off whitespace immediately before and after.
+ if (isContinuing) {
+ var firstN = this._currentLine.slice(0, count).trimRight();
+ var lastN = this._currentLine.slice(count).trimLeft();
+ } else {
+ var firstN = this._currentLine.trimRight();
+ var lastN = "";
+ }
+
+ // How many characters do we need to shift preferred/emergency breakpoints?
+ let shift = this._currentLine.length - lastN.length;
+
+ // Send the line plus the final CRLF.
+ this._handler.deliverData(firstN + '\r\n');
+
+ // Fill the start of the line with the new data.
+ this._currentLine = lastN;
-function extractParameters(headerValue) {
- // The basic syntax of headerValue is token [; token = token-or-qstring]*
- // Copying more or less liberally from nsMIMEHeaderParamImpl:
- // The first token is the text to the first whitespace or semicolon.
- var semi = headerValue.indexOf(";");
- if (semi < 0) {
- var start = headerValue;
- var rest = '';
- } else {
- var start = headerValue.substring(0, semi);
- var rest = headerValue.substring(semi); // Include the semicolon
+ // If this is a continuation, add an extra space at the beginning of the line.
+ // Adjust the breakpoint shift amount as well.
+ if (isContinuing) {
+ this._currentLine = ' ' + this._currentLine;
+ shift++;
}
- // Strip start to be <WSP><nowsp><WSP>
- start = start.trim().split(/[ \t\r\n]/)[0];
+
+ // We will always break at a point at or after the _preferredBreakpoint, if it
+ // exists, so this always gets reset to 0.
+ this._preferredBreakpoint = 0;
+};
+
+/**
+ * Reserve at least length characters in the current line. If there aren't
+ * enough characters, insert a line break.
+ *
+ * @private
+ * @param length {Integer} The number of characters to reserve space for.
+ * @return {Boolean} Whether or not there is enough space for length characters.
+ */
+HeaderEmitter.prototype._reserveTokenSpace = function (length) {
+ // We are not going to do a sanity check that length is within the wrap
+ // margins. The rationale is that this lets code simply call this function to
+ // force a higher-level line break than normal preferred line breaks (see
+ // addAddress for an example use). The text that would be added may need to be
+ // itself broken up, so it might not need all the length anyways, but it
+ // starts the break already.
+
+ // If we have enough space, we don't need to do anything.
+ if (this._currentLine.length + length <= this._softMargin)
+ return true;
+
+ // If we have a preferred breakpoint, commit the line at that point, and see
+ // if that is sufficient line-breaking.
+ if (this._preferredBreakpoint > 0) {
+ this._commitLine(this._preferredBreakpoint);
+ if (this._currentLine.length + length <= this._softMargin)
+ return true;
+ }
+
+ // At this point, we can no longer keep within the soft margin. Let us see if
+ // we can fit within the hard margin.
+ if (this._currentLine.length + length <= this._hardMargin) {
+ return true;
+ }
+
+ // Adding the text to length would violate the hard margin as well. Break at
+ // the last emergency breakpoint.
+ if (this._currentLine.length > 0) {
+ this._commitLine(this._currentLine.length);
+ }
+
+ // At this point, if there is still insufficient room in the hard margin, we
+ // can no longer do anything to encode this word. Bail.
+ return this._currentLine.length + length <= this._hardMargin;
+};
+
+/**
+ * Adds a block of text to the current header, inserting a break if necessary.
+ * If mayBreakAfter is true and text does not end in whitespace, a single space
+ * character may be added to the output. If the text could not be added without
+ * violating line length restrictions, an error is thrown instead.
+ *
+ * @protected
+ * @param {String} text The text to add to the output.
+ * @param {Boolean} mayBreakAfter If true, the end of this text is a preferred
+ * breakpoint.
+ */
+HeaderEmitter.prototype.addText = function (text, mayBreakAfter) {
+ // Try to reserve space for the tokens. If we can't, give up.
+ if (!this._reserveTokenSpace(text.length))
+ throw new Error("Cannot encode " + text + " due to length.");
- // Now, match parameters. The RFC 2231 processing comes later, just yank out
- // all of the parameters for now. This is doing via a regex which is
- // continually executed to find each pair. The match to try to find is this:
- // ;<WSP><token><WSP>=<WSP><token> or ;<WSP><token><WSP>=<WSP><quote string>
- // where the first token is any string that isn't whitespace and doesn't
- // contain an = or ; and the second token merely doesn't contain ;.
- var wsp = "[ \t\r\n]*";
- var token = "[^ \t\r\n=;]*";
- var qstring = '"(?:[^\\\\"]|\\\\.)*"?';
- var qstring_or_tok = qstring + "|[^ \t\r\n;]*";
- var regex = new RegExp(";" + wsp + "(" + token + ")" + wsp + "=" + wsp +
- "(" + qstring_or_tok + ")", "g");
+ this._currentLine += text;
+ if (mayBreakAfter) {
+ // Make sure that there is an extra space if text could break afterwards.
+ this._preferredBreakpoint = this._currentLine.length;
+ if (text[text.length - 1] != ' ') {
+ this._currentLine += ' ';
+ }
+ }
+};
+
+/**
+ * Adds a block of text that may need quoting if it contains some character in
+ * qchars. If it is already quoted, no quoting will be applied. If the text
+ * cannot be added without violating maximum line length, an error is thrown
+ * instead.
+ *
+ * @protected
+ * @param {String} text The text to add to the output.
+ * @param {String} qchars The set of characters that cannot appear
+ * outside of a quoted string.
+ * @param {Boolean} mayBreakAfter If true, the end of this text is a preferred
+ * breakpoint.
+ */
+HeaderEmitter.prototype.addQuotable = function (text, qchars, mayBreakAfter) {
+ // Figure out if we need to quote the string. Don't quote a string which
+ // already appears to be quoted.
+ let needsQuote = false;
+ if (!(text[0] == '"' && text[text.length - 1] == '"') && qchars != '') {
+ for (let i = 0; i < text.length; i++) {
+ if (qchars.contains(text[i])) {
+ needsQuote = true;
+ break;
+ }
+ }
+ }
+
+ if (needsQuote)
+ text = '"' + text.replace(/["\\]/g, "\\$&") + '"';
+ this.addText(text, mayBreakAfter);
+};
+
+/**
+ * Adds a block of text that corresponds to the phrase production in RFC 5322.
+ * Such text is a sequence of atoms, quoted-strings, or RFC-2047 encoded-words.
+ * This method will preprocess input to normalize all space sequences to a
+ * single space. If the text cannot be added without violating maximum line
+ * length, an error is thrown instead.
+ *
+ * @protected
+ * @param {String} text The text to add to the output.
+ * @param {String} qchars The set of characters that cannot appear
+ * outside of a quoted string.
+ * @param {Boolean} mayBreakAfter If true, the end of this text is a preferred
+ * breakpoint.
+ */
+HeaderEmitter.prototype.addPhrase = function (text, qchars, mayBreakAfter) {
+ // Collapse all whitespace spans into a single whitespace node.
+ text = text.replace(/[ \t\r\n]+/g, " ");
+
+ // If we have non-ASCII text, encode it using RFC 2047.
+ if (this._useASCII && nonAsciiRe.test(text)) {
+ this.encodeRFC2047Phrase(text, mayBreakAfter);
+ return;
+ }
- // Actually do the matching
- var matches = [], match;
- while ((match = regex.exec(rest)) != null) {
- var name = match[1];
- var value = match[2];
- if (value.length > 0 && value[0] == '"') {
- let end = value.length > 1 && value[value.length - 1] == '"' ?
- value.length - 1 : value.length;
- value = value.substring(1, end).replace(/\\(.)/g, "$1");
+ // If quoting the entire string at once could fit in the line length, then do
+ // so. The check here is very loose, but this will inform us if we are going
+ // to definitely overrun the soft margin.
+ if (text.length < this._softMargin) {
+ try {
+ this.addQuotable(text, qchars, mayBreakAfter);
+ // If we don't have a breakpoint, and the text is encoded as a sequence of
+ // atoms (and not a quoted-string), then make the last space we added a
+ // breakpoint, regardless of the mayBreakAfter setting.
+ if (this._preferredBreakpoint == 0 && text.contains(" ")) {
+ if (this._currentLine[this._currentLine.length - 1] != '"')
+ this._preferredBreakpoint = this._currentLine.lastIndexOf(" ");
+ }
+ return;
+ } catch (e) {
+ // If we get an error at this point, we failed to add the quoted string
+ // because the string was too long. Fall through to the case where we know
+ // that the input was too long to begin with.
}
- matches.push([name, value]);
+ }
+
+ // If the text is too long, split the quotable string at space boundaries and
+ // add each word individually. If we still can't add all those words, there is
+ // nothing that we can do.
+ let words = text.split(' ');
+ for (let i = 0; i < words.length; i++) {
+ this.addQuotable(words[i], qchars,
+ i == words.length - 1 ? mayBreakAfter : true);
+ }
+};
+
+/// A regular expression for characters that need to be encoded.
+let nonAsciiRe = /[^\x20-\x7e]/;
+
+/// The beginnings of RFC 2047 encoded-word
+const b64Prelude = "=?UTF-8?B?", qpPrelude = "=?UTF-8?Q?";
+
+/// A list of ASCII characters forbidden in RFC 2047 encoded-words
+const qpForbidden = "=?_()\"";
+
+const hexString = "0123456789abcdef";
+
+/**
+ * Add a block of text as a single RFC 2047 encoded word. This does not try to
+ * split words if they are too long.
+ *
+ * @private
+ * @param {Uint8Array} encodedText The octets to encode.
+ * @param {Boolean} useQP If true, use quoted-printable; if false,
+ * use base64.
+ * @param {Boolean} mayBreakAfter If true, the end of this text is a
+ * preferred breakpoint.
+ */
+HeaderEmitter.prototype._addRFC2047Word = function (encodedText, useQP,
+ mayBreakAfter) {
+ let binaryString = mimeutils.typedArrayToString(encodedText);
+ if (useQP) {
+ var token = qpPrelude;
+ for (let i = 0; i < encodedText.length; i++) {
+ if (encodedText[i] < 0x20 || encodedText[i] >= 0x7F ||
+ qpForbidden.contains(binaryString[i])) {
+ let ch = encodedText[i];
+ token += "=" + hexString[(ch & 0xf0) >> 4] + hexString[ch & 0x0f];
+ } else if (binaryString[i] == " ") {
+ token += "_";
+ } else {
+ token += binaryString[i];
+ }
+ }
+ token += "?=";
+ } else {
+ var token = b64Prelude + btoa(binaryString) + "?=";
+ }
+ this.addText(token, mayBreakAfter);
+};
+
+/**
+ * Add a block of text as potentially several RFC 2047 encoded-word tokens.
+ *
+ * @protected
+ * @param {String} text The text to add to the output.
+ * @param {Boolean} mayBreakAfter If true, the end of this text is a preferred
+ * breakpoint.
+ */
+HeaderEmitter.prototype.encodeRFC2047Phrase = function (text, mayBreakAfter) {
+ // Start by encoding the text into UTF-8 directly.
+ let encodedText = new TextEncoder("UTF-8").encode(text);
+
+ // Make sure there's enough room for a single token.
+ let minLineLen = b64Prelude.length + 10; // Eight base64 characters plus ?=
+ if (!this._reserveTokenSpace(minLineLen)) {
+ this._commitLine(this._currentLine.length);
}
- // Now matches holds the parameters. Clean up for RFC 2231. There are four
- // cases: param=val, param*=us-ascii'en-US'blah, and param*n= variants. The
- // order of preference is to pick the middle, then the last, then the first.
- // TODO: RFC 2231 is yet to be implemented
- var simpleValues = {};
- for (let [name, value] of matches) {
- // The first match of simple param=val wins.
- if (!(name in simpleValues))
- simpleValues[name] = value;
+ // Try to encode as much UTF-8 text as possible in each go.
+ let b64Len = 0, qpLen = 0, start = 0;
+ let maxChars = (this._softMargin - this._currentLine.length) -
+ (b64Prelude.length + 2);
+ for (let i = 0; i < encodedText.length; i++) {
+ let b64Inc = 0, qpInc = 0;
+ // The length we need for base64 is ceil(length / 3) * 4...
+ if ((i - start) % 3 == 0)
+ b64Inc += 4;
+
+ // The length for quoted-printable is 3 chars only if encoded
+ if (encodedText[i] < 0x20 || encodedText[i] >= 0x7f ||
+ qpForbidden.contains(String.fromCharCode(encodedText[i]))) {
+ qpInc = 3;
+ } else {
+ qpInc = 1;
+ }
+
+ if (b64Len + b64Inc > maxChars && qpLen + qpInc > maxChars) {
+ // Oops, we have too many characters! We need to encode everything through
+ // the current character. However, we can't split in the middle of a
+ // multibyte character. In UTF-8, characters that start with 10xx xxxx are
+ // the middle of multibyte characters, so backtrack until the start
+ // character is legal.
+ while ((encodedText[i] & 0xC0) == 0x80)
+ --i;
+
+ // Add this part of the word and then make a continuation.
+ this._addRFC2047Word(encodedText.subarray(start, i), b64Len >= qpLen,
+ true);
+
+ // Reset the array for parsing.
+ start = i;
+ --i; // Reparse this character as well
+ b64Len = qpLen = 0;
+ maxChars = this._softMargin - b64Prelude.length - 3;
+ } else {
+ // Add the counts for the current variable to the count to encode.
+ b64Len += b64Inc;
+ qpLen += qpInc;
+ }
+ }
+
+ // Add the entire array at this point.
+ this._addRFC2047Word(encodedText.subarray(start), b64Len >= qpLen,
+ mayBreakAfter);
+};
+
+////////////////////////
+// High-level methods //
+////////////////////////
+
+/**
+ * Add the header name, with the colon and trailing space, to the output.
+ *
+ * @public
+ * @param {String} name The name of the header.
+ */
+HeaderEmitter.prototype.addHeaderName = function (name) {
+ this._currentLine = this._currentLine.trimRight();
+ if (this._currentLine.length > 0) {
+ this._commitLine();
+ }
+ this.addText(name + ": ", true);
+};
+
+/**
+ * Add a header and its structured value to the output.
+ *
+ * The name can be any case-insensitive variant of a known structured header;
+ * the output will include the preferred name of the structure instead of the
+ * case put into the name. If no structured encoder can be found, and the input
+ * value is a string, then the header is assumed to be unstructured and the
+ * value is added as if {@link addUnstructured} were called.
+ *
+ * @public
+ * @param {String} name The name of the header.
+ * @param value The structured value of the header.
+ */
+HeaderEmitter.prototype.addStructuredHeader = function (name, value) {
+ let lowerName = name.toLowerCase();
+ if (encoders.has(lowerName)) {
+ this.addHeaderName(preferredSpellings.get(lowerName));
+ encoders.get(lowerName).call(this, value);
+ } else if (typeof value === "string") {
+ // Assume it's an unstructured header
+ this.addHeaderName(name);
+ this.addUnstructured(value);
+ } else {
+ throw new Error("Unknown header " + name);
}
- return [start, simpleValues];
+};
+
+/**
+ * Add a single address to the header. The address is an object consisting of a
+ * possibly-empty display name and an email address.
+ *
+ * @public
+ * @param {Address} addr The address to be added.
+ * @param {String} addr.name The (possibly-empty) name of the address to add.
+ * @param {String} addr.email The email of the address to add.
+ * @see headerparser.parseAddressingHeader
+ */
+HeaderEmitter.prototype.addAddress = function (addr) {
+ // If we have a display name, add that first.
+ if (addr.name) {
+ // This is a simple estimate that keeps names on one line if possible.
+ this._reserveTokenSpace(addr.name.length + addr.email.length + 3);
+ this.addPhrase(addr.name, ",()<>:;.\"", true);
+ this.addText("<", false);
+ }
+
+ // Find the local-part and domain of the address, since the local-part may
+ // need to be quoted separately. Note that the @ goes to the domain, so that
+ // the local-part may be quoted if it needs to be.
+ let at = addr.email.lastIndexOf("@");
+ let localpart = "", domain = ""
+ if (at == -1)
+ localpart = addr.email;
+ else {
+ localpart = addr.email.slice(0, at);
+ domain = addr.email.slice(at);
+ }
+
+ this.addQuotable(localpart, "()<>[]:;@\\,\" !", false);
+ this.addText(domain + (addr.name ? ">" : ""), false);
+};
+
+/**
+ * Add an array of addresses and groups to the output. Such an array may be
+ * found as the output of {@link headerparser.parseAddressingHeader}. Each
+ * element is either an address (an object with properties name and email), or a
+ * group (an object with properties name and group).
+ *
+ * @public
+ * @param {(Address|Group)[]} addresses A collection of addresses to add.
+ * @param {String} addresses[i].name The (possibly-empty) name of the
+ * address or the group to add.
+ * @param {String} [addresses[i].email] The email of the address to add.
+ * @param {Address[]} [addresses[i].group] A list of email addresses in the group.
+ * @see HeaderEmitter.addAddress
+ * @see headerparser.parseAddressingHeader
+ */
+HeaderEmitter.prototype.addAddresses = function (addresses) {
+ let needsComma = false;
+ for (let addr of addresses) {
+ // Ignore a dummy empty address.
+ if ("email" in addr && addr.email === "")
+ continue;
+
+ // Add a comma if this is not the first element.
+ if (needsComma)
+ this.addText(", ", true);
+ needsComma = true;
+
+ if ("email" in addr) {
+ this.addAddress(addr);
+ } else {
+ // A group has format name: member, member;
+ // Note that we still add a comma after the group is completed.
+ this.addPhrase(addr.name, ",()<>:;.\"", false);
+ this.addText(":", true);
+
+ this.addAddresses(addr.group);
+ this.addText(";", true);
+ }
+ }
+};
+
+/**
+ * Add an unstructured header value to the output. This effectively means only
+ * inserting line breaks where necessary, and using RFC 2047 encoding where
+ * necessary.
+ *
+ * @public
+ * @param {String} text The text to add to the output.
+ */
+HeaderEmitter.prototype.addUnstructured = function (text) {
+ // Unstructured text is basically a phrase that can't be quoted. So, if we
+ // have nothing in qchars, nothing should be quoted.
+ this.addPhrase(text, "", false);
+};
+
+/**
+ * Signal that the current header has been finished encoding.
+ *
+ * @public
+ * @param {Boolean} deliverEOF If true, signal to the handler that no more text
+ * will be arriving.
+ */
+HeaderEmitter.prototype.finish = function (deliverEOF) {
+ this._commitLine();
+ if (deliverEOF)
+ this._handler.deliverEOF();
+};
+
+/**
+ * Make a streaming header emitter that outputs on the given handler.
+ *
+ * @param {StreamHandler} handler The handler to consume output
+ * @param options Options to pass into the HeaderEmitter
+ * constructor.
+ * @returns {HeaderEmitter} A header emitter constructed with the given options.
+ */
+function makeStreamingEmitter(handler, options) {
+ return new HeaderEmitter(handler, options);
}
-var StructuredDecoders = {};
-StructuredDecoders['content-type'] = function structure_content_type(value) {
- let [type, params] = extractParameters(value);
- let parts = type.split('/');
- if (parts.length != 2) {
- // Malformed. Return to text/plain. Evil, ain't it?
- params = {};
- parts = ["text", "plain"];
+function StringHandler() {
+ this.value = "";
+ this.deliverData = function (str) { this.value += str; };
+ this.deliverEOF = function () { };
+}
+
+/**
+ * Given a header name and its structured value, output a string containing its
+ * MIME-encoded value. The trailing CRLF for the header is included.
+ *
+ * @param {String} name The name of the structured header.
+ * @param value The value of the structured header.
+ * @param options Options for the HeaderEmitter constructor.
+ * @returns {String} A MIME-encoded representation of the structured header.
+ * @see HeaderEmitter.addStructuredHeader
+ */
+function emitStructuredHeader(name, value, options) {
+ let handler = new StringHandler();
+ let emitter = new HeaderEmitter(handler, options);
+ emitter.addStructuredHeader(name, value);
+ emitter.finish(true);
+ return handler.value;
+}
+
+/**
+ * Given a map of header names and their structured values, output a string
+ * containing all of their headers and their MIME-encoded values.
+ *
+ * This method is designed to be able to emit header values given the headerData
+ * values produced by MIME parsing. Thus, the values of the map are arrays
+ * corresponding to header multiplicity.
+ *
+ * @param {Map(String->Object[])} headerValues A map of header names to arrays
+ * of their structured values.
+ * @param options Options for the HeaderEmitter
+ * constructor.
+ * @returns {String} A MIME-encoded representation of the structured headers.
+ * @see HeaderEmitter.addStructuredHeader
+ */
+function emitStructuredHeaders(headerValues, options) {
+ let handler = new StringHandler();
+ let emitter = new HeaderEmitter(handler, options);
+ for (let instance of headerValues) {
+ instance[1].forEach(function (e) {
+ emitter.addStructuredHeader(instance[0], e)
+ });
}
- let mediatype = parts[0].toLowerCase();
- let subtype = parts[1].toLowerCase();
- let type = mediatype + '/' + subtype;
- let structure = {
- 'mediatype': mediatype,
- 'subtype': subtype,
- 'type': type,
- };
- for (let name in params) {
- structure['param-' + name.toLowerCase()] = params[name];
+ emitter.finish(true);
+ return handler.value;
+}
+
+/**
+ * Add a custom structured MIME encoder to the set of known encoders. These
+ * encoders are used for {@link emitStructuredHeader} and similar functions to
+ * encode richer, more structured values instead of relying on string
+ * representations everywhere.
+ *
+ * Structured encoders are functions which take in a single parameter
+ * representing their structured value. The this parameter is set to be an
+ * instance of {@link HeaderEmitter}, and it is intended that the several public
+ * or protected methods on that class are useful for encoding values.
+ *
+ * There is a large set of structured encoders built-in to the jsmime library
+ * already.
+ *
+ * @param {String} header The header name (in its preferred case) for
+ * which the encoder will be used.
+ * @param {Function(Value)} encoder The structured encoder function.
+ */
+function addStructuredEncoder(header, encoder) {
+ let lowerName = header.toLowerCase();
+ encoders.set(lowerName, encoder);
+ if (!preferredSpellings.has(lowerName))
+ preferredSpellings.set(lowerName, header);
+}
+
+return Object.freeze({
+ addStructuredEncoder: addStructuredEncoder,
+ emitStructuredHeader: emitStructuredHeader,
+ emitStructuredHeaders: emitStructuredHeaders,
+ makeStreamingEmitter: makeStreamingEmitter
+});
+
+});
+
+def('jsmime', function(require) {
+ return {
+ MimeParser: require('./mimeparser'),
+ headerparser: require('./headerparser'),
+ headeremitter: require('./headeremitter')
}
- return structure;
-};
-
-
-// Gather up the header parsing things for easier export as symbols.
-var HeaderParser = Object.freeze({
- extractParameters: extractParameters
});
+ return mods['jsmime'];
+}));
copy from mailnews/test/data/base64-1
copy to mailnews/mime/jsmime/test/data/base64-1
copy from mailnews/test/data/base64-2
copy to mailnews/mime/jsmime/test/data/base64-2
copy from mailnews/test/data/basic1
copy to mailnews/mime/jsmime/test/data/basic1
copy from mailnews/test/data/bug505221
copy to mailnews/mime/jsmime/test/data/bug505221
copy from mailnews/test/data/bugmail11
copy to mailnews/mime/jsmime/test/data/bugmail11
new file mode 100644
index 0000000000000000000000000000000000000000..d33046820d2c9b173e86ecdf2c584d12f3d65efc
GIT binary patch
literal 1979
zc$}?O(N5bi6wPy_{Dak37HA=9Q;?-WVQg%SN$Uh}lat)EVU8Var=@*n(mr5F)84ne
z>}@amh5gFL{$aLh5lDzN2r0h4zOnB)KG#lqoGD<+?@vltM@2<dUJ{{Jiu@GP4bMd}
zuUJaN1UKeh!*e~iKg=c44?zk}6)HeVG61B8M4=&(h;k(n&rmWXLW1@ynX>^J5s~vs
zB47zmbwEBsihHh9LMQMQNVA%y)XcrUx)i(wK_|=TC08?;G3dD?n31M{qBvKcYv^lg
z45fLNfzX?~O0#^)9MI()A^>qQAS|CkiZtnx<U+3N?0_=uDWs{`k%WY67bp}apIK<b
zqyS6a(7hcIX}F19Kqy5@5GE;6IcHX###5k_qcImWZRiZ1I~Rx&HmJ0sRv>PP`~JKF
zp4+=oHdBI?l9HTl%#rc4*9U$Sg<ZdWZMK2e4E>#%{OjnX(t4(Tskfj=Ou+!*WDP?H
zLx7VGUTxweGL9am-I)VnT{>_2PyDDplQhC)&EQ5IYXcjxtc(-QhF7oR?#+m{wW-Qs
zo~gITxvWn&TE%h8*v#6bYa^NUQyjH6akq)dDvrp`!wVP_3GKk}2!h>y5IomySLim>
zZ4?B(t?)&D7>0-Yb+HF=D{8Ja7ep_UCE6i$Fz6TTfE<lZ4tuoSKH95`TB*!$wT-e1
z!A_^!YX8-Eyfob&nZ7;U-rL{AxGpSIiT_U2uYC;WqQv&<=v$IxI8N-7nH!@I>}=S?
z5Zy<LyNiGM?epA=^ACUCpZ`7obaD3c;`2wOyWc;~>f*)uAY2IU+vZpH<?;7l55J*X
zHP%ALDJELN5R=gOCKBOp+o;|Rag;2^<WA_$Y;#+j9rH>c{c9=o=O6BOfaCuh;GEfX
ep6hHm++ohKwv{6s<WL8+Eo?M(ZovD#=l%n-Q<NzH
new file mode 100644
--- /dev/null
+++ b/mailnews/mime/jsmime/test/data/message-encoded
@@ -0,0 +1,24 @@
+Content-Type: multipart/mixed; boundary="iamaboundary"
+
+This is a text message in MIME format.
+This part shouldn't appear in the output.
+
+--iamaboundary
+Content-Type: message/rfc822
+
+Subject: I am a subject
+
+This is a plain-text message.
+--iamaboundary
+Content-Type: message/global
+Content-Transfer-Encoding: base64
+
+U3ViamVjdDog56eB44Gv44CB5Lu25ZCN5Y2I5YmNDQoNCkkgYW0gYSBwbGFpbi10ZXh0IG1lc3NhZ2Uu
+--iamaboundary
+Content-Type: message/news
+Content-Transfer-Encoding: quoted-printable
+
+Subject: =e7=a7=81=e3=81=af=e3=80=81=e4=bb=b6=e5=90=8d=e5=8d=88=e5=89=8d
+
+I am an encoded plain-text message.
+--iamaboundary--
copy from mailnews/test/data/mime-torture
copy to mailnews/mime/jsmime/test/data/mime-torture
copy from mailnews/test/data/multipart-base64-1
copy to mailnews/mime/jsmime/test/data/multipart-base64-1
copy from mailnews/test/data/multipart-base64-2
copy to mailnews/mime/jsmime/test/data/multipart-base64-2
copy from mailnews/test/data/multipart-base64-3
copy to mailnews/mime/jsmime/test/data/multipart-base64-3
copy from mailnews/test/data/multipart-complex1
copy to mailnews/mime/jsmime/test/data/multipart-complex1
copy from mailnews/test/data/multipart-complex2
copy to mailnews/mime/jsmime/test/data/multipart-complex2
copy from mailnews/test/data/multipart1
copy to mailnews/mime/jsmime/test/data/multipart1
copy from mailnews/test/data/multipart2
copy to mailnews/mime/jsmime/test/data/multipart2
copy from mailnews/test/data/multipart3
copy to mailnews/mime/jsmime/test/data/multipart3
copy from mailnews/test/data/multipart4
copy to mailnews/mime/jsmime/test/data/multipart4
copy from mailnews/test/data/multipartmalt-detach
copy to mailnews/mime/jsmime/test/data/multipartmalt-detach
new file mode 100644
--- /dev/null
+++ b/mailnews/mime/jsmime/test/data/shift-jis-image
@@ -0,0 +1,21 @@
+Subject: Shift-JIS and PNG test
+Content-Type: multipart/mixed; boundary="vungrzvzr"
+
+--vungrzvzr
+Content-Type: text/plain; charset=Shift-JIS
+Content-Transfer-Encoding: 8bit
+Content-Description: ƒPƒcƒ@ƒ‹ƒRƒAƒgƒ‹
+
+Portable Network Graphicsiƒ|[ƒ^ƒuƒ‹Eƒlƒbƒgƒ[ƒNEƒOƒ‰ƒtƒBƒbƒNƒXAPNGj‚̓Rƒ“ƒsƒ…[ƒ^‚Ńrƒbƒgƒ}ƒbƒv‰æ‘œ‚ðˆµ‚¤ƒtƒ@ƒCƒ‹ƒtƒH[ƒ}ƒbƒg‚Å‚ ‚éBˆ³kƒAƒ‹ƒSƒŠƒYƒ€‚Æ‚µ‚ÄDeflate‚ðÌ—p‚µ‚Ä‚¢‚éAˆ³k‚É‚æ‚é‰æŽ¿‚Ì—ò‰»‚̂Ȃ¢‰Â‹tˆ³k‚̉摜ƒtƒ@ƒCƒ‹ƒtƒH[ƒ}ƒbƒg‚Å‚ ‚éB
+
+--vungrzvzr
+Content-Type: image/png
+Content-Transfer-Encoding: base64
+Content-Description: ƒPƒcƒ@ƒ‹ƒRƒAƒgƒ‹
+
+iVBORw0KGgoAAAANSUhEUgAAAIAAAABECAIAAADGJao+AAAAwklEQVR4Xu3UgQbDMBRA0bc03f//
+b7N0VuqJEmwoc+KqNEkDh9b+2HuJu1KNO4f+AQCAAAAQAAACAEAAAAgAAAEAIAAABACAAAAQAAAC
+AEAAAAgAAAEAIAAAANReamRLlPWYfNH0klxcPs+cP3NxWF+vi3lb7pa2R+vx6tHOtuN1O+a5lY3H
+zgM5ya/GM5N7ZjfPq7/5yS8IgAAAEAAAAgBAAAAIAAABACAAAAQAgAAAEAAAAgBAAAAIAAABACAA
+AIw322gDIPvtlmUAAAAASUVORK5CYII=
+--vungrzvzr--
new file mode 100644
--- /dev/null
+++ b/mailnews/mime/jsmime/test/head_xpcshell_glue.js
@@ -0,0 +1,167 @@
+// This file needs to contain glue to rephrase the Mocha testsuite framework in
+// a way that the xpcshell test suite can understand.
+
+Components.utils.import("resource://gre/modules/osfile.jsm");
+Components.utils.import("resource://gre/modules/Promise.jsm");
+Components.utils.import("resource://gre/modules/Services.jsm");
+Components.utils.import("resource://gre/modules/Task.jsm");
+Components.utils.import("resource://testing-common/Assert.jsm");
+var requireCache = new Map();
+
+// Preload an assert module
+var assert = new Assert();
+assert.doesNotThrow = function (block, message) {
+ message = (message ? ' ' + message : '.');
+ try {
+ block();
+ } catch (e) {
+ this.report(true, e, null, 'Got unwanted exception' + message);
+ }
+};
+requireCache.set("assert", assert);
+
+// Preload an fs module
+var Cc = Components.classes, Ci = Components.interfaces;
+var fs = {
+  // Stand-in for node.js fs.readFile(filename[, options], callback).
+  readFile: function (filename, options, callback) {
+    if (callback === undefined) {
+      callback = options;
+      options = {};
+    }
+    // Encoding conversion is not supported by this shim; requesting one (node
+    // also accepts a bare string for it) makes the read fail via callback.
+    var translator = (contents => contents);
+    if (typeof options === 'string' || (options && 'encoding' in options)) {
+      translator = function () {
+        throw new Error("I can't do this!");
+      };
+    }
+
+    Promise.resolve(filename)
+      .then(do_get_file)
+      .then(file => OS.File.read(file.path))
+      .then(translator)
+      .then(contents => callback(undefined, contents), callback);
+  },
+};
+requireCache.set("fs", fs);
+Services.scriptloader.loadSubScript("resource:///modules/jsmime/jsmime.js");
+requireCache.set("jsmime", jsmime);
+
+function require(path) {
+ if (requireCache.has(path))
+ return requireCache.get(path);
+
+ var file = "resource:///modules/jsmime/" + path + ".js";
+ var globalObject = {
+ define: innerDefine.bind(this, path),
+ };
+ Services.scriptloader.loadSubScript(file, globalObject);
+ return requireCache.get(path);
+}
+
+function innerDefine(moduleName, dfn) {
+ if (typeof dfn !== "function")
+ throw new Error("What is going on here?");
+ function resolvingRequire(path) {
+ if (path.startsWith("./"))
+ path = path.substring(2);
+ return require(path);
+ }
+ var result = dfn(resolvingRequire);
+ requireCache.set(moduleName, result);
+}
+
+var define = innerDefine.bind(this, "xpcshell-test");
+
+///////////////////////////
+// Mocha TDD UI Bindings //
+///////////////////////////
+
+/**
+ * A block of tests, from the suite class.
+ */
+function MochaSuite(name) {
+ this.name = name;
+ this.setup = [];
+ this.tests = [];
+ this.teardown = [];
+ this.suites = [];
+}
+
+/// The real code for running a suite of tests, written as a generator.
+MochaSuite.prototype._runSuite = function *() {
+ do_print("Running suite " + this.name);
+ for (let setup of this.setup) {
+ yield runFunction(setup);
+ }
+ for (let test of this.tests) {
+ do_print("Running test " + test.name);
+ yield runFunction(test.test);
+ }
+ for (let suite of this.suites) {
+ yield suite.runSuite();
+ }
+ for (let fn of this.teardown) {
+ yield runFunction(fn);
+ }
+ do_print("Finished suite " + this.name);
+};
+
+/// The outer call to run a test suite, which returns a promise of completion.
+MochaSuite.prototype.runSuite = function () {
+ return Task.spawn(this._runSuite.bind(this));
+};
+
+/// Run the given function, returning a promise of when the test will complete.
+function runFunction(fn) {
+  return new Promise(function (resolve, reject) {
+    // Mocha-style callback: null/undefined means success, else failure.
+    function onEnd(error) {
+      if (error === undefined || error === null)
+        resolve();
+      else
+        reject(error);
+    }
+    // If the function is expecting an argument, that argument is the callback
+    // above. If it's not, then it may be returning a promise.
+    if (fn.length == 1) {
+      fn(onEnd);
+    } else {
+      // resolve() adopts a returned promise and passes any other value
+      // straight through, so both kinds of test function work here.
+      resolve(fn());
+    }
+  });
+}
+
+var currentSuite = new MochaSuite('');
+// Mocha TDD `suite`: registers a child suite; non-ASCII names are masked.
+function suite(name, tests) {
+  name = name.toString();
+  if (/[^\x00-\x7f]/.test(name)) name = "<unprintable name>";
+  let suiteParent = currentSuite;
+  currentSuite = new MochaSuite(name);
+  suiteParent.suites.push(currentSuite);
+  tests(); // suite()/test()/setup()/teardown() calls in here attach to it
+  currentSuite = suiteParent;
+}
+// Mocha TDD `test`: queues a test on the current suite; non-ASCII names masked.
+function test(name, block) {
+  name = name.toString();
+  if (/[^\x00-\x7f]/.test(name)) name = "<unprintable name>";
+  currentSuite.tests.push({name: name, test: block});
+}
+function setup(block) { // Mocha TDD hook: run before the current suite's tests
+  currentSuite.setup.push(block);
+}
+function teardown(block) { // Mocha TDD hook: run after the current suite ends
+  currentSuite.teardown.push(block);
+}
+
+/// The actual binding xpcshell needs to do its work.
+function run_test() {
+  add_task(() => currentSuite.runSuite()); // start only when the task is run
+  run_next_test();
+}
new file mode 100644
--- /dev/null
+++ b/mailnews/mime/jsmime/test/test_custom_headers.js
@@ -0,0 +1,69 @@
+"use strict";
+define(function (require) {
+
+var assert = require('assert');
+var jsmime = require('jsmime');
+
+function arrayTest(data, fn) {
+ fn.toString = function () {
+ let text = Function.prototype.toString.call(this);
+ text = text.replace(/data\[([0-9]*)\]/g, function (m, p) {
+ return JSON.stringify(data[p]);
+ });
+ return text;
+ };
+ return test(data[0], fn);
+}
+
+// Registers a suite named after |header| holding one test per entry of
+// |tests|; each entry is a [raw header value, expected parse result] pair.
+function testHeader(header, tests) {
+  suite(header, function () {
+    tests.forEach(data => arrayTest(data, function () {
+      assert.deepEqual(
+        jsmime.headerparser.parseStructuredHeader(header, data[0]), data[1]);
+    }));
+  });
+}
+
+function makeCT(media, sub, params) {
+ var object = new Map();
+ object.mediatype = media;
+ object.subtype = sub;
+ object.type = media + "/" + sub;
+ for (let k in params)
+ object.set(k, params[k]);
+ return object;
+}
+suite('Custom decoder support', function () {
+ function customDecoder(values) {
+ let value = values.join('');
+ return atob(value);
+ }
+ function customEncoder(value) {
+ this.addText(btoa(value), true);
+ }
+ test('addStructuredEncoder', function () {
+ assert.equal('X-Base64: String\r\n',
+ jsmime.headeremitter.emitStructuredHeader('X-Base64', 'String', {}));
+ jsmime.headeremitter.addStructuredEncoder('X-Base64', customEncoder);
+ assert.equal('X-Base64: U3RyaW5n\r\n',
+ jsmime.headeremitter.emitStructuredHeader('X-Base64', 'String', {}));
+ assert.equal('X-Base64: U3RyaW5n\r\n',
+ jsmime.headeremitter.emitStructuredHeader('x-bASe64', 'String', {}));
+ });
+ test('addStructuredDecoder', function () {
+ assert.throws(function () {
+ jsmime.headerparser.parseStructuredHeader('X-Base64', 'U3RyaW5n');
+ });
+ jsmime.headerparser.addStructuredDecoder('X-Base64', customDecoder);
+ assert.equal('String',
+ jsmime.headerparser.parseStructuredHeader('X-Base64', 'U3RyaW5n'));
+ assert.throws(function () {
+ jsmime.headerparser.addStructuredDecoder('To', customDecoder);
+ });
+ });
+});
+
+});
+
new file mode 100644
--- /dev/null
+++ b/mailnews/mime/jsmime/test/test_header.js
@@ -0,0 +1,515 @@
+"use strict";
+define(function(require) {
+
+var headerparser = require('jsmime').headerparser;
+var assert = require('assert');
+
+function arrayTest(data, fn) {
+ fn.toString = function () {
+ let text = Function.prototype.toString.call(this);
+ text = text.replace(/data\[([0-9]*)\]/g, function (m, p) {
+ return JSON.stringify(data[p]);
+ });
+ return text;
+ };
+ return test(data[0], fn);
+}
+suite('headerparser', function () {
+ suite('parseParameterHeader', function () {
+ let header_tests = [
+ ['multipart/related', ["multipart/related", {}]],
+ ["a ; b=v", ["a", {"b": "v"}]],
+ ["a ; b='v'", ["a", {"b": "'v'"}]],
+ ['a; b = "v"', ["a", {"b": "v"}]],
+ ["a;b=1;b=2", ["a", {"b": "1"}]],
+ ["a;b=2;b=1", ["a", {"b": "2"}]],
+ ['a;b="a;b"', ["a", {"b": "a;b"}]],
+ ['a;b="\\\\"', ["a", {"b": "\\"}]],
+ ['a;b="a\\b\\c"', ["a", {"b": "abc"}]],
+ ['a;b=1;c=2', ["a", {"b": "1", "c": "2"}]],
+ ['a;b="a\\', ["a", {"b": "a"}]],
+ ['a;b', ["a", {}]],
+ ['a;b=";";c=d', ["a", {"b": ';', 'c': "d"}]],
+ ];
+ header_tests.forEach(function (data) {
+ arrayTest(data, function () {
+ let testMap = new Map();
+ for (let key in data[1][1])
+ testMap.set(key, data[1][1][key]);
+ testMap.preSemi = data[1][0];
+ assert.deepEqual(headerparser.parseParameterHeader(data[0], false, false),
+ testMap);
+ });
+ });
+ });
+ suite('parseParameterHeader (2231/2047 support)', function () {
+ let header_tests = [
+ // Copied from test_MIME_params.js and adapted
+ ["attachment;", ["attachment", {}]],
+ ["attachment; filename=basic", ["attachment", {filename: "basic"}]],
+ ["attachment; filename=\"\\\"\"", ["attachment", {filename: '"'}]],
+ ["attachment; filename=\"\\x\"", ["attachment", {filename: "x"}]],
+ ["attachment; filename=\"\"", ["attachment", {filename: ""}]],
+ ["attachment; filename=", ["attachment", {filename: ""}]],
+ ["attachment; filename X", ["attachment", {}]],
+ ["attachment; filename = foo-A.html",
+ ["attachment", {filename: "foo-A.html"}]],
+ ["attachment; filename=\"", ["attachment", {filename: ""}]],
+ ["attachment; filename=foo; trouble", ["attachment", {filename: "foo"}]],
+ ["attachment; filename=foo; trouble ", ["attachment", {filename: "foo"}]],
+ ["attachment", ["attachment", {}]],
+ ["attachment; filename=foo", ["attachment", {filename: "foo"}]],
+ ["attachment; filename=\"foo\"", ["attachment", {filename: "foo"}]],
+ ["attachment; filename='foo'", ["attachment", {filename: "'foo'"}]],
+ ["attachment; filename=\"=?UTF-8?Q?foo?=\"",
+ ["attachment", {filename: "foo"}]],
+ ["attachment; filename==?UTF-8?Q?foo?=",
+ ["attachment", {filename: "foo"}]],
+ // 2231/5987 tests from test_MIME_params.js
+ ["attachment; filename*=UTF-8''extended",
+ ["attachment", {filename: "extended"}]],
+ ["attachment; filename=basic; filename*=UTF-8''extended",
+ ["attachment", {filename: "extended"}]],
+ ["attachment; filename*=UTF-8''extended; filename=basic",
+ ["attachment", {filename: "extended"}]],
+ ["attachment; filename*0=foo; filename*1=bar",
+ ["attachment", {filename: "foobar"}]],
+ ["attachment; filename*0=first; filename*0=wrong; filename=basic",
+ ["attachment", {filename: "first"}]], // or basic?
+ ["attachment; filename*0=first; filename*1=second; filename*0=wrong",
+ ["attachment", {filename: "firstsecond"}]], // or nothing?
+ ["attachment; filename=basic; filename*0=foo; filename*1=bar",
+ ["attachment", {filename: "foobar"}]],
+ ["attachment; filename=basic; filename*0=first; filename*0=wrong; " +
+ "filename*=UTF-8''extended", ["attachment", {filename: "extended"}]],
+ ["attachment; filename=basic; filename*=UTF-8''extended; filename*0=foo" +
+ "; filename*1=bar", ["attachment", {filename: "extended"}]],
+ ["attachment; filename*0=foo; filename*2=bar",
+ ["attachment", {filename: "foo"}]],
+ ["attachment; filename*0=foo; filename*01=bar",
+ ["attachment", {filename: "foo"}]],
+ ["attachment; filename=basic; filename*0*=UTF-8''multi; filename*1=line" +
+ "; filename*2*=%20extended",
+ ["attachment", {filename: "multiline extended"}]],
+ ["attachment; filename=basic; filename*0*=UTF-8''multi; filename*1=line" +
+ "; filename*3*=%20extended", ["attachment", {filename: "multiline"}]],
+ ["attachment; filename=basic; filename*0*=UTF-8''multi; filename*1=line" +
+ "; filename*0*=UTF-8''wrong; filename*1=bad; filename*2=evil",
+ ["attachment", {filename: "multiline"}]],
+ ["attachment; filename=basic; filename*0=UTF-8''multi; filename*=UTF-8'" +
+ "'extended; filename*1=line; filename*2*=%20extended",
+ ["attachment", {filename: "extended"}]],
+ ["attachment; filename*0=UTF-8''unescaped; filename*1*=%20so%20includes" +
+ "%20UTF-8''%20in%20value",
+ ["attachment", {filename: "UTF-8''unescaped so includes UTF-8'' in value"}]],
+ ["attachment; filename=basic; filename*0*=UTF-8''multi; filename*1=line" +
+ "; filename*0*=UTF-8''wrong; filename*1=bad; filename*2=evil",
+ ["attachment", {filename: "multiline"}]],
+ ["attachment; filename=basic; filename*1=foo; filename*2=bar",
+ ["attachment", {filename: "basic"}]],
+ ["attachment; filename=basic; filename*0*=UTF-8''0; filename*1=1; filen" +
+ "ame*2=2;filename*3=3;filename*4=4;filename*5=5;filename*6=6;filename" +
+ "*7=7;filename*8=8;filename*9=9;filename*10=a;filename*11=b;filename*" +
+ "12=c;filename*13=d;filename*14=e;filename*15=f",
+ ["attachment", {filename: "0123456789abcdef"}]],
+ ["attachment; filename=basic; filename*0*=UTF-8''0; filename*1=1; filen" +
+ "ame*2=2;filename*3=3;filename*4=4;filename*5=5;filename*6=6;filename" +
+ "*7=7;filename*8=8;filename*9=9;filename*10=a;filename*11=b;filename*" +
+ "12=c;filename*14=e", ["attachment", {filename: "0123456789abc"}]],
+ ["attachment; filename*1=multi; filename*2=line; filename*3*=%20extended",
+ ["attachment", {}]],
+ ["attachment; filename=basic; filename*0*=UTF-8''0; filename*1=1; filen" +
+ "ame*2=2;filename*3=3;filename*4=4;filename*5=5;filename*6=6;filename" +
+ "*7=7;filename*8=8;filename*9=9;filename*10=a;filename*11=b;filename*" +
+ "12=c;filename*13=d;filename*15=f;filename*14=e",
+ ["attachment", {filename: "0123456789abcdef"}]],
+ ["attachment; filename=basic; filename*0*=UTF-8''0; filename*1a=1",
+ ["attachment", {filename: "0"}]],
+ ["attachment; filename=basic; filename*0*=UTF-8''0; filename*1111111111" +
+ "1111111111111111111111111=1", ["attachment", {filename: "0"}]],
+ ["attachment; filename=basic; filename*0*=UTF-8''0; filename*-1=1",
+ ["attachment", {filename: "0"}]],
+ ["attachment; filename=basic; filename*0=\"0\"; filename*1=1; filename*" +
+ "2*=%32", ["attachment", {filename: "012"}]],
+ ["attachment; filename=basic; filename**=UTF-8''0;",
+ ["attachment", {filename: "basic"}]],
+ ["attachment; filename=IT839\x04\xB5(m8)2.pdf;",
+ ["attachment", {filename: "IT839\u0004\u00b5(m8)2.pdf"}]],
+ ["attachment; filename*=utf-8''%41", ["attachment", {filename: "A"}]],
+ // See bug 651185 and bug 703015
+ ["attachment; filename*=\"utf-8''%41\"", ["attachment", {filename: "A"}]],
+ ["attachment; filename *=utf-8''foo-%41", ["attachment", {}]],
+ ["attachment; filename*=''foo", ["attachment", {}]],
+ ["attachment; filename*=a''foo", ["attachment", {}]],
+ // Bug 692574: we should ignore this one...
+ ["attachment; filename*=UTF-8'foo-%41",
+ ["attachment", {filename: "foo-A"}]],
+ ["attachment; filename*=foo-%41", ["attachment", {}]],
+ ["attachment; filename*=UTF-8'foo-%41; filename=bar",
+ ["attachment", {filename: "foo-A"}]],
+ ["attachment; filename*=ISO-8859-1''%c3%a4",
+ ["attachment", {filename: "\u00c3\u00a4"}]],
+ ["attachment; filename*=ISO-8859-1''%e2%82%ac",
+ ["attachment", {filename: "\u00e2\u201a\u00ac"}]],
+ ["attachment; filename*=UTF-8''A%e4B", ["attachment", {}]],
+ ["attachment; filename*=UTF-8''A%e4B; filename=fallback",
+ ["attachment", {filename: "fallback"}]],
+ ["attachment; filename*0*=UTF-8''A%e4B; filename=fallback",
+ ["attachment", {filename: "fallback"}]],
+ ["attachment; filename*0*=ISO-8859-15''euro-sign%3d%a4; filename*=ISO-8" +
+ "859-1''currency-sign%3d%a4",
+ ["attachment", {filename: "currency-sign=\u00a4"}]],
+ ["attachment; filename*=ISO-8859-1''currency-sign%3d%a4; filename*0*=IS" +
+ "O-8859-15''euro-sign%3d%a4",
+ ["attachment", {filename: "currency-sign=\u00a4"}]],
+ ["attachment; filename=basic; filename*0=\"foo\"; filename*1=\"\\b\\a\\" +
+ "r\"", ["attachment", {filename: "foobar"}]],
+ ["attachment; filename=basic; filename*0=\"foo\"; filename*1=\"\\b\\a\\",
+ ["attachment", {filename: "fooba"}]],
+ ["attachment; filename=\"\\b\\a\\", ["attachment", {filename: "ba"}]],
+ // According to comments and bugs, this works in necko, but it doesn't
+ // appear that it ought to. See bug 732369 for more info.
+ ["attachment; extension=bla filename=foo",
+ ["attachment", {extension: "bla"}]],
+ ["attachment; filename==?ISO-8859-1?Q?foo-=E4.html?=",
+ ["attachment", {filename: "foo-\u00e4.html"}]],
+ ["attachment; filename=\"=?ISO-8859-1?Q?foo-=E4.html?=\"",
+ ["attachment", {filename: "foo-\u00e4.html"}]],
+ ["attachment; filename=\"=?ISO-8859-1?Q?foo-=E4.html?=\"; filename*=UTF" +
+ "-8''5987", ["attachment", {filename: "5987"}]],
+ ];
+ header_tests.forEach(function (data) {
+ arrayTest(data, function () {
+ let testMap = new Map();
+ for (let key in data[1][1])
+ testMap.set(key, data[1][1][key]);
+ testMap.preSemi = data[1][0];
+ assert.deepEqual(headerparser.parseParameterHeader(data[0], true, true),
+ testMap);
+ });
+ });
+ });
+ suite('parseAddressingHeader', function () {
+ let header_tests = [
+ ["", []],
+ ["Joe Schmoe <jschmoe@invalid.invalid>",
+ [{name: "Joe Schmoe", email: "jschmoe@invalid.invalid"}]],
+ ["user@tinderbox.invalid",
+ [{name: "", email: "user@tinderbox.invalid"}]],
+ ["Hello Kitty <a@b.c>, No Kitty <b@b.c>",
+ [{name: "Hello Kitty", email: "a@b.c"},
+ {name: "No Kitty", email: "b@b.c"}]],
+ ["undisclosed-recipients:;",
+ [{name: "undisclosed-recipients", group: []}]],
+ ["me@[127.0.0.1]", [{name: "", email: "me@[127.0.0.1]"}]],
+ ["\"me\"@a.com", [{name: "", email: "me@a.com"}]],
+ ["\"!\"@a.com", [{name: "", email: "\"!\"@a.com"}]],
+ ["\"\\!\"@a.com", [{name: "", email: "\"!\"@a.com"}]],
+ ["\"\\\\!\"@a.com", [{name: "", email: "\"\\\\!\"@a.com"}]],
+ ["Coward (not@email) <real@email.com>",
+ [{name: "Coward (not@email)", email: "real@email.com"}]],
+ ["Group: a@b.com, b@c.com;", [{name: "Group", group:
+ [{name: "", email: "a@b.com"}, {name: "", email: "b@c.com"}]}]],
+ ["a@invalid.invalid, Group: a@b.com;",
+ [{name: "", email: "a@invalid.invalid"},
+ {name: "Group", group: [{name: "", email: "a@b.com"}]}]],
+ ["Group A: a@b.com;, Group B: b@b.com;",
+ [{name: "Group A", group: [{name: "", email: "a@b.com"}]},
+ {name: "Group B", group: [{name: "", email: "b@b.com"}]}]],
+ ["Crazy (<Stupid \"name\") <simple@a.email>",
+ [{name: "Crazy (<Stupid name)", email: "simple@a.email"}]],
+ ["Group: Real <a@b.com>, Fake <a@b.com>", [{name: "Group", group:
+ [{name: "Real", email: "a@b.com"},
+ {name: "Fake", email: "a@b.com"}]}]],
+ ["\"Joe Q. Public\" <john.q.public@example.com>," +
+ "Test <\"abc!x.yz\"@foo.invalid>, Test <test@[xyz!]>," +
+ "\"Giant; \\\"Big\\\" Box\" <sysservices@example.net>",
+ [{name: "Joe Q. Public", email: "john.q.public@example.com"},
+ {name: "Test", email: "\"abc!x.yz\"@foo.invalid"},
+ {name: "Test", email: "test@[xyz!]"},
+ {name: "Giant; \"Big\" Box", email: "sysservices@example.net"}]],
+ ["Unfortunate breaking < so . many . spaces @ here . invalid >",
+ [{name: "Unfortunate breaking", email: "so.many.spaces@here.invalid"}]],
+ ["so . many . spaces @ here . invalid",
+ [{name: "", email: "so.many.spaces@here.invalid"}]],
+ ["abc@foo.invalid", [{name:"", email: "abc@foo.invalid"}]],
+ ["foo <ghj@foo.invalid>", [{name: "foo", email: "ghj@foo.invalid"}]],
+ ["abc@foo.invalid, foo <ghj@foo.invalid>",
+ [{name: "", email: "abc@foo.invalid"},
+ {name: "foo", email: "ghj@foo.invalid"}]],
+ ["foo bar <foo@bar.invalid>",
+ [{name: "foo bar", email: "foo@bar.invalid"}]],
+ ["foo bar <foo@bar.invalid>, abc@foo.invalid, foo <ghj@foo.invalid>",
+ [{name: "foo bar", email: "foo@bar.invalid"},
+ {name: "", email: "abc@foo.invalid"},
+ {name: "foo", email: "ghj@foo.invalid"}]],
+ ["foo\u00D0 bar <foo@bar.invalid>, \u00F6foo <ghj@foo.invalid>",
+ [{name: "foo\u00D0 bar", email: "foo@bar.invalid"},
+ {name: "\u00F6foo", email: "ghj@foo.invalid"}]],
+ ["Undisclosed recipients:;",
+ [{name: "Undisclosed recipients", group: []}]],
+ ["\" \"@a a;b",
+ [{name: "", email: "\" \"@a a"},
+ {name: "b", email: ""}]],
+ ["Undisclosed recipients:;\0:; foo <ghj@veryveryveryverylongveryveryver" +
+ "yveryinvalidaddress.invalid>",
+ [{name: "Undisclosed recipients", group: []},
+ {name: "\0", group: []},
+ {name: "foo", email: "ghj@veryveryveryverylongveryveryveryveryinvali" +
+ "daddress.invalid"}]],
+ // XXX: test_nsIMsgHeaderParser2 has an empty one here...
+ ["<a;a@invalid",
+ [{name: "", email: "a"}, {name: "", email: "a@invalid"}]],
+ ["me@foo.invalid", [{name: "", email: "me@foo.invalid"}]],
+ ["me@foo.invalid, me2@foo.invalid",
+ [{name: "", email: "me@foo.invalid"},
+ {name: "", email: "me2@foo.invalid"}]],
+ ['"foo bar" <me@foo.invalid>',
+ [{name: "foo bar", email: "me@foo.invalid"}]],
+ ['"foo bar" <me@foo.invalid>, "bar foo" <me2@foo.invalid>',
+ [{name: "foo bar", email: "me@foo.invalid"},
+ {name: "bar foo", email: "me2@foo.invalid"}]],
+ ["A Group:Ed Jones <c@a.invalid>,joe@where.invalid,John <jdoe@one.invalid>;",
+ [{name: "A Group", group: [
+ {name: "Ed Jones", email: "c@a.invalid"},
+ {name: "", email: "joe@where.invalid"},
+ {name: "John", email: "jdoe@one.invalid"}]}]],
+ ['mygroup:;, empty:;, foo@foo.invalid, othergroup:bar@foo.invalid, bar2' +
+ '@foo.invalid;, y@y.invalid, empty:;',
+ [{name: "mygroup", group: []},
+ {name: "empty", group: []},
+ {name: "", email: "foo@foo.invalid"},
+ {name: "othergroup", group: [
+ {name: "", email: "bar@foo.invalid"},
+ {name: "", email: "bar2@foo.invalid"}
+ ]},
+ {name: "", email: "y@y.invalid"},
+ {name: "empty", group: []}]],
+ ["Undisclosed recipients:;;;;;;;;;;;;;;;;,,,,,,,,,,,,,,,,",
+ [{name: "Undisclosed recipients", group: []}]],
+ ["a@xxx.invalid; b@xxx.invalid",
+ [{name: "", email: "a@xxx.invalid"},
+ {name: "", email: "b@xxx.invalid"}]],
+ ["a@xxx.invalid; B <b@xxx.invalid>",
+ [{name: "", email: "a@xxx.invalid"},
+ {name: "B", email: "b@xxx.invalid"}]],
+ ['"A " <a@xxx.invalid>; b@xxx.invalid',
+ [{name: "A ", email: "a@xxx.invalid"},
+ {name: "", email: "b@xxx.invalid"}]],
+ ["A <a@xxx.invalid>; B <b@xxx.invalid>",
+ [{name: "A", email: "a@xxx.invalid"},
+ {name: "B", email: "b@xxx.invalid"}]],
+ ["A (this: is, a comment;) <a.invalid>; g: (this: is, <a> comment;) C" +
+ "<c.invalid>, d.invalid;",
+ [{name: "A (this: is, a comment;)", email: "a.invalid"},
+ {name: "g", group: [
+ {name: "(this: is, <a> comment;) C", email: "c.invalid"},
+ {name: "d.invalid", email: ""}]}]],
+ ['Mary Smith <mary@x.invalid>, extra:;, group:jdoe@example.invalid; Who' +
+ '? <one@y.invalid>; <boss@nil.invalid>, "Giant; \\"Big\\" Box" <sysse' +
+ 'rvices@example.invalid>, ',
+ [{name: "Mary Smith", email: "mary@x.invalid"},
+ {name: "extra", group: []},
+ {name: "group", group: [{name: "", email: "jdoe@example.invalid"}]},
+ {name: "Who?", email: "one@y.invalid"},
+ {name: "", email: "boss@nil.invalid"},
+ {name: "Giant; \"Big\" Box", email: "sysservices@example.invalid"}]],
+ ["Undisclosed recipients: a@foo.invalid ;;extra:;",
+ [{name: "Undisclosed recipients", group: [
+ {name: "", email: "a@foo.invalid"}]},
+ {name: "extra", group: []}]],
+ ["Undisclosed recipients:;;extra:a@foo.invalid;",
+ [{name: "Undisclosed recipients", group: []},
+ {name: "extra", group: [{name: "", email: "a@foo.invalid"}]}]],
+ ["a < <a@b.c>", [{name: "a", email: "a@b.c"}]],
+ ["Name <incomplete@email", [{name: "Name", email: "incomplete@email"}]],
+ ["Name <space here@email.invalid>",
+ [{name: 'Name', email: '"space here"@email.invalid'}]],
+ ["Name <not an email>", [{name: "Name", email: "not an email"}]],
+ ["=?UTF-8?Q?Simple?= <a@b.c>",
+ [{name: "=?UTF-8?Q?Simple?=", email: "a@b.c"}]],
+ ];
+ header_tests.forEach(function (data) {
+ arrayTest(data, function () {
+ assert.deepEqual(headerparser.parseAddressingHeader(data[0], false),
+ data[1]);
+ });
+ });
+ });
+ suite('parseAddressingHeader (RFC 2047 support)', function () {
+ let header_tests = [
+ ["Simple <a@b.c>", [{name: "Simple", email: "a@b.c"}]],
+ ["=?UTF-8?Q?Simple?= <a@b.c>", [{name: "Simple", email: "a@b.c"}]],
+ ["=?UTF-8?Q?=3C@b.c?= <a@b.c>", [{name: "<@b.c", email: "a@b.c"}]],
+
+      // RFC 2047 tokens should not interfere with lexical processing
+ ["=?UTF-8?Q?a@b.c,?= <b@b.c>", [{name: "a@b.c,", email: "b@b.c"}]],
+ ["=?UTF-8?Q?a@b.c=2C?= <b@b.c>", [{name: "a@b.c,", email: "b@b.c"}]],
+ ["=?UTF-8?Q?<?= <a@b.c>", [{name: "<", email: "a@b.c"}]],
+ ["Simple =?UTF-8?Q?<?= a@b.c>",
+ [{name: "", email: '"Simple < a"@b.c'}]],
+ ["Tag <=?UTF-8?Q?email?=@b.c>", [{name: "Tag", email: "email@b.c"}]],
+ ];
+ header_tests.forEach(function (data) {
+ arrayTest(data, function () {
+ assert.deepEqual(headerparser.parseAddressingHeader(data[0], true),
+ data[1]);
+ });
+ });
+ });
+ suite('decodeRFC2047Words', function () {
+ let header_tests = [
+ // Some basic sanity tests for the test process
+ ["Test", "Test"],
+ ["Test 2", "Test 2"],
+ ["Basic words", "Basic words"],
+ ["Not a =? word", "Not a =? word"],
+
+ // Simple 2047 decodings
+ ["=?UTF-8?Q?Encoded?=", "Encoded"],
+ ["=?UTF-8?q?Encoded?=", "Encoded"],
+ ["=?ISO-8859-1?Q?oxyg=e8ne?=", "oxyg\u00e8ne"],
+ ["=?UTF-8?B?QmFzZTY0?=", "Base64"],
+ ["=?UTF-8?b?QmFzZTY0?=", "Base64"],
+ ["=?UTF-8?Q?A_space?=", "A space"],
+ ["=?UTF-8?Q?A space?=", "A space"],
+ ["A =?UTF-8?Q?B?= C", "A B C"],
+ ["=?UTF-8?Q?A?= =?UTF-8?Q?B?=", "AB"],
+ ["=?UTF-8?Q?oxyg=c3=a8ne?=", "oxyg\u00e8ne"],
+ ["=?utf-8?Q?oxyg=C3=A8ne?=", "oxyg\u00e8ne"],
+ ["=?UTF-8?B?b3h5Z8OobmU=?=", "oxyg\u00e8ne"],
+ ["=?UTF-8*fr?B?b3h5Z8OobmU=?=", "oxyg\u00e8ne"],
+ ["=?BIG5?Q?=B9=CF=AE=D1=C0]SSCI=A4=CEJCR=B8=EA=AE=C6=AEw=C1=BF=B2=DF=A1A=A8" +
+ "=F3=A7U=B1z=A1u=B4=A3=A4=C9=AC=E3=A8s=AF=C0=BD=E8=BBP=AE=C4=B2v=A5H=A4=CE" +
+ "=A7=EB=BDZ=B5=A6=B2=A4=AA=BA=B9B=A5=CE=A1v=A1A=C5w=AA=EF=B3=F8=A6W=B0=D1" +
+ "=A5[=A1C?=", "\u5716\u66F8\u9928SSCI\u53CAJCR\u8CC7\u6599\u5EAB\u8B1B" +
+ "\u7FD2\uFF0C\u5354\u52A9\u60A8\u300C\u63D0\u5347\u7814\u7A76\u7D20\u8CEA" +
+ "\u8207\u6548\u7387\u4EE5\u53CA\u6295\u7A3F\u7B56\u7565\u7684\u904B\u7528" +
+ "\u300D\uFF0C\u6B61\u8FCE\u5831\u540D\u53C3\u52A0\u3002"],
+
+ // Invalid decodings
+ ["=?UTF-8?Q?=f0ab?=", "\ufffdab"],
+ ["=?UTF-8?Q?=f0?= ab", "\ufffd ab"],
+ ["=?UTF-8?Q?=ed=a0=bd=ed=b2=a9?=", "\ufffd\ufffd\ufffd\ufffd\ufffd\ufffd"],
+ ["=?NoSuchCharset?Q?ab?=", "=?NoSuchCharset?Q?ab?="],
+ ["=?UTF-8?U?Encoded?=", "=?UTF-8?U?Encoded?="],
+ ["=?UTF-8?Q?Almost", "=?UTF-8?Q?Almost"],
+
+ // Try some non-BMP characters in various charsets
+ ["=?UTF-8?B?8J+SqQ==?=", "\ud83d\udca9"],
+ // The goal for the next one is to be a non-BMP in a non-full-Unicode
+ // charset. The only category where this exists is a small set of
+ // characters in Big5, which were previously mapped to a PUA in an older
+ // version but then reassigned to Plane 1. However, Big5 is really a set
+ // of slightly different, slightly incompatible charsets.
+ // TODO: This requires future investigation. Bug 912470 discusses the
+ // changes to Big5 proposed within Mozilla.
+ //["=?Big5?Q?=87E?=", "\ud85c\ude67"],
+ ["=?GB18030?B?lDnaMw==?=", "\ud83d\udca9"],
+
+ // How to handle breaks in multi-byte encoding
+ ["=?UTF-8?Q?=f0=9f?= =?UTF-8?Q?=92=a9?=", "\ud83d\udca9"],
+ ["=?UTF-8?B?8J+S?= =?UTF-8?B?qQ==?=", "\ud83d\udca9"],
+ ["=?UTF-8?B?8J+S?= =?UTF-8?Q?=a9?=", "\ud83d\udca9"],
+ ["=?UTF-8?B?8J+S?= =?ISO-8859-1?B?qQ==?=", "\ufffd\u00a9"],
+ ["=?UTF-8?Q?=f0?= =?UTF-8?Q?ab?=", "\ufffdab"],
+
+ // This is a split non-BMP character.
+ ["=?UTF-8?B?YfCfkqnwn5Kp8J+SqfCfkqnwn5Kp8J+SqfCfkqnvv70=?= =?UTF-8?B?77+9?=",
+ "a\uD83D\uDCA9\uD83D\uDCA9\uD83D\uDCA9\uD83D\uDCA9\uD83D\uDCA9\uD83D" +
+ "\uDCA9\uD83D\uDCA9\uFFFD\uFFFD"],
+
+ // Spaces in RFC 2047 tokens
+ ["=?UTF-8?Q?Invalid token?=", "Invalid token"],
+
+ // More tests from bug 493544
+ ["AAA =?UTF-8?Q?bbb?= CCC =?UTF-8?Q?ddd?= EEE =?UTF-8?Q?fff?= GGG",
+ "AAA bbb CCC ddd EEE fff GGG"],
+ ["=?UTF-8?B?4oiAICDiiIEgIOKIgiAg4oiDICDiiIQgIOKIhSAg4oiGICDiiIcgIOKIiC" +
+ "Ag?=\n =?UTF-8?B?4oiJICDiiIogIOKIiyAg4oiMICDiiI0gIOKIjiAg4oiP?=",
+ "\u2200 \u2201 \u2202 \u2203 \u2204 \u2205 \u2206 \u2207 " +
+ "\u2208 \u2209 \u220a \u220b \u220c \u220d \u220e \u220f"],
+ ["=?utf-8?Q?=E2=88=80__=E2=88=81__=E2=88=82__=E2=88=83__=E2=88=84__=E2" +
+ "?=\n =?utf-8?Q?=88=85__=E2=88=86__=E2=88=87__=E2=88=88__=E2=88=89__" +
+ "=E2=88?=\n =?utf-8?Q?=8A__=E2=88=8B__=E2=88=8C__=E2=88=8D__=E2=88=8" +
+ "E__=E2=88=8F?=",
+ "\u2200 \u2201 \u2202 \u2203 \u2204 \u2205 \u2206 \u2207 " +
+ "\u2208 \u2209 \u220a \u220b \u220c \u220d \u220e \u220f"],
+ ["=?UTF-8?B?4oiAICDiiIEgIOKIgiAg4oiDICDiiIQgIOKIhSAg4oiGICDiiIcgIOKIiA" +
+ "==?=\n =?UTF-8?B?ICDiiIkgIOKIiiAg4oiLICDiiIwgIOKIjSAg4oiOICDiiI8=?=",
+ "\u2200 \u2201 \u2202 \u2203 \u2204 \u2205 \u2206 \u2207 " +
+ "\u2208 \u2209 \u220a \u220b \u220c \u220d \u220e \u220f"],
+ ["=?UTF-8?b?4oiAICDiiIEgIOKIgiAg4oiDICDiiIQgIOKIhSAg4oiGICDiiIcgIOKIiA" +
+ "==?=\n =?UTF-8?b?ICDiiIkgIOKIiiAg4oiLICDiiIwgIOKIjSAg4oiOICDiiI8=?=",
+ "\u2200 \u2201 \u2202 \u2203 \u2204 \u2205 \u2206 \u2207 " +
+ "\u2208 \u2209 \u220a \u220b \u220c \u220d \u220e \u220f"],
+ ["=?utf-8?Q?=E2=88=80__=E2=88=81__=E2=88=82__=E2=88=83__=E2=88=84__?=\n" +
+ " =?utf-8?Q?=E2=88=85__=E2=88=86__=E2=88=87__=E2=88=88__=E2=88=89__?=\n"+
+ " =?utf-8?Q?=E2=88=8A__=E2=88=8B__=E2=88=8C__=E2=88=8D__=E2=88=8E__?=\n"+
+ " =?utf-8?Q?=E2=88=8F?=",
+ "\u2200 \u2201 \u2202 \u2203 \u2204 \u2205 \u2206 \u2207 " +
+ "\u2208 \u2209 \u220a \u220b \u220c \u220d \u220e \u220f"],
+ ["=?utf-8?q?=E2=88=80__=E2=88=81__=E2=88=82__=E2=88=83__=E2=88=84__?=\n" +
+ " =?utf-8?q?=E2=88=85__=E2=88=86__=E2=88=87__=E2=88=88__=E2=88=89__?=\n"+
+ " =?utf-8?q?=E2=88=8A__=E2=88=8B__=E2=88=8C__=E2=88=8D__=E2=88=8E__?=\n"+
+ " =?utf-8?q?=E2=88=8F?=",
+ "\u2200 \u2201 \u2202 \u2203 \u2204 \u2205 \u2206 \u2207 " +
+ "\u2208 \u2209 \u220a \u220b \u220c \u220d \u220e \u220f"],
+ ["=?UTF-8?B?4oiAICDiiIEgIOKIgiAg4oiDICDiiIQgIOKIhSAg4oiGICDiiIcgIOKIiA=" +
+ "==?=\n =?UTF-8?B?ICDiiIkgIOKIiiAg4oiLICDiiIwgIOKIjSAg4oiOICDiiI8=?=",
+ "\u2200 \u2201 \u2202 \u2203 \u2204 \u2205 \u2206 \u2207 " +
+ "\u2208 \u2209 \u220a \u220b \u220c \u220d \u220e \u220f"],
+
+ // Some interesting headers found in the wild:
+ // Invalid base64 text. We decide not to decode this word.
+ ["Re: [Kitchen Nightmares] Meow! Gordon Ramsay Is =?ISO-8859-1?B?UEgR l" +
+ "qZ VuIEhlYWQgVH rbGeOIFNob BJc RP2JzZXNzZW?= With My =?ISO-8859-1?B?" +
+ "SHVzYmFuZ JzX0JhbGxzL JfU2F5c19BbXiScw==?= Baking Company Owner",
+ "Re: [Kitchen Nightmares] Meow! Gordon Ramsay Is =?ISO-8859-1?B?UEgR " +
+ "lqZ VuIEhlYWQgVH rbGeOIFNob BJc RP2JzZXNzZW?= With My =?ISO-8859-1?B" +
+ "?SHVzYmFuZ JzX0JhbGxzL JfU2F5c19BbXiScw==?= Baking Company Owner"],
+ ["=?us-ascii?Q?=09Edward_Rosten?=", "\tEdward Rosten"],
+ ["=?us-ascii?Q?=3D=3FUTF-8=3FQ=3Ff=3DC3=3DBCr=3F=3D?=",
+ "=?UTF-8?Q?f=C3=BCr?="],
+ // We don't decode unrecognized charsets (This one is actually UTF-8).
+ ["=??B?Sy4gSC4gdm9uIFLDvGRlbg==?=", "=??B?Sy4gSC4gdm9uIFLDvGRlbg==?="],
+ ];
+ header_tests.forEach(function (data) {
+ arrayTest(data, function () {
+ assert.deepEqual(headerparser.decodeRFC2047Words(data[0]), data[1]);
+ });
+ });
+ });
+ suite('8-bit header processing', function () {
+ let header_tests = [
+ // Non-ASCII header values
+ ["oxyg\xc3\xa8ne", "oxyg\u00e8ne", "UTF-8"],
+ ["oxyg\xc3\xa8ne", "oxyg\u00e8ne", "ISO-8859-1"], // UTF-8 overrides
+ ["oxyg\xc3\xa8ne", "oxyg\u00e8ne"], // default to UTF-8 if no charset
+ ["oxyg\xe8ne", "oxyg\ufffdne", "UTF-8"],
+ ["oxyg\xe8ne", "oxyg\u00e8ne", "ISO-8859-1"],
+ ["\xc3\xa8\xe8", "\u00e8\ufffd", "UTF-8"],
+ ["\xc3\xa8\xe8", "\u00c3\u00a8\u00e8", "ISO-8859-1"],
+
+ // Don't fallback to UTF-16 or UTF-32
+ ["\xe8S!0", "\ufffdS!0", "UTF-16"],
+ ["\xe8S!0", "\ufffdS!0", "UTF-16be"],
+ ["\xe8S!0", "\ufffdS!0", "UTF-32"],
+ ["\xe8S!0", "\ufffdS!0", "utf-32"],
+
+ // Don't combine encoded-word and header charset decoding
+ ["=?UTF-8?Q?=c3?= \xa8", "\ufffd \ufffd", "UTF-8"],
+ ["=?UTF-8?Q?=c3?= \xa8", "\ufffd \u00a8", "ISO-8859-1"],
+ ["\xc3 =?UTF-8?Q?=a8?=", "\ufffd \ufffd", "UTF-8"],
+ ["\xc3 =?UTF-8?Q?=a8?=", "\u00c3 \ufffd", "ISO-8859-1"],
+ ];
+ header_tests.forEach(function (data) {
+ arrayTest(data, function () {
+ assert.deepEqual(headerparser.decodeRFC2047Words(
+ headerparser.convert8BitHeader(data[0], data[2])), data[1]);
+ });
+ });
+ });
+});
+
+});
new file mode 100644
--- /dev/null
+++ b/mailnews/mime/jsmime/test/test_header_emitter.js
@@ -0,0 +1,228 @@
+"use strict";
+define(function(require) {
+
+var assert = require('assert');
+var headeremitter = require('jsmime').headeremitter;
+
+function arrayTest(data, fn) { // fn's displayed source shows data[N] inlined
+  function inlineValue(match, index) {
+    return JSON.stringify(data[index]);
+  }
+  fn.toString = function () {
+    var source = Function.prototype.toString.call(this);
+    return source.replace(/data\[([0-9]*)\]/g, inlineValue);
+  };
+  return test(JSON.stringify(data[0]), fn);
+}
+
+suite('headeremitter', function () {
+ suite('addAddresses', function () {
+ let handler = { // sink given to makeStreamingEmitter; output checked in deliverEOF
+ reset: function (expected) {
+ this.output = '';
+ this.expected = expected;
+ },
+ deliverData: function (data) { this.output += data; },
+ deliverEOF: function () {
+ assert.equal(this.output, this.expected + '\r\n');
+ for (let line of this.output.split('\r\n'))
+ assert.ok(line.length <= 30, "Line is too long"); // 30 == softMargin below
+ }
+ };
+ let header_tests = [ // [addresses for addAddresses, expected text minus final CRLF]
+ [[{name: "", email: ""}], ""],
+ [[{name: "", email: "a@example.com"}], "a@example.com"],
+ [[{name: "John Doe", email: "a@example.com"}], "John Doe <a@example.com>"],
+ [[{name: "", email: "a@b.c"}, {name: "", email: "b@b.c"}], "a@b.c, b@b.c"],
+ [[{name: "JD", email: "a@a.c"}, {name: "SD", email: "b@b.c"}],
+ "JD <a@a.c>, SD <b@b.c>"],
+ [[{name: "John Doe", email: "a@example.com"},
+ {name: "Sally Doe", email: "b@example.com"}],
+ "John Doe <a@example.com>,\r\n Sally Doe <b@example.com>"],
+ [[{name: "My name is really long and I split somewhere", email: "a@a.c"}],
+ "My name is really long and I\r\n split somewhere <a@a.c>"],
+ // Note that the name is 29 chars here, so adding the email needs a break.
+ [[{name: "My name is really really long", email: "a@a.c"}],
+ "My name is really really long\r\n <a@a.c>"],
+ [[{name: "", email: "a@a.c"}, {name: "This name is long", email: "b@b.c"}],
+ "a@a.c,\r\n This name is long <b@b.c>"],
+ [[{name: "", email: "a@a.c"}, {name: "This name is also long", email: "b@b.c"}],
+ "a@a.c,\r\n This name is also long\r\n <b@b.c>"],
+ [[{name: "", email: "hi!bad@all.com"}], "\"hi!bad\"@all.com"],
+ [[{name: "", email: "\"hi!bad\"@all.com"}], "\"hi!bad\"@all.com"],
+ [[{name: "Doe, John", email: "a@a.com"}], "\"Doe, John\" <a@a.com>"],
+ // Quoting the whole name would exceed the line length, so only the word
+ [[{name: "A really, really long name to quote", email: "a@example.com"}],
+ "A \"really,\" really long name\r\n to quote <a@example.com>"],
+ [[{name: "Group", group: [{name: "", email: "a@a.c"},
+ {name: "", email: "b@b.c"}]}],
+ "Group: a@a.c, b@b.c;"],
+ ];
+ header_tests.forEach(function (data) {
+ arrayTest(data, function () {
+ let emitter = headeremitter.makeStreamingEmitter(handler, {
+ softMargin: 30,
+ useASCII: false,
+ });
+ handler.reset(data[1]);
+ emitter.addAddresses(data[0]);
+ emitter.finish(true);
+ });
+ });
+ });
+ suite('addAddresses (RFC 2047)', function () {
+ let handler = { // sink given to makeStreamingEmitter; output checked in deliverEOF
+ reset: function (expected) {
+ this.output = '';
+ this.expected = expected;
+ },
+ deliverData: function (data) { this.output += data; },
+ deliverEOF: function () {
+ assert.equal(this.output, this.expected + '\r\n');
+ for (let line of this.output.split('\r\n'))
+ assert.ok(line.length <= 30, "Line is too long"); // 30 == softMargin below
+ }
+ };
+ let header_tests = [ // [addresses for addAddresses, expected text minus final CRLF]
+ [[{name: "\u0436", email: "a@a.c"}], "=?UTF-8?B?0LY=?= <a@a.c>"],
+ [[{name: "dioxyg\u00e8ne", email: "a@a.c"}],
+ "=?UTF-8?Q?dioxyg=c3=a8ne?=\r\n <a@a.c>"],
+ // Prefer QP if base64 and QP are exactly the same length
+ [[{name: "oxyg\u00e8ne", email: "a@a.c"}],
+ // (the equally long base64 form would be =?UTF-8?B?b3h5Z8OobmU=?=)
+ "=?UTF-8?Q?oxyg=c3=a8ne?=\r\n <a@a.c>"],
+ [[{name: "\ud83d\udca9\ud83d\udca9\ud83d\udca9\ud83d\udca9",
+ email: "a@a.c"}],
+ "=?UTF-8?B?8J+SqfCfkqnwn5Kp?=\r\n =?UTF-8?B?8J+SqQ==?= <a@a.c>"],
+ ];
+ header_tests.forEach(function (data) {
+ arrayTest(data, function () {
+ let emitter = headeremitter.makeStreamingEmitter(handler, {
+ softMargin: 30,
+ useASCII: true
+ });
+ handler.reset(data[1]);
+ emitter.addAddresses(data[0]);
+ emitter.finish(true);
+ });
+ });
+ });
+ suite('addUnstructured (RFC 2047)', function () {
+ let handler = { // sink given to makeStreamingEmitter; output checked in deliverEOF
+ reset: function (expected) {
+ this.output = '';
+ this.expected = expected;
+ },
+ deliverData: function (data) { this.output += data; },
+ deliverEOF: function () {
+ assert.equal(this.output, this.expected + '\r\n');
+ for (let line of this.output.split('\r\n'))
+ assert.ok(line.length <= 30, "Line is too long"); // 30 == softMargin below
+ }
+ };
+ let header_tests = [ // [text for addUnstructured, expected text minus final CRLF]
+ ["My house burned down!", "My house burned down!"],
+
+ // Which characters must be escaped in Q encoding? (\x7f forces encoding.)
+ ["! \" # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \\ ] ^ _ ` { | } ~ \x7f",
+ "=?UTF-8?Q?!_=22_#_$_%_&_'_?=\r\n" +
+ " =?UTF-8?Q?=28_=29_*_+_,_-_.?=\r\n" +
+ " =?UTF-8?Q?_/_:_;_<_=3d_>_?=\r\n" +
+ " =?UTF-8?Q?=3f_@_[_\\_]_^_=5f?=\r\n" +
+ " =?UTF-8?Q?_`_{_|_}_~_=7f?="],
+ // But printable ASCII characters don't need encoding in the first place!
+ ["! \" # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \\ ] ^ _ ` { | } ~",
+ "! \" # $ % & ' ( ) * + , - . /\r\n" +
+ " : ; < = > ? @ [ \\ ] ^ _ ` { |\r\n" +
+ " } ~"],
+
+ // Test to make sure 2047-encoding chooses the right values.
+ ["\u001f", "=?UTF-8?Q?=1f?="],
+ ["\u001fa", "=?UTF-8?Q?=1fa?="],
+ ["\u001faa", "=?UTF-8?B?H2Fh?="],
+ ["\u001faaa", "=?UTF-8?Q?=1faaa?="],
+ ["\u001faaa\u001f", "=?UTF-8?B?H2FhYR8=?="],
+ ["\u001faaa\u001fa", "=?UTF-8?B?H2FhYR9h?="],
+ ["\u001faaa\u001faa", "=?UTF-8?Q?=1faaa=1faa?="],
+ ["\u001faaa\u001faa\u001faaaa", "=?UTF-8?B?H2FhYR9hYR9hYWFh?="],
+
+ // Make sure line breaking works right at the edge cases
+ ["\u001faaa\u001faaaaaaaaa", "=?UTF-8?Q?=1faaa=1faaaaaaaaa?="],
+ ["\u001faaa\u001faaaaaaaaaa",
+ "=?UTF-8?Q?=1faaa=1faaaaaaaaa?=\r\n =?UTF-8?Q?a?="],
+
+ // Choose base64/qp independently for each word
+ ["\ud83d\udca9\ud83d\udca9\ud83d\udca9a",
+ "=?UTF-8?B?8J+SqfCfkqnwn5Kp?=\r\n =?UTF-8?Q?a?="],
+
+ // Don't split a surrogate pair!
+ ["a\ud83d\udca9\ud83d\udca9\ud83d\udca9a",
+ "=?UTF-8?B?YfCfkqnwn5Kp?=\r\n =?UTF-8?B?8J+SqWE=?="],
+
+ // Spacing a UTF-8 string
+ ["L'oxyg\u00e8ne est un \u00e9l\u00e9ment chimique du groupe des " +
+ "chalcog\u00e8nes",
+ // Column ruler for the 30-character margin:
+ // 123456789012345678901234567890
+ "=?UTF-8?Q?L'oxyg=c3=a8ne_est?=\r\n" +
+ " =?UTF-8?B?IHVuIMOpbMOpbWVu?=\r\n" +
+ " =?UTF-8?Q?t_chimique_du_gro?=\r\n" +
+ " =?UTF-8?Q?upe_des_chalcog?=\r\n" +
+ " =?UTF-8?B?w6huZXM=?="],
+ ];
+ header_tests.forEach(function (data) {
+ arrayTest(data, function () {
+ let emitter = headeremitter.makeStreamingEmitter(handler, {
+ softMargin: 30,
+ useASCII: true
+ });
+ handler.reset(data[1]);
+ emitter.addUnstructured(data[0]);
+ emitter.finish(true);
+ });
+ });
+ });
+
+ suite("Header lengths", function () {
+ let handler = { // sink given to makeStreamingEmitter; output checked in deliverEOF
+ reset: function (expected) {
+ this.output = '';
+ this.expected = expected;
+ },
+ deliverData: function (data) { this.output += data; },
+ deliverEOF: function () {
+ assert.equal(this.output, this.expected + '\r\n');
+ }
+ };
+ let header_tests = [ // [addresses, expected text, or an Error if a throw is expected]
+ [[{name: "Supercalifragilisticexpialidocious", email: "a@b.c"}],
+ 'Supercalifragilisticexpialidocious\r\n <a@b.c>'],
+ [[{email: "supercalifragilisticexpialidocious@" +
+ "the.longest.domain.name.in.the.world.invalid"}],
+ 'supercalifragilisticexpialidocious\r\n' +
+ ' @the.longest.domain.name.in.the.world.invalid'],
+ [[{name: "Lopadotemachoselachogaleokranioleipsanodrimhypotrimmatosilphi" +
+ "paraomelitokatakechymenokichlepikossyphophattoperisteralektryonoptek" +
+ "ephalliokigklopeleiolagoiosiraiobaphetraganopterygon", email: "a@b.c"}],
+ new Error], // addAddresses must throw for this name
+ ];
+ header_tests.forEach(function (data) {
+ arrayTest(data, function () {
+ let emitter = headeremitter.makeStreamingEmitter(handler, {
+ softMargin: 30,
+ hardMargin: 50,
+ useASCII: false,
+ });
+ handler.reset(data[1]);
+ if (data[1] instanceof Error)
+ assert.throws(function () { emitter.addAddresses(data[0]); });
+ else {
+ assert.doesNotThrow(function () { emitter.addAddresses(data[0]); });
+ emitter.finish(true);
+ }
+ });
+ });
+ });
+});
+
+});
new file mode 100644
--- /dev/null
+++ b/mailnews/mime/jsmime/test/test_mime_tree.js
@@ -0,0 +1,610 @@
+"use strict";
+define(function(require) {
+
+var assert = require('assert');
+var jsmime = require('jsmime');
+var fs = require('fs');
+
+function arrayTest(data, fn) { // test is named data[0]; shown source inlines data[N]
+  var substitute = function (whole, index) {
+    return JSON.stringify(data[index]);
+  };
+  fn.toString = function () {
+    var original = Function.prototype.toString.call(this);
+    return original.replace(/data\[([0-9]*)\]/g, substitute);
+  };
+  return test(data[0], fn);
+}
+
+/// Removes object[field] and returns its old value (undefined when absent).
+function extract_field(object, field) {
+  if (!(field in object))
+    return undefined;
+
+  var value = object[field];
+  delete object[field];
+  return value;
+}
+
+/// Cache for read_file: maps a file name to a promise for that file's lines.
+var file_cache = {};
+
+/**
+ * Read a file into a string (all line endings become CRLF).
+ * @param file  The name of the file to read, relative to the data/ directory.
+ * @param start The first line to return (1-based), defaulting to line 1.
+ * @param end   The line after the last one to return (1-based, exclusive),
+ *              defaulting to one past the last line of the file.
+ * @return Promise<String> The contents of the file as a binary string.
+ */
+function read_file(file, start, end) {
+  if (!(file in file_cache)) {
+    var realFile = new Promise(function (resolve, reject) {
+      fs.readFile("data/" + file, function (err, data) {
+        if (err) reject(err);
+        else resolve(data);
+      });
+    });
+    var loader = realFile.then(function (contents) {
+      var inStrForm = '';
+      while (contents.length > 0) { // one char per byte, 1024 bytes at a time
+        inStrForm += String.fromCharCode.apply(null,
+          contents.subarray(0, 1024));
+        contents = contents.subarray(1024);
+      }
+      return inStrForm.split(/\r\n|[\r\n]/);
+    });
+    file_cache[file] = loader; // the promise is cached, so each file is read once
+  }
+  return file_cache[file].then(function (lines) {
+    // An omitted bound must not turn into NaN, which slice() treats as 0.
+    var first = start === undefined ? 0 : start - 1;
+    var last = end === undefined ? lines.length : end - 1;
+    return lines.slice(first, last).join('\r\n');
+  });
+}
+
+/**
+ * Build a [name, message, options, expected parts] tuple for a body test.
+ *
+ * Two test-only options are removed from opts before the parser sees them:
+ * _split: The contents of the file will be passed in packets split by this
+ *         regex. Be sure to include the split delimiter in a group so that
+ *         it is included in the output packets!
+ * _eol: The CRLFs in the input file (and in the expected part data) will be
+ *       replaced with the given line ending; this is done before _split.
+ * @param test     The name of the test.
+ * @param file     The name of the file to read (see read_file).
+ * @param opts     Options for the mime parser, plus the extras above.
+ * @param partspec An array of [partnum, line start, line end] detailing the
+ *                 expected parts in the body. It will be expected that the
+ *                 accumulated body part data for partnum would be the contents
+ *                 of the file from [line start, line end) [1-based lines].
+ * @return [test, promise of message text or packets, opts, promise of parts]
+ */
+function make_body_test(test, file, opts, partspec) {
+  // Strip the test-only options now so the MIME parser never sees them.
+  var eol = extract_field(opts, "_eol");
+  var packetize = extract_field(opts, "_split");
+  var results = Promise.all(partspec.map(function (p) {
+    return Promise.all([p[0], read_file(file, p[1], p[2])]);
+  }));
+  var msgtext = read_file(file).then(function(msgcontents) {
+    // Apply _eol while the text is one string so it can be used with _split.
+    if (eol !== undefined)
+      msgcontents = msgcontents.replace(/\r\n/g, eol);
+    return packetize === undefined ? msgcontents : msgcontents.split(packetize);
+  });
+  if (eol !== undefined) {
+    results = results.then(function(results) {
+      for (var part of results) {
+        part[1] = part[1].replace(/\r\n/g, eol);
+      }
+      return results;
+    });
+  }
+  return [test, msgtext, opts, results];
+}
+
+/**
+ * Execute a single MIME tree test.
+ *
+ * @param message Either the text of the message, an array of textual message
+ *                part data (imagine coming on different TCP packets), or a
+ *                promise that resolves to any of the above.
+ * @param opts    Options for the parser. The test-only _nofuseparts option is
+ *                removed here, and onerror is overwritten to rethrow errors.
+ * @param results The expected results: either a dictionary of part number ->
+ *                header -> values (to check headers), an array of [partnum,
+ *                partdata] expected from deliverPartData, or a promise of one.
+ * @return A promise that resolves once all checks pass and rejects otherwise.
+ */
+function testParser(message, opts, results) {
+  var uncheckedValues; // expectations not yet matched (array or dictionary)
+  var checkingHeaders; // true: results describe headers; false: body data
+  var calls = 0; // counts startMessage/endMessage; must end up as 2
+  var fusingParts = extract_field(opts, "_nofuseparts") === undefined;
+  var emitter = {
+    stack: [],
+    startMessage: function emitter_startMsg() {
+      assert.equal(this.stack.length, 0);
+      calls++;
+      this.partData = '';
+    },
+    endMessage: function emitter_endMsg() {
+      assert.equal(this.stack.length, 0);
+      calls++;
+    },
+    startPart: function emitter_startPart(partNum, headers) {
+      this.stack.push(partNum);
+      if (checkingHeaders) {
+        assert.ok(partNum in uncheckedValues);
+        // Headers is a map, convert it to an object.
+        var objmap = {};
+        for (let pair of headers)
+          objmap[pair[0]] = pair[1];
+        var expected = uncheckedValues[partNum];
+        var convresults = {};
+        for (let key in expected) {
+          try {
+            convresults[key] =
+              jsmime.headerparser.parseStructuredHeader(key, expected[key]);
+          } catch (e) { // parsing failed: compare against the raw expected value
+            convresults[key] = expected[key];
+          }
+        }
+        assert.deepEqual(objmap, convresults);
+        if (fusingParts)
+          assert.equal(this.partData, '');
+        delete uncheckedValues[partNum];
+      }
+    },
+    deliverPartData: function emitter_partData(partNum, data) {
+      assert.equal(this.stack[this.stack.length - 1], partNum);
+      if (!checkingHeaders) {
+        if (fusingParts)
+          this.partData += data; // checked as a whole in endPart
+        else {
+          let check = uncheckedValues.shift();
+          assert.equal(partNum, check[0]);
+          assert.equal(data, check[1]);
+        }
+      }
+    },
+    endPart: function emitter_endPart(partNum) {
+      if (this.partData != '') {
+        let check = uncheckedValues.shift();
+        assert.equal(partNum, check[0]);
+        assert.equal(this.partData, check[1]);
+        this.partData = '';
+      }
+      assert.equal(this.stack.pop(), partNum);
+    }
+  };
+  opts.onerror = function (e) { throw e; };
+
+  return Promise.all([message, results]).then(function (vals) {
+    let [message, results] = vals;
+    // Shallow-copy the expectations so that checking them off leaves results intact.
+    if (Array.isArray(results)) {
+      uncheckedValues = results.slice();
+      checkingHeaders = false;
+    } else {
+      uncheckedValues = {};
+      for (let key in results) {
+        uncheckedValues[key] = results[key];
+      }
+      checkingHeaders = true;
+    }
+    if (!Array.isArray(message))
+      message = [message];
+    var parser = new jsmime.MimeParser(emitter, opts);
+    message.forEach(function (packet) {
+      parser.deliverData(packet);
+    });
+    parser.deliverEOF();
+    assert.equal(calls, 2);
+    if (!checkingHeaders)
+      assert.equal(0, uncheckedValues.length);
+    else
+      assert.deepEqual({}, uncheckedValues);
+  });
+}
+
+suite('MimeParser', function () {
+ /// Expected [partnum, line start, line end] entries for the multipart-complex1
+ /// test file, specified here because several make_body_test cases share it.
+ let mpart_complex1 = [['1', 8, 10], ['2', 14, 16], ['3.1', 22, 24],
+ ['4', 29, 31], ['5', 33, 35]];
+
+ suite('Simple tests', function () {
+ let parser_tests = [ // [test name, message or promise, parser options, expected]
+ // The following tests are either degenerate or error cases that should
+ // still parse without raising an error
+ ["Empty string", "", {}, {'': {}}],
+ ["No value for header", "Header", {}, {'': {"Header": ['']}}],
+ ["No trailing newline", "To: eof@example.net", {},
+ {'': {"To": ["eof@example.net"]}}],
+ ["Header no val", "To: eof@example.net\r\n", {},
+ {'': {"To": ["eof@example.net"]}}],
+ ["No body no headers", "\r\n\r\n", {}, {'': {}}],
+ ["Body no headers", "\r\n\r\nA", {}, {'': {}}],
+ // Basic cases for headers
+ ['Multiparts get headers', read_file("multipart-complex1"), {},
+ { '': {'Content-Type': ['multipart/mixed; boundary="boundary"']},
+ '1': {'Content-Type': ['application/octet-stream'],
+ 'Content-Transfer-Encoding': ['base64']},
+ '2': {'Content-Type': ['image/png'],
+ 'Content-Transfer-Encoding': ['base64']},
+ '3': {'Content-Type': ['multipart/related; boundary="boundary2"']},
+ '3.1': {'Content-Type': ['text/html']},
+ '4': {'Content-Type': ['text/plain']}, '5': {} }],
+ ];
+ parser_tests.forEach(function (data) {
+ arrayTest(data, function () {
+ return testParser(data[1], data[2], data[3]);
+ });
+ });
+ });
+
+ suite('Body tests', function () {
+ let parser_tests = [ // [test name, message or promise, parser options, expected]
+ // Body tests from data
+ // (Note: line numbers are 1-based. Also, to capture trailing EOF, add 2
+ // to the last line number of the file).
+ make_body_test("Basic body", "basic1", {}, [['', 3, 5]]),
+ make_body_test("Basic multipart", "multipart1", {}, [['1', 10, 12]]),
+ make_body_test("Basic multipart", "multipart2", {}, [['1', 8, 11]]),
+ make_body_test("Complex multipart", "multipart-complex1", {},
+ mpart_complex1),
+ make_body_test("Truncated multipart", "multipart-complex2", {},
+ [['1.1.1.1', 21, 25], ['2', 27, 57], ['3', 60, 62]]),
+ make_body_test("No LF multipart", "multipartmalt-detach", {},
+ [['1', 20, 21], ['2.1', 27, 38], ['2.2', 42, 43], ['2.3', 47, 48],
+ ['3', 53, 54]]),
+ make_body_test("Raw body", "multipart1", {bodyformat: "raw"},
+ [['', 4, 14]]),
+ ["Base64 decode 1", read_file("base64-1"), {bodyformat: "decode"},
+ [['', "\r\nHello, world! (Again...)\r\n\r\nLet's see how well base64 " +
+ "text is handled. Yay, lots of space" +
+ "s! There's even a CRLF at the end and one at the beginning, bu" +
+ "t the output shouldn't have it.\r\n"]]],
+ ["Base64 decode 2", read_file("base64-2"), {bodyformat: "decode"},
+ [['', "<html><body>This is base64 encoded HTML text, and the tags sho" +
+ "uldn't be stripped.\r\n<b>Bold text is bold!</b></body></html>" +
+ "\r\n"]]],
+ ["Base64 decode line issues", // same message, delivered line by line
+ read_file("base64-2").then(function (s) { return s.split(/(\r\n)/) }),
+ {bodyformat: "decode"},
+ [['', "<html><body>This is base64 encoded HTML text, and the tags sho" +
+ "uldn't be stripped.\r\n<b>Bold text is bold!</b></body></html>" +
+ "\r\n"]]],
+ make_body_test("Base64 nodecode", "base64-1", {}, [['', 4, 9]]),
+ ["QP decode", read_file("bug505221"),
+ {pruneat: '1', bodyformat: "decode"},
+ [['1', '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"' +
+ '>\r\n<HTML><HEAD>\r\n<META HTTP-EQUIV="Content-Type" CONTENT=' +
+ '"text/html; charset=us-ascii">\r\n\r\n\r\n<META content="MSHT' +
+ 'ML 6.00.6000.16735" name=GENERATOR></HEAD>\r\n<BODY> bbb\r\n<' +
+ '/BODY></HTML>']]],
+ ["Nested messages", read_file("message-encoded"), {bodyformat: "decode"},
+ [['1$', 'This is a plain-text message.'],
+ ['2$', 'I am a plain-text message.'],
+ ['3$', 'I am an encoded plain-text message.']]],
+ ["Nested message headers", read_file("message-encoded"), {},
+ {'': {'Content-Type': ['multipart/mixed; boundary="iamaboundary"']},
+ '1': {'Content-Type': ['message/rfc822']},
+ '1$': {'Subject': ['I am a subject']},
+ '2': {'Content-Type': ['message/global'],
+ 'Content-Transfer-Encoding': ['base64']},
+ '2$': {'Subject': ['\u79c1\u306f\u3001\u4ef6\u540d\u5348\u524d']},
+ '3': {'Content-Type': ['message/news'],
+ 'Content-Transfer-Encoding': ['quoted-printable']},
+ '3$': {'Subject': ['\u79c1\u306f\u3001\u4ef6\u540d\u5348\u524d']}}],
+ ];
+ parser_tests.forEach(function (data) {
+ arrayTest(data, function () {
+ return testParser(data[1], data[2], data[3]);
+ });
+ });
+ });
+
+ suite('Torture tests', function () {
+ // Generate a very long message for tests: 16 doublings give 65536 'a's
+ let teststr = 'a';
+ for (let i = 0; i < 16; i++)
+ teststr += teststr;
+ let parser_tests = [ // [test name, message or promise, parser options, expected]
+ ["Base64 very long decode",
+ "Content-Transfer-Encoding: base64\r\n\r\n" + btoa(teststr) + "\r\n",
+ {bodyformat: "decode"}, [['', teststr]]],
+ make_body_test("Torture regular body", "mime-torture", {}, [
+ ['1', 17, 21], ['2$.1', 58, 75], ['2$.2.1', 83, 97], ['2$.3', 102, 130],
+ ['3$', 155, 7742], ['4', 7747, 8213], ['5', 8218, 8242],
+ ['6$.1.1', 8284, 8301], ['6$.1.2', 8306, 8733], ['6$.2.1', 8742, 9095],
+ ['6$.2.2', 9100, 9354], ['6$.2.3', 9357, 11794],
+ ['6$.2.4', 11797, 12155], ['6$.3', 12161, 12809],
+ ['7$.1', 12844, 12845], ['7$.2', 12852, 13286],
+ ['7$.3', 13288, 13297], ['8$.1', 13331, 13358], ['8$.2', 13364, 13734],
+ ['9$', 13757, 20179], ['10', 20184, 21200], ['11$.1', 21223, 22031],
+ ['11$.2', 22036, 22586], ['12$.1', 22607, 23469],
+ ['12$.2', 23474, 23774], ['12$.3$.1', 23787, 23795],
+ ['12$.3$.2.1', 23803, 23820], ['12$.3$.2.2', 23825, 24633],
+ ['12$.3$.3', 24640, 24836], ['12$.3$.4$', 24848, 25872]]),
+ make_body_test("Torture pruneat", "mime-torture", {"pruneat": '4'},
+ [['4', 7747, 8213]]),
+
+ // Test packetization problems
+ make_body_test("Large packets", "multipart-complex1",
+ {"_split": /(.{30})/}, mpart_complex1),
+ make_body_test("Split on newline", "multipart-complex1",
+ {"_split": /(\r\n)/}, mpart_complex1),
+ make_body_test("Pathological splitting", "multipart-complex1",
+ {"_split": ''}, mpart_complex1),
+
+ // Non-CRLF line endings?
+ make_body_test("LF-based messages", "multipart-complex1",
+ {"_eol": "\n"}, mpart_complex1),
+ make_body_test("CR-based messages", "multipart-complex1",
+ {"_eol": "\r"}, mpart_complex1),
+ ];
+ parser_tests.forEach(function (data) {
+ arrayTest(data, function () {
+ return testParser(data[1], data[2], data[3]);
+ });
+ });
+ });
+
+ suite('Header tests', function () {
+ let parser_tests = [ // [test name, message or promise, parser options, expected]
+ // Basic cases for headers
+ ['Multiparts get headers', read_file("multipart-complex1"), {},
+ { '': {'Content-Type': ['multipart/mixed; boundary="boundary"']},
+ '1': {'Content-Type': ['application/octet-stream'],
+ 'Content-Transfer-Encoding': ['base64']},
+ '2': {'Content-Type': ['image/png'],
+ 'Content-Transfer-Encoding': ['base64']},
+ '3': {'Content-Type': ['multipart/related; boundary="boundary2"']},
+ '3.1': {'Content-Type': ['text/html']},
+ '4': {'Content-Type': ['text/plain']}, '5': {} }],
+ // The mbox 'From ' delimiter line must not show up among the headers
+ ['Exclude mbox delimiter', read_file('bugmail11'), {}, {'': {
+ 'X-Mozilla-Status': ['0001'], 'X-Mozilla-Status2': ['00000000'],
+ 'X-Mozilla-Keys': [''],
+ 'Return-Path': ['<example@example.com>',
+ '<bugzilla-daemon@mozilla.org>'],
+ 'Delivered-To': ['bugmail@example.org'],
+ 'Received': ['by 10.114.166.12 with SMTP id o12cs163262wae;' +
+ ' Fri, 11 Apr 2008 07:17:31 -0700 (PDT)',
+ 'by 10.115.60.1 with SMTP id n1mr214763wak.181.1207923450166;' +
+ ' Fri, 11 Apr 2008 07:17:30 -0700 (PDT)',
+ 'from webapp-out.mozilla.org (webapp01.sj.mozilla.com [63.245.208.1' +
+ '46]) by mx.google.com with ESMTP id n38si6807242wag.2.2008.' +
+ '04.11.07.17.29; Fri, 11 Apr 2008 07:17:30 -0700 (PDT)',
+ 'from mrapp51.mozilla.org (mrapp51.mozilla.org [127.0.0.1])' +
+ '\tby webapp-out.mozilla.org (8.13.8/8.13.8) with ESMTP id m3BEHTGU' +
+ '030132\tfor <bugmail@example.org>; Fri, 11 Apr 2008 07:17:29 -0700',
+ '(from root@localhost)' +
+ '\tby mrapp51.mozilla.org (8.13.8/8.13.8/Submit) id m3BEHTk4030129;' +
+ '\tFri, 11 Apr 2008 07:17:29 -0700'],
+ 'Received-Spf': ['neutral (google.com: 63.245.208.146 is neither perm' +
+ 'itted nor denied by best guess record for domain of bugzilla-daemo' +
+ 'n@mozilla.org) client-ip=63.245.208.146;'],
+ 'Authentication-Results': ['mx.google.com; spf=neutral (google.com: 6' +
+ '3.245.208.146 is neither permitted nor denied by best guess record' +
+ ' for domain of bugzilla-daemon@mozilla.org) smtp.mail=bugzilla-dae' +
+ 'mon@mozilla.org'],
+ 'Date': ['Fri, 11 Apr 2008 07:17:29 -0700'],
+ 'Message-Id': ['<200804111417.m3BEHTk4030129@mrapp51.mozilla.org>'],
+ 'From': ['bugzilla-daemon@mozilla.org'], 'To': ['bugmail@example.org'],
+ 'Subject': ['Bugzilla: confirm account creation'],
+ 'X-Bugzilla-Type': ['admin'],
+ 'Content-Type': ['text/plain; charset="UTF-8"'],
+ 'Mime-Version': ['1.0']}}],
+ ];
+ parser_tests.forEach(function (data) {
+ arrayTest(data, function () {
+ return testParser(data[1], data[2], data[3]);
+ });
+ });
+ });
+
+ suite('Charset tests', function () {
+ function buildTree(file, options) {
+   // Parses the message `file` resolves to; resolves to the map built below.
+   var parts = new Map(); // part number -> {headers, body}
+   function grow(body, chunk) { // returns body with chunk appended
+     if (body === null) return chunk;
+     if (typeof body === "string") return body + chunk;
+     var joined = new Uint8Array(body.length + chunk.length);
+     joined.set(body);
+     joined.set(chunk, body.length);
+     return joined;
+   }
+   var emitter = {
+     startPart: function (part, headers) {
+       parts.set(part, {headers: headers, body: null});
+     },
+     deliverPartData: function (part, data) {
+       var entry = parts.get(part);
+       entry.body = grow(entry.body, data);
+     }
+   };
+   return file.then(function (data) {
+     var parser = new jsmime.MimeParser(emitter, options);
+     parser.deliverData(data);
+     parser.deliverEOF();
+     return parts;
+   });
+ }
+ test('Unicode decoding', function () {
+ return buildTree(read_file('shift-jis-image'), {
+ strformat: "unicode",
+ bodyformat: "decode"
+ }).then(function (tree) {
+ // text/plain should be transcoded using its declared Shift-JIS charset...
+ assert.equal(tree.get('1').headers.get('Content-Type').get('charset'),
+ 'Shift-JIS');
+ assert.equal(tree.get('1').headers.charset, 'Shift-JIS');
+ assert.equal(tree.get('1').headers.get('Content-Description'),
+ '\u30b1\u30c4\u30a1\u30eb\u30b3\u30a2\u30c8\u30eb');
+ assert.equal(tree.get('1').body, 'Portable Network Graphics\uff08' +
+ '\u30dd\u30fc\u30bf\u30d6\u30eb\u30fb\u30cd\u30c3\u30c8\u30ef\u30fc' +
+ '\u30af\u30fb\u30b0\u30e9\u30d5\u30a3\u30c3\u30af\u30b9\u3001PNG' +
+ '\uff09\u306f\u30b3\u30f3\u30d4\u30e5\u30fc\u30bf\u3067\u30d3\u30c3' +
+ '\u30c8\u30de\u30c3\u30d7\u753b\u50cf\u3092\u6271\u3046\u30d5\u30a1' +
+ '\u30a4\u30eb\u30d5\u30a9\u30fc\u30de\u30c3\u30c8\u3067\u3042\u308b' +
+ '\u3002\u5727\u7e2e\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u3068\u3057' +
+ '\u3066Deflate\u3092\u63a1\u7528\u3057\u3066\u3044\u308b\u3001' +
+ '\u5727\u7e2e\u306b\u3088\u308b\u753b\u8cea\u306e\u52a3\u5316\u306e' +
+ '\u306a\u3044\u53ef\u9006\u5727\u7e2e\u306e\u753b\u50cf\u30d5\u30a1' +
+ '\u30a4\u30eb\u30d5\u30a9\u30fc\u30de\u30c3\u30c8\u3067\u3042\u308b' +
+ '\u3002\r\n');
+ // ... but not image/png, which declares no charset
+ assert.ok(!tree.get('2').headers.get('Content-Type').has('charset'));
+ assert.equal(tree.get('2').headers.charset, ''); // no charset option was given
+ assert.equal(tree.get('2').headers.get('Content-Description'),
+ '\ufffdP\ufffdc\ufffd@\ufffd\ufffd\ufffdR\ufffdA\ufffdg\ufffd\ufffd');
+ assert.equal(tree.get('2').headers.getRawHeader('Content-Description'),
+ '\x83\x50\x83\x63\x83\x40\x83\x8b\x83\x52\x83\x41\x83\x67\x83\x8b');
+ var imageData = 'iVBORw0KGgoAAAANSUhEUgAAAIAAAABECAIAAADGJao+AAAAwklE' +
+ 'QVR4Xu3UgQbDMBRA0bc03f//b7N0VuqJEmwoc+KqNEkDh9b+2HuJu1KNO4f+AQCAAA' +
+ 'AQAAACAEAAAAgAAAEAIAAABACAAAAQAAACAEAAAAgAAAEAIAAAANReamRLlPWYfNH0' +
+ 'klxcPs+cP3NxWF+vi3lb7pa2R+vx6tHOtuN1O+a5lY3HzgM5ya/GM5N7ZjfPq7/5yS' +
+ '8IgAAAEAAAAgBAAAAIAAABACAAAAQAgAAAEAAAAgBAAAAIAAABACAAAIw322gDIPvt' +
+ 'lmUAAAAASUVORK5CYII=';
+ imageData = atob(imageData);
+ var asArray = new Uint8Array(imageData.length);
+ for (var i = 0; i < asArray.length; i++)
+ asArray[i] = imageData.charCodeAt(i);
+ assert.deepEqual(tree.get('2').body, asArray);
+
+ // NOTE(review): part 1 already reports Shift-JIS, so this only re-checks it.
+ tree.get('1').headers.charset = 'Shift-JIS';
+ assert.equal(tree.get('1').headers.charset, 'Shift-JIS');
+ assert.equal(tree.get('1').headers.get('Content-Description'),
+ '\u30b1\u30c4\u30a1\u30eb\u30b3\u30a2\u30c8\u30eb');
+ });
+ });
+ test('Fallback charset decoding', function () {
+ return buildTree(read_file('shift-jis-image'), {
+ strformat: "unicode",
+ charset: "ISO-8859-1",
+ bodyformat: "decode"
+ }).then(function (tree) {
+ // text/plain should be transcoded using its declared Shift-JIS charset...
+ assert.equal(tree.get('1').headers.get('Content-Type').get('charset'),
+ 'Shift-JIS');
+ assert.equal(tree.get('1').headers.charset, 'Shift-JIS');
+ assert.equal(tree.get('1').headers.get('Content-Description'),
+ '\u30b1\u30c4\u30a1\u30eb\u30b3\u30a2\u30c8\u30eb');
+ assert.equal(tree.get('1').body, 'Portable Network Graphics\uff08' +
+ '\u30dd\u30fc\u30bf\u30d6\u30eb\u30fb\u30cd\u30c3\u30c8\u30ef\u30fc' +
+ '\u30af\u30fb\u30b0\u30e9\u30d5\u30a3\u30c3\u30af\u30b9\u3001PNG' +
+ '\uff09\u306f\u30b3\u30f3\u30d4\u30e5\u30fc\u30bf\u3067\u30d3\u30c3' +
+ '\u30c8\u30de\u30c3\u30d7\u753b\u50cf\u3092\u6271\u3046\u30d5\u30a1' +
+ '\u30a4\u30eb\u30d5\u30a9\u30fc\u30de\u30c3\u30c8\u3067\u3042\u308b' +
+ '\u3002\u5727\u7e2e\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u3068\u3057' +
+ '\u3066Deflate\u3092\u63a1\u7528\u3057\u3066\u3044\u308b\u3001' +
+ '\u5727\u7e2e\u306b\u3088\u308b\u753b\u8cea\u306e\u52a3\u5316\u306e' +
+ '\u306a\u3044\u53ef\u9006\u5727\u7e2e\u306e\u753b\u50cf\u30d5\u30a1' +
+ '\u30a4\u30eb\u30d5\u30a9\u30fc\u30de\u30c3\u30c8\u3067\u3042\u308b' +
+ '\u3002\r\n');
+ // ... but image/png declares no charset, so the fallback option is reported
+ assert.ok(!tree.get('2').headers.get('Content-Type').has('charset'));
+ assert.equal(tree.get('2').headers.charset, 'ISO-8859-1');
+ assert.equal(tree.get('2').headers.get('Content-Description'),
+ '\u0192P\u0192c\u0192@\u0192\u2039\u0192R\u0192A\u0192g\u0192\u2039');
+ assert.equal(tree.get('2').headers.getRawHeader('Content-Description'),
+ '\x83\x50\x83\x63\x83\x40\x83\x8b\x83\x52\x83\x41\x83\x67\x83\x8b');
+ var imageData = 'iVBORw0KGgoAAAANSUhEUgAAAIAAAABECAIAAADGJao+AAAAwklE' +
+ 'QVR4Xu3UgQbDMBRA0bc03f//b7N0VuqJEmwoc+KqNEkDh9b+2HuJu1KNO4f+AQCAAA' +
+ 'AQAAACAEAAAAgAAAEAIAAABACAAAAQAAACAEAAAAgAAAEAIAAAANReamRLlPWYfNH0' +
+ 'klxcPs+cP3NxWF+vi3lb7pa2R+vx6tHOtuN1O+a5lY3HzgM5ya/GM5N7ZjfPq7/5yS' +
+ '8IgAAAEAAAAgBAAAAIAAABACAAAAQAgAAAEAAAAgBAAAAIAAABACAAAIw322gDIPvt' +
+ 'lmUAAAAASUVORK5CYII=';
+ imageData = atob(imageData);
+ var asArray = new Uint8Array(imageData.length);
+ for (var i = 0; i < asArray.length; i++)
+ asArray[i] = imageData.charCodeAt(i);
+ assert.deepEqual(tree.get('2').body, asArray);
+
+ // NOTE(review): part 1 already reports Shift-JIS, so this only re-checks it.
+ tree.get('1').headers.charset = 'Shift-JIS';
+ assert.equal(tree.get('1').headers.charset, 'Shift-JIS');
+ assert.equal(tree.get('1').headers.get('Content-Description'),
+ '\u30b1\u30c4\u30a1\u30eb\u30b3\u30a2\u30c8\u30eb');
+ });
+ });
+ test('Forced charset decoding', function () {
+ return buildTree(read_file('shift-jis-image'), {
+ strformat: "unicode",
+ charset: "ISO-8859-1",
+ "force-charset": true,
+ bodyformat: "decode"
+ }).then(function (tree) {
+ // The forced charset wins over text/plain's declared Shift-JIS charset...
+ assert.equal(tree.get('1').headers.get('Content-Type').get('charset'),
+ 'Shift-JIS');
+ assert.equal(tree.get('1').headers.charset, 'ISO-8859-1');
+ assert.equal(tree.get('1').headers.get('Content-Description'),
+ '\u0192P\u0192c\u0192@\u0192\u2039\u0192R\u0192A\u0192g\u0192\u2039');
+ assert.equal(tree.get('1').body, 'Portable Network Graphics\u0081i' +
+ '\u0192|\u0081[\u0192^\u0192u\u0192\u2039\u0081E\u0192l\u0192b' +
+ '\u0192g\u0192\u008f\u0081[\u0192N\u0081E\u0192O\u0192\u2030\u0192t' +
+ '\u0192B\u0192b\u0192N\u0192X\u0081APNG\u0081j\u201a\u00cd\u0192R' +
+ '\u0192\u201c\u0192s\u0192\u2026\u0081[\u0192^\u201a\u00c5\u0192r' +
+ '\u0192b\u0192g\u0192}\u0192b\u0192v\u2030\u00e6\u2018\u0153\u201a' +
+ '\u00f0\u02c6\u00b5\u201a\u00a4\u0192t\u0192@\u0192C\u0192\u2039' +
+ '\u0192t\u0192H\u0081[\u0192}\u0192b\u0192g\u201a\u00c5\u201a\u00a0' +
+ '\u201a\u00e9\u0081B\u02c6\u00b3\u008fk\u0192A\u0192\u2039\u0192S' +
+ '\u0192\u0160\u0192Y\u0192\u20ac\u201a\u00c6\u201a\u00b5\u201a' +
+ '\u00c4Deflate\u201a\u00f0\u008d\u00cc\u2014p\u201a\u00b5\u201a' +
+ '\u00c4\u201a\u00a2\u201a\u00e9\u0081A\u02c6\u00b3\u008fk\u201a' +
+ '\u00c9\u201a\u00e6\u201a\u00e9\u2030\u00e6\u017d\u00bf\u201a\u00cc' +
+ '\u2014\u00f2\u2030\u00bb\u201a\u00cc\u201a\u00c8\u201a\u00a2\u2030' +
+ '\u00c2\u2039t\u02c6\u00b3\u008fk\u201a\u00cc\u2030\u00e6\u2018' +
+ '\u0153\u0192t\u0192@\u0192C\u0192\u2039\u0192t\u0192H\u0081[\u0192' +
+ '}\u0192b\u0192g\u201a\u00c5\u201a\u00a0\u201a\u00e9\u0081B\r\n');
+ // ... and image/png, which declares no charset, reports it as well
+ assert.ok(!tree.get('2').headers.get('Content-Type').has('charset'));
+ assert.equal(tree.get('2').headers.charset, 'ISO-8859-1');
+ assert.equal(tree.get('2').headers.get('Content-Description'),
+ '\u0192P\u0192c\u0192@\u0192\u2039\u0192R\u0192A\u0192g\u0192\u2039');
+ assert.equal(tree.get('2').headers.getRawHeader('Content-Description'),
+ '\x83\x50\x83\x63\x83\x40\x83\x8b\x83\x52\x83\x41\x83\x67\x83\x8b');
+ var imageData = 'iVBORw0KGgoAAAANSUhEUgAAAIAAAABECAIAAADGJao+AAAAwklE' +
+ 'QVR4Xu3UgQbDMBRA0bc03f//b7N0VuqJEmwoc+KqNEkDh9b+2HuJu1KNO4f+AQCAAA' +
+ 'AQAAACAEAAAAgAAAEAIAAABACAAAAQAAACAEAAAAgAAAEAIAAAANReamRLlPWYfNH0' +
+ 'klxcPs+cP3NxWF+vi3lb7pa2R+vx6tHOtuN1O+a5lY3HzgM5ya/GM5N7ZjfPq7/5yS' +
+ '8IgAAAEAAAAgBAAAAIAAABACAAAAQAgAAAEAAAAgBAAAAIAAABACAAAIw322gDIPvt' +
+ 'lmUAAAAASUVORK5CYII=';
+ imageData = atob(imageData);
+ var asArray = new Uint8Array(imageData.length);
+ for (var i = 0; i < asArray.length; i++)
+ asArray[i] = imageData.charCodeAt(i);
+ assert.deepEqual(tree.get('2').body, asArray);
+
+ // Touching the header charset should change the interpretation.
+ tree.get('1').headers.charset = 'Shift-JIS';
+ assert.equal(tree.get('1').headers.charset, 'Shift-JIS');
+ assert.equal(tree.get('1').headers.get('Content-Description'),
+ '\u30b1\u30c4\u30a1\u30eb\u30b3\u30a2\u30c8\u30eb');
+ });
+ });
+ test('Charset conversion', function () {
+   var options = {strformat: "unicode", bodyformat: "decode"};
+   return buildTree(read_file('charsets'), options).then(function (tree) {
+     // Parts are compared in pairs (1,2), (3,4), ...: the bodies must be equal.
+     var lastPart = 12;
+     for (var odd = 1; odd < lastPart; odd += 2) {
+       assert.equal(tree.get(String(odd)).body, tree.get(String(odd + 1)).body);
+     }
+     // No part may exist beyond the last pair.
+     assert.ok(!tree.has(String(lastPart + 1)));
+   });
+ });
+ });
+});
+
+});
new file mode 100644
--- /dev/null
+++ b/mailnews/mime/jsmime/test/test_structured_header_emitters.js
@@ -0,0 +1,81 @@
+"use strict";
+define(function (require) {
+
+var assert = require('assert');
+var headeremitter = require('jsmime').headeremitter;
+
+function arrayTest(data, fn) { // Registers fn as a test titled after data[0].
+ fn.toString = function () { // Stringify fn with data[N] replaced by values.
+ let text = Function.prototype.toString.call(this);
+ text = text.replace(/data\[([0-9]*)\]/g, function (m, p) {
+ return JSON.stringify(data[p]);
+ });
+ return text;
+ };
+ return test(JSON.stringify(data[0]), fn);
+}
+
+function testHeader(header, tests) { // tests: array of [value, expected text].
+ suite(header, function () {
+ tests.forEach(function (data) {
+ arrayTest(data, function () {
+ assert.deepEqual(headeremitter.emitStructuredHeader(header,
+ data[0], {softMargin: 100, useASCII: true}),
+ (header + ": " + data[1]).trim() + '\r\n'); // trim(): no space if empty.
+ });
+ });
+ });
+}
+
+suite('Structured header emitters', function () {
+ // Ad-hoc header tests. Entries are [value to emit, expected header value].
+ // TODO: add structured encoder tests for Content-Type when it is added.
+
+ testHeader("Content-Transfer-Encoding", [
+ ["", ""],
+ ["8bit", "8bit"],
+ ["invalid", "invalid"]
+ ]);
+
+ // Non-ad-hoc header tests
+ let addressing_headers = ['From', 'To', 'Cc', 'Bcc', 'Sender', 'Reply-To',
+ 'Resent-Bcc', 'Resent-To', 'Resent-From', 'Resent-Cc', 'Resent-Sender',
+ 'Approved', 'Disposition-Notification-To', 'Delivered-To',
+ 'Return-Receipt-To'];
+ let address_tests = [
+ [{name: "", email: ""}, ""],
+ [{name: "John Doe", email: "john.doe@test.invalid"},
+ "John Doe <john.doe@test.invalid>"],
+ [[{name: "John Doe", email: "john.doe@test.invalid"}], // one-element array
+ "John Doe <john.doe@test.invalid>"],
+ [{name: "undisclosed-recipients", group: []}, // empty group
+ "undisclosed-recipients: ;"],
+ ];
+ addressing_headers.forEach(function (header) {
+ testHeader(header, address_tests);
+ });
+
+ let unstructured_headers = ['Comments', 'Content-Description', 'Keywords',
+ 'Subject'];
+ let unstructured_tests = [
+ ["", ""],
+ ["This is a subject", "This is a subject"],
+ ["\u79c1\u306f\u4ef6\u540d\u5348\u524d", // non-ASCII: expect an encoded-word
+ "=?UTF-8?B?56eB44Gv5Lu25ZCN5Y2I5YmN?="],
+ ];
+ unstructured_headers.forEach(function (header) {
+ testHeader(header, unstructured_tests);
+ });
+
+ test('emitStructuredHeaders', function () { // 'subject' is output as 'Subject'
+ let headers = new Map();
+ headers.set('From', [{name:'', email: 'bugzilla-daemon@mozilla.org'}]);
+ headers.set('subject', ['[Bug 939557] browsercomps.dll failed to build']);
+ let str = headeremitter.emitStructuredHeaders(headers, {});
+ assert.equal(str,
+ 'From: bugzilla-daemon@mozilla.org\r\n' +
+ 'Subject: [Bug 939557] browsercomps.dll failed to build\r\n');
+ });
+});
+
+});
new file mode 100644
--- /dev/null
+++ b/mailnews/mime/jsmime/test/test_structured_headers.js
@@ -0,0 +1,144 @@
+"use strict";
+define(function (require) {
+
+var assert = require('assert');
+var headerparser = require('jsmime').headerparser;
+
+function smartDeepEqual(actual, expected) { // deepEqual plus Map entry check.
+ assert.deepEqual(actual, expected);
+ if (actual instanceof Map && expected instanceof Map) {
+ // NOTE(review): presumably deepEqual alone ignores Map entries -- confirm.
+ assert.deepEqual([x for (x of actual.entries())],
+ [y for (y of expected.entries())]);
+ }
+}
+
+function arrayTest(data, fn) { // Registers fn as a test titled after data[0].
+ fn.toString = function () { // Stringify fn with data[N] replaced by values.
+ let text = Function.prototype.toString.call(this);
+ text = text.replace(/data\[([0-9]*)\]/g, function (m, p) {
+ return JSON.stringify(data[p]);
+ });
+ return text;
+ };
+ return test(JSON.stringify(data[0]), fn); // data[0] may be an array or "".
+}
+
+function testHeader(header, tests) { // tests: array of [raw value(s), expected].
+ suite(header, function () {
+ tests.forEach(function (data) {
+ arrayTest(data, function () {
+ smartDeepEqual(headerparser.parseStructuredHeader(header,
+ data[0]), data[1]);
+ });
+ });
+ });
+}
+
+function makeCT(media, sub, params) { // Builds an expected Content-Type value.
+ var object = new Map(); // Parameters become Map entries (see loop below).
+ object.mediatype = media;
+ object.subtype = sub;
+ object.type = media + "/" + sub;
+ for (let k in params)
+ object.set(k, params[k]);
+ return object;
+}
+suite('Structured headers', function () {
+ // Ad-hoc header tests. Entries are [raw header value(s), expected result].
+ testHeader('Content-Type', [
+ ['text/plain', makeCT("text", "plain", {})],
+ ['text/html', makeCT("text", "html", {})],
+ ['text/plain; charset="UTF-8"',
+ makeCT("text", "plain", {charset: "UTF-8"})],
+ ['text/', makeCT("text", "", {})],
+ ['text', makeCT("text", "plain", {})], // no "/": expect text/plain
+ ['image/', makeCT("image", "", {})],
+ ['image', makeCT("text", "plain", {})],
+ ['hacker/x-mailnews', makeCT("hacker", "x-mailnews", {})],
+ ['hacker/x-mailnews;', makeCT("hacker", "x-mailnews", {})],
+ ['HACKER/X-MAILNEWS', makeCT("hacker", "x-mailnews", {})],
+ ['application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
+ makeCT("application",
+ "vnd.openxmlformats-officedocument.spreadsheetml.sheet", {})],
+ ['application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;\r' +
+ '\n name="Presentation.pptx"',
+ makeCT("application",
+ "vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+ {name: "Presentation.pptx"})],
+ ['', makeCT("text", "plain", {})],
+ [' ', makeCT("text", "plain", {})],
+ ['text/plain; c', makeCT("text", "plain", {})],
+ ['text/plain; charset=', makeCT("text", "plain", {charset: ""})],
+ ['text/plain; charset="', makeCT("text", "plain", {charset: ""})],
+ ['text\\/enriched', makeCT("text\\", "enriched", {})],
+ ['multipart/mixed ";" wtf=stupid', makeCT("multipart", "mixed", {})],
+ ['multipart/mixed; wtf=stupid',
+ makeCT("multipart", "mixed", {wtf: "stupid"})],
+ ['text/plain; CHARSET=Big5', makeCT("text", "plain", {charset: "Big5"})],
+ ['text/html; CHARSET="Big5"', makeCT("text", "html", {charset: "Big5"})],
+ ['text/html; CHARSET="Big5', makeCT("text", "html", {charset: "Big5"})],
+ [['text/html', 'multipart/mixed'], makeCT("text", "html", {})], // first used
+ ]);
+ testHeader('Content-Transfer-Encoding', [
+ ['', ''],
+ ['8bit', '8bit'],
+ ['8BIT', '8bit'],
+ ['QuOtEd-PrInTaBlE', 'quoted-printable'],
+ ['Base64', 'base64'],
+ ['7bit', '7bit'],
+ [['7bit', '8bit'], '7bit'],
+ ['x-uuencode', 'x-uuencode']
+ ]);
+
+ // Non-ad-hoc header tests
+ let addressing_headers = ['From', 'To', 'Cc', 'Bcc', 'Sender', 'Reply-To',
+ 'Resent-Bcc', 'Resent-To', 'Resent-From', 'Resent-Cc', 'Resent-Sender',
+ 'Approved', 'Disposition-Notification-To', 'Delivered-To',
+ 'Return-Receipt-To'];
+ let address_tests = [
+ ["", []],
+ ["a@example.invalid", [{name: "", email: "a@example.invalid"}]],
+ ["John Doe <a@example.invalid>",
+ [{name: "John Doe", email: "a@example.invalid"}]],
+ ["John Doe <A@EXAMPLE.INVALID>",
+ [{name: "John Doe", email: "A@EXAMPLE.INVALID"}]],
+ ["=?UTF-8?B?5bGx55Sw5aSq6YOO?= <a@example.invalid>",
+ [{name: "\u5c71\u7530\u592a\u90ce", email: "a@example.invalid"}]],
+ ["undisclosed-recipients:;", [{name: "undisclosed-recipients", group: []}]],
+ ["world: a@example.invalid, b@example.invalid;",
+ [{name: "world", group: [
+ {name: "", email: "a@example.invalid"},
+ {name: "", email: "b@example.invalid"}
+ ]}]],
+ // TODO: once IDN is supported, the expected email in the next test should
+ // be a@\u4f8b.invalid (Japanese kanji for "example"), not the xn-- form.
+ ["\u5c71\u7530\u592a\u90ce <a@xn--fsq.invalid>",
+ [{name: "\u5c71\u7530\u592a\u90ce", email: "a@xn--fsq.invalid"}]],
+ ["\u5c71\u7530\u592a\u90ce <a@\u4f8b.invalid>",
+ [{name: "\u5c71\u7530\u592a\u90ce", email: "a@\u4f8b.invalid"}]],
+ ["\u30b1\u30c4\u30a1\u30eb\u30b3\u30a2\u30c8\u30eb@\u4f8b.invalid",
+ [{name: "", email:
+ "\u30b1\u30c4\u30a1\u30eb\u30b3\u30a2\u30c8\u30eb@\u4f8b.invalid"}]],
+ [["a@example.invalid", "b@example.invalid"],
+ [{name: "", email: "a@example.invalid"},
+ {name: "", email: "b@example.invalid"}]],
+ ];
+ addressing_headers.forEach(function (header) {
+ testHeader(header, address_tests);
+ });
+
+ let unstructured_headers = ['Comments', 'Content-Description', 'Keywords',
+ 'Subject'];
+ let unstructured_tests = [
+ ["", ""],
+ ["This is a subject", "This is a subject"],
+ [["Subject 1", "Subject 2"], "Subject 1"], // several values: first is used
+ ["=?UTF-8?B?56eB44Gv5Lu25ZCN5Y2I5YmN?=",
+ "\u79c1\u306f\u4ef6\u540d\u5348\u524d"],
+ ];
+ unstructured_headers.forEach(function (header) {
+ testHeader(header, unstructured_tests);
+ });
+});
+
+});
new file mode 100644
--- /dev/null
+++ b/mailnews/mime/jsmime/test/xpcshell.ini
@@ -0,0 +1,11 @@
+[DEFAULT]
+head=head_xpcshell_glue.js
+tail=
+support-files=data/**
+
+[test_custom_headers.js]
+[test_header_emitter.js]
+[test_header.js]
+[test_mime_tree.js]
+[test_structured_header_emitters.js]
+[test_structured_headers.js]
--- a/mailnews/mime/moz.build
+++ b/mailnews/mime/moz.build
@@ -8,13 +8,14 @@ PARALLEL_DIRS += [
'src',
'emitters',
'cthandlers',
]
TEST_DIRS += ['test']
EXTRA_JS_MODULES += [
- 'jsmime/mimeParserCore.js',
+ 'jsmime/jsmime.js',
]
-JS_MODULES_PATH = 'modules/mime'
+JS_MODULES_PATH = 'modules/jsmime'
+XPCSHELL_TESTS_MANIFESTS += ['jsmime/test/xpcshell.ini']
new file mode 100644
--- /dev/null
+++ b/mailnews/mime/src/jsmime.jsm
@@ -0,0 +1,24 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+// vim:set ts=2 sw=2 sts=2 et ft=javascript:
+
+Components.utils.import("resource:///modules/Services.jsm");
+
+/**
+ * This file exports the JSMime code, polyfilling code as appropriate for use in
+ * Gecko.
+ */
+
+// Load the core MIME parser. Since it doesn't define EXPORTED_SYMBOLS, we must
+// use the subscript loader instead.
+Services.scriptloader.loadSubScript("resource:///modules/jsmime/jsmime.js");
+
+var EXPORTED_SYMBOLS = ["jsmime"]; // NOTE(review): presumably set by jsmime.js.
+
+// Note: JSMime 0.2 doesn't require any polyfilling for the moment, which means
+// this code looks empty. However, it is anticipated that future code will need
+// some amount of polyfilling (supporting non-UTF-8 encodings in TextEncoder for
+// composition code is the most prominent example). Since I want people to start
+// out doing the right thing, I'm defining jsmime.jsm before there's a real need
+// for it.
--- a/mailnews/mime/src/mimeJSComponents.js
+++ b/mailnews/mime/src/mimeJSComponents.js
@@ -19,17 +19,17 @@ MimeHeaders.prototype = {
extractHeader: function MimeHeaders_extractHeader(header, getAll) {
if (!this._headers)
throw Components.results.NS_ERROR_NOT_INITIALIZED;
// Canonicalized to lower-case form
header = header.toLowerCase();
if (!this._headers.has(header))
return null;
- var values = this._headers.get(header);
+ var values = this._headers.getRawHeader(header);
if (getAll)
return values.join(",\r\n\t");
else
return values[0];
},
get allHeaders() {
return this._headers.rawHeaderText;
rename from mailnews/mime/jsmime/mimeParser.jsm
rename to mailnews/mime/src/mimeParser.jsm
--- a/mailnews/mime/jsmime/mimeParser.jsm
+++ b/mailnews/mime/src/mimeParser.jsm
@@ -1,19 +1,16 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */
// vim:set ts=2 sw=2 sts=2 et ft=javascript:
Components.utils.import("resource:///modules/XPCOMUtils.jsm");
Components.utils.import("resource:///modules/Services.jsm");
-
-// Load the core MIME parser. Since it doesn't define EXPORTED_SYMBOLS, we must
-// use the subscript loader instead.
-Services.scriptloader.loadSubScript("resource:///modules/mime/mimeParserCore.js");
+Components.utils.import("resource:///modules/jsmime.jsm");
var EXPORTED_SYMBOLS = ["MimeParser"];
// Emitter helpers, for internal functions later on.
var ExtractHeadersEmitter = {
startPart: function (partNum, headers) {
if (partNum == '') {
this.headers = headers;
@@ -82,17 +79,17 @@ var MimeParser = {
*/
parseSync: function MimeParser_parseSync(input, emitter, opts) {
// We only support string parsing if we are trying to do this parse
// synchronously.
if (typeof input != "string") {
throw new Error("input is not a recognizable type!");
}
setDefaultParserOptions(opts);
- var parser = new Parser(emitter, opts);
+ var parser = new jsmime.MimeParser(emitter, opts);
parser.deliverData(input);
parser.deliverEOF();
return;
},
/**
* Returns a stream listener that feeds data into a parser.
*
@@ -125,32 +122,32 @@ var MimeParser = {
scriptIn.init(aStream);
// Use readBytes instead of read to handle embedded NULs properly.
this._parser.deliverData(scriptIn.readBytes(aCount));
},
QueryInterface: XPCOMUtils.generateQI([Ci.nsIStreamListener,
Ci.nsIRequestObserver])
};
setDefaultParserOptions(opts);
- StreamListener._parser = new Parser(emitter, opts);
+ StreamListener._parser = new jsmime.MimeParser(emitter, opts);
return StreamListener;
},
/**
* Returns a new raw MIME parser.
*
* Prefer one of the other methods where possible, since the input here must
* be driven manually.
*
* @param emitter The emitter to receive callbacks on.
* @param opts A set of options for the parser.
*/
makeParser: function MimeParser_makeParser(emitter, opts) {
setDefaultParserOptions(opts);
- return new Parser(emitter, opts);
+ return new jsmime.MimeParser(emitter, opts);
},
/**
* Returns a dictionary of headers for the given input.
*
* The input is any type of input that would be accepted by parseSync. What
* is returned is a JS object that represents the headers of the entire
* envelope as would be received by startPart when partNum is the empty
@@ -206,14 +203,14 @@ var MimeParser = {
* @param flags A set of flags that controls interpretation of the header.
* @param charset A default charset to assume if no information may be found.
*/
parseHeaderField: function MimeParser_parseHeaderField(text, flags, charset) {
// The low 4 bits indicate the type of the header we are parsing. All of the
// higher-order bits are flags.
switch (flags & 0x0f) {
case MimeParser.HEADER_PARAMETER:
- return HeaderParser.extractParameters(text);
+ return jsmime.headerparser.parseParameterHeader(text, false, false);
default:
throw "Illegal type of header field";
}
},
};
--- a/mailnews/mime/src/moz.build
+++ b/mailnews/mime/src/moz.build
@@ -70,11 +70,16 @@ SOURCES += [
'nsStreamConverter.cpp',
]
EXTRA_COMPONENTS += [
'mimeJSComponents.js',
'msgMime.manifest',
]
+EXTRA_JS_MODULES += [
+ 'jsmime.jsm',
+ 'mimeParser.jsm'
+]
+
FINAL_LIBRARY = 'mail'
DEFINES['ENABLE_SMIME'] = True
--- a/mailnews/mime/test/unit/test_parser.js
+++ b/mailnews/mime/test/unit/test_parser.js
@@ -70,28 +70,18 @@ function read_file(file, start, end) {
* @param opts Options for the mime parser, as well as a few extras detailed
* above.
* @param partspec An array of [partnum, line start, line end] detailing the
* expected parts in the body. It will be expected that the
* accumulated body part data for partnum would be the contents
* of the file from [line start, line end) [1-based lines]
*/
function make_body_test(test, file, opts, partspec) {
- var results = [[p[0], read_file(file, p[1], p[2])] for each (p in partspec)];
+ var results = [[p[0], read_file(file, p[1], p[2])] for (p of partspec)];
var msgcontents = read_file(file);
- var packetize = extract_field(opts, "_split");
- if (packetize !== undefined)
- msgcontents = msgcontents.split(packetize);
- var eol = extract_field(opts, "_eol");
- if (eol !== undefined) {
- msgcontents = msgcontents.replace(/\r\n/g, eol);
- for (var part of results) {
- part[1] = part[1].replace(/\r\n/g, eol);
- }
- }
return [test, msgcontents, opts, results];
}
/// This is the expected part specifier for the multipart-complex1 test file,
/// specified here because it is used in several cases.
let mpart_complex1 = [['1', 8, 10], ['2', 14, 16], ['3.1', 22, 24],
['4', 29, 31], ['5', 33, 35]];
@@ -101,34 +91,16 @@ let mpart_complex1 = [['1', 8, 10], ['2'
// entry[2] = options for the MIME parser
// entry[3] = A checker result:
// either a {partnum: header object} (to check headers)
// or a [[partnum body], [partnum body], ...] (to check bodies)
// (the partnums refer to the expected part numbers of the MIME test)
// Note that for body tests, unless you're testing decoding, it is preferable to
// use make_body_test instead of writing the array yourself.
let parser_tests = [
- // The following tests are either degenerate or error cases that should work
- ["Empty string", "", {}, {'': {}}],
- ["No value for header", "Header", {}, {'': {"header": [null]}}],
- ["Header no val", "A: EOF", {}, {'': {"a": ["EOF"]}}],
- ["Header no val", "A: EOF\r\n", {}, {'': {"a": ["EOF"]}}],
- ["No body no headers", "\r\n\r\n", {}, {'': {}}],
- ["Body no headers", "\r\n\r\nA", {}, {'': {}}],
-
- // Basic cases for headers
- ['Multiparts get headers', read_file("multipart-complex1"), {},
- { '': {'content-type': ['multipart/mixed; boundary="boundary"']},
- '1': {'content-type': ['application/octet-stream'],
- 'content-transfer-encoding': ['base64']},
- '2': {'content-type': ['image/png'],
- 'content-transfer-encoding': ['base64']},
- '3': {'content-type': ['multipart/related; boundary="boundary2"']},
- '3.1': {'content-type': ['text/html']},
- '4': {'content-type': ['text/plain']}, '5': {} }],
// Body tests from data
// (Note: line numbers are 1-based. Also, to capture trailing EOF, add 2 to
// the last line number of the file).
make_body_test("Basic body", "basic1", {}, [['', 3, 5]]),
make_body_test("Basic multipart", "multipart1", {}, [['1', 10, 12]]),
make_body_test("Basic multipart", "multipart2", {}, [['1', 8, 11]]),
make_body_test("Complex multipart", "multipart-complex1", {}, mpart_complex1),
make_body_test("Truncated multipart", "multipart-complex2", {},
@@ -140,20 +112,16 @@ let parser_tests = [
["Base64 decode 1", read_file("base64-1"), {bodyformat: "decode"},
[['', "\r\nHello, world! (Again...)\r\n\r\nLet's see how well base64 text" +
" is handled. Yay, lots of spaces! There" +
"'s even a CRLF at the end and one at the beginning, but the output" +
" shouldn't have it.\r\n"]]],
["Base64 decode 2", read_file("base64-2"), {bodyformat: "decode"},
[['', "<html><body>This is base64 encoded HTML text, and the tags shouldn" +
"'t be stripped.\r\n<b>Bold text is bold!</b></body></html>\r\n"]]],
- ["Base64 decode line issues", read_file("base64-2").split(/(\r\n)/),
- {bodyformat: "decode"},
- [['', "<html><body>This is base64 encoded HTML text, and the tags shouldn" +
- "'t be stripped.\r\n<b>Bold text is bold!</b></body></html>\r\n"]]],
make_body_test("Base64 nodecode", "base64-1", {}, [['', 4, 9]]),
["QP decode", read_file("bug505221"), {pruneat: '1', bodyformat: "decode"},
[['1', '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">\r' +
'\n<HTML><HEAD>\r\n<META HTTP-EQUIV="Content-Type" CONTENT="text/h' +
'tml; charset=us-ascii">\r\n\r\n\r\n<META content="MSHTML 6.00.600' +
'0.16735" name=GENERATOR></HEAD>\r\n<BODY> bbb\r\n</BODY></HTML>']]],
// Comprehensive tests from the torture test
@@ -166,68 +134,19 @@ let parser_tests = [
['7$.3', 13288, 13297], ['8$.1', 13331, 13358], ['8$.2', 13364, 13734],
['9$', 13757, 20179], ['10', 20184, 21200], ['11$.1', 21223, 22031],
['11$.2', 22036, 22586], ['12$.1', 22607, 23469], ['12$.2', 23474, 23774],
['12$.3$.1', 23787, 23795], ['12$.3$.2.1', 23803, 23820],
['12$.3$.2.2', 23825, 24633], ['12$.3$.3', 24640, 24836],
['12$.3$.4$', 24848, 25872]]),
make_body_test("Torture pruneat", "mime-torture", {"pruneat": '4'},
[['4', 7747, 8213]]),
-
- // Test packetization problems
- make_body_test("Large packets", "multipart-complex1",
- {"_split": /(.{30})/}, mpart_complex1),
- make_body_test("Split on newline", "multipart-complex1",
- {"_split": /(\r\n)/}, mpart_complex1),
- make_body_test("Pathological splitting", "multipart-complex1",
- {"_split": ''}, mpart_complex1),
-
- // Non-CLRF line endings?
- make_body_test("LF-based messages", "multipart-complex1",
- {"_eol": "\n"}, mpart_complex1),
- make_body_test("CR-based messages", "multipart-complex1",
- {"_eol": "\r"}, mpart_complex1),
-
- // 'From ' is not an [iterable] header
- ['Exclude mbox delimiter', read_file('bugmail11'), {}, {'': {
- 'x-mozilla-status': ['0001'], 'x-mozilla-status2': ['00000000'],
- 'x-mozilla-keys': [''],
- 'return-path': ['<example@example.com>', '<bugzilla-daemon@mozilla.org>'],
- 'delivered-to': ['bugmail@example.org'],
- 'received': ['by 10.114.166.12 with SMTP id o12cs163262wae;' +
- ' Fri, 11 Apr 2008 07:17:31 -0700 (PDT)',
- 'by 10.115.60.1 with SMTP id n1mr214763wak.181.1207923450166;' +
- ' Fri, 11 Apr 2008 07:17:30 -0700 (PDT)',
- 'from webapp-out.mozilla.org (webapp01.sj.mozilla.com [63.245.208.146])' +
- ' by mx.google.com with ESMTP id n38si6807242wag.2.2008.04.11.07' +
- '.17.29; Fri, 11 Apr 2008 07:17:30 -0700 (PDT)',
- 'from mrapp51.mozilla.org (mrapp51.mozilla.org [127.0.0.1])' +
- '\tby webapp-out.mozilla.org (8.13.8/8.13.8) with ESMTP id m3BEHTGU0301' +
- '32\tfor <bugmail@example.org>; Fri, 11 Apr 2008 07:17:29 -0700',
- '(from root@localhost)' +
- '\tby mrapp51.mozilla.org (8.13.8/8.13.8/Submit) id m3BEHTk4030129;' +
- '\tFri, 11 Apr 2008 07:17:29 -0700'],
- 'received-spf': ['neutral (google.com: 63.245.208.146 is neither permitte' +
- 'd nor denied by best guess record for domain of bugzilla-daemon@mozill' +
- 'a.org) client-ip=63.245.208.146;'],
- 'authentication-results': ['mx.google.com; spf=neutral (google.com: 63.24' +
- '5.208.146 is neither permitted nor denied by best guess record for dom' +
- 'ain of bugzilla-daemon@mozilla.org) smtp.mail=bugzilla-daemon@mozilla.' +
- 'org'],
- 'date': ['Fri, 11 Apr 2008 07:17:29 -0700'],
- 'message-id': ['<200804111417.m3BEHTk4030129@mrapp51.mozilla.org>'],
- 'from': ['bugzilla-daemon@mozilla.org'],'to': ['bugmail@example.org'],
- 'subject': ['Bugzilla: confirm account creation'],
- 'x-bugzilla-type': ['admin'],
- 'content-type': ['text/plain; charset="UTF-8"'], 'mime-version': ['1.0']}}],
];
function test_parser(message, opts, results) {
- if (!(message instanceof Array))
- message = [message];
var checkingHeaders = !(results instanceof Array);
var calls = 0, dataCalls = 0;
var fusingParts = extract_field(opts, "_nofuseparts") === undefined;
var emitter = {
stack: [],
startMessage: function emitter_startMsg() {
do_check_eq(this.stack.length, 0);
calls++;
@@ -268,20 +187,17 @@ function test_parser(message, opts, resu
compare_objects(this.partData, results[dataCalls][1]);
dataCalls++;
this.partData = '';
}
do_check_eq(this.stack.pop(), partNum);
}
};
opts.onerror = function (e) { throw e; };
- var parser = MimeParser.makeParser(emitter, opts);
- for each (var packet in message)
- parser.deliverData(packet);
- parser.deliverEOF();
+ MimeParser.parseSync(message, emitter, opts);
do_check_eq(calls, 2);
if (!checkingHeaders)
do_check_eq(dataCalls, results.length);
}
const ATTACH = MimeParser.HEADER_PARAMETER;
// Format of tests:
// entry[0] = header
@@ -296,50 +212,30 @@ let header_tests = [
["a;b=1;b=2", MimeParser.HEADER_PARAMETER, ["a", {"b": "1"}]],
["a;b=2;b=1", MimeParser.HEADER_PARAMETER, ["a", {"b": "2"}]],
['a;b="a;b"', MimeParser.HEADER_PARAMETER, ["a", {"b": "a;b"}]],
['a;b="\\\\"', MimeParser.HEADER_PARAMETER, ["a", {"b": "\\"}]],
['a;b="a\\b\\c"', MimeParser.HEADER_PARAMETER, ["a", {"b": "abc"}]],
['a;b=1;c=2', MimeParser.HEADER_PARAMETER, ["a", {"b": "1", "c": "2"}]],
['a;b="a\\', MimeParser.HEADER_PARAMETER, ["a", {"b": "a"}]],
['a;b', MimeParser.HEADER_PARAMETER, ["a", {}]],
- ['a;b=1"2;c=d', MimeParser.HEADER_PARAMETER, ["a", {"b": '1"2', 'c': "d"}]],
-
- // Copied from test_MIME_params.js and adapted
- ["attachment;", ATTACH, ["attachment", {}]],
- ["attachment; filename=basic", ATTACH, ["attachment", {filename: "basic"}]],
- ["attachment; filename=\"\\\"\"", ATTACH, ["attachment", {filename: '"'}]],
- ["attachment; filename=\"\\x\"", ATTACH, ["attachment", {filename: "x"}]],
- ["attachment; filename=\"\"", ATTACH, ["attachment", {filename: ""}]],
- ["attachment; filename=", ATTACH, ["attachment", {filename: ""}]],
- ["attachment; filename X", ATTACH, ["attachment", {}]],
- ["attachment; filename = foo-A.html", ATTACH,
- ["attachment", {filename: "foo-A.html"}]],
- ["attachment; filename=\"", ATTACH, ["attachment", {filename: ""}]],
- ["attachment; filename=foo; trouble", ATTACH,
- ["attachment", {filename: "foo"}]],
- ["attachment; filename=foo; trouble ", ATTACH,
- ["attachment", {filename: "foo"}]],
- ["attachment", ATTACH, ["attachment", {}]],
- // According to comments and bugs, this works in necko, but it doesn't appear
- // that it ought to. See bug 732369 for more info.
- ["attachment; extension=bla filename=foo", ATTACH,
- ["attachment", {extension: "bla"}]],
+ ['a;b=";";c=d', MimeParser.HEADER_PARAMETER, ["a", {"b": ';', 'c': "d"}]],
];
function test_header(headerValue, flags, expected) {
let result = MimeParser.parseHeaderField(headerValue, flags);
- do_check_eq(uneval(result), uneval(expected));
+ do_check_eq(result.preSemi, expected[0]);
+ compare_objects(result, expected[1]);
}
function run_test() {
- for each (let test in parser_tests) {
+ for (let test of parser_tests) {
dump("Testing message " + test[0]);
if (test[1] instanceof Array)
dump(" using " + test[1].length + " packets");
dump('\n');
test_parser(test[1], test[2], test[3]);
}
- for each (let test in header_tests) {
+ for (let test of header_tests) {
dump("Testing value ->" + test[0] + "<- with flags " + test[1] + "\n");
test_header(test[0], test[1], test[2]);
}
}
--- a/mailnews/test/fakeserver/imapd.js
+++ b/mailnews/test/fakeserver/imapd.js
@@ -1534,27 +1534,27 @@ IMAP_RFC3501_handler.prototype = {
break;
case "HEADER.FIELDS":
var joinList = [];
var headers = message.getPartHeaders(partNum);
for (let header of queryArgs) {
header = header.toLowerCase();
if (headers.has(header))
joinList.push([header + ": " + value
- for (value of headers.get(header))].join('\r\n'));
+ for (value of headers.getRawHeader(header))].join('\r\n'));
}
data += joinList.join('\r\n') + "\r\n";
break;
case "HEADER.FIELDS.NOT":
var joinList = [];
var headers = message.getPartHeaders(partNum);
for (let header of headers) {
if (!(header in queryArgs))
joinList.push([header + ": " + value
- for (value of headers.get(header))].join('\r\n'));
+ for (value of headers.getRawHeader(header))].join('\r\n'));
}
data += joinList.join('\r\n') + "\r\n";
break;
default:
data += message.getPartBody(partNum);
}
this.sendingLiteral = true;
@@ -2149,62 +2149,57 @@ function bodystructure(msg, extension) {
// Use the mime parser emitter to generate body structure data. Most of the
// string will be built as we exit a part. Currently not working:
// 1. Some of the fields return NIL instead of trying to calculate them.
// 2. MESSAGE is missing the ENVELOPE and the lines at the end.
var bodystruct = '';
function paramToString(params) {
let paramList = [];
- for (var param in params)
- paramList.push('"' + param.toUpperCase() + '" "' + params[param] + '"');
+ for (let [param, value] of params)
+ paramList.push('"' + param.toUpperCase() + '" "' + value + '"');
return paramList.length == 0 ? 'NIL' : '(' + paramList.join(' ') + ')';
}
var headerStack = [];
var BodyStructureEmitter = {
startPart: function bodystructure_startPart(partNum, headers) {
bodystruct += '(';
headerStack.push(headers);
this.numLines = 0;
this.length = 0;
},
deliverPartData: function bodystructure_deliverPartData(partNum, data) {
this.length += data.length;
- this.numLines += [x for each (x in data) if (x == '\n')].length;
+ this.numLines += [x for (x of data) if (x == '\n')].length;
},
endPart: function bodystructure_endPart(partNum) {
// Grab the headers from before
let headers = headerStack.pop();
- let contentType = headers.has('content-type') ?
- headers.get('content-type')[0] : 'text/plain';
- let [type, params] = MimeParser.parseHeaderField(contentType,
- MimeParser.HEADER_PARAMETER);
- // Use uppercase canonicalization for now
- type = type.toUpperCase();
- let [media, sub] = type.split('/', 2);
- if (media == "MULTIPART") {
- bodystruct += ' "' + sub + '"';
+ let contentType = headers.contentType;
+ if (contentType.mediatype == "multipart") {
+ bodystruct += ' "' + contentType.subtype.toUpperCase() + '"';
if (extension) {
- bodystruct += ' ' + paramToString(params);
+ bodystruct += ' ' + paramToString(contentType);
// XXX: implement the rest
bodystruct += ' NIL NIL NIL';
}
} else {
- bodystruct += '"' + media + '" "' + sub + '"';
- bodystruct += ' ' + paramToString(params);
+ bodystruct += '"' + contentType.mediatype.toUpperCase() + '" "' +
+ contentType.subtype.toUpperCase() + '"';
+ bodystruct += ' ' + paramToString(contentType);
// XXX: Content ID, Content description
bodystruct += ' NIL NIL';
let cte = headers.has('content-transfer-encoding') ?
- headers.get('content-transfer-encoding')[0].toUpperCase() : '7BIT';
+ headers.get('content-transfer-encoding').toUpperCase() : '7BIT'; // as before
bodystruct += ' "' + cte + '"';
bodystruct += ' ' + this.length;
- if (media == "TEXT")
+ if (contentType.mediatype == "text")
bodystruct += ' ' + this.numLines;
// XXX: I don't want to implement these yet
if (extension)
bodystruct += ' NIL NIL NIL NIL';
}
bodystruct += ')';
}
--- a/mailnews/test/fakeserver/nntpd.js
+++ b/mailnews/test/fakeserver/nntpd.js
@@ -77,17 +77,17 @@ nntpDaemon.prototype = {
function newsArticle(text) {
this.headers = {};
this.body = "";
this.messageID = "";
this.fullText = text;
var headerMap;
[headerMap, this.body] = MimeParser.extractHeadersAndBody(text);
- for (var [header, values] of headerMap) {
+ for (var [header, values] of headerMap._rawHeaders) {
var value = values[0];
this.headers[header] = value;
if (header == "message-id") {
var start = value.indexOf('<');
var end = value.indexOf('>', start);
this.messageID = value.substring(start, end+1);
} else if (header == "newsgroups") {
this.groups = value.split(/[ \t]*,[ \t]*/);