Bug 1424359 - Use MimeTextDecoder() to return actual text decoder used in jsmime.js. r=aceman,jorgk a=jorgk BETA_60_CONTINUATION
author Jorg K <jorgk@jorgk.com> and aceman <acelists@atlas.sk>
Fri, 08 Dec 2017 12:50:00 +0100
branch BETA_60_CONTINUATION
changeset 32012 c26f70927c76
parent 32011 6f0d6ee66882
child 32013 b78ee64b973d
push id 384
push user clokep@gmail.com
push date 2018-06-26 01:38 +0000
reviewers aceman, jorgk, jorgk
bugs 1424359
Bug 1424359 - Use MimeTextDecoder() to return actual text decoder used in jsmime.js. r=aceman,jorgk a=jorgk
mailnews/mime/jsmime/jsmime.js
mailnews/mime/jsmime/test/head_xpcshell_glue.js
mailnews/mime/src/jsmime.jsm
--- a/mailnews/mime/jsmime/jsmime.js
+++ b/mailnews/mime/jsmime/jsmime.js
@@ -570,31 +570,31 @@ function getHeaderTokens(value, delimite
  * @param {String} [fallbackCharset] The optional charset to try if UTF-8
  *                                   doesn't work.
  * @returns {String} The UTF-16 representation of the string above.
  */
 function convert8BitHeader(headerValue, fallbackCharset) {
   // Only attempt to convert the headerValue if it contains non-ASCII
   // characters.
   if (/[\x80-\xff]/.exec(headerValue)) {
-    // First convert the value to a typed-array for TextDecoder.
+    // First convert the value to a typed-array for MimeTextDecoder.
     let typedarray = mimeutils.stringToTypedArray(headerValue);
 
     // Don't try UTF-8 as fallback (redundant), and don't try UTF-16 or UTF-32
     // either, since they radically change header interpretation.
     // If we have a fallback charset, we want to know if decoding will fail;
     // otherwise, we want to replace with substitution chars.
     let hasFallback = fallbackCharset &&
                       !fallbackCharset.toLowerCase().startsWith("utf");
-    let utf8Decoder = new TextDecoder("utf-8", {fatal: hasFallback});
+    let utf8Decoder = new MimeTextDecoder("utf-8", {fatal: hasFallback});
     try {
       headerValue = utf8Decoder.decode(typedarray);
     } catch (e) {
       // Failed, try the fallback
-      let decoder = new TextDecoder(fallbackCharset, {fatal: false});
+      let decoder = new MimeTextDecoder(fallbackCharset, {fatal: false});
       headerValue = decoder.decode(typedarray);
     }
   }
   return headerValue;
 }
 
 /**
  * Decodes all RFC 2047 encoded-words in the input string. The string does not
@@ -666,17 +666,17 @@ function decodeRFC2047Words(headerValue)
       output += currentDecoder.decode();
       currentDecoder = null;
     }
 
     // Initialize the decoder for this token.
     lastCharset = charset;
     if (!currentDecoder) {
       try {
-        currentDecoder = new TextDecoder(charset, {fatal: false});
+        currentDecoder = new MimeTextDecoder(charset, {fatal: false});
       } catch (e) {
         // We don't recognize the charset, so give up.
         return false;
       }
     }
 
     // Convert this token with the buffer. Note the stream parameter--although
     // RFC 2047 tokens aren't supposed to break in the middle of a multibyte
@@ -1183,17 +1183,17 @@ function decode2231Value(value) {
   //var language = (quote2 >= 0 ? value.substring(quote1 + 2, quote2) : "");
   value = value.substring(Math.max(quote1, quote2) + 1);
 
   // Convert the value into a typed array for decoding
   let typedarray = mimeutils.stringToTypedArray(value);
 
   // Decode the charset. If the charset isn't found, we throw an error. Try to
   // fallback in that case.
-  return new TextDecoder(charset, {fatal: true})
+  return new MimeTextDecoder(charset, {fatal: true})
     .decode(typedarray, {stream: false});
 }
 
 // This is a map of known timezone abbreviations, for fallback in obsolete Date
 // productions.
 var kKnownTZs = {
   // The following timezones are explicitly listed in RFC 5322.
   "UT":  "+0000", "GMT": "+0000",
@@ -2420,17 +2420,17 @@ MimeParser.prototype._startBody = functi
 
   // Set up the encoder for charset conversions; only do this for text parts.
   // Other parts are almost certainly binary, so no translation should be
   // applied to them.
   if (this._options["strformat"] == "unicode" &&
       contentType.mediatype == "text") {
     // If the charset is nonempty, initialize the decoder
     if (this._charset !== "") {
-      this._decoder = new TextDecoder(this._charset);
+      this._decoder = new MimeTextDecoder(this._charset);
     } else {
       // There's no charset we can use for decoding, so pass through as an
       // identity encoder or otherwise this._coerceData will complain.
       this._decoder = {
         decode: function identity_decoder(buffer) {
           return MimeParser.prototype._coerceData(buffer, "binarystring", true);
         }
       };
--- a/mailnews/mime/jsmime/test/head_xpcshell_glue.js
+++ b/mailnews/mime/jsmime/test/head_xpcshell_glue.js
@@ -39,17 +39,17 @@ var fs = {
     Promise.resolve(filename)
            .then(do_get_file)
            .then(file => OS.File.read(file.path))
            .then(translator)
            .then(contents => callback(undefined, contents), callback);
   },
 };
 requireCache.set("fs", fs);
-Services.scriptloader.loadSubScript("resource:///modules/jsmime/jsmime.js");
+Components.utils.import("resource:///modules/jsmime.jsm");
 requireCache.set("jsmime", jsmime);
 
 function require(path) {
   if (requireCache.has(path))
     return requireCache.get(path);
 
   if (path.startsWith("test/")) {
     let name = path.substring("test/".length);
--- a/mailnews/mime/src/jsmime.jsm
+++ b/mailnews/mime/src/jsmime.jsm
@@ -18,80 +18,54 @@ var EXPORTED_SYMBOLS = ["jsmime"];
 
 function bytesToString(buffer) {
   var string = '';
   for (var i = 0; i < buffer.length; i++)
     string += String.fromCharCode(buffer[i]);
   return string;
 }
 
-// A polyfill to support non-encoding-spec charsets. Since the only converter
-// available to us from JavaScript has a very, very weak and inflexible API, we
-// choose to rely on the regular text decoder unless absolutely necessary.
-// support non-encoding-spec charsets.
-function FakeTextDecoder(label="UTF-8", options = {}) {
-  this._reset(label);
-  // So nsIScriptableUnicodeConverter only gives us fatal=false, unless we are
-  // using UTF-8, where we only get fatal=true. The internals of said class tell
-  // us to use a C++-only class if we need better behavior.
+// Our UTF-7 decoder.
+function UTF7TextDecoder(label, options = {}) {
+  this.manager = Cc["@mozilla.org/charset-converter-manager;1"]
+                   .createInstance(Ci.nsICharsetConverterManager);
+  let charset;
+  try {
+    charset = this.manager.getCharsetAlias(label);
+  } catch (ex) {
+    // Unknown charset, nothing we can do.
+    throw ex;
+  }
+  if (charset.toLowerCase() != "utf-7")
+    throw new Error("UTF7TextDecoder: This code should never be reached for " + label);
+  this.collectInput = "";
 }
-FakeTextDecoder.prototype = {
-  _reset: function (label) {
-    this._encoder = Cc[
-      "@mozilla.org/intl/scriptableunicodeconverter"]
-      .createInstance(Ci.nsIScriptableUnicodeConverter);
-    this._encoder.isInternal = true;
-    let manager = Cc["@mozilla.org/charset-converter-manager;1"]
-                    .createInstance(Ci.nsICharsetConverterManager);
-    this.charset = manager.getCharsetAlias(label);
-    if (this.charset.toLowerCase() != "utf-7")
-      this._encoder.charset = this.charset;
-    else
-      this.collectInput = "";
-  },
-  get encoding() { return this._encoder.charset; },
+UTF7TextDecoder.prototype = {
+  // Since the constructor checked, this will only be called for UTF-7.
   decode: function (input, options = {}) {
     let more = 'stream' in options ? options.stream : false;
-    if (this.charset.toLowerCase() == "utf-7") {
-      this.collectInput += bytesToString(input);
-      if (more)
-        return "";
-      let manager = Cc["@mozilla.org/charset-converter-manager;1"]
-                      .getService(Ci.nsICharsetConverterManager);
-      return manager.utf7ToUnicode(this.collectInput);
-    }
-
-    let result = "";
-    if (input !== undefined) {
-      let data = new Uint8Array(input);
-      result = this._encoder.convertFromByteArray(data, data.length);
-    }
-    // This isn't quite right--it won't handle errors if there are a few
-    // remaining bytes in the buffer, but it's the best we can do.
-    if (!more)
-      this._reset(this.encoding);
-    return result;
+    // There are cases where this is called without input.
+    if (!input)
+      return "";
+    this.collectInput += bytesToString(input);
+    if (more)
+      return "";
+    return this.manager.utf7ToUnicode(this.collectInput);
   },
 };
 
-var {TextDecoder} = Cu.getGlobalForObject(
-  ChromeUtils.import("resource://gre/modules/Services.jsm", {}));
-
-var RealTextDecoder = TextDecoder;
-
-function FallbackTextDecoder(charset, options) {
+function MimeTextDecoder(charset, options) {
   try {
-    return new RealTextDecoder(charset, options);
+    return new TextDecoder(charset, options);
   } catch (e) {
-    return new FakeTextDecoder(charset, options);
+    // If TextDecoder fails, it must be UTF-7 or an invalid charset.
+    return new UTF7TextDecoder(charset, options);
   }
 }
 
-TextDecoder = FallbackTextDecoder;
-
 
 // The following code loads custom MIME encoders.
 var CATEGORY_NAME = "custom-mime-encoder";
 Services.obs.addObserver(function (subject, topic, data) {
   subject = subject.QueryInterface(Ci.nsISupportsCString)
                    .data;
   if (data == CATEGORY_NAME) {
     let url = catman.getCategoryEntry(CATEGORY_NAME, subject);