Bug 848842 - Stop using heuristic detection in the File API. r=bzbarsky.
author Henri Sivonen <hsivonen@hsivonen.fi>
Tue, 17 Dec 2013 12:47:25 +0200
changeset 176816 5b57ad1e78fe444f0d0d17db4721a6bc5957a377
parent 176815 ff89e684ff19680bf1f6b237ba4ead9ee93aa5f4
child 176817 81a5cd771bfe933a12d5b06ac6d348cd54f85b09
push id 3343
push user ffxbld
push date Mon, 17 Mar 2014 21:55:32 +0000
treeherder mozilla-beta@2f7d3415f79f [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers bzbarsky
bugs 848842
milestone 29.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 848842 - Stop using heuristic detection in the File API. r=bzbarsky.
content/base/public/nsContentUtils.h
content/base/src/nsContentUtils.cpp
content/base/src/nsDOMFileReader.cpp
content/base/src/nsDOMFileReader.h
content/base/src/nsDocument.cpp
content/base/src/nsReferencedElement.cpp
content/html/document/src/nsHTMLDocument.cpp
dom/encoding/TextDecoder.cpp
dom/encoding/TextDecoder.h
dom/file/ArchiveReader.cpp
dom/file/ArchiveReader.h
dom/file/ArchiveZipEvent.cpp
dom/file/ArchiveZipEvent.h
dom/file/LockedFile.cpp
dom/file/test/test_archivereader_nonUnicode.html
dom/workers/FileReaderSync.cpp
dom/workers/FileReaderSync.h
--- a/content/base/public/nsContentUtils.h
+++ b/content/base/public/nsContentUtils.h
@@ -501,39 +501,37 @@ public:
    * aDocument.
    */
   static nsresult NewURIWithDocumentCharset(nsIURI** aResult,
                                             const nsAString& aSpec,
                                             nsIDocument* aDocument,
                                             nsIURI* aBaseURI);
 
   /**
-   * Convert aInput (in charset aCharset) to UTF16 in aOutput.
+   * Convert aInput (in encoding aEncoding) to UTF16 in aOutput.
    *
-   * @param aCharset the name of the charset; if empty, we assume UTF8
+   * @param aEncoding the Gecko-canonical name of the encoding or the empty
+   *                  string (meaning UTF-8)
    */
-  static nsresult ConvertStringFromCharset(const nsACString& aCharset,
-                                           const nsACString& aInput,
-                                           nsAString& aOutput);
+  static nsresult ConvertStringFromEncoding(const nsACString& aEncoding,
+                                            const nsACString& aInput,
+                                            nsAString& aOutput);
 
   /**
    * Determine whether a buffer begins with a BOM for UTF-8, UTF-16LE,
    * UTF-16BE
    *
    * @param aBuffer the buffer to check
    * @param aLength the length of the buffer
    * @param aCharset empty if not found
    * @return boolean indicating whether a BOM was detected.
    */
   static bool CheckForBOM(const unsigned char* aBuffer, uint32_t aLength,
                           nsACString& aCharset);
 
-  static nsresult GuessCharset(const char *aData, uint32_t aDataLen,
-                               nsACString &aCharset);
-
   static nsresult CheckQName(const nsAString& aQualifiedName,
                              bool aNamespaceAware = true,
                              const PRUnichar** aColon = nullptr);
 
   static nsresult SplitQName(const nsIContent* aNamespaceResolver,
                              const nsAFlatString& aQName,
                              int32_t *aNamespace, nsIAtom **aLocalName);
 
--- a/content/base/src/nsContentUtils.cpp
+++ b/content/base/src/nsContentUtils.cpp
@@ -80,18 +80,16 @@
 #include "nsGkAtoms.h"
 #include "nsHostObjectProtocolHandler.h"
 #include "nsHtml5Module.h"
 #include "nsHtml5StringParser.h"
 #include "nsIAsyncVerifyRedirectCallback.h"
 #include "nsICategoryManager.h"
 #include "nsIChannelEventSink.h"
 #include "nsIChannelPolicy.h"
-#include "nsICharsetDetectionObserver.h"
-#include "nsICharsetDetector.h"
 #include "nsIChromeRegistry.h"
 #include "nsIConsoleService.h"
 #include "nsIContent.h"
 #include "nsIContentSecurityPolicy.h"
 #include "nsIContentSink.h"
 #include "nsIContentViewer.h"
 #include "nsIDocShell.h"
 #include "nsIDocument.h"
@@ -129,17 +127,16 @@
 #include "nsINodeInfo.h"
 #include "nsIObjectLoadingContent.h"
 #include "nsIObserver.h"
 #include "nsIObserverService.h"
 #include "nsIOfflineCacheUpdate.h"
 #include "nsIParser.h"
 #include "nsIParserService.h"
 #include "nsIPermissionManager.h"
-#include "nsIPlatformCharset.h"
 #include "nsIPluginHost.h"
 #include "nsIRunnable.h"
 #include "nsIScriptContext.h"
 #include "nsIScriptError.h"
 #include "nsIScriptGlobalObject.h"
 #include "nsIScriptObjectPrincipal.h"
 #include "nsIScriptSecurityManager.h"
 #include "nsIStringBundle.h"
@@ -3453,36 +3450,33 @@ nsContentUtils::MatchElementId(nsIConten
   if (!id) {
     // OOM, so just bail
     return nullptr;
   }
 
   return MatchElementId(aContent, id);
 }
 
-// Convert the string from the given charset to Unicode.
+// Convert the string from the given encoding to Unicode.
 /* static */
 nsresult
-nsContentUtils::ConvertStringFromCharset(const nsACString& aCharset,
-                                         const nsACString& aInput,
-                                         nsAString& aOutput)
-{
-  if (aCharset.IsEmpty()) {
-    // Treat the string as UTF8
-    CopyUTF8toUTF16(aInput, aOutput);
-    return NS_OK;
+nsContentUtils::ConvertStringFromEncoding(const nsACString& aEncoding,
+                                          const nsACString& aInput,
+                                          nsAString& aOutput)
+{
+  nsAutoCString encoding;
+  if (aEncoding.IsEmpty()) {
+    encoding.AssignLiteral("UTF-8");
+  } else {
+    encoding.Assign(aEncoding);
   }
 
   ErrorResult rv;
   nsAutoPtr<TextDecoder> decoder(new TextDecoder());
-  decoder->Init(NS_ConvertUTF8toUTF16(aCharset), false, rv);
-  if (rv.Failed()) {
-    rv.ClearMessage();
-    return rv.ErrorCode();
-  }
+  decoder->InitWithEncoding(encoding, false);
 
   decoder->Decode(aInput.BeginReading(), aInput.Length(), false,
                   aOutput, rv);
   return rv.ErrorCode();
 }
 
 /* static */
 bool
@@ -3506,90 +3500,16 @@ nsContentUtils::CheckForBOM(const unsign
     aCharset = "UTF-16LE";
   } else {
     found = false;
   }
 
   return found;
 }
 
-NS_IMPL_ISUPPORTS1(CharsetDetectionObserver,
-                   nsICharsetDetectionObserver)
-
-/* static */
-nsresult
-nsContentUtils::GuessCharset(const char *aData, uint32_t aDataLen,
-                             nsACString &aCharset)
-{
-  // First try the universal charset detector
-  nsCOMPtr<nsICharsetDetector> detector =
-    do_CreateInstance(NS_CHARSET_DETECTOR_CONTRACTID_BASE
-                      "universal_charset_detector");
-  if (!detector) {
-    // No universal charset detector, try the default charset detector
-    const nsAdoptingCString& detectorName =
-      Preferences::GetLocalizedCString("intl.charset.detector");
-    if (!detectorName.IsEmpty()) {
-      nsAutoCString detectorContractID;
-      detectorContractID.AssignLiteral(NS_CHARSET_DETECTOR_CONTRACTID_BASE);
-      detectorContractID += detectorName;
-      detector = do_CreateInstance(detectorContractID.get());
-    }
-  }
-
-  nsresult rv;
-
-  // The charset detector doesn't work for empty (null) aData. Testing
-  // aDataLen instead of aData so that we catch potential errors.
-  if (detector && aDataLen) {
-    nsRefPtr<CharsetDetectionObserver> observer =
-      new CharsetDetectionObserver();
-
-    rv = detector->Init(observer);
-    NS_ENSURE_SUCCESS(rv, rv);
-
-    bool dummy;
-    rv = detector->DoIt(aData, aDataLen, &dummy);
-    NS_ENSURE_SUCCESS(rv, rv);
-
-    rv = detector->Done();
-    NS_ENSURE_SUCCESS(rv, rv);
-
-    aCharset = observer->GetResult();
-  } else {
-    // no charset detector available, check the BOM
-    unsigned char sniffBuf[3];
-    uint32_t numRead =
-      (aDataLen >= sizeof(sniffBuf) ? sizeof(sniffBuf) : aDataLen);
-    memcpy(sniffBuf, aData, numRead);
-
-    CheckForBOM(sniffBuf, numRead, aCharset);
-  }
-
-  if (aCharset.IsEmpty()) {
-    // no charset detected, default to the system charset
-    nsCOMPtr<nsIPlatformCharset> platformCharset =
-      do_GetService(NS_PLATFORMCHARSET_CONTRACTID, &rv);
-    if (NS_SUCCEEDED(rv)) {
-      rv = platformCharset->GetCharset(kPlatformCharsetSel_PlainTextInFile,
-                                       aCharset);
-      if (NS_FAILED(rv)) {
-        NS_WARNING("Failed to get the system charset!");
-      }
-    }
-  }
-
-  if (aCharset.IsEmpty()) {
-    // no sniffed or default charset, assume UTF-8
-    aCharset.AssignLiteral("UTF-8");
-  }
-
-  return NS_OK;
-}
-
 /* static */
 void
 nsContentUtils::RegisterShutdownObserver(nsIObserver* aObserver)
 {
   nsCOMPtr<nsIObserverService> observerService =
     mozilla::services::GetObserverService();
   if (observerService) {
     observerService->AddObserver(aObserver, 
--- a/content/base/src/nsDOMFileReader.cpp
+++ b/content/base/src/nsDOMFileReader.cpp
@@ -383,17 +383,25 @@ nsDOMFileReader::DoOnStopRequest(nsIRequ
 
   nsresult rv = NS_OK;
   switch (mDataFormat) {
     case FILE_AS_ARRAYBUFFER:
       break; //Already accumulated mResultArrayBuffer
     case FILE_AS_BINARY:
       break; //Already accumulated mResult
     case FILE_AS_TEXT:
-      rv = GetAsText(mCharset, mFileData, mDataLen, mResult);
+      if (!mFileData) {
+        if (mDataLen) {
+          rv = NS_ERROR_OUT_OF_MEMORY;
+          break;
+        }
+        rv = GetAsText(file, mCharset, "", mDataLen, mResult);
+        break;
+      }
+      rv = GetAsText(file, mCharset, mFileData, mDataLen, mResult);
       break;
     case FILE_AS_DATAURL:
       rv = GetAsDataURL(file, mFileData, mDataLen, mResult);
       break;
   }
   
   mResult.SetIsVoid(false);
 
@@ -471,38 +479,53 @@ nsDOMFileReader::ReadFileContent(JSConte
     if (!mResultArrayBuffer) {
       NS_WARNING("Failed to create JS array buffer");
       aRv.Throw(NS_ERROR_FAILURE);
     }
   }
 }
 
 nsresult
-nsDOMFileReader::GetAsText(const nsACString &aCharset,
+nsDOMFileReader::GetAsText(nsIDOMBlob *aFile,
+                           const nsACString &aCharset,
                            const char *aFileData,
                            uint32_t aDataLen,
                            nsAString& aResult)
 {
-  nsresult rv;
-  nsAutoCString charsetGuess;
-  if (!aCharset.IsEmpty()) {
-    charsetGuess = aCharset;
-  } else {
-    rv = nsContentUtils::GuessCharset(aFileData, aDataLen, charsetGuess);
-    NS_ENSURE_SUCCESS(rv, rv);
+  // The BOM sniffing is baked into the "decode" part of the Encoding
+  // Standard, which the File API references.
+  nsAutoCString encoding;
+  if (!nsContentUtils::CheckForBOM(
+        reinterpret_cast<const unsigned char *>(aFileData),
+        aDataLen,
+        encoding)) {
+    // BOM sniffing failed. Try the API argument.
+    if (!EncodingUtils::FindEncodingForLabel(aCharset,
+                                             encoding)) {
+      // API argument failed. Try the type property of the blob.
+      nsAutoString type16;
+      aFile->GetType(type16);
+      NS_ConvertUTF16toUTF8 type(type16);
+      nsAutoCString specifiedCharset;
+      bool haveCharset;
+      int32_t charsetStart, charsetEnd;
+      NS_ExtractCharsetFromContentType(type,
+                                       specifiedCharset,
+                                       &haveCharset,
+                                       &charsetStart,
+                                       &charsetEnd);
+      if (!EncodingUtils::FindEncodingForLabel(specifiedCharset, encoding)) {
+        // Type property failed. Use UTF-8.
+        encoding.AssignLiteral("UTF-8");
+      }
+    }
   }
 
-  nsAutoCString charset;
-  if (!EncodingUtils::FindEncodingForLabel(charsetGuess, charset)) {
-    return NS_ERROR_DOM_ENCODING_NOT_SUPPORTED_ERR;
-  }
-
-  rv = ConvertStream(aFileData, aDataLen, charset.get(), aResult);
-
-  return NS_OK;
+  nsDependentCSubstring data(aFileData, aDataLen);
+  return nsContentUtils::ConvertStringFromEncoding(encoding, data, aResult);
 }
 
 nsresult
 nsDOMFileReader::GetAsDataURL(nsIDOMBlob *aFile,
                               const char *aFileData,
                               uint32_t aDataLen,
                               nsAString& aResult)
 {
@@ -522,38 +545,13 @@ nsDOMFileReader::GetAsDataURL(nsIDOMBlob
   rv = Base64Encode(Substring(aFileData, aDataLen), encodedData);
   NS_ENSURE_SUCCESS(rv, rv);
 
   AppendASCIItoUTF16(encodedData, aResult);
 
   return NS_OK;
 }
 
-nsresult
-nsDOMFileReader::ConvertStream(const char *aFileData,
-                               uint32_t aDataLen,
-                               const char *aCharset,
-                               nsAString &aResult)
-{
-  nsresult rv;
-
-  nsCOMPtr<nsIUnicodeDecoder> unicodeDecoder =
-    EncodingUtils::DecoderForEncoding(aCharset);
-
-  int32_t destLength;
-  rv = unicodeDecoder->GetMaxLength(aFileData, aDataLen, &destLength);
-  NS_ENSURE_SUCCESS(rv, rv);
-
-  if (!aResult.SetLength(destLength, fallible_t()))
-    return NS_ERROR_OUT_OF_MEMORY;
-
-  int32_t srcLength = aDataLen;
-  rv = unicodeDecoder->Convert(aFileData, &srcLength, aResult.BeginWriting(), &destLength);
-  aResult.SetLength(destLength); //Trim down to the correct size
-
-  return rv;
-}
-
 /* virtual */ JSObject*
 nsDOMFileReader::WrapObject(JSContext* aCx, JS::Handle<JSObject*> aScope)
 {
   return FileReaderBinding::Wrap(aCx, aScope, this);
 }
--- a/content/base/src/nsDOMFileReader.h
+++ b/content/base/src/nsDOMFileReader.h
@@ -120,20 +120,19 @@ protected:
     FILE_AS_BINARY,
     FILE_AS_TEXT,
     FILE_AS_DATAURL
   };
 
   void ReadFileContent(JSContext* aCx, nsIDOMBlob* aBlob,
                        const nsAString &aCharset, eDataFormat aDataFormat,
                        ErrorResult& aRv);
-  nsresult GetAsText(const nsACString &aCharset,
+  nsresult GetAsText(nsIDOMBlob *aFile, const nsACString &aCharset,
                      const char *aFileData, uint32_t aDataLen, nsAString &aResult);
   nsresult GetAsDataURL(nsIDOMBlob *aFile, const char *aFileData, uint32_t aDataLen, nsAString &aResult); 
-  nsresult ConvertStream(const char *aFileData, uint32_t aDataLen, const char *aCharset, nsAString &aResult); 
 
   void FreeFileData() {
     moz_free(mFileData);
     mFileData = nullptr;
     mDataLen = 0;
   }
 
   char *mFileData;
--- a/content/base/src/nsDocument.cpp
+++ b/content/base/src/nsDocument.cpp
@@ -8842,17 +8842,19 @@ nsDocument::ScrollToRef()
     }
 
     // If UTF-8 URI failed then try to assume the string as a
     // document's charset.
 
     if (NS_FAILED(rv)) {
       const nsACString &docCharset = GetDocumentCharacterSet();
 
-      rv = nsContentUtils::ConvertStringFromCharset(docCharset, unescapedRef, ref);
+      rv = nsContentUtils::ConvertStringFromEncoding(docCharset,
+                                                     unescapedRef,
+                                                     ref);
 
       if (NS_SUCCEEDED(rv) && !ref.IsEmpty()) {
         rv = shell->GoToAnchor(ref, mChangeScrollPosWhenScrollingToRef);
       }
     }
     if (NS_SUCCEEDED(rv)) {
       mScrolledToRefAlready = true;
     }
--- a/content/base/src/nsReferencedElement.cpp
+++ b/content/base/src/nsReferencedElement.cpp
@@ -29,18 +29,23 @@ nsReferencedElement::Reset(nsIContent* a
   aURI->GetRef(refPart);
   // Unescape %-escapes in the reference. The result will be in the
   // origin charset of the URL, hopefully...
   NS_UnescapeURL(refPart);
 
   nsAutoCString charset;
   aURI->GetOriginCharset(charset);
   nsAutoString ref;
-  nsresult rv = nsContentUtils::ConvertStringFromCharset(charset, refPart, ref);
+  nsresult rv = nsContentUtils::ConvertStringFromEncoding(charset,
+                                                          refPart,
+                                                          ref);
   if (NS_FAILED(rv)) {
+    // XXX Eww. If fallible malloc failed, using a conversion method that
+    // assumes UTF-8 and doesn't handle UTF-8 errors.
+    // https://bugzilla.mozilla.org/show_bug.cgi?id=951082
     CopyUTF8toUTF16(refPart, ref);
   }
   if (ref.IsEmpty())
     return;
 
   // Get the current document
   nsIDocument *doc = aFromContent->GetCurrentDoc();
   if (!doc)
--- a/content/html/document/src/nsHTMLDocument.cpp
+++ b/content/html/document/src/nsHTMLDocument.cpp
@@ -1197,18 +1197,18 @@ nsHTMLDocument::GetCookie(nsAString& aCo
 
       return;
     }
 
     nsXPIDLCString cookie;
     service->GetCookieString(codebaseURI, mChannel, getter_Copies(cookie));
     // CopyUTF8toUTF16 doesn't handle error
     // because it assumes that the input is valid.
-    nsContentUtils::ConvertStringFromCharset(NS_LITERAL_CSTRING("utf-8"),
-                                             cookie, aCookie);
+    nsContentUtils::ConvertStringFromEncoding(NS_LITERAL_CSTRING("UTF-8"),
+                                              cookie, aCookie);
   }
 }
 
 NS_IMETHODIMP
 nsHTMLDocument::SetCookie(const nsAString& aCookie)
 {
   ErrorResult rv;
   SetCookie(aCookie, rv);
--- a/dom/encoding/TextDecoder.cpp
+++ b/dom/encoding/TextDecoder.cpp
@@ -7,30 +7,37 @@
 #include "nsContentUtils.h"
 
 namespace mozilla {
 namespace dom {
 
 static const PRUnichar kReplacementChar = static_cast<PRUnichar>(0xFFFD);
 
 void
-TextDecoder::Init(const nsAString& aEncoding, const bool aFatal,
+TextDecoder::Init(const nsAString& aLabel, const bool aFatal,
                   ErrorResult& aRv)
 {
-  nsAutoString label(aEncoding);
+  nsAutoString label(aLabel);
   EncodingUtils::TrimSpaceCharacters(label);
 
+  nsAutoCString encoding;
   // Let encoding be the result of getting an encoding from label.
   // If encoding is failure or replacement, throw a TypeError.
-  if (!EncodingUtils::FindEncodingForLabel(label, mEncoding) ||
-      mEncoding.EqualsLiteral("replacement")) {
+  if (!EncodingUtils::FindEncodingForLabel(label, encoding) ||
+      encoding.EqualsLiteral("replacement")) {
     aRv.ThrowTypeError(MSG_ENCODING_NOT_SUPPORTED, &label);
     return;
   }
+  InitWithEncoding(encoding, aFatal);
+}
 
+void
+TextDecoder::InitWithEncoding(const nsACString& aEncoding, const bool aFatal)
+{
+  mEncoding = aEncoding;
   // If the constructor is called with an options argument,
   // and the fatal property of the dictionary is set,
   // set the internal fatal flag of the decoder object.
   mFatal = aFatal;
 
   // Create a decoder object for mEncoding.
   mDecoder = EncodingUtils::DecoderForEncoding(mEncoding);
 
--- a/dom/encoding/TextDecoder.h
+++ b/dom/encoding/TextDecoder.h
@@ -54,26 +54,34 @@ public:
 
   nsISupports*
   GetParentObject()
   {
     return nullptr;
   }
 
   /**
-   * Validates provided encoding and throws an exception if invalid encoding.
-   * If no encoding is provided then mEncoding is default initialised to "utf-8".
+   * Validates provided label and throws an exception if invalid label.
    *
-   * @param aEncoding    Optional encoding (case insensitive) provided.
-   *                     Default value is "utf-8" if no encoding is provided.
-   * @param aFatal       aFatal, indicates whether to throw an 'EncodingError'
-   *                     exception or not.
+   * @param aLabel       The encoding label (case insensitive) provided.
+   * @param aFatal       indicates whether to throw an 'EncodingError'
+   *                     exception or not when decoding.
    * @return aRv         EncodingError exception else null.
    */
-  void Init(const nsAString& aEncoding, const bool aFatal, ErrorResult& aRv);
+  void Init(const nsAString& aLabel, const bool aFatal, ErrorResult& aRv);
+
+  /**
+   * Performs initialization with a Gecko-canonical encoding name (as opposed
+   * to a label.)
+   *
+   * @param aEncoding    A Gecko-canonical encoding name
+   * @param aFatal       indicates whether to throw an 'EncodingError'
+   *                     exception or not when decoding.
+   */
+  void InitWithEncoding(const nsACString& aEncoding, const bool aFatal);
 
   /**
    * Return the encoding name.
    *
    * @param aEncoding, current encoding.
    */
   void GetEncoding(nsAString& aEncoding);
 
--- a/dom/file/ArchiveReader.cpp
+++ b/dom/file/ArchiveReader.cpp
@@ -10,16 +10,17 @@
 #include "ArchiveZipEvent.h"
 
 #include "nsIURI.h"
 #include "nsNetUtil.h"
 
 #include "mozilla/dom/ArchiveReaderBinding.h"
 #include "mozilla/dom/BindingDeclarations.h"
 #include "mozilla/Preferences.h"
+#include "mozilla/dom/EncodingUtils.h"
 
 using namespace mozilla;
 using namespace mozilla::dom;
 USING_FILE_NAMESPACE
 
 /* static */ already_AddRefed<ArchiveReader>
 ArchiveReader::Constructor(const GlobalObject& aGlobal,
                            nsIDOMBlob* aBlob,
@@ -30,23 +31,30 @@ ArchiveReader::Constructor(const GlobalO
   MOZ_ASSERT(PrefEnabled());
 
   nsCOMPtr<nsPIDOMWindow> window = do_QueryInterface(aGlobal.GetAsSupports());
   if (!window) {
     aError.Throw(NS_ERROR_UNEXPECTED);
     return nullptr;
   }
 
+  nsAutoCString encoding;
+  if (!EncodingUtils::FindEncodingForLabel(aOptions.mEncoding, encoding) ||
+      encoding.EqualsLiteral("replacement")) {
+    aError.ThrowTypeError(MSG_ENCODING_NOT_SUPPORTED, &aOptions.mEncoding);
+    return nullptr;
+  }
+
   nsRefPtr<ArchiveReader> reader =
-    new ArchiveReader(aBlob, window, aOptions.mEncoding);
+    new ArchiveReader(aBlob, window, encoding);
   return reader.forget();
 }
 
 ArchiveReader::ArchiveReader(nsIDOMBlob* aBlob, nsPIDOMWindow* aWindow,
-                             const nsString& aEncoding)
+                             const nsACString& aEncoding)
   : mBlob(aBlob)
   , mWindow(aWindow)
   , mStatus(NOT_STARTED)
   , mEncoding(aEncoding)
 {
   MOZ_ASSERT(aBlob);
   MOZ_ASSERT(aWindow);
 
--- a/dom/file/ArchiveReader.h
+++ b/dom/file/ArchiveReader.h
@@ -37,17 +37,17 @@ public:
   NS_DECL_CYCLE_COLLECTING_ISUPPORTS
   NS_DECL_CYCLE_COLLECTION_SCRIPT_HOLDER_CLASS(ArchiveReader)
 
   static already_AddRefed<ArchiveReader>
   Constructor(const GlobalObject& aGlobal, nsIDOMBlob* aBlob,
               const ArchiveReaderOptions& aOptions, ErrorResult& aError);
 
   ArchiveReader(nsIDOMBlob* aBlob, nsPIDOMWindow* aWindow,
-                const nsString& aEncoding);
+                const nsACString& aEncoding);
 
   nsIDOMWindow* GetParentObject() const
   {
     return mWindow;
   }
   virtual JSObject* WrapObject(JSContext* aCx,
                                JS::Handle<JSObject*> aScope) MOZ_OVERRIDE;
 
@@ -102,14 +102,14 @@ protected:
   nsTArray<nsRefPtr<ArchiveRequest> > mRequests;
 
   // Everything related to the blobs and the status:
   struct {
     nsTArray<nsCOMPtr<nsIDOMFile> > fileList;
     nsresult status;
   } mData;
 
-  nsString mEncoding;
+  nsCString mEncoding;
 };
 
 END_FILE_NAMESPACE
 
 #endif // mozilla_dom_file_domarchivereader_h__
--- a/dom/file/ArchiveZipEvent.cpp
+++ b/dom/file/ArchiveZipEvent.cpp
@@ -17,17 +17,17 @@ using namespace mozilla::dom;
 USING_FILE_NAMESPACE
 
 #ifndef PATH_MAX
 #  define PATH_MAX 65536 // The filename length is stored in 2 bytes
 #endif
 
 ArchiveZipItem::ArchiveZipItem(const char* aFilename,
                                const ZipCentral& aCentralStruct,
-                               const nsAString& aEncoding)
+                               const nsACString& aEncoding)
 : mFilename(aFilename),
   mCentralStruct(aCentralStruct),
   mEncoding(aEncoding)
 {
   MOZ_COUNT_CTOR(ArchiveZipItem);
 }
 
 ArchiveZipItem::~ArchiveZipItem()
@@ -38,18 +38,18 @@ ArchiveZipItem::~ArchiveZipItem()
 nsresult
 ArchiveZipItem::ConvertFilename()
 {
   if (mEncoding.IsEmpty()) {
     return NS_ERROR_FAILURE;
   }
 
   nsString filenameU;
-  nsresult rv = nsContentUtils::ConvertStringFromCharset(
-                  NS_ConvertUTF16toUTF8(mEncoding),
+  nsresult rv = nsContentUtils::ConvertStringFromEncoding(
+                  mEncoding,
                   mFilename, filenameU);
   NS_ENSURE_SUCCESS(rv, rv);
 
   if (filenameU.IsEmpty()) {
     return NS_ERROR_FAILURE;
   }
 
   mFilenameU = filenameU;
@@ -106,17 +106,17 @@ uint16_t
 ArchiveZipItem::StrToInt16(const uint8_t* aStr)
 {
   return (uint16_t) ((aStr [0]) | (aStr [1] << 8));
 }
 
 // ArchiveReaderZipEvent
 
 ArchiveReaderZipEvent::ArchiveReaderZipEvent(ArchiveReader* aArchiveReader,
-                                             const nsAString& aEncoding)
+                                             const nsACString& aEncoding)
 : ArchiveReaderEvent(aArchiveReader),
   mEncoding(aEncoding)
 {
 }
 
 // NOTE: this runs in a different thread!!
 nsresult
 ArchiveReaderZipEvent::Exec()
--- a/dom/file/ArchiveZipEvent.h
+++ b/dom/file/ArchiveZipEvent.h
@@ -20,17 +20,17 @@ BEGIN_FILE_NAMESPACE
 /**
  * ArchiveZipItem - ArchiveItem for ArchiveReaderZipEvent
  */
 class ArchiveZipItem : public ArchiveItem
 {
 public:
   ArchiveZipItem(const char* aFilename,
                  const ZipCentral& aCentralStruct,
-                 const nsAString& aEncoding);
+                 const nsACString& aEncoding);
   virtual ~ArchiveZipItem();
 
   nsresult GetFilename(nsString& aFilename) MOZ_OVERRIDE;
 
   // From zipItem to DOMFile:
   virtual nsIDOMFile* File(ArchiveReader* aArchiveReader) MOZ_OVERRIDE;
 
 public: // for the event
@@ -41,30 +41,30 @@ private:
   nsresult ConvertFilename();
 
 private: // data
   nsCString mFilename;
 
   nsString mFilenameU;
   ZipCentral mCentralStruct;
 
-  nsString mEncoding;
+  nsCString mEncoding;
 };
 
 /**
  * ArchiveReaderEvent implements the ArchiveReaderEvent for the ZIP format
  */
 class ArchiveReaderZipEvent : public ArchiveReaderEvent
 {
 public:
   ArchiveReaderZipEvent(ArchiveReader* aArchiveReader,
-                        const nsAString& aEncoding);
+                        const nsACString& aEncoding);
 
   nsresult Exec() MOZ_OVERRIDE;
 
 private:
-  nsString mEncoding;
+  nsCString mEncoding;
 };
 
 END_FILE_NAMESPACE
 
 #endif // mozilla_dom_file_domarchivezipevent_h__
 
--- a/dom/file/LockedFile.cpp
+++ b/dom/file/LockedFile.cpp
@@ -1035,35 +1035,36 @@ ReadHelper::GetSuccessResult(JSContext* 
 }
 
 nsresult
 ReadTextHelper::GetSuccessResult(JSContext* aCx,
                                  JS::Value* aVal)
 {
   nsresult rv;
 
-  nsCString charsetGuess;
-  if (!mEncoding.IsEmpty()) {
-    CopyUTF16toUTF8(mEncoding, charsetGuess);
-  }
-  else {
-    const nsCString& data = mStream->Data();
-    uint32_t dataLen = data.Length();
-    rv = nsContentUtils::GuessCharset(data.get(), dataLen, charsetGuess);
-    NS_ENSURE_SUCCESS(rv, rv);
-  }
-
-  nsCString charset;
-  if (!EncodingUtils::FindEncodingForLabel(charsetGuess, charset)) {
-    return NS_ERROR_DOM_ENCODING_NOT_SUPPORTED_ERR;
+  nsAutoCString encoding;
+  const nsCString& data = mStream->Data();
+  // The BOM sniffing is baked into the "decode" part of the Encoding
+  // Standard, which the File API references.
+  if (!nsContentUtils::CheckForBOM(
+        reinterpret_cast<const unsigned char *>(data.get()),
+        data.Length(),
+        encoding)) {
+    // BOM sniffing failed. Try the API argument.
+    if (!EncodingUtils::FindEncodingForLabel(mEncoding, encoding)) {
+      // API argument failed. Since we are dealing with a file system file,
+      // we don't have a meaningful type attribute for the blob available,
+      // so proceeding to the next step, which is defaulting to UTF-8.
+      encoding.AssignLiteral("UTF-8");
+    }
   }
 
   nsString tmpString;
-  rv = nsContentUtils::ConvertStringFromCharset(charset, mStream->Data(),
-                                                tmpString);
+  rv = nsContentUtils::ConvertStringFromEncoding(encoding, data,
+                                                 tmpString);
   NS_ENSURE_SUCCESS(rv, rv);
 
   JS::Rooted<JS::Value> rval(aCx);
   if (!xpc::StringToJsval(aCx, tmpString, &rval)) {
     NS_WARNING("Failed to convert string!");
     return NS_ERROR_FAILURE;
   }
 
--- a/dom/file/test/test_archivereader_nonUnicode.html
+++ b/dom/file/test/test_archivereader_nonUnicode.html
@@ -54,27 +54,21 @@
       test2();
     }
   }
 
   function test2()
   {
     var binaryFile = createNonUnicodeData();
 
-    var r = new ArchiveReader(binaryFile, { encoding: "random stuff" });
-    isnot(r, null, "ArchiveReader cannot be null");
-
-    // GetFilename
-    var handle = r.getFilenames();
-    isnot(handle, null, "ArchiveReader.getFilenames() cannot be null");
-    handle.onsuccess = function() {
-      ok(true, "ArchiveReader.getFilenames() should return a 'success'");
-      is(this.result instanceof Array, true, "ArchiveReader.getFilenames() should return an array");
-      is(this.result.length, 0, "ArchiveReader.getFilenames(): the array contains 0 item");
-      ok(this.reader, r, "ArchiveRequest.reader should be == ArchiveReader");
+    try {
+      new ArchiveReader(binaryFile, { encoding: "random stuff" });
+      ok(false, "Should have thrown for bogus encoding label.");
+    } catch (e) {
+      ok(e instanceof TypeError, "Expected a TypeError");
       finishTest();
     }
   }
 
   function testSteps()
   {
     test1();
     yield undefined;
--- a/dom/workers/FileReaderSync.cpp
+++ b/dom/workers/FileReaderSync.cpp
@@ -4,48 +4,39 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "FileReaderSync.h"
 
 #include "jsfriendapi.h"
 #include "mozilla/Base64.h"
 #include "mozilla/dom/EncodingUtils.h"
+#include "nsContentUtils.h"
 #include "mozilla/dom/FileReaderSyncBinding.h"
 #include "nsCExternalHandlerService.h"
 #include "nsComponentManagerUtils.h"
 #include "nsCOMPtr.h"
 #include "nsDOMClassInfoID.h"
 #include "nsError.h"
 #include "nsIDOMFile.h"
-#include "nsICharsetDetector.h"
 #include "nsIConverterInputStream.h"
 #include "nsIInputStream.h"
-#include "nsIPlatformCharset.h"
 #include "nsISeekableStream.h"
 #include "nsISupportsImpl.h"
-#include "nsISupportsImpl.h"
 #include "nsNetUtil.h"
 #include "nsServiceManagerUtils.h"
 
 #include "File.h"
 #include "RuntimeService.h"
 
 USING_WORKERS_NAMESPACE
 using namespace mozilla;
 using mozilla::dom::Optional;
 using mozilla::dom::GlobalObject;
 
-NS_IMPL_ADDREF(FileReaderSync)
-NS_IMPL_RELEASE(FileReaderSync)
-
-NS_INTERFACE_MAP_BEGIN(FileReaderSync)
-  NS_INTERFACE_MAP_ENTRY(nsICharsetDetectionObserver)
-NS_INTERFACE_MAP_END
-
 // static
 already_AddRefed<FileReaderSync>
 FileReaderSync::Constructor(const GlobalObject& aGlobal, ErrorResult& aRv)
 {
   nsRefPtr<FileReaderSync> frs = new FileReaderSync();
 
   return frs.forget();
 }
@@ -154,47 +145,67 @@ FileReaderSync::ReadAsText(JS::Handle<JS
 
   nsCOMPtr<nsIInputStream> stream;
   nsresult rv = blob->GetInternalStream(getter_AddRefs(stream));
   if (NS_FAILED(rv)) {
     aRv.Throw(rv);
     return;
   }
 
-  nsCString charsetGuess;
-  if (!aEncoding.WasPassed() || aEncoding.Value().IsEmpty()) {
-    rv = GuessCharset(stream, charsetGuess);
-    if (NS_FAILED(rv)) {
-      aRv.Throw(rv);
-      return;
-    }
-
-    nsCOMPtr<nsISeekableStream> seekable = do_QueryInterface(stream);
-    if (!seekable) {
-      aRv.Throw(NS_ERROR_FAILURE);
-      return;
-    }
-
-    // Seek to 0 because guessing the charset advances the stream.
-    rv = seekable->Seek(nsISeekableStream::NS_SEEK_SET, 0);
-    if (NS_FAILED(rv)) {
-      aRv.Throw(rv);
-      return;
-    }
-  } else {
-    CopyUTF16toUTF8(aEncoding.Value(), charsetGuess);
-  }
-
-  nsCString charset;
-  if (!EncodingUtils::FindEncodingForLabel(charsetGuess, charset)) {
-    aRv.Throw(NS_ERROR_DOM_ENCODING_NOT_SUPPORTED_ERR);
+  nsAutoCString encoding;
+  unsigned char sniffBuf[3] = { 0, 0, 0 };
+  uint32_t numRead;
+  rv = stream->Read(reinterpret_cast<char*>(sniffBuf),
+                    sizeof(sniffBuf), &numRead);
+  if (NS_FAILED(rv)) {
+    aRv.Throw(rv);
     return;
   }
 
-  rv = ConvertStream(stream, charset.get(), aResult);
+  // The BOM sniffing is baked into the "decode" part of the Encoding
+  // Standard, which the File API references.
+  if (!nsContentUtils::CheckForBOM(sniffBuf, numRead, encoding)) {
+    // BOM sniffing failed. Try the API argument.
+    if (!aEncoding.WasPassed() ||
+        !EncodingUtils::FindEncodingForLabel(aEncoding.Value(),
+                                             encoding)) {
+      // API argument failed. Try the type property of the blob.
+      nsAutoString type16;
+      blob->GetType(type16);
+      NS_ConvertUTF16toUTF8 type(type16);
+      nsAutoCString specifiedCharset;
+      bool haveCharset;
+      int32_t charsetStart, charsetEnd;
+      NS_ExtractCharsetFromContentType(type,
+                                       specifiedCharset,
+                                       &haveCharset,
+                                       &charsetStart,
+                                       &charsetEnd);
+      if (!EncodingUtils::FindEncodingForLabel(specifiedCharset, encoding)) {
+        // Type property failed. Use UTF-8.
+        encoding.AssignLiteral("UTF-8");
+      }
+    }
+  }
+
+  nsCOMPtr<nsISeekableStream> seekable = do_QueryInterface(stream);
+  if (!seekable) {
+    aRv.Throw(NS_ERROR_FAILURE);
+    return;
+  }
+
+  // Seek back to 0 to undo the BOM sniffing advance. UTF-8 and UTF-16
+  // decoders will swallow the BOM.
+  rv = seekable->Seek(nsISeekableStream::NS_SEEK_SET, 0);
+  if (NS_FAILED(rv)) {
+    aRv.Throw(rv);
+    return;
+  }
+
+  rv = ConvertStream(stream, encoding.get(), aResult);
   if (NS_FAILED(rv)) {
     aRv.Throw(rv);
     return;
   }
 }
 
 void
 FileReaderSync::ReadAsDataURL(JS::Handle<JSObject*> aBlob, nsAString& aResult,
@@ -278,96 +289,8 @@ FileReaderSync::ConvertStream(nsIInputSt
     if (aResult.Length() - oldLength != result.Length()) {
       return NS_ERROR_OUT_OF_MEMORY;
     }
   }
 
   return rv;
 }
 
-nsresult
-FileReaderSync::GuessCharset(nsIInputStream *aStream, nsACString &aCharset)
-{
-  // First try the universal charset detector
-  nsCOMPtr<nsICharsetDetector> detector
-    = do_CreateInstance(NS_CHARSET_DETECTOR_CONTRACTID_BASE
-                        "universal_charset_detector");
-  if (!detector) {
-    RuntimeService* runtime = RuntimeService::GetService();
-    NS_ASSERTION(runtime, "This should never be null!");
-
-    // No universal charset detector, try the default charset detector
-    const nsACString& detectorName = runtime->GetDetectorName();
-
-    if (!detectorName.IsEmpty()) {
-      nsAutoCString detectorContractID;
-      detectorContractID.AssignLiteral(NS_CHARSET_DETECTOR_CONTRACTID_BASE);
-      detectorContractID += detectorName;
-      detector = do_CreateInstance(detectorContractID.get());
-    }
-  }
-
-  nsresult rv;
-  if (detector) {
-    detector->Init(this);
-
-    bool done;
-    uint32_t numRead;
-    do {
-      char readBuf[4096];
-      rv = aStream->Read(readBuf, sizeof(readBuf), &numRead);
-      NS_ENSURE_SUCCESS(rv, rv);
-      if (numRead <= 0) {
-        break;
-      }
-      rv = detector->DoIt(readBuf, numRead, &done);
-      NS_ENSURE_SUCCESS(rv, rv);
-    } while (!done);
-
-    rv = detector->Done();
-    NS_ENSURE_SUCCESS(rv, rv);
-  } else {
-    // no charset detector available, check the BOM
-    unsigned char sniffBuf[4];
-    uint32_t numRead;
-    rv = aStream->Read(reinterpret_cast<char*>(sniffBuf),
-                       sizeof(sniffBuf), &numRead);
-    NS_ENSURE_SUCCESS(rv, rv);
-
-    if (numRead >= 2 &&
-        sniffBuf[0] == 0xfe &&
-        sniffBuf[1] == 0xff) {
-      mCharset = "UTF-16BE";
-    } else if (numRead >= 2 &&
-               sniffBuf[0] == 0xff &&
-               sniffBuf[1] == 0xfe) {
-      mCharset = "UTF-16LE";
-    } else if (numRead >= 3 &&
-               sniffBuf[0] == 0xef &&
-               sniffBuf[1] == 0xbb &&
-               sniffBuf[2] == 0xbf) {
-      mCharset = "UTF-8";
-    }
-  }
-
-  if (mCharset.IsEmpty()) {
-    RuntimeService* runtime = RuntimeService::GetService();
-    mCharset = runtime->GetSystemCharset();
-  }
-
-  if (mCharset.IsEmpty()) {
-    // no sniffed or default charset, try UTF-8
-    mCharset.AssignLiteral("UTF-8");
-  }
-
-  aCharset = mCharset;
-  mCharset.Truncate();
-
-  return NS_OK;
-}
-
-NS_IMETHODIMP
-FileReaderSync::Notify(const char* aCharset, nsDetectionConfident aConf)
-{
-  mCharset.Assign(aCharset);
-
-  return NS_OK;
-}
--- a/dom/workers/FileReaderSync.h
+++ b/dom/workers/FileReaderSync.h
@@ -4,57 +4,50 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #ifndef mozilla_dom_workers_filereadersync_h__
 #define mozilla_dom_workers_filereadersync_h__
 
 #include "Workers.h"
 
-#include "nsICharsetDetectionObserver.h"
-
 class nsIInputStream;
 class nsIDOMBlob;
 
 namespace mozilla {
 class ErrorResult;
 
 namespace dom {
 class GlobalObject;
 template<typename> class Optional;
 }
 }
 
 BEGIN_WORKERS_NAMESPACE
 
-class FileReaderSync MOZ_FINAL : public nsICharsetDetectionObserver
+class FileReaderSync MOZ_FINAL
 {
-  nsCString mCharset;
+  NS_INLINE_DECL_REFCOUNTING(FileReaderSync)
+
   nsresult ConvertStream(nsIInputStream *aStream, const char *aCharset,
                          nsAString &aResult);
-  nsresult GuessCharset(nsIInputStream *aStream, nsACString &aCharset);
 
 public:
   static already_AddRefed<FileReaderSync>
   Constructor(const GlobalObject& aGlobal, ErrorResult& aRv);
 
   JSObject* WrapObject(JSContext* aCx, JS::Handle<JSObject*> aScope);
 
-  NS_DECL_ISUPPORTS
-
   JSObject* ReadAsArrayBuffer(JSContext* aCx, JS::Handle<JSObject*> aScopeObj,
                               JS::Handle<JSObject*> aBlob,
                               ErrorResult& aRv);
   void ReadAsBinaryString(JS::Handle<JSObject*> aBlob, nsAString& aResult,
                           ErrorResult& aRv);
   void ReadAsText(JS::Handle<JSObject*> aBlob,
                   const Optional<nsAString>& aEncoding,
                   nsAString& aResult, ErrorResult& aRv);
   void ReadAsDataURL(JS::Handle<JSObject*> aBlob, nsAString& aResult,
                      ErrorResult& aRv);
-
-  // From nsICharsetDetectionObserver
-  NS_IMETHOD Notify(const char *aCharset, nsDetectionConfident aConf) MOZ_OVERRIDE;
 };
 
 END_WORKERS_NAMESPACE
 
 #endif // mozilla_dom_workers_filereadersync_h__