Bug 1071816 - Support loading unlabeled/BOMless UTF-8 text/html and text/plain files from file: URLs. r=emk.
author Henri Sivonen <hsivonen@hsivonen.fi>
Tue, 04 Dec 2018 10:02:04 +0200
changeset 509907 5a6f372f62c150b9263a1a59377a352284b35114
parent 509906 8917a7a6d54fee4a13cc9229380e148615b22779
child 509908 ceee7e82063807987805c514c33b63ae41e8a16b
push id 1953
push user ffxbld-merge
push date Mon, 11 Mar 2019 12:10:20 +0000
treeherder mozilla-release@9c35dcbaa899 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers emk
bugs 1071816
milestone 66.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1071816 - Support loading unlabeled/BOMless UTF-8 text/html and text/plain files from file: URLs. r=emk.
dom/encoding/FallbackEncoding.cpp
dom/encoding/FallbackEncoding.h
dom/html/nsHTMLDocument.cpp
dom/html/nsHTMLDocument.h
extensions/universalchardet/tests/bug1071816-1_text.html
extensions/universalchardet/tests/bug1071816-2_text.html
extensions/universalchardet/tests/bug1071816-3_text.html
extensions/universalchardet/tests/bug1071816-4_text.html
extensions/universalchardet/tests/bug547487_text.html
extensions/universalchardet/tests/chrome.ini
extensions/universalchardet/tests/test_bug1071816-1.html
extensions/universalchardet/tests/test_bug1071816-2.html
extensions/universalchardet/tests/test_bug1071816-3.html
extensions/universalchardet/tests/test_bug1071816-4.html
extensions/universalchardet/tests/test_bug426271-utf-8.html
extensions/universalchardet/tests/test_bug547487.html
extensions/universalchardet/tests/test_bug638318.html
modules/libpref/init/all.js
parser/html/nsHtml5StreamParser.cpp
parser/html/nsHtml5StreamParser.h
parser/nsCharsetSource.h
--- a/dom/encoding/FallbackEncoding.cpp
+++ b/dom/encoding/FallbackEncoding.cpp
@@ -50,17 +50,16 @@ static const EncodingProp domainsFallbac
 static constexpr nsUConvProp nonParticipatingDomains[] = {
 #include "nonparticipatingdomains.properties.h"
 };
 
 NS_IMPL_ISUPPORTS(FallbackEncoding, nsIObserver)
 
 StaticRefPtr<FallbackEncoding> FallbackEncoding::sInstance;
 bool FallbackEncoding::sGuessFallbackFromTopLevelDomain = true;
-bool FallbackEncoding::sFallbackToUTF8ForFile = false;
 
 FallbackEncoding::FallbackEncoding() : mFallback(nullptr) {
   MOZ_ASSERT(!FallbackEncoding::sInstance, "Singleton already exists.");
 }
 
 NotNull<const Encoding*> FallbackEncoding::Get() {
   if (mFallback) {
     return WrapNotNull(mFallback);
@@ -136,18 +135,16 @@ FallbackEncoding::Observe(nsISupports* a
 void FallbackEncoding::Initialize() {
   MOZ_ASSERT(!FallbackEncoding::sInstance,
              "Initializing pre-existing fallback cache.");
   FallbackEncoding::sInstance = new FallbackEncoding;
   Preferences::RegisterCallback(FallbackEncoding::PrefChanged,
                                 "intl.charset.fallback.override");
   Preferences::AddBoolVarCache(&sGuessFallbackFromTopLevelDomain,
                                "intl.charset.fallback.tld");
-  Preferences::AddBoolVarCache(&sFallbackToUTF8ForFile,
-                               "intl.charset.fallback.utf8_for_file");
 
   nsCOMPtr<nsIObserverService> obs = mozilla::services::GetObserverService();
   if (obs) {
     obs->AddObserver(sInstance, "intl:requested-locales-changed", true);
   }
 }
 
 void FallbackEncoding::Shutdown() {
--- a/dom/encoding/FallbackEncoding.h
+++ b/dom/encoding/FallbackEncoding.h
@@ -23,21 +23,16 @@ class FallbackEncoding : public nsIObser
   NS_DECL_NSIOBSERVER
 
   /**
    * Whether FromTopLevelDomain() should be used.
    */
   static bool sGuessFallbackFromTopLevelDomain;
 
   /**
-   * Whether UTF-8 should be used for file URLs.
-   */
-  static bool sFallbackToUTF8ForFile;
-
-  /**
    * Gets the locale-dependent fallback encoding for legacy HTML and plain
    * text content.
    *
    * @param aFallback the outparam for the fallback encoding
    */
   static NotNull<const Encoding*> FromLocale();
 
   /**
--- a/dom/html/nsHTMLDocument.cpp
+++ b/dom/html/nsHTMLDocument.cpp
@@ -433,22 +433,16 @@ void nsHTMLDocument::TryTLD(int32_t& aCh
   aEncoding = FallbackEncoding::FromTopLevelDomain(tld);
 }
 
 void nsHTMLDocument::TryFallback(int32_t& aCharsetSource,
                                  NotNull<const Encoding*>& aEncoding) {
   if (kCharsetFromFallback <= aCharsetSource) return;
 
   aCharsetSource = kCharsetFromFallback;
-  bool isFile = false;
-  if (FallbackEncoding::sFallbackToUTF8ForFile && mDocumentURI &&
-      NS_SUCCEEDED(mDocumentURI->SchemeIs("file", &isFile)) && isFile) {
-    aEncoding = UTF_8_ENCODING;
-    return;
-  }
   aEncoding = FallbackEncoding::FromLocale();
 }
 
 void nsHTMLDocument::SetDocumentCharacterSet(
     NotNull<const Encoding*> aEncoding) {
   nsDocument::SetDocumentCharacterSet(aEncoding);
   // Make sure to stash this charset on our channel as needed if it's a wyciwyg
   // channel.
--- a/dom/html/nsHTMLDocument.h
+++ b/dom/html/nsHTMLDocument.h
@@ -277,18 +277,18 @@ class nsHTMLDocument : public nsDocument
                             int32_t& aCharsetSource,
                             NotNull<const Encoding*>& aEncoding);
   static void TryCacheCharset(nsICachingChannel* aCachingChannel,
                               int32_t& aCharsetSource,
                               NotNull<const Encoding*>& aEncoding);
   void TryParentCharset(nsIDocShell* aDocShell, int32_t& charsetSource,
                         NotNull<const Encoding*>& aEncoding);
   void TryTLD(int32_t& aCharsetSource, NotNull<const Encoding*>& aCharset);
-  void TryFallback(int32_t& aCharsetSource,
-                   NotNull<const Encoding*>& aEncoding);
+  static void TryFallback(int32_t& aCharsetSource,
+                          NotNull<const Encoding*>& aEncoding);
 
   // Override so we can munge the charset on our wyciwyg channel as needed.
   virtual void SetDocumentCharacterSet(
       NotNull<const Encoding*> aEncoding) override;
 
   // Tracks if we are currently processing any document.write calls (either
   // implicit or explicit). Note that if a write call writes out something which
   // would block the parser, then mWriteLevel will be incorrect until the parser
new file mode 100644
--- /dev/null
+++ b/extensions/universalchardet/tests/bug1071816-1_text.html
@@ -0,0 +1,9 @@
+<!DOCTYPE html>
+<html>
+<head>
+	<title>UTF-8</title>
+</head>
+<body>

+</body>
+</html>
\ No newline at end of file
new file mode 100644
--- /dev/null
+++ b/extensions/universalchardet/tests/bug1071816-2_text.html
@@ -0,0 +1,9 @@
+<!DOCTYPE html>
+<html>
+<head>
+	<title>windows-1252</title>
+</head>
+<body>
+
+</body>
+</html>
\ No newline at end of file
new file mode 100644
--- /dev/null
+++ b/extensions/universalchardet/tests/bug1071816-3_text.html
@@ -0,0 +1,536 @@
+<!DOCTYPE html>
+<html>
+<head>
+	<title>UTF-8</title>
+<!-- More than 1024 bytes
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+-->
+</head>
+<body>

+</body>
+</html>
\ No newline at end of file
new file mode 100644
--- /dev/null
+++ b/extensions/universalchardet/tests/bug1071816-4_text.html
@@ -0,0 +1,536 @@
+<!DOCTYPE html>
+<html>
+<head>
+	<title>windows-1252</title>
+<!-- More than 1024 bytes
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+a
+-->
+</head>
+<body>
+
+</body>
+</html>
\ No newline at end of file
deleted file mode 100644
--- a/extensions/universalchardet/tests/bug547487_text.html
+++ /dev/null
@@ -1,1 +0,0 @@
-The quick brown fox jumps over the lazy dog.
--- a/extensions/universalchardet/tests/chrome.ini
+++ b/extensions/universalchardet/tests/chrome.ini
@@ -1,17 +1,16 @@
 [DEFAULT]
 support-files =
   CharsetDetectionTests.js
   bug306272_text.html
   bug421271_text.html
   bug426271_text-euc-jp.html
   bug426271_text-utf-8.html
   bug431054_text.html
-  bug547487_text.html
   bug631751be_text.html
   bug631751le_text.html
   bug638318_text.html
   bug811363-1.text
   bug811363-2.text
   bug811363-3.text
   bug811363-4.text
   bug811363-5.text
@@ -19,24 +18,27 @@ support-files =
   bug811363-7.text
   bug811363-8.text
   bug811363-9.text
   bug811363-invalid-1.text
   bug811363-invalid-2.text
   bug811363-invalid-3.text
   bug811363-invalid-4.text
   bug811363-invalid-5.text
+  bug1071816-1_text.html
+  bug1071816-2_text.html
+  bug1071816-3_text.html
+  bug1071816-4_text.html
 
 [test_bug306272.html]
 [test_bug421271.html]
 [test_bug426271-euc-jp.html]
 [test_bug426271-utf-8.html]
 [test_bug431054-japanese.html]
 [test_bug431054.html]
-[test_bug547487.html]
 [test_bug631751be.html]
 [test_bug631751le.html]
 [test_bug638318.html]
 [test_bug811363-1-1.html]
 [test_bug811363-1-2.html]
 [test_bug811363-1-3.html]
 [test_bug811363-1-4.html]
 [test_bug811363-1-5.html]
@@ -44,9 +46,12 @@ support-files =
 [test_bug811363-2-2.html]
 [test_bug811363-2-3.html]
 [test_bug811363-2-4.html]
 [test_bug811363-2-5.html]
 [test_bug811363-2-6.html]
 [test_bug811363-2-7.html]
 [test_bug811363-2-8.html]
 [test_bug811363-2-9.html]
-
+[test_bug1071816-1.html]
+[test_bug1071816-2.html]
+[test_bug1071816-3.html]
+[test_bug1071816-4.html]
new file mode 100644
--- /dev/null
+++ b/extensions/universalchardet/tests/test_bug1071816-1.html
@@ -0,0 +1,31 @@
+<!DOCTYPE HTML>
+<html>
+<!--
+https://bugzilla.mozilla.org/show_bug.cgi?id=1071816
+-->
+<head>
+  <title>Test for Bug 1071816</title>
+  <script type="text/javascript" 
+          src="chrome://mochikit/content/tests/SimpleTest/SimpleTest.js">
+          </script>
+  <script type="text/javascript" src="CharsetDetectionTests.js"></script>
+  <link rel="stylesheet" type="text/css" 
+        href="chrome://mochikit/content/tests/SimpleTest/test.css" />
+</head>
+<body>
+<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1071816">Mozilla Bug 1071816</a>
+<p id="display"></p>
+<div id="content" style="display: none">  
+</div>
+<iframe id="testframe"></iframe>
+<pre id="test">
+<script class="testbody" type="text/javascript">
+/** Test for Bug 1071816 **/
+/* Note! This test uses the chardet test harness but doesn't test chardet! */
+CharsetDetectionTests("bug1071816-1_text.html",
+		      "UTF-8",
+		      new Array(""));
+</script>
+</pre>
+</body>
+</html>
new file mode 100644
--- /dev/null
+++ b/extensions/universalchardet/tests/test_bug1071816-2.html
@@ -0,0 +1,31 @@
+<!DOCTYPE HTML>
+<html>
+<!--
+https://bugzilla.mozilla.org/show_bug.cgi?id=1071816
+-->
+<head>
+  <title>Test for Bug 1071816</title>
+  <script type="text/javascript" 
+          src="chrome://mochikit/content/tests/SimpleTest/SimpleTest.js">
+          </script>
+  <script type="text/javascript" src="CharsetDetectionTests.js"></script>
+  <link rel="stylesheet" type="text/css" 
+        href="chrome://mochikit/content/tests/SimpleTest/test.css" />
+</head>
+<body>
+<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1071816">Mozilla Bug 1071816</a>
+<p id="display"></p>
+<div id="content" style="display: none">  
+</div>
+<iframe id="testframe"></iframe>
+<pre id="test">
+<script class="testbody" type="text/javascript">
+/** Test for Bug 1071816 **/
+/* Note! This test uses the chardet test harness but doesn't test chardet! */
+CharsetDetectionTests("bug1071816-2_text.html",
+		      "windows-1252",
+		      new Array(""));
+</script>
+</pre>
+</body>
+</html>
new file mode 100644
--- /dev/null
+++ b/extensions/universalchardet/tests/test_bug1071816-3.html
@@ -0,0 +1,31 @@
+<!DOCTYPE HTML>
+<html>
+<!--
+https://bugzilla.mozilla.org/show_bug.cgi?id=1071816
+-->
+<head>
+  <title>Test for Bug 1071816</title>
+  <script type="text/javascript" 
+          src="chrome://mochikit/content/tests/SimpleTest/SimpleTest.js">
+          </script>
+  <script type="text/javascript" src="CharsetDetectionTests.js"></script>
+  <link rel="stylesheet" type="text/css" 
+        href="chrome://mochikit/content/tests/SimpleTest/test.css" />
+</head>
+<body>
+<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1071816">Mozilla Bug 1071816</a>
+<p id="display"></p>
+<div id="content" style="display: none">  
+</div>
+<iframe id="testframe"></iframe>
+<pre id="test">
+<script class="testbody" type="text/javascript">
+/** Test for Bug 1071816 **/
+/* Note! This test uses the chardet test harness but doesn't test chardet! */
+CharsetDetectionTests("bug1071816-3_text.html",
+		      "UTF-8",
+		      new Array(""));
+</script>
+</pre>
+</body>
+</html>
new file mode 100644
--- /dev/null
+++ b/extensions/universalchardet/tests/test_bug1071816-4.html
@@ -0,0 +1,31 @@
+<!DOCTYPE HTML>
+<html>
+<!--
+https://bugzilla.mozilla.org/show_bug.cgi?id=1071816
+-->
+<head>
+  <title>Test for Bug 1071816</title>
+  <script type="text/javascript" 
+          src="chrome://mochikit/content/tests/SimpleTest/SimpleTest.js">
+          </script>
+  <script type="text/javascript" src="CharsetDetectionTests.js"></script>
+  <link rel="stylesheet" type="text/css" 
+        href="chrome://mochikit/content/tests/SimpleTest/test.css" />
+</head>
+<body>
+<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1071816">Mozilla Bug 1071816</a>
+<p id="display"></p>
+<div id="content" style="display: none">  
+</div>
+<iframe id="testframe"></iframe>
+<pre id="test">
+<script class="testbody" type="text/javascript">
+/** Test for Bug 1071816 **/
+/* Note! This test uses the chardet test harness but doesn't test chardet! */
+CharsetDetectionTests("bug1071816-4_text.html",
+		      "windows-1252",
+		      new Array(""));
+</script>
+</pre>
+</body>
+</html>
--- a/extensions/universalchardet/tests/test_bug426271-utf-8.html
+++ b/extensions/universalchardet/tests/test_bug426271-utf-8.html
@@ -15,17 +15,16 @@ https://bugzilla.mozilla.org/show_bug.cg
 <body>
 <a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=426271">Mozilla Bug 426271</a>
 <p id="display"></p>
 <div id="content" style="display: none">  
 </div>
 <iframe id="testframe"></iframe>
 <pre id="test">
 <script class="testbody" type="text/javascript">
-SimpleTest.expectAssertions(1);
 /** Test for Bug 426271 **/
 CharsetDetectionTests("bug426271_text-utf-8.html",
 		      "UTF-8",
 		      new Array("ja_parallel_state_machine"));
 </script>
 </pre>
 </body>
 </html>
deleted file mode 100644
--- a/extensions/universalchardet/tests/test_bug547487.html
+++ /dev/null
@@ -1,36 +0,0 @@
-<!DOCTYPE HTML>
-<html>
-<!--
-https://bugzilla.mozilla.org/show_bug.cgi?id=547487
--->
-<head>
-  <title>Test for Bug 547487</title>
-  <script type="text/javascript" 
-          src="chrome://mochikit/content/tests/SimpleTest/SimpleTest.js">
-          </script>
-  <script type="text/javascript" src="CharsetDetectionTests.js"></script>
-  <link rel="stylesheet" type="text/css" 
-        href="chrome://mochikit/content/tests/SimpleTest/test.css" />
-</head>
-<body>
-<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=547487">Mozilla Bug 547487</a>
-<p id="display"></p>
-<div id="content" style="display: none">  
-</div>
-<iframe id="testframe"></iframe>
-<pre id="test">
-<script class="testbody" type="text/javascript">
-/** Test for Bug 547487 **/
-CharsetDetectionTests("bug547487_text.html",
-                      "default",
-                      new Array("zhtw_parallel_state_machine",
-                                "zhcn_parallel_state_machine",
-                                "ja_parallel_state_machine",
-                                "ko_parallel_state_machine",
-                                "zh_parallel_state_machine",
-                                "cjk_parallel_state_machine",
-                                "universal_charset_detector"));
-</script>
-</pre>
-</body>
-</html>
--- a/extensions/universalchardet/tests/test_bug638318.html
+++ b/extensions/universalchardet/tests/test_bug638318.html
@@ -18,14 +18,14 @@ https://bugzilla.mozilla.org/show_bug.cg
 <div id="content" style="display: none">  
 </div>
 <iframe id="testframe"></iframe>
 <pre id="test">
 <script class="testbody" type="text/javascript">
 /** Test for Bug 638318 **/
 /* Note! This test uses the chardet test harness but doesn't test chardet! */
 CharsetDetectionTests("bug638318_text.html",
-		      "windows-1252",
+		      "UTF-8",
 		      new Array(""));
 </script>
 </pre>
 </body>
 </html>
--- a/modules/libpref/init/all.js
+++ b/modules/libpref/init/all.js
@@ -2388,17 +2388,16 @@ pref("converter.html2txt.header_strategy
 pref("converter.html2txt.always_include_ruby", false);
 
 pref("intl.accept_languages",               "chrome://global/locale/intl.properties");
 pref("intl.menuitems.alwaysappendaccesskeys","chrome://global/locale/intl.properties");
 pref("intl.menuitems.insertseparatorbeforeaccesskeys","chrome://global/locale/intl.properties");
 pref("intl.charset.detector",               "chrome://global/locale/intl.properties");
 pref("intl.charset.fallback.override",      "");
 pref("intl.charset.fallback.tld",           true);
-pref("intl.charset.fallback.utf8_for_file", false);
 pref("intl.ellipsis",                       "chrome://global-platform/locale/intl.properties");
 // this pref allows user to request that all internationalization formatters
 // like date/time formatting, unit formatting, calendars etc. should use
 // OS locale set instead of the app locale set.
 pref("intl.regional_prefs.use_os_locales",  false);
 // fallback charset list for Unicode conversion (converting from Unicode)
 // currently used for mail send only to handle symbol characters (e.g Euro, trademark, smartquotes)
 // for ISO-8859-1
--- a/parser/html/nsHtml5StreamParser.cpp
+++ b/parser/html/nsHtml5StreamParser.cpp
@@ -137,42 +137,43 @@ class nsHtml5LoadFlusher : public Runnab
 nsHtml5StreamParser::nsHtml5StreamParser(nsHtml5TreeOpExecutor* aExecutor,
                                          nsHtml5Parser* aOwner,
                                          eParserMode aMode)
     : mSniffingLength(0),
       mBomState(eBomState::BOM_SNIFFING_NOT_STARTED),
       mCharsetSource(kCharsetUninitialized),
       mEncoding(WINDOWS_1252_ENCODING),
       mReparseForbidden(false),
-      mLastBuffer(nullptr)  // Will be filled when starting
-      ,
+      mLastBuffer(nullptr),  // Will be filled when starting
       mExecutor(aExecutor),
       mTreeBuilder(new nsHtml5TreeBuilder(
           (aMode == VIEW_SOURCE_HTML || aMode == VIEW_SOURCE_XML)
               ? nullptr
               : mExecutor->GetStage(),
           aMode == NORMAL ? mExecutor->GetStage() : nullptr)),
       mTokenizer(new nsHtml5Tokenizer(mTreeBuilder, aMode == VIEW_SOURCE_XML)),
       mTokenizerMutex("nsHtml5StreamParser mTokenizerMutex"),
       mOwner(aOwner),
       mLastWasCR(false),
       mStreamState(eHtml5StreamState::STREAM_NOT_STARTED),
       mSpeculating(false),
       mAtEOF(false),
       mSpeculationMutex("nsHtml5StreamParser mSpeculationMutex"),
       mSpeculationFailureCount(0),
+      mLocalFileBytesBuffered(0),
       mTerminated(false),
       mInterrupted(false),
       mTerminatedMutex("nsHtml5StreamParser mTerminatedMutex"),
       mEventTarget(nsHtml5Module::GetStreamParserThread()->SerialEventTarget()),
       mExecutorFlusher(new nsHtml5ExecutorFlusher(aExecutor)),
       mLoadFlusher(new nsHtml5LoadFlusher(aExecutor)),
       mFeedChardet(false),
       mInitialEncodingWasFromParentFrame(false),
       mHasHadErrors(false),
+      mDecodingLocalFileAsUTF8(false),
       mFlushTimer(NS_NewTimer(mEventTarget)),
       mFlushTimerMutex("nsHtml5StreamParser mFlushTimerMutex"),
       mFlushTimerArmed(false),
       mFlushTimerEverFired(false),
       mMode(aMode) {
   NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
 #ifdef DEBUG
   mAtomTable.SetPermittedLookupEventTarget(mEventTarget);
@@ -310,30 +311,37 @@ void nsHtml5StreamParser::SetViewSourceT
   }
 }
 
 nsresult
 nsHtml5StreamParser::SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
     Span<const uint8_t> aFromSegment) {
   NS_ASSERTION(IsParserThread(), "Wrong thread!");
   nsresult rv = NS_OK;
-  mUnicodeDecoder = mEncoding->NewDecoderWithBOMRemoval();
+  if (mDecodingLocalFileAsUTF8 && mCharsetSource <= kCharsetFromFileURLGuess) {
+    MOZ_ASSERT(mEncoding != UTF_8_ENCODING);
+    mUnicodeDecoder = UTF_8_ENCODING->NewDecoderWithBOMRemoval();
+  } else {
+    mDecodingLocalFileAsUTF8 = false;
+    mUnicodeDecoder = mEncoding->NewDecoderWithBOMRemoval();
+  }
   if (mSniffingBuffer) {
     rv = WriteStreamBytes(MakeSpan(mSniffingBuffer.get(), mSniffingLength));
     NS_ENSURE_SUCCESS(rv, rv);
     mSniffingBuffer = nullptr;
   }
   mMetaScanner = nullptr;
   return WriteStreamBytes(aFromSegment);
 }
 
 nsresult nsHtml5StreamParser::SetupDecodingFromBom(
     NotNull<const Encoding*> aEncoding) {
   NS_ASSERTION(IsParserThread(), "Wrong thread!");
   mEncoding = aEncoding;
+  mDecodingLocalFileAsUTF8 = false;
   mUnicodeDecoder = mEncoding->NewDecoderWithoutBOMHandling();
   mCharsetSource = kCharsetFromByteOrderMark;
   mFeedChardet = false;
   mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
   mSniffingBuffer = nullptr;
   mMetaScanner = nullptr;
   mBomState = BOM_SNIFFING_OVER;
   return NS_OK;
@@ -528,17 +536,17 @@ nsresult nsHtml5StreamParser::FinalizeSn
     mFeedChardet = false;
     return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment);
   }
   // Check for BOMless UTF-16 with Basic
   // Latin content for compat with IE. See bug 631751.
   SniffBOMlessUTF16BasicLatin(aFromSegment.To(aCountToSniffingLimit));
   // the charset may have been set now
   // maybe try chardet now;
-  if (mFeedChardet) {
+  if (mFeedChardet && !mDecodingLocalFileAsUTF8) {
     bool dontFeed;
     nsresult rv;
     if (mSniffingBuffer) {
       rv = mChardet->DoIt((const char*)mSniffingBuffer.get(), mSniffingLength,
                           &dontFeed);
       mFeedChardet = !dontFeed;
       NS_ENSURE_SUCCESS(rv, rv);
     }
@@ -666,20 +674,20 @@ nsresult nsHtml5StreamParser::SniffStrea
   }
 
   if (!mMetaScanner &&
       (mMode == NORMAL || mMode == VIEW_SOURCE_HTML || mMode == LOAD_AS_DATA)) {
     mMetaScanner = new nsHtml5MetaScanner(mTreeBuilder);
   }
 
   if (mSniffingLength + aFromSegment.Length() >=
-      NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE) {
+      SNIFFING_BUFFER_SIZE) {
     // this is the last buffer
     uint32_t countToSniffingLimit =
-        NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE - mSniffingLength;
+        SNIFFING_BUFFER_SIZE - mSniffingLength;
     if (mMode == NORMAL || mMode == VIEW_SOURCE_HTML || mMode == LOAD_AS_DATA) {
       nsHtml5ByteReadable readable(
           aFromSegment.Elements(),
           aFromSegment.Elements() + countToSniffingLimit);
       nsAutoCString charset;
       auto encoding = mMetaScanner->sniff(&readable);
       // Due to the way nsHtml5Portability reports OOM, ask the tree buider
       nsresult rv;
@@ -739,76 +747,120 @@ nsresult nsHtml5StreamParser::SniffStrea
       mFeedChardet = false;
       mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
       return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment);
     }
   }
 
   if (!mSniffingBuffer) {
     mSniffingBuffer = MakeUniqueFallible<uint8_t[]>(
-        NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE);
+        SNIFFING_BUFFER_SIZE);
     if (!mSniffingBuffer) {
       return NS_ERROR_OUT_OF_MEMORY;
     }
   }
   memcpy(&mSniffingBuffer[mSniffingLength], aFromSegment.Elements(),
          aFromSegment.Length());
   mSniffingLength += aFromSegment.Length();
   return NS_OK;
 }
 
 nsresult nsHtml5StreamParser::WriteStreamBytes(
     Span<const uint8_t> aFromSegment) {
   NS_ASSERTION(IsParserThread(), "Wrong thread!");
   // mLastBuffer should always point to a buffer of the size
-  // NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE.
+  // READ_BUFFER_SIZE.
   if (!mLastBuffer) {
     NS_WARNING("mLastBuffer should not be null!");
     MarkAsBroken(NS_ERROR_NULL_POINTER);
     return NS_ERROR_NULL_POINTER;
   }
   size_t totalRead = 0;
   auto src = aFromSegment;
   for (;;) {
-    auto dst = mLastBuffer->TailAsSpan(NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE);
+    auto dst = mLastBuffer->TailAsSpan(READ_BUFFER_SIZE);
     uint32_t result;
     size_t read;
     size_t written;
     bool hadErrors;
     Tie(result, read, written, hadErrors) =
         mUnicodeDecoder->DecodeToUTF16(src, dst, false);
-    if (recordreplay::IsRecordingOrReplaying()) {
+    if (!mDecodingLocalFileAsUTF8 && recordreplay::IsRecordingOrReplaying()) {
       recordreplay::AddContentParseData16(this, dst.data(), written);
     }
     if (hadErrors && !mHasHadErrors) {
+      if (mDecodingLocalFileAsUTF8) {
+        ReDecodeLocalFile();
+        return NS_OK;
+      }
       mHasHadErrors = true;
       if (mEncoding == UTF_8_ENCODING) {
         mTreeBuilder->TryToEnableEncodingMenu();
       }
     }
     src = src.From(read);
     totalRead += read;
     mLastBuffer->AdvanceEnd(written);
     if (result == kOutputFull) {
       RefPtr<nsHtml5OwningUTF16Buffer> newBuf =
           nsHtml5OwningUTF16Buffer::FalliblyCreate(
-              NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE);
+              READ_BUFFER_SIZE);
       if (!newBuf) {
         MarkAsBroken(NS_ERROR_OUT_OF_MEMORY);
         return NS_ERROR_OUT_OF_MEMORY;
       }
       mLastBuffer = (mLastBuffer->next = newBuf.forget());
     } else {
       MOZ_ASSERT(totalRead == aFromSegment.Length(),
                  "The Unicode decoder consumed the wrong number of bytes.");
+      if (mDecodingLocalFileAsUTF8 && mLocalFileBytesBuffered == LOCAL_FILE_UTF_8_BUFFER_SIZE) {
+        CommitLocalFileToUTF8();
+      }
       return NS_OK;
     }
   }
 }
 
+void nsHtml5StreamParser::ReDecodeLocalFile() {
+  MOZ_ASSERT(mDecodingLocalFileAsUTF8);
+  mDecodingLocalFileAsUTF8 = false;
+  mUnicodeDecoder = mEncoding->NewDecoderWithBOMRemoval();
+  mHasHadErrors = false;
+
+  // Throw away previous decoded data
+  mLastBuffer = mFirstBuffer;
+  mLastBuffer->next = nullptr;
+  mLastBuffer->setStart(0);
+  mLastBuffer->setEnd(0);
+
+  // Decode again
+  for (auto&& buffer : mBufferedLocalFileData) {
+    DoDataAvailable(buffer);
+  }
+}
+
+void nsHtml5StreamParser::CommitLocalFileToUTF8()
+{
+  MOZ_ASSERT(mDecodingLocalFileAsUTF8);
+  mDecodingLocalFileAsUTF8 = false;
+  mFeedChardet = false;
+  mEncoding = UTF_8_ENCODING;
+  mCharsetSource = kCharsetFromFileURLGuess;
+  mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
+
+  if (recordreplay::IsRecordingOrReplaying()) {
+    nsHtml5OwningUTF16Buffer* buffer = mLastBuffer;
+    while (buffer) {
+      recordreplay::AddContentParseData16(
+          this, buffer->getBuffer() + buffer->getStart(), buffer->getLength());
+      buffer = buffer->next;
+    }
+  }
+}
+
 class MaybeRunCollector : public Runnable {
  public:
   explicit MaybeRunCollector(nsIDocShell* aDocShell)
       : Runnable("MaybeRunCollector"), mDocShell(aDocShell) {}
 
   NS_IMETHOD Run() override {
     nsJSContext::MaybeRunNextCollectorSlice(mDocShell,
                                             JS::gcreason::HTML_PARSER);
@@ -838,21 +890,43 @@ nsresult nsHtml5StreamParser::OnStartReq
   }
 
   // For View Source, the parser should run with scripts "enabled" if a normal
   // load would have scripts enabled.
   bool scriptingEnabled =
       mMode == LOAD_AS_DATA ? false : mExecutor->IsScriptEnabled();
   mOwner->StartTokenizer(scriptingEnabled);
 
+  MOZ_ASSERT(!mDecodingLocalFileAsUTF8);
   bool isSrcdoc = false;
   nsCOMPtr<nsIChannel> channel;
   nsresult rv = GetChannel(getter_AddRefs(channel));
   if (NS_SUCCEEDED(rv)) {
     isSrcdoc = NS_IsSrcdocChannel(channel);
+    if (!isSrcdoc && mCharsetSource <= kCharsetFromFileURLGuess) {
+      nsCOMPtr<nsIURI> originalURI;
+      rv = channel->GetOriginalURI(getter_AddRefs(originalURI));
+      if (NS_SUCCEEDED(rv)) {
+        bool originalIsResource;
+        originalURI->SchemeIs("resource", &originalIsResource);
+        if (originalIsResource) {
+          mCharsetSource = kCharsetFromBuiltIn;
+          mEncoding = UTF_8_ENCODING;
+        } else {
+          nsCOMPtr<nsIURI> currentURI;
+          rv = channel->GetURI(getter_AddRefs(currentURI));
+          if (NS_SUCCEEDED(rv)) {
+            nsCOMPtr<nsIURI> innermost = NS_GetInnermostURI(currentURI);
+            bool innermostIsFile;
+            innermost->SchemeIs("file", &innermostIsFile);
+            mDecodingLocalFileAsUTF8 = innermostIsFile;
+          }
+        }
+      }
+    }
   }
   mTreeBuilder->setIsSrcdocDocument(isSrcdoc);
   mTreeBuilder->setScriptingEnabled(scriptingEnabled);
   mTreeBuilder->SetPreventScriptExecution(
       !((mMode == NORMAL) && scriptingEnabled));
   mTokenizer->start();
   mExecutor->Start();
   mExecutor->StartReadingFromStage();
@@ -873,25 +947,25 @@ nsresult nsHtml5StreamParser::OnStartReq
    * WillBuildModel to be called before the document has had its
    * script global object set.
    */
   rv = mExecutor->WillBuildModel(eDTDMode_unknown);
   NS_ENSURE_SUCCESS(rv, rv);
 
   RefPtr<nsHtml5OwningUTF16Buffer> newBuf =
       nsHtml5OwningUTF16Buffer::FalliblyCreate(
-          NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE);
+          READ_BUFFER_SIZE);
   if (!newBuf) {
     // marks this stream parser as terminated,
     // which prevents entry to code paths that
     // would use mFirstBuffer or mLastBuffer.
     return mExecutor->MarkAsBroken(NS_ERROR_OUT_OF_MEMORY);
   }
-  NS_ASSERTION(!mFirstBuffer, "How come we have the first buffer set?");
-  NS_ASSERTION(!mLastBuffer, "How come we have the last buffer set?");
+  MOZ_ASSERT(!mFirstBuffer, "How come we have the first buffer set?");
+  MOZ_ASSERT(!mLastBuffer, "How come we have the last buffer set?");
   mFirstBuffer = mLastBuffer = newBuf;
 
   rv = NS_OK;
 
   // The line below means that the encoding can end up being wrong if
   // a view-source URL is loaded without having the encoding hint from a
   // previous normal load in the history.
   mReparseForbidden = !(mMode == NORMAL || mMode == PLAIN_TEXT);
@@ -957,16 +1031,17 @@ nsresult nsHtml5StreamParser::OnStartReq
 
   // We are reloading a document.open()ed doc or loading JSON/WebVTT/etc. into
   // a browsing context. In the latter case, there's no need to remove the
   // BOM manually here, because the UTF-8 decoder removes it.
   mReparseForbidden = true;
   mFeedChardet = false;
 
   // Instantiate the converter here to avoid BOM sniffing.
+  mDecodingLocalFileAsUTF8 = false;
   mUnicodeDecoder = mEncoding->NewDecoderWithBOMRemoval();
   return NS_OK;
 }
 
 nsresult nsHtml5StreamParser::CheckListenerChain() {
   NS_ASSERTION(NS_IsMainThread(), "Should be on the main thread!");
   if (!mObserver) {
     return NS_OK;
@@ -985,74 +1060,84 @@ void nsHtml5StreamParser::DoStopRequest(
   MOZ_RELEASE_ASSERT(STREAM_BEING_READ == mStreamState,
                      "Stream ended without being open.");
   mTokenizerMutex.AssertCurrentThreadOwns();
 
   if (IsTerminated()) {
     return;
   }
 
-  mStreamState = STREAM_ENDED;
-
   if (!mUnicodeDecoder) {
     nsresult rv;
     Span<const uint8_t> empty;
     if (NS_FAILED(rv = FinalizeSniffing(empty, 0, true))) {
       MarkAsBroken(rv);
       return;
     }
-  } else if (mFeedChardet) {
+  }
+  if (mFeedChardet && !mDecodingLocalFileAsUTF8) {
     mChardet->Done();
   }
 
   MOZ_ASSERT(mUnicodeDecoder,
              "Should have a decoder after finalizing sniffing.");
 
   // mLastBuffer should always point to a buffer of the size
-  // NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE.
+  // READ_BUFFER_SIZE.
   if (!mLastBuffer) {
     NS_WARNING("mLastBuffer should not be null!");
     MarkAsBroken(NS_ERROR_NULL_POINTER);
     return;
   }
 
   Span<uint8_t> src;  // empty span
   for (;;) {
-    auto dst = mLastBuffer->TailAsSpan(NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE);
+    auto dst = mLastBuffer->TailAsSpan(READ_BUFFER_SIZE);
     uint32_t result;
     size_t read;
     size_t written;
     bool hadErrors;
     Tie(result, read, written, hadErrors) =
         mUnicodeDecoder->DecodeToUTF16(src, dst, true);
-    if (recordreplay::IsRecordingOrReplaying()) {
+    if (!mDecodingLocalFileAsUTF8 && recordreplay::IsRecordingOrReplaying()) {
       recordreplay::AddContentParseData16(this, dst.data(), written);
     }
     if (hadErrors && !mHasHadErrors) {
+      if (mDecodingLocalFileAsUTF8) {
+        ReDecodeLocalFile();
+        DoStopRequest();
+        return;
+      }
       mHasHadErrors = true;
       if (mEncoding == UTF_8_ENCODING) {
         mTreeBuilder->TryToEnableEncodingMenu();
       }
     }
     MOZ_ASSERT(read == 0, "How come an empty span was read form?");
     mLastBuffer->AdvanceEnd(written);
     if (result == kOutputFull) {
       RefPtr<nsHtml5OwningUTF16Buffer> newBuf =
           nsHtml5OwningUTF16Buffer::FalliblyCreate(
-              NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE);
+              READ_BUFFER_SIZE);
       if (!newBuf) {
         MarkAsBroken(NS_ERROR_OUT_OF_MEMORY);
         return;
       }
       mLastBuffer = (mLastBuffer->next = newBuf.forget());
     } else {
+      if (mDecodingLocalFileAsUTF8) {
+        MOZ_ASSERT(mLocalFileBytesBuffered < LOCAL_FILE_UTF_8_BUFFER_SIZE);
+        CommitLocalFileToUTF8();
+      }
       break;
     }
   }
 
+  mStreamState = STREAM_ENDED;
+
   if (IsTerminatedOrInterrupted()) {
     return;
   }
 
   ParseAvailableData();
 }
 
 class nsHtml5RequestStopper : public Runnable {
@@ -1079,47 +1164,100 @@ nsresult nsHtml5StreamParser::OnStopRequ
   }
   nsCOMPtr<nsIRunnable> stopper = new nsHtml5RequestStopper(this);
   if (NS_FAILED(mEventTarget->Dispatch(stopper, nsIThread::DISPATCH_NORMAL))) {
     NS_WARNING("Dispatching StopRequest event failed.");
   }
   return NS_OK;
 }
 
+// Feeds aBuffer to DoDataAvailable(). While mDecodingLocalFileAsUTF8 is set,
+// also retains up to LOCAL_FILE_UTF_8_BUFFER_SIZE bytes in mBufferedLocalFileData.
+void nsHtml5StreamParser::DoDataAvailableBuffer(mozilla::Buffer<uint8_t>&& aBuffer) {
+  if (MOZ_LIKELY(!mDecodingLocalFileAsUTF8)) {
+    DoDataAvailable(aBuffer);
+    return;
+  }
+  CheckedInt<size_t> bufferedPlusLength(aBuffer.Length());
+  bufferedPlusLength += mLocalFileBytesBuffered;
+  if (!bufferedPlusLength.isValid()) {
+    MarkAsBroken(NS_ERROR_OUT_OF_MEMORY);
+    return;
+  }
+  // Ensure that WriteStreamBytes() sees a buffer ending exactly at
+  // LOCAL_FILE_UTF_8_BUFFER_SIZE if we are about to cross the threshold.
+  // This way, Necko buffer boundaries don't affect user-visible behavior.
+  if (bufferedPlusLength.value() <= LOCAL_FILE_UTF_8_BUFFER_SIZE) {
+    // Truncation OK, because we just checked the range.
+    mLocalFileBytesBuffered = bufferedPlusLength.value();
+    mBufferedLocalFileData.AppendElement(std::move(aBuffer));
+    DoDataAvailable(mBufferedLocalFileData.LastElement());
+  } else {
+    // Split at the room left below the threshold, not at the threshold itself:
+    // earlier buffers may have used part of it and aBuffer may be shorter than
+    // it. No underflow: mLocalFileBytesBuffered never exceeds the threshold.
+    size_t room = LOCAL_FILE_UTF_8_BUFFER_SIZE - mLocalFileBytesBuffered;
+    auto span = aBuffer.AsSpan();
+    auto head = span.To(room);
+    auto tail = span.From(room);
+    // We make a theoretically useless copy here, because avoiding
+    // the copy adds too much complexity.
+    Maybe<Buffer<uint8_t>> maybe = Buffer<uint8_t>::CopyFrom(head);
+    if (maybe.isNothing()) {
+      MarkAsBroken(NS_ERROR_OUT_OF_MEMORY);
+      return;
+    }
+    mLocalFileBytesBuffered = LOCAL_FILE_UTF_8_BUFFER_SIZE;
+    mBufferedLocalFileData.AppendElement(std::move(*maybe));
+    DoDataAvailable(head);
+    // Re-decode may have happened here.
+    DoDataAvailable(tail);
+  }
+  // Clear only here, after the calls above have returned: DoDataAvailable
+  // may have been passed a span pointing into mBufferedLocalFileData.
+  if (!mDecodingLocalFileAsUTF8) {
+    mBufferedLocalFileData.Clear();
+  }
+}
+
+
 void nsHtml5StreamParser::DoDataAvailable(Span<const uint8_t> aBuffer) {
   NS_ASSERTION(IsParserThread(), "Wrong thread!");
   MOZ_RELEASE_ASSERT(STREAM_BEING_READ == mStreamState,
                      "DoDataAvailable called when stream not open.");
   mTokenizerMutex.AssertCurrentThreadOwns();
 
   if (IsTerminated()) {
     return;
   }
 
   nsresult rv;
   if (HasDecoder()) {
-    if (mFeedChardet) {
+    if (mFeedChardet && !mDecodingLocalFileAsUTF8) {
       bool dontFeed;
-      mChardet->DoIt((const char*)aBuffer.Elements(), aBuffer.Length(),
-                     &dontFeed);
+      mChardet->DoIt((const char*)aBuffer.Elements(), aBuffer.Length(), &dontFeed);
       mFeedChardet = !dontFeed;
     }
     rv = WriteStreamBytes(aBuffer);
   } else {
     rv = SniffStreamBytes(aBuffer);
   }
   if (NS_FAILED(rv)) {
     MarkAsBroken(rv);
     return;
   }
 
   if (IsTerminatedOrInterrupted()) {
     return;
   }
 
+  if (mDecodingLocalFileAsUTF8) {
+    return;
+  }
+
   ParseAvailableData();
 
   if (mFlushTimerArmed || mSpeculating) {
     return;
   }
 
   {
     mozilla::MutexAutoLock flushTimerLock(mFlushTimerMutex);
@@ -1140,35 +1278,35 @@ class nsHtml5DataAvailable : public Runn
  public:
   nsHtml5DataAvailable(nsHtml5StreamParser* aStreamParser,
                        Buffer<uint8_t>&& aData)
       : Runnable("nsHtml5DataAvailable"),
         mStreamParser(aStreamParser),
         mData(std::move(aData)) {}
   NS_IMETHOD Run() override {
     mozilla::MutexAutoLock autoLock(mStreamParser->mTokenizerMutex);
-    mStreamParser->DoDataAvailable(mData);
+    mStreamParser->DoDataAvailableBuffer(std::move(mData));
     return NS_OK;
   }
 };
 
 nsresult nsHtml5StreamParser::OnDataAvailable(nsIRequest* aRequest,
                                               nsISupports* aContext,
                                               nsIInputStream* aInStream,
                                               uint64_t aSourceOffset,
                                               uint32_t aLength) {
   nsresult rv;
   if (NS_FAILED(rv = mExecutor->IsBroken())) {
     return rv;
   }
 
-  NS_ASSERTION(mRequest == aRequest, "Got data on wrong stream.");
+  MOZ_ASSERT(mRequest == aRequest, "Got data on wrong stream.");
   uint32_t totalRead;
   // Main thread to parser thread dispatch requires copying to buffer first.
-  if (NS_IsMainThread()) {
+  if (MOZ_UNLIKELY(NS_IsMainThread())) {
     Maybe<Buffer<uint8_t>> maybe = Buffer<uint8_t>::Alloc(aLength);
     if (maybe.isNothing()) {
       return mExecutor->MarkAsBroken(NS_ERROR_OUT_OF_MEMORY);
     }
     Buffer<uint8_t> data(std::move(*maybe));
     rv = aInStream->Read(reinterpret_cast<char*>(data.Elements()),
                          data.Length(), &totalRead);
     NS_ENSURE_SUCCESS(rv, rv);
@@ -1176,29 +1314,43 @@ nsresult nsHtml5StreamParser::OnDataAvai
 
     nsCOMPtr<nsIRunnable> dataAvailable =
         new nsHtml5DataAvailable(this, std::move(data));
     if (NS_FAILED(mEventTarget->Dispatch(dataAvailable,
                                          nsIThread::DISPATCH_NORMAL))) {
       NS_WARNING("Dispatching DataAvailable event failed.");
     }
     return rv;
-  } else {
-    NS_ASSERTION(IsParserThread(), "Wrong thread!");
-    mozilla::MutexAutoLock autoLock(mTokenizerMutex);
+  }
+  MOZ_ASSERT(IsParserThread(), "Wrong thread!");
+  mozilla::MutexAutoLock autoLock(mTokenizerMutex);
 
-    // Read directly from response buffer.
-    rv = aInStream->ReadSegments(CopySegmentsToParser, this, aLength,
-                                 &totalRead);
-    if (NS_FAILED(rv)) {
-      NS_WARNING("Failed reading response data to parser");
-      return rv;
+  if (MOZ_UNLIKELY(mDecodingLocalFileAsUTF8)) {
+    // It's a bit sad to potentially buffer the first 1024
+    // bytes in two places, but it's a lot simpler than trying
+    // to optimize out that copy. It only happens for local files
+    // and not for the http(s) content anyway.
+    Maybe<Buffer<uint8_t>> maybe = Buffer<uint8_t>::Alloc(aLength);
+    if (maybe.isNothing()) {
+      MarkAsBroken(NS_ERROR_OUT_OF_MEMORY);
+      return NS_ERROR_OUT_OF_MEMORY;
     }
-    return NS_OK;
+    Buffer<uint8_t> data(std::move(*maybe));
+    rv = aInStream->Read(reinterpret_cast<char*>(data.Elements()),
+                         data.Length(), &totalRead);
+    NS_ENSURE_SUCCESS(rv, rv);
+    MOZ_ASSERT(totalRead == aLength);
+    DoDataAvailableBuffer(std::move(data));
+    return rv;
   }
+  // Read directly from response buffer.
+  rv = aInStream->ReadSegments(CopySegmentsToParser, this, aLength, &totalRead);
+  NS_ENSURE_SUCCESS(rv, rv);
+  MOZ_ASSERT(totalRead == aLength);
+  return rv;
 }
 
 /* static */ nsresult nsHtml5StreamParser::CopySegmentsToParser(
     nsIInputStream* aInStream, void* aClosure, const char* aFromSegment,
     uint32_t aToOffset, uint32_t aCount, uint32_t* aWriteCount) {
   nsHtml5StreamParser* parser = static_cast<nsHtml5StreamParser*>(aClosure);
 
   parser->DoDataAvailable(AsBytes(MakeSpan(aFromSegment, aCount)));
@@ -1310,18 +1462,19 @@ void nsHtml5StreamParser::FlushTreeOpsAn
   mTreeBuilder->Flush();
   nsCOMPtr<nsIRunnable> runnable(mExecutorFlusher);
   if (NS_FAILED(DispatchToMain(runnable.forget()))) {
     NS_WARNING("failed to dispatch executor flush event");
   }
 }
 
 void nsHtml5StreamParser::ParseAvailableData() {
-  NS_ASSERTION(IsParserThread(), "Wrong thread!");
+  MOZ_ASSERT(IsParserThread(), "Wrong thread!");
   mTokenizerMutex.AssertCurrentThreadOwns();
+  MOZ_ASSERT(!mDecodingLocalFileAsUTF8);
 
   if (IsTerminatedOrInterrupted()) {
     return;
   }
 
   if (mSpeculating && !IsSpeculationEnabled()) {
     return;
   }
--- a/parser/html/nsHtml5StreamParser.h
+++ b/parser/html/nsHtml5StreamParser.h
@@ -21,19 +21,16 @@
 #include "nsISerialEventTarget.h"
 #include "nsITimer.h"
 #include "nsICharsetDetector.h"
 #include "mozilla/dom/DocGroup.h"
 #include "mozilla/Buffer.h"
 
 class nsHtml5Parser;
 
-#define NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE 1024
-#define NS_HTML5_STREAM_PARSER_SNIFFING_BUFFER_SIZE 1024
-
 enum eParserMode {
   /**
    * Parse a document normally as HTML.
    */
   NORMAL,
 
   /**
    * View document as HTML source.
@@ -103,16 +100,20 @@ enum eHtml5StreamState {
   STREAM_ENDED = 2
 };
 
 class nsHtml5StreamParser final : public nsICharsetDetectionObserver {
   template <typename T>
   using NotNull = mozilla::NotNull<T>;
   using Encoding = mozilla::Encoding;
 
+  const uint32_t SNIFFING_BUFFER_SIZE = 1024;
+  const uint32_t READ_BUFFER_SIZE = 1024;
+  const uint32_t LOCAL_FILE_UTF_8_BUFFER_SIZE = 1024*1024*50; // 50 MB
+
   friend class nsHtml5RequestStopper;
   friend class nsHtml5DataAvailable;
   friend class nsHtml5StreamParserContinuation;
   friend class nsHtml5TimerKungFu;
   friend class nsHtml5StreamParserPtr;
 
  public:
   NS_DECL_CYCLE_COLLECTING_ISUPPORTS
@@ -239,16 +240,18 @@ class nsHtml5StreamParser final : public
    * timer.
    */
   void FlushTreeOpsAndDisarmTimer();
 
   void ParseAvailableData();
 
   void DoStopRequest();
 
+  void DoDataAvailableBuffer(mozilla::Buffer<uint8_t>&& aBuffer);
+
   void DoDataAvailable(mozilla::Span<const uint8_t> aBuffer);
 
   static nsresult CopySegmentsToParser(nsIInputStream* aInStream,
                                        void* aClosure, const char* aFromSegment,
                                        uint32_t aToOffset, uint32_t aCount,
                                        uint32_t* aWriteCount);
 
   bool IsTerminatedOrInterrupted() {
@@ -308,16 +311,29 @@ class nsHtml5StreamParser final : public
    *
    * @param aDecoderCharsetName The name for the decoder's charset
    *                            (UTF-16BE, UTF-16LE or UTF-8; the BOM has
    *                            been swallowed)
    */
   nsresult SetupDecodingFromBom(NotNull<const Encoding*> aEncoding);
 
   /**
+   * When speculatively decoding from file: URL as UTF-8, commit
+   * to UTF-8 as the non-speculative encoding and start processing
+   * the decoded data.
+   */
+  void CommitLocalFileToUTF8();
+
+  /**
+   * When speculatively decoding from file: URL as UTF-8, redecode
+   * using fallback and then continue normally with the fallback.
+   */
+  void ReDecodeLocalFile();
+
+  /**
    * Become confident or resolve and encoding name to its preferred form.
    * @param aEncoding the value of an internal encoding decl. Acts as an
    *                  out param, too, when the method returns true.
    * @return true if the parser needs to start using the new value of
    *         aEncoding and false if the parser became confident or if
    *         the encoding name did not specify a usable encoding
    */
   const Encoding* PreferredForInternalEncodingDecl(const nsACString& aEncoding);
@@ -481,16 +497,24 @@ class nsHtml5StreamParser final : public
   mozilla::Mutex mSpeculationMutex;
 
   /**
    * Number of times speculation has failed for this parser.
    */
   uint32_t mSpeculationFailureCount;
 
   /**
+   * Number of bytes already buffered into mBufferedLocalFileData.
+   * Never counts above LOCAL_FILE_UTF_8_BUFFER_SIZE.
+   */
+  uint32_t mLocalFileBytesBuffered;
+
+  nsTArray<mozilla::Buffer<uint8_t>> mBufferedLocalFileData;
+
+  /**
    * True to terminate early; protected by mTerminatedMutex
    */
   bool mTerminated;
   bool mInterrupted;
   mozilla::Mutex mTerminatedMutex;
 
   /**
    * The thread this stream parser runs on.
@@ -514,16 +538,22 @@ class nsHtml5StreamParser final : public
   /**
    * Whether the initial charset source was kCharsetFromParentFrame
    */
   bool mInitialEncodingWasFromParentFrame;
 
   bool mHasHadErrors;
 
   /**
+   * If true, we are decoding a local file that lacks an encoding
+   * declaration as UTF-8 and we are not tokenizing yet.
+   */
+  bool mDecodingLocalFileAsUTF8;
+
+  /**
    * Timer for flushing tree ops once in a while when not speculating.
    */
   nsCOMPtr<nsITimer> mFlushTimer;
 
   /**
    * Mutex for protecting access to mFlushTimer (but not for the two
    * mFlushTimerFoo booleans below).
    */
--- a/parser/nsCharsetSource.h
+++ b/parser/nsCharsetSource.h
@@ -4,24 +4,26 @@
 
 #ifndef nsCharsetSource_h_
 #define nsCharsetSource_h_
 
 // note: the value order defines the priority; higher numbers take priority
 #define kCharsetUninitialized 0
 #define kCharsetFromFallback 1
 #define kCharsetFromTopLevelDomain 2
-#define kCharsetFromDocTypeDefault 3  // This and up confident for XHR
-#define kCharsetFromCache 4
-#define kCharsetFromParentFrame 5
-#define kCharsetFromAutoDetection 6
-#define kCharsetFromHintPrevDoc 7
-#define kCharsetFromMetaPrescan 8  // this one and smaller: HTML5 Tentative
-#define kCharsetFromMetaTag 9      // this one and greater: HTML5 Confident
-#define kCharsetFromIrreversibleAutoDetection 10
-#define kCharsetFromChannel 11
-#define kCharsetFromOtherComponent 12
-#define kCharsetFromParentForced 13  // propagates to child frames
-#define kCharsetFromUserForced 14    // propagates to child frames
-#define kCharsetFromByteOrderMark 15
-#define kCharsetFromUtf8OnlyMime 16  // For JSON, WebVTT and such
+#define kCharsetFromFileURLGuess 3
+#define kCharsetFromDocTypeDefault 4  // This and up confident for XHR
+#define kCharsetFromCache 5
+#define kCharsetFromParentFrame 6
+#define kCharsetFromAutoDetection 7
+#define kCharsetFromHintPrevDoc 8
+#define kCharsetFromMetaPrescan 9  // this one and smaller: HTML5 Tentative
+#define kCharsetFromMetaTag 10     // this one and greater: HTML5 Confident
+#define kCharsetFromIrreversibleAutoDetection 11
+#define kCharsetFromChannel 12
+#define kCharsetFromOtherComponent 13
+#define kCharsetFromParentForced 14  // propagates to child frames
+#define kCharsetFromUserForced 15    // propagates to child frames
+#define kCharsetFromByteOrderMark 16
+#define kCharsetFromUtf8OnlyMime 17  // For JSON, WebVTT and such
+#define kCharsetFromBuiltIn 18       // resource: URLs
 
 #endif /* nsCharsetSource_h_ */