Bug 634541 - Make the HTML parser remove only one BOM when the input starts with multiple BOMs. r=bzbarsky
author: Henri Sivonen <hsivonen@hsivonen.fi>
Mon, 13 Aug 2018 08:01:56 +0000
changeset 486286 aeb2e2eaf0c48397b5ca52347a21c74f45366aec
parent 486273 957f7aa848b1a46d824d5e11de2e62c63ac4a526
child 486287 3a54c28335c447bac5ce3b1dbad39ff7a724f8d1
push id: 9719
push user: ffxbld-merge
push date: Fri, 24 Aug 2018 17:49:46 +0000
treeherder: mozilla-beta@719ec98fba77 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: bzbarsky
bugs: 634541
milestone: 63.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 634541 - Make the HTML parser remove only one BOM when the input starts with multiple BOMs. r=bzbarsky

MozReview-Commit-ID: 1zoGFxx9MCm

Differential Revision: https://phabricator.services.mozilla.com/D2926
parser/html/nsHtml5StreamParser.cpp
testing/web-platform/meta/MANIFEST.json
testing/web-platform/tests/encoding/remove-only-one-bom.html
testing/web-platform/tests/encoding/resources/two-boms-utf-16be.html
testing/web-platform/tests/encoding/resources/two-boms-utf-16le.html
testing/web-platform/tests/encoding/resources/two-boms-utf-8.html
--- a/parser/html/nsHtml5StreamParser.cpp
+++ b/parser/html/nsHtml5StreamParser.cpp
@@ -340,17 +340,17 @@ nsHtml5StreamParser::SetupDecodingAndWri
   return rv;
 }
 
 nsresult
 nsHtml5StreamParser::SetupDecodingFromBom(NotNull<const Encoding*> aEncoding)
 {
   NS_ASSERTION(IsParserThread(), "Wrong thread!");
   mEncoding = aEncoding;
-  mUnicodeDecoder = mEncoding->NewDecoderWithBOMRemoval();
+  mUnicodeDecoder = mEncoding->NewDecoderWithoutBOMHandling();
   mCharsetSource = kCharsetFromByteOrderMark;
   mFeedChardet = false;
   mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
   mSniffingBuffer = nullptr;
   mMetaScanner = nullptr;
   mBomState = BOM_SNIFFING_OVER;
   return NS_OK;
 }
--- a/testing/web-platform/meta/MANIFEST.json
+++ b/testing/web-platform/meta/MANIFEST.json
@@ -275968,16 +275968,31 @@
      {}
     ]
    ],
    "encoding/resources/text-plain-charset.py": [
     [
      {}
     ]
    ],
+   "encoding/resources/two-boms-utf-16be.html": [
+    [
+     {}
+    ]
+   ],
+   "encoding/resources/two-boms-utf-16le.html": [
+    [
+     {}
+    ]
+   ],
+   "encoding/resources/two-boms-utf-8.html": [
+    [
+     {}
+    ]
+   ],
    "encoding/resources/utf-32-big-endian-bom.html": [
     [
      {}
     ]
    ],
    "encoding/resources/utf-32-big-endian-bom.xml": [
     [
      {}
@@ -348193,16 +348208,22 @@
     ],
     [
      "/encoding/legacy-mb-tchinese/big5/big5-encode-href.html?9001-10000",
      {
       "timeout": "long"
      }
     ]
    ],
+   "encoding/remove-only-one-bom.html": [
+    [
+     "/encoding/remove-only-one-bom.html",
+     {}
+    ]
+   ],
    "encoding/replacement-encodings.any.js": [
     [
      "/encoding/replacement-encodings.any.html",
      {}
     ],
     [
      "/encoding/replacement-encodings.any.worker.html",
      {}
@@ -582068,16 +582089,20 @@
   "encoding/legacy-mb-tchinese/big5/big5_errors.html.headers": [
    "49773a44f8de6275f1fa88dca396ec0178e90acf",
    "support"
   ],
   "encoding/legacy-mb-tchinese/big5/big5_index.js": [
    "9ab1182ad7292cd6e4d8c9c5739f534e21976fd0",
    "support"
   ],
+  "encoding/remove-only-one-bom.html": [
+   "8b91f7f349b6ca2585e1786e791bc88840262698",
+   "testharness"
+  ],
   "encoding/replacement-encodings.any.js": [
    "784dd953e33ef9134a1b9647327bf169f3116ef8",
    "testharness"
   ],
   "encoding/resources/decode-common.js": [
    "19dd6939ac197efc5cd38d133a8cdc66e321ad04",
    "support"
   ],
@@ -582104,16 +582129,28 @@
   "encoding/resources/single-byte-raw.py": [
    "b4a6c90405ec85d2d15222c2445fd9dcb12c8462",
    "support"
   ],
   "encoding/resources/text-plain-charset.py": [
    "a1c07e70189bce25c4002f447a544b64224967fd",
    "support"
   ],
+  "encoding/resources/two-boms-utf-16be.html": [
+   "6a5b0a5517fd422b7859f8332416523198aebec0",
+   "support"
+  ],
+  "encoding/resources/two-boms-utf-16le.html": [
+   "535a40d398d3b50bba40269639524022ae0c2fcf",
+   "support"
+  ],
+  "encoding/resources/two-boms-utf-8.html": [
+   "83ea941a53699c09d06ee4f952ddad699e8ba3d6",
+   "support"
+  ],
   "encoding/resources/utf-32-big-endian-bom.html": [
    "79679cac759429625fde26de63a959d9f4bcb782",
    "support"
   ],
   "encoding/resources/utf-32-big-endian-bom.xml": [
    "399c35af827db47ca43d09d552b973ef31208a08",
    "support"
   ],
new file mode 100644
--- /dev/null
+++ b/testing/web-platform/tests/encoding/remove-only-one-bom.html
@@ -0,0 +1,20 @@
+<!doctype html>
+<meta charset=utf-8>
+<title></title>
+<script src=/resources/testharness.js></script>
+<script src=/resources/testharnessreport.js></script>
+<script>
+setup({explicit_done:true});
+function runTests() {
+    for (var i = 0; i < window.frames.length; i++) {
+        test(function() {
+            assert_equals(window.frames[i].window.document.body.textContent, "\uFEFF");
+        }, "Should have removed only one BOM from frame " + i);
+    }
+    done();
+}
+</script>
+<body onload="runTests()">
+<iframe src="resources/two-boms-utf-8.html"></iframe>
+<iframe src="resources/two-boms-utf-16le.html"></iframe>
+<iframe src="resources/two-boms-utf-16be.html"></iframe>
new file mode 100644
--- /dev/null
+++ b/testing/web-platform/tests/encoding/resources/two-boms-utf-16be.html
@@ -0,0 +1,1 @@
+
\ No newline at end of file
new file mode 100644
--- /dev/null
+++ b/testing/web-platform/tests/encoding/resources/two-boms-utf-16le.html
@@ -0,0 +1,1 @@
+
\ No newline at end of file
new file mode 100644
--- /dev/null
+++ b/testing/web-platform/tests/encoding/resources/two-boms-utf-8.html
@@ -0,0 +1,1 @@
+
\ No newline at end of file