Bug 1273282. Fix the handling of numeric entities in xml-stylesheet href values. r=peterv
author Boris Zbarsky <bzbarsky@mit.edu>
Thu, 19 May 2016 13:48:11 -0400
changeset 322647 b4d88aafa06c679ac772bba4cdbb4cc8f4e51454
parent 322646 2aef443489891f7fa4bdf9f9b5206da6d9cfcb25
child 322648 64031b8696dfb6230ed0e16f149694014ee98e83
push id 9671
push user raliiev@mozilla.com
push date Mon, 06 Jun 2016 20:27:52 +0000
treeherder mozilla-aurora@cea65ca3d0bd [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers peterv
bugs 1273282
milestone 49.0a1
Bug 1273282. Fix the handling of numeric entities in xml-stylesheet href values. r=peterv
dom/base/nsContentUtils.cpp
parser/expat/lib/moz_extensions.c
parser/htmlparser/nsParserService.h
testing/web-platform/meta/MANIFEST.json
testing/web-platform/meta/html/infrastructure/urls/resolving-urls/query-encoding/utf-16be.html.ini
testing/web-platform/meta/html/infrastructure/urls/resolving-urls/query-encoding/utf-16le.html.ini
testing/web-platform/meta/html/infrastructure/urls/resolving-urls/query-encoding/utf-8.html.ini
testing/web-platform/meta/html/infrastructure/urls/resolving-urls/query-encoding/windows-1251.html.ini
testing/web-platform/tests/dom/nodes/ProcessingInstruction-escapes-1.xhtml
--- a/dom/base/nsContentUtils.cpp
+++ b/dom/base/nsContentUtils.cpp
@@ -1209,26 +1209,23 @@ nsContentUtils::GetPseudoAttributeValue(
           aValue.Truncate();
 
           return false;
         }
 
         if (*chunkEnd == kAmpersand) {
           aValue.Append(start, chunkEnd - start);
 
-          // Point to first character after the ampersand.
-          ++chunkEnd;
-
           const char16_t *afterEntity = nullptr;
           char16_t result[2];
           uint32_t count =
             MOZ_XMLTranslateEntity(reinterpret_cast<const char*>(chunkEnd),
-                                  reinterpret_cast<const char*>(iter),
-                                  reinterpret_cast<const char**>(&afterEntity),
-                                  result);
+                                   reinterpret_cast<const char*>(iter),
+                                   reinterpret_cast<const char**>(&afterEntity),
+                                   result);
           if (count == 0) {
             aValue.Truncate();
 
             return false;
           }
 
           aValue.Append(result, count);
 
--- a/parser/expat/lib/moz_extensions.c
+++ b/parser/expat/lib/moz_extensions.c
@@ -111,38 +111,46 @@ int MOZ_XMLIsNCNameChar(const char* ptr)
   default:
     return 0;
   }
 }
 
 int MOZ_XMLTranslateEntity(const char* ptr, const char* end, const char** next,
                            XML_Char* result)
 {
+  // Can we assert here somehow?
+  // MOZ_ASSERT(*ptr == '&');
+
   const ENCODING* enc = XmlGetUtf16InternalEncodingNS();
-  int tok = PREFIX(scanRef)(enc, ptr, end, next);
+  /* scanRef expects to be pointed to the char after the '&'. */
+  int tok = PREFIX(scanRef)(enc, ptr + enc->minBytesPerChar, end, next);
   if (tok <= XML_TOK_INVALID) {
     return 0;
   }
 
   if (tok == XML_TOK_CHAR_REF) {
+    /* XmlCharRefNumber expects to be pointed to the '&'. */
     int n = XmlCharRefNumber(enc, ptr);
 
     /* We could get away with just < 0, but better safe than sorry. */
     if (n <= 0) {
       return 0;
     }
 
     return XmlUtf16Encode(n, (unsigned short*)result);
   }
 
   if (tok == XML_TOK_ENTITY_REF) {
-    /* *next points to after the semicolon, so the entity ends at
+    /* XmlPredefinedEntityName expects to be pointed to the char after '&'.
+
+       *next points to after the semicolon, so the entity ends at
        *next - enc->minBytesPerChar. */
     XML_Char ch =
-      (XML_Char)XmlPredefinedEntityName(enc, ptr, *next - enc->minBytesPerChar);
+      (XML_Char)XmlPredefinedEntityName(enc, ptr + enc->minBytesPerChar,
+                                        *next - enc->minBytesPerChar);
     if (!ch) {
       return 0;
     }
 
     *result = ch;
     return 1;
   }
 
--- a/parser/htmlparser/nsParserService.h
+++ b/parser/htmlparser/nsParserService.h
@@ -5,16 +5,32 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 #ifndef NS_PARSERSERVICE_H__
 #define NS_PARSERSERVICE_H__
 
 #include "nsIParserService.h"
 
 extern "C" int MOZ_XMLIsLetter(const char* ptr);
 extern "C" int MOZ_XMLIsNCNameChar(const char* ptr);
+/**
+ * Decodes an entity into the UTF-16 encoding of a Unicode character. If a ';'
+ * is found between `ptr` and `end` it will try to decode the entity and set
+ * `*next` to point to the character after the ;. The resulting UTF-16 code
+ * units will be written in `*result`, so if the entity is a valid numeric
+ * entity there needs to be space for at least two char16_t at the location
+ * `result` points to.
+ *
+ * @param ptr pointer to the ampersand.
+ * @param end pointer to the position after the last character of the
+ *            string.
+ * @param next [out] will be set to the character after the ';' or null if
+ *                   the decoding was unsuccessful.
+ * @param result the buffer to write the resulting UTF-16 character in.
+ * @return the number of char16_t written to `*result`.
+ */
 extern "C" int MOZ_XMLTranslateEntity(const char* ptr, const char* end,
                                       const char** next, char16_t* result);
 
 class nsParserService : public nsIParserService {
   virtual ~nsParserService();
 
 public:
   nsParserService();
--- a/testing/web-platform/meta/MANIFEST.json
+++ b/testing/web-platform/meta/MANIFEST.json
@@ -35986,16 +35986,22 @@
           }
         ],
         "dom/events/EventListener-incumbent-global-2.sub.html": [
           {
             "path": "dom/events/EventListener-incumbent-global-2.sub.html",
             "url": "/dom/events/EventListener-incumbent-global-2.sub.html"
           }
         ],
+        "dom/nodes/ProcessingInstruction-escapes-1.xhtml": [
+          {
+            "path": "dom/nodes/ProcessingInstruction-escapes-1.xhtml",
+            "url": "/dom/nodes/ProcessingInstruction-escapes-1.xhtml"
+          }
+        ],
         "html/browsers/browsing-the-web/navigating-across-documents/javascript-url-return-value-handling.html": [
           {
             "path": "html/browsers/browsing-the-web/navigating-across-documents/javascript-url-return-value-handling.html",
             "url": "/html/browsers/browsing-the-web/navigating-across-documents/javascript-url-return-value-handling.html"
           }
         ],
         "html/semantics/embedded-content/the-iframe-element/iframe-allowfullscreen.html": [
           {
--- a/testing/web-platform/meta/html/infrastructure/urls/resolving-urls/query-encoding/utf-16be.html.ini
+++ b/testing/web-platform/meta/html/infrastructure/urls/resolving-urls/query-encoding/utf-16be.html.ini
@@ -116,11 +116,8 @@
     expected: FAIL
 
   [importScripts() in a shared worker]
     expected: FAIL
 
   [SharedWorker() in a shared worker]
     expected: FAIL
 
-  [<?xml-stylesheet?> (CSS)]
-    expected: FAIL
-
--- a/testing/web-platform/meta/html/infrastructure/urls/resolving-urls/query-encoding/utf-16le.html.ini
+++ b/testing/web-platform/meta/html/infrastructure/urls/resolving-urls/query-encoding/utf-16le.html.ini
@@ -116,11 +116,8 @@
     expected: FAIL
 
   [importScripts() in a shared worker]
     expected: FAIL
 
   [SharedWorker() in a shared worker]
     expected: FAIL
 
-  [<?xml-stylesheet?> (CSS)]
-    expected: FAIL
-
--- a/testing/web-platform/meta/html/infrastructure/urls/resolving-urls/query-encoding/utf-8.html.ini
+++ b/testing/web-platform/meta/html/infrastructure/urls/resolving-urls/query-encoding/utf-8.html.ini
@@ -116,11 +116,8 @@
     expected: FAIL
 
   [importScripts() in a shared worker]
     expected: FAIL
 
   [SharedWorker() in a shared worker]
     expected: FAIL
 
-  [<?xml-stylesheet?> (CSS)]
-    expected: FAIL
-
--- a/testing/web-platform/meta/html/infrastructure/urls/resolving-urls/query-encoding/windows-1251.html.ini
+++ b/testing/web-platform/meta/html/infrastructure/urls/resolving-urls/query-encoding/windows-1251.html.ini
@@ -346,19 +346,16 @@
     expected: FAIL
 
   [importScripts() in a shared worker]
     expected: FAIL
 
   [SharedWorker() in a shared worker]
     expected: FAIL
 
-  [<?xml-stylesheet?> (CSS)]
-    expected: FAIL
-
   [Scheme ftp (getting <a>.href)]
     expected: FAIL
 
   [Scheme file (getting <a>.href)]
     expected: FAIL
 
   [Scheme gopher (getting <a>.href)]
     expected: FAIL
new file mode 100644
--- /dev/null
+++ b/testing/web-platform/tests/dom/nodes/ProcessingInstruction-escapes-1.xhtml
@@ -0,0 +1,33 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-stylesheet href="support/style.css" type="text/css"?>
+<?xml-stylesheet href="data:text/css,&#x41;&amp;&apos;" type="text/css"?>
+<?xml-stylesheet href="data:text/css,&#65;&amp;&apos;" type="text/css"?>
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<title>ProcessingInstruction numeric escapes</title>
+<link rel="help" href="https://dom.spec.whatwg.org/#dom-processinginstruction-target"/>
+<link rel="help" href="https://dom.spec.whatwg.org/#dom-characterdata-data"/>
+<script src="/resources/testharness.js"></script>
+<script src="/resources/testharnessreport.js"></script>
+</head>
+<body>
+<div id="log"/>
+<script>
+<![CDATA[
+test(function() {
+  var pienc = document.firstChild.nextSibling;
+  assert_true(pienc instanceof ProcessingInstruction)
+  assert_equals(pienc.target, "xml-stylesheet")
+  assert_equals(pienc.data, 'href="data:text/css,&#x41;&amp;&apos;" type="text/css"')
+  assert_equals(pienc.sheet.href, "data:text/css,A&'");
+
+  pienc = pienc.nextSibling;
+  assert_true(pienc instanceof ProcessingInstruction)
+  assert_equals(pienc.target, "xml-stylesheet")
+  assert_equals(pienc.data, 'href="data:text/css,&#65;&amp;&apos;" type="text/css"')
+  assert_equals(pienc.sheet.href, "data:text/css,A&'");
+})
+]]>
+</script>
+</body>
+</html>