Bug 1522783 - fix parsing documents with meta tags without content attributes in reader mode by updating to github c0c097c930c8e17969a9ecc143792daf799b215d, r=jaws
author Gijs Kruitbosch <gijskruitbosch@gmail.com>
Tue, 29 Jan 2019 18:30:06 +0000
changeset 455961 efb5e5425e2bc9baebb2a86730adbccf68deaa87
parent 455960 e12caed89db3e037deb713d8a22a5c9cfa542b1e
child 455962 4440fbf71c72e13cfcb6257bbae6024052ffd46d
child 455993 370ea4403e557215110d57f9c53801f753a0c5a6
push id 35463
push user shindli@mozilla.com
push date Tue, 29 Jan 2019 21:38:17 +0000
treeherder mozilla-central@4440fbf71c72 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers jaws
bugs 1522783
milestone 67.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1522783 - fix parsing documents with meta tags without content attributes in reader mode by updating to github c0c097c930c8e17969a9ecc143792daf799b215d, r=jaws
toolkit/components/reader/Readability.js
--- a/toolkit/components/reader/Readability.js
+++ b/toolkit/components/reader/Readability.js
@@ -540,17 +540,26 @@ Readability.prototype = {
     while (node.firstChild) {
       replacement.appendChild(node.firstChild);
     }
     node.parentNode.replaceChild(replacement, node);
     if (node.readability)
       replacement.readability = node.readability;
 
     for (var i = 0; i < node.attributes.length; i++) {
-      replacement.setAttribute(node.attributes[i].name, node.attributes[i].value);
+      try {
+        replacement.setAttribute(node.attributes[i].name, node.attributes[i].value);
+      } catch (ex) {
+        /* it's possible for setAttribute() to throw if the attribute name
+         * isn't a valid XML Name. Such attributes can however be parsed from
+         * source in HTML docs, see https://github.com/whatwg/html/issues/4275,
+         * so we can hit them here and then throw. We don't care about such
+         * attributes so we ignore them.
+         */
+      }
     }
     return replacement;
   },
 
   /**
    * Prepare the article node for display. Clean out any inline styles,
    * iframes, forms, strip extraneous <p> tags, etc.
    *
@@ -1215,16 +1224,19 @@ Readability.prototype = {
     // name is a single value
     var namePattern = /^\s*(?:(dc|dcterm|og|twitter|weibo:(article|webpage))\s*[\.:]\s*)?(author|creator|description|title|site_name)\s*$/i;
 
     // Find description tags.
     this._forEachNode(metaElements, function(element) {
       var elementName = element.getAttribute("name");
       var elementProperty = element.getAttribute("property");
       var content = element.getAttribute("content");
+      if (!content) {
+        return;
+      }
       var matches = null;
       var name = null;
 
       if (elementProperty) {
         matches = elementProperty.match(propertyPattern);
         if (matches) {
           for (var i = matches.length - 1; i >= 0; i--) {
             // Convert to lowercase, and remove any whitespace