Backed out changeset 609e4952a46e (bug 1444082) for perma-failing android-4.3-arm7-api-16/opt-robocop-3 (RC3) CLOSED TREE
author Bogdan Tara <btara@mozilla.com>
Sat, 10 Mar 2018 01:14:09 +0200
changeset 462510 dd45b23b67c8eabffaeb17111d23e1d887e57173
parent 462509 010b589f89a0ded63676176e08255a927c429f94
child 462511 9a200b344efec72f896bf4ebda15a53b357da00a
push id 1683
push user sfraser@mozilla.com
push date Thu, 26 Apr 2018 16:43:40 +0000
treeherder mozilla-release@5af6cb21869d [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
bugs 1444082
milestone 60.0a1
backs out 609e4952a46e77839c34afac884876154bd24482
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Backed out changeset 609e4952a46e (bug 1444082) for perma-failing android-4.3-arm7-api-16/opt-robocop-3 (RC3) CLOSED TREE
toolkit/components/reader/JSDOMParser.js
toolkit/components/reader/Readability.js
toolkit/components/reader/ReaderWorker.js
toolkit/components/reader/test/readerModeNonArticle.html
--- a/toolkit/components/reader/JSDOMParser.js
+++ b/toolkit/components/reader/JSDOMParser.js
@@ -555,18 +555,17 @@
       delete this._textContent;
     },
     set textContent(newText) {
       this._textContent = newText;
       delete this._innerHTML;
     },
   };
 
-  var Document = function (url) {
-    this.documentURI = url;
+  var Document = function () {
     this.styleSheets = [];
     this.childNodes = [];
     this.children = [];
   };
 
   Document.prototype = {
     __proto__: Node.prototype,
 
@@ -596,30 +595,16 @@
       return node;
     },
 
     createTextNode: function (text) {
       var node = new Text();
       node.textContent = text;
       return node;
     },
-
-    get baseURI() {
-      if (!this.hasOwnProperty("_baseURI")) {
-        this._baseURI = this.documentURI;
-        var baseElements = this.getElementsByTagName("base");
-        var href = baseElements[0] && baseElements[0].getAttribute("href");
-        if (href) {
-          try {
-            this._baseURI = (new URL(href, this._baseURI)).href;
-          } catch (ex) {/* Just fall back to documentURI */}
-        }
-      }
-      return this._baseURI;
-    },
   };
 
   var Element = function (tag) {
     this.attributes = [];
     this.childNodes = [];
     this.children = [];
     this.nextElementSibling = this.previousElementSibling = null;
     this.localName = tag.toLowerCase();
@@ -1128,19 +1113,19 @@
       }
 
       return node;
     },
 
     /**
      * Parses an HTML string and returns a JS implementation of the Document.
      */
-    parse: function (html, url) {
+    parse: function (html) {
       this.html = html;
-      var doc = this.doc = new Document(url);
+      var doc = this.doc = new Document();
       this.readChildren(doc);
 
       // If this is an HTML document, remove root-level children except for the
       // <html> node
       if (doc.documentElement) {
         for (var i = doc.childNodes.length; --i >= 0;) {
           var child = doc.childNodes[i];
           if (child !== doc.documentElement) {
--- a/toolkit/components/reader/Readability.js
+++ b/toolkit/components/reader/Readability.js
@@ -36,17 +36,16 @@
 function Readability(uri, doc, options) {
   options = options || {};
 
   this._uri = uri;
   this._doc = doc;
   this._articleTitle = null;
   this._articleByline = null;
   this._articleDir = null;
-  this._attempts = [];
 
   // Configurable options
   this._debug = !!options.debug;
   this._maxElemsToParse = options.maxElemsToParse || this.DEFAULT_MAX_ELEMS_TO_PARSE;
   this._nbTopCandidates = options.nbTopCandidates || this.DEFAULT_N_TOP_CANDIDATES;
   this._wordThreshold = options.wordThreshold || this.DEFAULT_WORD_THRESHOLD;
   this._classesToPreserve = this.CLASSES_TO_PRESERVE.concat(options.classesToPreserve || []);
 
@@ -271,30 +270,44 @@ Readability.prototype = {
   /**
    * Converts each <a> and <img> uri in the given element to an absolute URI,
    * ignoring #ref URIs.
    *
    * @param Element
    * @return void
    */
   _fixRelativeUris: function(articleContent) {
-    var baseURI = this._doc.baseURI;
-    var documentURI = this._doc.documentURI;
+    var scheme = this._uri.scheme;
+    var prePath = this._uri.prePath;
+    var pathBase = this._uri.pathBase;
+
     function toAbsoluteURI(uri) {
-      // Leave hash links alone if the base URI matches the document URI:
-      if (baseURI == documentURI && uri.charAt(0) == "#") {
+      // If this is already an absolute URI, return it.
+      if (/^[a-zA-Z][a-zA-Z0-9\+\-\.]*:/.test(uri))
         return uri;
-      }
-      // Otherwise, resolve against base URI:
-      try {
-        return new URL(uri, baseURI).href;
-      } catch (ex) {
-        // Something went wrong, just return the original:
-      }
-      return uri;
+
+      // Scheme-rooted relative URI.
+      if (uri.substr(0, 2) == "//")
+        return scheme + "://" + uri.substr(2);
+
+      // Prepath-rooted relative URI.
+      if (uri[0] == "/")
+        return prePath + uri;
+
+      // Dotslash relative URI.
+      if (uri.indexOf("./") === 0)
+        return pathBase + uri.slice(2);
+
+      // Ignore hash URIs:
+      if (uri[0] == "#")
+        return uri;
+
+      // Standard relative URI; add entire path. pathBase already includes a
+      // trailing "/".
+      return pathBase + uri;
     }
 
     var links = articleContent.getElementsByTagName("a");
     this._forEachNode(links, function(link) {
       var href = link.getAttribute("href");
       if (href) {
         // Replace links with javascript: URIs with text content, since
         // they won't work after scripts have been removed from the page.
@@ -517,17 +530,16 @@ Readability.prototype = {
 
     // Clean out junk from the article content
     this._cleanConditionally(articleContent, "form");
     this._cleanConditionally(articleContent, "fieldset");
     this._clean(articleContent, "object");
     this._clean(articleContent, "embed");
     this._clean(articleContent, "h1");
     this._clean(articleContent, "footer");
-    this._clean(articleContent, "link");
 
     // Clean out elements have "share" in their id/class combinations from final top candidates,
     // which means we don't remove the top candidates even they have "share".
     this._forEachNode(articleContent.children, function(topCandidate) {
       this._cleanMatchedNodes(topCandidate, /share/);
     });
 
     // If there is only one h2 and its text content substantially equals article title,
@@ -1072,55 +1084,34 @@ Readability.prototype = {
           div.appendChild(children[0]);
         }
         articleContent.appendChild(div);
       }
 
       if (this._debug)
         this.log("Article content after paging: " + articleContent.innerHTML);
 
-      var parseSuccessful = true;
-
       // Now that we've gone through the full algorithm, check to see if
       // we got any meaningful content. If we didn't, we may need to re-run
       // grabArticle with different flags set. This gives us a higher likelihood of
       // finding the content, and the sieve approach gives us a higher likelihood of
       // finding the -right- content.
-      var textLength = this._getInnerText(articleContent, true).length;
-      if (textLength < this._wordThreshold) {
-        parseSuccessful = false;
+      if (this._getInnerText(articleContent, true).length < this._wordThreshold) {
         page.innerHTML = pageCacheHtml;
 
         if (this._flagIsActive(this.FLAG_STRIP_UNLIKELYS)) {
           this._removeFlag(this.FLAG_STRIP_UNLIKELYS);
-          this._attempts.push({articleContent: articleContent, textLength: textLength});
         } else if (this._flagIsActive(this.FLAG_WEIGHT_CLASSES)) {
           this._removeFlag(this.FLAG_WEIGHT_CLASSES);
-          this._attempts.push({articleContent: articleContent, textLength: textLength});
         } else if (this._flagIsActive(this.FLAG_CLEAN_CONDITIONALLY)) {
           this._removeFlag(this.FLAG_CLEAN_CONDITIONALLY);
-          this._attempts.push({articleContent: articleContent, textLength: textLength});
         } else {
-          this._attempts.push({articleContent: articleContent, textLength: textLength});
-          // No luck after removing flags, just return the longest text we found during the different loops
-          this._attempts.sort(function (a, b) {
-            return a.textLength < b.textLength;
-          });
-
-          // But first check if we actually have something
-          if (!this._attempts[0].textLength) {
-            return null;
-          }
-
-          articleContent = this._attempts[0].articleContent;
-          parseSuccessful = true;
+          return null;
         }
-      }
-
-      if (parseSuccessful) {
+      } else {
         // Find out text direction from ancestors of final top candidate.
         var ancestors = [parentOfTopCandidate, topCandidate].concat(this._getNodeAncestors(parentOfTopCandidate));
         this._someNode(ancestors, function(ancestor) {
           if (!ancestor.tagName)
             return false;
           var articleDir = ancestor.getAttribute("dir");
           if (articleDir) {
             this._articleDir = articleDir;
--- a/toolkit/components/reader/ReaderWorker.js
+++ b/toolkit/components/reader/ReaderWorker.js
@@ -42,12 +42,12 @@ var Agent = {
    *
    * @param {object} uri URI data for the document.
    * @param {string} serializedDoc The serialized document.
    * @param {object} options Options object to pass to Readability.
    *
    * @return {object} Article object returned from Readability.
    */
   parseDocument(uri, serializedDoc, options) {
-    let doc = new JSDOMParser().parse(serializedDoc, uri.spec);
+    let doc = new JSDOMParser().parse(serializedDoc);
     return new Readability(uri, doc, options).parse();
   },
 };
--- a/toolkit/components/reader/test/readerModeNonArticle.html
+++ b/toolkit/components/reader/test/readerModeNonArticle.html
@@ -1,9 +1,14 @@
 <!DOCTYPE html>
 <html>
 <head>
 <title>Non article title</title>
 <meta name="description" content="This is the non-article description." />
 </head>
 <body>
+<header>Site header</header>
+<div>
+<h1>Non article title</h1>
+<p>Woot!</p>
+</div>
 </body>
 </html>