No bug, update Readability to github tip, rs=me, a=RyanVM-for-aurora-orange-fix, a=sylvestre from bug 1150695
author Margaret Leibovic <margaret.leibovic@gmail.com>
Fri, 03 Apr 2015 11:08:25 -0400
changeset 254977 c77a62b27a08
parent 254976 8a12cc844b1c
child 254978 c4248bd6b7c3
push id 7913
push user gijskruitbosch@gmail.com
push date 2015-04-13 16:49 +0000
treeherder mozilla-aurora@c77a62b27a08 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers me, RyanVM-for-aurora-orange-fix, sylvestre
bugs 1150695
milestone 39.0a2
No bug, update Readability to github tip, rs=me, a=RyanVM-for-aurora-orange-fix, a=sylvestre from bug 1150695
toolkit/components/reader/Readability.js
--- a/toolkit/components/reader/Readability.js
+++ b/toolkit/components/reader/Readability.js
@@ -1630,16 +1630,49 @@ Readability.prototype = {
     this._flags = this._flags | flag;
   },
 
   _removeFlag: function(flag) {
     this._flags = this._flags & ~flag;
   },
 
   /**
+   * Decides whether or not the document is reader-able without parsing the whole thing.
+   * Heuristic: true once 5 <p> elements not ruled out by class/id hold >= 100 trimmed characters each.
+   * @return boolean Whether or not we suspect parse() will succeed at returning an article object.
+   */
+  isProbablyReaderable: function() {
+    var nodes = this._doc.getElementsByTagName("p");
+    if (nodes.length < 5) {
+      return false;
+    }
+
+    var possibleParagraphs = 0;
+    for (var i = 0; i < nodes.length; i++) {
+      var node = nodes[i];
+      var matchString = node.className + " " + node.id;
+
+      if (this.REGEXPS.unlikelyCandidates.test(matchString) &&
+          !this.REGEXPS.okMaybeItsACandidate.test(matchString)) {
+        continue;
+      }
+
+      if (node.textContent.trim().length < 100) {
+        continue;
+      }
+
+      possibleParagraphs++;
+      if (possibleParagraphs >= 5) {
+        return true;
+      }
+    }
+    return false;
+  },
+
+  /**
    * Runs readability.
    *
    * Workflow:
    *  1. Prep the document by removing script tags, css, etc.
    *  2. Build readability's DOM tree.
    *  3. Grab the article content from the current dom tree.
    *  4. Replace the current DOM tree with the new one.
    *  5. Read peacefully.