Bug 1143844 - Check document for readerable content to determine whether or not to show reader button. r=Gijs
author: Margaret Leibovic <margaret.leibovic@gmail.com>
Wed, 18 Mar 2015 13:42:52 -0700
changeset 251840 6b02cd43cc209464a518befbc2c05ab58f34d3b9
parent 251839 d86b13be5d27b0a43d2d4116e63d38f3f0df9801
child 251841 911e01832a4aec58d23b255d99217a64a07dcb23
push id: 1156
push user: pbrosset@mozilla.com
push date: Fri, 20 Mar 2015 16:00:24 +0000
reviewers: Gijs
bugs: 1143844
milestone: 39.0a1
Bug 1143844 - Check document for readerable content to determine whether or not to show reader button. r=Gijs
toolkit/components/reader/ReaderMode.jsm
--- a/toolkit/components/reader/ReaderMode.jsm
+++ b/toolkit/components/reader/ReaderMode.jsm
@@ -58,24 +58,57 @@ this.ReaderMode = {
           this.isEnabledForParseOnLoad = this._getStateForParseOnLoad();
         }
         break;
     }
   },
 
   /**
    * Decides whether or not a document is reader-able without parsing the whole thing.
-   * XXX: In the future, this should be smarter (bug 1143844).
    *
    * @param doc A document to parse.
    * @return boolean Whether or not we should show the reader mode button.
    */
   isProbablyReaderable: function(doc) {
     let uri = Services.io.newURI(doc.documentURI, null, null);
-    return this._shouldCheckUri(uri);
+
+    // Bail out early when _shouldCheckUri rejects the document's URI.
+    if (!this._shouldCheckUri(uri)) {
+      return false;
+    }
+
+    // Minimum number of qualifying <p> elements needed to show the button,
+    // and the minimum trimmed text length for a <p> to qualify.
+    let minParagraphs = 5;
+    let minTextLength = 200;
+    let paragraphs = doc.getElementsByTagName("p");
+    if (paragraphs.length < minParagraphs) {
+      return false;
+    }
+
+    // A <p> whose class/id matches "unlikely" is ignored,
+    // unless the same string also matches "maybe".
+    let unlikely = /combx|comment|community|disqus|extra|foot|header|menu|remark|rss|shoutbox|sidebar|sponsor|ad-break|agegate|pagination|pager|popup|tweet|twitter/i;
+    let maybe = /and|article|body|column|main|shadow/i;
+
+    let remaining = minParagraphs;
+    for (let idx = 0; idx < paragraphs.length; idx++) {
+      let para = paragraphs[idx];
+      let classAndId = para.className + " " + para.id;
+      let isCandidate =
+        !unlikely.test(classAndId) || maybe.test(classAndId);
+
+      // Count only candidates with enough text; stop once enough are found.
+      if (isCandidate && para.textContent.trim().length >= minTextLength) {
+        remaining--;
+        if (remaining === 0) {
+          return true;
+        }
+      }
+    }
+    return false;
   },
 
   /**
    * Gets an article from a loaded browser's document. This method will not attempt
    * to parse certain URIs (e.g. about: URIs).
    *
    * @param doc A document to parse.
    * @return {Promise}