Bug 1143844 - Check document for readerable content to determine whether or not to show reader button. r=Gijs
☠☠ backed out by 80ade5ec1d31 ☠ ☠
author: Margaret Leibovic <margaret.leibovic@gmail.com>
Wed, 18 Mar 2015 13:42:52 -0700
changeset 234424 1dbf52164ffeeba315c79d4073441d3022a5e31f
parent 234423 2a397413f85f2031029925af9122a30f5427665d
child 234425 a244a42cfa0c0d434a59f954deb83c486dc02b07
push id: 28445
push user: kwierso@gmail.com
push date: Fri, 20 Mar 2015 02:10:04 +0000
treeherder: mozilla-central@988afda9eac4 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: Gijs
bugs: 1143844
milestone: 39.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1143844 - Check document for readerable content to determine whether or not to show reader button. r=Gijs
toolkit/components/reader/ReaderMode.jsm
--- a/toolkit/components/reader/ReaderMode.jsm
+++ b/toolkit/components/reader/ReaderMode.jsm
@@ -58,24 +58,57 @@ this.ReaderMode = {
           this.isEnabledForParseOnLoad = this._getStateForParseOnLoad();
         }
         break;
     }
   },
 
   /**
    * Decides whether or not a document is reader-able without parsing the whole thing.
-   * XXX: In the future, this should be smarter (bug 1143844).
    *
    * @param doc A document to parse.
    * @return boolean Whether or not we should show the reader mode button.
    */
   isProbablyReaderable: function(doc) {
     let uri = Services.io.newURI(doc.documentURI, null, null);
-    return this._shouldCheckUri(uri);
+    // Bail out early when _shouldCheckUri() rejects the document's URI.
+    if (!this._shouldCheckUri(uri)) {
+      return false;
+    }
+    // Patterns tested against each paragraph's "className id" string below.
+    let REGEXPS = {
+      unlikelyCandidates: /combx|comment|community|disqus|extra|foot|header|menu|remark|rss|shoutbox|sidebar|sponsor|ad-break|agegate|pagination|pager|popup|tweet|twitter/i,
+      okMaybeItsACandidate: /and|article|body|column|main|shadow/i,
+    };
+    // With fewer than five <p> elements the five-paragraph threshold below can never be met.
+    let nodes = doc.getElementsByTagName("p");
+    if (nodes.length < 5) {
+      return false;
+    }
+    // Count the paragraphs that survive both filters in the loop.
+    let possibleParagraphs = 0;
+    for (let i = 0; i < nodes.length; i++) {
+      let node = nodes[i];
+      let matchString = node.className + " " + node.id;
+      // Skip when class/id matches unlikelyCandidates and does not also match okMaybeItsACandidate.
+      if (REGEXPS.unlikelyCandidates.test(matchString) &&
+          !REGEXPS.okMaybeItsACandidate.test(matchString)) {
+        continue;
+      }
+      // Skip paragraphs whose trimmed text is shorter than 200 characters.
+      if (node.textContent.trim().length < 200) {
+        continue;
+      }
+      // Five qualifying paragraphs are enough; return without scanning the rest.
+      possibleParagraphs++;
+      if (possibleParagraphs >= 5) {
+        return true;
+      }
+    }
+    return false;
   },
 
   /**
    * Gets an article from a loaded browser's document. This method will not attempt
    * to parse certain URIs (e.g. about: URIs).
    *
    * @param doc A document to parse.
    * @return {Promise}