Bug 1150695 - Use isProbablyReaderable function from Readability.js. r=Gijs, a=sledru
author Margaret Leibovic <margaret.leibovic@gmail.com>
Fri, 03 Apr 2015 16:25:22 -0400
changeset 258434 ab0337907115
parent 258433 c4a01c159cb6
child 258435 1b6ba1cb52f6
push id 4668
push user ryanvm@gmail.com
push date 2015-04-13 16:23 +0000
treeherder mozilla-beta@002faed66e96 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers Gijs, sledru
bugs 1150695
milestone 38.0
Bug 1150695 - Use isProbablyReaderable function from Readability.js. r=Gijs, a=sledru
toolkit/components/reader/ReaderMode.jsm
--- a/toolkit/components/reader/ReaderMode.jsm
+++ b/toolkit/components/reader/ReaderMode.jsm
@@ -13,16 +13,22 @@ Cu.import("resource://gre/modules/XPCOMU
 
 Cu.importGlobalProperties(["XMLHttpRequest"]);
 
 XPCOMUtils.defineLazyModuleGetter(this, "CommonUtils", "resource://services-common/utils.js");
 XPCOMUtils.defineLazyModuleGetter(this, "OS", "resource://gre/modules/osfile.jsm");
 XPCOMUtils.defineLazyModuleGetter(this, "ReaderWorker", "resource://gre/modules/reader/ReaderWorker.jsm");
 XPCOMUtils.defineLazyModuleGetter(this, "Task", "resource://gre/modules/Task.jsm");
 
+XPCOMUtils.defineLazyGetter(this, "Readability", function() { // Registers a lazy "Readability" getter on `this`.
+  let scope = {}; // Throwaway object passed as the target of the subscript load below.
+  Services.scriptloader.loadSubScript("resource://gre/modules/reader/Readability.js", scope);
+  return scope["Readability"]; // Expose only the Readability constructor taken from that scope.
+});
+
 this.ReaderMode = {
   // Version of the cache schema.
   CACHE_VERSION: 1,
 
   DEBUG: 0,
 
   // Don't try to parse the page if it has too many elements (for memory and
   // performance reasons)
@@ -69,46 +75,17 @@ this.ReaderMode = {
    */
   isProbablyReaderable: function(doc) {
     let uri = Services.io.newURI(doc.location.href, null, null);
 
     if (!this._shouldCheckUri(uri)) {
       return false;
     }
 
-    let REGEXPS = {
-      unlikelyCandidates: /combx|comment|community|disqus|extra|foot|header|menu|remark|rss|shoutbox|sidebar|sponsor|ad-break|agegate|pagination|pager|popup|tweet|twitter/i,
-      okMaybeItsACandidate: /and|article|body|column|main|shadow/i,
-    };
-
-    let nodes = doc.getElementsByTagName("p");
-    if (nodes.length < 5) {
-      return false;
-    }
-
-    let possibleParagraphs = 0;
-    for (let i = 0; i < nodes.length; i++) {
-      let node = nodes[i];
-      let matchString = node.className + " " + node.id;
-
-      if (REGEXPS.unlikelyCandidates.test(matchString) &&
-          !REGEXPS.okMaybeItsACandidate.test(matchString)) {
-        continue;
-      }
-
-      if (node.textContent.trim().length < 200) {
-        continue;
-      }
-
-      possibleParagraphs++;
-      if (possibleParagraphs >= 5) {
-        return true;
-      }
-    }
-    return false;
+    return new Readability(uri, doc).isProbablyReaderable(); // Delegate the paragraph heuristic to Readability.js; the URI pre-check above stays here.
   },
 
   /**
    * Gets an article from a loaded browser's document. This method will not attempt
    * to parse certain URIs (e.g. about: URIs).
    *
    * @param doc A document to parse.
    * @return {Promise}