Bug 1150695 - Use isProbablyReaderable function from Readability.js. r=Gijs
author Margaret Leibovic <margaret.leibovic@gmail.com>
Fri, 03 Apr 2015 16:25:22 -0400
changeset 238129 89e45c21e6314fde45ad73da2104cf70903c9155
parent 238128 af2151f856fbfe2391827c9d019dd661401bc4a5
child 238130 86d25f5945367e970ae1fdb9c1f6fc9ed634747b
push id 28556
push user ryanvm@gmail.com
push date Wed, 08 Apr 2015 16:40:55 +0000
treeherder mozilla-central@2c92a7df87c9 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers Gijs
bugs 1150695
milestone 40.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1150695 - Use isProbablyReaderable function from Readability.js. r=Gijs
toolkit/components/reader/ReaderMode.jsm
--- a/toolkit/components/reader/ReaderMode.jsm
+++ b/toolkit/components/reader/ReaderMode.jsm
@@ -13,16 +13,22 @@ Cu.import("resource://gre/modules/XPCOMU
 
 Cu.importGlobalProperties(["XMLHttpRequest"]);
 
 XPCOMUtils.defineLazyModuleGetter(this, "CommonUtils", "resource://services-common/utils.js");
 XPCOMUtils.defineLazyModuleGetter(this, "OS", "resource://gre/modules/osfile.jsm");
 XPCOMUtils.defineLazyModuleGetter(this, "ReaderWorker", "resource://gre/modules/reader/ReaderWorker.jsm");
 XPCOMUtils.defineLazyModuleGetter(this, "Task", "resource://gre/modules/Task.jsm");
 
+XPCOMUtils.defineLazyGetter(this, "Readability", function() {
+  let scope = {};
+  Services.scriptloader.loadSubScript("resource://gre/modules/reader/Readability.js", scope);
+  return scope["Readability"];
+});
+
 this.ReaderMode = {
   // Version of the cache schema.
   CACHE_VERSION: 1,
 
   DEBUG: 0,
 
   // Don't try to parse the page if it has too many elements (for memory and
   // performance reasons)
@@ -73,46 +79,17 @@ this.ReaderMode = {
       return false;
     }
 
     let uri = Services.io.newURI(doc.location.href, null, null);
     if (!this._shouldCheckUri(uri)) {
       return false;
     }
 
-    let REGEXPS = {
-      unlikelyCandidates: /combx|comment|community|disqus|extra|foot|header|menu|remark|rss|shoutbox|sidebar|sponsor|ad-break|agegate|pagination|pager|popup|tweet|twitter/i,
-      okMaybeItsACandidate: /and|article|body|column|main|shadow/i,
-    };
-
-    let nodes = doc.getElementsByTagName("p");
-    if (nodes.length < 5) {
-      return false;
-    }
-
-    let possibleParagraphs = 0;
-    for (let i = 0; i < nodes.length; i++) {
-      let node = nodes[i];
-      let matchString = node.className + " " + node.id;
-
-      if (REGEXPS.unlikelyCandidates.test(matchString) &&
-          !REGEXPS.okMaybeItsACandidate.test(matchString)) {
-        continue;
-      }
-
-      if (node.textContent.trim().length < 100) {
-        continue;
-      }
-
-      possibleParagraphs++;
-      if (possibleParagraphs >= 5) {
-        return true;
-      }
-    }
-    return false;
+    return new Readability(uri, doc).isProbablyReaderable();
   },
 
   /**
    * Gets an article from a loaded browser's document. This method will not attempt
    * to parse certain URIs (e.g. about: URIs).
    *
    * @param doc A document to parse.
    * @return {Promise}