Bug 1107588 - Use an xhr to download reader mode content instead of creating new browser elements. r=mfinkle
authorMargaret Leibovic <margaret.leibovic@gmail.com>
Tue, 30 Dec 2014 14:56:09 -0500
changeset 221849 8a9910d1105bbc3f11b0f66c1996f06db0356ff5
parent 221848 5e34d8220c131d83090643565a54c733ebe50a02
child 221850 9198ef9c7df6797d1c393227d0c219ccacb36260
push id28048
push userphilringnalda@gmail.com
push dateSun, 04 Jan 2015 01:24:06 +0000
treeherdermozilla-central@b478b0c48bcc [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersmfinkle
bugs1107588
milestone37.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1107588 - Use an xhr to download reader mode content instead of creating new browser elements. r=mfinkle
mobile/android/base/tests/testReadingListCache.js
mobile/android/chrome/content/Reader.js
toolkit/components/reader/ReaderMode.jsm
--- a/mobile/android/base/tests/testReadingListCache.js
+++ b/mobile/android/base/tests/testReadingListCache.js
@@ -64,17 +64,17 @@ add_task(function* test_remove_article()
   let uri = Services.io.newURI(TEST_PAGES[0].url, null, null);
   yield ReaderMode.removeArticleFromCache(uri);
   let article = yield ReaderMode.getArticleFromCache(uri);
   do_check_eq(article, null);
 });
 
 add_task(function* test_parse_articles() {
   for (let testcase of TEST_PAGES) {
-    let article = yield Reader._downloadAndParseDocument(testcase.url);
+    let article = yield ReaderMode.downloadAndParseDocument(testcase.url);
     checkArticle(article, testcase);
   }
 });
 
 add_task(function* test_migrate_cache() {
   // Store an article in the old indexedDB reader mode cache.
   let cacheDB = yield new Promise((resolve, reject) => {
     let win = Services.wm.getMostRecentWindow("navigator:browser");
--- a/mobile/android/chrome/content/Reader.js
+++ b/mobile/android/chrome/content/Reader.js
@@ -165,72 +165,17 @@ let Reader = {
     let uri = Services.io.newURI(url, null, null);
     let article = yield ReaderMode.getArticleFromCache(uri);
     if (article) {
       return article;
     }
 
     // Article hasn't been found in the cache, we need to
     // download the page and parse the article out of it.
-    return yield this._downloadAndParseDocument(url);
-  }),
-
-  _downloadDocument: function (url) {
-    return new Promise((resolve, reject) => {
-      // We want to parse those arbitrary pages safely, outside the privileged
-      // context of chrome. We create a hidden browser element to fetch the
-      // loaded page's document object then discard the browser element.
-      let browser = document.createElement("browser");
-      browser.setAttribute("type", "content");
-      browser.setAttribute("collapsed", "true");
-      browser.setAttribute("disablehistory", "true");
-
-      document.documentElement.appendChild(browser);
-      browser.stop();
-
-      browser.webNavigation.allowAuth = false;
-      browser.webNavigation.allowImages = false;
-      browser.webNavigation.allowJavascript = false;
-      browser.webNavigation.allowMetaRedirects = true;
-      browser.webNavigation.allowPlugins = false;
-
-      browser.addEventListener("DOMContentLoaded", event => {
-        let doc = event.originalTarget;
-
-        // ignore on frames and other documents
-        if (doc != browser.contentDocument) {
-          return;
-        }
-
-        if (doc.location.href == "about:blank") {
-          reject("about:blank loaded; aborting");
-
-          // Request has finished with error, remove browser element
-          browser.parentNode.removeChild(browser);
-          return;
-        }
-
-        resolve({ browser, doc });
-      });
-
-      browser.loadURIWithFlags(url, Ci.nsIWebNavigation.LOAD_FLAGS_NONE,
-                               null, null, null);
-    });
-  },
-
-  _downloadAndParseDocument: Task.async(function* (url) {
-    let { browser, doc } = yield this._downloadDocument(url);
-
-    try {
-      let uri = Services.io.newURI(url, null, null);
-      let article = yield ReaderMode.readerParse(uri, doc);
-      return article;
-    } finally {
-      browser.parentNode.removeChild(browser);
-    }
+    return yield ReaderMode.downloadAndParseDocument(url);
   }),
 
   /**
    * Migrates old indexedDB reader mode cache to new JSON cache.
    */
   migrateCache: Task.async(function* () {
     let cacheDB = yield new Promise((resolve, reject) => {
       let request = window.indexedDB.open("about:reader", 1);
--- a/toolkit/components/reader/ReaderMode.jsm
+++ b/toolkit/components/reader/ReaderMode.jsm
@@ -56,19 +56,66 @@ let ReaderMode = {
     // First, try to find a parsed article in the cache.
     let article = yield this.getArticleFromCache(uri);
     if (article) {
       this.log("Page found in cache, return article immediately");
       return article;
     }
 
     let doc = browser.contentWindow.document;
-    return yield this.readerParse(uri, doc);
+    return yield this._readerParse(uri, doc);
+  }),
+
+  /**
+   * Downloads and parses a document from a URL.
+   *
+   * @param url URL to download and parse.
+   * @return {Promise}
+   * @resolves JS object representing the article, or null if no article is found.
+   */
+  downloadAndParseDocument: Task.async(function* (url) {
+    let uri = Services.io.newURI(url, null, null);
+    let doc = yield this._downloadDocument(url);
+    return yield this._readerParse(uri, doc);
   }),
 
+  _downloadDocument: function (url) {
+    return new Promise((resolve, reject) => {
+      let xhr = new XMLHttpRequest();
+      xhr.open("GET", url, true);
+      xhr.onerror = evt => reject(evt.error);
+      xhr.responseType = "document";
+      xhr.onload = evt => {
+        if (xhr.status !== 200) {
+          reject("Reader mode XHR failed with status: " + xhr.status);
+          return;
+        }
+
+        let doc = xhr.responseXML;
+
+        // Manually follow a meta refresh tag if one exists.
+        let meta = doc.querySelector("meta[http-equiv=refresh]");
+        if (meta) {
+          let content = meta.getAttribute("content");
+          if (content) {
+            let urlIndex = content.indexOf("URL=");
+            if (urlIndex > -1) {
+              let url = content.substring(urlIndex + 4);
+              this._downloadDocument(url).then((doc) => resolve(doc));
+              return;
+            }
+          }
+        }
+        resolve(doc);
+      }
+      xhr.send();
+    });
+  },
+
+
   /**
    * Retrieves an article from the cache given an article URI.
    *
    * @param uri The article URI.
    * @return {Promise}
    * @resolves JS object representing the article, or null if no article is found.
    * @rejects OS.File.Error
    */
@@ -133,17 +180,17 @@ let ReaderMode = {
    * Attempts to parse a document into an article. Heavy lifting happens
    * in readerWorker.js.
    *
    * @param uri The article URI.
    * @param doc The document to parse.
    * @return {Promise}
    * @resolves JS object representing the article, or null if no article is found.
    */
-  readerParse: function (uri, doc) {
+  _readerParse: function (uri, doc) {
     return new Promise((resolve, reject) => {
       let numTags = doc.getElementsByTagName("*").length;
       if (numTags > this.MAX_ELEMS_TO_PARSE) {
         this.log("Aborting parse for " + uri.spec + "; " + numTags + " elements found");
         resolve(null);
         return;
       }