Bug 1393924 - Collect description and preview image and store it into moz_places r=mak,Mardak
author Ursula Sarracini
Thu, 07 Sep 2017 16:51:02 -0400
changeset 429060 1b2b3bc1d47b27d02c7187c818a7f5f5808ddc1f
parent 429059 f8bfb757632da365e6bc35c9f066573a75f82d7f
child 429061 1a536787340c6374e432028f23b6ae93b673680c
push id 7761
push user jlund@mozilla.com
push date Fri, 15 Sep 2017 00:19:52 +0000
treeherder mozilla-beta@c38455951db4 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers mak, Mardak
bugs 1393924
milestone 57.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1393924 - Collect description and preview image and store it into moz_places r=mak,Mardak MozReview-Commit-ID: 4ZPGMpz21S9
browser/base/content/browser.js
browser/base/content/content.js
browser/base/content/tabbrowser.xml
browser/base/content/test/metaTags/.eslintrc.js
browser/base/content/test/metaTags/browser.ini
browser/base/content/test/metaTags/browser_meta_tags.js
browser/base/content/test/metaTags/meta_tags.html
browser/base/moz.build
browser/modules/ContentMetaHandler.jsm
browser/modules/moz.build
--- a/browser/base/content/browser.js
+++ b/browser/base/content/browser.js
@@ -1285,17 +1285,17 @@ var gBrowserInit = {
     Services.obs.addObserver(gPluginHandler.NPAPIPluginCrashed, "plugin-crashed");
 
     window.addEventListener("AppCommand", HandleAppCommandEvent, true);
 
     // These routines add message listeners. They must run before
     // loading the frame script to ensure that we don't miss any
     // message sent between when the frame script is loaded and when
     // the listener is registered.
-    DOMLinkHandler.init();
+    DOMEventHandler.init();
     gPageStyleMenu.init();
     LanguageDetectionListener.init();
     BrowserOnClick.init();
     FeedHandler.init();
     CompactTheme.init();
     AboutPrivateBrowsingListener.init();
     TrackingProtection.init();
     CaptivePortalWatcher.init();
@@ -3699,40 +3699,50 @@ var newWindowButtonObserver = {
       if (link.url) {
         let data = await getShortcutOrURIAndPostData(link.url);
         // Allow third-party services to fixup this URL.
         openNewWindowWith(data.url, null, data.postData, true);
       }
     }
   }
 }
-
-const DOMLinkHandler = {
+const DOMEventHandler = {
   init() {
     let mm = window.messageManager;
     mm.addMessageListener("Link:AddFeed", this);
     mm.addMessageListener("Link:SetIcon", this);
     mm.addMessageListener("Link:AddSearch", this);
+    mm.addMessageListener("Meta:SetPageInfo", this);
   },
 
   receiveMessage(aMsg) {
     switch (aMsg.name) {
       case "Link:AddFeed":
         let link = {type: aMsg.data.type, href: aMsg.data.href, title: aMsg.data.title};
         FeedHandler.addFeed(link, aMsg.target);
         break;
 
       case "Link:SetIcon":
         this.setIcon(aMsg.target, aMsg.data.url, aMsg.data.loadingPrincipal);
         break;
 
       case "Link:AddSearch":
         this.addSearch(aMsg.target, aMsg.data.engine, aMsg.data.url);
         break;
-    }
+
+      case "Meta:SetPageInfo":
+        this.setPageInfo(aMsg.data);
+        break;
+    }
+  },
+
+  setPageInfo(aData) {
+    const {url, description, previewImageURL} = aData;
+    gBrowser.setPageInfo(url, description, previewImageURL);
+    return true;
   },
 
   setIcon(aBrowser, aURL, aLoadingPrincipal) {
     if (gBrowser.isFailedIcon(aURL))
       return false;
 
     let tab = gBrowser.getTabForBrowser(aBrowser);
     if (!tab)
--- a/browser/base/content/content.js
+++ b/browser/base/content/content.js
@@ -12,16 +12,17 @@ var {classes: Cc, interfaces: Ci, utils:
 
 Cu.import("resource://gre/modules/XPCOMUtils.jsm");
 Cu.import("resource://gre/modules/Services.jsm");
 
 XPCOMUtils.defineLazyModuleGetters(this, {
   E10SUtils: "resource:///modules/E10SUtils.jsm",
   BrowserUtils: "resource://gre/modules/BrowserUtils.jsm",
   ContentLinkHandler: "resource:///modules/ContentLinkHandler.jsm",
+  ContentMetaHandler: "resource:///modules/ContentMetaHandler.jsm",
   ContentWebRTC: "resource:///modules/ContentWebRTC.jsm",
   SpellCheckHelper: "resource://gre/modules/InlineSpellChecker.jsm",
   InlineSpellCheckerContent: "resource://gre/modules/InlineSpellCheckerContent.jsm",
   LoginManagerContent: "resource://gre/modules/LoginManagerContent.jsm",
   LoginFormFactory: "resource://gre/modules/LoginManagerContent.jsm",
   InsecurePasswordUtils: "resource://gre/modules/InsecurePasswordUtils.jsm",
   PluginContent: "resource:///modules/PluginContent.jsm",
   PrivateBrowsingUtils: "resource://gre/modules/PrivateBrowsingUtils.jsm",
@@ -764,16 +765,17 @@ var ClickEventHandler = {
     // Note: makeURI() will throw if aUri is not a valid URI.
     return [href ? Services.io.newURI(href, null, baseURI).spec : null, null,
             node && node.ownerDocument.nodePrincipal];
   }
 };
 ClickEventHandler.init();
 
 ContentLinkHandler.init(this);
+ContentMetaHandler.init(this);
 
 // TODO: Load this lazily so the JSM is run only if a relevant event/message fires.
 var pluginContent = new PluginContent(global);
 
 addEventListener("DOMWindowFocus", function(event) {
   sendAsyncMessage("DOMWindowFocus", {});
 }, false);
 
--- a/browser/base/content/tabbrowser.xml
+++ b/browser/base/content/tabbrowser.xml
@@ -1017,16 +1017,30 @@
         <body>
           <![CDATA[
             let browser = aTab ? this.getBrowserForTab(aTab) : this.selectedBrowser;
             return browser.mIconURL;
           ]]>
         </body>
       </method>
 
+      <method name="setPageInfo">
+        <parameter name="aURL"/>
+        <parameter name="aDescription"/>
+        <parameter name="aPreviewImage"/>
+        <body>
+          <![CDATA[
+            if (aURL) {
+              let pageInfo = {url: aURL, description: aDescription, previewImageURL: aPreviewImage}
+              PlacesUtils.history.update(pageInfo).catch(Components.utils.reportError);
+            }
+          ]]>
+        </body>
+      </method>
+
       <method name="shouldLoadFavIcon">
         <parameter name="aURI"/>
         <body>
           <![CDATA[
             return (aURI &&
                     Services.prefs.getBoolPref("browser.chrome.site_icons") &&
                     Services.prefs.getBoolPref("browser.chrome.favicons") &&
                     ("schemeIs" in aURI) && (aURI.schemeIs("http") || aURI.schemeIs("https")));
new file mode 100644
--- /dev/null
+++ b/browser/base/content/test/metaTags/.eslintrc.js
@@ -0,0 +1,7 @@
+"use strict";
+
+module.exports = {
+  "extends": [
+    "plugin:mozilla/browser-test",
+  ]
+};
new file mode 100644
--- /dev/null
+++ b/browser/base/content/test/metaTags/browser.ini
@@ -0,0 +1,4 @@
+[DEFAULT]
+support-files =
+  meta_tags.html
+[browser_meta_tags.js]
new file mode 100644
--- /dev/null
+++ b/browser/base/content/test/metaTags/browser_meta_tags.js
@@ -0,0 +1,30 @@
+/* Any copyright is dedicated to the Public Domain.
+ * http://creativecommons.org/publicdomain/zero/1.0/
+ */
+
+/* globals gBrowser */
+/* This tests that with the page meta_tags.html, ContentMetaHandler.jsm parses out
+ * the meta tags available and only stores the best one for description and one for
+ * preview image url. In the case of this test, the best defined meta tags are
+ * "og:description" and "og:image:url". The list of meta tags and their order of
+ * preference is found in ContentMetaHandler.jsm. Because there is debounce logic
+ * in ContentMetaHandler.jsm to only make one single SQL update, we have to wait
+ * for some time before checking that the page info was stored correctly.
+ */
+add_task(async function test() {
+    Components.utils.import("resource://gre/modules/PlacesUtils.jsm");
+    const URL = "https://example.com/browser/browser/base/content/test/metaTags/meta_tags.html";
+    let tab = await BrowserTestUtils.openNewForegroundTab(gBrowser, URL);
+
+    // Wait until places has stored the page info
+    let pageInfo;
+    await BrowserTestUtils.waitForCondition(async () => {
+      pageInfo = await PlacesUtils.history.fetch(URL, {"includeMeta": true});
+      const {previewImageURL, description} = pageInfo;
+      return previewImageURL && description;
+    });
+    is(pageInfo.description, "og:description", "got the correct description");
+    is(pageInfo.previewImageURL.href, "og:image:url", "got the correct preview image");
+    await BrowserTestUtils.removeTab(tab);
+});
+
new file mode 100644
--- /dev/null
+++ b/browser/base/content/test/metaTags/meta_tags.html
@@ -0,0 +1,18 @@
+<!DOCTYPE HTML>
+<html>
+  <head>
+    <meta charset="UTF-8" />
+    <title>MetaTags</title>
+    <meta property="twitter:description" content="twitter:description" />
+    <meta property="og:description" content="og:description" />
+    <meta name="description" content="description" />
+    <meta name="unknown:tag" content="unknown:tag" />
+    <meta property="og:image" content="og:image" />
+    <meta property="twitter:image" content="twitter:image" />
+    <meta property="og:image:url" content="og:image:url" />
+    <meta name="thumbnail" content="thumbnail" />
+  </head>
+  <body>
+  </body>
+</html>
+
--- a/browser/base/moz.build
+++ b/browser/base/moz.build
@@ -19,16 +19,17 @@ MOCHITEST_CHROME_MANIFESTS += [
 
 BROWSER_CHROME_MANIFESTS += [
     'content/test/about/browser.ini',
     'content/test/alerts/browser.ini',
     'content/test/captivePortal/browser.ini',
     'content/test/contextMenu/browser.ini',
     'content/test/forms/browser.ini',
     'content/test/general/browser.ini',
+    'content/test/metaTags/browser.ini',
     'content/test/newtab/browser.ini',
     'content/test/pageinfo/browser.ini',
     'content/test/performance/browser.ini',
     'content/test/permissions/browser.ini',
     'content/test/plugins/browser.ini',
     'content/test/popupNotifications/browser.ini',
     'content/test/popups/browser.ini',
     'content/test/referrer/browser.ini',
new file mode 100644
--- /dev/null
+++ b/browser/modules/ContentMetaHandler.jsm
@@ -0,0 +1,132 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+"use strict";
+const {utils: Cu, interfaces: Ci, classes: Cc} = Components;
+Cu.importGlobalProperties(["URL"]);
+
+// Debounce time in milliseconds - this should be long enough to account for
+// sync script tags that could appear between desired meta tags
+const TIMEOUT_DELAY = 1000;
+
+// Possible description tags, listed in order from least favourable to most favourable
+const DESCRIPTION_RULES = [
+  "twitter:description",
+  "description",
+  "og:description"
+];
+
+// Possible image tags, listed in order from least favourable to most favourable
+const PREVIEW_IMAGE_RULES = [
+  "thumbnail",
+  "twitter:image",
+  "og:image",
+  "og:image:url",
+  "og:image:secure_url"
+];
+
+/*
+ * Checks if the incoming meta tag has a greater score than the current best
+ * score by checking the index of the meta tag in the list of rules provided.
+ *
+ * @param {Array} aRules
+ *          The list of rules for a given type of meta tag
+ * @param {String} aTag
+ *          The name or property of the incoming meta tag
+ * @param {String} aEntry
+ *          The current best entry for the given meta tag
+ *
+ * @returns {Boolean} true if the incoming meta tag is better than the current
+ *                    best meta tag of that same kind, false otherwise
+ */
+function shouldExtractMetadata(aRules, aTag, aEntry) {
+  return aRules.indexOf(aTag) > aEntry.currMaxScore;
+}
+
+this.EXPORTED_SYMBOLS = [ "ContentMetaHandler" ];
+
+/*
+ * This listens to DOMMetaAdded events and collects relevant metadata about the
+ * meta tag received. Then, it sends the metadata gathered from the meta tags
+ * and the url of the page as its payload to be inserted into moz_places.
+ */
+
+this.ContentMetaHandler = {
+  init(chromeGlobal) {
+    chromeGlobal.addEventListener("DOMMetaAdded", event => {
+      const metaTag = event.originalTarget;
+      const window = metaTag.ownerGlobal;
+
+      // If there's no meta tag, or we're in a sub-frame, ignore this
+      if (!metaTag || !metaTag.ownerDocument || window != window.top) {
+        return;
+      }
+      this.handleMetaTag(metaTag, chromeGlobal);
+    });
+    // Stores a mapping of the best description and preview image collected so far
+    // for a given URL
+    this._metaTags = new Map();
+  },
+
+
+  handleMetaTag(metaTag, chromeGlobal) {
+    const url = metaTag.ownerDocument.documentURI;
+
+    let name = metaTag.name;
+    let prop = metaTag.getAttributeNS(null, "property");
+    if (!name && !prop) {
+      return;
+    }
+
+    let tag = name || prop;
+
+    const entry = this._metaTags.get(url) || {
+      description: {value: null, currMaxScore: -1},
+      image: {value: null, currMaxScore: -1},
+      timeout: null
+    };
+
+    if (shouldExtractMetadata(DESCRIPTION_RULES, tag, entry.description)) {
+      // Extract the description
+      const value = metaTag.getAttributeNS(null, "content");
+      if (value) {
+        entry.description.value = value;
+        entry.description.currMaxScore = DESCRIPTION_RULES.indexOf(tag);
+      }
+    } else if (shouldExtractMetadata(PREVIEW_IMAGE_RULES, tag, entry.image)) {
+      // Extract the preview image
+      const value = metaTag.getAttributeNS(null, "content");
+      if (value) {
+        entry.image.value = new URL(value, url).href;
+        entry.image.currMaxScore = PREVIEW_IMAGE_RULES.indexOf(tag);
+      }
+    } else {
+      // We don't care about other meta tags
+      return;
+    }
+
+    if (!this._metaTags.has(url)) {
+      this._metaTags.set(url, entry);
+    }
+
+    if (entry.timeout) {
+      entry.timeout.delay = TIMEOUT_DELAY;
+    } else {
+      // We want to debounce incoming meta tags until we're certain we have the
+      // best one for description and preview image, and only store that one
+      entry.timeout = Cc["@mozilla.org/timer;1"].createInstance(Ci.nsITimer);
+      entry.timeout.initWithCallback(() => {
+        entry.timeout = null;
+
+        // Save description and preview image to moz_places
+        chromeGlobal.sendAsyncMessage("Meta:SetPageInfo", {
+          url,
+          description: entry.description.value,
+          previewImageURL: entry.image.value
+        });
+        this._metaTags.delete(url);
+      }, TIMEOUT_DELAY, Ci.nsITimer.TYPE_ONE_SHOT);
+    }
+  }
+};
--- a/browser/modules/moz.build
+++ b/browser/modules/moz.build
@@ -127,16 +127,17 @@ EXTRA_JS_MODULES += [
     'AboutNewTab.jsm',
     'AttributionCode.jsm',
     'BrowserUITelemetry.jsm',
     'BrowserUsageTelemetry.jsm',
     'CastingApps.jsm',
     'ContentClick.jsm',
     'ContentCrashHandlers.jsm',
     'ContentLinkHandler.jsm',
+    'ContentMetaHandler.jsm',
     'ContentObservers.js',
     'ContentSearch.jsm',
     'ContentWebRTC.jsm',
     'DirectoryLinksProvider.jsm',
     'E10SUtils.jsm',
     'ExtensionsUI.jsm',
     'Feeds.jsm',
     'FormSubmitObserver.jsm',