Bug 1503674 - Remove unused PageMetadata. r=Gijs
author Mark Banner <standard8@mozilla.com>
Mon, 17 Dec 2018 09:33:12 +0000
changeset 450898 a6824881d78ace8bc153646477528a11afea36a7
parent 450897 4b128c704ca68d570978622fe85fc72c4fb180fa
child 450899 11457d8b6163291d33cf4554f0d655199c8bbb41
push id 35220
push user aciure@mozilla.com
push date Mon, 17 Dec 2018 16:40:36 +0000
treeherder mozilla-central@183b5d2173b5 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers Gijs
bugs 1503674
milestone 66.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1503674 - Remove unused PageMetadata. r=Gijs

Differential Revision: https://phabricator.services.mozilla.com/D14678
browser/actors/PageMetadataChild.jsm
browser/actors/moz.build
browser/base/content/test/general/browser.ini
browser/base/content/test/general/browser_PageMetaData_pushstate.js
browser/components/nsBrowserGlue.js
devtools/client/responsive.html/browser/tunnel.js
toolkit/modules/PageMetadata.jsm
toolkit/modules/moz.build
toolkit/modules/tests/browser/browser.ini
toolkit/modules/tests/browser/browser_PageMetadata.js
toolkit/modules/tests/browser/metadata_simple.html
toolkit/modules/tests/browser/metadata_titles.html
toolkit/modules/tests/browser/metadata_titles_fallback.html
deleted file mode 100644
--- a/browser/actors/PageMetadataChild.jsm
+++ /dev/null
@@ -1,33 +0,0 @@
-/* vim: set ts=2 sw=2 sts=2 et tw=80: */
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-"use strict";
-
-var EXPORTED_SYMBOLS = ["PageMetadataChild"];
-
-ChromeUtils.import("resource://gre/actors/ActorChild.jsm");
-
-ChromeUtils.defineModuleGetter(this, "ContextMenuChild",
-                               "resource:///modules/ContextMenuChild.jsm");
-ChromeUtils.defineModuleGetter(this, "PageMetadata",
-                               "resource://gre/modules/PageMetadata.jsm");
-
-class PageMetadataChild extends ActorChild {
-  receiveMessage(message) {
-    switch (message.name) {
-      case "PageMetadata:GetPageData": {
-        let target = ContextMenuChild.getTarget(this.mm, message);
-        let result = PageMetadata.getData(this.content.document, target);
-        this.mm.sendAsyncMessage("PageMetadata:PageDataResult", result);
-        break;
-      }
-      case "PageMetadata:GetMicroformats": {
-        let target = ContextMenuChild.getTarget(this.mm, message);
-        let result = PageMetadata.getMicroformats(this.content.document, target);
-        this.mm.sendAsyncMessage("PageMetadata:MicroformatsResult", result);
-        break;
-      }
-    }
-  }
-}
--- a/browser/actors/moz.build
+++ b/browser/actors/moz.build
@@ -35,15 +35,14 @@ FINAL_TARGET_FILES.actors += [
     'DOMFullscreenChild.jsm',
     'FormSubmitChild.jsm',
     'LightweightThemeChild.jsm',
     'LightWeightThemeInstallChild.jsm',
     'LinkHandlerChild.jsm',
     'NetErrorChild.jsm',
     'OfflineAppsChild.jsm',
     'PageInfoChild.jsm',
-    'PageMetadataChild.jsm',
     'PageStyleChild.jsm',
     'PluginChild.jsm',
     'SearchTelemetryChild.jsm',
     'URIFixupChild.jsm',
     'WebRTCChild.jsm',
 ]
--- a/browser/base/content/test/general/browser.ini
+++ b/browser/base/content/test/general/browser.ini
@@ -344,18 +344,16 @@ support-files = test_offline_gzip.html g
 # DO NOT ADD MORE TESTS HERE. USE A TOPICAL DIRECTORY INSTEAD.
 [browser_printpreview.js]
 skip-if = os == 'win' # Bug 1384127
 # DO NOT ADD MORE TESTS HERE. USE A TOPICAL DIRECTORY INSTEAD.
 [browser_private_browsing_window.js]
 # DO NOT ADD MORE TESTS HERE. USE A TOPICAL DIRECTORY INSTEAD.
 [browser_private_no_prompt.js]
 # DO NOT ADD MORE TESTS HERE. USE A TOPICAL DIRECTORY INSTEAD.
-[browser_PageMetaData_pushstate.js]
-# DO NOT ADD MORE TESTS HERE. USE A TOPICAL DIRECTORY INSTEAD.
 [browser_refreshBlocker.js]
 support-files =
   refresh_header.sjs
   refresh_meta.sjs
 # DO NOT ADD MORE TESTS HERE. USE A TOPICAL DIRECTORY INSTEAD.
 [browser_relatedTabs.js]
 # DO NOT ADD MORE TESTS HERE. USE A TOPICAL DIRECTORY INSTEAD.
 [browser_remoteTroubleshoot.js]
deleted file mode 100644
--- a/browser/base/content/test/general/browser_PageMetaData_pushstate.js
+++ /dev/null
@@ -1,31 +0,0 @@
-/* Any copyright is dedicated to the Public Domain.
- * http://creativecommons.org/publicdomain/zero/1.0/
- */
-
-add_task(async function() {
-  let rooturi = "https://example.com/browser/toolkit/modules/tests/browser/";
-  await BrowserTestUtils.openNewForegroundTab(gBrowser, rooturi + "metadata_simple.html");
-  await ContentTask.spawn(gBrowser.selectedBrowser, { rooturi }, async function(args) {
-    ChromeUtils.import("resource://gre/modules/PageMetadata.jsm");
-
-    let result = PageMetadata.getData(content.document);
-    // Result should have description.
-    Assert.equal(result.url, args.rooturi + "metadata_simple.html", "metadata url is correct");
-    Assert.equal(result.title, "Test Title", "metadata title is correct");
-    Assert.equal(result.description, "A very simple test page", "description is correct");
-
-    content.history.pushState({}, "2", "2.html");
-    result = PageMetadata.getData(content.document);
-    // Result should not have description.
-    Assert.equal(result.url, args.rooturi + "2.html", "metadata url is correct");
-    Assert.equal(result.title, "Test Title", "metadata title is correct");
-    Assert.ok(!result.description, "description is undefined");
-
-    Assert.equal(content.document.documentURI, args.rooturi + "2.html",
-      "content.document has correct url");
-  });
-
-  is(gBrowser.currentURI.spec, rooturi + "2.html", "gBrowser has correct url");
-
-  gBrowser.removeTab(gBrowser.selectedTab);
-});
--- a/browser/components/nsBrowserGlue.js
+++ b/browser/components/nsBrowserGlue.js
@@ -197,26 +197,16 @@ let ACTORS = {
 
   PageInfo: {
     child: {
       module: "resource:///actors/PageInfoChild.jsm",
       messages: ["PageInfo:getData"],
     },
   },
 
-  PageMetadata: {
-    child: {
-      module: "resource:///actors/PageMetadataChild.jsm",
-      messages: [
-        "PageMetadata:GetPageData",
-        "PageMetadata:GetMicroformats",
-      ],
-    },
-  },
-
   PageStyle: {
     child: {
       module: "resource:///actors/PageStyleChild.jsm",
       group: "browsers",
       events: {
         "pageshow": {},
       },
       messages: [
--- a/devtools/client/responsive.html/browser/tunnel.js
+++ b/devtools/client/responsive.html/browser/tunnel.js
@@ -453,17 +453,16 @@ MessageManagerTunnel.prototype = {
     // Messages sent from InlineSpellChecker.jsm
     "InlineSpellChecker:",
     // Messages sent from MessageChannel.jsm
     "MessageChannel:",
     // Messages sent from pageinfo.js
     "PageInfo:",
     // Messages sent from printUtils.js
     "Printing:",
-    "PageMetadata:",
     // Messages sent from viewSourceUtils.js
     "ViewSource:",
   ],
 
   INNER_TO_OUTER_MESSAGE_PREFIXES: [
     // Messages sent to browser.xml
     "Autoscroll:",
     // Messages sent to nsContextMenu.js
@@ -475,17 +474,16 @@ MessageManagerTunnel.prototype = {
     // Messages sent to RemoteFinder.jsm
     "Finder:",
     // Messages sent to MessageChannel.jsm
     "MessageChannel:",
     // Messages sent to pageinfo.js
     "PageInfo:",
     // Messages sent to printUtils.js
     "Printing:",
-    "PageMetadata:",
     // Messages sent to viewSourceUtils.js
     "ViewSource:",
   ],
 
   OUTER_TO_INNER_FRAME_SCRIPTS: [
     // DevTools server for OOP frames
     "resource://devtools/server/startup/frame.js",
   ],
deleted file mode 100644
--- a/toolkit/modules/PageMetadata.jsm
+++ /dev/null
@@ -1,295 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-"use strict";
-
-var EXPORTED_SYMBOLS = ["PageMetadata"];
-
-ChromeUtils.import("resource://gre/modules/Services.jsm");
-ChromeUtils.import("resource://gre/modules/XPCOMUtils.jsm");
-ChromeUtils.import("resource://gre/modules/microformat-shiv.js");
-
-XPCOMUtils.defineLazyServiceGetter(this, "UnescapeService",
-                                   "@mozilla.org/feed-unescapehtml;1",
-                                   "nsIScriptableUnescapeHTML");
-
-
-/**
- * Maximum number of images to discover in the document, when no preview images
- * are explicitly specified by the metadata.
- * @type {Number}
- */
-const DISCOVER_IMAGES_MAX  = 5;
-
-
-/**
- * Extract metadata and microformats from a HTML document.
- * @type {Object}
- */
-var PageMetadata = {
-  /**
-   * Get all metadata from an HTML document. This includes:
-   * - URL
-   * - title
-   * - Metadata specified in <meta> tags, including OpenGraph data
-   * - Links specified in <link> tags (short, canonical, preview images, alternative)
-   * - Content that can be found in the page content that we consider useful metadata
-   * - Microformats
-   *
-   * @param {Document} document - Document to extract data from.
-   * @param {Element} [target] - Optional element to restrict microformats lookup to.
-   * @returns {Object} Object containing the various metadata, normalized to
-   *                   merge some common alternative names for metadata.
-   */
-  getData(document, target = null) {
-    let result = {
-      url: this._validateURL(document, document.documentURI),
-      title: document.title,
-      previews: [],
-    };
-
-    // if pushState was used to change the url, most likely all meta data is
-    // invalid. This is the case with several major sites that rely on
-    // pushState. In that case, we'll only return uri and title. If document is
-    // via XHR or something, there is no view or history.
-    if (document.defaultView) {
-      let docshell = document.defaultView.docShell;
-      let shentry = {};
-      if (docshell.getCurrentSHEntry(shentry) &&
-          shentry.value && shentry.value.URIWasModified) {
-        return result;
-      }
-    }
-
-    this._getMetaData(document, result);
-    this._getLinkData(document, result);
-    this._getPageData(document, result);
-    result.microformats = this.getMicroformats(document, target);
-
-    return result;
-  },
-
-  getMicroformats(document, target = null) {
-    if (target) {
-      return Microformats.getParent(target, {node: document});
-    }
-    return Microformats.get({node: document});
-  },
-
-  /**
-   * Get metadata as defined in <meta> tags.
-   * This adds properties to an existing result object.
-   *
-   * @param {Document} document - Document to extract data from.
-   * @param {Object}  result - Existing result object to add properties to.
-   */
-  _getMetaData(document, result) {
-    // Query for standardized meta data.
-    let elements = document.querySelectorAll("head > meta[property], head > meta[name]");
-    if (elements.length < 1) {
-      return;
-    }
-
-    for (let element of elements) {
-      let value = element.getAttribute("content");
-      if (!value) {
-        continue;
-      }
-      value = UnescapeService.unescape(value.trim());
-
-      let key = element.getAttribute("property") || element.getAttribute("name");
-      if (!key) {
-        continue;
-      }
-
-      // There are a wide array of possible meta tags, expressing articles,
-      // products, etc. so all meta tags are passed through but we touch up the
-      // most common attributes.
-      result[key] = value;
-
-      switch (key) {
-        case "title":
-        case "og:title": {
-          // Only set the title if one hasn't already been obtained (e.g. from the
-          // document title element).
-          if (!result.title) {
-            result.title = value;
-          }
-          break;
-        }
-
-        case "description":
-        case "og:description": {
-          result.description = value;
-          break;
-        }
-
-        case "og:site_name": {
-          result.siteName = value;
-          break;
-        }
-
-        case "medium":
-        case "og:type": {
-          result.medium = value;
-          break;
-        }
-
-        case "og:video": {
-          let url = this._validateURL(document, value);
-          if (url) {
-            result.source = url;
-          }
-          break;
-        }
-
-        case "og:url": {
-          let url = this._validateURL(document, value);
-          if (url) {
-            result.url = url;
-          }
-          break;
-        }
-
-        case "og:image": {
-          let url = this._validateURL(document, value);
-          if (url) {
-            result.previews.push(url);
-          }
-          break;
-        }
-      }
-    }
-  },
-
-  /**
-   * Get metadata as defined in <link> tags.
-   * This adds properties to an existing result object.
-   *
-   * @param {Document} document - Document to extract data from.
-   * @param {Object}  result - Existing result object to add properties to.
-   */
-  _getLinkData(document, result) {
-    let elements = document.querySelectorAll("head > link[rel], head > link[id]");
-
-    for (let element of elements) {
-      let url = element.getAttribute("href");
-      if (!url) {
-        continue;
-      }
-      url = this._validateURL(document, UnescapeService.unescape(url.trim()));
-
-      let key = element.getAttribute("rel") || element.getAttribute("id");
-      if (!key) {
-        continue;
-      }
-
-      switch (key) {
-        case "shorturl":
-        case "shortlink": {
-          result.shortUrl = url;
-          break;
-        }
-
-        case "canonicalurl":
-        case "canonical": {
-          result.url = url;
-          break;
-        }
-
-        case "image_src": {
-          result.previews.push(url);
-          break;
-        }
-
-        case "alternate": {
-          // Expressly for oembed support but we're liberal here and will let
-          // other alternate links through. oembed defines an href, supplied by
-          // the site, where you can fetch additional meta data about a page.
-          // We'll let the client fetch the oembed data themselves, but they
-          // need the data from this link.
-          if (!result.alternate) {
-            result.alternate = [];
-          }
-
-          result.alternate.push({
-            type: element.getAttribute("type"),
-            href: element.getAttribute("href"),
-            title: element.getAttribute("title"),
-          });
-        }
-      }
-    }
-  },
-
-  /**
-   * Scrape thought the page content for additional content that may be used to
-   * suppliment explicitly defined metadata. This includes:
-   * - First few images, when no preview image metadata is explicitly defined.
-   *
-   * This adds properties to an existing result object.
-   *
-   * @param {Document} document - Document to extract data from.
-   * @param {Object}  result - Existing result object to add properties to.
-   */
-  _getPageData(document, result) {
-    if (result.previews.length < 1) {
-      result.previews = this._getImageUrls(document);
-    }
-  },
-
-  /**
-   * Find the first few images in a document, for use as preview images.
-   * Will return upto DISCOVER_IMAGES_MAX number of images.
-   *
-   * @note This is not very clever. It does not (yet) check if any of the
-   *       images may be appropriate as a preview image.
-   *
-   * @param {Document} document - Document to extract data from.
-   * @return {[string]} Array of URLs.
-   */
-  _getImageUrls(document) {
-    let result = [];
-    let elements = document.querySelectorAll("img");
-
-    for (let element of elements) {
-      let src = element.getAttribute("src");
-      if (src) {
-        result.push(this._validateURL(document, UnescapeService.unescape(src)));
-
-        // We don't want a billion images.
-        // TODO: Move this magic number to a const.
-        if (result.length > DISCOVER_IMAGES_MAX) {
-          break;
-        }
-      }
-    }
-
-    return result;
-  },
-
-  /**
-   * Validate a URL. This involves resolving the URL if it's relative to the
-   * document location, ensuring it's using an expected scheme, and stripping
-   * the userPass portion of the URL.
-   *
-   * @param {Document} document - Document to use as the root location for a relative URL.
-   * @param {string} url - URL to validate.
-   * @return {string} Result URL.
-   */
-  _validateURL(document, url) {
-    let docURI = Services.io.newURI(document.documentURI);
-    let uri = Services.io.newURI(docURI.resolve(url));
-
-    if (!["http", "https"].includes(uri.scheme)) {
-      return null;
-    }
-
-    uri = uri.mutate()
-             .setUserPass("")
-             .finalize();
-
-    return uri.spec;
-  },
-};
--- a/toolkit/modules/moz.build
+++ b/toolkit/modules/moz.build
@@ -217,17 +217,16 @@ EXTRA_JS_MODULES += [
     'Integration.jsm',
     'JSONFile.jsm',
     'Log.jsm',
     'NewTabUtils.jsm',
     'NLP.jsm',
     'ObjectUtils.jsm',
     'offlineAppCache.jsm',
     'PageMenu.jsm',
-    'PageMetadata.jsm',
     'PermissionsUtils.jsm',
     'PopupNotifications.jsm',
     'Preferences.jsm',
     'PrivateBrowsingUtils.jsm',
     'ProfileAge.jsm',
     'Promise-backend.js',
     'Promise.jsm',
     'PromiseMessage.jsm',
--- a/toolkit/modules/tests/browser/browser.ini
+++ b/toolkit/modules/tests/browser/browser.ini
@@ -1,12 +1,11 @@
 [DEFAULT]
 support-files =
   dummy_page.html
-  metadata_*.html
   file_FinderIframeTest.html
   file_FinderSample.html
   file_WebNavigation_page1.html
   file_WebNavigation_page2.html
   file_WebNavigation_page3.html
   file_WebRequest_page1.html
   file_WebRequest_page2.html
   file_getSelectionDetails_inputs.html
@@ -46,11 +45,10 @@ skip-if = debug || os = "linux"
 [browser_InlineSpellChecker.js]
 [browser_WebNavigation.js]
 skip-if = true # Superseded by WebExtension tests
 [browser_WebRequest.js]
 skip-if = (verify && debug && (os == 'mac'))
 [browser_WebRequest_ancestors.js]
 [browser_WebRequest_cookies.js]
 [browser_WebRequest_filtering.js]
-[browser_PageMetadata.js]
 [browser_PromiseMessage.js]
 [browser_Troubleshoot.js]
deleted file mode 100644
--- a/toolkit/modules/tests/browser/browser_PageMetadata.js
+++ /dev/null
@@ -1,73 +0,0 @@
-/**
- * Tests PageMetadata.jsm, which extracts metadata and microdata from a
- * document.
- */
-
-var {PageMetadata} = ChromeUtils.import("resource://gre/modules/PageMetadata.jsm", {});
-
-var rootURL = "http://example.com/browser/toolkit/modules/tests/browser/";
-
-function promiseDocument(fileName) {
-  let url = rootURL + fileName;
-
-  return new Promise((resolve, reject) => {
-    let xhr = new XMLHttpRequest();
-    xhr.onload = () => resolve(xhr.responseXML);
-    xhr.onerror = () => reject(new Error("Error loading document"));
-    xhr.open("GET", url);
-    xhr.responseType = "document";
-    xhr.send();
-  });
-}
-
-/**
- * Load a simple document.
- */
-add_task(async function simpleDoc() {
-  let fileName = "metadata_simple.html";
-  info(`Loading a simple page, ${fileName}`);
-
-  let doc = await promiseDocument(fileName);
-  Assert.notEqual(doc, null,
-                  "Should have a document to analyse");
-
-  let data = PageMetadata.getData(doc);
-  Assert.notEqual(data, null,
-                  "Should have non-null result");
-  Assert.equal(data.url, rootURL + fileName,
-               "Should have expected url property");
-  Assert.equal(data.title, "Test Title",
-               "Should have expected title property");
-  Assert.equal(data.description, "A very simple test page",
-               "Should have expected title property");
-});
-
-add_task(async function titlesDoc() {
-  let fileName = "metadata_titles.html";
-  info(`Loading titles page, ${fileName}`);
-
-  let doc = await promiseDocument(fileName);
-  Assert.notEqual(doc, null,
-                  "Should have a document to analyse");
-
-  let data = PageMetadata.getData(doc);
-  Assert.notEqual(data, null,
-                  "Should have non-null result");
-  Assert.equal(data.title, "Test Titles",
-               "Should use the page title, not the open graph title");
-});
-
-add_task(async function titlesFallbackDoc() {
-  let fileName = "metadata_titles_fallback.html";
-  info(`Loading titles page, ${fileName}`);
-
-  let doc = await promiseDocument(fileName);
-  Assert.notEqual(doc, null,
-                  "Should have a document to analyse");
-
-  let data = PageMetadata.getData(doc);
-  Assert.notEqual(data, null,
-                  "Should have non-null result");
-  Assert.equal(data.title, "Title",
-               "Should use the open graph title");
-});
deleted file mode 100644
--- a/toolkit/modules/tests/browser/metadata_simple.html
+++ /dev/null
@@ -1,10 +0,0 @@
-<!DOCTYPE html>
-<html>
-  <head>
-    <title>Test Title</title>
-    <meta property="description" content="A very simple test page">
-  </head>
-  <body>
-    Llama.
-  </body>
-</html>
deleted file mode 100644
--- a/toolkit/modules/tests/browser/metadata_titles.html
+++ /dev/null
@@ -1,11 +0,0 @@
-<!DOCTYPE html>
-<html>
-  <head>
-    <title>Test Titles</title>
-    <meta property="description" content="A very simple test page" />
-    <meta property="og:title" content="Title" />
-  </head>
-  <body>
-    Llama.
-  </body>
-</html>
deleted file mode 100644
--- a/toolkit/modules/tests/browser/metadata_titles_fallback.html
+++ /dev/null
@@ -1,10 +0,0 @@
-<!DOCTYPE html>
-<html>
-  <head>
-    <meta property="description" content="A very simple test page" />
-    <meta property="og:title" content="Title" />
-  </head>
-  <body>
-    Llama.
-  </body>
-</html>