Bug 1420049. r=Gijs
author: Ursula Sarracini
Mon, 27 Nov 2017 10:40:01 -0500
changeset 703827 b991da77be9242769e1083c5929c78dbc10ebeb3
parent 703826 cec895f7cedad28bea953572480acefd7bec4d08
child 703828 244c97bc16e3f6b394c0915a4f7b62f97176e4d7
push id: 90984
push user: bmo:gl@mozilla.com
push date: Mon, 27 Nov 2017 20:54:52 +0000
reviewers: Gijs
bugs: 1420049
milestone: 59.0a1
Bug 1420049. r=Gijs
browser/base/content/test/metaTags/browser_meta_tags.js
browser/base/content/test/metaTags/meta_tags.html
browser/modules/ContentMetaHandler.jsm
--- a/browser/base/content/test/metaTags/browser_meta_tags.js
+++ b/browser/base/content/test/metaTags/browser_meta_tags.js
@@ -14,17 +14,17 @@ const URL = "https://example.com/browser
  * we have to wait for some time before checking that the page info was stored.
  */
 add_task(async function test_metadata() {
   const tab = await BrowserTestUtils.openNewForegroundTab(gBrowser, URL);
 
   // Wait until places has stored the page info
   const pageInfo = await waitForPageInfo(URL);
   is(pageInfo.description, "og:description", "got the correct description");
-  is(pageInfo.previewImageURL.href, "og:image:secure_url", "got the correct preview image");
+  is(pageInfo.previewImageURL.href, "https://test.com/og-image-secure-url.jpg", "got the correct preview image");
 
   await BrowserTestUtils.removeTab(tab);
   await PlacesTestUtils.clearHistory();
 });
 
 /**
  * This test is almost like the previous one except it opens a second tab to
  * make sure the extra tab does not cause the debounce logic to be skipped. If
@@ -35,14 +35,14 @@ add_task(async function multiple_tabs() 
 
   // Add a background tab to cause another page to load *without* putting the
   // desired URL in a background tab, which results in its timers being throttled.
   gBrowser.addTab();
 
   // Wait until places has stored the page info
   const pageInfo = await waitForPageInfo(URL);
   is(pageInfo.description, "og:description", "got the correct description");
-  is(pageInfo.previewImageURL.href, "og:image:secure_url", "got the correct preview image");
+  is(pageInfo.previewImageURL.href, "https://test.com/og-image-secure-url.jpg", "got the correct preview image");
 
   await BrowserTestUtils.removeTab(tab);
   await BrowserTestUtils.removeTab(gBrowser.selectedTab);
   await PlacesTestUtils.clearHistory();
 });
--- a/browser/base/content/test/metaTags/meta_tags.html
+++ b/browser/base/content/test/metaTags/meta_tags.html
@@ -2,26 +2,26 @@
 <html>
   <head>
     <meta charset="UTF-8" />
     <title>MetaTags</title>
     <meta property="twitter:description" content="twitter:description" />
     <meta property="og:description" content="og:description" />
     <meta name="description" content="description" />
     <meta name="unknown:tag" content="unknown:tag" />
-    <meta property="og:image" content="og:image" />
-    <meta property="twitter:image" content="twitter:image" />
-    <meta property="og:image:url" content="og:image:url" />
-    <meta name="thumbnail" content="thumbnail" />
+    <meta property="og:image" content="https://test.com/og-image.jpg" />
+    <meta property="twitter:image" content="https://test.com/twitter-image.jpg" />
+    <meta property="og:image:url" content="https://test.com/og-image-url" />
+    <meta name="thumbnail" content="https://test.com/thumbnail.jpg" />
   </head>
   <body>
     <script>
       function addMeta(tag) {
         const meta = document.createElement("meta");
-        meta.content = tag;
+        meta.content = "https://test.com/og-image-secure-url.jpg";
         meta.setAttribute("property", tag);
         document.head.appendChild(meta);
       }
 
       // Delay adding this "best" image tag to test that later tags are used.
       // Use a delay that is long enough for tests to check for wrong metadata.
       setTimeout(() => addMeta("og:image:secure_url"), 100);
     </script>
--- a/browser/modules/ContentMetaHandler.jsm
+++ b/browser/modules/ContentMetaHandler.jsm
@@ -6,16 +6,18 @@
 const {utils: Cu, interfaces: Ci, classes: Cc} = Components;
 Cu.importGlobalProperties(["URL"]);
 Cu.import("resource://gre/modules/Services.jsm");
 
 // Debounce time in milliseconds - this should be long enough to account for
 // sync script tags that could appear between desired meta tags
 const TIMEOUT_DELAY = 1000;
 
+const ACCEPTED_PROTOCOLS = ["http:", "https:"];
+
 // Possible description tags, listed in order from least favourable to most favourable
 const DESCRIPTION_RULES = [
   "twitter:description",
   "description",
   "og:description"
 ];
 
 // Possible image tags, listed in order from least favourable to most favourable
@@ -40,16 +42,38 @@ const PREVIEW_IMAGE_RULES = [
  *
  * @returns {Boolean} true if the incoming meta tag is better than the current
  *                    best meta tag of that same kind, false otherwise
  */
 function shouldExtractMetadata(aRules, aTag, aEntry) {
   return aRules.indexOf(aTag) > aEntry.currMaxScore;
 }
 
+/*
+ * Decide whether a preview image URL may be stored: its protocol must be listed
+ * in ACCEPTED_PROTOCOLS and the load check against a null principal must not throw.
+ *
+ * @param {URL} aURL
+ *          The URL object to check; its `protocol` and `href` properties are read
+ * @returns {Boolean} true if both checks pass and the URL can be stored, false otherwise
+ */
+function checkLoadURIStr(aURL) {
+  if (!ACCEPTED_PROTOCOLS.includes(aURL.protocol)) {
+    return false; // protocol is not in the ACCEPTED_PROTOCOLS allow-list
+  }
+  try {
+    let ssm = Services.scriptSecurityManager;
+    let principal = ssm.createNullPrincipal({});
+    ssm.checkLoadURIStrWithPrincipal(principal, aURL.href, ssm.DISALLOW_INHERIT_PRINCIPAL);
+  } catch (e) {
+    return false; // any exception raised by the check above rejects the URL
+  }
+  return true;
+}
+
 this.EXPORTED_SYMBOLS = [ "ContentMetaHandler" ];
 
 /*
  * This listens to DOMMetaAdded events and collects relevant metadata about the
  * meta tag received. Then, it sends the metadata gathered from the meta tags
  * and the url of the page as it's payload to be inserted into moz_places.
  */
 
@@ -83,28 +107,36 @@ this.ContentMetaHandler = {
     let tag = name || prop;
 
     const entry = metaTags.get(url) || {
       description: {value: null, currMaxScore: -1},
       image: {value: null, currMaxScore: -1},
       timeout: null
     };
 
+    // Malformed meta tag - do not store it
+    const content = metaTag.getAttributeNS(null, "content");
+    if (!content) {
+      return;
+    }
+
     if (shouldExtractMetadata(DESCRIPTION_RULES, tag, entry.description)) {
       // Extract the description
-      const value = metaTag.getAttributeNS(null, "content");
-      if (value) {
-        entry.description.value = value;
-        entry.description.currMaxScore = DESCRIPTION_RULES.indexOf(tag);
-      }
+      entry.description.value = content;
+      entry.description.currMaxScore = DESCRIPTION_RULES.indexOf(tag);
     } else if (shouldExtractMetadata(PREVIEW_IMAGE_RULES, tag, entry.image)) {
       // Extract the preview image
-      const value = metaTag.getAttributeNS(null, "content");
-      if (value) {
-        entry.image.value = new URL(value, url).href;
+      let value;
+      try {
+        value = new URL(content, url);
+      } catch (e) {
+        return;
+      }
+      if (value && checkLoadURIStr(value)) {
+        entry.image.value = value.href;
         entry.image.currMaxScore = PREVIEW_IMAGE_RULES.indexOf(tag);
       }
     } else {
       // We don't care about other meta tags
       return;
     }
 
     if (!metaTags.has(url)) {