Bug 1673868 - Fix inconsistent Telemetry reporting on SERP for DuckDuckGo with ads. r=Standard8
authormcheang <mcheang@mozilla.com>
Wed, 11 May 2022 00:12:51 +0000
changeset 616944 604f4b32a0977eff93794a72b129f25c479cd2f7
parent 616943 fdfffc18bd4b219cdd2eeda61b7c182679555018
child 616945 f03bd96745a01907e98138bc9da199b86e8e291d
push id39680
push userbszekely@mozilla.com
push dateWed, 11 May 2022 09:42:52 +0000
treeherdermozilla-central@87443e31b7c0 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersStandard8
bugs1673868
milestone102.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1673868 - Fix inconsistent Telemetry reporting on SERP for DuckDuckGo with ads. r=Standard8 DuckDuckGo SERP ads were inconsistently recorded. When the ad was loaded onto the page after 1000ms, the ad was not recorded by Telemetry. Network delays can cause the ad to be loaded later. To fix this, we detect two events, DOMContentloaded and load event. If the ad is not counted on DOMContentloaded we count the ad on load event when the whole page is loaded. Differential Revision: https://phabricator.services.mozilla.com/D145424
browser/actors/SearchSERPTelemetryChild.jsm
browser/actors/SearchSERPTelemetryParent.jsm
browser/components/BrowserGlue.jsm
browser/components/search/SearchSERPTelemetry.jsm
browser/components/search/test/browser/browser.ini
browser/components/search/test/browser/browser_search_telemetry_sources_ads.js
browser/components/search/test/browser/slow_loading_page_with_ads.html
browser/components/search/test/browser/slow_loading_page_with_ads.sjs
browser/components/search/test/browser/slow_loading_page_with_ads_on_load_event.html
--- a/browser/actors/SearchSERPTelemetryChild.jsm
+++ b/browser/actors/SearchSERPTelemetryChild.jsm
@@ -183,15 +183,24 @@ class SearchSERPTelemetryChild extends J
           check();
         }
         break;
       }
       case "DOMContentLoaded": {
         check();
         break;
       }
+      case "load": {
+        // We check both DOMContentLoaded and load in case the page has
+        // taken a long time to load and the ad is only detected on load.
+        // We still check at DOMContentLoaded because if the page hasn't
+        // finished loading and the user navigates away, we still want to know
+        // if there were ads on the page or not at that time.
+        check();
+        break;
+      }
       case "unload": {
         cancelCheck();
         break;
       }
     }
   }
 }
--- a/browser/actors/SearchSERPTelemetryParent.jsm
+++ b/browser/actors/SearchSERPTelemetryParent.jsm
@@ -8,13 +8,15 @@ var EXPORTED_SYMBOLS = ["SearchSERPTelem
 ChromeUtils.defineModuleGetter(
   this,
   "SearchSERPTelemetry",
   "resource:///modules/SearchSERPTelemetry.jsm"
 );
 
 class SearchSERPTelemetryParent extends JSWindowActorParent {
   receiveMessage(msg) {
+    let browser = this.browsingContext.top.embedderElement;
+
     if (msg.name == "SearchTelemetry:PageInfo") {
-      SearchSERPTelemetry.reportPageWithAds(msg.data);
+      SearchSERPTelemetry.reportPageWithAds(msg.data, browser);
     }
   }
 }
--- a/browser/components/BrowserGlue.jsm
+++ b/browser/components/BrowserGlue.jsm
@@ -692,16 +692,17 @@ let JSWINDOWACTORS = {
     child: {
       moduleURI: "resource:///actors/SearchSERPTelemetryChild.jsm",
       events: {
         DOMContentLoaded: {},
         pageshow: { mozSystemGroup: true },
         // The 'unload' event is only used to clean up state, and should not
         // force actor creation.
         unload: { createActor: false },
+        load: { mozSystemGroup: true, capture: true },
       },
     },
   },
 
   ShieldFrame: {
     parent: {
       moduleURI: "resource://normandy-content/ShieldFrameParent.jsm",
     },
--- a/browser/components/search/SearchSERPTelemetry.jsm
+++ b/browser/components/search/SearchSERPTelemetry.jsm
@@ -56,24 +56,24 @@ class TelemetryHandler {
   _searchProviderInfo = null;
 
   // An instance of remote settings that is used to access the provider info.
   _telemetrySettings;
 
   // _browserInfoByURL is a map of tracked search urls to objects containing:
   // * {object} info
   //   the search provider information associated with the url.
-  // * {WeakSet} browsers
-  //   a weak set of browsers that have the url loaded.
+  // * {WeakMap} browsers
+  //   a weak map of browsers that have the url loaded and their ad report state.
   // * {integer} count
   //   a manual count of browsers logged.
-  // We keep a weak set of browsers, in case we miss something on our counts
+  // We keep a weak map of browsers, in case we miss something on our counts
   // and cause a memory leak - worst case our map is slightly bigger than it
   // needs to be.
-  // The manual count is because WeakSet doesn't give us size/length
+  // The manual count is because WeakMap doesn't give us size/length
   // information, but we want to know when we can clean up our associated
   // entry.
   _browserInfoByURL = new Map();
 
   // _browserSourceMap is a map of the latest search source for a particular
   // browser - one of the KNOWN_SEARCH_SOURCES in BrowserSearchTelemetry.
   _browserSourceMap = new WeakMap();
 
@@ -194,18 +194,18 @@ class TelemetryHandler {
         newProvider.extraAdServersRegexps = provider.extraAdServersRegexps.map(
           r => new RegExp(r)
         );
       }
       return newProvider;
     });
   }
 
-  reportPageWithAds(info) {
-    this._contentHandler._reportPageWithAds(info);
+  reportPageWithAds(info, browser) {
+    this._contentHandler._reportPageWithAds(info, browser);
   }
 
   /**
    * This may start tracking a tab based on the URL. If the URL matches a search
    * partner, and it has a code, then we'll start tracking it. This will aid
    * determining if it is a page we should be tracking for adverts.
    *
    * @param {object} browser
@@ -235,23 +235,24 @@ class TelemetryHandler {
     } else if (this._browserSourceMap.has(browser)) {
       source = this._browserSourceMap.get(browser);
       this._browserSourceMap.delete(browser);
     }
 
     this._reportSerpPage(info, source, url);
 
     let item = this._browserInfoByURL.get(url);
+
     if (item) {
-      item.browsers.add(browser);
+      item.browsers.set(browser, "no ads reported");
       item.count++;
       item.source = source;
     } else {
-      this._browserInfoByURL.set(url, {
-        browsers: new WeakSet([browser]),
+      item = this._browserInfoByURL.set(url, {
+        browsers: new WeakMap().set(browser, "no ads reported"),
         info,
         count: 1,
         source,
       });
     }
   }
 
   /**
@@ -277,23 +278,23 @@ class TelemetryHandler {
    * Parts of the URL, like search params and hashes, may be mutated by scripts
    * on a page we're tracking. Since we don't want to keep track of that
    * ourselves in order to keep the list of browser objects a weak-referenced
    * set, we do optional fuzzy matching of URLs to fetch the most relevant item
    * that contains tracking information.
    *
    * @param {string} url URL to fetch the tracking data for.
    * @returns {object} Map containing the following members:
-   *   - {WeakSet} browsers
-   *     Set of browser elements that belong to `url`.
+   *   - {WeakMap} browsers
+   *     Map of browser elements that belong to `url` and their ad report state.
    *   - {object} info
    *     Info dictionary as returned by `_checkURLForSerpMatch`.
    *   - {number} count
    *     The number of browser element we can most accurately tell we're
-   *     tracking, since they're inside a WeakSet.
+   *     tracking, since they're inside a WeakMap.
    */
   _findBrowserItemForURL(url) {
     try {
       url = new URL(url);
     } catch (ex) {
       return null;
     }
 
@@ -771,30 +772,43 @@ class ContentHandler {
     });
   }
 
   /**
    * Logs telemetry for a page with adverts, if it is one of the partner search
    * provider pages that we're tracking.
    *
    * @param {object} info
-   * @param {boolean} info.hasAds Whether or not the page has adverts.
-   * @param {string} info.url The url of the page.
+   * @param {boolean} info.hasAds
+   *     Whether or not the page has adverts.
+   * @param {string} info.url
+   *     The url of the page.
+   * @param {object} browser
+   *     The browser associated with the page.
    */
-  _reportPageWithAds(info) {
+  _reportPageWithAds(info, browser) {
     let item = this._findBrowserItemForURL(info.url);
     if (!item) {
       logConsole.warn(
         "Expected to report URI for",
         info.url,
         "with ads but couldn't find the information"
       );
       return;
     }
 
+    let adReportState = item.browsers.get(browser);
+    if (adReportState == "ad reported") {
+      logConsole.debug(
+        "Ad was previously reported for browser with URI",
+        info.url
+      );
+      return;
+    }
+
     logConsole.debug(
       "Counting ads in page for",
       item.info.provider,
       item.info.type,
       item.source,
       info.url
     );
     Services.telemetry.keyedScalarAdd(
@@ -802,12 +816,14 @@ class ContentHandler {
       `${item.info.provider}:${item.info.oldType}`,
       1
     );
     Services.telemetry.keyedScalarAdd(
       SEARCH_WITH_ADS_SCALAR_BASE + item.source,
       `${item.info.provider}:${item.info.type}`,
       1
     );
+
+    item.browsers.set(browser, "ad reported");
   }
 }
 
 var SearchSERPTelemetry = new TelemetryHandler();
--- a/browser/components/search/test/browser/browser.ini
+++ b/browser/components/search/test/browser/browser.ini
@@ -44,16 +44,19 @@ skip-if = (os == "win" && processor == "
 [browser_search_telemetry_aboutHome.js]
 tags = search-telemetry
 [browser_search_telemetry_content.js]
 tags = search-telemetry
 [browser_search_telemetry_searchbar.js]
 https_first_disabled = true
 tags = search-telemetry
 support-files =
+  slow_loading_page_with_ads_on_load_event.html
+  slow_loading_page_with_ads.html
+  slow_loading_page_with_ads.sjs
   telemetrySearchSuggestions.sjs
   telemetrySearchSuggestions.xml
 [browser_search_telemetry_sources_ads.js]
 skip-if = !debug && (os == 'linux') # Bug 1515466
 tags = search-telemetry
 support-files =
   searchTelemetry.html
   searchTelemetryAd.html
--- a/browser/components/search/test/browser/browser_search_telemetry_sources_ads.js
+++ b/browser/components/search/test/browser/browser_search_telemetry_sources_ads.js
@@ -19,16 +19,25 @@ const TEST_PROVIDER_INFO = [
     telemetryId: "example",
     searchPageRegexp: /^http:\/\/mochi.test:.+\/browser\/browser\/components\/search\/test\/browser\/searchTelemetry(?:Ad)?.html/,
     queryParamName: "s",
     codeParamName: "abc",
     taggedCodes: ["ff"],
     followOnParamNames: ["a"],
     extraAdServersRegexps: [/^https:\/\/example\.com\/ad2?/],
   },
+  {
+    telemetryId: "slow-page-load",
+    searchPageRegexp: /^http:\/\/mochi.test:.+\/browser\/browser\/components\/search\/test\/browser\/slow_loading_page_with_ads(_on_load_event)?.html/,
+    queryParamName: "s",
+    codeParamName: "abc",
+    taggedCodes: ["ff"],
+    followOnParamNames: ["a"],
+    extraAdServersRegexps: [/^https:\/\/example\.com\/ad2?/],
+  },
 ];
 
 function getPageUrl(useExample = false, useAdPage = false) {
   let server = useExample ? "example.com" : "mochi.test:8888";
   let page = useAdPage ? "searchTelemetryAd.html" : "searchTelemetry.html";
   return `http://${server}/browser/browser/components/search/test/browser/${page}`;
 }
 
@@ -151,16 +160,80 @@ add_task(async function test_track_ad() 
       "browser.search.with_ads": { "example:sap": 1 },
       "browser.search.withads.unknown": { "example:tagged": 1 },
     }
   );
 
   BrowserTestUtils.removeTab(tab);
 });
 
+add_task(async function test_track_ad_on_DOMContentLoaded() {
+  Services.telemetry.clearScalars();
+  searchCounts.clear();
+
+  let url =
+    getRootDirectory(gTestPath).replace(
+      "chrome://mochitests/content",
+      "http://mochi.test:8888"
+    ) + "slow_loading_page_with_ads.html";
+
+  let observeAdPreviouslyRecorded = TestUtils.consoleMessageObserved(msg => {
+    return msg.wrappedJSObject.arguments[0].includes(
+      "Ad was previously reported for browser with URI"
+    );
+  });
+
+  let tab = await BrowserTestUtils.openNewForegroundTab(
+    gBrowser,
+    getSERPUrl(url)
+  );
+
+  // Observe ad was counted on DOMContentLoaded.
+  // We do not count the ad again on load.
+  await observeAdPreviouslyRecorded;
+
+  await assertSearchSourcesTelemetry(
+    { "slow-page-load.in-content:sap:ff": 1 },
+    {
+      "browser.search.content.unknown": { "slow-page-load:tagged:ff": 1 },
+      "browser.search.with_ads": { "slow-page-load:sap": 1 },
+      "browser.search.withads.unknown": { "slow-page-load:tagged": 1 },
+    }
+  );
+
+  BrowserTestUtils.removeTab(tab);
+});
+
+add_task(async function test_track_ad_on_load_event() {
+  Services.telemetry.clearScalars();
+  searchCounts.clear();
+
+  let url =
+    getRootDirectory(gTestPath).replace(
+      "chrome://mochitests/content",
+      "http://mochi.test:8888"
+    ) + "slow_loading_page_with_ads_on_load_event.html";
+
+  let tab = await BrowserTestUtils.openNewForegroundTab(
+    gBrowser,
+    getSERPUrl(url)
+  );
+
+  await assertSearchSourcesTelemetry(
+    { "slow-page-load.in-content:sap:ff": 1 },
+    {
+      "browser.search.content.unknown": { "slow-page-load:tagged:ff": 1 },
+      "browser.search.with_ads": { "slow-page-load:sap": 1 },
+      "browser.search.withads.unknown": { "slow-page-load:tagged": 1 },
+    }
+  );
+
+  BrowserTestUtils.removeTab(tab);
+});
+
 add_task(async function test_track_ad_organic() {
   Services.telemetry.clearScalars();
   searchCounts.clear();
 
   let tab = await BrowserTestUtils.openNewForegroundTab(
     gBrowser,
     getSERPUrl(getPageUrl(false, true), true)
   );
new file mode 100644
--- /dev/null
+++ b/browser/components/search/test/browser/slow_loading_page_with_ads.html
@@ -0,0 +1,13 @@
+<!DOCTYPE html>
+<html>
+<head>
+</head>
+<body>
+  <a id="ad1" href="https://example.com/ad">Ad link</a>
+  <a id="ad2" href="https://example.com/ad2">Second Ad link</a>
+  <!-- The iframe is used to include a sub-document load in the test, which
+       should not affect the recorded telemetry. -->
+  <iframe src="SearchTelemetry.html"></iframe>
+  <img src="http://mochi.test:8888/browser/browser/components/search/test/browser/slow_loading_page_with_ads.sjs">
+</body>
+</html>
new file mode 100644
--- /dev/null
+++ b/browser/components/search/test/browser/slow_loading_page_with_ads.sjs
@@ -0,0 +1,21 @@
+/* Any copyright is dedicated to the Public Domain.
+ * http://creativecommons.org/publicdomain/zero/1.0/ */
+
+function handleRequest(request, response) {
+  const DELAY_MS = 2000;
+  response.processAsync();
+
+  response.setHeader("Cache-Control", "no-cache", false);
+  response.setHeader("Content-Type", "image/png", false);
+  response.write("Start loading image");
+
+  let timer = Cc["@mozilla.org/timer;1"].createInstance(Ci.nsITimer);
+  timer.init(
+    () => {
+      response.write("Finish loading image");
+      response.finish();
+    },
+    DELAY_MS,
+    Ci.nsITimer.TYPE_ONE_SHOT
+  );
+}
new file mode 100644
--- /dev/null
+++ b/browser/components/search/test/browser/slow_loading_page_with_ads_on_load_event.html
@@ -0,0 +1,29 @@
+<!DOCTYPE html>
+<html>
+<head>
+</head>
+<body id='body'>
+</body>
+  <img src="http://mochi.test:8888/browser/browser/components/search/test/browser/slow_loading_page_with_ads.sjs">
+  <script>
+    setTimeout(() => {
+      let body = document.getElementById('body');
+      let ad1 = document.createElement('a');
+      ad1.setAttribute('id', 'ad1');
+      ad1.setAttribute('href', 'https://example.com/ad');
+      ad1.innerHTML = 'Ad link'
+
+      let ad2 = document.createElement('a');
+      ad2.setAttribute('id', 'ad2');
+      ad2.setAttribute('href', 'https://example.com/ad2');
+      ad2.innerHTML = 'Second Ad link'
+
+      let frame = document.createElement('iframe');
+      frame.setAttribute('src', 'searchTelemetry.html');
+
+      body.appendChild(ad1);
+      body.appendChild(ad2);
+      body.appendChild(frame);
+    }, 2000);
+  </script>
+</html>