Bug 1591880 - Expand the RegExp to match better and prevent double-counting clicks by ignoring beacon requests. r=Standard8
authorMike de Boer <mdeboer@mozilla.com>
Wed, 06 Nov 2019 15:29:15 +0000
changeset 501048 c4745a05b58b27a4f9f13bb19816c87fb407d67d
parent 501047 e9aa60b80b9b00e032e66c93a1ff6526ba62975b
child 501049 517b2dd9942590ffd8e210c4dcda627b8e6946f0
push id114167
push usercsabou@mozilla.com
push dateFri, 08 Nov 2019 00:35:25 +0000
treeherdermozilla-inbound@ac63c8962183 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersStandard8
bugs1591880
milestone72.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1591880 - Expand the RegExp to match better and prevent double-counting clicks by ignoring beacon requests. r=Standard8 Differential Revision: https://phabricator.services.mozilla.com/D51400
browser/components/search/SearchTelemetry.jsm
--- a/browser/components/search/SearchTelemetry.jsm
+++ b/browser/components/search/SearchTelemetry.jsm
@@ -42,17 +42,17 @@ const SEARCH_AD_CLICKS_SCALAR = "browser
 const SEARCH_PROVIDER_INFO = {
   google: {
     regexp: /^https:\/\/www\.google\.(?:.+)\/search/,
     queryParam: "q",
     codeParam: "client",
     codePrefixes: ["firefox"],
     followonParams: ["oq", "ved", "ei"],
     extraAdServersRegexps: [
-      /^https:\/\/www\.googleadservices\.com\/(?:pagead\/)?aclk/,
+      /^https:\/\/www\.google(?:adservices)?\.com\/(?:pagead\/)?aclk/,
     ],
   },
   duckduckgo: {
     regexp: /^https:\/\/duckduckgo\.com\//,
     queryParam: "q",
     codeParam: "t",
     codePrefixes: ["ff"],
   },
@@ -412,17 +412,17 @@ class TelemetryHandler {
    */
   _reportSerpPage(info, url) {
     let payload = `${info.provider}.in-content:${info.type}:${info.code ||
       "none"}`;
     let histogram = Services.telemetry.getKeyedHistogramById(
       SEARCH_COUNTS_HISTOGRAM_KEY
     );
     histogram.add(payload);
-    LOG(`SearchTelemetry: ${payload} for ${url}`);
+    LOG(`${payload} for ${url}`);
   }
 
   /**
    * Returns the current search provider information in use.
    * @see SEARCH_PROVIDER_INFO
    */
   get _searchProviderInfo() {
     if (!this.__searchProviderInfo) {
@@ -479,17 +479,17 @@ class ContentHandler {
 
   /**
    * Receives a message from the SearchTelemetryChild actor.
    *
    * @param {object} msg
    */
   receiveMessage(msg) {
     if (msg.name != "SearchTelemetry:PageInfo") {
-      LOG(`"Received unexpected message: ${msg.name}`);
+      LOG("Received unexpected message: " + msg.name);
       return;
     }
 
     this._reportPageWithAds(msg.data);
   }
 
   /**
    * Test-only function to override the search provider information for use
@@ -501,73 +501,81 @@ class ContentHandler {
     Services.ppmm.sharedData.set("SearchTelemetry:ProviderInfo", providerInfo);
   }
 
   /**
    * Listener that observes network activity, so that we can determine if a link
    * from a search provider page was followed, and if then if that link was an
    * ad click or not.
    *
-   * @param {nsISupports} httpChannel The channel that generated the activity.
-   * @param {number} activityType The type of activity.
-   * @param {number} activitySubtype The subtype for the activity.
-   * @param {PRTime} timestamp The time of the activity.
-   * @param {number} [extraSizeData] Any size data available for the activity.
-   * @param {string} [extraStringData] Any extra string data available for the
-   *   activity.
+   * @param {nsIChannel} nativeChannel   The channel that generated the activity.
+   * @param {number}     activityType    The type of activity.
+   * @param {number}     activitySubtype The subtype for the activity.
    */
   observeActivity(
-    httpChannel,
+    nativeChannel,
     activityType,
-    activitySubtype,
+    activitySubtype /*,
     timestamp,
     extraSizeData,
-    extraStringData
+    extraStringData*/
   ) {
+    // NOTE: the channel handling code here is inspired by WebRequest.jsm.
     if (
       !this._browserInfoByUrl.size ||
       activityType !=
         Ci.nsIHttpActivityObserver.ACTIVITY_TYPE_HTTP_TRANSACTION ||
       activitySubtype !=
-        Ci.nsIHttpActivityObserver.ACTIVITY_SUBTYPE_TRANSACTION_CLOSE
+        Ci.nsIHttpActivityObserver.ACTIVITY_SUBTYPE_RESPONSE_COMPLETE
     ) {
       return;
     }
 
-    let channel = httpChannel.QueryInterface(Ci.nsIHttpChannel);
-    let loadInfo;
-    try {
-      loadInfo = channel.loadInfo;
-    } catch (e) {
-      // Channels without a loadInfo are not pertinent.
+    // Sometimes we get a NullHttpChannel, which implements nsIHttpChannel but
+    // not nsIChannel.
+    if (!(nativeChannel instanceof Ci.nsIChannel)) {
+      return;
+    }
+    let channel = ChannelWrapper.get(nativeChannel);
+    // The wrapper is consistent across redirects, so we can use it to track state.
+    if (channel._adClickRecorded) {
+      LOG("Ad click already recorded");
       return;
     }
 
-    try {
-      let uri = channel.URI;
-      let triggerURI = loadInfo.triggeringPrincipal.URI;
-
-      if (!triggerURI || !this._browserInfoByUrl.has(triggerURI.spec)) {
+    // Make a trip through the event loop to make sure statuses have a chance to
+    // be processed before we get all the info.
+    Services.tm.dispatchToMainThread(() => {
+      // We suspect that No Content (204) responses are used to transfer or
+      // update beacons. They lead to use double-counting ad-clicks, so let's
+      // ignore them.
+      if (channel.statusCode == 204) {
+        LOG("Ignoring activity from ambiguous responses");
         return;
       }
 
-      let info = this._getProviderInfoForUrl(uri.spec, true);
+      let originURL = channel.originURI && channel.originURI.spec;
+      if (!originURL || !this._browserInfoByUrl.has(originURL)) {
+        return;
+      }
+
+      let URL = channel.finalURL;
+      let info = this._getProviderInfoForUrl(URL, true);
       if (!info) {
         return;
       }
 
-      Services.telemetry.keyedScalarAdd(SEARCH_AD_CLICKS_SCALAR, info[0], 1);
-      LOG(
-        `SearchTelemetry: Counting ad click in page for ${info[0]} ${
-          triggerURI.spec
-        }`
-      );
-    } catch (e) {
-      Cu.reportError(e);
-    }
+      try {
+        Services.telemetry.keyedScalarAdd(SEARCH_AD_CLICKS_SCALAR, info[0], 1);
+        channel._adClickRecorded = true;
+        LOG(`Counting ad click in page for ${info[0]} ${originURL} ${URL}`);
+      } catch (e) {
+        Cu.reportError(e);
+      }
+    });
   }
 
   /**
    * Adds a message listener for the window being registered to receive messages
    * from SearchTelemetryChild.
    *
    * @param {object} win The window to register.
    */
@@ -590,30 +598,26 @@ class ContentHandler {
    *
    * @param {object} info
    * @param {boolean} info.hasAds Whether or not the page has adverts.
    * @param {string} info.url The url of the page.
    */
   _reportPageWithAds(info) {
     let item = this._browserInfoByUrl.get(info.url);
     if (!item) {
-      LOG(`Expected to report URI with ads but couldn't find the information`);
+      LOG("Expected to report URI with ads but couldn't find the information");
       return;
     }
 
     Services.telemetry.keyedScalarAdd(
       SEARCH_WITH_ADS_SCALAR,
       item.info.provider,
       1
     );
-    LOG(
-      `SearchTelemetry: Counting ads in page for ${item.info.provider} ${
-        info.url
-      }`
-    );
+    LOG(`Counting ads in page for ${item.info.provider} ${info.url}`);
   }
 }
 
 /**
  * Outputs the message to the JavaScript console as well as to stdout.
  *
  * @param {string} msg The message to output.
  */