Bug 1482158 - Add telemetry for other major engines. r=mikedeboer
authorMichael Kaply <mozilla@kaply.com>
Thu, 06 Sep 2018 16:14:25 -0500
changeset 435543 b2e1ae4f75548489e995029090cedaad5b3f317d
parent 435542 609607ebb73e1a0b9e94c25f3f2558d7fccf4421
child 435544 b8e2a5e110fd37294924c1b4d0b93636def41f7b
push id107668
push usermozilla@kaply.com
push dateMon, 10 Sep 2018 20:56:55 +0000
treeherdermozilla-inbound@b2e1ae4f7554 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersmikedeboer
bugs1482158
milestone64.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1482158 - Add telemetry for other major engines. r=mikedeboer
browser/modules/BrowserUsageTelemetry.jsm
netwerk/base/nsIBrowserSearchService.idl
toolkit/components/search/nsSearchService.js
toolkit/components/search/tests/xpcshell/test_urltelemetry.js
toolkit/components/search/tests/xpcshell/xpcshell.ini
--- a/browser/modules/BrowserUsageTelemetry.jsm
+++ b/browser/modules/BrowserUsageTelemetry.jsm
@@ -8,23 +8,20 @@
 var EXPORTED_SYMBOLS = [
   "BrowserUsageTelemetry",
   "URLBAR_SELECTED_RESULT_TYPES",
   "URLBAR_SELECTED_RESULT_METHODS",
   "MINIMUM_TAB_COUNT_INTERVAL_MS",
  ];
 
 ChromeUtils.import("resource://gre/modules/Services.jsm");
-ChromeUtils.import("resource://gre/modules/XPCOMUtils.jsm");
 
 ChromeUtils.defineModuleGetter(this, "PrivateBrowsingUtils",
                                "resource://gre/modules/PrivateBrowsingUtils.jsm");
 
-XPCOMUtils.defineLazyGlobalGetters(this, ["URLSearchParams"]);
-
 // The upper bound for the count of the visited unique domain names.
 const MAX_UNIQUE_VISITED_DOMAINS = 100;
 
 // Observed topic names.
 const TAB_RESTORING_TOPIC = "SSTabRestoring";
 const TELEMETRY_SUBSESSIONSPLIT_TOPIC = "internal-telemetry-after-subsession-split";
 const DOMWINDOW_OPENED_TOPIC = "domwindowopened";
 const AUTOCOMPLETE_ENTER_TEXT_TOPIC = "autocomplete-did-enter-text";
@@ -197,39 +194,17 @@ let URICountListener = {
     if (shouldCountURI) {
       Services.telemetry.scalarAdd(UNFILTERED_URI_COUNT_SCALAR_NAME, 1);
     }
 
     if (!this.isHttpURI(uri)) {
       return;
     }
 
-    let parseURLResult = Services.search.parseSubmissionURL(uriSpec);
-    if (parseURLResult.engine) {
-      this._recordSearchTelemetry(uriSpec, parseURLResult);
-    } else if (this._urlsQueuedForParsing) {
-      if (Services.search.isInitialized) {
-        this._urlsQueuedForParsing = null;
-      } else {
-        this._urlsQueuedForParsing.push(uriSpec);
-        if (this._urlsQueuedForParsing.length == 1) {
-          Services.search.init(rv => {
-            if (Components.isSuccessCode(rv)) {
-              for (let url of this._urlsQueuedForParsing) {
-                let innerParseURLResult = Services.search.parseSubmissionURL(url);
-                if (innerParseURLResult.engine) {
-                  this._recordSearchTelemetry(url, innerParseURLResult);
-                }
-              }
-            }
-            this._urlsQueuedForParsing = null;
-          });
-        }
-      }
-    }
+    Services.search.recordSearchURLTelemetry(uriSpec);
 
     if (!shouldCountURI) {
       return;
     }
 
     // Update the URI counts.
     Services.telemetry.scalarAdd(TOTAL_URI_COUNT_SCALAR_NAME, 1);
 
@@ -257,41 +232,16 @@ let URICountListener = {
 
   /**
    * Reset the counts. This should be called when breaking a session in Telemetry.
    */
   reset() {
     this._domainSet.clear();
   },
 
-  _urlsQueuedForParsing: [],
-
-  _recordSearchTelemetry(url, parseURLResult) {
-    switch (parseURLResult.engine.identifier) {
-      case "google":
-      case "google-2018":
-        let type;
-        let queries = new URLSearchParams(url.split("#")[0].split("?")[1]);
-        let code = queries.get("client");
-        if (code) {
-          // Detecting follow-on searches for sap is a little tricky.
-          // There are a few parameters that only show up
-          // with follow-ons, so we look for those. (oq/ved/ei)
-          type = queries.has("oq") || queries.has("ved") || queries.has("ei") ? "sap-follow-on" : "sap";
-        } else {
-          type = "organic";
-        }
-        let payload = `google.in-content:${type}:${code || "none"}`;
-
-        let histogram = Services.telemetry.getKeyedHistogramById("SEARCH_COUNTS");
-        histogram.add(payload);
-        break;
-    }
-  },
-
   QueryInterface: ChromeUtils.generateQI([Ci.nsIWebProgressListener,
                                           Ci.nsISupportsWeakReference]),
 };
 
 let urlbarListener = {
 
   // This is needed for recordUrlbarSelectedResultMethod().
   selectedIndex: -1,
--- a/netwerk/base/nsIBrowserSearchService.idl
+++ b/netwerk/base/nsIBrowserSearchService.idl
@@ -514,16 +514,26 @@ interface nsIBrowserSearchService : nsIS
    * The expected URI parameter for the search terms must exist in the query
    * string, but other parameters are ignored.
    *
    * @param url
    *        String containing the URL to parse, for example
    *        "https://www.google.com/search?q=terms".
    */
   nsISearchParseSubmissionResult parseSubmissionURL(in AString url);
+
+  /**
+   * Determines if a URL is related to search and if so, records the 
+   * appropriate telemtry.
+   *
+   * @param url
+   *        String containing the URL to parse, for example
+   *        "https://www.google.com/search?q=terms".
+   */
+  boolean recordSearchURLTelemetry(in AString url);
 };
 
 %{ C++
 /**
  * The observer topic to listen to for actions performed on installed
  * search engines.
  */
 #define SEARCH_ENGINE_TOPIC "browser-search-engine-modified"
--- a/toolkit/components/search/nsSearchService.js
+++ b/toolkit/components/search/nsSearchService.js
@@ -183,16 +183,55 @@ var OS_UNSUPPORTED_PARAMS = [
 // specifies an updateURL, but not an updateInterval.
 const SEARCH_DEFAULT_UPDATE_INTERVAL = 7;
 
 // The default interval before checking again for the name of the
 // default engine for the region, in seconds. Only used if the response
 // from the server doesn't specify an interval.
 const SEARCH_GEO_DEFAULT_UPDATE_INTERVAL = 2592000; // 30 days.
 
+// Regular expressions used to identify common search URLS.
+const SEARCH_URL_REGEX = new RegExp([
+  /^https:\/\/www\.(google)\.(?:.+)\/search/,
+  /^https:\/\/(?:.*)search\.(yahoo)\.com\/search/,
+  /^https:\/\/www\.(bing)\.com\/search/,
+  /^https:\/\/(duckduckgo)\.com\//,
+  /^https:\/\/www\.(baidu)\.com\/(?:s|baidu)/,
+].map(regex => regex.source).join("|"));
+
+// Used to identify various parameters (query, code, etc.) in search URLS.
+const SEARCH_PROVIDER_INFO = {
+  "google": {
+    "queryParam": "q",
+    "codeParam": "client",
+    "codePrefixes": ["firefox"],
+    "followonParams": ["oq", "ved", "ei"],
+  },
+  "duckduckgo": {
+    "queryParam": "q",
+    "codeParam": "t",
+    "codePrefixes": ["ff"],
+  },
+  "yahoo": {
+    "queryParam": "p",
+  },
+  "baidu": {
+    "queryParam": "wd",
+    "codeParam": "tn",
+    "codePrefixes": ["monline_dg"],
+    "followonParams": ["oq"],
+  },
+  "bing": {
+    "queryParam": "q",
+    "codeParam": "pc",
+    "codePrefixes": ["MOZ", "MZ"],
+  },
+};
+
+const SEARCH_COUNTS_HISTOGRAM_KEY = "SEARCH_COUNTS";
 /**
  * Prefixed to all search debug output.
  */
 const SEARCH_LOG_PREFIX = "*** Search: ";
 
 /**
  * Outputs aText to the JavaScript console as well as to stdout.
  */
@@ -4415,16 +4454,68 @@ SearchService.prototype = {
     } catch (ex) {
       // Decoding errors will cause this match to be ignored.
       return gEmptyParseSubmissionResult;
     }
 
     return new ParseSubmissionResult(mapEntry.engine, terms, offset, length);
   },
 
+  recordSearchURLTelemetry(url) {
+    let matches = url.match(SEARCH_URL_REGEX);
+    if (!matches) {
+      return;
+    }
+    let provider = matches.filter((e, i) => e && i != 0)[0];
+    let searchProviderInfo = SEARCH_PROVIDER_INFO[provider];
+    let queries = new URLSearchParams(url.split("#")[0].split("?")[1]);
+    if (!queries.get(searchProviderInfo.queryParam)) {
+      return;
+    }
+    // Default to organic to simplify things.
+    // We override type in the sap cases.
+    let type = "organic";
+    let code;
+    if (searchProviderInfo.codeParam) {
+      code = queries.get(searchProviderInfo.codeParam);
+      if (code &&
+          searchProviderInfo.codePrefixes.some(p => code.startsWith(p))) {
+        if (searchProviderInfo.followonParams &&
+           searchProviderInfo.followonParams.some(p => queries.has(p))) {
+          type = "sap-follow-on";
+        } else {
+          type = "sap";
+        }
+      } else if (provider == "bing") {
+        // Bing requires lots of extra work related to cookies.
+        let secondaryCode = queries.get("form");
+        // This code is used for all Bing follow-on searches.
+        if (secondaryCode == "QBRE") {
+          for (let cookie of Services.cookies.getCookiesFromHost("www.bing.com")) {
+            if (cookie.name == "SRCHS") {
+              // If this cookie is present, it's probably an SAP follow-on.
+              // This might be an organic follow-on in the same session,
+              // but there is no way to tell the difference.
+              if (searchProviderInfo.codePrefixes.some(p => cookie.value.startsWith("PC=" + p))) {
+                type = "sap-follow-on";
+                code = cookie.value.split("=")[1];
+                break;
+              }
+            }
+          }
+        }
+      }
+    }
+
+    let payload = `${provider}.in-content:${type}:${code || "none"}`;
+    let histogram = Services.telemetry.getKeyedHistogramById(SEARCH_COUNTS_HISTOGRAM_KEY);
+    histogram.add(payload);
+    LOG("nsSearchService::recordSearchURLTelemetry: " + payload);
+  },
+
   // nsIObserver
   observe: function SRCH_SVC_observe(aEngine, aTopic, aVerb) {
     switch (aTopic) {
       case SEARCH_ENGINE_TOPIC:
         switch (aVerb) {
           case SEARCH_ENGINE_LOADED:
             var engine = aEngine.QueryInterface(Ci.nsISearchEngine);
             LOG("nsSearchService::observe: Done installation of " + engine.name
new file mode 100644
--- /dev/null
+++ b/toolkit/components/search/tests/xpcshell/test_urltelemetry.js
@@ -0,0 +1,83 @@
+/* Any copyright is dedicated to the Public Domain.
+   http://creativecommons.org/publicdomain/zero/1.0/ */
+
+add_task(async function test_parsing_search_urls() {
+  let hs;
+  // Google search access point.
+  Services.search.recordSearchURLTelemetry("https://www.google.com/search?q=test&ie=utf-8&oe=utf-8&client=firefox-b-1-ab");
+  hs = Services.telemetry.getKeyedHistogramById("SEARCH_COUNTS").snapshot();
+  Assert.ok(hs);
+  Assert.ok("google.in-content:sap:firefox-b-1-ab" in hs, "The histogram must contain the correct key");
+
+  // Google search access point follow-on.
+  Services.search.recordSearchURLTelemetry("https://www.google.com/search?client=firefox-b-1-ab&ei=EI_VALUE&q=test2&oq=test2&gs_l=GS_L_VALUE");
+  hs = Services.telemetry.getKeyedHistogramById("SEARCH_COUNTS").snapshot();
+  Assert.ok(hs);
+  Assert.ok("google.in-content:sap-follow-on:firefox-b-1-ab" in hs, "The histogram must contain the correct key");
+
+  // Google organic.
+  Services.search.recordSearchURLTelemetry("https://www.google.com/search?source=hp&ei=EI_VALUE&q=test&oq=test&gs_l=GS_L_VALUE");
+  hs = Services.telemetry.getKeyedHistogramById("SEARCH_COUNTS").snapshot();
+  Assert.ok(hs);
+  Assert.ok("google.in-content:organic:none" in hs, "The histogram must contain the correct key");
+
+  // Google organic UK.
+  Services.search.recordSearchURLTelemetry("https://www.google.co.uk/search?source=hp&ei=EI_VALUE&q=test&oq=test&gs_l=GS_L_VALUE");
+  hs = Services.telemetry.getKeyedHistogramById("SEARCH_COUNTS").snapshot();
+  Assert.ok(hs);
+  Assert.ok("google.in-content:organic:none" in hs, "The histogram must contain the correct key");
+
+  // Yahoo organic.
+  Services.search.recordSearchURLTelemetry("https://search.yahoo.com/search?p=test&fr=yfp-t&fp=1&toggle=1&cop=mss&ei=UTF-8");
+  hs = Services.telemetry.getKeyedHistogramById("SEARCH_COUNTS").snapshot();
+  Assert.ok(hs);
+  Assert.ok("yahoo.in-content:organic:none" in hs, "The histogram must contain the correct key");
+
+  // Yahoo organic UK.
+  Services.search.recordSearchURLTelemetry("https://uk.search.yahoo.com/search?p=test&fr=yfp-t&fp=1&toggle=1&cop=mss&ei=UTF-8");
+  hs = Services.telemetry.getKeyedHistogramById("SEARCH_COUNTS").snapshot();
+  Assert.ok(hs);
+  Assert.ok("yahoo.in-content:organic:none" in hs, "The histogram must contain the correct key");
+
+  // Bing search access point.
+  Services.search.recordSearchURLTelemetry("https://www.bing.com/search?q=test&pc=MOZI&form=MOZLBR");
+  hs = Services.telemetry.getKeyedHistogramById("SEARCH_COUNTS").snapshot();
+  Assert.ok(hs);
+  Assert.ok("bing.in-content:sap:MOZI" in hs, "The histogram must contain the correct key");
+
+  // Bing organic.
+  Services.search.recordSearchURLTelemetry("https://www.bing.com/search?q=test&qs=n&form=QBLH&sp=-1&pq=&sc=0-0&sk=&cvid=CVID_VALUE");
+  hs = Services.telemetry.getKeyedHistogramById("SEARCH_COUNTS").snapshot();
+  Assert.ok(hs);
+  Assert.ok("bing.in-content:organic:none" in hs, "The histogram must contain the correct key");
+
+  // DuckDuckGo search access point.
+  Services.search.recordSearchURLTelemetry("https://duckduckgo.com/?q=test&t=ffab");
+  hs = Services.telemetry.getKeyedHistogramById("SEARCH_COUNTS").snapshot();
+  Assert.ok(hs);
+  Assert.ok("duckduckgo.in-content:sap:ffab" in hs, "The histogram must contain the correct key");
+
+  // DuckDuckGo organic.
+  Services.search.recordSearchURLTelemetry("https://duckduckgo.com/?q=test&t=hi&ia=news");
+  hs = Services.telemetry.getKeyedHistogramById("SEARCH_COUNTS").snapshot();
+  Assert.ok(hs);
+  Assert.ok("duckduckgo.in-content:organic:hi" in hs, "The histogram must contain the correct key");
+
+  // Baidu search access point.
+  Services.search.recordSearchURLTelemetry("https://www.baidu.com/baidu?wd=test&tn=monline_dg&ie=utf-8");
+  hs = Services.telemetry.getKeyedHistogramById("SEARCH_COUNTS").snapshot();
+  Assert.ok(hs);
+  Assert.ok("baidu.in-content:sap:monline_dg" in hs, "The histogram must contain the correct key");
+
+  // Baidu search access point follow-on.
+  Services.search.recordSearchURLTelemetry("https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&tn=monline_dg&wd=test2&oq=test&rsv_pq=RSV_PQ_VALUE&rsv_t=RSV_T_VALUE&rqlang=cn&rsv_enter=1&rsv_sug3=2&rsv_sug2=0&inputT=227&rsv_sug4=397");
+  hs = Services.telemetry.getKeyedHistogramById("SEARCH_COUNTS").snapshot();
+  Assert.ok(hs);
+  Assert.ok("baidu.in-content:sap-follow-on:monline_dg" in hs, "The histogram must contain the correct key");
+
+  // Baidu organic.
+  Services.search.recordSearchURLTelemetry("https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&rsv_idx=1&ch=&tn=baidu&bar=&wd=test&rn=&oq=&rsv_pq=RSV_PQ_VALUE&rsv_t=RSV_T_VALUE&rqlang=cn");
+  hs = Services.telemetry.getKeyedHistogramById("SEARCH_COUNTS").snapshot();
+  Assert.ok(hs);
+  Assert.ok("baidu.in-content:organic:baidu" in hs, "The histogram must contain the correct key");
+});
--- a/toolkit/components/search/tests/xpcshell/xpcshell.ini
+++ b/toolkit/components/search/tests/xpcshell/xpcshell.ini
@@ -97,8 +97,9 @@ skip-if = (verify && !debug && (os == 'l
 [test_addEngineWithDetailsObject.js]
 [test_addEngineWithExtensionID.js]
 [test_chromeresource_icon1.js]
 [test_chromeresource_icon2.js]
 [test_engineUpdate.js]
 [test_paramSubstitution.js]
 [test_migrateWebExtensionEngine.js]
 [test_sendSubmissionURL.js]
+[test_urltelemetry.js]