Bug 841554 - Part 2: alter search counts provider to record all of Mozilla's partner engines in each locale. r=gps, a=bajaj
authorRichard Newman <rnewman@mozilla.com>
Mon, 25 Mar 2013 18:46:22 -0700
changeset 128710 f6d8dfa7859dba2523952a4ece8d0a9c23c3e0e2
parent 128709 fc077ab0cebf06003fd9bf75c8fc8d32e5e50e26
child 128711 ac5ed16b6a744194905111a812b6cd92da1de8f7
push id3564
push userryanvm@gmail.com
push dateThu, 28 Mar 2013 14:51:16 +0000
treeherdermozilla-aurora@ac5ed16b6a74 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersgps, bajaj
bugs841554
milestone21.0a2
Bug 841554 - Part 2: alter search counts provider to record all of Mozilla's partner engines in each locale. r=gps, a=bajaj
services/healthreport/providers.jsm
services/healthreport/tests/xpcshell/test_provider_searches.js
services/metrics/dataprovider.jsm
services/metrics/tests/xpcshell/test_metrics_provider.js
--- a/services/healthreport/providers.jsm
+++ b/services/healthreport/providers.jsm
@@ -1018,25 +1018,21 @@ PlacesProvider.prototype = Object.freeze
     PlacesDBUtils.telemetry(null, function onResult(data) {
       deferred.resolve(data);
     });
 
     return deferred.promise;
   },
 });
 
-
-/**
- * Records search counts per day per engine and where search initiated.
- */
-function SearchCountMeasurement() {
+function SearchCountMeasurement1() {
   Metrics.Measurement.call(this);
 }
 
-SearchCountMeasurement.prototype = Object.freeze({
+SearchCountMeasurement1.prototype = Object.freeze({
   __proto__: Metrics.Measurement.prototype,
 
   name: "counts",
   version: 1,
 
   // We only record searches for search engines that have partner agreements
   // with Mozilla.
   fields: {
@@ -1056,24 +1052,216 @@ SearchCountMeasurement.prototype = Objec
     "yahoo.contextmenu": DAILY_COUNTER_FIELD,
     "yahoo.searchbar": DAILY_COUNTER_FIELD,
     "yahoo.urlbar": DAILY_COUNTER_FIELD,
     "other.abouthome": DAILY_COUNTER_FIELD,
     "other.contextmenu": DAILY_COUNTER_FIELD,
     "other.searchbar": DAILY_COUNTER_FIELD,
     "other.urlbar": DAILY_COUNTER_FIELD,
   },
+});
 
-  // If an engine is removed from this list, it may not be reported any more.
-  // Verify side-effects are sane before removing an entry.
-  PARTNER_ENGINES: [
-    "amazon.com",
+/**
+ * Records search counts per day per engine and where search initiated.
+ *
+ * We want to record granular details for individual locale-specific search
+ * providers, but only if they're Mozilla partners. In order to do this, we
+ * track the nsISearchEngine identifier, which denotes shipped search engines,
+ * and intersect those with our partner list.
+ *
+ * We don't use the search engine name directly, because it is shared across
+ * locales; e.g., eBay-de and eBay both share the name "eBay".
+ */
+function SearchCountMeasurement2() {
+  this._fieldSpecs = null;
+  this._interestingEngines = null;   // Name -> ID. ("Amazon.com" -> "amazondotcom")
+
+  Metrics.Measurement.call(this);
+}
+
+SearchCountMeasurement2.prototype = Object.freeze({
+  __proto__: Metrics.Measurement.prototype,
+
+  name: "counts",
+  version: 2,
+
+  /**
+   * Default implementation; can be overridden by test helpers.
+   */
+  getDefaultEngines: function () {
+    return Services.search.getDefaultEngines();
+  },
+
+  _initialize: function () {
+    // Don't create all of these for every profile.
+    // There are 61 partner engines, translating to 244 fields.
+    // Instead, compute only those that are possible -- those for whom the
+    // provider is one of the default search engines.
+    // This set can grow over time, and change as users run different localized
+    // Firefox instances.
+    this._fieldSpecs = {};
+    this._interestingEngines = {};
+
+    for (let source of this.SOURCES) {
+      this._fieldSpecs["other." + source] = DAILY_COUNTER_FIELD;
+    }
+
+    let engines = this.getDefaultEngines();
+    for (let engine of engines) {
+      let id = engine.identifier;
+      if (!id || (this.PROVIDERS.indexOf(id) == -1)) {
+        continue;
+      }
+
+      this._interestingEngines[engine.name] = id;
+      let fieldPrefix = id + ".";
+      for (let source of this.SOURCES) {
+        this._fieldSpecs[fieldPrefix + source] = DAILY_COUNTER_FIELD;
+      }
+    }
+  },
+
+  // Our fields are dynamic, so we compute them into _fieldSpecs by looking at
+  // the current set of interesting engines.
+  get fields() {
+    if (!this._fieldSpecs) {
+      this._initialize();
+    }
+    return this._fieldSpecs;
+  },
+
+  get interestingEngines() {
+    if (!this._fieldSpecs) {
+      this._initialize();
+    }
+    return this._interestingEngines;
+  },
+
+  /**
+   * Override the default behavior: serializers should include every counter
+   * field from the DB, even if we don't currently have it registered.
+   *
+   * Do this so we don't have to register several hundred fields to match
+   * various Firefox locales.
+   *
+   * We use the "provider.type" syntax as a rudimentary check for validity.
+   *
+   * We trust that measurement versioning is sufficient to exclude old provider
+   * data.
+   */
+  shouldIncludeField: function (name) {
+    return name.indexOf(".") != -1;
+  },
+
+  /**
+   * The measurement type mechanism doesn't introspect the DB. Override it
+   * so that we can assume all unknown fields are counters.
+   */
+  fieldType: function (name) {
+    if (name in this.fields) {
+      return this.fields[name].type;
+    }
+
+    // Default to a counter.
+    return Metrics.Storage.FIELD_DAILY_COUNTER;
+  },
+
+  // You can compute the total list of fields by unifying the entire l10n repo
+  // set with the list of partners:
+  //
+  //   sort -u */*/searchplugins/list.txt | tr -d '^M' | uniq | grep -f partners.txt
+  //
+  // where partners.txt contains
+  //
+  //   amazon
+  //   aol
+  //   bing
+  //   eBay
+  //   google
+  //   mailru
+  //   mercadolibre
+  //   seznam
+  //   twitter
+  //   yahoo
+  //   yandex
+  //
+  // Please update this list as the set of partners changes.
+  //
+  PROVIDERS: [
+    "amazon-co-uk",
+    "amazon-de",
+    "amazon-en-GB",
+    "amazon-france",
+    "amazon-it",
+    "amazon-jp",
+    "amazondotcn",
+    "amazondotcom",
+    "amazondotcom-de",
+
+    "aol-en-GB",
+    "aol-web-search",
+
     "bing",
+
+    "eBay",
+    "eBay-de",
+    "eBay-en-GB",
+    "eBay-es",
+    "eBay-fi",
+    "eBay-france",
+    "eBay-hu",
+    "eBay-in",
+    "eBay-it",
+
     "google",
+    "google-jp",
+    "google-ku",
+    "google-maps-zh-TW",
+
+    "mailru",
+
+    "mercadolibre-ar",
+    "mercadolibre-cl",
+    "mercadolibre-mx",
+
+    "seznam-cz",
+
+    "twitter",
+    "twitter-de",
+    "twitter-ja",
+
     "yahoo",
+    "yahoo-NO",
+    "yahoo-answer-zh-TW",
+    "yahoo-ar",
+    "yahoo-bid-zh-TW",
+    "yahoo-br",
+    "yahoo-ch",
+    "yahoo-cl",
+    "yahoo-de",
+    "yahoo-en-GB",
+    "yahoo-es",
+    "yahoo-fi",
+    "yahoo-france",
+    "yahoo-fy-NL",
+    "yahoo-id",
+    "yahoo-in",
+    "yahoo-it",
+    "yahoo-jp",
+    "yahoo-jp-auctions",
+    "yahoo-mx",
+    "yahoo-sv-SE",
+    "yahoo-zh-TW",
+
+    "yandex",
+    "yandex-ru",
+    "yandex-slovari",
+    "yandex-tr",
+    "yandex.by",
+    "yandex.ru-be",
   ],
 
   SOURCES: [
     "abouthome",
     "contextmenu",
     "searchbar",
     "urlbar",
   ],
@@ -1082,41 +1270,41 @@ SearchCountMeasurement.prototype = Objec
 this.SearchesProvider = function () {
   Metrics.Provider.call(this);
 };
 
 this.SearchesProvider.prototype = Object.freeze({
   __proto__: Metrics.Provider.prototype,
 
   name: "org.mozilla.searches",
-  measurementTypes: [SearchCountMeasurement],
+  measurementTypes: [
+    SearchCountMeasurement1,
+    SearchCountMeasurement2,
+  ],
 
   /**
    * Record that a search occurred.
    *
    * @param engine
    *        (string) The search engine used. If the search engine is unknown,
    *        the search will be attributed to "other".
    * @param source
    *        (string) Where the search was initiated from. Must be one of the
-   *        SearchCountMeasurement.SOURCES values.
+   *        SearchCountMeasurement2.SOURCES values.
    *
    * @return Promise<>
    *         The promise is resolved when the storage operation completes.
    */
   recordSearch: function (engine, source) {
-    let m = this.getMeasurement("counts", 1);
+    let m = this.getMeasurement("counts", 2);
 
     if (m.SOURCES.indexOf(source) == -1) {
       throw new Error("Unknown source for search: " + source);
     }
 
-    let normalizedEngine = engine.toLowerCase();
-    if (m.PARTNER_ENGINES.indexOf(normalizedEngine) == -1) {
-      normalizedEngine = "other";
-    }
-
+    let id = m.interestingEngines[engine] || "other";
+    let field = id + "." + source;
     return this.enqueueStorageOperation(function recordSearch() {
-      return m.incrementDailyCounter(normalizedEngine + "." + source);
+      return m.incrementDailyCounter(field);
     });
   },
 });
 
--- a/services/healthreport/tests/xpcshell/test_provider_searches.js
+++ b/services/healthreport/tests/xpcshell/test_provider_searches.js
@@ -1,75 +1,123 @@
 /* Any copyright is dedicated to the Public Domain.
  * http://creativecommons.org/publicdomain/zero/1.0/ */
 
 "use strict";
 
 const {utils: Cu} = Components;
 
 Cu.import("resource://gre/modules/Metrics.jsm");
-Cu.import("resource://gre/modules/services/healthreport/providers.jsm");
+let bsp = Cu.import("resource://gre/modules/services/healthreport/providers.jsm");
+
+const DEFAULT_ENGINES = [
+  {name: "Amazon.com",    identifier: "amazondotcom"},
+  {name: "Bing",          identifier: "bing"},
+  {name: "Google",        identifier: "google"},
+  {name: "Yahoo",         identifier: "yahoo"},
+  {name: "Foobar Search", identifier: "foobar"},
+];
 
+function MockSearchCountMeasurement() {
+  bsp.SearchCountMeasurement2.call(this);
+}
+MockSearchCountMeasurement.prototype = {
+  __proto__: bsp.SearchCountMeasurement2.prototype,
+  getDefaultEngines: function () {
+    return DEFAULT_ENGINES;
+  },
+};
+
+function MockSearchesProvider() {
+  SearchesProvider.call(this);
+}
+MockSearchesProvider.prototype = {
+  __proto__: SearchesProvider.prototype,
+  measurementTypes: [MockSearchCountMeasurement],
+};
 
 function run_test() {
   run_next_test();
 }
 
 add_test(function test_constructor() {
   let provider = new SearchesProvider();
 
   run_next_test();
 });
 
 add_task(function test_record() {
   let storage = yield Metrics.Storage("record");
-  let provider = new SearchesProvider();
+  let provider = new MockSearchesProvider();
 
   yield provider.init(storage);
 
-  const ENGINES = [
-    "amazon.com",
-    "bing",
-    "google",
-    "yahoo",
-    "foobar",
-  ];
-
   let now = new Date();
 
-  for (let engine of ENGINES) {
-    yield provider.recordSearch(engine, "abouthome");
-    yield provider.recordSearch(engine, "contextmenu");
-    yield provider.recordSearch(engine, "searchbar");
-    yield provider.recordSearch(engine, "urlbar");
+  for (let engine of DEFAULT_ENGINES) {
+    yield provider.recordSearch(engine.name, "abouthome");
+    yield provider.recordSearch(engine.name, "contextmenu");
+    yield provider.recordSearch(engine.name, "searchbar");
+    yield provider.recordSearch(engine.name, "urlbar");
   }
 
   // Invalid sources should throw.
   let errored = false;
   try {
-    yield provider.recordSearch("google", "bad source");
+    yield provider.recordSearch(DEFAULT_ENGINES[0].name, "bad source");
   } catch (ex) {
     errored = true;
   } finally {
     do_check_true(errored);
   }
 
-  let m = provider.getMeasurement("counts", 1);
+  let m = provider.getMeasurement("counts", 2);
   let data = yield m.getValues();
   do_check_eq(data.days.size, 1);
   do_check_true(data.days.hasDay(now));
 
   let day = data.days.getDay(now);
-  for (let engine of ENGINES) {
-    if (engine == "foobar") {
-      engine = "other";
+  for (let engine of DEFAULT_ENGINES) {
+    let identifier = engine.identifier;
+    if (identifier == "foobar") {
+      identifier = "other";
     }
 
     for (let source of ["abouthome", "contextmenu", "searchbar", "urlbar"]) {
-      let field = engine + "." + source;
+      let field = identifier + "." + source;
       do_check_true(day.has(field));
       do_check_eq(day.get(field), 1);
     }
   }
 
   yield storage.close();
 });
 
+add_task(function test_includes_other_fields() {
+  let storage = yield Metrics.Storage("includes_other_fields");
+  let provider = new MockSearchesProvider();
+
+  yield provider.init(storage);
+  let m = provider.getMeasurement("counts", 2);
+
+  // Register a search against a provider that isn't live in this session.
+  let id = yield m.storage.registerField(m.id, "test.searchbar",
+                                         Metrics.Storage.FIELD_DAILY_COUNTER);
+
+  let testField = "test.searchbar";
+  let now = new Date();
+  yield m.storage.incrementDailyCounterFromFieldID(id, now);
+
+  // Make sure we don't know about it.
+  do_check_false(testField in m.fields);
+
+  // But we want to include it in payloads.
+  do_check_true(m.shouldIncludeField(testField));
+
+  // And we do so.
+  let data = yield provider.storage.getMeasurementValues(m.id);
+  let serializer = m.serializer(m.SERIALIZE_JSON);
+  let formatted = serializer.daily(data.days.getDay(now));
+  do_check_true(testField in formatted);
+  do_check_eq(formatted[testField], 1);
+
+  yield storage.close();
+});
--- a/services/metrics/dataprovider.jsm
+++ b/services/metrics/dataprovider.jsm
@@ -326,22 +326,40 @@ Measurement.prototype = Object.freeze({
   deleteLastNumeric: function (field) {
     return this.storage.deleteLastNumericFromFieldID(this.fieldID(field));
   },
 
   deleteLastText: function (field) {
     return this.storage.deleteLastTextFromFieldID(this.fieldID(field));
   },
 
+  /**
+   * This method is used by the default serializers to control whether a field
+   * is included in the output.
+   *
+   * There could be legacy fields in storage we no longer care about.
+   *
+   * This method is a hook to allow measurements to change this behavior, e.g.,
+   * to implement a dynamic fieldset.
+   *
+   * You will also need to override `fieldType`.
+   *
+   * @return (boolean) true if the specified field should be included in
+   *                   payload output.
+   */
+  shouldIncludeField: function (field) {
+    return field in this._fields;
+  },
+
   _serializeJSONSingular: function (data) {
     let result = {"_v": this.version};
 
     for (let [field, data] of data) {
       // There could be legacy fields in storage we no longer care about.
-      if (!(field in this._fields)) {
+      if (!this.shouldIncludeField(field)) {
         continue;
       }
 
       let type = this.fieldType(field);
 
       switch (type) {
         case this.storage.FIELD_LAST_NUMERIC:
         case this.storage.FIELD_LAST_TEXT:
@@ -362,17 +380,17 @@ Measurement.prototype = Object.freeze({
 
     return result;
   },
 
   _serializeJSONDay: function (data) {
     let result = {"_v": this.version};
 
     for (let [field, data] of data) {
-      if (!(field in this._fields)) {
+      if (!this.shouldIncludeField(field)) {
         continue;
       }
 
       let type = this.fieldType(field);
 
       switch (type) {
         case this.storage.FIELD_DAILY_COUNTER:
         case this.storage.FIELD_DAILY_DISCRETE_NUMERIC:
--- a/services/metrics/tests/xpcshell/test_metrics_provider.js
+++ b/services/metrics/tests/xpcshell/test_metrics_provider.js
@@ -280,10 +280,26 @@ add_task(function test_serialize_json_de
 
   do_check_eq(formatted["daily-last-numeric"], 4);
   do_check_eq(formatted["daily-last-text"], "apple");
 
   formatted = serializer.daily(data.days.getDay(yesterday));
   do_check_eq(formatted["daily-last-numeric"], 5);
   do_check_eq(formatted["daily-last-text"], "orange");
 
+  // Now let's turn off a field so that it's present in the DB
+  // but not present in the output.
+  let called = false;
+  let excluded = "daily-last-numeric";
+  Object.defineProperty(m, "shouldIncludeField", {
+    value: function fakeShouldIncludeField(field) {
+      called = true;
+      return field != excluded;
+    },
+  });
+
+  let limited = serializer.daily(data.days.getDay(yesterday));
+  do_check_true(called);
+  do_check_false(excluded in limited);
+  do_check_eq(formatted["daily-last-text"], "orange");
+
   yield provider.storage.close();
 });