Bug 1140037 - (Fx 39) Fuzz daily telemetry ping submission times to avoid submission volume spikes around midnight. r=dexter, a=lizzard
authorGeorg Fritzsche <georg.fritzsche@googlemail.com>
Fri, 27 Mar 2015 13:40:02 +0100
changeset 265949 2ae6455c2f58b2d6b5f505000779f4fe50f1e240
parent 265948 8316a66bfed34db3daa557ec462575fa40dc8ebd
child 265950 f8229bcf41affa3609866122f121f1bec021fa02
push id4718
push userraliiev@mozilla.com
push dateMon, 11 May 2015 18:39:53 +0000
treeherdermozilla-beta@c20c4ef55f08 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersdexter, lizzard
bugs1140037
milestone39.0a2
Bug 1140037 - (Fx 39) Fuzz daily telemetry ping submission times to avoid submission volume spikes around midnight. r=dexter, a=lizzard
toolkit/components/telemetry/TelemetryFile.jsm
toolkit/components/telemetry/TelemetryPing.jsm
toolkit/components/telemetry/tests/unit/head.js
toolkit/components/telemetry/tests/unit/test_TelemetryPing.js
toolkit/components/telemetry/tests/unit/test_TelemetrySession.js
--- a/toolkit/components/telemetry/TelemetryFile.jsm
+++ b/toolkit/components/telemetry/TelemetryFile.jsm
@@ -119,38 +119,51 @@ this.TelemetryFile = {
       p.push(this.savePing(ping, false));
       return p;}, [this.savePing(sessionPing, true)]);
 
     pendingPings = [];
     return Promise.all(p);
   },
 
   /**
-   * Add a ping to the saved pings directory so that it gets along with other pings. Note
-   * that the original ping file will not be modified.
+   * Add a ping from an existing file to the saved pings directory so that it gets saved
+   * and sent along with other pings.
+   * Note: that the original ping file will not be modified.
    *
    * @param {String} aFilePath The path to the ping file that needs to be added to the
    *                           saved pings directory.
    * @return {Promise} A promise resolved when the ping is saved to the pings directory.
    */
-  addPendingPing: function(aPingPath) {
+  addPendingPingFromFile: function(aPingPath) {
     // Pings in the saved ping directory need to have the ping id or slug (old format) as
     // the file name. We load the ping content, check that it is valid, and use it to save
     // the ping file with the correct file name.
     return loadPingFile(aPingPath).then(ping => {
-        // Append the ping to the pending list.
-        pendingPings.push(ping);
         // Since we read a ping successfully, update the related histogram.
         Telemetry.getHistogramById("READ_SAVED_PING_SUCCESS").add(1);
-        // Save the ping to the saved pings directory.
-        return this.savePing(ping, false);
+        this.addPendingPing(ping);
       });
   },
 
   /**
+   * Add a ping to the saved pings directory so that it gets saved
+   * and sent along with other pings.
+   * Note: that the original ping file will not be modified.
+   *
+   * @param {Object} ping The ping object.
+   * @return {Promise} A promise resolved when the ping is saved to the pings directory.
+   */
+  addPendingPing: function(ping) {
+    // Append the ping to the pending list.
+    pendingPings.push(ping);
+    // Save the ping to the saved pings directory.
+    return this.savePing(ping, false);
+  },
+
+  /**
    * Remove the file for a ping
    *
    * @param {object} ping The ping.
    * @returns {promise}
    */
   cleanupPingFile: function(ping) {
     return OS.File.remove(pingFilePath(ping));
   },
--- a/toolkit/components/telemetry/TelemetryPing.jsm
+++ b/toolkit/components/telemetry/TelemetryPing.jsm
@@ -16,16 +16,17 @@ Cu.import("resource://gre/modules/debug.
 Cu.import("resource://gre/modules/Services.jsm", this);
 Cu.import("resource://gre/modules/XPCOMUtils.jsm", this);
 Cu.import("resource://gre/modules/osfile.jsm", this);
 Cu.import("resource://gre/modules/Promise.jsm", this);
 Cu.import("resource://gre/modules/PromiseUtils.jsm", this);
 Cu.import("resource://gre/modules/Task.jsm", this);
 Cu.import("resource://gre/modules/DeferredTask.jsm", this);
 Cu.import("resource://gre/modules/Preferences.jsm");
+Cu.import("resource://gre/modules/Timer.jsm");
 
 const LOGGER_NAME = "Toolkit.Telemetry";
 const LOGGER_PREFIX = "TelemetryPing::";
 
 const PREF_BRANCH = "toolkit.telemetry.";
 const PREF_BRANCH_LOG = PREF_BRANCH + "log.";
 const PREF_SERVER = PREF_BRANCH + "server";
 const PREF_ENABLED = PREF_BRANCH + "enabled";
@@ -40,16 +41,24 @@ const PING_FORMAT_VERSION = 4;
 const TELEMETRY_DELAY = 60000;
 // Delay before initializing telemetry if we're testing (ms)
 const TELEMETRY_TEST_DELAY = 100;
 // The number of days to keep pings serialised on the disk in case of failures.
 const DEFAULT_RETENTION_DAYS = 14;
 // Timeout after which we consider a ping submission failed.
 const PING_SUBMIT_TIMEOUT_MS = 2 * 60 * 1000;
 
+// We treat pings before midnight as happening "at midnight" with this tolerance.
+const MIDNIGHT_TOLERANCE_MS = 15 * 60 * 1000;
+// For midnight fuzzing we want to affect pings around midnight with this tolerance.
+const MIDNIGHT_TOLERANCE_FUZZ_MS = 5 * 60 * 1000;
+// We try to spread "midnight" pings out over this interval.
+const MIDNIGHT_FUZZING_INTERVAL_MS = 60 * 60 * 1000;
+const MIDNIGHT_FUZZING_DELAY_MS = Math.random() * MIDNIGHT_FUZZING_INTERVAL_MS;
+
 XPCOMUtils.defineLazyServiceGetter(this, "Telemetry",
                                    "@mozilla.org/base/telemetry;1",
                                    "nsITelemetry");
 XPCOMUtils.defineLazyModuleGetter(this, "AsyncShutdown",
                                   "resource://gre/modules/AsyncShutdown.jsm");
 XPCOMUtils.defineLazyModuleGetter(this, "TelemetryFile",
                                   "resource://gre/modules/TelemetryFile.jsm");
 XPCOMUtils.defineLazyModuleGetter(this, "TelemetryLog",
@@ -96,23 +105,91 @@ function configureLogging() {
 
 function generateUUID() {
   let str = Cc["@mozilla.org/uuid-generator;1"].getService(Ci.nsIUUIDGenerator).generateUUID().toString();
   // strip {}
   return str.substring(1, str.length - 1);
 }
 
 /**
+ * This is a policy object used to override behavior for testing.
+ */
+let Policy = {
+  now: () => new Date(),
+  midnightPingFuzzingDelay: () => MIDNIGHT_FUZZING_DELAY_MS,
+  setPingSendTimeout: (callback, delayMs) => setTimeout(callback, delayMs),
+  clearPingSendTimeout: (id) => clearTimeout(id),
+};
+
+/**
  * Determine if the ping has new ping format or a legacy one.
  */
 function isNewPingFormat(aPing) {
   return ("id" in aPing) && ("application" in aPing) &&
          ("version" in aPing) && (aPing.version >= 2);
 }
 
+/**
+ * Takes a date and returns it trunctated to a date with daily precision.
+ */
+function truncateToDays(date) {
+  return new Date(date.getFullYear(),
+                  date.getMonth(),
+                  date.getDate(),
+                  0, 0, 0, 0);
+}
+
+function tomorrow(date) {
+  let d = new Date(date);
+  d.setDate(d.getDate() + 1);
+  return d;
+}
+
+/**
+ * Check if the difference between the times is within the provided tolerance.
+ * @param {Number} t1 A time in milliseconds.
+ * @param {Number} t2 A time in milliseconds.
+ * @param {Number} tolerance The tolerance, in milliseconds.
+ * @return {Boolean} True if the absolute time difference is within the tolerance, false
+ *                   otherwise.
+ */
+function areTimesClose(t1, t2, tolerance) {
+  return Math.abs(t1 - t2) <= tolerance;
+}
+
+/**
+ * Get the next midnight for a date.
+ * @param {Object} date The date object to check.
+ * @return {Object} The Date object representing the next midnight.
+ */
+function getNextMidnight(date) {
+  let nextMidnight = new Date(truncateToDays(date));
+  nextMidnight.setDate(nextMidnight.getDate() + 1);
+  return nextMidnight;
+}
+
+/**
+ * Get the midnight which is closer to the provided date.
+ * @param {Object} date The date object to check.
+ * @return {Object} The Date object representing the closes midnight, or null if midnight
+ *                  is not within the midnight tolerance.
+ */
+function getNearestMidnight(date, tolerance) {
+  let lastMidnight = truncateToDays(date);
+  if (areTimesClose(date.getTime(), lastMidnight.getTime(), tolerance)) {
+    return lastMidnight;
+  }
+
+  const nextMidnightDate = getNextMidnight(date);
+  if (areTimesClose(date.getTime(), nextMidnightDate.getTime(), tolerance)) {
+    return nextMidnightDate;
+  }
+  return null;
+}
+
 this.EXPORTED_SYMBOLS = ["TelemetryPing"];
 
 this.TelemetryPing = Object.freeze({
   Constants: Object.freeze({
     PREF_ENABLED: PREF_ENABLED,
     PREF_LOG_LEVEL: PREF_LOG_LEVEL,
     PREF_LOG_DUMP: PREF_LOG_DUMP,
     PREF_SERVER: PREF_SERVER,
@@ -268,16 +345,18 @@ let Impl = {
   // The previous build ID, if this is the first run with a new build.
   // Undefined if this is not the first run, or the previous build ID is unknown.
   _previousBuildID: undefined,
   _clientID: null,
   // A task performing delayed initialization
   _delayedInitTask: null,
   // The deferred promise resolved when the initialization task completes.
   _delayedInitTaskDeferred: null,
+  // Timer for scheduled ping sends.
+  _pingSendTimer: null,
 
   // This is a public barrier Telemetry clients can use to add blockers to the shutdown
   // of TelemetryPing.
   // After this barrier, clients can not submit Telemetry pings anymore.
   _shutdownBarrier: new AsyncShutdown.Barrier("TelemetryPing: Waiting for clients."),
   // This is a private barrier blocked by pending async ping activity (sending & saving).
   _connectionsBarrier: new AsyncShutdown.Barrier("TelemetryPing: Waiting for pending ping activity"),
 
@@ -393,24 +472,79 @@ let Impl = {
    * and adding it to the pending ping list.
    *
    * @param {String} aPingPath The path of the ping to add to the pending ping list.
    * @param {Boolean} [aRemoveOriginal] If true, deletes the ping at aPingPath after adding
    *                  it to the saved pings directory.
    * @return {Promise} Resolved when the ping is correctly moved to the saved pings directory.
    */
   addPendingPing: function(aPingPath, aRemoveOriginal) {
-    return TelemetryFile.addPendingPing(aPingPath).then(() => {
+    return TelemetryFile.addPendingPingFromFile(aPingPath).then(() => {
         if (aRemoveOriginal) {
           return OS.File.remove(aPingPath);
         }
       }, error => this._log.error("addPendingPing - Unable to add the pending ping", error));
   },
 
   /**
+   * This helper calculates the next time that we can send pings at.
+   * Currently this mostly redistributes ping sends around midnight to avoid submission
+   * spikes around local midnight for daily pings.
+   *
+   * @param now Date The current time.
+   * @return Number The next time (ms from UNIX epoch) when we can send pings.
+   */
+  _getNextPingSendTime: function(now) {
+    const midnight = getNearestMidnight(now, MIDNIGHT_FUZZING_INTERVAL_MS);
+
+    // Don't delay ping if we are not close to midnight.
+    if (!midnight) {
+      return now.getTime();
+    }
+
+    // Delay ping send if we are within the midnight fuzzing range.
+    // This is from: |midnight - MIDNIGHT_TOLERANCE_FUZZ_MS|
+    // to: |midnight + MIDNIGHT_FUZZING_INTERVAL_MS|
+    const midnightRangeStart = midnight.getTime() - MIDNIGHT_TOLERANCE_FUZZ_MS;
+    if (now.getTime() >= midnightRangeStart) {
+      // We spread those ping sends out between |midnight| and |midnight + midnightPingFuzzingDelay|.
+      return midnight.getTime() + Policy.midnightPingFuzzingDelay();
+    }
+
+    return now.getTime();
+  },
+
+  /**
+   * Try to send |ping| (and possibly other pending pings) if we can send now.
+   * If we can not send now, add |ping| to the pending pings for later sending.
+   *
+   * @param ping Object The ping data.
+   * @return Promise Promise that is resolved when the ping is sent or persisted as pending.
+   */
+  _maybeSendPing: function(ping) {
+    // Check if we can send pings now - otherwise schedule a later send.
+    const now = Policy.now();
+    const nextPingSendTime = this._getNextPingSendTime(now);
+    if (nextPingSendTime > now.getTime()) {
+      this._log.trace("_maybeSendPing - delaying ping send to " + new Date(nextPingSendTime));
+      this._reschedulePingSendTimer(nextPingSendTime);
+      return TelemetryFile.addPendingPing(ping);
+    }
+
+    // Once ping is assembled, send it along with the persisted ping in the backlog.
+    let p = [
+      // Persist the ping if sending it fails.
+      this.doPing(ping, false)
+          .catch(() => TelemetryFile.savePing(ping, true)),
+      this.sendPersistedPings(),
+    ];
+    return Promise.all(p);
+  },
+
+  /**
    * Build a complete ping and send data to the server. Record success/send-time in
    * histograms.
    *
    * @param {String} aType The type of the ping.
    * @param {Object} aPayload The actual data payload for the ping.
    * @param {Object} aOptions Options object.
    * @param {Number} aOptions.retentionDays The number of days to keep the ping on disk
    *                 if sending fails.
@@ -422,37 +556,37 @@ let Impl = {
    *
    * @returns {Promise} A promise that resolves when the ping is sent.
    */
   send: function send(aType, aPayload, aOptions) {
     this._log.trace("send - Type " + aType + ", Server " + this._server +
                     ", aOptions " + JSON.stringify(aOptions));
 
     let pingData = this.assemblePing(aType, aPayload, aOptions);
-    // Once ping is assembled, send it along with the persisted pings in the backlog.
-    let p = [
-      // Persist the ping if sending it fails.
-      this.doPing(pingData, false)
-          .catch(() => TelemetryFile.savePing(pingData, true)),
-      this.sendPersistedPings(),
-    ];
-
-    let promise = Promise.all(p);
-    this._trackPendingPingTask(promise);
-    return promise;
+    return this._maybeSendPing(pingData);
   },
 
   /**
    * Send the persisted pings to the server.
    *
    * @return Promise A promise that is resolved when all pings finished sending or failed.
    */
   sendPersistedPings: function sendPersistedPings() {
     this._log.trace("sendPersistedPings");
 
+    // Check if we can send pings now - otherwise schedule a later send.
+    const now = Policy.now();
+    const nextPingSendTime = this._getNextPingSendTime(now);
+    if (nextPingSendTime > now.getTime()) {
+      this._log.trace("sendPersistedPings - delaying ping send to " + new Date(nextPingSendTime));
+      this._reschedulePingSendTimer(nextPingSendTime);
+      return Promise.resolve();
+    }
+
+    // We can send now.
     let pingsIterator = Iterator(this.popPayloads());
     let p = [for (data of pingsIterator) this.doPing(data, true).catch((e) => {
       this._log.error("sendPersistedPings - doPing rejected", e);
     })];
 
     let promise = Promise.all(p);
     this._trackPendingPingTask(promise);
     return promise;
@@ -828,17 +962,21 @@ let Impl = {
       }
     }
     this._pendingPingRequests.clear();
 
     // Now do an orderly shutdown.
     try {
       // First wait for clients processing shutdown.
       yield this._shutdownBarrier.wait();
-      // Then wait for any outstanding async ping activity.
+
+      // Then clear scheduled ping sends...
+      this._clearPingSendTimer();
+
+      // ... and wait for any outstanding async ping activity.
       yield this._connectionsBarrier.wait();
     } finally {
       // Reset state.
       this._initialized = false;
       this._initStarted = false;
     }
   }),
 
@@ -908,9 +1046,22 @@ let Impl = {
     return {
       initialized: this._initialized,
       initStarted: this._initStarted,
       haveDelayedInitTask: !!this._delayedInitTask,
       shutdownBarrier: this._shutdownBarrier.state,
       connectionsBarrier: this._connectionsBarrier.state,
     };
   },
+
+  _reschedulePingSendTimer: function(timestamp) {
+    this._clearPingSendTimer();
+    const interval = timestamp - Policy.now();
+    this._pingSendTimer = Policy.setPingSendTimeout(() => this.sendPersistedPings(), interval);
+  },
+
+  _clearPingSendTimer: function() {
+    if (this._pingSendTimer) {
+      Policy.clearPingSendTimeout(this._pingSendTimer);
+      this._pingSendTimer = null;
+    }
+  },
 };
--- a/toolkit/components/telemetry/tests/unit/head.js
+++ b/toolkit/components/telemetry/tests/unit/head.js
@@ -88,20 +88,37 @@ function createAppInfo(id, name, version
 function fakeSchedulerTimer(set, clear) {
   let session = Components.utils.import("resource://gre/modules/TelemetrySession.jsm");
   session.Policy.setSchedulerTickTimeout = set;
   session.Policy.clearSchedulerTickTimeout = clear;
 }
 
 // Fake the current date.
 function fakeNow(date) {
-  let session = Cu.import("resource://gre/modules/TelemetrySession.jsm");
-  session.Policy.now = () => date;
-  let environment = Cu.import("resource://gre/modules/TelemetryEnvironment.jsm");
-  environment.Policy.now = () => date;
+  const modules = [
+    Components.utils.import("resource://gre/modules/TelemetrySession.jsm"),
+    Components.utils.import("resource://gre/modules/TelemetryEnvironment.jsm"),
+    Components.utils.import("resource://gre/modules/TelemetryPing.jsm"),
+  ];
+
+  for (let m of modules) {
+    m.Policy.now = () => date;
+  }
+}
+
+// Fake the timeout functions for TelemetryPing sending.
+function fakePingSendTimer(set, clear) {
+  let ping = Components.utils.import("resource://gre/modules/TelemetryPing.jsm");
+  ping.Policy.setPingSendTimeout = set;
+  ping.Policy.clearPingSendTimeout = clear;
+}
+
+function fakeMidnightPingFuzzingDelay(delayMs) {
+  let ping = Components.utils.import("resource://gre/modules/TelemetryPing.jsm");
+  ping.Policy.midnightPingFuzzingDelay = () => delayMs;
 }
 
 // Return a date that is |offset| ms in the future from |date|.
 function futureDate(date, offset) {
   return new Date(date.getTime() + offset);
 }
 
 function truncateToDays(aMsec) {
@@ -110,8 +127,12 @@ function truncateToDays(aMsec) {
 
 // Set logging preferences for all the tests.
 Services.prefs.setCharPref("toolkit.telemetry.log.level", "Trace");
 Services.prefs.setBoolPref("toolkit.telemetry.log.dump", true);
 TelemetryPing.initLogging();
 
 // Avoid timers interrupting test behavior.
 fakeSchedulerTimer(() => {}, () => {});
+fakePingSendTimer(() => {}, () => {});
+// Make pind sending predictable.
+fakeMidnightPingFuzzingDelay(0);
+
--- a/toolkit/components/telemetry/tests/unit/test_TelemetryPing.js
+++ b/toolkit/components/telemetry/tests/unit/test_TelemetryPing.js
@@ -197,27 +197,22 @@ add_task(function* asyncSetup() {
     let promisePingSetup = TelemetryPing.reset();
     do_check_eq(TelemetryPing.clientID, gDataReportingClientID);
     yield promisePingSetup;
   }
 });
 
 // Ensure that not overwriting an existing file fails silently
 add_task(function* test_overwritePing() {
-  let ping = {id: "foo"}
+  let ping = {id: "foo"};
   yield TelemetryFile.savePing(ping, true);
   yield TelemetryFile.savePing(ping, false);
   yield TelemetryFile.cleanupPingFile(ping);
 });
 
-// Sends a ping to a non existing server.
-add_task(function* test_noServerPing() {
-  yield sendPing(false, false);
-});
-
 // Checks that a sent ping is correctly received by a dummy http server.
 add_task(function* test_simplePing() {
   startWebserver();
 
   yield sendPing(false, false);
   let request = yield gRequestIterator.next();
 
   // Check that we have a version query parameter in the URL.
@@ -269,16 +264,80 @@ add_task(function* test_pingHasEnvironme
   // Test that we have the correct clientId.
   if (HAS_DATAREPORTINGSERVICE &&
       Services.prefs.getBoolPref(PREF_FHR_UPLOAD_ENABLED)) {
     Assert.equal(ping.clientId, gDataReportingClientID,
                  "The correct clientId must be reported.");
   }
 });
 
+// Test that we fuzz the submission time around midnight properly
+// to avoid overloading the telemetry servers.
+add_task(function* test_midnightPingSendFuzzing() {
+  const fuzzingDelay = 60 * 60 * 1000;
+  fakeMidnightPingFuzzingDelay(fuzzingDelay);
+  let now = new Date(2030, 5, 1, 11, 00, 0);
+  fakeNow(now);
+
+  let pingSendTimerCallback = null;
+  let pingSendTimeout = null;
+  fakePingSendTimer((callback, timeout) => {
+    pingSendTimerCallback = callback;
+    pingSendTimeout = timeout;
+  }, () => {});
+
+  gRequestIterator = Iterator(new Request());
+  yield TelemetryPing.reset();
+
+  // A ping submitted shortly before midnight should not get sent yet.
+  now = new Date(2030, 5, 1, 23, 55, 0);
+  fakeNow(now);
+  registerPingHandler((req, res) => {
+    Assert.ok(false, "No ping should be received yet.");
+  });
+  yield sendPing(true, true);
+
+  Assert.ok(!!pingSendTimerCallback);
+  Assert.deepEqual(futureDate(now, pingSendTimeout), new Date(2030, 5, 2, 1, 0, 0));
+
+  // A ping after midnight within the fuzzing delay should also not get sent.
+  now = new Date(2030, 5, 2, 0, 40, 0);
+  fakeNow(now);
+  pingSendTimeout = null;
+  yield sendPing(true, true);
+  Assert.deepEqual(futureDate(now, pingSendTimeout), new Date(2030, 5, 2, 1, 0, 0));
+
+  // The Request constructor restores the previous ping handler.
+  gRequestIterator = Iterator(new Request());
+
+  // Setting the clock to after the fuzzing delay, we should trigger the two ping sends
+  // with the timer callback.
+  now = futureDate(now, pingSendTimeout);
+  fakeNow(now);
+  yield pingSendTimerCallback();
+  let requests = [];
+  requests.push(yield gRequestIterator.next());
+  requests.push(yield gRequestIterator.next());
+  for (let req of requests) {
+    let ping = decodeRequestPayload(req);
+    checkPingFormat(ping, TEST_PING_TYPE, true, true);
+  }
+
+  // Moving the clock further we should still send pings immediately.
+  now = futureDate(now, 5 * 60 * 1000);
+  yield sendPing(true, true);
+  let request = yield gRequestIterator.next();
+  let ping = decodeRequestPayload(request);
+  checkPingFormat(ping, TEST_PING_TYPE, true, true);
+
+  // Clean-up.
+  fakeMidnightPingFuzzingDelay(0);
+  fakePingSendTimer(() => {}, () => {});
+});
+
 add_task(function* stopServer(){
   gHttpServer.stop(do_test_finished);
 });
 
 // An iterable sequence of http requests
 function Request() {
   let defers = [];
   let current = 0;
--- a/toolkit/components/telemetry/tests/unit/test_TelemetrySession.js
+++ b/toolkit/components/telemetry/tests/unit/test_TelemetrySession.js
@@ -1706,17 +1706,17 @@ add_task(function* test_sendDailyOnIdle(
   let request = yield gRequestIterator.next();
   Assert.ok(!!request);
   let ping = decodeRequestPayload(request);
 
   Assert.equal(ping.type, PING_TYPE_MAIN);
   Assert.equal(ping.payload.info.reason, REASON_DAILY);
 
   // We should also trigger a ping when going idle shortly before next midnight.
-  now = new Date(2040, 1, 2, 23, 55, 0);
+  now = new Date(2040, 1, 2, 23, 54, 0);
   fakeNow(now);
   yield fakeIdleNotification("idle");
 
   request = yield gRequestIterator.next();
   Assert.ok(!!request);
   ping = decodeRequestPayload(request);
 
   Assert.equal(ping.type, PING_TYPE_MAIN);