Bug 1187340 - Add probe to track pending telemetry ping load failures. r=gfritzsche
authorAlessio Placitelli <alessio.placitelli@gmail.com>
Thu, 30 Jul 2015 10:09:00 +0200
changeset 287271 f43ef293278b40468216335575a8b230f55102be
parent 287270 06ea5f2003a2484690df9a935050d23e1f32f4f7
child 287272 54d3c013705662f49286a3caf5f2b370c9767940
push id5067
push userraliiev@mozilla.com
push dateMon, 21 Sep 2015 14:04:52 +0000
treeherdermozilla-beta@14221ffe5b2f [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersgfritzsche
bugs1187340
milestone42.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1187340 - Add probe to track pending telemetry ping load failures. r=gfritzsche
toolkit/components/telemetry/Histograms.json
toolkit/components/telemetry/TelemetryStorage.jsm
toolkit/components/telemetry/tests/unit/test_TelemetrySendOldPings.js
--- a/toolkit/components/telemetry/Histograms.json
+++ b/toolkit/components/telemetry/Histograms.json
@@ -4650,16 +4650,28 @@
   "TELEMETRY_ARCHIVE_EVICTING_OVER_QUOTA_MS": {
     "alert_emails": ["telemetry-client-dev@mozilla.com"],
     "expires_in_version": "never",
     "kind": "exponential",
     "high": "300000",
     "n_buckets": 20,
     "description": "Time (ms) it takes for evicting over-quota pings"
   },
+  "TELEMETRY_PENDING_LOAD_FAILURE_READ": {
+    "alert_emails": ["telemetry-client-dev@mozilla.com"],
+    "expires_in_version": "never",
+    "kind": "count",
+    "description": "Number of pending Telemetry pings that failed to load from the disk"
+  },
+  "TELEMETRY_PENDING_LOAD_FAILURE_PARSE": {
+    "alert_emails": ["telemetry-client-dev@mozilla.com"],
+    "expires_in_version": "never",
+    "kind": "count",
+    "description": "Number of pending Telemetry pings that failed to parse once loaded from the disk"
+  },
   "TELEMETRY_PENDING_PINGS_SIZE_MB": {
     "alert_emails": ["telemetry-client-dev@mozilla.com"],
     "expires_in_version": "never",
     "kind": "linear",
     "high": "17",
     "n_buckets": 16,
     "description": "The size of the Telemetry pending pings directory (MB). The special value 17 is used to indicate over quota pings."
   },
--- a/toolkit/components/telemetry/TelemetryStorage.jsm
+++ b/toolkit/components/telemetry/TelemetryStorage.jsm
@@ -64,16 +64,44 @@ const ARCHIVE_SIZE_PROBE_SPECIAL_VALUE =
 
 // This special value is submitted when the pending pings is outside of the quota, as
 // we don't know the size of the pings above the quota.
 const PENDING_PINGS_SIZE_PROBE_SPECIAL_VALUE = 17;
 
 const UUID_REGEX = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
 
 /**
+ * This is thrown by |TelemetryStorage.loadPingFile| when reading the ping
+ * from the disk fails.
+ */
+function PingReadError(message="Error reading the ping file") {
+  Error.call(this, message);
+  let error = new Error();
+  this.name = "PingReadError";
+  this.message = message;
+  this.stack = error.stack;
+}
+PingReadError.prototype = Object.create(Error.prototype);
+PingReadError.prototype.constructor = PingReadError;
+
+/**
+ * This is thrown by |TelemetryStorage.loadPingFile| when parsing the ping JSON
+ * content fails.
+ */
+function PingParseError(message="Error parsing ping content") {
+  Error.call(this, message);
+  let error = new Error();
+  this.name = "PingParseError";
+  this.message = message;
+  this.stack = error.stack;
+}
+PingParseError.prototype = Object.create(Error.prototype);
+PingParseError.prototype.constructor = PingParseError;
+
+/**
  * This is a policy object used to override behavior for testing.
  */
 let Policy = {
   now: () => new Date(),
   getArchiveQuota: () => ARCHIVE_QUOTA_BYTES,
   getPendingPingsQuota: () => (AppConstants.platform in ["android", "gonk"])
                                 ? PENDING_PINGS_QUOTA_BYTES_MOBILE
                                 : PENDING_PINGS_QUOTA_BYTES_DESKTOP,
@@ -1166,17 +1194,27 @@ let TelemetryStorageImpl = {
   loadPendingPing: function(id) {
     this._log.trace("loadPendingPing - id: " + id);
     let info = this._pendingPings.get(id);
     if (!info) {
       this._log.trace("loadPendingPing - unknown id " + id);
       return Promise.reject(new Error("TelemetryStorage.loadPendingPing - no ping with id " + id));
     }
 
-    return this.loadPingFile(info.path, false);
+    return this.loadPingFile(info.path, false).catch(e => {
+      // If we failed to load the ping, check what happened and update the histogram.
+      // Then propagate the rejection.
+      if (e instanceof PingReadError) {
+        Telemetry.getHistogramById("TELEMETRY_PENDING_LOAD_FAILURE_READ").add();
+      } else if (e instanceof PingParseError) {
+        Telemetry.getHistogramById("TELEMETRY_PENDING_LOAD_FAILURE_PARSE").add();
+      }
+
+      return Promise.reject(e);
+    });
   },
 
   removePendingPing: function(id) {
     let info = this._pendingPings.get(id);
     if (!info) {
       this._log.trace("removePendingPing - unknown id " + id);
       return Promise.resolve();
     }
@@ -1314,31 +1352,45 @@ let TelemetryStorageImpl = {
   },
 
   /**
    * Loads a ping file.
    * @param {String} aFilePath The path of the ping file.
    * @param {Boolean} [aCompressed=false] If |true|, expects the file to be compressed using lz4.
    * @return {Promise<Object>} A promise resolved with the ping content or rejected if the
    *                           ping contains invalid data.
+   * @throws {PingReadError} There was an error while reading the ping file from the disk.
+   * @throws {PingParseError} There was an error while parsing the JSON content of the ping file.
    */
   loadPingFile: Task.async(function* (aFilePath, aCompressed = false) {
     let options = {};
     if (aCompressed) {
       options.compression = "lz4";
     }
-    let array = yield OS.File.read(aFilePath, options);
+
+    let array;
+    try {
+      array = yield OS.File.read(aFilePath, options);
+    } catch(e) {
+      throw new PingReadError(e.message);
+    };
+
     let decoder = new TextDecoder();
     let string = decoder.decode(array);
+    let ping;
+    try {
+      ping = JSON.parse(string);
+      // The ping's payload used to be stringified JSON.  Deal with that.
+      if (typeof(ping.payload) == "string") {
+        ping.payload = JSON.parse(ping.payload);
+      }
+    } catch (e) {
+      throw new PingParseError(e.message);
+    }
 
-    let ping = JSON.parse(string);
-    // The ping's payload used to be stringified JSON.  Deal with that.
-    if (typeof(ping.payload) == "string") {
-      ping.payload = JSON.parse(ping.payload);
-    }
     return ping;
   }),
 
   /**
    * Archived pings are saved with file names of the form:
    * "<timestamp>.<uuid>.<type>.[json|jsonlz4]"
    * This helper extracts that data from a given filename.
    *
--- a/toolkit/components/telemetry/tests/unit/test_TelemetrySendOldPings.js
+++ b/toolkit/components/telemetry/tests/unit/test_TelemetrySendOldPings.js
@@ -268,16 +268,64 @@ add_task(function* test_overdue_old_form
 
   // |TelemetryStorage.cleanup| doesn't know how to remove a ping with no slug or id,
   // so remove it manually so that the next test doesn't fail.
   yield OS.File.remove(PING_FILES_PATHS[3]);
 
   yield clearPendingPings();
 });
 
+add_task(function* test_corrupted_pending_pings() {
+  const TEST_TYPE = "test_corrupted";
+
+  Telemetry.getHistogramById("TELEMETRY_PENDING_LOAD_FAILURE_READ").clear();
+  Telemetry.getHistogramById("TELEMETRY_PENDING_LOAD_FAILURE_PARSE").clear();
+
+  // Save a pending ping and get its id.
+  let pendingPingId = yield TelemetryController.addPendingPing(TEST_TYPE, {}, {});
+
+  // Try to load it: there should be no error.
+  yield TelemetryStorage.loadPendingPing(pendingPingId);
+
+  let h = Telemetry.getHistogramById("TELEMETRY_PENDING_LOAD_FAILURE_READ").snapshot();
+  Assert.equal(h.sum, 0, "Telemetry must not report a pending ping load failure");
+  h = Telemetry.getHistogramById("TELEMETRY_PENDING_LOAD_FAILURE_PARSE").snapshot();
+  Assert.equal(h.sum, 0, "Telemetry must not report a pending ping parse failure");
+
+  // Delete it from the disk, so that its id will be kept in the cache but it will
+  // fail loading the file.
+  yield OS.File.remove(getSavePathForPingId(pendingPingId));
+
+  // Try to load a pending ping which isn't there anymore.
+  yield Assert.rejects(TelemetryStorage.loadPendingPing(pendingPingId),
+                       "Telemetry must fail loading a ping which isn't there");
+
+  h = Telemetry.getHistogramById("TELEMETRY_PENDING_LOAD_FAILURE_READ").snapshot();
+  Assert.equal(h.sum, 1, "Telemetry must report a pending ping load failure");
+  h = Telemetry.getHistogramById("TELEMETRY_PENDING_LOAD_FAILURE_PARSE").snapshot();
+  Assert.equal(h.sum, 0, "Telemetry must not report a pending ping parse failure");
+
+  // Save a new ping, so that it gets in the pending pings cache.
+  pendingPingId = yield TelemetryController.addPendingPing(TEST_TYPE, {}, {});
+  // Overwrite it with a corrupted JSON file and then try to load it.
+  const INVALID_JSON = "{ invalid,JSON { {1}";
+  yield OS.File.writeAtomic(getSavePathForPingId(pendingPingId), INVALID_JSON, { encoding: "utf-8" });
+
+  // Try to load the ping with the corrupted JSON content.
+  yield Assert.rejects(TelemetryStorage.loadPendingPing(pendingPingId),
+                       "Telemetry must fail loading a corrupted ping");
+
+  h = Telemetry.getHistogramById("TELEMETRY_PENDING_LOAD_FAILURE_READ").snapshot();
+  Assert.equal(h.sum, 1, "Telemetry must report a pending ping load failure");
+  h = Telemetry.getHistogramById("TELEMETRY_PENDING_LOAD_FAILURE_PARSE").snapshot();
+  Assert.equal(h.sum, 1, "Telemetry must report a pending ping parse failure");
+
+  yield clearPendingPings();
+});
+
 /**
  * Create some recent and overdue pings and verify that they get sent.
  */
 add_task(function* test_overdue_pings_trigger_send() {
   let pingTypes = [
     { num: RECENT_PINGS },
     { num: OVERDUE_PINGS, age: OVERDUE_PING_FILE_AGE },
   ];