Bug 1186955 - Telemetry should discard pings that are too big. r=gfritzsche
authorAlessio Placitelli <alessio.placitelli@gmail.com>
Wed, 29 Jul 2015 11:08:00 +0200
changeset 255629 8d8aaf9af22dd80b07000ad39011ceca1e4ab80d
parent 255628 cb1cfa8cff278c306427311ccfd7e34136619831
child 255630 0f05c264c1dcb33210c449dd86255ba262a9c401
push id14349
push useralessio.placitelli@gmail.com
push dateFri, 31 Jul 2015 18:46:07 +0000
treeherderfx-team@0f05c264c1dc [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersgfritzsche
bugs1186955
milestone42.0a1
Bug 1186955 - Telemetry should discard pings that are too big. r=gfritzsche
toolkit/components/telemetry/Histograms.json
toolkit/components/telemetry/TelemetrySend.jsm
toolkit/components/telemetry/TelemetryStorage.jsm
--- a/toolkit/components/telemetry/Histograms.json
+++ b/toolkit/components/telemetry/Histograms.json
@@ -4702,16 +4702,58 @@
   "TELEMETRY_PENDING_CHECKING_OVER_QUOTA_MS": {
     "alert_emails": ["telemetry-client-dev@mozilla.com"],
     "expires_in_version": "never",
     "kind": "exponential",
     "high": "300000",
     "n_buckets": 20,
     "description": "Time (ms) it takes for checking if the pending pings are over-quota"
   },
+  "TELEMETRY_PING_SIZE_EXCEEDED_SEND": {
+    "alert_emails": ["telemetry-client-dev@mozilla.com"],
+    "expires_in_version": "never",
+    "kind": "count",
+    "description": "Number of Telemetry pings discarded before sending because they exceeded the maximum size"
+  },
+  "TELEMETRY_PING_SIZE_EXCEEDED_PENDING": {
+    "alert_emails": ["telemetry-client-dev@mozilla.com"],
+    "expires_in_version": "never",
+    "kind": "count",
+    "description": "Number of Telemetry pending pings discarded because they exceeded the maximum size"
+  },
+  "TELEMETRY_PING_SIZE_EXCEEDED_ARCHIVED": {
+    "alert_emails": ["telemetry-client-dev@mozilla.com"],
+    "expires_in_version": "never",
+    "kind": "count",
+    "description": "Number of archived Telemetry pings discarded because they exceeded the maximum size"
+  },
+  "TELEMETRY_DISCARDED_PENDING_PINGS_SIZE_MB": {
+    "alert_emails": ["telemetry-client-dev@mozilla.com"],
+    "expires_in_version": "never",
+    "kind": "linear",
+    "high": "30",
+    "n_buckets": 29,
+    "description": "The size (MB) of the Telemetry pending pings exceeding the maximum file size"
+  },
+  "TELEMETRY_DISCARDED_ARCHIVED_PINGS_SIZE_MB": {
+    "alert_emails": ["telemetry-client-dev@mozilla.com"],
+    "expires_in_version": "never",
+    "kind": "linear",
+    "high": "30",
+    "n_buckets": 29,
+    "description": "The size (MB) of the Telemetry archived, compressed, pings exceeding the maximum file size"
+  },
+  "TELEMETRY_DISCARDED_SEND_PINGS_SIZE_MB": {
+    "alert_emails": ["telemetry-client-dev@mozilla.com"],
+    "expires_in_version": "never",
+    "kind": "linear",
+    "high": "30",
+    "n_buckets": 29,
+    "description": "The size (MB) of the ping data submitted to Telemetry exceeding the maximum size"
+  },
   "TELEMETRY_DISCARDED_CONTENT_PINGS_COUNT": {
     "alert_emails": ["perf-telemetry-alerts@mozilla.com"],
     "expires_in_version": "never",
     "kind": "count",
     "description": "Count of discarded content payloads."
   },
   "TELEMETRY_COMPRESS": {
     "expires_in_version": "never",
--- a/toolkit/components/telemetry/TelemetrySend.jsm
+++ b/toolkit/components/telemetry/TelemetrySend.jsm
@@ -923,16 +923,29 @@ let TelemetrySendImpl = {
     request.setRequestHeader("Content-Encoding", "gzip");
     let converter = Cc["@mozilla.org/intl/scriptableunicodeconverter"]
                     .createInstance(Ci.nsIScriptableUnicodeConverter);
     converter.charset = "UTF-8";
     startTime = new Date();
     let utf8Payload = converter.ConvertFromUnicode(JSON.stringify(networkPayload));
     utf8Payload += converter.Finish();
     Telemetry.getHistogramById("TELEMETRY_STRINGIFY").add(new Date() - startTime);
+
+    // Check the size and drop pings which are too big.
+    const pingSizeBytes = utf8Payload.length;
+    if (pingSizeBytes > TelemetryStorage.MAXIMUM_PING_SIZE) {
+      this._log.error("_doPing - submitted ping exceeds the size limit, size: " + pingSizeBytes);
+      Telemetry.getHistogramById("TELEMETRY_PING_SIZE_EXCEEDED_SEND").add();
+      Telemetry.getHistogramById("TELEMETRY_DISCARDED_SEND_PINGS_SIZE_MB")
+               .add(Math.floor(pingSizeBytes / 1024 / 1024));
+      // We don't need to call |request.abort()| as it was not sent yet.
+      this._pendingPingRequests.delete(id);
+      return TelemetryStorage.removePendingPing(id);
+    }
+
     let payloadStream = Cc["@mozilla.org/io/string-input-stream;1"]
                         .createInstance(Ci.nsIStringInputStream);
     startTime = new Date();
     payloadStream.data = gzipCompressString(utf8Payload);
     Telemetry.getHistogramById("TELEMETRY_COMPRESS").add(new Date() - startTime);
     startTime = new Date();
     request.send(payloadStream);
 
--- a/toolkit/components/telemetry/TelemetryStorage.jsm
+++ b/toolkit/components/telemetry/TelemetryStorage.jsm
@@ -54,16 +54,19 @@ const MAX_ARCHIVED_PINGS_RETENTION_MS = 
 
 // Maximum space the archive can take on disk (in Bytes).
 const ARCHIVE_QUOTA_BYTES = 120 * 1024 * 1024; // 120 MB
 // Maximum space the outgoing pings can take on disk, for Desktop (in Bytes).
 const PENDING_PINGS_QUOTA_BYTES_DESKTOP = 15 * 1024 * 1024; // 15 MB
 // Maximum space the outgoing pings can take on disk, for Mobile (in Bytes).
 const PENDING_PINGS_QUOTA_BYTES_MOBILE = 1024 * 1024; // 1 MB
 
+// The maximum size a pending/archived ping can take on disk.
+const PING_FILE_MAXIMUM_SIZE_BYTES = 1024 * 1024; // 1 MB
+
 // This special value is submitted when the archive is outside of the quota.
 const ARCHIVE_SIZE_PROBE_SPECIAL_VALUE = 300;
 
 // This special value is submitted when the pending pings is outside of the quota, as
 // we don't know the size of the pings above the quota.
 const PENDING_PINGS_SIZE_PROBE_SPECIAL_VALUE = 17;
 
 const UUID_REGEX = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
@@ -131,16 +134,22 @@ function waitForAll(it) {
 }
 
 this.TelemetryStorage = {
   get pingDirectoryPath() {
     return OS.Path.join(OS.Constants.Path.profileDir, "saved-telemetry-pings");
   },
 
   /**
+   * The maximum size a ping can have, in bytes.
+   */
+  get MAXIMUM_PING_SIZE() {
+    return PING_FILE_MAXIMUM_SIZE_BYTES;
+  },
+  /**
    * Shutdown & block on any outstanding async activity in this module.
    *
    * @return {Promise} Promise that is resolved when shutdown is complete.
    */
   shutdown: function() {
     return TelemetryStorageImpl.shutdown();
   },
 
@@ -639,23 +648,37 @@ let TelemetryStorageImpl = {
     if (!data) {
       this._log.trace("loadArchivedPing - no ping with id: " + id);
       return Promise.reject(new Error("TelemetryStorage.loadArchivedPing - no ping with id " + id));
     }
 
     const path = getArchivedPingPath(id, new Date(data.timestampCreated), data.type);
     const pathCompressed = path + "lz4";
 
+    // Purge pings which are too big.
+    let checkSize = function*(path) {
+      const fileSize = (yield OS.File.stat(path)).size;
+      if (fileSize > PING_FILE_MAXIMUM_SIZE_BYTES) {
+        Telemetry.getHistogramById("TELEMETRY_DISCARDED_ARCHIVED_PINGS_SIZE_MB")
+                 .add(Math.floor(fileSize / 1024 / 1024));
+        Telemetry.getHistogramById("TELEMETRY_PING_SIZE_EXCEEDED_ARCHIVED").add();
+        yield OS.File.remove(path, {ignoreAbsent: true});
+        throw new Error("loadArchivedPing - exceeded the maximum ping size: " + fileSize);
+      }
+    };
+
     try {
       // Try to load a compressed version of the archived ping first.
       this._log.trace("loadArchivedPing - loading ping from: " + pathCompressed);
+      yield* checkSize(pathCompressed);
       return yield this.loadPingFile(pathCompressed, /*compressed*/ true);
     } catch (ex if ex.becauseNoSuchFile) {
       // If that fails, look for the uncompressed version.
       this._log.trace("loadArchivedPing - compressed ping not found, loading: " + path);
+      yield* checkSize(path);
       return yield this.loadPingFile(path, /*compressed*/ false);
     }
   }),
 
   /**
    * Remove an archived ping from disk.
    *
    * @param {string} id The pings id.
@@ -666,16 +689,18 @@ let TelemetryStorageImpl = {
   _removeArchivedPing: Task.async(function*(id, timestampCreated, type) {
     this._log.trace("_removeArchivedPing - id: " + id + ", timestampCreated: " + timestampCreated + ", type: " + type);
     const path = getArchivedPingPath(id, new Date(timestampCreated), type);
     const pathCompressed = path + "lz4";
 
     this._log.trace("_removeArchivedPing - removing ping from: " + path);
     yield OS.File.remove(path, {ignoreAbsent: true});
     yield OS.File.remove(pathCompressed, {ignoreAbsent: true});
+    // Remove the ping from the cache.
+    this._archivedPings.delete(id);
   }),
 
   /**
    * Clean the pings archive by removing old pings.
    *
    * @return {Promise} Resolved when the cleanup task completes.
    */
   runCleanPingArchiveTask: function() {
@@ -808,16 +833,29 @@ let TelemetryStorageImpl = {
       // Get the size for this ping.
       const fileSize =
         yield getArchivedPingSize(ping.id, new Date(ping.timestampCreated), ping.type);
       if (!fileSize) {
         this._log.warn("_enforceArchiveQuota - Unable to find the size of ping " + ping.id);
         continue;
       }
 
+      // Enforce a maximum file size limit on archived pings.
+      if (fileSize > PING_FILE_MAXIMUM_SIZE_BYTES) {
+        this._log.error("_enforceArchiveQuota - removing file exceeding size limit, size: " + fileSize);
+        // We just remove the ping from the disk, we don't bother removing it from pingList
+        // since it won't contribute to the quota.
+        yield this._removeArchivedPing(ping.id, ping.timestampCreated, ping.type)
+                  .catch(e => this._log.error("_enforceArchiveQuota - failed to remove archived ping" + ping.id));
+        Telemetry.getHistogramById("TELEMETRY_DISCARDED_ARCHIVED_PINGS_SIZE_MB")
+                 .add(Math.floor(fileSize / 1024 / 1024));
+        Telemetry.getHistogramById("TELEMETRY_PING_SIZE_EXCEEDED_ARCHIVED").add();
+        continue;
+      }
+
       archiveSizeInBytes += fileSize;
 
       if (archiveSizeInBytes < SAFE_QUOTA) {
         // We save the index of the last ping which is ok to keep in order to speed up ping
         // pruning.
         lastPingIndexToKeep = i;
       } else if (archiveSizeInBytes > Policy.getArchiveQuota()) {
         // Ouch, our ping archive is too big. Bail out and start pruning!
@@ -852,19 +890,16 @@ let TelemetryStorageImpl = {
       if (this._shutdown) {
         this._log.trace("_enforceArchiveQuota - Terminating the clean up task due to shutdown");
         return;
       }
 
       // This list is guaranteed to be in order, so remove the pings at its
       // beginning (oldest).
       yield this._removeArchivedPing(ping.id, ping.timestampCreated, ping.type);
-
-      // Remove outdated pings from the cache.
-      this._archivedPings.delete(ping.id);
     }
 
     const endTimeStamp = Policy.now().getTime();
     submitProbes(ARCHIVE_SIZE_PROBE_SPECIAL_VALUE, pingsToPurge.length,
                  Math.ceil(endTimeStamp - startTimeStamp));
   }),
 
   _cleanArchive: Task.async(function*() {
@@ -1186,36 +1221,58 @@ let TelemetryStorageImpl = {
       this._pendingPings.set(ping.id, {
         path: path,
         lastModificationDate: Policy.now().getTime(),
       });
       this._log.trace("savePendingPing - saved ping with id " + ping.id);
     });
   },
 
-  loadPendingPing: function(id) {
+  loadPendingPing: Task.async(function*(id) {
     this._log.trace("loadPendingPing - id: " + id);
     let info = this._pendingPings.get(id);
     if (!info) {
       this._log.trace("loadPendingPing - unknown id " + id);
-      return Promise.reject(new Error("TelemetryStorage.loadPendingPing - no ping with id " + id));
+      throw new Error("TelemetryStorage.loadPendingPing - no ping with id " + id);
+    }
+
+    // Try to get the dimension of the ping. If that fails, update the histograms.
+    let fileSize = 0;
+    try {
+      fileSize = (yield OS.File.stat(info.path)).size;
+    } catch (e if e instanceof OS.File.Error && e.becauseNoSuchFile) {
+      // Fall through and let |loadPingFile| report the error.
     }
 
-    return this.loadPingFile(info.path, false).catch(e => {
+    // Purge pings which are too big.
+    if (fileSize > PING_FILE_MAXIMUM_SIZE_BYTES) {
+      yield this.removePendingPing(id);
+      Telemetry.getHistogramById("TELEMETRY_DISCARDED_PENDING_PINGS_SIZE_MB")
+               .add(Math.floor(fileSize / 1024 / 1024));
+      Telemetry.getHistogramById("TELEMETRY_PING_SIZE_EXCEEDED_PENDING").add();
+      throw new Error("loadPendingPing - exceeded the maximum ping size: " + fileSize);
+    }
+
+    // Try to load the ping file. Update the related histograms on failure.
+    let ping;
+    try {
+      ping = yield this.loadPingFile(info.path, false);
+    } catch(e) {
       // If we failed to load the ping, check what happened and update the histogram.
       // Then propagate the rejection.
       if (e instanceof PingReadError) {
         Telemetry.getHistogramById("TELEMETRY_PENDING_LOAD_FAILURE_READ").add();
       } else if (e instanceof PingParseError) {
         Telemetry.getHistogramById("TELEMETRY_PENDING_LOAD_FAILURE_PARSE").add();
       }
+      throw e;
+    };
 
-      return Promise.reject(e);
-    });
-  },
+    return ping;
+  }),
 
   removePendingPing: function(id) {
     let info = this._pendingPings.get(id);
     if (!info) {
       this._log.trace("removePendingPing - unknown id " + id);
       return Promise.resolve();
     }
 
@@ -1276,16 +1333,31 @@ let TelemetryStorageImpl = {
       let info;
       try {
         info = yield OS.File.stat(file.path);
       } catch (ex) {
         this._log.error("_scanPendingPings - failed to stat file " + file.path, ex);
         continue;
       }
 
+      // Enforce a maximum file size limit on pending pings.
+      if (info.size > PING_FILE_MAXIMUM_SIZE_BYTES) {
+        this._log.error("_scanPendingPings - removing file exceeding size limit " + file.path);
+        try {
+          yield OS.File.remove(file.path);
+        } catch (ex) {
+          this._log.error("_scanPendingPings - failed to remove file " + file.path, ex);
+        } finally {
+          Telemetry.getHistogramById("TELEMETRY_DISCARDED_PENDING_PINGS_SIZE_MB")
+                   .add(Math.floor(info.size / 1024 / 1024));
+          Telemetry.getHistogramById("TELEMETRY_PING_SIZE_EXCEEDED_PENDING").add();
+          continue;
+        }
+      }
+
       let id = OS.Path.basename(file.path);
       if (!UUID_REGEX.test(id)) {
         this._log.trace("_scanPendingPings - filename is not a UUID: " + id);
         id = Utils.generateUUID();
       }
 
       this._pendingPings.set(id, {
         path: file.path,