Bug 1186955 - Part 1 - Telemetry should discard pings that are too big. r=gfritzsche,a=ritu
authorAlessio Placitelli <alessio.placitelli@gmail.com>
Wed, 29 Jul 2015 11:08:00 +0200
changeset 281838 8ade7a203033f4410c2a44597fd80721fabba219
parent 281837 2fe3e641556d1589d6a29b2c0a54f4e9e630d632
child 281839 e1614491cc945742d5c04e26b1b89c7b58f7e70b
push id4932
push userjlund@mozilla.com
push dateMon, 10 Aug 2015 18:23:06 +0000
treeherdermozilla-beta@6dd5a4f5f745 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersgfritzsche, ritu
bugs1186955
milestone41.0a2
Bug 1186955 - Part 1 - Telemetry should discard pings that are too big. r=gfritzsche,a=ritu
toolkit/components/telemetry/Histograms.json
toolkit/components/telemetry/TelemetrySend.jsm
toolkit/components/telemetry/TelemetryStorage.jsm
--- a/toolkit/components/telemetry/Histograms.json
+++ b/toolkit/components/telemetry/Histograms.json
@@ -4567,16 +4567,58 @@
   "TELEMETRY_PENDING_CHECKING_OVER_QUOTA_MS": {
     "alert_emails": ["telemetry-client-dev@mozilla.com"],
     "expires_in_version": "never",
     "kind": "exponential",
     "high": "300000",
     "n_buckets": 20,
     "description": "Time (ms) it takes for checking if the pending pings are over-quota"
   },
+  "TELEMETRY_PING_SIZE_EXCEEDED_SEND": {
+    "alert_emails": ["telemetry-client-dev@mozilla.com"],
+    "expires_in_version": "never",
+    "kind": "count",
+    "description": "Number of Telemetry pings discarded before sending because they exceeded the maximum size"
+  },
+  "TELEMETRY_PING_SIZE_EXCEEDED_PENDING": {
+    "alert_emails": ["telemetry-client-dev@mozilla.com"],
+    "expires_in_version": "never",
+    "kind": "count",
+    "description": "Number of Telemetry pending pings discarded because they exceeded the maximum size"
+  },
+  "TELEMETRY_PING_SIZE_EXCEEDED_ARCHIVED": {
+    "alert_emails": ["telemetry-client-dev@mozilla.com"],
+    "expires_in_version": "never",
+    "kind": "count",
+    "description": "Number of archived Telemetry pings discarded because they exceeded the maximum size"
+  },
+  "TELEMETRY_DISCARDED_PENDING_PINGS_SIZE_MB": {
+    "alert_emails": ["telemetry-client-dev@mozilla.com"],
+    "expires_in_version": "never",
+    "kind": "linear",
+    "high": "30",
+    "n_buckets": 29,
+    "description": "The size (MB) of the Telemetry pending pings exceeding the maximum file size"
+  },
+  "TELEMETRY_DISCARDED_ARCHIVED_PINGS_SIZE_MB": {
+    "alert_emails": ["telemetry-client-dev@mozilla.com"],
+    "expires_in_version": "never",
+    "kind": "linear",
+    "high": "30",
+    "n_buckets": 29,
+    "description": "The size (MB) of the Telemetry archived, compressed, pings exceeding the maximum file size"
+  },
+  "TELEMETRY_DISCARDED_SEND_PINGS_SIZE_MB": {
+    "alert_emails": ["telemetry-client-dev@mozilla.com"],
+    "expires_in_version": "never",
+    "kind": "linear",
+    "high": "30",
+    "n_buckets": 29,
+    "description": "The size (MB) of the ping data submitted to Telemetry exceeding the maximum size"
+  },
   "TELEMETRY_DISCARDED_CONTENT_PINGS_COUNT": {
     "alert_emails": ["perf-telemetry-alerts@mozilla.com"],
     "expires_in_version": "never",
     "kind": "count",
     "description": "Count of discarded content payloads."
   },
   "TELEMETRY_COMPRESS": {
     "expires_in_version": "never",
--- a/toolkit/components/telemetry/TelemetrySend.jsm
+++ b/toolkit/components/telemetry/TelemetrySend.jsm
@@ -911,16 +911,29 @@ let TelemetrySendImpl = {
     request.setRequestHeader("Content-Encoding", "gzip");
     let converter = Cc["@mozilla.org/intl/scriptableunicodeconverter"]
                     .createInstance(Ci.nsIScriptableUnicodeConverter);
     converter.charset = "UTF-8";
     startTime = new Date();
     let utf8Payload = converter.ConvertFromUnicode(JSON.stringify(networkPayload));
     utf8Payload += converter.Finish();
     Telemetry.getHistogramById("TELEMETRY_STRINGIFY").add(new Date() - startTime);
+
+    // Check the size and drop pings which are too big.
+    const pingSizeBytes = utf8Payload.length;
+    if (pingSizeBytes > TelemetryStorage.MAXIMUM_PING_SIZE) {
+      this._log.error("_doPing - submitted ping exceeds the size limit, size: " + pingSizeBytes);
+      Telemetry.getHistogramById("TELEMETRY_PING_SIZE_EXCEEDED_SEND").add();
+      Telemetry.getHistogramById("TELEMETRY_DISCARDED_SEND_PINGS_SIZE_MB")
+               .add(Math.floor(pingSizeBytes / 1024 / 1024));
+      // We don't need to call |request.abort()| as it was not sent yet.
+      this._pendingPingRequests.delete(id);
+      return TelemetryStorage.removePendingPing(id);
+    }
+
     let payloadStream = Cc["@mozilla.org/io/string-input-stream;1"]
                         .createInstance(Ci.nsIStringInputStream);
     startTime = new Date();
     payloadStream.data = gzipCompressString(utf8Payload);
     Telemetry.getHistogramById("TELEMETRY_COMPRESS").add(new Date() - startTime);
     startTime = new Date();
     request.send(payloadStream);
 
--- a/toolkit/components/telemetry/TelemetryStorage.jsm
+++ b/toolkit/components/telemetry/TelemetryStorage.jsm
@@ -50,16 +50,19 @@ const MAX_ARCHIVED_PINGS_RETENTION_MS = 
 
 // Maximum space the archive can take on disk (in Bytes).
 const ARCHIVE_QUOTA_BYTES = 120 * 1024 * 1024; // 120 MB
 // Maximum space the outgoing pings can take on disk, for Desktop (in Bytes).
 const PENDING_PINGS_QUOTA_BYTES_DESKTOP = 15 * 1024 * 1024; // 15 MB
 // Maximum space the outgoing pings can take on disk, for Mobile (in Bytes).
 const PENDING_PINGS_QUOTA_BYTES_MOBILE = 1024 * 1024; // 1 MB
 
+// The maximum size a pending/archived ping can take on disk.
+const PING_FILE_MAXIMUM_SIZE_BYTES = 1024 * 1024; // 1 MB
+
 // This special value is submitted when the archive is outside of the quota.
 const ARCHIVE_SIZE_PROBE_SPECIAL_VALUE = 300;
 
 // This special value is submitted when the pending pings is outside of the quota, as
 // we don't know the size of the pings above the quota.
 const PENDING_PINGS_SIZE_PROBE_SPECIAL_VALUE = 17;
 
 const UUID_REGEX = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
@@ -99,16 +102,22 @@ function waitForAll(it) {
 }
 
 this.TelemetryStorage = {
   get pingDirectoryPath() {
     return OS.Path.join(OS.Constants.Path.profileDir, "saved-telemetry-pings");
   },
 
   /**
+   * The maximum size a ping can have, in bytes.
+   */
+  get MAXIMUM_PING_SIZE() {
+    return PING_FILE_MAXIMUM_SIZE_BYTES;
+  },
+  /**
    * Shutdown & block on any outstanding async activity in this module.
    *
    * @return {Promise} Promise that is resolved when shutdown is complete.
    */
   shutdown: function() {
     return TelemetryStorageImpl.shutdown();
   },
 
@@ -580,23 +589,37 @@ let TelemetryStorageImpl = {
     if (!data) {
       this._log.trace("loadArchivedPing - no ping with id: " + id);
       return Promise.reject(new Error("TelemetryStorage.loadArchivedPing - no ping with id " + id));
     }
 
     const path = getArchivedPingPath(id, new Date(data.timestampCreated), data.type);
     const pathCompressed = path + "lz4";
 
+    // Purge pings which are too big.
+    let checkSize = function*(path) {
+      const fileSize = (yield OS.File.stat(path)).size;
+      if (fileSize > PING_FILE_MAXIMUM_SIZE_BYTES) {
+        Telemetry.getHistogramById("TELEMETRY_DISCARDED_ARCHIVED_PINGS_SIZE_MB")
+                 .add(Math.floor(fileSize / 1024 / 1024));
+        Telemetry.getHistogramById("TELEMETRY_PING_SIZE_EXCEEDED_ARCHIVED").add();
+        yield OS.File.remove(path, {ignoreAbsent: true});
+        throw new Error("loadArchivedPing - exceeded the maximum ping size: " + fileSize);
+      }
+    };
+
     try {
       // Try to load a compressed version of the archived ping first.
       this._log.trace("loadArchivedPing - loading ping from: " + pathCompressed);
+      yield* checkSize(pathCompressed);
       return yield this.loadPingFile(pathCompressed, /*compressed*/ true);
     } catch (ex if ex.becauseNoSuchFile) {
       // If that fails, look for the uncompressed version.
       this._log.trace("loadArchivedPing - compressed ping not found, loading: " + path);
+      yield* checkSize(path);
       return yield this.loadPingFile(path, /*compressed*/ false);
     }
   }),
 
   /**
    * Remove an archived ping from disk.
    *
    * @param {string} id The pings id.
@@ -607,16 +630,18 @@ let TelemetryStorageImpl = {
   _removeArchivedPing: Task.async(function*(id, timestampCreated, type) {
     this._log.trace("_removeArchivedPing - id: " + id + ", timestampCreated: " + timestampCreated + ", type: " + type);
     const path = getArchivedPingPath(id, new Date(timestampCreated), type);
     const pathCompressed = path + "lz4";
 
     this._log.trace("_removeArchivedPing - removing ping from: " + path);
     yield OS.File.remove(path, {ignoreAbsent: true});
     yield OS.File.remove(pathCompressed, {ignoreAbsent: true});
+    // Remove the ping from the cache.
+    this._archivedPings.delete(id);
   }),
 
   /**
    * Clean the pings archive by removing old pings.
    *
    * @return {Promise} Resolved when the cleanup task completes.
    */
   runCleanPingArchiveTask: function() {
@@ -749,16 +774,29 @@ let TelemetryStorageImpl = {
       // Get the size for this ping.
       const fileSize =
         yield getArchivedPingSize(ping.id, new Date(ping.timestampCreated), ping.type);
       if (!fileSize) {
         this._log.warn("_enforceArchiveQuota - Unable to find the size of ping " + ping.id);
         continue;
       }
 
+      // Enforce a maximum file size limit on archived pings.
+      if (fileSize > PING_FILE_MAXIMUM_SIZE_BYTES) {
+        this._log.error("_enforceArchiveQuota - removing file exceeding size limit, size: " + fileSize);
+        // We just remove the ping from the disk, we don't bother removing it from pingList
+        // since it won't contribute to the quota.
+        yield this._removeArchivedPing(ping.id, ping.timestampCreated, ping.type)
+                  .catch(e => this._log.error("_enforceArchiveQuota - failed to remove archived ping" + ping.id));
+        Telemetry.getHistogramById("TELEMETRY_DISCARDED_ARCHIVED_PINGS_SIZE_MB")
+                 .add(Math.floor(fileSize / 1024 / 1024));
+        Telemetry.getHistogramById("TELEMETRY_PING_SIZE_EXCEEDED_ARCHIVED").add();
+        continue;
+      }
+
       archiveSizeInBytes += fileSize;
 
       if (archiveSizeInBytes < SAFE_QUOTA) {
         // We save the index of the last ping which is ok to keep in order to speed up ping
         // pruning.
         lastPingIndexToKeep = i;
       } else if (archiveSizeInBytes > Policy.getArchiveQuota()) {
         // Ouch, our ping archive is too big. Bail out and start pruning!
@@ -793,19 +831,16 @@ let TelemetryStorageImpl = {
       if (this._shutdown) {
         this._log.trace("_enforceArchiveQuota - Terminating the clean up task due to shutdown");
         return;
       }
 
       // This list is guaranteed to be in order, so remove the pings at its
       // beginning (oldest).
       yield this._removeArchivedPing(ping.id, ping.timestampCreated, ping.type);
-
-      // Remove outdated pings from the cache.
-      this._archivedPings.delete(ping.id);
     }
 
     const endTimeStamp = Policy.now().getTime();
     submitProbes(ARCHIVE_SIZE_PROBE_SPECIAL_VALUE, pingsToPurge.length,
                  Math.ceil(endTimeStamp - startTimeStamp));
   }),
 
   _cleanArchive: Task.async(function*() {
@@ -1127,26 +1162,44 @@ let TelemetryStorageImpl = {
       this._pendingPings.set(ping.id, {
         path: path,
         lastModificationDate: Policy.now().getTime(),
       });
       this._log.trace("savePendingPing - saved ping with id " + ping.id);
     });
   },
 
-  loadPendingPing: function(id) {
+  loadPendingPing: Task.async(function*(id) {
     this._log.trace("loadPendingPing - id: " + id);
     let info = this._pendingPings.get(id);
     if (!info) {
       this._log.trace("loadPendingPing - unknown id " + id);
-      return Promise.reject(new Error("TelemetryStorage.loadPendingPing - no ping with id " + id));
+      throw new Error("TelemetryStorage.loadPendingPing - no ping with id " + id);
+    }
+
+    // Try to get the dimension of the ping. If that fails, update the histograms.
+    let fileSize = 0;
+    try {
+      fileSize = (yield OS.File.stat(info.path)).size;
+    } catch (e if e instanceof OS.File.Error && e.becauseNoSuchFile) {
+      // Fall through and let |loadPingFile| report the error.
     }
 
-    return this.loadPingFile(info.path, false);
-  },
+    // Purge pings which are too big.
+    if (fileSize > PING_FILE_MAXIMUM_SIZE_BYTES) {
+      yield this.removePendingPing(id);
+      Telemetry.getHistogramById("TELEMETRY_DISCARDED_PENDING_PINGS_SIZE_MB")
+               .add(Math.floor(fileSize / 1024 / 1024));
+      Telemetry.getHistogramById("TELEMETRY_PING_SIZE_EXCEEDED_PENDING").add();
+      throw new Error("loadPendingPing - exceeded the maximum ping size: " + fileSize);
+    }
+
+    // Try to load the ping file. Update the related histograms on failure.
+    return yield this.loadPingFile(info.path, false);
+  }),
 
   removePendingPing: function(id) {
     let info = this._pendingPings.get(id);
     if (!info) {
       this._log.trace("removePendingPing - unknown id " + id);
       return Promise.resolve();
     }
 
@@ -1207,16 +1260,31 @@ let TelemetryStorageImpl = {
       let info;
       try {
         info = yield OS.File.stat(file.path);
       } catch (ex) {
         this._log.error("_scanPendingPings - failed to stat file " + file.path, ex);
         continue;
       }
 
+      // Enforce a maximum file size limit on pending pings.
+      if (info.size > PING_FILE_MAXIMUM_SIZE_BYTES) {
+        this._log.error("_scanPendingPings - removing file exceeding size limit " + file.path);
+        try {
+          yield OS.File.remove(file.path);
+        } catch (ex) {
+          this._log.error("_scanPendingPings - failed to remove file " + file.path, ex);
+        } finally {
+          Telemetry.getHistogramById("TELEMETRY_DISCARDED_PENDING_PINGS_SIZE_MB")
+                   .add(Math.floor(info.size / 1024 / 1024));
+          Telemetry.getHistogramById("TELEMETRY_PING_SIZE_EXCEEDED_PENDING").add();
+          continue;
+        }
+      }
+
       let id = OS.Path.basename(file.path);
       if (!UUID_REGEX.test(id)) {
         this._log.trace("_scanPendingPings - filename is not a UUID: " + id);
         id = Utils.generateUUID();
       }
 
       this._pendingPings.set(id, {
         path: file.path,