Bug 1162538 - Add the probes for archived ping count and evicted ping count. r=gfritzsche
authorAlessio Placitelli <alessio.placitelli@gmail.com>
Fri, 29 May 2015 07:48:00 +0200
changeset 246404 d96f8b19b33ad8f2b73d9ac56d241d252229bc77
parent 246403 65ecc0c1658e6b6c1d4ba3ae117064d062735159
child 246405 dd4e5f79d88e4dfd7963400ef7411c2fff076f0e
push id28829
push usercbook@mozilla.com
push dateMon, 01 Jun 2015 12:18:22 +0000
treeherdermozilla-central@2fa4bb097f03 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersgfritzsche
bugs1162538
milestone41.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1162538 - Add the probes for archived ping count and evicted ping count. r=gfritzsche
toolkit/components/telemetry/Histograms.json
toolkit/components/telemetry/TelemetryStorage.jsm
--- a/toolkit/components/telemetry/Histograms.json
+++ b/toolkit/components/telemetry/Histograms.json
@@ -4426,16 +4426,94 @@
     "alert_emails": ["perf-telemetry-alerts@mozilla.com"],
     "expires_in_version": "never",
     "kind": "exponential",
     "high": "3000",
     "n_buckets": 10,
     "extended_statistics_ok": true,
     "description": "Number of histograms with total count low errors"
   },
+  "TELEMETRY_ARCHIVE_DIRECTORIES_COUNT": {
+    "alert_emails": ["telemetry-client-dev@mozilla.com"],
+    "expires_in_version": "never",
+    "kind": "linear",
+    "high": "13",
+    "n_buckets": 12,
+    "description": "Number of directories in the archive at scan"
+  },
+  "TELEMETRY_ARCHIVE_OLDEST_DIRECTORY_AGE": {
+    "alert_emails": ["telemetry-client-dev@mozilla.com"],
+    "expires_in_version": "never",
+    "kind": "linear",
+    "high": "13",
+    "n_buckets": 12,
+    "description": "The age of the oldest Telemetry archive directory in months"
+  },
+  "TELEMETRY_ARCHIVE_SCAN_PING_COUNT": {
+    "alert_emails": ["telemetry-client-dev@mozilla.com"],
+    "expires_in_version": "never",
+    "kind": "exponential",
+    "high": "100000",
+    "n_buckets": 100,
+    "description": "Number of Telemetry pings in the archive at scan"
+  },
+  "TELEMETRY_ARCHIVE_SESSION_PING_COUNT": {
+    "alert_emails": ["telemetry-client-dev@mozilla.com"],
+    "expires_in_version": "never",
+    "kind": "count",
+    "description": "Number of Telemetry pings added to the archive during the session"
+  },
+  "TELEMETRY_ARCHIVE_SIZE_MB": {
+    "alert_emails": ["telemetry-client-dev@mozilla.com"],
+    "expires_in_version": "never",
+    "kind": "linear",
+    "high": "300",
+    "n_buckets": 60,
+    "description": "The size of the Telemetry archive (MB)"
+  },
+  "TELEMETRY_ARCHIVE_EVICTED_OVER_QUOTA": {
+    "alert_emails": ["telemetry-client-dev@mozilla.com"],
+    "expires_in_version": "never",
+    "kind": "exponential",
+    "high": "100000",
+    "n_buckets": 100,
+    "description": "Number of Telemetry pings evicted from the archive during cleanup, because they were over the quota"
+  },
+  "TELEMETRY_ARCHIVE_EVICTED_OLD_DIRS": {
+    "alert_emails": ["telemetry-client-dev@mozilla.com"],
+    "expires_in_version": "never",
+    "kind": "linear",
+    "high": "13",
+    "n_buckets": 12,
+    "description": "Number of Telemetry directories evicted from the archive during cleanup, because they were too old"
+  },
+  "TELEMETRY_ARCHIVE_EVICTING_DIRS_MS": {
+    "alert_emails": ["telemetry-client-dev@mozilla.com"],
+    "expires_in_version": "never",
+    "kind": "exponential",
+    "high": "300000",
+    "n_buckets": 20,
+    "description": "Time (ms) it takes for evicting old directories"
+  },
+  "TELEMETRY_ARCHIVE_CHECKING_OVER_QUOTA_MS": {
+    "alert_emails": ["telemetry-client-dev@mozilla.com"],
+    "expires_in_version": "never",
+    "kind": "exponential",
+    "high": "300000",
+    "n_buckets": 20,
+    "description": "Time (ms) it takes for checking if the archive is over-quota"
+  },
+  "TELEMETRY_ARCHIVE_EVICTING_OVER_QUOTA_MS": {
+    "alert_emails": ["telemetry-client-dev@mozilla.com"],
+    "expires_in_version": "never",
+    "kind": "exponential",
+    "high": "300000",
+    "n_buckets": 20,
+    "description": "Time (ms) it takes for evicting over-quota pings"
+  },
   "TELEMETRY_DISCARDED_CONTENT_PINGS_COUNT": {
     "alert_emails": ["perf-telemetry-alerts@mozilla.com"],
     "expires_in_version": "never",
     "kind": "count",
     "description": "Count of discarded content payloads."
   },
   "TELEMETRY_FILES_EVICTED": {
     "alert_emails": ["perf-telemetry-alerts@mozilla.com", "rvitillo@mozilla.com"],
--- a/toolkit/components/telemetry/TelemetryStorage.jsm
+++ b/toolkit/components/telemetry/TelemetryStorage.jsm
@@ -22,16 +22,17 @@ Cu.import("resource://gre/modules/Promis
 
 XPCOMUtils.defineLazyModuleGetter(this, 'Deprecated',
   'resource://gre/modules/Deprecated.jsm');
 
 const LOGGER_NAME = "Toolkit.Telemetry";
 const LOGGER_PREFIX = "TelemetryStorage::";
 
 const Telemetry = Services.telemetry;
+const Utils = TelemetryUtils;
 
 // Compute the path of the pings archive on the first use.
 const DATAREPORTING_DIR = "datareporting";
 const PINGS_ARCHIVE_DIR = "archived";
 const ABORTED_SESSION_FILE_NAME = "aborted-session-ping";
 XPCOMUtils.defineLazyGetter(this, "gDataReportingDir", function() {
   return OS.Path.join(OS.Constants.Path.profileDir, DATAREPORTING_DIR);
 });
@@ -54,16 +55,19 @@ const OVERDUE_PING_FILE_AGE = 7 * 24 * 6
 const MAX_LRU_PINGS = 50;
 
 // Maxmimum time, in milliseconds, archive pings should be retained.
 const MAX_ARCHIVED_PINGS_RETENTION_MS = 180 * 24 * 60 * 60 * 1000;  // 180 days
 
 // Maximum space the archive can take on disk (in Bytes).
 const ARCHIVE_QUOTA_BYTES = 120 * 1024 * 1024; // 120 MB
 
+// This special value is submitted when the archive is outside of the quota.
+const ARCHIVE_SIZE_PROBE_SPECIAL_VALUE = 300;
+
 // The number of outstanding saved pings that we have issued loading
 // requests for.
 let pingsLoaded = 0;
 
 // The number of pings that we have destroyed due to being older
 // than MAX_PING_FILE_AGE.
 let pingsDiscarded = 0;
 
@@ -508,16 +512,18 @@ let TelemetryStorageImpl = {
     yield OS.File.makeDir(OS.Path.dirname(filePath), { ignoreExisting: true,
                                                        from: OS.Constants.Path.profileDir });
     yield this.savePingToFile(ping, filePath, /*overwrite*/ true, /*compressed*/ true);
 
     this._archivedPings.set(ping.id, {
       timestampCreated: creationDate.getTime(),
       type: ping.type,
     });
+
+    Telemetry.getHistogramById("TELEMETRY_ARCHIVE_SESSION_PING_COUNT").add();
   }),
 
   /**
    * Load an archived ping from disk.
    *
    * @param {string} id The pings id.
    * @return {promise<object>} Promise that is resolved with the ping data.
    */
@@ -582,22 +588,25 @@ let TelemetryStorageImpl = {
 
   /**
    * Removes pings which are too old from the pings archive.
    * @return {Promise} Resolved when the ping age check is complete.
    */
   _purgeOldPings: Task.async(function*() {
     this._log.trace("_purgeOldPings");
 
-    const now = Policy.now().getTime();
+    const nowDate = Policy.now();
+    const startTimeStamp = nowDate.getTime();
     let dirIterator = new OS.File.DirectoryIterator(gPingsArchivePath);
     let subdirs = (yield dirIterator.nextBatch()).filter(e => e.isDir);
 
     // Keep track of the newest removed month to update the cache, if needed.
-    let newestRemovedMonth = null;
+    let newestRemovedMonthTimestamp = null;
+    let evictedDirsCount = 0;
+    let maxDirAgeInMonths = 0;
 
     // Walk through the monthly subdirs of the form <YYYY-MM>/
     for (let dir of subdirs) {
       if (this._shutdown) {
         this._log.trace("_purgeOldPings - Terminating the clean up task due to shutdown");
         return;
       }
 
@@ -608,55 +617,68 @@ let TelemetryStorageImpl = {
 
       const archiveDate = getDateFromArchiveDir(dir.name);
       if (!archiveDate) {
         this._log.warn("_purgeOldPings - skipping invalid subdirectory date " + dir.path);
         continue;
       }
 
       // If this archive directory is older than 180 days, remove it.
-      if (!TelemetryUtils.areTimesClose(archiveDate.getTime(), now,
-                                        MAX_ARCHIVED_PINGS_RETENTION_MS)) {
+      if ((startTimeStamp - archiveDate.getTime()) > MAX_ARCHIVED_PINGS_RETENTION_MS) {
         try {
           yield OS.File.removeDir(dir.path);
+          evictedDirsCount++;
 
           // Update the newest removed month.
-          if (archiveDate > newestRemovedMonth) {
-            newestRemovedMonth = archiveDate;
-          }
+          newestRemovedMonthTimestamp = Math.max(archiveDate, newestRemovedMonthTimestamp);
         } catch (ex) {
           this._log.error("_purgeOldPings - Unable to remove " + dir.path, ex);
         }
+      } else {
+        // We're not removing this directory, so record the age for the oldest directory.
+        const dirAgeInMonths = Utils.getElapsedTimeInMonths(archiveDate, nowDate);
+        maxDirAgeInMonths = Math.max(dirAgeInMonths, maxDirAgeInMonths);
       }
     }
 
     // Trigger scanning of the archived pings.
     yield this.loadArchivedPingList();
 
     // Refresh the cache: we could still skip this, but it's cheap enough to keep it
     // to avoid introducing task dependencies.
-    if (newestRemovedMonth) {
+    if (newestRemovedMonthTimestamp) {
       // Scan the archive cache for pings older than the newest directory pruned above.
       for (let [id, info] of this._archivedPings) {
         const timestampCreated = new Date(info.timestampCreated);
-        if (timestampCreated.getTime() > newestRemovedMonth.getTime()) {
+        if (timestampCreated.getTime() > newestRemovedMonthTimestamp) {
           continue;
         }
         // Remove outdated pings from the cache.
         this._archivedPings.delete(id);
       }
     }
+
+    const endTimeStamp = Policy.now().getTime();
+
+    // Save the time it takes to evict old directories and the eviction count.
+    Telemetry.getHistogramById("TELEMETRY_ARCHIVE_EVICTED_OLD_DIRS")
+             .add(evictedDirsCount);
+    Telemetry.getHistogramById("TELEMETRY_ARCHIVE_EVICTING_DIRS_MS")
+             .add(Math.ceil(endTimeStamp - startTimeStamp));
+    Telemetry.getHistogramById("TELEMETRY_ARCHIVE_OLDEST_DIRECTORY_AGE")
+             .add(maxDirAgeInMonths);
   }),
 
   /**
    * Enforce a disk quota for the pings archive.
    * @return {Promise} Resolved when the quota check is complete.
    */
   _enforceArchiveQuota: Task.async(function*() {
     this._log.trace("_enforceArchiveQuota");
+    const startTimeStamp = Policy.now().getTime();
 
     // Build an ordered list, from newer to older, of archived pings.
     let pingList = [for (p of this._archivedPings) {
       id: p[0],
       timestampCreated: p[1].timestampCreated,
       type: p[1].type,
     }];
 
@@ -693,18 +715,29 @@ let TelemetryStorageImpl = {
         // pruning.
         lastPingIndexToKeep = i;
       } else if (archiveSizeInBytes > Policy.getArchiveQuota()) {
         // Ouch, our ping archive is too big. Bail out and start pruning!
         break;
       }
     }
 
-    // Check if we're using too much space. If not, bail out.
+    // Save the time it takes to check if the archive is over-quota.
+    Telemetry.getHistogramById("TELEMETRY_ARCHIVE_CHECKING_OVER_QUOTA_MS")
+             .add(Math.round(Policy.now().getTime() - startTimeStamp));
+
+    let submitProbes = (sizeInMB, evictedPings, elapsedMs) => {
+      Telemetry.getHistogramById("TELEMETRY_ARCHIVE_SIZE_MB").add(sizeInMB);
+      Telemetry.getHistogramById("TELEMETRY_ARCHIVE_EVICTED_OVER_QUOTA").add(evictedPings);
+      Telemetry.getHistogramById("TELEMETRY_ARCHIVE_EVICTING_OVER_QUOTA_MS").add(elapsedMs);
+    };
+
+    // Check if we're using too much space. If not, submit the archive size and bail out.
     if (archiveSizeInBytes < Policy.getArchiveQuota()) {
+      submitProbes(Math.round(archiveSizeInBytes / 1024 / 1024), 0, 0);
       return;
     }
 
     this._log.info("_enforceArchiveQuota - archive size: " + archiveSizeInBytes + "bytes"
                    + ", safety quota: " + SAFE_QUOTA + "bytes");
 
     let pingsToPurge = pingList.slice(lastPingIndexToKeep + 1);
 
@@ -717,16 +750,20 @@ let TelemetryStorageImpl = {
 
       // This list is guaranteed to be in order, so remove the pings at its
       // beginning (oldest).
       yield this._removeArchivedPing(ping.id, ping.timestampCreated, ping.type);
 
       // Remove outdated pings from the cache.
       this._archivedPings.delete(ping.id);
     }
+
+    const endTimeStamp = Policy.now().getTime();
+    submitProbes(ARCHIVE_SIZE_PROBE_SPECIAL_VALUE, pingsToPurge.length,
+                 Math.ceil(endTimeStamp - startTimeStamp));
   }),
 
   _cleanArchive: Task.async(function*() {
     this._log.trace("cleanArchiveTask");
 
     if (!(yield OS.File.exists(gPingsArchivePath))) {
       return;
     }
@@ -777,30 +814,34 @@ let TelemetryStorageImpl = {
     // Since there's no archive loading task running, start it.
     this._scanArchiveTask = this._scanArchive().then(clear, clear);
     return this._scanArchiveTask;
   },
 
   _scanArchive: Task.async(function*() {
     this._log.trace("_scanArchive");
 
+    let submitProbes = (pingCount, dirCount) => {
+      Telemetry.getHistogramById("TELEMETRY_ARCHIVE_SCAN_PING_COUNT")
+               .add(pingCount);
+      Telemetry.getHistogramById("TELEMETRY_ARCHIVE_DIRECTORIES_COUNT")
+               .add(dirCount);
+    };
+
     if (!(yield OS.File.exists(gPingsArchivePath))) {
+      submitProbes(0, 0);
       return new Map();
     }
 
     let dirIterator = new OS.File.DirectoryIterator(gPingsArchivePath);
-    let subdirs = (yield dirIterator.nextBatch()).filter(e => e.isDir);
+    let subdirs =
+      (yield dirIterator.nextBatch()).filter(e => e.isDir).filter(e => isValidArchiveDir(e.name));
 
     // Walk through the monthly subdirs of the form <YYYY-MM>/
     for (let dir of subdirs) {
-      if (!isValidArchiveDir(dir.name)) {
-        this._log.warn("_scanArchive - skipping invalidly named subdirectory " + dir.path);
-        continue;
-      }
-
       this._log.trace("_scanArchive - checking in subdir: " + dir.path);
       let pingIterator = new OS.File.DirectoryIterator(dir.path);
       let pings = (yield pingIterator.nextBatch()).filter(e => !e.isDir);
 
       // Now process any ping files of the form "<timestamp>.<uuid>.<type>.[json|jsonlz4]".
       for (let p of pings) {
         // data may be null if the filename doesn't match the above format.
         let data = this._getArchivedPingDataFromFileName(p.name);
@@ -825,16 +866,18 @@ let TelemetryStorageImpl = {
           timestampCreated: data.timestamp,
           type: data.type,
         });
       }
     }
 
     // Mark the archive as scanned, so we no longer hit the disk.
     this._scannedArchiveDirectory = true;
+    // Update the ping and directories count histograms.
+    submitProbes(this._archivedPings.size, subdirs.length);
     return this._archivedPings;
   }),
 
   /**
    * Save a single ping to a file.
    *
    * @param {object} ping The content of the ping to save.
    * @param {string} file The destination file.