toolkit/components/crashes/CrashManager.jsm
author Mark Banner <standard8@mozilla.com>
Tue, 08 Nov 2016 12:49:32 +0000
changeset 348685 6868b4b3f1ae395fa6b244d9e9406aab1af00771
parent 344927 6594e386f0189fcef2aad159103a2e8fe1963e0d
child 350790 4873321befff7cc0c251b34b29192140070f1702
permissions -rw-r--r--
Bug 1315951 - Fix no-unused-vars issues in toolkit/components (except places). r=mossop MozReview-Commit-ID: 8svkCKZHxsq

/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

"use strict";

const {classes: Cc, interfaces: Ci, utils: Cu} = Components;
const myScope = this;

Cu.import("resource://gre/modules/Log.jsm", this);
Cu.import("resource://gre/modules/osfile.jsm", this);
Cu.import("resource://gre/modules/Promise.jsm", this);
Cu.import("resource://gre/modules/Services.jsm", this);
Cu.import("resource://gre/modules/Task.jsm", this);
Cu.import("resource://gre/modules/Timer.jsm", this);
Cu.import("resource://gre/modules/XPCOMUtils.jsm", this);
Cu.import("resource://gre/modules/TelemetryController.jsm");
Cu.import("resource://gre/modules/KeyValueParser.jsm");

this.EXPORTED_SYMBOLS = [
  "CrashManager",
];

/**
 * How long to wait after application startup before crash event files are
 * automatically aggregated.
 *
 * We defer aggregation for performance reasons, as we don't want too many
 * services competing for I/O immediately after startup.
 */
const AGGREGATE_STARTUP_DELAY_MS = 57000;

const MILLISECONDS_IN_DAY = 24 * 60 * 60 * 1000;

// Converts Date to days since UNIX epoch.
// This was copied from /services/metrics.storage.jsm. The implementation
// does not account for leap seconds.
function dateToDays(date) {
  return Math.floor(date.getTime() / MILLISECONDS_IN_DAY);
}


/**
 * A gateway to crash-related data.
 *
 * This type is generic and can be instantiated any number of times.
 * However, most applications will typically only have one instance
 * instantiated and that instance will point to profile and user appdata
 * directories.
 *
 * Instances are created by passing an object with properties.
 * Recognized properties are:
 *
 *   pendingDumpsDir (string) (required)
 *     Where dump files that haven't been uploaded are located.
 *
 *   submittedDumpsDir (string) (required)
 *     Where records of uploaded dumps are located.
 *
 *   eventsDirs (array)
 *     Directories (defined as strings) where events files are written. This
 *     instance will collects events from files in the directories specified.
 *
 *   storeDir (string)
 *     Directory we will use for our data store. This instance will write
 *     data files into the directory specified.
 *
 *   telemetryStoreSizeKey (string)
 *     Telemetry histogram to report store size under.
 */
this.CrashManager = function (options) {
  for (let k of ["pendingDumpsDir", "submittedDumpsDir", "eventsDirs",
    "storeDir"]) {
    if (!(k in options)) {
      throw new Error("Required key not present in options: " + k);
    }
  }

  this._log = Log.repository.getLogger("Crashes.CrashManager");

  for (let k in options) {
    let v = options[k];

    switch (k) {
      case "pendingDumpsDir":
        this._pendingDumpsDir = v;
        break;

      case "submittedDumpsDir":
        this._submittedDumpsDir = v;
        break;

      case "eventsDirs":
        this._eventsDirs = v;
        break;

      case "storeDir":
        this._storeDir = v;
        break;

      case "telemetryStoreSizeKey":
        this._telemetryStoreSizeKey = v;
        break;

      default:
        throw new Error("Unknown property in options: " + k);
    }
  }

  // Promise for in-progress aggregation operation. We store it on the
  // object so it can be returned for in-progress operations.
  this._aggregatePromise = null;

  // The CrashStore currently attached to this object.
  this._store = null;

  // A Task to retrieve the store. This is needed to avoid races when
  // _getStore() is called multiple times in a short interval.
  this._getStoreTask = null;

  // The timer controlling the expiration of the CrashStore instance.
  this._storeTimer = null;

  // This is a semaphore that prevents the store from being freed by our
  // timer-based resource freeing mechanism.
  this._storeProtectedCount = 0;
};

this.CrashManager.prototype = Object.freeze({
  // A crash in the main process.
  PROCESS_TYPE_MAIN: "main",

  // A crash in a content process.
  PROCESS_TYPE_CONTENT: "content",

  // A crash in a plugin process.
  PROCESS_TYPE_PLUGIN: "plugin",

  // A crash in a Gecko media plugin process.
  PROCESS_TYPE_GMPLUGIN: "gmplugin",

  // A crash in the GPU process.
  PROCESS_TYPE_GPU: "gpu",

  // A real crash.
  CRASH_TYPE_CRASH: "crash",

  // A hang.
  CRASH_TYPE_HANG: "hang",

  // Submission result values.
  SUBMISSION_RESULT_OK: "ok",
  SUBMISSION_RESULT_FAILED: "failed",

  DUMP_REGEX: /^([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\.dmp$/i,
  SUBMITTED_REGEX: /^bp-(?:hr-)?([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\.txt$/i,
  ALL_REGEX: /^(.*)$/,

  // How long the store object should persist in memory before being
  // automatically garbage collected.
  STORE_EXPIRATION_MS: 60 * 1000,

  // Number of days after which a crash with no activity will get purged.
  PURGE_OLDER_THAN_DAYS: 180,

  // The following are return codes for individual event file processing.
  // File processed OK.
  EVENT_FILE_SUCCESS: "ok",
  // The event appears to be malformed.
  EVENT_FILE_ERROR_MALFORMED: "malformed",
  // The type of event is unknown.
  EVENT_FILE_ERROR_UNKNOWN_EVENT: "unknown-event",

  /**
   * Obtain a list of all dumps pending upload.
   *
   * The returned value is a promise that resolves to an array of objects
   * on success. Each element in the array has the following properties:
   *
   *   id (string)
   *      The ID of the crash (a UUID).
   *
   *   path (string)
   *      The filename of the crash (<UUID.dmp>)
   *
   *   date (Date)
   *      When this dump was created
   *
   * The returned arry is sorted by the modified time of the file backing
   * the entry, oldest to newest.
   *
   * @return Promise<Array>
   */
  pendingDumps: function () {
    return this._getDirectoryEntries(this._pendingDumpsDir, this.DUMP_REGEX);
  },

  /**
   * Obtain a list of all dump files corresponding to submitted crashes.
   *
   * The returned value is a promise that resolves to an Array of
   * objects. Each object has the following properties:
   *
   *   path (string)
   *     The path of the file this entry comes from.
   *
   *   id (string)
   *     The crash UUID.
   *
   *   date (Date)
   *     The (estimated) date this crash was submitted.
   *
   * The returned array is sorted by the modified time of the file backing
   * the entry, oldest to newest.
   *
   * @return Promise<Array>
   */
  submittedDumps: function () {
    return this._getDirectoryEntries(this._submittedDumpsDir,
                                     this.SUBMITTED_REGEX);
  },

  /**
   * Aggregates "loose" events files into the unified "database."
   *
   * This function should be called periodically to collect metadata from
   * all events files into the central data store maintained by this manager.
   *
   * Once events have been stored in the backing store the corresponding
   * source files are deleted.
   *
   * Only one aggregation operation is allowed to occur at a time. If this
   * is called when an existing aggregation is in progress, the promise for
   * the original call will be returned.
   *
   * @return promise<int> The number of event files that were examined.
   */
  aggregateEventsFiles: function () {
    if (this._aggregatePromise) {
      return this._aggregatePromise;
    }

    return this._aggregatePromise = Task.spawn(function* () {
      if (this._aggregatePromise) {
        return this._aggregatePromise;
      }

      try {
        let unprocessedFiles = yield this._getUnprocessedEventsFiles();

        let deletePaths = [];
        let needsSave = false;

        this._storeProtectedCount++;
        for (let entry of unprocessedFiles) {
          try {
            let result = yield this._processEventFile(entry);

            switch (result) {
              case this.EVENT_FILE_SUCCESS:
                needsSave = true;
                // Fall through.

              case this.EVENT_FILE_ERROR_MALFORMED:
                deletePaths.push(entry.path);
                break;

              case this.EVENT_FILE_ERROR_UNKNOWN_EVENT:
                break;

              default:
                Cu.reportError("Unhandled crash event file return code. Please " +
                               "file a bug: " + result);
            }
          } catch (ex) {
            if (ex instanceof OS.File.Error) {
              this._log.warn("I/O error reading " + entry.path, ex);
            } else {
              // We should never encounter an exception. This likely represents
              // a coding error because all errors should be detected and
              // converted to return codes.
              //
              // If we get here, report the error and delete the source file
              // so we don't see it again.
              Cu.reportError("Exception when processing crash event file: " +
                             Log.exceptionStr(ex));
              deletePaths.push(entry.path);
            }
          }
        }

        if (needsSave) {
          let store = yield this._getStore();
          yield store.save();
        }

        for (let path of deletePaths) {
          try {
            yield OS.File.remove(path);
          } catch (ex) {
            this._log.warn("Error removing event file (" + path + ")", ex);
          }
        }

        return unprocessedFiles.length;

      } finally {
        this._aggregatePromise = false;
        this._storeProtectedCount--;
      }
    }.bind(this));
  },

  /**
   * Prune old crash data.
   *
   * @param date
   *        (Date) The cutoff point for pruning. Crashes without data newer
   *        than this will be pruned.
   */
  pruneOldCrashes: function (date) {
    return Task.spawn(function* () {
      let store = yield this._getStore();
      store.pruneOldCrashes(date);
      yield store.save();
    }.bind(this));
  },

  /**
   * Run tasks that should be periodically performed.
   */
  runMaintenanceTasks: function () {
    return Task.spawn(function* () {
      yield this.aggregateEventsFiles();

      let offset = this.PURGE_OLDER_THAN_DAYS * MILLISECONDS_IN_DAY;
      yield this.pruneOldCrashes(new Date(Date.now() - offset));
    }.bind(this));
  },

  /**
   * Schedule maintenance tasks for some point in the future.
   *
   * @param delay
   *        (integer) Delay in milliseconds when maintenance should occur.
   */
  scheduleMaintenance: function (delay) {
    let deferred = Promise.defer();

    setTimeout(() => {
      this.runMaintenanceTasks().then(deferred.resolve, deferred.reject);
    }, delay);

    return deferred.promise;
  },

  /**
   * Record the occurrence of a crash.
   *
   * This method skips event files altogether and writes directly and
   * immediately to the manager's data store.
   *
   * @param processType (string) One of the PROCESS_TYPE constants.
   * @param crashType (string) One of the CRASH_TYPE constants.
   * @param id (string) Crash ID. Likely a UUID.
   * @param date (Date) When the crash occurred.
   * @param metadata (dictionary) Crash metadata, may be empty.
   *
   * @return promise<null> Resolved when the store has been saved.
   */
  addCrash: function (processType, crashType, id, date, metadata) {
    return Task.spawn(function* () {
      let store = yield this._getStore();
      if (store.addCrash(processType, crashType, id, date, metadata)) {
        yield store.save();
      }
    }.bind(this));
  },

  /**
   * Record the remote ID for a crash.
   *
   * @param crashID (string) Crash ID. Likely a UUID.
   * @param remoteID (Date) Server/Breakpad ID.
   *
   * @return boolean True if the remote ID was recorded.
   */
  setRemoteCrashID: Task.async(function* (crashID, remoteID) {
    let store = yield this._getStore();
    if (store.setRemoteCrashID(crashID, remoteID)) {
      yield store.save();
    }
  }),

  /**
   * Generate a submission ID for use with addSubmission{Attempt,Result}.
   */
  generateSubmissionID() {
    return "sub-" + Cc["@mozilla.org/uuid-generator;1"]
                      .getService(Ci.nsIUUIDGenerator)
                      .generateUUID().toString().slice(1, -1);
  },

  /**
   * Record the occurrence of a submission attempt for a crash.
   *
   * @param crashID (string) Crash ID. Likely a UUID.
   * @param submissionID (string) Submission ID. Likely a UUID.
   * @param date (Date) When the attempt occurred.
   *
   * @return boolean True if the attempt was recorded and false if not.
   */
  addSubmissionAttempt: Task.async(function* (crashID, submissionID, date) {
    let store = yield this._getStore();
    if (store.addSubmissionAttempt(crashID, submissionID, date)) {
      yield store.save();
    }
  }),

  /**
   * Record the occurrence of a submission result for a crash.
   *
   * @param crashID (string) Crash ID. Likely a UUID.
   * @param submissionID (string) Submission ID. Likely a UUID.
   * @param date (Date) When the submission result was obtained.
   * @param result (string) One of the SUBMISSION_RESULT constants.
   *
   * @return boolean True if the result was recorded and false if not.
   */
  addSubmissionResult: Task.async(function* (crashID, submissionID, date, result) {
    let store = yield this._getStore();
    if (store.addSubmissionResult(crashID, submissionID, date, result)) {
      yield store.save();
    }
  }),

  /**
   * Set the classification of a crash.
   *
   * @param crashID (string) Crash ID. Likely a UUID.
   * @param classifications (array) Crash classifications.
   *
   * @return boolean True if the data was recorded and false if not.
   */
  setCrashClassifications: Task.async(function* (crashID, classifications) {
    let store = yield this._getStore();
    if (store.setCrashClassifications(crashID, classifications)) {
      yield store.save();
    }
  }),

  /**
   * Obtain the paths of all unprocessed events files.
   *
   * The promise-resolved array is sorted by file mtime, oldest to newest.
   */
  _getUnprocessedEventsFiles: function () {
    return Task.spawn(function* () {
      let entries = [];

      for (let dir of this._eventsDirs) {
        for (let e of yield this._getDirectoryEntries(dir, this.ALL_REGEX)) {
          entries.push(e);
        }
      }

      entries.sort((a, b) => { return a.date - b.date; });

      return entries;
    }.bind(this));
  },

  // See docs/crash-events.rst for the file format specification.
  _processEventFile: function (entry) {
    return Task.spawn(function* () {
      let data = yield OS.File.read(entry.path);
      let store = yield this._getStore();

      let decoder = new TextDecoder();
      data = decoder.decode(data);

      let type, time;
      let start = 0;
      for (let i = 0; i < 2; i++) {
        let index = data.indexOf("\n", start);
        if (index == -1) {
          return this.EVENT_FILE_ERROR_MALFORMED;
        }

        let sub = data.substring(start, index);
        switch (i) {
          case 0:
            type = sub;
            break;
          case 1:
            time = sub;
            try {
              time = parseInt(time, 10);
            } catch (ex) {
              return this.EVENT_FILE_ERROR_MALFORMED;
            }
        }

        start = index + 1;
      }
      let date = new Date(time * 1000);
      let payload = data.substring(start);

      return this._handleEventFilePayload(store, entry, type, date, payload);
    }.bind(this));
  },

  _handleEventFilePayload: function (store, entry, type, date, payload) {
      // The payload types and formats are documented in docs/crash-events.rst.
      // Do not change the format of an existing type. Instead, invent a new
      // type.
      // DO NOT ADD NEW TYPES WITHOUT DOCUMENTING!
      let lines = payload.split("\n");

      switch (type) {
        case "crash.main.1":
          if (lines.length > 1) {
            this._log.warn("Multiple lines unexpected in payload for " +
                           entry.path);
            return this.EVENT_FILE_ERROR_MALFORMED;
          }
          // fall-through
        case "crash.main.2":
          let crashID = lines[0];
          let metadata = parseKeyValuePairsFromLines(lines.slice(1));
          store.addCrash(this.PROCESS_TYPE_MAIN, this.CRASH_TYPE_CRASH,
                         crashID, date, metadata);

          // If we have a saved environment, use it. Otherwise report
          // the current environment.
          let crashEnvironment = null;
          let sessionId = null;
          let stackTraces = null;
          let reportMeta = Cu.cloneInto(metadata, myScope);
          if ('TelemetryEnvironment' in reportMeta) {
            try {
              crashEnvironment = JSON.parse(reportMeta.TelemetryEnvironment);
            } catch (e) {
              Cu.reportError(e);
            }
            delete reportMeta.TelemetryEnvironment;
          }
          if ('TelemetrySessionId' in reportMeta) {
            sessionId = reportMeta.TelemetrySessionId;
            delete reportMeta.TelemetrySessionId;
          }
          if ('StackTraces' in reportMeta) {
            try {
              stackTraces = JSON.parse(reportMeta.StackTraces);
            } catch (e) {
              Cu.reportError(e);
            }
            delete reportMeta.StackTraces;
          }
          TelemetryController.submitExternalPing("crash",
            {
              version: 1,
              crashDate: date.toISOString().slice(0, 10), // YYYY-MM-DD
              sessionId: sessionId,
              crashId: entry.id,
              stackTraces: stackTraces,
              metadata: reportMeta,
              hasCrashEnvironment: (crashEnvironment !== null),
            },
            {
              retentionDays: 180,
              addClientId: true,
              addEnvironment: true,
              overrideEnvironment: crashEnvironment,
            });
          break;

        case "crash.submission.1":
          if (lines.length == 3) {
            let [crashID, result, remoteID] = lines;
            store.addCrash(this.PROCESS_TYPE_MAIN, this.CRASH_TYPE_CRASH,
                           crashID, date);

            let submissionID = this.generateSubmissionID();
            let succeeded = result === "true";
            store.addSubmissionAttempt(crashID, submissionID, date);
            store.addSubmissionResult(crashID, submissionID, date,
                                      succeeded ? this.SUBMISSION_RESULT_OK :
                                                  this.SUBMISSION_RESULT_FAILED);
            if (succeeded) {
              store.setRemoteCrashID(crashID, remoteID);
            }
          } else {
            return this.EVENT_FILE_ERROR_MALFORMED;
          }
          break;

        default:
          return this.EVENT_FILE_ERROR_UNKNOWN_EVENT;
      }

      return this.EVENT_FILE_SUCCESS;
  },

  /**
   * The resolved promise is an array of objects with the properties:
   *
   *   path -- String filename
   *   id -- regexp.match()[1] (likely the crash ID)
   *   date -- Date mtime of the file
   */
  _getDirectoryEntries: function (path, re) {
    return Task.spawn(function* () {
      try {
        yield OS.File.stat(path);
      } catch (ex) {
        if (!(ex instanceof OS.File.Error) || !ex.becauseNoSuchFile) {
          throw ex;
        }
        return [];
      }

      let it = new OS.File.DirectoryIterator(path);
      let entries = [];

      try {
        yield it.forEach((entry, index, it) => {
          if (entry.isDir) {
            return undefined;
          }

          let match = re.exec(entry.name);
          if (!match) {
            return undefined;
          }

          return OS.File.stat(entry.path).then((info) => {
            entries.push({
              path: entry.path,
              id: match[1],
              date: info.lastModificationDate,
            });
          });
        });
      } finally {
        it.close();
      }

      entries.sort((a, b) => { return a.date - b.date; });

      return entries;
    }.bind(this));
  },

  _getStore: function () {
    if (this._getStoreTask) {
      return this._getStoreTask;
    }

    return this._getStoreTask = Task.spawn(function* () {
      try {
        if (!this._store) {
          yield OS.File.makeDir(this._storeDir, {
            ignoreExisting: true,
            unixMode: OS.Constants.libc.S_IRWXU,
          });

          let store = new CrashStore(this._storeDir,
                                     this._telemetryStoreSizeKey);
          yield store.load();

          this._store = store;
          this._storeTimer = Cc["@mozilla.org/timer;1"]
                               .createInstance(Ci.nsITimer);
        }

        // The application can go long periods without interacting with the
        // store. Since the store takes up resources, we automatically "free"
        // the store after inactivity so resources can be returned to the
        // system. We do this via a timer and a mechanism that tracks when the
        // store is being accessed.
        this._storeTimer.cancel();

        // This callback frees resources from the store unless the store
        // is protected from freeing by some other process.
        let timerCB = function () {
          if (this._storeProtectedCount) {
            this._storeTimer.initWithCallback(timerCB, this.STORE_EXPIRATION_MS,
                                              this._storeTimer.TYPE_ONE_SHOT);
            return;
          }

          // We kill the reference that we hold. GC will kill it later. If
          // someone else holds a reference, that will prevent GC until that
          // reference is gone.
          this._store = null;
          this._storeTimer = null;
        }.bind(this);

        this._storeTimer.initWithCallback(timerCB, this.STORE_EXPIRATION_MS,
                                          this._storeTimer.TYPE_ONE_SHOT);

        return this._store;
      } finally {
        this._getStoreTask = null;
      }
    }.bind(this));
  },

  /**
   * Obtain information about all known crashes.
   *
   * Returns an array of CrashRecord instances. Instances are read-only.
   */
  getCrashes: function () {
    return Task.spawn(function* () {
      let store = yield this._getStore();

      return store.crashes;
    }.bind(this));
  },

  getCrashCountsByDay: function () {
    return Task.spawn(function* () {
      let store = yield this._getStore();

      return store._countsByDay;
    }.bind(this));
  },
});

var gCrashManager;

/**
 * Interface to storage of crash data.
 *
 * This type handles storage of crash metadata. It exists as a separate type
 * from the crash manager for performance reasons: since all crash metadata
 * needs to be loaded into memory for access, we wish to easily dispose of all
 * associated memory when this data is no longer needed. Having an isolated
 * object whose references can easily be lost faciliates that simple disposal.
 *
 * When metadata is updated, the caller must explicitly persist the changes
 * to disk. This prevents excessive I/O during updates.
 *
 * The store has a mechanism for ensuring it doesn't grow too large. A ceiling
 * is placed on the number of daily events that can occur for events that can
 * occur with relatively high frequency, notably plugin crashes and hangs
 * (plugins can enter cycles where they repeatedly crash). If we've reached
 * the high water mark and new data arrives, it's silently dropped.
 * However, the count of actual events is always preserved. This allows
 * us to report on the severity of problems beyond the storage threshold.
 *
 * Main process crashes are excluded from limits because they are both
 * important and should be rare.
 *
 * @param storeDir (string)
 *        Directory the store should be located in.
 * @param telemetrySizeKey (string)
 *        The telemetry histogram that should be used to store the size
 *        of the data file.
 */
function CrashStore(storeDir, telemetrySizeKey) {
  this._storeDir = storeDir;
  this._telemetrySizeKey = telemetrySizeKey;

  this._storePath = OS.Path.join(storeDir, "store.json.mozlz4");

  // Holds the read data from disk.
  this._data = null;

  // Maps days since UNIX epoch to a Map of event types to counts.
  // This data structure is populated when the JSON file is loaded
  // and is also updated when new events are added.
  this._countsByDay = new Map();
}

CrashStore.prototype = Object.freeze({
  // Maximum number of events to store per day. This establishes a
  // ceiling on the per-type/per-day records that will be stored.
  HIGH_WATER_DAILY_THRESHOLD: 100,

  /**
   * Reset all data.
   */
  reset() {
    this._data = {
      v: 1,
      crashes: new Map(),
      corruptDate: null,
    };
    this._countsByDay = new Map();
  },

  /**
   * Load data from disk.
   *
   * @return Promise
   */
  load: function () {
    return Task.spawn(function* () {
      // Loading replaces data.
      this.reset();

      try {
        let decoder = new TextDecoder();
        let data = yield OS.File.read(this._storePath, {compression: "lz4"});
        data = JSON.parse(decoder.decode(data));

        if (data.corruptDate) {
          this._data.corruptDate = new Date(data.corruptDate);
        }

        // actualCounts is used to validate that the derived counts by
        // days stored in the payload matches up to actual data.
        let actualCounts = new Map();

        // In the past, submissions were stored as separate crash records
        // with an id of e.g. "someID-submission". If we find IDs ending
        // with "-submission", we will need to convert the data to be stored
        // as actual submissions.
        //
        // The old way of storing submissions was used from FF33 - FF34. We
        // drop this old data on the floor.
        for (let id in data.crashes) {
          if (id.endsWith("-submission")) {
            continue;
          }

          let crash = data.crashes[id];
          let denormalized = this._denormalize(crash);

          denormalized.submissions = new Map();
          if (crash.submissions) {
            for (let submissionID in crash.submissions) {
              let submission = crash.submissions[submissionID];
              denormalized.submissions.set(submissionID,
                                           this._denormalize(submission));
            }
          }

          this._data.crashes.set(id, denormalized);

          let key = dateToDays(denormalized.crashDate) + "-" + denormalized.type;
          actualCounts.set(key, (actualCounts.get(key) || 0) + 1);

          // If we have an OOM size, count the crash as an OOM in addition to
          // being a main process crash.
          if (denormalized.metadata &&
              denormalized.metadata.OOMAllocationSize) {
            let oomKey = key + "-oom";
            actualCounts.set(oomKey, (actualCounts.get(oomKey) || 0) + 1);
          }

        }

        // The validation in this loop is arguably not necessary. We perform
        // it as a defense against unknown bugs.
        for (let dayKey in data.countsByDay) {
          let day = parseInt(dayKey, 10);
          for (let type in data.countsByDay[day]) {
            this._ensureCountsForDay(day);

            let count = data.countsByDay[day][type];
            let key = day + "-" + type;

            // If the payload says we have data for a given day but we
            // don't, the payload is wrong. Ignore it.
            if (!actualCounts.has(key)) {
              continue;
            }

            // If we encountered more data in the payload than what the
            // data structure says, use the proper value.
            count = Math.max(count, actualCounts.get(key));

            this._countsByDay.get(day).set(type, count);
          }
        }
      } catch (ex) {
        // Missing files (first use) are allowed.
        if (!(ex instanceof OS.File.Error) || !ex.becauseNoSuchFile) {
          // If we can't load for any reason, mark a corrupt date in the instance
          // and swallow the error.
          //
          // The marking of a corrupted file is intentionally not persisted to
          // disk yet. Instead, we wait until the next save(). This is to give
          // non-permanent failures the opportunity to recover on their own.
          this._data.corruptDate = new Date();
        }
      }
    }.bind(this));
  },

  /**
   * Save data to disk.
   *
   * @return Promise<null>
   */
  save: function () {
    return Task.spawn(function* () {
      if (!this._data) {
        return;
      }

      let normalized = {
        // The version should be incremented whenever the format
        // changes.
        v: 1,
        // Maps crash IDs to objects defining the crash.
        crashes: {},
        // Maps days since UNIX epoch to objects mapping event types to
        // counts. This is a mirror of this._countsByDay. e.g.
        // {
        //    15000: {
        //        "main-crash": 2,
        //        "plugin-crash": 1
        //    }
        // }
        countsByDay: {},

        // When the store was last corrupted.
        corruptDate: null,
      };

      if (this._data.corruptDate) {
        normalized.corruptDate = this._data.corruptDate.getTime();
      }

      for (let [id, crash] of this._data.crashes) {
        let c = this._normalize(crash);

        c.submissions = {};
        for (let [submissionID, submission] of crash.submissions) {
          c.submissions[submissionID] = this._normalize(submission);
        }

        normalized.crashes[id] = c;
      }

      for (let [day, m] of this._countsByDay) {
        normalized.countsByDay[day] = {};
        for (let [type, count] of m) {
          normalized.countsByDay[day][type] = count;
        }
      }

      let encoder = new TextEncoder();
      let data = encoder.encode(JSON.stringify(normalized));
      let size = yield OS.File.writeAtomic(this._storePath, data, {
                                           tmpPath: this._storePath + ".tmp",
                                           compression: "lz4"});
      if (this._telemetrySizeKey) {
        Services.telemetry.getHistogramById(this._telemetrySizeKey).add(size);
      }
    }.bind(this));
  },

  /**
   * Normalize an object into one fit for serialization.
   *
   * This function along with _denormalize() serve to hack around the
   * default handling of Date JSON serialization because Date serialization
   * is undefined by JSON.
   *
   * Fields ending with "Date" are assumed to contain Date instances.
   * We convert these to milliseconds since epoch on output and back to
   * Date on input.
   */
  _normalize: function (o) {
    let normalized = {};

    for (let k in o) {
      let v = o[k];
      if (v && k.endsWith("Date")) {
        normalized[k] = v.getTime();
      } else {
        normalized[k] = v;
      }
    }

    return normalized;
  },

  /**
   * Convert a serialized object back to its native form.
   */
  _denormalize: function (o) {
    let n = {};

    for (let k in o) {
      let v = o[k];
      if (v && k.endsWith("Date")) {
        n[k] = new Date(parseInt(v, 10));
      } else {
        n[k] = v;
      }
    }

    return n;
  },

  /**
   * Prune old crash data.
   *
   * Crashes without recent activity are pruned from the store so the
   * size of the store is not unbounded. If there is activity on a crash,
   * that activity will keep the crash and all its data around for longer.
   *
   * @param date
   *        (Date) The cutoff at which data will be pruned. If an entry
   *        doesn't have data newer than this, it will be pruned.
   */
  pruneOldCrashes: function (date) {
    for (let crash of this.crashes) {
      let newest = crash.newestDate;
      if (!newest || newest.getTime() < date.getTime()) {
        this._data.crashes.delete(crash.id);
      }
    }
  },

  /**
   * Date the store was last corrupted and required a reset.
   *
   * May be null (no corruption has ever occurred) or a Date instance.
   */
  get corruptDate() {
    return this._data.corruptDate;
  },

  /**
   * The number of distinct crashes tracked.
   */
  get crashesCount() {
    return this._data.crashes.size;
  },

  /**
   * All crashes tracked.
   *
   * This is an array of CrashRecord.
   */
  get crashes() {
    let crashes = [];
    for (let [, crash] of this._data.crashes) {
      crashes.push(new CrashRecord(crash));
    }

    return crashes;
  },

  /**
   * Obtain a particular crash from its ID.
   *
   * A CrashRecord will be returned if the crash exists. null will be returned
   * if the crash is unknown.
   */
  getCrash: function (id) {
    for (let crash of this.crashes) {
      if (crash.id == id) {
        return crash;
      }
    }

    return null;
  },

  _ensureCountsForDay: function (day) {
    if (!this._countsByDay.has(day)) {
      this._countsByDay.set(day, new Map());
    }
  },

  /**
   * Ensure the crash record is present in storage.
   *
   * Returns the crash record if we're allowed to store it or null
   * if we've hit the high water mark.
   *
   * @param processType
   *        (string) One of the PROCESS_TYPE constants.
   * @param crashType
   *        (string) One of the CRASH_TYPE constants.
   * @param id
   *        (string) The crash ID.
   * @param date
   *        (Date) When this crash occurred.
   * @param metadata
   *        (dictionary) Crash metadata, may be empty.
   *
   * @return null | object crash record
   */
  _ensureCrashRecord: function (processType, crashType, id, date, metadata) {
    if (!id) {
      // Crashes are keyed on ID, so it's not really helpful to store crashes
      // without IDs.
      return null;
    }

    let type = processType + "-" + crashType;

    if (!this._data.crashes.has(id)) {
      let day = dateToDays(date);
      this._ensureCountsForDay(day);

      let count = (this._countsByDay.get(day).get(type) || 0) + 1;
      this._countsByDay.get(day).set(type, count);

      if (count > this.HIGH_WATER_DAILY_THRESHOLD &&
          processType != CrashManager.prototype.PROCESS_TYPE_MAIN) {
        return null;
      }

      // If we have an OOM size, count the crash as an OOM in addition to
      // being a main process crash.
      if (metadata && metadata.OOMAllocationSize) {
        let oomType = type + "-oom";
        let oomCount = (this._countsByDay.get(day).get(oomType) || 0) + 1;
        this._countsByDay.get(day).set(oomType, oomCount);
      }

      this._data.crashes.set(id, {
        id: id,
        remoteID: null,
        type: type,
        crashDate: date,
        submissions: new Map(),
        classifications: [],
        metadata: metadata,
      });
    }

    let crash = this._data.crashes.get(id);
    crash.type = type;
    crash.crashDate = date;

    return crash;
  },

  /**
   * Record the occurrence of a crash.
   *
   * @param processType (string) One of the PROCESS_TYPE constants.
   * @param crashType (string) One of the CRASH_TYPE constants.
   * @param id (string) Crash ID. Likely a UUID.
   * @param date (Date) When the crash occurred.
   * @param metadata (dictionary) Crash metadata, may be empty.
   *
   * @return boolean True if the crash was recorded and false if not.
   */
  addCrash: function (processType, crashType, id, date, metadata) {
    return !!this._ensureCrashRecord(processType, crashType, id, date, metadata);
  },

  /**
   * @return boolean True if the remote ID was recorded and false if not.
   */
  setRemoteCrashID: function (crashID, remoteID) {
    let crash = this._data.crashes.get(crashID);
    if (!crash || !remoteID) {
      return false;
    }

    crash.remoteID = remoteID;
    return true;
  },

  getCrashesOfType: function (processType, crashType) {
    let crashes = [];
    for (let crash of this.crashes) {
      if (crash.isOfType(processType, crashType)) {
        crashes.push(crash);
      }
    }

    return crashes;
  },

  /**
   * Ensure the submission record is present in storage.
   * @returns [submission, crash]
   */
  _ensureSubmissionRecord: function (crashID, submissionID) {
    let crash = this._data.crashes.get(crashID);
    if (!crash || !submissionID) {
      return null;
    }

    if (!crash.submissions.has(submissionID)) {
      crash.submissions.set(submissionID, {
        requestDate: null,
        responseDate: null,
        result: null,
      });
    }

    return [crash.submissions.get(submissionID), crash];
  },

  /**
   * @return boolean True if the attempt was recorded.
   */
  addSubmissionAttempt: function (crashID, submissionID, date) {
    let [submission, crash] =
      this._ensureSubmissionRecord(crashID, submissionID);
    if (!submission) {
      return false;
    }

    submission.requestDate = date;
    Services.telemetry.getKeyedHistogramById("PROCESS_CRASH_SUBMIT_ATTEMPT")
      .add(crash.type, 1);
    return true;
  },

  /**
   * @return boolean True if the response was recorded.
   */
  addSubmissionResult: function (crashID, submissionID, date, result) {
    let crash = this._data.crashes.get(crashID);
    if (!crash || !submissionID) {
      return false;
    }
    let submission = crash.submissions.get(submissionID);
    if (!submission) {
      return false;
    }

    submission.responseDate = date;
    submission.result = result;
    Services.telemetry.getKeyedHistogramById("PROCESS_CRASH_SUBMIT_SUCCESS")
      .add(crash.type, result == "ok");
    return true;
  },

  /**
   * @return boolean True if the classifications were set.
   */
  setCrashClassifications: function (crashID, classifications) {
    let crash = this._data.crashes.get(crashID);
    if (!crash) {
      return false;
    }

    crash.classifications = classifications;
    return true;
  },
});

/**
 * Represents an individual crash with metadata.
 *
 * This is a wrapper around the low-level anonymous JS objects that define
 * crashes. It exposes a consistent and helpful API.
 *
 * Instances of this type should only be constructured inside this module,
 * not externally. The constructor is not considered a public API.
 *
 * @param o (object)
 *        The crash's entry from the CrashStore.
 */
function CrashRecord(o) {
  this._o = o;
}

CrashRecord.prototype = Object.freeze({
  get id() {
    return this._o.id;
  },

  get remoteID() {
    return this._o.remoteID;
  },

  get crashDate() {
    return this._o.crashDate;
  },

  /**
   * Obtain the newest date in this record.
   *
   * This is a convenience getter. The returned value is used to determine when
   * to expire a record.
   */
  get newestDate() {
    // We currently only have 1 date, so this is easy.
    return this._o.crashDate;
  },

  get oldestDate() {
    return this._o.crashDate;
  },

  get type() {
    return this._o.type;
  },

  isOfType: function (processType, crashType) {
    return processType + "-" + crashType == this.type;
  },

  get submissions() {
    return this._o.submissions;
  },

  get classifications() {
    return this._o.classifications;
  },

  get metadata() {
    return this._o.metadata;
  },
});

/**
 * Obtain the global CrashManager instance used by the running application.
 *
 * CrashManager is likely only ever instantiated once per application lifetime.
 * The main reason it's implemented as a reusable type is to facilitate testing.
 */
XPCOMUtils.defineLazyGetter(this.CrashManager, "Singleton", function () {
  if (gCrashManager) {
    return gCrashManager;
  }

  let crPath = OS.Path.join(OS.Constants.Path.userApplicationDataDir,
                            "Crash Reports");
  let storePath = OS.Path.join(OS.Constants.Path.profileDir, "crashes");

  gCrashManager = new CrashManager({
    pendingDumpsDir: OS.Path.join(crPath, "pending"),
    submittedDumpsDir: OS.Path.join(crPath, "submitted"),
    eventsDirs: [OS.Path.join(crPath, "events"), OS.Path.join(storePath, "events")],
    storeDir: storePath,
    telemetryStoreSizeKey: "CRASH_STORE_COMPRESSED_BYTES",
  });

  // Automatically aggregate event files shortly after startup. This
  // ensures it happens with some frequency.
  //
  // There are performance considerations here. While this is doing
  // work and could negatively impact performance, the amount of work
  // is kept small per run by periodically aggregating event files.
  // Furthermore, well-behaving installs should not have much work
  // here to do. If there is a lot of work, that install has bigger
  // issues beyond reduced performance near startup.
  gCrashManager.scheduleMaintenance(AGGREGATE_STARTUP_DELAY_MS);

  return gCrashManager;
});