Bug 965587 - Add lock to Firefox Health Report uploading. r=rnewman, a=lsblakk
authorGregory Szorc <gps@mozilla.com>
Wed, 29 Jan 2014 15:28:57 -0800
changeset 176241 e78826467135604ccc5bf5a218a664b96eed65a4
parent 176240 f98919f562c903d9e1878ef0313dea58a28db47a
child 176242 6099c057f5313517d6dbb371f1b910970715633e
push id445
push userffxbld
push dateMon, 10 Mar 2014 22:05:19 +0000
treeherdermozilla-release@dc38b741b04e [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersrnewman, lsblakk
bugs965587
milestone28.0
Bug 965587 - Add lock to Firefox Health Report uploading. r=rnewman, a=lsblakk Previously, it was technically possible for the FHR client to have multiple simultaneous uploads. While this should never occur in well-behaving systems, server logs have indicated that this behavior might be occurring. This patch adds a lock around uploading to ensure only 1 upload operation may be active at a given time. To measure the impact of this change in the wild, we added a counter that increments whenever a held lock is encountered.
services/datareporting/policy.jsm
services/healthreport/healthreporter.jsm
services/healthreport/providers.jsm
services/healthreport/tests/xpcshell/test_healthreporter.js
--- a/services/datareporting/policy.jsm
+++ b/services/datareporting/policy.jsm
@@ -143,16 +143,17 @@ this.DataSubmissionRequest = function (p
   this.reason = null;
 }
 
 this.DataSubmissionRequest.prototype = Object.freeze({
   NO_DATA_AVAILABLE: "no-data-available",
   SUBMISSION_SUCCESS: "success",
   SUBMISSION_FAILURE_SOFT: "failure-soft",
   SUBMISSION_FAILURE_HARD: "failure-hard",
+  UPLOAD_IN_PROGRESS: "upload-in-progress",
 
   /**
    * No submission was attempted because no data was available.
    *
    * In the case of upload, this means there is no data to upload (perhaps
    * it isn't available yet). In case of remote deletion, it means that there
    * is no remote data to delete.
    */
@@ -205,16 +206,26 @@ this.DataSubmissionRequest.prototype = O
    *        (string) Why the failure occurred. For logging purposes only.
    */
   onSubmissionFailureHard: function onSubmissionFailureHard(reason=null) {
     this.state = this.SUBMISSION_FAILURE_HARD;
     this.reason = reason;
     this.promise.resolve(this);
     return this.promise.promise;
   },
+
+  /**
+   * The request was aborted because an upload was already in progress.
+   */
+  onUploadInProgress: function (reason=null) {
+    this.state = this.UPLOAD_IN_PROGRESS;
+    this.reason = reason;
+    this.promise.resolve(this);
+    return this.promise.promise;
+  },
 });
 
 /**
  * Manages scheduling of Firefox Health Report data submission.
  *
  * The rules of data submission are as follows:
  *
  *  1. Do not submit data more than once every 24 hours.
--- a/services/healthreport/healthreporter.jsm
+++ b/services/healthreport/healthreporter.jsm
@@ -1139,16 +1139,17 @@ AbstractHealthReporter.prototype = Objec
  *        (string) The preferences branch to use for state storage. The value
  *        must end with a period (.).
  *
  * @param policy
  *        (HealthReportPolicy) Policy driving execution of HealthReporter.
  */
 this.HealthReporter = function (branch, policy, sessionRecorder, stateLeaf=null) {
   this._stateLeaf = stateLeaf;
+  this._uploadInProgress = false;
 
   AbstractHealthReporter.call(this, branch, policy, sessionRecorder);
 
   if (!this.serverURI) {
     throw new Error("No server URI defined. Did you forget to define the pref?");
   }
 
   if (!this.serverNamespace) {
@@ -1306,16 +1307,17 @@ this.HealthReporter.prototype = Object.f
         this._policy.healthReportUploadEnabled) {
       // We don't care about what happens to this request. It's best
       // effort.
       let request = {
         onNoDataAvailable: function () {},
         onSubmissionSuccess: function () {},
         onSubmissionFailureSoft: function () {},
         onSubmissionFailureHard: function () {},
+        onUploadInProgress: function () {},
       };
 
       this._uploadData(request);
     }
 
     return result;
   },
 
@@ -1369,58 +1371,78 @@ this.HealthReporter.prototype = Object.f
   },
 
   _formatDate: function (date) {
     // Why, oh, why doesn't JS have a strftime() equivalent?
     return date.toISOString().substr(0, 10);
   },
 
   _uploadData: function (request) {
+    // Under ideal circumstances, clients should never race to this
+    // function. However, server logs have observed behavior where
+    // racing to this function could be a cause. So, this lock was
+    // instituted.
+    if (this._uploadInProgress) {
+      this._log.warn("Upload requested but upload already in progress.");
+      let provider = this.getProvider("org.mozilla.healthreport");
+      let promise = provider.recordEvent("uploadAlreadyInProgress");
+      request.onUploadInProgress("Upload already in progress.");
+      return promise;
+    }
+
     let id = CommonUtils.generateUUID();
 
     this._log.info("Uploading data to server: " + this.serverURI + " " +
                    this.serverNamespace + ":" + id);
     let client = new BagheeraClient(this.serverURI);
     let now = this._now();
 
     return Task.spawn(function doUpload() {
-      let payload = yield this.getJSONPayload();
+      try {
+        // The test for upload locking monkeypatches getJSONPayload.
+        // If the next two lines change, be sure to verify the test is
+        // accurate!
+        this._uploadInProgress = true;
+        let payload = yield this.getJSONPayload();
 
-      let histogram = Services.telemetry.getHistogramById(TELEMETRY_PAYLOAD_SIZE_UNCOMPRESSED);
-      histogram.add(payload.length);
+        let histogram = Services.telemetry.getHistogramById(TELEMETRY_PAYLOAD_SIZE_UNCOMPRESSED);
+        histogram.add(payload.length);
 
-      let lastID = this.lastSubmitID;
-      yield this._state.addRemoteID(id);
+        let lastID = this.lastSubmitID;
+        yield this._state.addRemoteID(id);
 
-      let hrProvider = this.getProvider("org.mozilla.healthreport");
-      if (hrProvider) {
-        let event = lastID ? "continuationUploadAttempt"
-                           : "firstDocumentUploadAttempt";
-        hrProvider.recordEvent(event, now);
-      }
+        let hrProvider = this.getProvider("org.mozilla.healthreport");
+        if (hrProvider) {
+          let event = lastID ? "continuationUploadAttempt"
+                             : "firstDocumentUploadAttempt";
+          hrProvider.recordEvent(event, now);
+        }
 
-      TelemetryStopwatch.start(TELEMETRY_UPLOAD, this);
-      let result;
-      try {
-        let options = {
-          deleteIDs: this._state.remoteIDs.filter((x) => { return x != id; }),
-          telemetryCompressed: TELEMETRY_PAYLOAD_SIZE_COMPRESSED,
-        };
-        result = yield client.uploadJSON(this.serverNamespace, id, payload,
-                                         options);
-        TelemetryStopwatch.finish(TELEMETRY_UPLOAD, this);
-      } catch (ex) {
-        TelemetryStopwatch.cancel(TELEMETRY_UPLOAD, this);
-        if (hrProvider) {
-          hrProvider.recordEvent("uploadClientFailure", now);
+        TelemetryStopwatch.start(TELEMETRY_UPLOAD, this);
+        let result;
+        try {
+          let options = {
+            deleteIDs: this._state.remoteIDs.filter((x) => { return x != id; }),
+            telemetryCompressed: TELEMETRY_PAYLOAD_SIZE_COMPRESSED,
+          };
+          result = yield client.uploadJSON(this.serverNamespace, id, payload,
+                                           options);
+          TelemetryStopwatch.finish(TELEMETRY_UPLOAD, this);
+        } catch (ex) {
+          TelemetryStopwatch.cancel(TELEMETRY_UPLOAD, this);
+          if (hrProvider) {
+            hrProvider.recordEvent("uploadClientFailure", now);
+          }
+          throw ex;
         }
-        throw ex;
+
+        yield this._onBagheeraResult(request, false, now, result);
+      } finally {
+        this._uploadInProgress = false;
       }
-
-      yield this._onBagheeraResult(request, false, now, result);
     }.bind(this));
   },
 
   /**
    * Request deletion of remote data.
    *
    * @param request
    *        (DataSubmissionRequest) Tracks progress of this request.
--- a/services/healthreport/providers.jsm
+++ b/services/healthreport/providers.jsm
@@ -1352,26 +1352,50 @@ HealthReportSubmissionMeasurement1.proto
     continuationUploadAttempt: DAILY_COUNTER_FIELD,
     uploadSuccess: DAILY_COUNTER_FIELD,
     uploadTransportFailure: DAILY_COUNTER_FIELD,
     uploadServerFailure: DAILY_COUNTER_FIELD,
     uploadClientFailure: DAILY_COUNTER_FIELD,
   },
 });
 
+function HealthReportSubmissionMeasurement2() {
+  Metrics.Measurement.call(this);
+}
+
+HealthReportSubmissionMeasurement2.prototype = Object.freeze({
+  __proto__: Metrics.Measurement.prototype,
+
+  name: "submissions",
+  version: 2,
+
+  fields: {
+    firstDocumentUploadAttempt: DAILY_COUNTER_FIELD,
+    continuationUploadAttempt: DAILY_COUNTER_FIELD,
+    uploadSuccess: DAILY_COUNTER_FIELD,
+    uploadTransportFailure: DAILY_COUNTER_FIELD,
+    uploadServerFailure: DAILY_COUNTER_FIELD,
+    uploadClientFailure: DAILY_COUNTER_FIELD,
+    uploadAlreadyInProgress: DAILY_COUNTER_FIELD,
+  },
+});
+
 this.HealthReportProvider = function () {
   Metrics.Provider.call(this);
 }
 
 HealthReportProvider.prototype = Object.freeze({
   __proto__: Metrics.Provider.prototype,
 
   name: "org.mozilla.healthreport",
 
-  measurementTypes: [HealthReportSubmissionMeasurement1],
+  measurementTypes: [
+    HealthReportSubmissionMeasurement1,
+    HealthReportSubmissionMeasurement2,
+  ],
 
   recordEvent: function (event, date=new Date()) {
-    let m = this.getMeasurement("submissions", 1);
+    let m = this.getMeasurement("submissions", 2);
     return this.enqueueStorageOperation(function recordCounter() {
       return m.incrementDailyCounter(event, date);
     });
   },
 });
--- a/services/healthreport/tests/xpcshell/test_healthreporter.js
+++ b/services/healthreport/tests/xpcshell/test_healthreporter.js
@@ -70,27 +70,28 @@ function shutdownServer(server) {
 
   return deferred.promise;
 }
 
 function getHealthReportProviderValues(reporter, day=null) {
   return Task.spawn(function getValues() {
     let p = reporter.getProvider("org.mozilla.healthreport");
     do_check_neq(p, null);
-    let m = p.getMeasurement("submissions", 1);
+    let m = p.getMeasurement("submissions", 2);
     do_check_neq(m, null);
 
     let data = yield reporter._storage.getMeasurementValues(m.id);
     if (!day) {
       throw new Task.Result(data);
     }
 
     do_check_true(data.days.hasDay(day));
     let serializer = m.serializer(m.SERIALIZE_JSON)
     let json = serializer.daily(data.days.getDay(day));
+    do_check_eq(json._v, 2);
 
     throw new Task.Result(json);
   });
 }
 
 function run_test() {
   run_next_test();
 }
@@ -560,17 +561,16 @@ add_task(function test_data_submission_t
     let deferred = Promise.defer();
     let request = new DataSubmissionRequest(deferred, new Date(Date.now + 30000));
     reporter.requestDataUpload(request);
 
     yield deferred.promise;
     do_check_eq(request.state, request.SUBMISSION_FAILURE_SOFT);
 
     let data = yield getHealthReportProviderValues(reporter, new Date());
-    do_check_eq(data._v, 1);
     do_check_eq(data.firstDocumentUploadAttempt, 1);
     do_check_eq(data.uploadTransportFailure, 1);
     do_check_eq(Object.keys(data).length, 3);
   } finally {
     reporter._shutdown();
   }
 });
 
@@ -587,17 +587,16 @@ add_task(function test_data_submission_s
     let deferred = Promise.defer();
     let now = new Date();
     let request = new DataSubmissionRequest(deferred, now);
     reporter.requestDataUpload(request);
     yield deferred.promise;
     do_check_eq(request.state, request.SUBMISSION_FAILURE_HARD);
 
     let data = yield getHealthReportProviderValues(reporter, now);
-    do_check_eq(data._v, 1);
     do_check_eq(data.firstDocumentUploadAttempt, 1);
     do_check_eq(data.uploadServerFailure, 1);
     do_check_eq(Object.keys(data).length, 3);
   } finally {
     yield shutdownServer(server);
     reporter._shutdown();
   }
 });
@@ -626,17 +625,16 @@ add_task(function test_data_submission_s
     }
 
     // Ensure data from providers made it to payload.
     let o = yield reporter.getJSONPayload(true);
     do_check_true("DummyProvider.DummyMeasurement" in o.data.last);
     do_check_true("DummyConstantProvider.DummyMeasurement" in o.data.last);
 
     let data = yield getHealthReportProviderValues(reporter, now);
-    do_check_eq(data._v, 1);
     do_check_eq(data.continuationUploadAttempt, 1);
     do_check_eq(data.uploadSuccess, 1);
     do_check_eq(Object.keys(data).length, 3);
 
     let d = reporter.lastPingDate;
     let id = reporter.lastSubmitID;
 
     reporter._shutdown();
@@ -677,17 +675,16 @@ add_task(function test_recurring_daily_p
     yield promise;
     do_check_neq(reporter.lastSubmitID, lastID);
     do_check_true(server.hasDocument(reporter.serverNamespace, reporter.lastSubmitID));
     do_check_false(server.hasDocument(reporter.serverNamespace, lastID));
 
     // now() on the health reporter instance wasn't munged. So, we should see
     // both requests attributed to the same day.
     let data = yield getHealthReportProviderValues(reporter, new Date());
-    do_check_eq(data._v, 1);
     do_check_eq(data.firstDocumentUploadAttempt, 1);
     do_check_eq(data.continuationUploadAttempt, 1);
     do_check_eq(data.uploadSuccess, 2);
     do_check_eq(Object.keys(data).length, 4);
   } finally {
     reporter._shutdown();
     yield shutdownServer(server);
   }
@@ -715,16 +712,64 @@ add_task(function test_request_remote_da
     do_check_false(reporter.haveRemoteData());
     do_check_false(server.hasDocument(reporter.serverNamespace, id));
   } finally {
     reporter._shutdown();
     yield shutdownServer(server);
   }
 });
 
+add_task(function test_multiple_simultaneous_uploads() {
+  let [reporter, server] = yield getReporterAndServer("multiple_simultaneous_uploads");
+
+  try {
+    let d1 = Promise.defer();
+    let d2 = Promise.defer();
+    let t1 = new Date(Date.now() - 1000);
+    let t2 = new Date(t1.getTime() + 500);
+    let r1 = new DataSubmissionRequest(d1, t1);
+    let r2 = new DataSubmissionRequest(d2, t2);
+
+    let getPayloadDeferred = Promise.defer();
+
+    Object.defineProperty(reporter, "getJSONPayload", {
+      configurable: true,
+      value: () => {
+        getPayloadDeferred.resolve();
+        delete reporter["getJSONPayload"];
+        return reporter.getJSONPayload();
+      },
+    });
+
+    let p1 = reporter.requestDataUpload(r1);
+    yield getPayloadDeferred.promise;
+    do_check_true(reporter._uploadInProgress);
+    let p2 = reporter.requestDataUpload(r2);
+
+    yield p1;
+    yield p2;
+
+    do_check_eq(r1.state, r1.SUBMISSION_SUCCESS);
+    do_check_eq(r2.state, r2.UPLOAD_IN_PROGRESS);
+
+    // They should both be resolved already.
+    yield d1;
+    yield d2;
+
+    let data = yield getHealthReportProviderValues(reporter, t1);
+    do_check_eq(data.firstDocumentUploadAttempt, 1);
+    do_check_false("continuationUploadAttempt" in data);
+    do_check_eq(data.uploadSuccess, 1);
+    do_check_eq(data.uploadAlreadyInProgress, 1);
+  } finally {
+    reporter._shutdown();
+    yield shutdownServer(server);
+  }
+});
+
 add_task(function test_policy_accept_reject() {
   let [reporter, server] = yield getReporterAndServer("policy_accept_reject");
 
   try {
     let policy = reporter._policy;
 
     do_check_false(policy.dataSubmissionPolicyAccepted);
     do_check_false(reporter.willUploadData);