Bug 1139460 - Part 5 - Fix race leading to TelemetryPing shutdown timeout by aborting all pending ping requests on shutdown. r=vladan,f=yoric,a=lmandel
authorGeorg Fritzsche <georg.fritzsche@googlemail.com>
Thu, 02 Apr 2015 21:33:46 +0200
changeset 267211 aabff522c149885e5ad4b96c0364bc6727f62a6e
parent 267210 4ae61b09eaefc93c30a0cf1cee60ba533a3bdcf3
child 267212 79582fe6d87e5202c533c97aeed9fbf297415a10
push id830
push userraliiev@mozilla.com
push dateFri, 19 Jun 2015 19:24:37 +0000
treeherdermozilla-release@932614382a68 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersvladan, lmandel
bugs1139460
milestone39.0a2
Bug 1139460 - Part 5 - Fix race leading to TelemetryPing shutdown timeout by aborting all pending ping requests on shutdown. r=vladan,f=yoric,a=lmandel
toolkit/components/telemetry/TelemetryPing.jsm
toolkit/components/telemetry/tests/unit/head.js
toolkit/components/telemetry/tests/unit/test_TelemetryPing.js
toolkit/components/telemetry/tests/unit/test_TelemetryPingShutdown.js
toolkit/components/telemetry/tests/unit/test_TelemetrySession.js
toolkit/components/telemetry/tests/unit/xpcshell.ini
--- a/toolkit/components/telemetry/TelemetryPing.jsm
+++ b/toolkit/components/telemetry/TelemetryPing.jsm
@@ -273,16 +273,19 @@ let Impl = {
 
   // This is a public barrier Telemetry clients can use to add blockers to the shutdown
   // of TelemetryPing.
   // After this barrier, clients can not submit Telemetry pings anymore.
   _shutdownBarrier: new AsyncShutdown.Barrier("TelemetryPing: Waiting for clients."),
   // This is a private barrier blocked by pending async ping activity (sending & saving).
   _connectionsBarrier: new AsyncShutdown.Barrier("TelemetryPing: Waiting for pending ping activity"),
 
+  // This tracks all pending ping requests to the server.
+  _pendingPingRequests: new Map(),
+
   /**
    * Get the data for the "application" section of the ping.
    */
   _getApplicationSection: function() {
     // Querying architecture and update channel can throw. Make sure to recover and null
     // those fields.
     let arch = null;
     try {
@@ -578,28 +581,31 @@ let Impl = {
                   .createInstance(Ci.nsIXMLHttpRequest);
     request.mozBackgroundRequest = true;
     request.timeout = PING_SUBMIT_TIMEOUT_MS;
 
     request.open("POST", url, true);
     request.overrideMimeType("text/plain");
     request.setRequestHeader("Content-Type", "application/json; charset=UTF-8");
 
+    this._pendingPingRequests.set(url, request);
+
     let startTime = new Date();
     let deferred = PromiseUtils.defer();
 
     let onRequestFinished = (success, event) => {
       let onCompletion = () => {
         if (success) {
           deferred.resolve();
         } else {
           deferred.reject(event);
         }
       };
 
+      this._pendingPingRequests.delete(url);
       this.onPingRequestFinished(success, startTime, ping, isPersisted)
         .then(() => onCompletion(),
               (error) => {
                 this._log.error("doPing - request success: " + success + ", error" + error);
                 onCompletion();
               });
     };
 
@@ -814,27 +820,39 @@ let Impl = {
   },
 
   // Do proper shutdown waiting and cleanup.
   _cleanupOnShutdown: Task.async(function*() {
     if (!this._initialized) {
       return;
     }
 
+    // Abort any pending ping XHRs.
+    for (let [url, request] of this._pendingPingRequests) {
+      this._log.trace("_cleanupOnShutdown - aborting ping request for " + url);
+      try {
+        request.abort();
+      } catch (e) {
+        this._log.error("_cleanupOnShutdown - failed to abort request to " + url, e);
+      }
+    }
+    this._pendingPingRequests.clear();
+
+    // Now do an orderly shutdown.
     try {
       // First wait for clients processing shutdown.
       yield this._shutdownBarrier.wait();
       // Then wait for any outstanding async ping activity.
       yield this._connectionsBarrier.wait();
 
       // Should down dependent components.
       try {
         yield TelemetryEnvironment.shutdown();
       } catch (e) {
-        this._log.error("shutdown - environment shutdown failure", e);
+        this._log.error("_cleanupOnShutdown - environment shutdown failure", e);
       }
     } finally {
       // Reset state.
       this._initialized = false;
       this._initStarted = false;
     }
   }),
 
--- a/toolkit/components/telemetry/tests/unit/head.js
+++ b/toolkit/components/telemetry/tests/unit/head.js
@@ -4,16 +4,18 @@
 Components.utils.import("resource://gre/modules/TelemetryPing.jsm", this);
 Components.utils.import("resource://gre/modules/Services.jsm", this);
 
 const gIsWindows = ("@mozilla.org/windows-registry-key;1" in Components.classes);
 const gIsMac = ("@mozilla.org/xpcom/mac-utils;1" in Components.classes);
 const gIsAndroid =  ("@mozilla.org/android/bridge;1" in Components.classes);
 const gIsGonk = ("@mozilla.org/cellbroadcast/gonkservice;1" in Components.classes);
 
+const HAS_DATAREPORTINGSERVICE = "@mozilla.org/datareporting/service;1" in Components.classes;
+
 let gOldAppInfo = null;
 let gGlobalScope = this;
 
 function loadAddonManager(id, name, version, platformVersion) {
   let ns = {};
   Cu.import("resource://gre/modules/Services.jsm", ns);
   let head = "../../../../mozapps/extensions/test/xpcshell/head_addons.js";
   let file = do_get_file(head);
--- a/toolkit/components/telemetry/tests/unit/test_TelemetryPing.js
+++ b/toolkit/components/telemetry/tests/unit/test_TelemetryPing.js
@@ -30,18 +30,16 @@ const PLATFORM_VERSION = "1.9.2";
 const APP_VERSION = "1";
 const APP_NAME = "XPCShell";
 
 const PREF_BRANCH = "toolkit.telemetry.";
 const PREF_ENABLED = PREF_BRANCH + "enabled";
 const PREF_FHR_UPLOAD_ENABLED = "datareporting.healthreport.uploadEnabled";
 const PREF_FHR_SERVICE_ENABLED = "datareporting.healthreport.service.enabled";
 
-const HAS_DATAREPORTINGSERVICE = "@mozilla.org/datareporting/service;1" in Cc;
-
 const Telemetry = Cc["@mozilla.org/base/telemetry;1"].getService(Ci.nsITelemetry);
 
 let gHttpServer = new HttpServer();
 let gServerStarted = false;
 let gRequestIterator = null;
 let gDataReportingClientID = null;
 
 XPCOMUtils.defineLazyGetter(this, "gDatareportingService",
new file mode 100644
--- /dev/null
+++ b/toolkit/components/telemetry/tests/unit/test_TelemetryPingShutdown.js
@@ -0,0 +1,73 @@
+/* Any copyright is dedicated to the Public Domain.
+ * http://creativecommons.org/publicdomain/zero/1.0/ */
+
+// Test that TelemetryPing sends close to shutdown don't lead
+// to AsyncShutdown timeouts.
+
+"use strict";
+
+const { utils: Cu, interfaces: Ci, classes: Cc } = Components;
+
+Cu.import("resource://gre/modules/Services.jsm", this);
+Cu.import("resource://gre/modules/TelemetryPing.jsm", this);
+Cu.import("resource://gre/modules/Timer.jsm", this);
+Cu.import("resource://gre/modules/XPCOMUtils.jsm", this);
+Cu.import("resource://gre/modules/AsyncShutdown.jsm", this);
+Cu.import("resource://testing-common/httpd.js", this);
+
+const PREF_BRANCH = "toolkit.telemetry.";
+const PREF_ENABLED = PREF_BRANCH + "enabled";
+const PREF_FHR_UPLOAD_ENABLED = "datareporting.healthreport.uploadEnabled";
+
+function contentHandler(metadata, response)
+{
+  dump("contentHandler called for path: " + metadata._path + "\n");
+  // We intentionally don't finish writing the response here to let the
+  // client time out.
+  response.processAsync();
+  response.setHeader("Content-Type", "text/plain");
+}
+
+function run_test() {
+  // Addon manager needs a profile directory
+  do_get_profile();
+  loadAddonManager("xpcshell@tests.mozilla.org", "XPCShell", "1", "1.9.2");
+
+  Services.prefs.setBoolPref(PREF_ENABLED, true);
+  Services.prefs.setBoolPref(PREF_FHR_UPLOAD_ENABLED, true);
+
+  // Send the needed startup notifications to the datareporting service
+  // to ensure that it has been initialized.
+  if (HAS_DATAREPORTINGSERVICE) {
+    let drs = Cc["@mozilla.org/datareporting/service;1"]
+                .getService(Ci.nsISupports)
+                .wrappedJSObject;
+    drs.observe(null, "app-startup", null);
+    drs.observe(null, "profile-after-change", null);
+  }
+
+  run_next_test();
+}
+
+add_task(function* test_sendTimeout() {
+  const TIMEOUT = 100;
+  // Enable testing mode for AsyncShutdown, otherwise some testing-only functionality
+  // is not available.
+  Services.prefs.setBoolPref("toolkit.asyncshutdown.testing", true);
+  Services.prefs.setIntPref("toolkit.asyncshutdown.crash_timeout", TIMEOUT);
+
+  let httpServer = new HttpServer();
+  httpServer.registerPrefixHandler("/", contentHandler);
+  httpServer.start(-1);
+
+  yield TelemetryPing.setup();
+  TelemetryPing.setServer("http://localhost:" + httpServer.identity.primaryPort);
+  TelemetryPing.send("test-ping-type", {});
+
+  // Trigger the AsyncShutdown phase TelemetryPing hangs off.
+  AsyncShutdown.profileBeforeChange._trigger();
+  AsyncShutdown.sendTelemetry._trigger();
+
+  // If we get here, we didn't time out in the shutdown routines.
+  Assert.ok(true, "Didn't time out on shutdown.");
+});
--- a/toolkit/components/telemetry/tests/unit/test_TelemetrySession.js
+++ b/toolkit/components/telemetry/tests/unit/test_TelemetrySession.js
@@ -65,17 +65,16 @@ const SEC_IN_ONE_DAY  = 24 * 60 * 60;
 const MS_IN_ONE_DAY   = SEC_IN_ONE_DAY * 1000;
 
 const PREF_BRANCH = "toolkit.telemetry.";
 const PREF_ENABLED = PREF_BRANCH + "enabled";
 const PREF_SERVER = PREF_BRANCH + "server";
 const PREF_FHR_UPLOAD_ENABLED = "datareporting.healthreport.uploadEnabled";
 const PREF_FHR_SERVICE_ENABLED = "datareporting.healthreport.service.enabled";
 
-const HAS_DATAREPORTINGSERVICE = "@mozilla.org/datareporting/service;1" in Cc;
 const SESSION_RECORDER_EXPECTED = HAS_DATAREPORTINGSERVICE &&
                                   Preferences.get(PREF_FHR_SERVICE_ENABLED, true);
 
 const DATAREPORTING_DIR = "datareporting";
 const ABORTED_PING_FILE_NAME = "aborted-session-ping";
 const ABORTED_SESSION_UPDATE_INTERVAL_MS = 5 * 60 * 1000;
 
 const Telemetry = Cc["@mozilla.org/base/telemetry;1"].getService(Ci.nsITelemetry);
--- a/toolkit/components/telemetry/tests/unit/xpcshell.ini
+++ b/toolkit/components/telemetry/tests/unit/xpcshell.ini
@@ -28,16 +28,17 @@ skip-if = android_version == "18"
 [test_TelemetryLockCount.js]
 [test_TelemetryLog.js]
 [test_TelemetryPing.js]
 # Bug 676989: test fails consistently on Android
 # fail-if = os == "android"
 # Bug 1144395: crash on Android 4.3
 skip-if = android_version == "18"
 [test_TelemetryPing_idle.js]
+[test_TelemetryPingShutdown.js]
 [test_TelemetryStopwatch.js]
 [test_TelemetryPingBuildID.js]
 # Bug 1144395: crash on Android 4.3
 skip-if = android_version == "18"
 [test_ThirdPartyCookieProbe.js]
 [test_TelemetrySendOldPings.js]
 skip-if = debug == true || os == "android" # Disabled due to intermittent orange on Android
 [test_TelemetrySession.js]