Bug 917764 - If clients fail to resolve AsyncShutdown blockers, crash the process. r=froydnj, r=ted
authorDavid Rajchenbach-Teller <dteller@mozilla.com>
Mon, 30 Sep 2013 10:59:40 -0400
changeset 157931 de4c92324898e9a0598066f7fbbe7d8e8579ae6d
parent 157930 44f96a5d7e180ec8d5108200d1418948249cb04f
child 157932 96084f7e34b3bf14115f359e6c287c2dc25a4c0e
push id4537
push userlsblakk@mozilla.com
push dateMon, 28 Oct 2013 22:18:47 +0000
treeherdermozilla-aurora@60c6fd67470e [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersfroydnj, ted
bugs917764
milestone27.0a1
Bug 917764 - If clients fail to resolve AsyncShutdown blockers, crash the process. r=froydnj, r=ted
modules/libpref/src/init/all.js
toolkit/crashreporter/test/unit/test_crash_AsyncShutdown.js
toolkit/crashreporter/test/unit/xpcshell.ini
toolkit/modules/AsyncShutdown.jsm
toolkit/modules/tests/xpcshell/test_AsyncShutdown.js
toolkit/modules/tests/xpcshell/test_phase.js
toolkit/modules/tests/xpcshell/xpcshell.ini
--- a/modules/libpref/src/init/all.js
+++ b/modules/libpref/src/init/all.js
@@ -478,16 +478,19 @@ pref("toolkit.telemetry.infoURL", "https
 // Determines whether full SQL strings are returned when they might contain sensitive info
 // i.e. dynamically constructed SQL strings or SQL executed by addons against addon DBs
 pref("toolkit.telemetry.debugSlowSql", false);
 
 // Identity module
 pref("toolkit.identity.enabled", false);
 pref("toolkit.identity.debug", false);
 
+// AsyncShutdown delay before crashing in case of shutdown freeze
+pref("toolkit.asyncshutdown.timeout.crash", 60000);
+
 // Enable deprecation warnings.
 pref("devtools.errorconsole.deprecation_warnings", true);
 
 // Disable remote debugging protocol logging
 pref("devtools.debugger.log", false);
 // Disable remote debugging connections
 pref("devtools.debugger.remote-enabled", false);
 pref("devtools.debugger.remote-port", 6000);
new file mode 100644
--- /dev/null
+++ b/toolkit/crashreporter/test/unit/test_crash_AsyncShutdown.js
@@ -0,0 +1,32 @@
+/* Any copyright is dedicated to the Public Domain.
+ * http://creativecommons.org/publicdomain/zero/1.0/ */
+
+function setup_crash() {
+  Components.utils.import("resource://gre/modules/AsyncShutdown.jsm", this);
+  Components.utils.import("resource://gre/modules/Services.jsm", this);
+  Components.utils.import("resource://gre/modules/Promise.jsm", this);
+
+  Services.prefs.setBoolPref("toolkit.asyncshutdown.testing", true);
+  Services.prefs.setIntPref("toolkit.asyncshutdown.crash_timeout", 10);
+
+  let TOPIC = "testing-async-shutdown-crash";
+  let phase = AsyncShutdown._getPhase(TOPIC);
+  phase.addBlocker("A blocker that is never satisfied", function() {
+    dump("Installing blocker\n");
+    let deferred = Promise.defer();
+    return deferred.promise;
+  });
+
+  Services.obs.notifyObservers(null, TOPIC, null);
+  dump("Waiting for crash\n");
+}
+
+function after_crash(mdump, extra) {
+  let info = JSON.parse(extra.AsyncShutdownTimeout);
+  do_check_true(info.phase == "testing-async-shutdown-crash");
+  do_check_true(info.conditions.indexOf("A blocker that is never satisfied") != -1);
+}
+
+function run_test() {
+  do_crash(setup_crash, after_crash);
+}
--- a/toolkit/crashreporter/test/unit/xpcshell.ini
+++ b/toolkit/crashreporter/test/unit/xpcshell.ini
@@ -19,8 +19,10 @@ skip-if = os == 'win'
 [test_crashreporter_crash_profile_lock.js]
 [test_override_exception_handler.js]
 run-if = os == 'win'
 
 [test_crashreporter_appmem.js]
 run-if = os == 'win' || os == 'linux'
 # we need to skip this due to bug 838613
 skip-if = os=='linux' && bits==32
+
+[test_crash_AsyncShutdown.js]
--- a/toolkit/modules/AsyncShutdown.jsm
+++ b/toolkit/modules/AsyncShutdown.jsm
@@ -41,38 +41,103 @@
  * resolved or rejected.
  */
 
 "use strict";
 
 const Cu = Components.utils;
 const Cc = Components.classes;
 const Ci = Components.interfaces;
+Cu.import("resource://gre/modules/XPCOMUtils.jsm", this);
 Cu.import("resource://gre/modules/Services.jsm", this);
-Cu.import("resource://gre/modules/Promise.jsm", this);
+
+XPCOMUtils.defineLazyModuleGetter(this, "Promise",
+  "resource://gre/modules/Promise.jsm");
+XPCOMUtils.defineLazyServiceGetter(this, "gDebug",
+  "@mozilla.org/xpcom/debug;1", "nsIDebug");
+Object.defineProperty(this, "gCrashReporter", {
+  get: function() {
+    delete this.gCrashReporter;
+    try {
+      let reporter = Cc["@mozilla.org/xre/app-info;1"].
+            getService(Ci.nsICrashReporter);
+      return this.gCrashReporter = reporter;
+    } catch (ex) {
+      return this.gCrashReporter = null;
+    }
+  },
+  configurable: true
+});
 
 // Display timeout warnings after 10 seconds
 const DELAY_WARNING_MS = 10 * 1000;
 
+
+// Crash the process if shutdown is really too long
+// (allowing for sleep).
+const PREF_DELAY_CRASH_MS = "toolkit.asyncshutdown.crash_timeout";
+let DELAY_CRASH_MS = 60 * 1000; // One minute
+try {
+  DELAY_CRASH_MS = Services.prefs.getIntPref(PREF_DELAY_CRASH_MS);
+} catch (ex) {
+  // Ignore errors
+}
+Services.prefs.addObserver(PREF_DELAY_CRASH_MS, function() {
+  DELAY_CRASH_MS = Services.prefs.getIntPref(PREF_DELAY_CRASH_MS);
+}, false);
+
+
 /**
  * Display a warning.
  *
  * As this code is generally used during shutdown, there are chances
  * that the UX will not be available to display warnings on the
  * console. We therefore use dump() rather than Cu.reportError().
  */
-function warn(msg, error = null) {
-  dump("WARNING: " + msg + "\n");
+function log(msg, prefix = "", error = null) {
+  dump(prefix + msg + "\n");
   if (error) {
-    dump("WARNING: " + error + "\n");
+    dump(prefix + error + "\n");
     if (typeof error == "object" && "stack" in error) {
-      dump("WARNING: " + error.stack + "\n");
+      dump(prefix + error.stack + "\n");
     }
   }
 }
+function warn(msg, error = null) {
+  return log(msg, "WARNING: ", error);
+}
+function err(msg, error = null) {
+  return log(msg, "ERROR: ", error);
+}
+
+/**
+ * Countdown for a given duration, skipping beats if the computer is too busy,
+ * sleeping or otherwise unavailable.
+ *
+ * @param {number} delay An approximate delay to wait in milliseconds (rounded
+ * up to the closest second).
+ *
+ * @return Deferred
+ */
+function looseTimer(delay) {
+  let DELAY_BEAT = 1000;
+  let timer = Cc["@mozilla.org/timer;1"].createInstance(Ci.nsITimer);
+  let beats = Math.ceil(delay / DELAY_BEAT);
+  let deferred = Promise.defer();
+  timer.initWithCallback(function() {
+    if (beats <= 0) {
+      deferred.resolve();
+    }
+    --beats;
+  }, DELAY_BEAT, Ci.nsITimer.TYPE_REPEATING_PRECISE_CAN_SKIP);
+  // Ensure that the timer is both canceled once we are done with it
+  // and not garbage-collected until then.
+  deferred.promise.then(() => timer.cancel(), () => timer.cancel());
+  return deferred;
+}
 
 this.EXPORTED_SYMBOLS = ["AsyncShutdown"];
 
 /**
  * {string} topic -> phase
  */
 let gPhases = new Map();
 
@@ -197,18 +262,23 @@ Spinner.prototype = {
     let conditions = this._conditions;
     this._conditions = null; // Too late to register
 
     if (conditions.size == 0) {
       // No need to spin anything
       return;
     }
 
+    // The promises for which we are waiting.
     let allPromises = [];
 
+    // Information to determine and report to the user which conditions
+    // are not satisfied yet.
+    let allMonitors = [];
+
     for (let {condition, name} of conditions) {
       // Gather all completion conditions
 
       try {
         if (typeof condition == "function") {
           // Normalize |condition| to the result of the function.
           try {
             condition = condition(topic);
@@ -221,39 +291,45 @@ Spinner.prototype = {
         // function returned |undefined| or failed), that new promise
         // isn't going to be terribly interesting, but it will behave
         // as a promise.
         condition = Promise.resolve(condition);
 
         // If the promise takes too long to be resolved/rejected,
         // we need to notify the user.
         //
-        // Future versions may decide to crash Firefox rather than
-        // let it loop and suck battery power forever.
+        // If it takes way too long, we need to crash.
 
         let timer = Cc["@mozilla.org/timer;1"].createInstance(Ci.nsITimer);
         timer.initWithCallback(function() {
           let msg = "A phase completion condition is" +
             " taking too long to complete." +
-            " Condition: " + name +
+            " Condition: " + monitor.name +
             " Phase: " + topic;
           warn(msg);
         }, DELAY_WARNING_MS, Ci.nsITimer.TYPE_ONE_SHOT);
 
+        let monitor = {
+          isFrozen: true,
+          name: name
+        };
         condition = condition.then(function onSuccess() {
-            timer.cancel();
+            timer.cancel(); // As a side-effect, this prevents |timer| from
+                            // being garbage-collected too early.
+            monitor.isFrozen = false;
           }, function onError(error) {
             timer.cancel();
             let msg = "A completion condition encountered an error" +
                 " while we were spinning the event loop." +
                 " Condition: " + name +
                 " Phase: " + topic;
             warn(msg, error);
+            monitor.isFrozen = false;
         });
-
+        allMonitors.push(monitor);
         allPromises.push(condition);
 
       } catch (error) {
           let msg = "A completion condition encountered an error" +
                 " while we were initializing the phase." +
                 " Condition: " + name +
                 " Phase: " + topic;
           warn(msg, error);
@@ -270,18 +346,62 @@ Spinner.prototype = {
       // However, let's be overcautious with async/shutdown error reporting.
       let msg = "An uncaught error appeared while completing the phase." +
             " Phase: " + topic;
       warn(msg, error);
     });
 
     let satisfied = false; // |true| once we have satisfied all conditions
 
+    // If after DELAY_CRASH_MS (approximately one minute, adjusted to take
+    // into account sleep and otherwise busy computer) we have not finished
+    // this shutdown phase, we assume that the shutdown is somehow frozen,
+    // presumably deadlocked. At this stage, the only thing we can do to
+    // avoid leaving the user's computer in an unstable (and battery-sucking)
+    // situation is report the issue and crash.
+    let timeToCrash = looseTimer(DELAY_CRASH_MS);
+    timeToCrash.promise.then(
+      function onTimeout() {
+        // Report the problem as best as we can, then crash.
+        let frozen = [];
+        for (let {name, isFrozen} of allMonitors) {
+          if (isFrozen) {
+            frozen.push(name);
+          }
+        }
+
+        let msg = "At least one completion condition failed to complete" +
+              " within a reasonable amount of time. Causing a crash to" +
+              " ensure that we do not leave the user with an unresponsive" +
+              " process draining resources." +
+              " Conditions: " + frozen.join(", ") +
+              " Phase: " + topic;
+        err(msg);
+        if (gCrashReporter) {
+          let data = {
+            phase: topic,
+            conditions: frozen
+          };
+          gCrashReporter.annotateCrashReport("AsyncShutdownTimeout",
+            JSON.stringify(data));
+        } else {
+          warn("No crash reporter available");
+        }
+
+        let error = new Error();
+        gDebug.abort(error.fileName, error.lineNumber + 1);
+      },
+      function onSatisfied() {
+        // The promise has been rejected, which means that we have satisfied
+        // all completion conditions.
+      });
+
     promise = promise.then(function() {
       satisfied = true;
+      timeToCrash.reject();
     }/* No error is possible here*/);
 
     // Now, spin the event loop
     let thread = Services.tm.mainThread;
     while(!satisfied) {
       thread.processNextEvent(true);
     }
   }
rename from toolkit/modules/tests/xpcshell/test_phase.js
rename to toolkit/modules/tests/xpcshell/test_AsyncShutdown.js
--- a/toolkit/modules/tests/xpcshell/xpcshell.ini
+++ b/toolkit/modules/tests/xpcshell/xpcshell.ini
@@ -1,17 +1,17 @@
 [DEFAULT]
 head =
 tail =
 support-files =
   propertyLists/bug710259_propertyListBinary.plist
   propertyLists/bug710259_propertyListXML.plist
   chromeappsstore.sqlite
 
-[test_phase.js]
+[test_AsyncShutdown.js]
 [test_dict.js]
 [test_FileUtils.js]
 [test_Http.js]
 [test_Preferences.js]
 [test_Promise.js]
 [test_propertyListsUtils.js]
 [test_readCertPrefs.js]
 [test_Services.js]