Bug 1164931 - Clear hangs when they're over (r=mrbkap)
authorBill McCloskey <billm@mozilla.com>
Fri, 01 Apr 2016 16:54:25 -0700
changeset 292298 d20bb8617b2597b89456413119b40ff02bf77b09
parent 292297 dd3e03fcb06b588bf1c2a9362b6aac6983ec8738
child 292299 333a3a16c9f35fb2ed063555424cc1c204d8de67
push id30154
push usercbook@mozilla.com
push dateFri, 08 Apr 2016 10:00:50 +0000
treeherdermozilla-central@d9b1a9829c8e [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersmrbkap
bugs1164931
milestone48.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1164931 - Clear hangs when they're over (r=mrbkap)
browser/modules/ProcessHangMonitor.jsm
browser/modules/test/browser_ProcessHangNotifications.js
dom/ipc/PProcessHangMonitor.ipdl
dom/ipc/ProcessHangMonitor.cpp
--- a/browser/modules/ProcessHangMonitor.jsm
+++ b/browser/modules/ProcessHangMonitor.jsm
@@ -17,58 +17,46 @@ Cu.import("resource://gre/modules/Servic
 /**
  * This JSM is responsible for observing content process hang reports
  * and asking the user what to do about them. See nsIHangReport for
  * the platform interface.
  */
 
 var ProcessHangMonitor = {
   /**
-   * If a hang hasn't been reported for more than 10 seconds, assume the
-   * content process has gotten unstuck (and hide the hang notification).
-   */
-  get HANG_EXPIRATION_TIME() {
-    try {
-      return Services.prefs.getIntPref("browser.hangNotification.expiration");
-    } catch (ex) {
-      return 10000;
-    }
-  },
-
-  /**
    * This timeout is the wait period applied after a user selects "Wait" in
    * an existing notification.
    */
   get WAIT_EXPIRATION_TIME() {
     try {
       return Services.prefs.getIntPref("browser.hangNotification.waitPeriod");
     } catch (ex) {
       return 10000;
     }
   },
 
   /**
    * Collection of hang reports that haven't expired or been dismissed
-   * by the user. The keys are nsIHangReports and values keys are
-   * timers. Each time the hang is reported, the timer is refreshed so
-   * it expires after HANG_EXPIRATION_TIME.
+   * by the user. These are nsIHangReports.
    */
-  _activeReports: new Map(),
+  _activeReports: new Set(),
 
   /**
-   * Collection of hang reports that have been suppressed for a
-   * short period of time.
+   * Collection of hang reports that have been suppressed for a short
+   * period of time. Value is an nsITimer for when the wait time
+   * expires.
    */
   _pausedReports: new Map(),
 
   /**
    * Initialize hang reporting. Called once in the parent process.
    */
   init: function() {
     Services.obs.addObserver(this, "process-hang-report", false);
+    Services.obs.addObserver(this, "clear-hang-report", false);
     Services.obs.addObserver(this, "xpcom-shutdown", false);
     Services.ww.registerNotification(this);
   },
 
   /**
    * Terminate JavaScript associated with the hang being reported for
    * the selected browser in |win|.
    */
@@ -127,42 +115,35 @@ var ProcessHangMonitor = {
    * Dismiss the notification, clear the report from the active list and set up
    * a new timer to track a wait period during which we won't notify.
    */
   waitLonger: function(win) {
     let report = this.findActiveReport(win.gBrowser.selectedBrowser);
     if (!report) {
       return;
     }
-    // Remove the report from the active list and cancel its timer.
+    // Remove the report from the active list.
     this.removeActiveReport(report);
 
     // NOTE, we didn't call userCanceled on nsIHangReport here. This insures
     // we don't repeatedly generate and cache crash report data for this hang
     // in the process hang reporter. It already has one report for the browser
     // process we want it hold onto.
 
     // Create a new wait timer with notify callback
     let timer = Cc["@mozilla.org/timer;1"].createInstance(Ci.nsITimer);
     timer.initWithCallback(() => {
       for (let [stashedReport, otherTimer] of this._pausedReports) {
         if (otherTimer === timer) {
           this.removePausedReport(stashedReport);
 
-          // Create a new notification display timeout timer
-          let timer = Cc["@mozilla.org/timer;1"].createInstance(Ci.nsITimer);
-          timer.initWithCallback(this, this.HANG_EXPIRATION_TIME, timer.TYPE_ONE_SHOT);
-
-          // Store the timer in the active reports map. If we receive a new
-          // observer notification for this hang, we'll redisplay the browser
-          // notification in reportHang below. If we do not receive a new
-          // observer, timer will take care fo cleaning up resources associated
-          // with this hang. The observer for active hangs fires about once
-          // a second.
-          this._activeReports.set(report, timer);
+          // We're still hung, so move the report back to the active
+          // list and update the UI.
+          this._activeReports.add(report);
+          this.updateWindows();
           break;
         }
       }
     }, this.WAIT_EXPIRATION_TIME, timer.TYPE_ONE_SHOT);
 
     this._pausedReports.set(report, timer);
 
     // remove the browser notification associated with this hang
@@ -184,23 +165,28 @@ var ProcessHangMonitor = {
     return func(report);
   },
 
   observe: function(subject, topic, data) {
     switch (topic) {
       case "xpcom-shutdown":
         Services.obs.removeObserver(this, "xpcom-shutdown");
         Services.obs.removeObserver(this, "process-hang-report");
+        Services.obs.removeObserver(this, "clear-hang-report");
         Services.ww.unregisterNotification(this);
         break;
 
       case "process-hang-report":
         this.reportHang(subject.QueryInterface(Ci.nsIHangReport));
         break;
 
+      case "clear-hang-report":
+        this.clearHang(subject.QueryInterface(Ci.nsIHangReport));
+        break;
+
       case "domwindowopened":
         // Install event listeners on the new window in case one of
         // its tabs is already hung.
         let win = subject.QueryInterface(Ci.nsIDOMWindow);
         let listener = (ev) => {
           win.removeEventListener("load", listener, true);
           this.updateWindows();
         };
@@ -209,17 +195,17 @@ var ProcessHangMonitor = {
     }
   },
 
   /**
    * Find a active hang report for the given <browser> element.
    */
   findActiveReport: function(browser) {
     let frameLoader = browser.QueryInterface(Ci.nsIFrameLoaderOwner).frameLoader;
-    for (let [report, timer] of this._activeReports) {
+    for (let report of this._activeReports) {
       if (report.isReportForBrowser(frameLoader)) {
         return report;
       }
     }
     return null;
   },
 
   /**
@@ -235,20 +221,16 @@ var ProcessHangMonitor = {
     return null;
   },
 
   /**
    * Remove an active hang report from the active list and cancel the timer
    * associated with it.
    */
   removeActiveReport: function(report) {
-    let timer = this._activeReports.get(report);
-    if (timer) {
-      timer.cancel();
-    }
     this._activeReports.delete(report);
     this.updateWindows();
   },
 
   /**
    * Remove a paused hang report from the paused list and cancel the timer
    * associated with it.
    */
@@ -377,19 +359,16 @@ var ProcessHangMonitor = {
 
   /**
    * Handle a potentially new hang report. If it hasn't been seen
    * before, show a notification for it in all open XUL windows.
    */
   reportHang: function(report) {
     // If this hang was already reported reset the timer for it.
     if (this._activeReports.has(report)) {
-      let timer = this._activeReports.get(report);
-      timer.cancel();
-      timer.initWithCallback(this, this.HANG_EXPIRATION_TIME, timer.TYPE_ONE_SHOT);
       // if this report is in active but doesn't have a notification associated
       // with it, display a notification.
       this.updateWindows();
       return;
     }
 
     // If this hang was already reported and paused by the user ignore it.
     if (this._pausedReports.has(report)) {
@@ -402,29 +381,18 @@ var ProcessHangMonitor = {
       // On non-e10s, SLOW_SCRIPT_NOTICE_COUNT is probed at nsGlobalWindow.cpp
       Services.telemetry.getHistogramById("SLOW_SCRIPT_NOTICE_COUNT").add();
     } else if (report.hangType == report.PLUGIN_HANG) {
       // On non-e10s we have sufficient plugin telemetry probes,
       // so PLUGIN_HANG_NOTICE_COUNT is only probed on e10s.
       Services.telemetry.getHistogramById("PLUGIN_HANG_NOTICE_COUNT").add();
     }
 
-    // Otherwise create a new timer and display the report.
-    let timer = Cc["@mozilla.org/timer;1"].createInstance(Ci.nsITimer);
-    timer.initWithCallback(this, this.HANG_EXPIRATION_TIME, timer.TYPE_ONE_SHOT);
-
-    this._activeReports.set(report, timer);
+    this._activeReports.add(report);
     this.updateWindows();
   },
 
-  /**
-   * Callback for when HANG_EXPIRATION_TIME has elapsed.
-   */
-  notify: function(timer) {
-    for (let [otherReport, otherTimer] of this._activeReports) {
-      if (otherTimer === timer) {
-        this.removeActiveReport(otherReport);
-        otherReport.userCanceled();
-        break;
-      }
-    }
+  clearHang: function(report) {
+    this.removeActiveReport(report);
+    this.removePausedReport(report);
+    report.userCanceled();
   },
 };
--- a/browser/modules/test/browser_ProcessHangNotifications.js
+++ b/browser/modules/test/browser_ProcessHangNotifications.js
@@ -121,52 +121,53 @@ add_task(function* waitForScriptTest() {
   let promise = promiseNotificationShown(window, "process-hang");
   Services.obs.notifyObservers(gTestHangReport, "process-hang-report", null);
   let notification = yield promise;
 
   let buttons = notification.currentNotification.getElementsByTagName("button");
   // Fails on aurora on-push builds, bug 1232204
   // is(buttons.length, buttonCount, "proper number of buttons");
 
-  yield pushPrefs(["browser.hangNotification.waitPeriod", 1000],
-                  ["browser.hangNotification.expiration", 2000]);
+  yield pushPrefs(["browser.hangNotification.waitPeriod", 1000]);
 
   function nocbcheck() {
     ok(false, "received a callback?");
   }
   let oldcb = gTestHangReport.testCallback;
   gTestHangReport.testCallback = nocbcheck;
   // Click the "Wait" button this time, we shouldn't get a callback at all.
   buttons[1].click();
   gTestHangReport.testCallback = oldcb;
 
   // send another hang pulse, we should not get a notification here
   Services.obs.notifyObservers(gTestHangReport, "process-hang-report", null);
   is(notification.currentNotification, null, "no notification should be visible");
 
-  // After selecting Wait, we should get a userCanceled callback after
-  // HANG_EXPIRATION_TIME.
-  yield promiseReportCallMade(gTestHangReport.TEST_CALLBACK_CANCELED);
+  gTestHangReport.testCallback = function() {};
+  Services.obs.notifyObservers(gTestHangReport, "clear-hang-report", null);
+  gTestHangReport.testCallback = oldcb;
 
   yield popPrefs();
 });
 
 /**
  * Test if hang reports receive user canceled callbacks after the content
  * process stops sending hang notifications.
  */
 
 add_task(function* hangGoesAwayTest() {
   yield pushPrefs(["browser.hangNotification.expiration", 1000]);
 
   let promise = promiseNotificationShown(window, "process-hang");
   Services.obs.notifyObservers(gTestHangReport, "process-hang-report", null);
   yield promise;
 
-  yield promiseReportCallMade(gTestHangReport.TEST_CALLBACK_CANCELED);
+  promise = promiseReportCallMade(gTestHangReport.TEST_CALLBACK_CANCELED);
+  Services.obs.notifyObservers(gTestHangReport, "clear-hang-report", null);
+  yield promise;
 
   yield popPrefs();
 });
 
 /**
  * Tests if hang reports receive a terminate plugin callback when the user selects
  * stop in response to a plugin hang.
  */
--- a/dom/ipc/PProcessHangMonitor.ipdl
+++ b/dom/ipc/PProcessHangMonitor.ipdl
@@ -28,16 +28,17 @@ union HangData
   SlowScriptData;
   PluginHangData;
 };
 
 protocol PProcessHangMonitor
 {
 parent:
   async HangEvidence(HangData data);
+  async ClearHang();
 
 child:
   async TerminateScript();
 
   async BeginStartingDebugger();
   async EndStartingDebugger();
 };
 
--- a/dom/ipc/ProcessHangMonitor.cpp
+++ b/dom/ipc/ProcessHangMonitor.cpp
@@ -85,16 +85,17 @@ class HangMonitorChild
                              unsigned aLineNo);
 
   bool IsDebuggerStartupComplete();
 
   void NotifyPluginHang(uint32_t aPluginId);
   void NotifyPluginHangAsync(uint32_t aPluginId);
 
   void ClearHang();
+  void ClearHangAsync();
 
   virtual bool RecvTerminateScript() override;
   virtual bool RecvBeginStartingDebugger() override;
   virtual bool RecvEndStartingDebugger() override;
 
   virtual void ActorDestroy(ActorDestroyReason aWhy) override;
 
   void Shutdown();
@@ -161,16 +162,21 @@ public:
     mActor = nullptr;
   }
 
   void SetHangData(const HangData& aHangData) { mHangData = aHangData; }
   void SetBrowserDumpId(nsAutoString& aId) {
     mBrowserDumpId = aId;
   }
 
+  void ClearHang() {
+    mHangData = HangData();
+    mBrowserDumpId.Truncate();
+  }
+
 private:
   ~HangMonitoredProcess() {}
 
   // Everything here is main thread-only.
   HangMonitorParent* mActor;
   ContentParent* mContentParent;
   HangData mHangData;
   nsAutoString mBrowserDumpId;
@@ -181,16 +187,17 @@ class HangMonitorParent
 {
 public:
   explicit HangMonitorParent(ProcessHangMonitor* aMonitor);
   virtual ~HangMonitorParent();
 
   void Open(Transport* aTransport, ProcessId aPid, MessageLoop* aIOLoop);
 
   virtual bool RecvHangEvidence(const HangData& aHangData) override;
+  virtual bool RecvClearHang() override;
 
   virtual void ActorDestroy(ActorDestroyReason aWhy) override;
 
   void SetProcess(HangMonitoredProcess* aProcess) { mProcess = aProcess; }
 
   void Shutdown();
 
   void TerminateScript();
@@ -434,24 +441,40 @@ HangMonitorChild::NotifyPluginHangAsync(
 }
 
 void
 HangMonitorChild::ClearHang()
 {
   MOZ_ASSERT(NS_IsMainThread());
 
   if (mSentReport) {
+    // bounce to background thread
+    MonitorLoop()->PostTask(
+      FROM_HERE,
+      NewRunnableMethod(this, &HangMonitorChild::ClearHangAsync));
+
     MonitorAutoLock lock(mMonitor);
     mSentReport = false;
     mTerminateScript = false;
     mStartDebugger = false;
     mFinishedStartingDebugger = false;
   }
 }
 
+void
+HangMonitorChild::ClearHangAsync()
+{
+  MOZ_RELEASE_ASSERT(MessageLoop::current() == MonitorLoop());
+
+  // bounce back to parent on background thread
+  if (mIPCOpen) {
+    Unused << SendClearHang();
+  }
+}
+
 /* HangMonitorParent implementation */
 
 HangMonitorParent::HangMonitorParent(ProcessHangMonitor* aMonitor)
  : mHangMonitor(aMonitor),
    mIPCOpen(true),
    mMonitor("HangMonitorParent lock"),
    mShutdownDone(false),
    mBrowserCrashDumpHashLock("mBrowserCrashDumpIds lock")
@@ -609,16 +632,61 @@ HangMonitorParent::RecvHangEvidence(cons
 
   nsCOMPtr<nsIRunnable> notifier =
     new HangObserverNotifier(mProcess, aHangData, crashId);
   NS_DispatchToMainThread(notifier);
 
   return true;
 }
 
+class ClearHangNotifier final : public nsRunnable
+{
+public:
+  explicit ClearHangNotifier(HangMonitoredProcess* aProcess)
+    : mProcess(aProcess)
+  {}
+
+  NS_IMETHOD
+  Run()
+  {
+    // chrome process, main thread
+    MOZ_RELEASE_ASSERT(NS_IsMainThread());
+    mProcess->ClearHang();
+
+    nsCOMPtr<nsIObserverService> observerService =
+      mozilla::services::GetObserverService();
+    observerService->NotifyObservers(mProcess, "clear-hang-report", nullptr);
+    return NS_OK;
+  }
+
+private:
+  RefPtr<HangMonitoredProcess> mProcess;
+};
+
+bool
+HangMonitorParent::RecvClearHang()
+{
+  // chrome process, background thread
+  MOZ_RELEASE_ASSERT(MessageLoop::current() == MonitorLoop());
+
+  if (!mReportHangs) {
+    return true;
+  }
+
+  mHangMonitor->InitiateCPOWTimeout();
+
+  MonitorAutoLock lock(mMonitor);
+
+  nsCOMPtr<nsIRunnable> notifier =
+    new ClearHangNotifier(mProcess);
+  NS_DispatchToMainThread(notifier);
+
+  return true;
+}
+
 void
 HangMonitorParent::TerminateScript()
 {
   MOZ_RELEASE_ASSERT(MessageLoop::current() == MonitorLoop());
 
   if (mIPCOpen) {
     Unused << SendTerminateScript();
   }