Bug 1652613 - record a telemetry event when we show the slow script notification warning, r=mconley
authorGijs Kruitbosch <gijskruitbosch@gmail.com>
Fri, 24 Jul 2020 00:50:12 +0000
changeset 541858 5bbd29a7db14dd5fb5b8eafeb4e8d4056c6f392d
parent 541857 6e90fed2ff8ac39ca8af803c1906c634682e53ae
child 541859 74d2da5438778a6f885443709dab3806c44c708a
push id37633
push userccoroiu@mozilla.com
push dateFri, 24 Jul 2020 09:32:06 +0000
treeherdermozilla-central@141543043270 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersmconley
bugs1652613
milestone80.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1652613 - record a telemetry event when we show the slow script notification warning, r=mconley Differential Revision: https://phabricator.services.mozilla.com/D83922
browser/modules/ProcessHangMonitor.jsm
toolkit/components/telemetry/Events.yaml
--- a/browser/modules/ProcessHangMonitor.jsm
+++ b/browser/modules/ProcessHangMonitor.jsm
@@ -34,36 +34,43 @@ var ProcessHangMonitor = {
   /**
    * Should only be set to true once the quit-application-granted notification
    * has been fired.
    */
   _shuttingDown: false,
 
   /**
    * Collection of hang reports that haven't expired or been dismissed
-   * by the user. These are nsIHangReports.
+   * by the user. These are nsIHangReports. They are mapped to objects
+   * containing:
+   * - notificationTime: when (Cu.now()) we first showed a notification
+   * - waitCount: how often the user asked to wait for the script to finish
+   * - lastReportFromChild: when (Cu.now()) we last got hang info from the
+   *   child.
    */
-  _activeReports: new Set(),
+  _activeReports: new Map(),
 
   /**
    * Collection of hang reports that have been suppressed for a short
-   * period of time. Value is an nsITimer for when the wait time
+   * period of time. Value is an object like in _activeReports, but also
+   * including a `timer` prop, which is an nsITimer for when the wait time
    * expires.
    */
   _pausedReports: new Map(),
 
   /**
    * Initialize hang reporting. Called once in the parent process.
    */
   init() {
     Services.obs.addObserver(this, "process-hang-report");
     Services.obs.addObserver(this, "clear-hang-report");
     Services.obs.addObserver(this, "quit-application-granted");
     Services.obs.addObserver(this, "xpcom-shutdown");
     Services.ww.registerNotification(this);
+    Services.telemetry.setEventRecordingEnabled("slow_script_warning", true);
   },
 
   /**
    * Terminate JavaScript associated with the hang being reported for
    * the selected browser in |win|.
    */
   terminateScript(win) {
     this.handleUserInput(win, report => report.terminateScript());
@@ -82,16 +89,17 @@ var ProcessHangMonitor = {
    * being reported for the selected browser in |win|.
    */
   debugScript(win) {
     this.handleUserInput(win, report => {
       function callback() {
         report.endStartingDebugger();
       }
 
+      this._recordTelemetryForReport(report, "debugging");
       report.beginStartingDebugger();
 
       let svc = Cc["@mozilla.org/dom/slow-script-debug;1"].getService(
         Ci.nsISlowScriptDebug
       );
       let handler = svc.remoteActivationHandler;
       handler.handleSlowScriptDebug(report.scriptBrowser, callback);
     });
@@ -113,16 +121,17 @@ var ProcessHangMonitor = {
   stopIt(win) {
     let report = this.findActiveReport(win.gBrowser.selectedBrowser);
     if (!report) {
       return;
     }
 
     switch (report.hangType) {
       case report.SLOW_SCRIPT:
+        this._recordTelemetryForReport(report, "user-aborted");
         this.terminateScript(win);
         break;
       case report.PLUGIN_HANG:
         this.terminatePlugin(win);
         break;
     }
   },
 
@@ -133,28 +142,30 @@ var ProcessHangMonitor = {
   stopGlobal(win) {
     let report = this.findActiveReport(win.gBrowser.selectedBrowser);
     if (!report) {
       return;
     }
 
     switch (report.hangType) {
       case report.SLOW_SCRIPT:
+        this._recordTelemetryForReport(report, "user-aborted");
         this.terminateGlobal(win);
         break;
     }
   },
 
   /**
    * Terminate whatever is causing this report, be it an add-on, page script,
    * or plug-in. This is done without updating any report notifications.
    */
-  stopHang(report) {
+  stopHang(report, endReason, backupInfo) {
     switch (report.hangType) {
       case report.SLOW_SCRIPT: {
+        this._recordTelemetryForReport(report, endReason, backupInfo);
         if (report.addonId) {
           report.terminateGlobal();
         } else {
           report.terminateScript();
         }
         break;
       }
       case report.PLUGIN_HANG: {
@@ -168,45 +179,50 @@ var ProcessHangMonitor = {
    * Dismiss the notification, clear the report from the active list and set up
    * a new timer to track a wait period during which we won't notify.
    */
   waitLonger(win) {
     let report = this.findActiveReport(win.gBrowser.selectedBrowser);
     if (!report) {
       return;
     }
+    // Update the other info we keep.
+    let reportInfo = this._activeReports.get(report);
+    reportInfo.waitCount++;
+
     // Remove the report from the active list.
     this.removeActiveReport(report);
 
     // NOTE, we didn't call userCanceled on nsIHangReport here. This insures
     // we don't repeatedly generate and cache crash report data for this hang
     // in the process hang reporter. It already has one report for the browser
     // process we want it hold onto.
 
     // Create a new wait timer with notify callback
     let timer = Cc["@mozilla.org/timer;1"].createInstance(Ci.nsITimer);
     timer.initWithCallback(
       () => {
-        for (let [stashedReport, otherTimer] of this._pausedReports) {
-          if (otherTimer === timer) {
+        for (let [stashedReport, pausedInfo] of this._pausedReports) {
+          if (pausedInfo.timer === timer) {
             this.removePausedReport(stashedReport);
 
             // We're still hung, so move the report back to the active
             // list and update the UI.
-            this._activeReports.add(report);
+            this._activeReports.set(report, pausedInfo);
             this.updateWindows();
             break;
           }
         }
       },
       this.WAIT_EXPIRATION_TIME,
       timer.TYPE_ONE_SHOT
     );
 
-    this._pausedReports.set(report, timer);
+    reportInfo.timer = timer;
+    this._pausedReports.set(report, reportInfo);
 
     // remove the browser notification associated with this hang
     this.updateWindows();
   },
 
   /**
    * If there is a hang report associated with the selected browser in
    * |win|, invoke |func| on that report and stop notifying the user
@@ -271,81 +287,81 @@ var ProcessHangMonitor = {
   /**
    * Called early on in the shutdown sequence. We take this opportunity to
    * take any pre-existing hang reports, and terminate them. We also put
    * ourselves in a state so that if any more hang reports show up while
    * we're shutting down, we terminate them immediately.
    */
   onQuitApplicationGranted() {
     this._shuttingDown = true;
-    this.stopAllHangs();
+    this.stopAllHangs("quit-application-granted");
     this.updateWindows();
   },
 
   onWindowClosed(win) {
     let maybeStopHang = report => {
       if (report.hangType == report.SLOW_SCRIPT) {
         let hungBrowserWindow = null;
         try {
           hungBrowserWindow = report.scriptBrowser.ownerGlobal;
         } catch (e) {
           // Ignore failures to get the script browser - we'll be
           // conservative, and assume that if we cannot access the
           // window that belongs to this report that we should stop
           // the hang.
         }
         if (!hungBrowserWindow || hungBrowserWindow == win) {
-          this.stopHang(report);
+          this.stopHang(report, "window-closed");
           return true;
         }
       } else if (report.hangType == report.PLUGIN_HANG) {
         // If any window has closed during a plug-in hang, we'll
         // do the conservative thing and terminate the plug-in.
         this.stopHang(report);
         return true;
       }
       return false;
     };
 
     // If there are any script hangs for browsers that are in this window
     // that is closing, we can stop them now.
-    for (let report of this._activeReports) {
+    for (let [report] of this._activeReports) {
       if (maybeStopHang(report)) {
         this._activeReports.delete(report);
       }
     }
 
     for (let [pausedReport] of this._pausedReports) {
       if (maybeStopHang(pausedReport)) {
         this.removePausedReport(pausedReport);
       }
     }
 
     this.updateWindows();
   },
 
-  stopAllHangs() {
-    for (let report of this._activeReports) {
-      this.stopHang(report);
+  stopAllHangs(endReason) {
+    for (let [report] of this._activeReports) {
+      this.stopHang(report, endReason);
     }
 
-    this._activeReports = new Set();
+    this._activeReports = new Map();
 
     for (let [pausedReport] of this._pausedReports) {
-      this.stopHang(pausedReport);
+      this.stopHang(pausedReport, endReason);
       this.removePausedReport(pausedReport);
     }
   },
 
   /**
    * Find a active hang report for the given <browser> element.
    */
   findActiveReport(browser) {
     let frameLoader = browser.frameLoader;
-    for (let report of this._activeReports) {
+    for (let report of this._activeReports.keys()) {
       if (report.isReportForBrowser(frameLoader)) {
         return report;
       }
     }
     return null;
   },
 
   /**
@@ -357,50 +373,113 @@ var ProcessHangMonitor = {
       if (report.isReportForBrowser(frameLoader)) {
         return report;
       }
     }
     return null;
   },
 
   /**
+   * Tell telemetry about the report.
+   */
+  _recordTelemetryForReport(report, endReason, backupInfo) {
+    let info =
+      this._activeReports.get(report) ||
+      this._pausedReports.get(report) ||
+      backupInfo;
+    if (!info) {
+      return;
+    }
+    try {
+      // Only report slow script hangs.
+      if (report.hangType != report.SLOW_SCRIPT) {
+        return;
+      }
+      let uri_type;
+      if (report.addonId) {
+        uri_type = "extension";
+      } else if (report.scriptFileName?.startsWith("debugger")) {
+        uri_type = "devtools";
+      } else {
+        try {
+          let url = new URL(report.scriptFileName);
+          if (url.protocol == "chrome:" || url.protocol == "resource:") {
+            uri_type = "browser";
+          } else {
+            uri_type = "content";
+          }
+        } catch (ex) {
+          Cu.reportError(ex);
+          uri_type = "unknown";
+        }
+      }
+      let uptime = 0;
+      if (info.notificationTime) {
+        uptime = Cu.now() - info.notificationTime;
+      }
+      uptime = "" + uptime;
+      // We combine the duration of the hang in the content process with the
+      // time since we were last told about the hang in the parent. This is
+      // not the same as the time we showed a notification, as we only do that
+      // for the currently selected browser. It's as messy as it is because
+      // there is no cross-process monotonically increasing timestamp we can
+      // use. :-(
+      let hangDuration =
+        report.hangDuration + Cu.now() - info.lastReportFromChild;
+      Services.telemetry.recordEvent(
+        "slow_script_warning",
+        "shown",
+        "content",
+        null,
+        {
+          end_reason: endReason,
+          hang_duration: "" + hangDuration,
+          uri_type,
+          uptime,
+          wait_count: "" + info.waitCount,
+        }
+      );
+    } catch (ex) {
+      Cu.reportError(ex);
+    }
+  },
+
+  /**
    * Remove an active hang report from the active list and cancel the timer
    * associated with it.
    */
   removeActiveReport(report) {
     this._activeReports.delete(report);
     this.updateWindows();
   },
 
   /**
    * Remove a paused hang report from the paused list and cancel the timer
    * associated with it.
    */
   removePausedReport(report) {
-    let timer = this._pausedReports.get(report);
-    if (timer) {
-      timer.cancel();
-    }
+    let info = this._pausedReports.get(report);
+    info?.timer?.cancel();
     this._pausedReports.delete(report);
   },
 
   /**
    * Iterate over all XUL windows and ensure that the proper hang
    * reports are shown for each one. Also install event handlers in
    * each window to watch for events that would cause a different hang
    * report to be displayed.
    */
   updateWindows() {
     let e = Services.wm.getEnumerator("navigator:browser");
 
     // If it turns out we have no windows (this can happen on macOS),
     // we have no opportunity to ask the user whether or not they want
     // to stop the hang or wait, so we'll opt for stopping the hang.
     if (!e.hasMoreElements()) {
-      this.stopAllHangs();
+      this.stopAllHangs("no-windows-left");
       return;
     }
 
     for (let win of e) {
       this.updateWindow(win);
 
       // Only listen for these events if there are active hang reports.
       if (this._activeReports.size) {
@@ -413,16 +492,20 @@ var ProcessHangMonitor = {
 
   /**
    * If there is a hang report for the current tab in |win|, display it.
    */
   updateWindow(win) {
     let report = this.findActiveReport(win.gBrowser.selectedBrowser);
 
     if (report) {
+      let info = this._activeReports.get(report);
+      if (info && !info.notificationTime) {
+        info.notificationTime = Cu.now();
+      }
       this.showNotification(win, report);
     } else {
       this.hideNotification(win);
     }
   },
 
   /**
    * Show the notification for a hang.
@@ -561,47 +644,57 @@ var ProcessHangMonitor = {
     }
   },
 
   /**
    * Handle a potentially new hang report. If it hasn't been seen
    * before, show a notification for it in all open XUL windows.
    */
   reportHang(report) {
+    let now = Cu.now();
     if (this._shuttingDown) {
-      this.stopHang(report);
+      this.stopHang(report, "shutdown-in-progress", {
+        lastReportFromChild: now,
+      });
       return;
     }
 
     // If this hang was already reported reset the timer for it.
     if (this._activeReports.has(report)) {
+      this._activeReports.get(report).lastReportFromChild = now;
       // if this report is in active but doesn't have a notification associated
       // with it, display a notification.
       this.updateWindows();
       return;
     }
 
     // If this hang was already reported and paused by the user ignore it.
     if (this._pausedReports.has(report)) {
+      this._pausedReports.get(report).lastReportFromChild = now;
       return;
     }
 
     // On e10s this counts slow-script/hanged-plugin notice only once.
     // This code is not reached on non-e10s.
     if (report.hangType == report.SLOW_SCRIPT) {
       // On non-e10s, SLOW_SCRIPT_NOTICE_COUNT is probed at nsGlobalWindow.cpp
       Services.telemetry.getHistogramById("SLOW_SCRIPT_NOTICE_COUNT").add();
     } else if (report.hangType == report.PLUGIN_HANG) {
       // On non-e10s we have sufficient plugin telemetry probes,
       // so PLUGIN_HANG_NOTICE_COUNT is only probed on e10s.
       Services.telemetry.getHistogramById("PLUGIN_HANG_NOTICE_COUNT").add();
     }
 
-    this._activeReports.add(report);
+    this._activeReports.set(report, {
+      lastReportFromChild: now,
+      waitCount: 0,
+    });
     this.updateWindows();
   },
 
   clearHang(report) {
+    this._recordTelemetryForReport(report, "cleared");
+
     this.removeActiveReport(report);
     this.removePausedReport(report);
     report.userCanceled();
   },
 };
--- a/toolkit/components/telemetry/Events.yaml
+++ b/toolkit/components/telemetry/Events.yaml
@@ -2266,16 +2266,42 @@ security.ui.certerror:
     record_in_processes: ["content"]
     products:
       - firefox
     extra_keys:
       is_frame: If the error page is loaded in an iframe.
       has_sts: If the error page is for a site with HSTS headers or with a pinned key.
       panel_open: If the advanced panel was open at the time of the interaction.
 
+slow_script_warning:
+  shown:
+    bug_numbers:
+      - 1652613
+    description: >
+      Recorded when a slow script hang is resolved.
+    products:
+      - "firefox"
+    record_in_processes: ["main"]
+    release_channel_collection: opt-out
+    expiry_version: "85"
+    notification_emails:
+      - esmyth@mozilla.com
+      - gkruitbosch@mozilla.com
+    # Whether the hung script was for a content or browser process.
+    objects: [
+      "browser",
+      "content",
+    ]
+    extra_keys:
+      end_reason: Why the warning was hidden (user action, the process becoming responsive again, the browser quitting, etc.)
+      wait_count: How many times the user elected to wait.
+      hang_duration: How long we believe the hang continued (ms).
+      uri_type: The kind of script URL that hung.
+      uptime: How long the notification was up (ms).
+
 webrtc.ui:
   share_display:
     objects:
       - screen
       - window
       - browser_window
     description: >
       Recorded when a display is shared. The value for this event is a unique