Bug 1636147 - Don't report dead workers as active r=dom-workers-and-storage-reviewers,sg,asuth
authorYaron Tausky <ytausky@mozilla.com>
Wed, 13 May 2020 17:48:19 +0000
changeset 529665 e63c8298618e3bf5f0aeac6f2bfdbe5eb8ce28d6
parent 529663 ea102a49618733aae60628d0f4304e8708274016
child 529666 76e23731501107962df0ec38fab982e102bf3e4c
push id37414
push usernbeleuzu@mozilla.com
push dateThu, 14 May 2020 02:40:10 +0000
treeherdermozilla-central@045d696faa87 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersdom-workers-and-storage-reviewers, sg, asuth
bugs1636147
milestone78.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1636147 - Don't report dead workers as active r=dom-workers-and-storage-reviewers,sg,asuth `RuntimeService::CrashIfHanging` uses `RuntimeService`'s registry to determine which workers are still active. This approach is flawed because the registry is updated on the main thread, so if the main thread is stuck somewhere, dead workers could be reported as active. This commit adds an additional check: if dispatching a `Runnable` to a worker fails, it assumes that the worker is dead. This way main thread hangs would not be reported as worker hangs. Differential Revision: https://phabricator.services.mozilla.com/D74256
dom/workers/RuntimeService.cpp
--- a/dom/workers/RuntimeService.cpp
+++ b/dom/workers/RuntimeService.cpp
@@ -1629,94 +1629,98 @@ class CrashIfHangingRunnable : public Wo
     mMsg.Assign("Canceled");
 
     MonitorAutoLock lock(mMonitor);
     lock.Notify();
 
     return NS_OK;
   }
 
-  void DispatchAndWait() {
+  bool DispatchAndWait() {
     MonitorAutoLock lock(mMonitor);
 
     if (!Dispatch()) {
-      mMsg.Assign("Dispatch Error");
-      return;
+      // The worker is already dead but the main thread still didn't remove it
+      // from RuntimeService's registry.
+      return false;
     }
 
     lock.Wait();
+    return true;
   }
 
   const nsCString& MsgData() const { return mMsg; }
 
  private:
   bool PreDispatch(WorkerPrivate* aWorkerPrivate) override { return true; }
 
   void PostDispatch(WorkerPrivate* aWorkerPrivate,
                     bool aDispatchResult) override {}
 
   Monitor mMonitor;
   nsCString mMsg;
 };
 
+struct ActiveWorkerStats {
+  template <uint32_t ActiveWorkerStats::*Category>
+  void Update(const nsTArray<WorkerPrivate*>& aWorkers) {
+    for (const auto worker : aWorkers) {
+      RefPtr<CrashIfHangingRunnable> runnable =
+          new CrashIfHangingRunnable(worker);
+      if (runnable->DispatchAndWait()) {
+        ++(this->*Category);
+
+        // BC: Busy Count
+        mMessage.AppendPrintf("-BC:%d", worker->BusyCount());
+        mMessage.Append(runnable->MsgData());
+      }
+    }
+  }
+
+  uint32_t mWorkers = 0;
+  uint32_t mServiceWorkers = 0;
+  nsCString mMessage;
+};
+
 }  // namespace
 
 void RuntimeService::CrashIfHanging() {
   MutexAutoLock lock(mMutex);
 
-  if (mDomainMap.IsEmpty()) {
-    return;
-  }
-
-  uint32_t activeWorkers = 0;
-  uint32_t activeServiceWorkers = 0;
+  ActiveWorkerStats activeStats;
   uint32_t inactiveWorkers = 0;
 
-  nsTArray<WorkerPrivate*> workers;
-
   for (auto iter = mDomainMap.Iter(); !iter.Done(); iter.Next()) {
     WorkerDomainInfo* aData = iter.UserData();
 
-    activeWorkers += aData->mActiveWorkers.Length();
-    activeServiceWorkers += aData->mActiveServiceWorkers.Length();
-
-    workers.AppendElements(aData->mActiveWorkers);
-    workers.AppendElements(aData->mActiveServiceWorkers);
+    activeStats.Update<&ActiveWorkerStats::mWorkers>(aData->mActiveWorkers);
+    activeStats.Update<&ActiveWorkerStats::mServiceWorkers>(
+        aData->mActiveServiceWorkers);
 
     // These might not be top-level workers...
     for (uint32_t index = 0; index < aData->mQueuedWorkers.Length(); index++) {
       WorkerPrivate* worker = aData->mQueuedWorkers[index];
       if (!worker->GetParent()) {
         ++inactiveWorkers;
       }
     }
   }
 
-  // We must have something pending...
-  MOZ_DIAGNOSTIC_ASSERT(activeWorkers + activeServiceWorkers + inactiveWorkers);
+  if (activeStats.mWorkers + activeStats.mServiceWorkers + inactiveWorkers ==
+      0) {
+    return;
+  }
 
   nsCString msg;
 
   // A: active Workers | S: active ServiceWorkers | Q: queued Workers
   msg.AppendPrintf("Workers Hanging - %d|A:%d|S:%d|Q:%d", mShuttingDown ? 1 : 0,
-                   activeWorkers, activeServiceWorkers, inactiveWorkers);
-
-  // For each thread, let's print some data to know what is going wrong.
-  for (uint32_t i = 0; i < workers.Length(); ++i) {
-    WorkerPrivate* workerPrivate = workers[i];
-
-    // BC: Busy Count
-    msg.AppendPrintf("-BC:%d", workerPrivate->BusyCount());
-
-    RefPtr<CrashIfHangingRunnable> runnable =
-        new CrashIfHangingRunnable(workerPrivate);
-    runnable->DispatchAndWait();
-
-    msg.Append(runnable->MsgData());
-  }
+                   activeStats.mWorkers, activeStats.mServiceWorkers,
+                   inactiveWorkers);
+  msg.Append(activeStats.mMessage);
 
   // This string will be leaked.
   MOZ_CRASH_UNSAFE(strdup(msg.BeginReading()));
 }
 
 // This spins the event loop until all workers are finished and their threads
 // have been joined.
 void RuntimeService::Cleanup() {