Bug 1500861 - Add shutdownWithTimeout method to nsIThreadPool. r=froydnj,erahm, a=RyanVM
authorValentin Gosu <valentin.gosu@gmail.com>
Fri, 26 Oct 2018 18:46:00 +0000
changeset 500901 442e20f46b5d670b0dafb158072dadde45d58e23
parent 500900 869a690e1c28c9baf1700cbe0100d58b282b008e
child 500902 c28a4c172db8362c9c66fe788484b6083d46b0c8
push id1864
push userffxbld-merge
push dateMon, 03 Dec 2018 15:51:40 +0000
treeherdermozilla-release@f040763d99ad [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersfroydnj, erahm, RyanVM
bugs1500861
milestone64.0
Bug 1500861 - Add shutdownWithTimeout method to nsIThreadPool. r=froydnj,erahm, a=RyanVM This method is necessary because some threads might be stuck making blocking calls. This means the thread is not processing any events, and we're unable to safely terminate it. Our solution here is to leak the stuck threads instead of waiting for them and potentially causing a shutdown hang. Differential Revision: https://phabricator.services.mozilla.com/D9601
netwerk/dns/nsHostResolver.cpp
xpcom/tests/gtest/TestThreadPool.cpp
xpcom/threads/nsIThreadPool.idl
xpcom/threads/nsThread.h
xpcom/threads/nsThreadPool.cpp
--- a/netwerk/dns/nsHostResolver.cpp
+++ b/netwerk/dns/nsHostResolver.cpp
@@ -844,38 +844,26 @@ nsHostResolver::Shutdown()
     pendingQHigh.clear();
     pendingQMed.clear();
     pendingQLow.clear();
     evictionQ.clear();
 
     for (auto iter = mRecordDB.Iter(); !iter.Done(); iter.Next()) {
         iter.UserData()->Cancel();
     }
-#ifdef NS_BUILD_REFCNT_LOGGING
 
-    // Logically join the outstanding worker threads with a timeout.
-    // Use this approach instead of PR_JoinThread() because that does
-    // not allow a timeout which may be necessary for a semi-responsive
-    // shutdown if the thread is blocked on a very slow DNS resolution.
-    // mActiveTaskCount is read outside of mLock, but the worst case
-    // scenario for that race is one extra 25ms sleep.
-
-    PRIntervalTime delay = PR_MillisecondsToInterval(25);
-    PRIntervalTime stopTime = PR_IntervalNow() + PR_SecondsToInterval(20);
-    while (mActiveTaskCount && PR_IntervalNow() < stopTime)
-        PR_Sleep(delay);
-#endif
+    // Shutdown the resolver threads, but with a timeout of 20 seconds.
+    // If the timeout is exceeded, any stuck threads will be leaked.
+    mResolverThreads->ShutdownWithTimeout(20 * 1000);
 
     {
         mozilla::DebugOnly<nsresult> rv = GetAddrInfoShutdown();
         NS_WARNING_ASSERTION(NS_SUCCEEDED(rv),
                              "Failed to shutdown GetAddrInfo");
     }
-
-    mResolverThreads->Shutdown();
 }
 
 nsresult
 nsHostResolver::GetHostRecord(const nsACString &host, uint16_t type,
                               uint16_t flags, uint16_t af, bool pb,
                               const nsCString &originSuffix,
                               nsHostRecord **result)
 {
--- a/xpcom/tests/gtest/TestThreadPool.cpp
+++ b/xpcom/tests/gtest/TestThreadPool.cpp
@@ -123,8 +123,46 @@ TEST(ThreadPool, Parallelism)
   bool done = false;
   nsCOMPtr<nsIRunnable> r1 = new Runnable1(mon, done);
   nsCOMPtr<nsIRunnable> r2 = new Runnable2(mon, done);
   pool->Dispatch(r1, NS_DISPATCH_NORMAL);
   pool->Dispatch(r2, NS_DISPATCH_NORMAL);
 
   pool->Shutdown();
 }
+
+TEST(ThreadPool, ShutdownWithTimeout)
+{
+  Task::sCount = 0;
+  nsCOMPtr<nsIThreadPool> pool = new nsThreadPool();
+
+  for (int i = 0; i < 4; ++i) {
+    nsCOMPtr<nsIRunnable> task = new Task(i);
+    EXPECT_TRUE(task);
+
+    pool->Dispatch(task, NS_DISPATCH_NORMAL);
+  }
+
+  // Wait for a max of 300 ms. All threads should be done by then.
+  pool->ShutdownWithTimeout(300);
+  EXPECT_EQ(Task::sCount, 4);
+
+  Task::sCount = 0;
+  pool = new nsThreadPool();
+  for (int i = 0; i < 3; ++i) {
+    nsCOMPtr<nsIRunnable> task = new Task(i);
+    EXPECT_TRUE(task);
+
+    pool->Dispatch(task, NS_DISPATCH_NORMAL);
+  }
+
+  pool->Dispatch(NS_NewRunnableFunction("infinite-loop", []() {
+      printf("### running from thread that never ends: %p\n",
+             (void *) PR_GetCurrentThread());
+      while(true) {
+          PR_Sleep(PR_MillisecondsToInterval(100));
+      }
+      EXPECT_TRUE(false); // We should never get here.
+    }), NS_DISPATCH_NORMAL);
+
+  pool->ShutdownWithTimeout(1000);
+  EXPECT_EQ(Task::sCount, 3);
+}
--- a/xpcom/threads/nsIThreadPool.idl
+++ b/xpcom/threads/nsIThreadPool.idl
@@ -37,16 +37,27 @@ interface nsIThreadPool : nsIEventTarget
    * function returns, the thread pool and all of its threads will be shutdown,
    * and it will no longer be possible to dispatch tasks to the thread pool.
    *
    * As a side effect, events on the current thread will be processed.
    */
   void shutdown();
 
   /**
+   * Shutdown the thread pool, but only wait for aTimeoutMs. After the timeout
+   * expires, any threads that have not shutdown yet are leaked and will not
+   * block shutdown.
+   *
+   * This method should only be used at during shutdown to cleanup threads that
+   * made blocking calls to code outside our control, and can't be safely
+   * terminated. We choose to leak them intentionally to avoid a shutdown hang.
+   */
+  [noscript] void shutdownWithTimeout(in long aTimeoutMs);
+
+  /**
    * Get/set the maximum number of threads allowed at one time in this pool.
    */
   attribute unsigned long threadLimit;
 
   /**
    * Get/set the maximum number of idle threads kept alive.
    */
   attribute unsigned long idleThreadLimit;
--- a/xpcom/threads/nsThread.h
+++ b/xpcom/threads/nsThread.h
@@ -176,16 +176,17 @@ protected:
     nsIThreadObserver* obs;
     nsThread::GetObserver(&obs);
     return already_AddRefed<nsIThreadObserver>(obs);
   }
 
   struct nsThreadShutdownContext* ShutdownInternal(bool aSync);
 
   friend class nsThreadManager;
+  friend class nsThreadPool;
 
   static mozilla::OffTheBooksMutex& ThreadListMutex();
   static mozilla::LinkedList<nsThread>& ThreadList();
   static void ClearThreadList();
 
   // The current number of active threads.
   static uint32_t sActiveThreads;
   // The maximum current number of active threads we've had in this session.
--- a/xpcom/threads/nsThreadPool.cpp
+++ b/xpcom/threads/nsThreadPool.cpp
@@ -356,16 +356,104 @@ nsThreadPool::Shutdown()
 
   for (int32_t i = 0; i < threads.Count(); ++i) {
     threads[i]->Shutdown();
   }
 
   return NS_OK;
 }
 
+template<typename Pred>
+static void
+SpinMTEventLoopUntil(Pred&& aPredicate,
+                     nsIThread* aThread,
+                     TimeDuration aTimeout)
+{
+  MOZ_ASSERT(NS_IsMainThread(), "Must be run on the main thread");
+
+  // From a latency perspective, spinning the event loop is like leaving script
+  // and returning to the event loop. Tell the watchdog we stopped running
+  // script (until we return).
+  mozilla::Maybe<xpc::AutoScriptActivity> asa;
+  asa.emplace(false);
+
+  TimeStamp deadline = TimeStamp::Now() + aTimeout;
+  while (!aPredicate() && TimeStamp::Now() < deadline) {
+    if (!NS_ProcessNextEvent(aThread, false)) {
+      PR_Sleep(PR_MillisecondsToInterval(1));
+    }
+  }
+}
+
+NS_IMETHODIMP
+nsThreadPool::ShutdownWithTimeout(int32_t aTimeoutMs)
+{
+  if (!NS_IsMainThread()) {
+    return NS_ERROR_NOT_AVAILABLE;
+  }
+
+  nsCOMArray<nsIThread> threads;
+  nsCOMPtr<nsIThreadPoolListener> listener;
+  {
+    MutexAutoLock lock(mMutex);
+    mShutdown = true;
+    mEventsAvailable.NotifyAll();
+
+    threads.AppendObjects(mThreads);
+    mThreads.Clear();
+
+    // Swap in a null listener so that we release the listener at the end of
+    // this method. The listener will be kept alive as long as the other threads
+    // that were created when it was set.
+    mListener.swap(listener);
+  }
+
+  // IMPORTANT! Never dereference these pointers, as the objects may go away at
+  // any time. We just use the pointers values for comparison, to check if the
+  // thread has been shut down or not.
+  nsTArray<nsThreadShutdownContext*> contexts;
+
+  // It's important that we shutdown the threads while outside the event queue
+  // monitor.  Otherwise, we could end up dead-locking.
+  for (int32_t i = 0; i < threads.Count(); ++i) {
+    // Shutdown async
+    nsThreadShutdownContext* maybeContext =
+      static_cast<nsThread*>(threads[i])->ShutdownInternal(false);
+    contexts.AppendElement(maybeContext);
+  }
+
+  NotNull<nsThread*> currentThread =
+    WrapNotNull(nsThreadManager::get().GetCurrentThread());
+
+  // We spin the event loop until all of the threads in the thread pool
+  // have shut down, or the timeout expires.
+  SpinMTEventLoopUntil([&]() {
+      for (nsIThread* thread : threads) {
+        if (static_cast<nsThread*>(thread)->mThread) {
+          return false;
+        }
+      }
+      return true;
+    }, currentThread, TimeDuration::FromMilliseconds(aTimeoutMs));
+
+  // For any threads that have not shutdown yet, we need to remove them from
+  // mRequestedShutdownContexts so the thread manager does not wait for them
+  // at shutdown.
+  for (int32_t i = 0; i < threads.Count(); ++i) {
+    nsThread* thread = static_cast<nsThread*>(threads[i]);
+    // If mThread is not null on the thread it means that it hasn't shutdown
+    // context[i] corresponds to thread[i]
+    if (thread->mThread && contexts[i]) {
+      currentThread->mRequestedShutdownContexts.RemoveElement(contexts[i]);
+    }
+  }
+
+  return NS_OK;
+}
+
 NS_IMETHODIMP
 nsThreadPool::GetThreadLimit(uint32_t* aValue)
 {
   *aValue = mThreadLimit;
   return NS_OK;
 }
 
 NS_IMETHODIMP