Bug 1500861 - Add shutdownWithTimeout method to nsIThreadPool. r=froydnj,erahm, a=RyanVM
This method is necessary because some threads might be stuck making blocking
calls. This means the thread is not processing any events, and we're unable
to safely terminate it. Our solution here is to leak the stuck threads
instead of waiting for them and potentially causing a shutdown hang.
Differential Revision:
https://phabricator.services.mozilla.com/D9601
--- a/netwerk/dns/nsHostResolver.cpp
+++ b/netwerk/dns/nsHostResolver.cpp
@@ -844,38 +844,26 @@ nsHostResolver::Shutdown()
pendingQHigh.clear();
pendingQMed.clear();
pendingQLow.clear();
evictionQ.clear();
for (auto iter = mRecordDB.Iter(); !iter.Done(); iter.Next()) {
iter.UserData()->Cancel();
}
-#ifdef NS_BUILD_REFCNT_LOGGING
- // Logically join the outstanding worker threads with a timeout.
- // Use this approach instead of PR_JoinThread() because that does
- // not allow a timeout which may be necessary for a semi-responsive
- // shutdown if the thread is blocked on a very slow DNS resolution.
- // mActiveTaskCount is read outside of mLock, but the worst case
- // scenario for that race is one extra 25ms sleep.
-
- PRIntervalTime delay = PR_MillisecondsToInterval(25);
- PRIntervalTime stopTime = PR_IntervalNow() + PR_SecondsToInterval(20);
- while (mActiveTaskCount && PR_IntervalNow() < stopTime)
- PR_Sleep(delay);
-#endif
+ // Shutdown the resolver threads, but with a timeout of 20 seconds.
+ // If the timeout is exceeded, any stuck threads will be leaked.
+ mResolverThreads->ShutdownWithTimeout(20 * 1000);
{
mozilla::DebugOnly<nsresult> rv = GetAddrInfoShutdown();
NS_WARNING_ASSERTION(NS_SUCCEEDED(rv),
"Failed to shutdown GetAddrInfo");
}
-
- mResolverThreads->Shutdown();
}
nsresult
nsHostResolver::GetHostRecord(const nsACString &host, uint16_t type,
uint16_t flags, uint16_t af, bool pb,
const nsCString &originSuffix,
nsHostRecord **result)
{
--- a/xpcom/tests/gtest/TestThreadPool.cpp
+++ b/xpcom/tests/gtest/TestThreadPool.cpp
@@ -123,8 +123,46 @@ TEST(ThreadPool, Parallelism)
bool done = false;
nsCOMPtr<nsIRunnable> r1 = new Runnable1(mon, done);
nsCOMPtr<nsIRunnable> r2 = new Runnable2(mon, done);
pool->Dispatch(r1, NS_DISPATCH_NORMAL);
pool->Dispatch(r2, NS_DISPATCH_NORMAL);
pool->Shutdown();
}
+
+TEST(ThreadPool, ShutdownWithTimeout)
+{
+ Task::sCount = 0;
+ nsCOMPtr<nsIThreadPool> pool = new nsThreadPool();
+
+ for (int i = 0; i < 4; ++i) {
+ nsCOMPtr<nsIRunnable> task = new Task(i);
+ EXPECT_TRUE(task);
+
+ pool->Dispatch(task, NS_DISPATCH_NORMAL);
+ }
+
+ // Wait for a max of 300 ms. All threads should be done by then.
+ pool->ShutdownWithTimeout(300);
+ EXPECT_EQ(Task::sCount, 4);
+
+ Task::sCount = 0;
+ pool = new nsThreadPool();
+ for (int i = 0; i < 3; ++i) {
+ nsCOMPtr<nsIRunnable> task = new Task(i);
+ EXPECT_TRUE(task);
+
+ pool->Dispatch(task, NS_DISPATCH_NORMAL);
+ }
+
+ pool->Dispatch(NS_NewRunnableFunction("infinite-loop", []() {
+ printf("### running from thread that never ends: %p\n",
+ (void *) PR_GetCurrentThread());
+ while(true) {
+ PR_Sleep(PR_MillisecondsToInterval(100));
+ }
+ EXPECT_TRUE(false); // We should never get here.
+ }), NS_DISPATCH_NORMAL);
+
+ pool->ShutdownWithTimeout(1000);
+ EXPECT_EQ(Task::sCount, 3);
+}
--- a/xpcom/threads/nsIThreadPool.idl
+++ b/xpcom/threads/nsIThreadPool.idl
@@ -37,16 +37,27 @@ interface nsIThreadPool : nsIEventTarget
* function returns, the thread pool and all of its threads will be shutdown,
* and it will no longer be possible to dispatch tasks to the thread pool.
*
* As a side effect, events on the current thread will be processed.
*/
void shutdown();
/**
+ * Shutdown the thread pool, but only wait for aTimeoutMs. After the timeout
+ * expires, any threads that have not shutdown yet are leaked and will not
+ * block shutdown.
+ *
+ * This method should only be used at during shutdown to cleanup threads that
+ * made blocking calls to code outside our control, and can't be safely
+ * terminated. We choose to leak them intentionally to avoid a shutdown hang.
+ */
+ [noscript] void shutdownWithTimeout(in long aTimeoutMs);
+
+ /**
* Get/set the maximum number of threads allowed at one time in this pool.
*/
attribute unsigned long threadLimit;
/**
* Get/set the maximum number of idle threads kept alive.
*/
attribute unsigned long idleThreadLimit;
--- a/xpcom/threads/nsThread.h
+++ b/xpcom/threads/nsThread.h
@@ -176,16 +176,17 @@ protected:
nsIThreadObserver* obs;
nsThread::GetObserver(&obs);
return already_AddRefed<nsIThreadObserver>(obs);
}
struct nsThreadShutdownContext* ShutdownInternal(bool aSync);
friend class nsThreadManager;
+ friend class nsThreadPool;
static mozilla::OffTheBooksMutex& ThreadListMutex();
static mozilla::LinkedList<nsThread>& ThreadList();
static void ClearThreadList();
// The current number of active threads.
static uint32_t sActiveThreads;
// The maximum current number of active threads we've had in this session.
--- a/xpcom/threads/nsThreadPool.cpp
+++ b/xpcom/threads/nsThreadPool.cpp
@@ -356,16 +356,104 @@ nsThreadPool::Shutdown()
for (int32_t i = 0; i < threads.Count(); ++i) {
threads[i]->Shutdown();
}
return NS_OK;
}
+template<typename Pred>
+static void
+SpinMTEventLoopUntil(Pred&& aPredicate,
+ nsIThread* aThread,
+ TimeDuration aTimeout)
+{
+ MOZ_ASSERT(NS_IsMainThread(), "Must be run on the main thread");
+
+ // From a latency perspective, spinning the event loop is like leaving script
+ // and returning to the event loop. Tell the watchdog we stopped running
+ // script (until we return).
+ mozilla::Maybe<xpc::AutoScriptActivity> asa;
+ asa.emplace(false);
+
+ TimeStamp deadline = TimeStamp::Now() + aTimeout;
+ while (!aPredicate() && TimeStamp::Now() < deadline) {
+ if (!NS_ProcessNextEvent(aThread, false)) {
+ PR_Sleep(PR_MillisecondsToInterval(1));
+ }
+ }
+}
+
+NS_IMETHODIMP
+nsThreadPool::ShutdownWithTimeout(int32_t aTimeoutMs)
+{
+ if (!NS_IsMainThread()) {
+ return NS_ERROR_NOT_AVAILABLE;
+ }
+
+ nsCOMArray<nsIThread> threads;
+ nsCOMPtr<nsIThreadPoolListener> listener;
+ {
+ MutexAutoLock lock(mMutex);
+ mShutdown = true;
+ mEventsAvailable.NotifyAll();
+
+ threads.AppendObjects(mThreads);
+ mThreads.Clear();
+
+ // Swap in a null listener so that we release the listener at the end of
+ // this method. The listener will be kept alive as long as the other threads
+ // that were created when it was set.
+ mListener.swap(listener);
+ }
+
+ // IMPORTANT! Never dereference these pointers, as the objects may go away at
+ // any time. We just use the pointers values for comparison, to check if the
+ // thread has been shut down or not.
+ nsTArray<nsThreadShutdownContext*> contexts;
+
+ // It's important that we shutdown the threads while outside the event queue
+ // monitor. Otherwise, we could end up dead-locking.
+ for (int32_t i = 0; i < threads.Count(); ++i) {
+ // Shutdown async
+ nsThreadShutdownContext* maybeContext =
+ static_cast<nsThread*>(threads[i])->ShutdownInternal(false);
+ contexts.AppendElement(maybeContext);
+ }
+
+ NotNull<nsThread*> currentThread =
+ WrapNotNull(nsThreadManager::get().GetCurrentThread());
+
+ // We spin the event loop until all of the threads in the thread pool
+ // have shut down, or the timeout expires.
+ SpinMTEventLoopUntil([&]() {
+ for (nsIThread* thread : threads) {
+ if (static_cast<nsThread*>(thread)->mThread) {
+ return false;
+ }
+ }
+ return true;
+ }, currentThread, TimeDuration::FromMilliseconds(aTimeoutMs));
+
+ // For any threads that have not shutdown yet, we need to remove them from
+ // mRequestedShutdownContexts so the thread manager does not wait for them
+ // at shutdown.
+ for (int32_t i = 0; i < threads.Count(); ++i) {
+ nsThread* thread = static_cast<nsThread*>(threads[i]);
+ // If mThread is not null on the thread it means that it hasn't shutdown
+ // context[i] corresponds to thread[i]
+ if (thread->mThread && contexts[i]) {
+ currentThread->mRequestedShutdownContexts.RemoveElement(contexts[i]);
+ }
+ }
+
+ return NS_OK;
+}
+
NS_IMETHODIMP
nsThreadPool::GetThreadLimit(uint32_t* aValue)
{
*aValue = mThreadLimit;
return NS_OK;
}
NS_IMETHODIMP