Bug 1294032 - Fix poll hang - on shutdown hang in poll post event multiple times. r=mcmanus
author: Dragana Damjanovic <dd.mozilla@gmail.com>
Tue, 13 Sep 2016 04:43:00 -0400
changeset 357253 4bba6c0a00db9aae2ff285df40b547575a3f997c
parent 357252 be466c64f9c0874ba95432d0841cbd1887ea4fb5
child 357254 d434f479d145930cfd7b156a6a78586408defc3c
push id: 1324
push user: mtabara@mozilla.com
push date: Mon, 16 Jan 2017 13:07:44 +0000
treeherder: mozilla-release@a01c49833940 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: mcmanus
bugs: 1294032
milestone: 51.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1294032 - Fix poll hang - on shutdown hang in poll post event multiple times. r=mcmanus
netwerk/base/nsSocketTransportService2.cpp
netwerk/base/nsSocketTransportService2.h
--- a/netwerk/base/nsSocketTransportService2.cpp
+++ b/netwerk/base/nsSocketTransportService2.cpp
@@ -49,16 +49,18 @@ Atomic<PRThread*, Relaxed> gSocketThread
 #define KEEPALIVE_PROBE_COUNT_PREF "network.tcp.keepalive.probe_count"
 #define SOCKET_LIMIT_TARGET 1000U
 #define SOCKET_LIMIT_MIN      50U
 #define BLIP_INTERVAL_PREF "network.activity.blipIntervalMilliseconds"
 #define MAX_TIME_BETWEEN_TWO_POLLS "network.sts.max_time_for_events_between_two_polls"
 #define TELEMETRY_PREF "toolkit.telemetry.enabled"
 #define MAX_TIME_FOR_PR_CLOSE_DURING_SHUTDOWN "network.sts.max_time_for_pr_close_during_shutdown"
 
+// Delay handed to nsITimer::Init() for the poll-repair watchdog timer
+// (nsITimer delays are expressed in milliseconds). While a poll is still
+// outstanding the repeating timer re-signals the pollable event each time
+// it fires.
+#define REPAIR_POLLABLE_EVENT_TIME 10
+
 uint32_t nsSocketTransportService::gMaxCount;
 PRCallOnceType nsSocketTransportService::gMaxCountInitOnce;
 
 //-----------------------------------------------------------------------------
 // ctor/dtor (called on the main/UI thread by the service manager)
 
 nsSocketTransportService::nsSocketTransportService()
     : mThread(nullptr)
@@ -80,16 +82,19 @@ nsSocketTransportService::nsSocketTransp
     , mKeepaliveProbeCount(kDefaultTCPKeepCount)
     , mKeepaliveEnabledPref(false)
     , mServingPendingQueue(false)
     , mMaxTimePerPollIter(100)
     , mTelemetryEnabledPref(false)
     , mMaxTimeForPrClosePref(PR_SecondsToInterval(5))
     , mSleepPhase(false)
     , mProbedMaxCount(false)
+#if defined(XP_WIN)
+    , mPolling(false)
+#endif
 {
     NS_ASSERTION(NS_IsMainThread(), "wrong thread");
 
     PR_CallOnce(&gMaxCountInitOnce, DiscoverMaxCount);
     mActiveList = (SocketContext *)
         moz_xmalloc(sizeof(SocketContext) * mActiveListSize);
     mIdleList = (SocketContext *)
         moz_xmalloc(sizeof(SocketContext) * mIdleListSize);
@@ -756,16 +761,23 @@ nsSocketTransportService::OnDispatchedEv
     if (PR_GetCurrentThread() == gSocketThread) {
         // this check is redundant to one done inside ::Signal(), but
         // we can do it here and skip obtaining the lock - given that
         // this is a relatively common occurance its worth the
         // redundant code
         SOCKET_LOG(("OnDispatchedEvent Same Thread Skip Signal\n"));
         return NS_OK;
     }
+#else
+    if (gIOService->IsNetTearingDown()) {
+        // Poll can hang sometimes. If we are in shutdown, we are going to
+        // start a watchdog. If we do not exit poll within
+        // REPAIR_POLLABLE_EVENT_TIME signal a pollable event again.
+        StartPollWatchdog();
+    }
 #endif
 
     MutexAutoLock lock(mLock);
     if (mPollableEvent) {
         mPollableEvent->Signal();
     }
     return NS_OK;
 }
@@ -1063,17 +1075,23 @@ nsSocketTransportService::DoPollIteratio
 #endif
 
     // Measures seconds spent while blocked on PR_Poll
     uint32_t pollInterval = 0;
     int32_t n = 0;
     *pollDuration = 0;
     if (!gIOService->IsNetTearingDown()) {
         // Let's not do polling during shutdown.
+#if defined(XP_WIN)
+        StartPolling();
+#endif
         n = Poll(&pollInterval, pollDuration);
+#if defined(XP_WIN)
+        EndPolling();
+#endif
     }
 
     if (n < 0) {
         SOCKET_LOG(("  PR_Poll error [%d] os error [%d]\n", PR_GetError(),
                     PR_GetOSError()));
     }
     else {
         //
@@ -1304,16 +1322,23 @@ nsSocketTransportService::Observe(nsISup
     }
 
     if (!strcmp(topic, NS_TIMER_CALLBACK_TOPIC)) {
         nsCOMPtr<nsITimer> timer = do_QueryInterface(subject);
         if (timer == mAfterWakeUpTimer) {
             mAfterWakeUpTimer = nullptr;
             mSleepPhase = false;
         }
+
+#if defined(XP_WIN)
+        if (timer == mPollRepairTimer) {
+            DoPollRepair();
+        }
+#endif
+
     } else if (!strcmp(topic, NS_WIDGET_SLEEP_OBSERVER_TOPIC)) {
         mSleepPhase = true;
         if (mAfterWakeUpTimer) {
             mAfterWakeUpTimer->Cancel();
             mAfterWakeUpTimer = nullptr;
         }
     } else if (!strcmp(topic, NS_WIDGET_WAKE_OBSERVER_TOPIC)) {
         if (mSleepPhase && !mAfterWakeUpTimer) {
@@ -1526,10 +1551,56 @@ nsSocketTransportService::GetSocketConne
 {
     NS_ASSERTION(PR_GetCurrentThread() == gSocketThread, "wrong thread");
     for (uint32_t i = 0; i < mActiveCount; i++)
         AnalyzeConnection(data, &mActiveList[i], true);
     for (uint32_t i = 0; i < mIdleCount; i++)
         AnalyzeConnection(data, &mIdleList[i], false);
 }
 
+#if defined(XP_WIN)
+// Arms the poll-repair watchdog. Invoked from OnDispatchedEvent() when
+// gIOService->IsNetTearingDown() is true.
+//
+// If a poll is currently outstanding (mPolling) and no watchdog timer exists
+// yet, a repeating timer is created with |this| as its observer; each time it
+// fires, Observe() routes the callback to DoPollRepair().
+//
+// Timer creation and initialization are both checked: a failure leaves
+// mPollRepairTimer null so that a later call can try again, instead of
+// dereferencing a null timer or keeping one that will never fire.
+void
+nsSocketTransportService::StartPollWatchdog()
+{
+    MutexAutoLock lock(mLock);
+
+    // Poll can hang sometimes. If we are in shutdown, we are going to start a
+    // watchdog. If we do not exit poll within REPAIR_POLLABLE_EVENT_TIME
+    // signal a pollable event again.
+    MOZ_ASSERT(gIOService->IsNetTearingDown());
+    if (!mPolling || mPollRepairTimer) {
+        // Either no poll is in flight or the watchdog has already been set up.
+        return;
+    }
+
+    mPollRepairTimer = do_CreateInstance(NS_TIMER_CONTRACTID);
+    if (!mPollRepairTimer) {
+        // Could not create a timer; nothing to arm.
+        return;
+    }
+
+    nsresult rv = mPollRepairTimer->Init(this, REPAIR_POLLABLE_EVENT_TIME,
+                                         nsITimer::TYPE_REPEATING_SLACK);
+    if (NS_FAILED(rv)) {
+        // Drop the unusable timer so the !mPollRepairTimer check above does
+        // not block a subsequent attempt.
+        mPollRepairTimer = nullptr;
+    }
+}
+
+// Watchdog tick, reached from Observe() when mPollRepairTimer fires.
+//
+// While a poll is still outstanding and a pollable event exists, the event is
+// signalled again. Otherwise the watchdog timer (if any) is cancelled.
+// Runs entirely under mLock.
+void
+nsSocketTransportService::DoPollRepair()
+{
+    MutexAutoLock lock(mLock);
+
+    if (mPolling && mPollableEvent) {
+        // Still inside the poll: signal the pollable event once more.
+        mPollableEvent->Signal();
+        return;
+    }
+
+    // No poll in flight (or no pollable event): stop the repeating timer.
+    if (mPollRepairTimer) {
+        mPollRepairTimer->Cancel();
+    }
+}
+
+// Marks that a poll is about to begin: sets mPolling to true while holding
+// mLock. DoPollIteration() calls this immediately before Poll() (XP_WIN
+// only); StartPollWatchdog() and DoPollRepair() consult the flag.
+void
+nsSocketTransportService::StartPolling()
+{
+    MutexAutoLock lock(mLock);
+    mPolling = true;
+}
+
+// Marks that the poll has returned: clears mPolling under mLock and cancels
+// the poll-repair watchdog timer if one was created. DoPollIteration() calls
+// this immediately after Poll() (XP_WIN only).
+void
+nsSocketTransportService::EndPolling()
+{
+    MutexAutoLock lock(mLock);
+
+    mPolling = false;
+
+    if (!mPollRepairTimer) {
+        // No watchdog was ever armed; nothing to cancel.
+        return;
+    }
+    mPollRepairTimer->Cancel();
+}
+#endif
+
 } // namespace net
 } // namespace mozilla
--- a/netwerk/base/nsSocketTransportService2.h
+++ b/netwerk/base/nsSocketTransportService2.h
@@ -255,16 +255,25 @@ private:
                            SocketContext *context, bool aActive);
 
     void ClosePrivateConnections();
     void DetachSocketWithGuard(bool aGuardLocals,
                                SocketContext *socketList,
                                int32_t index);
 
     void MarkTheLastElementOfPendingQueue();
+
+#if defined(XP_WIN)
+    Atomic<bool> mPolling;
+    nsCOMPtr<nsITimer> mPollRepairTimer;
+    void StartPollWatchdog();
+    void DoPollRepair();
+    void StartPolling();
+    void EndPolling();
+#endif
 };
 
 extern nsSocketTransportService *gSocketTransportService;
 extern Atomic<PRThread*, Relaxed> gSocketThread;
 
 } // namespace net
 } // namespace mozilla