Bug 1294032 - Fix poll hang - on shutdown hang in poll post event multiple times. r=mcmanus, a=ritu
author: Dragana Damjanovic <dd.mozilla@gmail.com>
Tue, 13 Sep 2016 04:43:00 -0400
changeset 350437 41b6179131cc835322f044a14476735c23baecd3
parent 350436 cf24eff35969aeb96c9a018ebaac87a5b4c56962
child 350438 14bd404a269a6721c68a802e708f1465c9576b2c
push id: 1230
push user: jlund@mozilla.com
push date: Mon, 31 Oct 2016 18:13:35 +0000
treeherdermozilla-release@5e06e3766db2 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: mcmanus, ritu
bugs: 1294032
milestone: 50.0
Bug 1294032 - Fix poll hang - on shutdown hang in poll post event multiple times. r=mcmanus, a=ritu
netwerk/base/nsSocketTransportService2.cpp
netwerk/base/nsSocketTransportService2.h
--- a/netwerk/base/nsSocketTransportService2.cpp
+++ b/netwerk/base/nsSocketTransportService2.cpp
@@ -49,16 +49,18 @@ Atomic<PRThread*, Relaxed> gSocketThread
 #define KEEPALIVE_PROBE_COUNT_PREF "network.tcp.keepalive.probe_count"
 #define SOCKET_LIMIT_TARGET 1000U
 #define SOCKET_LIMIT_MIN      50U
 #define BLIP_INTERVAL_PREF "network.activity.blipIntervalMilliseconds"
 #define MAX_TIME_BETWEEN_TWO_POLLS "network.sts.max_time_for_events_between_two_polls"
 #define TELEMETRY_PREF "toolkit.telemetry.enabled"
 #define MAX_TIME_FOR_PR_CLOSE_DURING_SHUTDOWN "network.sts.max_time_for_pr_close_during_shutdown"
 
+#define REPAIR_POLLABLE_EVENT_TIME 10
+
 uint32_t nsSocketTransportService::gMaxCount;
 PRCallOnceType nsSocketTransportService::gMaxCountInitOnce;
 
 //-----------------------------------------------------------------------------
 // ctor/dtor (called on the main/UI thread by the service manager)
 
 nsSocketTransportService::nsSocketTransportService()
     : mThread(nullptr)
@@ -80,16 +82,19 @@ nsSocketTransportService::nsSocketTransp
     , mKeepaliveProbeCount(kDefaultTCPKeepCount)
     , mKeepaliveEnabledPref(false)
     , mServingPendingQueue(false)
     , mMaxTimePerPollIter(100)
     , mTelemetryEnabledPref(false)
     , mMaxTimeForPrClosePref(PR_SecondsToInterval(5))
     , mSleepPhase(false)
     , mProbedMaxCount(false)
+#if defined(XP_WIN)
+    , mPolling(false)
+#endif
 {
     NS_ASSERTION(NS_IsMainThread(), "wrong thread");
 
     PR_CallOnce(&gMaxCountInitOnce, DiscoverMaxCount);
     mActiveList = (SocketContext *)
         moz_xmalloc(sizeof(SocketContext) * mActiveListSize);
     mIdleList = (SocketContext *)
         moz_xmalloc(sizeof(SocketContext) * mIdleListSize);
@@ -756,16 +761,23 @@ nsSocketTransportService::OnDispatchedEv
     if (PR_GetCurrentThread() == gSocketThread) {
         // this check is redundant to one done inside ::Signal(), but
         // we can do it here and skip obtaining the lock - given that
         // this is a relatively common occurance its worth the
         // redundant code
         SOCKET_LOG(("OnDispatchedEvent Same Thread Skip Signal\n"));
         return NS_OK;
     }
+#else
+    if (gIOService->IsNetTearingDown()) {
+        // Poll can hang sometimes. If we are in shutdown, we are going to
+        // start a watchdog. If we do not exit poll within
+        // REPAIR_POLLABLE_EVENT_TIME signal a pollable event again.
+        StartPollWatchdog();
+    }
 #endif
 
     MutexAutoLock lock(mLock);
     if (mPollableEvent) {
         mPollableEvent->Signal();
     }
     return NS_OK;
 }
@@ -1073,17 +1085,23 @@ nsSocketTransportService::DoPollIteratio
 #endif
 
     // Measures seconds spent while blocked on PR_Poll
     uint32_t pollInterval = 0;
     int32_t n = 0;
     *pollDuration = 0;
     if (!gIOService->IsNetTearingDown()) {
         // Let's not do polling during shutdown.
+#if defined(XP_WIN)
+        StartPolling();
+#endif
         n = Poll(&pollInterval, pollDuration);
+#if defined(XP_WIN)
+        EndPolling();
+#endif
     }
 
     if (n < 0) {
         SOCKET_LOG(("  PR_Poll error [%d] os error [%d]\n", PR_GetError(),
                     PR_GetOSError()));
     }
     else {
         //
@@ -1314,16 +1332,23 @@ nsSocketTransportService::Observe(nsISup
     }
 
     if (!strcmp(topic, NS_TIMER_CALLBACK_TOPIC)) {
         nsCOMPtr<nsITimer> timer = do_QueryInterface(subject);
         if (timer == mAfterWakeUpTimer) {
             mAfterWakeUpTimer = nullptr;
             mSleepPhase = false;
         }
+
+#if defined(XP_WIN)
+        if (timer == mPollRepairTimer) {
+            DoPollRepair();
+        }
+#endif
+
     } else if (!strcmp(topic, NS_WIDGET_SLEEP_OBSERVER_TOPIC)) {
         mSleepPhase = true;
         if (mAfterWakeUpTimer) {
             mAfterWakeUpTimer->Cancel();
             mAfterWakeUpTimer = nullptr;
         }
     } else if (!strcmp(topic, NS_WIDGET_WAKE_OBSERVER_TOPIC)) {
         if (mSleepPhase && !mAfterWakeUpTimer) {
@@ -1531,10 +1556,56 @@ nsSocketTransportService::GetSocketConne
 {
     NS_ASSERTION(PR_GetCurrentThread() == gSocketThread, "wrong thread");
     for (uint32_t i = 0; i < mActiveCount; i++)
         AnalyzeConnection(data, &mActiveList[i], true);
     for (uint32_t i = 0; i < mIdleCount; i++)
         AnalyzeConnection(data, &mIdleList[i], false);
 }
 
+#if defined(XP_WIN)
+// Arms the poll-repair watchdog.
+//
+// Poll can hang sometimes. When we are in shutdown and the socket thread is
+// still marked as polling, start a repeating timer with a period of
+// REPAIR_POLLABLE_EVENT_TIME. Each tick is delivered to Observe(), which calls
+// DoPollRepair() to signal the pollable event again.
+//
+// Takes mLock itself, so the caller must not already hold it. Does nothing if
+// we are not polling or if a watchdog timer is already stored.
+void
+nsSocketTransportService::StartPollWatchdog()
+{
+    MutexAutoLock lock(mLock);
+
+    MOZ_ASSERT(gIOService->IsNetTearingDown());
+    if (!mPolling || mPollRepairTimer) {
+        return;
+    }
+
+    // We only get here during teardown, so instance creation may fail; do not
+    // dereference a null timer.
+    mPollRepairTimer = do_CreateInstance(NS_TIMER_CONTRACTID);
+    if (!mPollRepairTimer) {
+        return;
+    }
+
+    nsresult rv = mPollRepairTimer->Init(this, REPAIR_POLLABLE_EVENT_TIME,
+                                         nsITimer::TYPE_REPEATING_SLACK);
+    if (NS_FAILED(rv)) {
+        // Drop the unarmed timer so that a later call can try again instead
+        // of being blocked by the mPollRepairTimer check above.
+        mPollRepairTimer = nullptr;
+    }
+}
+
+// One tick of the poll-repair watchdog (invoked from Observe() when the
+// firing timer is mPollRepairTimer). While the polling flag is still set and
+// a pollable event exists, signal that event again; otherwise the watchdog is
+// no longer needed and the timer is cancelled. Runs entirely under mLock.
+void
+nsSocketTransportService::DoPollRepair()
+{
+    MutexAutoLock guard(mLock);
+
+    if (!mPolling || !mPollableEvent) {
+        // Nothing to wake up any more: stop the repeating timer, if any.
+        if (mPollRepairTimer) {
+            mPollRepairTimer->Cancel();
+        }
+        return;
+    }
+
+    // Still flagged as polling: signal the pollable event once more.
+    mPollableEvent->Signal();
+}
+
+// Records, under mLock, that a Poll() call is about to begin. The flag is
+// read by StartPollWatchdog() and DoPollRepair().
+void
+nsSocketTransportService::StartPolling()
+{
+    MutexAutoLock guard(mLock);
+
+    mPolling = true;
+}
+
+// Records, under mLock, that Poll() has returned, and cancels the poll-repair
+// timer if one was created. The timer object itself is left in place.
+void
+nsSocketTransportService::EndPolling()
+{
+    MutexAutoLock guard(mLock);
+
+    mPolling = false;
+
+    if (!mPollRepairTimer) {
+        return;
+    }
+    mPollRepairTimer->Cancel();
+}
+#endif
+
 } // namespace net
 } // namespace mozilla
--- a/netwerk/base/nsSocketTransportService2.h
+++ b/netwerk/base/nsSocketTransportService2.h
@@ -255,16 +255,25 @@ private:
                            SocketContext *context, bool aActive);
 
     void ClosePrivateConnections();
     void DetachSocketWithGuard(bool aGuardLocals,
                                SocketContext *socketList,
                                int32_t index);
 
     void MarkTheLastElementOfPendingQueue();
+
+#if defined(XP_WIN)
+    Atomic<bool> mPolling;
+    nsCOMPtr<nsITimer> mPollRepairTimer;
+    void StartPollWatchdog();
+    void DoPollRepair();
+    void StartPolling();
+    void EndPolling();
+#endif
 };
 
 extern nsSocketTransportService *gSocketTransportService;
 extern Atomic<PRThread*, Relaxed> gSocketThread;
 
 } // namespace net
 } // namespace mozilla