Bug 1359507: Replace the stack walk workaround lock with an atomic counter of suppressions. r=mstange,froydnj
authorDavid Major <dmajor@mozilla.com>
Wed, 03 May 2017 12:10:48 -0400
changeset 356300 ab0c15933c749f9c8cac1b7f774bbe0cc5addfab
parent 356299 c291fe4687abed22476ea08d2c062a536ce5a28c
child 356301 fbad88a61127ff137f22d004d6cc0002397b7328
push id31760
push userkwierso@gmail.com
push dateWed, 03 May 2017 20:42:27 +0000
treeherdermozilla-central@b25ad0674afd [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersmstange, froydnj
bugs1359507
milestone55.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1359507: Replace the stack walk workaround lock with an atomic counter of suppressions. r=mstange,froydnj This fixes a deadlock by removing one of the two sides of a mutual-wait.
js/src/jit/ProcessExecutableMemory.cpp
mozglue/build/WindowsDllBlocklist.cpp
mozglue/misc/StackWalk.cpp
mozglue/misc/StackWalk_windows.h
--- a/js/src/jit/ProcessExecutableMemory.cpp
+++ b/js/src/jit/ProcessExecutableMemory.cpp
@@ -155,38 +155,30 @@ RegisterExecutableMemory(void* p, size_t
 
     DWORD oldProtect;
     if (!VirtualProtect(p, pageSize, PAGE_EXECUTE_READ, &oldProtect))
         MOZ_CRASH();
 
     // XXX NB: The profiler believes this function is only called from the main
     // thread. If that ever becomes untrue, the profiler must be updated
     // immediately.
-    AcquireStackWalkWorkaroundLock();
-
-    bool success = RtlAddFunctionTable(&r->runtimeFunction, 1, reinterpret_cast<DWORD64>(p));
-
-    ReleaseStackWalkWorkaroundLock();
-
-    return success;
+    AutoSuppressStackWalking suppress;
+    return RtlAddFunctionTable(&r->runtimeFunction, 1, reinterpret_cast<DWORD64>(p));
 }
 
 static void
 UnregisterExecutableMemory(void* p, size_t bytes, size_t pageSize)
 {
     ExceptionHandlerRecord* r = reinterpret_cast<ExceptionHandlerRecord*>(p);
 
     // XXX NB: The profiler believes this function is only called from the main
     // thread. If that ever becomes untrue, the profiler must be updated
     // immediately.
-    AcquireStackWalkWorkaroundLock();
-
+    AutoSuppressStackWalking suppress;
     RtlDeleteFunctionTable(&r->runtimeFunction);
-
-    ReleaseStackWalkWorkaroundLock();
 }
 # endif
 
 static void*
 ReserveProcessExecutableMemory(size_t bytes)
 {
 # ifdef HAVE_64BIT_BUILD
     size_t pageSize = gc::SystemPageSize();
--- a/mozglue/build/WindowsDllBlocklist.cpp
+++ b/mozglue/build/WindowsDllBlocklist.cpp
@@ -704,23 +704,23 @@ patched_LdrLoadDll (PWCHAR filePath, PUL
     }
   }
 
 continue_loading:
 #ifdef DEBUG_very_verbose
   printf_stderr("LdrLoadDll: continuing load... ('%S')\n", moduleFileName->Buffer);
 #endif
 
+#ifdef _M_AMD64
   // Prevent the stack walker from suspending this thread when LdrLoadDll
   // holds the RtlLookupFunctionEntry lock.
-  AcquireStackWalkWorkaroundLock();
-  NTSTATUS ret = stub_LdrLoadDll(filePath, flags, moduleFileName, handle);
-  ReleaseStackWalkWorkaroundLock();
+  AutoSuppressStackWalking suppress;
+#endif
 
-  return ret;
+  return stub_LdrLoadDll(filePath, flags, moduleFileName, handle);
 }
 
 WindowsDllInterceptor NtDllIntercept;
 
 } // namespace
 
 MFBT_API void
 DllBlocklist_Initialize()
--- a/mozglue/misc/StackWalk.cpp
+++ b/mozglue/misc/StackWalk.cpp
@@ -4,17 +4,16 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 /* API for getting a stack trace of the C/C++ stack on the current thread */
 
 #include "mozilla/ArrayUtils.h"
 #include "mozilla/Assertions.h"
 #include "mozilla/IntegerPrintfMacros.h"
-#include "mozilla/ScopeExit.h"
 #include "mozilla/StackWalk.h"
 
 #include <string.h>
 
 using namespace mozilla;
 
 // The presence of this address is the stack must stop the stack walk. If
 // there is no such address, the structure will be {nullptr, true}.
@@ -184,16 +183,17 @@ StackWalkInitCriticalAddress()
 
 #if defined(_WIN32) && (defined(_M_IX86) || defined(_M_AMD64) || defined(_M_IA64)) // WIN32 x86 stack walking code
 
 #include <windows.h>
 #include <process.h>
 #include <stdio.h>
 #include <malloc.h>
 #include "mozilla/ArrayUtils.h"
+#include "mozilla/Atomics.h"
 #include "mozilla/StackWalk_windows.h"
 
 #include <imagehlp.h>
 // We need a way to know if we are building for WXP (or later), as if we are, we
 // need to use the newer 64-bit APIs. API_VERSION_NUMBER seems to fit the bill.
 // A value of 9 indicates we want to use the new APIs.
 #if API_VERSION_NUMBER < 9
 #error Too old imagehlp.h
@@ -218,19 +218,70 @@ struct WalkStackData
   uint32_t sp_count;
   void* platformData;
 };
 
 DWORD gStackWalkThread;
 CRITICAL_SECTION gDbgHelpCS;
 
 #ifdef _M_AMD64
+// Because various Win64 APIs acquire function-table locks, we need a way of
+// preventing stack walking while those APIs are being called. Otherwise, the
+// stack walker may suspend a thread holding such a lock, and deadlock when the
+// stack unwind code attempts to wait for that lock.
+//
+// We're using an atomic counter rather than a critical section because we
+// don't require mutual exclusion with the stack walker. If the stack walker
+// determines that it's safe to start unwinding the suspended thread (i.e.
+// there are no suppressions when the unwind begins), then it's safe to
+// continue unwinding that thread even if other threads request suppressions
+// in the meantime, because we can't deadlock with those other threads.
+//
+// XXX: This global variable is a larger-than-necessary hammer. A more scoped
+// solution would be to maintain a counter per thread, but then it would be
+// more difficult for WalkStackMain64 to read the suspended thread's counter.
+static Atomic<size_t> sStackWalkSuppressions;
+
+MFBT_API
+AutoSuppressStackWalking::AutoSuppressStackWalking()
+{
+  ++sStackWalkSuppressions;
+}
+
+MFBT_API
+AutoSuppressStackWalking::~AutoSuppressStackWalking()
+{
+  --sStackWalkSuppressions;
+}
+
 static uint8_t* sJitCodeRegionStart;
 static size_t sJitCodeRegionSize;
-#endif
+
+MFBT_API void
+RegisterJitCodeRegion(uint8_t* aStart, size_t aSize)
+{
+  // Currently we can only handle one JIT code region at a time
+  MOZ_RELEASE_ASSERT(!sJitCodeRegionStart);
+
+  sJitCodeRegionStart = aStart;
+  sJitCodeRegionSize = aSize;
+}
+
+MFBT_API void
+UnregisterJitCodeRegion(uint8_t* aStart, size_t aSize)
+{
+  // Currently we can only handle one JIT code region at a time
+  MOZ_RELEASE_ASSERT(sJitCodeRegionStart &&
+                     sJitCodeRegionStart == aStart &&
+                     sJitCodeRegionSize == aSize);
+
+  sJitCodeRegionStart = nullptr;
+  sJitCodeRegionSize = 0;
+}
+#endif // _M_AMD64
 
 // Routine to print an error message to standard error.
 static void
 PrintError(const char* aPrefix)
 {
   LPSTR lpMsgBuf;
   DWORD lastErr = GetLastError();
   FormatMessageA(
@@ -349,27 +400,29 @@ WalkStackMain64(struct WalkStackData* aD
 #endif
   frame64.AddrPC.Mode      = AddrModeFlat;
   frame64.AddrStack.Mode   = AddrModeFlat;
   frame64.AddrFrame.Mode   = AddrModeFlat;
   frame64.AddrReturn.Mode  = AddrModeFlat;
 #endif
 
 #ifdef _M_AMD64
-  // Workaround possible deadlock where the thread we're profiling happens to
-  // be in RtlLookupFunctionEntry (see below) or in RtlAddFunctionTable or
-  // RtlDeleteFunctionTable when starting or shutting down the JS engine.
-  // On Win64 each of these Rtl* functions will take a lock, so we need to make
-  // sure we don't deadlock when a suspended thread is holding it.
-  if (!TryAcquireStackWalkWorkaroundLock()) {
+  // If there are any active suppressions, then at least one thread (we don't
+  // know which) is holding a lock that can deadlock RtlVirtualUnwind. Since
+  // that thread may be the one that we're trying to unwind, we can't proceed.
+  //
+  // But if there are no suppressions, then our target thread can't be holding
+  // a lock, and it's safe to proceed. By virtue of being suspended, the target
+  // thread can't acquire any new locks during the unwind process, so we only
+  // need to do this check once. After that, sStackWalkSuppressions can be
+  // changed by other threads while we're unwinding, and that's fine because
+  // we can't deadlock with those threads.
+  if (sStackWalkSuppressions) {
     return;
   }
-  auto releaseLock = mozilla::MakeScopeExit([] {
-    ReleaseStackWalkWorkaroundLock();
-  });
 
   bool firstFrame = true;
 
   // In the loop below we'll need to special-case stack frames in this DLL.
   HMODULE msmpeg2vdec = GetModuleHandleW(L"msmpeg2vdec.dll");
 #endif
 
   // Skip our own stack walking frames.
@@ -495,86 +548,16 @@ WalkStackMain64(struct WalkStackData* aD
 #if defined(_M_IX86) || defined(_M_IA64)
     if (frame64.AddrReturn.Offset == 0) {
       break;
     }
 #endif
   }
 }
 
-// The JIT needs to allocate executable memory. Because of the inanity of
-// the win64 APIs, this requires locks that stalk walkers also need. Provide
-// another lock to allow synchronization around these resources.
-#ifdef _M_AMD64
-
-struct CriticalSectionAutoInitializer {
-    CRITICAL_SECTION lock;
-
-    CriticalSectionAutoInitializer() {
-      InitializeCriticalSection(&lock);
-    }
-};
-
-static CriticalSectionAutoInitializer gWorkaroundLock;
-
-#endif // _M_AMD64
-
-MFBT_API void
-AcquireStackWalkWorkaroundLock()
-{
-#ifdef _M_AMD64
-  EnterCriticalSection(&gWorkaroundLock.lock);
-#endif
-}
-
-MFBT_API bool
-TryAcquireStackWalkWorkaroundLock()
-{
-#ifdef _M_AMD64
-  return TryEnterCriticalSection(&gWorkaroundLock.lock);
-#else
-  return true;
-#endif
-}
-
-MFBT_API void
-ReleaseStackWalkWorkaroundLock()
-{
-#ifdef _M_AMD64
-  LeaveCriticalSection(&gWorkaroundLock.lock);
-#endif
-}
-
-MFBT_API void
-RegisterJitCodeRegion(uint8_t* aStart, size_t aSize)
-{
-#ifdef _M_AMD64
-  // Currently we can only handle one JIT code region at a time
-  MOZ_RELEASE_ASSERT(!sJitCodeRegionStart);
-
-  sJitCodeRegionStart = aStart;
-  sJitCodeRegionSize = aSize;
-#endif
-}
-
-MFBT_API void
-UnregisterJitCodeRegion(uint8_t* aStart, size_t aSize)
-{
-#ifdef _M_AMD64
-  // Currently we can only handle one JIT code region at a time
-  MOZ_RELEASE_ASSERT(sJitCodeRegionStart &&
-                     sJitCodeRegionStart == aStart &&
-                     sJitCodeRegionSize == aSize);
-
-  sJitCodeRegionStart = nullptr;
-  sJitCodeRegionSize = 0;
-#endif
-}
-
-
 static unsigned int WINAPI
 WalkStackThread(void* aData)
 {
   BOOL msgRet;
   MSG msg;
 
   // Call PeekMessage to force creation of a message queue so that
   // other threads can safely post events to us.
--- a/mozglue/misc/StackWalk_windows.h
+++ b/mozglue/misc/StackWalk_windows.h
@@ -1,27 +1,26 @@
 #ifndef mozilla_StackWalk_windows_h
 #define mozilla_StackWalk_windows_h
 
 #include "mozilla/Types.h"
 
+#ifdef _M_AMD64
 /**
  * Allow stack walkers to work around the egregious win64 dynamic lookup table
  * list API by locking around SuspendThread to avoid deadlock.
  *
  * See comment in StackWalk.cpp
  */
-MFBT_API void
-AcquireStackWalkWorkaroundLock();
-
-MFBT_API bool
-TryAcquireStackWalkWorkaroundLock();
-
-MFBT_API void
-ReleaseStackWalkWorkaroundLock();
+struct MOZ_RAII AutoSuppressStackWalking
+{
+  MFBT_API AutoSuppressStackWalking();
+  MFBT_API ~AutoSuppressStackWalking();
+};
 
 MFBT_API void
 RegisterJitCodeRegion(uint8_t* aStart, size_t size);
 
 MFBT_API void
 UnregisterJitCodeRegion(uint8_t* aStart, size_t size);
+#endif // _M_AMD64
 
 #endif // mozilla_StackWalk_windows_h