Bug 1337499 - Take the Win64 stackwalk lock in WalkStackMain64 to avoid deadlocks. r=mstange
authorJan de Mooij <jdemooij@mozilla.com>
Fri, 17 Feb 2017 10:51:11 +0100
changeset 372594 1ce8c513421edf003562a3fd57b3adfac91ea97a
parent 372496 ccda17e0776efd915eb6deb30b95d7b83c60703e
child 372595 91e8324552875e1e00cfec7064622571e7f39945
push id10863
push userjlorenzo@mozilla.com
push dateMon, 06 Mar 2017 23:02:23 +0000
treeherdermozilla-aurora@0931190cd725 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersmstange
bugs1337499
milestone54.0a1
Bug 1337499 - Take the Win64 stackwalk lock in WalkStackMain64 to avoid deadlocks. r=mstange
mozglue/misc/StackWalk.cpp
tools/profiler/core/platform-win32.cc
xpcom/threads/HangMonitor.cpp
--- a/mozglue/misc/StackWalk.cpp
+++ b/mozglue/misc/StackWalk.cpp
@@ -4,16 +4,17 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 /* API for getting a stack trace of the C/C++ stack on the current thread */
 
 #include "mozilla/ArrayUtils.h"
 #include "mozilla/Assertions.h"
 #include "mozilla/IntegerPrintfMacros.h"
+#include "mozilla/ScopeExit.h"
 #include "mozilla/StackWalk.h"
 
 #include <string.h>
 
 using namespace mozilla;
 
 // The presence of this address is the stack must stop the stack walk. If
 // there is no such address, the structure will be {nullptr, true}.
@@ -332,16 +333,30 @@ WalkStackMain64(struct WalkStackData* aD
   frame64.AddrFrame.Offset = context.RsBSP;
 #endif
   frame64.AddrPC.Mode      = AddrModeFlat;
   frame64.AddrStack.Mode   = AddrModeFlat;
   frame64.AddrFrame.Mode   = AddrModeFlat;
   frame64.AddrReturn.Mode  = AddrModeFlat;
 #endif
 
+#ifdef _WIN64
+  // Workaround possible deadlock where the thread we're profiling happens to
+  // be in RtlLookupFunctionEntry (see below) or in RtlAddFunctionTable or
+  // RtlDeleteFunctionTable when starting or shutting down the JS engine.
+  // On Win64 each of these Rtl* functions will take a lock, so we need to make
+  // sure we don't deadlock when a suspended thread is holding it.
+  if (!TryAcquireStackWalkWorkaroundLock()) {
+    return;
+  }
+  auto releaseLock = mozilla::MakeScopeExit([] {
+    ReleaseStackWalkWorkaroundLock();
+  });
+#endif
+
   // Skip our own stack walking frames.
   int skip = (aData->walkCallingThread ? 3 : 0) + aData->skipFrames;
 
   // Now walk the stack.
   while (true) {
     DWORD64 addr;
     DWORD64 spaddr;
 
--- a/tools/profiler/core/platform-win32.cc
+++ b/tools/profiler/core/platform-win32.cc
@@ -29,19 +29,16 @@
 #include <windows.h>
 #include <mmsystem.h>
 #include <process.h>
 #include "ProfileEntry.h"
 
 // Memory profile
 #include "nsMemoryReporterManager.h"
 
-#include "mozilla/StackWalk_windows.h"
-
-
 class PlatformData {
  public:
   // Get a handle to the calling thread. This is the thread that we are
   // going to profile. We need to make a copy of the handle because we are
   // going to use it in the sampler thread. Using GetThreadHandle() will
   // not work in this case. We're using OpenThread because DuplicateHandle
   // for some reason doesn't work in Chrome's sandbox.
   explicit PlatformData(int aThreadId) : profiled_thread_(OpenThread(
@@ -233,39 +230,16 @@ class SamplerThread
 #else
     context.ContextFlags = CONTEXT_CONTROL;
 #endif
     if (!GetThreadContext(profiled_thread, &context)) {
       ResumeThread(profiled_thread);
       return;
     }
 
-    // Threads that may invoke JS require extra attention. Since, on windows,
-    // the jits also need to modify the same dynamic function table that we need
-    // to get a stack trace, we have to be wary of that to avoid deadlock.
-    //
-    // When embedded in Gecko, for threads that aren't the main thread,
-    // CanInvokeJS consults an unlocked value in the nsIThread, so we must
-    // consult this after suspending the profiled thread to avoid racing
-    // against a value change.
-    if (aThreadInfo->CanInvokeJS()) {
-      if (!TryAcquireStackWalkWorkaroundLock()) {
-        ResumeThread(profiled_thread);
-        return;
-      }
-
-      // It is safe to immediately drop the lock. We only need to contend with
-      // the case in which the profiled thread held needed system resources.
-      // If the profiled thread had held those resources, the trylock would have
-      // failed. Anyone else who grabs those resources will continue to make
-      // progress, since those threads are not suspended. Because of this,
-      // we cannot deadlock with them, and should let them run as they please.
-      ReleaseStackWalkWorkaroundLock();
-    }
-
 #if V8_HOST_ARCH_X64
     sample->pc = reinterpret_cast<Address>(context.Rip);
     sample->sp = reinterpret_cast<Address>(context.Rsp);
     sample->fp = reinterpret_cast<Address>(context.Rbp);
 #else
     sample->pc = reinterpret_cast<Address>(context.Eip);
     sample->sp = reinterpret_cast<Address>(context.Esp);
     sample->fp = reinterpret_cast<Address>(context.Ebp);
--- a/xpcom/threads/HangMonitor.cpp
+++ b/xpcom/threads/HangMonitor.cpp
@@ -11,19 +11,16 @@
 #include "mozilla/Monitor.h"
 #include "mozilla/Preferences.h"
 #include "mozilla/ProcessedStack.h"
 #include "mozilla/Telemetry.h"
 #include "mozilla/StaticPtr.h"
 #include "mozilla/UniquePtr.h"
 #include "nsReadableUtils.h"
 #include "mozilla/StackWalk.h"
-#ifdef _WIN64
-#include "mozilla/StackWalk_windows.h"
-#endif
 #include "nsThreadUtils.h"
 #include "nsXULAppAPI.h"
 #include "GeckoProfiler.h"
 
 #ifdef MOZ_CRASHREPORTER
 #include "nsExceptionHandler.h"
 #endif
 
@@ -146,40 +143,29 @@ GetChromeHangReport(Telemetry::Processed
 {
   MOZ_ASSERT(winMainThreadHandle);
 
   // The thread we're about to suspend might have the alloc lock
   // so allocate ahead of time
   std::vector<uintptr_t> rawStack;
   rawStack.reserve(MAX_CALL_STACK_PCS);
 
-  // Workaround possible deadlock where the main thread is running a
-  // long-standing JS job, and happens to be in the JIT allocator when we
-  // suspend it. Since, on win 64, this requires holding a process lock that
-  // MozStackWalk requires, take this "workaround lock" to avoid deadlock.
-#ifdef _WIN64
-  AcquireStackWalkWorkaroundLock();
-#endif
   DWORD ret = ::SuspendThread(winMainThreadHandle);
   bool suspended = false;
   if (ret != -1) {
     // SuspendThread is asynchronous, so the thread may still be running. Use
     // GetThreadContext to ensure it's really suspended.
     // See https://blogs.msdn.microsoft.com/oldnewthing/20150205-00/?p=44743.
     CONTEXT context;
     context.ContextFlags = CONTEXT_CONTROL;
     if (::GetThreadContext(winMainThreadHandle, &context)) {
       suspended = true;
     }
   }
 
-#ifdef _WIN64
-  ReleaseStackWalkWorkaroundLock();
-#endif
-
   if (!suspended) {
     if (ret != -1) {
       MOZ_ALWAYS_TRUE(::ResumeThread(winMainThreadHandle) != DWORD(-1));
     }
     return;
   }
 
   MozStackWalk(ChromeStackWalker, /* skipFrames */ 0, /* maxFrames */ 0,