Bug 1302914 - Use GetThreadContext after calling SuspendThread to ensure threads are really suspended. r=froydnj
authorJan de Mooij <jdemooij@mozilla.com>
Thu, 29 Sep 2016 20:05:36 +0200
changeset 315868 02428eab5922de5d7ef07cf7ac4f2035bda417d5
parent 315867 2b788911d3e2e946a8f17a79f6cfe28821ffa933
child 315869 cce196d1d7b48958abcb1b23bc3afce6f223f074
push id20634
push usercbook@mozilla.com
push dateFri, 30 Sep 2016 10:10:13 +0000
treeherderfx-team@afe79b010d13 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersfroydnj
bugs1302914
milestone52.0a1
Bug 1302914 - Use GetThreadContext after calling SuspendThread to ensure threads are really suspended. r=froydnj
tools/profiler/core/platform-win32.cc
xpcom/threads/HangMonitor.cpp
xpcom/threads/ThreadStackHelper.cpp
--- a/tools/profiler/core/platform-win32.cc
+++ b/tools/profiler/core/platform-win32.cc
@@ -188,16 +188,32 @@ class SamplerThread : public Thread {
 
     // Unique Set Size is not supported on Windows.
     sample->ussMemory = 0;
 
     static const DWORD kSuspendFailed = static_cast<DWORD>(-1);
     if (SuspendThread(profiled_thread) == kSuspendFailed)
       return;
 
+    // SuspendThread is asynchronous, so the thread may still be running.
+    // Call GetThreadContext first to ensure the thread is really suspended.
+    // See https://blogs.msdn.microsoft.com/oldnewthing/20150205-00/?p=44743.
+
+    // Using only CONTEXT_CONTROL is faster but on 64-bit it causes crashes in
+    // RtlVirtualUnwind (see bug 1120126) so we set all the flags.
+#if V8_HOST_ARCH_X64
+    context.ContextFlags = CONTEXT_FULL;
+#else
+    context.ContextFlags = CONTEXT_CONTROL;
+#endif
+    if (!GetThreadContext(profiled_thread, &context)) {
+      ResumeThread(profiled_thread);
+      return;
+    }
+
 #ifdef MOZ_STACKWALKING
     // Threads that may invoke JS require extra attention. Since, on windows,
     // the jits also need to modify the same dynamic function table that we need
     // to get a stack trace, we have to be wary of that to avoid deadlock.
     //
     // When embedded in Gecko, for threads that aren't the main thread,
     // CanInvokeJS consults an unlocked value in the nsIThread, so we must
     // consult this after suspending the profiled thread to avoid racing
@@ -213,36 +229,29 @@ class SamplerThread : public Thread {
       // If the profiled thread had held those resources, the trylock would have
       // failed. Anyone else who grabs those resources will continue to make
       // progress, since those threads are not suspended. Because of this,
       // we cannot deadlock with them, and should let them run as they please.
       ReleaseStackWalkWorkaroundLock();
     }
 #endif
 
-    // Using only CONTEXT_CONTROL is faster but on 64-bit it causes crashes in
-    // RtlVirtualUnwind (see bug 1120126) so we set all the flags.
 #if V8_HOST_ARCH_X64
-    context.ContextFlags = CONTEXT_FULL;
+    sample->pc = reinterpret_cast<Address>(context.Rip);
+    sample->sp = reinterpret_cast<Address>(context.Rsp);
+    sample->fp = reinterpret_cast<Address>(context.Rbp);
 #else
-    context.ContextFlags = CONTEXT_CONTROL;
+    sample->pc = reinterpret_cast<Address>(context.Eip);
+    sample->sp = reinterpret_cast<Address>(context.Esp);
+    sample->fp = reinterpret_cast<Address>(context.Ebp);
 #endif
-    if (GetThreadContext(profiled_thread, &context) != 0) {
-#if V8_HOST_ARCH_X64
-      sample->pc = reinterpret_cast<Address>(context.Rip);
-      sample->sp = reinterpret_cast<Address>(context.Rsp);
-      sample->fp = reinterpret_cast<Address>(context.Rbp);
-#else
-      sample->pc = reinterpret_cast<Address>(context.Eip);
-      sample->sp = reinterpret_cast<Address>(context.Esp);
-      sample->fp = reinterpret_cast<Address>(context.Ebp);
-#endif
-      sample->context = &context;
-      sampler->Tick(sample);
-    }
+
+    sample->context = &context;
+    sampler->Tick(sample);
+
     ResumeThread(profiled_thread);
   }
 
   Sampler* sampler_;
   int interval_; // units: ms
 
   // Protects the process wide state below.
   static SamplerThread* instance_;
--- a/xpcom/threads/HangMonitor.cpp
+++ b/xpcom/threads/HangMonitor.cpp
@@ -153,23 +153,39 @@ GetChromeHangReport(Telemetry::Processed
   // Workaround possible deadlock where the main thread is running a
   // long-standing JS job, and happens to be in the JIT allocator when we
   // suspend it. Since, on win 64, this requires holding a process lock that
   // MozStackWalk requires, take this "workaround lock" to avoid deadlock.
 #ifdef _WIN64
   AcquireStackWalkWorkaroundLock();
 #endif
   DWORD ret = ::SuspendThread(winMainThreadHandle);
+  bool suspended = false;
+  if (ret != -1) {
+    // SuspendThread is asynchronous, so the thread may still be running. Use
+    // GetThreadContext to ensure it's really suspended.
+    // See https://blogs.msdn.microsoft.com/oldnewthing/20150205-00/?p=44743.
+    CONTEXT context;
+    context.ContextFlags = CONTEXT_CONTROL;
+    if (::GetThreadContext(winMainThreadHandle, &context)) {
+      suspended = true;
+    }
+  }
+
 #ifdef _WIN64
   ReleaseStackWalkWorkaroundLock();
 #endif
 
-  if (ret == -1) {
+  if (!suspended) {
+    if (ret != -1) {
+      MOZ_ALWAYS_TRUE(::ResumeThread(winMainThreadHandle) != DWORD(-1));
+    }
     return;
   }
+
   MozStackWalk(ChromeStackWalker, /* skipFrames */ 0, /* maxFrames */ 0,
                reinterpret_cast<void*>(&rawStack),
                reinterpret_cast<uintptr_t>(winMainThreadHandle), nullptr);
   ret = ::ResumeThread(winMainThreadHandle);
   if (ret == -1) {
     return;
   }
   aStack = Telemetry::GetStackAndModules(rawStack);
--- a/xpcom/threads/ThreadStackHelper.cpp
+++ b/xpcom/threads/ThreadStackHelper.cpp
@@ -242,18 +242,25 @@ ThreadStackHelper::GetStack(Stack& aStac
     MOZ_ASSERT(false);
     return;
   }
   if (::SuspendThread(mThreadID) == DWORD(-1)) {
     MOZ_ASSERT(false);
     return;
   }
 
-  FillStackBuffer();
-  FillThreadContext();
+  // SuspendThread is asynchronous, so the thread may still be running. Use
+  // GetThreadContext to ensure it's really suspended.
+  // See https://blogs.msdn.microsoft.com/oldnewthing/20150205-00/?p=44743.
+  CONTEXT context;
+  context.ContextFlags = CONTEXT_CONTROL;
+  if (::GetThreadContext(mThreadID, &context)) {
+    FillStackBuffer();
+    FillThreadContext();
+  }
 
   MOZ_ALWAYS_TRUE(::ResumeThread(mThreadID) != DWORD(-1));
 
 #elif defined(XP_MACOSX)
 # if defined(MOZ_VALGRIND) && defined(RUNNING_ON_VALGRIND)
   if (RUNNING_ON_VALGRIND) {
     /* thread_suspend and thread_resume sometimes hang runs on Valgrind,
        for unknown reasons.  So, just avoid them.  See bug 1100911. */