Bug 1302914 - Use GetThreadContext after calling SuspendThread to ensure threads are really suspended. r=froydnj
authorJan de Mooij <jdemooij@mozilla.com>
Thu, 29 Sep 2016 20:05:36 +0200
changeset 315860 02428eab5922de5d7ef07cf7ac4f2035bda417d5
parent 315859 2b788911d3e2e946a8f17a79f6cfe28821ffa933
child 315861 cce196d1d7b48958abcb1b23bc3afce6f223f074
push id30757
push usercbook@mozilla.com
push dateFri, 30 Sep 2016 10:02:43 +0000
treeherdermozilla-central@5ffed033557e [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersfroydnj
bugs1302914
milestone52.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1302914 - Use GetThreadContext after calling SuspendThread to ensure threads are really suspended. r=froydnj
tools/profiler/core/platform-win32.cc
xpcom/threads/HangMonitor.cpp
xpcom/threads/ThreadStackHelper.cpp
--- a/tools/profiler/core/platform-win32.cc
+++ b/tools/profiler/core/platform-win32.cc
@@ -188,16 +188,32 @@ class SamplerThread : public Thread {
 
     // Unique Set Size is not supported on Windows.
     sample->ussMemory = 0;
 
     static const DWORD kSuspendFailed = static_cast<DWORD>(-1);
     if (SuspendThread(profiled_thread) == kSuspendFailed)
       return;
 
+    // SuspendThread is asynchronous, so the thread may still be running.
+    // Call GetThreadContext first to ensure the thread is really suspended.
+    // See https://blogs.msdn.microsoft.com/oldnewthing/20150205-00/?p=44743.
+
+    // Using only CONTEXT_CONTROL is faster but on 64-bit it causes crashes in
+    // RtlVirtualUnwind (see bug 1120126) so we set all the flags.
+#if V8_HOST_ARCH_X64
+    context.ContextFlags = CONTEXT_FULL;
+#else
+    context.ContextFlags = CONTEXT_CONTROL;
+#endif
+    if (!GetThreadContext(profiled_thread, &context)) {
+      ResumeThread(profiled_thread);
+      return;
+    }
+
 #ifdef MOZ_STACKWALKING
     // Threads that may invoke JS require extra attention. Since, on windows,
     // the jits also need to modify the same dynamic function table that we need
     // to get a stack trace, we have to be wary of that to avoid deadlock.
     //
     // When embedded in Gecko, for threads that aren't the main thread,
     // CanInvokeJS consults an unlocked value in the nsIThread, so we must
     // consult this after suspending the profiled thread to avoid racing
@@ -213,36 +229,29 @@ class SamplerThread : public Thread {
       // If the profiled thread had held those resources, the trylock would have
       // failed. Anyone else who grabs those resources will continue to make
       // progress, since those threads are not suspended. Because of this,
       // we cannot deadlock with them, and should let them run as they please.
       ReleaseStackWalkWorkaroundLock();
     }
 #endif
 
-    // Using only CONTEXT_CONTROL is faster but on 64-bit it causes crashes in
-    // RtlVirtualUnwind (see bug 1120126) so we set all the flags.
 #if V8_HOST_ARCH_X64
-    context.ContextFlags = CONTEXT_FULL;
+    sample->pc = reinterpret_cast<Address>(context.Rip);
+    sample->sp = reinterpret_cast<Address>(context.Rsp);
+    sample->fp = reinterpret_cast<Address>(context.Rbp);
 #else
-    context.ContextFlags = CONTEXT_CONTROL;
+    sample->pc = reinterpret_cast<Address>(context.Eip);
+    sample->sp = reinterpret_cast<Address>(context.Esp);
+    sample->fp = reinterpret_cast<Address>(context.Ebp);
 #endif
-    if (GetThreadContext(profiled_thread, &context) != 0) {
-#if V8_HOST_ARCH_X64
-      sample->pc = reinterpret_cast<Address>(context.Rip);
-      sample->sp = reinterpret_cast<Address>(context.Rsp);
-      sample->fp = reinterpret_cast<Address>(context.Rbp);
-#else
-      sample->pc = reinterpret_cast<Address>(context.Eip);
-      sample->sp = reinterpret_cast<Address>(context.Esp);
-      sample->fp = reinterpret_cast<Address>(context.Ebp);
-#endif
-      sample->context = &context;
-      sampler->Tick(sample);
-    }
+
+    sample->context = &context;
+    sampler->Tick(sample);
+
     ResumeThread(profiled_thread);
   }
 
   Sampler* sampler_;
   int interval_; // units: ms
 
   // Protects the process wide state below.
   static SamplerThread* instance_;
--- a/xpcom/threads/HangMonitor.cpp
+++ b/xpcom/threads/HangMonitor.cpp
@@ -153,23 +153,39 @@ GetChromeHangReport(Telemetry::Processed
   // Workaround possible deadlock where the main thread is running a
   // long-standing JS job, and happens to be in the JIT allocator when we
   // suspend it. Since, on win 64, this requires holding a process lock that
   // MozStackWalk requires, take this "workaround lock" to avoid deadlock.
 #ifdef _WIN64
   AcquireStackWalkWorkaroundLock();
 #endif
   DWORD ret = ::SuspendThread(winMainThreadHandle);
+  bool suspended = false;
+  if (ret != -1) {
+    // SuspendThread is asynchronous, so the thread may still be running. Use
+    // GetThreadContext to ensure it's really suspended.
+    // See https://blogs.msdn.microsoft.com/oldnewthing/20150205-00/?p=44743.
+    CONTEXT context;
+    context.ContextFlags = CONTEXT_CONTROL;
+    if (::GetThreadContext(winMainThreadHandle, &context)) {
+      suspended = true;
+    }
+  }
+
 #ifdef _WIN64
   ReleaseStackWalkWorkaroundLock();
 #endif
 
-  if (ret == -1) {
+  if (!suspended) {
+    if (ret != -1) {
+      MOZ_ALWAYS_TRUE(::ResumeThread(winMainThreadHandle) != DWORD(-1));
+    }
     return;
   }
+
   MozStackWalk(ChromeStackWalker, /* skipFrames */ 0, /* maxFrames */ 0,
                reinterpret_cast<void*>(&rawStack),
                reinterpret_cast<uintptr_t>(winMainThreadHandle), nullptr);
   ret = ::ResumeThread(winMainThreadHandle);
   if (ret == -1) {
     return;
   }
   aStack = Telemetry::GetStackAndModules(rawStack);
--- a/xpcom/threads/ThreadStackHelper.cpp
+++ b/xpcom/threads/ThreadStackHelper.cpp
@@ -242,18 +242,25 @@ ThreadStackHelper::GetStack(Stack& aStac
     MOZ_ASSERT(false);
     return;
   }
   if (::SuspendThread(mThreadID) == DWORD(-1)) {
     MOZ_ASSERT(false);
     return;
   }
 
-  FillStackBuffer();
-  FillThreadContext();
+  // SuspendThread is asynchronous, so the thread may still be running. Use
+  // GetThreadContext to ensure it's really suspended.
+  // See https://blogs.msdn.microsoft.com/oldnewthing/20150205-00/?p=44743.
+  CONTEXT context;
+  context.ContextFlags = CONTEXT_CONTROL;
+  if (::GetThreadContext(mThreadID, &context)) {
+    FillStackBuffer();
+    FillThreadContext();
+  }
 
   MOZ_ALWAYS_TRUE(::ResumeThread(mThreadID) != DWORD(-1));
 
 #elif defined(XP_MACOSX)
 # if defined(MOZ_VALGRIND) && defined(RUNNING_ON_VALGRIND)
   if (RUNNING_ON_VALGRIND) {
     /* thread_suspend and thread_resume sometimes hang runs on Valgrind,
        for unknown reasons.  So, just avoid them.  See bug 1100911. */