Backed out 4 changesets (bug 1357829) for build bustage in xpcshell\selftest.py on windows 8 x64 opt. a=backout
author     Iris Hsiao <ihsiao@mozilla.com>
date       Fri, 19 May 2017 11:27:38 +0800
changeset  407153 8e98dab5054dd093a37ba20c62cf0523e484cfbd
parent     407152 183c3537157290d7ccef475651b6bc35db914924
child      407154 9c654bbab98cfb39cc2daa32a738f74a6c987670
child      407269 ec61b5fa7f2d4daa52d7e8ca40d7b019e55f79d2
child      407366 1667def6751501149dc69e7d9608cffdf3c19e7d
child      407469 d1d22e5531961946348c04e804e1208619182094
push id    7391
push user  mtabara@mozilla.com
push date  Mon, 12 Jun 2017 13:08:53 +0000
treeherder mozilla-beta@2191d7f87e2e
reviewers  backout
bugs       1357829
milestone  55.0a1
backs out  8ea202bb1103a693837e46374eb37ef9091ac15a
           cebe4d7abeda54f0e14a6e9c400e3256aa3e4db0
           378d473c961972a1d273f53fbbe151300384c8d7
           86ebe868d443bd5da60d86844bd622ae1865ea92
first release with
  nightly linux32  8e98dab5054d / 55.0a1 / 20170519133342
  nightly linux64  8e98dab5054d / 55.0a1 / 20170519133342
  nightly mac      8e98dab5054d / 55.0a1 / 20170519030205
  nightly win32    8e98dab5054d / 55.0a1 / 20170519030205
  nightly win64    8e98dab5054d / 55.0a1 / 20170519030205
Backed out 4 changesets (bug 1357829) for build bustage in xpcshell\selftest.py on windows 8 x64 opt. a=backout

Backed out changeset 8ea202bb1103 (bug 1357829)
Backed out changeset cebe4d7abeda (bug 1357829)
Backed out changeset 378d473c9619 (bug 1357829)
Backed out changeset 86ebe868d443 (bug 1357829)
toolkit/components/telemetry/tests/unit/test_ThreadHangStats.js
toolkit/components/telemetry/tests/unit/xpcshell.ini
tools/profiler/core/platform-linux-android.cpp
tools/profiler/core/platform-macos.cpp
tools/profiler/core/platform-win32.cpp
tools/profiler/core/platform.cpp
tools/profiler/public/GeckoProfiler.h
xpcom/threads/BackgroundHangMonitor.cpp
xpcom/threads/ThreadStackHelper.cpp
xpcom/threads/ThreadStackHelper.h
--- a/toolkit/components/telemetry/tests/unit/test_ThreadHangStats.js
+++ b/toolkit/components/telemetry/tests/unit/test_ThreadHangStats.js
@@ -79,23 +79,26 @@ function run_test() {
 
       ok(Array.isArray(endHangs.hangs));
       notEqual(endHangs.hangs.length, 0);
 
       ok(Array.isArray(endHangs.hangs[0].stack));
       notEqual(endHangs.hangs[0].stack.length, 0);
       equal(typeof endHangs.hangs[0].stack[0], "string");
 
-      // Make sure one of the hangs is a permanent
-      // hang containing a native stack.
-      ok(endHangs.hangs.some((hang) => (
-        hang.nativeStack &&
-        Array.isArray(hang.nativeStack.memoryMap) &&
-        Array.isArray(hang.nativeStack.stacks)
-      )));
+      // Native stack gathering is only enabled on Windows x86.
+      if (mozinfo.os == "win" && mozinfo.bits == 32) {
+        // Make sure one of the hangs is a permanent
+        // hang containing a native stack.
+        ok(endHangs.hangs.some((hang) => (
+          hang.nativeStack &&
+          Array.isArray(hang.nativeStack.memoryMap) &&
+          Array.isArray(hang.nativeStack.stacks)
+        )));
+      }
 
       check_histogram(endHangs.hangs[0].histogram);
 
       do_test_finished();
     };
 
     check_results();
   });
--- a/toolkit/components/telemetry/tests/unit/xpcshell.ini
+++ b/toolkit/components/telemetry/tests/unit/xpcshell.ini
@@ -50,17 +50,16 @@ tags = addons
 [test_TelemetryControllerBuildID.js]
 [test_TelemetrySendOldPings.js]
 skip-if = os == "android" # Disabled due to intermittent orange on Android
 tags = addons
 [test_TelemetrySession.js]
 tags = addons
 [test_TelemetrySession_activeTicks.js]
 [test_ThreadHangStats.js]
-skip-if = os == "android" || os == "linux" # BHR is disabled on linux (bug 1365309)
 run-sequentially = Bug 1046307, test can fail intermittently when CPU load is high
 [test_TelemetrySend.js]
 [test_ChildHistograms.js]
 skip-if = os == "android" # Disabled due to crashes (see bug 1331366)
 tags = addons
 [test_ChildScalars.js]
 skip-if = os == "android" # Disabled due to crashes (see bug 1331366)
 [test_TelemetryReportingPolicy.js]
--- a/tools/profiler/core/platform-linux-android.cpp
+++ b/tools/profiler/core/platform-linux-android.cpp
@@ -125,17 +125,17 @@ public:
 
   ~PlatformData()
   {
     MOZ_COUNT_DTOR(PlatformData);
   }
 };
 
 ////////////////////////////////////////////////////////////////////////
-// BEGIN Sampler target specifics
+// BEGIN SamplerThread target specifics
 
 // The only way to reliably interrupt a Linux thread and inspect its register
 // and stack state is by sending a signal to it, and doing the work inside the
 // signal handler.  But we don't want to run much code inside the signal
 // handler, since POSIX severely restricts what we can do in signal handlers.
 // So we use a system of semaphores to suspend the thread and allow the
 // sampler thread to do all the work of unwinding and copying out whatever
 // data it wants.
@@ -194,70 +194,84 @@ struct SigHandlerCoordinator
   }
 
   sem_t mMessage2; // To sampler: "context is in sSigHandlerCoordinator"
   sem_t mMessage3; // To samplee: "resume"
   sem_t mMessage4; // To sampler: "finished with sSigHandlerCoordinator"
   ucontext_t mUContext; // Context at signal
 };
 
-struct SigHandlerCoordinator* Sampler::sSigHandlerCoordinator = nullptr;
+struct SigHandlerCoordinator* SamplerThread::sSigHandlerCoordinator = nullptr;
 
 static void
 SigprofHandler(int aSignal, siginfo_t* aInfo, void* aContext)
 {
   // Avoid TSan warning about clobbering errno.
   int savedErrno = errno;
 
   MOZ_ASSERT(aSignal == SIGPROF);
-  MOZ_ASSERT(Sampler::sSigHandlerCoordinator);
+  MOZ_ASSERT(SamplerThread::sSigHandlerCoordinator);
 
   // By sending us this signal, the sampler thread has sent us message 1 in
   // the comment above, with the meaning "|sSigHandlerCoordinator| is ready
   // for use, please copy your register context into it."
-  Sampler::sSigHandlerCoordinator->mUContext =
+  SamplerThread::sSigHandlerCoordinator->mUContext =
     *static_cast<ucontext_t*>(aContext);
 
   // Send message 2: tell the sampler thread that the context has been copied
   // into |sSigHandlerCoordinator->mUContext|.  sem_post can never fail by
   // being interrupted by a signal, so there's no loop around this call.
-  int r = sem_post(&Sampler::sSigHandlerCoordinator->mMessage2);
+  int r = sem_post(&SamplerThread::sSigHandlerCoordinator->mMessage2);
   MOZ_ASSERT(r == 0);
 
   // At this point, the sampler thread assumes we are suspended, so we must
   // not touch any global state here.
 
   // Wait for message 3: the sampler thread tells us to resume.
   while (true) {
-    r = sem_wait(&Sampler::sSigHandlerCoordinator->mMessage3);
+    r = sem_wait(&SamplerThread::sSigHandlerCoordinator->mMessage3);
     if (r == -1 && errno == EINTR) {
       // Interrupted by a signal.  Try again.
       continue;
     }
     // We don't expect any other kind of failure
     MOZ_ASSERT(r == 0);
     break;
   }
 
   // Send message 4: tell the sampler thread that we are finished accessing
   // |sSigHandlerCoordinator|.  After this point it is not safe to touch
   // |sSigHandlerCoordinator|.
-  r = sem_post(&Sampler::sSigHandlerCoordinator->mMessage4);
+  r = sem_post(&SamplerThread::sSigHandlerCoordinator->mMessage4);
   MOZ_ASSERT(r == 0);
 
   errno = savedErrno;
 }
 
-Sampler::Sampler(PSLockRef aLock)
-  : mMyPid(getpid())
+static void*
+ThreadEntry(void* aArg)
+{
+  auto thread = static_cast<SamplerThread*>(aArg);
+  thread->mSamplerTid = gettid();
+  thread->Run();
+  return nullptr;
+}
+
+SamplerThread::SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
+                             double aIntervalMilliseconds)
+  : mActivityGeneration(aActivityGeneration)
+  , mIntervalMicroseconds(
+      std::max(1, int(floor(aIntervalMilliseconds * 1000 + 0.5))))
+  , mMyPid(getpid())
   // We don't know what the sampler thread's ID will be until it runs, so set
-  // mSamplerTid to a dummy value and fill it in for real in
-  // SuspendAndSampleAndResumeThread().
+  // mSamplerTid to a dummy value and fill it in for real in ThreadEntry().
   , mSamplerTid(-1)
 {
+  MOZ_RELEASE_ASSERT(NS_IsMainThread());
+
 #if defined(USE_EHABI_STACKWALK)
   mozilla::EHABIStackWalkInit();
 #elif defined(USE_LUL_STACKWALK)
   bool createdLUL = false;
   lul::LUL* lul = CorePS::Lul(aLock);
   if (!lul) {
     lul = new lul::LUL(logging_sink_for_LUL);
     CorePS::SetLul(aLock, lul);
@@ -285,39 +299,76 @@ Sampler::Sampler(PSLockRef aLock)
 
     // Has a test been requested?
     if (PR_GetEnv("MOZ_PROFILER_LUL_TEST")) {
       int nTests = 0, nTestsPassed = 0;
       RunLulUnitTests(&nTests, &nTestsPassed, lul);
     }
   }
 #endif
+
+  // Start the sampling thread. It repeatedly sends a SIGPROF signal. Sending
+  // the signal ourselves instead of relying on itimer provides much better
+  // accuracy.
+  if (pthread_create(&mThread, nullptr, ThreadEntry, this) != 0) {
+    MOZ_CRASH("pthread_create failed");
+  }
+}
+
+SamplerThread::~SamplerThread()
+{
+  pthread_join(mThread, nullptr);
+}
+
+void
+SamplerThread::Stop(PSLockRef aLock)
+{
+  MOZ_RELEASE_ASSERT(NS_IsMainThread());
+
+  // Restore old signal handler. This is global state so it's important that
+  // we do it now, while gPSMutex is locked. It's safe to do this now even
+  // though this SamplerThread is still alive, because the next time the main
+  // loop of Run() iterates it won't get past the mActivityGeneration check,
+  // and so won't send any signals.
+  sigaction(SIGPROF, &mOldSigprofHandler, 0);
 }
 
 void
-Sampler::Disable(PSLockRef aLock)
+SamplerThread::SleepMicro(uint32_t aMicroseconds)
 {
-  // Restore old signal handler. This is global state so it's important that
-  // we do it now, while gPSMutex is locked.
-  sigaction(SIGPROF, &mOldSigprofHandler, 0);
+  if (aMicroseconds >= 1000000) {
+    // Use usleep for larger intervals, because the nanosleep
+    // code below only supports intervals < 1 second.
+    MOZ_ALWAYS_TRUE(!::usleep(aMicroseconds));
+    return;
+  }
+
+  struct timespec ts;
+  ts.tv_sec  = 0;
+  ts.tv_nsec = aMicroseconds * 1000UL;
+
+  int rv = ::nanosleep(&ts, &ts);
+
+  while (rv != 0 && errno == EINTR) {
+    // Keep waiting in case of interrupt.
+    // nanosleep puts the remaining time back into ts.
+    rv = ::nanosleep(&ts, &ts);
+  }
+
+  MOZ_ASSERT(!rv, "nanosleep call failed");
 }
 
-template<typename Func>
 void
-Sampler::SuspendAndSampleAndResumeThread(PSLockRef aLock,
-                                         TickSample& aSample,
-                                         const Func& aDoSample)
+SamplerThread::SuspendAndSampleAndResumeThread(PSLockRef aLock,
+                                               TickSample& aSample)
 {
   // Only one sampler thread can be sampling at once.  So we expect to have
   // complete control over |sSigHandlerCoordinator|.
   MOZ_ASSERT(!sSigHandlerCoordinator);
 
-  if (mSamplerTid == -1) {
-    mSamplerTid = gettid();
-  }
   int sampleeTid = aSample.mThreadId;
   MOZ_RELEASE_ASSERT(sampleeTid != mSamplerTid);
 
   //----------------------------------------------------------------//
   // Suspend the samplee thread and get its context.
 
   SigHandlerCoordinator coord;   // on sampler thread's stack
   sSigHandlerCoordinator = &coord;
@@ -354,17 +405,17 @@ Sampler::SuspendAndSampleAndResumeThread
   // malloc implementation, risks deadlock.
 
   // The samplee thread is now frozen and sSigHandlerCoordinator->mUContext is
   // valid.  We can poke around in it and unwind its stack as we like.
 
   // Extract the current PC and sp.
   FillInSample(aSample, &sSigHandlerCoordinator->mUContext);
 
-  aDoSample();
+  Tick(aLock, ActivePS::Buffer(aLock), aSample);
 
   //----------------------------------------------------------------//
   // Resume the target thread.
 
   // Send message 3 to the samplee, which tells it to resume.
   r = sem_post(&sSigHandlerCoordinator->mMessage3);
   MOZ_ASSERT(r == 0);
 
@@ -384,90 +435,16 @@ Sampler::SuspendAndSampleAndResumeThread
   //
   // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
 
   // This isn't strictly necessary, but doing so does help pick up anomalies
   // in which the signal handler is running when it shouldn't be.
   sSigHandlerCoordinator = nullptr;
 }
 
-// END Sampler target specifics
-////////////////////////////////////////////////////////////////////////
-
-////////////////////////////////////////////////////////////////////////
-// BEGIN SamplerThread target specifics
-
-static void*
-ThreadEntry(void* aArg)
-{
-  auto thread = static_cast<SamplerThread*>(aArg);
-  thread->Run();
-  return nullptr;
-}
-
-SamplerThread::SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
-                             double aIntervalMilliseconds)
-  : Sampler(aLock)
-  , mActivityGeneration(aActivityGeneration)
-  , mIntervalMicroseconds(
-      std::max(1, int(floor(aIntervalMilliseconds * 1000 + 0.5))))
-{
-  MOZ_RELEASE_ASSERT(NS_IsMainThread());
-
-  // Start the sampling thread. It repeatedly sends a SIGPROF signal. Sending
-  // the signal ourselves instead of relying on itimer provides much better
-  // accuracy.
-  if (pthread_create(&mThread, nullptr, ThreadEntry, this) != 0) {
-    MOZ_CRASH("pthread_create failed");
-  }
-}
-
-SamplerThread::~SamplerThread()
-{
-  pthread_join(mThread, nullptr);
-}
-
-void
-SamplerThread::SleepMicro(uint32_t aMicroseconds)
-{
-  if (aMicroseconds >= 1000000) {
-    // Use usleep for larger intervals, because the nanosleep
-    // code below only supports intervals < 1 second.
-    MOZ_ALWAYS_TRUE(!::usleep(aMicroseconds));
-    return;
-  }
-
-  struct timespec ts;
-  ts.tv_sec  = 0;
-  ts.tv_nsec = aMicroseconds * 1000UL;
-
-  int rv = ::nanosleep(&ts, &ts);
-
-  while (rv != 0 && errno == EINTR) {
-    // Keep waiting in case of interrupt.
-    // nanosleep puts the remaining time back into ts.
-    rv = ::nanosleep(&ts, &ts);
-  }
-
-  MOZ_ASSERT(!rv, "nanosleep call failed");
-}
-
-void
-SamplerThread::Stop(PSLockRef aLock)
-{
-  MOZ_RELEASE_ASSERT(NS_IsMainThread());
-
-  // Restore old signal handler. This is global state so it's important that
-  // we do it now, while gPSMutex is locked. It's safe to do this now even
-  // though this SamplerThread is still alive, because the next time the main
-  // loop of Run() iterates it won't get past the mActivityGeneration check,
-  // and so won't send any signals.
-  Sampler::Disable(aLock);
-}
-
 // END SamplerThread target specifics
 ////////////////////////////////////////////////////////////////////////
 
 #if defined(GP_OS_linux)
 
 // We use pthread_atfork() to temporarily disable signal delivery during any
 // fork() call. Without that, fork() can be repeatedly interrupted by signal
 // delivery, requiring it to be repeatedly restarted, which can lead to *long*
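
The comments above describe the Linux sampling protocol in prose. As a minimal, self-contained sketch (not the tree's code; all names are illustrative), the four-message handshake looks roughly like this: message 1 is the SIGPROF signal itself, the handler copies the samplee's register context and posts message 2, the sampler inspects the context and posts message 3 to let the samplee resume, and the handler posts message 4 once it is finished with the shared state.

// Illustrative sketch of the four-message SIGPROF handshake; not the
// tree's code. Compile on Linux only.
#include <errno.h>
#include <semaphore.h>
#include <signal.h>
#include <string.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <ucontext.h>
#include <unistd.h>

struct Coordinator {
  sem_t mMessage2;      // to sampler: "context has been copied"
  sem_t mMessage3;      // to samplee: "resume"
  sem_t mMessage4;      // to sampler: "finished with shared state"
  ucontext_t mUContext; // samplee's registers at signal time
};

static Coordinator* gCoord = nullptr;

static void SigprofHandler(int, siginfo_t*, void* aContext) {
  int savedErrno = errno;  // handlers must not clobber errno
  gCoord->mUContext = *static_cast<ucontext_t*>(aContext);
  sem_post(&gCoord->mMessage2);                       // message 2
  while (sem_wait(&gCoord->mMessage3) == -1 && errno == EINTR) {
    // retry if interrupted by another signal
  }
  sem_post(&gCoord->mMessage4);                       // message 4
  errno = savedErrno;
}

static void InstallHandler() {
  struct sigaction sa;
  memset(&sa, 0, sizeof(sa));
  sa.sa_sigaction = SigprofHandler;
  sigemptyset(&sa.sa_mask);
  sa.sa_flags = SA_SIGINFO | SA_RESTART;
  sigaction(SIGPROF, &sa, nullptr);
}

// Runs on the sampler thread; aTid is the samplee's kernel thread id.
static void SampleOnce(pid_t aPid, pid_t aTid) {
  Coordinator coord;
  sem_init(&coord.mMessage2, 0, 0);
  sem_init(&coord.mMessage3, 0, 0);
  sem_init(&coord.mMessage4, 0, 0);
  gCoord = &coord;

  syscall(SYS_tgkill, aPid, aTid, SIGPROF);           // message 1
  while (sem_wait(&coord.mMessage2) == -1 && errno == EINTR) {
  }
  // The samplee is parked in its handler; coord.mUContext can be unwound
  // here. No heap allocation: the samplee may hold the malloc lock.
  sem_post(&coord.mMessage3);                         // message 3
  while (sem_wait(&coord.mMessage4) == -1 && errno == EINTR) {
  }
  gCoord = nullptr;
  sem_destroy(&coord.mMessage2);
  sem_destroy(&coord.mMessage3);
  sem_destroy(&coord.mMessage4);
}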
--- a/tools/profiler/core/platform-macos.cpp
+++ b/tools/profiler/core/platform-macos.cpp
@@ -57,32 +57,64 @@ public:
 private:
   // Note: for mProfiledThread Mach primitives are used instead of pthread's
   // because the latter doesn't provide thread manipulation primitives required.
   // For details, consult "Mac OS X Internals" book, Section 7.3.
   thread_act_t mProfiledThread;
 };
 
 ////////////////////////////////////////////////////////////////////////
-// BEGIN Sampler target specifics
+// BEGIN SamplerThread target specifics
+
+static void*
+ThreadEntry(void* aArg)
+{
+  auto thread = static_cast<SamplerThread*>(aArg);
+  thread->Run();
+  return nullptr;
+}
 
-Sampler::Sampler(PSLockRef aLock)
+SamplerThread::SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
+                             double aIntervalMilliseconds)
+  : mActivityGeneration(aActivityGeneration)
+  , mIntervalMicroseconds(
+      std::max(1, int(floor(aIntervalMilliseconds * 1000 + 0.5))))
 {
+  MOZ_RELEASE_ASSERT(NS_IsMainThread());
+
+  pthread_attr_t* attr_ptr = nullptr;
+  if (pthread_create(&mThread, attr_ptr, ThreadEntry, this) != 0) {
+    MOZ_CRASH("pthread_create failed");
+  }
+}
+
+SamplerThread::~SamplerThread()
+{
+  pthread_join(mThread, nullptr);
 }
 
 void
-Sampler::Disable(PSLockRef aLock)
+SamplerThread::Stop(PSLockRef aLock)
 {
+  MOZ_RELEASE_ASSERT(NS_IsMainThread());
 }
 
-template<typename Func>
 void
-Sampler::SuspendAndSampleAndResumeThread(PSLockRef aLock,
-                                         TickSample& aSample,
-                                         const Func& aDoSample)
+SamplerThread::SleepMicro(uint32_t aMicroseconds)
+{
+  usleep(aMicroseconds);
+  // FIXME: the OSX 10.12 page for usleep says "The usleep() function is
+  // obsolescent.  Use nanosleep(2) instead."  This implementation could be
+  // merged with the linux-android version.  Also, this doesn't handle the
+  // case where the usleep call is interrupted by a signal.
+}
+
+void
+SamplerThread::SuspendAndSampleAndResumeThread(PSLockRef aLock,
+                                               TickSample& aSample)
 {
   thread_act_t samplee_thread = aSample.mPlatformData->ProfiledThread();
 
   //----------------------------------------------------------------//
   // Suspend the samplee thread and get its context.
 
   // We're using thread_suspend on OS X because pthread_kill (which is what we
   // at one time used on Linux) has less consistent performance and causes
@@ -129,83 +161,31 @@ Sampler::SuspendAndSampleAndResumeThread
   if (thread_get_state(samplee_thread,
                        flavor,
                        reinterpret_cast<natural_t*>(&state),
                        &count) == KERN_SUCCESS) {
     aSample.mPC = reinterpret_cast<Address>(state.REGISTER_FIELD(ip));
     aSample.mSP = reinterpret_cast<Address>(state.REGISTER_FIELD(sp));
     aSample.mFP = reinterpret_cast<Address>(state.REGISTER_FIELD(bp));
 
-    aDoSample();
+    Tick(aLock, ActivePS::Buffer(aLock), aSample);
   }
 
 #undef REGISTER_FIELD
 
   //----------------------------------------------------------------//
   // Resume the target thread.
 
   thread_resume(samplee_thread);
 
   // The profiler's critical section ends here.
   //
   // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
 }
 
-// END Sampler target specifics
-////////////////////////////////////////////////////////////////////////
-
-////////////////////////////////////////////////////////////////////////
-// BEGIN SamplerThread target specifics
-
-static void*
-ThreadEntry(void* aArg)
-{
-  auto thread = static_cast<SamplerThread*>(aArg);
-  thread->Run();
-  return nullptr;
-}
-
-SamplerThread::SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
-                             double aIntervalMilliseconds)
-  : Sampler(aLock)
-  , mActivityGeneration(aActivityGeneration)
-  , mIntervalMicroseconds(
-      std::max(1, int(floor(aIntervalMilliseconds * 1000 + 0.5))))
-{
-  MOZ_RELEASE_ASSERT(NS_IsMainThread());
-
-  pthread_attr_t* attr_ptr = nullptr;
-  if (pthread_create(&mThread, attr_ptr, ThreadEntry, this) != 0) {
-    MOZ_CRASH("pthread_create failed");
-  }
-}
-
-SamplerThread::~SamplerThread()
-{
-  pthread_join(mThread, nullptr);
-}
-
-void
-SamplerThread::SleepMicro(uint32_t aMicroseconds)
-{
-  usleep(aMicroseconds);
-  // FIXME: the OSX 10.12 page for usleep says "The usleep() function is
-  // obsolescent.  Use nanosleep(2) instead."  This implementation could be
-  // merged with the linux-android version.  Also, this doesn't handle the
-  // case where the usleep call is interrupted by a signal.
-}
-
-void
-SamplerThread::Stop(PSLockRef aLock)
-{
-  MOZ_RELEASE_ASSERT(NS_IsMainThread());
-
-  Sampler::Disable(aLock);
-}
-
 // END SamplerThread target specifics
 ////////////////////////////////////////////////////////////////////////
 
 static void
 PlatformInit(PSLockRef aLock)
 {
 }
 
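
The FIXME in SleepMicro above suggests switching from the obsolescent usleep to nanosleep and handling interruption. A sketch of what that merged implementation could look like, assuming plain POSIX (this is not part of the patch):

// Sketch of an EINTR-tolerant SleepMicro built on nanosleep, as the FIXME
// suggests; nanosleep writes the unslept remainder back into |ts|, so the
// loop resumes where it left off.
#include <errno.h>
#include <stdint.h>
#include <time.h>

static void SleepMicroInterruptible(uint32_t aMicroseconds) {
  struct timespec ts;
  ts.tv_sec = aMicroseconds / 1000000;
  ts.tv_nsec = static_cast<long>(aMicroseconds % 1000000) * 1000;
  while (nanosleep(&ts, &ts) == -1 && errno == EINTR) {
    // Interrupted by a signal; |ts| now holds the remaining time.
  }
}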
--- a/tools/profiler/core/platform-win32.cpp
+++ b/tools/profiler/core/platform-win32.cpp
@@ -74,37 +74,115 @@ uintptr_t
 GetThreadHandle(PlatformData* aData)
 {
   return (uintptr_t) aData->ProfiledThread();
 }
 
 static const HANDLE kNoThread = INVALID_HANDLE_VALUE;
 
 ////////////////////////////////////////////////////////////////////////
-// BEGIN Sampler target specifics
+// BEGIN SamplerThread target specifics
+
+static unsigned int __stdcall
+ThreadEntry(void* aArg)
+{
+  auto thread = static_cast<SamplerThread*>(aArg);
+  thread->Run();
+  return 0;
+}
+
+SamplerThread::SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
+                             double aIntervalMilliseconds)
+    : mActivityGeneration(aActivityGeneration)
+    , mIntervalMicroseconds(
+        std::max(1, int(floor(aIntervalMilliseconds * 1000 + 0.5))))
+{
+  MOZ_RELEASE_ASSERT(NS_IsMainThread());
 
-Sampler::Sampler(PSLockRef aLock)
+  // By default we'll not adjust the timer resolution which tends to be
+  // around 16ms. However, if the requested interval is sufficiently low
+  // we'll try to adjust the resolution to match.
+  if (mIntervalMicroseconds < 10*1000) {
+    ::timeBeginPeriod(mIntervalMicroseconds / 1000);
+  }
+
+  // Create a new thread. It is important to use _beginthreadex() instead of
+  // the Win32 function CreateThread(), because the CreateThread() does not
+  // initialize thread-specific structures in the C runtime library.
+  mThread = reinterpret_cast<HANDLE>(
+      _beginthreadex(nullptr,
+                     /* stack_size */ 0,
+                     ThreadEntry,
+                     this,
+                     /* initflag */ 0,
+                     nullptr));
+  if (mThread == 0) {
+    MOZ_CRASH("_beginthreadex failed");
+  }
+}
+
+SamplerThread::~SamplerThread()
 {
+  WaitForSingleObject(mThread, INFINITE);
+
+  // Close our own handle for the thread.
+  if (mThread != kNoThread) {
+    CloseHandle(mThread);
+  }
 }
 
 void
-Sampler::Disable(PSLockRef aLock)
+SamplerThread::Stop(PSLockRef aLock)
 {
+  MOZ_RELEASE_ASSERT(NS_IsMainThread());
+
+  // Disable any timer resolution changes we've made. Do it now while
+  // gPSMutex is locked, i.e. before any other SamplerThread can be created
+  // and call ::timeBeginPeriod().
+  //
+  // It's safe to do this now even though this SamplerThread is still alive,
+  // because the next time the main loop of Run() iterates it won't get past
+  // the mActivityGeneration check, and so it won't make any more ::Sleep()
+  // calls.
+  if (mIntervalMicroseconds < 10 * 1000) {
+    ::timeEndPeriod(mIntervalMicroseconds / 1000);
+  }
 }
 
-template<typename Func>
 void
-Sampler::SuspendAndSampleAndResumeThread(PSLockRef aLock,
-                                         TickSample& aSample,
-                                         const Func& aDoSample)
+SamplerThread::SleepMicro(uint32_t aMicroseconds)
+{
+  // For now, keep the old behaviour of minimum Sleep(1), even for
+  // smaller-than-usual sleeps after an overshoot, unless the user has
+  // explicitly opted into a sub-millisecond profiler interval.
+  if (mIntervalMicroseconds >= 1000) {
+    ::Sleep(std::max(1u, aMicroseconds / 1000));
+  } else {
+    TimeStamp start = TimeStamp::Now();
+    TimeStamp end = start + TimeDuration::FromMicroseconds(aMicroseconds);
+
+    // First, sleep for as many whole milliseconds as possible.
+    if (aMicroseconds >= 1000) {
+      ::Sleep(aMicroseconds / 1000);
+    }
+
+    // Then, spin until enough time has passed.
+    while (TimeStamp::Now() < end) {
+      _mm_pause();
+    }
+  }
+}
+
+void
+SamplerThread::SuspendAndSampleAndResumeThread(PSLockRef aLock,
+                                               TickSample& aSample)
 {
   HANDLE profiled_thread = aSample.mPlatformData->ProfiledThread();
-  if (profiled_thread == nullptr) {
+  if (profiled_thread == nullptr)
     return;
-  }
 
   // Context used for sampling the register state of the profiled thread.
   CONTEXT context;
   memset(&context, 0, sizeof(context));
 
   //----------------------------------------------------------------//
   // Suspend the samplee thread and get its context.
 
@@ -145,127 +223,28 @@ Sampler::SuspendAndSampleAndResumeThread
 #else
   aSample.mPC = reinterpret_cast<Address>(context.Eip);
   aSample.mSP = reinterpret_cast<Address>(context.Esp);
   aSample.mFP = reinterpret_cast<Address>(context.Ebp);
 #endif
 
   aSample.mContext = &context;
 
-  aDoSample();
+  Tick(aLock, ActivePS::Buffer(aLock), aSample);
 
   //----------------------------------------------------------------//
   // Resume the target thread.
 
   ResumeThread(profiled_thread);
 
   // The profiler's critical section ends here.
   //
   // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
 }
 
-// END Sampler target specifics
-////////////////////////////////////////////////////////////////////////
-
-////////////////////////////////////////////////////////////////////////
-// BEGIN SamplerThread target specifics
-
-static unsigned int __stdcall
-ThreadEntry(void* aArg)
-{
-  auto thread = static_cast<SamplerThread*>(aArg);
-  thread->Run();
-  return 0;
-}
-
-SamplerThread::SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
-                             double aIntervalMilliseconds)
-    : Sampler(aLock)
-    , mActivityGeneration(aActivityGeneration)
-    , mIntervalMicroseconds(
-        std::max(1, int(floor(aIntervalMilliseconds * 1000 + 0.5))))
-{
-  MOZ_RELEASE_ASSERT(NS_IsMainThread());
-
-  // By default we'll not adjust the timer resolution which tends to be
-  // around 16ms. However, if the requested interval is sufficiently low
-  // we'll try to adjust the resolution to match.
-  if (mIntervalMicroseconds < 10*1000) {
-    ::timeBeginPeriod(mIntervalMicroseconds / 1000);
-  }
-
-  // Create a new thread. It is important to use _beginthreadex() instead of
-  // the Win32 function CreateThread(), because the CreateThread() does not
-  // initialize thread-specific structures in the C runtime library.
-  mThread = reinterpret_cast<HANDLE>(
-      _beginthreadex(nullptr,
-                     /* stack_size */ 0,
-                     ThreadEntry,
-                     this,
-                     /* initflag */ 0,
-                     nullptr));
-  if (mThread == 0) {
-    MOZ_CRASH("_beginthreadex failed");
-  }
-}
-
-SamplerThread::~SamplerThread()
-{
-  WaitForSingleObject(mThread, INFINITE);
-
-  // Close our own handle for the thread.
-  if (mThread != kNoThread) {
-    CloseHandle(mThread);
-  }
-}
-
-void
-SamplerThread::SleepMicro(uint32_t aMicroseconds)
-{
-  // For now, keep the old behaviour of minimum Sleep(1), even for
-  // smaller-than-usual sleeps after an overshoot, unless the user has
-  // explicitly opted into a sub-millisecond profiler interval.
-  if (mIntervalMicroseconds >= 1000) {
-    ::Sleep(std::max(1u, aMicroseconds / 1000));
-  } else {
-    TimeStamp start = TimeStamp::Now();
-    TimeStamp end = start + TimeDuration::FromMicroseconds(aMicroseconds);
-
-    // First, sleep for as many whole milliseconds as possible.
-    if (aMicroseconds >= 1000) {
-      ::Sleep(aMicroseconds / 1000);
-    }
-
-    // Then, spin until enough time has passed.
-    while (TimeStamp::Now() < end) {
-      _mm_pause();
-    }
-  }
-}
-
-void
-SamplerThread::Stop(PSLockRef aLock)
-{
-  MOZ_RELEASE_ASSERT(NS_IsMainThread());
-
-  // Disable any timer resolution changes we've made. Do it now while
-  // gPSMutex is locked, i.e. before any other SamplerThread can be created
-  // and call ::timeBeginPeriod().
-  //
-  // It's safe to do this now even though this SamplerThread is still alive,
-  // because the next time the main loop of Run() iterates it won't get past
-  // the mActivityGeneration check, and so it won't make any more ::Sleep()
-  // calls.
-  if (mIntervalMicroseconds < 10 * 1000) {
-    ::timeEndPeriod(mIntervalMicroseconds / 1000);
-  }
-
-  Sampler::Disable(aLock);
-}
-
 // END SamplerThread target specifics
 ////////////////////////////////////////////////////////////////////////
 
 static void
 PlatformInit(PSLockRef aLock)
 {
 }
 
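
The constructor and Stop() above must keep ::timeBeginPeriod and ::timeEndPeriod balanced by hand across two methods. As a design note, the same pairing can be expressed as an RAII guard; ScopedTimerResolution is an invented name, not part of the patch:

// Illustrative RAII wrapper for the timeBeginPeriod/timeEndPeriod pairing
// used above, so the resolution change cannot leak on an early return.
// Requires linking winmm.lib.
#include <windows.h>
#include <mmsystem.h>

class ScopedTimerResolution {
public:
  explicit ScopedTimerResolution(UINT aMilliseconds)
    : mMilliseconds(aMilliseconds)
    , mActive(::timeBeginPeriod(aMilliseconds) == TIMERR_NOERROR)
  {
  }
  ~ScopedTimerResolution() {
    if (mActive) {
      ::timeEndPeriod(mMilliseconds);
    }
  }
private:
  UINT mMilliseconds;
  bool mActive;
};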
--- a/tools/profiler/core/platform.cpp
+++ b/tools/profiler/core/platform.cpp
@@ -975,50 +975,70 @@ StackWalkCallback(uint32_t aFrameNumber,
   NativeStack* nativeStack = static_cast<NativeStack*>(aClosure);
   MOZ_ASSERT(nativeStack->count < nativeStack->size);
   nativeStack->sp_array[nativeStack->count] = aSP;
   nativeStack->pc_array[nativeStack->count] = aPC;
   nativeStack->count++;
 }
 
 static void
-DoNativeBacktrace(PSLockRef aLock, NativeStack& aNativeStack,
+DoNativeBacktrace(PSLockRef aLock, ProfileBuffer* aBuffer,
                   const TickSample& aSample)
 {
+  void* pc_array[1000];
+  void* sp_array[1000];
+  NativeStack nativeStack = {
+    pc_array,
+    sp_array,
+    mozilla::ArrayLength(pc_array),
+    0
+  };
+
   // Start with the current function. We use 0 as the frame number here because
   // the FramePointerStackWalk() and MozStackWalk() calls below will use 1..N.
   // This is a bit weird but it doesn't matter because StackWalkCallback()
   // doesn't use the frame number argument.
-  StackWalkCallback(/* frameNum */ 0, aSample.mPC, aSample.mSP, &aNativeStack);
-
-  uint32_t maxFrames = uint32_t(aNativeStack.size - aNativeStack.count);
+  StackWalkCallback(/* frameNum */ 0, aSample.mPC, aSample.mSP, &nativeStack);
+
+  uint32_t maxFrames = uint32_t(nativeStack.size - nativeStack.count);
 
 #if defined(GP_OS_darwin) || (defined(GP_PLAT_x86_windows))
   void* stackEnd = aSample.mStackTop;
   if (aSample.mFP >= aSample.mSP && aSample.mFP <= stackEnd) {
     FramePointerStackWalk(StackWalkCallback, /* skipFrames */ 0, maxFrames,
-                          &aNativeStack, reinterpret_cast<void**>(aSample.mFP),
+                          &nativeStack, reinterpret_cast<void**>(aSample.mFP),
                           stackEnd);
   }
 #else
   // Win64 always omits frame pointers so for it we use the slower
   // MozStackWalk().
   uintptr_t thread = GetThreadHandle(aSample.mPlatformData);
   MOZ_ASSERT(thread);
-  MozStackWalk(StackWalkCallback, /* skipFrames */ 0, maxFrames, &aNativeStack,
+  MozStackWalk(StackWalkCallback, /* skipFrames */ 0, maxFrames, &nativeStack,
                thread, /* platformData */ nullptr);
 #endif
+
+  MergeStacksIntoProfile(aLock, aBuffer, aSample, nativeStack);
 }
 #endif
 
 #ifdef USE_EHABI_STACKWALK
 static void
-DoNativeBacktrace(PSLockRef aLock, NativeStack& aNativeStack,
+DoNativeBacktrace(PSLockRef aLock, ProfileBuffer* aBuffer,
                   const TickSample& aSample)
 {
+  void* pc_array[1000];
+  void* sp_array[1000];
+  NativeStack nativeStack = {
+    pc_array,
+    sp_array,
+    mozilla::ArrayLength(pc_array),
+    0
+  };
+
   const mcontext_t* mcontext =
     &reinterpret_cast<ucontext_t*>(aSample.mContext)->uc_mcontext;
   mcontext_t savedContext;
   NotNull<RacyThreadInfo*> racyInfo = aSample.mRacyInfo;
 
   // The pseudostack contains an "EnterJIT" frame whenever we enter
   // JIT code with profiling enabled; the stack pointer value points
   // the saved registers.  We use this to unwind resume unwinding
@@ -1029,21 +1049,21 @@ DoNativeBacktrace(PSLockRef aLock, Nativ
     volatile js::ProfileEntry& entry = racyInfo->mStack[i - 1];
     if (!entry.isJs() && strcmp(entry.label(), "EnterJIT") == 0) {
       // Found JIT entry frame.  Unwind up to that point (i.e., force
       // the stack walk to stop before the block of saved registers;
       // note that it yields nondecreasing stack pointers), then restore
       // the saved state.
       uint32_t* vSP = reinterpret_cast<uint32_t*>(entry.stackAddress());
 
-      aNativeStack.count += EHABIStackWalk(*mcontext,
-                                           /* stackBase = */ vSP,
-                                           aNativeStack.sp_array + aNativeStack.count,
-                                           aNativeStack.pc_array + aNativeStack.count,
-                                           aNativeStack.size - aNativeStack.count);
+      nativeStack.count += EHABIStackWalk(*mcontext,
+                                          /* stackBase = */ vSP,
+                                          sp_array + nativeStack.count,
+                                          pc_array + nativeStack.count,
+                                          nativeStack.size - nativeStack.count);
 
       memset(&savedContext, 0, sizeof(savedContext));
 
       // See also: struct EnterJITStack in js/src/jit/arm/Trampoline-arm.cpp
       savedContext.arm_r4  = *vSP++;
       savedContext.arm_r5  = *vSP++;
       savedContext.arm_r6  = *vSP++;
       savedContext.arm_r7  = *vSP++;
@@ -1055,21 +1075,23 @@ DoNativeBacktrace(PSLockRef aLock, Nativ
       savedContext.arm_sp  = reinterpret_cast<uint32_t>(vSP);
       savedContext.arm_pc  = savedContext.arm_lr;
       mcontext = &savedContext;
     }
   }
 
   // Now unwind whatever's left (starting from either the last EnterJIT frame
   // or, if no EnterJIT was found, the original registers).
-  aNativeStack.count += EHABIStackWalk(*mcontext,
-                                       aSample.mStackTop,
-                                       aNativeStack.sp_array + aNativeStack.count,
-                                       aNativeStack.pc_array + aNativeStack.count,
-                                       aNativeStack.size - aNativeStack.count);
+  nativeStack.count += EHABIStackWalk(*mcontext,
+                                      aSample.mStackTop,
+                                      sp_array + nativeStack.count,
+                                      pc_array + nativeStack.count,
+                                      nativeStack.size - nativeStack.count);
+
+  MergeStacksIntoProfile(aLock, aBuffer, aSample, nativeStack);
 }
 #endif
 
 #ifdef USE_LUL_STACKWALK
 
 // See the comment at the callsite for why this function is necessary.
 #if defined(MOZ_HAVE_ASAN_BLACKLIST)
 MOZ_ASAN_BLACKLIST static void
@@ -1084,17 +1106,17 @@ ASAN_memcpy(void* aDst, const void* aSrc
 
   for (size_t i = 0; i < aLen; i++) {
     dst[i] = src[i];
   }
 }
 #endif
 
 static void
-DoNativeBacktrace(PSLockRef aLock, NativeStack& aNativeStack,
+DoNativeBacktrace(PSLockRef aLock, ProfileBuffer* aBuffer,
                   const TickSample& aSample)
 {
   const mcontext_t* mc =
     &reinterpret_cast<ucontext_t*>(aSample.mContext)->uc_mcontext;
 
   lul::UnwindRegs startRegs;
   memset(&startRegs, 0, sizeof(startRegs));
 
@@ -1192,77 +1214,82 @@ DoNativeBacktrace(PSLockRef aLock, Nativ
       memcpy(&stackImg.mContents[0], (void*)start, nToCopy);
 #endif
       (void)VALGRIND_MAKE_MEM_DEFINED(&stackImg.mContents[0], nToCopy);
     }
   }
 
   // The maximum number of frames that LUL will produce.  Setting it
   // too high gives a risk of it wasting a lot of time looping on
-  // corrupted stacks. Limit the size of the passed-in native stack
-  // to not exceed this number.
+  // corrupted stacks.
   const int MAX_NATIVE_FRAMES = 256;
-  if (aNativeStack.size > MAX_NATIVE_FRAMES) {
-    aNativeStack.size = MAX_NATIVE_FRAMES;
-  }
 
   size_t scannedFramesAllowed = 0;
+
+  uintptr_t framePCs[MAX_NATIVE_FRAMES];
+  uintptr_t frameSPs[MAX_NATIVE_FRAMES];
+  size_t framesAvail = mozilla::ArrayLength(framePCs);
+  size_t framesUsed  = 0;
   size_t scannedFramesAcquired = 0, framePointerFramesAcquired = 0;
   lul::LUL* lul = CorePS::Lul(aLock);
-  lul->Unwind(reinterpret_cast<uintptr_t*>(aNativeStack.pc_array),
-              reinterpret_cast<uintptr_t*>(aNativeStack.sp_array),
-              &aNativeStack.count,
-              &framePointerFramesAcquired, &scannedFramesAcquired,
-              aNativeStack.size, scannedFramesAllowed,
+  lul->Unwind(&framePCs[0], &frameSPs[0],
+              &framesUsed, &framePointerFramesAcquired, &scannedFramesAcquired,
+              framesAvail, scannedFramesAllowed,
               &startRegs, &stackImg);
 
+  NativeStack nativeStack = {
+    reinterpret_cast<void**>(framePCs),
+    reinterpret_cast<void**>(frameSPs),
+    mozilla::ArrayLength(framePCs),
+    framesUsed
+  };
+
+  MergeStacksIntoProfile(aLock, aBuffer, aSample, nativeStack);
+
   // Update stats in the LUL stats object.  Unfortunately this requires
   // three global memory operations.
   lul->mStats.mContext += 1;
-  lul->mStats.mCFI     += aNativeStack.count - 1 - framePointerFramesAcquired -
-                                                   scannedFramesAcquired;
+  lul->mStats.mCFI     += framesUsed - 1 - framePointerFramesAcquired -
+                                           scannedFramesAcquired;
   lul->mStats.mFP      += framePointerFramesAcquired;
   lul->mStats.mScanned += scannedFramesAcquired;
 }
 
 #endif
 
-void
-Tick(PSLockRef aLock, const TickSample& aSample, ProfileBuffer* aBuffer)
+static void
+DoSampleStackTrace(PSLockRef aLock, ProfileBuffer* aBuffer,
+                   const TickSample& aSample)
 {
-  MOZ_RELEASE_ASSERT(ActivePS::Exists(aLock));
-
+  NativeStack nativeStack = { nullptr, nullptr, 0, 0 };
+  MergeStacksIntoProfile(aLock, aBuffer, aSample, nativeStack);
+
+  if (ActivePS::FeatureLeaf(aLock)) {
+    aBuffer->addTag(ProfileBufferEntry::NativeLeafAddr((void*)aSample.mPC));
+  }
+}
+
+// This function is called for each sampling period with the current program
+// counter. It is called within a signal and so must be re-entrant.
+static void
+Tick(PSLockRef aLock, ProfileBuffer* aBuffer, const TickSample& aSample)
+{
   aBuffer->addTagThreadId(aSample.mThreadId, aSample.mLastSample);
 
   mozilla::TimeDuration delta =
     aSample.mTimeStamp - CorePS::ProcessStartTime(aLock);
   aBuffer->addTag(ProfileBufferEntry::Time(delta.ToMilliseconds()));
 
-  void* pc_array[1000];
-  void* sp_array[1000];
-  NativeStack nativeStack = {
-    pc_array,
-    sp_array,
-    mozilla::ArrayLength(pc_array),
-    0
-  };
-
 #if defined(HAVE_NATIVE_UNWIND)
   if (ActivePS::FeatureStackWalk(aLock)) {
-    DoNativeBacktrace(aLock, nativeStack, aSample);
-
-    MergeStacksIntoProfile(aLock, aBuffer, aSample, nativeStack);
+    DoNativeBacktrace(aLock, aBuffer, aSample);
   } else
 #endif
   {
-    MergeStacksIntoProfile(aLock, aBuffer, aSample, nativeStack);
-
-    if (ActivePS::FeatureLeaf(aLock)) {
-      aBuffer->addTag(ProfileBufferEntry::NativeLeafAddr((void*)aSample.mPC));
-    }
+    DoSampleStackTrace(aLock, aBuffer, aSample);
   }
 
   // Don't process the PseudoStack's markers if we're synchronously sampling
   // the current thread.
   if (!aSample.mIsSynchronous) {
     ProfilerMarkerLinkedList* pendingMarkersList =
       aSample.mRacyInfo->GetPendingMarkers();
     while (pendingMarkersList && pendingMarkersList->peek()) {
@@ -1651,94 +1678,38 @@ PrintUsageThenExit(int aExitCode)
     "does not support"
 #endif
   );
 
   exit(aExitCode);
 }
 
 ////////////////////////////////////////////////////////////////////////
-// BEGIN Sampler
+// BEGIN SamplerThread
 
 #if defined(GP_OS_linux) || defined(GP_OS_android)
 struct SigHandlerCoordinator;
 #endif
 
-// Sampler performs setup and teardown of the state required to sample with the
-// profiler. Sampler may exist when ActivePS is not present.
-//
-// SuspendAndSampleAndResumeThread must only be called from a single thread,
-// and must not sample the thread it is being called from. A separate Sampler
-// instance must be used for each thread which wants to capture samples.
-
-// WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
-//
-// With the exception of SamplerThread, all Sampler objects must be Disable-d
-// before releasing the lock which was used to create them. This avoids races
-// on linux with the SIGPROF signal handler.
-
-class Sampler
-{
-public:
-  // Sets up the profiler such that it can begin sampling.
-  explicit Sampler(PSLockRef aLock);
-
-  // Disable the sampler, restoring it to its previous state. This must be
-  // called once, and only once, before the Sampler is destroyed.
-  void Disable(PSLockRef aLock);
-
-  // This method suspends and resumes the samplee thread. It calls the passed-in
-  // function like object aDoSample while the samplee thread is suspended, after
-  // filling in register values in aSample.
-  //
-  // Func must be a function-like object of type `void()`.
-  template<typename Func>
-  void SuspendAndSampleAndResumeThread(PSLockRef aLock,
-                                       TickSample& aSample,
-                                       const Func& aDoSample);
-
-private:
-#if defined(GP_OS_linux) || defined(GP_OS_android)
-  // Used to restore the SIGPROF handler when ours is removed.
-  struct sigaction mOldSigprofHandler;
-
-  // This process' ID. Needed as an argument for tgkill in
-  // SuspendAndSampleAndResumeThread.
-  int mMyPid;
-
-  // The sampler thread's ID.  Used to assert that it is not sampling itself,
-  // which would lead to deadlock.
-  int mSamplerTid;
-
-public:
-  // This is the one-and-only variable used to communicate between the sampler
-  // thread and the samplee thread's signal handler. It's static because the
-  // samplee thread's signal handler is static.
-  static struct SigHandlerCoordinator* sSigHandlerCoordinator;
-#endif
-};
-
-// END Sampler
-////////////////////////////////////////////////////////////////////////
-
-////////////////////////////////////////////////////////////////////////
-// BEGIN SamplerThread
-
 // The sampler thread controls sampling and runs whenever the profiler is
 // active. It periodically runs through all registered threads, finds those
 // that should be sampled, then pauses and samples them.
 
-class SamplerThread : public Sampler
+class SamplerThread
 {
 public:
   // Creates a sampler thread, but doesn't start it.
   SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
                 double aIntervalMilliseconds);
   ~SamplerThread();
 
+  // This runs on the sampler thread.  It suspends and resumes the samplee
+  // threads.
+  void SuspendAndSampleAndResumeThread(PSLockRef aLock, TickSample& aSample);
+
   // This runs on (is!) the sampler thread.
   void Run();
 
   // This runs on the main thread.
   void Stop(PSLockRef aLock);
 
 private:
   // This suspends the calling thread for the given number of microseconds.
@@ -1753,16 +1724,36 @@ private:
 
   // The OS-specific handle for the sampler thread.
 #if defined(GP_OS_windows)
   HANDLE mThread;
 #elif defined(GP_OS_darwin) || defined(GP_OS_linux) || defined(GP_OS_android)
   pthread_t mThread;
 #endif
 
+#if defined(GP_OS_linux) || defined(GP_OS_android)
+  // Used to restore the SIGPROF handler when ours is removed.
+  struct sigaction mOldSigprofHandler;
+
+  // This process' ID.  Needed as an argument for tgkill in
+  // SuspendAndSampleAndResumeThread.
+  int mMyPid;
+
+public:
+  // The sampler thread's ID.  Used to assert that it is not sampling itself,
+  // which would lead to deadlock.
+  int mSamplerTid;
+
+  // This is the one-and-only variable used to communicate between the sampler
+  // thread and the samplee thread's signal handler. It's static because the
+  // samplee thread's signal handler is static.
+  static struct SigHandlerCoordinator* sSigHandlerCoordinator;
+#endif
+
+private:
   SamplerThread(const SamplerThread&) = delete;
   void operator=(const SamplerThread&) = delete;
 };
 
 // This function is required because we need to create a SamplerThread within
 // ActivePS's constructor, but SamplerThread is defined after ActivePS. It
 // could probably be removed by moving some code around.
 static SamplerThread*
@@ -1836,19 +1827,17 @@ SamplerThread::Run()
             rssMemory = nsMemoryReporterManager::ResidentFast();
 #if defined(GP_OS_linux) || defined(GP_OS_android)
             ussMemory = nsMemoryReporterManager::ResidentUnique();
 #endif
           }
 
           TickSample sample(info, rssMemory, ussMemory);
 
-          SuspendAndSampleAndResumeThread(lock, sample, [&] {
-              Tick(lock, sample, ActivePS::Buffer(lock));
-            });
+          SuspendAndSampleAndResumeThread(lock, sample);
         }
 
 #if defined(USE_LUL_STACKWALK)
         // The LUL unwind object accumulates frame statistics. Periodically we
         // should poke it to give it a chance to print those statistics.  This
         // involves doing I/O (fprintf, __android_log_print, etc.) and so
         // can't safely be done from the critical section inside
         // SuspendAndSampleAndResumeThread, which is why it is done here.
@@ -2784,17 +2773,17 @@ profiler_get_backtrace()
   sample.PopulateContext(&context);
 #elif defined(GP_OS_darwin)
   sample.PopulateContext(nullptr);
 #else
 # error "unknown platform"
 #endif
 #endif
 
-  Tick(lock, sample, buffer);
+  Tick(lock, buffer, sample);
 
   return UniqueProfilerBacktrace(
     new ProfilerBacktrace("SyncProfile", tid, buffer));
 }
 
 void
 ProfilerBacktraceDestructor::operator()(ProfilerBacktrace* aBacktrace)
 {
@@ -2999,64 +2988,21 @@ profiler_clear_js_context()
   }
 
   // We don't call info->StopJSSampling() here; there's no point doing that for
   // a JS thread that is in the process of disappearing.
 
   info->mContext = nullptr;
 }
 
-int
-profiler_current_thread_id()
+void*
+profiler_get_stack_top()
 {
-  return Thread::GetCurrentId();
-}
-
-// NOTE: The callback function passed in will be called while the target thread
-// is paused. Doing stuff in this function like allocating which may try to
-// claim locks is a surefire way to deadlock.
-void
-profiler_suspend_and_sample_thread(int aThreadId,
-                                   const std::function<void(void**, size_t)>& aCallback,
-                                   bool aSampleNative /* = true */)
-{
-  // Allocate the space for the native stack
-  void* pc_array[1000];
-  void* sp_array[1000];
-  NativeStack nativeStack = {
-    pc_array,
-    sp_array,
-    mozilla::ArrayLength(pc_array),
-    0
-  };
-
-  // Lock the profiler mutex
   PSAutoLock lock(gPSMutex);
-
-  const CorePS::ThreadVector& liveThreads = CorePS::LiveThreads(lock);
-  for (uint32_t i = 0; i < liveThreads.size(); i++) {
-    ThreadInfo* info = liveThreads.at(i);
-
-    if (info->ThreadId() == aThreadId) {
-      // Suspend, sample, and then resume the target thread.
-      Sampler sampler(lock);
-      TickSample sample(info, 0, 0);
-      sampler.SuspendAndSampleAndResumeThread(lock, sample, [&] {
-          // The target thread is now suspended, collect a native backtrace, and
-          // call the callback.
-#if defined(HAVE_NATIVE_UNWIND)
-          if (aSampleNative) {
-            DoNativeBacktrace(lock, nativeStack, sample);
-          }
-#endif
-          aCallback(nativeStack.pc_array, nativeStack.count);
-        });
-
-      // NOTE: Make sure to disable the sampler before it is destroyed, in case
-      // the profiler is running at the same time.
-      sampler.Disable(lock);
-      break;
-    }
+  ThreadInfo* threadInfo = FindLiveThreadInfo(lock);
+  if (threadInfo) {
+    return threadInfo->StackTop();
   }
+  return nullptr;
 }
 
 // END externally visible functions
 ////////////////////////////////////////////////////////////////////////
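
A recurring shape in this file is a pair of fixed-size, on-stack frame buffers wrapped in a NativeStack. The sketch below is illustrative (only NativeStack's layout mirrors the diff) and shows why that shape is used: nothing may heap-allocate while the samplee is suspended, because the suspended thread may hold the allocator's lock.

// Mirrors the NativeStack POD used above: raw pc/sp arrays plus capacity
// and count. CollectFrame stands in for StackWalkCallback.
#include <cstddef>

struct NativeStack {
  void** pc_array;
  void** sp_array;
  size_t size;   // capacity
  size_t count;  // frames filled so far
};

static void CollectFrame(NativeStack& aStack, void* aPC, void* aSP) {
  if (aStack.count < aStack.size) {
    aStack.pc_array[aStack.count] = aPC;
    aStack.sp_array[aStack.count] = aSP;
    aStack.count++;
  }
}

static void SampleSuspendedThread() {
  void* pcArray[1000];  // on-stack, allocation-free
  void* spArray[1000];
  NativeStack nativeStack = { pcArray, spArray, 1000, 0 };
  CollectFrame(nativeStack, nullptr, nullptr);  // stand-in for a real unwind
  // ...the real code then hands nativeStack to MergeStacksIntoProfile().
}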
--- a/tools/profiler/public/GeckoProfiler.h
+++ b/tools/profiler/public/GeckoProfiler.h
@@ -17,17 +17,16 @@
 // The profiler collects samples that include native stacks and
 // platform-independent "pseudostacks".
 
 #ifndef GeckoProfiler_h
 #define GeckoProfiler_h
 
 #include <stdint.h>
 #include <stdarg.h>
-#include <functional>
 
 #include "mozilla/Assertions.h"
 #include "mozilla/Attributes.h"
 #include "js/TypeDecls.h"
 #include "mozilla/GuardObjects.h"
 #include "mozilla/UniquePtr.h"
 
 class SpliceableJSONWriter;
@@ -372,29 +371,21 @@ PROFILER_FUNC(bool profiler_thread_is_sl
 PROFILER_FUNC_VOID(profiler_js_interrupt_callback())
 
 // The number of milliseconds since the process started. Operates the same
 // whether the profiler is active or inactive.
 PROFILER_FUNC(double profiler_time(), 0)
 
 PROFILER_FUNC_VOID(profiler_log(const char *str))
 
-PROFILER_FUNC(int profiler_current_thread_id(), 0)
-
-// This method suspends the thread identified by aThreadId, optionally samples
-// it for its native stack, and then calls the callback. The callback is passed
-// the native stack's program counters and length as two arguments if
-// aSampleNative is true.
+// Gets the stack top of the current thread.
 //
-// WARNING: The target thread is suspended during the callback. Do not try to
-// allocate or acquire any locks, or you could deadlock. The target thread will
-// have resumed by the time that this function returns.
-PROFILER_FUNC_VOID(profiler_suspend_and_sample_thread(int aThreadId,
-                                                      const std::function<void(void**, size_t)>& aCallback,
-                                                      bool aSampleNative = true))
+// The thread must have been previously registered with the profiler, otherwise
+// this method will return nullptr.
+PROFILER_FUNC(void* profiler_get_stack_top(), nullptr)
 
 // End of the functions defined whether the profiler is enabled or not.
 
 #if defined(MOZ_GECKO_PROFILER)
 
 #include <stdlib.h>
 #include <signal.h>
 #include "js/ProfilingStack.h"
--- a/xpcom/threads/BackgroundHangMonitor.cpp
+++ b/xpcom/threads/BackgroundHangMonitor.cpp
@@ -593,29 +593,31 @@ BackgroundHangMonitor::DisableOnBeta() {
 void
 BackgroundHangMonitor::Startup()
 {
 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
   MOZ_ASSERT(!BackgroundHangManager::sInstance, "Already initialized");
 
   if (!strcmp(NS_STRINGIFY(MOZ_UPDATE_CHANNEL), "beta")) {
     if (XRE_IsParentProcess()) { // cached ClientID hasn't been read yet
+      ThreadStackHelper::Startup();
       BackgroundHangThread::Startup();
       BackgroundHangManager::sInstance = new BackgroundHangManager();
 
       nsCOMPtr<nsIObserverService> observerService = mozilla::services::GetObserverService();
       MOZ_ASSERT(observerService);
 
       observerService->AddObserver(BackgroundHangManager::sInstance, "profile-after-change", false);
       return;
     } else if(DisableOnBeta()){
       return;
     }
   }
 
+  ThreadStackHelper::Startup();
   BackgroundHangThread::Startup();
   BackgroundHangManager::sInstance = new BackgroundHangManager();
 #endif
 }
 
 void
 BackgroundHangMonitor::Shutdown()
 {
@@ -626,16 +628,17 @@ BackgroundHangMonitor::Shutdown()
   }
 
   MOZ_ASSERT(BackgroundHangManager::sInstance, "Not initialized");
   /* Scope our lock inside Shutdown() because the sInstance object can
      be destroyed as soon as we set sInstance to nullptr below, and
      we don't want to hold the lock when it's being destroyed. */
   BackgroundHangManager::sInstance->Shutdown();
   BackgroundHangManager::sInstance = nullptr;
+  ThreadStackHelper::Shutdown();
   BackgroundHangManager::sDisabled = true;
 #endif
 }
 
 BackgroundHangMonitor::BackgroundHangMonitor(const char* aName,
                                              uint32_t aTimeoutMs,
                                              uint32_t aMaxTimeoutMs,
                                              ThreadType aThreadType)
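
The two ThreadStackHelper::Startup() call sites added above rely on Startup()/Shutdown() being counted, so repeated or nested users stay balanced. A minimal sketch of that pattern, with the platform-specific signal-handler work elided:

// Sketch of the counted init/teardown pattern used by
// ThreadStackHelper::Startup()/Shutdown(): only the first Startup installs
// the signal handler and only the last Shutdown removes it. Illustrative.
static int sInitialized = 0;

void Startup() {
  if (sInitialized == 0) {
    // install the FillStack signal handler here (Linux-only in the patch)
  }
  sInitialized++;
}

void Shutdown() {
  if (sInitialized == 1) {
    // restore the default handler here
  }
  sInitialized--;
}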
--- a/xpcom/threads/ThreadStackHelper.cpp
+++ b/xpcom/threads/ThreadStackHelper.cpp
@@ -4,16 +4,19 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "ThreadStackHelper.h"
 #include "MainThreadUtils.h"
 #include "nsJSPrincipals.h"
 #include "nsScriptSecurityManager.h"
 #include "jsfriendapi.h"
+#ifdef MOZ_THREADSTACKHELPER_NATIVE
+#include "shared-libraries.h"
+#endif
 #ifdef MOZ_THREADSTACKHELPER_PSEUDO
 #include "PseudoStack.h"
 #endif
 
 #include "mozilla/Assertions.h"
 #include "mozilla/Attributes.h"
 #include "mozilla/IntegerPrintfMacros.h"
 #include "mozilla/Move.h"
@@ -57,30 +60,101 @@
 // Some NDKs don't define this constant even though the kernel supports it.
 #define __NR_rt_tgsigqueueinfo (__NR_SYSCALL_BASE+363)
 #endif
 #ifndef SYS_rt_tgsigqueueinfo
 #define SYS_rt_tgsigqueueinfo __NR_rt_tgsigqueueinfo
 #endif
 #endif
 
+#ifdef MOZ_THREADSTACKHELPER_NATIVE
+#if defined(MOZ_THREADSTACKHELPER_X86) || \
+    defined(MOZ_THREADSTACKHELPER_X64)
+// On these architectures, the stack grows downwards (toward lower addresses).
+#define MOZ_THREADSTACKHELPER_STACK_GROWS_DOWN
+#else
+#error "Unsupported architecture"
+#endif
+#endif // MOZ_THREADSTACKHELPER_NATIVE
+
 namespace mozilla {
 
+void
+ThreadStackHelper::Startup()
+{
+#if defined(XP_LINUX)
+  MOZ_ASSERT(NS_IsMainThread());
+  if (!sInitialized) {
+    // TODO: centralize signal number allocation
+    sFillStackSignum = SIGRTMIN + 4;
+    if (sFillStackSignum > SIGRTMAX) {
+      // Leave uninitialized
+      MOZ_ASSERT(false);
+      return;
+    }
+    struct sigaction sigact = {};
+    sigact.sa_sigaction = FillStackHandler;
+    sigemptyset(&sigact.sa_mask);
+    sigact.sa_flags = SA_SIGINFO | SA_RESTART;
+    MOZ_ALWAYS_TRUE(!::sigaction(sFillStackSignum, &sigact, nullptr));
+  }
+  sInitialized++;
+#endif
+}
+
+void
+ThreadStackHelper::Shutdown()
+{
+#if defined(XP_LINUX)
+  MOZ_ASSERT(NS_IsMainThread());
+  if (sInitialized == 1) {
+    struct sigaction sigact = {};
+    sigact.sa_handler = SIG_DFL;
+    MOZ_ALWAYS_TRUE(!::sigaction(sFillStackSignum, &sigact, nullptr));
+  }
+  sInitialized--;
+#endif
+}
+
 ThreadStackHelper::ThreadStackHelper()
+  : mStackToFill(nullptr)
 #ifdef MOZ_THREADSTACKHELPER_PSEUDO
-  : mStackToFill(nullptr)
   , mPseudoStack(profiler_get_pseudo_stack())
   , mMaxStackSize(Stack::sMaxInlineStorage)
   , mMaxBufferSize(512)
 #endif
+{
+#if defined(XP_LINUX)
+  MOZ_ALWAYS_TRUE(!::sem_init(&mSem, 0, 0));
+  mThreadID = ::syscall(SYS_gettid);
+#elif defined(XP_WIN)
+  mInitialized = !!::DuplicateHandle(
+    ::GetCurrentProcess(), ::GetCurrentThread(),
+    ::GetCurrentProcess(), &mThreadID,
+    THREAD_SUSPEND_RESUME
 #ifdef MOZ_THREADSTACKHELPER_NATIVE
-  , mNativeStackToFill(nullptr)
+    | THREAD_GET_CONTEXT | THREAD_QUERY_INFORMATION
 #endif
+    , FALSE, 0);
+  mStackTop = profiler_get_stack_top();
+  MOZ_ASSERT(mInitialized);
+#elif defined(XP_MACOSX)
+  mThreadID = mach_thread_self();
+#endif
+}
+
+ThreadStackHelper::~ThreadStackHelper()
 {
-  mThreadId = profiler_current_thread_id();
+#if defined(XP_LINUX)
+  MOZ_ALWAYS_TRUE(!::sem_destroy(&mSem));
+#elif defined(XP_WIN)
+  if (mInitialized) {
+    MOZ_ALWAYS_TRUE(!!::CloseHandle(mThreadID));
+  }
+#endif
 }
 
 namespace {
 template<typename T>
 class ScopedSetPtr
 {
 private:
   T*& mPtr;
@@ -92,68 +166,171 @@ public:
 
 void
 ThreadStackHelper::GetPseudoStack(Stack& aStack)
 {
   GetStacksInternal(&aStack, nullptr);
 }
 
 void
+ThreadStackHelper::GetStacksInternal(Stack* aStack, NativeStack* aNativeStack)
+{
+  // Always run PrepareStackBuffer first to clear aStack
+  if (aStack && !PrepareStackBuffer(*aStack)) {
+    // Skip and return empty aStack
+    return;
+  }
+
+  ScopedSetPtr<Stack> stackPtr(mStackToFill, aStack);
+
+#if defined(XP_LINUX)
+  if (!sInitialized) {
+    MOZ_ASSERT(false);
+    return;
+  }
+  if (aStack) {
+    siginfo_t uinfo = {};
+    uinfo.si_signo = sFillStackSignum;
+    uinfo.si_code = SI_QUEUE;
+    uinfo.si_pid = getpid();
+    uinfo.si_uid = getuid();
+    uinfo.si_value.sival_ptr = this;
+    if (::syscall(SYS_rt_tgsigqueueinfo, uinfo.si_pid,
+                  mThreadID, sFillStackSignum, &uinfo)) {
+      // rt_tgsigqueueinfo was added in Linux 2.6.31.
+      // Could have failed because the syscall did not exist.
+      return;
+    }
+    MOZ_ALWAYS_TRUE(!::sem_wait(&mSem));
+  }
+
+#elif defined(XP_WIN)
+  if (!mInitialized) {
+    MOZ_ASSERT(false);
+    return;
+  }
+
+  // NOTE: We can only perform frame pointer stack walking on non win64
+  // platforms, because Win64 always omits frame pointers. We don't want to use
+  // MozStackWalk here, so we just skip collecting stacks entirely.
+#ifndef MOZ_THREADSTACKHELPER_X64
+  if (aNativeStack) {
+    aNativeStack->reserve(Telemetry::HangStack::sMaxNativeFrames);
+  }
+#endif
+
+  if (::SuspendThread(mThreadID) == DWORD(-1)) {
+    MOZ_ASSERT(false);
+    return;
+  }
+
+  // SuspendThread is asynchronous, so the thread may still be running. Use
+  // GetThreadContext to ensure it's really suspended.
+  // See https://blogs.msdn.microsoft.com/oldnewthing/20150205-00/?p=44743.
+  CONTEXT context;
+  memset(&context, 0, sizeof(context));
+  context.ContextFlags = CONTEXT_CONTROL;
+  if (::GetThreadContext(mThreadID, &context)) {
+    if (aStack) {
+      FillStackBuffer();
+    }
+
+#ifndef MOZ_THREADSTACKHELPER_X64
+    if (aNativeStack) {
+      auto callback = [](uint32_t, void* aPC, void*, void* aClosure) {
+        NativeStack* stack = static_cast<NativeStack*>(aClosure);
+        stack->push_back(reinterpret_cast<uintptr_t>(aPC));
+      };
+
+      // Now we need to get our frame pointer, our stack pointer, and our stack
+      // top. Rather than registering and storing the stack top ourselves, we
+      // use the Gecko profiler to look it up.
+      void** framePointer = reinterpret_cast<void**>(context.Ebp);
+      void** stackPointer = reinterpret_cast<void**>(context.Esp);
+
+      MOZ_ASSERT(mStackTop, "The thread should be registered by the profiler");
+
+      // Double-check that the values we pulled for the thread make sense
+      // before walking the stack.
+      if (mStackTop && framePointer >= stackPointer && framePointer < mStackTop) {
+        // NOTE: In bug 1346415 this was changed to use FramePointerStackWalk.
+        // Lowering the background hang timer threshold made the monitor fire
+        // on CI infrastructure early in the boot process, and MozStackWalk
+        // would then deadlock: the target thread could be suspended while
+        // holding the Windows-internal lock on the function table, which we
+        // would then try to take in order to walk its stack.
+        //
+        // FramePointerStackWalk is implemented entirely in userspace and thus
+        // doesn't have the same deadlock issue. Unfortunately, as 64-bit
+        // Windows is not guaranteed to have frame pointers, the stack walking
+        // code is only enabled on 32-bit Windows builds (bug 1357829).
+        FramePointerStackWalk(callback, /* skipFrames */ 0,
+                              /* maxFrames */ Telemetry::HangStack::sMaxNativeFrames,
+                              reinterpret_cast<void*>(aNativeStack), framePointer,
+                              mStackTop);
+      }
+    }
+#endif
+  }
+
+  MOZ_ALWAYS_TRUE(::ResumeThread(mThreadID) != DWORD(-1));
+
+#elif defined(XP_MACOSX)
+# if defined(MOZ_VALGRIND) && defined(RUNNING_ON_VALGRIND)
+  if (RUNNING_ON_VALGRIND) {
+    /* thread_suspend and thread_resume sometimes hang when running on
+       Valgrind, for unknown reasons, so just avoid them. See bug 1100911. */
+    return;
+  }
+# endif
+
+  if (aStack) {
+    if (::thread_suspend(mThreadID) != KERN_SUCCESS) {
+      MOZ_ASSERT(false);
+      return;
+    }
+
+    FillStackBuffer();
+
+    MOZ_ALWAYS_TRUE(::thread_resume(mThreadID) == KERN_SUCCESS);
+  }
+
+#endif
+}
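
For readers unfamiliar with FramePointerStackWalk: because it takes no OS-internal locks, it is safe to run against a suspended thread. A hedged sketch of the underlying idea (not the mozglue implementation; the Frame layout and bounds checks are simplified):

#include <cstddef>

// On x86 with frame pointers, each frame begins with the saved EBP (a link
// to the caller's frame) followed by the return address.
struct Frame {
  Frame* next;          // saved frame pointer of the caller
  void* returnAddress;  // PC to report for the caller
};

void WalkByFramePointer(void* aFp, void* aStackTop, size_t aMaxFrames,
                        void (*aCallback)(void* aPC, void* aClosure),
                        void* aClosure) {
  Frame* frame = static_cast<Frame*>(aFp);
  for (size_t i = 0; i < aMaxFrames; ++i) {
    // Stay inside [current frame, stack top), mirroring the
    // framePointer >= stackPointer && framePointer < mStackTop check above.
    if (!frame || static_cast<void*>(frame) >= aStackTop) {
      break;
    }
    aCallback(frame->returnAddress, aClosure);
    if (frame->next <= frame) {
      break;  // corrupt chain or bottom frame; links must move toward the top
    }
    frame = frame->next;
  }
}
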
+
+void
 ThreadStackHelper::GetNativeStack(NativeStack& aNativeStack)
 {
+#ifdef MOZ_THREADSTACKHELPER_NATIVE
   GetStacksInternal(nullptr, &aNativeStack);
+#endif // MOZ_THREADSTACKHELPER_NATIVE
 }
 
 void
 ThreadStackHelper::GetPseudoAndNativeStack(Stack& aStack, NativeStack& aNativeStack)
 {
   GetStacksInternal(&aStack, &aNativeStack);
 }
 
-void
-ThreadStackHelper::GetStacksInternal(Stack* aStack, NativeStack* aNativeStack)
-{
-#if defined(MOZ_THREADSTACKHELPER_PSEUDO) || defined(MOZ_THREADSTACKHELPER_NATIVE)
-  // Always run PrepareStackBuffer first to clear aStack
-  if (aStack && !PrepareStackBuffer(*aStack)) {
-    // Skip and return empty aStack
-    return;
-  }
+#ifdef XP_LINUX
 
-  // Prepare the native stack
-  if (aNativeStack) {
-    aNativeStack->clear();
-    aNativeStack->reserve(Telemetry::HangStack::sMaxNativeFrames);
-  }
+int ThreadStackHelper::sInitialized;
+int ThreadStackHelper::sFillStackSignum;
 
-#ifdef MOZ_THREADSTACKHELPER_PSEUDO
-  ScopedSetPtr<Stack> stackPtr(mStackToFill, aStack);
-#endif
-#ifdef MOZ_THREADSTACKHELPER_NATIVE
-  ScopedSetPtr<NativeStack> nativeStackPtr(mNativeStackToFill, aNativeStack);
-#endif
-
-  auto callback = [&, this] (void** aPCs, size_t aCount) {
-    FillStackBuffer();
+void
+ThreadStackHelper::FillStackHandler(int aSignal, siginfo_t* aInfo,
+                                    void* aContext)
+{
+  ThreadStackHelper* const helper =
+    reinterpret_cast<ThreadStackHelper*>(aInfo->si_value.sival_ptr);
+  helper->FillStackBuffer();
+  ::sem_post(&helper->mSem);
+}
 
-#ifdef MOZ_THREADSTACKHELPER_NATIVE
-    if (mNativeStackToFill) {
-      while (aCount-- &&
-             mNativeStackToFill->size() < mNativeStackToFill->capacity()) {
-        mNativeStackToFill->push_back(reinterpret_cast<uintptr_t>(aPCs[aCount]));
-      }
-    }
-#endif
-  };
-
-  profiler_suspend_and_sample_thread(mThreadId,
-                                     callback,
-                                     /* aSampleNative = */ !!aNativeStack);
-#endif
-}
+#endif // XP_LINUX
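
The Linux path above is a two-thread handshake: the watchdog queues a signal whose si_value carries the helper pointer, the target thread's handler fills the buffer, and sem_post releases the waiting watchdog. A standalone sketch of the same shape, using the portable sigqueue(3) (which targets the whole process, unlike the rt_tgsigqueueinfo syscall the real code uses to direct the signal at one specific thread):

#include <signal.h>
#include <semaphore.h>
#include <unistd.h>
#include <cerrno>
#include <cstdio>

static sem_t gDone;

// Runs on whichever thread receives the signal; only async-signal-safe
// operations are permitted here (sem_post is one of them).
static void Handler(int, siginfo_t* aInfo, void*) {
  void* payload = aInfo->si_value.sival_ptr;  // the "this" pointer in Gecko
  (void)payload;                              // real code: fill stack buffer
  sem_post(&gDone);                           // release the waiting sampler
}

int main() {
  sem_init(&gDone, 0, 0);

  struct sigaction act = {};
  act.sa_sigaction = Handler;
  act.sa_flags = SA_SIGINFO | SA_RESTART;
  sigemptyset(&act.sa_mask);
  sigaction(SIGRTMIN, &act, nullptr);

  union sigval value;
  value.sival_ptr = &gDone;             // payload rides along in si_value
  sigqueue(getpid(), SIGRTMIN, value);  // queue the signal plus payload
  while (sem_wait(&gDone) == -1 && errno == EINTR) {
    // retry if the signal delivery itself interrupted the wait
  }
  puts("handler completed");
  return 0;
}
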
 
 bool
 ThreadStackHelper::PrepareStackBuffer(Stack& aStack)
 {
   // Return false to skip getting the stack and return an empty stack
   aStack.clear();
 #ifdef MOZ_THREADSTACKHELPER_PSEUDO
   /* Normally, provided the profiler is enabled, it would be an error if we
@@ -302,19 +479,19 @@ ThreadStackHelper::AppendJSEntry(const v
   return label;
 }
 
 #endif // MOZ_THREADSTACKHELPER_PSEUDO
 
 void
 ThreadStackHelper::FillStackBuffer()
 {
-#ifdef MOZ_THREADSTACKHELPER_PSEUDO
   MOZ_ASSERT(mStackToFill->empty());
 
+#ifdef MOZ_THREADSTACKHELPER_PSEUDO
   size_t reservedSize = mStackToFill->capacity();
   size_t reservedBufferSize = mStackToFill->AvailableBufferSize();
   intptr_t availableBufferSize = intptr_t(reservedBufferSize);
 
   // Go from front to back
   const volatile js::ProfileEntry* entry = mPseudoStack->mStack;
   const volatile js::ProfileEntry* end = entry + mPseudoStack->stackSize();
   // Deduplicate identical, consecutive frames
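
The hunk is truncated here, but the loop that comment introduces collapses runs of identical labels so a hot loop doesn't flood the fixed-size buffer. The idea, as a hedged sketch with illustrative names:

#include <vector>

// Copy labels, skipping an entry when it repeats the previous one.
void CopyDeduplicated(const std::vector<const char*>& aFrames,
                      std::vector<const char*>& aOut) {
  const char* prevLabel = nullptr;
  for (const char* label : aFrames) {
    if (label == prevLabel) {
      continue;  // identical consecutive frame; keep only the first
    }
    aOut.push_back(label);
    prevLabel = label;
  }
}
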
--- a/xpcom/threads/ThreadStackHelper.h
+++ b/xpcom/threads/ThreadStackHelper.h
@@ -17,30 +17,35 @@
 #include <semaphore.h>
 #include <sys/types.h>
 #elif defined(XP_WIN)
 #include <windows.h>
 #elif defined(XP_MACOSX)
 #include <mach/mach.h>
 #endif
 
-// Support pseudostack and native stack on these platforms.
+// Support pseudostack on these platforms.
 #if defined(XP_LINUX) || defined(XP_WIN) || defined(XP_MACOSX)
 #  ifdef MOZ_GECKO_PROFILER
 #    define MOZ_THREADSTACKHELPER_PSEUDO
-#    define MOZ_THREADSTACKHELPER_NATIVE
 #  endif
 #endif
 
-// NOTE: Currently, due to a problem with LUL stackwalking initialization taking
-// a long time (bug 1365309), we don't perform pseudostack or native stack
-// walking on Linux.
-#if defined(XP_LINUX)
-#  undef MOZ_THREADSTACKHELPER_NATIVE
-#  undef MOZ_THREADSTACKHELPER_PSEUDO
+#if defined(MOZ_THREADSTACKHELPER_PSEUDO) && defined(XP_WIN)
+#  define MOZ_THREADSTACKHELPER_NATIVE
+#  if defined(__i386__) || defined(_M_IX86)
+#    define MOZ_THREADSTACKHELPER_X86
+#  elif defined(__x86_64__) || defined(_M_X64)
+#    define MOZ_THREADSTACKHELPER_X64
+#  elif defined(__arm__) || defined(_M_ARM)
+#    define MOZ_THREADSTACKHELPER_ARM
+#  else
+     // Unsupported architecture
+#    undef MOZ_THREADSTACKHELPER_NATIVE
+#  endif
 #endif
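
These architecture macros matter because the Win32 CONTEXT structure names its registers per architecture; the .cpp above reads context.Ebp, which only exists on x86. A hedged illustration of the gating a consumer would need (the helper name is hypothetical; only the X86 arm is exercised by the code above):

#include <windows.h>

// Pull the frame pointer out of a captured CONTEXT, architecture-aware.
void** FramePointerFromContext(const CONTEXT& aContext) {
#if defined(MOZ_THREADSTACKHELPER_X86)
  return reinterpret_cast<void**>(aContext.Ebp);  // x86 frame pointer
#elif defined(MOZ_THREADSTACKHELPER_X64)
  // Rbp exists in the x64 CONTEXT, but MSVC does not guarantee it is used
  // as a frame pointer, which is why the walk is skipped on Win64 above.
  return reinterpret_cast<void**>(aContext.Rbp);
#else
  return nullptr;
#endif
}
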
 
 namespace mozilla {
 
 /**
  * ThreadStackHelper is used to retrieve the profiler pseudo-stack of a
  * thread, as an alternative to using the profiler to take a profile.
  * The target thread first declares a ThreadStackHelper instance;
@@ -57,40 +62,48 @@ public:
 
   // When a native stack is gathered, this vector holds the raw program counter
   // values that FramePointerStackWalk will return to us after it walks the
   // stack. When gathering the Telemetry payload, Telemetry will take care of
   // mapping these program counters to proper addresses within modules.
   typedef Telemetry::NativeHangStack NativeStack;
 
 private:
+  Stack* mStackToFill;
 #ifdef MOZ_THREADSTACKHELPER_PSEUDO
-  Stack* mStackToFill;
   const PseudoStack* const mPseudoStack;
   size_t mMaxStackSize;
   size_t mMaxBufferSize;
 #endif
-#ifdef MOZ_THREADSTACKHELPER_NATIVE
-  NativeStack* mNativeStackToFill;
-#endif
 
   bool PrepareStackBuffer(Stack& aStack);
   void FillStackBuffer();
 #ifdef MOZ_THREADSTACKHELPER_PSEUDO
   const char* AppendJSEntry(const volatile js::ProfileEntry* aEntry,
                             intptr_t& aAvailableBufferSize,
                             const char* aPrevLabel);
 #endif
 
 public:
   /**
+   * Initialize ThreadStackHelper. Must be called from the main thread.
+   */
+  static void Startup();
+  /**
+   * Uninitialize ThreadStackHelper. Must be called from the main thread.
+   */
+  static void Shutdown();
+
+  /**
    * Create a ThreadStackHelper instance targeting the current thread.
    */
   ThreadStackHelper();
 
+  ~ThreadStackHelper();
+
   /**
    * Retrieve the current pseudostack of the thread associated
    * with this ThreadStackHelper.
    *
    * @param aStack Stack instance to be filled.
    */
   void GetPseudoStack(Stack& aStack);
 
@@ -112,16 +125,34 @@ public:
    */
   void GetPseudoAndNativeStack(Stack& aStack, NativeStack& aNativeStack);
 
 private:
  // Fill in the passed aStack and aNativeStack data structures with backtraces.
   // If only aStack needs to be collected, nullptr may be passed for
   // aNativeStack, and vice versa.
   void GetStacksInternal(Stack* aStack, NativeStack* aNativeStack);
+#if defined(XP_LINUX)
+private:
+  static int sInitialized;
+  static int sFillStackSignum;
 
-  // The profiler's unique thread identifier for the target thread.
-  int mThreadId;
+  static void FillStackHandler(int aSignal, siginfo_t* aInfo, void* aContext);
+
+  sem_t mSem;
+  pid_t mThreadID;
+
+#elif defined(XP_WIN)
+private:
+  bool mInitialized;
+  HANDLE mThreadID;
+  void* mStackTop;
+
+#elif defined(XP_MACOSX)
+private:
+  thread_act_t mThreadID;
+
+#endif
 };
 
 } // namespace mozilla
 
 #endif // mozilla_ThreadStackHelper_h
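
Finally, a hedged sketch of how a consumer such as BackgroundHangMonitor drives this API; the flow below is illustrative, not the monitor's actual code:

#include "mozilla/ThreadStackHelper.h"

using mozilla::ThreadStackHelper;

// The target thread constructs the helper so it captures that thread's
// identity; a watchdog thread later calls the getters on its behalf.
void ReportPermahang(ThreadStackHelper& aHelper) {
  ThreadStackHelper::Stack stack;
  ThreadStackHelper::NativeStack nativeStack;
  // One suspension gathers both the pseudo-stack labels and, where
  // MOZ_THREADSTACKHELPER_NATIVE is in effect (32-bit Windows here),
  // the raw native PCs.
  aHelper.GetPseudoAndNativeStack(stack, nativeStack);
  // ... hand stack/nativeStack off to Telemetry ...
}
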