Backed out 4 changesets (bug 1357829) for build bustage in xpcshell\selftest.py on windows 8 x64 opt. a=backout
author Iris Hsiao <ihsiao@mozilla.com>
Fri, 19 May 2017 11:27:38 +0800
changeset 580912 8e98dab5054dd093a37ba20c62cf0523e484cfbd
parent 580850 183c3537157290d7ccef475651b6bc35db914924
child 580913 9c57d5bd70d0a7d69b27f815b6b087c7dfba0af4
child 580943 ec61b5fa7f2d4daa52d7e8ca40d7b019e55f79d2
child 580954 a55cddd8ac8c6c156551108d7181a1e6421abc9e
child 580982 eaaf7f3a380811880f1b6a5a3729125bf42c51d5
child 580985 cf863e85811c008f01826e69110501b609b22eab
child 580990 6b0dd1324fab3ef0e4a8e8eb90f7cc76d1021e72
child 581043 2a01dedf6ecce19d476bada801bc6f155875b3e7
child 581044 9c8316951f6f5a0c3b99ec7457a200042273fe70
child 581061 3547e3b2db25d2b16e41835db301d29d606fda31
child 581068 ba599cc3fe04ac385d8b3b99eb676fb2b32523ef
child 581074 80ae656c64dd39fcb091788a965e56cc035d2ef7
child 581099 6ae029493580ad01aa99b3b6988d1fe0201ae6e5
child 581100 9657f7ce955524ad834ff632857d94a709c0e52d
child 581104 7d3a6fa96f04222ff37ed54f183475a4c9406dc7
child 581110 e1ea27f87fb78f18d40c5aca49862e231ed81f11
child 581134 3d19eeb5e53667258da2cb27d1a8ae850c925b2c
child 581143 eb89ea058fdcfddf3e5328cc11651a4ab84c1507
child 581155 99bbc3b0079ece42e21113d60afe5596aa68eb92
child 581164 35bdac83eef0e4e6f0728f2582de7614da2abc3a
child 581165 fb3224375a1ffe4ef8788a25e6b864833a46fe86
child 581168 805d89fbe730ecf1698d5d7440cc0339be0f8169
child 581169 332bc6580d2b3f3667d96b5ee3d566fe3e751f40
child 581171 32be8d97472f5774964455edc78f6feb73a5c780
child 581179 18541f371c2e8272f04619892c12de91a6ae89b1
child 581180 2ccf10ab61cfd710b2b1e80db76723784299c8fa
child 581181 e2535cdf89a870e72f286bb2a85e930584c1201d
child 581182 ea7c6a82a745fe79ae830969b54da1ddf86c647a
child 581187 06a143a8352e9fb37b1f21fa56d2659dbcee0d79
child 581194 d1d22e5531961946348c04e804e1208619182094
child 581231 4edaee6106621099bdc7107b308372ec2bfca864
child 581295 1667def6751501149dc69e7d9608cffdf3c19e7d
child 581321 9c654bbab98cfb39cc2daa32a738f74a6c987670
child 581331 c5f4cb1b8f1eb74a88a0322366802adfe165de01
child 581427 63e371caaa8c9b5ae87078a00953598118c2099c
child 581431 e0b7fc0a3a17f91827812c13a8eac65b2281d96d
child 582332 be78efea05263de3b4658e905245eec5f8785fd1
child 582425 c76042ee15a82ad3391b8a05adaac1154284e257
child 582430 bf90ed3a84be7548bb0b53334bae6effd721efe9
child 582465 600c55cd74bf1e7f423d39805b259f30a1271255
child 583641 b3b8ba0da4e94f4fb61ec59755a8a0d839b69a36
child 583643 18a4de9da92ccb8b784d9101d2c4019728db1842
child 584352 ba282c85f60a76cc6d8e8b72f6a11084cedb9ff1
push id 59712
push user bmo:tiago.paez11@gmail.com
push date Fri, 19 May 2017 04:56:22 +0000
reviewers backout
bugs 1357829
milestone 55.0a1
backs out 8ea202bb1103a693837e46374eb37ef9091ac15a
cebe4d7abeda54f0e14a6e9c400e3256aa3e4db0
378d473c961972a1d273f53fbbe151300384c8d7
86ebe868d443bd5da60d86844bd622ae1865ea92
Backed out 4 changesets (bug 1357829) for build bustage in xpcshell\selftest.py on windows 8 x64 opt. a=backout
Backed out changeset 8ea202bb1103 (bug 1357829)
Backed out changeset cebe4d7abeda (bug 1357829)
Backed out changeset 378d473c9619 (bug 1357829)
Backed out changeset 86ebe868d443 (bug 1357829)
toolkit/components/telemetry/tests/unit/test_ThreadHangStats.js
toolkit/components/telemetry/tests/unit/xpcshell.ini
tools/profiler/core/platform-linux-android.cpp
tools/profiler/core/platform-macos.cpp
tools/profiler/core/platform-win32.cpp
tools/profiler/core/platform.cpp
tools/profiler/public/GeckoProfiler.h
xpcom/threads/BackgroundHangMonitor.cpp
xpcom/threads/ThreadStackHelper.cpp
xpcom/threads/ThreadStackHelper.h
--- a/toolkit/components/telemetry/tests/unit/test_ThreadHangStats.js
+++ b/toolkit/components/telemetry/tests/unit/test_ThreadHangStats.js
@@ -79,23 +79,26 @@ function run_test() {
 
       ok(Array.isArray(endHangs.hangs));
       notEqual(endHangs.hangs.length, 0);
 
       ok(Array.isArray(endHangs.hangs[0].stack));
       notEqual(endHangs.hangs[0].stack.length, 0);
       equal(typeof endHangs.hangs[0].stack[0], "string");
 
-      // Make sure one of the hangs is a permanent
-      // hang containing a native stack.
-      ok(endHangs.hangs.some((hang) => (
-        hang.nativeStack &&
-        Array.isArray(hang.nativeStack.memoryMap) &&
-        Array.isArray(hang.nativeStack.stacks)
-      )));
+      // Native stack gathering is only enabled on Windows x86.
+      if (mozinfo.os == "win" && mozinfo.bits == 32) {
+        // Make sure one of the hangs is a permanent
+        // hang containing a native stack.
+        ok(endHangs.hangs.some((hang) => (
+          hang.nativeStack &&
+          Array.isArray(hang.nativeStack.memoryMap) &&
+          Array.isArray(hang.nativeStack.stacks)
+        )));
+      }
 
       check_histogram(endHangs.hangs[0].histogram);
 
       do_test_finished();
     };
 
     check_results();
   });
--- a/toolkit/components/telemetry/tests/unit/xpcshell.ini
+++ b/toolkit/components/telemetry/tests/unit/xpcshell.ini
@@ -50,17 +50,16 @@ tags = addons
 [test_TelemetryControllerBuildID.js]
 [test_TelemetrySendOldPings.js]
 skip-if = os == "android" # Disabled due to intermittent orange on Android
 tags = addons
 [test_TelemetrySession.js]
 tags = addons
 [test_TelemetrySession_activeTicks.js]
 [test_ThreadHangStats.js]
-skip-if = os == "android" || os == "linux" # BHR is disabled on linux (bug 1365309)
 run-sequentially = Bug 1046307, test can fail intermittently when CPU load is high
 [test_TelemetrySend.js]
 [test_ChildHistograms.js]
 skip-if = os == "android" # Disabled due to crashes (see bug 1331366)
 tags = addons
 [test_ChildScalars.js]
 skip-if = os == "android" # Disabled due to crashes (see bug 1331366)
 [test_TelemetryReportingPolicy.js]
--- a/tools/profiler/core/platform-linux-android.cpp
+++ b/tools/profiler/core/platform-linux-android.cpp
@@ -125,17 +125,17 @@ public:
 
   ~PlatformData()
   {
     MOZ_COUNT_DTOR(PlatformData);
   }
 };
 
 ////////////////////////////////////////////////////////////////////////
-// BEGIN Sampler target specifics
+// BEGIN SamplerThread target specifics
 
 // The only way to reliably interrupt a Linux thread and inspect its register
 // and stack state is by sending a signal to it, and doing the work inside the
 // signal handler.  But we don't want to run much code inside the signal
 // handler, since POSIX severely restricts what we can do in signal handlers.
 // So we use a system of semaphores to suspend the thread and allow the
 // sampler thread to do all the work of unwinding and copying out whatever
 // data it wants.
@@ -194,70 +194,84 @@ struct SigHandlerCoordinator
   }
 
   sem_t mMessage2; // To sampler: "context is in sSigHandlerCoordinator"
   sem_t mMessage3; // To samplee: "resume"
   sem_t mMessage4; // To sampler: "finished with sSigHandlerCoordinator"
   ucontext_t mUContext; // Context at signal
 };
 
-struct SigHandlerCoordinator* Sampler::sSigHandlerCoordinator = nullptr;
+struct SigHandlerCoordinator* SamplerThread::sSigHandlerCoordinator = nullptr;
 
 static void
 SigprofHandler(int aSignal, siginfo_t* aInfo, void* aContext)
 {
   // Avoid TSan warning about clobbering errno.
   int savedErrno = errno;
 
   MOZ_ASSERT(aSignal == SIGPROF);
-  MOZ_ASSERT(Sampler::sSigHandlerCoordinator);
+  MOZ_ASSERT(SamplerThread::sSigHandlerCoordinator);
 
   // By sending us this signal, the sampler thread has sent us message 1 in
   // the comment above, with the meaning "|sSigHandlerCoordinator| is ready
   // for use, please copy your register context into it."
-  Sampler::sSigHandlerCoordinator->mUContext =
+  SamplerThread::sSigHandlerCoordinator->mUContext =
     *static_cast<ucontext_t*>(aContext);
 
   // Send message 2: tell the sampler thread that the context has been copied
   // into |sSigHandlerCoordinator->mUContext|.  sem_post can never fail by
   // being interrupted by a signal, so there's no loop around this call.
-  int r = sem_post(&Sampler::sSigHandlerCoordinator->mMessage2);
+  int r = sem_post(&SamplerThread::sSigHandlerCoordinator->mMessage2);
   MOZ_ASSERT(r == 0);
 
   // At this point, the sampler thread assumes we are suspended, so we must
   // not touch any global state here.
 
   // Wait for message 3: the sampler thread tells us to resume.
   while (true) {
-    r = sem_wait(&Sampler::sSigHandlerCoordinator->mMessage3);
+    r = sem_wait(&SamplerThread::sSigHandlerCoordinator->mMessage3);
     if (r == -1 && errno == EINTR) {
       // Interrupted by a signal.  Try again.
       continue;
     }
     // We don't expect any other kind of failure
     MOZ_ASSERT(r == 0);
     break;
   }
 
   // Send message 4: tell the sampler thread that we are finished accessing
   // |sSigHandlerCoordinator|.  After this point it is not safe to touch
   // |sSigHandlerCoordinator|.
-  r = sem_post(&Sampler::sSigHandlerCoordinator->mMessage4);
+  r = sem_post(&SamplerThread::sSigHandlerCoordinator->mMessage4);
   MOZ_ASSERT(r == 0);
 
   errno = savedErrno;
 }
 
-Sampler::Sampler(PSLockRef aLock)
-  : mMyPid(getpid())
+static void*
+ThreadEntry(void* aArg)
+{
+  auto thread = static_cast<SamplerThread*>(aArg);
+  thread->mSamplerTid = gettid();
+  thread->Run();
+  return nullptr;
+}
+
+SamplerThread::SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
+                             double aIntervalMilliseconds)
+  : mActivityGeneration(aActivityGeneration)
+  , mIntervalMicroseconds(
+      std::max(1, int(floor(aIntervalMilliseconds * 1000 + 0.5))))
+  , mMyPid(getpid())
   // We don't know what the sampler thread's ID will be until it runs, so set
-  // mSamplerTid to a dummy value and fill it in for real in
-  // SuspendAndSampleAndResumeThread().
+  // mSamplerTid to a dummy value and fill it in for real in ThreadEntry().
   , mSamplerTid(-1)
 {
+  MOZ_RELEASE_ASSERT(NS_IsMainThread());
+
 #if defined(USE_EHABI_STACKWALK)
   mozilla::EHABIStackWalkInit();
 #elif defined(USE_LUL_STACKWALK)
   bool createdLUL = false;
   lul::LUL* lul = CorePS::Lul(aLock);
   if (!lul) {
     lul = new lul::LUL(logging_sink_for_LUL);
     CorePS::SetLul(aLock, lul);
@@ -285,39 +299,76 @@ Sampler::Sampler(PSLockRef aLock)
 
     // Has a test been requested?
     if (PR_GetEnv("MOZ_PROFILER_LUL_TEST")) {
       int nTests = 0, nTestsPassed = 0;
       RunLulUnitTests(&nTests, &nTestsPassed, lul);
     }
   }
 #endif
+
+  // Start the sampling thread. It repeatedly sends a SIGPROF signal. Sending
+  // the signal ourselves instead of relying on itimer provides much better
+  // accuracy.
+  if (pthread_create(&mThread, nullptr, ThreadEntry, this) != 0) {
+    MOZ_CRASH("pthread_create failed");
+  }
+}
+
+SamplerThread::~SamplerThread()
+{
+  pthread_join(mThread, nullptr);
+}
+
+void
+SamplerThread::Stop(PSLockRef aLock)
+{
+  MOZ_RELEASE_ASSERT(NS_IsMainThread());
+
+  // Restore old signal handler. This is global state so it's important that
+  // we do it now, while gPSMutex is locked. It's safe to do this now even
+  // though this SamplerThread is still alive, because the next time the main
+  // loop of Run() iterates it won't get past the mActivityGeneration check,
+  // and so won't send any signals.
+  sigaction(SIGPROF, &mOldSigprofHandler, 0);
 }
 
 void
-Sampler::Disable(PSLockRef aLock)
+SamplerThread::SleepMicro(uint32_t aMicroseconds)
 {
-  // Restore old signal handler. This is global state so it's important that
-  // we do it now, while gPSMutex is locked.
-  sigaction(SIGPROF, &mOldSigprofHandler, 0);
+  if (aMicroseconds >= 1000000) {
+    // Use usleep for larger intervals, because the nanosleep
+    // code below only supports intervals < 1 second.
+    MOZ_ALWAYS_TRUE(!::usleep(aMicroseconds));
+    return;
+  }
+
+  struct timespec ts;
+  ts.tv_sec  = 0;
+  ts.tv_nsec = aMicroseconds * 1000UL;
+
+  int rv = ::nanosleep(&ts, &ts);
+
+  while (rv != 0 && errno == EINTR) {
+    // Keep waiting in case of interrupt.
+    // nanosleep puts the remaining time back into ts.
+    rv = ::nanosleep(&ts, &ts);
+  }
+
+  MOZ_ASSERT(!rv, "nanosleep call failed");
 }
 
-template<typename Func>
 void
-Sampler::SuspendAndSampleAndResumeThread(PSLockRef aLock,
-                                         TickSample& aSample,
-                                         const Func& aDoSample)
+SamplerThread::SuspendAndSampleAndResumeThread(PSLockRef aLock,
+                                               TickSample& aSample)
 {
   // Only one sampler thread can be sampling at once.  So we expect to have
   // complete control over |sSigHandlerCoordinator|.
   MOZ_ASSERT(!sSigHandlerCoordinator);
 
-  if (mSamplerTid == -1) {
-    mSamplerTid = gettid();
-  }
   int sampleeTid = aSample.mThreadId;
   MOZ_RELEASE_ASSERT(sampleeTid != mSamplerTid);
 
   //----------------------------------------------------------------//
   // Suspend the samplee thread and get its context.
 
   SigHandlerCoordinator coord;   // on sampler thread's stack
   sSigHandlerCoordinator = &coord;
@@ -354,17 +405,17 @@ Sampler::SuspendAndSampleAndResumeThread
   // malloc implementation, risks deadlock.
 
   // The samplee thread is now frozen and sSigHandlerCoordinator->mUContext is
   // valid.  We can poke around in it and unwind its stack as we like.
 
   // Extract the current PC and sp.
   FillInSample(aSample, &sSigHandlerCoordinator->mUContext);
 
-  aDoSample();
+  Tick(aLock, ActivePS::Buffer(aLock), aSample);
 
   //----------------------------------------------------------------//
   // Resume the target thread.
 
   // Send message 3 to the samplee, which tells it to resume.
   r = sem_post(&sSigHandlerCoordinator->mMessage3);
   MOZ_ASSERT(r == 0);
 
@@ -384,90 +435,16 @@ Sampler::SuspendAndSampleAndResumeThread
   //
   // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
 
   // This isn't strictly necessary, but doing so does help pick up anomalies
   // in which the signal handler is running when it shouldn't be.
   sSigHandlerCoordinator = nullptr;
 }
 
-// END Sampler target specifics
-////////////////////////////////////////////////////////////////////////
-
-////////////////////////////////////////////////////////////////////////
-// BEGIN SamplerThread target specifics
-
-static void*
-ThreadEntry(void* aArg)
-{
-  auto thread = static_cast<SamplerThread*>(aArg);
-  thread->Run();
-  return nullptr;
-}
-
-SamplerThread::SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
-                             double aIntervalMilliseconds)
-  : Sampler(aLock)
-  , mActivityGeneration(aActivityGeneration)
-  , mIntervalMicroseconds(
-      std::max(1, int(floor(aIntervalMilliseconds * 1000 + 0.5))))
-{
-  MOZ_RELEASE_ASSERT(NS_IsMainThread());
-
-  // Start the sampling thread. It repeatedly sends a SIGPROF signal. Sending
-  // the signal ourselves instead of relying on itimer provides much better
-  // accuracy.
-  if (pthread_create(&mThread, nullptr, ThreadEntry, this) != 0) {
-    MOZ_CRASH("pthread_create failed");
-  }
-}
-
-SamplerThread::~SamplerThread()
-{
-  pthread_join(mThread, nullptr);
-}
-
-void
-SamplerThread::SleepMicro(uint32_t aMicroseconds)
-{
-  if (aMicroseconds >= 1000000) {
-    // Use usleep for larger intervals, because the nanosleep
-    // code below only supports intervals < 1 second.
-    MOZ_ALWAYS_TRUE(!::usleep(aMicroseconds));
-    return;
-  }
-
-  struct timespec ts;
-  ts.tv_sec  = 0;
-  ts.tv_nsec = aMicroseconds * 1000UL;
-
-  int rv = ::nanosleep(&ts, &ts);
-
-  while (rv != 0 && errno == EINTR) {
-    // Keep waiting in case of interrupt.
-    // nanosleep puts the remaining time back into ts.
-    rv = ::nanosleep(&ts, &ts);
-  }
-
-  MOZ_ASSERT(!rv, "nanosleep call failed");
-}
-
-void
-SamplerThread::Stop(PSLockRef aLock)
-{
-  MOZ_RELEASE_ASSERT(NS_IsMainThread());
-
-  // Restore old signal handler. This is global state so it's important that
-  // we do it now, while gPSMutex is locked. It's safe to do this now even
-  // though this SamplerThread is still alive, because the next time the main
-  // loop of Run() iterates it won't get past the mActivityGeneration check,
-  // and so won't send any signals.
-  Sampler::Disable(aLock);
-}
-
 // END SamplerThread target specifics
 ////////////////////////////////////////////////////////////////////////
 
 #if defined(GP_OS_linux)
 
 // We use pthread_atfork() to temporarily disable signal delivery during any
 // fork() call. Without that, fork() can be repeatedly interrupted by signal
 // delivery, requiring it to be repeatedly restarted, which can lead to *long*
--- a/tools/profiler/core/platform-macos.cpp
+++ b/tools/profiler/core/platform-macos.cpp
@@ -57,32 +57,64 @@ public:
 private:
   // Note: for mProfiledThread Mach primitives are used instead of pthread's
   // because the latter doesn't provide thread manipulation primitives required.
   // For details, consult "Mac OS X Internals" book, Section 7.3.
   thread_act_t mProfiledThread;
 };
 
 ////////////////////////////////////////////////////////////////////////
-// BEGIN Sampler target specifics
+// BEGIN SamplerThread target specifics
+
+static void*
+ThreadEntry(void* aArg)
+{
+  auto thread = static_cast<SamplerThread*>(aArg);
+  thread->Run();
+  return nullptr;
+}
 
-Sampler::Sampler(PSLockRef aLock)
+SamplerThread::SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
+                             double aIntervalMilliseconds)
+  : mActivityGeneration(aActivityGeneration)
+  , mIntervalMicroseconds(
+      std::max(1, int(floor(aIntervalMilliseconds * 1000 + 0.5))))
 {
+  MOZ_RELEASE_ASSERT(NS_IsMainThread());
+
+  pthread_attr_t* attr_ptr = nullptr;
+  if (pthread_create(&mThread, attr_ptr, ThreadEntry, this) != 0) {
+    MOZ_CRASH("pthread_create failed");
+  }
+}
+
+SamplerThread::~SamplerThread()
+{
+  pthread_join(mThread, nullptr);
 }
 
 void
-Sampler::Disable(PSLockRef aLock)
+SamplerThread::Stop(PSLockRef aLock)
 {
+  MOZ_RELEASE_ASSERT(NS_IsMainThread());
 }
 
-template<typename Func>
 void
-Sampler::SuspendAndSampleAndResumeThread(PSLockRef aLock,
-                                         TickSample& aSample,
-                                         const Func& aDoSample)
+SamplerThread::SleepMicro(uint32_t aMicroseconds)
+{
+  usleep(aMicroseconds);
+  // FIXME: the OSX 10.12 page for usleep says "The usleep() function is
+  // obsolescent.  Use nanosleep(2) instead."  This implementation could be
+  // merged with the linux-android version.  Also, this doesn't handle the
+  // case where the usleep call is interrupted by a signal.
+}
+
+void
+SamplerThread::SuspendAndSampleAndResumeThread(PSLockRef aLock,
+                                               TickSample& aSample)
 {
   thread_act_t samplee_thread = aSample.mPlatformData->ProfiledThread();
 
   //----------------------------------------------------------------//
   // Suspend the samplee thread and get its context.
 
   // We're using thread_suspend on OS X because pthread_kill (which is what we
   // at one time used on Linux) has less consistent performance and causes
@@ -129,83 +161,31 @@ Sampler::SuspendAndSampleAndResumeThread
   if (thread_get_state(samplee_thread,
                        flavor,
                        reinterpret_cast<natural_t*>(&state),
                        &count) == KERN_SUCCESS) {
     aSample.mPC = reinterpret_cast<Address>(state.REGISTER_FIELD(ip));
     aSample.mSP = reinterpret_cast<Address>(state.REGISTER_FIELD(sp));
     aSample.mFP = reinterpret_cast<Address>(state.REGISTER_FIELD(bp));
 
-    aDoSample();
+    Tick(aLock, ActivePS::Buffer(aLock), aSample);
   }
 
 #undef REGISTER_FIELD
 
   //----------------------------------------------------------------//
   // Resume the target thread.
 
   thread_resume(samplee_thread);
 
   // The profiler's critical section ends here.
   //
   // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
 }
 
-// END Sampler target specifics
-////////////////////////////////////////////////////////////////////////
-
-////////////////////////////////////////////////////////////////////////
-// BEGIN SamplerThread target specifics
-
-static void*
-ThreadEntry(void* aArg)
-{
-  auto thread = static_cast<SamplerThread*>(aArg);
-  thread->Run();
-  return nullptr;
-}
-
-SamplerThread::SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
-                             double aIntervalMilliseconds)
-  : Sampler(aLock)
-  , mActivityGeneration(aActivityGeneration)
-  , mIntervalMicroseconds(
-      std::max(1, int(floor(aIntervalMilliseconds * 1000 + 0.5))))
-{
-  MOZ_RELEASE_ASSERT(NS_IsMainThread());
-
-  pthread_attr_t* attr_ptr = nullptr;
-  if (pthread_create(&mThread, attr_ptr, ThreadEntry, this) != 0) {
-    MOZ_CRASH("pthread_create failed");
-  }
-}
-
-SamplerThread::~SamplerThread()
-{
-  pthread_join(mThread, nullptr);
-}
-
-void
-SamplerThread::SleepMicro(uint32_t aMicroseconds)
-{
-  usleep(aMicroseconds);
-  // FIXME: the OSX 10.12 page for usleep says "The usleep() function is
-  // obsolescent.  Use nanosleep(2) instead."  This implementation could be
-  // merged with the linux-android version.  Also, this doesn't handle the
-  // case where the usleep call is interrupted by a signal.
-}
-
-void
-SamplerThread::Stop(PSLockRef aLock)
-{
-  MOZ_RELEASE_ASSERT(NS_IsMainThread());
-
-  Sampler::Disable(aLock);
-}
-
 // END SamplerThread target specifics
 ////////////////////////////////////////////////////////////////////////
 
 static void
 PlatformInit(PSLockRef aLock)
 {
 }
 
--- a/tools/profiler/core/platform-win32.cpp
+++ b/tools/profiler/core/platform-win32.cpp
@@ -74,37 +74,115 @@ uintptr_t
 GetThreadHandle(PlatformData* aData)
 {
   return (uintptr_t) aData->ProfiledThread();
 }
 
 static const HANDLE kNoThread = INVALID_HANDLE_VALUE;
 
 ////////////////////////////////////////////////////////////////////////
-// BEGIN Sampler target specifics
+// BEGIN SamplerThread target specifics
+
+static unsigned int __stdcall
+ThreadEntry(void* aArg)
+{
+  auto thread = static_cast<SamplerThread*>(aArg);
+  thread->Run();
+  return 0;
+}
+
+SamplerThread::SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
+                             double aIntervalMilliseconds)
+    : mActivityGeneration(aActivityGeneration)
+    , mIntervalMicroseconds(
+        std::max(1, int(floor(aIntervalMilliseconds * 1000 + 0.5))))
+{
+  MOZ_RELEASE_ASSERT(NS_IsMainThread());
 
-Sampler::Sampler(PSLockRef aLock)
+  // By default we'll not adjust the timer resolution which tends to be
+  // around 16ms. However, if the requested interval is sufficiently low
+  // we'll try to adjust the resolution to match.
+  if (mIntervalMicroseconds < 10*1000) {
+    ::timeBeginPeriod(mIntervalMicroseconds / 1000);
+  }
+
+  // Create a new thread. It is important to use _beginthreadex() instead of
+  // the Win32 function CreateThread(), because the CreateThread() does not
+  // initialize thread-specific structures in the C runtime library.
+  mThread = reinterpret_cast<HANDLE>(
+      _beginthreadex(nullptr,
+                     /* stack_size */ 0,
+                     ThreadEntry,
+                     this,
+                     /* initflag */ 0,
+                     nullptr));
+  if (mThread == 0) {
+    MOZ_CRASH("_beginthreadex failed");
+  }
+}
+
+SamplerThread::~SamplerThread()
 {
+  WaitForSingleObject(mThread, INFINITE);
+
+  // Close our own handle for the thread.
+  if (mThread != kNoThread) {
+    CloseHandle(mThread);
+  }
 }
 
 void
-Sampler::Disable(PSLockRef aLock)
+SamplerThread::Stop(PSLockRef aLock)
 {
+  MOZ_RELEASE_ASSERT(NS_IsMainThread());
+
+  // Disable any timer resolution changes we've made. Do it now while
+  // gPSMutex is locked, i.e. before any other SamplerThread can be created
+  // and call ::timeBeginPeriod().
+  //
+  // It's safe to do this now even though this SamplerThread is still alive,
+  // because the next time the main loop of Run() iterates it won't get past
+  // the mActivityGeneration check, and so it won't make any more ::Sleep()
+  // calls.
+  if (mIntervalMicroseconds < 10 * 1000) {
+    ::timeEndPeriod(mIntervalMicroseconds / 1000);
+  }
 }
 
-template<typename Func>
 void
-Sampler::SuspendAndSampleAndResumeThread(PSLockRef aLock,
-                                         TickSample& aSample,
-                                         const Func& aDoSample)
+SamplerThread::SleepMicro(uint32_t aMicroseconds)
+{
+  // For now, keep the old behaviour of minimum Sleep(1), even for
+  // smaller-than-usual sleeps after an overshoot, unless the user has
+  // explicitly opted into a sub-millisecond profiler interval.
+  if (mIntervalMicroseconds >= 1000) {
+    ::Sleep(std::max(1u, aMicroseconds / 1000));
+  } else {
+    TimeStamp start = TimeStamp::Now();
+    TimeStamp end = start + TimeDuration::FromMicroseconds(aMicroseconds);
+
+    // First, sleep for as many whole milliseconds as possible.
+    if (aMicroseconds >= 1000) {
+      ::Sleep(aMicroseconds / 1000);
+    }
+
+    // Then, spin until enough time has passed.
+    while (TimeStamp::Now() < end) {
+      _mm_pause();
+    }
+  }
+}
+
+void
+SamplerThread::SuspendAndSampleAndResumeThread(PSLockRef aLock,
+                                               TickSample& aSample)
 {
   HANDLE profiled_thread = aSample.mPlatformData->ProfiledThread();
-  if (profiled_thread == nullptr) {
+  if (profiled_thread == nullptr)
     return;
-  }
 
   // Context used for sampling the register state of the profiled thread.
   CONTEXT context;
   memset(&context, 0, sizeof(context));
 
   //----------------------------------------------------------------//
   // Suspend the samplee thread and get its context.
 
@@ -145,127 +223,28 @@ Sampler::SuspendAndSampleAndResumeThread
 #else
   aSample.mPC = reinterpret_cast<Address>(context.Eip);
   aSample.mSP = reinterpret_cast<Address>(context.Esp);
   aSample.mFP = reinterpret_cast<Address>(context.Ebp);
 #endif
 
   aSample.mContext = &context;
 
-  aDoSample();
+  Tick(aLock, ActivePS::Buffer(aLock), aSample);
 
   //----------------------------------------------------------------//
   // Resume the target thread.
 
   ResumeThread(profiled_thread);
 
   // The profiler's critical section ends here.
   //
   // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
 }
 
-// END Sampler target specifics
-////////////////////////////////////////////////////////////////////////
-
-////////////////////////////////////////////////////////////////////////
-// BEGIN SamplerThread target specifics
-
-static unsigned int __stdcall
-ThreadEntry(void* aArg)
-{
-  auto thread = static_cast<SamplerThread*>(aArg);
-  thread->Run();
-  return 0;
-}
-
-SamplerThread::SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
-                             double aIntervalMilliseconds)
-    : Sampler(aLock)
-    , mActivityGeneration(aActivityGeneration)
-    , mIntervalMicroseconds(
-        std::max(1, int(floor(aIntervalMilliseconds * 1000 + 0.5))))
-{
-  MOZ_RELEASE_ASSERT(NS_IsMainThread());
-
-  // By default we'll not adjust the timer resolution which tends to be
-  // around 16ms. However, if the requested interval is sufficiently low
-  // we'll try to adjust the resolution to match.
-  if (mIntervalMicroseconds < 10*1000) {
-    ::timeBeginPeriod(mIntervalMicroseconds / 1000);
-  }
-
-  // Create a new thread. It is important to use _beginthreadex() instead of
-  // the Win32 function CreateThread(), because the CreateThread() does not
-  // initialize thread-specific structures in the C runtime library.
-  mThread = reinterpret_cast<HANDLE>(
-      _beginthreadex(nullptr,
-                     /* stack_size */ 0,
-                     ThreadEntry,
-                     this,
-                     /* initflag */ 0,
-                     nullptr));
-  if (mThread == 0) {
-    MOZ_CRASH("_beginthreadex failed");
-  }
-}
-
-SamplerThread::~SamplerThread()
-{
-  WaitForSingleObject(mThread, INFINITE);
-
-  // Close our own handle for the thread.
-  if (mThread != kNoThread) {
-    CloseHandle(mThread);
-  }
-}
-
-void
-SamplerThread::SleepMicro(uint32_t aMicroseconds)
-{
-  // For now, keep the old behaviour of minimum Sleep(1), even for
-  // smaller-than-usual sleeps after an overshoot, unless the user has
-  // explicitly opted into a sub-millisecond profiler interval.
-  if (mIntervalMicroseconds >= 1000) {
-    ::Sleep(std::max(1u, aMicroseconds / 1000));
-  } else {
-    TimeStamp start = TimeStamp::Now();
-    TimeStamp end = start + TimeDuration::FromMicroseconds(aMicroseconds);
-
-    // First, sleep for as many whole milliseconds as possible.
-    if (aMicroseconds >= 1000) {
-      ::Sleep(aMicroseconds / 1000);
-    }
-
-    // Then, spin until enough time has passed.
-    while (TimeStamp::Now() < end) {
-      _mm_pause();
-    }
-  }
-}
-
-void
-SamplerThread::Stop(PSLockRef aLock)
-{
-  MOZ_RELEASE_ASSERT(NS_IsMainThread());
-
-  // Disable any timer resolution changes we've made. Do it now while
-  // gPSMutex is locked, i.e. before any other SamplerThread can be created
-  // and call ::timeBeginPeriod().
-  //
-  // It's safe to do this now even though this SamplerThread is still alive,
-  // because the next time the main loop of Run() iterates it won't get past
-  // the mActivityGeneration check, and so it won't make any more ::Sleep()
-  // calls.
-  if (mIntervalMicroseconds < 10 * 1000) {
-    ::timeEndPeriod(mIntervalMicroseconds / 1000);
-  }
-
-  Sampler::Disable(aLock);
-}
-
 // END SamplerThread target specifics
 ////////////////////////////////////////////////////////////////////////
 
 static void
 PlatformInit(PSLockRef aLock)
 {
 }
 
--- a/tools/profiler/core/platform.cpp
+++ b/tools/profiler/core/platform.cpp
@@ -975,50 +975,70 @@ StackWalkCallback(uint32_t aFrameNumber,
   NativeStack* nativeStack = static_cast<NativeStack*>(aClosure);
   MOZ_ASSERT(nativeStack->count < nativeStack->size);
   nativeStack->sp_array[nativeStack->count] = aSP;
   nativeStack->pc_array[nativeStack->count] = aPC;
   nativeStack->count++;
 }
 
 static void
-DoNativeBacktrace(PSLockRef aLock, NativeStack& aNativeStack,
+DoNativeBacktrace(PSLockRef aLock, ProfileBuffer* aBuffer,
                   const TickSample& aSample)
 {
+  void* pc_array[1000];
+  void* sp_array[1000];
+  NativeStack nativeStack = {
+    pc_array,
+    sp_array,
+    mozilla::ArrayLength(pc_array),
+    0
+  };
+
   // Start with the current function. We use 0 as the frame number here because
   // the FramePointerStackWalk() and MozStackWalk() calls below will use 1..N.
   // This is a bit weird but it doesn't matter because StackWalkCallback()
   // doesn't use the frame number argument.
-  StackWalkCallback(/* frameNum */ 0, aSample.mPC, aSample.mSP, &aNativeStack);
-
-  uint32_t maxFrames = uint32_t(aNativeStack.size - aNativeStack.count);
+  StackWalkCallback(/* frameNum */ 0, aSample.mPC, aSample.mSP, &nativeStack);
+
+  uint32_t maxFrames = uint32_t(nativeStack.size - nativeStack.count);
 
 #if defined(GP_OS_darwin) || (defined(GP_PLAT_x86_windows))
   void* stackEnd = aSample.mStackTop;
   if (aSample.mFP >= aSample.mSP && aSample.mFP <= stackEnd) {
     FramePointerStackWalk(StackWalkCallback, /* skipFrames */ 0, maxFrames,
-                          &aNativeStack, reinterpret_cast<void**>(aSample.mFP),
+                          &nativeStack, reinterpret_cast<void**>(aSample.mFP),
                           stackEnd);
   }
 #else
   // Win64 always omits frame pointers so for it we use the slower
   // MozStackWalk().
   uintptr_t thread = GetThreadHandle(aSample.mPlatformData);
   MOZ_ASSERT(thread);
-  MozStackWalk(StackWalkCallback, /* skipFrames */ 0, maxFrames, &aNativeStack,
+  MozStackWalk(StackWalkCallback, /* skipFrames */ 0, maxFrames, &nativeStack,
                thread, /* platformData */ nullptr);
 #endif
+
+  MergeStacksIntoProfile(aLock, aBuffer, aSample, nativeStack);
 }
 #endif
 
 #ifdef USE_EHABI_STACKWALK
 static void
-DoNativeBacktrace(PSLockRef aLock, NativeStack& aNativeStack,
+DoNativeBacktrace(PSLockRef aLock, ProfileBuffer* aBuffer,
                   const TickSample& aSample)
 {
+  void* pc_array[1000];
+  void* sp_array[1000];
+  NativeStack nativeStack = {
+    pc_array,
+    sp_array,
+    mozilla::ArrayLength(pc_array),
+    0
+  };
+
   const mcontext_t* mcontext =
     &reinterpret_cast<ucontext_t*>(aSample.mContext)->uc_mcontext;
   mcontext_t savedContext;
   NotNull<RacyThreadInfo*> racyInfo = aSample.mRacyInfo;
 
   // The pseudostack contains an "EnterJIT" frame whenever we enter
   // JIT code with profiling enabled; the stack pointer value points
   // the saved registers.  We use this to unwind resume unwinding
@@ -1029,21 +1049,21 @@ DoNativeBacktrace(PSLockRef aLock, Nativ
     volatile js::ProfileEntry& entry = racyInfo->mStack[i - 1];
     if (!entry.isJs() && strcmp(entry.label(), "EnterJIT") == 0) {
       // Found JIT entry frame.  Unwind up to that point (i.e., force
       // the stack walk to stop before the block of saved registers;
       // note that it yields nondecreasing stack pointers), then restore
       // the saved state.
       uint32_t* vSP = reinterpret_cast<uint32_t*>(entry.stackAddress());
 
-      aNativeStack.count += EHABIStackWalk(*mcontext,
-                                           /* stackBase = */ vSP,
-                                           aNativeStack.sp_array + aNativeStack.count,
-                                           aNativeStack.pc_array + aNativeStack.count,
-                                           aNativeStack.size - aNativeStack.count);
+      nativeStack.count += EHABIStackWalk(*mcontext,
+                                          /* stackBase = */ vSP,
+                                          sp_array + nativeStack.count,
+                                          pc_array + nativeStack.count,
+                                          nativeStack.size - nativeStack.count);
 
       memset(&savedContext, 0, sizeof(savedContext));
 
       // See also: struct EnterJITStack in js/src/jit/arm/Trampoline-arm.cpp
       savedContext.arm_r4  = *vSP++;
       savedContext.arm_r5  = *vSP++;
       savedContext.arm_r6  = *vSP++;
       savedContext.arm_r7  = *vSP++;
@@ -1055,21 +1075,23 @@ DoNativeBacktrace(PSLockRef aLock, Nativ
       savedContext.arm_sp  = reinterpret_cast<uint32_t>(vSP);
       savedContext.arm_pc  = savedContext.arm_lr;
       mcontext = &savedContext;
     }
   }
 
   // Now unwind whatever's left (starting from either the last EnterJIT frame
   // or, if no EnterJIT was found, the original registers).
-  aNativeStack.count += EHABIStackWalk(*mcontext,
-                                       aSample.mStackTop,
-                                       aNativeStack.sp_array + aNativeStack.count,
-                                       aNativeStack.pc_array + aNativeStack.count,
-                                       aNativeStack.size - aNativeStack.count);
+  nativeStack.count += EHABIStackWalk(*mcontext,
+                                      aSample.mStackTop,
+                                      sp_array + nativeStack.count,
+                                      pc_array + nativeStack.count,
+                                      nativeStack.size - nativeStack.count);
+
+  MergeStacksIntoProfile(aLock, aBuffer, aSample, nativeStack);
 }
 #endif
 
 #ifdef USE_LUL_STACKWALK
 
 // See the comment at the callsite for why this function is necessary.
 #if defined(MOZ_HAVE_ASAN_BLACKLIST)
 MOZ_ASAN_BLACKLIST static void
@@ -1084,17 +1106,17 @@ ASAN_memcpy(void* aDst, const void* aSrc
 
   for (size_t i = 0; i < aLen; i++) {
     dst[i] = src[i];
   }
 }
 #endif
 
 static void
-DoNativeBacktrace(PSLockRef aLock, NativeStack& aNativeStack,
+DoNativeBacktrace(PSLockRef aLock, ProfileBuffer* aBuffer,
                   const TickSample& aSample)
 {
   const mcontext_t* mc =
     &reinterpret_cast<ucontext_t*>(aSample.mContext)->uc_mcontext;
 
   lul::UnwindRegs startRegs;
   memset(&startRegs, 0, sizeof(startRegs));
 
@@ -1192,77 +1214,82 @@ DoNativeBacktrace(PSLockRef aLock, Nativ
       memcpy(&stackImg.mContents[0], (void*)start, nToCopy);
 #endif
       (void)VALGRIND_MAKE_MEM_DEFINED(&stackImg.mContents[0], nToCopy);
     }
   }
 
   // The maximum number of frames that LUL will produce.  Setting it
   // too high gives a risk of it wasting a lot of time looping on
-  // corrupted stacks. Limit the size of the passed-in native stack
-  // to not exceed this number.
+  // corrupted stacks.
   const int MAX_NATIVE_FRAMES = 256;
-  if (aNativeStack.size > MAX_NATIVE_FRAMES) {
-    aNativeStack.size = MAX_NATIVE_FRAMES;
-  }
 
   size_t scannedFramesAllowed = 0;
+
+  uintptr_t framePCs[MAX_NATIVE_FRAMES];
+  uintptr_t frameSPs[MAX_NATIVE_FRAMES];
+  size_t framesAvail = mozilla::ArrayLength(framePCs);
+  size_t framesUsed  = 0;
   size_t scannedFramesAcquired = 0, framePointerFramesAcquired = 0;
   lul::LUL* lul = CorePS::Lul(aLock);
-  lul->Unwind(reinterpret_cast<uintptr_t*>(aNativeStack.pc_array),
-              reinterpret_cast<uintptr_t*>(aNativeStack.sp_array),
-              &aNativeStack.count,
-              &framePointerFramesAcquired, &scannedFramesAcquired,
-              aNativeStack.size, scannedFramesAllowed,
+  lul->Unwind(&framePCs[0], &frameSPs[0],
+              &framesUsed, &framePointerFramesAcquired, &scannedFramesAcquired,
+              framesAvail, scannedFramesAllowed,
               &startRegs, &stackImg);
 
+  NativeStack nativeStack = {
+    reinterpret_cast<void**>(framePCs),
+    reinterpret_cast<void**>(frameSPs),
+    mozilla::ArrayLength(framePCs),
+    framesUsed
+  };
+
+  MergeStacksIntoProfile(aLock, aBuffer, aSample, nativeStack);
+
   // Update stats in the LUL stats object.  Unfortunately this requires
   // three global memory operations.
   lul->mStats.mContext += 1;
-  lul->mStats.mCFI     += aNativeStack.count - 1 - framePointerFramesAcquired -
-                                                   scannedFramesAcquired;
+  lul->mStats.mCFI     += framesUsed - 1 - framePointerFramesAcquired -
+                                           scannedFramesAcquired;
   lul->mStats.mFP      += framePointerFramesAcquired;
   lul->mStats.mScanned += scannedFramesAcquired;
 }
 
 #endif
 
-void
-Tick(PSLockRef aLock, const TickSample& aSample, ProfileBuffer* aBuffer)
+static void
+DoSampleStackTrace(PSLockRef aLock, ProfileBuffer* aBuffer,
+                   const TickSample& aSample)
 {
-  MOZ_RELEASE_ASSERT(ActivePS::Exists(aLock));
-
+  NativeStack nativeStack = { nullptr, nullptr, 0, 0 };
+  MergeStacksIntoProfile(aLock, aBuffer, aSample, nativeStack);
+
+  if (ActivePS::FeatureLeaf(aLock)) {
+    aBuffer->addTag(ProfileBufferEntry::NativeLeafAddr((void*)aSample.mPC));
+  }
+}
+
+// This function is called for each sampling period with the current program
+// counter. It is called within a signal and so must be re-entrant.
+static void
+Tick(PSLockRef aLock, ProfileBuffer* aBuffer, const TickSample& aSample)
+{
   aBuffer->addTagThreadId(aSample.mThreadId, aSample.mLastSample);
 
   mozilla::TimeDuration delta =
     aSample.mTimeStamp - CorePS::ProcessStartTime(aLock);
   aBuffer->addTag(ProfileBufferEntry::Time(delta.ToMilliseconds()));
 
-  void* pc_array[1000];
-  void* sp_array[1000];
-  NativeStack nativeStack = {
-    pc_array,
-    sp_array,
-    mozilla::ArrayLength(pc_array),
-    0
-  };
-
 #if defined(HAVE_NATIVE_UNWIND)
   if (ActivePS::FeatureStackWalk(aLock)) {
-    DoNativeBacktrace(aLock, nativeStack, aSample);
-
-    MergeStacksIntoProfile(aLock, aBuffer, aSample, nativeStack);
+    DoNativeBacktrace(aLock, aBuffer, aSample);
   } else
 #endif
   {
-    MergeStacksIntoProfile(aLock, aBuffer, aSample, nativeStack);
-
-    if (ActivePS::FeatureLeaf(aLock)) {
-      aBuffer->addTag(ProfileBufferEntry::NativeLeafAddr((void*)aSample.mPC));
-    }
+    DoSampleStackTrace(aLock, aBuffer, aSample);
   }
 
   // Don't process the PseudoStack's markers if we're synchronously sampling
   // the current thread.
   if (!aSample.mIsSynchronous) {
     ProfilerMarkerLinkedList* pendingMarkersList =
       aSample.mRacyInfo->GetPendingMarkers();
     while (pendingMarkersList && pendingMarkersList->peek()) {
@@ -1651,94 +1678,38 @@ PrintUsageThenExit(int aExitCode)
     "does not support"
 #endif
   );
 
   exit(aExitCode);
 }
 
 ////////////////////////////////////////////////////////////////////////
-// BEGIN Sampler
+// BEGIN SamplerThread
 
 #if defined(GP_OS_linux) || defined(GP_OS_android)
 struct SigHandlerCoordinator;
 #endif
 
-// Sampler performs setup and teardown of the state required to sample with the
-// profiler. Sampler may exist when ActivePS is not present.
-//
-// SuspendAndSampleAndResumeThread must only be called from a single thread,
-// and must not sample the thread it is being called from. A separate Sampler
-// instance must be used for each thread which wants to capture samples.
-
-// WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
-//
-// With the exception of SamplerThread, all Sampler objects must be Disable-d
-// before releasing the lock which was used to create them. This avoids races
-// on linux with the SIGPROF signal handler.
-
-class Sampler
-{
-public:
-  // Sets up the profiler such that it can begin sampling.
-  explicit Sampler(PSLockRef aLock);
-
-  // Disable the sampler, restoring it to its previous state. This must be
-  // called once, and only once, before the Sampler is destroyed.
-  void Disable(PSLockRef aLock);
-
-  // This method suspends and resumes the samplee thread. It calls the passed-in
-  // function like object aDoSample while the samplee thread is suspended, after
-  // filling in register values in aSample.
-  //
-  // Func must be a function-like object of type `void()`.
-  template<typename Func>
-  void SuspendAndSampleAndResumeThread(PSLockRef aLock,
-                                       TickSample& aSample,
-                                       const Func& aDoSample);
-
-private:
-#if defined(GP_OS_linux) || defined(GP_OS_android)
-  // Used to restore the SIGPROF handler when ours is removed.
-  struct sigaction mOldSigprofHandler;
-
-  // This process' ID. Needed as an argument for tgkill in
-  // SuspendAndSampleAndResumeThread.
-  int mMyPid;
-
-  // The sampler thread's ID.  Used to assert that it is not sampling itself,
-  // which would lead to deadlock.
-  int mSamplerTid;
-
-public:
-  // This is the one-and-only variable used to communicate between the sampler
-  // thread and the samplee thread's signal handler. It's static because the
-  // samplee thread's signal handler is static.
-  static struct SigHandlerCoordinator* sSigHandlerCoordinator;
-#endif
-};
-
-// END Sampler
-////////////////////////////////////////////////////////////////////////
-
-////////////////////////////////////////////////////////////////////////
-// BEGIN SamplerThread
-
 // The sampler thread controls sampling and runs whenever the profiler is
 // active. It periodically runs through all registered threads, finds those
 // that should be sampled, then pauses and samples them.
 
-class SamplerThread : public Sampler
+class SamplerThread
 {
 public:
   // Creates a sampler thread, but doesn't start it.
   SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
                 double aIntervalMilliseconds);
   ~SamplerThread();
 
+  // This runs on the sampler thread.  It suspends and resumes the samplee
+  // threads.
+  void SuspendAndSampleAndResumeThread(PSLockRef aLock, TickSample& aSample);
+
   // This runs on (is!) the sampler thread.
   void Run();
 
   // This runs on the main thread.
   void Stop(PSLockRef aLock);
 
 private:
   // This suspends the calling thread for the given number of microseconds.
@@ -1753,16 +1724,36 @@ private:
 
   // The OS-specific handle for the sampler thread.
 #if defined(GP_OS_windows)
   HANDLE mThread;
 #elif defined(GP_OS_darwin) || defined(GP_OS_linux) || defined(GP_OS_android)
   pthread_t mThread;
 #endif
 
+#if defined(GP_OS_linux) || defined(GP_OS_android)
+  // Used to restore the SIGPROF handler when ours is removed.
+  struct sigaction mOldSigprofHandler;
+
+  // This process' ID.  Needed as an argument for tgkill in
+  // SuspendAndSampleAndResumeThread.
+  int mMyPid;
+
+public:
+  // The sampler thread's ID.  Used to assert that it is not sampling itself,
+  // which would lead to deadlock.
+  int mSamplerTid;
+
+  // This is the one-and-only variable used to communicate between the sampler
+  // thread and the samplee thread's signal handler. It's static because the
+  // samplee thread's signal handler is static.
+  static struct SigHandlerCoordinator* sSigHandlerCoordinator;
+#endif
+
+private:
   SamplerThread(const SamplerThread&) = delete;
   void operator=(const SamplerThread&) = delete;
 };
 
 // This function is required because we need to create a SamplerThread within
 // ActivePS's constructor, but SamplerThread is defined after ActivePS. It
 // could probably be removed by moving some code around.
 static SamplerThread*
@@ -1836,19 +1827,17 @@ SamplerThread::Run()
             rssMemory = nsMemoryReporterManager::ResidentFast();
 #if defined(GP_OS_linux) || defined(GP_OS_android)
             ussMemory = nsMemoryReporterManager::ResidentUnique();
 #endif
           }
 
           TickSample sample(info, rssMemory, ussMemory);
 
-          SuspendAndSampleAndResumeThread(lock, sample, [&] {
-              Tick(lock, sample, ActivePS::Buffer(lock));
-            });
+          SuspendAndSampleAndResumeThread(lock, sample);
         }
 
 #if defined(USE_LUL_STACKWALK)
         // The LUL unwind object accumulates frame statistics. Periodically we
         // should poke it to give it a chance to print those statistics.  This
         // involves doing I/O (fprintf, __android_log_print, etc.) and so
         // can't safely be done from the critical section inside
         // SuspendAndSampleAndResumeThread, which is why it is done here.
@@ -2784,17 +2773,17 @@ profiler_get_backtrace()
   sample.PopulateContext(&context);
 #elif defined(GP_OS_darwin)
   sample.PopulateContext(nullptr);
 #else
 # error "unknown platform"
 #endif
 #endif
 
-  Tick(lock, sample, buffer);
+  Tick(lock, buffer, sample);
 
   return UniqueProfilerBacktrace(
     new ProfilerBacktrace("SyncProfile", tid, buffer));
 }
 
 void
 ProfilerBacktraceDestructor::operator()(ProfilerBacktrace* aBacktrace)
 {
@@ -2999,64 +2988,21 @@ profiler_clear_js_context()
   }
 
   // We don't call info->StopJSSampling() here; there's no point doing that for
   // a JS thread that is in the process of disappearing.
 
   info->mContext = nullptr;
 }
 
-int
-profiler_current_thread_id()
+void*
+profiler_get_stack_top()
 {
-  return Thread::GetCurrentId();
-}
-
-// NOTE: The callback function passed in will be called while the target thread
-// is paused. Doing stuff in this function like allocating which may try to
-// claim locks is a surefire way to deadlock.
-void
-profiler_suspend_and_sample_thread(int aThreadId,
-                                   const std::function<void(void**, size_t)>& aCallback,
-                                   bool aSampleNative /* = true */)
-{
-  // Allocate the space for the native stack
-  void* pc_array[1000];
-  void* sp_array[1000];
-  NativeStack nativeStack = {
-    pc_array,
-    sp_array,
-    mozilla::ArrayLength(pc_array),
-    0
-  };
-
-  // Lock the profiler mutex
   PSAutoLock lock(gPSMutex);
-
-  const CorePS::ThreadVector& liveThreads = CorePS::LiveThreads(lock);
-  for (uint32_t i = 0; i < liveThreads.size(); i++) {
-    ThreadInfo* info = liveThreads.at(i);
-
-    if (info->ThreadId() == aThreadId) {
-      // Suspend, sample, and then resume the target thread.
-      Sampler sampler(lock);
-      TickSample sample(info, 0, 0);
-      sampler.SuspendAndSampleAndResumeThread(lock, sample, [&] {
-          // The target thread is now suspended, collect a native backtrace, and
-          // call the callback.
-#if defined(HAVE_NATIVE_UNWIND)
-          if (aSampleNative) {
-            DoNativeBacktrace(lock, nativeStack, sample);
-          }
-#endif
-          aCallback(nativeStack.pc_array, nativeStack.count);
-        });
-
-      // NOTE: Make sure to disable the sampler before it is destroyed, in case
-      // the profiler is running at the same time.
-      sampler.Disable(lock);
-      break;
-    }
+  ThreadInfo* threadInfo = FindLiveThreadInfo(lock);
+  if (threadInfo) {
+    return threadInfo->StackTop();
   }
+  return nullptr;
 }
 
 // END externally visible functions
 ////////////////////////////////////////////////////////////////////////
--- a/tools/profiler/public/GeckoProfiler.h
+++ b/tools/profiler/public/GeckoProfiler.h
@@ -17,17 +17,16 @@
 // The profiler collects samples that include native stacks and
 // platform-independent "pseudostacks".
 
 #ifndef GeckoProfiler_h
 #define GeckoProfiler_h
 
 #include <stdint.h>
 #include <stdarg.h>
-#include <functional>
 
 #include "mozilla/Assertions.h"
 #include "mozilla/Attributes.h"
 #include "js/TypeDecls.h"
 #include "mozilla/GuardObjects.h"
 #include "mozilla/UniquePtr.h"
 
 class SpliceableJSONWriter;
@@ -372,29 +371,21 @@ PROFILER_FUNC(bool profiler_thread_is_sl
 PROFILER_FUNC_VOID(profiler_js_interrupt_callback())
 
 // The number of milliseconds since the process started. Operates the same
 // whether the profiler is active or inactive.
 PROFILER_FUNC(double profiler_time(), 0)
 
 PROFILER_FUNC_VOID(profiler_log(const char *str))
 
-PROFILER_FUNC(int profiler_current_thread_id(), 0)
-
-// This method suspends the thread identified by aThreadId, optionally samples
-// it for its native stack, and then calls the callback. The callback is passed
-// the native stack's program counters and length as two arguments if
-// aSampleNative is true.
+// Gets the stack top of the current thread.
 //
-// WARNING: The target thread is suspended during the callback. Do not try to
-// allocate or acquire any locks, or you could deadlock. The target thread will
-// have resumed by the time that this function returns.
-PROFILER_FUNC_VOID(profiler_suspend_and_sample_thread(int aThreadId,
-                                                      const std::function<void(void**, size_t)>& aCallback,
-                                                      bool aSampleNative = true))
+// The thread must have been previously registered with the profiler, otherwise
+// this method will return nullptr.
+PROFILER_FUNC(void* profiler_get_stack_top(), nullptr)
 
 // End of the functions defined whether the profiler is enabled or not.
 
 #if defined(MOZ_GECKO_PROFILER)
 
 #include <stdlib.h>
 #include <signal.h>
 #include "js/ProfilingStack.h"
--- a/xpcom/threads/BackgroundHangMonitor.cpp
+++ b/xpcom/threads/BackgroundHangMonitor.cpp
@@ -593,29 +593,31 @@ BackgroundHangMonitor::DisableOnBeta() {
 void
 BackgroundHangMonitor::Startup()
 {
 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
   MOZ_ASSERT(!BackgroundHangManager::sInstance, "Already initialized");
 
   if (!strcmp(NS_STRINGIFY(MOZ_UPDATE_CHANNEL), "beta")) {
     if (XRE_IsParentProcess()) { // cached ClientID hasn't been read yet
+      ThreadStackHelper::Startup();
       BackgroundHangThread::Startup();
       BackgroundHangManager::sInstance = new BackgroundHangManager();
 
       nsCOMPtr<nsIObserverService> observerService = mozilla::services::GetObserverService();
       MOZ_ASSERT(observerService);
 
       observerService->AddObserver(BackgroundHangManager::sInstance, "profile-after-change", false);
       return;
     } else if(DisableOnBeta()){
       return;
     }
   }
 
+  ThreadStackHelper::Startup();
   BackgroundHangThread::Startup();
   BackgroundHangManager::sInstance = new BackgroundHangManager();
 #endif
 }
 
 void
 BackgroundHangMonitor::Shutdown()
 {
@@ -626,16 +628,17 @@ BackgroundHangMonitor::Shutdown()
   }
 
   MOZ_ASSERT(BackgroundHangManager::sInstance, "Not initialized");
   /* Scope our lock inside Shutdown() because the sInstance object can
      be destroyed as soon as we set sInstance to nullptr below, and
      we don't want to hold the lock when it's being destroyed. */
   BackgroundHangManager::sInstance->Shutdown();
   BackgroundHangManager::sInstance = nullptr;
+  ThreadStackHelper::Shutdown();
   BackgroundHangManager::sDisabled = true;
 #endif
 }
 
 BackgroundHangMonitor::BackgroundHangMonitor(const char* aName,
                                              uint32_t aTimeoutMs,
                                              uint32_t aMaxTimeoutMs,
                                              ThreadType aThreadType)
--- a/xpcom/threads/ThreadStackHelper.cpp
+++ b/xpcom/threads/ThreadStackHelper.cpp
@@ -4,16 +4,19 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "ThreadStackHelper.h"
 #include "MainThreadUtils.h"
 #include "nsJSPrincipals.h"
 #include "nsScriptSecurityManager.h"
 #include "jsfriendapi.h"
+#ifdef MOZ_THREADSTACKHELPER_NATIVE
+#include "shared-libraries.h"
+#endif
 #ifdef MOZ_THREADSTACKHELPER_PSEUDO
 #include "PseudoStack.h"
 #endif
 
 #include "mozilla/Assertions.h"
 #include "mozilla/Attributes.h"
 #include "mozilla/IntegerPrintfMacros.h"
 #include "mozilla/Move.h"
@@ -57,30 +60,101 @@
 // Some NDKs don't define this constant even though the kernel supports it.
 #define __NR_rt_tgsigqueueinfo (__NR_SYSCALL_BASE+363)
 #endif
 #ifndef SYS_rt_tgsigqueueinfo
 #define SYS_rt_tgsigqueueinfo __NR_rt_tgsigqueueinfo
 #endif
 #endif
 
+#ifdef MOZ_THREADSTACKHELPER_NATIVE
+#if defined(MOZ_THREADSTACKHELPER_X86) || \
+    defined(MOZ_THREADSTACKHELPER_X64)
+// On these architectures, the stack grows downwards (toward lower addresses).
+#define MOZ_THREADSTACKHELPER_STACK_GROWS_DOWN
+#else
+#error "Unsupported architecture"
+#endif
+#endif // MOZ_THREADSTACKHELPER_NATIVE
+
 namespace mozilla {
 
+void
+ThreadStackHelper::Startup()
+{
+#if defined(XP_LINUX)
+  MOZ_ASSERT(NS_IsMainThread());
+  if (!sInitialized) {
+    // TODO: centralize signal number allocation
+    sFillStackSignum = SIGRTMIN + 4;
+    if (sFillStackSignum > SIGRTMAX) {
+      // Leave uninitialized
+      MOZ_ASSERT(false);
+      return;
+    }
+    struct sigaction sigact = {};
+    sigact.sa_sigaction = FillStackHandler;
+    sigemptyset(&sigact.sa_mask);
+    sigact.sa_flags = SA_SIGINFO | SA_RESTART;
+    MOZ_ALWAYS_TRUE(!::sigaction(sFillStackSignum, &sigact, nullptr));
+  }
+  sInitialized++;
+#endif
+}
+
+void
+ThreadStackHelper::Shutdown()
+{
+#if defined(XP_LINUX)
+  MOZ_ASSERT(NS_IsMainThread());
+  if (sInitialized == 1) {
+    struct sigaction sigact = {};
+    sigact.sa_handler = SIG_DFL;
+    MOZ_ALWAYS_TRUE(!::sigaction(sFillStackSignum, &sigact, nullptr));
+  }
+  sInitialized--;
+#endif
+}
+
 ThreadStackHelper::ThreadStackHelper()
+  : mStackToFill(nullptr)
 #ifdef MOZ_THREADSTACKHELPER_PSEUDO
-  : mStackToFill(nullptr)
   , mPseudoStack(profiler_get_pseudo_stack())
   , mMaxStackSize(Stack::sMaxInlineStorage)
   , mMaxBufferSize(512)
 #endif
+{
+#if defined(XP_LINUX)
+  MOZ_ALWAYS_TRUE(!::sem_init(&mSem, 0, 0));
+  mThreadID = ::syscall(SYS_gettid);
+#elif defined(XP_WIN)
+  mInitialized = !!::DuplicateHandle(
+    ::GetCurrentProcess(), ::GetCurrentThread(),
+    ::GetCurrentProcess(), &mThreadID,
+    THREAD_SUSPEND_RESUME
 #ifdef MOZ_THREADSTACKHELPER_NATIVE
-  , mNativeStackToFill(nullptr)
+    | THREAD_GET_CONTEXT | THREAD_QUERY_INFORMATION
 #endif
+    , FALSE, 0);
+  mStackTop = profiler_get_stack_top();
+  MOZ_ASSERT(mInitialized);
+#elif defined(XP_MACOSX)
+  mThreadID = mach_thread_self();
+#endif
+}
+
+ThreadStackHelper::~ThreadStackHelper()
 {
-  mThreadId = profiler_current_thread_id();
+#if defined(XP_LINUX)
+  MOZ_ALWAYS_TRUE(!::sem_destroy(&mSem));
+#elif defined(XP_WIN)
+  if (mInitialized) {
+    MOZ_ALWAYS_TRUE(!!::CloseHandle(mThreadID));
+  }
+#endif
 }
 
 namespace {
 template<typename T>
 class ScopedSetPtr
 {
 private:
   T*& mPtr;
@@ -92,68 +166,171 @@ public:
 
 void
 ThreadStackHelper::GetPseudoStack(Stack& aStack)
 {
   GetStacksInternal(&aStack, nullptr);
 }
 
 void
+ThreadStackHelper::GetStacksInternal(Stack* aStack, NativeStack* aNativeStack)
+{
+  // Always run PrepareStackBuffer first to clear aStack
+  if (aStack && !PrepareStackBuffer(*aStack)) {
+    // Skip and return empty aStack
+    return;
+  }
+
+  ScopedSetPtr<Stack> stackPtr(mStackToFill, aStack);
+
+#if defined(XP_LINUX)
+  if (!sInitialized) {
+    MOZ_ASSERT(false);
+    return;
+  }
+  if (aStack) {
+    siginfo_t uinfo = {};
+    uinfo.si_signo = sFillStackSignum;
+    uinfo.si_code = SI_QUEUE;
+    uinfo.si_pid = getpid();
+    uinfo.si_uid = getuid();
+    uinfo.si_value.sival_ptr = this;
+    if (::syscall(SYS_rt_tgsigqueueinfo, uinfo.si_pid,
+                  mThreadID, sFillStackSignum, &uinfo)) {
+      // rt_tgsigqueueinfo was added in Linux 2.6.31.
+      // Could have failed because the syscall did not exist.
+      return;
+    }
+    MOZ_ALWAYS_TRUE(!::sem_wait(&mSem));
+  }
+
+#elif defined(XP_WIN)
+  if (!mInitialized) {
+    MOZ_ASSERT(false);
+    return;
+  }
+
+  // NOTE: We can only perform frame pointer stack walking on non-Win64
+  // platforms, because Win64 always omits frame pointers. We don't want to use
+  // MozStackWalk here, so on Win64 we skip collecting native stacks entirely.
+#ifndef MOZ_THREADSTACKHELPER_X64
+  if (aNativeStack) {
+    aNativeStack->reserve(Telemetry::HangStack::sMaxNativeFrames);
+  }
+#endif
+
+  if (::SuspendThread(mThreadID) == DWORD(-1)) {
+    MOZ_ASSERT(false);
+    return;
+  }
+
+  // SuspendThread is asynchronous, so the thread may still be running. Use
+  // GetThreadContext to ensure it's really suspended.
+  // See https://blogs.msdn.microsoft.com/oldnewthing/20150205-00/?p=44743.
+  CONTEXT context;
+  memset(&context, 0, sizeof(context));
+  context.ContextFlags = CONTEXT_CONTROL;
+  if (::GetThreadContext(mThreadID, &context)) {
+    if (aStack) {
+      FillStackBuffer();
+    }
+
+#ifndef MOZ_THREADSTACKHELPER_X64
+    if (aNativeStack) {
+      auto callback = [](uint32_t, void* aPC, void*, void* aClosure) {
+        NativeStack* stack = static_cast<NativeStack*>(aClosure);
+        stack->push_back(reinterpret_cast<uintptr_t>(aPC));
+      };
+
+      // Now we need to get our frame pointer, our stack pointer, and our stack
+      // top. Rather than registering and storing the stack tops ourselves, we use
+      // the gecko profiler to look it up.
+      void** framePointer = reinterpret_cast<void**>(context.Ebp);
+      void** stackPointer = reinterpret_cast<void**>(context.Esp);
+
+      MOZ_ASSERT(mStackTop, "The thread should be registered by the profiler");
+
+      // Double check that the values we pulled for the thread make sense before
+      // walking the stack.
+      if (mStackTop && framePointer >= stackPointer && framePointer < mStackTop) {
+        // NOTE: In bug 1346415 this was changed to use FramePointerStackWalk.
+        // This was done because lowering the background hang timer threshold
+        // would cause it to fire on infra early during the boot process, causing
+        // a deadlock in MozStackWalk when the target thread was holding the
+        // Windows-internal lock on the function table, as it would be suspended
+        // before we tried to grab the lock to walk its stack.
+        //
+        // FramePointerStackWalk is implemented entirely in userspace and thus
+        // doesn't have the same issues with deadlocking. Unfortunately as 64-bit
+        // Windows is not guaranteed to have frame pointers, the stack walking
+        // code is only enabled on 32-bit Windows builds (bug 1357829).
+        FramePointerStackWalk(callback, /* skipFrames */ 0,
+                              /* maxFrames */ Telemetry::HangStack::sMaxNativeFrames,
+                              reinterpret_cast<void*>(aNativeStack), framePointer,
+                              mStackTop);
+      }
+    }
+#endif
+  }
+
+  MOZ_ALWAYS_TRUE(::ResumeThread(mThreadID) != DWORD(-1));
+
+#elif defined(XP_MACOSX)
+# if defined(MOZ_VALGRIND) && defined(RUNNING_ON_VALGRIND)
+  if (RUNNING_ON_VALGRIND) {
+    /* thread_suspend and thread_resume sometimes hang runs on Valgrind,
+       for unknown reasons.  So, just avoid them.  See bug 1100911. */
+    return;
+  }
+# endif
+
+  if (aStack) {
+    if (::thread_suspend(mThreadID) != KERN_SUCCESS) {
+      MOZ_ASSERT(false);
+      return;
+    }
+
+    FillStackBuffer();
+
+    MOZ_ALWAYS_TRUE(::thread_resume(mThreadID) == KERN_SUCCESS);
+  }
+
+#endif
+}
+
+void
 ThreadStackHelper::GetNativeStack(NativeStack& aNativeStack)
 {
+#ifdef MOZ_THREADSTACKHELPER_NATIVE
   GetStacksInternal(nullptr, &aNativeStack);
+#endif // MOZ_THREADSTACKHELPER_NATIVE
 }
 
 void
 ThreadStackHelper::GetPseudoAndNativeStack(Stack& aStack, NativeStack& aNativeStack)
 {
   GetStacksInternal(&aStack, &aNativeStack);
 }
 
-void
-ThreadStackHelper::GetStacksInternal(Stack* aStack, NativeStack* aNativeStack)
-{
-#if defined(MOZ_THREADSTACKHELPER_PSEUDO) || defined(MOZ_THREADSTACKHELPER_NATIVE)
-  // Always run PrepareStackBuffer first to clear aStack
-  if (aStack && !PrepareStackBuffer(*aStack)) {
-    // Skip and return empty aStack
-    return;
-  }
+#ifdef XP_LINUX
 
-  // Prepare the native stack
-  if (aNativeStack) {
-    aNativeStack->clear();
-    aNativeStack->reserve(Telemetry::HangStack::sMaxNativeFrames);
-  }
+int ThreadStackHelper::sInitialized;
+int ThreadStackHelper::sFillStackSignum;
 
-#ifdef MOZ_THREADSTACKHELPER_PSEUDO
-  ScopedSetPtr<Stack> stackPtr(mStackToFill, aStack);
-#endif
-#ifdef MOZ_THREADSTACKHELPER_NATIVE
-  ScopedSetPtr<NativeStack> nativeStackPtr(mNativeStackToFill, aNativeStack);
-#endif
-
-  auto callback = [&, this] (void** aPCs, size_t aCount) {
-    FillStackBuffer();
+void
+ThreadStackHelper::FillStackHandler(int aSignal, siginfo_t* aInfo,
+                                    void* aContext)
+{
+  ThreadStackHelper* const helper =
+    reinterpret_cast<ThreadStackHelper*>(aInfo->si_value.sival_ptr);
+  helper->FillStackBuffer();
+  ::sem_post(&helper->mSem);
+}
 
-#ifdef MOZ_THREADSTACKHELPER_NATIVE
-    if (mNativeStackToFill) {
-      while (aCount-- &&
-             mNativeStackToFill->size() < mNativeStackToFill->capacity()) {
-        mNativeStackToFill->push_back(reinterpret_cast<uintptr_t>(aPCs[aCount]));
-      }
-    }
-#endif
-  };
-
-  profiler_suspend_and_sample_thread(mThreadId,
-                                     callback,
-                                     /* aSampleNative = */ !!aNativeStack);
-#endif
-}
+#endif // XP_LINUX
 
 bool
 ThreadStackHelper::PrepareStackBuffer(Stack& aStack)
 {
   // Return false to skip getting the stack and return an empty stack
   aStack.clear();
 #ifdef MOZ_THREADSTACKHELPER_PSEUDO
   /* Normally, provided the profiler is enabled, it would be an error if we
@@ -302,19 +479,19 @@ ThreadStackHelper::AppendJSEntry(const v
   return label;
 }
 
 #endif // MOZ_THREADSTACKHELPER_PSEUDO
 
 void
 ThreadStackHelper::FillStackBuffer()
 {
-#ifdef MOZ_THREADSTACKHELPER_PSEUDO
   MOZ_ASSERT(mStackToFill->empty());
 
+#ifdef MOZ_THREADSTACKHELPER_PSEUDO
   size_t reservedSize = mStackToFill->capacity();
   size_t reservedBufferSize = mStackToFill->AvailableBufferSize();
   intptr_t availableBufferSize = intptr_t(reservedBufferSize);
 
   // Go from front to back
   const volatile js::ProfileEntry* entry = mPseudoStack->mStack;
   const volatile js::ProfileEntry* end = entry + mPseudoStack->stackSize();
   // Deduplicate identical, consecutive frames
--- a/xpcom/threads/ThreadStackHelper.h
+++ b/xpcom/threads/ThreadStackHelper.h
@@ -17,30 +17,35 @@
 #include <semaphore.h>
 #include <sys/types.h>
 #elif defined(XP_WIN)
 #include <windows.h>
 #elif defined(XP_MACOSX)
 #include <mach/mach.h>
 #endif
 
-// Support pseudostack and native stack on these platforms.
+// Support pseudostack on these platforms.
 #if defined(XP_LINUX) || defined(XP_WIN) || defined(XP_MACOSX)
 #  ifdef MOZ_GECKO_PROFILER
 #    define MOZ_THREADSTACKHELPER_PSEUDO
-#    define MOZ_THREADSTACKHELPER_NATIVE
 #  endif
 #endif
 
-// NOTE: Currently, due to a problem with LUL stackwalking initialization taking
-// a long time (bug 1365309), we don't perform pseudostack or native stack
-// walking on Linux.
-#if defined(XP_LINUX)
-#  undef MOZ_THREADSTACKHELPER_NATIVE
-#  undef MOZ_THREADSTACKHELPER_PSEUDO
+#if defined(MOZ_THREADSTACKHELPER_PSEUDO) && defined(XP_WIN)
+#  define MOZ_THREADSTACKHELPER_NATIVE
+#  if defined(__i386__) || defined(_M_IX86)
+#    define MOZ_THREADSTACKHELPER_X86
+#  elif defined(__x86_64__) || defined(_M_X64)
+#    define MOZ_THREADSTACKHELPER_X64
+#  elif defined(__arm__) || defined(_M_ARM)
+#    define MOZ_THREADSTACKHELPER_ARM
+#  else
+     // Unsupported architecture
+#    undef MOZ_THREADSTACKHELPER_NATIVE
+#  endif
 #endif
 
 namespace mozilla {
 
 /**
  * ThreadStackHelper is used to retrieve the profiler pseudo-stack of a
  * thread, as an alternative of using the profiler to take a profile.
  * The target thread first declares an ThreadStackHelper instance;
@@ -57,40 +62,48 @@ public:
 
   // When a native stack is gathered, this vector holds the raw program counter
   // values that FramePointerStackWalk will return to us after it walks the
   // stack. When gathering the Telemetry payload, Telemetry will take care of
   // mapping these program counters to proper addresses within modules.
   typedef Telemetry::NativeHangStack NativeStack;
 
 private:
+  Stack* mStackToFill;
 #ifdef MOZ_THREADSTACKHELPER_PSEUDO
-  Stack* mStackToFill;
   const PseudoStack* const mPseudoStack;
   size_t mMaxStackSize;
   size_t mMaxBufferSize;
 #endif
-#ifdef MOZ_THREADSTACKHELPER_NATIVE
-  NativeStack* mNativeStackToFill;
-#endif
 
   bool PrepareStackBuffer(Stack& aStack);
   void FillStackBuffer();
 #ifdef MOZ_THREADSTACKHELPER_PSEUDO
   const char* AppendJSEntry(const volatile js::ProfileEntry* aEntry,
                             intptr_t& aAvailableBufferSize,
                             const char* aPrevLabel);
 #endif
 
 public:
   /**
+   * Initialize ThreadStackHelper. Must be called from main thread.
+   */
+  static void Startup();
+  /**
+   * Uninitialize ThreadStackHelper. Must be called from main thread.
+   */
+  static void Shutdown();
+
+  /**
    * Create a ThreadStackHelper instance targeting the current thread.
    */
   ThreadStackHelper();
 
+  ~ThreadStackHelper();
+
   /**
    * Retrieve the current pseudostack of the thread associated
    * with this ThreadStackHelper.
    *
    * @param aStack Stack instance to be filled.
    */
   void GetPseudoStack(Stack& aStack);
 
@@ -112,16 +125,34 @@ public:
    */
   void GetPseudoAndNativeStack(Stack& aStack, NativeStack& aNativeStack);
 
 private:
   // Fill in the passed aStack and aNativeStack data structures with backtraces.
   // If only aStack needs to be collected, nullptr may be passed for
   // aNativeStack, and vice versa.
   void GetStacksInternal(Stack* aStack, NativeStack* aNativeStack);
+#if defined(XP_LINUX)
+private:
+  static int sInitialized;
+  static int sFillStackSignum;
 
-  // The profiler's unique thread identifier for the target thread.
-  int mThreadId;
+  static void FillStackHandler(int aSignal, siginfo_t* aInfo, void* aContext);
+
+  sem_t mSem;
+  pid_t mThreadID;
+
+#elif defined(XP_WIN)
+private:
+  bool mInitialized;
+  HANDLE mThreadID;
+  void* mStackTop;
+
+#elif defined(XP_MACOSX)
+private:
+  thread_act_t mThreadID;
+
+#endif
 };
 
 } // namespace mozilla
 
 #endif // mozilla_ThreadStackHelper_h