Bug 1342306 (part 1) - Introduce SamplerThread class in platform-linux-android.cpp. r=jseward.
author Nicholas Nethercote <nnethercote@mozilla.com>
Fri, 03 Mar 2017 15:52:35 +1100
changeset 394980 54a86d54e1c131ba32d7f5cb40651c44cfa4ed9f
parent 394979 693e0c6a44e0b221805405eb9058ac461db622a2
child 394981 bd730342c51508fd903a657030ae281163bf2c90
push id 1468
push user asasaki@mozilla.com
push date Mon, 05 Jun 2017 19:31:07 +0000
treeherder mozilla-release@0641fc6ee9d1 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers jseward
bugs 1342306
milestone 54.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1342306 (part 1) - Introduce SamplerThread class in platform-linux-android.cpp. r=jseward. This makes it more like platform-{macos,win32}.cpp, and will make it easier to introduce locking around all the globals in platform.cpp. The following things were moved into the new class. - Types: - SigHandlerCoordinator - Functions: - SigprofHandler() - SigprofSender() as Run() - Variables: - gOldSigprofHandler as mOldSigprofHandler - gSigprofSenderThread as mThread - gIntervalMicro as mIntervalMicro - gSigHandlerCoordinator as sSigHandlerCoordinator sInstance is the singleton instance. PlatformStart() and PlatformStop() now just delegate to StartSampler/StopSampler. The patch also does the following tidy-ups. Linux: - gWasPaused is now cleared in the parent and child processes after forking. - LUL: inlined and removed gLUL_initialization_routine(). - LUL: now only calling EnableUnwinding() and doing the unit tests on the first call to profiler_start(), instead of all of them. Mac: - Removed a useless call to pthread_self() -- mThread is already set by the earlier call to pthread_create(). - Removed some low-value checking involving kNoThread. Mac + Win: - Renamed SamplerThread::mInstance as sInstance, because it's static. - Merged SamplerThread::Start() with the constructor. - Tweaked how mInterval/mIntervalMicro is initialized, so it can become const. All platforms: - Converted NULL to nullptr throughout. - A few other very minor clean-ups, e.g. formatting.
tools/profiler/core/platform-linux-android.cpp
tools/profiler/core/platform-macos.cpp
tools/profiler/core/platform-win32.cpp
--- a/tools/profiler/core/platform-linux-android.cpp
+++ b/tools/profiler/core/platform-linux-android.cpp
@@ -68,87 +68,85 @@
 // Memory profile
 #include "nsMemoryReporterManager.h"
 
 #include <string.h>
 #include <list>
 
 using namespace mozilla;
 
-// All accesses to this variable are on the main thread, so no locking is
-// needed.
-static struct sigaction gOldSigprofHandler;
-
-// All accesses to this variable are on the main thread, so no locking is
-// needed.
-static pthread_t gSigprofSenderThread;
-
 #if defined(USE_LUL_STACKWALK)
 // A singleton instance of the library.  It is initialised at first
 // use.  Currently only the main thread can call PlatformStart(), so
 // there is no need for a mechanism to ensure that it is only
 // created once in a multi-thread-use situation.
 lul::LUL* gLUL = nullptr;
-
-// This is the gLUL initialization routine.
-static void gLUL_initialization_routine(void)
-{
-  MOZ_ASSERT(!gLUL);
-  MOZ_ASSERT(gettid() == getpid()); /* "this is the main thread" */
-  gLUL = new lul::LUL(logging_sink_for_LUL);
-  // Read all the unwind info currently available.
-  read_procmaps(gLUL);
-}
 #endif
 
 /* static */ Thread::tid_t
 Thread::GetCurrentId()
 {
   return gettid();
 }
 
 #if !defined(GP_OS_android)
+
 // Keep track of when any of our threads calls fork(), so we can
 // temporarily disable signal delivery during the fork() call.  Not
 // doing so appears to cause a kind of race, in which signals keep
 // getting delivered to the thread doing fork(), which keeps causing
 // it to fail and be restarted; hence forward progress is delayed a
 // great deal.  A side effect of this is to permanently disable
 // sampling in the child process.  See bug 837390.
 
 // Unfortunately this is only doable on non-Android, since Bionic
 // doesn't have pthread_atfork.
 
 // This records the current state at the time we paused it.
 static bool gWasPaused = false;
 
-// In the parent, just before the fork, record the pausedness state,
-// and then pause.
-static void paf_prepare(void) {
+// In the parent, before the fork, record the pausedness state, and then pause.
+static void
+paf_prepare()
+{
   // This function can run off the main thread.
+
   gWasPaused = gIsPaused;
   gIsPaused = true;
 }
 
-// In the parent, just after the fork, return pausedness to the
-// pre-fork state.
-static void paf_parent(void) {
+// In the parent, after the fork, return pausedness to the pre-fork state.
+static void
+paf_parent()
+{
   // This function can run off the main thread.
+
   gIsPaused = gWasPaused;
+  gWasPaused = false;
+}
+
+// In the child, after the fork, leave the profiler paused.
+static void
+paf_child()
+{
+  // This function can run off the main thread.
+
+  gWasPaused = false;
 }
 
 // Set up the fork handlers.
-static void* setup_atfork() {
-  pthread_atfork(paf_prepare, paf_parent, NULL);
-  return NULL;
+static void*
+setup_atfork()
+{
+  pthread_atfork(paf_prepare, paf_parent, paf_child);
+  return nullptr;
 }
+
 #endif /* !defined(GP_OS_android) */
 
-static int gIntervalMicro;
-
 static void SetSampleContext(TickSample* sample, mcontext_t& mcontext)
 {
   // Extracting the sample from the context is extremely machine dependent.
 #if defined(GP_ARCH_x86)
   sample->pc = reinterpret_cast<Address>(mcontext.gregs[REG_EIP]);
   sample->sp = reinterpret_cast<Address>(mcontext.gregs[REG_ESP]);
   sample->fp = reinterpret_cast<Address>(mcontext.gregs[REG_EBP]);
 #elif defined(GP_ARCH_amd64)
@@ -160,131 +158,16 @@ static void SetSampleContext(TickSample*
   sample->sp = reinterpret_cast<Address>(mcontext.arm_sp);
   sample->fp = reinterpret_cast<Address>(mcontext.arm_fp);
   sample->lr = reinterpret_cast<Address>(mcontext.arm_lr);
 #else
 # error "bad platform"
 #endif
 }
 
-// The only way to reliably interrupt a Linux thread and inspect its register
-// and stack state is by sending a signal to it, and doing the work inside the
-// signal handler.  But we don't want to run much code inside the signal
-// handler, since POSIX severely restricts what we can do in signal handlers.
-// So we use a system of semaphores to suspend the thread and allow the
-// sampler thread to do all the work of unwinding and copying out whatever
-// data it wants.
-
-// A four-message protocol is used to reliably suspend and later resume the
-// thread to be sampled (the samplee):
-//
-// Sampler (signal sender) thread              Samplee (thread to be sampled)
-//
-// Prepare the SigHandlerCoordinator
-// and point gSigHandlerCoordinator at it
-//
-// send SIGPROF to samplee ------- MSG 1 ----> (enter signal handler)
-// wait(mMessage2)                             Copy register state
-//                                               into gSigHandlerCoordinator
-//                         <------ MSG 2 ----- post(mMessage2)
-// Samplee is now suspended.                   wait(mMessage3)
-//   Examine its stack/register
-//   state at leisure
-//
-// Release samplee:
-//   post(mMessage3)       ------- MSG 3 ----->
-// wait(mMessage4)                              Samplee now resumes.  Tell
-//                                                the sampler that we are done.
-//                         <------ MSG 4 ------ post(mMessage4)
-// Now we know the samplee's signal             (leave signal handler)
-//   handler has finished using
-//   gSigHandlerCoordinator.  We can
-//   safely reuse it for some other thread.
-
-// A type used to coordinate between the sampler (signal sending) thread and
-// the thread currently being sampled (the samplee, which receives the
-// signals).
-//
-// The first message is sent using a SIGPROF signal delivery.  The subsequent
-// three are sent using sem_wait/sem_post pairs.  They are named accordingly
-// in the following struct.
-
-struct SigHandlerCoordinator
-{
-  SigHandlerCoordinator()
-  {
-    PodZero(&mUContext);
-    int r =  sem_init(&mMessage2, /* pshared */0, 0);
-    r     |= sem_init(&mMessage3, /* pshared */0, 0);
-    r     |= sem_init(&mMessage4, /* pshared */0, 0);
-    MOZ_ASSERT(r == 0);
-  }
-
-  ~SigHandlerCoordinator()
-  {
-    int r =  sem_destroy(&mMessage2);
-    r     |= sem_destroy(&mMessage3);
-    r     |= sem_destroy(&mMessage4);
-    MOZ_ASSERT(r == 0);
-  }
-
-  sem_t mMessage2; // to sampler: "context is in gSigHandlerCoordinator"
-  sem_t mMessage3; // to samplee: "resume"
-  sem_t mMessage4; // to sampler: "finished with gSigHandlerCoordinator"
-  ucontext_t mUContext; // Context at signal
-};
-
-// This is the one-and-only global variable used to communicate between
-// the sampler thread and the samplee thread's signal handler.
-static SigHandlerCoordinator* gSigHandlerCoordinator = nullptr;
-
-static void
-SigprofHandler(int aSignal, siginfo_t* aInfo, void* aContext)
-{
-  // Avoid TSan warning about clobbering errno.
-  int savedErrno = errno;
-
-  MOZ_ASSERT(aSignal == SIGPROF);
-  MOZ_ASSERT(gSigHandlerCoordinator);
-
-  // By sending us this signal, the sampler thread has sent us message 1 in
-  // the comment above, with the meaning "|gSigHandlerCoordinator| is ready
-  // for use, please copy your register context into it."
-  gSigHandlerCoordinator->mUContext = *static_cast<ucontext_t*>(aContext);
-
-  // Send message 2: tell the sampler thread that the context has been copied
-  // into |gSigHandlerCoordinator->mUContext|.  sem_post can never fail by
-  // being interrupted by a signal, so there's no loop around this call.
-  int r = sem_post(&gSigHandlerCoordinator->mMessage2);
-  MOZ_ASSERT(r == 0);
-
-  // At this point, the sampler thread assumes we are suspended, so we must
-  // not touch any global state here.
-
-  // Wait for message 3: the sampler thread tells us to resume.
-  while (true) {
-    r = sem_wait(&gSigHandlerCoordinator->mMessage3);
-    if (r == -1 && errno == EINTR) {
-      // Interrupted by a signal.  Now what?
-      continue; // try again
-    }
-    // We don't expect any other kind of failure
-    MOZ_ASSERT(r == 0);
-   break;
-  }
-
-  // Send message 4: tell the sampler thread that we are finished accessing
-  // |gSigHandlerCoordinator|.  After this point it is not safe to touch
-  // |gSigHandlerCoordinator|.
-  r = sem_post(&gSigHandlerCoordinator->mMessage4);
-  MOZ_ASSERT(r == 0);
-
-  errno = savedErrno;
-}
-
 #if defined(GP_OS_android)
 #define SYS_tgkill __NR_tgkill
 #endif
 
 int tgkill(pid_t tgid, pid_t tid, int signalno) {
   return syscall(SYS_tgkill, tgid, tid, signalno);
 }
 
@@ -333,213 +216,366 @@ SleepMicro(int aMicroseconds)
     // Keep waiting in case of interrupt.
     // nanosleep puts the remaining time back into ts.
     rv = ::nanosleep(&ts, &ts);
   }
 
   MOZ_ASSERT(!rv, "nanosleep call failed");
 }
 
-static void*
-SigprofSender(void* aArg)
+// The sampler thread controls sampling and runs whenever the profiler is
+// active. It periodically runs through all registered threads, finds those
+// that should be sampled, then pauses and samples them.
+//
+// The only way to reliably interrupt a Linux thread and inspect its register
+// and stack state is by sending a signal to it, and doing the work inside the
+// signal handler.  But we don't want to run much code inside the signal
+// handler, since POSIX severely restricts what we can do in signal handlers.
+// So we use a system of semaphores to suspend the thread and allow the
+// sampler thread to do all the work of unwinding and copying out whatever
+// data it wants.
+//
+// A four-message protocol is used to reliably suspend and later resume the
+// thread to be sampled (the samplee):
+//
+// Sampler (signal sender) thread              Samplee (thread to be sampled)
+//
+// Prepare the SigHandlerCoordinator
+// and point sSigHandlerCoordinator at it
+//
+// send SIGPROF to samplee ------- MSG 1 ----> (enter signal handler)
+// wait(mMessage2)                             Copy register state
+//                                               into sSigHandlerCoordinator
+//                         <------ MSG 2 ----- post(mMessage2)
+// Samplee is now suspended.                   wait(mMessage3)
+//   Examine its stack/register
+//   state at leisure
+//
+// Release samplee:
+//   post(mMessage3)       ------- MSG 3 ----->
+// wait(mMessage4)                              Samplee now resumes.  Tell
+//                                                the sampler that we are done.
+//                         <------ MSG 4 ------ post(mMessage4)
+// Now we know the samplee's signal             (leave signal handler)
+//   handler has finished using
+//   sSigHandlerCoordinator.  We can
+//   safely reuse it for some other thread.
+//
+class SamplerThread
 {
-  // This function runs on its own thread.
-
-  // Taken from platform_thread_posix.cc
-  prctl(PR_SET_NAME, "SamplerThread", 0, 0, 0);
-
-  int vm_tgid_ = getpid();
-  DebugOnly<int> my_tid = gettid();
-
-  TimeDuration lastSleepOverhead = 0;
-  TimeStamp sampleStart = TimeStamp::Now();
-  while (gIsActive) {
-    gBuffer->deleteExpiredStoredMarkers();
-
-    if (!gIsPaused) {
-      StaticMutexAutoLock lock(gRegisteredThreadsMutex);
+private:
+  // A type used to coordinate between the sampler (signal sending) thread and
+  // the thread currently being sampled (the samplee, which receives the
+  // signals).
+  //
+  // The first message is sent using a SIGPROF signal delivery.  The subsequent
+  // three are sent using sem_wait/sem_post pairs.  They are named accordingly
+  // in the following struct.
+  struct SigHandlerCoordinator
+  {
+    SigHandlerCoordinator()
+    {
+      PodZero(&mUContext);
+      int r = sem_init(&mMessage2, /* pshared */ 0, 0);
+      r    |= sem_init(&mMessage3, /* pshared */ 0, 0);
+      r    |= sem_init(&mMessage4, /* pshared */ 0, 0);
+      MOZ_ASSERT(r == 0);
+    }
 
-      bool isFirstProfiledThread = true;
-      for (uint32_t i = 0; i < gRegisteredThreads->size(); i++) {
-        ThreadInfo* info = (*gRegisteredThreads)[i];
+    ~SigHandlerCoordinator()
+    {
+      int r = sem_destroy(&mMessage2);
+      r    |= sem_destroy(&mMessage3);
+      r    |= sem_destroy(&mMessage4);
+      MOZ_ASSERT(r == 0);
+    }
 
-        // This will be null if we're not interested in profiling this thread.
-        if (!info->HasProfile() || info->IsPendingDelete()) {
-          continue;
-        }
+    sem_t mMessage2; // To sampler: "context is in sSigHandlerCoordinator"
+    sem_t mMessage3; // To samplee: "resume"
+    sem_t mMessage4; // To sampler: "finished with sSigHandlerCoordinator"
+    ucontext_t mUContext; // Context at signal
+  };
+
+  static void* ThreadEntry(void* aArg)
+  {
+    auto thread = static_cast<SamplerThread*>(aArg);
+    prctl(PR_SET_NAME, "SamplerThread", 0, 0, 0);
+    thread->Run();
+    return nullptr;
+  }
 
-        if (info->Stack()->CanDuplicateLastSampleDueToSleep() &&
-            gBuffer->DuplicateLastSample(info->ThreadId(), gStartTime)) {
-          continue;
-        }
-
-        info->UpdateThreadResponsiveness();
+public:
+  explicit SamplerThread(double aInterval)
+    : mIntervalMicro(std::max(1, int(floor(aInterval * 1000 + 0.5))))
+  {
+    MOZ_RELEASE_ASSERT(NS_IsMainThread());
 
-        int threadId = info->ThreadId();
-        MOZ_ASSERT(threadId != my_tid);
+#if defined(USE_EHABI_STACKWALK)
+    mozilla::EHABIStackWalkInit();
+#elif defined(USE_LUL_STACKWALK)
+    bool createdLUL = false;
+    if (!gLUL) {
+      gLUL = new lul::LUL(logging_sink_for_LUL);
+      // Read all the unwind info currently available.
+      read_procmaps(gLUL);
+      createdLUL = true;
+    }
+#endif
 
-        int64_t rssMemory = 0;
-        int64_t ussMemory = 0;
-        if (isFirstProfiledThread && gProfileMemory) {
-          rssMemory = nsMemoryReporterManager::ResidentFast();
-          ussMemory = nsMemoryReporterManager::ResidentUnique();
-        }
+    // Request profiling signals.
+    LOG("Request signal");
+    struct sigaction sa;
+    sa.sa_sigaction = MOZ_SIGNAL_TRAMPOLINE(SigprofHandler);
+    sigemptyset(&sa.sa_mask);
+    sa.sa_flags = SA_RESTART | SA_SIGINFO;
+    if (sigaction(SIGPROF, &sa, &mOldSigprofHandler) != 0) {
+      MOZ_CRASH("Error installing SIGPROF handler in the profiler");
+    }
+    LOG("Signal installed");
 
-        // Suspend the samplee thread and get its context.
-        SigHandlerCoordinator coord;   // on sampler thread's stack
-        gSigHandlerCoordinator = &coord;
+#if defined(USE_LUL_STACKWALK)
+    if (createdLUL) {
+      // Switch into unwind mode. After this point, we can't add or remove any
+      // unwind info to/from this LUL instance. The only thing we can do with
+      // it is Unwind() calls.
+      gLUL->EnableUnwinding();
 
-        // Send message 1 to the samplee (the thread to be sampled), by
-        // signalling at it.
-        int r = tgkill(vm_tgid_, threadId, SIGPROF);
-        MOZ_ASSERT(r == 0);
+      // Has a test been requested?
+      if (PR_GetEnv("MOZ_PROFILER_LUL_TEST")) {
+         int nTests = 0, nTestsPassed = 0;
+         RunLulUnitTests(&nTests, &nTestsPassed, gLUL);
+      }
+    }
+#endif
+
+    // Start the sampling thread. It repeatedly sends a SIGPROF signal. Sending
+    // the signal ourselves instead of relying on itimer provides much better
+    // accuracy.
+    if (pthread_create(&mThread, nullptr, ThreadEntry, this) != 0) {
+      MOZ_CRASH("pthread_create failed");
+    }
+    LOG("Sampler thread started");
+  }
+
+  void Join() {
+    MOZ_RELEASE_ASSERT(NS_IsMainThread());
 
-        // Wait for message 2 from the samplee, indicating that the context is
-        // available and that the thread is suspended.
-        while (true) {
-          r = sem_wait(&gSigHandlerCoordinator->mMessage2);
-          if (r == -1 && errno == EINTR) {
-            // Interrupted by a signal.  Try again.
-            continue;
-          }
-          // We don't expect any other kind of failure.
-          MOZ_ASSERT(r == 0);
-          break;
-        }
+    // Wait for Run() termination (Run() will exit because gIsActive has been
+    // set to false).
+    pthread_join(mThread, nullptr);
+
+    // Restore old signal handler
+    sigaction(SIGPROF, &mOldSigprofHandler, 0);
+  }
+
+  static void StartSampler(double aInterval) {
+    MOZ_RELEASE_ASSERT(NS_IsMainThread());
+    MOZ_RELEASE_ASSERT(!sInstance);
+
+    sInstance = new SamplerThread(aInterval);
+  }
+
+  static void StopSampler() {
+    MOZ_RELEASE_ASSERT(NS_IsMainThread());
+
+    sInstance->Join();
+    delete sInstance;
+    sInstance = nullptr;
+  }
+
+  static void SigprofHandler(int aSignal, siginfo_t* aInfo, void* aContext) {
+    // Avoid TSan warning about clobbering errno.
+    int savedErrno = errno;
+
+    MOZ_ASSERT(aSignal == SIGPROF);
+    MOZ_ASSERT(sSigHandlerCoordinator);
 
-        // The samplee thread is now frozen and
-        // gSigHandlerCoordinator->mUContext is valid.  We can poke around in
-        // it and unwind its stack as we like.
+    // By sending us this signal, the sampler thread has sent us message 1 in
+    // the comment above, with the meaning "|sSigHandlerCoordinator| is ready
+    // for use, please copy your register context into it."
+    sSigHandlerCoordinator->mUContext = *static_cast<ucontext_t*>(aContext);
+
+    // Send message 2: tell the sampler thread that the context has been copied
+    // into |sSigHandlerCoordinator->mUContext|.  sem_post can never fail by
+    // being interrupted by a signal, so there's no loop around this call.
+    int r = sem_post(&sSigHandlerCoordinator->mMessage2);
+    MOZ_ASSERT(r == 0);
 
-        TickSample sample;
-        sample.context = &gSigHandlerCoordinator->mUContext;
+    // At this point, the sampler thread assumes we are suspended, so we must
+    // not touch any global state here.
+
+    // Wait for message 3: the sampler thread tells us to resume.
+    while (true) {
+      r = sem_wait(&sSigHandlerCoordinator->mMessage3);
+      if (r == -1 && errno == EINTR) {
+        // Interrupted by a signal.  Try again.
+        continue;
+      }
+      // We don't expect any other kind of failure
+      MOZ_ASSERT(r == 0);
+      break;
+    }
 
-        // Extract the current pc and sp.
-        SetSampleContext(&sample,
-                         gSigHandlerCoordinator->mUContext.uc_mcontext);
-        sample.threadInfo = info;
-        sample.timestamp = mozilla::TimeStamp::Now();
-        sample.rssMemory = rssMemory;
-        sample.ussMemory = ussMemory;
+    // Send message 4: tell the sampler thread that we are finished accessing
+    // |sSigHandlerCoordinator|.  After this point it is not safe to touch
+    // |sSigHandlerCoordinator|.
+    r = sem_post(&sSigHandlerCoordinator->mMessage4);
+    MOZ_ASSERT(r == 0);
 
-        Tick(gBuffer, &sample);
+    errno = savedErrno;
+  }
+
+  void Run() {
+    // This function runs on the sampler thread.
+
+    int vm_tgid_ = getpid();
+    DebugOnly<int> my_tid = gettid();
 
-        // Send message 3 to the samplee, which tells it to resume.
-        r = sem_post(&gSigHandlerCoordinator->mMessage3);
-        MOZ_ASSERT(r == 0);
+    TimeDuration lastSleepOverhead = 0;
+    TimeStamp sampleStart = TimeStamp::Now();
+
+    while (gIsActive) {
+      gBuffer->deleteExpiredStoredMarkers();
 
-        // Wait for message 4 from the samplee, which tells us that it has
-        // finished with |gSigHandlerCoordinator|.
-        while (true) {
-          r = sem_wait(&gSigHandlerCoordinator->mMessage4);
-          if (r == -1 && errno == EINTR) {
+      if (!gIsPaused) {
+        StaticMutexAutoLock lock(gRegisteredThreadsMutex);
+
+        bool isFirstProfiledThread = true;
+
+        for (uint32_t i = 0; i < gRegisteredThreads->size(); i++) {
+          ThreadInfo* info = (*gRegisteredThreads)[i];
+
+          // This will be null if we're not interested in profiling this thread.
+          if (!info->HasProfile() || info->IsPendingDelete()) {
             continue;
           }
-          MOZ_ASSERT(r == 0);
-          break;
-        }
+
+          if (info->Stack()->CanDuplicateLastSampleDueToSleep() &&
+              gBuffer->DuplicateLastSample(info->ThreadId(), gStartTime)) {
+            continue;
+          }
+
+          info->UpdateThreadResponsiveness();
+
+          int threadId = info->ThreadId();
+          MOZ_ASSERT(threadId != my_tid);
 
-        // This isn't strictly necessary, but doing so does help pick up
-        // anomalies in which the signal handler is running even though this
-        // loop thinks it shouldn't be.
-        gSigHandlerCoordinator = nullptr;
+          int64_t rssMemory = 0;
+          int64_t ussMemory = 0;
+          if (isFirstProfiledThread && gProfileMemory) {
+            rssMemory = nsMemoryReporterManager::ResidentFast();
+            ussMemory = nsMemoryReporterManager::ResidentUnique();
+          }
+
+          // Suspend the samplee thread and get its context.
+          SigHandlerCoordinator coord;   // on sampler thread's stack
+          sSigHandlerCoordinator = &coord;
 
-        isFirstProfiledThread = false;
-      }
-#if defined(USE_LUL_STACKWALK)
-      // The LUL unwind object accumulates frame statistics. Periodically we
-      // should poke it to give it a chance to print those statistics. This
-      // involves doing I/O (fprintf, __android_log_print, etc.) and so can't
-      // safely be done from the unwinder threads, which is why it is done
-      // here.
-      gLUL->MaybeShowStats();
-#endif
-    }
+          // Send message 1 to the samplee (the thread to be sampled), by
+          // signalling at it.
+          int r = tgkill(vm_tgid_, threadId, SIGPROF);
+          MOZ_ASSERT(r == 0);
+
+          // Wait for message 2 from the samplee, indicating that the context
+          // is available and that the thread is suspended.
+          while (true) {
+            r = sem_wait(&sSigHandlerCoordinator->mMessage2);
+            if (r == -1 && errno == EINTR) {
+              // Interrupted by a signal.  Try again.
+              continue;
+            }
+            // We don't expect any other kind of failure.
+            MOZ_ASSERT(r == 0);
+            break;
+          }
+
+          // The samplee thread is now frozen and
+          // sSigHandlerCoordinator->mUContext is valid.  We can poke around
+          // in it and unwind its stack as we like.
+
+          TickSample sample;
+          sample.context = &sSigHandlerCoordinator->mUContext;
 
-    TimeStamp targetSleepEndTime =
-      sampleStart + TimeDuration::FromMicroseconds(gIntervalMicro);
-    TimeStamp beforeSleep = TimeStamp::Now();
-    TimeDuration targetSleepDuration = targetSleepEndTime - beforeSleep;
-    double sleepTime = std::max(0.0, (targetSleepDuration - lastSleepOverhead).ToMicroseconds());
-    SleepMicro(sleepTime);
-    sampleStart = TimeStamp::Now();
-    lastSleepOverhead = sampleStart - (beforeSleep + TimeDuration::FromMicroseconds(sleepTime));
-  }
-  return 0;
-}
+          // Extract the current pc and sp.
+          SetSampleContext(&sample,
+                           sSigHandlerCoordinator->mUContext.uc_mcontext);
+          sample.threadInfo = info;
+          sample.timestamp = mozilla::TimeStamp::Now();
+          sample.rssMemory = rssMemory;
+          sample.ussMemory = ussMemory;
+
+          Tick(gBuffer, &sample);
+
+          // Send message 3 to the samplee, which tells it to resume.
+          r = sem_post(&sSigHandlerCoordinator->mMessage3);
+          MOZ_ASSERT(r == 0);
+
+          // Wait for message 4 from the samplee, which tells us that it has
+          // finished with |sSigHandlerCoordinator|.
+          while (true) {
+            r = sem_wait(&sSigHandlerCoordinator->mMessage4);
+            if (r == -1 && errno == EINTR) {
+              continue;
+            }
+            MOZ_ASSERT(r == 0);
+            break;
+          }
 
-static void
-PlatformStart(double aInterval)
-{
-  MOZ_RELEASE_ASSERT(NS_IsMainThread());
+          // This isn't strictly necessary, but doing so does help pick up
+          // anomalies in which the signal handler is running even though
+          // this loop thinks it shouldn't be.
+          sSigHandlerCoordinator = nullptr;
 
-#if defined(USE_EHABI_STACKWALK)
-  mozilla::EHABIStackWalkInit();
-#elif defined(USE_LUL_STACKWALK)
-  // NOTE: this isn't thread-safe.  But we expect PlatformStart() to be
-  // called only from the main thread, so this is OK in general.
-  if (!gLUL) {
-     gLUL_initialization_routine();
-  }
+          isFirstProfiledThread = false;
+        }
+#if defined(USE_LUL_STACKWALK)
+        // The LUL unwind object accumulates frame statistics. Periodically
+        // we should poke it to give it a chance to print those statistics.
+        // This involves doing I/O (fprintf, __android_log_print, etc.) and
+        // so can't safely be done from the unwinder threads, which is why it
+        // is done here.
+        gLUL->MaybeShowStats();
 #endif
+      }
 
-  gIntervalMicro = floor(aInterval * 1000 + 0.5);
-  if (gIntervalMicro <= 0) {
-    gIntervalMicro = 1;
+      TimeStamp targetSleepEndTime =
+        sampleStart + TimeDuration::FromMicroseconds(mIntervalMicro);
+      TimeStamp beforeSleep = TimeStamp::Now();
+      TimeDuration targetSleepDuration = targetSleepEndTime - beforeSleep;
+      double sleepTime = std::max(0.0, (targetSleepDuration - lastSleepOverhead).ToMicroseconds());
+      SleepMicro(sleepTime);
+      sampleStart = TimeStamp::Now();
+      lastSleepOverhead = sampleStart - (beforeSleep + TimeDuration::FromMicroseconds(sleepTime));
+    }
   }
 
-  // Initialize signal handler communication
-  gSigHandlerCoordinator = nullptr;
+private:
+  // The pthread_t for the sampler thread.
+  pthread_t mThread;
 
-  // Request profiling signals.
-  LOG("Request signal");
-  struct sigaction sa;
-  sa.sa_sigaction = MOZ_SIGNAL_TRAMPOLINE(SigprofHandler);
-  sigemptyset(&sa.sa_mask);
-  sa.sa_flags = SA_RESTART | SA_SIGINFO;
-  if (sigaction(SIGPROF, &sa, &gOldSigprofHandler) != 0) {
-    MOZ_CRASH("Error installing signal");
-  }
-  LOG("Signal installed");
+  // The interval between samples, measured in microseconds.
+  const int mIntervalMicro;
 
-#if defined(USE_LUL_STACKWALK)
-  // Switch into unwind mode.  After this point, we can't add or
-  // remove any unwind info to/from this LUL instance.  The only thing
-  // we can do with it is Unwind() calls.
-  gLUL->EnableUnwinding();
+  // Used to restore the SIGPROF handler when ours is removed.
+  struct sigaction mOldSigprofHandler;
+
+  static SamplerThread* sInstance;
 
-  // Has a test been requested?
-  if (PR_GetEnv("MOZ_PROFILER_LUL_TEST")) {
-     int nTests = 0, nTestsPassed = 0;
-     RunLulUnitTests(&nTests, &nTestsPassed, gLUL);
-  }
-#endif
-
-  // Start a thread that sends SIGPROF signal to VM thread. Sending the signal
-  // ourselves instead of relying on itimer provides much better accuracy.
-  if (pthread_create(&gSigprofSenderThread, NULL, SigprofSender, NULL) != 0) {
-    MOZ_CRASH("pthread_create failed");
-  }
-  LOG("Profiler thread started");
-}
+  // This is the one-and-only variable used to communicate between the sampler
+  // thread and the samplee thread's signal handler. It's static because the
+  // samplee thread's signal handler is static.
+  static SigHandlerCoordinator* sSigHandlerCoordinator;
 
-static void
-PlatformStop()
-{
-  MOZ_RELEASE_ASSERT(NS_IsMainThread());
-
-  gIntervalMicro = 0;
+  SamplerThread(const SamplerThread&) = delete;
+  void operator=(const SamplerThread&) = delete;
+};
 
-  // Wait for SigprofSender() termination (SigprofSender() will exit because
-  // gIsActive has been set to false).
-  pthread_join(gSigprofSenderThread, NULL);
-
-  // Restore old signal handler
-  sigaction(SIGPROF, &gOldSigprofHandler, 0);
-}
+SamplerThread* SamplerThread::sInstance = nullptr;
+SamplerThread::SigHandlerCoordinator* SamplerThread::sSigHandlerCoordinator =
+  nullptr;
 
 #if defined(GP_OS_android)
 static struct sigaction gOldSigstartHandler;
 const int SIGSTART = SIGUSR2;
 
 static void freeArray(const char** array, int size) {
   for (int i = 0; i < size; i++) {
     free((void*) array[i]);
@@ -550,17 +586,17 @@ static uint32_t readCSVArray(char* csvLi
   uint32_t count;
   char* savePtr;
   int newlinePos = strlen(csvList) - 1;
   if (csvList[newlinePos] == '\n') {
     csvList[newlinePos] = '\0';
   }
 
   char* item = strtok_r(csvList, ",", &savePtr);
-  for (count = 0; item; item = strtok_r(NULL, ",", &savePtr)) {
+  for (count = 0; item; item = strtok_r(nullptr, ",", &savePtr)) {
     int length = strlen(item) + 1;  // Include \0
     char* newBuf = (char*) malloc(sizeof(char) * length);
     buffer[count] = newBuf;
     strncpy(newBuf, item, length);
     count++;
   }
 
   return count;
@@ -575,19 +611,19 @@ ReadProfilerVars(const char* fileName,
   FILE* file = fopen(fileName, "r");
   const int bufferSize = 1024;
   char line[bufferSize];
   char* feature;
   char* value;
   char* savePtr;
 
   if (file) {
-    while (fgets(line, bufferSize, file) != NULL) {
+    while (fgets(line, bufferSize, file) != nullptr) {
       feature = strtok_r(line, "=", &savePtr);
-      value = strtok_r(NULL, "", &savePtr);
+      value = strtok_r(nullptr, "", &savePtr);
 
       if (strncmp(feature, "MOZ_PROFILER_INTERVAL", bufferSize) == 0) {
         set_profiler_interval(value);
       } else if (strncmp(feature, "MOZ_PROFILER_ENTRIES", bufferSize) == 0) {
         set_profiler_entries(value);
       } else if (strncmp(feature, "MOZ_PROFILER_FEATURES", bufferSize) == 0) {
         *featureCount = readCSVArray(value, features);
       } else if (strncmp(feature, "threads", bufferSize) == 0) {
@@ -640,31 +676,47 @@ static void
 PlatformInit()
 {
   LOG("Registering start signal");
   struct sigaction sa;
   sa.sa_sigaction = StartSignalHandler;
   sigemptyset(&sa.sa_mask);
   sa.sa_flags = SA_RESTART | SA_SIGINFO;
   if (sigaction(SIGSTART, &sa, &gOldSigstartHandler) != 0) {
-    LOG("Error installing signal");
+    MOZ_CRASH("Error installing SIGSTART handler in the profiler");
   }
 }
 
 #else
 
 static void
 PlatformInit()
 {
   // Set up the fork handlers.
   setup_atfork();
 }
 
 #endif
 
+static void
+PlatformStart(double aInterval)  // Delegates to SamplerThread::StartSampler().
+{
+  MOZ_RELEASE_ASSERT(NS_IsMainThread());  // Main-thread only.
+
+  SamplerThread::StartSampler(aInterval);  // aInterval is forwarded unchanged.
+}
+
+static void
+PlatformStop()  // Delegates to SamplerThread::StopSampler().
+{
+  MOZ_RELEASE_ASSERT(NS_IsMainThread());  // Main-thread only.
+
+  SamplerThread::StopSampler();
+}
+
 void TickSample::PopulateContext(void* aContext)
 {
   MOZ_ASSERT(aContext);
   ucontext_t* pContext = reinterpret_cast<ucontext_t*>(aContext);
   if (!getcontext(pContext)) {
     context = pContext;
     SetSampleContext(this,
                      reinterpret_cast<ucontext_t*>(aContext)->uc_mcontext);
--- a/tools/profiler/core/platform-macos.cpp
+++ b/tools/profiler/core/platform-macos.cpp
@@ -34,21 +34,16 @@
 // Memory profile
 #include "nsMemoryReporterManager.h"
 
 using mozilla::TimeStamp;
 using mozilla::TimeDuration;
 
 // this port is based off of v8 svn revision 9837
 
-#ifdef DEBUG
-// 0 is never a valid thread id on MacOSX since a pthread_t is a pointer.
-static const pthread_t kNoThread = (pthread_t) 0;
-#endif
-
 class PlatformData {
  public:
   PlatformData() : profiled_thread_(mach_thread_self())
   {
   }
 
   ~PlatformData() {
     // Deallocate Mach port for thread.
@@ -76,75 +71,63 @@ PlatformDataDestructor::operator()(Platf
   delete aData;
 }
 
 // The sampler thread controls sampling and runs whenever the profiler is
 // active. It periodically runs through all registered threads, finds those
 // that should be sampled, then pauses and samples them.
 class SamplerThread
 {
-public:
-  explicit SamplerThread(double aInterval)
-    : mIntervalMicro(floor(aInterval * 1000 + 0.5))
-  {
-    if (mIntervalMicro <= 0) {
-      mIntervalMicro = 1;
-    }
-  }
-
+private:
   static void SetThreadName() {
     // pthread_setname_np is only available in 10.6 or later, so test
     // for it at runtime.
     int (*dynamic_pthread_setname_np)(const char*);
     *reinterpret_cast<void**>(&dynamic_pthread_setname_np) =
       dlsym(RTLD_DEFAULT, "pthread_setname_np");
     if (!dynamic_pthread_setname_np)
       return;
 
     dynamic_pthread_setname_np("SamplerThread");
   }
 
   static void* ThreadEntry(void* aArg) {
-    SamplerThread* thread = reinterpret_cast<SamplerThread*>(aArg);
-
-    thread->mThread = pthread_self();
+    auto thread = static_cast<SamplerThread*>(aArg);
     SetThreadName();
-    MOZ_ASSERT(thread->mThread != kNoThread);
     thread->Run();
-    return NULL;
+    return nullptr;
   }
 
-  void Start() {
-    pthread_attr_t* attr_ptr = NULL;
+public:
+  explicit SamplerThread(double aInterval)
+    : mIntervalMicro(std::max(1, int(floor(aInterval * 1000 + 0.5))))
+  {
+    pthread_attr_t* attr_ptr = nullptr;
     if (pthread_create(&mThread, attr_ptr, ThreadEntry, this) != 0) {
       MOZ_CRASH("pthread_create failed");
     }
-    MOZ_ASSERT(mThread != kNoThread);
   }
 
   void Join() {
-    pthread_join(mThread, NULL);
+    pthread_join(mThread, nullptr);
   }
 
   static void StartSampler(double aInterval) {
     MOZ_RELEASE_ASSERT(NS_IsMainThread());
-    MOZ_RELEASE_ASSERT(!mInstance);
+    MOZ_RELEASE_ASSERT(!sInstance);
 
-    if (mInstance == NULL) {
-      mInstance = new SamplerThread(aInterval);
-      mInstance->Start();
-    }
+    sInstance = new SamplerThread(aInterval);
   }
 
   static void StopSampler() {
+    // Waits for the sampler thread via Join(), then destroys the singleton
+    // and resets sInstance to nullptr. Must be called on the main thread,
+    // after a matching StartSampler().
     MOZ_RELEASE_ASSERT(NS_IsMainThread());
+    // Counterpart of the !sInstance check in StartSampler(): fail with a
+    // clear assertion instead of dereferencing a null sInstance below.
+    MOZ_RELEASE_ASSERT(sInstance);
 
-    mInstance->Join();
-    delete mInstance;
-    mInstance = NULL;
+    sInstance->Join();
+    delete sInstance;
+    sInstance = nullptr;
   }
 
   void Run() {
     // This function runs on the sampler thread.
 
     TimeDuration lastSleepOverhead = 0;
     TimeStamp sampleStart = TimeStamp::Now();
 
@@ -243,25 +226,26 @@ public:
       Tick(gBuffer, &sample);
     }
     thread_resume(profiled_thread);
   }
 
 private:
   pthread_t mThread;
 
-  int mIntervalMicro;
+  // The interval between samples, measured in microseconds.
+  const int mIntervalMicro;
 
-  static SamplerThread* mInstance;
+  static SamplerThread* sInstance;
 
   SamplerThread(const SamplerThread&) = delete;
   void operator=(const SamplerThread&) = delete;
 };
 
-SamplerThread* SamplerThread::mInstance = NULL;
+SamplerThread* SamplerThread::sInstance = nullptr;
 
 static void
 PlatformInit()
 {
 }
 
 static void
 PlatformStart(double aInterval)
--- a/tools/profiler/core/platform-win32.cpp
+++ b/tools/profiler/core/platform-win32.cpp
@@ -45,19 +45,19 @@ class PlatformData {
   explicit PlatformData(int aThreadId) : profiled_thread_(OpenThread(
                                                THREAD_GET_CONTEXT |
                                                THREAD_SUSPEND_RESUME |
                                                THREAD_QUERY_INFORMATION,
                                                false,
                                                aThreadId)) {}
 
   ~PlatformData() {
-    if (profiled_thread_ != NULL) {
+    if (profiled_thread_ != nullptr) {
       CloseHandle(profiled_thread_);
-      profiled_thread_ = NULL;
+      profiled_thread_ = nullptr;
     }
   }
 
   HANDLE profiled_thread() { return profiled_thread_; }
 
  private:
   HANDLE profiled_thread_;
 };
@@ -82,75 +82,68 @@ GetThreadHandle(PlatformData* aData)
 
 static const HANDLE kNoThread = INVALID_HANDLE_VALUE;
 
 // The sampler thread controls sampling and runs whenever the profiler is
 // active. It periodically runs through all registered threads, finds those
 // that should be sampled, then pauses and samples them.
 class SamplerThread
 {
- public:
+private:
+  // Start routine handed to _beginthreadex() by the constructor. aArg is the
+  // SamplerThread whose Run() this thread executes; the exit code is always 0.
+  static unsigned int __stdcall ThreadEntry(void* aArg) {
+    static_cast<SamplerThread*>(aArg)->Run();
+    return 0;
+  }
+
+public:
   explicit SamplerThread(double aInterval)
-    : mThread(kNoThread)
-    , mInterval(floor(aInterval + 0.5))
+    : mInterval(std::max(1, int(floor(aInterval + 0.5))))
   {
-    if (mInterval <= 0) {
-      mInterval = 1;
+    // Create a new thread. It is important to use _beginthreadex() instead of
+    // the Win32 function CreateThread(), because CreateThread() does not
+    // initialize thread-specific structures in the C runtime library.
+    mThread = reinterpret_cast<HANDLE>(
+        _beginthreadex(nullptr,
+                       /* stack_size */ 0,
+                       ThreadEntry,
+                       this,
+                       /* initflag */ 0,
+                       (unsigned int*) &mThreadId));
+    if (mThread == 0) {
+      MOZ_CRASH("_beginthreadex failed");
     }
   }
 
   ~SamplerThread() {
     // Close our own handle for the thread.
     if (mThread != kNoThread) {
       CloseHandle(mThread);
     }
   }
 
-  static unsigned int __stdcall ThreadEntry(void* aArg) {
-    SamplerThread* thread = reinterpret_cast<SamplerThread*>(aArg);
-    thread->Run();
-    return 0;
-  }
-
-  // Create a new thread. It is important to use _beginthreadex() instead of
-  // the Win32 function CreateThread(), because the CreateThread() does not
-  // initialize thread-specific structures in the C runtime library.
-  void Start() {
-    mThread = reinterpret_cast<HANDLE>(
-        _beginthreadex(NULL,
-                       /* stack_size */ 0,
-                       ThreadEntry,
-                       this,
-                       /* initflag */ 0,
-                       (unsigned int*) &mThreadId));
-    if (mThread == 0) {
-      MOZ_CRASH("_beginthreadex failed");
-    }
-  }
-
   void Join() {
     if (mThreadId != Thread::GetCurrentId()) {
       WaitForSingleObject(mThread, INFINITE);
     }
   }
 
   static void StartSampler(double aInterval) {
     MOZ_RELEASE_ASSERT(NS_IsMainThread());
-    MOZ_RELEASE_ASSERT(!mInstance);
+    MOZ_RELEASE_ASSERT(!sInstance);
 
-    mInstance = new SamplerThread(aInterval);
-    mInstance->Start();
+    sInstance = new SamplerThread(aInterval);
   }
 
   static void StopSampler() {
+    // Waits for the sampler thread via Join(), then destroys the singleton
+    // (closing the thread handle in the destructor) and resets sInstance to
+    // nullptr. Must be called on the main thread, after a matching
+    // StartSampler().
     MOZ_RELEASE_ASSERT(NS_IsMainThread());
+    // Counterpart of the !sInstance check in StartSampler(): fail with a
+    // clear assertion instead of dereferencing a null sInstance below.
+    MOZ_RELEASE_ASSERT(sInstance);
 
-    mInstance->Join();
-    delete mInstance;
-    mInstance = NULL;
+    sInstance->Join();
+    delete sInstance;
+    sInstance = nullptr;
   }
 
   void Run() {
     // This function runs on the sampler thread.
 
     // By default we'll not adjust the timer resolution which tends to be around
     // 16ms. However, if the requested interval is sufficiently low we'll try to
     // adjust the resolution to match.
@@ -190,17 +183,17 @@ class SamplerThread
     if (mInterval < 10)
         ::timeEndPeriod(mInterval);
   }
 
   void SampleContext(ThreadInfo* aThreadInfo, bool isFirstProfiledThread)
   {
     uintptr_t thread = GetThreadHandle(aThreadInfo->GetPlatformData());
     HANDLE profiled_thread = reinterpret_cast<HANDLE>(thread);
-    if (profiled_thread == NULL)
+    if (profiled_thread == nullptr)
       return;
 
     // Context used for sampling the register state of the profiled thread.
     CONTEXT context;
     memset(&context, 0, sizeof(context));
 
     TickSample sample;
 
@@ -253,26 +246,26 @@ class SamplerThread
 
     ResumeThread(profiled_thread);
   }
 
 private:
   HANDLE mThread;
   Thread::tid_t mThreadId;
 
-  int mInterval; // units: ms
+  // The interval between samples, measured in milliseconds.
+  const int mInterval;
 
-  // Protects the process wide state below.
-  static SamplerThread* mInstance;
+  static SamplerThread* sInstance;
 
   SamplerThread(const SamplerThread&) = delete;
   void operator=(const SamplerThread&) = delete;
 };
 
-SamplerThread* SamplerThread::mInstance = NULL;
+SamplerThread* SamplerThread::sInstance = nullptr;
 
 static void
 PlatformInit()
 {
 }
 
 static void
 PlatformStart(double aInterval)