Bug 1342102 - Use the same threading structure in platform-linux-android.cpp as for the -macos and -win32 versions. r=n.nethercote.
author Julian Seward <jseward@acm.org>
Wed, 01 Mar 2017 14:57:34 +0100
changeset 345317 1e1880fc4d8f9f79dafb440b041efaf84d73bed0
parent 345316 1adc95479dd12cf02d2617bb390488cbb31f6af9
child 345318 831f13e13f320430af31fa37553fadddfa898585
push id 31436
push user kwierso@gmail.com
push date Thu, 02 Mar 2017 01:18:52 +0000
treeherder mozilla-central@e91de6fb2b3d [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers n.nethercote
bugs 1342102
milestone 54.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1342102 - Use the same threading structure in platform-linux-android.cpp as for the -macos and -win32 versions. r=n.nethercote.

The MacOS and Windows profiler cores have a threading structure where one thread ("sampler thread") collects information from a thread to be profiled ("samplee thread") by suspending it, getting its register state, unwinding its stack, and then resuming it. This requires kernel-level primitives to perform the suspend, get-registers and resume steps.

The Linux/Android core is different, because none of those three primitives exist there. Until now, data collection has been done by sending a SIGPROF to the samplee, and collecting all relevant data within the signal handler. This has a number of disadvantages:

(1) Current work to rationalise/clean up the threading structure of the profiler is complicated by the need to reason about/verify two different schemes. In particular, the Tick call in the Windows and MacOS implementations produces its output on the sampler thread. In the Linux implementation, that output is produced on the sampled threads.

(2) Dynamic verification results (primarily, absence of data races and deadlocks) established for the Linux implementation are less likely to carry over to the other two implementations, because the threading structures are different.

(3) It causes a lot of duplicated code in platform-*.cpp. For example, the SamplerThread::Run() implementations in the -win32.cpp and -macos.cpp files are very similar. Ideally all three could be merged into a single file with the identical logic factored out into common code.

(4) Running lots of code -- the entire contents of Tick -- in a signal handler isn't considered good practice. POSIX severely restricts the set of functions we can safely call from within a signal handler.

This commit changes the Linux implementation by using semaphores to implement the suspend and resume primitives, and moves the bulk of the data collection work to the sampler thread. As a result, the Linux implementation has the same threading structure as the other two.
tools/profiler/core/platform-linux-android.cpp
--- a/tools/profiler/core/platform-linux-android.cpp
+++ b/tools/profiler/core/platform-linux-android.cpp
@@ -57,16 +57,17 @@
 #include <execinfo.h>   // backtrace, backtrace_symbols
 #endif  // def __GLIBC__
 #include <strings.h>    // index
 #include <errno.h>
 #include <stdarg.h>
 
 #include "prenv.h"
 #include "mozilla/LinuxSignal.h"
+#include "mozilla/PodOperations.h"
 #include "mozilla/DebugOnly.h"
 
 // Memory profile
 #include "nsMemoryReporterManager.h"
 
 #include <string.h>
 #include <list>
 
@@ -138,30 +139,19 @@ static void paf_parent(void) {
 static void* setup_atfork() {
   pthread_atfork(paf_prepare, paf_parent, NULL);
   return NULL;
 }
 #endif /* !defined(GP_OS_android) */
 
 static int gIntervalMicro;
 
-// Global variables through which data is sent from SigprofSender() to
-// SigprofHandler(). gSignalHandlingDone provides inter-thread synchronization.
-static ThreadInfo* gCurrentThreadInfo;
-static int64_t gRssMemory;
-static int64_t gUssMemory;
-
-// Semaphore used to coordinate SigprofSender() and SigprofHandler().
-static sem_t gSignalHandlingDone;
-
-static void SetSampleContext(TickSample* sample, void* context)
+static void SetSampleContext(TickSample* sample, mcontext_t& mcontext)
 {
   // Extracting the sample from the context is extremely machine dependent.
-  ucontext_t* ucontext = reinterpret_cast<ucontext_t*>(context);
-  mcontext_t& mcontext = ucontext->uc_mcontext;
 #if defined(GP_ARCH_x86)
   sample->pc = reinterpret_cast<Address>(mcontext.gregs[REG_EIP]);
   sample->sp = reinterpret_cast<Address>(mcontext.gregs[REG_ESP]);
   sample->fp = reinterpret_cast<Address>(mcontext.gregs[REG_EBP]);
 #elif defined(GP_ARCH_amd64)
   sample->pc = reinterpret_cast<Address>(mcontext.gregs[REG_RIP]);
   sample->sp = reinterpret_cast<Address>(mcontext.gregs[REG_RSP]);
   sample->fp = reinterpret_cast<Address>(mcontext.gregs[REG_RBP]);
@@ -170,35 +160,128 @@ static void SetSampleContext(TickSample*
   sample->sp = reinterpret_cast<Address>(mcontext.arm_sp);
   sample->fp = reinterpret_cast<Address>(mcontext.arm_fp);
   sample->lr = reinterpret_cast<Address>(mcontext.arm_lr);
 #else
 # error "bad platform"
 #endif
 }
 
+// The only way to reliably interrupt a Linux thread and inspect its register
+// and stack state is by sending a signal to it, and doing the work inside the
+// signal handler.  But we don't want to run much code inside the signal
+// handler, since POSIX severely restricts what we can do in signal handlers.
+// So we use a system of semaphores to suspend the thread and allow the
+// sampler thread to do all the work of unwinding and copying out whatever
+// data it wants.
+
+// A four-message protocol is used to reliably suspend and later resume the
+// thread to be sampled (the samplee):
+//
+// Sampler (signal sender) thread              Samplee (thread to be sampled)
+//
+// Prepare the SigHandlerCoordinator
+// and point gSigHandlerCoordinator at it
+//
+// send SIGPROF to samplee ------- MSG 1 ----> (enter signal handler)
+// wait(mMessage2)                             Copy register state
+//                                               into gSigHandlerCoordinator
+//                         <------ MSG 2 ----- post(mMessage2)
+// Samplee is now suspended.                   wait(mMessage3)
+//   Examine its stack/register
+//   state at leisure
+//
+// Release samplee:
+//   post(mMessage3)       ------- MSG 3 ----->
+// wait(mMessage4)                              Samplee now resumes.  Tell
+//                                                the sampler that we are done.
+//                         <------ MSG 4 ------ post(mMessage4)
+// Now we know the samplee's signal             (leave signal handler)
+//   handler has finished using
+//   gSigHandlerCoordinator.  We can
+//   safely reuse it for some other thread.
+
+// A type used to coordinate between the sampler (signal sending) thread and
+// the thread currently being sampled (the samplee, which receives the
+// signals).
+//
+// The first message is sent using a SIGPROF signal delivery.  The subsequent
+// three are sent using sem_wait/sem_post pairs.  They are named accordingly
+// in the following struct.
+
+struct SigHandlerCoordinator
+{
+  SigHandlerCoordinator()
+  {
+    PodZero(&mUContext);
+    int r =  sem_init(&mMessage2, /* pshared */0, 0);
+    r     |= sem_init(&mMessage3, /* pshared */0, 0);
+    r     |= sem_init(&mMessage4, /* pshared */0, 0);
+    MOZ_ASSERT(r == 0);
+  }
+
+  ~SigHandlerCoordinator()
+  {
+    int r =  sem_destroy(&mMessage2);
+    r     |= sem_destroy(&mMessage3);
+    r     |= sem_destroy(&mMessage4);
+    MOZ_ASSERT(r == 0);
+  }
+
+  sem_t mMessage2; // to sampler: "context is in gSigHandlerCoordinator"
+  sem_t mMessage3; // to samplee: "resume"
+  sem_t mMessage4; // to sampler: "finished with gSigHandlerCoordinator"
+  ucontext_t mUContext; // Context at signal
+};
+
+// This is the one-and-only global variable used to communicate between
+// the sampler thread and the samplee thread's signal handler.
+static SigHandlerCoordinator* gSigHandlerCoordinator = nullptr;
+
 static void
-SigprofHandler(int signal, siginfo_t* info, void* context)
+SigprofHandler(int aSignal, siginfo_t* aInfo, void* aContext)
 {
   // Avoid TSan warning about clobbering errno.
   int savedErrno = errno;
 
-  TickSample sample;
-  sample.context = context;
+  MOZ_ASSERT(aSignal == SIGPROF);
+  MOZ_ASSERT(gSigHandlerCoordinator);
+
+  // By sending us this signal, the sampler thread has sent us message 1 in
+  // the comment above, with the meaning "|gSigHandlerCoordinator| is ready
+  // for use, please copy your register context into it."
+  gSigHandlerCoordinator->mUContext = *static_cast<ucontext_t*>(aContext);
+
+  // Send message 2: tell the sampler thread that the context has been copied
+  // into |gSigHandlerCoordinator->mUContext|.  sem_post can never fail by
+  // being interrupted by a signal, so there's no loop around this call.
+  int r = sem_post(&gSigHandlerCoordinator->mMessage2);
+  MOZ_ASSERT(r == 0);
+
+  // At this point, the sampler thread assumes we are suspended, so we must
+  // not touch any global state here.
 
-  // Extract the current pc and sp.
-  SetSampleContext(&sample, context);
-  sample.threadInfo = gCurrentThreadInfo;
-  sample.timestamp = mozilla::TimeStamp::Now();
-  sample.rssMemory = gRssMemory;
-  sample.ussMemory = gUssMemory;
+  // Wait for message 3: the sampler thread tells us to resume.
+  while (true) {
+    r = sem_wait(&gSigHandlerCoordinator->mMessage3);
+    if (r == -1 && errno == EINTR) {
+      // Interrupted by a signal.  Now what?
+      continue; // try again
+    }
+    // We don't expect any other kind of failure
+    MOZ_ASSERT(r == 0);
+   break;
+  }
 
-  Tick(&sample);
+  // Send message 4: tell the sampler thread that we are finished accessing
+  // |gSigHandlerCoordinator|.  After this point it is not safe to touch
+  // |gSigHandlerCoordinator|.
+  r = sem_post(&gSigHandlerCoordinator->mMessage4);
+  MOZ_ASSERT(r == 0);
 
-  sem_post(&gSignalHandlingDone);
   errno = savedErrno;
 }
 
 #if defined(GP_OS_android)
 #define SYS_tgkill __NR_tgkill
 #endif
 
 int tgkill(pid_t tgid, pid_t tid, int signalno) {
@@ -285,52 +368,84 @@ SigprofSender(void* aArg)
 
         if (info->Stack()->CanDuplicateLastSampleDueToSleep()) {
           info->DuplicateLastSample(gStartTime);
           continue;
         }
 
         info->UpdateThreadResponsiveness();
 
-        // We use gCurrentThreadInfo to pass the ThreadInfo for the
-        // thread we're profiling to the signal handler.
-        gCurrentThreadInfo = info;
-
         int threadId = info->ThreadId();
         MOZ_ASSERT(threadId != my_tid);
 
-        // Profile from the signal sender for information which is not signal
-        // safe, and will have low variation between the emission of the signal
-        // and the signal handler catch.
+        int64_t rssMemory = 0;
+        int64_t ussMemory = 0;
         if (isFirstProfiledThread && gProfileMemory) {
-          gRssMemory = nsMemoryReporterManager::ResidentFast();
-          gUssMemory = nsMemoryReporterManager::ResidentUnique();
-        } else {
-          gRssMemory = 0;
-          gUssMemory = 0;
+          rssMemory = nsMemoryReporterManager::ResidentFast();
+          ussMemory = nsMemoryReporterManager::ResidentUnique();
+        }
+
+        // Suspend the samplee thread and get its context.
+        SigHandlerCoordinator coord;   // on sampler thread's stack
+        gSigHandlerCoordinator = &coord;
+
+        // Send message 1 to the samplee (the thread to be sampled), by
+        // signalling at it.
+        int r = tgkill(vm_tgid_, threadId, SIGPROF);
+        MOZ_ASSERT(r == 0);
+
+        // Wait for message 2 from the samplee, indicating that the context is
+        // available and that the thread is suspended.
+        while (true) {
+          r = sem_wait(&gSigHandlerCoordinator->mMessage2);
+          if (r == -1 && errno == EINTR) {
+            // Interrupted by a signal.  Try again.
+            continue;
+          }
+          // We don't expect any other kind of failure.
+          MOZ_ASSERT(r == 0);
+          break;
         }
 
-        // Profile from the signal handler for information which is signal safe
-        // and needs to be precise too, such as the stack of the interrupted
-        // thread.
-        if (tgkill(vm_tgid_, threadId, SIGPROF) != 0) {
-          printf_stderr("profiler failed to signal tid=%d\n", threadId);
-#ifdef DEBUG
-          abort();
-#else
-          continue;
-#endif
+        // The samplee thread is now frozen and
+        // gSigHandlerCoordinator->mUContext is valid.  We can poke around in
+        // it and unwind its stack as we like.
+
+        TickSample sample;
+        sample.context = &gSigHandlerCoordinator->mUContext;
+
+        // Extract the current pc and sp.
+        SetSampleContext(&sample,
+                         gSigHandlerCoordinator->mUContext.uc_mcontext);
+        sample.threadInfo = info;
+        sample.timestamp = mozilla::TimeStamp::Now();
+        sample.rssMemory = rssMemory;
+        sample.ussMemory = ussMemory;
+
+        Tick(&sample);
+
+        // Send message 3 to the samplee, which tells it to resume.
+        r = sem_post(&gSigHandlerCoordinator->mMessage3);
+        MOZ_ASSERT(r == 0);
+
+        // Wait for message 4 from the samplee, which tells us that it has
+        // finished with |gSigHandlerCoordinator|.
+        while (true) {
+          r = sem_wait(&gSigHandlerCoordinator->mMessage4);
+          if (r == -1 && errno == EINTR) {
+            continue;
+          }
+          MOZ_ASSERT(r == 0);
+          break;
         }
 
-        // Wait for the signal handler to run before moving on to the next one.
-        sem_wait(&gSignalHandlingDone);
-
-        gCurrentThreadInfo = nullptr;
-        gRssMemory = 0;
-        gUssMemory = 0;
+        // This isn't strictly necessary, but doing so does help pick up
+        // anomalies in which the signal handler is running even though this
+        // loop thinks it shouldn't be.
+        gSigHandlerCoordinator = nullptr;
 
         isFirstProfiledThread = false;
       }
 #if defined(USE_LUL_STACKWALK)
       // The LUL unwind object accumulates frame statistics. Periodically we
       // should poke it to give it a chance to print those statistics. This
       // involves doing I/O (fprintf, __android_log_print, etc.) and so can't
       // safely be done from the unwinder threads, which is why it is done
@@ -367,22 +482,17 @@ PlatformStart(double aInterval)
 #endif
 
   gIntervalMicro = floor(aInterval * 1000 + 0.5);
   if (gIntervalMicro <= 0) {
     gIntervalMicro = 1;
   }
 
   // Initialize signal handler communication
-  gCurrentThreadInfo = nullptr;
-  gRssMemory = 0;
-  gUssMemory = 0;
-  if (sem_init(&gSignalHandlingDone, /* pshared: */ 0, /* value: */ 0) != 0) {
-    MOZ_CRASH("Error initializing semaphore");
-  }
+  gSigHandlerCoordinator = nullptr;
 
   // Request profiling signals.
   LOG("Request signal");
   struct sigaction sa;
   sa.sa_sigaction = MOZ_SIGNAL_TRAMPOLINE(SigprofHandler);
   sigemptyset(&sa.sa_mask);
   sa.sa_flags = SA_RESTART | SA_SIGINFO;
   if (sigaction(SIGPROF, &sa, &gOldSigprofHandler) != 0) {
@@ -551,12 +661,13 @@ PlatformInit()
 #endif
 
 void TickSample::PopulateContext(void* aContext)
 {
   MOZ_ASSERT(aContext);
   ucontext_t* pContext = reinterpret_cast<ucontext_t*>(aContext);
   if (!getcontext(pContext)) {
     context = pContext;
-    SetSampleContext(this, aContext);
+    SetSampleContext(this,
+                     reinterpret_cast<ucontext_t*>(aContext)->uc_mcontext);
   }
 }