Bug 961959 - Fix profiler breakage for the Nuwa process. r=khuey, r=BenWa, a=1.3+
author Cervantes Yu <cyu@mozilla.com>
Wed, 22 Jan 2014 18:56:45 +0800
changeset 176031 3a2b0045377ed8d529e333ba2a2b8dc1f95a37dd
parent 176030 ef4d9be503aaa502194cb7b6f705036d19a0a697
child 176032 91868aab924205c0260109fe157dce9928670445
push id 445
push user ffxbld
push date Mon, 10 Mar 2014 22:05:19 +0000
treeherder mozilla-release@dc38b741b04e [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers khuey, BenWa, 1
bugs 961959
milestone 28.0a2
Bug 961959 - Fix profiler breakage for the Nuwa process. r=khuey, r=BenWa, a=1.3+
configure.in
mozglue/build/Nuwa.cpp
tools/profiler/platform-linux.cc
--- a/configure.in
+++ b/configure.in
@@ -7022,17 +7022,17 @@ if test "$OS_TARGET" = Android; then
     WRAP_LDFLAGS="${WRAP_LDFLAGS} -L$_objdir/dist/lib -lmozglue"
     WRAP_LDFLAGS="${WRAP_LDFLAGS} -Wl,--wrap=getaddrinfo,--wrap=freeaddrinfo,--wrap=gai_strerror"
     WRAP_LDFLAGS="${WRAP_LDFLAGS} -Wl,--wrap=PR_GetEnv,--wrap=PR_SetEnv"
     if test -z "$gonkdir"; then
         WRAP_LDFLAGS="${WRAP_LDFLAGS} -Wl,--wrap=fork,--wrap=pthread_atfork,--wrap=raise"
         WRAP_LDFLAGS="${WRAP_LDFLAGS} -Wl,--wrap=memccpy,--wrap=memchr,--wrap=memrchr,--wrap=memcmp,--wrap=memcpy,--wrap=memmove,--wrap=memset,--wrap=memmem,--wrap=memswap,--wrap=index,--wrap=strchr,--wrap=strrchr,--wrap=strlen,--wrap=strcmp,--wrap=strcpy,--wrap=strcat,--wrap=strcasecmp,--wrap=strncasecmp,--wrap=strstr,--wrap=strcasestr,--wrap=strtok,--wrap=strtok_r,--wrap=strerror,--wrap=strerror_r,--wrap=strnlen,--wrap=strncat,--wrap=strncmp,--wrap=strncpy,--wrap=strlcat,--wrap=strlcpy,--wrap=strcspn,--wrap=strpbrk,--wrap=strsep,--wrap=strspn,--wrap=strcoll,--wrap=strxfrm"
     fi
     if test "$MOZ_WIDGET_TOOLKIT" = gonk -a -n "$MOZ_NUWA_PROCESS"; then
-        WRAP_LDFLAGS="${WRAP_LDFLAGS} -Wl,--wrap=pthread_create,--wrap=epoll_wait,--wrap=poll,--wrap=pthread_cond_timedwait,--wrap=__pthread_cond_timedwait,--wrap=pthread_cond_wait,--wrap=epoll_create,--wrap=epoll_ctl,--wrap=close,--wrap=pthread_key_create,--wrap=pthread_key_delete,--wrap=socketpair,--wrap=pthread_self,--wrap=pthread_mutex_lock,--wrap=pthread_join,--wrap=pipe,--wrap=pipe2"
+        WRAP_LDFLAGS="${WRAP_LDFLAGS} -Wl,--wrap=pthread_create,--wrap=epoll_wait,--wrap=poll,--wrap=pthread_cond_timedwait,--wrap=__pthread_cond_timedwait,--wrap=pthread_cond_wait,--wrap=epoll_create,--wrap=epoll_ctl,--wrap=close,--wrap=pthread_key_create,--wrap=pthread_key_delete,--wrap=socketpair,--wrap=pthread_self,--wrap=pthread_mutex_lock,--wrap=pthread_join,--wrap=pipe,--wrap=pipe2,--wrap=tgkill"
     fi
 fi
 
 dnl ========================================================
 dnl = Use malloc wrapper lib
 dnl ========================================================
 MOZ_ARG_ENABLE_BOOL(wrap-malloc,
 [  --enable-wrap-malloc    Wrap malloc calls (gnu linker only)],
--- a/mozglue/build/Nuwa.cpp
+++ b/mozglue/build/Nuwa.cpp
@@ -10,26 +10,32 @@
 #include <errno.h>
 #include <fcntl.h>
 #include <setjmp.h>
 #include <signal.h>
 #include <poll.h>
 #include <pthread.h>
 #include <alloca.h>
 #include <sys/epoll.h>
+#include <sys/prctl.h>
 #include <sys/types.h>
 #include <sys/socket.h>
 #include <sys/stat.h>
+#include <sys/syscall.h>
 #include <vector>
 
 #include "mozilla/LinkedList.h"
 #include "Nuwa.h"
 
 using namespace mozilla;
 
+extern "C" MFBT_API int tgkill(pid_t tgid, pid_t tid, int signalno) {
+  return syscall(__NR_tgkill, tgid, tid, signalno);  // raw tgkill(2) syscall
+}
+
 /**
  * Provides the wrappers to a selected set of pthread and system-level functions
  * as the basis for implementing Zygote-like preforking mechanism.
  */
 
 /**
  * Real functions for the wrappers.
  */
@@ -57,17 +63,16 @@ int __real___pthread_cond_timedwait(pthr
 int __real_pthread_mutex_lock(pthread_mutex_t *mtx);
 int __real_poll(struct pollfd *fds, nfds_t nfds, int timeout);
 int __real_epoll_create(int size);
 int __real_socketpair(int domain, int type, int protocol, int sv[2]);
 int __real_pipe2(int __pipedes[2], int flags);
 int __real_pipe(int __pipedes[2]);
 int __real_epoll_ctl(int aEpollFd, int aOp, int aFd, struct epoll_event *aEvent);
 int __real_close(int aFd);
-
 }
 
 #define REAL(s) __real_##s
 
 /**
  * A Nuwa process is started by preparing.  After preparing, it waits
  * for all threads becoming frozen. Then, it is ready while all
  * threads are frozen.
@@ -134,16 +139,18 @@ TLSInfoList;
  * The stack size is chosen carefully so the frozen threads doesn't consume too
  * much memory in the Nuwa process. The threads shouldn't run deep recursive
  * methods or do large allocations on the stack to avoid stack overflow.
  */
 #ifndef NUWA_STACK_SIZE
 #define NUWA_STACK_SIZE (1024 * 32)
 #endif
 
+#define NATIVE_THREAD_NAME_LENGTH 16
+
 struct thread_info : public mozilla::LinkedListElement<thread_info> {
   pthread_t origThreadID;
   pthread_t recreatedThreadID;
   pthread_attr_t threadAttr;
   jmp_buf jmpEnv;
   jmp_buf retEnv;
 
   int flags;
@@ -155,16 +162,20 @@ struct thread_info : public mozilla::Lin
   // after the thread is recreated.
   void (*recrFunc)(void *arg);
   void *recrArg;
 
   TLSInfoList tlsInfo;
 
   pthread_mutex_t *reacquireMutex;
   void *stk;
+
+  pid_t origNativeThreadID;
+  pid_t recreatedNativeThreadID;
+  char nativeThreadName[NATIVE_THREAD_NAME_LENGTH];
 };
 
 typedef struct thread_info thread_info_t;
 
 static thread_info_t *sCurrentRecreatingThread = nullptr;
 
 /**
  * This function runs the custom recreation function registered when calling
@@ -207,16 +218,17 @@ static TLSKeySet sTLSKeys;
 /**
  * This mutex is used to block the running threads and freeze their contexts.
  * PrepareNuwaProcess() is the first one to acquire the lock. Further attempts
  * to acquire this mutex (in the freeze point macros) will block and freeze the
  * calling thread.
  */
 static pthread_mutex_t sThreadFreezeLock = PTHREAD_MUTEX_INITIALIZER;
 
+static thread_info_t sMainThread;
 static LinkedList<thread_info_t> sAllThreads;
 static int sThreadCount = 0;
 static int sThreadFreezeCount = 0;
 /**
  * This mutex protects the access to thread info:
  * sAllThreads, sThreadCount, sThreadFreezeCount, sRecreateVIPCount.
  */
 static pthread_mutex_t sThreadCountLock = PTHREAD_MUTEX_INITIALIZER;
@@ -272,16 +284,42 @@ GetThreadInfo(pthread_t threadID) {
   }
   thread_info_t *tinfo = GetThreadInfoInner(threadID);
   if (sIsNuwaProcess) {
     pthread_mutex_unlock(&sThreadCountLock);
   }
   return tinfo;
 }
 
+/**
+ * Look up a thread's bookkeeping record by its original native thread ID.
+ *
+ * @return the sAllThreads entry whose origNativeThreadID equals threadID,
+ *         or nullptr when no entry matches.
+ */
+static thread_info_t*
+GetThreadInfo(pid_t threadID) {
+  // sThreadCountLock is taken only while running as the Nuwa process.
+  if (sIsNuwaProcess) {
+    REAL(pthread_mutex_lock)(&sThreadCountLock);
+  }
+
+  // Walk the list until an entry matches or the end (nullptr) is reached.
+  thread_info_t *match = sAllThreads.getFirst();
+  while (match && match->origNativeThreadID != threadID) {
+    match = match->getNext();
+  }
+
+  if (sIsNuwaProcess) {
+    pthread_mutex_unlock(&sThreadCountLock);
+  }
+
+  return match;
+}
+
 #if !defined(HAVE_THREAD_TLS_KEYWORD)
 /**
  * Get thread info of the current thread.
  *
  * @return thread_info_t for the current thread.
  */
 static thread_info_t *
 GetCurThreadInfo() {
@@ -444,16 +482,17 @@ EpollManager* EpollManager::sInstance;
 static thread_info_t *
 thread_info_new(void) {
   /* link tinfo to sAllThreads */
   thread_info_t *tinfo = new thread_info_t();
   tinfo->flags = 0;
   tinfo->recrFunc = nullptr;
   tinfo->recrArg = nullptr;
   tinfo->recreatedThreadID = 0;
+  tinfo->recreatedNativeThreadID = 0;
   tinfo->reacquireMutex = nullptr;
   tinfo->stk = malloc(NUWA_STACK_SIZE);
   pthread_attr_init(&tinfo->threadAttr);
 
   REAL(pthread_mutex_lock)(&sThreadCountLock);
   // Insert to the tail.
   sAllThreads.insertBack(tinfo);
 
@@ -492,16 +531,17 @@ static void *
   void *r;
 
   // Save thread info; especially, stackaddr & stacksize.
   // Reuse the stack in the new thread.
   pthread_getattr_np(REAL(pthread_self)(), &tinfo->threadAttr);
 
   SET_THREAD_INFO(tinfo);
   tinfo->origThreadID = REAL(pthread_self)();
+  tinfo->origNativeThreadID = gettid();
 
   pthread_cleanup_push(thread_info_cleanup, tinfo);
 
   r = tinfo->startupFunc(tinfo->startupArg);
 
   if (!sIsNuwaProcess) {
     return r;
   }
@@ -614,16 +654,17 @@ RestoreTLSInfo(thread_info_t *tinfo) {
     const void *value = it->second;
     if (pthread_setspecific(key, value)) {
       abort();
     }
   }
 
   SET_THREAD_INFO(tinfo);
   tinfo->recreatedThreadID = REAL(pthread_self)();
+  tinfo->recreatedNativeThreadID = gettid();
 }
 
 extern "C" MFBT_API int
 __wrap_pthread_key_create(pthread_key_t *key, void (*destructor)(void*)) {
   int rv = REAL(pthread_key_create)(key, destructor);
   if (rv != 0) {
     return rv;
   }
@@ -1210,16 +1251,37 @@ extern "C" MFBT_API int
     EpollManager::Singleton()->FindEpollInfo(aFd);
   if (info) {
     EpollManager::Singleton()->RemoveEpollInfo(aFd);
   }
 
   return rv;
 }
 
+extern "C" MFBT_API int
+__wrap_tgkill(pid_t tgid, pid_t tid, int signalno)
+{
+  // In the Nuwa process thread IDs are not remapped: pass the call through.
+  if (sIsNuwaProcess) {
+    return tgkill(tgid, tid, signalno);
+  }
+
+  // The main thread's original and recreated IDs are kept in sMainThread.
+  if (tid == sMainThread.origNativeThreadID) {
+    return tgkill(tgid, sMainThread.recreatedNativeThreadID, signalno);
+  }
+
+  // Map the original tid to the recreated one; unknown tids pass through.
+  thread_info_t *tinfo = GetThreadInfo(tid);
+  if (!tinfo) {
+    return tgkill(tgid, tid, signalno);
+  }
+  return tgkill(tgid, tinfo->recreatedNativeThreadID, signalno);
+}
+
 static void *
 thread_recreate_startup(void *arg) {
   /*
    * Dark Art!! Never do the same unless you are ABSOLUTELY sure what you are
    * doing!
    *
    * The stack space collapsed by this frame had been reserved by
    * thread_create_startup().  And thread_create_startup() will
@@ -1227,16 +1289,17 @@ thread_recreate_startup(void *arg) {
    * all collapsed values does not affect the result.
    *
    * All outer frames of thread_create_startup() and
    * thread_recreate_startup() are equivalent, so
    * thread_create_startup() will return successfully.
    */
   thread_info_t *tinfo = (thread_info_t *)arg;
 
+  prctl(PR_SET_NAME, (unsigned long)&tinfo->nativeThreadName, 0, 0, 0);
   RestoreTLSInfo(tinfo);
 
   if (setjmp(tinfo->retEnv) != 0) {
     return nullptr;
   }
 
   // longjump() to recreate the stack on the new thread.
   longjmp(tinfo->jmpEnv, 1);
@@ -1262,16 +1325,19 @@ thread_recreate(thread_info_t *tinfo) {
 /**
  * Recreate all threads in a process forked from an Nuwa process.
  */
 static void
 RecreateThreads() {
   sIsNuwaProcess = false;
   sIsFreezing = false;
 
+  sMainThread.recreatedThreadID = pthread_self();
+  sMainThread.recreatedNativeThreadID = gettid();
+
   // Run registered constructors.
   for (std::vector<nuwa_construct_t>::iterator ctr = sConstructors.begin();
        ctr != sConstructors.end();
        ctr++) {
     (*ctr).construct((*ctr).arg);
   }
   sConstructors.clear();
 
@@ -1551,16 +1617,20 @@ MFBT_API void
 PrepareNuwaProcess() {
   sIsNuwaProcess = true;
   // Explicitly ignore SIGCHLD so we don't have to call watpid() to reap
   // dead child processes.
   signal(SIGCHLD, SIG_IGN);
 
   // Make marked threads block in one freeze point.
   REAL(pthread_mutex_lock)(&sThreadFreezeLock);
+
+  // Populate sMainThread for mapping of tgkill.
+  sMainThread.origThreadID = pthread_self();
+  sMainThread.origNativeThreadID = gettid();
 }
 
 // Make current process as a Nuwa process.
 MFBT_API void
 MakeNuwaProcess() {
   void (*GetProtoFdInfos)(NuwaProtoFdInfo *, int, int *) = nullptr;
   void (*OnNuwaProcessReady)() = nullptr;
   sIsFreezing = true;
@@ -1602,16 +1672,20 @@ NuwaMarkCurrentThread(void (*recreate)(v
   thread_info_t *tinfo = CUR_THREAD_INFO;
   if (tinfo == nullptr) {
     abort();
   }
 
   tinfo->flags |= TINFO_FLAG_NUWA_SUPPORT;
   tinfo->recrFunc = recreate;
   tinfo->recrArg = arg;
+
+  // XXX Thread name might be set later than this call. If this is the case, we
+  // might need to delay getting the thread name.
+  prctl(PR_GET_NAME, (unsigned long)&tinfo->nativeThreadName, 0, 0, 0);
 }
 
 /**
  * Mark the current thread as not supporting Nuwa. Don't recreate this thread in
  * the spawned process.
  */
 MFBT_API void
 NuwaSkipCurrentThread() {
--- a/tools/profiler/platform-linux.cc
+++ b/tools/profiler/platform-linux.cc
@@ -78,16 +78,20 @@
 #define USE_EHABI_STACKWALK
 #include "EHABIStackWalk.h"
 #endif
 
 #include <string.h>
 #include <stdio.h>
 #include <list>
 
+#ifdef MOZ_NUWA_PROCESS
+#include "ipc/Nuwa.h"
+#endif
+
 #define SIGNAL_SAVE_PROFILE SIGUSR2
 
 #if defined(__GLIBC__)
 // glibc doesn't implement gettid(2).
 #include <sys/syscall.h>
 pid_t gettid()
 {
   return (pid_t) syscall(SYS_gettid);
@@ -224,19 +228,25 @@ static void ProfilerSignalHandler(int si
   sample->timestamp = mozilla::TimeStamp::Now();
 
   Sampler::GetActiveSampler()->Tick(sample);
 
   sCurrentThreadProfile = NULL;
   sem_post(&sSignalHandlingDone);
 }
 
+// When the Nuwa process is enabled, tgkill() is only declared here and its
+// definition comes from elsewhere so thread IDs can be mapped; else use the syscall.
+#ifdef MOZ_NUWA_PROCESS
+extern "C" MFBT_API int tgkill(pid_t tgid, pid_t tid, int signalno);
+#else
 int tgkill(pid_t tgid, pid_t tid, int signalno) {
   return syscall(SYS_tgkill, tgid, tid, signalno);
 }
+#endif
 
 class PlatformData : public Malloced {
  public:
   PlatformData()
   {}
 };
 
 /* static */ PlatformData*
@@ -258,16 +268,28 @@ static void* SignalSender(void* arg) {
   // pthread_atfork isn't available on Android.
   void* initialize_atfork = NULL;
 # else
   // This call is done just once, at the first call to SenderEntry.
   // It returns NULL.
   static void* initialize_atfork = setup_atfork();
 # endif
 
+#ifdef MOZ_NUWA_PROCESS
+  // If the Nuwa process is enabled, we need to mark and freeze the sampler
+  // thread in the Nuwa process and have this thread recreated in the spawned
+  // child.
+  if(IsNuwaProcess()) {
+    NuwaMarkCurrentThread(nullptr, nullptr);
+    // Freeze the thread here so the spawned child will get the correct tgid
+    // from the getpid() call below.
+    NuwaFreezeCurrentThread();
+  }
+#endif
+
   int vm_tgid_ = getpid();
 
   while (SamplerRegistry::sampler->IsActive()) {
     SamplerRegistry::sampler->HandleSaveRequest();
 
     if (!SamplerRegistry::sampler->IsPaused()) {
       mozilla::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex);
       std::vector<ThreadInfo*> threads =