Bug 734691 - Port multi-thread support to win/mac. r=snorp,smaug
author Benoit Girard <b56girard@gmail.com>
Wed, 03 Apr 2013 18:59:17 -0400
changeset 129226 f2e44e02f8745de3e0c9c278d0d6913b46243f08
parent 129225 cb02e3858e1ff76ad8ef0f09ff4ab4efdb7b5da0
child 129227 c0e86fd53a375f8e843a4d44017d4948a539d5e1
push id 24562
push user ryanvm@gmail.com
push date Fri, 19 Apr 2013 01:24:04 +0000
treeherder mozilla-central@f8d27fe5d7c0 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers snorp, smaug
bugs 734691
milestone 23.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 734691 - Port multi-thread support to win/mac. r=snorp,smaug
dom/workers/RuntimeService.cpp
tools/profiler/GeckoProfiler.h
tools/profiler/GeckoProfilerImpl.h
tools/profiler/Makefile.in
tools/profiler/ProfileEntry.cpp
tools/profiler/ProfileEntry.h
tools/profiler/PseudoStack.h
tools/profiler/TableTicker.cpp
tools/profiler/TableTicker.h
tools/profiler/platform-linux.cc
tools/profiler/platform-macos.cc
tools/profiler/platform-win32.cc
tools/profiler/platform.cpp
tools/profiler/platform.h
--- a/dom/workers/RuntimeService.cpp
+++ b/dom/workers/RuntimeService.cpp
@@ -261,16 +261,20 @@ ErrorReporter(JSContext* aCx, const char
   WorkerPrivate* worker = GetWorkerPrivateFromContext(aCx);
   return worker->ReportError(aCx, aMessage, aReport);
 }
 
 JSBool
 OperationCallback(JSContext* aCx)
 {
   WorkerPrivate* worker = GetWorkerPrivateFromContext(aCx);
+
+  // Now is a good time to turn on profiling if it's pending.
+  profiler_js_operation_callback();
+
   return worker->OperationCallback(aCx);
 }
 
 class LogViolationDetailsRunnable : public nsRunnable
 {
   WorkerPrivate* mWorkerPrivate;
   nsString mFileName;
   uint32_t mLineNum;
@@ -513,25 +517,29 @@ public:
 
     JSContext* cx = CreateJSContextForWorker(workerPrivate);
     if (!cx) {
       // XXX need to fire an error at parent.
       NS_ERROR("Failed to create runtime and context!");
       return NS_ERROR_FAILURE;
     }
 
+    JSRuntime* rt = JS_GetRuntime(cx);
+
     profiler_register_thread("WebWorker");
+#ifdef MOZ_ENABLE_PROFILER_SPS
+    if (PseudoStack* stack = mozilla_get_pseudo_stack())
+      stack->sampleRuntime(rt);
+#endif
 
     {
       JSAutoRequest ar(cx);
       workerPrivate->DoRunLoop(cx);
     }
 
-    JSRuntime* rt = JS_GetRuntime(cx);
-
     // XXX Bug 666963 - CTypes can create another JSContext for use with
     // closures, and then it holds that context in a reserved slot on the CType
     // prototype object. We have to destroy that context before we can destroy
     // the runtime, and we also have to make sure that it isn't the last context
     // to be destroyed (otherwise it will assert). To accomplish this we create
     // an unused dummy context, destroy our real context, and then destroy the
     // dummy. Once this bug is resolved we can remove this nastiness and simply
     // call JS_DestroyContextNoGC on our context.
@@ -540,16 +548,20 @@ public:
       JS_DestroyContext(cx);
       JS_DestroyContext(dummyCx);
     }
     else {
       NS_WARNING("Failed to create dummy context!");
       JS_DestroyContext(cx);
     }
 
+#ifdef MOZ_ENABLE_PROFILER_SPS
+    if (PseudoStack* stack = mozilla_get_pseudo_stack())
+      stack->sampleRuntime(nullptr);
+#endif
     JS_DestroyRuntime(rt);
 
     workerPrivate->ScheduleDeletion(false);
     profiler_unregister_thread();
     return NS_OK;
   }
 };
 
--- a/tools/profiler/GeckoProfiler.h
+++ b/tools/profiler/GeckoProfiler.h
@@ -133,15 +133,19 @@ static inline void profiler_print_locati
 static inline void profiler_lock() {}
 
 // Re-enable the profiler and notify 'profiler-unlocked'.
 static inline void profiler_unlock() {}
 
 static inline void profiler_register_thread(const char* name) {}
 static inline void profiler_unregister_thread() {}
 
+// Called by the JSRuntime's operation callback. This is used to enable
+// profiling on auxiliary threads.
+static inline void profiler_js_operation_callback() {}
+
 #else
 
 #include "GeckoProfilerImpl.h"
 
 #endif
 
 #endif // ifndef SAMPLER_H
--- a/tools/profiler/GeckoProfilerImpl.h
+++ b/tools/profiler/GeckoProfilerImpl.h
@@ -147,16 +147,27 @@ void profiler_register_thread(const char
 }
 
 static inline
 void profiler_unregister_thread()
 {
   mozilla_sampler_unregister_thread();
 }
 
+static inline
+void profiler_js_operation_callback()
+{
+  PseudoStack *stack = tlsPseudoStack.get();
+  if (!stack) {
+    return;
+  }
+
+  stack->jsOperationCallback();
+}
+
 // we want the class and function name but can't easily get that using preprocessor macros
 // __func__ doesn't have the class name and __PRETTY_FUNCTION__ has the parameters
 
 #define SAMPLER_APPEND_LINE_NUMBER_PASTE(id, line) id ## line
 #define SAMPLER_APPEND_LINE_NUMBER_EXPAND(id, line) SAMPLER_APPEND_LINE_NUMBER_PASTE(id, line)
 #define SAMPLER_APPEND_LINE_NUMBER(id) SAMPLER_APPEND_LINE_NUMBER_EXPAND(id, __LINE__)
 
 #define PROFILER_LABEL(name_space, info) mozilla::SamplerStackFrameRAII SAMPLER_APPEND_LINE_NUMBER(sampler_raii)(name_space "::" info, __LINE__)
--- a/tools/profiler/Makefile.in
+++ b/tools/profiler/Makefile.in
@@ -29,16 +29,19 @@ MODULE_NAME     = nsProfilerModule
 LIBRARY_NAME    = profiler
 EXPORT_LIBRARY  = 1
 LIBXUL_LIBRARY  = 1
 IS_COMPONENT    = 1
 ifndef _MSC_VER
 FAIL_ON_WARNINGS = 1
 endif # !_MSC_VER
 
+# Uncomment for better debugging in opt builds
+#MOZ_OPTIMIZE_FLAGS += -O0 -g
+
 CPPSRCS		= \
   platform.cpp \
   nsProfilerFactory.cpp \
   nsProfiler.cpp \
   TableTicker.cpp \
   BreakpadSampler.cpp \
   UnwinderThread2.cpp \
   ProfileEntry.cpp \
--- a/tools/profiler/ProfileEntry.cpp
+++ b/tools/profiler/ProfileEntry.cpp
@@ -129,26 +129,30 @@ std::ostream& operator<<(std::ostream& s
 ////////////////////////////////////////////////////////////////////////
 
 
 ////////////////////////////////////////////////////////////////////////
 // BEGIN ThreadProfile
 
 #define DYNAMIC_MAX_STRING 512
 
-ThreadProfile::ThreadProfile(const char* aName, int aEntrySize, PseudoStack *aStack, int aThreadId, bool aIsMainThread)
+ThreadProfile::ThreadProfile(const char* aName, int aEntrySize,
+                             PseudoStack *aStack, int aThreadId,
+                             PlatformData* aPlatform,
+                             bool aIsMainThread)
   : mWritePos(0)
   , mLastFlushPos(0)
   , mReadPos(0)
   , mEntrySize(aEntrySize)
   , mPseudoStack(aStack)
   , mMutex("ThreadProfile::mMutex")
   , mName(strdup(aName))
   , mThreadId(aThreadId)
   , mIsMainThread(aIsMainThread)
+  , mPlatformData(aPlatform)
 {
   mEntries = new ProfileEntry[mEntrySize];
 }
 
 ThreadProfile::~ThreadProfile()
 {
   free(mName);
   delete[] mEntries;
--- a/tools/profiler/ProfileEntry.h
+++ b/tools/profiler/ProfileEntry.h
@@ -8,17 +8,16 @@
 
 #include <ostream>
 #include "GeckoProfilerImpl.h"
 #include "JSAObjectBuilder.h"
 #include "platform.h"
 #include "mozilla/Mutex.h"
 
 class ThreadProfile;
-class ThreadProfile;
 
 class ProfileEntry
 {
 public:
   ProfileEntry();
 
   // aTagData must not need release (i.e. be a string from the text segment)
   ProfileEntry(char aTagName, const char *aTagData);
@@ -52,17 +51,19 @@ private:
   char mTagName;
 };
 
 typedef void (*IterateTagsCallback)(const ProfileEntry& entry, const char* tagStringData);
 
 class ThreadProfile
 {
 public:
-  ThreadProfile(const char* aName, int aEntrySize, PseudoStack *aStack, int aThreadId, bool aIsMainThread);
+  ThreadProfile(const char* aName, int aEntrySize, PseudoStack *aStack,
+                int aThreadId, PlatformData* aPlatformData,
+                bool aIsMainThread);
   ~ThreadProfile();
   void addTag(ProfileEntry aTag);
   void flush();
   void erase();
   char* processDynamicTag(int readPos, int* tagsConsumed, char* tagBuff);
   void IterateTags(IterateTagsCallback aCallback);
   friend std::ostream& operator<<(std::ostream& stream,
                                   const ThreadProfile& profile);
@@ -71,26 +72,28 @@ public:
   PseudoStack* GetPseudoStack();
   mozilla::Mutex* GetMutex();
   void BuildJSObject(JSAObjectBuilder& b, JSCustomObject* profile);
 
   bool IsMainThread() const { return mIsMainThread; }
   const char* Name() const { return mName; }
   int ThreadId() const { return mThreadId; }
 
+  PlatformData* GetPlatformData() { return mPlatformData; }
 private:
   // Circular buffer 'Keep One Slot Open' implementation
   // for simplicity
   ProfileEntry* mEntries;
   int            mWritePos; // points to the next entry we will write to
   int            mLastFlushPos; // points to the next entry since the last flush()
   int            mReadPos;  // points to the next entry we will read to
   int            mEntrySize;
   PseudoStack*   mPseudoStack;
   mozilla::Mutex mMutex;
   char*          mName;
   int            mThreadId;
   bool           mIsMainThread;
+  PlatformData*  mPlatformData;  // Platform specific data.
 };
 
 std::ostream& operator<<(std::ostream& stream, const ThreadProfile& profile);
 
 #endif /* ndef MOZ_PROFILE_ENTRY_H */
--- a/tools/profiler/PseudoStack.h
+++ b/tools/profiler/PseudoStack.h
@@ -215,16 +215,20 @@ public:
   void enableJSSampling() {
     if (mRuntime) {
       js::EnableRuntimeProfilingStack(mRuntime, true);
       mStartJSSampling = false;
     } else {
       mStartJSSampling = true;
     }
   }
+  void jsOperationCallback() {
+    if (mStartJSSampling)
+      enableJSSampling();
+  }
   void disableJSSampling() {
     mStartJSSampling = false;
     if (mRuntime)
       js::EnableRuntimeProfilingStack(mRuntime, false);
   }
 
   // Keep a list of active checkpoints
   StackEntry volatile mStack[1024];
--- a/tools/profiler/TableTicker.cpp
+++ b/tools/profiler/TableTicker.cpp
@@ -174,16 +174,20 @@ void TableTicker::BuildJSObject(JSAObjec
   b.DefineProperty(profile, "threads", threads);
 
   SetPaused(true);
 
   {
     mozilla::MutexAutoLock lock(*sRegisteredThreadsMutex);
 
     for (size_t i = 0; i < sRegisteredThreads->size(); i++) {
+      // Thread not being profiled, skip it
+      if (!sRegisteredThreads->at(i)->Profile())
+        continue;
+
       MutexAutoLock lock(*sRegisteredThreads->at(i)->Profile()->GetMutex());
 
       JSCustomObject* threadSamples = b.CreateObject();
       sRegisteredThreads->at(i)->Profile()->BuildJSObject(b, threadSamples);
       b.ArrayPush(threads, threadSamples);
     }
   }
 
@@ -286,34 +290,34 @@ void StackWalkCallback(void* aPC, void* 
   array->sp_array[array->count] = aSP;
   array->array[array->count] = aPC;
   array->count++;
 }
 
 void TableTicker::doNativeBacktrace(ThreadProfile &aProfile, TickSample* aSample)
 {
 #ifndef XP_MACOSX
-  uintptr_t thread = GetThreadHandle(platform_data());
+  uintptr_t thread = GetThreadHandle(aSample->threadProfile->GetPlatformData());
   MOZ_ASSERT(thread);
 #endif
   void* pc_array[1000];
   void* sp_array[1000];
   PCArray array = {
     pc_array,
     sp_array,
     mozilla::ArrayLength(pc_array),
     0
   };
 
   // Start with the current function.
   StackWalkCallback(aSample->pc, aSample->sp, &array);
 
   uint32_t maxFrames = uint32_t(array.size - array.count);
 #ifdef XP_MACOSX
-  pthread_t pt = GetProfiledThread(platform_data());
+  pthread_t pt = GetProfiledThread(aSample->threadProfile->GetPlatformData());
   void *stackEnd = reinterpret_cast<void*>(-1);
   if (pt)
     stackEnd = static_cast<char*>(pthread_get_stackaddr_np(pt));
   nsresult rv = NS_OK;
   if (aSample->fp >= aSample->sp && aSample->fp <= stackEnd)
     rv = FramePointerStackWalk(StackWalkCallback, /* skipFrames */ 0,
                                maxFrames, &array,
                                reinterpret_cast<void**>(aSample->fp), stackEnd);
@@ -384,21 +388,16 @@ void doSampleStackTrace(PseudoStack *aSt
     aProfile.addTag(ProfileEntry('L', (void*)sample->lr));
 #endif
   }
 #endif
 }
 
 void TableTicker::Tick(TickSample* sample)
 {
-  if (!sample->threadProfile) {
-    // Platform doesn't support multithread, so use the main thread profile we created
-    sample->threadProfile = GetPrimaryThreadProfile();
-  }
-
   ThreadProfile& currThreadProfile = *sample->threadProfile;
 
   // Marker(s) come before the sample
   PseudoStack* stack = currThreadProfile.GetPseudoStack();
   for (int i = 0; stack->getMarker(i) != NULL; i++) {
     addDynamicTag(currThreadProfile, 'm', stack->getMarker(i));
   }
   stack->mQueueClearMarker = true;
@@ -469,17 +468,17 @@ void mozilla_sampler_print_location1()
 
   PseudoStack *stack = tlsPseudoStack.get();
   if (!stack) {
     MOZ_ASSERT(false);
     return;
   }
 
   ThreadProfile threadProfile("Temp", PROFILE_DEFAULT_ENTRY, stack,
-                              0, false);
+                              0, Sampler::AllocPlatformData(0), false);
   doSampleStackTrace(stack, threadProfile, NULL);
 
   threadProfile.flush();
 
   printf_stderr("Backtrace:\n");
   threadProfile.IterateTags(print_callback);
 }
 
--- a/tools/profiler/TableTicker.h
+++ b/tools/profiler/TableTicker.h
@@ -21,40 +21,46 @@ extern int sFrameNumber;
 extern int sLastFrameNumber;
 extern unsigned int sCurrentEventGeneration;
 extern unsigned int sLastSampledEventGeneration;
 
 class BreakpadSampler;
 
 class TableTicker: public Sampler {
  public:
-  TableTicker(int aInterval, int aEntrySize, PseudoStack *aStack,
+  TableTicker(int aInterval, int aEntrySize,
               const char** aFeatures, uint32_t aFeatureCount)
     : Sampler(aInterval, true, aEntrySize)
     , mPrimaryThreadProfile(nullptr)
     , mStartTime(TimeStamp::Now())
     , mSaveRequested(false)
   {
     mUseStackWalk = hasFeature(aFeatures, aFeatureCount, "stackwalk");
 
     //XXX: It's probably worth splitting the jank profiler out from the regular profiler at some point
     mJankOnly = hasFeature(aFeatures, aFeatureCount, "jank");
     mProfileJS = hasFeature(aFeatures, aFeatureCount, "js");
+    mProfileThreads = true || hasFeature(aFeatures, aFeatureCount, "threads");
     mAddLeafAddresses = hasFeature(aFeatures, aFeatureCount, "leaf");
 
     {
       mozilla::MutexAutoLock lock(*sRegisteredThreadsMutex);
 
       // Create ThreadProfile for each registered thread
       for (uint32_t i = 0; i < sRegisteredThreads->size(); i++) {
         ThreadInfo* info = sRegisteredThreads->at(i);
+
+        if (!info->IsMainThread() && !mProfileThreads)
+          continue;
+
         ThreadProfile* profile = new ThreadProfile(info->Name(),
                                                    aEntrySize,
                                                    info->Stack(),
                                                    info->ThreadId(),
+                                                   info->GetPlatformData(),
                                                    info->IsMainThread());
         profile->addTag(ProfileEntry('m', "Start"));
 
         info->SetProfile(profile);
       }
 
       SetActiveSampler(this);
     }
@@ -76,18 +82,16 @@ class TableTicker: public Sampler {
         if (profile) {
           delete profile;
           info->SetProfile(nullptr);
         }
       }
     }
   }
 
-  virtual void SampleStack(TickSample* sample) {}
-
   // Called within a signal. This function must be reentrant
   virtual void Tick(TickSample* sample);
 
   // Called within a signal. This function must be reentrant
   virtual void RequestSave()
   {
     mSaveRequested = true;
   }
@@ -110,17 +114,18 @@ class TableTicker: public Sampler {
 
     return mPrimaryThreadProfile;
   }
 
   void ToStreamAsJSON(std::ostream& stream);
   virtual JSObject *ToJSObject(JSContext *aCx);
   JSCustomObject *GetMetaJSCustomObject(JSAObjectBuilder& b);
 
-  const bool ProfileJS() { return mProfileJS; }
+  bool ProfileJS() const { return mProfileJS; }
+  bool ProfileThreads() const { return mProfileThreads; }
 
   virtual BreakpadSampler* AsBreakpadSampler() { return nullptr; }
 
 protected:
   // Not implemented on platforms which do not support backtracing
   void doNativeBacktrace(ThreadProfile &aProfile, TickSample* aSample);
 
   void BuildJSObject(JSAObjectBuilder& b, JSCustomObject* profile);
@@ -128,23 +133,24 @@ protected:
   // This represent the application's main thread (SAMPLER_INIT)
   ThreadProfile* mPrimaryThreadProfile;
   TimeStamp mStartTime;
   bool mSaveRequested;
   bool mAddLeafAddresses;
   bool mUseStackWalk;
   bool mJankOnly;
   bool mProfileJS;
+  bool mProfileThreads;
 };
 
 class BreakpadSampler: public TableTicker {
  public:
-  BreakpadSampler(int aInterval, int aEntrySize, PseudoStack *aStack,
+  BreakpadSampler(int aInterval, int aEntrySize,
               const char** aFeatures, uint32_t aFeatureCount)
-    : TableTicker(aInterval, aEntrySize, aStack, aFeatures, aFeatureCount)
+    : TableTicker(aInterval, aEntrySize, aFeatures, aFeatureCount)
   {}
 
   // Called within a signal. This function must be reentrant
   virtual void Tick(TickSample* sample);
 
   virtual BreakpadSampler* AsBreakpadSampler() { return this; }
 };
 
--- a/tools/profiler/platform-linux.cc
+++ b/tools/profiler/platform-linux.cc
@@ -59,16 +59,17 @@
 #include <strings.h>    // index
 #include <errno.h>
 #include <stdarg.h>
 #include "platform.h"
 #include "GeckoProfilerImpl.h"
 #include "mozilla/Mutex.h"
 #include "ProfileEntry.h"
 #include "nsThreadUtils.h"
+#include "TableTicker.h"
 
 #include <string.h>
 #include <stdio.h>
 #include <list>
 
 #define SIGNAL_SAVE_PROFILE SIGUSR2
 
 #if defined(__GLIBC__)
@@ -120,16 +121,29 @@ static void* setup_atfork() {
   return NULL;
 }
 #endif /* !defined(ANDROID) */
 
 #ifdef ANDROID
 #include "android-signal-defs.h"
 #endif
 
+struct SamplerRegistry {
+  static void AddActiveSampler(Sampler *sampler) {
+    ASSERT(!SamplerRegistry::sampler);
+    SamplerRegistry::sampler = sampler;
+  }
+  static void RemoveActiveSampler(Sampler *sampler) {
+    SamplerRegistry::sampler = NULL;
+  }
+  static Sampler *sampler;
+};
+
+Sampler *SamplerRegistry::sampler = NULL;
+
 static ThreadProfile* sCurrentThreadProfile = NULL;
 
 static void ProfilerSaveSignalHandler(int signal, siginfo_t* info, void* context) {
   Sampler::GetActiveSampler()->RequestSave();
 }
 
 #ifdef ANDROID
 #define V8_HOST_ARCH_ARM 1
@@ -186,208 +200,199 @@ static void ProfilerSignalHandler(int si
   sample->threadProfile = sCurrentThreadProfile;
   sample->timestamp = mozilla::TimeStamp::Now();
 
   Sampler::GetActiveSampler()->Tick(sample);
 
   sCurrentThreadProfile = NULL;
 }
 
-#ifndef XP_MACOSX
 int tgkill(pid_t tgid, pid_t tid, int signalno) {
   return syscall(SYS_tgkill, tgid, tid, signalno);
 }
-#endif
 
-class Sampler::PlatformData : public Malloced {
+class PlatformData : public Malloced {
  public:
-  explicit PlatformData(Sampler* sampler)
-      : sampler_(sampler),
-        signal_handler_installed_(false),
-        vm_tgid_(getpid()),
-#ifndef XP_MACOSX
-        vm_tid_(gettid()),
-#endif
-        signal_sender_launched_(false)
-#ifdef XP_MACOSX
-        , signal_receiver_(pthread_self())
-#endif
-  {
-  }
-
-  void SignalSender() {
-    while (sampler_->IsActive()) {
-      sampler_->HandleSaveRequest();
-
-      if (!sampler_->IsPaused()) {
-#ifdef XP_MACOSX
-        pthread_kill(signal_receiver_, SIGPROF);
-#else
-
-        std::vector<ThreadInfo*> threads = GetRegisteredThreads();
-
-        for (uint32_t i = 0; i < threads.size(); i++) {
-          ThreadInfo* info = threads[i];
-
-          // We use sCurrentThreadProfile the ThreadProfile for the
-          // thread we're profiling to the signal handler
-          sCurrentThreadProfile = info->Profile();
-
-          int threadId = info->ThreadId();
-          if (threadId == 0) {
-            threadId = vm_tid_;
-          }
-
-          if (tgkill(vm_tgid_, threadId, SIGPROF) != 0) {
-            printf_stderr("profiler failed to signal tid=%d\n", threadId);
-#ifdef DEBUG
-            abort();
-#endif
-            continue;
-          }
-
-          // Wait for the signal handler to run before moving on to the next one
-          while (sCurrentThreadProfile)
-            sched_yield();
-        }
-#endif
-      }
-
-      // Convert ms to us and subtract 100 us to compensate delays
-      // occuring during signal delivery.
-      // TODO measure and confirm this.
-      const useconds_t interval = sampler_->interval_ * 1000 - 100;
-      //int result = usleep(interval);
-      usleep(interval);
-    }
-  }
-
-  Sampler* sampler_;
-  bool signal_handler_installed_;
-  struct sigaction old_sigprof_signal_handler_;
-  struct sigaction old_sigsave_signal_handler_;
-  pid_t vm_tgid_;
-  pid_t vm_tid_;
-  bool signal_sender_launched_;
-  pthread_t signal_sender_thread_;
-#ifdef XP_MACOSX
-  pthread_t signal_receiver_;
-#endif
+  PlatformData()
+  {}
 };
 
+/* static */ PlatformData*
+Sampler::AllocPlatformData(int aThreadId)
+{
+  return new PlatformData;
+}
 
-static void* SenderEntry(void* arg) {
+/* static */ void
+Sampler::FreePlatformData(PlatformData* aData)
+{
+  delete aData;
+}
+
+static void* SignalSender(void* arg) {
 # if defined(ANDROID)
   // pthread_atfork isn't available on Android.
   void* initialize_atfork = NULL;
 # else
   // This call is done just once, at the first call to SenderEntry.
   // It returns NULL.
   static void* initialize_atfork = setup_atfork();
 # endif
-  Sampler::PlatformData* data =
-      reinterpret_cast<Sampler::PlatformData*>(arg);
-  data->SignalSender();
+
+  int vm_tgid_ = getpid();
+
+  while (SamplerRegistry::sampler->IsActive()) {
+    SamplerRegistry::sampler->HandleSaveRequest();
+
+    if (!SamplerRegistry::sampler->IsPaused()) {
+      mozilla::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex);
+      std::vector<ThreadInfo*> threads =
+        SamplerRegistry::sampler->GetRegisteredThreads();
+
+      for (uint32_t i = 0; i < threads.size(); i++) {
+        ThreadInfo* info = threads[i];
+
+        // This will be null if we're not interested in profiling this thread.
+        if (!info->Profile())
+          continue;
+
+        // We use sCurrentThreadProfile to pass the ThreadProfile for the
+        // thread we're profiling to the signal handler.
+        sCurrentThreadProfile = info->Profile();
+
+        int threadId = info->ThreadId();
+
+        if (tgkill(vm_tgid_, threadId, SIGPROF) != 0) {
+          printf_stderr("profiler failed to signal tid=%d\n", threadId);
+#ifdef DEBUG
+          abort();
+#endif
+          continue;
+        }
+
+        // Wait for the signal handler to run before moving on to the next one
+        while (sCurrentThreadProfile)
+          sched_yield();
+      }
+    }
+
+    // Convert ms to us and subtract 100 us to compensate for delays
+    // occurring during signal delivery.
+    // TODO measure and confirm this.
+    const useconds_t interval =
+      SamplerRegistry::sampler->interval() * 1000 - 100;
+    //int result = usleep(interval);
+    usleep(interval);
+  }
   return initialize_atfork; // which is guaranteed to be NULL
 }
 
-
 Sampler::Sampler(int interval, bool profiling, int entrySize)
     : interval_(interval),
       profiling_(profiling),
       paused_(false),
       active_(false),
       entrySize_(entrySize) {
-  data_ = new PlatformData(this);
 }
 
 Sampler::~Sampler() {
-  ASSERT(!data_->signal_sender_launched_);
-  delete data_;
+  ASSERT(!signal_sender_launched_);
 }
 
 
 void Sampler::Start() {
   LOG("Sampler started");
 
+  SamplerRegistry::AddActiveSampler(this);
+
   // Request profiling signals.
   LOG("Request signal");
   struct sigaction sa;
   sa.sa_sigaction = ProfilerSignalHandler;
   sigemptyset(&sa.sa_mask);
   sa.sa_flags = SA_RESTART | SA_SIGINFO;
-  if (sigaction(SIGPROF, &sa, &data_->old_sigprof_signal_handler_) != 0) {
+  if (sigaction(SIGPROF, &sa, &old_sigprof_signal_handler_) != 0) {
     LOG("Error installing signal");
     return;
   }
 
   // Request save profile signals
   struct sigaction sa2;
   sa2.sa_sigaction = ProfilerSaveSignalHandler;
   sigemptyset(&sa2.sa_mask);
   sa2.sa_flags = SA_RESTART | SA_SIGINFO;
-  if (sigaction(SIGNAL_SAVE_PROFILE, &sa2, &data_->old_sigsave_signal_handler_) != 0) {
+  if (sigaction(SIGNAL_SAVE_PROFILE, &sa2, &old_sigsave_signal_handler_) != 0) {
     LOG("Error installing start signal");
     return;
   }
   LOG("Signal installed");
-  data_->signal_handler_installed_ = true;
+  signal_handler_installed_ = true;
 
   // Start a thread that sends SIGPROF signal to VM thread.
   // Sending the signal ourselves instead of relying on itimer provides
   // much better accuracy.
   SetActive(true);
   if (pthread_create(
-          &data_->signal_sender_thread_, NULL, SenderEntry, data_) == 0) {
-    data_->signal_sender_launched_ = true;
+        &signal_sender_thread_, NULL, SignalSender, NULL) == 0) {
+    signal_sender_launched_ = true;
   }
   LOG("Profiler thread started");
 }
 
 
 void Sampler::Stop() {
   SetActive(false);
 
   // Wait for signal sender termination (it will exit after setting
   // active_ to false).
-  if (data_->signal_sender_launched_) {
-    pthread_join(data_->signal_sender_thread_, NULL);
-    data_->signal_sender_launched_ = false;
+  if (signal_sender_launched_) {
+    pthread_join(signal_sender_thread_, NULL);
+    signal_sender_launched_ = false;
   }
 
+  SamplerRegistry::RemoveActiveSampler(this);
+
   // Restore old signal handler
-  if (data_->signal_handler_installed_) {
-    sigaction(SIGNAL_SAVE_PROFILE, &data_->old_sigsave_signal_handler_, 0);
-    sigaction(SIGPROF, &data_->old_sigprof_signal_handler_, 0);
-    data_->signal_handler_installed_ = false;
+  if (signal_handler_installed_) {
+    sigaction(SIGNAL_SAVE_PROFILE, &old_sigsave_signal_handler_, 0);
+    sigaction(SIGPROF, &old_sigprof_signal_handler_, 0);
+    signal_handler_installed_ = false;
   }
 }
 
 bool Sampler::RegisterCurrentThread(const char* aName, PseudoStack* aPseudoStack, bool aIsMainThread)
 {
-  mozilla::MutexAutoLock lock(*sRegisteredThreadsMutex);
+  mozilla::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex);
+
+  ThreadInfo* info = new ThreadInfo(aName, gettid(),
+    aIsMainThread, aPseudoStack);
 
-  ThreadInfo* info = new ThreadInfo(aName, gettid(), aIsMainThread, aPseudoStack);
+  bool profileThread = sActiveSampler &&
+    (aIsMainThread || sActiveSampler->ProfileThreads());
 
-  if (sActiveSampler) {
+  if (profileThread) {
     // We need to create the ThreadProfile now
     info->SetProfile(new ThreadProfile(info->Name(),
                                        sActiveSampler->EntrySize(),
                                        info->Stack(),
                                        info->ThreadId(),
+                                       info->GetPlatformData(),
                                        aIsMainThread));
+    if (sActiveSampler->ProfileJS()) {
+      info->Profile()->GetPseudoStack()->enableJSSampling();
+    }
   }
 
   sRegisteredThreads->push_back(info);
   return true;
 }
 
 void Sampler::UnregisterCurrentThread()
 {
-  mozilla::MutexAutoLock lock(*sRegisteredThreadsMutex);
+  mozilla::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex);
 
   int id = gettid();
 
   for (uint32_t i = 0; i < sRegisteredThreads->size(); i++) {
     ThreadInfo* info = sRegisteredThreads->at(i);
     if (info->ThreadId() == id) {
       delete info;
       sRegisteredThreads->erase(sRegisteredThreads->begin() + i);
--- a/tools/profiler/platform-macos.cc
+++ b/tools/profiler/platform-macos.cc
@@ -26,16 +26,17 @@
 #include <stdarg.h>
 #include <stdlib.h>
 #include <string.h>
 #include <errno.h>
 
 #include "nsThreadUtils.h"
 
 #include "platform.h"
+#include "TableTicker.h"
 #include "UnwinderThread2.h"  /* uwt__register_thread_for_profiling */
 
 // this port is based off of v8 svn revision 9837
 
 // XXX: this is a very stubbed out implementation
 // that only supports a single Sampler
 struct SamplerRegistry {
   static void AddActiveSampler(Sampler *sampler) {
@@ -86,31 +87,23 @@ class MacOSMutex : public Mutex {
 Mutex* OS::CreateMutex() {
   return new MacOSMutex();
 }
 
 void OS::Sleep(int milliseconds) {
   usleep(1000 * milliseconds);
 }
 
-class Thread::PlatformData : public Malloced {
- public:
-  PlatformData() : thread_(kNoThread) {}
-  pthread_t thread_;  // Thread handle for pthread.
-};
-
 Thread::Thread(const char* name)
-    : data_(new PlatformData),
-      stack_size_(0) {
+    : stack_size_(0) {
   set_name(name);
 }
 
 
 Thread::~Thread() {
-  delete data_;
 }
 
 
 static void SetThreadName(const char* name) {
   // pthread_setname_np is only available in 10.6 or later, so test
   // for it at runtime.
   int (*dynamic_pthread_setname_np)(const char*);
   *reinterpret_cast<void**>(&dynamic_pthread_setname_np) =
@@ -136,19 +129,19 @@ static void* ThreadEntry(void* arg) {
   extern bool sps_version2();
   if (sps_version2()) {
     // Register this thread for profiling.
     int aLocal;
     uwt__register_thread_for_profiling( &aLocal );
   }
   // END temp hack for SPS v1-vs-v2
 
-  thread->data()->thread_ = pthread_self();
+  thread->thread_ = pthread_self();
   SetThreadName(thread->name());
-  ASSERT(thread->data()->thread_ != kNoThread);
+  ASSERT(thread->thread_ != kNoThread);
   thread->Run();
   return NULL;
 }
 
 
 void Thread::set_name(const char* name) {
   strncpy(name_, name, sizeof(name_));
   name_[sizeof(name_) - 1] = '\0';
@@ -158,25 +151,25 @@ void Thread::set_name(const char* name) 
 void Thread::Start() {
   pthread_attr_t* attr_ptr = NULL;
   pthread_attr_t attr;
   if (stack_size_ > 0) {
     pthread_attr_init(&attr);
     pthread_attr_setstacksize(&attr, static_cast<size_t>(stack_size_));
     attr_ptr = &attr;
   }
-  pthread_create(&data_->thread_, attr_ptr, ThreadEntry, this);
-  ASSERT(data_->thread_ != kNoThread);
+  pthread_create(&thread_, attr_ptr, ThreadEntry, this);
+  ASSERT(thread_ != kNoThread);
 }
 
 void Thread::Join() {
-  pthread_join(data_->thread_, NULL);
+  pthread_join(thread_, NULL);
 }
 
-class Sampler::PlatformData : public Malloced {
+class PlatformData : public Malloced {
  public:
   PlatformData() : profiled_thread_(mach_thread_self())
   {
     profiled_pthread_ = pthread_from_mach_thread_np(profiled_thread_);
   }
 
   ~PlatformData() {
     // Deallocate Mach port for thread.
@@ -192,66 +185,85 @@ class Sampler::PlatformData : public Mal
   // For details, consult "Mac OS X Internals" book, Section 7.3.
   thread_act_t profiled_thread_;
   // we also store the pthread because Mach threads have no concept of stack
   // and we want to be able to get the stack size when we need to unwind the
   // stack using frame pointers.
   pthread_t profiled_pthread_;
 };
 
+/* static */ PlatformData*
+Sampler::AllocPlatformData(int aThreadId)
+{
+  return new PlatformData;
+}
+
+/* static */ void
+Sampler::FreePlatformData(PlatformData* aData)
+{
+  delete aData;
+}
 
 class SamplerThread : public Thread {
  public:
   explicit SamplerThread(int interval)
       : Thread("SamplerThread")
       , interval_(interval) {}
 
   static void AddActiveSampler(Sampler* sampler) {
-    ScopedLock lock(mutex_);
+    mozilla::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex);
     SamplerRegistry::AddActiveSampler(sampler);
     if (instance_ == NULL) {
       instance_ = new SamplerThread(sampler->interval());
       instance_->Start();
     } else {
       ASSERT(instance_->interval_ == sampler->interval());
     }
   }
 
   static void RemoveActiveSampler(Sampler* sampler) {
-    ScopedLock lock(mutex_);
+    mozilla::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex);
     instance_->Join();
     //XXX: unlike v8 we need to remove the active sampler after doing the Join
     // because we drop the sampler immediately
     SamplerRegistry::RemoveActiveSampler(sampler);
     delete instance_;
     instance_ = NULL;
-    /*
-    if (SamplerRegistry::GetState() == SamplerRegistry::HAS_NO_SAMPLERS) {
-      RuntimeProfiler::StopRuntimeProfilerThreadBeforeShutdown(instance_);
-      delete instance_;
-      instance_ = NULL;
-    }
-    */
   }
 
   // Implement Thread::Run().
   virtual void Run() {
     while (SamplerRegistry::sampler->IsActive()) {
-      if (!SamplerRegistry::sampler->IsPaused())
-        SampleContext(SamplerRegistry::sampler);
+      {
+        mozilla::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex);
+        std::vector<ThreadInfo*> threads =
+          SamplerRegistry::sampler->GetRegisteredThreads();
+        for (uint32_t i = 0; i < threads.size(); i++) {
+          ThreadInfo* info = threads[i];
+
+          // This will be null if we're not interested in profiling this thread.
+          if (!info->Profile())
+            continue;
+
+          ThreadProfile* thread_profile = info->Profile();
+
+          if (!SamplerRegistry::sampler->IsPaused())
+            SampleContext(SamplerRegistry::sampler, thread_profile);
+        }
+      }
       OS::Sleep(interval_);
     }
   }
 
-  void SampleContext(Sampler* sampler) {
-    thread_act_t profiled_thread = sampler->platform_data()->profiled_thread();
+  void SampleContext(Sampler* sampler, ThreadProfile* thread_profile) {
+    thread_act_t profiled_thread =
+      thread_profile->GetPlatformData()->profiled_thread();
+
     TickSample sample_obj;
     TickSample* sample = &sample_obj;
-    //TickSample* sample = CpuProfiler::TickSampleEvent(sampler->isolate());
-    //if (sample == NULL) sample = &sample_obj;
 
     if (KERN_SUCCESS != thread_suspend(profiled_thread)) return;
 
 #if V8_HOST_ARCH_X64
     thread_state_flavor_t flavor = x86_THREAD_STATE64;
     x86_thread_state64_t state;
     mach_msg_type_number_t count = x86_THREAD_STATE64_COUNT;
 #if __DARWIN_UNIX03
@@ -271,23 +283,21 @@ class SamplerThread : public Thread {
 #else
 #error Unsupported Mac OS X host architecture.
 #endif  // V8_HOST_ARCH
 
     if (thread_get_state(profiled_thread,
                          flavor,
                          reinterpret_cast<natural_t*>(&state),
                          &count) == KERN_SUCCESS) {
-      //sample->state = sampler->isolate()->current_vm_state();
       sample->pc = reinterpret_cast<Address>(state.REGISTER_FIELD(ip));
       sample->sp = reinterpret_cast<Address>(state.REGISTER_FIELD(sp));
       sample->fp = reinterpret_cast<Address>(state.REGISTER_FIELD(bp));
       sample->timestamp = mozilla::TimeStamp::Now();
-      sample->threadProfile = NULL;
-      sampler->SampleStack(sample);
+      sample->threadProfile = thread_profile;
       sampler->Tick(sample);
     }
     thread_resume(profiled_thread);
   }
 
   const int interval_;
   //RuntimeProfilerRateLimiter rate_limiter_;
 
@@ -295,36 +305,31 @@ class SamplerThread : public Thread {
   static Mutex* mutex_;
   static SamplerThread* instance_;
 
   DISALLOW_COPY_AND_ASSIGN(SamplerThread);
 };
 
 #undef REGISTER_FIELD
 
-
-Mutex* SamplerThread::mutex_ = OS::CreateMutex();
 SamplerThread* SamplerThread::instance_ = NULL;
 
-
 Sampler::Sampler(int interval, bool profiling, int entrySize)
     : // isolate_(isolate),
       interval_(interval),
       profiling_(profiling),
       paused_(false),
       active_(false),
       entrySize_(entrySize) /*,
       samples_taken_(0)*/ {
-  data_ = new PlatformData;
 }
 
 
 Sampler::~Sampler() {
   ASSERT(!IsActive());
-  delete data_;
 }
 
 
 void Sampler::Start() {
   ASSERT(!IsActive());
   SetActive(true);
   SamplerThread::AddActiveSampler(this);
 }
@@ -332,39 +337,61 @@ void Sampler::Start() {
 
 void Sampler::Stop() {
   ASSERT(IsActive());
   SetActive(false);
   SamplerThread::RemoveActiveSampler(this);
 }
 
 pthread_t
-Sampler::GetProfiledThread(Sampler::PlatformData* aData)
+Sampler::GetProfiledThread(PlatformData* aData)
 {
   return aData->profiled_pthread();
 }
 
+#include <sys/syscall.h>
+pid_t gettid()
+{
+  return (pid_t) syscall(SYS_thread_selfid);
+}
+
 bool Sampler::RegisterCurrentThread(const char* aName, PseudoStack* aPseudoStack, bool aIsMainThread)
 {
-  mozilla::MutexAutoLock lock(*sRegisteredThreadsMutex);
+  mozilla::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex);
+
+  ThreadInfo* info = new ThreadInfo(aName, gettid(),
+    aIsMainThread, aPseudoStack);
 
-  if (!aIsMainThread)
-    return false;
+  bool profileThread = sActiveSampler &&
+    (aIsMainThread || sActiveSampler->ProfileThreads());
 
-  ThreadInfo* info = new ThreadInfo(aName, 0, true, aPseudoStack);
-
-  if (sActiveSampler) {
+  if (profileThread) {
     // We need to create the ThreadProfile now
     info->SetProfile(new ThreadProfile(info->Name(),
                                        sActiveSampler->EntrySize(),
                                        info->Stack(),
                                        info->ThreadId(),
-                                       true));
+                                       info->GetPlatformData(),
+                                       aIsMainThread));
+    if (sActiveSampler->ProfileJS()) {
+      info->Profile()->GetPseudoStack()->enableJSSampling();
+    }
   }
 
   sRegisteredThreads->push_back(info);
   return true;
 }
 
 void Sampler::UnregisterCurrentThread()
 {
-  // We only have the main thread currently and that will never be unregistered
-}
\ No newline at end of file
+  mozilla::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex);
+
+  int id = gettid();
+
+  for (uint32_t i = 0; i < sRegisteredThreads->size(); i++) {
+    ThreadInfo* info = sRegisteredThreads->at(i);
+    if (info->ThreadId() == id) {
+      delete info;
+      sRegisteredThreads->erase(sRegisteredThreads->begin() + i);
+      break;
+    }
+  }
+}
--- a/tools/profiler/platform-win32.cc
+++ b/tools/profiler/platform-win32.cc
@@ -25,47 +25,59 @@
 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE.
 
 #include <windows.h>
 #include <mmsystem.h>
 #include <process.h>
 #include "platform.h"
-
+#include "TableTicker.h"
 #include "ProfileEntry.h"
 
-class Sampler::PlatformData : public Malloced {
+class PlatformData : public Malloced {
  public:
   // Get a handle to the calling thread. This is the thread that we are
   // going to profile. We need to make a copy of the handle because we are
   // going to use it in the sampler thread. Using GetThreadHandle() will
   // not work in this case. We're using OpenThread because DuplicateHandle
   // for some reason doesn't work in Chrome's sandbox.
-  PlatformData() : profiled_thread_(OpenThread(THREAD_GET_CONTEXT |
+  PlatformData(int aThreadId) : profiled_thread_(OpenThread(THREAD_GET_CONTEXT |
                                                THREAD_SUSPEND_RESUME |
                                                THREAD_QUERY_INFORMATION,
                                                false,
-                                               GetCurrentThreadId())) {}
+                                               aThreadId)) {}
 
   ~PlatformData() {
     if (profiled_thread_ != NULL) {
       CloseHandle(profiled_thread_);
       profiled_thread_ = NULL;
     }
   }
 
   HANDLE profiled_thread() { return profiled_thread_; }
 
  private:
   HANDLE profiled_thread_;
 };
 
+/* static */ PlatformData*
+Sampler::AllocPlatformData(int aThreadId)
+{
+  return new PlatformData(aThreadId);
+}
+
+/* static */ void
+Sampler::FreePlatformData(PlatformData* aData)
+{
+  delete aData;
+}
+
 uintptr_t
-Sampler::GetThreadHandle(Sampler::PlatformData* aData)
+Sampler::GetThreadHandle(PlatformData* aData)
 {
   return (uintptr_t) aData->profiled_thread();
 }
 
 class SamplerThread : public Thread {
  public:
   SamplerThread(int interval, Sampler* sampler)
       : Thread("SamplerThread")
@@ -92,41 +104,59 @@ class SamplerThread : public Thread {
 
     // By default we'll not adjust the timer resolution which tends to be around
     // 16ms. However, if the requested interval is sufficiently low we'll try to
     // adjust the resolution to match.
     if (interval_ < 10)
         ::timeBeginPeriod(interval_);
 
     while (sampler_->IsActive()) {
-      if (!sampler_->IsPaused())
-        SampleContext(sampler_);
+      {
+        mozilla::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex);
+        std::vector<ThreadInfo*> threads =
+          sampler_->GetRegisteredThreads();
+        for (uint32_t i = 0; i < threads.size(); i++) {
+          ThreadInfo* info = threads[i];
+
+          // This will be null if we're not interested in profiling this thread.
+          if (!info->Profile())
+            continue;
+
+          ThreadProfile* thread_profile = info->Profile();
+
+          if (!sampler_->IsPaused()) {
+            SampleContext(sampler_, thread_profile);
+          }
+        }
+      }
       OS::Sleep(interval_);
     }
 
     // disable any timer resolution changes we've made
     if (interval_ < 10)
         ::timeEndPeriod(interval_);
   }
 
-  void SampleContext(Sampler* sampler) {
-    HANDLE profiled_thread = sampler->platform_data()->profiled_thread();
+  void SampleContext(Sampler* sampler, ThreadProfile* thread_profile) {
+    uintptr_t thread = Sampler::GetThreadHandle(
+                               thread_profile->GetPlatformData());
+    HANDLE profiled_thread = reinterpret_cast<HANDLE>(thread);
     if (profiled_thread == NULL)
       return;
 
     // Context used for sampling the register state of the profiled thread.
     CONTEXT context;
     memset(&context, 0, sizeof(context));
 
     TickSample sample_obj;
     TickSample* sample = &sample_obj;
 
     // Grab the timestamp before pausing the thread, to avoid deadlocks.
     sample->timestamp = mozilla::TimeStamp::Now();
-    sample->threadProfile = NULL;
+    sample->threadProfile = thread_profile;
 
     static const DWORD kSuspendFailed = static_cast<DWORD>(-1);
     if (SuspendThread(profiled_thread) == kSuspendFailed)
       return;
 
     context.ContextFlags = CONTEXT_CONTROL;
     if (GetThreadContext(profiled_thread, &context) != 0) {
 #if V8_HOST_ARCH_X64
@@ -134,17 +164,16 @@ class SamplerThread : public Thread {
       sample->sp = reinterpret_cast<Address>(context.Rsp);
       sample->fp = reinterpret_cast<Address>(context.Rbp);
 #else
       sample->pc = reinterpret_cast<Address>(context.Eip);
       sample->sp = reinterpret_cast<Address>(context.Esp);
       sample->fp = reinterpret_cast<Address>(context.Ebp);
 #endif
       sample->context = &context;
-      sampler->SampleStack(sample);
       sampler->Tick(sample);
     }
     ResumeThread(profiled_thread);
   }
 
   Sampler* sampler_;
   const int interval_;
 
@@ -157,23 +186,21 @@ class SamplerThread : public Thread {
 SamplerThread* SamplerThread::instance_ = NULL;
 
 
 Sampler::Sampler(int interval, bool profiling, int entrySize)
     : interval_(interval),
       profiling_(profiling),
       paused_(false),
       active_(false),
-      entrySize_(entrySize),
-      data_(new PlatformData) {
+      entrySize_(entrySize) {
 }
 
 Sampler::~Sampler() {
   ASSERT(!IsActive());
-  delete data_;
 }
 
 void Sampler::Start() {
   ASSERT(!IsActive());
   SetActive(true);
   SamplerThread::StartSampler(this);
 }
 
@@ -187,84 +214,92 @@ void Sampler::Stop() {
 static const HANDLE kNoThread = INVALID_HANDLE_VALUE;
 
 static unsigned int __stdcall ThreadEntry(void* arg) {
   Thread* thread = reinterpret_cast<Thread*>(arg);
   thread->Run();
   return 0;
 }
 
-class Thread::PlatformData : public Malloced {
- public:
-  explicit PlatformData(HANDLE thread) : thread_(thread) {}
-  HANDLE thread_;
-  unsigned thread_id_;
-};
-
 // Initialize a Win32 thread object. The thread has an invalid thread
 // handle until it is started.
 Thread::Thread(const char* name)
     : stack_size_(0) {
-  data_ = new PlatformData(kNoThread);
+  thread_ = kNoThread;
   set_name(name);
 }
 
 void Thread::set_name(const char* name) {
   strncpy(name_, name, sizeof(name_));
   name_[sizeof(name_) - 1] = '\0';
 }
 
 // Close our own handle for the thread.
 Thread::~Thread() {
-  if (data_->thread_ != kNoThread) CloseHandle(data_->thread_);
-  delete data_;
+  if (thread_ != kNoThread) CloseHandle(thread_);
 }
 
 // Create a new thread. It is important to use _beginthreadex() instead of
 // the Win32 function CreateThread(), because the CreateThread() does not
 // initialize thread specific structures in the C runtime library.
 void Thread::Start() {
-  data_->thread_ = reinterpret_cast<HANDLE>(
+  thread_ = reinterpret_cast<HANDLE>(
       _beginthreadex(NULL,
                      static_cast<unsigned>(stack_size_),
                      ThreadEntry,
                      this,
                      0,
-                     &data_->thread_id_));
+                     &thread_id_));
 }
 
 // Wait for thread to terminate.
 void Thread::Join() {
-  if (data_->thread_id_ != GetCurrentThreadId()) {
-    WaitForSingleObject(data_->thread_, INFINITE);
+  if (thread_id_ != GetCurrentThreadId()) {
+    WaitForSingleObject(thread_, INFINITE);
   }
 }
 
 void OS::Sleep(int milliseconds) {
   ::Sleep(milliseconds);
 }
 
 bool Sampler::RegisterCurrentThread(const char* aName, PseudoStack* aPseudoStack, bool aIsMainThread)
 {
-  mozilla::MutexAutoLock lock(*sRegisteredThreadsMutex);
+  mozilla::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex);
+
+  ThreadInfo* info = new ThreadInfo(aName, GetCurrentThreadId(),
+    aIsMainThread, aPseudoStack);
 
-  if (!aIsMainThread)
-    return false;
+  bool profileThread = sActiveSampler &&
+    (aIsMainThread || sActiveSampler->ProfileThreads());
 
-  ThreadInfo* info = new ThreadInfo(aName, 0, true, aPseudoStack);
-
-  if (sActiveSampler) {
+  if (profileThread) {
     // We need to create the ThreadProfile now
     info->SetProfile(new ThreadProfile(info->Name(),
                                        sActiveSampler->EntrySize(),
                                        info->Stack(),
-                                       info->ThreadId(),
-                                       true));
+                                       GetCurrentThreadId(),
+                                       info->GetPlatformData(),
+                                       aIsMainThread));
+    if (sActiveSampler->ProfileJS()) {
+      info->Profile()->GetPseudoStack()->enableJSSampling();
+    }
   }
 
   sRegisteredThreads->push_back(info);
   return true;
 }
 
 void Sampler::UnregisterCurrentThread()
 {
-  // We only have the main thread currently and that will never be unregistered
-}
\ No newline at end of file
+  mozilla::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex);
+
+  int id = GetCurrentThreadId();
+
+  for (uint32_t i = 0; i < sRegisteredThreads->size(); i++) {
+    ThreadInfo* info = sRegisteredThreads->at(i);
+    if (info->ThreadId() == id) {
+      delete info;
+      sRegisteredThreads->erase(sRegisteredThreads->begin() + i);
+      break;
+    }
+  }
+}
--- a/tools/profiler/platform.cpp
+++ b/tools/profiler/platform.cpp
@@ -23,42 +23,60 @@
 mozilla::ThreadLocal<PseudoStack *> tlsPseudoStack;
 mozilla::ThreadLocal<TableTicker *> tlsTicker;
 // We need to track whether we've been initialized otherwise
 // we end up using tlsStack without initializing it.
 // Because tlsStack is totally opaque to us we can't reuse
 // it as the flag itself.
 bool stack_key_initialized;
 
-TimeStamp sLastTracerEvent; // is raced on
-int       sFrameNumber = 0;
-int       sLastFrameNumber = 0;
+TimeStamp   sLastTracerEvent; // is raced on
+int         sFrameNumber = 0;
+int         sLastFrameNumber = 0;
+static bool sIsProfiling = false; // is raced on
 
 /* used to keep track of the last event that we sampled during */
 unsigned int sLastSampledEventGeneration = 0;
 
 /* a counter that's incremented every time we get a responsiveness event
  * note: it might also be worth tracking every time we go around
  * the event loop */
 unsigned int sCurrentEventGeneration = 0;
 /* we don't need to worry about overflow because we only treat the
  * case of them being the same as special. i.e. we only run into
  * a problem if 2^32 events happen between samples that we need
  * to know are associated with different events */
 
-std::vector<ThreadInfo*>* Sampler::sRegisteredThreads = new std::vector<ThreadInfo*>();
-mozilla::Mutex* Sampler::sRegisteredThreadsMutex = new mozilla::Mutex("sRegisteredThreads mutex");
+std::vector<ThreadInfo*>* Sampler::sRegisteredThreads = nullptr;
+mozilla::Mutex* Sampler::sRegisteredThreadsMutex = nullptr;
+
+TableTicker* Sampler::sActiveSampler;
+
+void Sampler::Startup() {
+  sRegisteredThreads = new std::vector<ThreadInfo*>();
+  sRegisteredThreadsMutex = new mozilla::Mutex("sRegisteredThreads mutex");
+}
 
-Sampler* Sampler::sActiveSampler;
+void Sampler::Shutdown() {
+  while (sRegisteredThreads->size() > 0) {
+    delete sRegisteredThreads->back();
+    sRegisteredThreads->pop_back();
+  }
+
+  delete sRegisteredThreadsMutex;
+  delete sRegisteredThreads;
+}
 
 ThreadInfo::~ThreadInfo() {
   free(mName);
 
   if (mProfile)
     delete mProfile;
+
+  Sampler::FreePlatformData(mPlatformData);
 }
 
 bool sps_version2()
 {
   static int version = 0; // Raced on, potentially
 
   if (version == 0) {
     bool allow2 = false; // Is v2 allowable on this platform?
@@ -230,19 +248,23 @@ void mozilla_sampler_init()
 
   LOG("BEGIN mozilla_sampler_init");
   if (!tlsPseudoStack.init() || !tlsTicker.init()) {
     LOG("Failed to init.");
     return;
   }
   stack_key_initialized = true;
 
+  Sampler::Startup();
+
   PseudoStack *stack = new PseudoStack();
   tlsPseudoStack.set(stack);
 
+  Sampler::RegisterCurrentThread("Gecko", stack, true);
+
   if (sps_version2()) {
     // Read mode settings from MOZ_PROFILER_MODE and interval
     // settings from MOZ_PROFILER_INTERVAL and stack-scan threshold
     // from MOZ_PROFILER_STACK_SCAN.
     read_profiler_env_vars();
 
     // Create the unwinder thread.  ATM there is only one.
     uwt__init();
@@ -301,19 +323,20 @@ void mozilla_sampler_shutdown()
   // Shut down and reap the unwinder thread.  We have to do this
   // before stopping the sampler, so as to guarantee that the unwinder
   // thread doesn't try to access memory that the subsequent call to
   // mozilla_sampler_stop causes to be freed.
   if (sps_version2()) {
     uwt__deinit();
   }
 
-  Sampler::FreeRegisteredThreads();
+  profiler_stop();
 
-  profiler_stop();
+  Sampler::Shutdown();
+
   // We can't delete the Stack because we can be between a
   // sampler call_enter/call_exit point.
   // TODO Need to find a safe time to delete Stack
 }
 
 void mozilla_sampler_save()
 {
   TableTicker *t = tlsTicker.get();
@@ -362,16 +385,17 @@ const char** mozilla_sampler_get_feature
 #if defined(MOZ_PROFILING) && defined(HAVE_NATIVE_UNWIND)
     "stackwalk",
 #endif
 #if defined(ENABLE_SPS_LEAF_DATA)
     "leaf",
 #endif
     "jank",
     "js",
+    "threads",
     NULL
   };
 
   return features;
 }
 
 // Values are only honored on the first start
 void mozilla_sampler_start(int aProfileEntries, int aInterval,
@@ -393,26 +417,39 @@ void mozilla_sampler_start(int aProfileE
 
   // Reset the current state if the profiler is running
   profiler_stop();
 
   TableTicker* t;
   if (sps_version2()) {
     t = new BreakpadSampler(aInterval ? aInterval : PROFILE_DEFAULT_INTERVAL,
                            aProfileEntries ? aProfileEntries : PROFILE_DEFAULT_ENTRY,
-                           stack, aFeatures, aFeatureCount);
+                           aFeatures, aFeatureCount);
   } else {
     t = new TableTicker(aInterval ? aInterval : PROFILE_DEFAULT_INTERVAL,
                         aProfileEntries ? aProfileEntries : PROFILE_DEFAULT_ENTRY,
-                        stack, aFeatures, aFeatureCount);
+                        aFeatures, aFeatureCount);
   }
   tlsTicker.set(t);
   t->Start();
-  if (t->ProfileJS())
-      stack->enableJSSampling();
+  if (t->ProfileJS()) {
+      mozilla::MutexAutoLock lock(*Sampler::sRegisteredThreadsMutex);
+      std::vector<ThreadInfo*> threads = t->GetRegisteredThreads();
+
+      for (uint32_t i = 0; i < threads.size(); i++) {
+        ThreadInfo* info = threads[i];
+        ThreadProfile* thread_profile = info->Profile();
+        if (!thread_profile) {
+          continue;
+        }
+        thread_profile->GetPseudoStack()->enableJSSampling();
+      }
+  }
+
+  sIsProfiling = true;
 
   nsCOMPtr<nsIObserverService> os = mozilla::services::GetObserverService();
   if (os)
     os->NotifyObservers(nullptr, "profiler-started", nullptr);
 }
 
 void mozilla_sampler_stop()
 {
@@ -430,32 +467,26 @@ void mozilla_sampler_stop()
   delete t;
   tlsTicker.set(NULL);
   PseudoStack *stack = tlsPseudoStack.get();
   ASSERT(stack != NULL);
 
   if (disableJS)
     stack->disableJSSampling();
 
+  sIsProfiling = false;
+
   nsCOMPtr<nsIObserverService> os = mozilla::services::GetObserverService();
   if (os)
     os->NotifyObservers(nullptr, "profiler-stopped", nullptr);
 }
 
 bool mozilla_sampler_is_active()
 {
-  if (!stack_key_initialized)
-    profiler_init();
-
-  TableTicker *t = tlsTicker.get();
-  if (!t) {
-    return false;
-  }
-
-  return t->IsActive();
+  return sIsProfiling;
 }
 
 static double sResponsivenessTimes[100];
 static unsigned int sResponsivenessLoc = 0;
 void mozilla_sampler_responsiveness(const TimeStamp& aTime)
 {
   if (!sLastTracerEvent.IsNull()) {
     if (sResponsivenessLoc == 100) {
--- a/tools/profiler/platform.h
+++ b/tools/profiler/platform.h
@@ -30,24 +30,33 @@
 #define TOOLS_PLATFORM_H_
 
 #ifdef ANDROID
 #include <android/log.h>
 #else
 #define __android_log_print(a, ...)
 #endif
 
+#ifdef XP_UNIX
+#include <pthread.h>
+#endif
+
 #include "mozilla/StandardInteger.h"
 #include "mozilla/Util.h"
 #include "mozilla/unused.h"
 #include "mozilla/TimeStamp.h"
+#include "mozilla/Mutex.h"
 #include "PlatformMacros.h"
 #include "v8-support.h"
 #include <vector>
 
+#ifdef XP_WIN
+#include <windows.h>
+#endif
+
 #define ASSERT(a) MOZ_ASSERT(a)
 
 #ifdef ANDROID
 # if defined(__arm__) || defined(__thumb__)
 #  define ENABLE_SPS_LEAF_DATA
 #  define ENABLE_ARM_LR_SAVING
 # endif
 # define LOG(text) \
@@ -178,24 +187,27 @@ class Thread {
 
   // Abstract method for run handler.
   virtual void Run() = 0;
 
   // The thread name length is limited to 16 based on Linux's implementation of
   // prctl().
   static const int kMaxThreadNameLength = 16;
 
-  class PlatformData;
-  PlatformData* data() { return data_; }
+#ifdef XP_WIN
+  HANDLE thread_;
+  unsigned thread_id_;
+#endif
+#if defined(XP_MACOSX)
+  pthread_t thread_;
+#endif
 
  private:
   void set_name(const char *name);
 
-  PlatformData* data_;
-
   char name_[kMaxThreadNameLength];
   int stack_size_;
 
   DISALLOW_COPY_AND_ASSIGN(Thread);
 };
 
 // ----------------------------------------------------------------------------
 // HAVE_NATIVE_UNWIND
@@ -259,55 +271,27 @@ class TickSample {
   void*   context;   // The context from the signal handler, if available. On
                      // Win32 this may contain the windows thread context.
   ThreadProfile* threadProfile;
   static const int kMaxFramesCount = 64;
   int frames_count;  // Number of captured frames.
   mozilla::TimeStamp timestamp;
 };
 
-class ThreadInfo {
- public:
-  ThreadInfo(const char* aName, int aThreadId, bool aIsMainThread, PseudoStack* aPseudoStack)
-    : mName(strdup(aName))
-    , mThreadId(aThreadId)
-    , mIsMainThread(aIsMainThread)
-    , mPseudoStack(aPseudoStack)
-    , mProfile(NULL) {}
-
-  virtual ~ThreadInfo();
-
-  const char* Name() const { return mName; }
-  int ThreadId() const { return mThreadId; }
-
-  bool IsMainThread() const { return mIsMainThread; }
-  PseudoStack* Stack() const { return mPseudoStack; }
-  
-  void SetProfile(ThreadProfile* aProfile) { mProfile = aProfile; }
-  ThreadProfile* Profile() const { return mProfile; }
-
- private:
-  char* mName;
-  int mThreadId;
-  const bool mIsMainThread;
-  PseudoStack* mPseudoStack;
-  ThreadProfile* mProfile;
-};
-
+class ThreadInfo;
+class PlatformData;
+class TableTicker;
 class Sampler {
  public:
   // Initialize sampler.
   explicit Sampler(int interval, bool profiling, int entrySize);
   virtual ~Sampler();
 
   int interval() const { return interval_; }
 
-  // Performs stack sampling.
-  virtual void SampleStack(TickSample* sample) = 0;
-
   // This method is called for each sampling period with the current
   // program counter.
   virtual void Tick(TickSample* sample) = 0;
 
   // Request a save from a signal handler
   virtual void RequestSave() = 0;
   // Process any outstanding request outside a signal handler.
   virtual void HandleSaveRequest() = 0;
@@ -321,63 +305,97 @@ class Sampler {
 
   // Whether the sampler is running (that is, consumes resources).
   bool IsActive() const { return active_; }
 
   // Low overhead way to stop the sampler from ticking
   bool IsPaused() const { return paused_; }
   void SetPaused(bool value) { NoBarrier_Store(&paused_, value); }
 
+  virtual bool ProfileThreads() const = 0;
+
   int EntrySize() { return entrySize_; }
 
-  class PlatformData;
-
-  PlatformData* platform_data() { return data_; }
+  // We can't new/delete the type safely without defining it
+  // (-Wdelete-incomplete). Use these Alloc/Free functions instead.
+  static PlatformData* AllocPlatformData(int aThreadId);
+  static void FreePlatformData(PlatformData*);
 
   // If we move the backtracing code into the platform files we won't
   // need to have these hacks
 #ifdef XP_WIN
   // xxxehsan sucky hack :(
   static uintptr_t GetThreadHandle(PlatformData*);
 #endif
 #ifdef XP_MACOSX
   static pthread_t GetProfiledThread(PlatformData*);
 #endif
 
   static std::vector<ThreadInfo*> GetRegisteredThreads() {
-    mozilla::MutexAutoLock lock(*sRegisteredThreadsMutex);
-
     return *sRegisteredThreads;
   }
 
   static bool RegisterCurrentThread(const char* aName, PseudoStack* aPseudoStack, bool aIsMainThread);
   static void UnregisterCurrentThread();
 
+  static void Startup();
   // Should only be called on shutdown
-  static void FreeRegisteredThreads() {
-    while (sRegisteredThreads->size() > 0) {
-      sRegisteredThreads->pop_back();
-    }
+  static void Shutdown();
 
-    delete sRegisteredThreadsMutex;
-    delete sRegisteredThreads;
-  }
+  static TableTicker* GetActiveSampler() { return sActiveSampler; }
+  static void SetActiveSampler(TableTicker* sampler) { sActiveSampler = sampler; }
 
-  static Sampler* GetActiveSampler() { return sActiveSampler; }
-  static void SetActiveSampler(Sampler* sampler) { sActiveSampler = sampler; }
-
+  static mozilla::Mutex* sRegisteredThreadsMutex;
  protected:
   static std::vector<ThreadInfo*>* sRegisteredThreads;
-  static mozilla::Mutex* sRegisteredThreadsMutex;
-  static Sampler* sActiveSampler;
+  static TableTicker* sActiveSampler;
 
  private:
   void SetActive(bool value) { NoBarrier_Store(&active_, value); }
 
   const int interval_;
   const bool profiling_;
   Atomic32 paused_;
   Atomic32 active_;
   const int entrySize_;
-  PlatformData* data_;  // Platform specific data.
+
+  // Refactor me!
+#if defined(SPS_OS_linux) || defined(SPS_OS_android)
+  bool signal_handler_installed_;
+  struct sigaction old_sigprof_signal_handler_;
+  struct sigaction old_sigsave_signal_handler_;
+  bool signal_sender_launched_;
+  pthread_t signal_sender_thread_;
+#endif
+};
+
+class ThreadInfo {
+ public:
+  ThreadInfo(const char* aName, int aThreadId, bool aIsMainThread, PseudoStack* aPseudoStack)
+    : mName(strdup(aName))
+    , mThreadId(aThreadId)
+    , mIsMainThread(aIsMainThread)
+    , mPseudoStack(aPseudoStack)
+    , mPlatformData(Sampler::AllocPlatformData(aThreadId))
+    , mProfile(NULL) {}
+
+  virtual ~ThreadInfo();
+
+  const char* Name() const { return mName; }
+  int ThreadId() const { return mThreadId; }
+
+  bool IsMainThread() const { return mIsMainThread; }
+  PseudoStack* Stack() const { return mPseudoStack; }
+  
+  void SetProfile(ThreadProfile* aProfile) { mProfile = aProfile; }
+  ThreadProfile* Profile() const { return mProfile; }
+
+  PlatformData* GetPlatformData() const { return mPlatformData; }
+ private:
+  char* mName;
+  int mThreadId;
+  const bool mIsMainThread;
+  PseudoStack* mPseudoStack;
+  PlatformData* mPlatformData;
+  ThreadProfile* mProfile;
 };
 
 #endif /* ndef TOOLS_PLATFORM_H_ */