Bug 1362894 - Make profiler_call_{enter,exit} |inline|. r=mstange.
authorNicholas Nethercote <nnethercote@mozilla.com>
Wed, 10 May 2017 20:13:21 +1000
changeset 406660 17c6f55e1bdc5c931f12c9893b76cd77ecf39918
parent 406659 30813e40f36c5e5227f429d283283529bbab8743
child 406661 cd5ae949a6b9a2b28f46e4aff033468cf91f7ae5
push id7391
push usermtabara@mozilla.com
push dateMon, 12 Jun 2017 13:08:53 +0000
treeherdermozilla-beta@2191d7f87e2e [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersmstange
bugs1362894, 1359000
milestone55.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1362894 - Make profiler_call_{enter,exit} |inline|. r=mstange. Bug 1359000 moved these functions from GeckoProfiler.h to platform.cpp, which allowed a lot of follow-up simplifications. But it hurt performance. This patch moves them back to GeckoProfiler.h and makes them |inline| again. This required adding a second TLS pointer, sPseudoStack. Comments in the patch explain why.
tools/profiler/core/platform.cpp
tools/profiler/public/GeckoProfiler.h
tools/profiler/public/PseudoStack.h
--- a/tools/profiler/core/platform.cpp
+++ b/tools/profiler/core/platform.cpp
@@ -475,39 +475,69 @@ uint32_t ActivePS::sNextGeneration = 0;
 // The mutex that guards accesses to CorePS and ActivePS.
 static PSMutex gPSMutex;
 
 // Each live thread has a ThreadInfo, and we store a reference to it in TLS.
 // This class encapsulates that TLS.
 class TLSInfo
 {
 public:
-  static bool Init(PSLockRef) { return sThreadInfo.init(); }
+  static bool Init(PSLockRef)
+  {
+    bool ok1 = sThreadInfo.init();
+    bool ok2 = sPseudoStack.init();
+    return ok1 && ok2;
+  }
 
   // Get the entire ThreadInfo. Accesses are guarded by gPSMutex.
   static ThreadInfo* Info(PSLockRef) { return sThreadInfo.get(); }
 
   // Get only the RacyThreadInfo. Accesses are not guarded by gPSMutex.
   static RacyThreadInfo* RacyInfo()
   {
     ThreadInfo* info = sThreadInfo.get();
     return info ? info->RacyInfo().get() : nullptr;
   }
 
-  static void SetInfo(PSLockRef, ThreadInfo* aInfo) { sThreadInfo.set(aInfo); }
+  // Get only the PseudoStack. Accesses are not guarded by gPSMutex. RacyInfo()
+  // can also be used to get the PseudoStack, but that is marginally slower
+  // because it requires an extra pointer indirection.
+  static PseudoStack* Stack() { return sPseudoStack.get(); }
+
+  static void SetInfo(PSLockRef, ThreadInfo* aInfo)
+  {
+    sThreadInfo.set(aInfo);
+    sPseudoStack.set(aInfo ? aInfo->RacyInfo().get() : nullptr);  // an upcast
+  }
 
 private:
   // This is a non-owning reference to the ThreadInfo; CorePS::mLiveThreads is
   // the owning reference. On thread destruction, this reference is cleared and
   // the ThreadInfo is destroyed or transferred to CorePS::mDeadThreads.
   static MOZ_THREAD_LOCAL(ThreadInfo*) sThreadInfo;
 };
 
 MOZ_THREAD_LOCAL(ThreadInfo*) TLSInfo::sThreadInfo;
 
+// Although you can access a thread's PseudoStack via TLSInfo::sThreadInfo, we
+// also have a second TLS pointer directly to the PseudoStack. Here's why.
+//
+// - We need to be able to push to and pop from the PseudoStack in
+//   profiler_call_{enter,exit}.
+//
+// - Those two functions are hot and must be defined in GeckoProfiler.h so they
+//   can be inlined.
+//
+// - We don't want to expose TLSInfo (and ThreadInfo) in GeckoProfiler.h.
+//
+// This second pointer isn't ideal, but does provide a way to satisfy those
+// constraints. TLSInfo manages it, except for the uses in
+// profiler_call_{enter,exit}.
+MOZ_THREAD_LOCAL(PseudoStack*) sPseudoStack;
+
 // The name of the main thread.
 static const char* const kMainThreadName = "GeckoMain";
 
 static bool
 CanNotifyObservers()
 {
   MOZ_RELEASE_ASSERT(NS_IsMainThread());
 
@@ -2773,25 +2803,25 @@ profiler_get_backtrace_noalloc(char *out
   output[0] = output[1] = '\0';
 
   PSAutoLock lock(gPSMutex);
 
   if (!ActivePS::Exists(lock)) {
     return;
   }
 
-  RacyThreadInfo* racyInfo = TLSInfo::RacyInfo();
-  if (!racyInfo) {
+  PseudoStack* pseudoStack = TLSInfo::Stack();
+  if (!pseudoStack) {
     return;
   }
 
   bool includeDynamicString = !ActivePS::FeaturePrivacy(lock);
 
-  volatile js::ProfileEntry* pseudoFrames = racyInfo->mStack;
-  uint32_t pseudoCount = racyInfo->stackSize();
+  volatile js::ProfileEntry* pseudoFrames = pseudoStack->mStack;
+  uint32_t pseudoCount = pseudoStack->stackSize();
 
   for (uint32_t i = 0; i < pseudoCount; i++) {
     const char* label = pseudoFrames[i].label();
     const char* dynamicString =
       includeDynamicString ? pseudoFrames[i].getDynamicString() : nullptr;
     size_t labelLength = strlen(label);
     if (dynamicString) {
       // Put the label, a space, and the dynamic string into output.
@@ -2906,17 +2936,17 @@ profiler_log(const char* aStr)
   profiler_tracing("log", aStr);
 }
 
 PseudoStack*
 profiler_get_pseudo_stack()
 {
   // This function runs both on and off the main thread.
 
-  return TLSInfo::RacyInfo();
+  return TLSInfo::Stack();
 }
 
 void
 profiler_set_js_context(JSContext* aCx)
 {
   // This function runs both on and off the main thread.
 
   MOZ_ASSERT(aCx);
@@ -2957,55 +2987,16 @@ profiler_clear_js_context()
   }
 
   // We don't call info->StopJSSampling() here; there's no point doing that for
   // a JS thread that is in the process of disappearing.
 
   info->mContext = nullptr;
 }
 
-// A short-lived, non-owning PseudoStack reference is created between each
-// profiler_call_enter() / profiler_call_exit() call pair. RAII objects (e.g.
-// SamplerStackFrameRAII) ensure that these calls are balanced. Furthermore,
-// the RAII objects exist within the thread itself, which means they are
-// necessarily bounded by the lifetime of the thread, which ensures that the
-// references held can't be used after the PseudoStack is destroyed.
-void*
-profiler_call_enter(const char* aInfo,
-                    js::ProfileEntry::Category aCategory,
-                    void* aFrameAddress, bool aCopy, uint32_t aLine,
-                    const char* aDynamicString)
-{
-  // This function runs both on and off the main thread.
-
-  PseudoStack* pseudoStack = TLSInfo::RacyInfo();   // an upcast
-  if (!pseudoStack) {
-    return pseudoStack;
-  }
-  pseudoStack->push(aInfo, aCategory, aFrameAddress, aCopy, aLine,
-                    aDynamicString);
-
-  // The handle is meant to support future changes but for now it is simply
-  // used to avoid having to call TLSInfo::RacyInfo() in profiler_call_exit().
-  return pseudoStack;
-}
-
-void
-profiler_call_exit(void* aHandle)
-{
-  // This function runs both on and off the main thread.
-
-  if (!aHandle) {
-    return;
-  }
-
-  PseudoStack* pseudoStack = static_cast<PseudoStack*>(aHandle);
-  pseudoStack->pop();
-}
-
 void*
 profiler_get_stack_top()
 {
   PSAutoLock lock(gPSMutex);
   ThreadInfo* threadInfo = FindLiveThreadInfo(lock);
   if (threadInfo) {
     return threadInfo->StackTop();
   }
--- a/tools/profiler/public/GeckoProfiler.h
+++ b/tools/profiler/public/GeckoProfiler.h
@@ -23,16 +23,17 @@
 #include <stdint.h>
 #include <stdarg.h>
 
 #include "mozilla/Assertions.h"
 #include "mozilla/Attributes.h"
 #include "js/TypeDecls.h"
 #include "mozilla/GuardObjects.h"
 #include "mozilla/UniquePtr.h"
+#include "PseudoStack.h"
 
 class SpliceableJSONWriter;
 
 namespace mozilla {
 class MallocAllocPolicy;
 class TimeStamp;
 template <class T,
           size_t MinInlineCapacity,
@@ -395,25 +396,62 @@ PROFILER_FUNC(void* profiler_get_stack_t
 
 // Make sure that we can use std::min here without the Windows headers messing with us.
 #ifdef min
 # undef min
 #endif
 
 class nsISupports;
 class ProfilerMarkerPayload;
-class PseudoStack;
+
+// This exists purely for profiler_call_{enter,exit}. See the comment on the
+// definition in platform.cpp for details.
+extern MOZ_THREAD_LOCAL(PseudoStack*) sPseudoStack;
 
 // Returns a handle to pass on exit. This can check that we are popping the
 // correct callstack. Operates the same whether the profiler is active or not.
-void* profiler_call_enter(const char* aInfo,
-                          js::ProfileEntry::Category aCategory,
-                          void* aFrameAddress, bool aCopy, uint32_t aLine,
-                          const char* aDynamicString = nullptr);
-void profiler_call_exit(void* aHandle);
+//
+// A short-lived, non-owning PseudoStack reference is created between each
+// profiler_call_enter() / profiler_call_exit() call pair. RAII objects (e.g.
+// SamplerStackFrameRAII) ensure that these calls are balanced. Furthermore,
+// the RAII objects exist within the thread itself, which means they are
+// necessarily bounded by the lifetime of the thread, which ensures that the
+// references held can't be used after the PseudoStack is destroyed.
+inline void*
+profiler_call_enter(const char* aInfo,
+                    js::ProfileEntry::Category aCategory,
+                    void* aFrameAddress, bool aCopy, uint32_t aLine,
+                    const char* aDynamicString = nullptr)
+{
+  // This function runs both on and off the main thread.
+
+  PseudoStack* pseudoStack = sPseudoStack.get();
+  if (!pseudoStack) {
+    return pseudoStack;
+  }
+  pseudoStack->push(aInfo, aCategory, aFrameAddress, aCopy, aLine,
+                    aDynamicString);
+
+  // The handle is meant to support future changes but for now it is simply
+  // used to avoid having to call TLSInfo::RacyInfo() in profiler_call_exit().
+  return pseudoStack;
+}
+
+inline void
+profiler_call_exit(void* aHandle)
+{
+  // This function runs both on and off the main thread.
+
+  if (!aHandle) {
+    return;
+  }
+
+  PseudoStack* pseudoStack = static_cast<PseudoStack*>(aHandle);
+  pseudoStack->pop();
+}
 
 // Adds a marker to the PseudoStack. A no-op if the profiler is inactive or in
 // privacy mode.
 void profiler_add_marker(const char *aMarker,
                          ProfilerMarkerPayload *aPayload = nullptr);
 
 #define PROFILER_APPEND_LINE_NUMBER_PASTE(id, line) id ## line
 #define PROFILER_APPEND_LINE_NUMBER_EXPAND(id, line) \
--- a/tools/profiler/public/PseudoStack.h
+++ b/tools/profiler/public/PseudoStack.h
@@ -5,16 +5,17 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #ifndef PseudoStack_h
 #define PseudoStack_h
 
 #include "mozilla/ArrayUtils.h"
 #include "js/ProfilingStack.h"
 #include "StoreSequencer.h"
+#include "nsISupportsImpl.h"  // for MOZ_COUNT_{CTOR,DTOR}
 
 #include <stdlib.h>
 #include <stdint.h>
 
 #include <algorithm>
 
 // The PseudoStack members are read by signal handlers, so the mutation of them
 // needs to be signal-safe.