Bug 1716959 - On-thread sampling uses a per-thread JS frame buffer that's only allocated when there's also a JSContext - r=canaltinova
authorGerald Squelart <gsquelart@mozilla.com>
Wed, 25 Aug 2021 00:55:59 +0000
changeset 589855 9fb7bba8eec3aa600363ca080dbdeb0b5b42ade0
parent 589854 2772adb20068e217426b0c6ed4a284b1574ff858
child 589856 e1e1fb107a1a8fd3bfb9baa98c64ef000dc2d5e8
push id148586
push usergsquelart@mozilla.com
push dateWed, 25 Aug 2021 01:12:58 +0000
treeherderautoland@c8933e49a64b [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewerscanaltinova
bugs1716959
milestone93.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1716959 - On-thread sampling uses a per-thread JS frame buffer that's only allocated when there's also a JSContext - r=canaltinova MergeStack requires a fairly large buffer to store JS frames, too big to be allocated on the stack without risking a stack overflow. Until now, there was only one buffer, stored in CorePS, and only accessible while holding the Profiler gPSMutex. Now each thread that has a JSContext, also has its own JS frame buffer, which is accessible on the thread without needing any lock. The Profiler's Sampler still uses the CorePS buffer for its periodic sampling, but it won't prevent parallel on-thread sampling anymore. The appropriate buffer is passed to ExtractJsFrames and then MergeStacks. MergeStacks accepts a null pointer, which happens on threads where there is no JSContext, and therefore no JS to sample. Differential Revision: https://phabricator.services.mozilla.com/D122087
tools/profiler/core/ProfilerThreadRegistrationData.cpp
tools/profiler/core/platform.cpp
tools/profiler/public/ProfilerThreadRegistrationData.h
tools/profiler/tests/gtest/GeckoProfiler.cpp
--- a/tools/profiler/core/ProfilerThreadRegistrationData.cpp
+++ b/tools/profiler/core/ProfilerThreadRegistrationData.cpp
@@ -111,32 +111,49 @@ void ThreadRegistrationLockedRWFromAnyTh
   // Check invariants.
   MOZ_ASSERT(mIsBeingProfiled == !!mProfiledThreadData);
 }
 
 void ThreadRegistrationLockedRWFromAnyThread::
     ClearIsBeingProfiledAndProfiledThreadData(const PSAutoLock&) {
   mIsBeingProfiled = false;
   mProfiledThreadData = nullptr;
+
   // Check invariants.
   MOZ_ASSERT(mIsBeingProfiled == !!mProfiledThreadData);
 }
 
 void ThreadRegistrationLockedRWOnThread::SetJSContext(JSContext* aJSContext) {
   MOZ_ASSERT(aJSContext && !mJSContext);
 
   mJSContext = aJSContext;
 
+  // We now have a JSContext, allocate a JsFramesBuffer to allow unlocked
+  // on-thread sampling.
+  MOZ_ASSERT(!mJsFrameBuffer);
+  mJsFrameBuffer = new JsFrame[MAX_JS_FRAMES];
+
   // We give the JS engine a non-owning reference to the ProfilingStack. It's
   // important that the JS engine doesn't touch this once the thread dies.
   js::SetContextProfilingStack(aJSContext, &ProfilingStackRef());
+
+  // Check invariants.
+  MOZ_ASSERT(!!mJSContext == !!mJsFrameBuffer);
 }
 
 void ThreadRegistrationLockedRWOnThread::ClearJSContext() {
   mJSContext = nullptr;
+
+  if (mJsFrameBuffer) {
+    delete[] mJsFrameBuffer;
+    mJsFrameBuffer = nullptr;
+  }
+
+  // Check invariants.
+  MOZ_ASSERT(!!mJSContext == !!mJsFrameBuffer);
 }
 
 void ThreadRegistrationLockedRWOnThread::PollJSSampling() {
   // We can't start/stop profiling until we have the thread's JSContext.
   if (mJSContext) {
     // It is possible for mJSSampling to go through the following sequences.
     //
     // - INACTIVE, ACTIVE_REQUESTED, INACTIVE_REQUESTED, INACTIVE
--- a/tools/profiler/core/platform.cpp
+++ b/tools/profiler/core/platform.cpp
@@ -332,18 +332,20 @@ typedef const PSAutoLock& PSLockRef;
 
 #define PS_GET_AND_SET(type_, name_)                  \
   PS_GET(type_, name_)                                \
   static void Set##name_(PSLockRef, type_ a##name_) { \
     MOZ_ASSERT(sInstance);                            \
     sInstance->m##name_ = a##name_;                   \
   }
 
-static const size_t MAX_JS_FRAMES = 1024;
-using JsFrameBuffer = JS::ProfilingFrameIterator::Frame[MAX_JS_FRAMES];
+static constexpr size_t MAX_JS_FRAMES =
+    mozilla::profiler::ThreadRegistrationData::MAX_JS_FRAMES;
+using JsFrame = mozilla::profiler::ThreadRegistrationData::JsFrame;
+using JsFrameBuffer = mozilla::profiler::ThreadRegistrationData::JsFrameBuffer;
 
 // All functions in this file can run on multiple threads unless they have an
 // NS_IsMainThread() assertion.
 
 // This class contains the profiler's core global state, i.e. that which is
 // valid even when the profiler is not active. Most profile operations can't do
 // anything useful when this class is not instantiated, so we release-assert
 // its non-nullness in all such operations.
@@ -1566,16 +1568,20 @@ class StackWalkControl {
 // copied frames.
 // This copy is necessary since, like the native stack, the JS stack is iterated
 // youngest-to-oldest and we need to iterate oldest-to-youngest in MergeStacks.
 static uint32_t ExtractJsFrames(
     bool aIsSynchronous,
     const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData,
     const Registers& aRegs, ProfilerStackCollector& aCollector,
     JsFrameBuffer aJsFrames, StackWalkControl* aStackWalkControlIfSupported) {
+  MOZ_ASSERT(aJsFrames,
+             "ExtractJsFrames should only be called if there is a "
+             "JsFrameBuffer to fill.");
+
   uint32_t jsFramesCount = 0;
 
   // Only walk jit stack if profiling frame iterator is turned on.
   JSContext* context = aThreadData.GetJSContext();
   if (context && JS::IsProfilingEnabledForContext(context)) {
     AutoWalkJSStack autoWalkJSStack;
 
     if (autoWalkJSStack.walkAllowed) {
@@ -1643,22 +1649,24 @@ static uint32_t ExtractJsFrames(
 }
 
 // Merges the profiling stack, native stack, and JS stack, outputting the
 // details to aCollector.
 static void MergeStacks(
     uint32_t aFeatures, bool aIsSynchronous,
     const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData,
     const Registers& aRegs, const NativeStack& aNativeStack,
-    ProfilerStackCollector& aCollector, JsFrameBuffer aJsFrames,
+    ProfilerStackCollector& aCollector, JsFrame* aJsFrames,
     uint32_t aJsFramesCount) {
   // WARNING: this function runs within the profiler's "critical section".
   // WARNING: this function might be called while the profiler is inactive, and
   //          cannot rely on ActivePS.
 
+  MOZ_ASSERT_IF(!aJsFrames, aJsFramesCount == 0);
+
   const ProfilingStack& profilingStack = aThreadData.ProfilingStackCRef();
   const js::ProfilingStackFrame* profilingStackFrames = profilingStack.frames;
   uint32_t profilingStackFrameCount = profilingStack.stackSize();
 
   // While the profiling stack array is ordered oldest-to-youngest, the JS and
   // native arrays are ordered youngest-to-oldest. We must add frames to aInfo
   // oldest-to-youngest. Thus, iterate over the profiling stack forwards and JS
   // and native arrays backwards. Note: this means the terminating condition
@@ -2252,55 +2260,55 @@ static void DoNativeBacktrace(
 // ActivePS's ProfileBuffer. (This should only be called from DoSyncSample()
 // and DoPeriodicSample().)
 //
 // The grammar for entry sequences is in a comment above
 // ProfileBuffer::StreamSamplesToJSON.
 static inline void DoSharedSample(
     PSLockRef aLock, bool aIsSynchronous, uint32_t aFeatures,
     const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData,
-    const Registers& aRegs, uint64_t aSamplePos, uint64_t aBufferRangeStart,
-    ProfileBuffer& aBuffer,
+    JsFrame* aJsFrames, const Registers& aRegs, uint64_t aSamplePos,
+    uint64_t aBufferRangeStart, ProfileBuffer& aBuffer,
     StackCaptureOptions aCaptureOptions = StackCaptureOptions::Full) {
   // WARNING: this function runs within the profiler's "critical section".
 
   MOZ_ASSERT(!aBuffer.IsThreadSafe(),
              "Mutexes cannot be used inside this critical section");
 
   MOZ_RELEASE_ASSERT(ActivePS::Exists(aLock));
 
   ProfileBufferCollector collector(aBuffer, aSamplePos, aBufferRangeStart);
-  JsFrameBuffer& jsFrames = CorePS::JsFrames(aLock);
   StackWalkControl* stackWalkControlIfSupported = nullptr;
 #if defined(HAVE_NATIVE_UNWIND)
   const bool captureNative = ProfilerFeature::HasStackWalk(aFeatures) &&
                              aCaptureOptions == StackCaptureOptions::Full;
   StackWalkControl stackWalkControl;
   if constexpr (StackWalkControl::scIsSupported) {
     if (captureNative) {
       stackWalkControlIfSupported = &stackWalkControl;
     }
   }
 #endif  // defined(HAVE_NATIVE_UNWIND)
   const uint32_t jsFramesCount =
-      ExtractJsFrames(aIsSynchronous, aThreadData, aRegs, collector, jsFrames,
-                      stackWalkControlIfSupported);
+      aJsFrames ? ExtractJsFrames(aIsSynchronous, aThreadData, aRegs, collector,
+                                  aJsFrames, stackWalkControlIfSupported)
+                : 0;
   NativeStack nativeStack;
 #if defined(HAVE_NATIVE_UNWIND)
   if (captureNative) {
     DoNativeBacktrace(aThreadData, aRegs, nativeStack,
                       stackWalkControlIfSupported);
 
     MergeStacks(aFeatures, aIsSynchronous, aThreadData, aRegs, nativeStack,
-                collector, jsFrames, jsFramesCount);
+                collector, aJsFrames, jsFramesCount);
   } else
 #endif
   {
     MergeStacks(aFeatures, aIsSynchronous, aThreadData, aRegs, nativeStack,
-                collector, jsFrames, jsFramesCount);
+                collector, aJsFrames, jsFramesCount);
 
     // We can't walk the whole native stack, but we can record the top frame.
     if (ProfilerFeature::HasLeaf(aFeatures) &&
         aCaptureOptions == StackCaptureOptions::Full) {
       aBuffer.AddEntry(ProfileBufferEntry::NativeLeafAddr((void*)aRegs.mPC));
     }
   }
 }
@@ -2320,31 +2328,34 @@ static void DoSyncSample(
 
   const uint64_t samplePos =
       aBuffer.AddThreadIdEntry(aThreadData.Info().ThreadId());
 
   TimeDuration delta = aNow - CorePS::ProcessStartTime();
   aBuffer.AddEntry(ProfileBufferEntry::Time(delta.ToMilliseconds()));
 
   DoSharedSample(aLock, /* aIsSynchronous = */ true, aFeatures, aThreadData,
-                 aRegs, samplePos, bufferRangeStart, aBuffer, aCaptureOptions);
+                 aThreadData.GetJsFrameBuffer(), aRegs, samplePos,
+                 bufferRangeStart, aBuffer, aCaptureOptions);
 }
 
 // Writes the components of a periodic sample to ActivePS's ProfileBuffer.
 // The ThreadId entry is already written in the main ProfileBuffer, its location
 // is `aSamplePos`, we can write the rest to `aBuffer` (which may be different).
 static inline void DoPeriodicSample(
     PSLockRef aLock,
     const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData,
     const Registers& aRegs, uint64_t aSamplePos, uint64_t aBufferRangeStart,
     ProfileBuffer& aBuffer) {
   // WARNING: this function runs within the profiler's "critical section".
 
+  JsFrameBuffer& jsFrames = CorePS::JsFrames(aLock);
   DoSharedSample(aLock, /* aIsSynchronous = */ false, ActivePS::Features(aLock),
-                 aThreadData, aRegs, aSamplePos, aBufferRangeStart, aBuffer);
+                 aThreadData, jsFrames, aRegs, aSamplePos, aBufferRangeStart,
+                 aBuffer);
 }
 
 // END sampling/unwinding code
 ////////////////////////////////////////////////////////////////////////
 
 ////////////////////////////////////////////////////////////////////////
 // BEGIN saving/streaming code
 
@@ -5652,43 +5663,44 @@ void profiler_clear_js_context() {
           lockedThreadData->ClearJSContext();
         }
       });
 }
 
 static void profiler_suspend_and_sample_thread(
     PSLockRef aLock,
     const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData,
-    bool aIsSynchronous, uint32_t aFeatures, ProfilerStackCollector& aCollector,
-    bool aSampleNative) {
+    JsFrame* aJsFrames, bool aIsSynchronous, uint32_t aFeatures,
+    ProfilerStackCollector& aCollector, bool aSampleNative) {
   const ThreadRegistrationInfo& info = aThreadData.Info();
 
   if (info.IsMainThread()) {
     aCollector.SetIsMainThread();
   }
 
   // Allocate the space for the native stack
   NativeStack nativeStack;
 
   auto collectStack = [&](const Registers& aRegs, const TimeStamp& aNow) {
     // The target thread is now suspended. Collect a native backtrace,
     // and call the callback.
-    JsFrameBuffer& jsFrames = CorePS::JsFrames(aLock);
     StackWalkControl* stackWalkControlIfSupported = nullptr;
 #if defined(HAVE_FASTINIT_NATIVE_UNWIND)
     StackWalkControl stackWalkControl;
     if constexpr (StackWalkControl::scIsSupported) {
       if (aSampleNative) {
         stackWalkControlIfSupported = &stackWalkControl;
       }
     }
 #endif
     const uint32_t jsFramesCount =
-        ExtractJsFrames(aIsSynchronous, aThreadData, aRegs, aCollector,
-                        jsFrames, stackWalkControlIfSupported);
+        aJsFrames
+            ? ExtractJsFrames(aIsSynchronous, aThreadData, aRegs, aCollector,
+                              aJsFrames, stackWalkControlIfSupported)
+            : 0;
 
 #if defined(HAVE_FASTINIT_NATIVE_UNWIND)
     if (aSampleNative) {
       // We can only use FramePointerStackWalk or MozStackWalk from
       // suspend_and_sample_thread as other stackwalking methods may not be
       // initialized.
 #  if defined(USE_FRAME_POINTER_STACK_WALK)
       DoFramePointerBacktrace(aThreadData, aRegs, nativeStack,
@@ -5696,22 +5708,22 @@ static void profiler_suspend_and_sample_
 #  elif defined(USE_MOZ_STACK_WALK)
       DoMozStackWalkBacktrace(aThreadData, aRegs, nativeStack,
                               stackWalkControlIfSupported);
 #  else
 #    error "Invalid configuration"
 #  endif
 
       MergeStacks(aFeatures, aIsSynchronous, aThreadData, aRegs, nativeStack,
-                  aCollector, jsFrames, jsFramesCount);
+                  aCollector, aJsFrames, jsFramesCount);
     } else
 #endif
     {
       MergeStacks(aFeatures, aIsSynchronous, aThreadData, aRegs, nativeStack,
-                  aCollector, jsFrames, jsFramesCount);
+                  aCollector, aJsFrames, jsFramesCount);
 
       if (ProfilerFeature::HasLeaf(aFeatures)) {
         aCollector.CollectNativeLeafAddr((void*)aRegs.mPC);
       }
     }
   };
 
   if (aIsSynchronous) {
@@ -5749,31 +5761,32 @@ void profiler_suspend_and_sample_thread(
     ThreadRegistration::WithOnThreadRef(
         [&](ThreadRegistration::OnThreadRef aOnThreadRef) {
           aOnThreadRef.WithUnlockedReaderAndAtomicRWOnThread(
               [&](const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread&
                       aThreadData) {
                 // TODO: Remove this lock when on-thread sampling doesn't
                 // require it anymore.
                 PSAutoLock lock;
-                profiler_suspend_and_sample_thread(lock, aThreadData, true,
-                                                   aFeatures, aCollector,
-                                                   aSampleNative);
+                profiler_suspend_and_sample_thread(
+                    lock, aThreadData, aThreadData.GetJsFrameBuffer(), true,
+                    aFeatures, aCollector, aSampleNative);
               });
         });
   } else {
     // Lock the profiler before accessing the ThreadRegistry.
     PSAutoLock lock;
     ThreadRegistry::WithOffThreadRef(
         aThreadId, [&](ThreadRegistry::OffThreadRef aOffThreadRef) {
           aOffThreadRef.WithLockedRWFromAnyThread(
               [&](const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread&
                       aThreadData) {
-                profiler_suspend_and_sample_thread(lock, aThreadData, false,
-                                                   aFeatures, aCollector,
+                JsFrameBuffer& jsFrames = CorePS::JsFrames(lock);
+                profiler_suspend_and_sample_thread(lock, aThreadData, jsFrames,
+                                                   false, aFeatures, aCollector,
                                                    aSampleNative);
               });
         });
   }
 }
 
 // END externally visible functions
 ////////////////////////////////////////////////////////////////////////
--- a/tools/profiler/public/ProfilerThreadRegistrationData.h
+++ b/tools/profiler/public/ProfilerThreadRegistrationData.h
@@ -32,16 +32,17 @@
 // unless that object was *really* constructed as that sub-class at least, even
 // if that sub-class only adds member functions!)
 // And where appropriate, these references will come along with the required
 // lock.
 
 #ifndef ProfilerThreadRegistrationData_h
 #define ProfilerThreadRegistrationData_h
 
+#include "js/ProfilingFrameIterator.h"
 #include "js/ProfilingStack.h"
 #include "mozilla/Atomics.h"
 #include "mozilla/MemoryReporting.h"
 #include "mozilla/ProfilerThreadPlatformData.h"
 #include "mozilla/ProfilerThreadRegistrationInfo.h"
 #include "nsCOMPtr.h"
 #include "nsIThread.h"
 
@@ -64,16 +65,20 @@ class ThreadRegistrationData {
     // Not including data that is not fully owned here.
     return 0;
   }
 
   size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {
     return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
   }
 
+  static constexpr size_t MAX_JS_FRAMES = 1024;
+  using JsFrame = JS::ProfilingFrameIterator::Frame;
+  using JsFrameBuffer = JsFrame[MAX_JS_FRAMES];
+
   // `protected` to allow derived classes to read all data members.
  protected:
   ThreadRegistrationData(const char* aName, const void* aStackTop);
 
 #ifdef DEBUG
   // Destructor only used to check invariants.
   ~ThreadRegistrationData() {
     MOZ_ASSERT(mIsBeingProfiled == !!mProfiledThreadData);
@@ -100,16 +105,20 @@ class ThreadRegistrationData {
   // Written from thread, read from thread and suspended thread.
   nsCOMPtr<nsIThread> mThread;
 
   // If this is a JS thread, this is its JSContext, which is required for any
   // JS sampling.
   // Written from thread, read from thread and suspended thread.
   JSContext* mJSContext = nullptr;
 
+  // If mJSContext is not null, this points at the start of a JsFrameBuffer to
+  // be used for on-thread synchronous sampling.
+  JsFrame* mJsFrameBuffer = nullptr;
+
   // The profiler needs to start and stop JS sampling of JS threads at various
   // times. However, the JS engine can only do the required actions on the
   // JS thread itself ("on-thread"), not from another thread ("off-thread").
   // Therefore, we have the following two-step process.
   //
   // - The profiler requests (on-thread or off-thread) that the JS sampling be
   //   started/stopped, by changing mJSSampling to the appropriate REQUESTED
   //   state.
@@ -348,16 +357,19 @@ class ThreadRegistrationUnlockedReaderAn
     : public ThreadRegistrationUnlockedRWForLockedProfiler {
  public:
   // IMPORTANT! IMPORTANT! IMPORTANT! IMPORTANT! IMPORTANT! IMPORTANT!
   // Non-atomic members read here MUST be written from LockedRWOnThread (to
   // guarantee that they are only modified on this thread.)
 
   [[nodiscard]] JSContext* GetJSContext() const { return mJSContext; }
 
+  // Not null when JSContext is not null. Points at the start of JsFrameBuffer.
+  [[nodiscard]] JsFrame* GetJsFrameBuffer() const { return mJsFrameBuffer; }
+
  protected:
   ThreadRegistrationUnlockedReaderAndAtomicRWOnThread(const char* aName,
                                                       const void* aStackTop)
       : ThreadRegistrationUnlockedRWForLockedProfiler(aName, aStackTop) {}
 };
 
 // Accessing locked data from the thread, or from any thread through the locked
 // profiler:
--- a/tools/profiler/tests/gtest/GeckoProfiler.cpp
+++ b/tools/profiler/tests/gtest/GeckoProfiler.cpp
@@ -296,16 +296,17 @@ static void TestConstUnlockedReaderAndAt
     const TimeStamp& aBeforeRegistration, const TimeStamp& aAfterRegistration,
     const void* aOnStackObject,
     ProfilerThreadId aThreadId = profiler_current_thread_id()) {
   TestConstUnlockedRWForLockedProfiler(aData, aBeforeRegistration,
                                        aAfterRegistration, aOnStackObject,
                                        aThreadId);
 
   EXPECT_EQ(aData.GetJSContext(), nullptr);
+  EXPECT_EQ(aData.GetJsFrameBuffer(), nullptr);
 };
 
 static void TestUnlockedRWForLockedProfiler(
     profiler::ThreadRegistration::UnlockedRWForLockedProfiler& aData,
     const TimeStamp& aBeforeRegistration, const TimeStamp& aAfterRegistration,
     const void* aOnStackObject,
     ProfilerThreadId aThreadId = profiler_current_thread_id()) {
   TestConstUnlockedRWForLockedProfiler(aData, aBeforeRegistration,