Bug 1727877 - Only allocate JS frame buffer when thread is being profiled - r=canaltinova
authorGerald Squelart <gsquelart@mozilla.com>
Wed, 01 Sep 2021 00:38:10 +0000
changeset 590398 e6242f4c09c53bfddb41c412c3778f1b63c81e44
parent 590397 6d1a446c453b41cb4ff72dcd856d5c298d0f3f5e
child 590399 82a89acc5da21887ef42b849363e35f017397309
push id38752
push usermalexandru@mozilla.com
push dateWed, 01 Sep 2021 03:48:05 +0000
treeherdermozilla-central@a14cf6b742de [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewerscanaltinova
bugs1727877
milestone93.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1727877 - Only allocate JS frame buffer when thread is being profiled - r=canaltinova The buffer was previously allocated as soon as a JSContext was present, meaning that some memory was used even when the profiler was not running, which is the case most of the time for most users. This saves some memory, at the cost of having to lock the per-thread ThreadRegistration data when sampling a thread with a JSContext. This should have low contention risk, because it's mostly used when sampling on the thread, while the periodic sampler uses its own buffer so it doesn't need to lock the per-thread data. Differential Revision: https://phabricator.services.mozilla.com/D123910
tools/profiler/core/ProfilerThreadRegistrationData.cpp
tools/profiler/core/platform.cpp
tools/profiler/public/ProfilerThreadRegistrationData.h
tools/profiler/tests/gtest/GeckoProfiler.cpp
--- a/tools/profiler/core/ProfilerThreadRegistrationData.cpp
+++ b/tools/profiler/core/ProfilerThreadRegistrationData.cpp
@@ -103,57 +103,74 @@ void ThreadRegistrationLockedRWFromAnyTh
         ProfiledThreadData* aProfiledThreadData, const PSAutoLock&) {
   MOZ_ASSERT(!mIsBeingProfiled);
   mIsBeingProfiled = true;
 
   MOZ_ASSERT(!mProfiledThreadData);
   MOZ_ASSERT(aProfiledThreadData);
   mProfiledThreadData = aProfiledThreadData;
 
+  if (mJSContext) {
+    // The thread is now being profiled, and we already have a JSContext,
+    // allocate a JsFramesBuffer to allow profiler-unlocked on-thread sampling.
+    MOZ_ASSERT(!mJsFrameBuffer);
+    mJsFrameBuffer = new JsFrame[MAX_JS_FRAMES];
+  }
+
   // Check invariants.
   MOZ_ASSERT(mIsBeingProfiled == !!mProfiledThreadData);
+  MOZ_ASSERT((mJSContext && mIsBeingProfiled) == !!mJsFrameBuffer);
 }
 
 void ThreadRegistrationLockedRWFromAnyThread::
     ClearIsBeingProfiledAndProfiledThreadData(const PSAutoLock&) {
   mIsBeingProfiled = false;
   mProfiledThreadData = nullptr;
 
+  if (mJsFrameBuffer) {
+    delete[] mJsFrameBuffer;
+    mJsFrameBuffer = nullptr;
+  }
+
   // Check invariants.
   MOZ_ASSERT(mIsBeingProfiled == !!mProfiledThreadData);
+  MOZ_ASSERT((mJSContext && mIsBeingProfiled) == !!mJsFrameBuffer);
 }
 
 void ThreadRegistrationLockedRWOnThread::SetJSContext(JSContext* aJSContext) {
   MOZ_ASSERT(aJSContext && !mJSContext);
 
   mJSContext = aJSContext;
 
-  // We now have a JSContext, allocate a JsFramesBuffer to allow unlocked
-  // on-thread sampling.
-  MOZ_ASSERT(!mJsFrameBuffer);
-  mJsFrameBuffer = new JsFrame[MAX_JS_FRAMES];
+  if (mProfiledThreadData) {
+    MOZ_ASSERT(mIsBeingProfiled == !!mProfiledThreadData);
+    // We now have a JSContext, and the thread is already being profiled,
+    // allocate a JsFramesBuffer to allow profiler-unlocked on-thread sampling.
+    MOZ_ASSERT(!mJsFrameBuffer);
+    mJsFrameBuffer = new JsFrame[MAX_JS_FRAMES];
+  }
 
   // We give the JS engine a non-owning reference to the ProfilingStack. It's
   // important that the JS engine doesn't touch this once the thread dies.
   js::SetContextProfilingStack(aJSContext, &ProfilingStackRef());
 
   // Check invariants.
-  MOZ_ASSERT(!!mJSContext == !!mJsFrameBuffer);
+  MOZ_ASSERT((mJSContext && mIsBeingProfiled) == !!mJsFrameBuffer);
 }
 
 void ThreadRegistrationLockedRWOnThread::ClearJSContext() {
   mJSContext = nullptr;
 
   if (mJsFrameBuffer) {
     delete[] mJsFrameBuffer;
     mJsFrameBuffer = nullptr;
   }
 
   // Check invariants.
-  MOZ_ASSERT(!!mJSContext == !!mJsFrameBuffer);
+  MOZ_ASSERT((mJSContext && mIsBeingProfiled) == !!mJsFrameBuffer);
 }
 
 void ThreadRegistrationLockedRWOnThread::PollJSSampling() {
   // We can't start/stop profiling until we have the thread's JSContext.
   if (mJSContext) {
     // It is possible for mJSSampling to go through the following sequences.
     //
     // - INACTIVE, ACTIVE_REQUESTED, INACTIVE_REQUESTED, INACTIVE
--- a/tools/profiler/core/platform.cpp
+++ b/tools/profiler/core/platform.cpp
@@ -2334,19 +2334,35 @@ static void DoSyncSample(
   const uint64_t bufferRangeStart = aBuffer.BufferRangeStart();
 
   const uint64_t samplePos =
       aBuffer.AddThreadIdEntry(aThreadData.Info().ThreadId());
 
   TimeDuration delta = aNow - CorePS::ProcessStartTime();
   aBuffer.AddEntry(ProfileBufferEntry::Time(delta.ToMilliseconds()));
 
-  DoSharedSample(/* aIsSynchronous = */ true, aFeatures, aThreadData,
-                 aThreadData.GetJsFrameBuffer(), aRegs, samplePos,
-                 bufferRangeStart, aBuffer, aCaptureOptions);
+  if (!aThreadData.GetJSContext()) {
+    // No JSContext, there is no JS frame buffer (and no need for it).
+    DoSharedSample(/* aIsSynchronous = */ true, aFeatures, aThreadData,
+                   /* aJsFrames = */ nullptr, aRegs, samplePos,
+                   bufferRangeStart, aBuffer, aCaptureOptions);
+  } else {
+    // JSContext is present, we need to lock the thread data to access the JS
+    // frame buffer.
+    ThreadRegistration::WithOnThreadRef([&](ThreadRegistration::OnThreadRef
+                                                aOnThreadRef) {
+      aOnThreadRef.WithConstLockedRWOnThread(
+          [&](const ThreadRegistration::LockedRWOnThread& aLockedThreadData) {
+            DoSharedSample(/* aIsSynchronous = */ true, aFeatures, aThreadData,
+                           aLockedThreadData.GetJsFrameBuffer(), aRegs,
+                           samplePos, bufferRangeStart, aBuffer,
+                           aCaptureOptions);
+          });
+    });
+  }
 }
 
 // Writes the components of a periodic sample to ActivePS's ProfileBuffer.
 // The ThreadId entry is already written in the main ProfileBuffer, its location
 // is `aSamplePos`, we can write the rest to `aBuffer` (which may be different).
 static inline void DoPeriodicSample(
     PSLockRef aLock,
     const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread& aThreadData,
@@ -5768,32 +5784,50 @@ void profiler_suspend_and_sample_thread(
     ThreadRegistration::WithOnThreadRef(
         [&](ThreadRegistration::OnThreadRef aOnThreadRef) {
           aOnThreadRef.WithUnlockedReaderAndAtomicRWOnThread(
               [&](const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread&
                       aThreadData) {
                 // TODO: Remove this lock when on-thread sampling doesn't
                 // require it anymore.
                 PSAutoLock lock;
-                profiler_suspend_and_sample_thread(
-                    lock, aThreadData, aThreadData.GetJsFrameBuffer(), true,
-                    aFeatures, aCollector, aSampleNative);
+                if (!aThreadData.GetJSContext()) {
+                  // No JSContext, there is no JS frame buffer (and no need for
+                  // it).
+                  profiler_suspend_and_sample_thread(
+                      lock, aThreadData, /* aJsFrames = */ nullptr,
+                      /* aIsSynchronous = */ true, aFeatures, aCollector,
+                      aSampleNative);
+                } else {
+                  // JSContext is present, we need to lock the thread data to
+                  // access the JS frame buffer.
+                  aOnThreadRef.WithConstLockedRWOnThread(
+                      [&](const ThreadRegistration::LockedRWOnThread&
+                              aLockedThreadData) {
+                        profiler_suspend_and_sample_thread(
+                            lock, aThreadData,
+                            aLockedThreadData.GetJsFrameBuffer(),
+                            /* aIsSynchronous = */ true, aFeatures, aCollector,
+                            aSampleNative);
+                      });
+                }
               });
         });
   } else {
     // Lock the profiler before accessing the ThreadRegistry.
     PSAutoLock lock;
     ThreadRegistry::WithOffThreadRef(
         aThreadId, [&](ThreadRegistry::OffThreadRef aOffThreadRef) {
           aOffThreadRef.WithLockedRWFromAnyThread(
               [&](const ThreadRegistration::UnlockedReaderAndAtomicRWOnThread&
                       aThreadData) {
                 JsFrameBuffer& jsFrames = CorePS::JsFrames(lock);
                 profiler_suspend_and_sample_thread(lock, aThreadData, jsFrames,
-                                                   false, aFeatures, aCollector,
+                                                   /* aIsSynchronous = */ false,
+                                                   aFeatures, aCollector,
                                                    aSampleNative);
               });
         });
   }
 }
 
 // END externally visible functions
 ////////////////////////////////////////////////////////////////////////
--- a/tools/profiler/public/ProfilerThreadRegistrationData.h
+++ b/tools/profiler/public/ProfilerThreadRegistrationData.h
@@ -105,18 +105,18 @@ class ThreadRegistrationData {
   // Written from thread, read from thread and suspended thread.
   nsCOMPtr<nsIThread> mThread;
 
   // If this is a JS thread, this is its JSContext, which is required for any
   // JS sampling.
   // Written from thread, read from thread and suspended thread.
   JSContext* mJSContext = nullptr;
 
-  // If mJSContext is not null, this points at the start of a JsFrameBuffer to
-  // be used for on-thread synchronous sampling.
+  // If mJSContext is not null AND the thread is being profiled, this points at
+  // the start of a JsFrameBuffer to be used for on-thread synchronous sampling.
   JsFrame* mJsFrameBuffer = nullptr;
 
   // The profiler needs to start and stop JS sampling of JS threads at various
   // times. However, the JS engine can only do the required actions on the
   // JS thread itself ("on-thread"), not from another thread ("off-thread").
   // Therefore, we have the following two-step process.
   //
   // - The profiler requests (on-thread or off-thread) that the JS sampling be
@@ -357,19 +357,16 @@ class ThreadRegistrationUnlockedReaderAn
     : public ThreadRegistrationUnlockedRWForLockedProfiler {
  public:
   // IMPORTANT! IMPORTANT! IMPORTANT! IMPORTANT! IMPORTANT! IMPORTANT!
   // Non-atomic members read here MUST be written from LockedRWOnThread (to
   // guarantee that they are only modified on this thread.)
 
   [[nodiscard]] JSContext* GetJSContext() const { return mJSContext; }
 
-  // Not null when JSContext is not null. Points at the start of JsFrameBuffer.
-  [[nodiscard]] JsFrame* GetJsFrameBuffer() const { return mJsFrameBuffer; }
-
  protected:
   ThreadRegistrationUnlockedReaderAndAtomicRWOnThread(const char* aName,
                                                       const void* aStackTop)
       : ThreadRegistrationUnlockedRWForLockedProfiler(aName, aStackTop) {}
 };
 
 // Accessing locked data from the thread, or from any thread through the locked
 // profiler:
@@ -377,16 +374,20 @@ class ThreadRegistrationUnlockedReaderAn
 // Like above, and profiler can also read&write mutex-protected members.
 class ThreadRegistrationLockedRWFromAnyThread
     : public ThreadRegistrationUnlockedReaderAndAtomicRWOnThread {
  public:
   void SetIsBeingProfiledWithProfiledThreadData(
       ProfiledThreadData* aProfiledThreadData, const PSAutoLock&);
   void ClearIsBeingProfiledAndProfiledThreadData(const PSAutoLock&);
 
+  // Not null when JSContext is not null AND this thread is being profiled.
+  // Points at the start of JsFrameBuffer.
+  [[nodiscard]] JsFrame* GetJsFrameBuffer() const { return mJsFrameBuffer; }
+
   [[nodiscard]] const nsCOMPtr<nsIEventTarget> GetEventTarget() const {
     return mThread;
   }
 
   void ResetMainThread(nsIThread* aThread) { mThread = aThread; }
 
   // aDelay is the time the event that is currently running on the thread was
   // queued before starting to run (if a PrioritizedEventQueue
--- a/tools/profiler/tests/gtest/GeckoProfiler.cpp
+++ b/tools/profiler/tests/gtest/GeckoProfiler.cpp
@@ -296,17 +296,16 @@ static void TestConstUnlockedReaderAndAt
     const TimeStamp& aBeforeRegistration, const TimeStamp& aAfterRegistration,
     const void* aOnStackObject,
     ProfilerThreadId aThreadId = profiler_current_thread_id()) {
   TestConstUnlockedRWForLockedProfiler(aData, aBeforeRegistration,
                                        aAfterRegistration, aOnStackObject,
                                        aThreadId);
 
   EXPECT_EQ(aData.GetJSContext(), nullptr);
-  EXPECT_EQ(aData.GetJsFrameBuffer(), nullptr);
 };
 
 static void TestUnlockedRWForLockedProfiler(
     profiler::ThreadRegistration::UnlockedRWForLockedProfiler& aData,
     const TimeStamp& aBeforeRegistration, const TimeStamp& aAfterRegistration,
     const void* aOnStackObject,
     ProfilerThreadId aThreadId = profiler_current_thread_id()) {
   TestConstUnlockedRWForLockedProfiler(aData, aBeforeRegistration,
@@ -338,16 +337,17 @@ static void TestConstLockedRWFromAnyThre
     const profiler::ThreadRegistration::LockedRWFromAnyThread& aData,
     const TimeStamp& aBeforeRegistration, const TimeStamp& aAfterRegistration,
     const void* aOnStackObject,
     ProfilerThreadId aThreadId = profiler_current_thread_id()) {
   TestConstUnlockedReaderAndAtomicRWOnThread(aData, aBeforeRegistration,
                                              aAfterRegistration, aOnStackObject,
                                              aThreadId);
 
+  EXPECT_EQ(aData.GetJsFrameBuffer(), nullptr);
   EXPECT_EQ(aData.GetEventTarget(), nullptr);
 };
 
 static void TestLockedRWFromAnyThread(
     profiler::ThreadRegistration::LockedRWFromAnyThread& aData,
     const TimeStamp& aBeforeRegistration, const TimeStamp& aAfterRegistration,
     const void* aOnStackObject,
     ProfilerThreadId aThreadId = profiler_current_thread_id()) {