Bug 1583868 - New profiler feature "nostacksampling" - r=gregtatum
authorGerald Squelart <gsquelart@mozilla.com>
Tue, 01 Oct 2019 22:49:13 +0000
changeset 495932 ad006751f4e4ab22a12b161cf1ed2e420ae3004d
parent 495931 d8416b3169478c376bff79007b78459c0f4165d0
child 495933 5ad3cfbe42f65ff59c245418d05feca58a580d47
push id114140
push userdvarga@mozilla.com
push dateWed, 02 Oct 2019 18:04:51 +0000
treeherdermozilla-inbound@32eb0ea893f3 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersgregtatum
bugs1583868
milestone71.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1583868 - New profiler feature "nostacksampling" - r=gregtatum Previously, the absence of "stackwalk", "leaf", and "javascript" implied that the test/user didn't want any sampling, but this caused issues in some tests that enabled "stackwalk" on platforms that didn't support stack-walking, which ended up suppressing label-only stacks that the test expected. we now have an explicit feature "nostacksampling" that disables backtraces from the samplers in both profilers. This effectively cancels "stackwalk", "leaf", and "javascript" if present. Differential Revision: https://phabricator.services.mozilla.com/D47731
mozglue/baseprofiler/core/platform.cpp
mozglue/baseprofiler/public/BaseProfiler.h
toolkit/components/extensions/schemas/geckoProfiler.json
tools/profiler/core/platform.cpp
tools/profiler/public/GeckoProfiler.h
--- a/mozglue/baseprofiler/core/platform.cpp
+++ b/mozglue/baseprofiler/core/platform.cpp
@@ -1963,16 +1963,28 @@ static SamplerThread* NewSamplerThread(P
 }
 
 // This function is the sampler thread.  This implementation is used for all
 // targets.
 void SamplerThread::Run() {
   // TODO: If possible, name this thread later on, after NSPR becomes available.
   // PR_SetCurrentThreadName("SamplerThread");
 
+  // Features won't change during this SamplerThread's lifetime, so we can
+  // determine now whether stack sampling is required.
+  const bool noStackSampling = []() {
+    PSAutoLock lock;
+    if (!ActivePS::Exists(lock)) {
+      // If there is no active profiler, it doesn't matter what we return,
+      // because this thread will exit before any stack sampling is attempted.
+      return false;
+    }
+    return ActivePS::FeatureNoStackSampling(lock);
+  }();
+
   // Use local BlocksRingBuffer&ProfileBuffer to capture the stack.
   // (This is to avoid touching the CorePS::BlocksRingBuffer lock while
   // a thread is suspended, because that thread could be working with
   // the CorePS::BlocksRingBuffer as well.)
   BlocksRingBuffer localBlocksRingBuffer(
       BlocksRingBuffer::ThreadSafety::WithoutMutex);
   ProfileBuffer localProfileBuffer(localBlocksRingBuffer,
                                    MakePowerOfTwo32<65536>());
@@ -2002,19 +2014,16 @@ void SamplerThread::Run() {
         return;
       }
 
       ActivePS::ClearExpiredExitProfiles(lock);
 
       TimeStamp expiredMarkersCleaned = TimeStamp::NowUnfuzzed();
 
       if (!ActivePS::IsPaused(lock)) {
-        const Vector<LiveProfiledThreadData>& liveThreads =
-            ActivePS::LiveProfiledThreads(lock);
-
         TimeDuration delta = sampleStart - CorePS::ProcessStartTime();
         ProfileBuffer& buffer = ActivePS::Buffer(lock);
 
         // handle per-process generic counters
         const Vector<BaseProfilerCount*>& counters = CorePS::Counters(lock);
         for (auto& counter : counters) {
           // create Buffer entries for each counter
           buffer.AddEntry(ProfileBufferEntry::CounterId(counter));
@@ -2029,77 +2038,82 @@ void SamplerThread::Run() {
           buffer.AddEntry(ProfileBufferEntry::CounterKey(0));
           buffer.AddEntry(ProfileBufferEntry::Count(count));
           if (number) {
             buffer.AddEntry(ProfileBufferEntry::Number(number));
           }
         }
         TimeStamp countersSampled = TimeStamp::NowUnfuzzed();
 
-        for (auto& thread : liveThreads) {
-          RegisteredThread* registeredThread = thread.mRegisteredThread;
-          ProfiledThreadData* profiledThreadData =
-              thread.mProfiledThreadData.get();
-          RefPtr<ThreadInfo> info = registeredThread->Info();
-
-          // If the thread is asleep and has been sampled before in the same
-          // sleep episode, find and copy the previous sample, as that's
-          // cheaper than taking a new sample.
-          if (registeredThread->RacyRegisteredThread()
-                  .CanDuplicateLastSampleDueToSleep()) {
-            bool dup_ok = ActivePS::Buffer(lock).DuplicateLastSample(
-                info->ThreadId(), CorePS::ProcessStartTime(),
-                profiledThreadData->LastSample());
-            if (dup_ok) {
-              continue;
+        if (!noStackSampling) {
+          const Vector<LiveProfiledThreadData>& liveThreads =
+              ActivePS::LiveProfiledThreads(lock);
+
+          for (auto& thread : liveThreads) {
+            RegisteredThread* registeredThread = thread.mRegisteredThread;
+            ProfiledThreadData* profiledThreadData =
+                thread.mProfiledThreadData.get();
+            RefPtr<ThreadInfo> info = registeredThread->Info();
+
+            // If the thread is asleep and has been sampled before in the same
+            // sleep episode, find and copy the previous sample, as that's
+            // cheaper than taking a new sample.
+            if (registeredThread->RacyRegisteredThread()
+                    .CanDuplicateLastSampleDueToSleep()) {
+              bool dup_ok = ActivePS::Buffer(lock).DuplicateLastSample(
+                  info->ThreadId(), CorePS::ProcessStartTime(),
+                  profiledThreadData->LastSample());
+              if (dup_ok) {
+                continue;
+              }
             }
+
+            AUTO_PROFILER_STATS(base_SamplerThread_Run_DoPeriodicSample);
+
+            TimeStamp now = TimeStamp::NowUnfuzzed();
+
+            // Add the thread ID now, so we know its position in the main
+            // buffer, which is used by some JS data. (DoPeriodicSample only
+            // knows about the temporary local buffer.)
+            uint64_t samplePos =
+                buffer.AddThreadIdEntry(registeredThread->Info()->ThreadId());
+            profiledThreadData->LastSample() = Some(samplePos);
+
+            // Also add the time, so it's always there after the thread ID, as
+            // expected by the parser. (Other stack data is optional.)
+            TimeDuration delta = now - CorePS::ProcessStartTime();
+            buffer.AddEntry(ProfileBufferEntry::Time(delta.ToMilliseconds()));
+
+            mSampler.SuspendAndSampleAndResumeThread(
+                lock, *registeredThread, [&](const Registers& aRegs) {
+                  DoPeriodicSample(lock, *registeredThread, *profiledThreadData,
+                                   aRegs, samplePos, localProfileBuffer);
+                });
+
+            // If data is complete, copy it into the global buffer.
+            auto state = localBlocksRingBuffer.GetState();
+            if (state.mClearedBlockCount != previousState.mClearedBlockCount) {
+              LOG("Stack sample too big for local storage, needed %u bytes",
+                  unsigned(state.mRangeEnd.ConvertToU64() -
+                           previousState.mRangeEnd.ConvertToU64()));
+            } else if (state.mRangeEnd.ConvertToU64() -
+                           previousState.mRangeEnd.ConvertToU64() >=
+                       CorePS::CoreBlocksRingBuffer().BufferLength()->Value()) {
+              LOG("Stack sample too big for profiler storage, needed %u bytes",
+                  unsigned(state.mRangeEnd.ConvertToU64() -
+                           previousState.mRangeEnd.ConvertToU64()));
+            } else {
+              CorePS::CoreBlocksRingBuffer().AppendContents(
+                  localBlocksRingBuffer);
+            }
+
+            // Clean up for the next run.
+            localBlocksRingBuffer.Clear();
+            previousState = localBlocksRingBuffer.GetState();
           }
-
-          AUTO_PROFILER_STATS(base_SamplerThread_Run_DoPeriodicSample);
-
-          TimeStamp now = TimeStamp::NowUnfuzzed();
-
-          // Add the thread ID now, so we know its position in the main buffer,
-          // which is used by some JS data.
-          // (DoPeriodicSample only knows about the temporary local buffer.)
-          uint64_t samplePos =
-              buffer.AddThreadIdEntry(registeredThread->Info()->ThreadId());
-          profiledThreadData->LastSample() = Some(samplePos);
-
-          // Also add the time, so it's always there after the thread ID, as
-          // expected by the parser. (Other stack data is optional.)
-          TimeDuration delta = now - CorePS::ProcessStartTime();
-          buffer.AddEntry(ProfileBufferEntry::Time(delta.ToMilliseconds()));
-
-          mSampler.SuspendAndSampleAndResumeThread(
-              lock, *registeredThread, [&](const Registers& aRegs) {
-                DoPeriodicSample(lock, *registeredThread, *profiledThreadData,
-                                 aRegs, samplePos, localProfileBuffer);
-              });
-
-          // If data is complete, copy it into the global buffer.
-          auto state = localBlocksRingBuffer.GetState();
-          if (state.mClearedBlockCount != previousState.mClearedBlockCount) {
-            LOG("Stack sample too big for local storage, needed %u bytes",
-                unsigned(state.mRangeEnd.ConvertToU64() -
-                         previousState.mRangeEnd.ConvertToU64()));
-          } else if (state.mRangeEnd.ConvertToU64() -
-                         previousState.mRangeEnd.ConvertToU64() >=
-                     CorePS::CoreBlocksRingBuffer().BufferLength()->Value()) {
-            LOG("Stack sample too big for profiler storage, needed %u bytes",
-                unsigned(state.mRangeEnd.ConvertToU64() -
-                         previousState.mRangeEnd.ConvertToU64()));
-          } else {
-            CorePS::CoreBlocksRingBuffer().AppendContents(
-                localBlocksRingBuffer);
-          }
-
-          // Clean up for the next run.
-          localBlocksRingBuffer.Clear();
-          previousState = localBlocksRingBuffer.GetState();
         }
 
 #  if defined(USE_LUL_STACKWALK)
         // The LUL unwind object accumulates frame statistics. Periodically we
         // should poke it to give it a chance to print those statistics.  This
         // involves doing I/O (fprintf, __android_log_print, etc.) and so
         // can't safely be done from the critical section inside
         // SuspendAndSampleAndResumeThread, which is why it is done here.
--- a/mozglue/baseprofiler/public/BaseProfiler.h
+++ b/mozglue/baseprofiler/public/BaseProfiler.h
@@ -123,52 +123,56 @@ class SpliceableJSONWriter;
 // Profiler features
 //---------------------------------------------------------------------------
 
 // Higher-order macro containing all the feature info in one place. Define
 // |MACRO| appropriately to extract the relevant parts. Note that the number
 // values are used internally only and so can be changed without consequence.
 // Any changes to this list should also be applied to the feature list in
 // toolkit/components/extensions/schemas/geckoProfiler.json.
-#  define BASE_PROFILER_FOR_EACH_FEATURE(MACRO)                               \
-    MACRO(0, "java", Java, "Profile Java code, Android only")                 \
-                                                                              \
-    MACRO(1, "js", JS,                                                        \
-          "Get the JS engine to expose the JS stack to the profiler")         \
-                                                                              \
-    /* The DevTools profiler doesn't want the native addresses. */            \
-    MACRO(2, "leaf", Leaf, "Include the C++ leaf node if not stackwalking")   \
-                                                                              \
-    MACRO(3, "mainthreadio", MainThreadIO,                                    \
-          "Add main thread I/O to the profile")                               \
-                                                                              \
-    MACRO(4, "privacy", Privacy,                                              \
-          "Do not include user-identifiable information")                     \
-                                                                              \
-    MACRO(5, "responsiveness", Responsiveness,                                \
-          "Collect thread responsiveness information")                        \
-                                                                              \
-    MACRO(6, "screenshots", Screenshots,                                      \
-          "Take a snapshot of the window on every composition")               \
-                                                                              \
-    MACRO(7, "seqstyle", SequentialStyle,                                     \
-          "Disable parallel traversal in styling")                            \
-                                                                              \
-    MACRO(8, "stackwalk", StackWalk,                                          \
-          "Walk the C++ stack, not available on all platforms")               \
-                                                                              \
-    MACRO(9, "tasktracer", TaskTracer,                                        \
-          "Start profiling with feature TaskTracer")                          \
-                                                                              \
-    MACRO(10, "threads", Threads, "Profile the registered secondary threads") \
-                                                                              \
-    MACRO(11, "trackopts", TrackOptimizations,                                \
-          "Have the JavaScript engine track JIT optimizations")               \
-                                                                              \
-    MACRO(12, "jstracer", JSTracer, "Enable tracing of the JavaScript engine")
+#  define BASE_PROFILER_FOR_EACH_FEATURE(MACRO)                                \
+    MACRO(0, "java", Java, "Profile Java code, Android only")                  \
+                                                                               \
+    MACRO(1, "js", JS,                                                         \
+          "Get the JS engine to expose the JS stack to the profiler")          \
+                                                                               \
+    /* The DevTools profiler doesn't want the native addresses. */             \
+    MACRO(2, "leaf", Leaf, "Include the C++ leaf node if not stackwalking")    \
+                                                                               \
+    MACRO(3, "mainthreadio", MainThreadIO,                                     \
+          "Add main thread I/O to the profile")                                \
+                                                                               \
+    MACRO(4, "privacy", Privacy,                                               \
+          "Do not include user-identifiable information")                      \
+                                                                               \
+    MACRO(5, "responsiveness", Responsiveness,                                 \
+          "Collect thread responsiveness information")                         \
+                                                                               \
+    MACRO(6, "screenshots", Screenshots,                                       \
+          "Take a snapshot of the window on every composition")                \
+                                                                               \
+    MACRO(7, "seqstyle", SequentialStyle,                                      \
+          "Disable parallel traversal in styling")                             \
+                                                                               \
+    MACRO(8, "stackwalk", StackWalk,                                           \
+          "Walk the C++ stack, not available on all platforms")                \
+                                                                               \
+    MACRO(9, "tasktracer", TaskTracer,                                         \
+          "Start profiling with feature TaskTracer")                           \
+                                                                               \
+    MACRO(10, "threads", Threads, "Profile the registered secondary threads")  \
+                                                                               \
+    MACRO(11, "trackopts", TrackOptimizations,                                 \
+          "Have the JavaScript engine track JIT optimizations")                \
+                                                                               \
+    MACRO(12, "jstracer", JSTracer, "Enable tracing of the JavaScript engine") \
+                                                                               \
+    MACRO(14, "nostacksampling", NoStackSampling,                              \
+          "Disable all stack sampling: Cancels \"js\", \"leaf\", "             \
+          "\"stackwalk\" and labels")
 
 struct ProfilerFeature {
 #  define DECLARE(n_, str_, Name_, desc_)                     \
     static constexpr uint32_t Name_ = (1u << n_);             \
     static constexpr bool Has##Name_(uint32_t aFeatures) {    \
       return aFeatures & Name_;                               \
     }                                                         \
     static constexpr void Set##Name_(uint32_t& aFeatures) {   \
--- a/toolkit/components/extensions/schemas/geckoProfiler.json
+++ b/toolkit/components/extensions/schemas/geckoProfiler.json
@@ -32,16 +32,17 @@
           "screenshots",
           "seqstyle",
           "stackwalk",
           "tasktracer",
           "threads",
           "trackopts",
           "jstracer",
           "jsallocations",
+          "nostacksampling",
           "nativeallocations",
           "preferencereads"
         ]
       },
       {
         "id": "supports",
         "type": "string",
         "enum": [
--- a/tools/profiler/core/platform.cpp
+++ b/tools/profiler/core/platform.cpp
@@ -230,27 +230,16 @@ static uint32_t DefaultFeatures() {
 // Extra default features when MOZ_PROFILER_STARTUP is set (even if not
 // available).
 static uint32_t StartupExtraDefaultFeatures() {
   // Enable mainthreadio by default for startup profiles as startup is heavy on
   // I/O operations, and main thread I/O is really important to see there.
   return ProfilerFeature::MainThreadIO;
 }
 
-// Does `aFeatures` include any feature that requires stack sampling to run?
-static constexpr bool FeaturesImplyStackSampling(uint32_t aFeatures) {
-  // Currently, StackWalk (or just Leaf) and JS require the sampler to record
-  // stack traces.
-  // Previously, turning off all these still recorded stacks with only labels.
-  // TODO: Evaluate whether label-only stacks are useful, and whether there
-  // should be a separate feature to turn off stack samples. See bug 1583430.
-  return aFeatures & (ProfilerFeature::JS | ProfilerFeature::Leaf |
-                      ProfilerFeature::StackWalk);
-}
-
 // The class is a thin shell around mozglue PlatformMutex. It does not preserve
 // behavior in JS record/replay. It provides a mechanism to determine if it is
 // locked or not in order for memory hooks to avoid re-entering the profiler
 // locked state.
 class PSMutex : private ::mozilla::detail::MutexImpl {
  public:
   PSMutex()
       : ::mozilla::detail::MutexImpl(
@@ -2580,22 +2569,24 @@ static SamplerThread* NewSamplerThread(P
 
 // This function is the sampler thread.  This implementation is used for all
 // targets.
 void SamplerThread::Run() {
   PR_SetCurrentThreadName("SamplerThread");
 
   // Features won't change during this SamplerThread's lifetime, so we can
   // determine now whether stack sampling is required.
-  const bool stackSamplingRequired = []() {
+  const bool noStackSampling = []() {
     PSAutoLock lock(gPSMutex);
     if (!ActivePS::Exists(lock)) {
+      // If there is no active profiler, it doesn't matter what we return,
+      // because this thread will exit before any stack sampling is attempted.
       return false;
     }
-    return FeaturesImplyStackSampling(ActivePS::Features(lock));
+    return ActivePS::FeatureNoStackSampling(lock);
   }();
 
   // Use local BlocksRingBuffer&ProfileBuffer to capture the stack.
   // (This is to avoid touching the CorePS::BlocksRingBuffer lock while
   // a thread is suspended, because that thread could be working with
   // the CorePS::BlocksRingBuffer as well.)
   BlocksRingBuffer localBlocksRingBuffer(
       BlocksRingBuffer::ThreadSafety::WithoutMutex);
@@ -2651,17 +2642,17 @@ void SamplerThread::Run() {
           buffer.AddEntry(ProfileBufferEntry::CounterKey(0));
           buffer.AddEntry(ProfileBufferEntry::Count(count));
           if (number) {
             buffer.AddEntry(ProfileBufferEntry::Number(number));
           }
         }
         TimeStamp countersSampled = TimeStamp::NowUnfuzzed();
 
-        if (stackSamplingRequired) {
+        if (!noStackSampling) {
           const Vector<LiveProfiledThreadData>& liveThreads =
               ActivePS::LiveProfiledThreads(lock);
 
           for (auto& thread : liveThreads) {
             RegisteredThread* registeredThread = thread.mRegisteredThread;
             ProfiledThreadData* profiledThreadData =
                 thread.mProfiledThreadData.get();
             RefPtr<ThreadInfo> info = registeredThread->Info();
--- a/tools/profiler/public/GeckoProfiler.h
+++ b/tools/profiler/public/GeckoProfiler.h
@@ -167,18 +167,23 @@ class Vector;
     MACRO(11, "trackopts", TrackOptimizations,                                 \
           "Have the JavaScript engine track JIT optimizations")                \
                                                                                \
     MACRO(12, "jstracer", JSTracer, "Enable tracing of the JavaScript engine") \
                                                                                \
     MACRO(13, "jsallocations", JSAllocations,                                  \
           "Have the JavaScript engine track allocations")                      \
                                                                                \
+    MACRO(14, "nostacksampling", NoStackSampling,                              \
+          "Disable all stack sampling: Cancels \"js\", \"leaf\", "             \
+          "\"stackwalk\" and labels")                                          \
+                                                                               \
     MACRO(15, "preferencereads", PreferenceReads,                              \
           "Track when preferences are read")                                   \
+                                                                               \
     MACRO(16, "nativeallocations", NativeAllocations,                          \
           "Collect the stacks from a smaller subset of all native "            \
           "allocations, biasing towards collecting larger allocations")
 
 struct ProfilerFeature {
 #  define DECLARE(n_, str_, Name_, desc_)                     \
     static constexpr uint32_t Name_ = (1u << n_);             \
     static constexpr bool Has##Name_(uint32_t aFeatures) {    \