Bug 1577658 - Factor DoStreamSamplesToJSON out of ProfileBuffer::StreamSamplesToJSON - r=canaltinova
author     Gerald Squelart <gsquelart@mozilla.com>
Thu, 21 Oct 2021 05:47:25 +0000
changeset 596535 d4ba9282fd84174a791b582a8c076c401e3a2f2c
parent 596534 b9eb9afeae642c69ae3304242dc300b351fd4d2e
child 596536 98084f19515245665af798e6e9017cbae8fbf5d6
push id    38900
push user  nfay@mozilla.com
push date  Thu, 21 Oct 2021 09:36:31 +0000
treeherder mozilla-central@f12b7ea34395
reviewers  canaltinova
bugs       1577658
milestone  95.0a1
Bug 1577658 - Factor DoStreamSamplesToJSON out of ProfileBuffer::StreamSamplesToJSON - r=canaltinova

This refactoring is a bit more complex, to prepare for the next patch. The body of StreamSamplesToJSON moves into DoStreamSamplesToJSON, which takes a callback that provides the streaming parameters relevant to the thread id of each sample being read. For now it is only used by StreamSamplesToJSON, so in this case the callback simply checks whether the entry's thread id is the expected one, and if so provides the JSON writer and the other state needed to write these samples.

One extra complexity: the `ProfileSample sample` object that previously lived outside the loop is removed, because in later patches this code will handle samples from different threads, which cannot all share a single sample object. Instead, the information that must be carried over from one sample to the next lives in the streaming parameters (mPreviousStackState and mPreviousStack), so that each thread has its own. The overall logic is the same, apart from where this out-of-the-loop information lives.

Another difference: old samples (before `aSinceTime`) are now fully processed and only discarded just before being output, because their information needs to be in the thread's UniqueStacks in case a later same-as-before sample refers to it.

Differential Revision: https://phabricator.services.mozilla.com/D128443
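For context, a minimal standalone sketch of the pattern this patch introduces. It uses simplified stand-in types and names (StreamingParams, DoStreamSamples, StreamSamplesForThread are illustrative, not the real Firefox identifiers): a generic reader asks a callback for per-thread streaming parameters and skips any sample for which the callback returns nothing, while the single-thread wrapper keeps the carried-over state in locals and hands out references to it only for the requested thread.

// Illustrative sketch only: simplified stand-ins for the real ProfileBuffer,
// UniqueStacks and JSON-writing machinery, to show the shape of the callback.
#include <cstdint>
#include <cstdio>
#include <optional>
#include <utility>
#include <vector>

using ThreadId = int;

// Per-thread state that must survive from one sample to the next (the real
// StreamingParametersForThread also carries the writer and UniqueStacks).
struct StreamingParams {
  uint32_t& previousStack;
  bool& previousStackWasEmpty;
};

struct Sample {
  ThreadId thread;
  uint32_t stack;
};

// Generic reader (stand-in for DoStreamSamplesToJSON): for each sample, ask
// the callback for that thread's streaming parameters; nullopt means "skip".
template <typename GetParamsForThread>
ThreadId DoStreamSamples(const std::vector<Sample>& buffer,
                         GetParamsForThread&& getParams) {
  ThreadId processedThreadId = -1;
  for (const Sample& s : buffer) {
    std::optional<StreamingParams> params =
        std::forward<GetParamsForThread>(getParams)(s.thread);
    if (!params) {
      continue;  // Sample belongs to a thread we are not streaming.
    }
    // Update the carried-over state through the references handed to us.
    params->previousStack = s.stack;
    params->previousStackWasEmpty = false;
    processedThreadId = s.thread;
    std::printf("thread %d: stack %u\n", s.thread,
                static_cast<unsigned>(s.stack));
  }
  return processedThreadId;
}

// Single-thread wrapper (stand-in for StreamSamplesToJSON): owns one set of
// carried-over state and only provides it for the requested thread id.
ThreadId StreamSamplesForThread(const std::vector<Sample>& buffer,
                                ThreadId aThreadId) {
  uint32_t previousStack = 0;
  bool previousStackWasEmpty = true;
  return DoStreamSamples(
      buffer, [&](ThreadId readThreadId) -> std::optional<StreamingParams> {
        if (readThreadId != aThreadId) {
          return std::nullopt;
        }
        return StreamingParams{previousStack, previousStackWasEmpty};
      });
}

int main() {
  std::vector<Sample> buffer{{1, 10}, {2, 20}, {1, 11}};
  StreamSamplesForThread(buffer, 1);  // Prints only thread 1's samples.
  return 0;
}

In the actual patch, the equivalent of StreamingParams is StreamingParametersForThread (writer, UniqueStacks, previous-stack state), and the point of the indirection is that a later patch can presumably supply a callback that looks up a separate streaming context per thread id rather than serving a single fixed thread.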
tools/profiler/core/ProfileBuffer.h
tools/profiler/core/ProfileBufferEntry.cpp
--- a/tools/profiler/core/ProfileBuffer.h
+++ b/tools/profiler/core/ProfileBuffer.h
@@ -171,16 +171,24 @@ class ProfileBuffer final {
   // - Adding JIT info.
   // - Streaming stacks to JSON.
   // Mutable because it's accessed from non-multithreaded const methods.
   mutable mozilla::ProfileBufferChunkManagerSingle mWorkerChunkManager{
       mozilla::ProfileBufferChunk::Create(
           mozilla::ProfileBufferChunk::SizeofChunkMetadata() +
           mozilla::ProfileBufferChunkManager::scExpectedMaximumStackSize)};
 
+  // GetStreamingParametersForThreadCallback:
+  //   (ProfilerThreadId) -> Maybe<StreamingParametersForThread>
+  template <typename GetStreamingParametersForThreadCallback>
+  ProfilerThreadId DoStreamSamplesToJSON(
+      GetStreamingParametersForThreadCallback&&
+          aGetStreamingParametersForThreadCallback,
+      double aSinceTime) const;
+
   double mFirstSamplingTimeUs = 0.0;
   double mLastSamplingTimeUs = 0.0;
   ProfilerStats mIntervalsUs;
   ProfilerStats mOverheadsUs;
   ProfilerStats mLockingsUs;
   ProfilerStats mCleaningsUs;
   ProfilerStats mCountersUs;
   ProfilerStats mThreadsUs;
--- a/tools/profiler/core/ProfileBufferEntry.cpp
+++ b/tools/profiler/core/ProfileBufferEntry.cpp
@@ -4,16 +4,17 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "ProfileBufferEntry.h"
 
 #include "mozilla/ProfilerMarkers.h"
 #include "platform.h"
 #include "ProfileBuffer.h"
+#include "ProfiledThreadData.h"
 #include "ProfilerBacktrace.h"
 #include "ProfilerRustBindings.h"
 
 #include "js/ProfilingFrameIterator.h"
 #include "jsapi.h"
 #include "jsfriendapi.h"
 #include "mozilla/Logging.h"
 #include "mozilla/ScopeExit.h"
@@ -537,17 +538,16 @@ void JITFrameInfo::AddInfoForRange(
 
   MOZ_RELEASE_ASSERT(mRanges.append(JITFrameInfoForBufferRange{
       aRangeStart, aRangeEnd, std::move(jitAddressToJITFrameMap),
       std::move(jitFrameToFrameJSONMap)}));
 }
 
 struct ProfileSample {
   uint32_t mStack = 0;
-  bool mStackIsEmpty = false;
   double mTime = 0.0;
   Maybe<double> mResponsiveness;
   RunningTimes mRunningTimes;
 };
 
 // Write CPU measurements with "Delta" unit, which is some amount of work that
 // happened since the previous sample.
 static void WriteDelta(AutoArraySchemaWriter& aSchemaWriter, uint32_t aProperty,
@@ -818,33 +818,48 @@ class EntryGetter {
 // or possibly flaky hardware.
 #define ERROR_AND_CONTINUE(msg)                            \
   {                                                        \
     fprintf(stderr, "ProfileBuffer parse error: %s", msg); \
     MOZ_ASSERT(false, msg);                                \
     continue;                                              \
   }
 
-ProfilerThreadId ProfileBuffer::StreamSamplesToJSON(
-    SpliceableJSONWriter& aWriter, ProfilerThreadId aThreadId,
-    double aSinceTime, UniqueStacks& aUniqueStacks) const {
+struct StreamingParametersForThread {
+  SpliceableJSONWriter& mWriter;
+  UniqueStacks& mUniqueStacks;
+  ThreadStreamingContext::PreviousStackState& mPreviousStackState;
+  uint32_t& mPreviousStack;
+
+  StreamingParametersForThread(
+      SpliceableJSONWriter& aWriter, UniqueStacks& aUniqueStacks,
+      ThreadStreamingContext::PreviousStackState& aPreviousStackState,
+      uint32_t& aPreviousStack)
+      : mWriter(aWriter),
+        mUniqueStacks(aUniqueStacks),
+        mPreviousStackState(aPreviousStackState),
+        mPreviousStack(aPreviousStack) {}
+};
+
+// GetStreamingParametersForThreadCallback:
+//   (ProfilerThreadId) -> Maybe<StreamingParametersForThread>
+template <typename GetStreamingParametersForThreadCallback>
+ProfilerThreadId ProfileBuffer::DoStreamSamplesToJSON(
+    GetStreamingParametersForThreadCallback&&
+        aGetStreamingParametersForThreadCallback,
+    double aSinceTime) const {
   UniquePtr<char[]> dynStrBuf = MakeUnique<char[]>(kMaxFrameKeyLength);
 
   return mEntries.Read([&](ProfileChunkedBuffer::Reader* aReader) {
     MOZ_ASSERT(aReader,
                "ProfileChunkedBuffer cannot be out-of-session when sampler is "
                "running");
 
     ProfilerThreadId processedThreadId;
 
-    // This ProfileSample object gets filled with relevant data related to each
-    // sample. Parts of it may be reused from one sample to the next, in
-    // particular when stacks are identical in `SameSample` entries.
-    ProfileSample sample;
-
     EntryGetter e(*aReader);
 
     for (;;) {
       // This block skips entries until we find the start of the next sample.
       // This is useful in three situations.
       //
       // - The circular buffer overwrites old entries, so when we start parsing
       //   we might be in the middle of a sample, and we must skip forward to
@@ -867,61 +882,67 @@ ProfilerThreadId ProfileBuffer::StreamSa
 
       // Due to the skip_to_next_sample block above, if we have an entry here it
       // must be a ThreadId entry.
       MOZ_ASSERT(e.Get().IsThreadId());
 
       ProfilerThreadId threadId = e.Get().GetThreadId();
       e.Next();
 
+      Maybe<StreamingParametersForThread> streamingParameters =
+          std::forward<GetStreamingParametersForThreadCallback>(
+              aGetStreamingParametersForThreadCallback)(threadId);
+
       // Ignore samples that are for the wrong thread.
-      if (threadId != aThreadId && aThreadId.IsSpecified()) {
+      if (!streamingParameters) {
         continue;
       }
 
-      MOZ_ASSERT(
-          aThreadId.IsSpecified() || !processedThreadId.IsSpecified(),
-          "Unspecified aThreadId should only be used with 1-sample buffer");
+      SpliceableJSONWriter& writer = streamingParameters->mWriter;
+      UniqueStacks& uniqueStacks = streamingParameters->mUniqueStacks;
+      ThreadStreamingContext::PreviousStackState& previousStackState =
+          streamingParameters->mPreviousStackState;
+      uint32_t& previousStack = streamingParameters->mPreviousStack;
 
-      auto ReadStack = [&](EntryGetter& e, uint64_t entryPosition,
+      auto ReadStack = [&](EntryGetter& e, double time, uint64_t entryPosition,
                            const Maybe<double>& unresponsiveDuration,
-                           const RunningTimes& aRunningTimes) {
+                           const RunningTimes& runningTimes) {
         UniqueStacks::StackKey stack =
-            aUniqueStacks.BeginStack(UniqueStacks::FrameKey("(root)"));
+            uniqueStacks.BeginStack(UniqueStacks::FrameKey("(root)"));
 
         int numFrames = 0;
         while (e.Has()) {
           if (e.Get().IsNativeLeafAddr()) {
             numFrames++;
 
             void* pc = e.Get().GetPtr();
             e.Next();
 
             nsAutoCString buf;
 
-            if (!aUniqueStacks.mCodeAddressService ||
-                !aUniqueStacks.mCodeAddressService->GetFunction(pc, buf) ||
+            if (!uniqueStacks.mCodeAddressService ||
+                !uniqueStacks.mCodeAddressService->GetFunction(pc, buf) ||
                 buf.IsEmpty()) {
               buf.AppendASCII("0x");
               // `AppendInt` only knows `uint32_t` or `uint64_t`, but because
               // these are just aliases for *two* of (`unsigned`, `unsigned
               // long`, and `unsigned long long`), a call with `uintptr_t` could
               // use the third type and therefore would be ambiguous.
               // So we want to force using exactly `uint32_t` or `uint64_t`,
               // whichever matches the size of `uintptr_t`.
               // (The outer cast to `uint` should then be a no-op.)
               using uint =
                   std::conditional_t<sizeof(uintptr_t) <= sizeof(uint32_t),
                                      uint32_t, uint64_t>;
               buf.AppendInt(static_cast<uint>(reinterpret_cast<uintptr_t>(pc)),
                             16);
             }
 
-            stack = aUniqueStacks.AppendFrame(
-                stack, UniqueStacks::FrameKey(buf.get()));
+            stack = uniqueStacks.AppendFrame(stack,
+                                             UniqueStacks::FrameKey(buf.get()));
 
           } else if (e.Get().IsLabel()) {
             numFrames++;
 
             const char* label = e.Get().GetString();
             e.Next();
 
             using FrameFlags = js::ProfilingStackFrame::Flags;
@@ -998,87 +1019,91 @@ ProfilerThreadId ProfileBuffer::StreamSa
 
             Maybe<JS::ProfilingCategoryPair> categoryPair;
             if (e.Has() && e.Get().IsCategoryPair()) {
               categoryPair =
                   Some(JS::ProfilingCategoryPair(uint32_t(e.Get().GetInt())));
               e.Next();
             }
 
-            stack = aUniqueStacks.AppendFrame(
+            stack = uniqueStacks.AppendFrame(
                 stack,
                 UniqueStacks::FrameKey(std::move(frameLabel), relevantForJS,
                                        isBaselineInterp, innerWindowID, line,
                                        column, categoryPair));
 
           } else if (e.Get().IsJitReturnAddr()) {
             numFrames++;
 
             // A JIT frame may expand to multiple frames due to inlining.
             void* pc = e.Get().GetPtr();
             const Maybe<Vector<UniqueStacks::FrameKey>>& frameKeys =
-                aUniqueStacks.LookupFramesForJITAddressFromBufferPos(
+                uniqueStacks.LookupFramesForJITAddressFromBufferPos(
                     pc, entryPosition ? entryPosition : e.CurPos());
             MOZ_RELEASE_ASSERT(
                 frameKeys,
                 "Attempting to stream samples for a buffer range "
                 "for which we don't have JITFrameInfo?");
             for (const UniqueStacks::FrameKey& frameKey : *frameKeys) {
-              stack = aUniqueStacks.AppendFrame(stack, frameKey);
+              stack = uniqueStacks.AppendFrame(stack, frameKey);
             }
 
             e.Next();
 
           } else {
             break;
           }
         }
 
         // Even if this stack is considered empty, it contains the root frame,
         // which needs to be in the JSON output because following "same samples"
         // may refer to it when reusing this sample.mStack.
-        sample.mStack = aUniqueStacks.GetOrAddStackIndex(stack);
-        sample.mStackIsEmpty = (numFrames == 0);
+        const uint32_t stackIndex = uniqueStacks.GetOrAddStackIndex(stack);
+
+        // And store that possibly-empty stack in case it's followed by "same
+        // sample" entries.
+        previousStack = stackIndex;
+        previousStackState = (numFrames == 0)
+                                 ? ThreadStreamingContext::eStackWasEmpty
+                                 : ThreadStreamingContext::eStackWasNotEmpty;
 
-        if (sample.mStackIsEmpty && aRunningTimes.IsEmpty()) {
+        // Even if too old or empty, we did process a sample for this thread id.
+        processedThreadId = threadId;
+
+        // Discard samples that are too old.
+        if (time < aSinceTime) {
+          return;
+        }
+
+        if (numFrames == 0 && runningTimes.IsEmpty()) {
           // It is possible to have empty stacks if native stackwalking is
           // disabled. Skip samples with empty stacks, unless we have useful
           // running times.
           return;
         }
 
-        if (unresponsiveDuration.isSome()) {
-          sample.mResponsiveness = unresponsiveDuration;
-        }
-
-        sample.mRunningTimes = aRunningTimes;
-
-        WriteSample(aWriter, sample);
-
-        processedThreadId = threadId;
+        WriteSample(writer, ProfileSample{stackIndex, time,
+                                          unresponsiveDuration, runningTimes});
       };  // End of `ReadStack(EntryGetter&)` lambda.
 
       if (e.Has() && e.Get().IsTime()) {
-        sample.mTime = e.Get().GetDouble();
+        double time = e.Get().GetDouble();
         e.Next();
-
-        // Ignore samples that are too old.
-        if (sample.mTime < aSinceTime) {
-          continue;
-        }
+        // Note: Even if this sample is too old (before aSinceTime), we still
+        // need to read it, so that its frames are in the tables, in case there
+        // is a same-sample following it that would be after aSinceTime, which
+        // would need these frames to be present.
 
-        ReadStack(e, 0, Nothing{}, RunningTimes{});
+        ReadStack(e, time, 0, Nothing{}, RunningTimes{});
       } else if (e.Has() && e.Get().IsTimeBeforeCompactStack()) {
-        sample.mTime = e.Get().GetDouble();
-
-        // Ignore samples that are too old.
-        if (sample.mTime < aSinceTime) {
-          e.Next();
-          continue;
-        }
+        double time = e.Get().GetDouble();
+        // Note: Even if this sample is too old (before aSinceTime), we still
+        // need to read it, so that its frames are in the tables, in case there
+        // is a same-sample following it that would be after aSinceTime, which
+        // would need these frames to be present.
 
         RunningTimes runningTimes;
         Maybe<double> unresponsiveDuration;
 
         ProfileChunkedBuffer::BlockIterator it = e.Iterator();
         for (;;) {
           ++it;
           if (it.IsAtEnd()) {
@@ -1103,43 +1128,46 @@ ProfilerThreadId ProfileBuffer::StreamSa
           if (kind == ProfileBufferEntry::Kind::CompactStack) {
             ProfileChunkedBuffer tempBuffer(
                 ProfileChunkedBuffer::ThreadSafety::WithoutMutex,
                 mWorkerChunkManager);
             er.ReadIntoObject(tempBuffer);
             tempBuffer.Read([&](ProfileChunkedBuffer::Reader* aReader) {
               MOZ_ASSERT(aReader,
                          "Local ProfileChunkedBuffer cannot be out-of-session");
+              // This is a compact stack, it should only contain one sample.
               EntryGetter stackEntryGetter(*aReader);
-              ReadStack(stackEntryGetter,
+              ReadStack(stackEntryGetter, time,
                         it.CurrentBlockIndex().ConvertToProfileBufferIndex(),
                         unresponsiveDuration, runningTimes);
             });
             mWorkerChunkManager.Reset(tempBuffer.GetAllChunks());
             break;
           }
 
           MOZ_ASSERT(kind >= ProfileBufferEntry::Kind::LEGACY_LIMIT,
                      "There should be no legacy entries between "
                      "TimeBeforeCompactStack and CompactStack");
           er.SetRemainingBytes(0);
         }
 
         e.RestartAfter(it);
       } else if (e.Has() && e.Get().IsTimeBeforeSameSample()) {
-        if (sample.mTime == 0.0) {
+        if (previousStackState == ThreadStreamingContext::eNoStackYet) {
           // We don't have any full sample yet, we cannot duplicate a "previous"
           // one. This should only happen at most once per thread, for the very
           // first sample.
           continue;
         }
 
+        ProfileSample sample;
+
         // Keep the same `mStack` as previously output.
         // Note that it may be empty, this is checked below before writing it.
-        (void)sample.mStack;
+        sample.mStack = previousStack;
 
         sample.mTime = e.Get().GetDouble();
 
         // Ignore samples that are too old.
         if (sample.mTime < aSinceTime) {
           e.Next();
           continue;
         }
@@ -1160,22 +1188,23 @@ ProfilerThreadId ProfileBuffer::StreamSa
 
           // There may be running times before the SameSample.
           if (kind == ProfileBufferEntry::Kind::RunningTimes) {
             er.ReadIntoObject(sample.mRunningTimes);
             continue;
           }
 
           if (kind == ProfileBufferEntry::Kind::SameSample) {
-            if (sample.mStackIsEmpty && sample.mRunningTimes.IsEmpty()) {
+            if (previousStackState == ThreadStreamingContext::eStackWasEmpty &&
+                sample.mRunningTimes.IsEmpty()) {
               // Skip samples with empty stacks, unless we have useful running
               // times.
               break;
             }
-            WriteSample(aWriter, sample);
+            WriteSample(writer, sample);
             break;
           }
 
           MOZ_ASSERT(kind >= ProfileBufferEntry::Kind::LEGACY_LIMIT,
                      "There should be no legacy entries between "
                      "TimeBeforeSameSample and SameSample");
           er.SetRemainingBytes(0);
         }
@@ -1185,16 +1214,44 @@ ProfilerThreadId ProfileBuffer::StreamSa
         ERROR_AND_CONTINUE("expected a Time entry");
       }
     }
 
     return processedThreadId;
   });
 }
 
+ProfilerThreadId ProfileBuffer::StreamSamplesToJSON(
+    SpliceableJSONWriter& aWriter, ProfilerThreadId aThreadId,
+    double aSinceTime, UniqueStacks& aUniqueStacks) const {
+  ThreadStreamingContext::PreviousStackState previousStackState =
+      ThreadStreamingContext::eNoStackYet;
+  uint32_t stack = 0u;
+#ifdef DEBUG
+  int processedCount = 0;
+#endif  // DEBUG
+  return DoStreamSamplesToJSON(
+      [&](ProfilerThreadId aReadThreadId) {
+        Maybe<StreamingParametersForThread> streamingParameters;
+#ifdef DEBUG
+        ++processedCount;
+        MOZ_ASSERT(
+            aThreadId.IsSpecified() ||
+                (processedCount == 1 && aReadThreadId.IsSpecified()),
+            "Unspecified aThreadId should only be used with 1-sample buffer");
+#endif  // DEBUG
+        if (!aThreadId.IsSpecified() || aThreadId == aReadThreadId) {
+          streamingParameters.emplace(aWriter, aUniqueStacks,
+                                      previousStackState, stack);
+        }
+        return streamingParameters;
+      },
+      aSinceTime);
+}
+
 void ProfileBuffer::AddJITInfoForRange(uint64_t aRangeStart,
                                        ProfilerThreadId aThreadId,
                                        JSContext* aContext,
                                        JITFrameInfo& aJITFrameInfo) const {
   // We can only process JitReturnAddr entries if we have a JSContext.
   MOZ_RELEASE_ASSERT(aContext);
 
   aRangeStart = std::max(aRangeStart, BufferRangeStart());