Bug 1592625 - All threads' allocation markers get stored on the main thread; r=gerald
authorGreg Tatum <gtatum@mozilla.com>
Wed, 13 Nov 2019 16:19:11 +0000
changeset 501802 439f620efb11ff3e421a3ffdab1deb6b76b79281
parent 501801 5991133b6ac7b2535495c4f0a1ea3011f08885d1
child 501803 e2b8a34e2aac27d21773cb2d3da1278f47550e4f
push id114172
push userdluca@mozilla.com
push dateTue, 19 Nov 2019 11:31:10 +0000
treeherdermozilla-inbound@b5c5ba07d3db [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersgerald
bugs1592625
milestone72.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1592625 - All threads' allocation markers get stored on the main thread; r=gerald This change makes it so that the allocation markers from every thread in a process are stored on the main thread in the profile. This way it's easy to balance allocations with frees, since memory is better modeled per process than per thread. The front-end can then process the information and display it in more interesting ways. Previously, allocations and deallocations were only stored on the thread where they were generated. It was easy to miss deallocations with the old model, for example when memory was freed on a different thread than the one that allocated it. Differential Revision: https://phabricator.services.mozilla.com/D51937
tools/profiler/core/memory_hooks.cpp
tools/profiler/core/memory_hooks.h
tools/profiler/core/platform.cpp
tools/profiler/public/GeckoProfiler.h
--- a/tools/profiler/core/memory_hooks.cpp
+++ b/tools/profiler/core/memory_hooks.cpp
@@ -265,16 +265,22 @@ class ThreadIntercept {
   static PROFILER_THREAD_LOCAL(bool) tlsIsBlocked;
 
   // This is a quick flag to check and see if the allocations feature is enabled
   // or disabled.
   static mozilla::Atomic<bool, mozilla::Relaxed,
                          mozilla::recordreplay::Behavior::DontPreserve>
       sAllocationsFeatureEnabled;
 
+  // The markers will be stored on the main thread. Retain the id of the main
+  // thread of this process here.
+  static mozilla::Atomic<int, mozilla::Relaxed,
+                         mozilla::recordreplay::Behavior::DontPreserve>
+      sMainThreadId;
+
   ThreadIntercept() = default;
 
   // Only allow consumers to access this information if they run
   // ThreadIntercept::MaybeGet and ask through the non-static version.
   static bool IsBlocked_() {
     // When the native allocations feature is turned on, memory hooks run on
     // every single allocation. For a subset of these allocations, the stack
     // gets sampled by running profiler_get_backtrace(), which locks the
@@ -311,26 +317,36 @@ class ThreadIntercept {
 
   void Unblock() {
     MOZ_ASSERT(tlsIsBlocked.get());
     tlsIsBlocked.set(false);
   }
 
   bool IsBlocked() const { return ThreadIntercept::IsBlocked_(); }
 
-  static void EnableAllocationFeature() { sAllocationsFeatureEnabled = true; }
+  static void EnableAllocationFeature(int aMainThreadId) {
+    sAllocationsFeatureEnabled = true;
+    sMainThreadId = aMainThreadId;
+  }
 
   static void DisableAllocationFeature() { sAllocationsFeatureEnabled = false; }
+
+  static int MainThreadId() { return sMainThreadId; }
 };
 
 PROFILER_THREAD_LOCAL(bool) ThreadIntercept::tlsIsBlocked;
+
 mozilla::Atomic<bool, mozilla::Relaxed,
                 mozilla::recordreplay::Behavior::DontPreserve>
     ThreadIntercept::sAllocationsFeatureEnabled(false);
 
+mozilla::Atomic<int, mozilla::Relaxed,
+                mozilla::recordreplay::Behavior::DontPreserve>
+    ThreadIntercept::sMainThreadId(0);
+
 // An object of this class must be created (on the stack) before running any
 // code that might allocate.
 class AutoBlockIntercepts {
   ThreadIntercept& mThreadIntercept;
 
  public:
   // Disallow copy and assign.
   AutoBlockIntercepts(const AutoBlockIntercepts&) = delete;
@@ -377,17 +393,18 @@ static void AllocCallback(void* aPtr, si
   // configured probability. It takes into account the byte size so that
   // larger allocations are weighted heavier than smaller allocations.
   MOZ_ASSERT(gBernoulli,
              "gBernoulli must be properly installed for the memory hooks.");
   if (
       // First perform the Bernoulli trial.
       gBernoulli->trial(actualSize) &&
       // Second, attempt to add a marker if the Bernoulli trial passed.
-      profiler_add_native_allocation_marker(static_cast<int64_t>(actualSize))) {
+      profiler_add_native_allocation_marker(ThreadIntercept::MainThreadId(),
+                                            static_cast<int64_t>(actualSize))) {
     MOZ_ASSERT(gAllocationTracker,
                "gAllocationTracker must be properly installed for the memory "
                "hooks.");
     // Only track the memory if the allocation marker was actually added to the
     // profiler.
     gAllocationTracker->AddMemoryAddress(aPtr);
   }
 
@@ -419,17 +436,18 @@ static void FreeCallback(void* aPtr) {
   // Perform a bernoulli trial, which will return true or false based on its
   // configured probability. It takes into account the byte size so that
   // larger allocations are weighted heavier than smaller allocations.
   MOZ_ASSERT(
       gAllocationTracker,
       "gAllocationTracker must be properly installed for the memory hooks.");
   if (gAllocationTracker->RemoveMemoryAddressIfFound(aPtr)) {
     // This size here is negative, indicating a deallocation.
-    profiler_add_native_allocation_marker(signedSize);
+    profiler_add_native_allocation_marker(ThreadIntercept::MainThreadId(),
+                                          signedSize);
   }
 }
 
 }  // namespace profiler
 }  // namespace mozilla
 
 //---------------------------------------------------------------------------
 // malloc/free interception
@@ -554,17 +572,17 @@ void install_memory_hooks() {
 }
 
 // Remove the hooks, but leave the sCounter machinery. Deleting the counter
 // would race with any existing memory hooks that are currently running. Rather
 // than adding overhead here of mutexes it's cheaper for the performance to just
 // leak these values.
 void remove_memory_hooks() { jemalloc_replace_dynamic(nullptr); }
 
-void enable_native_allocations() {
+void enable_native_allocations(int aMainThreadId) {
   // The bloat log tracks allocations and deallocations. This can conflict
   // with the memory hook machinery, as the bloat log creates its own
   // allocations. This means we can re-enter inside the bloat log machinery. At
   // this time, the bloat log does not know about cannot handle the native
   // allocation feature. For now just disable the feature.
   //
   // At the time of this writing, we hit this assertion:
   // IsIdle(oldState) || IsRead(oldState) in Checker::StartReadOp()
@@ -580,17 +598,17 @@ void enable_native_allocations() {
   //    #09: nsBaseHashtable<nsDepCharHashKey, nsAutoPtr<BloatEntry>, ...
   //    #10: GetBloatEntry(char const*, unsigned int)
   //    #11: NS_LogCtor
   //    #12: profiler_get_backtrace()
   //    ...
   if (!PR_GetEnv("XPCOM_MEM_BLOAT_LOG")) {
     EnsureBernoulliIsInstalled();
     EnsureAllocationTrackerIsInstalled();
-    ThreadIntercept::EnableAllocationFeature();
+    ThreadIntercept::EnableAllocationFeature(aMainThreadId);
   }
 }
 
 // This is safe to call even if native allocations hasn't been enabled.
 void disable_native_allocations() {
   ThreadIntercept::DisableAllocationFeature();
   if (gAllocationTracker) {
     gAllocationTracker->Reset();
--- a/tools/profiler/core/memory_hooks.h
+++ b/tools/profiler/core/memory_hooks.h
@@ -8,16 +8,16 @@
 #define memory_hooks_h
 
 #if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
 namespace mozilla {
 namespace profiler {
 
 void install_memory_hooks();
 void remove_memory_hooks();
-void enable_native_allocations();
+void enable_native_allocations(int aMainThreadId);
 void disable_native_allocations();
 
 }  // namespace profiler
 }  // namespace mozilla
 #endif
 
 #endif
--- a/tools/profiler/core/platform.cpp
+++ b/tools/profiler/core/platform.cpp
@@ -3975,16 +3975,17 @@ static void locked_profiler_start(PSLock
       // in the Gecko Profiler profile, and shown as a new track under the
       // corresponding Gecko Profiler thread.
       ActivePS::AddBaseProfileThreads(aLock, std::move(baseprofile));
     }
   }
 #endif
 
   // Set up profiling for each registered thread, if appropriate.
+  Maybe<int> mainThreadId;
   int tid = profiler_current_thread_id();
   const Vector<UniquePtr<RegisteredThread>>& registeredThreads =
       CorePS::RegisteredThreads(aLock);
   for (auto& registeredThread : registeredThreads) {
     RefPtr<ThreadInfo> info = registeredThread->Info();
 
     if (ActivePS::ShouldProfileThread(aLock, info)) {
       registeredThread->RacyRegisteredThread().SetIsBeingProfiled(true);
@@ -4000,16 +4001,19 @@ static void locked_profiler_start(PSLock
           registeredThread->PollJSSampling();
         } else if (info->IsMainThread()) {
           // Dispatch a runnable to the main thread to call PollJSSampling(),
           // so that we don't have wait for the next JS interrupt callback in
           // order to start profiling JS.
           TriggerPollJSSamplingOnMainThread();
         }
       }
+      if (info->IsMainThread()) {
+        mainThreadId = Some(info->ThreadId());
+      }
       registeredThread->RacyRegisteredThread().ReinitializeOnResume();
       if (registeredThread->GetJSContext()) {
         profiledThreadData->NotifyReceivedJSContext(0);
       }
     }
   }
 
   // Setup support for pushing/popping labels in mozglue.
@@ -4029,17 +4033,24 @@ static void locked_profiler_start(PSLock
       javaInterval = 10;
     }
     java::GeckoJavaSampler::Start(javaInterval, 1000);
   }
 #endif
 
 #if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
   if (ActivePS::FeatureNativeAllocations(aLock)) {
-    mozilla::profiler::enable_native_allocations();
+    if (mainThreadId.isSome()) {
+      mozilla::profiler::enable_native_allocations(mainThreadId.value());
+    } else {
+      NS_WARNING(
+          "The nativeallocations feature is turned on, but the main thread is "
+          "not being profiled. The allocations are only stored on the main "
+          "thread.");
+    }
   }
 #endif
 
   // At the very end, set up RacyFeatures.
   RacyFeatures::SetActive(ActivePS::Features(aLock));
 }
 
 void profiler_start(PowerOfTwo32 aCapacity, double aInterval,
@@ -4638,24 +4649,26 @@ void profiler_add_js_allocation_marker(J
       JsAllocationMarkerPayload(TimeStamp::Now(), std::move(info),
                                 profiler_get_backtrace()));
 }
 
 bool profiler_is_locked_on_current_thread() {
   return gPSMutex.IsLockedOnCurrentThread();
 }
 
-bool profiler_add_native_allocation_marker(const int64_t aSize) {
+bool profiler_add_native_allocation_marker(int aMainThreadId, int64_t aSize) {
   if (!profiler_can_accept_markers()) {
-    return;
+    return false;
   }
   AUTO_PROFILER_STATS(add_marker_with_NativeAllocationMarkerPayload);
-  profiler_add_marker("Native allocation", JS::ProfilingCategoryPair::OTHER,
-                      NativeAllocationMarkerPayload(TimeStamp::Now(), aSize,
-                                                    profiler_get_backtrace()));
+  profiler_add_marker_for_thread(
+      aMainThreadId, JS::ProfilingCategoryPair::OTHER, "Native allocation",
+      MakeUnique<NativeAllocationMarkerPayload>(TimeStamp::Now(), aSize,
+                                                profiler_get_backtrace()));
+  return true;
 }
 
 void profiler_add_network_marker(
     nsIURI* aURI, int32_t aPriority, uint64_t aChannelId, NetworkLoadType aType,
     mozilla::TimeStamp aStart, mozilla::TimeStamp aEnd, int64_t aCount,
     mozilla::net::CacheDisposition aCacheDisposition,
     const mozilla::net::TimingStruct* aTimings, nsIURI* aRedirectURI,
     UniqueProfilerBacktrace aSource) {
--- a/tools/profiler/public/GeckoProfiler.h
+++ b/tools/profiler/public/GeckoProfiler.h
@@ -773,17 +773,17 @@ void profiler_add_marker(const char* aMa
                          JS::ProfilingCategoryPair aCategoryPair,
                          const ProfilerMarkerPayload& aPayload);
 
 void profiler_add_js_marker(const char* aMarkerName);
 void profiler_add_js_allocation_marker(JS::RecordAllocationInfo&& info);
 
 // Returns true or or false depending on whether the marker was actually added
 // or not.
-bool profiler_add_native_allocation_marker(int64_t aSize);
+bool profiler_add_native_allocation_marker(int aMainThreadId, int64_t aSize);
 
 // Returns true if the profiler lock is currently held *on the current thread*.
 // This may be used by re-entrant code that may call profiler functions while
 // the profiler already has the lock (which would deadlock).
 bool profiler_is_locked_on_current_thread();
 
 // Insert a marker in the profile timeline for a specified thread.
 void profiler_add_marker_for_thread(