Bug 1582741 - Balance the frees to the allocations in native allocation tracking; r=njn,gerald
author Greg Tatum <gtatum@mozilla.com>
Wed, 13 Nov 2019 16:17:38 +0000
changeset 501816 b37cf462c34198a34ffe38ec1b9fc0ca56ab9a25
parent 501815 b1c0de5b727cc3bb5c03866793f7e2669c65ce3c
child 501817 5991133b6ac7b2535495c4f0a1ea3011f08885d1
push id 100439
push user gtatum@mozilla.com
push date Wed, 13 Nov 2019 20:05:58 +0000
treeherder autoland@e2b8a34e2aac
reviewers njn, gerald
bugs 1582741
milestone 72.0a1
Bug 1582741 - Balance the frees to the allocations in native allocation tracking; r=njn,gerald

This patch creates a HashSet that tracks the allocations that the profiler has recorded. This way, we only collect markers for deallocations that have a matching allocation. A follow-up commit makes it so that all of the markers are collected on the main thread, but for now this is still done on a per-thread basis.

Differential Revision: https://phabricator.services.mozilla.com/D51935
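In outline, the change reduces to the following pattern (a minimal sketch in standard C++, not the patch itself: std::unordered_set and std::mutex stand in for the hook-free HashSet and custom Mutex that the real code needs, since standard containers would re-enter the memory hooks; SampledByBernoulli and AddMarker are hypothetical stand-ins for gBernoulli->trial() and profiler_add_native_allocation_marker()):

    #include <cstdint>
    #include <mutex>
    #include <unordered_set>

    bool SampledByBernoulli(size_t aSize);  // stand-in for gBernoulli->trial()
    bool AddMarker(int64_t aSignedSize);    // stand-in for the marker API

    // Shared across threads; the mutex guards the set of sampled addresses.
    static std::mutex gLock;
    static std::unordered_set<const void*> gTracked;

    // Malloc hook: remember the address only when a marker was actually added.
    void OnAlloc(void* aPtr, size_t aSize) {
      if (SampledByBernoulli(aSize) && AddMarker(static_cast<int64_t>(aSize))) {
        std::lock_guard<std::mutex> lock(gLock);
        gTracked.insert(aPtr);
      }
    }

    // Free hook: emit a negative-size marker only for addresses recorded
    // above, so every deallocation marker balances an allocation marker.
    void OnFree(void* aPtr, size_t aSize) {
      bool wasTracked;
      {
        std::lock_guard<std::mutex> lock(gLock);
        wasTracked = gTracked.erase(aPtr) > 0;
      }
      if (wasTracked) {
        AddMarker(-static_cast<int64_t>(aSize));
      }
    }

Note that the real patch crashes on OOM inside the tracker (MOZ_CRASH in AddMemoryAddress): the set must never silently drop an address, or frees would go unbalanced.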
tools/profiler/core/memory_hooks.cpp
tools/profiler/core/platform.cpp
tools/profiler/public/GeckoProfiler.h
--- a/tools/profiler/core/memory_hooks.cpp
+++ b/tools/profiler/core/memory_hooks.cpp
@@ -9,16 +9,17 @@
 #include "nscore.h"
 
 #include "mozilla/Assertions.h"
 #include "mozilla/Atomics.h"
 #include "mozilla/FastBernoulliTrial.h"
 #include "mozilla/IntegerPrintfMacros.h"
 #include "mozilla/JSONWriter.h"
 #include "mozilla/MemoryReporting.h"
+#include "mozilla/PlatformMutex.h"
 #include "mozilla/ProfilerCounts.h"
 #include "mozilla/ThreadLocal.h"
 
 #include "GeckoProfiler.h"
 #include "prenv.h"
 #include "replace_malloc.h"
 
 #include <ctype.h>
@@ -89,16 +90,165 @@ static void EnsureBernoulliIsInstalled()
   if (!gBernoulli) {
     // This is only installed once. See the gBernoulli definition for more
     // information.
     gBernoulli =
         new FastBernoulliTrial(0.0003, 0x8e26eeee166bc8ca, 0x56820f304a9c9ae0);
   }
 }
 
+// This class provides infallible allocations (they abort on OOM) like
+// mozalloc's InfallibleAllocPolicy, except that memory hooks are bypassed. This
+// policy is used by the HashSet.
+class InfallibleAllocWithoutHooksPolicy {
+  static void ExitOnFailure(const void* aP) {
+    if (!aP) {
+      MOZ_CRASH("Profiler memory hooks out of memory; aborting");
+    }
+  }
+
+ public:
+  template <typename T>
+  static T* maybe_pod_malloc(size_t aNumElems) {
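+    // Fail (return null) when aNumElems * sizeof(T) could overflow size_t.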
+    if (aNumElems & mozilla::tl::MulOverflowMask<sizeof(T)>::value) {
+      return nullptr;
+    }
+    return (T*)gMallocTable.malloc(aNumElems * sizeof(T));
+  }
+
+  template <typename T>
+  static T* maybe_pod_calloc(size_t aNumElems) {
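+    // calloc checks the aNumElems * sizeof(T) multiplication for overflow.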
+    return (T*)gMallocTable.calloc(aNumElems, sizeof(T));
+  }
+
+  template <typename T>
+  static T* maybe_pod_realloc(T* aPtr, size_t aOldSize, size_t aNewSize) {
+    if (aNewSize & mozilla::tl::MulOverflowMask<sizeof(T)>::value) {
+      return nullptr;
+    }
+    return (T*)gMallocTable.realloc(aPtr, aNewSize * sizeof(T));
+  }
+
+  template <typename T>
+  static T* pod_malloc(size_t aNumElems) {
+    T* p = maybe_pod_malloc<T>(aNumElems);
+    ExitOnFailure(p);
+    return p;
+  }
+
+  template <typename T>
+  static T* pod_calloc(size_t aNumElems) {
+    T* p = maybe_pod_calloc<T>(aNumElems);
+    ExitOnFailure(p);
+    return p;
+  }
+
+  template <typename T>
+  static T* pod_realloc(T* aPtr, size_t aOldSize, size_t aNewSize) {
+    T* p = maybe_pod_realloc(aPtr, aOldSize, aNewSize);
+    ExitOnFailure(p);
+    return p;
+  }
+
+  template <typename T>
+  static void free_(T* aPtr, size_t aSize = 0) {
+    gMallocTable.free(aPtr);
+  }
+
+  static void reportAllocOverflow() { ExitOnFailure(nullptr); }
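+  // Part of the allocation policy interface; this policy never simulates OOM.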
+  bool checkSimulatedOOM() const { return true; }
+};
+
+// We can't use mozilla::Mutex because it causes re-entry into the memory hooks.
+// Define a custom implementation here.
+class Mutex : private ::mozilla::detail::MutexImpl {
+ public:
+  Mutex()
+      : ::mozilla::detail::MutexImpl(
+            ::mozilla::recordreplay::Behavior::DontPreserve) {}
+
+  void Lock() { ::mozilla::detail::MutexImpl::lock(); }
+  void Unlock() { ::mozilla::detail::MutexImpl::unlock(); }
+};
+
+class MutexAutoLock {
+  MutexAutoLock(const MutexAutoLock&) = delete;
+  void operator=(const MutexAutoLock&) = delete;
+
+  Mutex& mMutex;
+
+ public:
+  explicit MutexAutoLock(Mutex& aMutex) : mMutex(aMutex) { mMutex.Lock(); }
+  ~MutexAutoLock() { mMutex.Unlock(); }
+};
+
+//---------------------------------------------------------------------------
+// Tracked allocations
+//---------------------------------------------------------------------------
+
+// The allocation tracker is shared between multiple threads and coordinates
+// which allocations have been tracked. Its mutable internal state is
+// protected by a mutex and managed through its methods.
+//
+// The tracker knows about all the allocations that we have added to the
+// profiler. This way, whenever a given piece of memory is freed, we can check
+// whether it was previously tracked, and if so, track its deallocation.
+
+class AllocationTracker {
+  // This type tracks all of the allocations that we have captured, so that we
+  // can check whether a deallocation is in this set. We want to provide a
+  // balanced view of the allocations and deallocations.
+  typedef mozilla::HashSet<const void*, mozilla::DefaultHasher<const void*>,
+                           InfallibleAllocWithoutHooksPolicy>
+      AllocationSet;
+
+ public:
+  AllocationTracker() : mAllocations(), mMutex() {}
+
+  void AddMemoryAddress(const void* memoryAddress) {
+    MutexAutoLock lock(mMutex);
+    if (!mAllocations.put(memoryAddress)) {
+      MOZ_CRASH("Out of memory while tracking native allocations.");
+    }
+  }
+
+  void Reset() {
+    MutexAutoLock lock(mMutex);
+    mAllocations.clearAndCompact();
+  }
+
+  // Returns true if the memory address was found and removed; returns false
+  // if the address was not being tracked.
+  bool RemoveMemoryAddressIfFound(const void* memoryAddress) {
+    MutexAutoLock lock(mMutex);
+
+    auto ptr = mAllocations.lookup(memoryAddress);
+    if (ptr) {
+      // The memory was present. It no longer needs to be tracked.
+      mAllocations.remove(ptr);
+      return true;
+    }
+
+    return false;
+  }
+
+ private:
+  AllocationSet mAllocations;
+  Mutex mMutex;
+};
+
+static AllocationTracker* gAllocationTracker;
+
+static void EnsureAllocationTrackerIsInstalled() {
+  if (!gAllocationTracker) {
+    // This is only installed once.
+    gAllocationTracker = new AllocationTracker();
+  }
+}
+
 //---------------------------------------------------------------------------
 // Per-thread blocking of intercepts
 //---------------------------------------------------------------------------
 
 // On MacOS and Linux, the first __thread/thread_local access calls malloc,
 // which leads to an infinite loop. So we use pthread-based TLS instead, which
 // somehow doesn't have this problem.
 #if !defined(XP_DARWIN) && !defined(XP_LINUX)
@@ -223,18 +373,27 @@ static void AllocCallback(void* aPtr, si
   // hooks from recursing on any new allocations coming in.
   AutoBlockIntercepts block(threadIntercept.ref());
 
   // Perform a bernoulli trial, which will return true or false based on its
   // configured probability. It takes into account the byte size so that
   // larger allocations are weighted heavier than smaller allocations.
   MOZ_ASSERT(gBernoulli,
              "gBernoulli must be properly installed for the memory hooks.");
-  if (gBernoulli->trial(actualSize)) {
-    profiler_add_native_allocation_marker((int64_t)actualSize);
+  if (
+      // First perform the Bernoulli trial.
+      gBernoulli->trial(actualSize) &&
+      // Second, attempt to add a marker if the Bernoulli trial passed.
+      profiler_add_native_allocation_marker(static_cast<int64_t>(actualSize))) {
+    MOZ_ASSERT(gAllocationTracker,
+               "gAllocationTracker must be properly installed for the memory "
+               "hooks.");
+    // Only track the memory if the allocation marker was actually added to the
+    // profiler.
+    gAllocationTracker->AddMemoryAddress(aPtr);
   }
 
   // We're ignoring aReqSize here
 }
 
 static void FreeCallback(void* aPtr) {
   if (!aPtr) {
     return;
@@ -255,19 +414,21 @@ static void FreeCallback(void* aPtr) {
 
   // The next part of the function requires allocations, so block the memory
   // hooks from recursing on any new allocations coming in.
   AutoBlockIntercepts block(threadIntercept.ref());
 
-  // Perform a bernoulli trial, which will return true or false based on its
-  // configured probability. It takes into account the byte size so that
-  // larger allocations are weighted heavier than smaller allocations.
+  // Check whether this deallocation matches an allocation that was sampled
+  // and tracked in AllocCallback; only those emit a deallocation marker.
-  MOZ_ASSERT(gBernoulli,
-             "gBernoulli must be properly installed for the memory hooks.");
-  if (gBernoulli->trial(unsignedSize)) {
+  MOZ_ASSERT(
+      gAllocationTracker,
+      "gAllocationTracker must be properly installed for the memory hooks.");
+  if (gAllocationTracker->RemoveMemoryAddressIfFound(aPtr)) {
+    // The size is negative here, indicating a deallocation.
     profiler_add_native_allocation_marker(signedSize);
   }
 }
 
 }  // namespace profiler
 }  // namespace mozilla
 
 //---------------------------------------------------------------------------
@@ -394,17 +555,17 @@ void install_memory_hooks() {
 
 // Remove the hooks, but leave the sCounter machinery. Deleting the counter
 // would race with any existing memory hooks that are currently running. Rather
 // than adding the overhead of mutexes here, it's cheaper for performance to just
 // leak these values.
 void remove_memory_hooks() { jemalloc_replace_dynamic(nullptr); }
 
 void enable_native_allocations() {
-  // The bloat log tracks allocations and de-allocations. This can conflict
+  // The bloat log tracks allocations and deallocations. This can conflict
   // with the memory hook machinery, as the bloat log creates its own
   // allocations. This means we can re-enter inside the bloat log machinery. At
   // this time, the bloat log does not know about and cannot handle the native
   // allocation feature. For now, just disable the feature.
   //
   // At the time of this writing, we hit this assertion:
   // IsIdle(oldState) || IsRead(oldState) in Checker::StartReadOp()
   //
@@ -418,19 +579,23 @@ void enable_native_allocations() {
   //    #08: PLDHashTable::Add(void const*, std::nothrow_t const&)
   //    #09: nsBaseHashtable<nsDepCharHashKey, nsAutoPtr<BloatEntry>, ...
   //    #10: GetBloatEntry(char const*, unsigned int)
   //    #11: NS_LogCtor
   //    #12: profiler_get_backtrace()
   //    ...
   if (!PR_GetEnv("XPCOM_MEM_BLOAT_LOG")) {
     EnsureBernoulliIsInstalled();
+    EnsureAllocationTrackerIsInstalled();
     ThreadIntercept::EnableAllocationFeature();
   }
 }
 
 // This is safe to call even if native allocations haven't been enabled.
 void disable_native_allocations() {
   ThreadIntercept::DisableAllocationFeature();
+  if (gAllocationTracker) {
+    gAllocationTracker->Reset();
+  }
 }
 
 }  // namespace profiler
 }  // namespace mozilla
--- a/tools/profiler/core/platform.cpp
+++ b/tools/profiler/core/platform.cpp
@@ -4638,17 +4638,17 @@ void profiler_add_js_allocation_marker(J
       JsAllocationMarkerPayload(TimeStamp::Now(), std::move(info),
                                 profiler_get_backtrace()));
 }
 
 bool profiler_is_locked_on_current_thread() {
   return gPSMutex.IsLockedOnCurrentThread();
 }
 
-void profiler_add_native_allocation_marker(const int64_t aSize) {
+bool profiler_add_native_allocation_marker(const int64_t aSize) {
   if (!profiler_can_accept_markers()) {
-    return;
+    return false;
   }
   AUTO_PROFILER_STATS(add_marker_with_NativeAllocationMarkerPayload);
   profiler_add_marker("Native allocation", JS::ProfilingCategoryPair::OTHER,
                       NativeAllocationMarkerPayload(TimeStamp::Now(), aSize,
                                                     profiler_get_backtrace()));
+  return true;
 }
--- a/tools/profiler/public/GeckoProfiler.h
+++ b/tools/profiler/public/GeckoProfiler.h
@@ -770,17 +770,20 @@ void profiler_add_marker(const char* aMa
     } while (false)
 
 void profiler_add_marker(const char* aMarkerName,
                          JS::ProfilingCategoryPair aCategoryPair,
                          const ProfilerMarkerPayload& aPayload);
 
 void profiler_add_js_marker(const char* aMarkerName);
 void profiler_add_js_allocation_marker(JS::RecordAllocationInfo&& info);
-void profiler_add_native_allocation_marker(int64_t aSize);
+
+// Returns true if the marker was actually added, false otherwise (e.g. when
+// the profiler cannot currently accept markers).
+bool profiler_add_native_allocation_marker(int64_t aSize);
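+//
+// Callers can use the return value to keep their own bookkeeping in sync with
+// the profiler, as AllocCallback in memory_hooks.cpp does:
+//
+//   if (profiler_add_native_allocation_marker(size)) {
+//     // The marker was recorded; remember this address so that the matching
+//     // free can be balanced against it.
+//   }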
 
 // Returns true if the profiler lock is currently held *on the current thread*.
 // This may be used by re-entrant code that may call profiler functions while
 // the profiler already has the lock (which would deadlock).
 bool profiler_is_locked_on_current_thread();
 
 // Insert a marker in the profile timeline for a specified thread.
 void profiler_add_marker_for_thread(