Bug 1476757 - Grow and shrink the profiler buffer dynamically so as to not waste memory when a large buffer size limit is picked. r=njn
author: Markus Stange <mstange@themasta.com>
Mon, 22 Oct 2018 15:52:17 +0000
changeset 490764 3428510869a9b347639d3f79506d7225be92c191
parent 490763 ef82ba4b7f22f673870bc71ad4564bf0ef228b06
child 490765 9b894301f1fd348275e16569334aef00a336a751
push id: 247
push user: fmarier@mozilla.com
push date: Sat, 27 Oct 2018 01:06:44 +0000
reviewers: njn
bugs: 1476757
milestone: 64.0a1
Bug 1476757 - Grow and shrink the profiler buffer dynamically so as to not waste memory when a large buffer size limit is picked. r=njn Depends on D6264 Differential Revision: https://phabricator.services.mozilla.com/D6265
tools/profiler/core/platform.cpp
--- a/tools/profiler/core/platform.cpp
+++ b/tools/profiler/core/platform.cpp
@@ -39,16 +39,17 @@
 #include "mozilla/UniquePtr.h"
 #include "mozilla/Vector.h"
 #include "GeckoProfiler.h"
 #include "VTuneProfiler.h"
 #include "GeckoProfilerReporter.h"
 #include "ProfilerIOInterposeObserver.h"
 #include "mozilla/AutoProfilerLabel.h"
 #include "mozilla/ExtensionPolicyService.h"
+#include "mozilla/MathAlgorithms.h"
 #include "mozilla/Scheduler.h"
 #include "mozilla/StackWalk.h"
 #include "mozilla/StaticPtr.h"
 #include "mozilla/SystemGroup.h"
 #include "mozilla/ThreadLocal.h"
 #include "mozilla/TimeStamp.h"
 #include "mozilla/Tuple.h"
 #include "mozilla/extensions/WebExtensionPolicy.h"
@@ -338,16 +339,18 @@ private:
 #ifdef USE_LUL_STACKWALK
   // LUL's state. Null prior to the first activation, non-null thereafter.
   UniquePtr<lul::LUL> mLul;
 #endif
 };
 
 CorePS* CorePS::sInstance = nullptr;
 
+static const uint32_t kInitialProfileBufferCapacity = 4096;
+
 class SamplerThread;
 
 static SamplerThread*
 NewSamplerThread(PSLockRef aLock, uint32_t aGeneration, double aInterval);
 
 struct LiveProfiledThreadData
 {
   RegisteredThread* mRegisteredThread;
@@ -385,17 +388,17 @@ private:
   }
 
   ActivePS(PSLockRef aLock, uint32_t aCapacity, double aInterval,
            uint32_t aFeatures, const char** aFilters, uint32_t aFilterCount)
     : mGeneration(sNextGeneration++)
     , mCapacity(aCapacity)
     , mInterval(aInterval)
     , mFeatures(AdjustFeatures(aFeatures, aFilterCount))
-    , mBuffer(MakeUnique<ProfileBuffer>(aCapacity))
+    , mBuffer(MakeUnique<ProfileBuffer>(std::min(kInitialProfileBufferCapacity, aCapacity)))
       // The new sampler thread doesn't start sampling immediately because the
       // main loop within Run() is blocked until this function's caller unlocks
       // gPSMutex.
     , mSamplerThread(NewSamplerThread(aLock, mGeneration, aInterval))
     , mInterposeObserver(ProfilerFeature::HasMainThreadIO(aFeatures)
                          ? new ProfilerIOInterposeObserver()
                          : nullptr)
 #undef HAS_FEATURE
@@ -664,16 +667,59 @@ public:
       [bufferRangeStart](UniquePtr<ProfiledThreadData>& aProfiledThreadData) {
         Maybe<uint64_t> bufferPosition =
           aProfiledThreadData->BufferPositionWhenUnregistered();
         MOZ_RELEASE_ASSERT(bufferPosition, "should have unregistered this thread");
         return *bufferPosition < bufferRangeStart;
       });
   }
 
+  static void EnsureAdequateBufferCapacity(PSLockRef aLockRef)
+  {
+    ProfileBuffer& buffer = Buffer(aLockRef);
+    uint32_t maxCapacity = RoundUpPow2(Capacity(aLockRef));
+    uint32_t minCapacity = std::min(kInitialProfileBufferCapacity, maxCapacity);
+    uint32_t usedSize = buffer.Length();
+    uint32_t currentCapacity = buffer.mCapacity;
+    // The usedSize should always be between 25% and 90% of the capacity.
+    // If the usedSize exceeds 90% of the capacity, enlarge the capacity.
+    // Enlarging the capacity will at least double it, so then the usedSize
+    // will be a bit above 45% of the new capacity.
+    // If the usedSize goes below 25% of the capacity, shrink the capacity.
+    // Shrinking the capacity will at least halve it, so then the usedSize will
+    // be at most 70% of the new capacity.
+    uint32_t minDesiredCapacity = usedSize * 100 / 90;
+    uint32_t maxDesiredCapacity = usedSize * 100 / 25;
+
+    // Clamp the desired capacities to the hard minimum and maximum.
+    minDesiredCapacity = Clamp(minDesiredCapacity, minCapacity, maxCapacity);
+    maxDesiredCapacity = Clamp(maxDesiredCapacity, minCapacity, maxCapacity);
+
+    // Now find newCapacity such that newCapacity is a power of two and such that
+    // minDesiredCapacity <= newCapacity <= maxDesiredCapacity.
+    // Such a value exists, because either maxDesiredCapacity >= 2 * minDesiredCapacity
+    // (if no clamping happened above), or at least one of them is a power of two (because
+    // both minCapacity and maxCapacity are powers of two).
+    // Usually multiple such values exist; in that case we want to find the one that is
+    // closer to the current capacity.
+    uint32_t newCapacity = currentCapacity;
+    while (newCapacity < minDesiredCapacity) {
+      // Enlarge the buffer.
+      newCapacity *= 2;
+    }
+    while (newCapacity > maxDesiredCapacity) {
+      // Shrink the buffer.
+      newCapacity /= 2;
+    }
+
+    MOZ_RELEASE_ASSERT(newCapacity >= minCapacity);
+    MOZ_RELEASE_ASSERT(newCapacity <= maxCapacity);
+    buffer.SetCapacityPow2(newCapacity);
+  }
+
 private:
   // The singleton instance.
   static ActivePS* sInstance;
 
   // We need to track activity generations. If we didn't we could have the
   // following scenario.
   //
   // - profiler_stop() locks gPSMutex, de-instantiates ActivePS, unlocks
@@ -2259,16 +2305,24 @@ SamplerThread::Run()
         // The LUL unwind object accumulates frame statistics. Periodically we
         // should poke it to give it a chance to print those statistics.  This
         // involves doing I/O (fprintf, __android_log_print, etc.) and so
         // can't safely be done from the critical section inside
         // SuspendAndSampleAndResumeThread, which is why it is done here.
         CorePS::Lul(lock)->MaybeShowStats();
 #endif
       }
+
+      // This needs to be done outside the profiler's "critical section".
+      // The buffer entries are added just after stackwalking *while the target
+      // thread is suspended*, and during that time we can't allocate, so we
+      // can't grow the buffer as we add the entries. Instead, we need to grow the
+      // buffer ahead of time, by using some heuristic to predict whether growing
+      // the buffer is necessary.
+      ActivePS::EnsureAdequateBufferCapacity(lock);
     }
     // gPSMutex is not held after this point.
 
     // Calculate how long a sleep to request.  After the sleep, measure how
     // long we actually slept and take the difference into account when
     // calculating the sleep interval for the next iteration.  This is an
     // attempt to keep "to schedule" in the presence of inaccuracy of the
     // actual sleep intervals.