Bug 1475899: Part 4 - Add memory reporter for committed thread stack sizes on Linux. r=erahm
☠☠ backed out by 30dae375573f ☠ ☠
author Kris Maglione <maglione.k@gmail.com>
Sat, 14 Jul 2018 02:21:30 -0700
changeset 482341 b4394660fde2b05e972c491246570d8f79d8a7c6
parent 482340 e89ebe1f22f28d2b667514cb66d39606136a2f58
child 482342 fbf0e4b12c8e05cabca321d352df32735b8baec9
push id 9719
push user ffxbld-merge
push date Fri, 24 Aug 2018 17:49:46 +0000
treeherder mozilla-beta@719ec98fba77 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers erahm
bugs 1475899
milestone 63.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1475899: Part 4 - Add memory reporter for committed thread stack sizes on Linux. r=erahm Later patches add support for Windows. OS-X will need a follow-up. MozReview-Commit-ID: DDd6uir4KzM
xpcom/base/nsMemoryReporterManager.cpp
xpcom/threads/nsThread.cpp
xpcom/threads/nsThread.h
--- a/xpcom/base/nsMemoryReporterManager.cpp
+++ b/xpcom/base/nsMemoryReporterManager.cpp
@@ -24,16 +24,17 @@
 #if defined(XP_UNIX) || defined(MOZ_DMD)
 #include "nsMemoryInfoDumper.h"
 #endif
 #include "nsNetCID.h"
 #include "mozilla/Attributes.h"
 #include "mozilla/MemoryReportingProcess.h"
 #include "mozilla/PodOperations.h"
 #include "mozilla/Preferences.h"
+#include "mozilla/ResultExtensions.h"
 #include "mozilla/Services.h"
 #include "mozilla/Telemetry.h"
 #include "mozilla/UniquePtrExtensions.h"
 #include "mozilla/dom/MemoryReportTypes.h"
 #include "mozilla/dom/ContentParent.h"
 #include "mozilla/gfx/GPUProcessManager.h"
 #include "mozilla/ipc/FileDescriptorUtils.h"
 
@@ -51,16 +52,19 @@ using namespace dom;
 
 #if defined(MOZ_MEMORY)
 #  define HAVE_JEMALLOC_STATS 1
 #  include "mozmemory.h"
 #endif  // MOZ_MEMORY
 
 #if defined(XP_LINUX)
 
+#include "mozilla/MemoryMapping.h"
+#include "nsThread.h"
+
 #include <malloc.h>
 #include <string.h>
 #include <stdlib.h>
 
 static MOZ_MUST_USE nsresult
 GetProcSelfStatmField(int aField, int64_t* aN)
 {
   // There are more than two fields, but we're only interested in the first
@@ -1393,16 +1397,118 @@ public:
       "Memory used by dynamic atom objects and chars (which are stored "
       "at the end of each atom object).");
 
     return NS_OK;
   }
 };
 NS_IMPL_ISUPPORTS(AtomTablesReporter, nsIMemoryReporter)
 
+#ifdef XP_LINUX
+class ThreadStacksReporter final : public nsIMemoryReporter
+{
+  ~ThreadStacksReporter() = default;
+
+public:
+  NS_DECL_ISUPPORTS
+
+  NS_IMETHOD CollectReports(nsIHandleReportCallback* aHandleReport,
+                            nsISupports* aData, bool aAnonymize) override
+  {
+    nsTArray<MemoryMapping> mappings(1024);
+    MOZ_TRY(GetMemoryMappings(mappings));
+
+    // Enumerating over active threads requires holding a lock, so we collect
+    // info on all threads, and then call our reporter callbacks after releasing
+    // the lock.
+    struct ThreadData
+    {
+      nsCString mName;
+      uint32_t mThreadId;
+      size_t mPrivateSize;
+    };
+    AutoTArray<ThreadData, 32> threads;
+
+    for (auto* thread : nsThread::Enumerate()) {
+      if (!thread->StackBase()) {
+        continue;
+      }
+
+      int idx = mappings.BinaryIndexOf(thread->StackBase());
+      if (idx < 0) {
+        continue;
+      }
+      // Referenced() is the combined size of all pages in the region which have
+      // ever been touched, and are therefore consuming memory. For stack
+      // regions, these pages are guaranteed to be un-shared unless we fork
+      // after creating threads (which we don't).
+      size_t privateSize = mappings[idx].Referenced();
+
+      // On Linux, we have to be very careful matching memory regions to thread
+      // stacks.
+      //
+      // To begin with, the kernel only reports VM stats for regions of all
+      // adjacent pages with the same flags, protection, and backing file.
+      // There's no way to get finer-grained usage information for a subset of
+      // those pages.
+      //
+      // Stack segments always have a guard page at the bottom of the stack
+      // (assuming we only support stacks that grow down), so there's no danger
+      // of them being merged with other stack regions. At the top, there's no
+      // protection page, and no way to allocate one without using pthreads
+      // directly and allocating our own stacks. So we get around the problem by
+      // adding an extra VM flag (NOHUGEPAGES) to our stack region, which we
+      // don't expect to be set on any heap regions. But this is not fool-proof.
+      //
+      // A second kink is that different C libraries (and different versions
+      // thereof) report stack base locations and sizes differently with regard
+      // to the guard page. For the libraries that include the guard page in the
+      // stack size and base pointer, we need to adjust those values to
+      // compensate. But it's possible that our logic will get out of sync with
+      // library changes, or someone will compile with an unexpected library.
+      //
+      //
+      // The upshot of all of this is that there may be configurations that our
+      // special cases don't cover. And if there are, we want to know about it.
+      // So assert that total size of the memory region we're reporting actually
+      // matches the allocated size of the thread stack.
+      MOZ_ASSERT(mappings[idx].Size() == thread->StackSize(),
+                 "Mapping region size doesn't match stack allocation size");
+
+      threads.AppendElement(ThreadData{
+        nsCString(PR_GetThreadName(thread->GetPRThread())),
+        thread->ThreadId(),
+        // On Linux, it's possible (but unlikely) that our stack region will
+        // have been merged with adjacent heap regions, in which case we'll get
+        // combined size information for both. So we take the minimum of the
+        // reported private size and the requested stack size to avoid the
+        // possibility of majorly over-reporting in that case.
+        std::min(privateSize, thread->StackSize()),
+      });
+    }
+
+    for (auto& thread : threads) {
+      nsPrintfCString path("explicit/thread-stacks/%s (tid=%u)",
+                           thread.mName.get(), thread.mThreadId);
+
+      aHandleReport->Callback(
+          EmptyCString(), path,
+          KIND_NONHEAP, UNITS_BYTES,
+          thread.mPrivateSize,
+          NS_LITERAL_CSTRING("The sizes of thread stacks which have been "
+                             "committed to memory."),
+          aData);
+    }
+
+    return NS_OK;
+  }
+};
+NS_IMPL_ISUPPORTS(ThreadStacksReporter, nsIMemoryReporter)
+#endif
+
 #ifdef DEBUG
 
 // Ideally, this would be implemented in BlockingResourceBase.cpp.
 // However, this ends up breaking the linking step of various unit tests due
 // to adding a new dependency to libdmd for a commonly used feature (mutexes)
 // in  DMD  builds. So instead we do it here.
 class DeadlockDetectorReporter final : public nsIMemoryReporter
 {
@@ -1554,16 +1660,20 @@ nsMemoryReporterManager::Init()
 #endif
 
 #ifdef HAVE_SYSTEM_HEAP_REPORTER
   RegisterStrongReporter(new SystemHeapReporter());
 #endif
 
   RegisterStrongReporter(new AtomTablesReporter());
 
+#ifdef XP_LINUX
+  RegisterStrongReporter(new ThreadStacksReporter());
+#endif
+
 #ifdef DEBUG
   RegisterStrongReporter(new DeadlockDetectorReporter());
 #endif
 
 #ifdef MOZ_GECKO_PROFILER
   // We have to register this here rather than in profiler_init() because
   // profiler_init() runs prior to nsMemoryReporterManager's creation.
   RegisterStrongReporter(new GeckoProfilerReporter());
--- a/xpcom/threads/nsThread.cpp
+++ b/xpcom/threads/nsThread.cpp
@@ -2,16 +2,17 @@
 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "nsThread.h"
 
 #include "base/message_loop.h"
+#include "base/platform_thread.h"
 
 // Chromium's logging can sometimes leak through...
 #ifdef LOG
 #undef LOG
 #endif
 
 #include "mozilla/ReentrantMonitor.h"
 #include "nsMemoryPressure.h"
@@ -405,16 +406,17 @@ nsThread::Enumerate()
 nsThread::ThreadFunc(void* aArg)
 {
   using mozilla::ipc::BackgroundChild;
 
   ThreadInitData* initData = static_cast<ThreadInitData*>(aArg);
   nsThread* self = initData->thread;  // strong reference
 
   self->mThread = PR_GetCurrentThread();
+  self->mThreadId = uint32_t(PlatformThread::CurrentId());
   self->mVirtualThread = GetCurrentVirtualThread();
   self->mEventTarget->SetCurrentThread();
   SetupCurrentThreadForChaosMode();
 
   if (!initData->name.IsEmpty()) {
     NS_SetCurrentThreadName(initData->name.BeginReading());
   }
 
--- a/xpcom/threads/nsThread.h
+++ b/xpcom/threads/nsThread.h
@@ -69,16 +69,18 @@ public:
   PRThread* GetPRThread()
   {
     return mThread;
   }
 
   const void* StackBase() const { return mStackBase; }
   size_t StackSize() const { return mStackSize; }
 
+  uint32_t ThreadId() const { return mThreadId; }
+
   // If this flag is true, then the nsThread was created using
   // nsIThreadManager::NewThread.
   bool ShutdownRequired()
   {
     return mShutdownRequired;
   }
 
   // Clear the observer list.
@@ -172,16 +174,17 @@ protected:
   RefPtr<mozilla::ThreadEventTarget> mEventTarget;
 
   mozilla::CycleCollectedJSContext* mScriptObserver;
 
   // Only accessed on the target thread.
   nsAutoTObserverArray<NotNull<nsCOMPtr<nsIThreadObserver>>, 2> mEventObservers;
 
   int32_t   mPriority;
+  uint32_t  mThreadId;
   PRThread* mThread;
   uint32_t  mNestedEventLoopDepth;
   uint32_t  mStackSize;
   void*     mStackBase = nullptr;
 
   // The shutdown context for ourselves.
   struct nsThreadShutdownContext* mShutdownContext;
   // The shutdown contexts for any other threads we've asked to shut down.