Bug 1094552 (part 6) - DMD: add support for cumulative heap profiling. r=mccr8.
author Nicholas Nethercote <nnethercote@mozilla.com>
Thu, 30 Oct 2014 20:22:47 -0700
changeset 244771 9fae0441be6665f1bc2fcca42adca87dfd990616
parent 244770 c5229ba7f507cbde260d189782eda1120edb4353
child 244772 8312755becd6ad3c938bb0029a1bb52b05d3d727
push id 4489
push user raliiev@mozilla.com
push date Mon, 23 Feb 2015 15:17:55 +0000
treeherder mozilla-beta@fd7c3dc24146 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers mccr8
bugs 1094552
milestone 37.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1094552 (part 6) - DMD: add support for cumulative heap profiling. r=mccr8. By adding a new "cumulative" mode.
memory/replace/dmd/DMD.cpp
memory/replace/dmd/DMD.h
memory/replace/dmd/dmd.py
memory/replace/dmd/test/SmokeDMD.cpp
memory/replace/dmd/test/full-empty-cumulative-expected.txt
memory/replace/dmd/test/full-empty-dark-matter-expected.txt
memory/replace/dmd/test/full-empty-live-expected.txt
memory/replace/dmd/test/full-sampled-live-expected.txt
memory/replace/dmd/test/full-unsampled1-dark-matter-expected.txt
memory/replace/dmd/test/full-unsampled1-live-expected.txt
memory/replace/dmd/test/full-unsampled2-cumulative-expected.txt
memory/replace/dmd/test/full-unsampled2-dark-matter-expected.txt
memory/replace/dmd/test/script-diff-live-expected.txt
memory/replace/dmd/test/script-diff-live1.json
memory/replace/dmd/test/script-diff-live2.json
memory/replace/dmd/test/test_dmd.js
memory/replace/dmd/test/xpcshell.ini
python/mozbuild/mozbuild/mach_commands.py
xpcom/base/nsMemoryReporterManager.cpp
--- a/memory/replace/dmd/DMD.cpp
+++ b/memory/replace/dmd/DMD.cpp
@@ -33,16 +33,17 @@
 #include "js/Vector.h"
 
 #include "mozilla/Assertions.h"
 #include "mozilla/HashFunctions.h"
 #include "mozilla/IntegerPrintfMacros.h"
 #include "mozilla/JSONWriter.h"
 #include "mozilla/Likely.h"
 #include "mozilla/MemoryReporting.h"
+#include "mozilla/SegmentedVector.h"
 
 // CodeAddressService is defined entirely in the header, so this does not make
 // DMD depend on XPCOM's object file.
 #include "CodeAddressService.h"
 
 // replace_malloc.h needs to be included before replace_malloc_bridge.h,
 // which DMD.h includes, so DMD.h needs to be included after replace_malloc.h.
 // MOZ_REPLACE_ONLY_MEMALIGN saves us from having to define
@@ -331,17 +332,22 @@ class Options
     // For each live block, this mode outputs: size (usable and slop),
     // allocation stack, and whether it's sampled. This mode is good for live
     // heap profiling.
     Live,
 
     // Like "Live", but for each live block it also outputs: zero or more
     // report stacks. This mode is good for identifying where memory reporters
     // should be added. This is the default mode.
-    DarkMatter
+    DarkMatter,
+
+    // Like "Live", but also outputs the same data for dead blocks. This mode
+    // does cumulative heap profiling, which is good for identifying where large
+    // amounts of short-lived allocations occur.
+    Cumulative
   };
 
   char* mDMDEnvVar;   // a saved copy, for later printing
 
   Mode mMode;
   NumOption<size_t> mSampleBelowSize;
   NumOption<uint32_t> mMaxFrames;
   bool mShowDumpStats;
@@ -352,16 +358,17 @@ class Options
                       long aMin, long aMax, long* aValue);
   static bool GetBool(const char* aArg, const char* aOptionName, bool* aValue);
 
 public:
   explicit Options(const char* aDMDEnvVar);
 
   bool IsLiveMode()       const { return mMode == Live; }
   bool IsDarkMatterMode() const { return mMode == DarkMatter; }
+  bool IsCumulativeMode() const { return mMode == Cumulative; }
 
   const char* DMDEnvVar() const { return mDMDEnvVar; }
 
   size_t SampleBelowSize() const { return mSampleBelowSize.mActual; }
   size_t MaxFrames()       const { return mMaxFrames.mActual; }
   size_t ShowDumpStats()   const { return mShowDumpStats; }
 };
 
@@ -433,20 +440,20 @@ public:
     mIsLocked = false;
     MutexBase::Unlock();
   }
 
   bool IsLocked() { return mIsLocked; }
 };
 
 // This lock must be held while manipulating global state such as
-// gStackTraceTable, gLiveBlockTable, etc. Note that gOptions is *not*
-// protected by this lock because it is only written to by Options(), which is
-// only invoked at start-up and in ResetEverything(), which is only used by
-// SmokeDMD.cpp.
+// gStackTraceTable, gLiveBlockTable, gDeadBlockList. Note that gOptions is
+// *not* protected by this lock because it is only written to by Options(),
+// which is only invoked at start-up and in ResetEverything(), which is only
+// used by SmokeDMD.cpp.
 static Mutex* gStateLock = nullptr;
 
 class AutoLockState
 {
   DISALLOW_COPY_AND_ASSIGN(AutoLockState);
 
 public:
   AutoLockState()  { gStateLock->Lock(); }
@@ -845,20 +852,20 @@ class LiveBlock
   // to be |mutable|.
   //
   // Only used in DarkMatter mode.
   mutable TaggedPtr<const StackTrace*> mReportStackTrace_mReportedOnAlloc[2];
 
 public:
   LiveBlock(const void* aPtr, size_t aReqSize,
             const StackTrace* aAllocStackTrace, bool aIsSampled)
-    : mPtr(aPtr),
-      mReqSize(aReqSize),
-      mAllocStackTrace_mIsSampled(aAllocStackTrace, aIsSampled),
-      mReportStackTrace_mReportedOnAlloc()     // all fields get zeroed
+    : mPtr(aPtr)
+    , mReqSize(aReqSize)
+    , mAllocStackTrace_mIsSampled(aAllocStackTrace, aIsSampled)
+    , mReportStackTrace_mReportedOnAlloc()     // all fields get zeroed
   {
     MOZ_ASSERT(aAllocStackTrace);
   }
 
   const void* Address() const { return mPtr; }
 
   size_t ReqSize() const { return mReqSize; }
 
@@ -977,30 +984,88 @@ public:
   {
     return aB.mPtr == aPtr;
   }
 };
 
 typedef js::HashSet<LiveBlock, LiveBlock, InfallibleAllocPolicy> LiveBlockTable;
 static LiveBlockTable* gLiveBlockTable = nullptr;
 
+// A freed heap block, as recorded in gDeadBlockList in Cumulative mode.
+class DeadBlock
+{
+  const size_t mReqSize;    // size requested
+  const size_t mSlopSize;   // slop above size requested
+
+  // Ptr: |mAllocStackTrace| - stack trace where this block was allocated.
+  // Tag bit 0: |mIsSampled| - was this block sampled? (if so, slop == 0).
+  TaggedPtr<const StackTrace* const>
+    mAllocStackTrace_mIsSampled;
+
+public:
+  DeadBlock()   // placeholder; AllocStackTrace() is null until filled in
+    : mReqSize(0)
+    , mSlopSize(0)
+    , mAllocStackTrace_mIsSampled(nullptr, 0)
+  {}
+
+  explicit DeadBlock(const LiveBlock& aLb)   // copies the data kept from aLb
+    : mReqSize(aLb.ReqSize())
+    , mSlopSize(aLb.SlopSize())
+    , mAllocStackTrace_mIsSampled(aLb.AllocStackTrace(), aLb.IsSampled())
+  {
+    MOZ_ASSERT(AllocStackTrace());   // LiveBlock's ctor asserts this too
+    MOZ_ASSERT_IF(IsSampled(), SlopSize() == 0);
+  }
+
+  ~DeadBlock() {}
+
+  size_t ReqSize()    const { return mReqSize; }
+  size_t SlopSize()   const { return mSlopSize; }
+  size_t UsableSize() const { return mReqSize + mSlopSize; }
+
+  bool IsSampled() const
+  {
+    return mAllocStackTrace_mIsSampled.Tag();
+  }
+
+  const StackTrace* AllocStackTrace() const
+  {
+    return mAllocStackTrace_mIsSampled.Ptr();
+  }
+
+  void AddStackTracesToTable(StackTraceSet& aStackTraces) const
+  {
+    aStackTraces.put(AllocStackTrace());  // non-null for blocks in the list
+  }
+};
+
+static const size_t kDeadBlockListSegmentSize = 16384;
+typedef SegmentedVector<DeadBlock, kDeadBlockListSegmentSize,
+                        InfallibleAllocPolicy> DeadBlockList;
+static DeadBlockList* gDeadBlockList = nullptr;
+
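-// Add a pointer to each live stack trace into the given StackTraceSet.  (A
-// stack trace is live if it's used by one of the live blocks.)
+// Add a pointer to each used stack trace into the given StackTraceSet.  (A
+// stack trace is used if a live block or a recorded dead block refers to it.)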
 static void
 GatherUsedStackTraces(StackTraceSet& aStackTraces)
 {
   MOZ_ASSERT(gStateLock->IsLocked());
   MOZ_ASSERT(Thread::Fetch()->InterceptsAreBlocked());
 
   aStackTraces.finish();
   aStackTraces.init(512);
 
   for (auto r = gLiveBlockTable->all(); !r.empty(); r.popFront()) {
     r.front().AddStackTracesToTable(aStackTraces);
   }
+
+  for (auto iter = gDeadBlockList->Iter(); !iter.Done(); iter.Next()) {
+    iter.Get().AddStackTracesToTable(aStackTraces);
+  }
 }
 
 // Delete stack traces that we aren't using, and compact our hashtable.
 static void
 GCStackTraces()
 {
   MOZ_ASSERT(gStateLock->IsLocked());
   MOZ_ASSERT(Thread::Fetch()->InterceptsAreBlocked());
@@ -1058,26 +1123,36 @@ AllocCallback(void* aPtr, size_t aReqSiz
   } else {
     // If this block size is larger than the sample size, record it exactly.
     LiveBlock b(aPtr, aReqSize, StackTrace::Get(aT), /* isSampled */ false);
     (void)gLiveBlockTable->putNew(aPtr, b);
   }
 }
 
 static void
-FreeCallback(void* aPtr, Thread* aT)
+FreeCallback(void* aPtr, Thread* aT, DeadBlock* aDeadBlock)
 {
   if (!aPtr) {
     return;
   }
 
   AutoLockState lock;
   AutoBlockIntercepts block(aT);
 
-  gLiveBlockTable->remove(aPtr);
+  if (LiveBlockTable::Ptr lb = gLiveBlockTable->lookup(aPtr)) {
+    if (gOptions->IsCumulativeMode()) {
+      // Copy it out so it can be added to the dead block list later.
+      new (aDeadBlock) DeadBlock(*lb);
+    }
+    gLiveBlockTable->remove(lb);
+  } else {
+    // We have no record of the block. Do nothing. Either:
+    // - We're sampling and we skipped this block. This is likely.
+    // - It's a bogus pointer.
+  }
 
   if (gStackTraceTable->count() > gGCStackTraceTableWhenSizeExceeds) {
     GCStackTraces();
   }
 }
 
 //---------------------------------------------------------------------------
 // malloc/free interception
@@ -1163,25 +1238,32 @@ replace_realloc(void* aOldPtr, size_t aS
   if (!aOldPtr) {
     return replace_malloc(aSize);
   }
 
   // Be very careful here!  Must remove the block from the table before doing
   // the realloc to avoid races, just like in replace_free().
   // Nb: This does an unnecessary hashtable remove+add if the block doesn't
   // move, but doing better isn't worth the effort.
-  FreeCallback(aOldPtr, t);
+  DeadBlock db;
+  FreeCallback(aOldPtr, t, &db);
   void* ptr = gMallocTable->realloc(aOldPtr, aSize);
   if (ptr) {
     AllocCallback(ptr, aSize, t);
+    if (gOptions->IsCumulativeMode() && db.AllocStackTrace()) {
+      AutoLockState lock;
+      gDeadBlockList->InfallibleAppend(db);
+    }
   } else {
-    // If realloc fails, we re-insert the old pointer.  It will look like it
-    // was allocated for the first time here, which is untrue, and the slop
-    // bytes will be zero, which may be untrue.  But this case is rare and
-    // doing better isn't worth the effort.
+    // If realloc fails, we undo the prior operations by re-inserting the old
+    // pointer into the live block table. We don't have to do anything with the
+    // dead block list because the dead block hasn't yet been inserted. The
+    // block will end up looking like it was allocated for the first time here,
+    // which is untrue, and the slop bytes will be zero, which may be untrue.
+    // But this case is rare and doing better isn't worth the effort.
     AllocCallback(aOldPtr, gMallocTable->malloc_usable_size(aOldPtr), t);
   }
   return ptr;
 }
 
 void*
 replace_memalign(size_t aAlignment, size_t aSize)
 {
@@ -1214,17 +1296,22 @@ replace_free(void* aPtr)
   Thread* t = Thread::Fetch();
   if (t->InterceptsAreBlocked()) {
     return InfallibleAllocPolicy::free_(aPtr);
   }
 
   // Do the actual free after updating the table.  Otherwise, another thread
   // could call malloc and get the freed block and update the table, and then
   // our update here would remove the newly-malloc'd block.
-  FreeCallback(aPtr, t);
+  DeadBlock db;
+  FreeCallback(aPtr, t, &db);
+  if (gOptions->IsCumulativeMode() && db.AllocStackTrace()) {
+    AutoLockState lock;
+    gDeadBlockList->InfallibleAppend(db);
+  }
   gMallocTable->free(aPtr);
 }
 
 namespace mozilla {
 namespace dmd {
 
 //---------------------------------------------------------------------------
 // Options (Part 2)
@@ -1318,16 +1405,18 @@ Options::Options(const char* aDMDEnvVar)
 
       // Handle arg
       long myLong;
       bool myBool;
       if (strcmp(arg, "--mode=live") == 0) {
         mMode = Options::Live;
       } else if (strcmp(arg, "--mode=dark-matter") == 0) {
         mMode = Options::DarkMatter;
+      } else if (strcmp(arg, "--mode=cumulative") == 0) {
+        mMode = Options::Cumulative;
 
       } else if (GetLong(arg, "--sample-below", 1, mSampleBelowSize.mMax,
                  &myLong)) {
         mSampleBelowSize.mActual = myLong;
 
       } else if (GetLong(arg, "--max-frames", 1, mMaxFrames.mMax, &myLong)) {
         mMaxFrames.mActual = myLong;
 
@@ -1354,17 +1443,17 @@ Options::BadArg(const char* aArg)
   StatusMsg("\n");
   StatusMsg("Bad entry in the $DMD environment variable: '%s'.\n", aArg);
   StatusMsg("\n");
   StatusMsg("$DMD must be a whitespace-separated list of |--option=val|\n");
   StatusMsg("entries.\n");
   StatusMsg("\n");
   StatusMsg("The following options are allowed;  defaults are shown in [].\n");
   StatusMsg("  --mode=<mode>                Profiling mode [dark-matter]\n");
-  StatusMsg("      where <mode> is one of: live, dark-matter\n");
+  StatusMsg("      where <mode> is one of: live, dark-matter, cumulative\n");
   StatusMsg("  --sample-below=<1..%d> Sample blocks smaller than this [%d]\n",
             int(mSampleBelowSize.mMax),
             int(mSampleBelowSize.mDefault));
   StatusMsg("                               (prime numbers are recommended)\n");
   StatusMsg("  --max-frames=<1..%d>         Max. depth of stack traces [%d]\n",
             int(mMaxFrames.mMax),
             int(mMaxFrames.mDefault));
   StatusMsg("  --show-dump-stats=<yes|no>   Show stats about dumps? [no]\n");
@@ -1425,16 +1514,22 @@ Init(const malloc_table_t* aMallocTable)
   {
     AutoLockState lock;
 
     gStackTraceTable = InfallibleAllocPolicy::new_<StackTraceTable>();
     gStackTraceTable->init(8192);
 
     gLiveBlockTable = InfallibleAllocPolicy::new_<LiveBlockTable>();
     gLiveBlockTable->init(8192);
+
+    // Create this even if the mode isn't Cumulative, in case the mode is
+    // changed later on (as is done by SmokeDMD.cpp, for example). It's tiny
+    // when empty, so space isn't a concern.
+    gDeadBlockList =
+      InfallibleAllocPolicy::new_<DeadBlockList>(kDeadBlockListSegmentSize);
   }
 
   gIsDMDInitialized = true;
 }
 
 //---------------------------------------------------------------------------
 // Block reporting and unreporting
 //---------------------------------------------------------------------------
@@ -1511,16 +1606,18 @@ SizeOfInternal(Sizes* aSizes)
       aSizes->mStackTracesUnused += MallocSizeOf(st);
     }
   }
 
   aSizes->mStackTraceTable =
     gStackTraceTable->sizeOfIncludingThis(MallocSizeOf);
 
   aSizes->mLiveBlockTable = gLiveBlockTable->sizeOfIncludingThis(MallocSizeOf);
+
+  aSizes->mDeadBlockList = gDeadBlockList->SizeOfIncludingThis(MallocSizeOf);
 }
 
 void
 DMDFuncs::SizeOf(Sizes* aSizes)
 {
   aSizes->Clear();
 
   AutoBlockIntercepts block(Thread::Fetch());
@@ -1639,31 +1736,34 @@ AnalyzeImpl(UniquePtr<JSONWriteFunc> aWr
     writer.StartObjectProperty("invocation");
     {
       writer.StringProperty("dmdEnvVar", gOptions->DMDEnvVar());
       const char* mode;
       if (gOptions->IsLiveMode()) {
         mode = "live";
       } else if (gOptions->IsDarkMatterMode()) {
         mode = "dark-matter";
+      } else if (gOptions->IsCumulativeMode()) {
+        mode = "cumulative";
       } else {
         MOZ_ASSERT(false);
         mode = "(unknown DMD mode)";
       }
       writer.StringProperty("mode", mode);
       writer.IntProperty("sampleBelowSize", gOptions->SampleBelowSize());
     }
     writer.EndObject();
 
     StatusMsg("  Constructing the heap block list...\n");
 
     ToIdStringConverter isc;
 
     writer.StartArrayProperty("blockList");
     {
+      // Live blocks.
       for (auto r = gLiveBlockTable->all(); !r.empty(); r.popFront()) {
         const LiveBlock& b = r.front();
         b.AddStackTracesToTable(usedStackTraces);
 
         writer.StartObjectElement(writer.SingleLineStyle);
         {
           if (!b.IsSampled()) {
             writer.IntProperty("req", b.ReqSize());
@@ -1683,16 +1783,34 @@ AnalyzeImpl(UniquePtr<JSONWriteFunc> aWr
                 writer.StringElement(isc.ToIdString(b.ReportStackTrace2()));
               }
             }
             writer.EndArray();
           }
         }
         writer.EndObject();
       }
+
+      // Dead blocks.
+      for (auto iter = gDeadBlockList->Iter(); !iter.Done(); iter.Next()) {
+        const DeadBlock& b = iter.Get();
+        b.AddStackTracesToTable(usedStackTraces);
+
+        writer.StartObjectElement(writer.SingleLineStyle);
+        {
+          if (!b.IsSampled()) {
+            writer.IntProperty("req", b.ReqSize());
+            if (b.SlopSize() > 0) {
+              writer.IntProperty("slop", b.SlopSize());
+            }
+          }
+          writer.StringProperty("alloc", isc.ToIdString(b.AllocStackTrace()));
+        }
+        writer.EndObject();
+      }
     }
     writer.EndArray();
 
     StatusMsg("  Constructing the stack trace table...\n");
 
     writer.StartObjectProperty("traceTable");
     {
       for (auto r = usedStackTraces.all(); !r.empty(); r.popFront()) {
@@ -1756,16 +1874,20 @@ AnalyzeImpl(UniquePtr<JSONWriteFunc> aWr
       Show(gStackTraceTable->capacity(), buf2, kBufLen),
       Show(gStackTraceTable->count(),    buf3, kBufLen));
 
     StatusMsg("      Live block table:     %10s bytes (%s entries, %s used)\n",
       Show(sizes.mLiveBlockTable,       buf1, kBufLen),
       Show(gLiveBlockTable->capacity(), buf2, kBufLen),
       Show(gLiveBlockTable->count(),    buf3, kBufLen));
 
+    StatusMsg("      Dead block list:      %10s bytes (%s entries)\n",
+      Show(sizes.mDeadBlockList,     buf1, kBufLen),
+      Show(gDeadBlockList->Length(), buf2, kBufLen));
+
     StatusMsg("    }\n");
     StatusMsg("    Data structures that are destroyed after Dump() ends {\n");
 
     StatusMsg("      Location service:      %10s bytes\n",
       Show(locService->SizeOfIncludingThis(MallocSizeOf), buf1, kBufLen));
     StatusMsg("      Used stack traces set: %10s bytes\n",
       Show(usedStackTraces.sizeOfExcludingThis(MallocSizeOf), buf1, kBufLen));
     StatusMsg("      Used PCs set:          %10s bytes\n",
@@ -1814,13 +1936,14 @@ DMDFuncs::ResetEverything(const char* aO
   AutoLockState lock;
 
   // Reset options.
   InfallibleAllocPolicy::delete_(gOptions);
   gOptions = InfallibleAllocPolicy::new_<Options>(aOptions);
 
   // Clear all existing blocks.
   gLiveBlockTable->clear();
+  gDeadBlockList->Clear();
   gSmallBlockActualSizeCounter = 0;
 }
 
 }   // namespace dmd
 }   // namespace mozilla
--- a/memory/replace/dmd/DMD.h
+++ b/memory/replace/dmd/DMD.h
@@ -24,16 +24,17 @@ class JSONWriteFunc;
 namespace dmd {
 
 struct Sizes
 {
   size_t mStackTracesUsed;
   size_t mStackTracesUnused;
   size_t mStackTraceTable;
   size_t mLiveBlockTable;
+  size_t mDeadBlockList;
 
   Sizes() { Clear(); }
   void Clear() { memset(this, 0, sizeof(Sizes)); }
 };
 
 // See further below for a description of each method. The DMDFuncs class
 // should contain a virtual method for each of them (except IsRunning,
 // which can be inferred from the DMDFuncs singleton existing).
@@ -151,17 +152,17 @@ ClearReports()
 //   "version": 2,
 //
 //   // Information about how DMD was invoked. A mandatory object.
 //   "invocation": {
 //     // The contents of the $DMD environment variable. A mandatory string.
 //     "dmdEnvVar": "1",
 //
 //     // The profiling mode. A mandatory string taking one of the following
-//     // values: "live", "dark-matter".
+//     // values: "live", "dark-matter", "cumulative".
 //     "mode": "dark-matter",
 //
 //     // The value of the --sample-below-size option. A mandatory integer.
 //     "sampleBelowSize": 4093
 //   },
 //
 //   // Details of all analyzed heap blocks. A mandatory array.
 //   "blockList": [
--- a/memory/replace/dmd/dmd.py
+++ b/memory/replace/dmd/dmd.py
@@ -275,17 +275,17 @@ def getDigestFromFile(args, inputFile):
     invocation = j['invocation']
     dmdEnvVar = invocation['dmdEnvVar']
     mode = invocation['mode']
     sampleBelowSize = invocation['sampleBelowSize']
     blockList = j['blockList']
     traceTable = j['traceTable']
     frameTable = j['frameTable']
 
-    if not mode in ['live', 'dark-matter']:
+    if not mode in ['live', 'dark-matter', 'cumulative']:
         raise Exception("bad 'mode' property: '{:s}'".format(mode))
 
     heapIsSampled = sampleBelowSize > 1     # is sampling present?
 
     # Remove allocation functions at the start of traces.
     if args.ignore_alloc_fns:
         # Build a regexp that matches every function in allocatorFns.
         escapedAllocatorFns = map(re.escape, allocatorFns)
@@ -331,18 +331,18 @@ def getDigestFromFile(args, inputFile):
         desc = []
         for n, frameKey in enumerate(traceTable[traceKey], start=1):
             desc.append(fmt.format(n, frameTable[frameKey][3:]))
         return desc
 
     # Aggregate blocks into records. All sufficiently similar blocks go into a
     # single record.
 
-    if mode == 'live':
-        liveRecords = collections.defaultdict(Record)
+    if mode in ['live', 'cumulative']:
+        liveOrCumulativeRecords = collections.defaultdict(Record)
     elif mode == 'dark-matter':
         unreportedRecords    = collections.defaultdict(Record)
         onceReportedRecords  = collections.defaultdict(Record)
         twiceReportedRecords = collections.defaultdict(Record)
 
     heapUsableSize = 0
     heapBlocks = 0
 
@@ -364,19 +364,19 @@ def getDigestFromFile(args, inputFile):
         # and we trim the final frame of each they should be considered
         # equivalent because the untrimmed frame descriptions (D1 and D2)
         # match.
         def makeRecordKeyPart(traceKey):
             return str(map(lambda frameKey: frameTable[frameKey],
                            traceTable[traceKey]))
 
         allocatedAtTraceKey = block['alloc']
-        if mode == 'live':
+        if mode in ['live', 'cumulative']:
             recordKey = makeRecordKeyPart(allocatedAtTraceKey)
-            records = liveRecords
+            records = liveOrCumulativeRecords
         elif mode == 'dark-matter':
             recordKey = makeRecordKeyPart(allocatedAtTraceKey)
             if 'reps' in block:
                 reportedAtTraceKeys = block['reps']
                 for reportedAtTraceKey in reportedAtTraceKeys:
                     recordKey += makeRecordKeyPart(reportedAtTraceKey)
                 if len(reportedAtTraceKeys) == 1:
                     records = onceReportedRecords
@@ -409,34 +409,34 @@ def getDigestFromFile(args, inputFile):
         record.slopSize   += slopSize
         record.usableSize += usableSize
         record.isSampled   = record.isSampled or isSampled
         if record.allocatedAtDesc == None:
             record.allocatedAtDesc = \
                 buildTraceDescription(traceTable, frameTable,
                                       allocatedAtTraceKey)
 
-        if mode == 'live':
+        if mode in ['live', 'cumulative']:
             pass
         elif mode == 'dark-matter':
             if 'reps' in block and record.reportedAtDescs == []:
                 f = lambda k: buildTraceDescription(traceTable, frameTable, k)
                 record.reportedAtDescs = map(f, reportedAtTraceKeys)
         record.usableSizes[(usableSize, isSampled)] += 1
 
     # All the processed data for a single DMD file is called a "digest".
     digest = {}
     digest['dmdEnvVar'] = dmdEnvVar
     digest['mode'] = mode
     digest['sampleBelowSize'] = sampleBelowSize
     digest['heapUsableSize'] = heapUsableSize
     digest['heapBlocks'] = heapBlocks
     digest['heapIsSampled'] = heapIsSampled
-    if mode == 'live':
-        digest['liveRecords'] = liveRecords
+    if mode in ['live', 'cumulative']:
+        digest['liveOrCumulativeRecords'] = liveOrCumulativeRecords
     elif mode == 'dark-matter':
         digest['unreportedRecords'] = unreportedRecords
         digest['onceReportedRecords'] = onceReportedRecords
         digest['twiceReportedRecords'] = twiceReportedRecords
     return digest
 
 
 def diffRecords(args, records1, records2):
@@ -470,19 +470,20 @@ def diffDigests(args, d1, d2):
 
     d3 = {}
     d3['dmdEnvVar'] = (d1['dmdEnvVar'], d2['dmdEnvVar'])
     d3['mode'] = d1['mode']
     d3['sampleBelowSize'] = (d1['sampleBelowSize'], d2['sampleBelowSize'])
     d3['heapUsableSize'] = d2['heapUsableSize'] - d1['heapUsableSize']
     d3['heapBlocks']     = d2['heapBlocks']     - d1['heapBlocks']
     d3['heapIsSampled']  = d2['heapIsSampled'] or d1['heapIsSampled']
-    if d1['mode'] == 'live':
-        d3['liveRecords'] = diffRecords(args, d1['liveRecords'],
-                                              d2['liveRecords'])
+    if d1['mode'] in ['live', 'cumulative']:
+        d3['liveOrCumulativeRecords'] = \
+            diffRecords(args, d1['liveOrCumulativeRecords'],
+                              d2['liveOrCumulativeRecords'])
     elif d1['mode'] == 'dark-matter':
         d3['unreportedRecords']    = diffRecords(args, d1['unreportedRecords'],
                                                        d2['unreportedRecords'])
         d3['onceReportedRecords']  = diffRecords(args, d1['onceReportedRecords'],
                                                        d2['onceReportedRecords'])
         d3['twiceReportedRecords'] = diffRecords(args, d1['twiceReportedRecords'],
                                                        d2['twiceReportedRecords'])
     return d3
@@ -490,18 +491,18 @@ def diffDigests(args, d1, d2):
 
 def printDigest(args, digest):
     dmdEnvVar       = digest['dmdEnvVar']
     mode            = digest['mode']
     sampleBelowSize = digest['sampleBelowSize']
     heapUsableSize  = digest['heapUsableSize']
     heapIsSampled   = digest['heapIsSampled']
     heapBlocks      = digest['heapBlocks']
-    if mode == 'live':
-        liveRecords = digest['liveRecords']
+    if mode in ['live', 'cumulative']:
+        liveOrCumulativeRecords = digest['liveOrCumulativeRecords']
     elif mode == 'dark-matter':
         unreportedRecords    = digest['unreportedRecords']
         onceReportedRecords  = digest['onceReportedRecords']
         twiceReportedRecords = digest['twiceReportedRecords']
 
     separator = '#' + '-' * 65 + '\n'
 
     def number(n, isSampled):
@@ -584,27 +585,27 @@ def printDigest(args, digest):
                         if count > 1:
                             out(' x {:,d}'.format(count), end='')
                         isFirst = False
                 out()
 
             out('  {:4.2f}% of the heap ({:4.2f}% cumulative)'.
                 format(perc(record.usableSize, heapUsableSize),
                        perc(kindCumulativeUsableSize, heapUsableSize)))
-            if mode == 'live':
+            if mode in ['live', 'cumulative']:
                 pass
             elif mode == 'dark-matter':
                 out('  {:4.2f}% of {:} ({:4.2f}% cumulative)'.
                     format(perc(record.usableSize, kindUsableSize),
                            recordKind,
                            perc(kindCumulativeUsableSize, kindUsableSize)))
             out('  Allocated at {')
             printStack(record.allocatedAtDesc)
             out('  }')
-            if mode == 'live':
+            if mode in ['live', 'cumulative']:
                 pass
             elif mode == 'dark-matter':
                 for n, reportedAtDesc in enumerate(record.reportedAtDescs):
                     again = 'again ' if n > 0 else ''
                     out('  Reported {:}at {{'.format(again))
                     printStack(reportedAtDesc)
                     out('  }')
             out('}\n')
@@ -626,36 +627,36 @@ def printDigest(args, digest):
     # Print invocation(s).
     if type(dmdEnvVar) is not tuple:
         printInvocation('', dmdEnvVar, sampleBelowSize)
     else:
         printInvocation(' 1', dmdEnvVar[0], sampleBelowSize[0])
         printInvocation(' 2', dmdEnvVar[1], sampleBelowSize[1])
 
     # Print records.
-    if mode == 'live':
-        liveUsableSize, liveBlocks = \
-            printRecords('live', liveRecords, heapUsableSize)
+    if mode in ['live', 'cumulative']:
+        liveOrCumulativeUsableSize, liveOrCumulativeBlocks = \
+            printRecords(mode, liveOrCumulativeRecords, heapUsableSize)
     elif mode == 'dark-matter':
         twiceReportedUsableSize, twiceReportedBlocks = \
             printRecords('twice-reported', twiceReportedRecords, heapUsableSize)
 
         unreportedUsableSize, unreportedBlocks = \
             printRecords('unreported', unreportedRecords, heapUsableSize)
 
         onceReportedUsableSize, onceReportedBlocks = \
             printRecords('once-reported', onceReportedRecords, heapUsableSize)
 
     # Print summary.
     out(separator)
     out('Summary {')
-    if mode == 'live':
+    if mode in ['live', 'cumulative']:
         out('  Total: {:} bytes in {:} blocks'.
-            format(number(liveUsableSize, heapIsSampled),
-                   number(liveBlocks, heapIsSampled)))
+            format(number(liveOrCumulativeUsableSize, heapIsSampled),
+                   number(liveOrCumulativeBlocks, heapIsSampled)))
     elif mode == 'dark-matter':
         fmt = '  {:15} {:>12} bytes ({:6.2f}%) in {:>7} blocks ({:6.2f}%)'
         out(fmt.
             format('Total:',
                    number(heapUsableSize, heapIsSampled),
                    100,
                    number(heapBlocks, heapIsSampled),
                    100))
--- a/memory/replace/dmd/test/SmokeDMD.cpp
+++ b/memory/replace/dmd/test/SmokeDMD.cpp
@@ -121,32 +121,35 @@ TestUnsampled(const char* aTestName, int
   int i;
   char* a = nullptr;
   for (i = 0; i < aSeven + 3; i++) {
       a = (char*) malloc(100);
       UseItOrLoseIt(a, aSeven);
   }
   free(a);
 
+  // A no-op.
+  free(nullptr);
+
   // Note: 8 bytes is the smallest requested size that gives consistent
   // behaviour across all platforms with jemalloc.
   // Analyze 1: reported.
   // Analyze 2: thrice-reported.
   char* a2 = (char*) malloc(8);
   Report(a2);
 
   // Analyze 1: reported.
   // Analyze 2: reportedness carries over, due to ReportOnAlloc.
   char* b = (char*) malloc(10);
   ReportOnAlloc(b);
 
   // ReportOnAlloc, then freed.
   // Analyze 1: freed, irrelevant.
   // Analyze 2: freed, irrelevant.
-  char* b2 = (char*) malloc(1);
+  char* b2 = (char*) malloc(8);
   ReportOnAlloc(b2);
   free(b2);
 
   // Analyze 1: reported 4 times.
   // Analyze 2: freed, irrelevant.
   char* c = (char*) calloc(10, 3);
   Report(c);
   for (int i = 0; i < aSeven - 4; i++) {
@@ -341,21 +344,23 @@ RunTests()
   int *x = (int*)malloc(100);
   UseItOrLoseIt(x, seven);
   MOZ_RELEASE_ASSERT(IsRunning());
 
   // Please keep this in sync with run_test in test_dmd.js.
 
   TestEmpty("empty", "live");
   TestEmpty("empty", "dark-matter");
+  TestEmpty("empty", "cumulative");
 
   TestUnsampled("unsampled", 1, "live",        seven);
   TestUnsampled("unsampled", 1, "dark-matter", seven);
 
   TestUnsampled("unsampled", 2, "dark-matter", seven);
+  TestUnsampled("unsampled", 2, "cumulative",  seven);
 
   TestSampled("sampled", "live", seven);
 }
 
 int main()
 {
   RunTests();
 
new file mode 100644
--- /dev/null
+++ b/memory/replace/dmd/test/full-empty-cumulative-expected.txt
@@ -0,0 +1,18 @@
+#-----------------------------------------------------------------
+# dmd.py --filter-stacks-for-testing -o full-empty-cumulative-actual.txt full-empty-cumulative.json
+
+Invocation {
+  $DMD = '--mode=cumulative --sample-below=1'
+  Sample-below size = 1
+}
+
+#-----------------------------------------------------------------
+
+# no cumulative heap blocks
+
+#-----------------------------------------------------------------
+
+Summary {
+  Total: 0 bytes in 0 blocks
+}
+
new file mode 100644
--- /dev/null
+++ b/memory/replace/dmd/test/full-unsampled2-cumulative-expected.txt
@@ -0,0 +1,163 @@
+#-----------------------------------------------------------------
+# dmd.py --filter-stacks-for-testing -o full-unsampled2-cumulative-actual.txt full-unsampled2-cumulative.json
+
+Invocation {
+  $DMD = '--mode=cumulative --sample-below=1 --show-dump-stats=yes'
+  Sample-below size = 1
+}
+
+#-----------------------------------------------------------------
+
+Cumulative {
+  1 block in heap block record 1 of 16
+  8,192 bytes (4,097 requested / 4,095 slop)
+  47.10% of the heap (47.10% cumulative)
+  Allocated at {
+    #01: ... DMD.cpp ...
+  }
+}
+
+Cumulative {
+  1 block in heap block record 2 of 16
+  4,096 bytes (4,096 requested / 0 slop)
+  23.55% of the heap (70.65% cumulative)
+  Allocated at {
+    #01: ... DMD.cpp ...
+  }
+}
+
+Cumulative {
+  10 blocks in heap block record 3 of 16
+  1,120 bytes (1,000 requested / 120 slop)
+  Individual block sizes: 112 x 10
+  6.44% of the heap (77.09% cumulative)
+  Allocated at {
+    #01: ... DMD.cpp ...
+  }
+}
+
+Cumulative {
+  1 block in heap block record 4 of 16
+  1,024 bytes (1,024 requested / 0 slop)
+  5.89% of the heap (82.98% cumulative)
+  Allocated at {
+    #01: ... DMD.cpp ...
+  }
+}
+
+Cumulative {
+  1 block in heap block record 5 of 16
+  1,024 bytes (1,023 requested / 1 slop)
+  5.89% of the heap (88.87% cumulative)
+  Allocated at {
+    #01: ... DMD.cpp ...
+  }
+}
+
+Cumulative {
+  6 blocks in heap block record 6 of 16
+  528 bytes (528 requested / 0 slop)
+  Individual block sizes: 128; 112; 96; 80; 64; 48
+  3.04% of the heap (91.90% cumulative)
+  Allocated at {
+    #01: ... DMD.cpp ...
+  }
+}
+
+Cumulative {
+  6 blocks in heap block record 7 of 16
+  528 bytes (528 requested / 0 slop)
+  Individual block sizes: 128; 112; 96; 80; 64; 48
+  3.04% of the heap (94.94% cumulative)
+  Allocated at {
+    #01: ... DMD.cpp ...
+  }
+}
+
+Cumulative {
+  1 block in heap block record 8 of 16
+  512 bytes (512 requested / 0 slop)
+  2.94% of the heap (97.88% cumulative)
+  Allocated at {
+    #01: ... DMD.cpp ...
+  }
+}
+
+Cumulative {
+  1 block in heap block record 9 of 16
+  80 bytes (79 requested / 1 slop)
+  0.46% of the heap (98.34% cumulative)
+  Allocated at {
+    #01: ... DMD.cpp ...
+  }
+}
+
+Cumulative {
+  1 block in heap block record 10 of 16
+  80 bytes (78 requested / 2 slop)
+  0.46% of the heap (98.80% cumulative)
+  Allocated at {
+    #01: ... DMD.cpp ...
+  }
+}
+
+Cumulative {
+  1 block in heap block record 11 of 16
+  80 bytes (77 requested / 3 slop)
+  0.46% of the heap (99.26% cumulative)
+  Allocated at {
+    #01: ... DMD.cpp ...
+  }
+}
+
+Cumulative {
+  1 block in heap block record 12 of 16
+  64 bytes (64 requested / 0 slop)
+  0.37% of the heap (99.63% cumulative)
+  Allocated at {
+    #01: ... DMD.cpp ...
+  }
+}
+
+Cumulative {
+  1 block in heap block record 13 of 16
+  32 bytes (30 requested / 2 slop)
+  0.18% of the heap (99.82% cumulative)
+  Allocated at {
+    #01: ... DMD.cpp ...
+  }
+}
+
+Cumulative {
+  1 block in heap block record 14 of 16
+  16 bytes (10 requested / 6 slop)
+  0.09% of the heap (99.91% cumulative)
+  Allocated at {
+    #01: ... DMD.cpp ...
+  }
+}
+
+Cumulative {
+  1 block in heap block record 15 of 16
+  8 bytes (8 requested / 0 slop)
+  0.05% of the heap (99.95% cumulative)
+  Allocated at {
+    #01: ... DMD.cpp ...
+  }
+}
+
+Cumulative {
+  1 block in heap block record 16 of 16
+  8 bytes (8 requested / 0 slop)
+  0.05% of the heap (100.00% cumulative)
+  Allocated at {
+    #01: ... DMD.cpp ...
+  }
+}
+
+#-----------------------------------------------------------------
+
+Summary {
+  Total: 17,392 bytes in 35 blocks
+}
+
--- a/memory/replace/dmd/test/test_dmd.js
+++ b/memory/replace/dmd/test/test_dmd.js
@@ -134,21 +134,23 @@ function run_test() {
     test(name, [jsonFile.path]);
     jsonFile.remove(true);
   }
 
   // Please keep this in sync with RunTests() in SmokeDMD.cpp.
 
   test2("empty", "live");
   test2("empty", "dark-matter");
+  test2("empty", "cumulative");
 
   test2("unsampled1", "live");
   test2("unsampled1", "dark-matter");
 
   test2("unsampled2", "dark-matter");
+  test2("unsampled2", "cumulative");
 
   test2("sampled", "live");
 
   // These tests only test the post-processing script. They use hand-written
   // JSON files as input. Ideally the JSON files would contain comments
   // explaining how they work, but JSON doesn't allow comments, so I've put
   // explanations here.
 
--- a/memory/replace/dmd/test/xpcshell.ini
+++ b/memory/replace/dmd/test/xpcshell.ini
@@ -1,15 +1,17 @@
 [DEFAULT]
 support-files =
   full-empty-live-expected.txt
   full-empty-dark-matter-expected.txt
+  full-empty-cumulative-expected.txt
   full-unsampled1-live-expected.txt
   full-unsampled1-dark-matter-expected.txt
   full-unsampled2-dark-matter-expected.txt
+  full-unsampled2-cumulative-expected.txt
   full-sampled-live-expected.txt
   script-max-frames.json
   script-max-frames-8-expected.txt
   script-max-frames-3-expected.txt
   script-max-frames-1-expected.txt
   script-sort-by.json.gz
   script-sort-by-usable-expected.txt
   script-sort-by-req-expected.txt
--- a/python/mozbuild/mozbuild/mach_commands.py
+++ b/python/mozbuild/mozbuild/mach_commands.py
@@ -888,17 +888,17 @@ class RunProgram(MachCommandBase):
     # "continue" to (safely) resume execution.  There are ways to implement
     # automatic resuming; see the bug.
     @CommandArgument('--slowscript', action='store_true', group='debugging',
         help='Do not set the JS_DISABLE_SLOW_SCRIPT_SIGNALS env variable; when not set, recoverable but misleading SIGSEGV instances may occur in Ion/Odin JIT code.')
 
     @CommandArgumentGroup('DMD')
     @CommandArgument('--dmd', action='store_true', group='DMD',
         help='Enable DMD. The following arguments have no effect without this.')
-    @CommandArgument('--mode', choices=['live', 'dark-matter'], group='DMD',
+    @CommandArgument('--mode', choices=['live', 'dark-matter', 'cumulative'], group='DMD',
          help='Profiling mode. The default is \'dark-matter\'.')
     @CommandArgument('--sample-below', default=None, type=str, group='DMD',
         help='Sample blocks smaller than this. Use 1 for no sampling. The default is 4093.')
     @CommandArgument('--max-frames', default=None, type=str, group='DMD',
         help='The maximum depth of stack traces. The default and maximum is 24.')
     @CommandArgument('--show-dump-stats', action='store_true', group='DMD',
         help='Show stats when doing dumps.')
     def run(self, params, remote, background, noprofile, debug, debugger,
--- a/xpcom/base/nsMemoryReporterManager.cpp
+++ b/xpcom/base/nsMemoryReporterManager.cpp
@@ -951,16 +951,20 @@ public:
     REPORT("explicit/dmd/stack-traces/table",
            sizes.mStackTraceTable,
            "Memory used by DMD's stack trace table.");
 
     REPORT("explicit/dmd/live-block-table",
            sizes.mLiveBlockTable,
            "Memory used by DMD's live block table.");
 
+    REPORT("explicit/dmd/dead-block-list",
+           sizes.mDeadBlockList,
+           "Memory used by DMD's dead block list.");
+
 #undef REPORT
 
     return NS_OK;
   }
 
 private:
   ~DMDReporter() {}
 };