Bug 717853 - Add a native version of DMD. r=jlebar, r=glandium, a=jlebar
author: Nicholas Nethercote <nnethercote@mozilla.com>
Mon, 10 Dec 2012 16:05:07 -0800
changeset 118825 266c3b5c9f0fbfd11e6431a1c66e03810c84a8c7
parent 118824 0a3bc21498fdbcf7c475a3a42bc0885e1bb8e2bb
child 118826 48ab9f9c0faa1f17785ecd3238a5c8e74a93e14c
push id: 2960
push user: ryanvm@gmail.com
push date: Fri, 14 Dec 2012 01:37:23 +0000
treeherder: mozilla-aurora@4db8f6cdd896 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: jlebar, glandium, jlebar
bugs: 717853
milestone: 19.0a2
Bug 717853 - Add a native version of DMD. r=jlebar, r=glandium, a=jlebar
b2g/installer/package-manifest.in
config/config.mk
configure.in
dom/base/nsJSEnvironment.cpp
extensions/spellcheck/hunspell/src/hunspell_alloc_hooks.h
extensions/spellcheck/hunspell/src/mozHunspell.cpp
gfx/thebes/gfxAndroidPlatform.cpp
js/src/config/config.mk
memory/replace/dmd/DMD.cpp
memory/replace/dmd/DMD.h
memory/replace/dmd/Makefile.in
memory/replace/dmd/README
memory/replace/dmd/test-expected.dmd
storage/src/mozStorageService.cpp
toolkit/library/Makefile.in
toolkit/toolkit-makefiles.sh
toolkit/toolkit-tiers.mk
xpcom/base/nsIMemoryReporter.idl
xpcom/base/nsMemoryInfoDumper.cpp
xpcom/base/nsMemoryReporterManager.cpp
xpcom/base/nsStackWalk.cpp
--- a/b2g/installer/package-manifest.in
+++ b/b2g/installer/package-manifest.in
@@ -49,16 +49,19 @@
 #ifndef MOZ_STATIC_JS
 @BINPATH@/@DLL_PREFIX@mozjs@DLL_SUFFIX@
 #endif
 @BINPATH@/@DLL_PREFIX@plc4@DLL_SUFFIX@
 @BINPATH@/@DLL_PREFIX@plds4@DLL_SUFFIX@
 @BINPATH@/@DLL_PREFIX@xpcom@DLL_SUFFIX@
 @BINPATH@/@DLL_PREFIX@nspr4@DLL_SUFFIX@
 @BINPATH@/@DLL_PREFIX@mozalloc@DLL_SUFFIX@
+#ifdef MOZ_DMD
+@BINPATH@/@DLL_PREFIX@dmd@DLL_SUFFIX@
+#endif
 #ifdef XP_MACOSX
 @BINPATH@/XUL
 #else
 @BINPATH@/@DLL_PREFIX@xul@DLL_SUFFIX@
 #endif
 #ifdef XP_MACOSX
 @BINPATH@/@MOZ_CHILD_PROCESS_NAME@.app/
 #else
--- a/config/config.mk
+++ b/config/config.mk
@@ -194,27 +194,27 @@ OS_CFLAGS += -UDEBUG -DNDEBUG
 ifdef HAVE_64BIT_OS
 OS_LDFLAGS += -DEBUG -OPT:REF,ICF
 else
 OS_LDFLAGS += -DEBUG -OPT:REF
 endif
 endif
 
 #
-# Handle trace-malloc in optimized builds.
+# Handle trace-malloc and DMD in optimized builds.
 # No opt to give sane callstacks.
 #
-ifdef NS_TRACE_MALLOC
+ifneq (,$(NS_TRACE_MALLOC)$(MOZ_DMD))
 MOZ_OPTIMIZE_FLAGS=-Zi -Od -UDEBUG -DNDEBUG
 ifdef HAVE_64BIT_OS
 OS_LDFLAGS = -DEBUG -PDB:NONE -OPT:REF,ICF
 else
 OS_LDFLAGS = -DEBUG -PDB:NONE -OPT:REF
 endif
-endif # NS_TRACE_MALLOC
+endif # NS_TRACE_MALLOC || MOZ_DMD
 
 endif # MOZ_DEBUG
 
 # We don't build a static CRT when building a custom CRT,
 # it appears to be broken. So don't link to jemalloc if
 # the Makefile wants static CRT linking.
 ifeq ($(MOZ_MEMORY)_$(USE_STATIC_LIBS),1_1)
 # Disable default CRT libs and add the right lib path for the linker
@@ -463,30 +463,30 @@ endif # FAIL_ON_WARNINGS
 
 ifeq ($(OS_ARCH)_$(GNU_CC),WINNT_)
 #// Currently, unless USE_STATIC_LIBS is defined, the multithreaded
 #// DLL version of the RTL is used...
 #//
 #//------------------------------------------------------------------------
 ifdef USE_STATIC_LIBS
 RTL_FLAGS=-MT          # Statically linked multithreaded RTL
-ifneq (,$(MOZ_DEBUG)$(NS_TRACE_MALLOC))
+ifneq (,$(MOZ_DEBUG)$(NS_TRACE_MALLOC)$(MOZ_DMD))
 ifndef MOZ_NO_DEBUG_RTL
 RTL_FLAGS=-MTd         # Statically linked multithreaded MSVC4.0 debug RTL
 endif
-endif # MOZ_DEBUG || NS_TRACE_MALLOC
+endif # MOZ_DEBUG || NS_TRACE_MALLOC || MOZ_DMD
 
 else # !USE_STATIC_LIBS
 
 RTL_FLAGS=-MD          # Dynamically linked, multithreaded RTL
-ifneq (,$(MOZ_DEBUG)$(NS_TRACE_MALLOC))
+ifneq (,$(MOZ_DEBUG)$(NS_TRACE_MALLOC)$(MOZ_DMD))
 ifndef MOZ_NO_DEBUG_RTL
 RTL_FLAGS=-MDd         # Dynamically linked, multithreaded MSVC4.0 debug RTL
 endif
-endif # MOZ_DEBUG || NS_TRACE_MALLOC
+endif # MOZ_DEBUG || NS_TRACE_MALLOC || MOZ_DMD
 endif # USE_STATIC_LIBS
 endif # WINNT && !GNU_CC
 
 ifeq ($(OS_ARCH),Darwin)
 # Compiling ObjC requires an Apple compiler anyway, so it's ok to set
 # host CMFLAGS here.
 HOST_CMFLAGS += -fobjc-exceptions
 HOST_CMMFLAGS += -fobjc-exceptions
--- a/configure.in
+++ b/configure.in
@@ -6904,28 +6904,55 @@ if test -n "$MOZ_DEBUG"; then
     AC_DEFINE(MOZ_DUMP_PAINTING)
 fi
 
 dnl ========================================================
 dnl = Enable trace malloc
 dnl ========================================================
 NS_TRACE_MALLOC=${MOZ_TRACE_MALLOC}
 MOZ_ARG_ENABLE_BOOL(trace-malloc,
-[  --enable-trace-malloc   Enable malloc tracing; also disables jemalloc],
+[  --enable-trace-malloc   Enable malloc tracing; also disables DMD and jemalloc],
     NS_TRACE_MALLOC=1,
     NS_TRACE_MALLOC= )
 if test "$NS_TRACE_MALLOC"; then
   # Please, Mr. Linker Man, don't take away our symbol names
   MOZ_COMPONENTS_VERSION_SCRIPT_LDFLAGS=
   USE_ELF_DYNSTR_GC=
   AC_DEFINE(NS_TRACE_MALLOC)
 fi
 AC_SUBST(NS_TRACE_MALLOC)
 
 dnl ========================================================
+dnl = Enable DMD
+dnl ========================================================
+
+MOZ_ARG_ENABLE_BOOL(dmd,
+[  --enable-dmd            Enable DMD; also enables jemalloc and replace-malloc and disables DMDV],
+    MOZ_DMD=1,
+    MOZ_DMD= )
+
+if test "$NS_TRACE_MALLOC"; then        # trace-malloc disables DMD
+    MOZ_DMD=
+fi
+if test "$MOZ_DMD"; then
+    USE_ELF_DYNSTR_GC=
+    AC_DEFINE(MOZ_DMD)
+
+    if test "${CPU_ARCH}" = "arm"; then
+        CFLAGS="$CFLAGS -funwind-tables"
+        CXXFLAGS="$CXXFLAGS -funwind-tables"
+    fi
+
+    MOZ_MEMORY=1                        # DMD enables jemalloc
+    MOZ_REPLACE_MALLOC=1                # DMD enables replace-malloc
+    MOZ_DMDV=                           # DMD disables DMDV
+fi
+AC_SUBST(MOZ_DMD)
+
+dnl ========================================================
 dnl = Enable jemalloc
 dnl ========================================================
 MOZ_ARG_ENABLE_BOOL(jemalloc,
 [  --enable-jemalloc       Replace memory allocator with jemalloc],
     MOZ_MEMORY=1,
     MOZ_MEMORY=)
 
 if test "$NS_TRACE_MALLOC"; then
@@ -7510,19 +7537,19 @@ dnl = Support for demangling undefined s
 dnl ========================================================
 if test -z "$SKIP_LIBRARY_CHECKS"; then
     AC_LANG_SAVE
     AC_LANG_CPLUSPLUS
     AC_CHECK_FUNCS(__cxa_demangle, HAVE_DEMANGLE=1, HAVE_DEMANGLE=)
     AC_LANG_RESTORE
 fi
 
-# Demangle only for debug or trace-malloc builds
+# Demangle only for debug or trace-malloc or DMD builds
 MOZ_DEMANGLE_SYMBOLS=
-if test "$HAVE_DEMANGLE" && test "$MOZ_DEBUG" -o "$NS_TRACE_MALLOC"; then
+if test "$HAVE_DEMANGLE" && test "$MOZ_DEBUG" -o "$NS_TRACE_MALLOC" -o "$MOZ_DMD"; then
     MOZ_DEMANGLE_SYMBOLS=1
     AC_DEFINE(MOZ_DEMANGLE_SYMBOLS)
 fi
 AC_SUBST(MOZ_DEMANGLE_SYMBOLS)
 
 dnl ========================================================
 dnl = Support for gcc stack unwinding (from gcc 3.3)
 dnl ========================================================
@@ -8317,17 +8344,17 @@ fi
 
 dnl Build Sync Services if required
 AC_SUBST(MOZ_SERVICES_SYNC)
 if test -n "$MOZ_SERVICES_SYNC"; then
   AC_DEFINE(MOZ_SERVICES_SYNC)
 fi
 
 dnl ========================================================
-if test "$MOZ_DEBUG" -o "$NS_TRACE_MALLOC"; then
+if test "$MOZ_DEBUG" -o "$NS_TRACE_MALLOC" -o "$MOZ_DMD"; then
     MOZ_COMPONENTS_VERSION_SCRIPT_LDFLAGS=
 fi
 
 if test "$MOZ_APP_COMPONENT_INCLUDE"; then
   AC_DEFINE_UNQUOTED(MOZ_APP_COMPONENT_INCLUDE, "$MOZ_APP_COMPONENT_INCLUDE")
 fi
 
 if test "$MOZ_APP_COMPONENT_MODULES"; then
--- a/dom/base/nsJSEnvironment.cpp
+++ b/dom/base/nsJSEnvironment.cpp
@@ -2621,16 +2621,80 @@ static JSFunctionSpec TraceMallocFunctio
     JS_FS("TraceMallocCloseLogFD",      TraceMallocCloseLogFD,      1, 0),
     JS_FS("TraceMallocLogTimestamp",    TraceMallocLogTimestamp,    1, 0),
     JS_FS("TraceMallocDumpAllocations", TraceMallocDumpAllocations, 1, 0),
     JS_FS_END
 };
 
 #endif /* NS_TRACE_MALLOC */
 
+#ifdef MOZ_DMD
+
+#include <errno.h>
+
+namespace mozilla {
+namespace dmd {
+
+// See https://wiki.mozilla.org/Performance/MemShrink/DMD for instructions on
+// how to use DMD.
+
+static JSBool
+MaybeReportAndDump(JSContext *cx, unsigned argc, jsval *vp, bool report)
+{
+  JSString *str = JS_ValueToString(cx, argc ? JS_ARGV(cx, vp)[0] : JSVAL_VOID);
+  if (!str)
+    return JS_FALSE;
+  JSAutoByteString pathname(cx, str);
+  if (!pathname)
+    return JS_FALSE;
+
+  FILE* fp = fopen(pathname.ptr(), "w");
+  if (!fp) {
+    JS_ReportError(cx, "DMD can't open %s: %s",
+                   pathname.ptr(), strerror(errno));
+    return JS_FALSE;
+  }
+
+  if (report) {
+    fprintf(stderr, "DMD: running reporters...\n");
+    dmd::RunReporters();
+  }
+  dmd::Writer writer(FpWrite, fp);
+  dmd::Dump(writer);
+
+  fclose(fp);
+
+  JS_SET_RVAL(cx, vp, JSVAL_VOID);
+  return JS_TRUE;
+}
+
+static JSBool
+ReportAndDump(JSContext *cx, unsigned argc, jsval *vp)
+{
+  return MaybeReportAndDump(cx, argc, vp, /* report = */ true);
+}
+
+static JSBool
+Dump(JSContext *cx, unsigned argc, jsval *vp)
+{
+  return MaybeReportAndDump(cx, argc, vp, /* report = */ false);
+}
+
+
+} // namespace dmd
+} // namespace mozilla
+
+static JSFunctionSpec DMDFunctions[] = {
+    JS_FS("DMDReportAndDump", dmd::ReportAndDump, 1, 0),
+    JS_FS("DMDDump",          dmd::Dump,          1, 0),
+    JS_FS_END
+};
+
+#endif  // defined(MOZ_DMD)
+
 #ifdef MOZ_JPROF
 
 #include <signal.h>
 
 inline bool
 IsJProfAction(struct sigaction *action)
 {
     return (action->sa_sigaction &&
@@ -2730,25 +2794,32 @@ static JSFunctionSpec JProfFunctions[] =
 
 #endif /* defined(MOZ_JPROF) */
 
 #ifdef MOZ_DMDV
 
 // See https://wiki.mozilla.org/Performance/MemShrink/DMD for instructions on
 // how to use DMDV.
 
+namespace mozilla {
+namespace dmdv {
+
 static JSBool
-DMDVCheckAndDumpJS(JSContext *cx, unsigned argc, jsval *vp)
+ReportAndDump(JSContext *cx, unsigned argc, jsval *vp)
 {
-  mozilla::DMDVCheckAndDump();
+  mozilla::dmd::RunReporters();
+  mozilla::dmdv::Dump();
   return JS_TRUE;
 }
 
+} // namespace dmdv
+} // namespace mozilla
+
 static JSFunctionSpec DMDVFunctions[] = {
-    JS_FS("DMDV",                       DMDVCheckAndDumpJS,         0, 0),
+    JS_FS("DMDVReportAndDump", dmdv::ReportAndDump, 0, 0),
     JS_FS_END
 };
 
 #endif /* defined(MOZ_DMDV) */
 
 nsresult
 nsJSContext::InitClasses(JSObject* aGlobalObj)
 {
@@ -2762,16 +2833,21 @@ nsJSContext::InitClasses(JSObject* aGlob
   // Attempt to initialize profiling functions
   ::JS_DefineProfilingFunctions(mContext, aGlobalObj);
 
 #ifdef NS_TRACE_MALLOC
   // Attempt to initialize TraceMalloc functions
   ::JS_DefineFunctions(mContext, aGlobalObj, TraceMallocFunctions);
 #endif
 
+#ifdef MOZ_DMD
+  // Attempt to initialize DMD functions
+  ::JS_DefineFunctions(mContext, aGlobalObj, DMDFunctions);
+#endif
+
 #ifdef MOZ_JPROF
   // Attempt to initialize JProf functions
   ::JS_DefineFunctions(mContext, aGlobalObj, JProfFunctions);
 #endif
 
 #ifdef MOZ_DMDV
   // Attempt to initialize DMDV functions
   ::JS_DefineFunctions(mContext, aGlobalObj, DMDVFunctions);
--- a/extensions/spellcheck/hunspell/src/hunspell_alloc_hooks.h
+++ b/extensions/spellcheck/hunspell/src/hunspell_alloc_hooks.h
@@ -77,17 +77,22 @@ inline void hunspell_free(void* ptr)
   moz_free(ptr);
 }
 #define free(ptr) hunspell_free(ptr)
 
 inline void* hunspell_realloc(void* ptr, size_t size)
 {
   HunspellReportMemoryDeallocation(ptr);
   void* result = moz_realloc(ptr, size);
-  HunspellReportMemoryAllocation(result);
+  if (result) {
+    HunspellReportMemoryAllocation(result);
+  } else {
+    // realloc failed;  undo the HunspellReportMemoryDeallocation from above
+    HunspellReportMemoryAllocation(ptr);
+  }
   return result;
 }
 #define realloc(ptr, size) hunspell_realloc(ptr, size)
 
 inline char* hunspell_strdup(const char* str)
 {
   char* result = moz_strdup(str);
   HunspellReportMemoryAllocation(result);
--- a/extensions/spellcheck/hunspell/src/mozHunspell.cpp
+++ b/extensions/spellcheck/hunspell/src/mozHunspell.cpp
@@ -93,24 +93,24 @@ NS_INTERFACE_MAP_END
 NS_IMPL_CYCLE_COLLECTION_3(mozHunspell,
                            mPersonalDictionary,
                            mEncoder,
                            mDecoder)
 
 // Memory reporting stuff.
 static int64_t gHunspellAllocatedSize = 0;
 
-NS_MEMORY_REPORTER_MALLOC_SIZEOF_FUN(HunspellMallocSizeOfForCounterInc, "hunspell")
-NS_MEMORY_REPORTER_MALLOC_SIZEOF_FUN_UN(HunspellMallocSizeOfForCounterDec)
+NS_MEMORY_REPORTER_MALLOC_SIZEOF_ON_ALLOC_FUN(HunspellMallocSizeOfOnAlloc, "hunspell")
+NS_MEMORY_REPORTER_MALLOC_SIZEOF_ON_FREE_FUN(HunspellMallocSizeOfOnFree)
 
 void HunspellReportMemoryAllocation(void* ptr) {
-  gHunspellAllocatedSize += HunspellMallocSizeOfForCounterInc(ptr);
+  gHunspellAllocatedSize += HunspellMallocSizeOfOnAlloc(ptr);
 }
 void HunspellReportMemoryDeallocation(void* ptr) {
-  gHunspellAllocatedSize -= HunspellMallocSizeOfForCounterDec(ptr);
+  gHunspellAllocatedSize -= HunspellMallocSizeOfOnFree(ptr);
 }
 static int64_t HunspellGetCurrentAllocatedSize() {
   return gHunspellAllocatedSize;
 }
 
 NS_MEMORY_REPORTER_IMPLEMENT(Hunspell,
   "explicit/spell-check",
   KIND_HEAP,
--- a/gfx/thebes/gfxAndroidPlatform.cpp
+++ b/gfx/thebes/gfxAndroidPlatform.cpp
@@ -41,44 +41,44 @@ GetFreetypeSize()
 NS_MEMORY_REPORTER_IMPLEMENT(Freetype,
     "explicit/freetype",
     KIND_HEAP,
     UNITS_BYTES,
     GetFreetypeSize,
     "Memory used by Freetype."
 )
 
-NS_MEMORY_REPORTER_MALLOC_SIZEOF_FUN(FreetypeMallocSizeOfForCounterInc, "freetype")
-NS_MEMORY_REPORTER_MALLOC_SIZEOF_FUN_UN(FreetypeMallocSizeOfForCounterDec)
+NS_MEMORY_REPORTER_MALLOC_SIZEOF_ON_ALLOC_FUN(FreetypeMallocSizeOfOnAlloc, "freetype")
+NS_MEMORY_REPORTER_MALLOC_SIZEOF_ON_FREE_FUN(FreetypeMallocSizeOfOnFree)
 
 static void*
 CountingAlloc(FT_Memory memory, long size)
 {
     void *p = malloc(size);
-    sFreetypeMemoryUsed += FreetypeMallocSizeOfForCounterInc(p);
+    sFreetypeMemoryUsed += FreetypeMallocSizeOfOnAlloc(p);
     return p;
 }
 
 static void
 CountingFree(FT_Memory memory, void* p)
 {
-    sFreetypeMemoryUsed -= FreetypeMallocSizeOfForCounterDec(p);
+    sFreetypeMemoryUsed -= FreetypeMallocSizeOfOnFree(p);
     free(p);
 }
 
 static void*
 CountingRealloc(FT_Memory memory, long cur_size, long new_size, void* p)
 {
-    sFreetypeMemoryUsed -= FreetypeMallocSizeOfForCounterDec(p);
+    sFreetypeMemoryUsed -= FreetypeMallocSizeOfOnFree(p);
     void *pnew = realloc(p, new_size);
     if (pnew) {
-        sFreetypeMemoryUsed += FreetypeMallocSizeOfForCounterInc(pnew);
+        sFreetypeMemoryUsed += FreetypeMallocSizeOfOnAlloc(pnew);
     } else {
         // realloc failed;  undo the decrement from above
-        sFreetypeMemoryUsed += FreetypeMallocSizeOfForCounterInc(p);
+        sFreetypeMemoryUsed += FreetypeMallocSizeOfOnAlloc(p);
     }
     return pnew;
 }
 
 gfxAndroidPlatform::gfxAndroidPlatform()
 {
     // A custom allocator.  It counts allocations, enabling memory reporting.
     sFreetypeMemoryRecord.user    = nullptr;
--- a/js/src/config/config.mk
+++ b/js/src/config/config.mk
@@ -194,27 +194,27 @@ OS_CFLAGS += -UDEBUG -DNDEBUG
 ifdef HAVE_64BIT_OS
 OS_LDFLAGS += -DEBUG -OPT:REF,ICF
 else
 OS_LDFLAGS += -DEBUG -OPT:REF
 endif
 endif
 
 #
-# Handle trace-malloc in optimized builds.
+# Handle trace-malloc and DMD in optimized builds.
 # No opt to give sane callstacks.
 #
-ifdef NS_TRACE_MALLOC
+ifneq (,$(NS_TRACE_MALLOC)$(MOZ_DMD))
 MOZ_OPTIMIZE_FLAGS=-Zi -Od -UDEBUG -DNDEBUG
 ifdef HAVE_64BIT_OS
 OS_LDFLAGS = -DEBUG -PDB:NONE -OPT:REF,ICF
 else
 OS_LDFLAGS = -DEBUG -PDB:NONE -OPT:REF
 endif
-endif # NS_TRACE_MALLOC
+endif # NS_TRACE_MALLOC || MOZ_DMD
 
 endif # MOZ_DEBUG
 
 # We don't build a static CRT when building a custom CRT,
 # it appears to be broken. So don't link to jemalloc if
 # the Makefile wants static CRT linking.
 ifeq ($(MOZ_MEMORY)_$(USE_STATIC_LIBS),1_1)
 # Disable default CRT libs and add the right lib path for the linker
@@ -463,30 +463,30 @@ endif # FAIL_ON_WARNINGS
 
 ifeq ($(OS_ARCH)_$(GNU_CC),WINNT_)
 #// Currently, unless USE_STATIC_LIBS is defined, the multithreaded
 #// DLL version of the RTL is used...
 #//
 #//------------------------------------------------------------------------
 ifdef USE_STATIC_LIBS
 RTL_FLAGS=-MT          # Statically linked multithreaded RTL
-ifneq (,$(MOZ_DEBUG)$(NS_TRACE_MALLOC))
+ifneq (,$(MOZ_DEBUG)$(NS_TRACE_MALLOC)$(MOZ_DMD))
 ifndef MOZ_NO_DEBUG_RTL
 RTL_FLAGS=-MTd         # Statically linked multithreaded MSVC4.0 debug RTL
 endif
-endif # MOZ_DEBUG || NS_TRACE_MALLOC
+endif # MOZ_DEBUG || NS_TRACE_MALLOC || MOZ_DMD
 
 else # !USE_STATIC_LIBS
 
 RTL_FLAGS=-MD          # Dynamically linked, multithreaded RTL
-ifneq (,$(MOZ_DEBUG)$(NS_TRACE_MALLOC))
+ifneq (,$(MOZ_DEBUG)$(NS_TRACE_MALLOC)$(MOZ_DMD))
 ifndef MOZ_NO_DEBUG_RTL
 RTL_FLAGS=-MDd         # Dynamically linked, multithreaded MSVC4.0 debug RTL
 endif
-endif # MOZ_DEBUG || NS_TRACE_MALLOC
+endif # MOZ_DEBUG || NS_TRACE_MALLOC || MOZ_DMD
 endif # USE_STATIC_LIBS
 endif # WINNT && !GNU_CC
 
 ifeq ($(OS_ARCH),Darwin)
 # Compiling ObjC requires an Apple compiler anyway, so it's ok to set
 # host CMFLAGS here.
 HOST_CMFLAGS += -fobjc-exceptions
 HOST_CMMFLAGS += -fobjc-exceptions
new file mode 100644
--- /dev/null
+++ b/memory/replace/dmd/DMD.cpp
@@ -0,0 +1,2051 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "DMD.h"
+
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef XP_WIN
+#error "Windows not supported yet, sorry."
+// XXX: This will be needed when Windows is supported (bug 819839).
+//#include <process.h>
+//#define getpid _getpid
+#else
+#include <unistd.h>
+#endif
+
+#ifdef ANDROID
+#include <android/log.h>
+#endif
+
+#include "nscore.h"
+#include "nsStackWalk.h"
+
+#include "js/HashTable.h"
+#include "js/Vector.h"
+
+#include "mozilla/Assertions.h"
+#include "mozilla/HashFunctions.h"
+#include "mozilla/Likely.h"
+
+// MOZ_REPLACE_ONLY_MEMALIGN saves us from having to define
+// replace_{posix_memalign,aligned_alloc,valloc}.  It requires defining
+// PAGE_SIZE.  Nb: sysconf() is expensive, but it's only used for (the obsolete
+// and rarely used) valloc.
+#define MOZ_REPLACE_ONLY_MEMALIGN 1
+#define PAGE_SIZE sysconf(_SC_PAGESIZE)
+#include "replace_malloc.h"
+#undef MOZ_REPLACE_ONLY_MEMALIGN
+#undef PAGE_SIZE
+
+namespace mozilla {
+namespace dmd {
+
+//---------------------------------------------------------------------------
+// Utilities
+//---------------------------------------------------------------------------
+
+#ifndef DISALLOW_COPY_AND_ASSIGN
+#define DISALLOW_COPY_AND_ASSIGN(T) \
+  T(const T&);                      \
+  void operator=(const T&)
+#endif
+
+static const malloc_table_t* gMallocTable = nullptr;
+
+// This enables/disables DMD.
+static bool gIsDMDRunning = false;
+
+enum Mode {
+  Normal,   // run normally
+  Test,     // do some basic correctness tests
+  Stress    // do some performance stress tests
+};
+static Mode gMode = Normal;
+
+// This provides infallible allocations (they abort on OOM).  We use it for all
+// of DMD's own allocations, which fall into the following three cases.
+// - Direct allocations (the easy case).
+// - Indirect allocations in js::{Vector,HashSet,HashMap} -- this class serves
+//   as their AllocPolicy.
+// - Other indirect allocations (e.g. NS_StackWalk) -- see the comments on
+//   Thread::mBlockIntercepts and in replace_malloc for how these work.
+//
+class InfallibleAllocPolicy
+{
+  static void ExitOnFailure(const void* aP);
+
+public:
+  static void* malloc_(size_t aSize)
+  {
+    void* p = gMallocTable->malloc(aSize);
+    ExitOnFailure(p);
+    return p;
+  }
+
+  static void* calloc_(size_t aSize)
+  {
+    void* p = gMallocTable->calloc(1, aSize);
+    ExitOnFailure(p);
+    return p;
+  }
+
+  // This realloc_ is the one we use for direct reallocs within DMD.
+  static void* realloc_(void* aPtr, size_t aNewSize)
+  {
+    void* p = gMallocTable->realloc(aPtr, aNewSize);
+    ExitOnFailure(p);
+    return p;
+  }
+
+  // This realloc_ is required for this to be a JS container AllocPolicy.
+  static void* realloc_(void* aPtr, size_t aOldSize, size_t aNewSize)
+  {
+    return InfallibleAllocPolicy::realloc_(aPtr, aNewSize);
+  }
+
+  static void* memalign_(size_t aAlignment, size_t aSize)
+  {
+    void* p = gMallocTable->memalign(aAlignment, aSize);
+    ExitOnFailure(p);
+    return p;
+  }
+
+  static void free_(void* aPtr) { gMallocTable->free(aPtr); }
+
+  static char* strdup_(const char* aStr)
+  {
+    char* s = (char*) gMallocTable->malloc(strlen(aStr) + 1);
+    ExitOnFailure(s);
+    strcpy(s, aStr);
+    return s;
+  }
+
+  template <class T>
+  static T* new_()
+  {
+    void* mem = malloc_(sizeof(T));
+    ExitOnFailure(mem);
+    return new (mem) T;
+  }
+
+  template <class T, typename P1>
+  static T* new_(P1 p1)
+  {
+    void* mem = malloc_(sizeof(T));
+    ExitOnFailure(mem);
+    return new (mem) T(p1);
+  }
+
+  static void reportAllocOverflow() { ExitOnFailure(nullptr); }
+};
+
+static void
+StatusMsg(const char* aFmt, ...)
+{
+  va_list ap;
+  va_start(ap, aFmt);
+#ifdef ANDROID
+  __android_log_vprint(ANDROID_LOG_INFO, "DMD", aFmt, ap);
+#else
+  // The +64 is easily enough for the "DMD[<pid>] " prefix and the NUL.
+  char* fmt = (char*) InfallibleAllocPolicy::malloc_(strlen(aFmt) + 64);
+  sprintf(fmt, "DMD[%d] %s", getpid(), aFmt);
+  vfprintf(stderr, fmt, ap);
+  InfallibleAllocPolicy::free_(fmt);
+#endif
+  va_end(ap);
+}
+
+/* static */ void
+InfallibleAllocPolicy::ExitOnFailure(const void* aP)
+{
+  if (!aP) {
+    StatusMsg("out of memory;  aborting\n");
+    MOZ_CRASH();
+  }
+}
+
+void
+Writer::Write(const char* aFmt, ...) const
+{
+  va_list ap;
+  va_start(ap, aFmt);
+  mWriterFun(mWriteState, aFmt, ap);
+  va_end(ap);
+}
+
+#define W(...) aWriter.Write(__VA_ARGS__);
+
+#define WriteTitle(...)                                                       \
+  W("------------------------------------------------------------------\n");  \
+  W(__VA_ARGS__);                                                             \
+  W("------------------------------------------------------------------\n\n");
+
+MOZ_EXPORT void
+FpWrite(void* aWriteState, const char* aFmt, va_list aAp)
+{
+  FILE* fp = static_cast<FILE*>(aWriteState);
+  vfprintf(fp, aFmt, aAp);
+}
+
+static double
+Percent(size_t part, size_t whole)
+{
+  return (whole == 0) ? 0 : 100 * (double)part / whole;
+}
+
+// Commifies the number and prepends a '~' if requested.  Best used with
+// |kBufLen| and |gBuf[1234]|, because they should be big enough for any number
+// we'll see.
+static char*
+Show(size_t n, char* buf, size_t buflen, bool addTilde = false)
+{
+  int nc = 0, i = 0, lasti = buflen - 2;
+  buf[lasti + 1] = '\0';
+  if (n == 0) {
+    buf[lasti - i] = '0';
+    i++;
+  } else {
+    while (n > 0) {
+      if (((i - nc) % 3) == 0 && i != 0) {
+        buf[lasti - i] = ',';
+        i++;
+        nc++;
+      }
+      buf[lasti - i] = static_cast<char>((n % 10) + '0');
+      i++;
+      n /= 10;
+    }
+  }
+  int firstCharIndex = lasti - i + 1;
+
+  if (addTilde) {
+    firstCharIndex--;
+    buf[firstCharIndex] = '~';
+  }
+
+  MOZ_ASSERT(firstCharIndex >= 0);
+  return &buf[firstCharIndex];
+}
+
+static const char*
+Plural(size_t aN)
+{
+  return aN == 1 ? "" : "s";
+}
+
+// Used by calls to Show().
+static const size_t kBufLen = 64;
+static char gBuf1[kBufLen];
+static char gBuf2[kBufLen];
+static char gBuf3[kBufLen];
+static char gBuf4[kBufLen];
+
+static const size_t kNoSize = size_t(-1);
+
+//---------------------------------------------------------------------------
+// The global lock
+//---------------------------------------------------------------------------
+
+#ifdef XP_WIN
+
+#error "Windows not supported yet, sorry."
+
+#else
+
+#include <pthread.h>
+#include <sys/types.h>
+
+// MutexBase implements the platform-specific parts of a mutex.
+class MutexBase
+{
+  pthread_mutex_t mMutex;
+
+  DISALLOW_COPY_AND_ASSIGN(MutexBase);
+
+public:
+  MutexBase()
+    : mMutex(PTHREAD_MUTEX_INITIALIZER)
+  {}
+
+  void Lock()
+  {
+    pthread_mutex_lock(&mMutex);
+  }
+
+  void Unlock()
+  {
+    pthread_mutex_unlock(&mMutex);
+  }
+};
+
+#endif
+
+class Mutex : private MutexBase
+{
+  bool mIsLocked;
+
+  DISALLOW_COPY_AND_ASSIGN(Mutex);
+
+public:
+  Mutex()
+    : mIsLocked(false)
+  {}
+
+  void Lock()
+  {
+    MutexBase::Lock();
+    MOZ_ASSERT(!mIsLocked);
+    mIsLocked = true;
+  }
+
+  void Unlock()
+  {
+    MOZ_ASSERT(mIsLocked);
+    mIsLocked = false;
+    MutexBase::Unlock();
+  }
+
+  bool IsLocked()
+  {
+    return mIsLocked;
+  }
+};
+
+// This lock must be held while manipulating global state, such as
+// gStackTraceTable, gLiveBlockTable, etc.
+static Mutex* gStateLock = nullptr;
+
+class AutoLockState
+{
+  DISALLOW_COPY_AND_ASSIGN(AutoLockState);
+
+public:
+  AutoLockState()
+  {
+    gStateLock->Lock();
+  }
+  ~AutoLockState()
+  {
+    gStateLock->Unlock();
+  }
+};
+
+class AutoUnlockState
+{
+  DISALLOW_COPY_AND_ASSIGN(AutoUnlockState);
+
+public:
+  AutoUnlockState()
+  {
+    gStateLock->Unlock();
+  }
+  ~AutoUnlockState()
+  {
+    gStateLock->Lock();
+  }
+};
+
+//---------------------------------------------------------------------------
+// Thread-local storage and blocking of intercepts
+//---------------------------------------------------------------------------
+
+#ifdef XP_WIN
+
+#error "Windows not supported yet, sorry."
+
+#else
+
+#include <pthread.h>
+
+#define DMD_TLS_INDEX_TYPE               pthread_key_t
+#define DMD_CREATE_TLS_INDEX(i_)         pthread_key_create(&(i_), nullptr)
+#define DMD_DESTROY_TLS_INDEX(i_)        pthread_key_delete((i_))
+#define DMD_GET_TLS_DATA(i_)             pthread_getspecific((i_))
+#define DMD_SET_TLS_DATA(i_, v_)         pthread_setspecific((i_), (v_))
+
+#endif
+
+static DMD_TLS_INDEX_TYPE gTlsIndex;
+
+class Thread
+{
+  // Required for allocation via InfallibleAllocPolicy::new_.
+  friend class InfallibleAllocPolicy;
+
+  // When true, this blocks intercepts, which allows malloc interception
+  // functions to themselves call malloc.  (Nb: for direct calls to malloc we
+  // can just use InfallibleAllocPolicy::{malloc_,new_}, but we sometimes
+  // indirectly call vanilla malloc via functions like NS_StackWalk.)
+  bool mBlockIntercepts;
+
+  Thread()
+    : mBlockIntercepts(false)
+  {}
+
+  DISALLOW_COPY_AND_ASSIGN(Thread);
+
+public:
+  static Thread* Fetch();
+
+  bool blockIntercepts()
+  {
+    MOZ_ASSERT(!mBlockIntercepts);
+    return mBlockIntercepts = true;
+  }
+
+  bool unblockIntercepts()
+  {
+    MOZ_ASSERT(mBlockIntercepts);
+    return mBlockIntercepts = false;
+  }
+
+  bool interceptsAreBlocked() const
+  {
+    return mBlockIntercepts;
+  }
+};
+
+/* static */ Thread*
+Thread::Fetch()
+{
+  Thread* t = static_cast<Thread*>(DMD_GET_TLS_DATA(gTlsIndex));
+
+  if (MOZ_UNLIKELY(!t)) {
+    // This memory is never freed, even if the thread dies.  It's a leak, but
+    // only a tiny one.
+    t = InfallibleAllocPolicy::new_<Thread>();
+    DMD_SET_TLS_DATA(gTlsIndex, t);
+  }
+
+  return t;
+}
+
+// An object of this class must be created (on the stack) before running any
+// code that might allocate.
+class AutoBlockIntercepts
+{
+  Thread* const mT;
+
+  DISALLOW_COPY_AND_ASSIGN(AutoBlockIntercepts);
+
+public:
+  AutoBlockIntercepts(Thread* aT)
+    : mT(aT)
+  {
+    mT->blockIntercepts();
+  }
+  ~AutoBlockIntercepts()
+  {
+    MOZ_ASSERT(mT->interceptsAreBlocked());
+    mT->unblockIntercepts();
+  }
+};
+
+//---------------------------------------------------------------------------
+// Stack traces
+//---------------------------------------------------------------------------
+
+static void
+PcInfo(const void* aPc, nsCodeAddressDetails* aDetails)
+{
+  // NS_DescribeCodeAddress can (on Linux) acquire a lock inside
+  // the shared library loader.  Another thread might call malloc
+  // while holding that lock (when loading a shared library).  So
+  // we have to exit gStateLock around this call.  For details, see
+  // https://bugzilla.mozilla.org/show_bug.cgi?id=363334#c3
+  {
+    AutoUnlockState unlock;
+    (void)NS_DescribeCodeAddress(const_cast<void*>(aPc), aDetails);
+  }
+  if (!aDetails->function[0]) {
+    strcpy(aDetails->function, "???");
+  }
+}
+
+class StackTrace
+{
+  static const uint32_t MaxDepth = 24;
+
+  uint32_t mLength;             // The number of PCs.
+  void* mPcs[MaxDepth];         // The PCs themselves.
+
+public:
+  StackTrace() : mLength(0) {}
+
+  uint32_t Length() const { return mLength; }
+  void* Pc(uint32_t i) const { MOZ_ASSERT(i < mLength); return mPcs[i]; }
+
+  uint32_t Size() const { return mLength * sizeof(mPcs[0]); }
+
+  // The stack trace returned by this function is interned in gStackTraceTable,
+  // and so is immortal and unmovable.
+  static const StackTrace* Get(Thread* aT);
+
+  void Sort()
+  {
+    qsort(mPcs, mLength, sizeof(mPcs[0]), StackTrace::QsortCmp);
+  }
+
+  void Print(const Writer& aWriter) const;
+
+  // Hash policy.
+
+  typedef StackTrace* Lookup;
+
+  static uint32_t hash(const StackTrace* const& aSt)
+  {
+    return mozilla::HashBytes(aSt->mPcs, aSt->Size());
+  }
+
+  static bool match(const StackTrace* const& aA,
+                    const StackTrace* const& aB)
+  {
+    return aA->mLength == aB->mLength &&
+           memcmp(aA->mPcs, aB->mPcs, aA->Size()) == 0;
+  }
+
+private:
+  static void StackWalkCallback(void* aPc, void* aSp, void* aClosure)
+  {
+    StackTrace* st = (StackTrace*) aClosure;
+
+    // Only fill to MaxDepth.
+    // XXX: bug 818793 will allow early bailouts.
+    if (st->mLength < MaxDepth) {
+      st->mPcs[st->mLength] = aPc;
+      st->mLength++;
+    }
+  }
+
+  static int QsortCmp(const void* aA, const void* aB)
+  {
+    const void* const a = *static_cast<const void* const*>(aA);
+    const void* const b = *static_cast<const void* const*>(aB);
+    if (a < b) return -1;
+    if (a > b) return  1;
+    return 0;
+  }
+};
+
+typedef js::HashSet<StackTrace*, StackTrace, InfallibleAllocPolicy>
+        StackTraceTable;
+static StackTraceTable* gStackTraceTable = nullptr;
+
+// Prints this stack trace, one frame per line, via |aWriter|.
+void
+StackTrace::Print(const Writer& aWriter) const
+{
+  if (mLength == 0) {
+    W("   (empty)\n");
+    return;
+  }
+
+  if (gMode == Test) {
+    // Don't print anything because there's too much variation.
+    W("   (stack omitted due to test mode)\n");
+    return;
+  }
+
+  for (uint32_t i = 0; i < mLength; i++) {
+    nsCodeAddressDetails details;
+    void* pc = mPcs[i];
+    PcInfo(pc, &details);
+    if (details.function[0]) {
+      // Nb: frames for which PcInfo() produced no function name are skipped
+      // entirely, so fewer than mLength lines may be printed.
+      W("   %14p %s[%s +0x%X]\n", pc, details.function, details.library,
+        details.loffset);
+    }
+  }
+}
+
+// Captures the current thread's stack trace and interns it in
+// gStackTraceTable, returning the canonical (shared) StackTrace pointer.
+// Must be called with gStateLock held and intercepts blocked on |aT|.
+/* static */ const StackTrace*
+StackTrace::Get(Thread* aT)
+{
+  MOZ_ASSERT(gStateLock->IsLocked());
+  MOZ_ASSERT(aT->interceptsAreBlocked());
+
+  // On Windows, NS_StackWalk can acquire a lock from the shared library
+  // loader.  Another thread might call malloc while holding that lock (when
+  // loading a shared library).  So we can't be in gStateLock during the call
+  // to NS_StackWalk.  For details, see
+  // https://bugzilla.mozilla.org/show_bug.cgi?id=374829#c8
+  StackTrace tmp;
+  {
+#ifdef XP_WIN
+    AutoUnlockState unlock;
+#endif
+    // In normal operation, skip=3 gets us past various malloc wrappers into
+    // more interesting stuff.  But in test mode we need to skip a bit less to
+    // sufficiently differentiate some similar stacks.
+    uint32_t skip = (gMode == Test) ? 2 : 3;
+    nsresult rv = NS_StackWalk(StackWalkCallback, skip, &tmp, 0, nullptr);
+    if (NS_FAILED(rv) || tmp.mLength == 0) {
+      // Treat a failed walk as an empty trace.
+      tmp.mLength = 0;
+    }
+  }
+
+  // Intern |tmp|: reuse an existing identical trace, else heap-copy it.
+  StackTraceTable::AddPtr p = gStackTraceTable->lookupForAdd(&tmp);
+  if (!p) {
+    StackTrace* stnew = InfallibleAllocPolicy::new_<StackTrace>(tmp);
+    (void)gStackTraceTable->add(p, stnew);
+  }
+  return *p;
+}
+
+//---------------------------------------------------------------------------
+// Heap blocks
+//---------------------------------------------------------------------------
+
+static const char* gUnreportedName = "unreported";
+
+// This is used by both |Block|s and |BlockGroups|.
+// Common key for |Block|s and |BlockGroup|s: the allocation stack trace plus
+// either live-block reporting state or double-report details, discriminated
+// by |mKind|.
+class BlockKey
+{
+protected:
+  enum Kind {
+    Live,               // for all live blocks, reported or not
+    DoubleReport        // for blocks that have been double-reported
+  };
+
+  const Kind mKind;
+
+public:
+  const StackTrace* const mAllocStackTrace;     // never null
+
+protected:
+  union
+  {
+    // Blocks can be reported in two ways.
+    // - The most common is via a memory reporter traversal -- the block is
+    //   reported when the reporter runs, causing DMD to mark it as reported,
+    //   and DMD must clear the marking once it has finished its analysis.
+    // - Less common are ones that are reported immediately on allocation.  DMD
+    //   must *not* clear the markings of these blocks once it has finished its
+    //   analysis.  The |mReportedOnAlloc| field is set for such blocks.
+    struct
+    {
+      const StackTrace* mReportStackTrace;  // nullptr if unreported
+      const char*       mReporterName;      // gUnreportedName if unreported
+      bool              mReportedOnAlloc;   // true if block was reported
+    } mLive;                                //   immediately on allocation
+
+    struct
+    {
+      // When double-reports occur we record (and later print) the stack trace
+      // and reporter name of *both* the reporting locations.
+      // Nb: These are really |* const|, but that confuses some compilers.
+      const StackTrace* mReportStackTrace1;   // const, never null
+      const StackTrace* mReportStackTrace2;   // const, never null
+      const char*       mReporterName1;       // const, never gUnreportedName
+      const char*       mReporterName2;       // const, never gUnreportedName
+    } mDoubleReport;
+  };
+
+  // Use these safer accessors where possible instead of raw union accesses.
+
+  #define GETTER(kind, type, name) \
+    type name() const { \
+      MOZ_ASSERT(mKind == kind); \
+      return m##kind.m##name; \
+    }
+  #define GETTER_AND_SETTER(kind, type, name) \
+    GETTER(kind, type, name) \
+    void Set##name(type a##name) { \
+      MOZ_ASSERT(mKind == kind); \
+      m##kind.m##name = a##name; \
+    }
+
+  GETTER_AND_SETTER(Live, const StackTrace*, ReportStackTrace)
+  GETTER_AND_SETTER(Live, const char*,       ReporterName)
+  GETTER_AND_SETTER(Live, bool,              ReportedOnAlloc)
+
+  GETTER(DoubleReport, const StackTrace*, ReportStackTrace1)
+  GETTER(DoubleReport, const StackTrace*, ReportStackTrace2)
+  GETTER(DoubleReport, const char*,       ReporterName1)
+  GETTER(DoubleReport, const char*,       ReporterName2)
+
+  // Undefine both helper macros so they don't leak past this class.
+  // (This previously undefined |SETTER|, which was never defined, leaving
+  // GETTER_AND_SETTER defined for the rest of the file.)
+  #undef GETTER
+  #undef GETTER_AND_SETTER
+
+public:
+  // This constructor is used for |Live| Blocks.
+  BlockKey(const StackTrace* aAllocStackTrace)
+    : mKind(Live),
+      mAllocStackTrace(aAllocStackTrace)
+  {
+    mLive.mReportStackTrace = nullptr;
+    mLive.mReporterName = gUnreportedName;
+    mLive.mReportedOnAlloc = false;
+    MOZ_ASSERT(IsSaneLiveBlock());
+  }
+
+  // This constructor is used for |DoubleReport| Blocks.
+  BlockKey(const StackTrace* aAllocStackTrace,
+           const StackTrace* aReportStackTrace1,
+           const StackTrace* aReportStackTrace2,
+           const char* aReporterName1, const char* aReporterName2)
+    : mKind(DoubleReport),
+      mAllocStackTrace(aAllocStackTrace)
+  {
+    mDoubleReport.mReportStackTrace1 = aReportStackTrace1;
+    mDoubleReport.mReportStackTrace2 = aReportStackTrace2;
+    mDoubleReport.mReporterName1 = aReporterName1;
+    mDoubleReport.mReporterName2 = aReporterName2;
+    MOZ_ASSERT(IsSaneDoubleReportBlock());
+  }
+
+  // Invariant check: a live block is either fully reported (stack trace and
+  // reporter name both set) or fully unreported.
+  bool IsSaneLiveBlock() const
+  {
+    bool hasReporterName = ReporterName() != gUnreportedName;
+    return mKind == Live &&
+           mAllocStackTrace &&
+           (( ReportStackTrace() &&  hasReporterName) ||
+            (!ReportStackTrace() && !hasReporterName && !ReportedOnAlloc()));
+  }
+
+  // Invariant check: a double-report block has both report stack traces and
+  // both reporter names.
+  bool IsSaneDoubleReportBlock() const
+  {
+    return mKind == DoubleReport &&
+           mAllocStackTrace &&
+           ReportStackTrace1() &&
+           ReportStackTrace2() &&
+           ReporterName1() != gUnreportedName &&
+           ReporterName2() != gUnreportedName;
+  }
+
+  bool IsLive() const { return mKind == Live; }
+
+  bool IsReported() const
+  {
+    MOZ_ASSERT(IsSaneLiveBlock());  // should only call this on live blocks
+    bool isRep = ReporterName() != gUnreportedName;
+    return isRep;
+  }
+
+  // Quasi-hash policy (used by BlockGroup's hash policy).
+  //
+  // Hash() and Match() both assume that identical reporter names have
+  // identical pointers.  In practice this always happens because they are
+  // static strings (as specified in the NS_MEMORY_REPORTER_MALLOC_SIZEOF_FUN
+  // macro).  This is true even for multi-reporters.  (If it ever became
+  // untrue, the worst that would happen is that some blocks that should be in
+  // the same block group would end up in separate block groups.)
+
+  static uint32_t Hash(const BlockKey& aKey)
+  {
+    if (aKey.mKind == Live) {
+      return mozilla::HashGeneric(aKey.mAllocStackTrace,
+                                  aKey.ReportStackTrace(),
+                                  aKey.ReporterName());
+    }
+
+    if (aKey.mKind == DoubleReport) {
+      return mozilla::HashGeneric(aKey.mAllocStackTrace,
+                                  aKey.ReportStackTrace1(),
+                                  aKey.ReportStackTrace2(),
+                                  aKey.ReporterName1(),
+                                  aKey.ReporterName2());
+    }
+
+    MOZ_CRASH();
+  }
+
+  static bool Match(const BlockKey& aA, const BlockKey& aB)
+  {
+    if (aA.mKind == Live && aB.mKind == Live) {
+      return aA.mAllocStackTrace   == aB.mAllocStackTrace &&
+             aA.ReportStackTrace() == aB.ReportStackTrace() &&
+             aA.ReporterName()     == aB.ReporterName();
+    }
+
+    if (aA.mKind == DoubleReport && aB.mKind == DoubleReport) {
+      return aA.mAllocStackTrace    == aB.mAllocStackTrace &&
+             aA.ReportStackTrace1() == aB.ReportStackTrace1() &&
+             aA.ReportStackTrace2() == aB.ReportStackTrace2() &&
+             aA.ReporterName1()     == aB.ReporterName1() &&
+             aA.ReporterName2()     == aB.ReporterName2();
+    }
+
+    MOZ_CRASH();  // Nb: aA.mKind should always equal aB.mKind.
+  }
+};
+
+// The size of a heap block (or the combined size of a group of blocks),
+// split into requested bytes, slop, and a sampled flag.
+class BlockSize
+{
+  static const size_t kSlopBits = sizeof(size_t) * 8 - 1;  // 31 or 63
+
+public:
+  size_t mReq;              // size requested
+  size_t mSlop:kSlopBits;   // additional bytes allocated due to rounding up
+  size_t mSampled:1;        // were one or more blocks contributing to this
+                            //   BlockSize sampled?
+  BlockSize()
+    : mReq(0),
+      mSlop(0),
+      mSampled(false)
+  {}
+
+  BlockSize(size_t aReq, size_t aSlop, bool aSampled)
+    : mReq(aReq),
+      mSlop(aSlop),
+      mSampled(aSampled)
+  {}
+
+  size_t Usable() const { return mReq + mSlop; }
+
+  // Accumulates |aBlockSize| into this one; the sampled flag is sticky.
+  void Add(const BlockSize& aBlockSize)
+  {
+    mReq  += aBlockSize.mReq;
+    mSlop += aBlockSize.mSlop;
+    mSampled = mSampled || aBlockSize.mSampled;
+  }
+
+  static int Cmp(const BlockSize& aA, const BlockSize& aB)
+  {
+    // Primary sort: put bigger usable sizes before smaller usable sizes.
+    if (aA.Usable() > aB.Usable()) return -1;
+    if (aA.Usable() < aB.Usable()) return  1;
+
+    // Secondary sort: put non-sampled groups before sampled groups.
+    if (!aA.mSampled &&  aB.mSampled) return -1;
+    if ( aA.mSampled && !aB.mSampled) return  1;
+
+    return 0;
+  }
+};
+
+// A live heap block.
+class Block : public BlockKey
+{
+public:
+  const BlockSize mBlockSize;
+
+public:
+  // |aSampled| was previously misnamed |aIsExact|: it is forwarded to
+  // BlockSize::mSampled, and callers pass |/* sampled */ true| for blocks
+  // recorded via sampling.  Renamed for accuracy; arguments are positional,
+  // so callers are unaffected.
+  Block(size_t aReqSize, size_t aSlopSize, const StackTrace* aAllocStackTrace,
+        bool aSampled)
+    : BlockKey(aAllocStackTrace),
+      mBlockSize(aReqSize, aSlopSize, aSampled)
+  {}
+
+  void Report(Thread* aT, const char* aReporterName, bool aReportedOnAlloc);
+
+  void UnreportIfNotReportedOnAlloc();
+};
+
+// Nb: js::DefaultHasher<void*> is a high quality hasher.
+typedef js::HashMap<const void*, Block, js::DefaultHasher<const void*>,
+                    InfallibleAllocPolicy> BlockTable;
+static BlockTable* gLiveBlockTable = nullptr;
+
+//---------------------------------------------------------------------------
+// malloc/free callbacks
+//---------------------------------------------------------------------------
+
+// Blocks with usable size below this threshold are sampled rather than
+// recorded exactly (1 == record everything; set from $DMD in Init()).
+static size_t gSampleBelowSize = 0;
+// Running total of actual sizes of small (sub-threshold) allocations; when it
+// crosses gSampleBelowSize, one sampled Block is recorded.
+static size_t gSmallBlockActualSizeCounter = 0;
+
+// Records a just-allocated block in gLiveBlockTable (exactly, or via
+// sampling for small blocks).  Called from the replace_* wrappers.
+static void
+AllocCallback(void* aPtr, size_t aReqSize, Thread* aT)
+{
+  MOZ_ASSERT(gIsDMDRunning);
+
+  if (!aPtr) {
+    return;
+  }
+
+  AutoLockState lock;
+  AutoBlockIntercepts block(aT);
+
+  size_t actualSize = gMallocTable->malloc_usable_size(aPtr);
+  size_t slopSize   = actualSize - aReqSize;
+
+  if (actualSize < gSampleBelowSize) {
+    // If this allocation is smaller than the sample-below size, increment the
+    // cumulative counter.  Then, if that counter now exceeds the sample size,
+    // blame this allocation for gSampleBelowSize bytes.  This precludes the
+    // measurement of slop.
+    gSmallBlockActualSizeCounter += actualSize;
+    if (gSmallBlockActualSizeCounter >= gSampleBelowSize) {
+      gSmallBlockActualSizeCounter -= gSampleBelowSize;
+
+      Block b(gSampleBelowSize, /* slopSize */ 0, StackTrace::Get(aT),
+              /* sampled */ true);
+      (void)gLiveBlockTable->putNew(aPtr, b);
+    }
+  } else {
+    // If this block size is larger than the sample size, record it exactly.
+    Block b(aReqSize, slopSize, StackTrace::Get(aT), /* sampled */ false);
+    (void)gLiveBlockTable->putNew(aPtr, b);
+  }
+}
+
+// Forgets a block that is about to be freed.  A pointer absent from the
+// table (e.g. a sampled-away small block) is silently ignored by remove().
+static void
+FreeCallback(void* aPtr, Thread* aT)
+{
+  MOZ_ASSERT(gIsDMDRunning);
+
+  if (!aPtr) {
+    return;
+  }
+
+  AutoLockState lock;
+  AutoBlockIntercepts block(aT);
+
+  gLiveBlockTable->remove(aPtr);
+}
+
+//---------------------------------------------------------------------------
+// malloc/free interception
+//---------------------------------------------------------------------------
+
+static void Init(const malloc_table_t* aMallocTable);
+
+}   // namespace dmd
+}   // namespace mozilla
+
+// Entry point called by the replace-malloc machinery before any interception
+// happens; hands us the underlying allocator's function table.
+void
+replace_init(const malloc_table_t* aMallocTable)
+{
+  mozilla::dmd::Init(aMallocTable);
+}
+
+void*
+replace_malloc(size_t aSize)
+{
+  using namespace mozilla::dmd;
+
+  if (!gIsDMDRunning) {
+    // DMD hasn't started up, either because it wasn't enabled by the user, or
+    // we're still in Init() and something has indirectly called malloc.  Do a
+    // vanilla malloc.  (In the latter case, if it fails we'll crash.  But
+    // OOM is highly unlikely so early on.)
+    return gMallocTable->malloc(aSize);
+  }
+
+  Thread* t = Thread::Fetch();
+  if (t->interceptsAreBlocked()) {
+    // Intercepts are blocked, which means this must be a call to malloc
+    // triggered indirectly by DMD (e.g. via NS_StackWalk).  Be infallible.
+    return InfallibleAllocPolicy::malloc_(aSize);
+  }
+
+  // This must be a call to malloc from outside DMD.  Intercept it.
+  void* ptr = gMallocTable->malloc(aSize);
+  AllocCallback(ptr, aSize, t);
+  return ptr;
+}
+
+void*
+replace_calloc(size_t aCount, size_t aSize)
+{
+  using namespace mozilla::dmd;
+
+  if (!gIsDMDRunning) {
+    return gMallocTable->calloc(aCount, aSize);
+  }
+
+  Thread* t = Thread::Fetch();
+  if (t->interceptsAreBlocked()) {
+    // |aCount * aSize| can overflow.  gMallocTable->calloc() checks for that
+    // itself, but this infallible path multiplies the operands directly, so
+    // check here lest we allocate a too-small block.  Return nullptr on
+    // overflow, matching calloc()'s behavior.
+    if (aSize != 0 && aCount > size_t(-1) / aSize) {
+      return nullptr;
+    }
+    return InfallibleAllocPolicy::calloc_(aCount * aSize);
+  }
+
+  void* ptr = gMallocTable->calloc(aCount, aSize);
+  // If |ptr| is non-null then |aCount * aSize| didn't overflow, because
+  // gMallocTable->calloc() checks for overflow itself.
+  AllocCallback(ptr, aCount * aSize, t);
+  return ptr;
+}
+
+void*
+replace_realloc(void* aOldPtr, size_t aSize)
+{
+  using namespace mozilla::dmd;
+
+  if (!gIsDMDRunning) {
+    return gMallocTable->realloc(aOldPtr, aSize);
+  }
+
+  Thread* t = Thread::Fetch();
+  if (t->interceptsAreBlocked()) {
+    return InfallibleAllocPolicy::realloc_(aOldPtr, aSize);
+  }
+
+  // If |aOldPtr| is NULL, the call is equivalent to |malloc(aSize)|.
+  if (!aOldPtr) {
+    return replace_malloc(aSize);
+  }
+
+  // Be very careful here!  Must remove the block from the table before doing
+  // the realloc to avoid races, just like in replace_free().
+  // Nb: This does an unnecessary hashtable remove+add if the block doesn't
+  // move, but doing better isn't worth the effort.
+  FreeCallback(aOldPtr, t);
+  void* ptr = gMallocTable->realloc(aOldPtr, aSize);
+  if (ptr) {
+    AllocCallback(ptr, aSize, t);
+  } else {
+    // If realloc fails, we re-insert the old pointer.  It will look like it
+    // was allocated for the first time here, which is untrue, and the slop
+    // bytes will be zero, which may be untrue.  But this case is rare and
+    // doing better isn't worth the effort.
+    AllocCallback(aOldPtr, gMallocTable->malloc_usable_size(aOldPtr), t);
+  }
+  return ptr;
+}
+
+void*
+replace_memalign(size_t aAlignment, size_t aSize)
+{
+  using namespace mozilla::dmd;
+
+  if (!gIsDMDRunning) {
+    return gMallocTable->memalign(aAlignment, aSize);
+  }
+
+  Thread* t = Thread::Fetch();
+  if (t->interceptsAreBlocked()) {
+    return InfallibleAllocPolicy::memalign_(aAlignment, aSize);
+  }
+
+  void* ptr = gMallocTable->memalign(aAlignment, aSize);
+  AllocCallback(ptr, aSize, t);
+  return ptr;
+}
+
+void
+replace_free(void* aPtr)
+{
+  using namespace mozilla::dmd;
+
+  if (!gIsDMDRunning) {
+    gMallocTable->free(aPtr);
+    return;
+  }
+
+  Thread* t = Thread::Fetch();
+  if (t->interceptsAreBlocked()) {
+    // Nb: returning a void expression from a void function is legal; this
+    // just frees and returns.
+    return InfallibleAllocPolicy::free_(aPtr);
+  }
+
+  // Do the actual free after updating the table.  Otherwise, another thread
+  // could call malloc and get the freed block and update the table, and then
+  // our update here would remove the newly-malloc'd block.
+  FreeCallback(aPtr, t);
+  gMallocTable->free(aPtr);
+}
+
+namespace mozilla {
+namespace dmd {
+
+//---------------------------------------------------------------------------
+// Block groups
+//---------------------------------------------------------------------------
+
+// A group of one or more heap blocks with a common BlockKey.
+class BlockGroup : public BlockKey
+{
+  friend class FrameGroup;      // FrameGroups are created from BlockGroups
+
+  // The (inherited) BlockKey is used as the key in BlockGroupTable, and the
+  // other members constitute the value, so it's ok for them to be |mutable|.
+private:
+  mutable uint32_t  mNumBlocks;     // number of blocks with this BlockKey
+  mutable BlockSize mCombinedSize;  // combined size of those blocks
+
+public:
+  explicit BlockGroup(const BlockKey& aKey)
+    : BlockKey(aKey),
+      mNumBlocks(0),
+      mCombinedSize()
+  {}
+
+  const BlockSize& CombinedSize() const { return mCombinedSize; }
+
+  // The |const| qualifier is something of a lie, but is necessary so this type
+  // can be used in js::HashSet, and it fits with the |mutable| fields above.
+  void Add(const Block& aB) const
+  {
+    mNumBlocks++;
+    mCombinedSize.Add(aB.mBlockSize);
+  }
+
+  void Print(const Writer& aWriter, uint32_t aM, uint32_t aN,
+             const char* aStr, const char* astr,
+             size_t aCategoryUsableSize, size_t aCumulativeUsableSize,
+             size_t aTotalUsableSize) const;
+
+  // qsort() comparator for arrays of BlockGroup pointers: biggest combined
+  // usable size first (see BlockSize::Cmp).
+  static int QsortCmp(const void* aA, const void* aB)
+  {
+    const BlockGroup* const a = *static_cast<const BlockGroup* const*>(aA);
+    const BlockGroup* const b = *static_cast<const BlockGroup* const*>(aB);
+
+    return BlockSize::Cmp(a->mCombinedSize, b->mCombinedSize);
+  }
+
+  static const char* const kName;   // for PrintSortedGroups
+
+  // Hash policy
+
+  typedef BlockKey Lookup;
+
+  static uint32_t hash(const BlockKey& aKey)
+  {
+    return BlockKey::Hash(aKey);
+  }
+
+  static bool match(const BlockGroup& aBg, const BlockKey& aKey)
+  {
+    return BlockKey::Match(aBg, aKey);
+  }
+};
+
+const char* const BlockGroup::kName = "block";
+
+typedef js::HashSet<BlockGroup, BlockGroup, InfallibleAllocPolicy>
+        BlockGroupTable;
+// Accumulates one BlockGroup per distinct double-report (see Block::Report).
+BlockGroupTable* gDoubleReportBlockGroupTable = nullptr;
+
+// Prints this block group as entry |aM| of |aN|, including counts, sizes,
+// percentages (live groups only), and the relevant stack traces.
+// |aStr|/|astr| are the capitalized/lowercase category names.
+void
+BlockGroup::Print(const Writer& aWriter, uint32_t aM, uint32_t aN,
+                  const char* aStr, const char* astr,
+                  size_t aCategoryUsableSize, size_t aCumulativeUsableSize,
+                  size_t aTotalUsableSize) const
+{
+  // Sampled numbers are approximate, so prefix them with '~'.
+  bool showTilde = mCombinedSize.mSampled;
+
+  W("%s: %s block%s in block group %s of %s\n",
+    aStr,
+    Show(mNumBlocks, gBuf1, kBufLen, showTilde), Plural(mNumBlocks),
+    Show(aM, gBuf2, kBufLen),
+    Show(aN, gBuf3, kBufLen));
+
+  W(" %s bytes (%s requested / %s slop)\n",
+    Show(mCombinedSize.Usable(), gBuf1, kBufLen, showTilde),
+    Show(mCombinedSize.mReq,     gBuf2, kBufLen, showTilde),
+    Show(mCombinedSize.mSlop,    gBuf3, kBufLen, showTilde));
+
+  if (mKind == BlockKey::Live) {
+    W(" %4.2f%% of the heap (%4.2f%% cumulative); "
+      " %4.2f%% of %s (%4.2f%% cumulative)\n",
+      Percent(mCombinedSize.Usable(), aTotalUsableSize),
+      Percent(aCumulativeUsableSize, aTotalUsableSize),
+      Percent(mCombinedSize.Usable(), aCategoryUsableSize),
+      astr,
+      Percent(aCumulativeUsableSize, aCategoryUsableSize));
+  }
+
+  W(" Allocated at\n");
+  mAllocStackTrace->Print(aWriter);
+
+  if (mKind == BlockKey::Live) {
+    if (IsReported()) {
+      W("\n Reported by '%s' at\n", ReporterName());
+      ReportStackTrace()->Print(aWriter);
+    }
+
+  } else if (mKind == BlockKey::DoubleReport) {
+    W("\n Previously reported by '%s' at\n", ReporterName1());
+    ReportStackTrace1()->Print(aWriter);
+
+    W("\n Now reported by '%s' at\n", ReporterName2());
+    ReportStackTrace2()->Print(aWriter);
+
+  } else {
+    MOZ_NOT_REACHED();
+  }
+
+  W("\n");
+}
+
+//---------------------------------------------------------------------------
+// Stack frame groups
+//---------------------------------------------------------------------------
+
+// A group of one or more stack frames (from heap block allocation stack
+// traces) with a common PC.
+class FrameGroup
+{
+  // mPc is used as the key in FrameGroupTable, and the other members
+  // constitute the value, so it's ok for them to be |mutable|.
+  const void* const mPc;
+  mutable size_t    mNumBlocks;
+  mutable size_t    mNumBlockGroups;
+  mutable BlockSize mCombinedSize;
+
+public:
+  explicit FrameGroup(const void* aPc)
+    : mPc(aPc),
+      mNumBlocks(0),
+      mNumBlockGroups(0),
+      mCombinedSize()
+  {}
+
+  const BlockSize& CombinedSize() const { return mCombinedSize; }
+
+  // The |const| qualifier is something of a lie, but is necessary so this type
+  // can be used in js::HashSet, and it fits with the |mutable| fields above.
+  // Folds an entire block group's counts and sizes into this frame group.
+  void Add(const BlockGroup& aBg) const
+  {
+    mNumBlocks += aBg.mNumBlocks;
+    mNumBlockGroups++;
+    mCombinedSize.Add(aBg.mCombinedSize);
+  }
+
+  void Print(const Writer& aWriter, uint32_t aM, uint32_t aN,
+             const char* aStr, const char* astr,
+             size_t aCategoryUsableSize, size_t aCumulativeUsableSize,
+             size_t aTotalUsableSize) const;
+
+  // qsort() comparator: biggest combined usable size first.
+  static int QsortCmp(const void* aA, const void* aB)
+  {
+    const FrameGroup* const a = *static_cast<const FrameGroup* const*>(aA);
+    const FrameGroup* const b = *static_cast<const FrameGroup* const*>(aB);
+
+    return BlockSize::Cmp(a->mCombinedSize, b->mCombinedSize);
+  }
+
+  static const char* const kName;   // for PrintSortedGroups
+
+  // Hash policy
+
+  typedef const void* Lookup;
+
+  static uint32_t hash(const Lookup& aPc)
+  {
+    return mozilla::HashGeneric(aPc);
+  }
+
+  static bool match(const FrameGroup& aFg, const Lookup& aPc)
+  {
+    return aFg.mPc == aPc;
+  }
+};
+
+const char* const FrameGroup::kName = "frame";
+
+typedef js::HashSet<FrameGroup, FrameGroup, InfallibleAllocPolicy>
+        FrameGroupTable;
+
+// Prints this frame group as entry |aM| of |aN|.  Frame groups have no
+// meaningful cumulative size (one block can appear under many frames), so
+// |aCumulativeUsableSize| is ignored.
+void
+FrameGroup::Print(const Writer& aWriter, uint32_t aM, uint32_t aN,
+                  const char* aStr, const char* astr,
+                  size_t aCategoryUsableSize, size_t aCumulativeUsableSize,
+                  size_t aTotalUsableSize) const
+{
+  (void)aCumulativeUsableSize;
+
+  // Sampled numbers are approximate, so prefix them with '~'.
+  bool showTilde = mCombinedSize.mSampled;
+
+  nsCodeAddressDetails details;
+  PcInfo(mPc, &details);
+
+  W("%s: %s block%s and %s block group%s in frame group %s of %s\n",
+    aStr,
+    Show(mNumBlocks, gBuf1, kBufLen, showTilde), Plural(mNumBlocks),
+    Show(mNumBlockGroups, gBuf2, kBufLen, showTilde), Plural(mNumBlockGroups),
+    Show(aM, gBuf3, kBufLen),
+    Show(aN, gBuf4, kBufLen));
+
+  W(" %s bytes (%s requested / %s slop)\n",
+    Show(mCombinedSize.Usable(), gBuf1, kBufLen, showTilde),
+    Show(mCombinedSize.mReq,     gBuf2, kBufLen, showTilde),
+    Show(mCombinedSize.mSlop,    gBuf3, kBufLen, showTilde));
+
+  W(" %4.2f%% of the heap;  %4.2f%% of %s\n",
+    Percent(mCombinedSize.Usable(), aTotalUsableSize),
+    Percent(mCombinedSize.Usable(), aCategoryUsableSize),
+    astr);
+
+  W(" PC is\n");
+  W("   %14p %s[%s +0x%X]\n\n", mPc, details.function, details.library,
+    details.loffset);
+}
+
+//---------------------------------------------------------------------------
+// DMD start-up
+//---------------------------------------------------------------------------
+
+static void RunTestMode(FILE* fp);
+static void RunStressMode();
+
+static const char* gDMDEnvVar = nullptr;
+
+// Given an |aOptionName| like "foo", succeed if |aArg| has the form "foo=blah"
+// (where "blah" is non-empty) and return the pointer to "blah".  |aArg| can
+// have leading space chars (but not other whitespace).
+static const char*
+OptionValueIfMatch(const char* aArg, const char* aOptionName)
+{
+  MOZ_ASSERT(!isspace(*aArg));  // any leading whitespace should not remain
+  size_t optionLen = strlen(aOptionName);
+  // Require "name=value" with a non-empty value; return a pointer into
+  // |aArg| just past the '='.
+  if (strncmp(aArg, aOptionName, optionLen) == 0 && aArg[optionLen] == '=' &&
+      aArg[optionLen + 1]) {
+    return aArg + optionLen + 1;
+  }
+  return nullptr;
+}
+
+// Extracts a |long| value for an option from an argument.  It must be within
+// the range |aMin..aMax| (inclusive).
+static bool
+OptionLong(const char* aArg, const char* aOptionName, long aMin, long aMax,
+           long* aN)
+{
+  if (const char* optionValue = OptionValueIfMatch(aArg, aOptionName)) {
+    char* endPtr;
+    *aN = strtol(optionValue, &endPtr, /* base */ 10);
+    // |!*endPtr| requires that the entire value parsed as a number; the
+    // LONG_MIN/LONG_MAX checks reject strtol()'s out-of-range clamping.
+    if (!*endPtr && aMin <= *aN && *aN <= aMax &&
+        *aN != LONG_MIN && *aN != LONG_MAX) {
+      return true;
+    }
+  }
+  return false;
+}
+
+static const size_t gMaxSampleBelowSize = 100 * 1000 * 1000;    // bytes
+
+// Reports a malformed $DMD entry, prints usage, and exits the process.
+// Does not return.
+static void
+BadArg(const char* aArg)
+{
+  StatusMsg("\n");
+  StatusMsg("Bad entry in the $DMD environment variable: '%s'.\n", aArg);
+  StatusMsg("\n");
+  StatusMsg("Valid values of $DMD are:\n");
+  StatusMsg("- undefined or \"\" or \"0\", which disables DMD, or\n");
+  StatusMsg("- \"1\", which enables it with the default options, or\n");
+  StatusMsg("- a whitespace-separated list of |--option=val| entries, which\n");
+  StatusMsg("  enables it with non-default options.\n");
+  StatusMsg("\n");
+  StatusMsg("The following options are allowed;  defaults are shown in [].\n");
+  StatusMsg("  --sample-below=<1..%d> Sample blocks smaller than this [1]\n",
+            int(gMaxSampleBelowSize));
+  StatusMsg("  --mode=<normal|test|stress>   Which mode to run in? [normal]\n");
+  StatusMsg("\n");
+  exit(1);
+}
+
+// WARNING: this function runs *very* early -- before all static initializers
+// have run.  For this reason, non-scalar globals such as gStateLock and
+// gStackTraceTable are allocated dynamically (so we can guarantee their
+// construction in this function) rather than statically.
+// Parses $DMD, allocates the global tables, and (if enabled) starts
+// intercepting malloc et al.  In test/stress modes this function runs the
+// mode and exits the process without returning.
+static void
+Init(const malloc_table_t* aMallocTable)
+{
+  MOZ_ASSERT(!gIsDMDRunning);
+
+  gMallocTable = aMallocTable;
+
+  // Set defaults of things that can be affected by the $DMD env var.
+  gMode = Normal;
+  gSampleBelowSize = 1;
+
+  // DMD is controlled by the |DMD| environment variable.
+  // - If it's unset or empty or "0", DMD doesn't run.
+  // - Otherwise, the contents dictate DMD's behaviour.
+
+  char* e = getenv("DMD");
+
+  // Nb: passing a null pointer for %s is undefined behavior, so substitute
+  // "" when $DMD is unset.
+  StatusMsg("$DMD = '%s'\n", e ? e : "");
+
+  if (!e || strcmp(e, "") == 0 || strcmp(e, "0") == 0) {
+    StatusMsg("DMD is not enabled\n");
+    return;
+  }
+
+  // Save it so we can print it in Dump().
+  gDMDEnvVar = e = InfallibleAllocPolicy::strdup_(e);
+
+  if (strcmp(e, "1") != 0) {
+    bool isEnd = false;
+    while (!isEnd) {
+      // Consume leading whitespace.
+      while (isspace(*e)) {
+        e++;
+      }
+
+      // Save the start of the arg.
+      const char* arg = e;
+
+      // Find the first char after the arg, and temporarily change it to '\0'
+      // to isolate the arg.
+      while (!isspace(*e) && *e != '\0') {
+        e++;
+      }
+      char replacedChar = *e;
+      isEnd = replacedChar == '\0';
+      *e = '\0';
+
+      // Handle arg
+      long myLong;
+      if (OptionLong(arg, "--sample-below", 1, gMaxSampleBelowSize, &myLong)) {
+        gSampleBelowSize = myLong;
+
+      } else if (strcmp(arg, "--mode=normal") == 0) {
+        gMode = Normal;
+      } else if (strcmp(arg, "--mode=test")   == 0) {
+        gMode = Test;
+      } else if (strcmp(arg, "--mode=stress") == 0) {
+        gMode = Stress;
+
+      } else if (strcmp(arg, "") == 0) {
+        // This can only happen if there is trailing whitespace.  Ignore.
+        MOZ_ASSERT(isEnd);
+
+      } else {
+        BadArg(arg);  // doesn't return
+      }
+
+      // Undo the temporary isolation.
+      *e = replacedChar;
+    }
+  }
+
+  // Finished parsing $DMD.
+
+  StatusMsg("DMD is enabled\n");
+
+  gStateLock = InfallibleAllocPolicy::new_<Mutex>();
+
+  gSmallBlockActualSizeCounter = 0;
+
+  // Only initialized and used when gMode == Test (both below).
+  FILE* testFp;
+
+  if (gMode == Test) {
+    // fopen() allocates.  So do this before setting gIsDMDRunning so those
+    // allocations don't show up in our results.
+    const char* filename = "test.dmd";
+    testFp = fopen(filename, "w");
+    if (!testFp) {
+      StatusMsg("can't create test file %s: %s\n", filename, strerror(errno));
+      exit(1);
+    }
+  }
+
+  DMD_CREATE_TLS_INDEX(gTlsIndex);
+
+  // Init failures are impossible: the InfallibleAllocPolicy aborts on OOM.
+  gStackTraceTable = InfallibleAllocPolicy::new_<StackTraceTable>();
+  gStackTraceTable->init(65536);
+
+  gLiveBlockTable = InfallibleAllocPolicy::new_<BlockTable>();
+  gLiveBlockTable->init(65536);
+
+  gDoubleReportBlockGroupTable = InfallibleAllocPolicy::new_<BlockGroupTable>();
+  gDoubleReportBlockGroupTable->init(0);
+
+  // Set this as late as possible, so that allocations during initialization
+  // aren't intercepted.  Once this is set, we are intercepting malloc et al.
+  // in earnest.
+  gIsDMDRunning = true;
+
+  if (gMode == Test) {
+    StatusMsg("running test mode...\n");
+    RunTestMode(testFp);
+    StatusMsg("finished test mode\n");
+    fclose(testFp);
+    exit(0);
+  }
+
+  if (gMode == Stress) {
+    StatusMsg("running stress mode...\n");
+    RunStressMode();
+    StatusMsg("finished stress mode\n");
+    exit(0);
+  }
+}
+
+//---------------------------------------------------------------------------
+// DMD reporting and unreporting
+//---------------------------------------------------------------------------
+
+// Finds (or creates) the BlockGroup for |aKey| in |aTable| and folds
+// |aBlock|'s size into it.
+static void
+AddBlockToBlockGroupTable(BlockGroupTable& aTable, const BlockKey& aKey,
+                          const Block& aBlock)
+{
+  BlockGroupTable::AddPtr p = aTable.lookupForAdd(aKey);
+  if (!p) {
+    BlockGroup bg(aKey);
+    (void)aTable.add(p, bg);
+  }
+  p->Add(aBlock);
+}
+
+// Marks this block as reported by |aReporterName|.  If it was already
+// reported, records a double-report instead of overwriting the first report.
+void
+Block::Report(Thread* aT, const char* aReporterName, bool aOnAlloc)
+{
+  MOZ_ASSERT(mKind == BlockKey::Live);
+  if (IsReported()) {
+    BlockKey doubleReportKey(mAllocStackTrace,
+                             ReportStackTrace(), StackTrace::Get(aT),
+                             ReporterName(), aReporterName);
+    AddBlockToBlockGroupTable(*gDoubleReportBlockGroupTable,
+                              doubleReportKey, *this);
+  } else {
+    SetReporterName(aReporterName);
+    SetReportStackTrace(StackTrace::Get(aT));
+    SetReportedOnAlloc(aOnAlloc);
+  }
+}
+
+// Clears the reported marking after an analysis, except for blocks that were
+// reported at allocation time (those stay reported permanently).
+void
+Block::UnreportIfNotReportedOnAlloc()
+{
+  MOZ_ASSERT(mKind == BlockKey::Live);
+  if (!ReportedOnAlloc()) {
+    SetReporterName(gUnreportedName);
+    SetReportStackTrace(nullptr);
+  }
+}
+
+// Common implementation of Report() and ReportOnAlloc().
+// NOTE(review): this takes AutoBlockIntercepts before AutoLockState, the
+// opposite order to AllocCallback/FreeCallback -- presumably harmless since
+// intercept-blocking is per-thread state rather than a lock, but confirm.
+static void
+ReportHelper(const void* aPtr, const char* aReporterName, bool aOnAlloc)
+{
+  if (!gIsDMDRunning || !aPtr) {
+    return;
+  }
+
+  Thread* t = Thread::Fetch();
+
+  AutoBlockIntercepts block(t);
+  AutoLockState lock;
+
+  if (BlockTable::Ptr p = gLiveBlockTable->lookup(aPtr)) {
+    p->value.Report(t, aReporterName, aOnAlloc);
+  } else {
+    // We have no record of the block.  Do nothing.  Either:
+    // - We're sampling and we skipped this block.  This is likely.
+    // - It's a bogus pointer.  This is unlikely because Report() is almost
+    //   always called in conjunction with a malloc_size_of-style function.
+  }
+}
+
+// Public entry point: mark |aPtr|'s block as reported by |aReporterName|.
+MOZ_EXPORT void
+Report(const void* aPtr, const char* aReporterName)
+{
+  ReportHelper(aPtr, aReporterName, /* onAlloc */ false);
+}
+
+// Public entry point: like Report(), but the marking survives analysis
+// (see BlockKey's |mReportedOnAlloc|).
+MOZ_EXPORT void
+ReportOnAlloc(const void* aPtr, const char* aReporterName)
+{
+  ReportHelper(aPtr, aReporterName, /* onAlloc */ true);
+}
+
+//---------------------------------------------------------------------------
+// DMD output
+//---------------------------------------------------------------------------
+
+// This works for both BlockGroups and FrameGroups.
+// Sorts the groups in |aTGroupTable| by descending usable size and prints
+// them (up to MaxTGroups).  Works for both BlockGroups and FrameGroups.
+template <class TGroup>
+static void
+PrintSortedGroups(const Writer& aWriter, const char* aStr, const char* astr,
+                  const js::HashSet<TGroup, TGroup, InfallibleAllocPolicy>& aTGroupTable,
+                  size_t aCategoryUsableSize, size_t aTotalUsableSize)
+{
+  const char* name = TGroup::kName;
+  StatusMsg("  creating and sorting %s %s group array...\n", astr, name);
+
+  // Convert the table into a sorted array.
+  js::Vector<const TGroup*, 0, InfallibleAllocPolicy> tgArray;
+  tgArray.reserve(aTGroupTable.count());
+  typedef js::HashSet<TGroup, TGroup, InfallibleAllocPolicy> TGroupTable;
+  for (typename TGroupTable::Range r = aTGroupTable.all();
+       !r.empty();
+       r.popFront()) {
+    tgArray.infallibleAppend(&r.front());
+  }
+  qsort(tgArray.begin(), tgArray.length(), sizeof(tgArray[0]),
+        TGroup::QsortCmp);
+
+  WriteTitle("%s %ss\n", aStr, name);
+
+  if (tgArray.length() == 0) {
+    W("(none)\n\n");
+    return;
+  }
+
+  // Limit the number of block groups printed, because fix-linux-stack.pl is
+  // too damn slow.  Note that we don't break out of this loop because we need
+  // to keep adding to |cumulativeUsableSize|.
+  static const uint32_t MaxTGroups = 1000;
+  uint32_t numTGroups = tgArray.length();
+
+  StatusMsg("  printing %s %s group array...\n", astr, name);
+  size_t cumulativeUsableSize = 0;
+  for (uint32_t i = 0; i < numTGroups; i++) {
+    const TGroup* tg = tgArray[i];
+    cumulativeUsableSize += tg->CombinedSize().Usable();
+    if (i < MaxTGroups) {
+      tg->Print(aWriter, i+1, numTGroups, aStr, astr, aCategoryUsableSize,
+                cumulativeUsableSize, aTotalUsableSize);
+    } else if (i == MaxTGroups) {
+      // Print the truncation notice exactly once.
+      W("%s: stopping after %s %s groups\n\n", aStr,
+        Show(MaxTGroups, gBuf1, kBufLen), name);
+    }
+  }
+
+  // Sanity check: the groups in a category must account for the category's
+  // entire usable size (unless the caller passed kNoSize).
+  MOZ_ASSERT(aCategoryUsableSize == kNoSize ||
+             aCategoryUsableSize == cumulativeUsableSize);
+}
+
+// Prints the block groups for a category, then derives per-PC frame groups
+// from them and prints those too (except in test mode).
+static void
+PrintSortedBlockAndFrameGroups(const Writer& aWriter,
+                               const char* aStr, const char* astr,
+                               const BlockGroupTable& aBlockGroupTable,
+                               size_t aCategoryUsableSize,
+                               size_t aTotalUsableSize)
+{
+  PrintSortedGroups(aWriter, aStr, astr, aBlockGroupTable, aCategoryUsableSize,
+                    aTotalUsableSize);
+
+  // Frame groups are totally dependent on vagaries of stack traces, so we
+  // can't show them in test mode.
+  if (gMode == Test) {
+    return;
+  }
+
+  FrameGroupTable frameGroupTable;
+  frameGroupTable.init(2048);
+  for (BlockGroupTable::Range r = aBlockGroupTable.all();
+       !r.empty();
+       r.popFront()) {
+    const BlockGroup& bg = r.front();
+    const StackTrace* st = bg.mAllocStackTrace;
+    MOZ_ASSERT(bg.IsLive());
+
+    // A single PC can appear multiple times in a stack trace.  We ignore
+    // duplicates by first sorting and then ignoring adjacent duplicates.
+    StackTrace sorted(*st);
+    sorted.Sort();              // sorts the copy, not the original
+    void* prevPc = (void*)intptr_t(-1);
+    for (uint32_t i = 0; i < sorted.Length(); i++) {
+      void* pc = sorted.Pc(i);
+      if (pc == prevPc) {
+        continue;               // ignore duplicate
+      }
+      prevPc = pc;
+
+      FrameGroupTable::AddPtr p = frameGroupTable.lookupForAdd(pc);
+      if (!p) {
+        FrameGroup fg(pc);
+        (void)frameGroupTable.add(p, fg);
+      }
+      p->Add(bg);
+    }
+  }
+  // Frame groups overlap (one block appears under many PCs), so pass kNoSize
+  // to skip the category-total sanity check.
+  PrintSortedGroups(aWriter, aStr, astr, frameGroupTable, kNoSize,
+                    aTotalUsableSize);
+}
+
+// This is only needed because of the |const void*| vs |void*| arg mismatch.
+static size_t
+MallocSizeOf(const void* aPtr)
+{
+  return gMallocTable->malloc_usable_size(const_cast<void*>(aPtr));
+}
+
+static void
+ShowExecutionMeasurements(const Writer& aWriter)
+{
+  // Stats are non-deterministic, so don't show them in test mode.
+  if (gMode == Test) {
+    return;
+  }
+
+  WriteTitle("Execution measurements\n");
+
+  size_t sizeOfStackTraceTable =
+    gStackTraceTable->sizeOfIncludingThis(MallocSizeOf);
+  for (StackTraceTable::Range r = gStackTraceTable->all();
+       !r.empty();
+       r.popFront()) {
+    StackTrace* const& st = r.front();
+    sizeOfStackTraceTable += MallocSizeOf(st);
+  }
+  W("Stack trace table: %s of %s entries used, taking up %s bytes\n",
+    Show(gStackTraceTable->count(),    gBuf1, kBufLen),
+    Show(gStackTraceTable->capacity(), gBuf2, kBufLen),
+    Show(sizeOfStackTraceTable, gBuf3, kBufLen));
+
+  W("Live block table:  %s of %s entries used, taking up %s bytes\n",
+    Show(gLiveBlockTable->count(),    gBuf1, kBufLen),
+    Show(gLiveBlockTable->capacity(), gBuf2, kBufLen),
+    Show(gLiveBlockTable->sizeOfIncludingThis(MallocSizeOf), gBuf3, kBufLen));
+
+  W("\n");
+}
+
+static void
+ClearState()
+{
+  // Unreport all blocks, except those that were reported on allocation,
+  // because they need to keep their reported marking.
+  for (BlockTable::Range r = gLiveBlockTable->all(); !r.empty(); r.popFront()) {
+    r.front().value.UnreportIfNotReportedOnAlloc();
+  }
+
+  // Clear errors.
+  gDoubleReportBlockGroupTable->finish();
+  gDoubleReportBlockGroupTable->init();
+}
+
+MOZ_EXPORT void
+Dump(Writer aWriter)
+{
+  if (!gIsDMDRunning) {
+    const char* msg = "cannot Dump();  DMD was not enabled at startup\n";
+    StatusMsg("%s", msg);
+    W("%s", msg);
+    return;
+  }
+
+  AutoBlockIntercepts block(Thread::Fetch());
+  AutoLockState lock;
+
+  static int dumpCount = 1;
+  StatusMsg("Dump %d {\n", dumpCount++);
+
+  StatusMsg("  gathering live block groups...\n");
+
+  BlockGroupTable unreportedBlockGroupTable;
+  (void)unreportedBlockGroupTable.init(2048);
+  size_t unreportedUsableSize = 0;
+
+  BlockGroupTable reportedBlockGroupTable;
+  (void)reportedBlockGroupTable.init(2048);
+  size_t reportedUsableSize = 0;
+
+  bool anyBlocksSampled = false;
+
+  for (BlockTable::Range r = gLiveBlockTable->all(); !r.empty(); r.popFront()) {
+    const Block& b = r.front().value;
+    if (!b.IsReported()) {
+      unreportedUsableSize += b.mBlockSize.Usable();
+      AddBlockToBlockGroupTable(unreportedBlockGroupTable, b, b);
+    } else {
+      reportedUsableSize += b.mBlockSize.Usable();
+      AddBlockToBlockGroupTable(reportedBlockGroupTable, b, b);
+    }
+    anyBlocksSampled = anyBlocksSampled || b.mBlockSize.mSampled;
+  }
+  size_t totalUsableSize = unreportedUsableSize + reportedUsableSize;
+
+  WriteTitle("Invocation\n");
+  W("$DMD = '%s'\n\n", gDMDEnvVar);
+
+  PrintSortedGroups(aWriter, "Double-reported", "double-reported",
+                    *gDoubleReportBlockGroupTable, kNoSize, kNoSize);
+
+  PrintSortedBlockAndFrameGroups(aWriter, "Unreported", "unreported",
+                                 unreportedBlockGroupTable,
+                                 unreportedUsableSize, totalUsableSize);
+
+  PrintSortedBlockAndFrameGroups(aWriter, "Reported", "reported",
+                                 reportedBlockGroupTable,
+                                 reportedUsableSize, totalUsableSize);
+
+  bool showTilde = anyBlocksSampled;
+  WriteTitle("Summary\n");
+  W("Total:      %s bytes\n",
+    Show(totalUsableSize, gBuf1, kBufLen, showTilde));
+  W("Reported:   %s bytes (%5.2f%%)\n",
+    Show(reportedUsableSize, gBuf1, kBufLen, showTilde),
+    Percent(reportedUsableSize, totalUsableSize));
+  W("Unreported: %s bytes (%5.2f%%)\n",
+    Show(unreportedUsableSize, gBuf1, kBufLen, showTilde),
+    Percent(unreportedUsableSize, totalUsableSize));
+
+  W("\n");
+
+  ShowExecutionMeasurements(aWriter);
+
+  ClearState();
+
+  StatusMsg("}\n");
+}
+
+//---------------------------------------------------------------------------
+// Testing
+//---------------------------------------------------------------------------
+
+// This function checks that heap blocks that have the same stack trace but
+// different (or no) reporters get aggregated separately.
+void foo()
+{
+   char* a[6];
+   for (int i = 0; i < 6; i++) {
+      a[i] = (char*) malloc(128 - 16*i);
+   }
+
+   for (int i = 0; i <= 1; i++)
+      Report(a[i], "a01");              // reported
+   Report(a[2], "a23");                 // reported
+   Report(a[3], "a23");                 // reported
+   // a[4], a[5] unreported
+}
+
+// This stops otherwise-unused variables from being optimized away.
+static void
+UseItOrLoseIt(void* a)
+{
+  if (a == 0) {
+    fprintf(stderr, "UseItOrLoseIt: %p\n", a);
+  }
+}
+
+// The output from this should be compared against test-expected.dmd.  It's
+// been tested on Linux64, and probably will give different results on other
+// platforms.
+static void
+RunTestMode(FILE* fp)
+{
+  Writer writer(FpWrite, fp);
+
+  // 0th Dump.  Zero for everything.
+  Dump(writer);
+
+  // 1st Dump: 1 freed, 9 out of 10 unreported.
+  // 2nd Dump: still present and unreported.
+  int i;
+  char* a;
+  for (i = 0; i < 10; i++) {
+      a = (char*) malloc(100);
+      UseItOrLoseIt(a);
+  }
+  free(a);
+
+  // Min-sized block.
+  // 1st Dump: reported.
+  // 2nd Dump: re-reported, twice;  double-report warning.
+  char* a2 = (char*) malloc(0);
+  Report(a2, "a2");
+
+  // Operator new[].
+  // 1st Dump: reported.
+  // 2nd Dump: reportedness carries over, due to ReportOnAlloc.
+  char* b = new char[10];
+  ReportOnAlloc(b, "b");
+
+  // ReportOnAlloc, then freed.
+  // 1st Dump: freed, irrelevant.
+  // 2nd Dump: freed, irrelevant.
+  char* b2 = new char;
+  ReportOnAlloc(b2, "b2");
+  free(b2);
+
+  // 1st Dump: reported, plus 3 double-report warnings.
+  // 2nd Dump: freed, irrelevant.
+  char* c = (char*) calloc(10, 3);
+  Report(c, "c");
+  for (int i = 0; i < 3; i++) {
+    Report(c, "c");
+  }
+
+  // 1st Dump: ignored.
+  // 2nd Dump: irrelevant.
+  Report((void*)(intptr_t)i, "d");
+
+  // jemalloc rounds this up to 8192.
+  // 1st Dump: reported.
+  // 2nd Dump: freed.
+  char* e = (char*) malloc(4096);
+  e = (char*) realloc(e, 4097);
+  Report(e, "e");
+
+  // First realloc is like malloc;  second realloc is shrinking.
+  // 1st Dump: reported.
+  // 2nd Dump: re-reported.
+  char* e2 = (char*) realloc(nullptr, 1024);
+  e2 = (char*) realloc(e2, 512);
+  Report(e2, "e2");
+
+  // First realloc is like malloc;  second realloc creates a min-sized block.
+  // 1st Dump: reported (re-use "a2" reporter name because the order of this
+  //           report and the "a2" above is non-deterministic).
+  // 2nd Dump: freed, irrelevant.
+  char* e3 = (char*) realloc(nullptr, 1024);
+  e3 = (char*) realloc(e3, 0);
+  MOZ_ASSERT(e3);
+  Report(e3, "a2");
+
+  // 1st Dump: freed, irrelevant.
+  // 2nd Dump: freed, irrelevant.
+  char* f = (char*) malloc(64);
+  free(f);
+
+  // 1st Dump: ignored.
+  // 2nd Dump: irrelevant.
+  Report((void*)(intptr_t)0x0, "zero");
+
+  // 1st Dump: mixture of reported and unreported.
+  // 2nd Dump: all unreported.
+  foo();
+  foo();
+
+  // All the odd-ball ones.
+  // 1st Dump: all unreported.
+  // 2nd Dump: all freed, irrelevant.
+  // XXX: no memalign on Mac
+//void* x = memalign(64, 65);           // rounds up to 128
+//UseItOrLoseIt(x);
+  // XXX: posix_memalign doesn't work on B2G, apparently
+//void* y;
+//posix_memalign(&y, 128, 129);         // rounds up to 256
+//UseItOrLoseIt(y);
+  void* z = valloc(1);                  // rounds up to 4096
+  UseItOrLoseIt(z);
+//aligned_alloc(64, 256);               // XXX: C11 only
+
+  // 1st Dump.
+  Dump(writer);
+
+  //---------
+
+  Report(a2, "a2b");
+  Report(a2, "a2b");
+  free(c);
+  free(e);
+  Report(e2, "e2b");
+  free(e3);
+//free(x);
+//free(y);
+  free(z);
+
+  // 2nd Dump.
+  Dump(writer);
+
+  //---------
+
+  // Clear all knowledge of existing blocks to give us a clean slate.
+  gLiveBlockTable->clear();
+
+  // Reset the counter just in case |sample-size| was specified in $DMD.
+  // Otherwise the assertions fail.
+  gSmallBlockActualSizeCounter = 0;
+  gSampleBelowSize = 128;
+
+  char* s;
+
+  // This equals the sample size, and so is recorded exactly.  It should be
+  // listed before groups of the same size that are sampled.
+  s = (char*) malloc(128);
+  UseItOrLoseIt(s);
+
+  // This exceeds the sample size, and so is recorded exactly.
+  s = (char*) malloc(144);
+  UseItOrLoseIt(s);
+
+  // These together constitute exactly one sample.
+  for (int i = 0; i < 16; i++) {
+    s = (char*) malloc(8);
+    UseItOrLoseIt(s);
+  }
+  MOZ_ASSERT(gSmallBlockActualSizeCounter == 0);
+
+  // These fall 8 bytes short of a full sample.
+  for (int i = 0; i < 15; i++) {
+    s = (char*) malloc(8);
+    UseItOrLoseIt(s);
+  }
+  MOZ_ASSERT(gSmallBlockActualSizeCounter == 120);
+
+  // This exceeds the sample size, and so is recorded exactly.
+  s = (char*) malloc(256);
+  UseItOrLoseIt(s);
+  MOZ_ASSERT(gSmallBlockActualSizeCounter == 120);
+
+  // Combined with the |i < 15| loop above, this exceeds a full sample.
+  s = (char*) malloc(96);
+  UseItOrLoseIt(s);
+  MOZ_ASSERT(gSmallBlockActualSizeCounter == 88);
+
+  // This gets to another full sample.
+  for (int i = 0; i < 5; i++) {
+    s = (char*) malloc(8);
+    UseItOrLoseIt(s);
+  }
+  MOZ_ASSERT(gSmallBlockActualSizeCounter == 0);
+
+  // This allocates 16, 32, ..., 128 bytes, which results in a block group
+  // that contains a mix of sampled and non-sampled blocks, and so should be
+  // printed with '~' signs.
+  for (int i = 1; i <= 8; i++) {
+    s = (char*) malloc(i * 16);
+    UseItOrLoseIt(s);
+  }
+  MOZ_ASSERT(gSmallBlockActualSizeCounter == 64);
+
+  // At the end we're 64 bytes into the current sample so we report ~1,424
+  // bytes of allocation overall, which is 64 less than the real value 1,488.
+
+  Dump(writer);
+}
+
+//---------------------------------------------------------------------------
+// Stress testing microbenchmark
+//---------------------------------------------------------------------------
+
+MOZ_NEVER_INLINE static void
+stress5()
+{
+  for (int i = 0; i < 10; i++) {
+    void* x = malloc(64);
+    UseItOrLoseIt(x);
+    if (i & 1) {
+      free(x);
+    }
+  }
+}
+
+MOZ_NEVER_INLINE static void
+stress4()
+{
+  stress5(); stress5(); stress5(); stress5(); stress5();
+  stress5(); stress5(); stress5(); stress5(); stress5();
+}
+
+MOZ_NEVER_INLINE static void
+stress3()
+{
+  for (int i = 0; i < 10; i++) {
+    stress4();
+  }
+}
+
+MOZ_NEVER_INLINE static void
+stress2()
+{
+  stress3(); stress3(); stress3(); stress3(); stress3();
+  stress3(); stress3(); stress3(); stress3(); stress3();
+}
+
+MOZ_NEVER_INLINE static void
+stress1()
+{
+  for (int i = 0; i < 10; i++) {
+    stress2();
+  }
+}
+
+// This stress test does lots of allocations and frees, which is where most of
+// DMD's overhead occurs.  It allocates 1,000,000 64-byte blocks, spread evenly
+// across 1,000 distinct stack traces.  It frees every second one immediately
+// after allocating it.
+//
+// It's highly artificial, but it's deterministic and easy to run.  It can be
+// timed under different conditions to glean performance data.
+static void
+RunStressMode()
+{
+  stress1(); stress1(); stress1(); stress1(); stress1();
+  stress1(); stress1(); stress1(); stress1(); stress1();
+}
+
+}   // namespace dmd
+}   // namespace mozilla
new file mode 100644
--- /dev/null
+++ b/memory/replace/dmd/DMD.h
@@ -0,0 +1,55 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef DMD_h___
+#define DMD_h___
+
+#include <stdarg.h>
+
+#include "mozilla/Types.h"
+
+namespace mozilla {
+namespace dmd {
+
+// Mark a heap block as reported by a memory reporter.
+MOZ_EXPORT void
+Report(const void* aPtr, const char* aReporterName);
+
+// Mark a heap block as reported immediately on allocation.
+MOZ_EXPORT void
+ReportOnAlloc(const void* aPtr, const char* aReporterName);
+
+class Writer
+{
+public:
+  typedef void (*WriterFun)(void* aWriteState, const char* aFmt, va_list aAp);
+
+  Writer(WriterFun aWriterFun, void* aWriteState)
+    : mWriterFun(aWriterFun), mWriteState(aWriteState)
+  {}
+
+  void Write(const char* aFmt, ...) const;
+
+private:
+  WriterFun mWriterFun;
+  void*     mWriteState;
+};
+
+// Checks which heap blocks have been reported, and dumps a human-readable
+// summary (via |aWriter|).  To dump to stderr, pass |Writer(FpWrite, stderr)|.
+// Beware:  this output may have very long lines.
+MOZ_EXPORT void
+Dump(Writer aWriter);
+
+// A useful |WriterFun|.  If |fp| is a FILE* you want |Dump|'s output to be
+// written to, call |Dump(Writer(FpWrite, fp))|.
+MOZ_EXPORT void
+FpWrite(void* aFp, const char* aFmt, va_list aAp);
+
+} // namespace dmd
+} // namespace mozilla
+
+#endif /* DMD_h___ */
new file mode 100644
--- /dev/null
+++ b/memory/replace/dmd/Makefile.in
@@ -0,0 +1,39 @@
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+DEPTH		= @DEPTH@
+topsrcdir	= @top_srcdir@
+srcdir		= @srcdir@
+VPATH		= @srcdir@
+
+include $(DEPTH)/config/autoconf.mk
+
+MODULE		= dmd
+LIBRARY_NAME	= dmd
+FORCE_SHARED_LIB= 1
+
+DEFINES 	+= -DMOZ_NO_MOZALLOC
+
+CPPSRCS		= DMD.cpp
+
+VPATH 		+= $(topsrcdir)/xpcom/base
+CPPSRCS 	+= nsStackWalk.cpp
+
+VPATH 		+= $(topsrcdir)/nsprpub/lib/libc/src
+CSRCS 		+= strcpy.c
+
+VPATH 		+= $(topsrcdir)/mfbt
+CPPSRCS 	+= HashFunctions.cpp
+
+EXPORTS 	= DMD.h
+
+# Disable mozglue.
+WRAP_LDFLAGS 	=
+MOZ_GLUE_LDFLAGS=
+
+STL_FLAGS 	=
+
+include $(topsrcdir)/config/config.mk
+include $(topsrcdir)/config/rules.mk
new file mode 100644
--- /dev/null
+++ b/memory/replace/dmd/README
@@ -0,0 +1,2 @@
+This is DMD.  See https://wiki.mozilla.org/Performance/MemShrink/DMD for
+details on how to use it.
new file mode 100644
--- /dev/null
+++ b/memory/replace/dmd/test-expected.dmd
@@ -0,0 +1,355 @@
+------------------------------------------------------------------
+Invocation
+------------------------------------------------------------------
+
+$DMD = '--mode=test'
+
+------------------------------------------------------------------
+Double-reported blocks
+------------------------------------------------------------------
+
+(none)
+
+------------------------------------------------------------------
+Unreported blocks
+------------------------------------------------------------------
+
+(none)
+
+------------------------------------------------------------------
+Reported blocks
+------------------------------------------------------------------
+
+(none)
+
+------------------------------------------------------------------
+Summary
+------------------------------------------------------------------
+
+Total:      0 bytes
+Reported:   0 bytes ( 0.00%)
+Unreported: 0 bytes ( 0.00%)
+
+------------------------------------------------------------------
+Invocation
+------------------------------------------------------------------
+
+$DMD = '--mode=test'
+
+------------------------------------------------------------------
+Double-reported blocks
+------------------------------------------------------------------
+
+Double-reported: 3 blocks in block group 1 of 1
+ 96 bytes (90 requested / 6 slop)
+ Allocated at
+   (stack omitted due to test mode)
+
+ Previously reported by 'c' at
+   (stack omitted due to test mode)
+
+ Now reported by 'c' at
+   (stack omitted due to test mode)
+
+------------------------------------------------------------------
+Unreported blocks
+------------------------------------------------------------------
+
+Unreported: 1 block in block group 1 of 4
+ 4,096 bytes (1 requested / 4,095 slop)
+ 27.44% of the heap (27.44% cumulative);  76.88% of unreported (76.88% cumulative)
+ Allocated at
+   (stack omitted due to test mode)
+
+Unreported: 9 blocks in block group 2 of 4
+ 1,008 bytes (900 requested / 108 slop)
+ 6.75% of the heap (34.19% cumulative);  18.92% of unreported (95.80% cumulative)
+ Allocated at
+   (stack omitted due to test mode)
+
+Unreported: 2 blocks in block group 3 of 4
+ 112 bytes (112 requested / 0 slop)
+ 0.75% of the heap (34.94% cumulative);  2.10% of unreported (97.90% cumulative)
+ Allocated at
+   (stack omitted due to test mode)
+
+Unreported: 2 blocks in block group 4 of 4
+ 112 bytes (112 requested / 0 slop)
+ 0.75% of the heap (35.69% cumulative);  2.10% of unreported (100.00% cumulative)
+ Allocated at
+   (stack omitted due to test mode)
+
+------------------------------------------------------------------
+Reported blocks
+------------------------------------------------------------------
+
+Reported: 1 block in block group 1 of 12
+ 8,192 bytes (4,097 requested / 4,095 slop)
+ 54.88% of the heap (54.88% cumulative);  85.33% of reported (85.33% cumulative)
+ Allocated at
+   (stack omitted due to test mode)
+
+ Reported by 'e' at
+   (stack omitted due to test mode)
+
+Reported: 1 block in block group 2 of 12
+ 512 bytes (512 requested / 0 slop)
+ 3.43% of the heap (58.31% cumulative);  5.33% of reported (90.67% cumulative)
+ Allocated at
+   (stack omitted due to test mode)
+
+ Reported by 'e2' at
+   (stack omitted due to test mode)
+
+Reported: 2 blocks in block group 3 of 12
+ 240 bytes (240 requested / 0 slop)
+ 1.61% of the heap (59.91% cumulative);  2.50% of reported (93.17% cumulative)
+ Allocated at
+   (stack omitted due to test mode)
+
+ Reported by 'a01' at
+   (stack omitted due to test mode)
+
+Reported: 2 blocks in block group 4 of 12
+ 240 bytes (240 requested / 0 slop)
+ 1.61% of the heap (61.52% cumulative);  2.50% of reported (95.67% cumulative)
+ Allocated at
+   (stack omitted due to test mode)
+
+ Reported by 'a01' at
+   (stack omitted due to test mode)
+
+Reported: 1 block in block group 5 of 12
+ 96 bytes (96 requested / 0 slop)
+ 0.64% of the heap (62.17% cumulative);  1.00% of reported (96.67% cumulative)
+ Allocated at
+   (stack omitted due to test mode)
+
+ Reported by 'a23' at
+   (stack omitted due to test mode)
+
+Reported: 1 block in block group 6 of 12
+ 96 bytes (96 requested / 0 slop)
+ 0.64% of the heap (62.81% cumulative);  1.00% of reported (97.67% cumulative)
+ Allocated at
+   (stack omitted due to test mode)
+
+ Reported by 'a23' at
+   (stack omitted due to test mode)
+
+Reported: 1 block in block group 7 of 12
+ 80 bytes (80 requested / 0 slop)
+ 0.54% of the heap (63.34% cumulative);  0.83% of reported (98.50% cumulative)
+ Allocated at
+   (stack omitted due to test mode)
+
+ Reported by 'a23' at
+   (stack omitted due to test mode)
+
+Reported: 1 block in block group 8 of 12
+ 80 bytes (80 requested / 0 slop)
+ 0.54% of the heap (63.88% cumulative);  0.83% of reported (99.33% cumulative)
+ Allocated at
+   (stack omitted due to test mode)
+
+ Reported by 'a23' at
+   (stack omitted due to test mode)
+
+Reported: 1 block in block group 9 of 12
+ 32 bytes (30 requested / 2 slop)
+ 0.21% of the heap (64.09% cumulative);  0.33% of reported (99.67% cumulative)
+ Allocated at
+   (stack omitted due to test mode)
+
+ Reported by 'c' at
+   (stack omitted due to test mode)
+
+Reported: 1 block in block group 10 of 12
+ 16 bytes (10 requested / 6 slop)
+ 0.11% of the heap (64.20% cumulative);  0.17% of reported (99.83% cumulative)
+ Allocated at
+   (stack omitted due to test mode)
+
+ Reported by 'b' at
+   (stack omitted due to test mode)
+
+Reported: 1 block in block group 11 of 12
+ 8 bytes (0 requested / 8 slop)
+ 0.05% of the heap (64.26% cumulative);  0.08% of reported (99.92% cumulative)
+ Allocated at
+   (stack omitted due to test mode)
+
+ Reported by 'a2' at
+   (stack omitted due to test mode)
+
+Reported: 1 block in block group 12 of 12
+ 8 bytes (0 requested / 8 slop)
+ 0.05% of the heap (64.31% cumulative);  0.08% of reported (100.00% cumulative)
+ Allocated at
+   (stack omitted due to test mode)
+
+ Reported by 'a2' at
+   (stack omitted due to test mode)
+
+------------------------------------------------------------------
+Summary
+------------------------------------------------------------------
+
+Total:      14,928 bytes
+Reported:   9,600 bytes (64.31%)
+Unreported: 5,328 bytes (35.69%)
+
+------------------------------------------------------------------
+Invocation
+------------------------------------------------------------------
+
+$DMD = '--mode=test'
+
+------------------------------------------------------------------
+Double-reported blocks
+------------------------------------------------------------------
+
+Double-reported: 1 block in block group 1 of 1
+ 8 bytes (0 requested / 8 slop)
+ Allocated at
+   (stack omitted due to test mode)
+
+ Previously reported by 'a2b' at
+   (stack omitted due to test mode)
+
+ Now reported by 'a2b' at
+   (stack omitted due to test mode)
+
+------------------------------------------------------------------
+Unreported blocks
+------------------------------------------------------------------
+
+Unreported: 9 blocks in block group 1 of 3
+ 1,008 bytes (900 requested / 108 slop)
+ 38.77% of the heap (38.77% cumulative);  48.84% of unreported (48.84% cumulative)
+ Allocated at
+   (stack omitted due to test mode)
+
+Unreported: 6 blocks in block group 2 of 3
+ 528 bytes (528 requested / 0 slop)
+ 20.31% of the heap (59.08% cumulative);  25.58% of unreported (74.42% cumulative)
+ Allocated at
+   (stack omitted due to test mode)
+
+Unreported: 6 blocks in block group 3 of 3
+ 528 bytes (528 requested / 0 slop)
+ 20.31% of the heap (79.38% cumulative);  25.58% of unreported (100.00% cumulative)
+ Allocated at
+   (stack omitted due to test mode)
+
+------------------------------------------------------------------
+Reported blocks
+------------------------------------------------------------------
+
+Reported: 1 block in block group 1 of 3
+ 512 bytes (512 requested / 0 slop)
+ 19.69% of the heap (19.69% cumulative);  95.52% of reported (95.52% cumulative)
+ Allocated at
+   (stack omitted due to test mode)
+
+ Reported by 'e2b' at
+   (stack omitted due to test mode)
+
+Reported: 1 block in block group 2 of 3
+ 16 bytes (10 requested / 6 slop)
+ 0.62% of the heap (20.31% cumulative);  2.99% of reported (98.51% cumulative)
+ Allocated at
+   (stack omitted due to test mode)
+
+ Reported by 'b' at
+   (stack omitted due to test mode)
+
+Reported: 1 block in block group 3 of 3
+ 8 bytes (0 requested / 8 slop)
+ 0.31% of the heap (20.62% cumulative);  1.49% of reported (100.00% cumulative)
+ Allocated at
+   (stack omitted due to test mode)
+
+ Reported by 'a2b' at
+   (stack omitted due to test mode)
+
+------------------------------------------------------------------
+Summary
+------------------------------------------------------------------
+
+Total:      2,600 bytes
+Reported:   536 bytes (20.62%)
+Unreported: 2,064 bytes (79.38%)
+
+------------------------------------------------------------------
+Invocation
+------------------------------------------------------------------
+
+$DMD = '--mode=test'
+
+------------------------------------------------------------------
+Double-reported blocks
+------------------------------------------------------------------
+
+(none)
+
+------------------------------------------------------------------
+Unreported blocks
+------------------------------------------------------------------
+
+Unreported: ~4 blocks in block group 1 of 7
+ ~512 bytes (~512 requested / ~0 slop)
+ 35.96% of the heap (35.96% cumulative);  35.96% of unreported (35.96% cumulative)
+ Allocated at
+   (stack omitted due to test mode)
+
+Unreported: 1 block in block group 2 of 7
+ 256 bytes (256 requested / 0 slop)
+ 17.98% of the heap (53.93% cumulative);  17.98% of unreported (53.93% cumulative)
+ Allocated at
+   (stack omitted due to test mode)
+
+Unreported: 1 block in block group 3 of 7
+ 144 bytes (144 requested / 0 slop)
+ 10.11% of the heap (64.04% cumulative);  10.11% of unreported (64.04% cumulative)
+ Allocated at
+   (stack omitted due to test mode)
+
+Unreported: 1 block in block group 4 of 7
+ 128 bytes (128 requested / 0 slop)
+ 8.99% of the heap (73.03% cumulative);  8.99% of unreported (73.03% cumulative)
+ Allocated at
+   (stack omitted due to test mode)
+
+Unreported: ~1 block in block group 5 of 7
+ ~128 bytes (~128 requested / ~0 slop)
+ 8.99% of the heap (82.02% cumulative);  8.99% of unreported (82.02% cumulative)
+ Allocated at
+   (stack omitted due to test mode)
+
+Unreported: ~1 block in block group 6 of 7
+ ~128 bytes (~128 requested / ~0 slop)
+ 8.99% of the heap (91.01% cumulative);  8.99% of unreported (91.01% cumulative)
+ Allocated at
+   (stack omitted due to test mode)
+
+Unreported: ~1 block in block group 7 of 7
+ ~128 bytes (~128 requested / ~0 slop)
+ 8.99% of the heap (100.00% cumulative);  8.99% of unreported (100.00% cumulative)
+ Allocated at
+   (stack omitted due to test mode)
+
+------------------------------------------------------------------
+Reported blocks
+------------------------------------------------------------------
+
+(none)
+
+------------------------------------------------------------------
+Summary
+------------------------------------------------------------------
+
+Total:      ~1,424 bytes
+Reported:   ~0 bytes ( 0.00%)
+Unreported: ~1,424 bytes (100.00%)
+
--- a/storage/src/mozStorageService.cpp
+++ b/storage/src/mozStorageService.cpp
@@ -513,24 +513,67 @@ namespace {
 // allocated for a given request.  SQLite uses this function before all
 // allocations, and may be able to use any excess bytes caused by the rounding.
 //
 // Note: the wrappers for moz_malloc, moz_realloc and moz_malloc_usable_size
 // are necessary because the sqlite_mem_methods type signatures differ slightly
 // from the standard ones -- they use int instead of size_t.  But we don't need
 // a wrapper for moz_free.
 
+#ifdef MOZ_DMD
+
+#include "DMD.h"
+
+// sqlite does its own memory accounting, and we use its numbers in our memory
+// reporters.  But we don't want sqlite's heap blocks to show up in DMD's
+// output as unreported, so we mark them as reported when they're allocated and
+// mark them as unreported when they are freed.
+//
+// In other words, we are marking all sqlite heap blocks as reported even
+// though we're not reporting them ourselves.  Instead we're trusting that
+// sqlite is fully and correctly accounting for all of its heap blocks via its
+// own memory accounting.
+
+NS_MEMORY_REPORTER_MALLOC_SIZEOF_ON_ALLOC_FUN(sqliteMallocSizeOfOnAlloc, "sqlite")
+NS_MEMORY_REPORTER_MALLOC_SIZEOF_ON_FREE_FUN(sqliteMallocSizeOfOnFree)
+
+#endif
+
 static void *sqliteMemMalloc(int n)
 {
-  return ::moz_malloc(n);
+  void* p = ::moz_malloc(n);
+#ifdef MOZ_DMD
+  sqliteMallocSizeOfOnAlloc(p);
+#endif
+  return p;
+}
+
+static void sqliteMemFree(void *p)
+{
+#ifdef MOZ_DMD
+  sqliteMallocSizeOfOnFree(p);
+#endif
+  ::moz_free(p);
 }
 
 static void *sqliteMemRealloc(void *p, int n)
 {
+#ifdef MOZ_DMD
+  sqliteMallocSizeOfOnFree(p);
+  void *pnew = ::moz_realloc(p, n);
+  if (pnew) {
+    sqliteMallocSizeOfOnAlloc(pnew);
+  } else {
+    // realloc failed;  undo the sqliteMallocSizeOfOnFree from above
+    sqliteMallocSizeOfOnAlloc(p);
+  }
+  return pnew;
+#else
   return ::moz_realloc(p, n);
+#endif
 }
 
 static int sqliteMemSize(void *p)
 {
   return ::moz_malloc_usable_size(p);
 }
 
 static int sqliteMemRoundup(int n)
@@ -549,24 +592,24 @@ static int sqliteMemInit(void *p)
 }
 
 static void sqliteMemShutdown(void *p)
 {
 }
 
 const sqlite3_mem_methods memMethods = {
   &sqliteMemMalloc,
-  &moz_free,
+  &sqliteMemFree,
   &sqliteMemRealloc,
   &sqliteMemSize,
   &sqliteMemRoundup,
   &sqliteMemInit,
   &sqliteMemShutdown,
   NULL
-}; 
+};
 
 } // anonymous namespace
 
 #endif  // MOZ_STORAGE_MEMORY
 
 nsresult
 Service::initialize()
 {
--- a/toolkit/library/Makefile.in
+++ b/toolkit/library/Makefile.in
@@ -389,16 +389,20 @@ endif
 ifdef MOZ_NATIVE_LIBVPX
 EXTRA_DSO_LDOPTS += $(MOZ_LIBVPX_LIBS)
 endif
 
 ifndef MOZ_TREE_PIXMAN
 EXTRA_DSO_LDOPTS += $(MOZ_PIXMAN_LIBS)
 endif
 
+ifdef MOZ_DMD
+EXTRA_DSO_LDOPTS += $(call EXPAND_LIBNAME_PATH,dmd,$(DIST)/lib)
+endif
+
 EXTRA_DSO_LDOPTS += $(call EXPAND_LIBNAME_PATH,gkmedias,$(DIST)/lib)
 
 ifdef MOZ_WEBRTC
 ifdef MOZ_PEERCONNECTION
 COMPONENT_LIBS += peerconnection
 endif
 ifdef MOZ_WEBRTC_SIGNALING
 EXTRA_DSO_LDOPTS += \
--- a/toolkit/toolkit-makefiles.sh
+++ b/toolkit/toolkit-makefiles.sh
@@ -1592,16 +1592,22 @@ fi
 
 if [ "$NS_TRACE_MALLOC" ]; then
   add_makefiles "
     tools/trace-malloc/Makefile
     tools/trace-malloc/lib/Makefile
   "
 fi
 
+if [ "$MOZ_DMD" ]; then
+  add_makefiles "
+    memory/replace/dmd/Makefile
+  "
+fi
+
 if [ "$MOZ_MAPINFO" ]; then
   add_makefiles "
     tools/codesighs/Makefile
   "
 fi
 
 if [ "$MOZ_XTF" ]; then
   add_makefiles "
--- a/toolkit/toolkit-tiers.mk
+++ b/toolkit/toolkit-tiers.mk
@@ -10,16 +10,20 @@ include $(topsrcdir)/config/nspr/build.m
 include $(topsrcdir)/config/js/build.mk
 
 TIERS += platform
 
 ifdef NS_TRACE_MALLOC
 tier_platform_dirs = tools/trace-malloc/lib
 endif
 
+ifdef MOZ_DMD
+tier_platform_dirs += memory/replace/dmd
+endif
+
 ifdef MOZ_TREE_FREETYPE
 tier_platform_staticdirs += modules/freetype2
 endif
 
 # this must precede xpcom
 ifdef MOZ_DMDV
 tier_platform_dirs += tools/dmdv
 endif
--- a/xpcom/base/nsIMemoryReporter.idl
+++ b/xpcom/base/nsIMemoryReporter.idl
@@ -352,71 +352,96 @@ interface nsIMemoryReporterManager : nsI
 // instead of nsCOMPtr<nsIMemoryReporter>.
 
 nsresult NS_RegisterMemoryReporter(nsIMemoryReporter *reporter);
 nsresult NS_RegisterMemoryMultiReporter(nsIMemoryMultiReporter *reporter);
 
 nsresult NS_UnregisterMemoryReporter(nsIMemoryReporter *reporter);
 nsresult NS_UnregisterMemoryMultiReporter(nsIMemoryMultiReporter *reporter);
 
+#if defined(MOZ_DMDV) || defined(MOZ_DMD)
+namespace mozilla {
+namespace dmd {
+// This runs all the memory reporters but does nothing with the results;  i.e.
+// it does the minimal amount of work possible for DMD/DMDV to do its thing.
+void RunReporters();
+}
+}
+#endif  // defined(MOZ_DMDV) || defined(MOZ_DMD)
+
+#ifdef MOZ_DMDV
+
+#if defined(MOZ_MEMORY)
+#error "MOZ_DMDV precludes MOZ_MEMORY"
+#endif
+
 // Because DMDV is not a tool that comes with the standard Valgrind
 // distribution, we have to #include our own local copy of dmdv.h.  Ugly but
 // unavoidable.
-#ifdef MOZ_DMDV
-#if MOZ_MEMORY
-#error "--disable-jemalloc should have been forced when --enable-dmdv was specified"
-#endif
 #include "dmdv.h"
-#endif
+
+#define MOZ_REPORT(ptr, usable, name)          VALGRIND_DMDV_REPORT(ptr, usable, name)
+#define MOZ_REPORT_ON_ALLOC(ptr, usable, name) VALGRIND_DMDV_REPORT(ptr, usable, name)
 
 namespace mozilla {
+namespace dmdv {
+// This dumps the DMDV output to stderr (or somewhere else, if one of
+// DMDV/Valgrind's logging options was used).
+void Dump();
+}
+}
 
-/*
- * Functions generated via this macro should be used by all traversal-based
- * memory reporters.  Such functions return |moz_malloc_size_of(ptr)|;  this
- * will always be zero on some obscure platforms.
- *
- * You might be wondering why we have a macro that creates multiple functions
- * distinguished only by |name|, instead of a single MemoryReporterMallocSizeOf
- * function.  It's mostly to help with DMDV integration, though it sometimes
- * also helps with debugging and temporary ad hoc profiling.  The |name| chosen
- * doesn't matter greatly, but it's best to make it similar to the path used by
- * the relevant memory reporter(s).
- */
+#elif defined(MOZ_DMD)
+
+#if !defined(MOZ_MEMORY)
+#error "MOZ_DMD requires MOZ_MEMORY"
+#endif
+
+#include "DMD.h"
+
+#define MOZ_REPORT(ptr, usable, name)          mozilla::dmd::Report(ptr, name)
+#define MOZ_REPORT_ON_ALLOC(ptr, usable, name) mozilla::dmd::ReportOnAlloc(ptr, name)
+
+#else
+
+#define MOZ_REPORT(ptr, usable, name)
+#define MOZ_REPORT_ON_ALLOC(ptr, usable, name)
+
+#endif  /* defined(MOZ_DMDV) || defined(MOZ_DMD) */
+
+// Functions generated via this macro should be used by all traversal-based
+// memory reporters.  Such functions return |moz_malloc_size_of(ptr)|;  this
+// will always be zero on some obscure platforms.
+//
+// You might be wondering why we have a macro that creates multiple
+// functions distinguished only by |name|, instead of a single
+// MemoryReporterMallocSizeOf function.  It's mostly to help with DMDV/DMD
+// integration, though it sometimes also helps with debugging and temporary
+// ad hoc profiling.  The |name| chosen doesn't matter greatly, but it's
+// best to make it similar to the path used by the relevant memory
+// reporter(s).
 #define NS_MEMORY_REPORTER_MALLOC_SIZEOF_FUN(fn, name)                        \
   static size_t fn(const void *ptr)                                           \
   {                                                                           \
       size_t usable = moz_malloc_size_of(ptr);                                \
-      VALGRIND_DMDV_REPORT(ptr, usable, name);                                \
-      return usable;                                                          \
-  }
-
-/*
- * Like NS_MEMORY_REPORTER_MALLOC_SIZEOF_FUN, but the created function sends an
- * "unreport" message to DMDV.
- */
-#define NS_MEMORY_REPORTER_MALLOC_SIZEOF_FUN_UN(fn)                           \
-  static size_t fn(const void *ptr)                                           \
-  {                                                                           \
-      size_t usable = moz_malloc_size_of(ptr);                                \
-      VALGRIND_DMDV_UNREPORT(ptr);                                            \
+      MOZ_REPORT(ptr, usable, name);                                          \
       return usable;                                                          \
   }
 
-#ifdef MOZ_DMDV
-
-/*
- * This runs all the memory reporters but does nothing with the results;  i.e.
- * it does the minimal amount of work possible for DMDV to do its thing.  Then
- * it dumps the DMDV output to stderr (or somewhere else, if one of
- * DMDV/Valgrind's logging options was used).
- */
-void DMDVCheckAndDump();
-
-#else
-
-#define VALGRIND_DMDV_REPORT(ptr, usable, name)
-#define VALGRIND_DMDV_UNREPORT(ptr)
-
-#endif  /* defined(MOZ_DMDV) */
-}
+// Functions generated by the next two macros should be used by wrapping
+// allocators that report heap blocks as soon as they are allocated and
+// unreport them as soon as they are freed.  Such allocators are used in cases
+// where we have third-party code that we cannot modify.  The two functions
+// must always be used in tandem.
+#define NS_MEMORY_REPORTER_MALLOC_SIZEOF_ON_ALLOC_FUN(fn, name)               \
+  static size_t fn(const void *ptr)                                           \
+  {                                                                           \
+      size_t usable = moz_malloc_size_of(ptr);                                \
+      MOZ_REPORT_ON_ALLOC(ptr, usable, name);                                 \
+      return usable;                                                          \
+  }
+#define NS_MEMORY_REPORTER_MALLOC_SIZEOF_ON_FREE_FUN(fn)                      \
+  static size_t fn(const void *ptr)                                           \
+  {                                                                           \
+      return moz_malloc_size_of(ptr);                                         \
+  }
 
 %}
--- a/xpcom/base/nsMemoryInfoDumper.cpp
+++ b/xpcom/base/nsMemoryInfoDumper.cpp
@@ -464,68 +464,104 @@ class DumpMultiReporterCallback MOZ_FINA
 
 NS_IMPL_ISUPPORTS1(
     DumpMultiReporterCallback
     , nsIMemoryMultiReporterCallback
     )
 
 } // namespace mozilla
 
+static void
+MakeFilename(const char *aPrefix, const nsAString &aIdentifier,
+             const char *aSuffix, nsACString &aResult)
+{
+  aResult = nsPrintfCString("%s-%s-%d.%s",
+                            aPrefix,
+                            NS_ConvertUTF16toUTF8(aIdentifier).get(),
+                            getpid(), aSuffix);
+}
+
+static nsresult
+OpenTempFile(const nsACString &aFilename, nsIFile* *aFile)
+{
+  nsresult rv = NS_GetSpecialDirectory(NS_OS_TEMP_DIR, aFile);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  nsCOMPtr<nsIFile> file(*aFile);
+
+  rv = file->AppendNative(aFilename);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  rv = file->CreateUnique(nsIFile::NORMAL_FILE_TYPE, 0644);
+  NS_ENSURE_SUCCESS(rv, rv);
+#ifdef ANDROID
+  {
+    // On android the default system umask is 0077 which makes these files
+    // unreadable to the shell user. In order to pull the dumps off a non-rooted
+    // device we need to chmod them to something world-readable.
+    nsAutoCString path;
+    rv = file->GetNativePath(path);
+    if (NS_SUCCEEDED(rv)) {
+      chmod(PromiseFlatCString(path).get(), 0644);
+    }
+  }
+#endif
+  return NS_OK;
+}
+
+#ifdef MOZ_DMD
+struct DMDWriteState
+{
+  static const size_t kBufSize = 4096;
+  char mBuf[kBufSize];
+  nsRefPtr<nsGZFileWriter> mGZWriter;
+
+  DMDWriteState(nsGZFileWriter *aGZWriter)
+    : mGZWriter(aGZWriter)
+  {}
+};
+
+static void DMDWrite(void* aState, const char* aFmt, va_list ap)
+{
+  DMDWriteState *state = (DMDWriteState*)aState;
+  vsnprintf(state->mBuf, state->kBufSize, aFmt, ap);
+  unused << state->mGZWriter->Write(state->mBuf);
+}
+#endif
+
 /* static */ nsresult
 nsMemoryInfoDumper::DumpMemoryReportsToFileImpl(
   const nsAString& aIdentifier)
 {
+  MOZ_ASSERT(!aIdentifier.IsEmpty());
+
   // Open a new file named something like
   //
-  //   incomplete-memory-report-<-identifier>-<pid>-42.json.gz
+  //   incomplete-memory-report-<identifier>-<pid>.json.gz
   //
   // in NS_OS_TEMP_DIR for writing.  When we're finished writing the report,
   // we'll rename this file and get rid of the "incomplete-" prefix.
   //
   // We do this because we don't want scripts which poll the filesystem
   // looking for memory report dumps to grab a file before we're finished
   // writing to it.
 
-  nsCOMPtr<nsIFile> tmpFile;
-  nsresult rv = NS_GetSpecialDirectory(NS_OS_TEMP_DIR,
-                                       getter_AddRefs(tmpFile));
+  // Note that |mrFilename| is missing the "incomplete-" prefix; we'll tack
+  // that on in a moment.
+  nsCString mrFilename;
+  MakeFilename("memory-report", aIdentifier, ".json.gz", mrFilename);
+
+  nsCOMPtr<nsIFile> mrTmpFile;
+  nsresult rv;
+  rv = OpenTempFile(NS_LITERAL_CSTRING("incomplete-") + mrFilename,
+                    getter_AddRefs(mrTmpFile));
   NS_ENSURE_SUCCESS(rv, rv);
 
-  // Note that |filename| is missing the "incomplete-" prefix; we'll tack
-  // that on in a moment.
-  nsAutoCString filename;
-  filename.AppendLiteral("memory-report");
-  if (!aIdentifier.IsEmpty()) {
-    filename.AppendLiteral("-");
-    filename.Append(NS_ConvertUTF16toUTF8(aIdentifier));
-  }
-  filename.AppendLiteral("-");
-  filename.AppendInt(getpid());
-  filename.AppendLiteral(".json.gz");
-
-  rv = tmpFile->AppendNative(NS_LITERAL_CSTRING("incomplete-") + filename);
-  NS_ENSURE_SUCCESS(rv, rv);
-
-  rv = tmpFile->CreateUnique(nsIFile::NORMAL_FILE_TYPE, 0644);
-  NS_ENSURE_SUCCESS(rv, rv);
-#ifdef ANDROID
-  {
-    // On android the default system umask is 0077 which makes these files
-    // unreadable to the shell user. In order to pull the dumps off a non-rooted
-    // device we need to chmod them to something world-readable.
-    nsAutoCString path;
-    rv = tmpFile->GetNativePath(path);
-    if (NS_SUCCEEDED(rv)) {
-      chmod(PromiseFlatCString(path).get(), 0644);
-    }
-  }
-#endif
-
   nsRefPtr<nsGZFileWriter> writer = new nsGZFileWriter();
-  rv = writer->Init(tmpFile);
+  rv = writer->Init(mrTmpFile);
   NS_ENSURE_SUCCESS(rv, rv);
 
   // Dump the memory reports to the file.
 
   // Increment this number if the format changes.
   DUMP(writer, "{\n  \"version\": 1,\n");
 
   DUMP(writer, "  \"hasMozMallocUsableSize\": ");
@@ -588,42 +624,73 @@ nsMemoryInfoDumper::DumpMemoryReportsToF
     r->CollectReports(cb, writer);
   }
 
   DUMP(writer, "\n  ]\n}");
 
   rv = writer->Finish();
   NS_ENSURE_SUCCESS(rv, rv);
 
+#ifdef MOZ_DMD
+  // Open a new file named something like
+  //
+  //   dmd-<identifier>-<pid>.txt.gz
+  //
+  // in NS_OS_TEMP_DIR for writing, and dump DMD output to it.  This must occur
+  // after the memory reporters have been run (above), but before the
+  // memory-reports file has been renamed (so scripts can detect the DMD file,
+  // if present).
+
+  nsCString dmdFilename;
+  MakeFilename("dmd", aIdentifier, ".txt.gz", dmdFilename);
+
+  nsCOMPtr<nsIFile> dmdFile;
+  rv = OpenTempFile(dmdFilename, getter_AddRefs(dmdFile));
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  nsRefPtr<nsGZFileWriter> dmdWriter = new nsGZFileWriter();
+  rv = dmdWriter->Init(dmdFile);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  // Dump DMD output to the file.
+
+  DMDWriteState state(dmdWriter);
+  dmd::Writer w(DMDWrite, &state);
+  mozilla::dmd::Dump(w);
+
+  rv = dmdWriter->Finish();
+  NS_ENSURE_SUCCESS(rv, rv);
+#endif  // MOZ_DMD
+
   // Rename the file, now that we're done dumping the report.  The file's
   // ultimate destination is "memory-report<-identifier>-<pid>.json.gz".
 
-  nsCOMPtr<nsIFile> dstFile;
-  rv = NS_GetSpecialDirectory(NS_OS_TEMP_DIR, getter_AddRefs(dstFile));
+  nsCOMPtr<nsIFile> mrFinalFile;
+  rv = NS_GetSpecialDirectory(NS_OS_TEMP_DIR, getter_AddRefs(mrFinalFile));
   NS_ENSURE_SUCCESS(rv, rv);
 
-  rv = dstFile->AppendNative(filename);
+  rv = mrFinalFile->AppendNative(mrFilename);
   NS_ENSURE_SUCCESS(rv, rv);
 
-  rv = dstFile->CreateUnique(nsIFile::NORMAL_FILE_TYPE, 0600);
+  rv = mrFinalFile->CreateUnique(nsIFile::NORMAL_FILE_TYPE, 0600);
   NS_ENSURE_SUCCESS(rv, rv);
 
-  nsAutoString dstFileName;
-  rv = dstFile->GetLeafName(dstFileName);
+  nsAutoString mrActualFinalFilename;
+  rv = mrFinalFile->GetLeafName(mrActualFinalFilename);
   NS_ENSURE_SUCCESS(rv, rv);
 
-  rv = tmpFile->MoveTo(/* directory */ nullptr, dstFileName);
+  rv = mrTmpFile->MoveTo(/* directory */ nullptr, mrActualFinalFilename);
   NS_ENSURE_SUCCESS(rv, rv);
 
   nsCOMPtr<nsIConsoleService> cs =
     do_GetService(NS_CONSOLESERVICE_CONTRACTID, &rv);
   NS_ENSURE_SUCCESS(rv, rv);
 
   nsString path;
-  tmpFile->GetPath(path);
+  mrTmpFile->GetPath(path);
   NS_ENSURE_SUCCESS(rv, rv);
 
   nsString msg = NS_LITERAL_STRING(
     "nsIMemoryInfoDumper dumped reports to ");
   msg.Append(path);
   return cs->LogStringMessage(msg.get());
 }
 
--- a/xpcom/base/nsMemoryReporterManager.cpp
+++ b/xpcom/base/nsMemoryReporterManager.cpp
@@ -705,17 +705,17 @@ struct MemoryReport {
     ~MemoryReport() 
     {
         MOZ_COUNT_DTOR(MemoryReport);
     }
     const nsCString path;
     int64_t amount;
 };
 
-#ifdef DEBUG
+#if defined(DEBUG) && !defined(MOZ_DMD)
 // This is just a wrapper for int64_t that implements nsISupports, so it can be
 // passed to nsIMemoryMultiReporter::CollectReports.
 class Int64Wrapper MOZ_FINAL : public nsISupports {
 public:
     NS_DECL_ISUPPORTS
     Int64Wrapper() : mValue(0) { }
     int64_t mValue;
 };
@@ -741,17 +741,17 @@ public:
         }
         return NS_OK;
     }
 };
 NS_IMPL_ISUPPORTS1(
   ExplicitNonHeapCountingCallback
 , nsIMemoryMultiReporterCallback
 )
-#endif
+#endif  // defined(DEBUG) && !defined(MOZ_DMD)
 
 NS_IMETHODIMP
 nsMemoryReporterManager::GetExplicit(int64_t *aExplicit)
 {
     NS_ENSURE_ARG_POINTER(aExplicit);
     *aExplicit = 0;
 #ifndef HAVE_JEMALLOC_STATS
     return NS_ERROR_NOT_AVAILABLE;
@@ -800,31 +800,33 @@ nsMemoryReporterManager::GetExplicit(int
     // For each multi-reporter we could call CollectReports and filter out the
     // non-explicit, non-NONHEAP measurements.  But that's lots of wasted work,
     // so we instead use GetExplicitNonHeap() which exists purely for this
     // purpose.
     //
     // (Actually, in debug builds we also do it the slow way and compare the
     // result to the result obtained from GetExplicitNonHeap().  This
     // guarantees the two measurement paths are equivalent.  This is wise
-    // because it's easy for memory reporters to have bugs.)
+    // because it's easy for memory reporters to have bugs.  But there's an
+    // exception if DMD is enabled, because that makes DMD think that all the
+    // blocks are double-counted.)
 
     int64_t explicitNonHeapMultiSize = 0;
     nsCOMPtr<nsISimpleEnumerator> e2;
     EnumerateMultiReporters(getter_AddRefs(e2));
     while (NS_SUCCEEDED(e2->HasMoreElements(&more)) && more) {
       nsCOMPtr<nsIMemoryMultiReporter> r;
       e2->GetNext(getter_AddRefs(r));
       int64_t n;
       rv = r->GetExplicitNonHeap(&n);
       NS_ENSURE_SUCCESS(rv, rv);
       explicitNonHeapMultiSize += n;
     }
 
-#ifdef DEBUG
+#if defined(DEBUG) && !defined(MOZ_DMD)
     nsRefPtr<ExplicitNonHeapCountingCallback> cb =
       new ExplicitNonHeapCountingCallback();
     nsRefPtr<Int64Wrapper> wrappedExplicitNonHeapMultiSize2 =
       new Int64Wrapper();
     nsCOMPtr<nsISimpleEnumerator> e3;
     EnumerateMultiReporters(getter_AddRefs(e3));
     while (NS_SUCCEEDED(e3->HasMoreElements(&more)) && more) {
       nsCOMPtr<nsIMemoryMultiReporter> r;
@@ -837,17 +839,17 @@ nsMemoryReporterManager::GetExplicit(int
     // NS_ASSERTION but they occasionally don't match due to races (bug
     // 728990).
     if (explicitNonHeapMultiSize != explicitNonHeapMultiSize2) {
         NS_WARNING(nsPrintfCString("The two measurements of 'explicit' memory "
                                    "usage don't match (%lld vs %lld)",
                                    explicitNonHeapMultiSize,
                                    explicitNonHeapMultiSize2).get());
     }
-#endif  // DEBUG
+#endif  // defined(DEBUG) && !defined(MOZ_DMD)
 
     *aExplicit = heapAllocated + explicitNonHeapNormalSize + explicitNonHeapMultiSize;
     return NS_OK;
 #endif // HAVE_JEMALLOC_STATS
 }
 
 NS_IMETHODIMP
 nsMemoryReporterManager::GetHasMozMallocUsableSize(bool *aHas)
@@ -1012,19 +1014,20 @@ nsresult
 NS_UnregisterMemoryMultiReporter (nsIMemoryMultiReporter *reporter)
 {
     nsCOMPtr<nsIMemoryReporterManager> mgr = do_GetService("@mozilla.org/memory-reporter-manager;1");
     if (mgr == nullptr)
         return NS_ERROR_FAILURE;
     return mgr->UnregisterMultiReporter(reporter);
 }
 
-namespace mozilla {
+#if defined(MOZ_DMDV) || defined(MOZ_DMD)
 
-#ifdef MOZ_DMDV
+namespace mozilla {
+namespace dmd {
 
 class NullMultiReporterCallback : public nsIMemoryMultiReporterCallback
 {
 public:
     NS_DECL_ISUPPORTS
 
     NS_IMETHOD Callback(const nsACString &aProcess, const nsACString &aPath,
                         int32_t aKind, int32_t aUnits, int64_t aAmount,
@@ -1036,17 +1039,17 @@ public:
     }
 };
 NS_IMPL_ISUPPORTS1(
   NullMultiReporterCallback
 , nsIMemoryMultiReporterCallback
 )
 
 void
-DMDVCheckAndDump()
+RunReporters()
 {
     nsCOMPtr<nsIMemoryReporterManager> mgr =
         do_GetService("@mozilla.org/memory-reporter-manager;1");
 
     // Do vanilla reporters.
     nsCOMPtr<nsISimpleEnumerator> e;
     mgr->EnumerateReporters(getter_AddRefs(e));
     bool more;
@@ -1084,15 +1087,30 @@ DMDVCheckAndDump()
     nsCOMPtr<nsISimpleEnumerator> e2;
     mgr->EnumerateMultiReporters(getter_AddRefs(e2));
     nsRefPtr<NullMultiReporterCallback> cb = new NullMultiReporterCallback();
     while (NS_SUCCEEDED(e2->HasMoreElements(&more)) && more) {
       nsCOMPtr<nsIMemoryMultiReporter> r;
       e2->GetNext(getter_AddRefs(r));
       r->CollectReports(cb, nullptr);
     }
+}
 
+} // namespace dmd
+} // namespace mozilla
+
+#endif  // defined(MOZ_DMDV) || defined(MOZ_DMD)
+
+#ifdef MOZ_DMDV
+namespace mozilla {
+namespace dmdv {
+
+void
+Dump()
+{
     VALGRIND_DMDV_CHECK_REPORTING;
 }
 
+} // namespace dmdv
+} // namespace mozilla
+
 #endif  /* defined(MOZ_DMDV) */
 
-}
--- a/xpcom/base/nsStackWalk.cpp
+++ b/xpcom/base/nsStackWalk.cpp
@@ -1167,17 +1167,19 @@ NS_StackWalk(NS_WalkStackCallback aCallb
   return FramePointerStackWalk(aCallback, aSkipFrames,
                                aClosure, bp, stackEnd);
 
 }
 
 #elif defined(HAVE__UNWIND_BACKTRACE)
 
 // libgcc_s.so symbols _Unwind_Backtrace@@GCC_3.3 and _Unwind_GetIP@@GCC_3.0
+#ifndef _GNU_SOURCE
 #define _GNU_SOURCE
+#endif
 #include <unwind.h>
 
 struct unwind_info {
     NS_WalkStackCallback callback;
     int skip;
     void *closure;
 };