Bug 820652 (part 3) - DMD: Distinguish BlockSize and GroupSize. r+a=jlebar
author Nicholas Nethercote <nnethercote@mozilla.com>
Tue, 11 Dec 2012 19:47:46 -0800
changeset 118907 1c4ab9d6959d311b0898d28dd1d4ef38c77d851c
parent 118906 2720605c97eff82071c689772e8efc32033e0d1a
child 118908 b33ca67de6137349ba6a815d905f79ed720f2a81
push id 2984
push user ryanvm@gmail.com
push date Tue, 18 Dec 2012 03:08:28 +0000
treeherder mozilla-aurora@68ae24dc739c
bugs 820652
milestone 19.0a2
Bug 820652 (part 3) - DMD: Distinguish BlockSize and GroupSize. r+a=jlebar
memory/replace/dmd/DMD.cpp
--- a/memory/replace/dmd/DMD.cpp
+++ b/memory/replace/dmd/DMD.cpp
@@ -144,16 +144,23 @@ public:
     void* mem = malloc_(sizeof(T));
     ExitOnFailure(mem);
     return new (mem) T(p1);
   }
 
   static void reportAllocOverflow() { ExitOnFailure(nullptr); }
 };
 
+// This is only needed because of the |const void*| vs |void*| arg mismatch.
+static size_t
+MallocSizeOf(const void* aPtr)
+{
+  return gMallocTable->malloc_usable_size(const_cast<void*>(aPtr));
+}
+
 static void
 StatusMsg(const char* aFmt, ...)
 {
   va_list ap;
   va_start(ap, aFmt);
 #ifdef ANDROID
   __android_log_vprint(ANDROID_LOG_INFO, "DMD", aFmt, ap);
 #else
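
MallocSizeOf() is moved up here (its old copy is deleted further down) so that BlockSize can call it.  For illustration only, a minimal standalone sketch of the same const-adapter pattern; usable_size() and SizeOfWrapper() are made-up stand-ins for gMallocTable->malloc_usable_size and the wrapper above:

// Illustration (not part of the patch): a const-correct wrapper around a
// size-query function that only accepts a non-const pointer.
#include <cstddef>
#include <cstdio>

static size_t usable_size(void* /* aPtr */) { return 16; }   // stand-in

// Callers that only hold a |const void*| go through this thin wrapper; the
// const_cast is safe because the callee never writes through the pointer.
static size_t SizeOfWrapper(const void* aPtr)
{
  return usable_size(const_cast<void*>(aPtr));
}

int main()
{
  const int x = 0;
  std::printf("usable size: %zu\n", SizeOfWrapper(&x));
  return 0;
}
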
@@ -738,72 +745,59 @@ public:
            aA.mReportStackTrace2 == aB.mReportStackTrace2 &&
            aA.mReporterName1     == aB.mReporterName1 &&
            aA.mReporterName2     == aB.mReporterName2;
   }
 };
 
 class BlockSize
 {
-  static const size_t kSlopBits = sizeof(size_t) * 8 - 1;  // 31 or 63
+  static const size_t kReqBits = sizeof(size_t) * 8 - 1;    // 31 or 63
+
+  // This assumes that we'll never request an allocation of 2 GiB or more on
+  // 32-bit platforms.
+  const size_t mReq:kReqBits;   // size requested
+  const size_t mSampled:1;      // was this block sampled?  (if so, slop == 0)
 
 public:
-  size_t mReq;              // size requested
-  size_t mSlop:kSlopBits;   // additional bytes allocated due to rounding up
-  size_t mSampled:1;        // were one or more blocks contributing to this
-                            //   BlockSize sampled?
-  BlockSize()
-    : mReq(0),
-      mSlop(0),
-      mSampled(false)
-  {}
-
-  BlockSize(size_t aReq, size_t aSlop, bool aSampled)
+  BlockSize(size_t aReq, bool aSampled)
     : mReq(aReq),
-      mSlop(aSlop),
       mSampled(aSampled)
   {}
 
-  size_t Usable() const { return mReq + mSlop; }
+  size_t Req() const { return mReq; }
 
-  void Add(const BlockSize& aBlockSize)
+  // Sampled blocks always have zero slop.
+  size_t Slop(const void* aPtr) const
   {
-    mReq  += aBlockSize.mReq;
-    mSlop += aBlockSize.mSlop;
-    mSampled = mSampled || aBlockSize.mSampled;
+    return mSampled ? 0 : MallocSizeOf(aPtr) - mReq;
   }
 
-  static int Cmp(const BlockSize& aA, const BlockSize& aB)
+  size_t Usable(const void* aPtr) const
   {
-    // Primary sort: put bigger usable sizes before smaller usable sizes.
-    if (aA.Usable() > aB.Usable()) return -1;
-    if (aA.Usable() < aB.Usable()) return  1;
+    return mSampled ? mReq : MallocSizeOf(aPtr);
+  }
 
-    // Secondary sort: put non-sampled groups before sampled groups.
-    if (!aA.mSampled &&  aB.mSampled) return -1;
-    if ( aA.mSampled && !aB.mSampled) return  1;
-
-    return 0;
-  }
+  bool IsSampled() const { return mSampled; }
 };
 
 // A live heap block.
 class LiveBlock : public LiveBlockKey
 {
 public:
   const BlockSize mBlockSize;
 
 public:
-  LiveBlock(size_t aReqSize, size_t aSlopSize,
-            const StackTrace* aAllocStackTrace, bool aIsExact)
+  LiveBlock(size_t aReqSize, const StackTrace* aAllocStackTrace, bool aSampled)
     : LiveBlockKey(aAllocStackTrace),
-      mBlockSize(aReqSize, aSlopSize, aIsExact)
+      mBlockSize(aReqSize, aSampled)
   {}
 
-  void Report(Thread* aT, const char* aReporterName, bool aReportedOnAlloc);
+  void Report(Thread* aT, const void* aPtr, const char* aReporterName,
+              bool aReportedOnAlloc);
 
   void UnreportIfNotReportedOnAlloc();
 };
 
 // Nb: js::DefaultHasher<void*> is a high quality hasher.
 typedef js::HashMap<const void*, LiveBlock, js::DefaultHasher<const void*>,
                     InfallibleAllocPolicy> LiveBlockTable;
 static LiveBlockTable* gLiveBlockTable = nullptr;
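
This is the core of the change: BlockSize no longer stores slop.  It keeps only the requested size (in all but one bit of a size_t) plus a one-bit sampled flag, and derives slop and usable size on demand from the allocator, which is why Slop() and Usable() now need the block's address.  A standalone sketch of that layout, where BlockSizeSketch, StubMallocSizeOf and gStubUsableSize are made-up stand-ins:

// Illustration only (not the DMD class itself).
#include <cstddef>
#include <cstdio>

static size_t gStubUsableSize = 0;
static size_t StubMallocSizeOf(const void*) { return gStubUsableSize; }

class BlockSizeSketch
{
  static const size_t kReqBits = sizeof(size_t) * 8 - 1;   // 31 or 63

  // Assumes no single request is ever 2 GiB or more on 32-bit platforms.
  const size_t mReq : kReqBits;   // size requested
  const size_t mSampled : 1;      // sampled blocks have no meaningful slop

public:
  BlockSizeSketch(size_t aReq, bool aSampled)
    : mReq(aReq), mSampled(aSampled)
  {}

  size_t Req() const { return mReq; }
  size_t Slop(const void* aPtr) const
  {
    return mSampled ? 0 : StubMallocSizeOf(aPtr) - mReq;
  }
  size_t Usable(const void* aPtr) const
  {
    return mSampled ? mReq : StubMallocSizeOf(aPtr);
  }
};

int main()
{
  char block[32];
  gStubUsableSize = 32;   // pretend the heap rounded a 25-byte request up to 32

  BlockSizeSketch exact(25, /* sampled */ false);
  std::printf("exact:   req=%zu slop=%zu usable=%zu\n",
              exact.Req(), exact.Slop(block), exact.Usable(block));      // 25 7 32

  BlockSizeSketch sampled(4096, /* sampled */ true);
  std::printf("sampled: req=%zu slop=%zu usable=%zu\n",
              sampled.Req(), sampled.Slop(block), sampled.Usable(block)); // 4096 0 4096
}
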
@@ -823,34 +817,32 @@ AllocCallback(void* aPtr, size_t aReqSiz
   if (!aPtr) {
     return;
   }
 
   AutoLockState lock;
   AutoBlockIntercepts block(aT);
 
   size_t actualSize = gMallocTable->malloc_usable_size(aPtr);
-  size_t slopSize   = actualSize - aReqSize;
 
   if (actualSize < gSampleBelowSize) {
     // If this allocation is smaller than the sample-below size, increment the
     // cumulative counter.  Then, if that counter now exceeds the sample size,
     // blame this allocation for gSampleBelowSize bytes.  This precludes the
     // measurement of slop.
     gSmallBlockActualSizeCounter += actualSize;
     if (gSmallBlockActualSizeCounter >= gSampleBelowSize) {
       gSmallBlockActualSizeCounter -= gSampleBelowSize;
 
-      LiveBlock b(gSampleBelowSize, /* slopSize */ 0, StackTrace::Get(aT),
-                  /* sampled */ true);
+      LiveBlock b(gSampleBelowSize, StackTrace::Get(aT), /* sampled */ true);
       (void)gLiveBlockTable->putNew(aPtr, b);
     }
   } else {
     // If this block size is larger than the sample size, record it exactly.
-    LiveBlock b(aReqSize, slopSize, StackTrace::Get(aT), /* sampled */ false);
+    LiveBlock b(aReqSize, StackTrace::Get(aT), /* sampled */ false);
     (void)gLiveBlockTable->putNew(aPtr, b);
   }
 }
 
 static void
 FreeCallback(void* aPtr, Thread* aT)
 {
   MOZ_ASSERT(gIsDMDRunning);
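
With slop no longer stored, the sampled path above simply records the block as sampled, and its slop is defined to be zero.  For illustration, a standalone walk-through of the cumulative-counter sampling scheme with made-up sizes:

// Illustration only: allocations below the sample-below size are pooled, and
// one "sampled" block charged with exactly kSampleBelowSize bytes is recorded
// each time the running total crosses that threshold.
#include <cstddef>
#include <cstdio>

int main()
{
  const size_t kSampleBelowSize = 4096;
  size_t counter = 0;            // cf. gSmallBlockActualSizeCounter
  size_t sampledBlocks = 0;

  const size_t actualSizes[] = { 1000, 3000, 500, 4000, 64 };   // hypothetical
  for (size_t size : actualSizes) {
    if (size < kSampleBelowSize) {
      counter += size;
      if (counter >= kSampleBelowSize) {
        counter -= kSampleBelowSize;
        sampledBlocks++;         // one block blamed for kSampleBelowSize bytes
      }
    } else {
      // A block at or above the threshold would be recorded exactly.
    }
  }
  std::printf("sampled blocks: %zu, leftover: %zu\n",
              sampledBlocks, counter);   // prints 2 and 372
}
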
@@ -1005,39 +997,88 @@ replace_free(void* aPtr)
 
 namespace mozilla {
 namespace dmd {
 
 //---------------------------------------------------------------------------
 // Live and double-report block groups
 //---------------------------------------------------------------------------
 
+class GroupSize
+{
+  static const size_t kReqBits = sizeof(size_t) * 8 - 1;  // 31 or 63
+
+  size_t mReq;              // size requested
+  size_t mSlop:kReqBits;    // slop bytes
+  size_t mSampled:1;        // were one or more blocks contributing to this
+                            //   GroupSize sampled?
+public:
+  GroupSize()
+    : mReq(0),
+      mSlop(0),
+      mSampled(false)
+  {}
+
+  size_t Req()    const { return mReq; }
+  size_t Slop()   const { return mSlop; }
+  size_t Usable() const { return mReq + mSlop; }
+
+  bool IsSampled() const { return mSampled; }
+
+  void Add(const void* aPtr, const BlockSize& aBlockSize)
+  {
+    mReq  += aBlockSize.Req();
+    mSlop += aBlockSize.Slop(aPtr);
+    mSampled = mSampled || aBlockSize.IsSampled();
+  }
+
+  void Add(const GroupSize& aGroupSize)
+  {
+    mReq  += aGroupSize.Req();
+    mSlop += aGroupSize.Slop();
+    mSampled = mSampled || aGroupSize.IsSampled();
+  }
+
+  static int Cmp(const GroupSize& aA, const GroupSize& aB)
+  {
+    // Primary sort: put bigger usable sizes before smaller usable sizes.
+    if (aA.Usable() > aB.Usable()) return -1;
+    if (aA.Usable() < aB.Usable()) return  1;
+
+    // Secondary sort: put non-sampled groups before sampled groups.
+    if (!aA.mSampled &&  aB.mSampled) return -1;
+    if ( aA.mSampled && !aB.mSampled) return  1;
+
+    return 0;
+  }
+};
+
 class BlockGroup
 {
 protected:
   // {Live,DoubleReport}BlockKey serve as the key in
   // {Live,DoubleReport}BlockGroupTable.  These two fields constitute the value,
   // so it's ok for them to be |mutable|.
   mutable uint32_t  mNumBlocks;     // number of blocks with this LiveBlockKey
-  mutable BlockSize mCombinedSize;  // combined size of those blocks
+  mutable GroupSize mGroupSize;     // combined size of those blocks
 
 public:
   BlockGroup()
     : mNumBlocks(0),
-      mCombinedSize()
+      mGroupSize()
   {}
 
-  const BlockSize& CombinedSize() const { return mCombinedSize; }
+  const GroupSize& GroupSize() const { return mGroupSize; }
 
   // The |const| qualifier is something of a lie, but is necessary so this type
   // can be used in js::HashSet, and it fits with the |mutable| fields above.
-  void Add(const LiveBlock& aB) const
+  void Add(const void* aPtr, const LiveBlock& aB) const
   {
     mNumBlocks++;
-    mCombinedSize.Add(aB.mBlockSize);
+    mGroupSize.Add(aPtr, aB.mBlockSize);
   }
 
   static const char* const kName;   // for PrintSortedGroups
 };
 
 const char* const BlockGroup::kName = "block";
 
 // A group of one or more live heap blocks with a common LiveBlockKey.
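
GroupSize takes over the accumulate-and-compare role that BlockSize has lost: it sums requested bytes and slop across blocks (and across whole groups), and Cmp() orders groups by usable size, breaking ties in favour of exact (non-sampled) groups.  A standalone sketch of that ordering, where GroupSizeSketch is a made-up reimplementation rather than the class above:

#include <cstddef>
#include <cstdio>

struct GroupSizeSketch
{
  size_t mReq;      // requested bytes, summed over the group
  size_t mSlop;     // slop bytes, summed over the group
  bool   mSampled;  // true if any contributing block was sampled

  size_t Usable() const { return mReq + mSlop; }

  static int Cmp(const GroupSizeSketch& aA, const GroupSizeSketch& aB)
  {
    // Primary: bigger usable sizes first.
    if (aA.Usable() > aB.Usable()) return -1;
    if (aA.Usable() < aB.Usable()) return  1;
    // Secondary: exact (non-sampled) groups before sampled ones.
    if (!aA.mSampled &&  aB.mSampled) return -1;
    if ( aA.mSampled && !aB.mSampled) return  1;
    return 0;
  }
};

int main()
{
  GroupSizeSketch exact   = { 4000, 96, false };  // usable 4096, exact
  GroupSizeSketch sampled = { 4096,  0, true  };  // usable 4096, sampled
  GroupSizeSketch small   = { 1000, 24, false };  // usable 1024

  std::printf("%d %d %d\n",
              GroupSizeSketch::Cmp(exact, sampled),  // -1: exact wins the tie
              GroupSizeSketch::Cmp(small, exact),    //  1: smaller usable sorts later
              GroupSizeSketch::Cmp(exact, exact));   //  0
}
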
@@ -1058,47 +1099,47 @@ public:
 
   static int QsortCmp(const void* aA, const void* aB)
   {
     const LiveBlockGroup* const a =
       *static_cast<const LiveBlockGroup* const*>(aA);
     const LiveBlockGroup* const b =
       *static_cast<const LiveBlockGroup* const*>(aB);
 
-    return BlockSize::Cmp(a->mCombinedSize, b->mCombinedSize);
+    return GroupSize::Cmp(a->mGroupSize, b->mGroupSize);
   }
 };
 
 typedef js::HashSet<LiveBlockGroup, LiveBlockGroup, InfallibleAllocPolicy>
         LiveBlockGroupTable;
 
 void
 LiveBlockGroup::Print(const Writer& aWriter, uint32_t aM, uint32_t aN,
                       const char* aStr, const char* astr,
                       size_t aCategoryUsableSize, size_t aCumulativeUsableSize,
                       size_t aTotalUsableSize) const
 {
-  bool showTilde = mCombinedSize.mSampled;
+  bool showTilde = mGroupSize.IsSampled();
 
   W("%s: %s block%s in block group %s of %s\n",
     aStr,
     Show(mNumBlocks, gBuf1, kBufLen, showTilde), Plural(mNumBlocks),
     Show(aM, gBuf2, kBufLen),
     Show(aN, gBuf3, kBufLen));
 
   W(" %s bytes (%s requested / %s slop)\n",
-    Show(mCombinedSize.Usable(), gBuf1, kBufLen, showTilde),
-    Show(mCombinedSize.mReq,     gBuf2, kBufLen, showTilde),
-    Show(mCombinedSize.mSlop,    gBuf3, kBufLen, showTilde));
+    Show(mGroupSize.Usable(), gBuf1, kBufLen, showTilde),
+    Show(mGroupSize.Req(),    gBuf2, kBufLen, showTilde),
+    Show(mGroupSize.Slop(),   gBuf3, kBufLen, showTilde));
 
   W(" %4.2f%% of the heap (%4.2f%% cumulative); "
     " %4.2f%% of %s (%4.2f%% cumulative)\n",
-    Percent(mCombinedSize.Usable(), aTotalUsableSize),
+    Percent(mGroupSize.Usable(), aTotalUsableSize),
     Percent(aCumulativeUsableSize, aTotalUsableSize),
-    Percent(mCombinedSize.Usable(), aCategoryUsableSize),
+    Percent(mGroupSize.Usable(), aCategoryUsableSize),
     astr,
     Percent(aCumulativeUsableSize, aCategoryUsableSize));
 
   W(" Allocated at\n");
   mAllocStackTrace->Print(aWriter);
 
   if (IsReported()) {
     W("\n Reported by '%s' at\n", mReporterName);
@@ -1125,43 +1166,43 @@ public:
 
   static int QsortCmp(const void* aA, const void* aB)
   {
     const DoubleReportBlockGroup* const a =
       *static_cast<const DoubleReportBlockGroup* const*>(aA);
     const DoubleReportBlockGroup* const b =
       *static_cast<const DoubleReportBlockGroup* const*>(aB);
 
-    return BlockSize::Cmp(a->mCombinedSize, b->mCombinedSize);
+    return GroupSize::Cmp(a->mGroupSize, b->mGroupSize);
   }
 };
 
 typedef js::HashSet<DoubleReportBlockGroup, DoubleReportBlockGroup,
                     InfallibleAllocPolicy> DoubleReportBlockGroupTable;
 DoubleReportBlockGroupTable* gDoubleReportBlockGroupTable = nullptr;
 
 void
 DoubleReportBlockGroup::Print(const Writer& aWriter, uint32_t aM, uint32_t aN,
                               const char* aStr, const char* astr,
                               size_t aCategoryUsableSize,
                               size_t aCumulativeUsableSize,
                               size_t aTotalUsableSize) const
 {
-  bool showTilde = mCombinedSize.mSampled;
+  bool showTilde = mGroupSize.IsSampled();
 
   W("%s: %s block%s in block group %s of %s\n",
     aStr,
     Show(mNumBlocks, gBuf1, kBufLen, showTilde), Plural(mNumBlocks),
     Show(aM, gBuf2, kBufLen),
     Show(aN, gBuf3, kBufLen));
 
   W(" %s bytes (%s requested / %s slop)\n",
-    Show(mCombinedSize.Usable(), gBuf1, kBufLen, showTilde),
-    Show(mCombinedSize.mReq,     gBuf2, kBufLen, showTilde),
-    Show(mCombinedSize.mSlop,    gBuf3, kBufLen, showTilde));
+    Show(mGroupSize.Usable(), gBuf1, kBufLen, showTilde),
+    Show(mGroupSize.Req(),    gBuf2, kBufLen, showTilde),
+    Show(mGroupSize.Slop(),   gBuf3, kBufLen, showTilde));
 
   W(" Allocated at\n");
   mAllocStackTrace->Print(aWriter);
 
   W("\n Previously reported by '%s' at\n", mReporterName1);
   mReportStackTrace1->Print(aWriter);
 
   W("\n Now reported by '%s' at\n", mReporterName2);
@@ -1178,48 +1219,48 @@ DoubleReportBlockGroup::Print(const Writ
 // traces) with a common PC.
 class FrameGroup
 {
   // mPc is used as the key in FrameGroupTable, and the other members
   // constitute the value, so it's ok for them to be |mutable|.
   const void* const mPc;
   mutable size_t    mNumBlocks;
   mutable size_t    mNumBlockGroups;
-  mutable BlockSize mCombinedSize;
+  mutable GroupSize mGroupSize;
 
 public:
   explicit FrameGroup(const void* aPc)
     : mPc(aPc),
       mNumBlocks(0),
       mNumBlockGroups(0),
-      mCombinedSize()
+      mGroupSize()
   {}
 
-  const BlockSize& CombinedSize() const { return mCombinedSize; }
+  const GroupSize& GroupSize() const { return mGroupSize; }
 
   // The |const| qualifier is something of a lie, but is necessary so this type
   // can be used in js::HashSet, and it fits with the |mutable| fields above.
   void Add(const LiveBlockGroup& aBg) const
   {
     mNumBlocks += aBg.mNumBlocks;
     mNumBlockGroups++;
-    mCombinedSize.Add(aBg.mCombinedSize);
+    mGroupSize.Add(aBg.mGroupSize);
   }
 
   void Print(const Writer& aWriter, uint32_t aM, uint32_t aN,
              const char* aStr, const char* astr,
              size_t aCategoryUsableSize, size_t aCumulativeUsableSize,
              size_t aTotalUsableSize) const;
 
   static int QsortCmp(const void* aA, const void* aB)
   {
     const FrameGroup* const a = *static_cast<const FrameGroup* const*>(aA);
     const FrameGroup* const b = *static_cast<const FrameGroup* const*>(aB);
 
-    return BlockSize::Cmp(a->mCombinedSize, b->mCombinedSize);
+    return GroupSize::Cmp(a->mGroupSize, b->mGroupSize);
   }
 
   static const char* const kName;   // for PrintSortedGroups
 
   // Hash policy
 
   typedef const void* Lookup;
 
@@ -1242,36 +1283,36 @@ typedef js::HashSet<FrameGroup, FrameGro
 void
 FrameGroup::Print(const Writer& aWriter, uint32_t aM, uint32_t aN,
                   const char* aStr, const char* astr,
                   size_t aCategoryUsableSize, size_t aCumulativeUsableSize,
                   size_t aTotalUsableSize) const
 {
   (void)aCumulativeUsableSize;
 
-  bool showTilde = mCombinedSize.mSampled;
+  bool showTilde = mGroupSize.IsSampled();
 
   nsCodeAddressDetails details;
   PcInfo(mPc, &details);
 
   W("%s: %s block%s and %s block group%s in frame group %s of %s\n",
     aStr,
     Show(mNumBlocks, gBuf1, kBufLen, showTilde), Plural(mNumBlocks),
     Show(mNumBlockGroups, gBuf2, kBufLen, showTilde), Plural(mNumBlockGroups),
     Show(aM, gBuf3, kBufLen),
     Show(aN, gBuf4, kBufLen));
 
   W(" %s bytes (%s requested / %s slop)\n",
-    Show(mCombinedSize.Usable(), gBuf1, kBufLen, showTilde),
-    Show(mCombinedSize.mReq,     gBuf2, kBufLen, showTilde),
-    Show(mCombinedSize.mSlop,    gBuf3, kBufLen, showTilde));
+    Show(mGroupSize.Usable(), gBuf1, kBufLen, showTilde),
+    Show(mGroupSize.Req(),    gBuf2, kBufLen, showTilde),
+    Show(mGroupSize.Slop(),   gBuf3, kBufLen, showTilde));
 
   W(" %4.2f%% of the heap;  %4.2f%% of %s\n",
-    Percent(mCombinedSize.Usable(), aTotalUsableSize),
-    Percent(mCombinedSize.Usable(), aCategoryUsableSize),
+    Percent(mGroupSize.Usable(), aTotalUsableSize),
+    Percent(mGroupSize.Usable(), aCategoryUsableSize),
     astr);
 
   W(" PC is\n");
   W("   %14p %s[%s +0x%X]\n\n", mPc, details.function, details.library,
     details.loffset);
 }
 
 //---------------------------------------------------------------------------
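
FrameGroup, like the block groups, now delegates its comparison to GroupSize::Cmp.  All the QsortCmp wrappers use the same double indirection because the sort operates on an array of group pointers.  A standalone sketch of that qsort pattern, where Group and mUsable are made-up stand-ins:

#include <cstddef>
#include <cstdio>
#include <cstdlib>

struct Group
{
  const char* mName;
  size_t      mUsable;   // stands in for GroupSize().Usable()
};

// Same shape as the QsortCmp wrappers above: qsort passes pointers to the
// array elements, and the elements are themselves Group pointers, hence the
// double indirection.
static int QsortCmp(const void* aA, const void* aB)
{
  const Group* const a = *static_cast<const Group* const*>(aA);
  const Group* const b = *static_cast<const Group* const*>(aB);
  if (a->mUsable > b->mUsable) return -1;   // bigger usable sizes first
  if (a->mUsable < b->mUsable) return  1;
  return 0;
}

int main()
{
  Group g1 = { "small", 1024 }, g2 = { "big", 8192 }, g3 = { "mid", 4096 };
  const Group* groups[] = { &g1, &g2, &g3 };

  std::qsort(groups, 3, sizeof(groups[0]), QsortCmp);
  for (const Group* g : groups) {
    std::printf("%s: %zu\n", g->mName, g->mUsable);   // big, mid, small order
  }
}
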
@@ -1490,29 +1531,30 @@ Init(const malloc_table_t* aMallocTable)
   gIsDMDRunning = true;
 }
 
 //---------------------------------------------------------------------------
 // DMD reporting and unreporting
 //---------------------------------------------------------------------------
 
 void
-LiveBlock::Report(Thread* aT, const char* aReporterName, bool aOnAlloc)
+LiveBlock::Report(Thread* aT, const void* aPtr, const char* aReporterName,
+                  bool aOnAlloc)
 {
   if (IsReported()) {
     DoubleReportBlockKey doubleReportKey(mAllocStackTrace,
                                          mReportStackTrace, StackTrace::Get(aT),
                                          mReporterName, aReporterName);
     DoubleReportBlockGroupTable::AddPtr p =
       gDoubleReportBlockGroupTable->lookupForAdd(doubleReportKey);
     if (!p) {
       DoubleReportBlockGroup bg(doubleReportKey);
       (void)gDoubleReportBlockGroupTable->add(p, bg);
     }
-    p->Add(*this);
+    p->Add(aPtr, *this);
 
   } else {
     mReporterName     = aReporterName;
     mReportStackTrace = StackTrace::Get(aT);
     mReportedOnAlloc  = aOnAlloc;
   }
 }
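
Report() now also takes the block's address so the double-report path can size its group via GroupSize::Add().  The branch itself is simple: the first report records the reporter, any later report is folded into the double-report table instead of overwriting the first.  A standalone sketch of just that branch, with made-up reporter names:

#include <cstdio>
#include <string>
#include <vector>

// Per-block report state, mirroring the if/else above.
struct BlockSketch
{
  std::string mReporterName;   // empty means "not yet reported"
  bool IsReported() const { return !mReporterName.empty(); }
};

int main()
{
  BlockSketch block;
  std::vector<std::string> doubleReports;   // stands in for the group table

  auto report = [&](const std::string& aReporterName) {
    if (block.IsReported()) {
      doubleReports.push_back(block.mReporterName + " then " + aReporterName);
    } else {
      block.mReporterName = aReporterName;
    }
  };

  report("reporter-a");   // made-up reporter names
  report("reporter-b");
  std::printf("double reports: %zu (first: %s)\n",
              doubleReports.size(), doubleReports[0].c_str());
  // prints: double reports: 1 (first: reporter-a then reporter-b)
}
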
 
@@ -1533,17 +1575,17 @@ ReportHelper(const void* aPtr, const cha
   }
 
   Thread* t = Thread::Fetch();
 
   AutoBlockIntercepts block(t);
   AutoLockState lock;
 
   if (LiveBlockTable::Ptr p = gLiveBlockTable->lookup(aPtr)) {
-    p->value.Report(t, aReporterName, aOnAlloc);
+    p->value.Report(t, aPtr, aReporterName, aOnAlloc);
   } else {
     // We have no record of the block.  Do nothing.  Either:
     // - We're sampling and we skipped this block.  This is likely.
     // - It's a bogus pointer.  This is unlikely because Report() is almost
     //   always called in conjunction with a malloc_size_of-style function.
   }
 }
 
@@ -1597,17 +1639,17 @@ PrintSortedGroups(const Writer& aWriter,
   // to keep adding to |cumulativeUsableSize|.
   static const uint32_t MaxTGroups = 1000;
   uint32_t numTGroups = tgArray.length();
 
   StatusMsg("  printing %s %s group array...\n", astr, name);
   size_t cumulativeUsableSize = 0;
   for (uint32_t i = 0; i < numTGroups; i++) {
     const TGroup* tg = tgArray[i];
-    cumulativeUsableSize += tg->CombinedSize().Usable();
+    cumulativeUsableSize += tg->GroupSize().Usable();
     if (i < MaxTGroups) {
       tg->Print(aWriter, i+1, numTGroups, aStr, astr, aCategoryUsableSize,
                 cumulativeUsableSize, aTotalUsableSize);
     } else if (i == MaxTGroups) {
       W("%s: stopping after %s %s groups\n\n", aStr,
         Show(MaxTGroups, gBuf1, kBufLen), name);
     }
   }
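
MaxTGroups caps how many groups get printed, but cumulativeUsableSize keeps accumulating past the cap so the cumulative percentages stay correct.  A standalone sketch of that cap-and-accumulate pattern, with a made-up cap of 3 and made-up sizes:

#include <cstddef>
#include <cstdio>

int main()
{
  const unsigned kMaxPrinted = 3;    // stand-in for MaxTGroups (1000 above)
  const size_t usable[] = { 500, 300, 200, 100, 50 };   // made-up group sizes
  size_t cumulative = 0;

  for (unsigned i = 0; i < sizeof(usable) / sizeof(usable[0]); i++) {
    cumulative += usable[i];         // always accumulate, even past the cap
    if (i < kMaxPrinted) {
      std::printf("group %u: %zu bytes (%zu cumulative)\n",
                  i + 1, usable[i], cumulative);
    } else if (i == kMaxPrinted) {
      std::printf("stopping after %u groups\n", kMaxPrinted);
    }
  }
  // 'cumulative' now covers all five groups, so cumulative percentages stay
  // correct even though only three groups were printed.
}
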
@@ -1659,23 +1701,16 @@ PrintSortedBlockAndFrameGroups(const Wri
       }
       p->Add(bg);
     }
   }
   PrintSortedGroups(aWriter, aStr, astr, frameGroupTable, kNoSize,
                     aTotalUsableSize);
 }
 
-// This is only needed because of the |const void*| vs |void*| arg mismatch.
-static size_t
-MallocSizeOf(const void* aPtr)
-{
-  return gMallocTable->malloc_usable_size(const_cast<void*>(aPtr));
-}
-
 // Note that, unlike most SizeOf* functions, this function does not take a
 // |nsMallocSizeOfFun| argument.  That's because those arguments are primarily
 // to help DMD track heap blocks... but DMD deliberately doesn't track heap
 // blocks it allocated for itself!
 MOZ_EXPORT void
 SizeOf(Sizes* aSizes)
 {
   aSizes->mStackTraces = 0;
@@ -1737,32 +1772,33 @@ Dump(Writer aWriter)
   (void)reportedLiveBlockGroupTable.init(1024);
   size_t reportedUsableSize = 0;
 
   bool anyBlocksSampled = false;
 
   for (LiveBlockTable::Range r = gLiveBlockTable->all();
        !r.empty();
        r.popFront()) {
+    const void* pc = r.front().key;
     const LiveBlock& b = r.front().value;
 
     size_t& size = !b.IsReported() ? unreportedUsableSize : reportedUsableSize;
-    size += b.mBlockSize.Usable();
+    size += b.mBlockSize.Usable(pc);
 
     LiveBlockGroupTable& table = !b.IsReported()
                                ? unreportedLiveBlockGroupTable
                                : reportedLiveBlockGroupTable;
     LiveBlockGroupTable::AddPtr p = table.lookupForAdd(b);
     if (!p) {
       LiveBlockGroup bg(b);
       (void)table.add(p, bg);
     }
-    p->Add(b);
+    p->Add(pc, b);
 
-    anyBlocksSampled = anyBlocksSampled || b.mBlockSize.mSampled;
+    anyBlocksSampled = anyBlocksSampled || b.mBlockSize.IsSampled();
   }
   size_t totalUsableSize = unreportedUsableSize + reportedUsableSize;
 
   WriteTitle("Invocation\n");
   W("$DMD = '%s'\n", gDMDEnvVar);
   W("Sample-below size = %lld\n\n", (long long)(gSampleBelowSize));
 
   PrintSortedGroups(aWriter, "Double-reported", "double-reported",