Bug 1131600 - Add telemetry probes to get detailed disk cache hit rate, r=mcmanus
author Michal Novotny <michal.novotny@gmail.com>
Mon, 23 Feb 2015 12:26:06 +0100
changeset 230329 4c63c386c39e5923318693cf662f68f1e7913144
parent 230328 2a21df64de7d090015e20dd413b1f54af4a271ce
child 230330 9429baa55ac29aa16838575cb46e8b12625b79fc
push id 28322
push user kwierso@gmail.com
push date Tue, 24 Feb 2015 00:05:31 +0000
treeherder mozilla-central@368c62292249 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers mcmanus
bugs 1131600
milestone 38.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1131600 - Add telemetry probes to get detailed disk cache hit rate, r=mcmanus
netwerk/cache2/CacheEntry.cpp
netwerk/cache2/CacheFileUtils.cpp
netwerk/cache2/CacheFileUtils.h
netwerk/cache2/CacheIndex.cpp
netwerk/cache2/CacheIndex.h
toolkit/components/telemetry/Histograms.json
--- a/netwerk/cache2/CacheEntry.cpp
+++ b/netwerk/cache2/CacheEntry.cpp
@@ -407,24 +407,21 @@ NS_IMETHODIMP CacheEntry::OnFileReady(ns
 {
   LOG(("CacheEntry::OnFileReady [this=%p, rv=0x%08x, new=%d]",
       this, aResult, aIsNew));
 
   MOZ_ASSERT(!mLoadStart.IsNull());
 
   if (NS_SUCCEEDED(aResult)) {
     if (aIsNew) {
-      mozilla::Telemetry::AccumulateTimeDelta(
-        mozilla::Telemetry::NETWORK_CACHE_V2_MISS_TIME_MS,
-        mLoadStart);
-    }
-    else {
-      mozilla::Telemetry::AccumulateTimeDelta(
-        mozilla::Telemetry::NETWORK_CACHE_V2_HIT_TIME_MS,
-        mLoadStart);
+      CacheFileUtils::DetailedCacheHitTelemetry::AddRecord(
+        CacheFileUtils::DetailedCacheHitTelemetry::MISS, mLoadStart);
+    } else {
+      CacheFileUtils::DetailedCacheHitTelemetry::AddRecord(
+        CacheFileUtils::DetailedCacheHitTelemetry::HIT, mLoadStart);
     }
   }
 
   // OnFileReady, that is the only code that can transit from LOADING
   // to any follow-on state, can only be invoked ones on an entry,
   // thus no need to lock.  Until this moment there is no consumer that
   // could manipulate the entry state.
   mozilla::MutexAutoLock lock(mLock);
--- a/netwerk/cache2/CacheFileUtils.cpp
+++ b/netwerk/cache2/CacheFileUtils.cpp
@@ -419,11 +419,121 @@ ValidityMap::SizeOfExcludingThis(mozilla
 }
 
 ValidityPair&
 ValidityMap::operator[](uint32_t aIdx)
 {
   return mMap.ElementAt(aIdx);
 }
 
+StaticMutex DetailedCacheHitTelemetry::sLock;
+uint32_t DetailedCacheHitTelemetry::sRecordCnt = 0;
+DetailedCacheHitTelemetry::HitRate DetailedCacheHitTelemetry::sHRStats[kNumOfRanges];
+
+DetailedCacheHitTelemetry::HitRate::HitRate()
+{
+  Reset();
+}
+
+void
+DetailedCacheHitTelemetry::HitRate::AddRecord(ERecType aType)
+{
+  if (aType == HIT) {
+    ++mHitCnt;
+  } else {
+    ++mMissCnt;
+  }
+}
+
+uint32_t
+DetailedCacheHitTelemetry::HitRate::GetHitRateBucket(uint32_t aNumOfBuckets) const
+{
+  uint32_t bucketIdx = (aNumOfBuckets * mHitCnt) / (mHitCnt + mMissCnt);
+  if (bucketIdx == aNumOfBuckets) { // make sure 100% falls into the last bucket
+    --bucketIdx;
+  }
+
+  return bucketIdx;
+}
+
+uint32_t
+DetailedCacheHitTelemetry::HitRate::Count()
+{
+  return mHitCnt + mMissCnt;
+}
+
+void
+DetailedCacheHitTelemetry::HitRate::Reset()
+{
+  mHitCnt = 0;
+  mMissCnt = 0;
+}
+
+// static
+void
+DetailedCacheHitTelemetry::AddRecord(ERecType aType, TimeStamp aLoadStart)
+{
+  bool isUpToDate = false;
+  CacheIndex::IsUpToDate(&isUpToDate);
+  if (!isUpToDate) {
+    // Ignore the record when the entry file count might be incorrect
+    return;
+  }
+
+  uint32_t entryCount;
+  nsresult rv = CacheIndex::GetEntryFileCount(&entryCount);
+  if (NS_FAILED(rv)) {
+    return;
+  }
+
+  uint32_t rangeIdx = entryCount / kRangeSize;
+  if (rangeIdx >= kNumOfRanges) { // The last range has no upper limit.
+    rangeIdx = kNumOfRanges - 1;
+  }
+
+  uint32_t hitMissValue = 2 * rangeIdx; // 2 values per range
+  if (aType == MISS) { // The order is HIT, MISS
+    ++hitMissValue;
+  }
+
+  StaticMutexAutoLock lock(sLock);
+
+  if (aType == MISS) {
+    mozilla::Telemetry::AccumulateTimeDelta(
+      mozilla::Telemetry::NETWORK_CACHE_V2_MISS_TIME_MS,
+      aLoadStart);
+  } else {
+    mozilla::Telemetry::AccumulateTimeDelta(
+      mozilla::Telemetry::NETWORK_CACHE_V2_HIT_TIME_MS,
+      aLoadStart);
+  }
+
+  Telemetry::Accumulate(Telemetry::NETWORK_CACHE_HIT_MISS_STAT_PER_CACHE_SIZE,
+                        hitMissValue);
+
+  sHRStats[rangeIdx].AddRecord(aType);
+  ++sRecordCnt;
+
+  if (sRecordCnt < kTotalSamplesReportLimit) {
+    return;
+  }
+
+  sRecordCnt = 0;
+
+  for (uint32_t i = 0; i < kNumOfRanges; ++i) {
+    if (sHRStats[i].Count() >= kHitRateSamplesReportLimit) {
+      // The telemetry enums are grouped by buckets as follows:
+      // Telemetry value : 0,1,2,3, ... ,19,20,21,22, ... ,398,399
+      // Hit rate bucket : 0,0,0,0, ... , 0, 1, 1, 1, ... , 19, 19
+      // Cache size range: 0,1,2,3, ... ,19, 0, 1, 2, ... , 18, 19
+      uint32_t bucketOffset = sHRStats[i].GetHitRateBucket(kHitRateBuckets) *
+                              kNumOfRanges;
+
+      Telemetry::Accumulate(Telemetry::NETWORK_CACHE_HIT_RATE_PER_CACHE_SIZE,
+                            bucketOffset + i);
+      sHRStats[i].Reset();
+    }
+  }
+}
+
 } // CacheFileUtils
 } // net
 } // mozilla
--- a/netwerk/cache2/CacheFileUtils.h
+++ b/netwerk/cache2/CacheFileUtils.h
@@ -4,16 +4,18 @@
 
 #ifndef CacheFileUtils__h__
 #define CacheFileUtils__h__
 
 #include "nsError.h"
 #include "nsCOMPtr.h"
 #include "nsString.h"
 #include "nsTArray.h"
+#include "mozilla/StaticMutex.h"
+#include "mozilla/TimeStamp.h"
 
 class nsILoadContextInfo;
 class nsACString;
 
 namespace mozilla {
 namespace net {
 namespace CacheFileUtils {
 
@@ -81,13 +83,71 @@ public:
   size_t SizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) const;
 
   ValidityPair& operator[](uint32_t aIdx);
 
 private:
   nsTArray<ValidityPair> mMap;
 };
 
+
+class DetailedCacheHitTelemetry {
+public:
+  enum ERecType {
+    HIT  = 0,
+    MISS = 1
+  };
+
+  static void AddRecord(ERecType aType, TimeStamp aLoadStart);
+
+private:
+  class HitRate {
+  public:
+    HitRate();
+
+    void     AddRecord(ERecType aType);
+    // Returns the bucket index that the current hit rate falls into according
+    // to the given aNumOfBuckets.
+    uint32_t GetHitRateBucket(uint32_t aNumOfBuckets) const;
+    uint32_t Count();
+    void     Reset();
+
+  private:
+    uint32_t mHitCnt;
+    uint32_t mMissCnt;
+  };
+
+  // Group the hits and misses statistics by cache files count ranges (0-4999,
+  // 5000-9999, ... , 95000- )
+  static const uint32_t kRangeSize = 5000;
+  static const uint32_t kNumOfRanges = 20;
+
+  // Use the same ranges to report an average hit rate. Report the hit rates
+  // (and reset the counters) every kTotalSamplesReportLimit samples.
+  static const uint32_t kTotalSamplesReportLimit = 1000;
+
+  // Report hit rate for a given cache size range only if it contains
+  // kHitRateSamplesReportLimit or more samples. This limit should avoid
+  // reporting biased statistics.
+  static const uint32_t kHitRateSamplesReportLimit = 500;
+
+  // All hit rates are accumulated in a single telemetry probe, so to use
+  // a sane number of enumerated values the hit rate is divided into buckets
+  // instead of using a percent value. This constant defines the number of
+  // buckets that we divide the hit rates into. I.e. we'll report ranges
+  // 0%-5%, 5%-10%, 10%-15%, ...
+  static const uint32_t kHitRateBuckets = 20;
+
+  // Protects sRecordCnt, sHRStats and Telemetry::Accumulate() calls.
+  static StaticMutex sLock;
+
+  // Counter of samples that is compared against kTotalSamplesReportLimit.
+  static uint32_t sRecordCnt;
+ 
+  // Hit rate statistics for every cache size range.
+  static HitRate sHRStats[kNumOfRanges];
+};
+
 } // CacheFileUtils
 } // net
 } // mozilla
 
 #endif
--- a/netwerk/cache2/CacheIndex.cpp
+++ b/netwerk/cache2/CacheIndex.cpp
@@ -1306,16 +1306,39 @@ CacheIndex::GetCacheSize(uint32_t *_retv
 
   *_retval = index->mIndexStats.Size();
   LOG(("CacheIndex::GetCacheSize() - returning %u", *_retval));
   return NS_OK;
 }
 
 // static
 nsresult
+CacheIndex::GetEntryFileCount(uint32_t *_retval)
+{
+  LOG(("CacheIndex::GetEntryFileCount()"));
+
+  nsRefPtr<CacheIndex> index = gInstance;
+
+  if (!index) {
+    return NS_ERROR_NOT_INITIALIZED;
+  }
+
+  CacheIndexAutoLock lock(index);
+
+  if (!index->IsIndexUsable()) {
+    return NS_ERROR_NOT_AVAILABLE;
+  }
+
+  *_retval = index->mIndexStats.ActiveEntriesCount();
+  LOG(("CacheIndex::GetEntryFileCount() - returning %u", *_retval));
+  return NS_OK;
+}
+
+// static
+nsresult
 CacheIndex::GetCacheStats(nsILoadContextInfo *aInfo, uint32_t *aSize, uint32_t *aCount)
 {
   LOG(("CacheIndex::GetCacheStats() [info=%p]", aInfo));
 
   nsRefPtr<CacheIndex> index = gInstance;
 
   if (!index) {
     return NS_ERROR_NOT_INITIALIZED;
--- a/netwerk/cache2/CacheIndex.h
+++ b/netwerk/cache2/CacheIndex.h
@@ -646,16 +646,19 @@ public:
   // Checks if a cache entry is currently forced valid. Used to prevent an entry
   // (that has been forced valid) from being evicted when the cache size reaches
   // its limit.
   static bool IsForcedValidEntry(const SHA1Sum::Hash *aHash);
 
   // Returns cache size in kB.
   static nsresult GetCacheSize(uint32_t *_retval);
 
+  // Returns number of entry files in the cache
+  static nsresult GetEntryFileCount(uint32_t *_retval);
+
   // Synchronously returns the disk occupation and number of entries per-context.
   // Callable on any thread.
   static nsresult GetCacheStats(nsILoadContextInfo *aInfo, uint32_t *aSize, uint32_t *aCount);
 
   // Asynchronously gets the disk cache size, used for display in the UI.
   static nsresult AsyncGetDiskConsumption(nsICacheStorageConsumptionObserver* aObserver);
 
   // Returns an iterator that returns entries matching a given context that were
--- a/toolkit/components/telemetry/Histograms.json
+++ b/toolkit/components/telemetry/Histograms.json
@@ -6597,16 +6597,28 @@
   },
   "NETWORK_CACHE_SIZE_FULL_FAT": {
     "expires_in_version": "42",
     "kind": "linear",
     "high": "500",
     "n_buckets": 50,
     "description": "Size (in MB) of a cache that reached a file count limit"
   },
+  "NETWORK_CACHE_HIT_MISS_STAT_PER_CACHE_SIZE": {
+    "expires_in_version": "never",
+    "kind": "enumerated",
+    "n_values": 40,
+    "description": "Hit/Miss count split by cache size in file count (0=Hit 0-4999, 1=Miss 0-4999, 2=Hit 5000-9999, ...)"
+  },
+  "NETWORK_CACHE_HIT_RATE_PER_CACHE_SIZE": {
+    "expires_in_version": "never",
+    "kind": "enumerated",
+    "n_values": 400,
+    "description": "Hit rate for a specific cache size in file count. The hit rate is split into 20 buckets, the lower limit of the range in percents is 5*floor(n/20). The cache size is divided into 20 ranges of length 5000, the lower limit of the range is 5000*(n%20)"
+  },
   "DATABASE_LOCKED_EXCEPTION": {
     "expires_in_version": "42",
     "kind": "enumerated",
     "description": "Record database locks when opening one of Fennec's databases. The index corresponds to how many attempts, beginning with 0.",
     "n_values": "5"
   },
   "DATABASE_SUCCESSFUL_UNLOCK": {
     "expires_in_version": "42",