Bug 1477213 - Replace Name->ID map with pre-generated hash table. r=kmag,gfritzsche
authorJan-Erik Rediger <jrediger@mozilla.com>
Mon, 27 Aug 2018 14:25:30 +0000
changeset 491237 fcd411537143c5d926136eb3aa39a6ff677bf326
parent 491236 600af91dfc45a4517375ab5b08da82e7d0c9efa6
child 491238 afeaf5a5abe3a232d7d7538528967568f651237d
push id1815
push userffxbld-merge
push dateMon, 15 Oct 2018 10:40:45 +0000
treeherdermozilla-release@18d4c09e9378 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewerskmag, gfritzsche
bugs1477213
milestone63.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1477213 - Replace Name->ID map with pre-generated hash table. r=kmag,gfritzsche By using a hash table based on a perfect hash function, generated at build time, we can avoid dynamic memory allocation for every process at start. Depends on D2927 Differential Revision: https://phabricator.services.mozilla.com/D2928
toolkit/components/telemetry/TelemetryHistogram.cpp
toolkit/components/telemetry/gen_histogram_phf.py
toolkit/components/telemetry/moz.build
--- a/toolkit/components/telemetry/TelemetryHistogram.cpp
+++ b/toolkit/components/telemetry/TelemetryHistogram.cpp
@@ -20,16 +20,17 @@
 #include "mozilla/Atomics.h"
 #include "mozilla/JSONWriter.h"
 #include "mozilla/StartupTimeline.h"
 #include "mozilla/StaticMutex.h"
 #include "mozilla/Unused.h"
 
 #include "TelemetryCommon.h"
 #include "TelemetryHistogram.h"
+#include "TelemetryHistogramNameMap.h"
 #include "TelemetryScalar.h"
 #include "ipc/TelemetryIPCAccumulator.h"
 
 #include "base/histogram.h"
 
 #include <limits>
 
 using base::Histogram;
@@ -40,16 +41,17 @@ using base::LinearHistogram;
 using mozilla::MakeTuple;
 using mozilla::StaticMutexNotRecorded;
 using mozilla::StaticMutexAutoLock;
 using mozilla::Telemetry::HistogramAccumulation;
 using mozilla::Telemetry::KeyedHistogramAccumulation;
 using mozilla::Telemetry::HistogramID;
 using mozilla::Telemetry::ProcessID;
 using mozilla::Telemetry::HistogramCount;
+using mozilla::Telemetry::HistogramIDByNameLookup;
 using mozilla::Telemetry::Common::LogToBrowserConsole;
 using mozilla::Telemetry::Common::RecordedProcessType;
 using mozilla::Telemetry::Common::AutoHashtable;
 using mozilla::Telemetry::Common::GetNameForProcessID;
 using mozilla::Telemetry::Common::GetIDForProcessName;
 using mozilla::Telemetry::Common::IsExpiredVersion;
 using mozilla::Telemetry::Common::CanRecordDataset;
 using mozilla::Telemetry::Common::CanRecordProduct;
@@ -231,35 +233,31 @@ bool gCanRecordBase = false;
 bool gCanRecordExtended = false;
 
 // The storage for actual Histogram instances.
 // We use separate ones for plain and keyed histograms.
 Histogram** gHistogramStorage;
 // Keyed histograms internally map string keys to individual Histogram instances.
 KeyedHistogram** gKeyedHistogramStorage;
 
-// Cache of histogram name to a histogram id.
-StringToHistogramIdMap gNameToHistogramIDMap(HistogramCount);
-
 // To simplify logic below we use a single histogram instance for all expired histograms.
 Histogram* gExpiredHistogram = nullptr;
 
 // The single placeholder for expired keyed histograms.
 KeyedHistogram* gExpiredKeyedHistogram = nullptr;
 
 // This tracks whether recording is enabled for specific histograms.
 // To utilize C++ initialization rules, we invert the meaning to "disabled".
 bool gHistogramRecordingDisabled[HistogramCount] = {};
 
 // This is for gHistogramInfos, gHistogramStringTable
 #include "TelemetryHistogramData.inc"
 
 } // namespace
 
-
 ////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////
 //
 // PRIVATE CONSTANTS
 
 namespace {
 
 // List of histogram IDs which should have recording disabled initially.
@@ -413,22 +411,28 @@ internal_GetKeyedHistogramById(Histogram
 }
 
 // Look up a histogram id from a histogram name.
 nsresult
 internal_GetHistogramIdByName(const StaticMutexAutoLock& aLock,
                               const nsACString& name,
                               HistogramID* id)
 {
-  const bool found = gNameToHistogramIDMap.Get(name, id);
-  if (!found) {
-    return NS_ERROR_ILLEGAL_VALUE;
+  const uint32_t idx = HistogramIDByNameLookup(name);
+  MOZ_ASSERT(idx < HistogramCount, "Intermediate lookup should always give a valid index.");
+
+  // The lookup hashes the input and uses it as an index into the value array.
+  // Hash collisions can still happen for unknown values,
+  // therefore we check that the name matches.
+  if (name.Equals(gHistogramInfos[idx].name())) {
+    *id = HistogramID(idx);
+    return NS_OK;
   }
 
-  return NS_OK;
+  return NS_ERROR_ILLEGAL_VALUE;
 }
 
 // Clear a histogram from storage.
 void
 internal_ClearHistogramById(const StaticMutexAutoLock& aLock,
                             HistogramID histogramId,
                             ProcessID processId)
 {
@@ -1989,39 +1993,16 @@ void TelemetryHistogram::InitializeGloba
 
   if (XRE_IsParentProcess()) {
     gHistogramStorage =
       new Histogram*[HistogramCount * size_t(ProcessID::Count)] {};
     gKeyedHistogramStorage =
       new KeyedHistogram*[HistogramCount * size_t(ProcessID::Count)] {};
   }
 
-  // gNameToHistogramIDMap should have been pre-sized correctly at the
-  // declaration point further up in this file.
-
-  // Populate the static histogram name->id cache.
-  // Note that the histogram names come from a static table so we can wrap them
-  // in a literal string to avoid allocations when it gets copied.
-  for (uint32_t i = 0; i < HistogramCount; i++) {
-    auto name = gHistogramInfos[i].name();
-
-    // Make sure the name pointer is in a valid region. See bug 1428612.
-    MOZ_DIAGNOSTIC_ASSERT(name >= gHistogramStringTable);
-    MOZ_DIAGNOSTIC_ASSERT(
-        uintptr_t(name) < (uintptr_t(gHistogramStringTable) + sizeof(gHistogramStringTable)));
-
-    nsCString wrappedName;
-    wrappedName.AssignLiteral(name, strlen(name));
-    gNameToHistogramIDMap.Put(wrappedName, HistogramID(i));
-  }
-
-#ifdef DEBUG
-  gNameToHistogramIDMap.MarkImmutable();
-#endif
-
     // Some Telemetry histograms depend on the value of C++ constants and hardcode
     // their values in Histograms.json.
     // We add static asserts here for those values to match so that future changes
     // don't go unnoticed.
     static_assert((JS::gcreason::NUM_TELEMETRY_REASONS + 1) ==
                         gHistogramInfos[mozilla::Telemetry::GC_MINOR_REASON].bucketCount &&
                   (JS::gcreason::NUM_TELEMETRY_REASONS + 1) ==
                         gHistogramInfos[mozilla::Telemetry::GC_MINOR_REASON_LONG].bucketCount &&
@@ -2041,17 +2022,16 @@ void TelemetryHistogram::InitializeGloba
   gInitDone = true;
 }
 
 void TelemetryHistogram::DeInitializeGlobalState()
 {
   StaticMutexAutoLock locker(gTelemetryHistogramMutex);
   gCanRecordBase = false;
   gCanRecordExtended = false;
-  gNameToHistogramIDMap.Clear();
   gInitDone = false;
 
   // FactoryGet `new`s Histograms for us, but requires us to manually delete.
   if (XRE_IsParentProcess()) {
     for (size_t i = 0; i < HistogramCount * size_t(ProcessID::Count); ++i) {
       if (i < HistogramCount * size_t(ProcessID::Count)
           && gKeyedHistogramStorage[i] != gExpiredKeyedHistogram) {
         delete gKeyedHistogramStorage[i];
@@ -2576,18 +2556,17 @@ TelemetryHistogram::GetKeyedHistogramSna
   }
   return NS_OK;
 }
 
 size_t
 TelemetryHistogram::GetMapShallowSizesOfExcludingThis(mozilla::MallocSizeOf
                                                       aMallocSizeOf)
 {
-  StaticMutexAutoLock locker(gTelemetryHistogramMutex);
-  return gNameToHistogramIDMap.ShallowSizeOfExcludingThis(aMallocSizeOf);
+  return 0;
 }
 
 size_t
 TelemetryHistogram::GetHistogramSizesOfIncludingThis(mozilla::MallocSizeOf
                                                      aMallocSizeOf)
 {
   StaticMutexAutoLock locker(gTelemetryHistogramMutex);
 
new file mode 100644
--- /dev/null
+++ b/toolkit/components/telemetry/gen_histogram_phf.py
@@ -0,0 +1,65 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import print_function
+from shared_telemetry_utils import ParserError
+from perfecthash import PerfectHash
+
+PHFSIZE = 512
+
+import parse_histograms
+import sys
+import buildconfig
+
+
+banner = """/* This file is auto-generated, see gen_histogram_phf.py.  */
+"""
+
+header = """
+#ifndef mozilla_TelemetryHistogramNameMap_h
+#define mozilla_TelemetryHistogramNameMap_h
+
+namespace mozilla {
+namespace Telemetry {
+"""
+
+footer = """
+} // namespace mozilla
+} // namespace Telemetry
+#endif // mozilla_TelemetryHistogramNameMap_h
+"""
+
+
+def main(output, *filenames):
+    """
+    Generate a Perfect Hash Table for the Histogram name -> Histogram ID lookup.
+    The table is immutable once generated and we can avoid any dynamic memory allocation.
+    """
+
+    output.write(banner)
+    output.write(header)
+
+    try:
+        histograms = list(parse_histograms.from_files(filenames))
+        histograms = [h for h in histograms if h.record_on_os(buildconfig.substs["OS_TARGET"])]
+    except ParserError as ex:
+        print("\nError processing histograms:\n" + str(ex) + "\n")
+        sys.exit(1)
+
+    histograms = [(bytearray(hist.name(), 'ascii'), idx) for (idx, hist) in enumerate(histograms)]
+    name_phf = PerfectHash(histograms, PHFSIZE)
+
+    output.write(name_phf.cxx_codegen(
+        name='HistogramIDByNameLookup',
+        entry_type="uint32_t",
+        lower_entry=lambda (_, v): str(v),
+        key_type="const nsACString&",
+        key_bytes="aKey.BeginReading()",
+        key_length="aKey.Length()"))
+
+    output.write(footer)
+
+
+if __name__ == '__main__':
+    main(sys.stdout, *sys.argv[1:])
--- a/toolkit/components/telemetry/moz.build
+++ b/toolkit/components/telemetry/moz.build
@@ -116,16 +116,17 @@ PYTHON_UNITTEST_MANIFESTS += [
 
 GENERATED_FILES = [
     'EventArtifactDefinitions.json',
     'ScalarArtifactDefinitions.json',
     'TelemetryEventData.h',
     'TelemetryEventEnums.h',
     'TelemetryHistogramData.inc',
     'TelemetryHistogramEnums.h',
+    'TelemetryHistogramNameMap.h',
     'TelemetryProcessData.h',
     'TelemetryProcessEnums.h',
     'TelemetryScalarData.h',
     'TelemetryScalarEnums.h',
 ]
 
 # Generate histogram files.
 histogram_files = [
@@ -137,16 +138,20 @@ histogram_files = [
 data = GENERATED_FILES['TelemetryHistogramData.inc']
 data.script = 'gen_histogram_data.py'
 data.inputs = histogram_files
 
 enums = GENERATED_FILES['TelemetryHistogramEnums.h']
 enums.script = 'gen_histogram_enum.py'
 enums.inputs = histogram_files
 
+data = GENERATED_FILES['TelemetryHistogramNameMap.h']
+data.script = 'gen_histogram_phf.py'
+data.inputs = histogram_files
+
 # Generate scalar files.
 scalar_files = [
     'Scalars.yaml',
 ]
 
 scalar_data = GENERATED_FILES['TelemetryScalarData.h']
 scalar_data.script = 'gen_scalar_data.py'
 scalar_data.inputs = scalar_files