bug 1438335 - Record when we have to clamp large Telemetry accumulations. r=Dexter,francois a=lizzard
authorChris H-C <chutten@mozilla.com>
Thu, 15 Feb 2018 16:13:36 -0500
changeset 455056 72a0030b41dc6c325b178911d7ae5e20c1348dba
parent 455055 dc5d87eb8bfc23ee6e113657e230d9f829a06023
child 455057 4a24da7b8cd51443fcdb3bb9a52a3c7483894829
push id1648
push usermtabara@mozilla.com
push dateThu, 01 Mar 2018 12:45:47 +0000
treeherdermozilla-release@cbb9688c2eeb [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersDexter, francois, lizzard
bugs1438335
milestone59.0
bug 1438335 - Record when we have to clamp large Telemetry accumulations. r=Dexter,francois a=lizzard We need to clamp accumulations to fit in our data representation (int). This patch records the number of times, and for which probes, we had to do so. MozReview-Commit-ID: GSs3oFvLKlL
toolkit/components/telemetry/Scalars.yaml
toolkit/components/telemetry/TelemetryHistogram.cpp
toolkit/components/telemetry/tests/gtest/TestHistograms.cpp
--- a/toolkit/components/telemetry/Scalars.yaml
+++ b/toolkit/components/telemetry/Scalars.yaml
@@ -1065,16 +1065,34 @@ telemetry:
     kind: uint
     keyed: true
     notification_emails:
       - telemetry-client-dev@mozilla.com
     release_channel_collection: opt-out
     record_in_processes:
       - all
 
+  accumulate_clamped_values:
+    bug_numbers:
+      - 1438335
+    description: >
+      The count of accumulations that had to be clamped down to not overflow,
+      keyed to the histogram name of the overflowing accumulation.
+      This is doubled for non-keyed desktop Firefox Histograms because of
+      saved-session pings.
+    expires: never
+    kind: uint
+    keyed: true
+    notification_emails:
+      - telemetry-client-dev@mozilla.com
+      - chutten@mozilla.com
+    release_channel_collection: opt-in
+    record_in_processes:
+      - 'main'
+
 telemetry.discarded:
   accumulations:
     bug_numbers:
       - 1369041
     description: >
       Number of discarded accumulations to histograms in child processes
     expires: "never"
     kind: uint
--- a/toolkit/components/telemetry/TelemetryHistogram.cpp
+++ b/toolkit/components/telemetry/TelemetryHistogram.cpp
@@ -608,16 +608,19 @@ internal_HistogramAdd(Histogram& histogr
     (aProcessType == ProcessID::Parent && !internal_IsRecordingEnabled(id))) {
     return NS_OK;
   }
 
   // The internal representation of a base::Histogram's buckets uses `int`.
   // Clamp large values of `value` to be INT_MAX so they continue to be treated
   // as large values (instead of negative ones).
   if (value > INT_MAX) {
+    TelemetryScalar::Add(
+      mozilla::Telemetry::ScalarID::TELEMETRY_ACCUMULATE_CLAMPED_VALUES,
+      NS_ConvertASCIItoUTF16(gHistogramInfos[id].name()), 1);
     value = INT_MAX;
   }
 
   // It is safe to add to the histogram now: the subsession histogram was already
   // cloned from this so we won't add the sample twice.
   histogram.Add(value);
 
   return NS_OK;
@@ -818,16 +821,26 @@ KeyedHistogram::Add(const nsCString& key
 #if !defined(MOZ_WIDGET_ANDROID)
   Histogram* subsession = GetHistogram(key, true);
   MOZ_ASSERT(subsession);
   if (!subsession) {
     return NS_ERROR_FAILURE;
   }
 #endif
 
+  // The internal representation of a base::Histogram's buckets uses `int`.
+  // Clamp large values of `sample` to be INT_MAX so they continue to be treated
+  // as large values (instead of negative ones).
+  if (sample > INT_MAX) {
+    TelemetryScalar::Add(
+      mozilla::Telemetry::ScalarID::TELEMETRY_ACCUMULATE_CLAMPED_VALUES,
+      NS_ConvertASCIItoUTF16(mHistogramInfo.name()), 1);
+    sample = INT_MAX;
+  }
+
   histogram->Add(sample);
 #if !defined(MOZ_WIDGET_ANDROID)
   subsession->Add(sample);
 #endif
   return NS_OK;
 }
 
 void
--- a/toolkit/components/telemetry/tests/gtest/TestHistograms.cpp
+++ b/toolkit/components/telemetry/tests/gtest/TestHistograms.cpp
@@ -307,16 +307,17 @@ TEST_F(TelemetryTestFixture, AccumulateL
 }
 
 TEST_F(TelemetryTestFixture, AccumulateLinearHistogram_DifferentSamples)
 {
   nsTArray<uint32_t> samples({4, 8, 2147483646, uint32_t(INT_MAX) + 1, UINT32_MAX});
 
   AutoJSContextWithGlobal cx(mCleanGlobal);
 
+  mTelemetry->ClearScalars();
   GetAndClearHistogram(cx.GetJSContext(), mTelemetry, NS_LITERAL_CSTRING("TELEMETRY_TEST_LINEAR"),
                         false);
 
   // Accumulate in histogram
   Telemetry::Accumulate(Telemetry::TELEMETRY_TEST_LINEAR, samples);
 
   // Get a snapshot of all histograms
   JS::RootedValue snapshot(cx.GetJSContext());
@@ -345,16 +346,25 @@ TEST_F(TelemetryTestFixture, AccumulateL
   JS::ToUint32(cx.GetJSContext(), countLast, &uCountLast);
 
   const uint32_t kExpectedCountFirst = 2;
   // We expect 2147483646 to be in the last bucket, as well the two samples above 2^31
   // (prior to bug 1438335, values between INT_MAX and UINT32_MAX would end up as 0s)
   const uint32_t kExpectedCountLast = 3;
   ASSERT_EQ(uCountFirst, kExpectedCountFirst) << "The first bucket did not accumulate the correct number of values";
   ASSERT_EQ(uCountLast, kExpectedCountLast) << "The last bucket did not accumulate the correct number of values";
+
+  // We accumulated two values that had to be clamped. We expect the count in
+  // 'telemetry.accumulate_clamped_values' to be 4
+  const uint32_t expectedAccumulateClampedCount = 4;
+  JS::RootedValue scalarsSnapshot(cx.GetJSContext());
+  GetScalarsSnapshot(true, cx.GetJSContext(),&scalarsSnapshot);
+  CheckKeyedUintScalar("telemetry.accumulate_clamped_values",
+                       "TELEMETRY_TEST_LINEAR", cx.GetJSContext(),
+                       scalarsSnapshot, expectedAccumulateClampedCount);
 }
 
 TEST_F(TelemetryTestFixture, AccumulateKeyedCountHistogram_MultipleSamples)
 {
   const nsTArray<uint32_t> samples({5, 10, 15});
   const uint32_t kExpectedSum = 5 + 10 + 15;
 
   AutoJSContextWithGlobal cx(mCleanGlobal);
@@ -387,20 +397,21 @@ TEST_F(TelemetryTestFixture, AccumulateK
   JS::ToUint32(cx.GetJSContext(), sum, &uSum);
   ASSERT_EQ(uSum, kExpectedSum) << "The histogram is not returning expected sum";
 }
 
 TEST_F(TelemetryTestFixture, TestKeyedLinearHistogram_MultipleSamples)
 {
   AutoJSContextWithGlobal cx(mCleanGlobal);
 
+  mTelemetry->ClearScalars();
   GetAndClearHistogram(cx.GetJSContext(), mTelemetry,
                        NS_LITERAL_CSTRING("TELEMETRY_TEST_KEYED_LINEAR"), true);
 
-  const nsTArray<uint32_t> samples({1,5,250000});
+  const nsTArray<uint32_t> samples({1, 5, 250000, UINT_MAX});
   // Test the accumulation on the key 'testkey', using
   // the API that accepts histogram IDs.
   Telemetry::Accumulate(Telemetry::TELEMETRY_TEST_KEYED_LINEAR,
                         NS_LITERAL_CSTRING("testkey"), samples);
 
   // Get a snapshot for all the histograms
   JS::RootedValue snapshot(cx.GetJSContext());
   GetSnapshots(cx.GetJSContext(), mTelemetry, "TELEMETRY_TEST_KEYED_LINEAR", &snapshot, true);
@@ -429,21 +440,30 @@ TEST_F(TelemetryTestFixture, TestKeyedLi
 
   // Check that the counts match.
   uint32_t uCountFirst = 0;
   uint32_t uCountLast = 0;
   JS::ToUint32(cx.GetJSContext(), countFirst, &uCountFirst);
   JS::ToUint32(cx.GetJSContext(), countLast, &uCountLast);
 
   const uint32_t kExpectedCountFirst = 2;
-  const uint32_t kExpectedCountLast = 1;
+  const uint32_t kExpectedCountLast = 2;
   ASSERT_EQ(uCountFirst, kExpectedCountFirst)
     << "The first bucket did not accumulate the correct number of values for key 'testkey'";
   ASSERT_EQ(uCountLast, kExpectedCountLast)
     << "The last bucket did not accumulate the correct number of values for key 'testkey'";
+
+  // We accumulated one keyed values that had to be clamped. We expect the
+  // count in 'telemetry.accumulate_clamped_values' to be 1
+  const uint32_t expectedAccumulateClampedCount = 1;
+  JS::RootedValue scalarsSnapshot(cx.GetJSContext());
+  GetScalarsSnapshot(true, cx.GetJSContext(),&scalarsSnapshot);
+  CheckKeyedUintScalar("telemetry.accumulate_clamped_values",
+                       "TELEMETRY_TEST_KEYED_LINEAR", cx.GetJSContext(),
+                       scalarsSnapshot, expectedAccumulateClampedCount);
 }
 
 TEST_F(TelemetryTestFixture, TestKeyedKeysHistogram_MultipleSamples)
 {
   AutoJSContextWithGlobal cx(mCleanGlobal);
   mTelemetry->ClearScalars();
   const nsTArray<uint32_t> samples({false, false, true, 32, true});