bug 1460595 - Change storage semantics Telemetry Events r=Dexter
authorChris H-C <chutten@mozilla.com>
Fri, 11 May 2018 15:45:39 -0400
changeset 423662 f99bc6f4d002
parent 423661 2eeccf1db8a0
child 423663 264376b1bd3f
push id65557
push userchutten@mozilla.com
push dateTue, 26 Jun 2018 14:09:22 +0000
treeherderautoland@378ddda2ffc6 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersDexter
bugs1460595
milestone63.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
bug 1460595 - Change storage semantics Telemetry Events r=Dexter Instead of a hard cap, introduce a topic to notify when we reach the now-preference-controlled per-process event record limit. Also add the capability to leave some records behind in storage for the next call. MozReview-Commit-ID: EY40tqKxxeW
toolkit/components/telemetry/Telemetry.cpp
toolkit/components/telemetry/TelemetryEvent.cpp
toolkit/components/telemetry/TelemetryEvent.h
toolkit/components/telemetry/nsITelemetry.idl
--- a/toolkit/components/telemetry/Telemetry.cpp
+++ b/toolkit/components/telemetry/Telemetry.cpp
@@ -1660,20 +1660,22 @@ NS_IMETHODIMP
 TelemetryImpl::RecordEvent(const nsACString & aCategory, const nsACString & aMethod,
                            const nsACString & aObject, JS::HandleValue aValue,
                            JS::HandleValue aExtra, JSContext* aCx, uint8_t optional_argc)
 {
   return TelemetryEvent::RecordEvent(aCategory, aMethod, aObject, aValue, aExtra, aCx, optional_argc);
 }
 
 NS_IMETHODIMP
-TelemetryImpl::SnapshotEvents(uint32_t aDataset, bool aClear, JSContext* aCx,
-                                     uint8_t optional_argc, JS::MutableHandleValue aResult)
+TelemetryImpl::SnapshotEvents(uint32_t aDataset, bool aClear,
+                              uint32_t aEventLimit, JSContext* aCx,
+                              uint8_t optional_argc, JS::MutableHandleValue aResult)
 {
-  return TelemetryEvent::CreateSnapshots(aDataset, aClear, aCx, optional_argc, aResult);
+  return TelemetryEvent::CreateSnapshots(aDataset, aClear,
+                                         aEventLimit, aCx, optional_argc, aResult);
 }
 
 NS_IMETHODIMP
 TelemetryImpl::RegisterEvents(const nsACString& aCategory,
                               JS::Handle<JS::Value> aEventData,
                               JSContext* cx)
 {
   return TelemetryEvent::RegisterEvents(aCategory, aEventData, false, cx);
--- a/toolkit/components/telemetry/TelemetryEvent.cpp
+++ b/toolkit/components/telemetry/TelemetryEvent.cpp
@@ -1,24 +1,27 @@
 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include <prtime.h>
 #include <limits>
+#include "nsIObserverService.h"
 #include "nsITelemetry.h"
 #include "nsHashKeys.h"
 #include "nsDataHashtable.h"
 #include "nsClassHashtable.h"
 #include "nsTArray.h"
+#include "mozilla/Preferences.h"
 #include "mozilla/StaticMutex.h"
 #include "mozilla/Unused.h"
 #include "mozilla/Maybe.h"
+#include "mozilla/Services.h"
 #include "mozilla/StaticPtr.h"
 #include "mozilla/Pair.h"
 #include "jsapi.h"
 #include "nsJSUtils.h"
 #include "nsXULAppAPI.h"
 #include "nsUTF8Utils.h"
 #include "nsPrintfCString.h"
 
@@ -96,20 +99,16 @@ namespace {
 
 const uint32_t kEventCount = mozilla::Telemetry::EventID::EventCount;
 // This is a special event id used to mark expired events, to make expiry checks
 // cheap at runtime.
 const uint32_t kExpiredEventId = std::numeric_limits<uint32_t>::max();
 static_assert(kExpiredEventId > kEventCount,
               "Built-in event count should be less than the expired event id.");
 
-// This is the hard upper limit on the number of event records we keep in storage.
-// If we cross this limit, we will drop any further event recording until elements
-// are removed from storage.
-const uint32_t kMaxEventRecords = 1000;
 // Maximum length of any passed value string, in UTF8 byte sequence length.
 const uint32_t kMaxValueByteLength = 80;
 // Maximum length of any string value in the extra dictionary, in UTF8 byte sequence length.
 const uint32_t kMaxExtraValueByteLength = 80;
 // Maximum length of dynamic method names, in UTF8 byte sequence length.
 const uint32_t kMaxMethodNameByteLength = 20;
 // Maximum length of dynamic object names, in UTF8 byte sequence length.
 const uint32_t kMaxObjectNameByteLength = 20;
@@ -469,24 +468,16 @@ RecordEvent(const StaticMutexAutoLock& l
 
   // Fixup the process id only for non-builtin (e.g. supporting build faster)
   // dynamic events.
   if (eventKey->dynamic &&
       !(*gDynamicEventInfo)[eventKey->id].builtin) {
     processType = ProcessID::Dynamic;
   }
 
-  EventRecordArray* eventRecords =
-    GetEventRecordsForProcess(lock, processType, *eventKey);
-
-  // Apply hard limit on event count in storage.
-  if (eventRecords->Length() >= kMaxEventRecords) {
-    return RecordEventResult::StorageLimitReached;
-  }
-
   // Check whether the extra keys passed are valid.
   if (!CheckExtraKeysValid(*eventKey, extra)) {
     return RecordEventResult::InvalidExtraKey;
   }
 
   // Check whether we can record this event.
   if (!CanRecordEvent(lock, *eventKey, processType)) {
     return RecordEventResult::Ok;
@@ -497,18 +488,27 @@ RecordEvent(const StaticMutexAutoLock& l
   TelemetryScalar::SummarizeEvent(UniqueEventName(category, method, object),
                                   processType, eventKey->dynamic);
 
   // Check whether this event's category has recording enabled
   if (!gEnabledCategories.GetEntry(GetCategory(lock, *eventKey))) {
     return RecordEventResult::Ok;
   }
 
-  // Add event record.
+  EventRecordArray* eventRecords =
+    GetEventRecordsForProcess(lock, processType, *eventKey);
   eventRecords->AppendElement(EventRecord(timestamp, *eventKey, value, extra));
+
+  // Notify observers when we hit the "event" ping event record limit.
+  static uint32_t sEventPingLimit =
+    mozilla::Preferences::GetUint("toolkit.telemetry.eventping.eventLimit", 1000);
+  if (eventRecords->Length() == sEventPingLimit) {
+    return RecordEventResult::StorageLimitReached;
+  }
+
   return RecordEventResult::Ok;
 }
 
 RecordEventResult
 ShouldRecordChildEvent(const StaticMutexAutoLock& lock, const nsACString& category,
                        const nsACString& method, const nsACString& object)
 {
   EventKey* eventKey = GetEventKey(lock, category, method, object);
@@ -913,20 +913,25 @@ TelemetryEvent::RecordEvent(const nsACSt
     case RecordEventResult::InvalidExtraKey: {
       nsPrintfCString msg(R"(Invalid extra key for event ["%s", "%s", "%s"].)",
                           PromiseFlatCString(aCategory).get(),
                           PromiseFlatCString(aMethod).get(),
                           PromiseFlatCString(aObject).get());
       LogToBrowserConsole(nsIScriptError::warningFlag, NS_ConvertUTF8toUTF16(msg));
       return NS_OK;
     }
-    case RecordEventResult::StorageLimitReached:
+    case RecordEventResult::StorageLimitReached: {
       LogToBrowserConsole(nsIScriptError::warningFlag,
                           NS_LITERAL_STRING("Event storage limit reached."));
+      nsCOMPtr<nsIObserverService> serv = mozilla::services::GetObserverService();
+      if (serv) {
+        serv->NotifyObservers(nullptr, "event-telemetry-storage-limit-reached", nullptr);
+      }
       return NS_OK;
+    }
     default:
       return NS_OK;
   }
 }
 
 static bool
 GetArrayPropertyValues(JSContext* cx, JS::HandleObject obj, const char* property,
                        nsTArray<nsCString>* results)
@@ -1107,69 +1112,87 @@ TelemetryEvent::RegisterEvents(const nsA
     StaticMutexAutoLock locker(gTelemetryEventsMutex);
     RegisterEvents(locker, aCategory, newEventInfos, newEventExpired, aBuiltin);
   }
 
   return NS_OK;
 }
 
 nsresult
-TelemetryEvent::CreateSnapshots(uint32_t aDataset, bool aClear, JSContext* cx,
+TelemetryEvent::CreateSnapshots(uint32_t aDataset, bool aClear,
+                                uint32_t aEventLimit, JSContext* cx,
                                 uint8_t optional_argc, JS::MutableHandleValue aResult)
 {
   if (!XRE_IsParentProcess()) {
     return NS_ERROR_FAILURE;
   }
 
   // Creating a JS snapshot of the events is a two-step process:
   // (1) Lock the storage and copy the events into function-local storage.
   // (2) Serialize the events into JS.
   // We can't hold a lock for (2) because we will run into deadlocks otherwise
   // from JS recording Telemetry.
 
   // (1) Extract the events from storage with a lock held.
   nsTArray<mozilla::Pair<const char*, EventRecordArray>> processEvents;
+  nsTArray<mozilla::Pair<uint32_t, EventRecordArray>> leftovers;
   {
     StaticMutexAutoLock locker(gTelemetryEventsMutex);
 
     if (!gInitDone) {
       return NS_ERROR_FAILURE;
     }
 
     // The snapshotting function is the same for both static and dynamic builtin events.
     // We can use the same function and store the events in the same output storage.
-    auto snapshotter = [aDataset, &locker, &processEvents]
+    auto snapshotter = [aDataset, &locker, &processEvents, &leftovers, aClear, optional_argc, aEventLimit]
                        (EventRecordsMapType& aProcessStorage)
     {
 
       for (auto iter = aProcessStorage.Iter(); !iter.Done(); iter.Next()) {
         const EventRecordArray* eventStorage = static_cast<EventRecordArray*>(iter.Data());
         EventRecordArray events;
+        EventRecordArray leftoverEvents;
 
         const uint32_t len = eventStorage->Length();
         for (uint32_t i = 0; i < len; ++i) {
           const EventRecord& record = (*eventStorage)[i];
           if (IsInDataset(GetDataset(locker, record.GetEventKey()), aDataset)) {
-            events.AppendElement(record);
+            // If we have a limit, adhere to it. If we have a limit and are
+            // going to clear, save the leftovers for later.
+            if (optional_argc < 2 || events.Length() < aEventLimit) {
+              events.AppendElement(record);
+            } else if (aClear) {
+              leftoverEvents.AppendElement(record);
+            }
           }
         }
 
         if (events.Length()) {
           const char* processName = GetNameForProcessID(ProcessID(iter.Key()));
           processEvents.AppendElement(mozilla::MakePair(processName, std::move(events)));
+          if (leftoverEvents.Length()) {
+            leftovers.AppendElement(mozilla::MakePair(iter.Key(),
+                                                      std::move(leftoverEvents)));
+          }
         }
       }
     };
 
     // Take a snapshot of the plain and dynamic builtin events.
     snapshotter(gEventRecords);
-
     if (aClear) {
       gEventRecords.Clear();
+      for (auto pair : leftovers) {
+        gEventRecords.Put(pair.first(),
+                          new EventRecordArray(std::move(pair.second())));
+      }
+      leftovers.Clear();
     }
+
   }
 
   // (2) Serialize the events to a JS object.
   JS::RootedObject rootObj(cx, JS_NewPlainObject(cx));
   if (!rootObj) {
     return NS_ERROR_FAILURE;
   }
 
--- a/toolkit/components/telemetry/TelemetryEvent.h
+++ b/toolkit/components/telemetry/TelemetryEvent.h
@@ -33,17 +33,18 @@ nsresult RecordEvent(const nsACString& a
                      const nsACString& aObject, JS::HandleValue aValue,
                      JS::HandleValue aExtra, JSContext* aCx,
                      uint8_t optional_argc);
 
 void SetEventRecordingEnabled(const nsACString& aCategory, bool aEnabled);
 nsresult RegisterEvents(const nsACString& aCategory, JS::Handle<JS::Value> aEventData,
                         bool aBuiltin, JSContext* cx);
 
-nsresult CreateSnapshots(uint32_t aDataset, bool aClear, JSContext* aCx,
+nsresult CreateSnapshots(uint32_t aDataset, bool aClear,
+                         uint32_t aEventLimit, JSContext* aCx,
                          uint8_t optional_argc, JS::MutableHandleValue aResult);
 
 // Record events from child processes.
 nsresult RecordChildEvents(mozilla::Telemetry::ProcessID aProcessType,
                            const nsTArray<mozilla::Telemetry::ChildEventData>& aEvents);
 
 // Only to be used for testing.
 void ClearEvents();
--- a/toolkit/components/telemetry/nsITelemetry.idl
+++ b/toolkit/components/telemetry/nsITelemetry.idl
@@ -467,20 +467,22 @@ interface nsITelemetry : nsISupports
    *   [
    *     // [timestamp, category, method, object, stringValue, extraValues]
    *     [43245, "category1", "method1", "object1", "string value", null],
    *     [43258, "category1", "method2", "object1", null, {"key1": "string value"}],
    *     ...
    *   ]
    *
    * @param aDataset DATASET_RELEASE_CHANNEL_OPTOUT or DATASET_RELEASE_CHANNEL_OPTIN.
-   * @param [aClear=false] Whether to clear out the scalars after snapshotting.
+   * @param [aClear=false] Whether to clear out the events after snapshotting.
+   * @param aEventLimit The maximum number of events per process to include in the snapshot; all events if unspecified.
+   *                    Events beyond this limit are left in storage, even when aClear is true.
    */
   [implicit_jscontext, optional_argc]
-  jsval snapshotEvents(in uint32_t aDataset, [optional] in boolean aClear);
+  jsval snapshotEvents(in uint32_t aDataset, [optional] in boolean aClear, [optional] in uint32_t aEventLimit);
 
   /**
    * Register new events to record them from addons. This allows registering multiple
    * events for a category. They will be valid only for the current Firefox session.
    * Note that events shipping in Firefox should be registered in Events.yaml.
    *
    * @param aCategory The unique category the events are registered in.
    * @param aEventData An object that contains registration data for 1-N events of the form: