bug 1460595 - Change storage semantics Telemetry Events r=Dexter a=lizzard
author: Chris H-C <chutten@mozilla.com>
Fri, 11 May 2018 15:45:39 -0400
changeset 477868 6d2d353e5bfe
parent 477867 5a480e17c6f5
child 477869 6b3b9d073cf2
push id: 9448
push user: archaeopteryx@coole-files.de
push date: Mon, 09 Jul 2018 17:22:41 +0000
treeherder: mozilla-beta@32cde6cdb297 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: Dexter, lizzard
bugs: 1460595
milestone: 62.0
bug 1460595 - Change storage semantics Telemetry Events r=Dexter a=lizzard
Instead of a hard cap, introduce a topic to notify when we reach the now-preference-controlled per-process event record limit. Also add the capability to leave some records behind in storage for the next call.
MozReview-Commit-ID: EY40tqKxxeW
toolkit/components/telemetry/Telemetry.cpp
toolkit/components/telemetry/TelemetryEvent.cpp
toolkit/components/telemetry/TelemetryEvent.h
toolkit/components/telemetry/nsITelemetry.idl
--- a/toolkit/components/telemetry/Telemetry.cpp
+++ b/toolkit/components/telemetry/Telemetry.cpp
@@ -1661,20 +1661,22 @@ NS_IMETHODIMP
 TelemetryImpl::RecordEvent(const nsACString & aCategory, const nsACString & aMethod,
                            const nsACString & aObject, JS::HandleValue aValue,
                            JS::HandleValue aExtra, JSContext* aCx, uint8_t optional_argc)
 {
   return TelemetryEvent::RecordEvent(aCategory, aMethod, aObject, aValue, aExtra, aCx, optional_argc);
 }
 
 NS_IMETHODIMP
-TelemetryImpl::SnapshotEvents(uint32_t aDataset, bool aClear, JSContext* aCx,
-                                     uint8_t optional_argc, JS::MutableHandleValue aResult)
+TelemetryImpl::SnapshotEvents(uint32_t aDataset, bool aClear,
+                              uint32_t aEventLimit, JSContext* aCx,
+                              uint8_t optional_argc, JS::MutableHandleValue aResult)
 {
-  return TelemetryEvent::CreateSnapshots(aDataset, aClear, aCx, optional_argc, aResult);
+  return TelemetryEvent::CreateSnapshots(aDataset, aClear,
+                                         aEventLimit, aCx, optional_argc, aResult);
 }
 
 NS_IMETHODIMP
 TelemetryImpl::RegisterEvents(const nsACString& aCategory,
                               JS::Handle<JS::Value> aEventData,
                               JSContext* cx)
 {
   return TelemetryEvent::RegisterEvents(aCategory, aEventData, false, cx);
--- a/toolkit/components/telemetry/TelemetryEvent.cpp
+++ b/toolkit/components/telemetry/TelemetryEvent.cpp
@@ -1,24 +1,27 @@
 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include <prtime.h>
 #include <limits>
+#include "nsIObserverService.h"
 #include "nsITelemetry.h"
 #include "nsHashKeys.h"
 #include "nsDataHashtable.h"
 #include "nsClassHashtable.h"
 #include "nsTArray.h"
+#include "mozilla/Preferences.h"
 #include "mozilla/StaticMutex.h"
 #include "mozilla/Unused.h"
 #include "mozilla/Maybe.h"
+#include "mozilla/Services.h"
 #include "mozilla/StaticPtr.h"
 #include "mozilla/Pair.h"
 #include "jsapi.h"
 #include "nsJSUtils.h"
 #include "nsXULAppAPI.h"
 #include "nsUTF8Utils.h"
 #include "nsPrintfCString.h"
 
@@ -96,20 +99,16 @@ namespace {
 
 const uint32_t kEventCount = mozilla::Telemetry::EventID::EventCount;
 // This is a special event id used to mark expired events, to make expiry checks
 // cheap at runtime.
 const uint32_t kExpiredEventId = std::numeric_limits<uint32_t>::max();
 static_assert(kExpiredEventId > kEventCount,
               "Built-in event count should be less than the expired event id.");
 
-// This is the hard upper limit on the number of event records we keep in storage.
-// If we cross this limit, we will drop any further event recording until elements
-// are removed from storage.
-const uint32_t kMaxEventRecords = 1000;
 // Maximum length of any passed value string, in UTF8 byte sequence length.
 const uint32_t kMaxValueByteLength = 80;
 // Maximum length of any string value in the extra dictionary, in UTF8 byte sequence length.
 const uint32_t kMaxExtraValueByteLength = 80;
 // Maximum length of dynamic method names, in UTF8 byte sequence length.
 const uint32_t kMaxMethodNameByteLength = 20;
 // Maximum length of dynamic object names, in UTF8 byte sequence length.
 const uint32_t kMaxObjectNameByteLength = 20;
@@ -469,24 +468,16 @@ RecordEvent(const StaticMutexAutoLock& l
 
   // Fixup the process id only for non-builtin (e.g. supporting build faster)
   // dynamic events.
   if (eventKey->dynamic &&
       !(*gDynamicEventInfo)[eventKey->id].builtin) {
     processType = ProcessID::Dynamic;
   }
 
-  EventRecordArray* eventRecords =
-    GetEventRecordsForProcess(lock, processType, *eventKey);
-
-  // Apply hard limit on event count in storage.
-  if (eventRecords->Length() >= kMaxEventRecords) {
-    return RecordEventResult::StorageLimitReached;
-  }
-
   // Check whether the extra keys passed are valid.
   if (!CheckExtraKeysValid(*eventKey, extra)) {
     return RecordEventResult::InvalidExtraKey;
   }
 
   // Check whether we can record this event.
   if (!CanRecordEvent(lock, *eventKey, processType)) {
     return RecordEventResult::Ok;
@@ -497,18 +488,27 @@ RecordEvent(const StaticMutexAutoLock& l
   TelemetryScalar::SummarizeEvent(UniqueEventName(category, method, object),
                                   processType, eventKey->dynamic);
 
   // Check whether this event's category has recording enabled
   if (!gEnabledCategories.GetEntry(GetCategory(lock, *eventKey))) {
     return RecordEventResult::Ok;
   }
 
-  // Add event record.
+  EventRecordArray* eventRecords =
+    GetEventRecordsForProcess(lock, processType, *eventKey);
   eventRecords->AppendElement(EventRecord(timestamp, *eventKey, value, extra));
+
+  // Notify observers when we hit the "event" ping event record limit.
+  static uint32_t sEventPingLimit =
+    mozilla::Preferences::GetUint("toolkit.telemetry.eventping.eventLimit", 1000);
+  if (eventRecords->Length() == sEventPingLimit) {
+    return RecordEventResult::StorageLimitReached;
+  }
+
   return RecordEventResult::Ok;
 }
 
 RecordEventResult
 ShouldRecordChildEvent(const StaticMutexAutoLock& lock, const nsACString& category,
                        const nsACString& method, const nsACString& object)
 {
   EventKey* eventKey = GetEventKey(lock, category, method, object);
@@ -913,20 +913,25 @@ TelemetryEvent::RecordEvent(const nsACSt
     case RecordEventResult::InvalidExtraKey: {
       nsPrintfCString msg(R"(Invalid extra key for event ["%s", "%s", "%s"].)",
                           PromiseFlatCString(aCategory).get(),
                           PromiseFlatCString(aMethod).get(),
                           PromiseFlatCString(aObject).get());
       LogToBrowserConsole(nsIScriptError::warningFlag, NS_ConvertUTF8toUTF16(msg));
       return NS_OK;
     }
-    case RecordEventResult::StorageLimitReached:
+    case RecordEventResult::StorageLimitReached: {
       LogToBrowserConsole(nsIScriptError::warningFlag,
                           NS_LITERAL_STRING("Event storage limit reached."));
+      nsCOMPtr<nsIObserverService> serv = mozilla::services::GetObserverService();
+      if (serv) {
+        serv->NotifyObservers(nullptr, "event-telemetry-storage-limit-reached", nullptr);
+      }
       return NS_OK;
+    }
     default:
       return NS_OK;
   }
 }
 
 static bool
 GetArrayPropertyValues(JSContext* cx, JS::HandleObject obj, const char* property,
                        nsTArray<nsCString>* results)
@@ -1107,69 +1112,87 @@ TelemetryEvent::RegisterEvents(const nsA
     StaticMutexAutoLock locker(gTelemetryEventsMutex);
     RegisterEvents(locker, aCategory, newEventInfos, newEventExpired, aBuiltin);
   }
 
   return NS_OK;
 }
 
 nsresult
-TelemetryEvent::CreateSnapshots(uint32_t aDataset, bool aClear, JSContext* cx,
+TelemetryEvent::CreateSnapshots(uint32_t aDataset, bool aClear,
+                                uint32_t aEventLimit, JSContext* cx,
                                 uint8_t optional_argc, JS::MutableHandleValue aResult)
 {
   if (!XRE_IsParentProcess()) {
     return NS_ERROR_FAILURE;
   }
 
   // Creating a JS snapshot of the events is a two-step process:
   // (1) Lock the storage and copy the events into function-local storage.
   // (2) Serialize the events into JS.
   // We can't hold a lock for (2) because we will run into deadlocks otherwise
   // from JS recording Telemetry.
 
   // (1) Extract the events from storage with a lock held.
   nsTArray<mozilla::Pair<const char*, EventRecordArray>> processEvents;
+  nsTArray<mozilla::Pair<uint32_t, EventRecordArray>> leftovers;
   {
     StaticMutexAutoLock locker(gTelemetryEventsMutex);
 
     if (!gInitDone) {
       return NS_ERROR_FAILURE;
     }
 
     // The snapshotting function is the same for both static and dynamic builtin events.
     // We can use the same function and store the events in the same output storage.
-    auto snapshotter = [aDataset, &locker, &processEvents]
+    auto snapshotter = [aDataset, &locker, &processEvents, &leftovers, aClear, optional_argc, aEventLimit]
                        (EventRecordsMapType& aProcessStorage)
     {
 
       for (auto iter = aProcessStorage.Iter(); !iter.Done(); iter.Next()) {
         const EventRecordArray* eventStorage = static_cast<EventRecordArray*>(iter.Data());
         EventRecordArray events;
+        EventRecordArray leftoverEvents;
 
         const uint32_t len = eventStorage->Length();
         for (uint32_t i = 0; i < len; ++i) {
           const EventRecord& record = (*eventStorage)[i];
           if (IsInDataset(GetDataset(locker, record.GetEventKey()), aDataset)) {
-            events.AppendElement(record);
+            // If we have a limit, adhere to it. If we have a limit and are
+            // going to clear, save the leftovers for later.
+            if (optional_argc < 2 || events.Length() < aEventLimit) {
+              events.AppendElement(record);
+            } else if (aClear) {
+              leftoverEvents.AppendElement(record);
+            }
           }
         }
 
         if (events.Length()) {
           const char* processName = GetNameForProcessID(ProcessID(iter.Key()));
           processEvents.AppendElement(mozilla::MakePair(processName, std::move(events)));
+          if (leftoverEvents.Length()) {
+            leftovers.AppendElement(mozilla::MakePair(iter.Key(),
+                                                      std::move(leftoverEvents)));
+          }
         }
       }
     };
 
     // Take a snapshot of the plain and dynamic builtin events.
     snapshotter(gEventRecords);
-
     if (aClear) {
       gEventRecords.Clear();
+      for (auto pair : leftovers) {
+        gEventRecords.Put(pair.first(),
+                          new EventRecordArray(std::move(pair.second())));
+      }
+      leftovers.Clear();
     }
+
   }
 
   // (2) Serialize the events to a JS object.
   JS::RootedObject rootObj(cx, JS_NewPlainObject(cx));
   if (!rootObj) {
     return NS_ERROR_FAILURE;
   }
 
--- a/toolkit/components/telemetry/TelemetryEvent.h
+++ b/toolkit/components/telemetry/TelemetryEvent.h
@@ -33,17 +33,18 @@ nsresult RecordEvent(const nsACString& a
                      const nsACString& aObject, JS::HandleValue aValue,
                      JS::HandleValue aExtra, JSContext* aCx,
                      uint8_t optional_argc);
 
 void SetEventRecordingEnabled(const nsACString& aCategory, bool aEnabled);
 nsresult RegisterEvents(const nsACString& aCategory, JS::Handle<JS::Value> aEventData,
                         bool aBuiltin, JSContext* cx);
 
-nsresult CreateSnapshots(uint32_t aDataset, bool aClear, JSContext* aCx,
+nsresult CreateSnapshots(uint32_t aDataset, bool aClear,
+                         uint32_t aEventLimit, JSContext* aCx,
                          uint8_t optional_argc, JS::MutableHandleValue aResult);
 
 // Record events from child processes.
 nsresult RecordChildEvents(mozilla::Telemetry::ProcessID aProcessType,
                            const nsTArray<mozilla::Telemetry::ChildEventData>& aEvents);
 
 // Only to be used for testing.
 void ClearEvents();
--- a/toolkit/components/telemetry/nsITelemetry.idl
+++ b/toolkit/components/telemetry/nsITelemetry.idl
@@ -467,20 +467,22 @@ interface nsITelemetry : nsISupports
    *   [
    *     // [timestamp, category, method, object, stringValue, extraValues]
    *     [43245, "category1", "method1", "object1", "string value", null],
    *     [43258, "category1", "method2", "object1", null, {"key1": "string value"}],
    *     ...
    *   ]
    *
    * @param aDataset DATASET_RELEASE_CHANNEL_OPTOUT or DATASET_RELEASE_CHANNEL_OPTIN.
-   * @param [aClear=false] Whether to clear out the scalars after snapshotting.
+   * @param [aClear=false] Whether to clear out the events after snapshotting.
+   * @param [aEventLimit] Maximum number of events per process to include in the snapshot; all events if unspecified.
+   *                      If aClear is set, events beyond this limit are kept in storage instead of being cleared.
    */
   [implicit_jscontext, optional_argc]
-  jsval snapshotEvents(in uint32_t aDataset, [optional] in boolean aClear);
+  jsval snapshotEvents(in uint32_t aDataset, [optional] in boolean aClear, [optional] in uint32_t aEventLimit);
 
   /**
    * Register new events to record them from addons. This allows registering multiple
    * events for a category. They will be valid only for the current Firefox session.
    * Note that events shipping in Firefox should be registered in Events.yaml.
    *
    * @param aCategory The unique category the events are registered in.
    * @param aEventData An object that contains registration data for 1-N events of the form: