bug 1460595 - Add a second storage for Telemetry Events r?Dexter draft
authorChris H-C <chutten@mozilla.com>
Fri, 11 May 2018 15:45:39 -0400
changeset 798706 e4f526b8a7556b6d3af3deedc44d844e750e991d
parent 798705 90241de0f066c4a4def83cc947d46d96bfd9e047
child 798707 f499c1f9dc02b871e63e239191b25e85c22a34e8
push id110834
push userbmo:chutten@mozilla.com
push dateWed, 23 May 2018 12:25:06 +0000
reviewersDexter
bugs1460595
milestone62.0a1
bug 1460595 - Add a second storage for Telemetry Events r?Dexter Both the "event" and "main" ping need events, and both need them on separate schedules and with different limits. The "main" ping never wants to know about more than 1000 events, and will ask for them once a subsession. The "event" ping wants to know about all of the events, and will ask for them at least once an hour, up to as often as once every ten minutes. It may wish to leave some of them behind for the next interval. To handle these two independent requirements we're duplicating event records. We can remove one of the two later when we remove event reporting from the "main" ping. MozReview-Commit-ID: EY40tqKxxeW
toolkit/components/telemetry/Telemetry.cpp
toolkit/components/telemetry/TelemetryEvent.cpp
toolkit/components/telemetry/TelemetryEvent.h
toolkit/components/telemetry/nsITelemetry.idl
--- a/toolkit/components/telemetry/Telemetry.cpp
+++ b/toolkit/components/telemetry/Telemetry.cpp
@@ -1791,20 +1791,22 @@ NS_IMETHODIMP
 TelemetryImpl::RecordEvent(const nsACString & aCategory, const nsACString & aMethod,
                            const nsACString & aObject, JS::HandleValue aValue,
                            JS::HandleValue aExtra, JSContext* aCx, uint8_t optional_argc)
 {
   return TelemetryEvent::RecordEvent(aCategory, aMethod, aObject, aValue, aExtra, aCx, optional_argc);
 }
 
 NS_IMETHODIMP
-TelemetryImpl::SnapshotEvents(uint32_t aDataset, bool aClear, JSContext* aCx,
-                                     uint8_t optional_argc, JS::MutableHandleValue aResult)
+TelemetryImpl::SnapshotEvents(uint32_t aDataset, bool aClear,
+                              bool aEventPingStorage, uint32_t aEventLimit, JSContext* aCx,
+                              uint8_t optional_argc, JS::MutableHandleValue aResult)
 {
-  return TelemetryEvent::CreateSnapshots(aDataset, aClear, aCx, optional_argc, aResult);
+  return TelemetryEvent::CreateSnapshots(aDataset, aClear, aEventPingStorage,
+                                         aEventLimit, aCx, optional_argc, aResult);
 }
 
 NS_IMETHODIMP
 TelemetryImpl::RegisterEvents(const nsACString& aCategory,
                               JS::Handle<JS::Value> aEventData,
                               JSContext* cx)
 {
   return TelemetryEvent::RegisterEvents(aCategory, aEventData, false, cx);
--- a/toolkit/components/telemetry/TelemetryEvent.cpp
+++ b/toolkit/components/telemetry/TelemetryEvent.cpp
@@ -1,24 +1,27 @@
 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include <prtime.h>
 #include <limits>
+#include "nsIObserverService.h"
 #include "nsITelemetry.h"
 #include "nsHashKeys.h"
 #include "nsDataHashtable.h"
 #include "nsClassHashtable.h"
 #include "nsTArray.h"
+#include "mozilla/Preferences.h"
 #include "mozilla/StaticMutex.h"
 #include "mozilla/Unused.h"
 #include "mozilla/Maybe.h"
+#include "mozilla/Services.h"
 #include "mozilla/StaticPtr.h"
 #include "mozilla/Pair.h"
 #include "jsapi.h"
 #include "nsJSUtils.h"
 #include "nsXULAppAPI.h"
 #include "nsUTF8Utils.h"
 #include "nsPrintfCString.h"
 
@@ -324,16 +327,20 @@ typedef nsUint32HashKey ProcessIDHashKey
 typedef nsTArray<EventRecord> EventRecordArray;
 typedef nsClassHashtable<ProcessIDHashKey, EventRecordArray> EventRecordsMapType;
 
 EventRecordsMapType gEventRecords;
 // Provide separate storage for "dynamic builtin" events needed to
 // support "build faster" in local developer builds.
 EventRecordsMapType gBuiltinEventRecords;
 
+// Duplicate the event storage to support the "event" ping.
+EventRecordsMapType gEventPingEventRecords;
+EventRecordsMapType gEventPingBuiltinEventRecords;
+
 // The details on dynamic events that are recorded from addons are registered here.
 StaticAutoPtr<nsTArray<DynamicEventInfo>> gDynamicEventInfo;
 
 } // namespace
 
 ////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////
 //
@@ -402,27 +409,32 @@ CanRecordEvent(const StaticMutexAutoLock
 bool
 IsExpired(const EventKey& key)
 {
   return key.id == kExpiredEventId;
 }
 
 EventRecordArray*
 GetEventRecordsForProcess(const StaticMutexAutoLock& lock, ProcessID processType,
-                          const EventKey& eventKey, bool isDynamicBuiltin)
+                          const EventKey& eventKey, bool isDynamicBuiltin,
+                          bool aEventPingStorage)
 {
-  // Put dynamic-builtin events (used to support "build faster") in a
-  // separate storage.
-  EventRecordsMapType& processStorage =
-    isDynamicBuiltin ? gBuiltinEventRecords : gEventRecords;
+  EventRecordsMapType* processStorage;
+  if (aEventPingStorage) {
+    processStorage = isDynamicBuiltin ? &gEventPingBuiltinEventRecords
+                                      : &gEventPingEventRecords;
+  } else {
+    processStorage = isDynamicBuiltin ? &gBuiltinEventRecords
+                                      : &gEventRecords;
+  }
 
   EventRecordArray* eventRecords = nullptr;
-  if (!processStorage.Get(uint32_t(processType), &eventRecords)) {
+  if (!processStorage->Get(uint32_t(processType), &eventRecords)) {
     eventRecords = new EventRecordArray();
-    processStorage.Put(uint32_t(processType), eventRecords);
+    processStorage->Put(uint32_t(processType), eventRecords);
   }
   return eventRecords;
 }
 
 EventKey*
 GetEventKey(const StaticMutexAutoLock& lock, const nsACString& category,
            const nsACString& method, const nsACString& object)
 {
@@ -481,48 +493,62 @@ RecordEvent(const StaticMutexAutoLock& l
 
   // Fixup the process id only for non-builtin (e.g. supporting build faster)
   // dynamic events.
   if (eventKey->dynamic &&
       !(*gDynamicEventInfo)[eventKey->id].builtin) {
     processType = ProcessID::Dynamic;
   }
 
-  bool isDynamicBuiltin = eventKey->dynamic && (*gDynamicEventInfo)[eventKey->id].builtin;
-  EventRecordArray* eventRecords =
-    GetEventRecordsForProcess(lock, processType, *eventKey, isDynamicBuiltin);
-
-  // Apply hard limit on event count in storage.
-  if (eventRecords->Length() >= kMaxEventRecords) {
-    return RecordEventResult::StorageLimitReached;
-  }
-
   // Check whether the extra keys passed are valid.
   if (!CheckExtraKeysValid(*eventKey, extra)) {
     return RecordEventResult::InvalidExtraKey;
   }
 
   // Check whether we can record this event.
   if (!CanRecordEvent(lock, *eventKey, processType)) {
     return RecordEventResult::Ok;
   }
 
   // Count the number of times this event has been recorded, even if its
-  // category does not have recording enabled.
+  // category does not have recording enabled or we've reached storage one's
+  // limit.
   TelemetryScalar::SummarizeEvent(UniqueEventName(category, method, object),
                                   processType, eventKey->dynamic);
 
   // Check whether this event's category has recording enabled
   if (!gEnabledCategories.GetEntry(GetCategory(lock, *eventKey))) {
     return RecordEventResult::Ok;
   }
 
-  // Add event record.
-  eventRecords->AppendElement(EventRecord(timestamp, *eventKey, value, extra));
-  return RecordEventResult::Ok;
+  bool isDynamicBuiltin = eventKey->dynamic && (*gDynamicEventInfo)[eventKey->id].builtin;
+
+  EventRecordArray* eventPingRecords =
+    GetEventRecordsForProcess(lock, processType, *eventKey, isDynamicBuiltin,
+                              true);
+  eventPingRecords->AppendElement(EventRecord(timestamp, *eventKey, value, extra));
+
+  // Notify observers when we hit the "event" ping limit in storage two.
+  static uint32_t sEventPingLimit =
+    mozilla::Preferences::GetUint("toolkit.telemetry.eventping.eventLimit", 1000);
+  RecordEventResult res = RecordEventResult::Ok;
+  if (eventPingRecords->Length() == sEventPingLimit) {
+    res = RecordEventResult::StorageLimitReached;
+  }
+
+  EventRecordArray* eventRecords =
+    GetEventRecordsForProcess(lock, processType, *eventKey, isDynamicBuiltin,
+                              false);
+
+  // Apply hard limit on event count in storage one.
+  if (eventRecords->Length() < kMaxEventRecords) {
+    eventRecords->AppendElement(EventRecord(timestamp, *eventKey, value, extra));
+  }
+
+  return res;
 }
 
 RecordEventResult
 ShouldRecordChildEvent(const StaticMutexAutoLock& lock, const nsACString& category,
                        const nsACString& method, const nsACString& object)
 {
   EventKey* eventKey = GetEventKey(lock, category, method, object);
   if (!eventKey) {
@@ -753,16 +779,18 @@ TelemetryEvent::DeInitializeGlobalState(
   gCanRecordBase = false;
   gCanRecordExtended = false;
 
   gEventNameIDMap.Clear();
   gCategoryNameIDMap.Clear();
   gEnabledCategories.Clear();
   gEventRecords.Clear();
   gBuiltinEventRecords.Clear();
+  gEventPingEventRecords.Clear();
+  gEventPingBuiltinEventRecords.Clear();
 
   gDynamicEventInfo = nullptr;
 
   gInitDone = false;
 }
 
 void
 TelemetryEvent::SetCanRecordBase(bool b)
@@ -921,20 +949,25 @@ TelemetryEvent::RecordEvent(const nsACSt
     case RecordEventResult::InvalidExtraKey: {
       nsPrintfCString msg(R"(Invalid extra key for event ["%s", "%s", "%s"].)",
                           PromiseFlatCString(aCategory).get(),
                           PromiseFlatCString(aMethod).get(),
                           PromiseFlatCString(aObject).get());
       LogToBrowserConsole(nsIScriptError::warningFlag, NS_ConvertUTF8toUTF16(msg));
       return NS_OK;
     }
-    case RecordEventResult::StorageLimitReached:
+    case RecordEventResult::StorageLimitReached: {
       LogToBrowserConsole(nsIScriptError::warningFlag,
                           NS_LITERAL_STRING("Event storage limit reached."));
+      nsCOMPtr<nsIObserverService> serv = mozilla::services::GetObserverService();
+      if (serv) {
+        serv->NotifyObservers(nullptr, "event-telemetry-storage-limit-reached", nullptr);
+      }
       return NS_OK;
+    }
     default:
       return NS_OK;
   }
 }
 
 static bool
 GetArrayPropertyValues(JSContext* cx, JS::HandleObject obj, const char* property,
                        nsTArray<nsCString>* results)
@@ -1115,71 +1148,105 @@ TelemetryEvent::RegisterEvents(const nsA
     StaticMutexAutoLock locker(gTelemetryEventsMutex);
     RegisterEvents(locker, aCategory, newEventInfos, newEventExpired);
   }
 
   return NS_OK;
 }
 
 nsresult
-TelemetryEvent::CreateSnapshots(uint32_t aDataset, bool aClear, JSContext* cx,
+TelemetryEvent::CreateSnapshots(uint32_t aDataset, bool aClear,
+                                bool aEventPingStorage, uint32_t aEventLimit, JSContext* cx,
                                 uint8_t optional_argc, JS::MutableHandleValue aResult)
 {
   if (!XRE_IsParentProcess()) {
     return NS_ERROR_FAILURE;
   }
 
   // Creating a JS snapshot of the events is a two-step process:
   // (1) Lock the storage and copy the events into function-local storage.
   // (2) Serialize the events into JS.
   // We can't hold a lock for (2) because we will run into deadlocks otherwise
   // from JS recording Telemetry.
 
   // (1) Extract the events from storage with a lock held.
   nsTArray<mozilla::Pair<const char*, EventRecordArray>> processEvents;
+  nsTArray<mozilla::Pair<uint32_t, EventRecordArray>> leftovers;
   {
     StaticMutexAutoLock locker(gTelemetryEventsMutex);
 
     if (!gInitDone) {
       return NS_ERROR_FAILURE;
     }
 
     // The snapshotting function is the same for both static and dynamic builtin events.
     // We can use the same function and store the events in the same output storage.
-    auto snapshotter = [aDataset, &locker, &processEvents]
+    auto snapshotter = [aDataset, &locker, &processEvents, &leftovers, aClear, optional_argc, aEventLimit]
                        (EventRecordsMapType& aProcessStorage)
     {
 
       for (auto iter = aProcessStorage.Iter(); !iter.Done(); iter.Next()) {
         const EventRecordArray* eventStorage = static_cast<EventRecordArray*>(iter.Data());
         EventRecordArray events;
+        EventRecordArray leftoverEvents;
 
         const uint32_t len = eventStorage->Length();
         for (uint32_t i = 0; i < len; ++i) {
           const EventRecord& record = (*eventStorage)[i];
           if (IsInDataset(GetDataset(locker, record.GetEventKey()), aDataset)) {
-            events.AppendElement(record);
+            // If we have a limit, adhere to it. If we have a limit and are
+            // going to clear, save the leftovers for later.
+            if (optional_argc < 3 || events.Length() < aEventLimit) {
+              events.AppendElement(record);
+            } else if (aClear) {
+              leftoverEvents.AppendElement(record);
+            }
           }
         }
 
         if (events.Length()) {
           const char* processName = GetNameForProcessID(ProcessID(iter.Key()));
           processEvents.AppendElement(mozilla::MakePair(processName, Move(events)));
+          if (leftoverEvents.Length()) {
+            leftovers.AppendElement(mozilla::MakePair(iter.Key(),
+                                                      Move(leftoverEvents)));
+          }
         }
       }
     };
 
     // Take a snapshot of the plain and dynamic builtin events.
-    snapshotter(gEventRecords);
-    snapshotter(gBuiltinEventRecords);
+    if (aEventPingStorage) {
+      snapshotter(gEventPingEventRecords);
+      snapshotter(gEventPingBuiltinEventRecords);
+      if (aClear) {
+        gEventPingEventRecords.Clear();
+        gEventPingBuiltinEventRecords.Clear();
+        for (auto pair : leftovers) {
+          gEventPingEventRecords.Put(pair.first(),
+                                     new EventRecordArray(pair.second()));
+          gEventPingBuiltinEventRecords.Put(pair.first(),
+                                            new EventRecordArray(Move(pair.second())));
+        }
+      }
+    } else {
+      snapshotter(gEventRecords);
+      snapshotter(gBuiltinEventRecords);
+      if (aClear) {
+        gEventRecords.Clear();
+        gBuiltinEventRecords.Clear();
+        for (auto pair : leftovers) {
+          gEventRecords.Put(pair.first(),
+                            new EventRecordArray(pair.second()));
+          gBuiltinEventRecords.Put(pair.first(),
+                                   new EventRecordArray(Move(pair.second())));
+        }
+      }
+    }
 
-    if (aClear) {
-      gEventRecords.Clear();
-      gBuiltinEventRecords.Clear();
-    }
   }
 
   // (2) Serialize the events to a JS object.
   JS::RootedObject rootObj(cx, JS_NewPlainObject(cx));
   if (!rootObj) {
     return NS_ERROR_FAILURE;
   }
 
@@ -1209,16 +1276,18 @@ TelemetryEvent::ClearEvents()
   StaticMutexAutoLock lock(gTelemetryEventsMutex);
 
   if (!gInitDone) {
     return;
   }
 
   gEventRecords.Clear();
   gBuiltinEventRecords.Clear();
+  gEventPingEventRecords.Clear();
+  gEventPingBuiltinEventRecords.Clear();
 }
 
 void
 TelemetryEvent::SetEventRecordingEnabled(const nsACString& category, bool enabled)
 {
   StaticMutexAutoLock locker(gTelemetryEventsMutex);
 
   uint32_t categoryId;
@@ -1253,16 +1322,18 @@ TelemetryEvent::SizeOfIncludingThis(mozi
         partial += (*eventRecords)[i].SizeOfExcludingThis(aMallocSizeOf);
       }
     }
     return partial;
   };
 
   n += getSizeOfRecords(gEventRecords);
   n += getSizeOfRecords(gBuiltinEventRecords);
+  n += getSizeOfRecords(gEventPingEventRecords);
+  n += getSizeOfRecords(gEventPingBuiltinEventRecords);
 
   n += gEventNameIDMap.ShallowSizeOfExcludingThis(aMallocSizeOf);
   for (auto iter = gEventNameIDMap.ConstIter(); !iter.Done(); iter.Next()) {
     n += iter.Key().SizeOfExcludingThisIfUnshared(aMallocSizeOf);
   }
 
   n += gCategoryNameIDMap.ShallowSizeOfExcludingThis(aMallocSizeOf);
   for (auto iter = gCategoryNameIDMap.ConstIter(); !iter.Done(); iter.Next()) {
--- a/toolkit/components/telemetry/TelemetryEvent.h
+++ b/toolkit/components/telemetry/TelemetryEvent.h
@@ -33,17 +33,18 @@ nsresult RecordEvent(const nsACString& a
                      const nsACString& aObject, JS::HandleValue aValue,
                      JS::HandleValue aExtra, JSContext* aCx,
                      uint8_t optional_argc);
 
 void SetEventRecordingEnabled(const nsACString& aCategory, bool aEnabled);
 nsresult RegisterEvents(const nsACString& aCategory, JS::Handle<JS::Value> aEventData,
                         bool aBuiltin, JSContext* cx);
 
-nsresult CreateSnapshots(uint32_t aDataset, bool aClear, JSContext* aCx,
+nsresult CreateSnapshots(uint32_t aDataset, bool aClear,
+                         bool aEventPingStorage, uint32_t aEventLimit, JSContext* aCx,
                          uint8_t optional_argc, JS::MutableHandleValue aResult);
 
 // Record events from child processes.
 nsresult RecordChildEvents(mozilla::Telemetry::ProcessID aProcessType,
                            const nsTArray<mozilla::Telemetry::ChildEventData>& aEvents);
 
 // Only to be used for testing.
 void ClearEvents();
--- a/toolkit/components/telemetry/nsITelemetry.idl
+++ b/toolkit/components/telemetry/nsITelemetry.idl
@@ -475,20 +475,25 @@ interface nsITelemetry : nsISupports
    *   [
    *     // [timestamp, category, method, object, stringValue, extraValues]
    *     [43245, "category1", "method1", "object1", "string value", null],
    *     [43258, "category1", "method2", "object1", null, {"key1": "string value"}],
    *     ...
    *   ]
    *
    * @param aDataset DATASET_RELEASE_CHANNEL_OPTOUT or DATASET_RELEASE_CHANNEL_OPTIN.
-   * @param [aClear=false] Whether to clear out the scalars after snapshotting.
+   * @param [aClear=false] Whether to clear out the events after snapshotting.
+   * @param [aEventPingStorage=false] Whether to snapshot the events duplicated to the "event"-ping-specific storage.
+   *                                  When true, aClear controls whether we clear the "event" ping storage.
+   * @param aEventLimit How many events per process to limit the snapshot to contain, all if unspecified.
+   *                    Even if aClear, the leftover event records are not cleared.
    */
   [implicit_jscontext, optional_argc]
-  jsval snapshotEvents(in uint32_t aDataset, [optional] in boolean aClear);
+  jsval snapshotEvents(in uint32_t aDataset, [optional] in boolean aClear,
+                       [optional] in boolean aEventPingStorage, [optional] in uint32_t aEventLimit);
 
   /**
    * Register new events to record them from addons. This allows registering multiple
    * events for a category. They will be valid only for the current Firefox session.
    * Note that events shipping in Firefox should be registered in Events.yaml.
    *
    * @param aCategory The unique category the events are registered in.
    * @param aEventData An object that contains registration data for 1-N events of the form: