Bug 1577937 - Throw a StaticDataMutex around sTelemetry r=janerik,froydnj
authorChris H-C <chutten@mozilla.com>
Thu, 12 Sep 2019 19:58:01 +0000
changeset 492920 8dd82e13bdf6ec7198b4c4ea62e6c51d43e4f3ec
parent 492919 5e185d44f74f660421c75fc56a40531ddafa34f6
child 492921 8c5640edebc663298f77cab04a849690e9e042d1
push id36569
push usernerli@mozilla.com
push dateFri, 13 Sep 2019 04:42:54 +0000
treeherdermozilla-central@9b65c42a8736 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersjanerik, froydnj
bugs1577937
milestone71.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1577937 - Throw a StaticDataMutex around sTelemetry r=janerik,froydnj It could very well happen that someone calls into a static method of TelemetryImpl during Telemetry shutdown. There could be a race where sTelemetry is non-null at the top of a method like CanRecordExtended, but is null by the time we try to use it. So let's put a StaticDataMutex around it. This isn't trying to mutex the data itself. Instead I'm just trying to protect places where the pointer is being read or changed. Differential Revision: https://phabricator.services.mozilla.com/D44905
toolkit/components/telemetry/core/Telemetry.cpp
--- a/toolkit/components/telemetry/core/Telemetry.cpp
+++ b/toolkit/components/telemetry/core/Telemetry.cpp
@@ -23,16 +23,17 @@
 #include "jsfriendapi.h"
 #include "js/GCAPI.h"
 #include "mozilla/dom/ToJSValue.h"
 #include "mozilla/dom/Promise.h"
 #include "mozilla/Atomics.h"
 #include "mozilla/Attributes.h"
 #include "mozilla/BackgroundHangMonitor.h"
 #include "mozilla/Components.h"
+#include "mozilla/DataMutex.h"
 #include "mozilla/DebugOnly.h"
 #include "mozilla/FStream.h"
 #include "mozilla/IOInterposer.h"
 #include "mozilla/Likely.h"
 #include "mozilla/MathAlgorithms.h"
 #include "mozilla/MemoryReporting.h"
 #include "mozilla/MemoryTelemetry.h"
 #include "mozilla/ModuleUtils.h"
@@ -181,17 +182,17 @@ class TelemetryImpl final : public nsITe
 
   bool AddSQLInfo(JSContext* cx, JS::Handle<JSObject*> rootObj, bool mainThread,
                   bool privateSQL);
   bool GetSQLStats(JSContext* cx, JS::MutableHandle<JS::Value> ret,
                    bool includePrivateSql);
 
   void ReadLateWritesStacks(nsIFile* aProfileDir);
 
-  static TelemetryImpl* sTelemetry;
+  static StaticDataMutex<TelemetryImpl*> sTelemetry;
   AutoHashtable<SlowSQLEntryType> mPrivateSQL;
   AutoHashtable<SlowSQLEntryType> mSanitizedSQL;
   Mutex mHashMutex;
   Atomic<bool, SequentiallyConsistent, recordreplay::Behavior::DontPreserve>
       mCanRecordBase;
   Atomic<bool, SequentiallyConsistent, recordreplay::Behavior::DontPreserve>
       mCanRecordExtended;
 
@@ -206,17 +207,17 @@ class TelemetryImpl final : public nsITe
   uint32_t mLastShutdownTime;
   uint32_t mFailedLockCount;
   nsCOMArray<nsIFetchTelemetryDataCallback> mCallbacks;
   friend class nsFetchTelemetryData;
 
   WebrtcTelemetry mWebrtcTelemetry;
 };
 
-TelemetryImpl* TelemetryImpl::sTelemetry = nullptr;
+StaticDataMutex<TelemetryImpl*> TelemetryImpl::sTelemetry(nullptr, nullptr);
 
 MOZ_DEFINE_MALLOC_SIZE_OF(TelemetryMallocSizeOf)
 
 NS_IMETHODIMP
 TelemetryImpl::CollectReports(nsIHandleReportCallback* aHandleReport,
                               nsISupports* aData, bool aAnonymize) {
   mozilla::MallocSizeOf aMallocSizeOf = TelemetryMallocSizeOf;
 
@@ -338,42 +339,49 @@ nsresult GetFailedProfileLockFile(nsIFil
 
 class nsFetchTelemetryData : public Runnable {
  public:
   nsFetchTelemetryData(PathCharPtr aShutdownTimeFilename,
                        nsIFile* aFailedProfileLockFile, nsIFile* aProfileDir)
       : mozilla::Runnable("nsFetchTelemetryData"),
         mShutdownTimeFilename(aShutdownTimeFilename),
         mFailedProfileLockFile(aFailedProfileLockFile),
-        mTelemetry(TelemetryImpl::sTelemetry),
         mProfileDir(aProfileDir) {}
 
  private:
   PathCharPtr mShutdownTimeFilename;
   nsCOMPtr<nsIFile> mFailedProfileLockFile;
-  RefPtr<TelemetryImpl> mTelemetry;
   nsCOMPtr<nsIFile> mProfileDir;
 
  public:
   void MainThread() {
-    mTelemetry->mCachedTelemetryData = true;
-    for (unsigned int i = 0, n = mTelemetry->mCallbacks.Count(); i < n; ++i) {
-      mTelemetry->mCallbacks[i]->Complete();
+    auto lock = TelemetryImpl::sTelemetry.Lock();
+    auto telemetry = lock.ref();
+    telemetry->mCachedTelemetryData = true;
+    for (unsigned int i = 0, n = telemetry->mCallbacks.Count(); i < n; ++i) {
+      telemetry->mCallbacks[i]->Complete();
     }
-    mTelemetry->mCallbacks.Clear();
+    telemetry->mCallbacks.Clear();
   }
 
   NS_IMETHOD Run() override {
-    LoadFailedLockCount(mTelemetry->mFailedLockCount);
-    mTelemetry->mLastShutdownTime =
-        ReadLastShutdownDuration(mShutdownTimeFilename);
-    mTelemetry->ReadLateWritesStacks(mProfileDir);
+    uint32_t failedLockCount = 0;
+    uint32_t lastShutdownDuration = 0;
+    LoadFailedLockCount(failedLockCount);
+    lastShutdownDuration = ReadLastShutdownDuration(mShutdownTimeFilename);
+    {
+      auto lock = TelemetryImpl::sTelemetry.Lock();
+      auto telemetry = lock.ref();
+      telemetry->mFailedLockCount = failedLockCount;
+      telemetry->mLastShutdownTime = lastShutdownDuration;
+      telemetry->ReadLateWritesStacks(mProfileDir);
+    }
 
     TelemetryScalar::Set(Telemetry::ScalarID::BROWSER_TIMINGS_LAST_SHUTDOWN,
-                         mTelemetry->mLastShutdownTime);
+                         lastShutdownDuration);
 
     nsCOMPtr<nsIRunnable> e =
         NewRunnableMethod("nsFetchTelemetryData::MainThread", this,
                           &nsFetchTelemetryData::MainThread);
     NS_ENSURE_STATE(e);
     NS_DispatchToMainThread(e);
     return NS_OK;
   }
@@ -1170,19 +1178,22 @@ TelemetryImpl::GetIsOfficialTelemetry(bo
   *ret = true;
 #else
   *ret = false;
 #endif
   return NS_OK;
 }
 
 already_AddRefed<nsITelemetry> TelemetryImpl::CreateTelemetryInstance() {
-  MOZ_ASSERT(
-      sTelemetry == nullptr,
-      "CreateTelemetryInstance may only be called once, via GetService()");
+  {
+    auto lock = sTelemetry.Lock();
+    MOZ_ASSERT(
+        *lock == nullptr,
+        "CreateTelemetryInstance may only be called once, via GetService()");
+  }
 
   bool useTelemetry = false;
 #ifndef FUZZING
   if ((XRE_IsParentProcess() || XRE_IsContentProcess() || XRE_IsGPUProcess() ||
        XRE_IsSocketProcess()) &&
       // Telemetry is never accumulated when recording or replaying, both
       // because the resulting measurements might be biased and because
       // measurements might occur at non-deterministic points in execution
@@ -1197,27 +1208,31 @@ already_AddRefed<nsITelemetry> Telemetry
   TelemetryScalar::InitializeGlobalState(useTelemetry, useTelemetry);
 
   // Only record events from the parent process.
   TelemetryEvent::InitializeGlobalState(XRE_IsParentProcess(),
                                         XRE_IsParentProcess());
   TelemetryOrigin::InitializeGlobalState();
 
   // Now, create and initialize the Telemetry global state.
-  sTelemetry = new TelemetryImpl();
+  TelemetryImpl* telemetry = new TelemetryImpl();
+  {
+    auto lock = sTelemetry.Lock();
+    *lock = telemetry;
+  }
 
   // AddRef for the local reference
-  NS_ADDREF(sTelemetry);
+  NS_ADDREF(telemetry);
   // AddRef for the caller
-  nsCOMPtr<nsITelemetry> ret = sTelemetry;
+  nsCOMPtr<nsITelemetry> ret = telemetry;
 
-  sTelemetry->mCanRecordBase = useTelemetry;
-  sTelemetry->mCanRecordExtended = useTelemetry;
+  telemetry->mCanRecordBase = useTelemetry;
+  telemetry->mCanRecordExtended = useTelemetry;
 
-  sTelemetry->InitMemoryReporter();
+  telemetry->InitMemoryReporter();
   InitHistogramRecordingEnabled();  // requires sTelemetry to exist
 
 #if defined(MOZ_TELEMETRY_GECKOVIEW)
   // We only want to add persistence for GeckoView, but both
   // GV and Fennec are on Android. So just init persistence if this
   // is Android but not Fennec.
   if (GetCurrentProduct() == SupportedProduct::Geckoview) {
     TelemetryGeckoViewPersistence::InitPersistence();
@@ -1225,17 +1240,20 @@ already_AddRefed<nsITelemetry> Telemetry
 #endif
 
   return ret.forget();
 }
 
 void TelemetryImpl::ShutdownTelemetry() {
   // No point in collecting IO beyond this point
   ClearIOReporting();
-  NS_IF_RELEASE(sTelemetry);
+  {
+    auto lock = sTelemetry.Lock();
+    NS_IF_RELEASE(lock.ref());
+  }
 
   // Lastly, de-initialise the TelemetryHistogram and TelemetryScalar global
   // states, so as to release any heap storage that would otherwise be kept
   // alive by it.
   TelemetryHistogram::DeInitializeGlobalState();
   TelemetryScalar::DeInitializeGlobalState();
   TelemetryEvent::DeInitializeGlobalState();
   TelemetryOrigin::DeInitializeGlobalState();
@@ -1245,23 +1263,25 @@ void TelemetryImpl::ShutdownTelemetry() 
   if (GetCurrentProduct() == SupportedProduct::Geckoview) {
     TelemetryGeckoViewPersistence::DeInitPersistence();
   }
 #endif
 }
 
 void TelemetryImpl::StoreSlowSQL(const nsACString& sql, uint32_t delay,
                                  SanitizedState state) {
+  auto lock = sTelemetry.Lock();
+  auto telemetry = lock.ref();
   AutoHashtable<SlowSQLEntryType>* slowSQLMap = nullptr;
   if (state == Sanitized)
-    slowSQLMap = &(sTelemetry->mSanitizedSQL);
+    slowSQLMap = &(telemetry->mSanitizedSQL);
   else
-    slowSQLMap = &(sTelemetry->mPrivateSQL);
+    slowSQLMap = &(telemetry->mPrivateSQL);
 
-  MutexAutoLock hashMutex(sTelemetry->mHashMutex);
+  MutexAutoLock hashMutex(telemetry->mHashMutex);
 
   SlowSQLEntryType* entry = slowSQLMap->GetEntry(sql);
   if (!entry) {
     entry = slowSQLMap->PutEntry(sql);
     if (MOZ_UNLIKELY(!entry)) return;
     entry->GetModifiableData()->mainThread.hitCount = 0;
     entry->GetModifiableData()->mainThread.totalTime = 0;
     entry->GetModifiableData()->otherThreads.hitCount = 0;
@@ -1450,17 +1470,22 @@ static const TrackedDBEntry kTrackedDBPr
 const uint32_t kMaxSlowStatementLength = 1000;
 
 void TelemetryImpl::RecordSlowStatement(const nsACString& sql,
                                         const nsACString& dbName,
                                         uint32_t delay) {
   MOZ_ASSERT(!sql.IsEmpty());
   MOZ_ASSERT(!dbName.IsEmpty());
 
-  if (!sTelemetry || !TelemetryHistogram::CanRecordExtended()) return;
+  {
+    auto lock = sTelemetry.Lock();
+    if (!lock.ref() || !TelemetryHistogram::CanRecordExtended()) {
+      return;
+    }
+  }
 
   bool recordStatement = false;
 
   for (const TrackedDBEntry& nameEntry : kTrackedDBs) {
     MOZ_ASSERT(nameEntry.mNameLength);
     const nsDependentCString name(nameEntry.mName, nameEntry.mNameLength);
     if (dbName == name) {
       recordStatement = true;
@@ -1499,53 +1524,61 @@ void TelemetryImpl::RecordSlowStatement(
   nsAutoCString fullSQL;
   fullSQL.AppendPrintf("%s /* %s */", nsPromiseFlatCString(sql).get(),
                        nsPromiseFlatCString(dbName).get());
   StoreSlowSQL(fullSQL, delay, Unsanitized);
 }
 
 void TelemetryImpl::RecordIceCandidates(const uint32_t iceCandidateBitmask,
                                         const bool success) {
-  if (!sTelemetry || !TelemetryHistogram::CanRecordExtended()) return;
+  auto lock = sTelemetry.Lock();
+  auto telemetry = lock.ref();
+  if (!telemetry || !TelemetryHistogram::CanRecordExtended()) return;
 
-  sTelemetry->mWebrtcTelemetry.RecordIceCandidateMask(iceCandidateBitmask,
-                                                      success);
+  telemetry->mWebrtcTelemetry.RecordIceCandidateMask(iceCandidateBitmask,
+                                                     success);
 }
 
 #if defined(MOZ_GECKO_PROFILER)
 
 void TelemetryImpl::DoStackCapture(const nsACString& aKey) {
   if (Telemetry::CanRecordExtended() && XRE_IsParentProcess()) {
-    sTelemetry->mStackCapturer.Capture(aKey);
+    auto lock = sTelemetry.Lock();
+    auto telemetry = lock.ref();
+    telemetry->mStackCapturer.Capture(aKey);
   }
 }
 #endif
 
 nsresult TelemetryImpl::CaptureStack(const nsACString& aKey) {
 #ifdef MOZ_GECKO_PROFILER
   TelemetryImpl::DoStackCapture(aKey);
 #endif
   return NS_OK;
 }
 
 bool TelemetryImpl::CanRecordBase() {
-  if (!sTelemetry) {
+  auto lock = sTelemetry.Lock();
+  auto telemetry = lock.ref();
+  if (!telemetry) {
     return false;
   }
   bool canRecordBase;
-  nsresult rv = sTelemetry->GetCanRecordBase(&canRecordBase);
+  nsresult rv = telemetry->GetCanRecordBase(&canRecordBase);
   return NS_SUCCEEDED(rv) && canRecordBase;
 }
 
 bool TelemetryImpl::CanRecordExtended() {
-  if (!sTelemetry) {
+  auto lock = sTelemetry.Lock();
+  auto telemetry = lock.ref();
+  if (!telemetry) {
     return false;
   }
   bool canRecordExtended;
-  nsresult rv = sTelemetry->GetCanRecordExtended(&canRecordExtended);
+  nsresult rv = telemetry->GetCanRecordExtended(&canRecordExtended);
   return NS_SUCCEEDED(rv) && canRecordExtended;
 }
 
 bool TelemetryImpl::CanRecordReleaseData() { return CanRecordBase(); }
 
 bool TelemetryImpl::CanRecordPrereleaseData() { return CanRecordExtended(); }
 
 NS_IMPL_ISUPPORTS(TelemetryImpl, nsITelemetry, nsIMemoryReporter)