Bug 1364235 - Collect telemetry stats on startup cache hits and misses r=kmag
authorDoug Thayer <dothayer@mozilla.com>
Tue, 05 Mar 2019 16:52:57 +0000
changeset 462409 1a681dc60b60a4040959de4b44fa8d6a4f8f08e1
parent 462408 bd68f490ca078d2b7a5a10f47b2f9d5f1abbdbc4
child 462410 4e5d97c935151d28d1e06943133951918ac28568
push id79632
push userdothayer@mozilla.com
push dateTue, 05 Mar 2019 18:39:45 +0000
treeherderautoland@1a681dc60b60 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewerskmag
bugs1364235, 1264235
milestone67.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1364235 - Collect telemetry stats on startup cache hits and misses r=kmag In bug 1264235 we have some indication that observed bugs with the startup cache might have been resolved, but we don't really know until we collect data. Collecting these stats will give us the ability to have more certainty that the startup cache is functioning correctly in the wild. Differential Revision: https://phabricator.services.mozilla.com/D19573
js/xpconnect/loader/ScriptPreloader.cpp
js/xpconnect/loader/ScriptPreloader.h
startupcache/StartupCache.cpp
startupcache/StartupCache.h
startupcache/moz.build
startupcache/test/browser.ini
startupcache/test/browser_startupcache_telemetry.js
testing/marionette/harness/marionette_harness/tests/unit/test_startup_caches.py
testing/marionette/harness/marionette_harness/tests/unit/unit-tests.ini
toolkit/components/telemetry/Histograms.json
toolkit/components/telemetry/Scalars.yaml
--- a/js/xpconnect/loader/ScriptPreloader.cpp
+++ b/js/xpconnect/loader/ScriptPreloader.cpp
@@ -894,22 +894,33 @@ void ScriptPreloader::NoteScript(const n
   script->mProcessTypes += processType;
 }
 
 JSScript* ScriptPreloader::GetCachedScript(JSContext* cx,
                                            const nsCString& path) {
   // If a script is used by both the parent and the child, it's stored only
   // in the child cache.
   if (mChildCache) {
-    auto script = mChildCache->GetCachedScript(cx, path);
+    RootedScript script(cx, mChildCache->GetCachedScriptInternal(cx, path));
     if (script) {
+      Telemetry::AccumulateCategorical(
+          Telemetry::LABELS_SCRIPT_PRELOADER_REQUESTS::HitChild);
       return script;
     }
   }
 
+  RootedScript script(cx, GetCachedScriptInternal(cx, path));
+  Telemetry::AccumulateCategorical(
+      script ? Telemetry::LABELS_SCRIPT_PRELOADER_REQUESTS::Hit
+             : Telemetry::LABELS_SCRIPT_PRELOADER_REQUESTS::Miss);
+  return script;
+}
+
+JSScript* ScriptPreloader::GetCachedScriptInternal(JSContext* cx,
+                                                   const nsCString& path) {
   auto script = mScripts.Get(path);
   if (script) {
     return WaitForCachedScript(cx, script);
   }
 
   return nullptr;
 }
 
@@ -934,26 +945,30 @@ JSScript* ScriptPreloader::WaitForCached
     // entered the mutex.
     MaybeFinishOffThreadDecode();
 
     if (!script->mReadyToExecute &&
         script->mSize < MAX_MAINTHREAD_DECODE_SIZE) {
       LOG(Info, "Script is small enough to recompile on main thread\n");
 
       script->mReadyToExecute = true;
+      Telemetry::ScalarAdd(
+          Telemetry::ScalarID::SCRIPT_PRELOADER_MAINTHREAD_RECOMPILE, 1);
     } else {
       while (!script->mReadyToExecute) {
         mal.Wait();
 
         MonitorAutoUnlock mau(mMonitor);
         MaybeFinishOffThreadDecode();
       }
     }
 
-    LOG(Debug, "Waited %fms\n", (TimeStamp::Now() - start).ToMilliseconds());
+    double waitedMS = (TimeStamp::Now() - start).ToMilliseconds();
+    Telemetry::Accumulate(Telemetry::SCRIPT_PRELOADER_WAIT_TIME, int(waitedMS));
+    LOG(Debug, "Waited %fms\n", waitedMS);
   }
 
   return script->GetJSScript(cx);
 }
 
 /* static */
 void ScriptPreloader::OffThreadDecodeCallback(JS::OffThreadToken* token,
                                               void* context) {
--- a/js/xpconnect/loader/ScriptPreloader.h
+++ b/js/xpconnect/loader/ScriptPreloader.h
@@ -98,16 +98,17 @@ class ScriptPreloader : public nsIObserv
 
   Result<Ok, nsresult> InitCache(const Maybe<ipc::FileDescriptor>& cacheFile,
                                  ScriptCacheChild* cacheChild);
 
   bool Active() { return mCacheInitialized && !mStartupFinished; }
 
  private:
   Result<Ok, nsresult> InitCacheInternal(JS::HandleObject scope = nullptr);
+  JSScript* GetCachedScriptInternal(JSContext* cx, const nsCString& name);
 
  public:
   void Trace(JSTracer* trc);
 
   static ProcessType CurrentProcessType() {
     MOZ_ASSERT(sProcessType != ProcessType::Uninitialized);
     return sProcessType;
   }
--- a/startupcache/StartupCache.cpp
+++ b/startupcache/StartupCache.cpp
@@ -8,16 +8,17 @@
 #include "PLDHashTable.h"
 #include "mozilla/IOInterposer.h"
 #include "mozilla/MemoryReporting.h"
 #include "mozilla/scache/StartupCache.h"
 
 #include "nsAutoPtr.h"
 #include "nsClassHashtable.h"
 #include "nsComponentManagerUtils.h"
+#include "nsCRT.h"
 #include "nsDirectoryServiceUtils.h"
 #include "nsIClassInfo.h"
 #include "nsIFile.h"
 #include "nsIObserver.h"
 #include "nsIObserverService.h"
 #include "nsIOutputStream.h"
 #include "nsIStorageStream.h"
 #include "nsIStreamBufferAccess.h"
@@ -256,22 +257,31 @@ nsresult StartupCache::GetBuffer(const c
   if (!mStartupWriteInitiated) {
     CacheEntry* entry;
     nsDependentCString idStr(id);
     mTable.Get(idStr, &entry);
     if (entry) {
       *outbuf = MakeUnique<char[]>(entry->size);
       memcpy(outbuf->get(), entry->data.get(), entry->size);
       *length = entry->size;
+      Telemetry::AccumulateCategorical(
+          Telemetry::LABELS_STARTUP_CACHE_REQUESTS::HitMemory);
       return NS_OK;
     }
   }
 
   nsresult rv = GetBufferFromZipArchive(mArchive, true, id, outbuf, length);
-  if (NS_SUCCEEDED(rv)) return rv;
+  if (NS_SUCCEEDED(rv)) {
+    Telemetry::AccumulateCategorical(
+        Telemetry::LABELS_STARTUP_CACHE_REQUESTS::HitDisk);
+    return rv;
+  }
+
+  Telemetry::AccumulateCategorical(
+      Telemetry::LABELS_STARTUP_CACHE_REQUESTS::Miss);
 
   RefPtr<nsZipArchive> omnijar =
       mozilla::Omnijar::GetReader(mozilla::Omnijar::APP);
   // no need to checksum omnijarred entries
   rv = GetBufferFromZipArchive(omnijar, false, id, outbuf, length);
   if (NS_SUCCEEDED(rv)) return rv;
 
   omnijar = mozilla::Omnijar::GetReader(mozilla::Omnijar::GRE);
@@ -417,17 +427,24 @@ void StartupCache::WriteToDisk() {
 
   // We succesfully wrote the archive to disk; mark the disk file as trusted
   gIgnoreDiskCache = false;
 
   // Our reader's view of the archive is outdated now, reload it.
   LoadArchive();
 }
 
-void StartupCache::InvalidateCache() {
+void StartupCache::InvalidateCache(bool memoryOnly) {
+  if (memoryOnly) {
+    // The memoryOnly option is just for testing purposes. We want to ensure
+    // that we're nuking the in-memory form but that we preserve everything
+    // on disk.
+    WriteToDisk();
+    return;
+  }
   WaitOnWriteThread();
   mPendingWrites.Clear();
   mTable.Clear();
   mArchive = nullptr;
   nsresult rv = mFile->Remove(false);
   if (NS_FAILED(rv) && rv != NS_ERROR_FILE_TARGET_DOES_NOT_EXIST &&
       rv != NS_ERROR_FILE_NOT_FOUND) {
     gIgnoreDiskCache = true;
@@ -501,17 +518,17 @@ nsresult StartupCacheListener::Observe(n
   StartupCache* sc = StartupCache::GetSingleton();
   if (!sc) return NS_OK;
 
   if (strcmp(topic, NS_XPCOM_SHUTDOWN_OBSERVER_ID) == 0) {
     // Do not leave the thread running past xpcom shutdown
     sc->WaitOnWriteThread();
     StartupCache::gShutdownInitiated = true;
   } else if (strcmp(topic, "startupcache-invalidate") == 0) {
-    sc->InvalidateCache();
+    sc->InvalidateCache(data && nsCRT::strcmp(data, u"memoryOnly") == 0);
   }
   return NS_OK;
 }
 
 nsresult StartupCache::GetDebugObjectOutputStream(
     nsIObjectOutputStream* aStream, nsIObjectOutputStream** aOutStream) {
   NS_ENSURE_ARG_POINTER(aStream);
 #ifdef DEBUG
--- a/startupcache/StartupCache.h
+++ b/startupcache/StartupCache.h
@@ -113,17 +113,17 @@ class StartupCache : public nsIMemoryRep
   nsresult GetBuffer(const char* id, UniquePtr<char[]>* outbuf,
                      uint32_t* length);
 
   // Stores a buffer. Caller yields ownership.
   nsresult PutBuffer(const char* id, UniquePtr<char[]>&& inbuf,
                      uint32_t length);
 
   // Removes the cache file.
-  void InvalidateCache();
+  void InvalidateCache(bool memoryOnly = false);
 
   // Signal that data should not be loaded from the cache file
   static void IgnoreDiskCache();
 
   // In DEBUG builds, returns a stream that will attempt to check for
   // and disallow multiple writes of the same object.
   nsresult GetDebugObjectOutputStream(nsIObjectOutputStream* aStream,
                                       nsIObjectOutputStream** outStream);
--- a/startupcache/moz.build
+++ b/startupcache/moz.build
@@ -3,16 +3,17 @@
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 with Files("**"):
     BUG_COMPONENT = ("Core", "XPCOM")
 
 TEST_DIRS += ['test']
+BROWSER_CHROME_MANIFESTS += ['test/browser.ini']
 
 EXPORTS.mozilla.scache += [
     'StartupCache.h',
     'StartupCacheUtils.h',
 ]
 
 UNIFIED_SOURCES += [
     'StartupCache.cpp',
new file mode 100644
--- /dev/null
+++ b/startupcache/test/browser.ini
@@ -0,0 +1,1 @@
+[browser_startupcache_telemetry.js]
new file mode 100644
--- /dev/null
+++ b/startupcache/test/browser_startupcache_telemetry.js
@@ -0,0 +1,40 @@
+/* Any copyright is dedicated to the Public Domain.
+   http://creativecommons.org/publicdomain/zero/1.0/ */
+
+"use strict";
+
+const LABELS_STARTUP_CACHE_REQUESTS = {
+  HitMemory: 0,
+  HitDisk: 1,
+  Miss: 2,
+};
+
+add_task(async function() {
+  Services.obs.notifyObservers(null, "startupcache-invalidate");
+  Services.telemetry.getSnapshotForHistograms("main", true);
+  let newWin = await BrowserTestUtils.openNewBrowserWindow();
+  let snapshot = Services.telemetry.getSnapshotForHistograms("main", true);
+  function histValue(label) {
+    return snapshot.parent.STARTUP_CACHE_REQUESTS.values[label] || 0;
+  }
+  Assert.equal(histValue(LABELS_STARTUP_CACHE_REQUESTS.HitMemory), 0);
+  Assert.equal(histValue(LABELS_STARTUP_CACHE_REQUESTS.HitDisk), 0);
+  Assert.notEqual(histValue(LABELS_STARTUP_CACHE_REQUESTS.Miss), 0);
+  await BrowserTestUtils.closeWindow(newWin);
+
+  newWin = await BrowserTestUtils.openNewBrowserWindow();
+  snapshot = Services.telemetry.getSnapshotForHistograms("main", true);
+  Assert.notEqual(histValue(LABELS_STARTUP_CACHE_REQUESTS.HitMemory), 0);
+  Assert.equal(histValue(LABELS_STARTUP_CACHE_REQUESTS.HitDisk), 0);
+  Assert.notEqual(histValue(LABELS_STARTUP_CACHE_REQUESTS.Miss), 0);
+  await BrowserTestUtils.closeWindow(newWin);
+
+  Services.obs.notifyObservers(null, "startupcache-invalidate", "memoryOnly");
+  newWin = await BrowserTestUtils.openNewBrowserWindow();
+  snapshot = Services.telemetry.getSnapshotForHistograms("main", true);
+  Assert.notEqual(histValue(LABELS_STARTUP_CACHE_REQUESTS.HitMemory), 0);
+  Assert.notEqual(histValue(LABELS_STARTUP_CACHE_REQUESTS.HitDisk), 0);
+  // Some misses can come through - just ensure it's a small number
+  Assert.ok(histValue(LABELS_STARTUP_CACHE_REQUESTS.Miss) < 5);
+  await BrowserTestUtils.closeWindow(newWin);
+});
new file mode 100644
--- /dev/null
+++ b/testing/marionette/harness/marionette_harness/tests/unit/test_startup_caches.py
@@ -0,0 +1,80 @@
+# Any copyright is dedicated to the Public Domain.
+# http://creativecommons.org/publicdomain/zero/1.0/
+
+from __future__ import absolute_import
+
+import time
+import os
+from marionette_harness import MarionetteTestCase
+
+LABELS_SCRIPT_PRELOADER_REQUESTS = {
+  "Hit": "0",
+  "HitChild": "1",
+  "Miss": "2",
+}
+
+
+class TestScriptPreloader(MarionetteTestCase):
+    def test_preloader_requests_histogram(self):
+        start_time = time.time()
+        self.invalidate_caches()
+        self.marionette.restart(clean=False, in_app=True)
+        histogram = self.get_histogram("SCRIPT_PRELOADER_REQUESTS")
+        misses = histogram.get(LABELS_SCRIPT_PRELOADER_REQUESTS["Miss"], 0)
+        hits = histogram.get(LABELS_SCRIPT_PRELOADER_REQUESTS["Hit"], 0)
+        child_hits = histogram.get(LABELS_SCRIPT_PRELOADER_REQUESTS["HitChild"], 0)
+        self.assertMuchGreaterThan(misses, hits)
+        self.assertMuchGreaterThan(misses, child_hits)
+
+        profile = self.marionette.profile_path
+        self.wait_for_file_change(start_time, "{}/startupCache/scriptCache.bin".format(profile))
+        self.wait_for_file_change(start_time, "{}/startupCache/scriptCache-child.bin".format(profile))
+        self.marionette.restart(clean=False, in_app=True)
+        histogram = self.get_histogram("SCRIPT_PRELOADER_REQUESTS")
+        misses = histogram.get(LABELS_SCRIPT_PRELOADER_REQUESTS["Miss"], 0)
+        hits = histogram.get(LABELS_SCRIPT_PRELOADER_REQUESTS["Hit"], 0)
+        child_hits = histogram.get(LABELS_SCRIPT_PRELOADER_REQUESTS["HitChild"], 0)
+        # This is just heuristic. We certainly want to be made aware if this ratio
+        # changes and we didn't intend it to.
+        self.assertSimilar(misses, hits)
+        self.assertNotEqual(child_hits, 0)
+
+    def assertMuchGreaterThan(self, lhs, rhs):
+        self.assertTrue(lhs > 4 * rhs)
+
+    def assertSimilar(self, lhs, rhs):
+        self.assertTrue(lhs < 2 * rhs)
+        self.assertTrue(lhs > rhs / 2)
+
+    def wait_for_file_change(self, start_time, path):
+        expires = time.time() + 20
+        while True:
+            try:
+                if os.stat(path).st_mtime > start_time:
+                    return
+                if time.time() > expires:
+                    raise Error("Never observed file change for {}".format(path))
+                time.sleep(1)
+            except OSError:
+                pass
+
+
+    def wait_for_observer_notification(self, name):
+        with self.marionette.using_context(self.marionette.CONTEXT_CHROME):
+            return self.marionette.execute_script("""
+                let [resolve] = arguments;
+                Services.obs.addObserver(() => resolve(), "{}");
+            """.format(name))
+
+    def get_histogram(self, name):
+        with self.marionette.using_context(self.marionette.CONTEXT_CHROME):
+            return self.marionette.execute_script("""
+                let snapshot = Services.telemetry.getSnapshotForHistograms("main", true);
+                return snapshot.parent.{}.values;
+            """.format(name))
+
+    def invalidate_caches(self):
+        with self.marionette.using_context(self.marionette.CONTEXT_CHROME):
+            return self.marionette.execute_script(
+                "Services.obs.notifyObservers(null, 'startupcache-invalidate');"
+            )
--- a/testing/marionette/harness/marionette_harness/tests/unit/unit-tests.ini
+++ b/testing/marionette/harness/marionette_harness/tests/unit/unit-tests.ini
@@ -124,8 +124,11 @@ skip-if = appname == 'fennec' # Bug 1330
 
 [test_select.py]
 [test_crash.py]
 skip-if = asan || manage_instance == false || appname == 'fennec' # Bug 1298921
 [test_localization.py]
 
 [test_reftest.py]
 skip-if = appname == 'fennec' # Bug 1519552
+
+[test_startup_caches.py]
+skip-if = appname == 'fennec'
--- a/toolkit/components/telemetry/Histograms.json
+++ b/toolkit/components/telemetry/Histograms.json
@@ -4675,16 +4675,47 @@
   },
   "MOZ_STORAGE_ASYNC_REQUESTS_SUCCESS": {
     "record_in_processes": ["main", "content"],
     "alert_emails": ["perf-telemetry-alerts@mozilla.com"],
     "expires_in_version": "40",
     "kind": "boolean",
     "description": "mozStorage async requests success *** No longer needed (bug 1156565). Delete histogram and accumulation code! ***"
   },
+  "STARTUP_CACHE_REQUESTS": {
+    "record_in_processes": ["main"],
+    "alert_emails": ["dothayer@mozilla.com"],
+    "expires_in_version": "73",
+    "kind": "categorical",
+    "labels": ["HitMemory", "HitDisk", "Miss"],
+    "bug_numbers": [1364235],
+    "releaseChannelCollection": "opt-out",
+    "description": "Record hits and misses to the startup cache, with categories."
+  },
+  "SCRIPT_PRELOADER_REQUESTS": {
+    "record_in_processes": ["main", "content"],
+    "alert_emails": ["dothayer@mozilla.com"],
+    "expires_in_version": "73",
+    "kind": "categorical",
+    "labels": ["Hit", "HitChild", "Miss"],
+    "bug_numbers": [1364235],
+    "releaseChannelCollection": "opt-out",
+    "description": "Record hits and misses to the script preloader, with categories."
+  },
+  "SCRIPT_PRELOADER_WAIT_TIME": {
+    "record_in_processes": ["main", "content"],
+    "alert_emails": ["dothayer@mozilla.com"],
+    "expires_in_version": "73",
+    "kind": "exponential",
+    "high": 10000,
+    "n_buckets": 20,
+    "bug_numbers": [1364235],
+    "releaseChannelCollection": "opt-out",
+    "description": "Time spent waiting for off-thread compiles in the script preloader."
+  },
   "STARTUP_MEASUREMENT_ERRORS": {
     "record_in_processes": ["main", "content"],
     "expires_in_version": "default",
     "kind": "enumerated",
     "n_values": 16,
     "description": "Flags errors in startup calculation()"
   },
   "NETWORK_DISK_CACHE_TRASHRENAME": {
--- a/toolkit/components/telemetry/Scalars.yaml
+++ b/toolkit/components/telemetry/Scalars.yaml
@@ -2490,16 +2490,33 @@ startup:
     kind: string
     notification_emails:
       - dtownsend@mozilla.com
       - rtestard@mozilla.com
     release_channel_collection: opt-out
     record_in_processes:
       - main
 
+script.preloader:
+  mainthread_recompile:
+    bug_numbers:
+      - 1364235
+    description:
+      How many times we ended up recompiling a script from the script preloader
+      on the main thread.
+    expires: "73"
+    keyed: false
+    kind: uint
+    notification_emails:
+      - dothayer@mozilla.com
+    release_channel_collection: opt-out
+    record_in_processes:
+      - 'main'
+      - 'content'
+
 # The following section is for probes testing the Telemetry system. They will not be
 # submitted in pings and are only used for testing.
 telemetry.test:
   unsigned_int_kind:
     bug_numbers:
       - 1276190
     description: >
       This is a test uint type with a really long description, maybe spanning even multiple