Bug 1044020 - Add duration of shutdown phases to Telemetry. r=froydnj
authorDavid Rajchenbach-Teller <dteller@mozilla.com>
Mon, 10 Nov 2014 13:58:14 +0100
changeset 240215 a063964b77c07dd8b4548ee71978436e111fe0a9
parent 240214 f5c3c0341072988936a492691538b48e28eb87cf
child 240216 a748ffac1abf661da8eb62b99c6248a881b00819
push id4311
push userraliiev@mozilla.com
push dateMon, 12 Jan 2015 19:37:41 +0000
treeherdermozilla-beta@150c9fed433b [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersfroydnj
bugs1044020
milestone36.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1044020 - Add duration of shutdown phases to Telemetry. r=froydnj
browser/installer/package-manifest.in
build/sanitizers/lsan_suppressions.txt
toolkit/components/moz.build
toolkit/components/telemetry/Histograms.json
toolkit/components/terminator/moz.build
toolkit/components/terminator/nsTerminator.cpp
toolkit/components/terminator/nsTerminator.h
toolkit/components/terminator/nsTerminatorTelemetry.js
toolkit/components/terminator/terminator.manifest
toolkit/components/terminator/tests/xpcshell/test_terminator_record.js
toolkit/components/terminator/tests/xpcshell/test_terminator_reload.js
toolkit/components/terminator/tests/xpcshell/xpcshell.ini
toolkit/crashreporter/test/unit/test_crash_terminator.js
--- a/browser/installer/package-manifest.in
+++ b/browser/installer/package-manifest.in
@@ -909,16 +909,19 @@ bin/libfreebl_32int64_3.so
 @BINPATH@/metro/defaults
 @BINPATH@/metro/modules
 #endif
 
 @BINPATH@/components/DataStore.manifest
 @BINPATH@/components/DataStoreImpl.js
 @BINPATH@/components/dom_datastore.xpt
 
+; Shutdown Terminator
+@BINPATH@/components/nsTerminatorTelemetry.js
+@BINPATH@/components/terminator.manifest
 
 #ifdef MOZ_ASAN
 #ifdef CLANG_CXX
 @BINPATH@/llvm-symbolizer
 #endif
 #ifdef CLANG_CL
 @BINPATH@/clang_rt.asan_dynamic-i386.dll
 #endif
--- a/build/sanitizers/lsan_suppressions.txt
+++ b/build/sanitizers/lsan_suppressions.txt
@@ -21,16 +21,18 @@ leak:profiler_init
 # Bug 981220 - Pixman fails to free TLS memory.
 leak:pixman_implementation_lookup_composite
 
 # Bug 987918 - Font shutdown leaks when CLEANUP_MEMORY is not enabled.
 leak:libfontconfig.so
 leak:GI___strdup
 # The symbol is really __GI___strdup, but if you have the leading _, it doesn't suppress it.
 
+# Bug 1078015 - If the process terminates during a PR_Sleep, LSAN  detects a leak
+leak:PR_Sleep
 
 ###
 ### Bug 979928 - WebRTC leaks. m2, m3.
 ###
 
 # WebRTC leaks added for Mochitest 2.
 leak:NR_reg_init
 # nr_reg_local_init should be redundant with NR_reg_init, but with 34 on Aurora
--- a/toolkit/components/moz.build
+++ b/toolkit/components/moz.build
@@ -40,17 +40,16 @@ DIRS += [
     'promiseworker',
     'prompts',
     'protobuf',
     'reflect',
     'sqlite',
     'startup',
     'statusfilter',
     'telemetry',
-    'terminator',
     'thumbnails',
     'typeaheadfind',
     'urlformatter',
     'viewconfig',
     'viewsource',
     'workerloader',
     'workerlz4',
     'xulstore'
@@ -81,14 +80,17 @@ if CONFIG['MOZ_TOOLKIT_SEARCH']:
     DIRS += ['search']
 
 if CONFIG['MOZ_URL_CLASSIFIER']:
     DIRS += ['url-classifier']
 
 if CONFIG['MOZ_CAPTIVEDETECT']:
     DIRS += ['captivedetect']
 
+if CONFIG['MOZ_WIDGET_TOOLKIT'] != "gonk" and CONFIG['MOZ_WIDGET_TOOLKIT'] != 'android':
+    DIRS += ['terminator']
+
 DIRS += ['build']
 
 EXTRA_COMPONENTS += [
     'nsDefaultCLH.js',
     'nsDefaultCLH.manifest',
 ]
--- a/toolkit/components/telemetry/Histograms.json
+++ b/toolkit/components/telemetry/Histograms.json
@@ -6770,16 +6770,44 @@
   },
   "SOCIAL_SIDEBAR_OPEN_DURATION": {
     "expires_in_version": "40",
     "kind": "exponential",
     "high": "10000000",
     "n_buckets": 10,
     "description": "Sidebar showing: seconds that the sidebar has been opened"
   },
+  "SHUTDOWN_PHASE_DURATION_TICKS_QUIT_APPLICATION": {
+    "expires_in_version": "never",
+    "kind": "exponential",
+    "high": 65,
+    "n_buckets": 10,
+    "description": "Duration of shutdown phase quit-application, as measured by the shutdown terminator, in seconds of activity"
+  },
+  "SHUTDOWN_PHASE_DURATION_TICKS_PROFILE_CHANGE_TEARDOWN": {
+    "expires_in_version": "never",
+    "kind": "exponential",
+    "high": 65,
+    "n_buckets": 10,
+    "description": "Duration of shutdown phase profile-change-teardown, as measured by the shutdown terminator, in seconds of activity"
+  },
+  "SHUTDOWN_PHASE_DURATION_TICKS_XPCOM_WILL_SHUTDOWN": {
+    "expires_in_version": "never",
+    "kind": "exponential",
+    "high": 65,
+    "n_buckets": 10,
+    "description": "Duration of shutdown phase xpcom-will-shutdown, as measured by the shutdown terminator, in seconds of activity"
+  },
+  "SHUTDOWN_PHASE_DURATION_TICKS_PROFILE_BEFORE_CHANGE": {
+    "expires_in_version": "never",
+    "kind": "exponential",
+    "high": 65,
+    "n_buckets": 10,
+    "description": "Duration of shutdown phase profile-before-change, as measured by the shutdown terminator, in seconds of activity"
+  },
   "BR_9_2_1_SUBJECT_ALT_NAMES": {
     "expires_in_version": "never",
     "kind": "enumerated",
     "n_values": 8,
     "description": "Baseline Requirements section 9.2.1: subject alternative names extension (0: ok, 1 or more: error)"
   },
   "BR_9_2_2_SUBJECT_COMMON_NAME": {
     "expires_in_version": "never",
--- a/toolkit/components/terminator/moz.build
+++ b/toolkit/components/terminator/moz.build
@@ -1,21 +1,24 @@
 # -*- Mode: python; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 40 -*-
 # vim: set filetype=python:
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
+XPCSHELL_TESTS_MANIFESTS += ['tests/xpcshell/xpcshell.ini']
+
 SOURCES += [
     'nsTerminator.cpp',
 ]
 
 EXPORTS += [
     'nsTerminator.h',
 ]
 
 EXTRA_COMPONENTS += [
+    'nsTerminatorTelemetry.js',
     'terminator.manifest',
 ]
 
 FINAL_LIBRARY = 'xul'
 
 FAIL_ON_WARNINGS = True
--- a/toolkit/components/terminator/nsTerminator.cpp
+++ b/toolkit/components/terminator/nsTerminator.cpp
@@ -13,31 +13,40 @@
  * We spawn a thread during quit-application. If any of the shutdown
  * steps takes more than n milliseconds (63000 by default), kill the
  * process as fast as possible, without any cleanup.
  */
 
 #include "nsTerminator.h"
 
 #include "prthread.h"
+#include "prmon.h"
+#include "plstr.h"
+#include "prio.h"
+
 #include "nsString.h"
 #include "nsServiceManagerUtils.h"
+#include "nsDirectoryServiceUtils.h"
+#include "nsAppDirectoryServiceDefs.h"
 
 #include "nsIObserverService.h"
 #include "nsIPrefService.h"
 #if defined(MOZ_CRASHREPORTER)
 #include "nsExceptionHandler.h"
 #endif
 
 #include "mozilla/ArrayUtils.h"
+#include "mozilla/Attributes.h"
 #include "mozilla/DebugOnly.h"
+#include "mozilla/MemoryChecking.h"
 #include "mozilla/Preferences.h"
 #include "mozilla/Services.h"
 #include "mozilla/UniquePtr.h"
 #include "mozilla/unused.h"
+#include "mozilla/Telemetry.h"
 
 // Normally, the number of milliseconds that AsyncShutdown waits until
 // it decides to crash is specified as a preference. We use the
 // following value as a fallback if for some reason the preference is
 // absent.
 #define FALLBACK_ASYNCSHUTDOWN_CRASH_AFTER_MS 60000
 
 // Additional number of milliseconds to wait until we decide to exit
@@ -46,132 +55,377 @@
 
 // One second, in ticks.
 #define TICK_DURATION 1000
 
 namespace mozilla {
 
 namespace {
 
-/**
- * Set to `true` by the main thread whenever we pass a shutdown phase,
- * which means that the shutdown is still ongoing. Reset to `false` by
- * the Terminator thread, once it has acknowledged the progress.
- */
-Atomic<bool> gProgress(false);
+// Utility function: create a thread that is non-joinable,
+// does not prevent the process from terminating, is never
+// cooperatively scheduled, and uses a default stack size.
+PRThread* CreateSystemThread(void (*start)(void* arg),
+                             void* arg)
+{
+  PRThread* thread = PR_CreateThread(
+    PR_SYSTEM_THREAD, /* This thread will not prevent the process from terminating */
+    start,
+    arg,
+    PR_PRIORITY_LOW,
+    PR_GLOBAL_THREAD /* Make sure that the thread is never cooperatively scheduled */,
+    PR_UNJOINABLE_THREAD,
+    0 /* Use default stack size */
+  );
+  MOZ_LSAN_INTENTIONALLY_LEAK_OBJECT(thread); // This pointer will never be deallocated.
+  return thread;
+}
+
+
+////////////////////////////////////////////
+//
+// The watchdog
+//
+// This nspr thread is in charge of crashing the process if any stage of shutdown
+// lasts more than some predefined duration. As a side-effect, it measures the
+// duration of each stage of shutdown.
+//
+
+// The heartbeat of the operation.
+//
+// Main thread:
+//
+// * Whenever a shutdown step has been completed, the main thread
+// swaps gHeartbeat to 0 to mark that the shutdown process is still
+// progressing. The value swapped away indicates the number of ticks
+// it took for the shutdown step to advance.
+//
+// Watchdog thread:
+//
+// * Every tick, the watchdog thread increments gHearbeat atomically.
+//
+// A note about precision:
+// Since gHeartbeat is generally reset to 0 between two ticks, this means
+// that gHeartbeat stays at 0 less than one tick. Consequently, values
+// extracted from gHeartbeat must be considered rounded up.
+Atomic<uint32_t> gHeartbeat(0);
 
 struct Options {
-  int32_t crashAfterMS;
+  /**
+   * How many ticks before we should crash the process.
+   */
+  uint32_t crashAfterTicks;
 };
 
+/**
+ * Entry point for the watchdog thread
+ */
 void
-Run(void* arg)
+RunWatchdog(void* arg)
 {
   PR_SetCurrentThreadName("Shutdown Hang Terminator");
 
   // Let's copy and deallocate options, that's one less leak to worry
   // about.
   UniquePtr<Options> options((Options*)arg);
-  int32_t crashAfterMS = options->crashAfterMS;
+  uint32_t crashAfterTicks = options->crashAfterTicks;
   options = nullptr;
 
-  int32_t timeToLive = crashAfterMS;
+  const uint32_t timeToLive = crashAfterTicks;
   while (true) {
     //
     // We do not want to sleep for the entire duration,
     // as putting the computer to sleep would suddenly
     // cause us to timeout on wakeup.
     //
     // Rather, we prefer sleeping for at most 1 second
     // at a time. If the computer sleeps then wakes up,
     // we have lost at most one second, which is much
     // more reasonable.
     //
     PR_Sleep(TICK_DURATION);
-    if (gProgress.exchange(false)) {
-      // We have passed at least one shutdown phase while waiting.
-      // Shutdown is still alive, reset the countdown.
-      timeToLive = crashAfterMS;
-      continue;
-    }
-    timeToLive -= TICK_DURATION;
-    if (timeToLive >= 0) {
+
+    if (gHeartbeat++ < timeToLive) {
       continue;
     }
 
     // Shutdown is apparently dead. Crash the process.
     MOZ_CRASH("Shutdown too long, probably frozen, causing a crash.");
   }
 }
 
-} // anonymous namespace
+////////////////////////////////////////////
+//
+// Writer thread
+//
+// This nspr thread is in charge of writing to disk statistics produced by the
+// watchdog thread and collected by the main thread. Note that we use a nspr
+// thread rather than usual XPCOM I/O simply because we outlive XPCOM and its
+// threads.
+//
+
+// Utility class, used by UniquePtr<> to close nspr files.
+class PR_CloseDelete
+{
+public:
+  MOZ_CONSTEXPR PR_CloseDelete() {}
+
+  PR_CloseDelete(const PR_CloseDelete& aOther)
+  {}
+
+  void operator()(PRFileDesc* aPtr) const
+  {
+    PR_Close(aPtr);
+  }
+};
+
+//
+// Communication between the main thread and the writer thread.
+//
+// Main thread:
+//
+// * Whenever a shutdown step has been completed, the main thread
+// obtains the number of ticks from the watchdog threads, builds
+// a string representing all the data gathered so far, places
+// this string in `gWriteData`, and wakes up the writer thread
+// using `gWriteReady`. If `gWriteData` already contained a non-null
+// pointer, this means that the writer thread is lagging behind the
+// main thread, and the main thread cleans up the memory.
+//
+// Writer thread:
+//
+// * When awake, the writer thread swaps `gWriteData` to nullptr. If
+// `gWriteData` contained data to write, the . If so, the writer
+// thread writes the data to a file named "ShutdownDuration.json.tmp",
+// then moves that file to "ShutdownDuration.json" and cleans up the
+// data. If `gWriteData` contains a nullptr, the writer goes to sleep
+// until it is awkened using `gWriteReady`.
+//
+//
+// The data written by the writer thread will be read by another
+// module upon the next restart and fed to Telemetry.
+//
+Atomic<nsCString*> gWriteData(nullptr);
+PRMonitor* gWriteReady = nullptr;
+
+void RunWriter(void* arg)
+{
+  PR_SetCurrentThreadName("Shutdown Statistics Writer");
+
+  MOZ_LSAN_INTENTIONALLY_LEAK_OBJECT(arg);
+  // Shutdown will generally complete before we have a chance to
+  // deallocate. This is not a leak.
+
+  // Setup destinationPath and tmpFilePath
+
+  nsCString destinationPath(static_cast<char*>(arg));
+  nsAutoCString tmpFilePath;
+  tmpFilePath.Append(destinationPath);
+  tmpFilePath.AppendLiteral(".tmp");
+
+  // Cleanup any file leftover from a previous run
+  unused << PR_Delete(tmpFilePath.get());
+  unused << PR_Delete(destinationPath.get());
 
-static char const *const sObserverTopics[] = {
-  "quit-application",
-  "profile-change-teardown",
-  "profile-before-change",
-  "xpcom-will-shutdown",
-  "xpcom-shutdown",
+  while (true) {
+    //
+    // Check whether we have received data from the main thread.
+    //
+    // We perform the check before waiting on `gWriteReady` as we may
+    // have received data while we were busy writing.
+    //
+    // Also note that gWriteData may have been modified several times
+    // since we last checked. That's ok, we are not losing any important
+    // data (since we keep adding data), and we are not leaking memory
+    // (since the main thread deallocates any data that hasn't been
+    // consumed by the writer thread).
+    //
+    UniquePtr<nsCString> data(gWriteData.exchange(nullptr));
+    if (!data) {
+      // Data is not available yet.
+      // Wait until the main thread provides it.
+      PR_EnterMonitor(gWriteReady);
+      PR_Wait(gWriteReady, PR_INTERVAL_NO_TIMEOUT);
+      PR_ExitMonitor(gWriteReady);
+      continue;
+    }
+
+    MOZ_LSAN_INTENTIONALLY_LEAK_OBJECT(data.get());
+    // Shutdown may complete before we have a chance to deallocate.
+    // This is not a leak.
+
+    //
+    // Write to a temporary file
+    //
+    // In case of any error, we simply give up. Since the data is
+    // hardly critical, we don't want to spend too much effort
+    // salvaging it.
+    //
+    UniquePtr<PRFileDesc, PR_CloseDelete>
+      tmpFileDesc(PR_Open(tmpFilePath.get(),
+                          PR_WRONLY | PR_TRUNCATE | PR_CREATE_FILE,
+                          00600));
+
+    // Shutdown may complete before we have a chance to close the file.
+    // This is not a leak.
+    MOZ_LSAN_INTENTIONALLY_LEAK_OBJECT(tmpFileDesc.get());
+
+    if (tmpFileDesc == nullptr) {
+      break;
+    }
+    if (PR_Write(tmpFileDesc.get(), data->get(), data->Length()) == -1) {
+      break;
+    }
+    tmpFileDesc.reset();
+
+    //
+    // Rename on top of destination file.
+    //
+    // This is not sufficient to guarantee that the destination file
+    // will be written correctly, but, again, we don't care enough
+    // about the data to make more efforts.
+    //
+    if (PR_Rename(tmpFilePath.get(), destinationPath.get()) != PR_SUCCESS) {
+      break;
+    }
+  }
+}
+
+/**
+ * A step during shutdown.
+ *
+ * Shutdown is divided in steps, which all map to an observer
+ * notification. The duration of a step is defined as the number of
+ * ticks between the time we receive a notification and the next one.
+ */
+struct ShutdownStep
+{
+  char const* const mTopic;
+  int mTicks;
+
+  MOZ_CONSTEXPR ShutdownStep(const char *const topic)
+    : mTopic(topic)
+    , mTicks(-1)
+  {}
+
 };
 
+static ShutdownStep sShutdownSteps[] = {
+  ShutdownStep("quit-application"),
+  ShutdownStep("profile-change-teardown"),
+  ShutdownStep("profile-before-change"),
+  ShutdownStep("xpcom-will-shutdown"),
+  ShutdownStep("xpcom-shutdown"),
+};
+
+} // anonymous namespace
+
 NS_IMPL_ISUPPORTS(nsTerminator, nsIObserver)
 
 nsTerminator::nsTerminator()
   : mInitialized(false)
+  , mCurrentStep(-1)
 {
 }
 
 // During startup, register as an observer for all interesting topics.
 nsresult
 nsTerminator::SelfInit()
 {
   nsCOMPtr<nsIObserverService> os = mozilla::services::GetObserverService();
   if (!os) {
     return NS_ERROR_UNEXPECTED;
   }
 
-  for (size_t i = 0; i < ArrayLength(sObserverTopics); ++i) {
-    DebugOnly<nsresult> rv = os->AddObserver(this, sObserverTopics[i], false);
+  for (size_t i = 0; i < ArrayLength(sShutdownSteps); ++i) {
+    DebugOnly<nsresult> rv = os->AddObserver(this, sShutdownSteps[i].mTopic, false);
 #if defined(DEBUG)
     NS_WARN_IF(NS_FAILED(rv));
 #endif // defined(DEBUG)
   }
+
   return NS_OK;
 }
 
-// Actually launch the thread. This takes place at the first sign of shutdown.
+// Actually launch these threads. This takes place at the first sign of shutdown.
 void
-nsTerminator::Start() {
-  // Determine how long we need to wait
+nsTerminator::Start()
+{
+  MOZ_ASSERT(!mInitialized);
+  StartWatchdog();
+  StartWriter();
+  mInitialized = true;
+}
 
+// Prepare, allocate and start the watchdog thread.
+// By design, it will never finish, nor be deallocated.
+void
+nsTerminator::StartWatchdog()
+{
   int32_t crashAfterMS =
     Preferences::GetInt("toolkit.asyncshutdown.crash_timeout",
                         FALLBACK_ASYNCSHUTDOWN_CRASH_AFTER_MS);
+  // Ignore negative values
+  if (crashAfterMS <= 0) {
+    crashAfterMS = FALLBACK_ASYNCSHUTDOWN_CRASH_AFTER_MS;
+  }
 
   // Add a little padding, to ensure that we do not crash before
   // AsyncShutdown.
-  crashAfterMS += ADDITIONAL_WAIT_BEFORE_CRASH_MS;
+  if (crashAfterMS > INT32_MAX - ADDITIONAL_WAIT_BEFORE_CRASH_MS) {
+    // Defend against overflow
+    crashAfterMS = INT32_MAX;
+  } else {
+    crashAfterMS += ADDITIONAL_WAIT_BEFORE_CRASH_MS;
+  }
 
   UniquePtr<Options> options(new Options());
-  options->crashAfterMS = crashAfterMS;
+  options->crashAfterTicks = crashAfterMS / TICK_DURATION;
+
+  DebugOnly<PRThread*> watchdogThread = CreateSystemThread(RunWatchdog,
+                                                options.release());
+  MOZ_ASSERT(watchdogThread);
+}
+
+// Prepare, allocate and start the writer thread. By design, it will never
+// finish, nor be deallocated. In case of error, we degrade
+// gracefully to not writing Telemetry data.
+void
+nsTerminator::StartWriter()
+{
 
-  // Allocate and start the thread.
-  // By design, it will never finish, nor be deallocated.
-  DebugOnly<PRThread*> thread = PR_CreateThread(
-    PR_SYSTEM_THREAD, /* This thread will not prevent the process from terminating */
-    Run,
-    options.release(),
-    PR_PRIORITY_LOW,
-    PR_GLOBAL_THREAD /* Make sure that the thread is never cooperatively scheduled */,
-    PR_UNJOINABLE_THREAD,
-    0 /* Use default stack size */
-  );
+  if (!Telemetry::CanRecord()) {
+    return;
+  }
+  nsCOMPtr<nsIFile> profLD;
+  nsresult rv = NS_GetSpecialDirectory(NS_APP_USER_PROFILE_LOCAL_50_DIR,
+                                       getter_AddRefs(profLD));
+  if (NS_FAILED(rv)) {
+    return;
+  }
+
+  rv = profLD->Append(NS_LITERAL_STRING("ShutdownDuration.json"));
+  if (NS_FAILED(rv)) {
+    return;
+  }
 
-  MOZ_ASSERT(thread);
-  mInitialized = true;
+  nsAutoString path;
+  rv = profLD->GetPath(path);
+  if (NS_FAILED(rv)) {
+    return;
+  }
+
+  gWriteReady = PR_NewMonitor();
+  MOZ_LSAN_INTENTIONALLY_LEAK_OBJECT(gWriteReady); // We will never deallocate this object
+  PRThread* writerThread = CreateSystemThread(RunWriter,
+                                              ToNewUTF8String(path));
+
+  if (!writerThread) {
+    return;
+  }
 }
 
 NS_IMETHODIMP
 nsTerminator::Observe(nsISupports *, const char *aTopic, const char16_t *)
 {
   if (strcmp(aTopic, "profile-after-change") == 0) {
     return SelfInit();
   }
@@ -180,24 +434,106 @@ nsTerminator::Observe(nsISupports *, con
 
   // As we have seen examples in the wild of shutdown notifications
   // not being sent (or not being sent in the expected order), we do
   // not assume a specific order.
   if (!mInitialized) {
     Start();
   }
 
-  // Inform the thread that we have advanced by one phase.
-  gProgress.exchange(true);
+  UpdateHeartbeat(aTopic);
+  UpdateTelemetry();
+  UpdateCrashReport(aTopic);
 
-#if defined(MOZ_CRASHREPORTER)
-  // In case of crash, we wish to know where in shutdown we are
-  unused << CrashReporter::AnnotateCrashReport(NS_LITERAL_CSTRING("ShutdownProgress"),
-                                               nsAutoCString(aTopic));
-#endif // defined(MOZ_CRASH_REPORTER)
-
+  // Perform a little cleanup
   nsCOMPtr<nsIObserverService> os = mozilla::services::GetObserverService();
   MOZ_RELEASE_ASSERT(os);
   (void)os->RemoveObserver(this, aTopic);
+
   return NS_OK;
 }
 
+void
+nsTerminator::UpdateHeartbeat(const char* aTopic)
+{
+  // Reset the clock, find out how long the current phase has lasted.
+  uint32_t ticks = gHeartbeat.exchange(0);
+  if (mCurrentStep > 0) {
+    sShutdownSteps[mCurrentStep].mTicks = ticks;
+  }
+
+  // Find out where we now are in the current shutdown.
+  // Don't assume that shutdown takes place in the expected order.
+  int nextStep = -1;
+  for (size_t i = 0; i < ArrayLength(sShutdownSteps); ++i) {
+    if (strcmp(sShutdownSteps[i].mTopic, aTopic) == 0) {
+      nextStep = i;
+      break;
+    }
+  }
+  MOZ_ASSERT(nextStep != -1);
+  mCurrentStep = nextStep;
+}
+
+void
+nsTerminator::UpdateTelemetry()
+{
+  if (!Telemetry::CanRecord() || !gWriteReady) {
+    return;
+  }
+
+  //
+  // We need Telemetry data on the effective duration of each step,
+  // to be able to tune the time-to-crash of each of both the
+  // Terminator and AsyncShutdown. However, at this stage, it is too
+  // late to record such data into Telemetry, so we write it to disk
+  // and read it upon the next startup.
+  //
+
+  // Build JSON.
+  UniquePtr<nsCString> telemetryData(new nsCString());
+  telemetryData->AppendLiteral("{");
+  size_t fields = 0;
+  for (size_t i = 0; i < ArrayLength(sShutdownSteps); ++i) {
+    if (sShutdownSteps[i].mTicks < 0) {
+      // Ignore this field.
+      continue;
+    }
+    if (fields++ > 0) {
+      telemetryData->Append(", ");
+    }
+    telemetryData->AppendLiteral("\"");
+    telemetryData->Append(sShutdownSteps[i].mTopic);
+    telemetryData->AppendLiteral("\": ");
+    telemetryData->AppendInt(sShutdownSteps[i].mTicks);
+  }
+  telemetryData->AppendLiteral("}");
+
+  if (fields == 0) {
+    // Nothing to write
+      return;
+  }
+
+  //
+  // Send data to the worker thread.
+  //
+  delete gWriteData.exchange(telemetryData.release()); // Clear any data that hasn't been written yet
+
+  // In case the worker thread was sleeping, wake it up.
+  PR_EnterMonitor(gWriteReady);
+  PR_Notify(gWriteReady);
+  PR_ExitMonitor(gWriteReady);
+}
+
+void
+nsTerminator::UpdateCrashReport(const char* aTopic)
+{
+#if defined(MOZ_CRASHREPORTER)
+  // In case of crash, we wish to know where in shutdown we are
+  nsAutoCString report(aTopic);
+
+  unused << CrashReporter::AnnotateCrashReport(NS_LITERAL_CSTRING("ShutdownProgress"),
+                                               report);
+#endif // defined(MOZ_CRASH_REPORTER)
+}
+
+
 } // namespace mozilla
--- a/toolkit/components/terminator/nsTerminator.h
+++ b/toolkit/components/terminator/nsTerminator.h
@@ -17,20 +17,27 @@ public:
   NS_DECL_ISUPPORTS
   NS_DECL_NSIOBSERVER
 
   nsTerminator();
 
 private:
   nsresult SelfInit();
   void Start();
+  void StartWatchdog();
+  void StartWriter();
+
+  void UpdateHeartbeat(const char* aTopic);
+  void UpdateTelemetry();
+  void UpdateCrashReport(const char* aTopic);
 
   ~nsTerminator() {}
 
   bool mInitialized;
+  int32_t mCurrentStep;
 };
 
 }
 
 #define NS_TOOLKIT_TERMINATOR_CID { 0x2e59cc70, 0xf83a, 0x412f, \
   { 0x89, 0xd4, 0x45, 0x38, 0x85, 0x83, 0x72, 0x17 } }
 #define NS_TOOLKIT_TERMINATOR_CONTRACTID "@mozilla.org/toolkit/shutdown-terminator;1"
 
new file mode 100644
--- /dev/null
+++ b/toolkit/components/terminator/nsTerminatorTelemetry.js
@@ -0,0 +1,105 @@
+/* -*- indent-tabs-mode: nil; js-indent-level: 2 -*- */
+/* vim: set ts=2 et sw=2 tw=80 filetype=javascript: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+"use strict";
+
+/**
+ * Read the data saved by nsTerminator during shutdown and feed it to the
+ * relevant telemetry histograms.
+ */
+
+const Cc = Components.classes;
+const Ci = Components.interfaces;
+const Cu = Components.utils;
+const Cr = Components.results;
+
+Cu.import("resource://gre/modules/XPCOMUtils.jsm");
+
+XPCOMUtils.defineLazyModuleGetter(this, "OS",
+  "resource://gre/modules/osfile.jsm");
+XPCOMUtils.defineLazyModuleGetter(this, "Promise",
+  "resource://gre/modules/Promise.jsm");
+XPCOMUtils.defineLazyModuleGetter(this, "Task",
+  "resource://gre/modules/Task.jsm");
+XPCOMUtils.defineLazyModuleGetter(this, "setTimeout",
+  "resource://gre/modules/Timer.jsm");
+XPCOMUtils.defineLazyModuleGetter(this, "Services",
+  "resource://gre/modules/Services.jsm");
+
+function nsTerminatorTelemetry() {}
+
+let HISTOGRAMS = {
+  "quit-application": "SHUTDOWN_PHASE_DURATION_TICKS_QUIT_APPLICATION",
+  "profile-change-teardown": "SHUTDOWN_PHASE_DURATION_TICKS_PROFILE_CHANGE_TEARDOWN",
+  "profile-before-change":  "SHUTDOWN_PHASE_DURATION_TICKS_PROFILE_BEFORE_CHANGE",
+  "xpcom-will-shutdown": "SHUTDOWN_PHASE_DURATION_TICKS_XPCOM_WILL_SHUTDOWN",
+};
+
+nsTerminatorTelemetry.prototype = {
+  classID: Components.ID("{3f78ada1-cba2-442a-82dd-d5fb300ddea7}"),
+
+  _xpcom_factory: XPCOMUtils.generateSingletonFactory(nsTerminatorTelemetry),
+
+  //////////////////////////////////////////////////////////////////////////////
+  //// nsISupports
+
+  QueryInterface: XPCOMUtils.generateQI([Ci.nsIObserver]),
+
+  //////////////////////////////////////////////////////////////////////////////
+  //// nsIObserver
+
+  observe: function DS_observe(aSubject, aTopic, aData)
+  {
+    Task.spawn(function*() {
+      //
+      // This data is hardly critical, reading it can wait for a few seconds.
+      //
+      yield new Promise(resolve => setTimeout(resolve, 3000));
+
+      let PATH = OS.Path.join(OS.Constants.Path.localProfileDir,
+        "ShutdownDuration.json");
+      let raw;
+      try {
+        raw = yield OS.File.read(PATH, { encoding: "utf-8" });
+      } catch (ex if ex.becauseNoSuchFile) {
+        return;
+      }
+      // Let other errors be reported by Promise's error-reporting.
+
+      // Clean up
+      OS.File.remove(PATH);
+      OS.File.remove(PATH + ".tmp");
+
+      let data = JSON.parse(raw);
+      for (let k of Object.keys(data)) {
+        let id = HISTOGRAMS[k];
+        try {
+          let histogram = Services.telemetry.getHistogramById(id);
+          if (!histogram) {
+            throw new Error("Unknown histogram " + id);
+          }
+
+          histogram.add(Number.parseInt(data[k]));
+        } catch (ex) {
+          // Make sure that the error is reported and causes test failures,
+          // but otherwise, ignore it.
+          Promise.reject(ex);
+          continue;
+        }
+      }
+
+      // Inform observers that we are done.
+      Services.obs.notifyObservers(null,
+        "shutdown-terminator-telemetry-updated",
+        "");
+    });
+  },
+};
+
+////////////////////////////////////////////////////////////////////////////////
+//// Module
+
+this.NSGetFactory = XPCOMUtils.generateNSGetFactory([nsTerminatorTelemetry]);
--- a/toolkit/components/terminator/terminator.manifest
+++ b/toolkit/components/terminator/terminator.manifest
@@ -1,2 +1,5 @@
 category profile-after-change nsTerminator @mozilla.org/toolkit/shutdown-terminator;1
 
+component {3f78ada1-cba2-442a-82dd-d5fb300ddea7} nsTerminatorTelemetry.js
+contract @mozilla.org/toolkit/shutdown-terminator-telemetry;1 {3f78ada1-cba2-442a-82dd-d5fb300ddea7}
+category profile-after-change nsTerminatorTelemetry @mozilla.org/toolkit/shutdown-terminator-telemetry;1
new file mode 100644
--- /dev/null
+++ b/toolkit/components/terminator/tests/xpcshell/test_terminator_record.js
@@ -0,0 +1,108 @@
+/* Any copyright is dedicated to the Public Domain.
+ * http://creativecommons.org/publicdomain/zero/1.0/ */
+
+"use strict";
+
+
+// Test that the Shutdown Terminator records durations correctly
+
+const Cu = Components.utils;
+const Cc = Components.classes;
+const Ci = Components.interfaces;
+
+Cu.import("resource://gre/modules/Services.jsm", this);
+Cu.import("resource://gre/modules/osfile.jsm", this);
+Cu.import("resource://gre/modules/Timer.jsm", this);
+Cu.import("resource://gre/modules/Task.jsm", this);
+
+let {Path, File, Constants} = OS;
+
+let PATH;
+let PATH_TMP;
+let terminator;
+
+add_task(function* init() {
+  do_get_profile();
+  PATH = Path.join(Constants.Path.localProfileDir, "ShutdownDuration.json");
+  PATH_TMP = PATH + ".tmp";
+
+  // Initialize the terminator
+  // (normally, this is done through the manifest file, but xpcshell
+  // doesn't take them into account).
+  do_print("Initializing the Terminator");
+  terminator = Cc["@mozilla.org/toolkit/shutdown-terminator;1"].
+    createInstance(Ci.nsIObserver);
+  terminator.observe(null, "profile-after-change", null);
+});
+
+let promiseShutdownDurationData = Task.async(function*() {
+  // Wait until PATH exists.
+  // Timeout if it is never created.
+  do_print("Waiting for file creation: " + PATH);
+  while (true) {
+    if ((yield OS.File.exists(PATH))) {
+      break;
+    }
+
+    do_print("The file does not exist yet. Waiting 1 second.");
+    yield new Promise(resolve => setTimeout(resolve, 1000));
+  }
+
+  do_print("The file has been created");
+  let raw = yield OS.File.read(PATH, { encoding: "utf-8"} );
+  do_print(raw);
+  return JSON.parse(raw);
+});
+
+add_task(function* test_record() {
+  let PHASE0 = "profile-change-teardown";
+  let PHASE1 = "profile-before-change";
+  let PHASE2 = "xpcom-will-shutdown";
+  let t0 = Date.now();
+
+  do_print("Starting shutdown");
+  terminator.observe(null, "profile-change-teardown", null);
+
+  do_print("Moving to next phase");
+  terminator.observe(null, PHASE1, null);
+
+  let data = yield promiseShutdownDurationData();
+
+  let t1 = Date.now();
+
+  Assert.ok(PHASE0 in data, "The file contains the expected key");
+  let duration = data[PHASE0];
+  Assert.equal(typeof duration, "number");
+  Assert.ok(duration >= 0, "Duration is a non-negative number");
+  Assert.ok(duration <= Math.ceil((t1 - t0) / 1000) + 1,
+    "Duration is reasonable");
+
+  Assert.equal(Object.keys(data).length, 1, "Data does not contain other durations");
+
+  do_print("Cleaning up and moving to next phase");
+  yield File.remove(PATH);
+  yield File.remove(PATH_TMP);
+
+  do_print("Waiting at least one tick");
+  let WAIT_MS = 2000;
+  yield new Promise(resolve => setTimeout(resolve, WAIT_MS));
+
+  terminator.observe(null, PHASE2, null);
+  data = yield promiseShutdownDurationData();
+
+  let t2 = Date.now();
+
+  Assert.equal(Object.keys(data).sort().join(", "),
+               [PHASE0, PHASE1].sort().join(", "),
+               "The file contains the expected keys");
+  Assert.equal(data[PHASE0], duration, "Duration of phase 0 hasn't changed");
+  let duration2 = data[PHASE1];
+  Assert.equal(typeof duration2, "number");
+  Assert.ok(duration2 >= WAIT_MS / 2000, "We have waited at least " + (WAIT_MS / 2000) + " ticks");
+  Assert.ok(duration2 <= Math.ceil((t2 - t1) / 1000) + 1,
+    "Duration is reasonable");
+});
+
+function run_test() {
+  run_next_test();
+}
new file mode 100644
--- /dev/null
+++ b/toolkit/components/terminator/tests/xpcshell/test_terminator_reload.js
@@ -0,0 +1,85 @@
+/* Any copyright is dedicated to the Public Domain.
+ * http://creativecommons.org/publicdomain/zero/1.0/ */
+
+"use strict";
+
+
+// Test that the Shutdown Terminator reloads durations correctly
+
+const Cu = Components.utils;
+const Cc = Components.classes;
+const Ci = Components.interfaces;
+
+Cu.import("resource://gre/modules/Services.jsm", this);
+Cu.import("resource://gre/modules/osfile.jsm", this);
+Cu.import("resource://gre/modules/Timer.jsm", this);
+Cu.import("resource://gre/modules/Task.jsm", this);
+
+let {Path, File, Constants} = OS;
+
+let PATH;
+
+let HISTOGRAMS = {
+  "quit-application": "SHUTDOWN_PHASE_DURATION_TICKS_QUIT_APPLICATION",
+  "profile-change-teardown": "SHUTDOWN_PHASE_DURATION_TICKS_PROFILE_CHANGE_TEARDOWN",
+  "profile-before-change":  "SHUTDOWN_PHASE_DURATION_TICKS_PROFILE_BEFORE_CHANGE",
+  "xpcom-will-shutdown": "SHUTDOWN_PHASE_DURATION_TICKS_XPCOM_WILL_SHUTDOWN",
+};
+
+add_task(function* init() {
+  do_get_profile();
+  PATH = Path.join(Constants.Path.localProfileDir, "ShutdownDuration.json");
+});
+
+add_task(function* test_reload() {
+  do_print("Forging data");
+  let data = {};
+  let telemetrySnapshots = Services.telemetry.histogramSnapshots;
+  let i = 0;
+  for (let k of Object.keys(HISTOGRAMS)) {
+    let id = HISTOGRAMS[k];
+    data[k] = i++;
+    Assert.equal(telemetrySnapshots[id] || undefined, undefined, "Histogram " + id + " is empty");
+  }
+
+
+  yield OS.File.writeAtomic(PATH, JSON.stringify(data));
+
+  const TOPIC = "shutdown-terminator-telemetry-updated";
+
+  let wait = new Promise(resolve =>
+    Services.obs.addObserver(
+      function observer() {
+        do_print("Telemetry has been updated");
+        Services.obs.removeObserver(observer, TOPIC);
+        resolve();
+      },
+      TOPIC,
+      false));
+
+  do_print("Starting nsTerminatorTelemetry");
+  let tt = Cc["@mozilla.org/toolkit/shutdown-terminator-telemetry;1"].
+    createInstance(Ci.nsIObserver);
+  tt.observe(null, "profile-after-change", "");
+
+  do_print("Waiting until telemetry is updated");
+  // Now wait until Telemetry is updated
+  yield wait;
+
+  telemetrySnapshots = Services.telemetry.histogramSnapshots;
+  for (let k of Object.keys(HISTOGRAMS)) {
+    let id = HISTOGRAMS[k];
+    do_print("Testing histogram " + id);
+    let snapshot = telemetrySnapshots[id];
+    let count = 0;
+    for (let x of snapshot.counts) {
+      count += x;
+    }
+    Assert.equal(count, 1, "We have added one item");
+  }
+
+});
+
+function run_test() {
+  run_next_test();
+}
new file mode 100644
--- /dev/null
+++ b/toolkit/components/terminator/tests/xpcshell/xpcshell.ini
@@ -0,0 +1,7 @@
+[DEFAULT]
+head=
+tail=
+
+[test_terminator_record.js]
+[test_terminator_reload.js]
+skip-if = (os == "android" || appname == "b2g")
--- a/toolkit/crashreporter/test/unit/test_crash_terminator.js
+++ b/toolkit/crashreporter/test/unit/test_crash_terminator.js
@@ -16,23 +16,25 @@ function setup_crash() {
   // doesn't take them into account).
   let terminator = Components.classes["@mozilla.org/toolkit/shutdown-terminator;1"].
     createInstance(Components.interfaces.nsIObserver);
   terminator.observe(null, "profile-after-change", null);
 
   // Inform the terminator that shutdown has started
   // Pick an arbitrary notification
   terminator.observe(null, "xpcom-will-shutdown", null);
+  terminator.observe(null, "profile-before-change", null);
 
   dump("Waiting (actively) for the crash\n");
   while(true) {
     Services.tm.currentThread.processNextEvent(true);
   }
 };
 
 
 function after_crash(mdump, extra) {
-  Assert.equal(extra.ShutdownProgress, "xpcom-will-shutdown");
+  do_print("Crash signature: " + JSON.stringify(extra, null, "\t"));
+  Assert.equal(extra.ShutdownProgress, "profile-before-change");
 }
 
 function run_test() {
   do_crash(setup_crash, after_crash);
 }