Bug 1044020 - Add duration of shutdown phases to Telemetry. r=froydnj
authorDavid Rajchenbach-Teller <dteller@mozilla.com>
Mon, 10 Nov 2014 13:58:14 +0100
changeset 232115 a063964b77c07dd8b4548ee71978436e111fe0a9
parent 232114 f5c3c0341072988936a492691538b48e28eb87cf
child 232116 a748ffac1abf661da8eb62b99c6248a881b00819
push id7326
push userbhearsum@mozilla.com
push dateFri, 28 Nov 2014 15:58:42 +0000
treeherdermozilla-aurora@d3a3b2a0f2f8 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersfroydnj
bugs1044020
milestone36.0a1
Bug 1044020 - Add duration of shutdown phases to Telemetry. r=froydnj
browser/installer/package-manifest.in
build/sanitizers/lsan_suppressions.txt
toolkit/components/moz.build
toolkit/components/telemetry/Histograms.json
toolkit/components/terminator/moz.build
toolkit/components/terminator/nsTerminator.cpp
toolkit/components/terminator/nsTerminator.h
toolkit/components/terminator/nsTerminatorTelemetry.js
toolkit/components/terminator/terminator.manifest
toolkit/components/terminator/tests/xpcshell/test_terminator_record.js
toolkit/components/terminator/tests/xpcshell/test_terminator_reload.js
toolkit/components/terminator/tests/xpcshell/xpcshell.ini
toolkit/crashreporter/test/unit/test_crash_terminator.js
--- a/browser/installer/package-manifest.in
+++ b/browser/installer/package-manifest.in
@@ -909,16 +909,19 @@ bin/libfreebl_32int64_3.so
 @BINPATH@/metro/defaults
 @BINPATH@/metro/modules
 #endif
 
 @BINPATH@/components/DataStore.manifest
 @BINPATH@/components/DataStoreImpl.js
 @BINPATH@/components/dom_datastore.xpt
 
+; Shutdown Terminator
+@BINPATH@/components/nsTerminatorTelemetry.js
+@BINPATH@/components/terminator.manifest
 
 #ifdef MOZ_ASAN
 #ifdef CLANG_CXX
 @BINPATH@/llvm-symbolizer
 #endif
 #ifdef CLANG_CL
 @BINPATH@/clang_rt.asan_dynamic-i386.dll
 #endif
--- a/build/sanitizers/lsan_suppressions.txt
+++ b/build/sanitizers/lsan_suppressions.txt
@@ -21,16 +21,18 @@ leak:profiler_init
 # Bug 981220 - Pixman fails to free TLS memory.
 leak:pixman_implementation_lookup_composite
 
 # Bug 987918 - Font shutdown leaks when CLEANUP_MEMORY is not enabled.
 leak:libfontconfig.so
 leak:GI___strdup
 # The symbol is really __GI___strdup, but if you have the leading _, it doesn't suppress it.
 
+# Bug 1078015 - If the process terminates during a PR_Sleep, LSAN detects a leak
+leak:PR_Sleep
 
 ###
 ### Bug 979928 - WebRTC leaks. m2, m3.
 ###
 
 # WebRTC leaks added for Mochitest 2.
 leak:NR_reg_init
 # nr_reg_local_init should be redundant with NR_reg_init, but with 34 on Aurora
--- a/toolkit/components/moz.build
+++ b/toolkit/components/moz.build
@@ -40,17 +40,16 @@ DIRS += [
     'promiseworker',
     'prompts',
     'protobuf',
     'reflect',
     'sqlite',
     'startup',
     'statusfilter',
     'telemetry',
-    'terminator',
     'thumbnails',
     'typeaheadfind',
     'urlformatter',
     'viewconfig',
     'viewsource',
     'workerloader',
     'workerlz4',
     'xulstore'
@@ -81,14 +80,17 @@ if CONFIG['MOZ_TOOLKIT_SEARCH']:
     DIRS += ['search']
 
 if CONFIG['MOZ_URL_CLASSIFIER']:
     DIRS += ['url-classifier']
 
 if CONFIG['MOZ_CAPTIVEDETECT']:
     DIRS += ['captivedetect']
 
+if CONFIG['MOZ_WIDGET_TOOLKIT'] != "gonk" and CONFIG['MOZ_WIDGET_TOOLKIT'] != 'android':
+    DIRS += ['terminator']
+
 DIRS += ['build']
 
 EXTRA_COMPONENTS += [
     'nsDefaultCLH.js',
     'nsDefaultCLH.manifest',
 ]
--- a/toolkit/components/telemetry/Histograms.json
+++ b/toolkit/components/telemetry/Histograms.json
@@ -6770,16 +6770,44 @@
   },
   "SOCIAL_SIDEBAR_OPEN_DURATION": {
     "expires_in_version": "40",
     "kind": "exponential",
     "high": "10000000",
     "n_buckets": 10,
     "description": "Sidebar showing: seconds that the sidebar has been opened"
   },
+  "SHUTDOWN_PHASE_DURATION_TICKS_QUIT_APPLICATION": {
+    "expires_in_version": "never",
+    "kind": "exponential",
+    "high": 65,
+    "n_buckets": 10,
+    "description": "Duration of shutdown phase quit-application, as measured by the shutdown terminator, in seconds of activity"
+  },
+  "SHUTDOWN_PHASE_DURATION_TICKS_PROFILE_CHANGE_TEARDOWN": {
+    "expires_in_version": "never",
+    "kind": "exponential",
+    "high": 65,
+    "n_buckets": 10,
+    "description": "Duration of shutdown phase profile-change-teardown, as measured by the shutdown terminator, in seconds of activity"
+  },
+  "SHUTDOWN_PHASE_DURATION_TICKS_XPCOM_WILL_SHUTDOWN": {
+    "expires_in_version": "never",
+    "kind": "exponential",
+    "high": 65,
+    "n_buckets": 10,
+    "description": "Duration of shutdown phase xpcom-will-shutdown, as measured by the shutdown terminator, in seconds of activity"
+  },
+  "SHUTDOWN_PHASE_DURATION_TICKS_PROFILE_BEFORE_CHANGE": {
+    "expires_in_version": "never",
+    "kind": "exponential",
+    "high": 65,
+    "n_buckets": 10,
+    "description": "Duration of shutdown phase profile-before-change, as measured by the shutdown terminator, in seconds of activity"
+  },
   "BR_9_2_1_SUBJECT_ALT_NAMES": {
     "expires_in_version": "never",
     "kind": "enumerated",
     "n_values": 8,
     "description": "Baseline Requirements section 9.2.1: subject alternative names extension (0: ok, 1 or more: error)"
   },
   "BR_9_2_2_SUBJECT_COMMON_NAME": {
     "expires_in_version": "never",
--- a/toolkit/components/terminator/moz.build
+++ b/toolkit/components/terminator/moz.build
@@ -1,21 +1,24 @@
 # -*- Mode: python; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 40 -*-
 # vim: set filetype=python:
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
+XPCSHELL_TESTS_MANIFESTS += ['tests/xpcshell/xpcshell.ini']
+
 SOURCES += [
     'nsTerminator.cpp',
 ]
 
 EXPORTS += [
     'nsTerminator.h',
 ]
 
 EXTRA_COMPONENTS += [
+    'nsTerminatorTelemetry.js',
     'terminator.manifest',
 ]
 
 FINAL_LIBRARY = 'xul'
 
 FAIL_ON_WARNINGS = True
--- a/toolkit/components/terminator/nsTerminator.cpp
+++ b/toolkit/components/terminator/nsTerminator.cpp
@@ -13,31 +13,40 @@
  * We spawn a thread during quit-application. If any of the shutdown
  * steps takes more than n milliseconds (63000 by default), kill the
  * process as fast as possible, without any cleanup.
  */
 
 #include "nsTerminator.h"
 
 #include "prthread.h"
+#include "prmon.h"
+#include "plstr.h"
+#include "prio.h"
+
 #include "nsString.h"
 #include "nsServiceManagerUtils.h"
+#include "nsDirectoryServiceUtils.h"
+#include "nsAppDirectoryServiceDefs.h"
 
 #include "nsIObserverService.h"
 #include "nsIPrefService.h"
 #if defined(MOZ_CRASHREPORTER)
 #include "nsExceptionHandler.h"
 #endif
 
 #include "mozilla/ArrayUtils.h"
+#include "mozilla/Attributes.h"
 #include "mozilla/DebugOnly.h"
+#include "mozilla/MemoryChecking.h"
 #include "mozilla/Preferences.h"
 #include "mozilla/Services.h"
 #include "mozilla/UniquePtr.h"
 #include "mozilla/unused.h"
+#include "mozilla/Telemetry.h"
 
 // Normally, the number of milliseconds that AsyncShutdown waits until
 // it decides to crash is specified as a preference. We use the
 // following value as a fallback if for some reason the preference is
 // absent.
 #define FALLBACK_ASYNCSHUTDOWN_CRASH_AFTER_MS 60000
 
 // Additional number of milliseconds to wait until we decide to exit
@@ -46,132 +55,377 @@
 
 // One second, in ticks.
 #define TICK_DURATION 1000
 
 namespace mozilla {
 
 namespace {
 
-/**
- * Set to `true` by the main thread whenever we pass a shutdown phase,
- * which means that the shutdown is still ongoing. Reset to `false` by
- * the Terminator thread, once it has acknowledged the progress.
- */
-Atomic<bool> gProgress(false);
+// Utility function: create a thread that is non-joinable,
+// does not prevent the process from terminating, is never
+// cooperatively scheduled, and uses a default stack size.
+PRThread* CreateSystemThread(void (*start)(void* arg),
+                             void* arg)
+{
+  PRThread* thread = PR_CreateThread(
+    PR_SYSTEM_THREAD, /* This thread will not prevent the process from terminating */
+    start,
+    arg,
+    PR_PRIORITY_LOW,
+    PR_GLOBAL_THREAD /* Make sure that the thread is never cooperatively scheduled */,
+    PR_UNJOINABLE_THREAD,
+    0 /* Use default stack size */
+  );
+  MOZ_LSAN_INTENTIONALLY_LEAK_OBJECT(thread); // This pointer will never be deallocated.
+  return thread;
+}
+
+
+////////////////////////////////////////////
+//
+// The watchdog
+//
+// This nspr thread is in charge of crashing the process if any stage of shutdown
+// lasts more than some predefined duration. As a side-effect, it measures the
+// duration of each stage of shutdown.
+//
+
+// The heartbeat of the operation.
+//
+// Main thread:
+//
+// * Whenever a shutdown step has been completed, the main thread
+// swaps gHeartbeat to 0 to mark that the shutdown process is still
+// progressing. The value swapped away indicates the number of ticks
+// it took for the shutdown step to advance.
+//
+// Watchdog thread:
+//
+// * Every tick, the watchdog thread increments gHeartbeat atomically.
+//
+// A note about precision:
+// Since gHeartbeat is generally reset to 0 between two ticks, this means
+// that gHeartbeat stays at 0 less than one tick. Consequently, values
+// extracted from gHeartbeat must be considered rounded up.
+Atomic<uint32_t> gHeartbeat(0);
 
 struct Options {
-  int32_t crashAfterMS;
+  /**
+   * How many ticks before we should crash the process.
+   */
+  uint32_t crashAfterTicks;
 };
 
+/**
+ * Entry point for the watchdog thread
+ */
 void
-Run(void* arg)
+RunWatchdog(void* arg)
 {
   PR_SetCurrentThreadName("Shutdown Hang Terminator");
 
   // Let's copy and deallocate options, that's one less leak to worry
   // about.
   UniquePtr<Options> options((Options*)arg);
-  int32_t crashAfterMS = options->crashAfterMS;
+  uint32_t crashAfterTicks = options->crashAfterTicks;
   options = nullptr;
 
-  int32_t timeToLive = crashAfterMS;
+  const uint32_t timeToLive = crashAfterTicks;
   while (true) {
     //
     // We do not want to sleep for the entire duration,
     // as putting the computer to sleep would suddenly
     // cause us to timeout on wakeup.
     //
     // Rather, we prefer sleeping for at most 1 second
     // at a time. If the computer sleeps then wakes up,
     // we have lost at most one second, which is much
     // more reasonable.
     //
     PR_Sleep(TICK_DURATION);
-    if (gProgress.exchange(false)) {
-      // We have passed at least one shutdown phase while waiting.
-      // Shutdown is still alive, reset the countdown.
-      timeToLive = crashAfterMS;
-      continue;
-    }
-    timeToLive -= TICK_DURATION;
-    if (timeToLive >= 0) {
+
+    if (gHeartbeat++ < timeToLive) {
       continue;
     }
 
     // Shutdown is apparently dead. Crash the process.
     MOZ_CRASH("Shutdown too long, probably frozen, causing a crash.");
   }
 }
 
-} // anonymous namespace
+////////////////////////////////////////////
+//
+// Writer thread
+//
+// This nspr thread is in charge of writing to disk statistics produced by the
+// watchdog thread and collected by the main thread. Note that we use a nspr
+// thread rather than usual XPCOM I/O simply because we outlive XPCOM and its
+// threads.
+//
+
+// Utility class, used by UniquePtr<> to close nspr files.
+class PR_CloseDelete
+{
+public:
+  MOZ_CONSTEXPR PR_CloseDelete() {}
+
+  PR_CloseDelete(const PR_CloseDelete& aOther)
+  {}
+
+  void operator()(PRFileDesc* aPtr) const
+  {
+    PR_Close(aPtr);
+  }
+};
+
+//
+// Communication between the main thread and the writer thread.
+//
+// Main thread:
+//
+// * Whenever a shutdown step has been completed, the main thread
+// obtains the number of ticks from the watchdog threads, builds
+// a string representing all the data gathered so far, places
+// this string in `gWriteData`, and wakes up the writer thread
+// using `gWriteReady`. If `gWriteData` already contained a non-null
+// pointer, this means that the writer thread is lagging behind the
+// main thread, and the main thread cleans up the memory.
+//
+// Writer thread:
+//
+// * When awake, the writer thread swaps `gWriteData` to nullptr. If
+// `gWriteData` contained data to write, the writer thread writes
+// that data to a file named "ShutdownDuration.json.tmp", then
+// moves that file to "ShutdownDuration.json" and cleans up the
+// data. If `gWriteData` contained a nullptr, the writer goes to sleep
+// until it is awakened using `gWriteReady`.
+//
+//
+// The data written by the writer thread will be read by another
+// module upon the next restart and fed to Telemetry.
+//
+Atomic<nsCString*> gWriteData(nullptr);
+PRMonitor* gWriteReady = nullptr;
+
+void RunWriter(void* arg)
+{
+  PR_SetCurrentThreadName("Shutdown Statistics Writer");
+
+  MOZ_LSAN_INTENTIONALLY_LEAK_OBJECT(arg);
+  // Shutdown will generally complete before we have a chance to
+  // deallocate. This is not a leak.
+
+  // Setup destinationPath and tmpFilePath
+
+  nsCString destinationPath(static_cast<char*>(arg));
+  nsAutoCString tmpFilePath;
+  tmpFilePath.Append(destinationPath);
+  tmpFilePath.AppendLiteral(".tmp");
+
+  // Cleanup any file leftover from a previous run
+  unused << PR_Delete(tmpFilePath.get());
+  unused << PR_Delete(destinationPath.get());
 
-static char const *const sObserverTopics[] = {
-  "quit-application",
-  "profile-change-teardown",
-  "profile-before-change",
-  "xpcom-will-shutdown",
-  "xpcom-shutdown",
+  while (true) {
+    // Check whether we have received data from the main thread.
+    //
+    // We perform the check before waiting on `gWriteReady` as we may
+    // have received data while we were busy writing.
+    //
+    // Also note that gWriteData may have been modified several times
+    // since we last checked. That's ok, we are not losing any important
+    // data (since we keep adding data), and we are not leaking memory
+    // (since the main thread deallocates any data that hasn't been
+    // consumed by the writer thread).
+    UniquePtr<nsCString> data(gWriteData.exchange(nullptr));
+    if (!data) {
+      // No data yet. Re-check under the monitor before sleeping, so that a
+      // notification sent since the exchange above cannot be missed.
+      PR_EnterMonitor(gWriteReady);
+      if (!gWriteData) {
+        PR_Wait(gWriteReady, PR_INTERVAL_NO_TIMEOUT);
+      }
+      PR_ExitMonitor(gWriteReady);
+      continue;
+    }
+
+    MOZ_LSAN_INTENTIONALLY_LEAK_OBJECT(data.get());
+    // Shutdown may complete before we have a chance to deallocate.
+    // This is not a leak.
+
+    //
+    // Write to a temporary file
+    //
+    // In case of any error, we simply give up. Since the data is
+    // hardly critical, we don't want to spend too much effort
+    // salvaging it.
+    //
+    UniquePtr<PRFileDesc, PR_CloseDelete>
+      tmpFileDesc(PR_Open(tmpFilePath.get(),
+                          PR_WRONLY | PR_TRUNCATE | PR_CREATE_FILE,
+                          00600));
+
+    // Shutdown may complete before we have a chance to close the file.
+    // This is not a leak.
+    MOZ_LSAN_INTENTIONALLY_LEAK_OBJECT(tmpFileDesc.get());
+
+    if (tmpFileDesc == nullptr) {
+      break;
+    }
+    if (PR_Write(tmpFileDesc.get(), data->get(), data->Length()) == -1) {
+      break;
+    }
+    tmpFileDesc.reset();
+
+    //
+    // Rename on top of destination file.
+    //
+    // This is not sufficient to guarantee that the destination file
+    // will be written correctly, but, again, we don't care enough
+    // about the data to make more efforts.
+    //
+    if (PR_Rename(tmpFilePath.get(), destinationPath.get()) != PR_SUCCESS) {
+      break;
+    }
+  }
+}
+
+/**
+ * A step during shutdown.
+ *
+ * Shutdown is divided in steps, which all map to an observer
+ * notification. The duration of a step is defined as the number of
+ * ticks between the time we receive a notification and the next one.
+ */
+struct ShutdownStep
+{
+  char const* const mTopic;
+  int mTicks;
+
+  MOZ_CONSTEXPR ShutdownStep(const char *const topic)
+    : mTopic(topic)
+    , mTicks(-1)
+  {}
+
 };
 
+static ShutdownStep sShutdownSteps[] = {
+  ShutdownStep("quit-application"),
+  ShutdownStep("profile-change-teardown"),
+  ShutdownStep("profile-before-change"),
+  ShutdownStep("xpcom-will-shutdown"),
+  ShutdownStep("xpcom-shutdown"),
+};
+
+} // anonymous namespace
+
 NS_IMPL_ISUPPORTS(nsTerminator, nsIObserver)
 
 nsTerminator::nsTerminator()
   : mInitialized(false)
+  , mCurrentStep(-1)
 {
 }
 
 // During startup, register as an observer for all interesting topics.
 nsresult
 nsTerminator::SelfInit()
 {
   nsCOMPtr<nsIObserverService> os = mozilla::services::GetObserverService();
   if (!os) {
     return NS_ERROR_UNEXPECTED;
   }
 
-  for (size_t i = 0; i < ArrayLength(sObserverTopics); ++i) {
-    DebugOnly<nsresult> rv = os->AddObserver(this, sObserverTopics[i], false);
+  for (size_t i = 0; i < ArrayLength(sShutdownSteps); ++i) {
+    DebugOnly<nsresult> rv = os->AddObserver(this, sShutdownSteps[i].mTopic, false);
 #if defined(DEBUG)
     NS_WARN_IF(NS_FAILED(rv));
 #endif // defined(DEBUG)
   }
+
   return NS_OK;
 }
 
-// Actually launch the thread. This takes place at the first sign of shutdown.
+// Actually launch these threads. This takes place at the first sign of shutdown.
 void
-nsTerminator::Start() {
-  // Determine how long we need to wait
+nsTerminator::Start()
+{
+  MOZ_ASSERT(!mInitialized);
+  StartWatchdog();
+  StartWriter();
+  mInitialized = true;
+}
 
+// Prepare, allocate and start the watchdog thread.
+// By design, it will never finish, nor be deallocated.
+void
+nsTerminator::StartWatchdog()
+{
   int32_t crashAfterMS =
     Preferences::GetInt("toolkit.asyncshutdown.crash_timeout",
                         FALLBACK_ASYNCSHUTDOWN_CRASH_AFTER_MS);
+  // Ignore negative values
+  if (crashAfterMS <= 0) {
+    crashAfterMS = FALLBACK_ASYNCSHUTDOWN_CRASH_AFTER_MS;
+  }
 
   // Add a little padding, to ensure that we do not crash before
   // AsyncShutdown.
-  crashAfterMS += ADDITIONAL_WAIT_BEFORE_CRASH_MS;
+  if (crashAfterMS > INT32_MAX - ADDITIONAL_WAIT_BEFORE_CRASH_MS) {
+    // Defend against overflow
+    crashAfterMS = INT32_MAX;
+  } else {
+    crashAfterMS += ADDITIONAL_WAIT_BEFORE_CRASH_MS;
+  }
 
   UniquePtr<Options> options(new Options());
-  options->crashAfterMS = crashAfterMS;
+  options->crashAfterTicks = crashAfterMS / TICK_DURATION;
+
+  DebugOnly<PRThread*> watchdogThread = CreateSystemThread(RunWatchdog,
+                                                options.release());
+  MOZ_ASSERT(watchdogThread);
+}
+
+// Prepare, allocate and start the writer thread. By design, it will never
+// finish, nor be deallocated. In case of error, we degrade
+// gracefully to not writing Telemetry data.
+void
+nsTerminator::StartWriter()
+{
 
-  // Allocate and start the thread.
-  // By design, it will never finish, nor be deallocated.
-  DebugOnly<PRThread*> thread = PR_CreateThread(
-    PR_SYSTEM_THREAD, /* This thread will not prevent the process from terminating */
-    Run,
-    options.release(),
-    PR_PRIORITY_LOW,
-    PR_GLOBAL_THREAD /* Make sure that the thread is never cooperatively scheduled */,
-    PR_UNJOINABLE_THREAD,
-    0 /* Use default stack size */
-  );
+  if (!Telemetry::CanRecord()) {
+    return;
+  }
+  nsCOMPtr<nsIFile> profLD;
+  nsresult rv = NS_GetSpecialDirectory(NS_APP_USER_PROFILE_LOCAL_50_DIR,
+                                       getter_AddRefs(profLD));
+  if (NS_FAILED(rv)) {
+    return;
+  }
+
+  rv = profLD->Append(NS_LITERAL_STRING("ShutdownDuration.json"));
+  if (NS_FAILED(rv)) {
+    return;
+  }
 
-  MOZ_ASSERT(thread);
-  mInitialized = true;
+  nsAutoString path;
+  rv = profLD->GetPath(path);
+  if (NS_FAILED(rv)) {
+    return;
+  }
+
+  gWriteReady = PR_NewMonitor();
+  MOZ_LSAN_INTENTIONALLY_LEAK_OBJECT(gWriteReady); // We will never deallocate this object
+  PRThread* writerThread = CreateSystemThread(RunWriter,
+                                              ToNewUTF8String(path));
+
+  if (!writerThread) {
+    return;
+  }
 }
 
 NS_IMETHODIMP
 nsTerminator::Observe(nsISupports *, const char *aTopic, const char16_t *)
 {
   if (strcmp(aTopic, "profile-after-change") == 0) {
     return SelfInit();
   }
@@ -180,24 +434,106 @@ nsTerminator::Observe(nsISupports *, con
 
   // As we have seen examples in the wild of shutdown notifications
   // not being sent (or not being sent in the expected order), we do
   // not assume a specific order.
   if (!mInitialized) {
     Start();
   }
 
-  // Inform the thread that we have advanced by one phase.
-  gProgress.exchange(true);
+  UpdateHeartbeat(aTopic);
+  UpdateTelemetry();
+  UpdateCrashReport(aTopic);
 
-#if defined(MOZ_CRASHREPORTER)
-  // In case of crash, we wish to know where in shutdown we are
-  unused << CrashReporter::AnnotateCrashReport(NS_LITERAL_CSTRING("ShutdownProgress"),
-                                               nsAutoCString(aTopic));
-#endif // defined(MOZ_CRASH_REPORTER)
-
+  // Perform a little cleanup
   nsCOMPtr<nsIObserverService> os = mozilla::services::GetObserverService();
   MOZ_RELEASE_ASSERT(os);
   (void)os->RemoveObserver(this, aTopic);
+
   return NS_OK;
 }
 
+void
+nsTerminator::UpdateHeartbeat(const char* aTopic)
+{
+  // Reset the clock and credit the elapsed ticks to the step that just
+  // ended. Index 0 ("quit-application") is a valid step; only -1 is not.
+  uint32_t ticks = gHeartbeat.exchange(0);
+  if (mCurrentStep >= 0) {
+    sShutdownSteps[mCurrentStep].mTicks = ticks;
+  }
+
+  // Locate the new step. Shutdown may not follow the expected order.
+  int nextStep = -1;
+  for (size_t i = 0; i < ArrayLength(sShutdownSteps); ++i) {
+    if (strcmp(sShutdownSteps[i].mTopic, aTopic) == 0) {
+      nextStep = i;
+      break;
+    }
+  }
+  MOZ_ASSERT(nextStep != -1);
+  mCurrentStep = nextStep;
+}
+
+void
+nsTerminator::UpdateTelemetry()
+{
+  if (!Telemetry::CanRecord() || !gWriteReady) {
+    return;
+  }
+
+  //
+  // We need Telemetry data on the effective duration of each step,
+  // to be able to tune the time-to-crash of both the
+  // Terminator and AsyncShutdown. However, at this stage, it is too
+  // late to record such data into Telemetry, so we write it to disk
+  // and read it upon the next startup.
+  //
+
+  // Build a JSON object mapping each completed step's topic to its ticks.
+  UniquePtr<nsCString> telemetryData(new nsCString());
+  telemetryData->AppendLiteral("{");
+  size_t fields = 0;
+  for (size_t i = 0; i < ArrayLength(sShutdownSteps); ++i) {
+    if (sShutdownSteps[i].mTicks < 0) {
+      // No duration recorded for this step (mTicks is still -1); skip it.
+      continue;
+    }
+    if (fields++ > 0) {
+      telemetryData->Append(", ");
+    }
+    telemetryData->AppendLiteral("\"");
+    telemetryData->Append(sShutdownSteps[i].mTopic);
+    telemetryData->AppendLiteral("\": ");
+    telemetryData->AppendInt(sShutdownSteps[i].mTicks);
+  }
+  telemetryData->AppendLiteral("}");
+
+  if (fields == 0) {
+    // Nothing to write
+      return;
+  }
+
+  //
+  // Send data to the writer thread.
+  //
+  delete gWriteData.exchange(telemetryData.release()); // Free any data the writer has not consumed yet
+
+  // In case the writer thread was sleeping, wake it up.
+  PR_EnterMonitor(gWriteReady);
+  PR_Notify(gWriteReady);
+  PR_ExitMonitor(gWriteReady);
+}
+
+void
+nsTerminator::UpdateCrashReport(const char* aTopic)
+{
+#if defined(MOZ_CRASHREPORTER)
+  // In case of crash, we wish to know where in shutdown we are
+  nsAutoCString report(aTopic);
+
+  unused << CrashReporter::AnnotateCrashReport(NS_LITERAL_CSTRING("ShutdownProgress"),
+                                               report);
+#endif // defined(MOZ_CRASHREPORTER)
+}
+
+
 } // namespace mozilla
--- a/toolkit/components/terminator/nsTerminator.h
+++ b/toolkit/components/terminator/nsTerminator.h
@@ -17,20 +17,27 @@ public:
   NS_DECL_ISUPPORTS
   NS_DECL_NSIOBSERVER
 
   nsTerminator();
 
 private:
   nsresult SelfInit();
   void Start();
+  void StartWatchdog();
+  void StartWriter();
+
+  void UpdateHeartbeat(const char* aTopic);
+  void UpdateTelemetry();
+  void UpdateCrashReport(const char* aTopic);
 
   ~nsTerminator() {}
 
   bool mInitialized;
+  int32_t mCurrentStep;
 };
 
 }
 
 #define NS_TOOLKIT_TERMINATOR_CID { 0x2e59cc70, 0xf83a, 0x412f, \
   { 0x89, 0xd4, 0x45, 0x38, 0x85, 0x83, 0x72, 0x17 } }
 #define NS_TOOLKIT_TERMINATOR_CONTRACTID "@mozilla.org/toolkit/shutdown-terminator;1"
 
new file mode 100644
--- /dev/null
+++ b/toolkit/components/terminator/nsTerminatorTelemetry.js
@@ -0,0 +1,105 @@
+/* -*- indent-tabs-mode: nil; js-indent-level: 2 -*- */
+/* vim: set ts=2 et sw=2 tw=80 filetype=javascript: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+"use strict";
+
+/**
+ * Read the data saved by nsTerminator during shutdown and feed it to the
+ * relevant telemetry histograms.
+ */
+
+const Cc = Components.classes;
+const Ci = Components.interfaces;
+const Cu = Components.utils;
+const Cr = Components.results;
+
+Cu.import("resource://gre/modules/XPCOMUtils.jsm");
+
+XPCOMUtils.defineLazyModuleGetter(this, "OS",
+  "resource://gre/modules/osfile.jsm");
+XPCOMUtils.defineLazyModuleGetter(this, "Promise",
+  "resource://gre/modules/Promise.jsm");
+XPCOMUtils.defineLazyModuleGetter(this, "Task",
+  "resource://gre/modules/Task.jsm");
+XPCOMUtils.defineLazyModuleGetter(this, "setTimeout",
+  "resource://gre/modules/Timer.jsm");
+XPCOMUtils.defineLazyModuleGetter(this, "Services",
+  "resource://gre/modules/Services.jsm");
+
+function nsTerminatorTelemetry() {}
+
+let HISTOGRAMS = {
+  "quit-application": "SHUTDOWN_PHASE_DURATION_TICKS_QUIT_APPLICATION",
+  "profile-change-teardown": "SHUTDOWN_PHASE_DURATION_TICKS_PROFILE_CHANGE_TEARDOWN",
+  "profile-before-change":  "SHUTDOWN_PHASE_DURATION_TICKS_PROFILE_BEFORE_CHANGE",
+  "xpcom-will-shutdown": "SHUTDOWN_PHASE_DURATION_TICKS_XPCOM_WILL_SHUTDOWN",
+};
+
+nsTerminatorTelemetry.prototype = {
+  classID: Components.ID("{3f78ada1-cba2-442a-82dd-d5fb300ddea7}"),
+
+  _xpcom_factory: XPCOMUtils.generateSingletonFactory(nsTerminatorTelemetry),
+
+  //////////////////////////////////////////////////////////////////////////////
+  //// nsISupports
+
+  QueryInterface: XPCOMUtils.generateQI([Ci.nsIObserver]),
+
+  //////////////////////////////////////////////////////////////////////////////
+  //// nsIObserver
+
+  observe: function DS_observe(aSubject, aTopic, aData)
+  {
+    Task.spawn(function*() {
+      //
+      // This data is hardly critical, reading it can wait for a few seconds.
+      //
+      yield new Promise(resolve => setTimeout(resolve, 3000));
+
+      let PATH = OS.Path.join(OS.Constants.Path.localProfileDir,
+        "ShutdownDuration.json");
+      let raw;
+      try {
+        raw = yield OS.File.read(PATH, { encoding: "utf-8" });
+      } catch (ex if ex.becauseNoSuchFile) {
+        return;
+      }
+      // Let other errors be reported by Promise's error-reporting.
+
+      // Clean up
+      OS.File.remove(PATH);
+      OS.File.remove(PATH + ".tmp");
+
+      let data = JSON.parse(raw);
+      for (let k of Object.keys(data)) {
+        let id = HISTOGRAMS[k];
+        try {
+          let histogram = Services.telemetry.getHistogramById(id);
+          if (!histogram) {
+            throw new Error("Unknown histogram " + id);
+          }
+
+          histogram.add(Number.parseInt(data[k], 10));
+        } catch (ex) {
+          // Make sure that the error is reported and causes test failures,
+          // but otherwise, ignore it.
+          Promise.reject(ex);
+          continue;
+        }
+      }
+
+      // Inform observers that we are done.
+      Services.obs.notifyObservers(null,
+        "shutdown-terminator-telemetry-updated",
+        "");
+    });
+  },
+};
+
+////////////////////////////////////////////////////////////////////////////////
+//// Module
+
+this.NSGetFactory = XPCOMUtils.generateNSGetFactory([nsTerminatorTelemetry]);
--- a/toolkit/components/terminator/terminator.manifest
+++ b/toolkit/components/terminator/terminator.manifest
@@ -1,2 +1,5 @@
 category profile-after-change nsTerminator @mozilla.org/toolkit/shutdown-terminator;1
 
+component {3f78ada1-cba2-442a-82dd-d5fb300ddea7} nsTerminatorTelemetry.js
+contract @mozilla.org/toolkit/shutdown-terminator-telemetry;1 {3f78ada1-cba2-442a-82dd-d5fb300ddea7}
+category profile-after-change nsTerminatorTelemetry @mozilla.org/toolkit/shutdown-terminator-telemetry;1
new file mode 100644
--- /dev/null
+++ b/toolkit/components/terminator/tests/xpcshell/test_terminator_record.js
@@ -0,0 +1,108 @@
+/* Any copyright is dedicated to the Public Domain.
+ * http://creativecommons.org/publicdomain/zero/1.0/ */
+
+"use strict";
+
+
+// Test that the Shutdown Terminator records durations correctly
+
+const Cu = Components.utils;
+const Cc = Components.classes;
+const Ci = Components.interfaces;
+
+Cu.import("resource://gre/modules/Services.jsm", this);
+Cu.import("resource://gre/modules/osfile.jsm", this);
+Cu.import("resource://gre/modules/Timer.jsm", this);
+Cu.import("resource://gre/modules/Task.jsm", this);
+
+let {Path, File, Constants} = OS;
+
+let PATH;
+let PATH_TMP;
+let terminator;
+
+add_task(function* init() {
+  do_get_profile();
+  PATH = Path.join(Constants.Path.localProfileDir, "ShutdownDuration.json");
+  PATH_TMP = PATH + ".tmp";
+
+  // Initialize the terminator
+  // (normally, this is done through the manifest file, but xpcshell
+  // doesn't take them into account).
+  do_print("Initializing the Terminator");
+  terminator = Cc["@mozilla.org/toolkit/shutdown-terminator;1"].
+    createInstance(Ci.nsIObserver);
+  terminator.observe(null, "profile-after-change", null);
+});
+
+let promiseShutdownDurationData = Task.async(function*() {
+  // Wait until PATH exists.
+  // Timeout if it is never created.
+  do_print("Waiting for file creation: " + PATH);
+  while (true) {
+    if ((yield OS.File.exists(PATH))) {
+      break;
+    }
+
+    do_print("The file does not exist yet. Waiting 1 second.");
+    yield new Promise(resolve => setTimeout(resolve, 1000));
+  }
+
+  do_print("The file has been created");
+  let raw = yield OS.File.read(PATH, { encoding: "utf-8"} );
+  do_print(raw);
+  return JSON.parse(raw);
+});
+
+add_task(function* test_record() {
+  let PHASE0 = "profile-change-teardown";
+  let PHASE1 = "profile-before-change";
+  let PHASE2 = "xpcom-will-shutdown";
+  let t0 = Date.now();
+
+  do_print("Starting shutdown");
+  terminator.observe(null, "profile-change-teardown", null);
+
+  do_print("Moving to next phase");
+  terminator.observe(null, PHASE1, null);
+
+  let data = yield promiseShutdownDurationData();
+
+  let t1 = Date.now();
+
+  Assert.ok(PHASE0 in data, "The file contains the expected key");
+  let duration = data[PHASE0];
+  Assert.equal(typeof duration, "number");
+  Assert.ok(duration >= 0, "Duration is a non-negative number");
+  Assert.ok(duration <= Math.ceil((t1 - t0) / 1000) + 1,
+    "Duration is reasonable");
+
+  Assert.equal(Object.keys(data).length, 1, "Data does not contain other durations");
+
+  do_print("Cleaning up and moving to next phase");
+  yield File.remove(PATH);
+  yield File.remove(PATH_TMP);
+
+  do_print("Waiting at least one tick");
+  let WAIT_MS = 2000;
+  yield new Promise(resolve => setTimeout(resolve, WAIT_MS));
+
+  terminator.observe(null, PHASE2, null);
+  data = yield promiseShutdownDurationData();
+
+  let t2 = Date.now();
+
+  Assert.equal(Object.keys(data).sort().join(", "),
+               [PHASE0, PHASE1].sort().join(", "),
+               "The file contains the expected keys");
+  Assert.equal(data[PHASE0], duration, "Duration of phase 0 hasn't changed");
+  let duration2 = data[PHASE1];
+  Assert.equal(typeof duration2, "number");
+  Assert.ok(duration2 >= WAIT_MS / 2000, "We have waited at least " + (WAIT_MS / 2000) + " ticks");
+  Assert.ok(duration2 <= Math.ceil((t2 - t1) / 1000) + 1,
+    "Duration is reasonable");
+});
+
+function run_test() {
+  run_next_test();
+}
new file mode 100644
--- /dev/null
+++ b/toolkit/components/terminator/tests/xpcshell/test_terminator_reload.js
@@ -0,0 +1,85 @@
+/* Any copyright is dedicated to the Public Domain.
+ * http://creativecommons.org/publicdomain/zero/1.0/ */
+
+"use strict";
+
+
+// Test that the Shutdown Terminator reloads durations correctly
+
+const Cu = Components.utils;
+const Cc = Components.classes;
+const Ci = Components.interfaces;
+
+Cu.import("resource://gre/modules/Services.jsm", this);
+Cu.import("resource://gre/modules/osfile.jsm", this);
+Cu.import("resource://gre/modules/Timer.jsm", this);
+Cu.import("resource://gre/modules/Task.jsm", this);
+
+let {Path, File, Constants} = OS;
+
+let PATH;
+
+let HISTOGRAMS = { // key forged into ShutdownDuration.json -> id of the histogram checked for that key
+  "quit-application": "SHUTDOWN_PHASE_DURATION_TICKS_QUIT_APPLICATION",
+  "profile-change-teardown": "SHUTDOWN_PHASE_DURATION_TICKS_PROFILE_CHANGE_TEARDOWN",
+  "profile-before-change":  "SHUTDOWN_PHASE_DURATION_TICKS_PROFILE_BEFORE_CHANGE",
+  "xpcom-will-shutdown": "SHUTDOWN_PHASE_DURATION_TICKS_XPCOM_WILL_SHUTDOWN",
+};
+
+add_task(function* init() {
+  do_get_profile(); // presumably sets up the test profile before its path is read below -- TODO confirm
+  PATH = Path.join(Constants.Path.localProfileDir, "ShutdownDuration.json");
+});
+
+add_task(function* test_reload() {
+  // Forge a duration file and check that each listed histogram receives one item.
+  do_print("Forging data");
+  let data = {};
+  let telemetrySnapshots = Services.telemetry.histogramSnapshots;
+  let i = 0;
+  for (let k of Object.keys(HISTOGRAMS)) {
+    let id = HISTOGRAMS[k];
+    data[k] = i++;
+    Assert.equal(telemetrySnapshots[id] || undefined, undefined, "Histogram " + id + " is empty");
+  }
+
+  yield File.writeAtomic(PATH, JSON.stringify(data));
+
+  const TOPIC = "shutdown-terminator-telemetry-updated";
+
+  // Start listening before the component runs, so the notification is not missed.
+  let wait = new Promise(resolve =>
+    Services.obs.addObserver(
+      function observer() {
+        do_print("Telemetry has been updated");
+        Services.obs.removeObserver(observer, TOPIC);
+        resolve();
+      },
+      TOPIC,
+      false));
+
+  do_print("Starting nsTerminatorTelemetry");
+  let tt = Cc["@mozilla.org/toolkit/shutdown-terminator-telemetry;1"].
+    createInstance(Ci.nsIObserver);
+  tt.observe(null, "profile-after-change", "");
+
+  do_print("Waiting until telemetry is updated");
+  yield wait;
+
+  telemetrySnapshots = Services.telemetry.histogramSnapshots;
+  for (let k of Object.keys(HISTOGRAMS)) {
+    let id = HISTOGRAMS[k];
+    do_print("Testing histogram " + id);
+    let snapshot = telemetrySnapshots[id];
+    Assert.ok(snapshot, "Histogram " + id + " has a snapshot");
+    let count = 0;
+    for (let x of snapshot.counts) {
+      count += x;
+    }
+    Assert.equal(count, 1, "We have added one item");
+  }
+});
+
+function run_test() {
+  run_next_test(); // presumably starts the add_task() tests registered above (harness helper, not defined here)
+}
new file mode 100644
--- /dev/null
+++ b/toolkit/components/terminator/tests/xpcshell/xpcshell.ini
@@ -0,0 +1,7 @@
+[DEFAULT]
+head=
+tail=
+
+[test_terminator_record.js]
+[test_terminator_reload.js]
+skip-if = (os == "android" || appname == "b2g")
--- a/toolkit/crashreporter/test/unit/test_crash_terminator.js
+++ b/toolkit/crashreporter/test/unit/test_crash_terminator.js
@@ -16,23 +16,25 @@ function setup_crash() {
   // doesn't take them into account).
   let terminator = Components.classes["@mozilla.org/toolkit/shutdown-terminator;1"].
     createInstance(Components.interfaces.nsIObserver);
   terminator.observe(null, "profile-after-change", null);
 
   // Inform the terminator that shutdown has started
   // Pick an arbitrary notification
   terminator.observe(null, "xpcom-will-shutdown", null);
+  terminator.observe(null, "profile-before-change", null);
 
   dump("Waiting (actively) for the crash\n");
   while(true) {
     Services.tm.currentThread.processNextEvent(true);
   }
 };
 
 
 function after_crash(mdump, extra) {
-  Assert.equal(extra.ShutdownProgress, "xpcom-will-shutdown");
+  do_print("Crash signature: " + JSON.stringify(extra, null, "\t"));
+  Assert.equal(extra.ShutdownProgress, "profile-before-change");
 }
 
 function run_test() {
   do_crash(setup_crash, after_crash);
 }