Bug 1498942 - Don't attempt to take minidumps of hung content processes when shutting down. r=Ehsan, a=jcristau
authorGabriele Svelto <gsvelto@mozilla.com>
Tue, 13 Nov 2018 15:34:49 +0000
changeset 501281 be6c9c1f12a731ab06f4236848354e8190bb252e
parent 501280 d4ce8b3cdee86a47d1e201bde99f81756b45193a
child 501282 ceba8351e044c15a408e8302d41bafc21e70e528
push id1864
push userffxbld-merge
push dateMon, 03 Dec 2018 15:51:40 +0000
treeherdermozilla-release@f040763d99ad [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersEhsan, jcristau
bugs1498942
milestone64.0
Bug 1498942 - Don't attempt to take minidumps of hung content processes when shutting down. r=Ehsan, a=jcristau This patch also takes the timeout for killing a content process back to its original value and removes a related but long unused field. Differential Revision: https://phabricator.services.mozilla.com/D11739
dom/ipc/ContentParent.cpp
dom/ipc/ContentParent.h
modules/libpref/init/StaticPrefList.h
--- a/dom/ipc/ContentParent.cpp
+++ b/dom/ipc/ContentParent.cpp
@@ -2993,16 +2993,18 @@ NS_INTERFACE_MAP_END
 
 NS_IMETHODIMP
 ContentParent::Observe(nsISupports* aSubject,
                        const char* aTopic,
                        const char16_t* aData)
 {
   if (mSubprocess && (!strcmp(aTopic, "profile-before-change") ||
                       !strcmp(aTopic, "xpcom-shutdown"))) {
+    mShuttingDown = true;
+
     // Make sure that our process will get scheduled.
     ProcessPriorityManager::SetProcessPriority(this,
                                                PROCESS_PRIORITY_FOREGROUND);
 
     // Okay to call ShutDownProcess multiple times.
     ShutDownProcess(SEND_SHUTDOWN_MESSAGE);
     MarkAsDead();
 
@@ -3295,36 +3297,24 @@ ContentParent::ForceKillTimerCallback(ns
   if (PR_GetEnv("XPCSHELL_TEST_PROFILE_DIR")) {
     return;
   }
 
   auto self = static_cast<ContentParent*>(aClosure);
   self->KillHard("ShutDownKill");
 }
 
-// WARNING: aReason appears in telemetry, so any new value passed in requires
-// data review.
 void
-ContentParent::KillHard(const char* aReason)
-{
-  AUTO_PROFILER_LABEL("ContentParent::KillHard", OTHER);
-
-  // On Windows, calling KillHard multiple times causes problems - the
-  // process handle becomes invalid on the first call, causing a second call
-  // to crash our process - more details in bug 890840.
-  if (mCalledKillHard) {
-    return;
-  }
-  mCalledKillHard = true;
-  mForceKillTimer = nullptr;
-
+ContentParent::GeneratePairedMinidump(const char* aReason)
+{
   // We're about to kill the child process associated with this content.
   // Something has gone wrong to get us here, so we generate a minidump
-  // of the parent and child for submission to the crash server.
-  if (mCrashReporter) {
+  // of the parent and child for submission to the crash server unless we're
+  // already shutting down.
+  if (mCrashReporter && !mShuttingDown) {
     // GeneratePairedMinidump creates two minidumps for us - the main
     // one is for the content process we're about to kill, and the other
     // one is for the main browser process. That second one is the extra
     // minidump tagging along, so we have to tell the crash reporter that
     // it exists and is being appended.
     nsAutoCString additionalDumps("browser");
     mCrashReporter->AddAnnotation(
       CrashReporter::Annotation::additional_minidumps, additionalDumps);
@@ -3337,16 +3327,35 @@ ContentParent::KillHard(const char* aRea
                                                 nullptr,
                                                 NS_LITERAL_CSTRING("browser")))
     {
       mCreatedPairedMinidumps = mCrashReporter->FinalizeCrashReport();
     }
 
     Telemetry::Accumulate(Telemetry::SUBPROCESS_KILL_HARD, reason, 1);
   }
+}
+
+// WARNING: aReason appears in telemetry, so any new value passed in requires
+// data review.
+void
+ContentParent::KillHard(const char* aReason)
+{
+  AUTO_PROFILER_LABEL("ContentParent::KillHard", OTHER);
+
+  // On Windows, calling KillHard multiple times causes problems - the
+  // process handle becomes invalid on the first call, causing a second call
+  // to crash our process - more details in bug 890840.
+  if (mCalledKillHard) {
+    return;
+  }
+  mCalledKillHard = true;
+  mForceKillTimer = nullptr;
+
+  GeneratePairedMinidump(aReason);
 
   ProcessHandle otherProcessHandle;
   if (!base::OpenProcessHandle(OtherPid(), &otherProcessHandle)) {
     NS_ERROR("Failed to open child process when attempting kill.");
     return;
   }
 
   if (!KillProcess(otherProcessHandle, base::PROCESS_END_KILLED_BY_USER,
--- a/dom/ipc/ContentParent.h
+++ b/dom/ipc/ContentParent.h
@@ -785,16 +785,19 @@ private:
 
   // Launch the subprocess and associated initialization.
   // Returns false if the process fails to start.
   bool LaunchSubprocess(hal::ProcessPriority aInitialPriority = hal::PROCESS_PRIORITY_FOREGROUND);
 
   // Common initialization after sub process launch.
   void InitInternal(ProcessPriority aPriority);
 
+  // Generate a minidump for the child process and one for the main process
+  void GeneratePairedMinidump(const char* aReason);
+
   virtual ~ContentParent();
 
   void Init();
 
   // Some information could be sent to content very early, it
   // should be send from this function. This function should only be
   // called after the process has been transformed to browser.
   void ForwardKnownInfo();
@@ -1295,18 +1298,16 @@ private:
   int32_t mGeolocationWatchID;
 
   // This contains the id for the JS plugin (@see nsFakePluginTag) if this is the
   // ContentParent for a process containing iframes for that JS plugin.
   // If this is not a ContentParent for a JS plugin then it contains the value
   // nsFakePluginTag::NOT_JSPLUGIN.
   int32_t mJSPluginID;
 
-  nsCString mKillHardAnnotation;
-
   // After we initiate shutdown, we also start a timer to ensure
   // that even content processes that are 100% blocked (say from
   // SIGSTOP), are still killed eventually.  This task enforces that
   // timer.
   nsCOMPtr<nsITimer> mForceKillTimer;
   // How many tabs we're waiting to finish their destruction
   // sequence.  Precisely, how many TabParents have called
   // NotifyTabDestroying() but not called NotifyTabDestroyed().
@@ -1314,16 +1315,17 @@ private:
   // True only while this process is in "good health" and may be used for
   // new remote tabs.
   bool mIsAvailable;
   // True only while remote content is being actively used from this process.
   // After mIsAlive goes to false, some previously scheduled IPC traffic may
   // still pass through.
   bool mIsAlive;
 
+  bool mShuttingDown;
   bool mIsForBrowser;
 
   // Whether this process is recording or replaying its execution, and any
   // associated recording file.
   RecordReplayState mRecordReplayState;
   nsString mRecordingFile;
 
   // When recording or replaying, the child process is a middleman. This vector
--- a/modules/libpref/init/StaticPrefList.h
+++ b/modules/libpref/init/StaticPrefList.h
@@ -179,17 +179,17 @@ VARCACHE_PREF(
 )
 #undef PREF_VALUE
 
 // How long a content process can take before closing its IPC channel
 // after shutdown is initiated.  If the process exceeds the timeout,
 // we fear the worst and kill it.
 #if !defined(DEBUG) && !defined(MOZ_ASAN) && !defined(MOZ_VALGRIND) && \
     !defined(MOZ_TSAN)
-# define PREF_VALUE 10
+# define PREF_VALUE 5
 #else
 # define PREF_VALUE 0
 #endif
 VARCACHE_PREF(
   "dom.ipc.tabs.shutdownTimeoutSecs",
    dom_ipc_tabs_shutdownTimeoutSecs,
   RelaxedAtomicUint32, PREF_VALUE
 )