Bug 1497057 - Remove ability to restart replaying children, r=mccr8.
authorBrian Hackett <bhackett1024@gmail.com>
Tue, 09 Oct 2018 14:24:14 -1000
changeset 498948 31c68d4d5e8b3e117d19795debe841a968797441
parent 498947 e36764125483b541b8647df406ef29ac2eb89876
child 498949 eff92a00f2d86994ecc2fd5c80e13e958b7b45b4
push id1864
push userffxbld-merge
push dateMon, 03 Dec 2018 15:51:40 +0000
treeherdermozilla-release@f040763d99ad [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersmccr8
bugs1497057
milestone64.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1497057 - Remove ability to restart replaying children, r=mccr8.
dom/ipc/ContentParent.cpp
dom/ipc/ContentParent.h
dom/ipc/PContent.ipdl
toolkit/recordreplay/ipc/ChildProcess.cpp
toolkit/recordreplay/ipc/ParentInternal.h
--- a/dom/ipc/ContentParent.cpp
+++ b/dom/ipc/ContentParent.cpp
@@ -2097,31 +2097,16 @@ ContentParent::RecvCreateReplayingProces
   mReplayingChildren[aChannelId] = new GeckoChildProcessHost(GeckoProcessType_Content);
   if (!mReplayingChildren[aChannelId]->LaunchAndWaitForProcessHandle(extraArgs)) {
     return IPC_FAIL_NO_REASON(this);
   }
 
   return IPC_OK();
 }
 
-mozilla::ipc::IPCResult
-ContentParent::RecvTerminateReplayingProcess(const uint32_t& aChannelId)
-{
-  // We should only get this message from the child if it is recording or replaying.
-  if (!this->IsRecordingOrReplaying()) {
-    return IPC_FAIL_NO_REASON(this);
-  }
-
-  if (aChannelId < mReplayingChildren.length() && mReplayingChildren[aChannelId]) {
-    DelayedDeleteSubprocess(mReplayingChildren[aChannelId]);
-    mReplayingChildren[aChannelId] = nullptr;
-  }
-  return IPC_OK();
-}
-
 jsipc::CPOWManager*
 ContentParent::GetCPOWManager()
 {
   if (PJavaScriptParent* p = LoneManagedOrNullAsserts(ManagedPJavaScriptParent())) {
     return CPOWManagerFor(p);
   }
   return nullptr;
 }
--- a/dom/ipc/ContentParent.h
+++ b/dom/ipc/ContentParent.h
@@ -302,17 +302,16 @@ public:
                                                          bool* aIsForBrowser) override;
 
   virtual mozilla::ipc::IPCResult RecvBridgeToChildProcess(const ContentParentId& aCpId,
                                                            Endpoint<PContentBridgeParent>* aEndpoint) override;
 
   virtual mozilla::ipc::IPCResult RecvOpenRecordReplayChannel(const uint32_t& channelId,
                                                               FileDescriptor* connection) override;
   virtual mozilla::ipc::IPCResult RecvCreateReplayingProcess(const uint32_t& aChannelId) override;
-  virtual mozilla::ipc::IPCResult RecvTerminateReplayingProcess(const uint32_t& aChannelId) override;
 
   virtual mozilla::ipc::IPCResult RecvCreateGMPService() override;
 
   virtual mozilla::ipc::IPCResult RecvLoadPlugin(const uint32_t& aPluginId, nsresult* aRv,
                                                  uint32_t* aRunID,
                                                  Endpoint<PPluginModuleParent>* aEndpoint) override;
 
   virtual mozilla::ipc::IPCResult RecvMaybeReloadPlugins() override;
--- a/dom/ipc/PContent.ipdl
+++ b/dom/ipc/PContent.ipdl
@@ -737,17 +737,16 @@ parent:
                             TabId tabId)
         returns (ContentParentId cpId, bool isForBrowser);
     sync BridgeToChildProcess(ContentParentId cpId)
         returns (Endpoint<PContentBridgeParent> endpoint);
 
     sync OpenRecordReplayChannel(uint32_t channelId)
         returns (FileDescriptor connection);
     async CreateReplayingProcess(uint32_t channelId);
-    async TerminateReplayingProcess(uint32_t channelId);
 
     async CreateGMPService();
 
     async InitStreamFilter(uint64_t channelId, nsString addonId)
         returns (Endpoint<PStreamFilterChild> aEndpoint);
 
     /**
      * This call connects the content process to a plugin process. This call
--- a/toolkit/recordreplay/ipc/ChildProcess.cpp
+++ b/toolkit/recordreplay/ipc/ChildProcess.cpp
@@ -18,45 +18,40 @@ namespace parent {
 static IntroductionMessage* gIntroductionMessage;
 
 // How many channels have been constructed so far.
 static size_t gNumChannels;
 
 // Whether children might be debugged and should not be treated as hung.
 static bool gChildrenAreDebugging;
 
-// Whether we are allowed to restart crashed/hung child processes.
-static bool gRestartEnabled;
-
 /* static */ void
 ChildProcessInfo::SetIntroductionMessage(IntroductionMessage* aMessage)
 {
   gIntroductionMessage = aMessage;
 }
 
 ChildProcessInfo::ChildProcessInfo(UniquePtr<ChildRole> aRole,
                                    const Maybe<RecordingProcessData>& aRecordingProcessData)
   : mChannel(nullptr)
   , mRecording(aRecordingProcessData.isSome())
   , mRecoveryStage(RecoveryStage::None)
   , mPaused(false)
   , mPausedMessage(nullptr)
   , mLastCheckpoint(CheckpointId::Invalid)
   , mNumRecoveredMessages(0)
-  , mNumRestarts(0)
   , mRole(std::move(aRole))
   , mPauseNeeded(false)
 {
   MOZ_RELEASE_ASSERT(NS_IsMainThread());
 
   static bool gFirst = false;
   if (!gFirst) {
     gFirst = true;
     gChildrenAreDebugging = !!getenv("WAIT_AT_START");
-    gRestartEnabled = !getenv("NO_RESTARTS");
   }
 
   mRole->SetProcess(this);
 
   LaunchSubprocess(aRecordingProcessData);
 
   // Replaying processes always save the first checkpoint, if saving
   // checkpoints is allowed. This is currently assumed by the rewinding
@@ -201,17 +196,17 @@ ChildProcessInfo::OnIncomingMessage(size
   // Ignore messages from channels for subprocesses we terminated already.
   if (aChannelId != mChannel->GetId()) {
     return;
   }
 
   // Always handle fatal errors in the same way.
   if (aMsg.mType == MessageType::FatalError) {
     const FatalErrorMessage& nmsg = static_cast<const FatalErrorMessage&>(aMsg);
-    AttemptRestart(nmsg.Error());
+    OnCrash(nmsg.Error());
     return;
   }
 
   mLastMessageTime = TimeStamp::Now();
 
   if (IsRecovering()) {
     OnIncomingRecoveryMessage(aMsg);
     return;
@@ -518,78 +513,27 @@ ChildProcessInfo::LaunchSubprocess(const
 
   // The child should send us a HitCheckpoint with an invalid ID to pause.
   WaitUntilPaused();
 
   MOZ_RELEASE_ASSERT(gIntroductionMessage);
   SendMessage(*gIntroductionMessage);
 }
 
-///////////////////////////////////////////////////////////////////////////////
-// Recovering Crashed / Hung Children
-///////////////////////////////////////////////////////////////////////////////
-
-// The number of times we will restart a process before giving up.
-static const size_t MaxRestarts = 5;
-
-bool
-ChildProcessInfo::CanRestart()
-{
-  return gRestartEnabled
-      && !IsRecording()
-      && !IsPaused()
-      && !IsRecovering()
-      && mNumRestarts < MaxRestarts;
-}
-
 void
-ChildProcessInfo::AttemptRestart(const char* aWhy)
+ChildProcessInfo::OnCrash(const char* aWhy)
 {
   MOZ_RELEASE_ASSERT(NS_IsMainThread());
 
-  PrintSpew("Warning: Child process died [%d]: %s\n", (int) GetId(), aWhy);
-
-  if (!CanRestart()) {
-    CrashReporter::AnnotateCrashReport(CrashReporter::Annotation::RecordReplayError,
-                                       nsAutoCString(aWhy));
-    Shutdown();
-  }
-
-  mNumRestarts++;
-
-  dom::ContentChild::GetSingleton()->SendTerminateReplayingProcess(mChannel->GetId());
-
-  bool newPaused = mPaused;
-  Message* newPausedMessage = mPausedMessage;
-
-  mPaused = false;
-  mPausedMessage = nullptr;
-
-  size_t newLastCheckpoint = mLastCheckpoint;
-  mLastCheckpoint = CheckpointId::Invalid;
-
-  InfallibleVector<Message*> newMessages;
-  newMessages.append(mMessages.begin(), mMessages.length());
-  mMessages.clear();
-
-  InfallibleVector<size_t> newShouldSaveCheckpoints;
-  newShouldSaveCheckpoints.append(mShouldSaveCheckpoints.begin(), mShouldSaveCheckpoints.length());
-  mShouldSaveCheckpoints.clear();
-
-  LaunchSubprocess(Nothing());
-
-  // Disallow child processes from intentionally crashing after restarting.
-  SendMessage(SetAllowIntentionalCrashesMessage(false));
-
-  for (size_t checkpoint : newShouldSaveCheckpoints) {
-    SendMessage(SetSaveCheckpointMessage(checkpoint, true));
-  }
-
-  Recover(newPaused, newPausedMessage, newLastCheckpoint,
-          newMessages.begin(), newMessages.length());
+  // If a child process crashes or hangs then annotate the crash report and
+  // shut down cleanly so that we don't mask the report with our own crash.
+  // We want the crash to happen quickly so the user doesn't get a hanged tab.
+  CrashReporter::AnnotateCrashReport(CrashReporter::Annotation::RecordReplayError,
+                                     nsAutoCString(aWhy));
+  Shutdown();
 }
 
 ///////////////////////////////////////////////////////////////////////////////
 // Handling Channel Messages
 ///////////////////////////////////////////////////////////////////////////////
 
 // When messages are received from child processes, we want their handler to
 // execute on the main thread. The main thread might be blocked in WaitUntil,
@@ -661,17 +605,17 @@ ChildProcessInfo::WaitUntil(const std::f
             // Use SendMessageRaw to avoid problems if we are recovering.
             CrashReporter::AnnotateCrashReport(CrashReporter::Annotation::RecordReplayHang,
                                                true);
             SendMessageRaw(TerminateMessage());
             sentTerminateMessage = true;
           } else {
             // The child is still non-responsive after sending the terminate
             // message, fail without producing a minidump.
-            AttemptRestart("Child process non-responsive");
+            OnCrash("Child process non-responsive");
           }
         }
         gMonitor->WaitUntil(deadline);
       }
     }
   }
 }
 
--- a/toolkit/recordreplay/ipc/ParentInternal.h
+++ b/toolkit/recordreplay/ipc/ParentInternal.h
@@ -236,19 +236,16 @@ class ChildProcessInfo
   // will need to be sent to another process to recover it to the same state as
   // this process.
   InfallibleVector<Message*> mMessages;
 
   // In the PlayingMessages recovery stage, how much of mMessages has been sent
   // to the process.
   size_t mNumRecoveredMessages;
 
-  // The number of times we have restarted this process.
-  size_t mNumRestarts;
-
   // Current role of this process.
   UniquePtr<ChildRole> mRole;
 
   // Unsorted list of the checkpoints the process has been instructed to save.
   // Those at or before the most recent checkpoint will have been saved.
   InfallibleVector<size_t> mShouldSaveCheckpoints;
 
   // Sorted major checkpoints for this process. See ParentIPC.cpp.
@@ -271,18 +268,17 @@ class ChildProcessInfo
     BeforeLastCheckpoint,
     AfterLastCheckpoint
   };
   Disposition GetDisposition();
 
   void Recover(bool aPaused, Message* aPausedMessage, size_t aLastCheckpoint,
                Message** aMessages, size_t aNumMessages);
 
-  bool CanRestart();
-  void AttemptRestart(const char* aWhy);
+  void OnCrash(const char* aWhy);
   void LaunchSubprocess(const Maybe<RecordingProcessData>& aRecordingProcessData);
 
 public:
   ChildProcessInfo(UniquePtr<ChildRole> aRole,
                    const Maybe<RecordingProcessData>& aRecordingProcessData);
   ~ChildProcessInfo();
 
   ChildRole* Role() { return mRole.get(); }