Bug 1497057 - Remove ability to restart replaying children, r=mccr8.
authorBrian Hackett <bhackett1024@gmail.com>
Tue, 09 Oct 2018 14:24:14 -1000
changeset 488786 31c68d4d5e8b3e117d19795debe841a968797441
parent 488785 e36764125483b541b8647df406ef29ac2eb89876
child 488787 eff92a00f2d86994ecc2fd5c80e13e958b7b45b4
push id246
push userfmarier@mozilla.com
push dateSat, 13 Oct 2018 00:15:40 +0000
reviewersmccr8
bugs1497057
milestone64.0a1
Bug 1497057 - Remove ability to restart replaying children, r=mccr8.
dom/ipc/ContentParent.cpp
dom/ipc/ContentParent.h
dom/ipc/PContent.ipdl
toolkit/recordreplay/ipc/ChildProcess.cpp
toolkit/recordreplay/ipc/ParentInternal.h
--- a/dom/ipc/ContentParent.cpp
+++ b/dom/ipc/ContentParent.cpp
@@ -2097,31 +2097,16 @@ ContentParent::RecvCreateReplayingProces
   mReplayingChildren[aChannelId] = new GeckoChildProcessHost(GeckoProcessType_Content);
   if (!mReplayingChildren[aChannelId]->LaunchAndWaitForProcessHandle(extraArgs)) {
     return IPC_FAIL_NO_REASON(this);
   }
 
   return IPC_OK();
 }
 
-mozilla::ipc::IPCResult
-ContentParent::RecvTerminateReplayingProcess(const uint32_t& aChannelId)
-{
-  // We should only get this message from the child if it is recording or replaying.
-  if (!this->IsRecordingOrReplaying()) {
-    return IPC_FAIL_NO_REASON(this);
-  }
-
-  if (aChannelId < mReplayingChildren.length() && mReplayingChildren[aChannelId]) {
-    DelayedDeleteSubprocess(mReplayingChildren[aChannelId]);
-    mReplayingChildren[aChannelId] = nullptr;
-  }
-  return IPC_OK();
-}
-
 jsipc::CPOWManager*
 ContentParent::GetCPOWManager()
 {
   if (PJavaScriptParent* p = LoneManagedOrNullAsserts(ManagedPJavaScriptParent())) {
     return CPOWManagerFor(p);
   }
   return nullptr;
 }
--- a/dom/ipc/ContentParent.h
+++ b/dom/ipc/ContentParent.h
@@ -302,17 +302,16 @@ public:
                                                          bool* aIsForBrowser) override;
 
   virtual mozilla::ipc::IPCResult RecvBridgeToChildProcess(const ContentParentId& aCpId,
                                                            Endpoint<PContentBridgeParent>* aEndpoint) override;
 
   virtual mozilla::ipc::IPCResult RecvOpenRecordReplayChannel(const uint32_t& channelId,
                                                               FileDescriptor* connection) override;
   virtual mozilla::ipc::IPCResult RecvCreateReplayingProcess(const uint32_t& aChannelId) override;
-  virtual mozilla::ipc::IPCResult RecvTerminateReplayingProcess(const uint32_t& aChannelId) override;
 
   virtual mozilla::ipc::IPCResult RecvCreateGMPService() override;
 
   virtual mozilla::ipc::IPCResult RecvLoadPlugin(const uint32_t& aPluginId, nsresult* aRv,
                                                  uint32_t* aRunID,
                                                  Endpoint<PPluginModuleParent>* aEndpoint) override;
 
   virtual mozilla::ipc::IPCResult RecvMaybeReloadPlugins() override;
--- a/dom/ipc/PContent.ipdl
+++ b/dom/ipc/PContent.ipdl
@@ -737,17 +737,16 @@ parent:
                             TabId tabId)
         returns (ContentParentId cpId, bool isForBrowser);
     sync BridgeToChildProcess(ContentParentId cpId)
         returns (Endpoint<PContentBridgeParent> endpoint);
 
     sync OpenRecordReplayChannel(uint32_t channelId)
         returns (FileDescriptor connection);
     async CreateReplayingProcess(uint32_t channelId);
-    async TerminateReplayingProcess(uint32_t channelId);
 
     async CreateGMPService();
 
     async InitStreamFilter(uint64_t channelId, nsString addonId)
         returns (Endpoint<PStreamFilterChild> aEndpoint);
 
     /**
      * This call connects the content process to a plugin process. This call
--- a/toolkit/recordreplay/ipc/ChildProcess.cpp
+++ b/toolkit/recordreplay/ipc/ChildProcess.cpp
@@ -18,45 +18,40 @@ namespace parent {
 static IntroductionMessage* gIntroductionMessage;
 
 // How many channels have been constructed so far.
 static size_t gNumChannels;
 
 // Whether children might be debugged and should not be treated as hung.
 static bool gChildrenAreDebugging;
 
-// Whether we are allowed to restart crashed/hung child processes.
-static bool gRestartEnabled;
-
 /* static */ void
 ChildProcessInfo::SetIntroductionMessage(IntroductionMessage* aMessage)
 {
   gIntroductionMessage = aMessage;
 }
 
 ChildProcessInfo::ChildProcessInfo(UniquePtr<ChildRole> aRole,
                                    const Maybe<RecordingProcessData>& aRecordingProcessData)
   : mChannel(nullptr)
   , mRecording(aRecordingProcessData.isSome())
   , mRecoveryStage(RecoveryStage::None)
   , mPaused(false)
   , mPausedMessage(nullptr)
   , mLastCheckpoint(CheckpointId::Invalid)
   , mNumRecoveredMessages(0)
-  , mNumRestarts(0)
   , mRole(std::move(aRole))
   , mPauseNeeded(false)
 {
   MOZ_RELEASE_ASSERT(NS_IsMainThread());
 
   static bool gFirst = false;
   if (!gFirst) {
     gFirst = true;
     gChildrenAreDebugging = !!getenv("WAIT_AT_START");
-    gRestartEnabled = !getenv("NO_RESTARTS");
   }
 
   mRole->SetProcess(this);
 
   LaunchSubprocess(aRecordingProcessData);
 
   // Replaying processes always save the first checkpoint, if saving
   // checkpoints is allowed. This is currently assumed by the rewinding
@@ -201,17 +196,17 @@ ChildProcessInfo::OnIncomingMessage(size
   // Ignore messages from channels for subprocesses we terminated already.
   if (aChannelId != mChannel->GetId()) {
     return;
   }
 
   // Always handle fatal errors in the same way.
   if (aMsg.mType == MessageType::FatalError) {
     const FatalErrorMessage& nmsg = static_cast<const FatalErrorMessage&>(aMsg);
-    AttemptRestart(nmsg.Error());
+    OnCrash(nmsg.Error());
     return;
   }
 
   mLastMessageTime = TimeStamp::Now();
 
   if (IsRecovering()) {
     OnIncomingRecoveryMessage(aMsg);
     return;
@@ -518,78 +513,27 @@ ChildProcessInfo::LaunchSubprocess(const
 
   // The child should send us a HitCheckpoint with an invalid ID to pause.
   WaitUntilPaused();
 
   MOZ_RELEASE_ASSERT(gIntroductionMessage);
   SendMessage(*gIntroductionMessage);
 }
 
-///////////////////////////////////////////////////////////////////////////////
-// Recovering Crashed / Hung Children
-///////////////////////////////////////////////////////////////////////////////
-
-// The number of times we will restart a process before giving up.
-static const size_t MaxRestarts = 5;
-
-bool
-ChildProcessInfo::CanRestart()
-{
-  return gRestartEnabled
-      && !IsRecording()
-      && !IsPaused()
-      && !IsRecovering()
-      && mNumRestarts < MaxRestarts;
-}
-
 void
-ChildProcessInfo::AttemptRestart(const char* aWhy)
+ChildProcessInfo::OnCrash(const char* aWhy)
 {
   MOZ_RELEASE_ASSERT(NS_IsMainThread());
 
-  PrintSpew("Warning: Child process died [%d]: %s\n", (int) GetId(), aWhy);
-
-  if (!CanRestart()) {
-    CrashReporter::AnnotateCrashReport(CrashReporter::Annotation::RecordReplayError,
-                                       nsAutoCString(aWhy));
-    Shutdown();
-  }
-
-  mNumRestarts++;
-
-  dom::ContentChild::GetSingleton()->SendTerminateReplayingProcess(mChannel->GetId());
-
-  bool newPaused = mPaused;
-  Message* newPausedMessage = mPausedMessage;
-
-  mPaused = false;
-  mPausedMessage = nullptr;
-
-  size_t newLastCheckpoint = mLastCheckpoint;
-  mLastCheckpoint = CheckpointId::Invalid;
-
-  InfallibleVector<Message*> newMessages;
-  newMessages.append(mMessages.begin(), mMessages.length());
-  mMessages.clear();
-
-  InfallibleVector<size_t> newShouldSaveCheckpoints;
-  newShouldSaveCheckpoints.append(mShouldSaveCheckpoints.begin(), mShouldSaveCheckpoints.length());
-  mShouldSaveCheckpoints.clear();
-
-  LaunchSubprocess(Nothing());
-
-  // Disallow child processes from intentionally crashing after restarting.
-  SendMessage(SetAllowIntentionalCrashesMessage(false));
-
-  for (size_t checkpoint : newShouldSaveCheckpoints) {
-    SendMessage(SetSaveCheckpointMessage(checkpoint, true));
-  }
-
-  Recover(newPaused, newPausedMessage, newLastCheckpoint,
-          newMessages.begin(), newMessages.length());
+  // If a child process crashes or hangs then annotate the crash report and
+  // shut down cleanly so that we don't mask the report with our own crash.
+  // We want the crash to happen quickly so the user doesn't get a hanged tab.
+  CrashReporter::AnnotateCrashReport(CrashReporter::Annotation::RecordReplayError,
+                                     nsAutoCString(aWhy));
+  Shutdown();
 }
 
 ///////////////////////////////////////////////////////////////////////////////
 // Handling Channel Messages
 ///////////////////////////////////////////////////////////////////////////////
 
 // When messages are received from child processes, we want their handler to
 // execute on the main thread. The main thread might be blocked in WaitUntil,
@@ -661,17 +605,17 @@ ChildProcessInfo::WaitUntil(const std::f
             // Use SendMessageRaw to avoid problems if we are recovering.
             CrashReporter::AnnotateCrashReport(CrashReporter::Annotation::RecordReplayHang,
                                                true);
             SendMessageRaw(TerminateMessage());
             sentTerminateMessage = true;
           } else {
             // The child is still non-responsive after sending the terminate
             // message, fail without producing a minidump.
-            AttemptRestart("Child process non-responsive");
+            OnCrash("Child process non-responsive");
           }
         }
         gMonitor->WaitUntil(deadline);
       }
     }
   }
 }
 
--- a/toolkit/recordreplay/ipc/ParentInternal.h
+++ b/toolkit/recordreplay/ipc/ParentInternal.h
@@ -236,19 +236,16 @@ class ChildProcessInfo
   // will need to be sent to another process to recover it to the same state as
   // this process.
   InfallibleVector<Message*> mMessages;
 
   // In the PlayingMessages recovery stage, how much of mMessages has been sent
   // to the process.
   size_t mNumRecoveredMessages;
 
-  // The number of times we have restarted this process.
-  size_t mNumRestarts;
-
   // Current role of this process.
   UniquePtr<ChildRole> mRole;
 
   // Unsorted list of the checkpoints the process has been instructed to save.
   // Those at or before the most recent checkpoint will have been saved.
   InfallibleVector<size_t> mShouldSaveCheckpoints;
 
   // Sorted major checkpoints for this process. See ParentIPC.cpp.
@@ -271,18 +268,17 @@ class ChildProcessInfo
     BeforeLastCheckpoint,
     AfterLastCheckpoint
   };
   Disposition GetDisposition();
 
   void Recover(bool aPaused, Message* aPausedMessage, size_t aLastCheckpoint,
                Message** aMessages, size_t aNumMessages);
 
-  bool CanRestart();
-  void AttemptRestart(const char* aWhy);
+  void OnCrash(const char* aWhy);
   void LaunchSubprocess(const Maybe<RecordingProcessData>& aRecordingProcessData);
 
 public:
   ChildProcessInfo(UniquePtr<ChildRole> aRole,
                    const Maybe<RecordingProcessData>& aRecordingProcessData);
   ~ChildProcessInfo();
 
   ChildRole* Role() { return mRole.get(); }