Bug 1504372 - Always generate minidumps for replaying child crashes, r=mccr8.
authorBrian Hackett <bhackett1024@gmail.com>
Fri, 02 Nov 2018 14:30:39 -1000
changeset 444776 b29555ff89f8c15d967311e7ba9d3f78ddf377d4
parent 444775 d389be1e24391c06947a209321409534a7591c28
child 444777 825a721d809360ac7a198259e7df28c94915fbc2
push id35002
push userncsoregi@mozilla.com
push dateWed, 07 Nov 2018 09:53:22 +0000
treeherdermozilla-central@070757a0160c [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersmccr8
bugs1504372
milestone65.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1504372 - Always generate minidumps for replaying child crashes, r=mccr8.
toolkit/recordreplay/ipc/Channel.cpp
toolkit/recordreplay/ipc/Channel.h
toolkit/recordreplay/ipc/ChildIPC.cpp
toolkit/recordreplay/ipc/ChildProcess.cpp
toolkit/recordreplay/ipc/ParentInternal.h
--- a/toolkit/recordreplay/ipc/Channel.cpp
+++ b/toolkit/recordreplay/ipc/Channel.cpp
@@ -146,16 +146,17 @@ Channel::ThreadMain(void* aChannelArg)
     channel->mHandler(msg);
   }
 }
 
 void
 Channel::SendMessage(const Message& aMsg)
 {
   MOZ_RELEASE_ASSERT(NS_IsMainThread() ||
+                     aMsg.mType == MessageType::BeginFatalError ||
                      aMsg.mType == MessageType::FatalError ||
                      aMsg.mType == MessageType::MiddlemanCallRequest);
 
   // Block until the channel is initialized.
   if (!mInitialized) {
     MonitorAutoLock lock(mMonitor);
     while (!mInitialized) {
       mMonitor.Wait();
--- a/toolkit/recordreplay/ipc/Channel.h
+++ b/toolkit/recordreplay/ipc/Channel.h
@@ -104,18 +104,23 @@ namespace recordreplay {
   /* Messages sent from the child process to the middleman. */ \
                                                                \
   /* Sent in response to a FlushRecording, telling the middleman that the flush */ \
   /* has finished. */                                          \
   _Macro(RecordingFlushed)                                     \
                                                                \
   /* A critical error occurred and execution cannot continue. The child will */ \
   /* stop executing after sending this message and will wait to be terminated. */ \
+  /* A minidump for the child has been generated. */           \
   _Macro(FatalError)                                           \
                                                                \
+  /* Sent when a fatal error has occurred, but before the minidump has been */ \
+  /* generated. */                                             \
+  _Macro(BeginFatalError)                                      \
+                                                               \
   /* The child's graphics were repainted. */                   \
   _Macro(Paint)                                                \
                                                                \
   /* Notify the middleman that a checkpoint or breakpoint was hit. */ \
   /* The child will pause after sending these messages. */     \
   _Macro(HitCheckpoint)                                        \
   _Macro(HitBreakpoint)                                        \
                                                                \
@@ -366,16 +371,18 @@ struct FatalErrorMessage : public Messag
 {
   explicit FatalErrorMessage(uint32_t aSize)
     : Message(MessageType::FatalError, aSize)
   {}
 
   const char* Error() const { return Data<FatalErrorMessage, const char>(); }
 };
 
+typedef EmptyMessage<MessageType::BeginFatalError> BeginFatalErrorMessage;
+
 // The format for graphics data which will be sent to the middleman process.
 // This needs to match the format expected for canvas image data, to avoid
 // transforming the data before rendering it in the middleman process.
 static const gfx::SurfaceFormat gSurfaceFormat = gfx::SurfaceFormat::R8G8B8X8;
 
 struct PaintMessage : public Message
 {
   // Checkpoint whose state is being painted.
--- a/toolkit/recordreplay/ipc/ChildIPC.cpp
+++ b/toolkit/recordreplay/ipc/ChildIPC.cpp
@@ -103,18 +103,19 @@ ChannelMessageHandler(Message* aMsg)
     // processes. When sent to a recording process (which the middleman manages
     // directly) they signal that a clean shutdown is needed, while when sent
     // to a replaying process (which the UI process manages) they signal that
     // the process should crash, since it seems to be hanged.
     if (IsRecording()) {
       PrintSpew("Terminate message received, exiting...\n");
       _exit(0);
     } else {
-      MOZ_CRASH("Hanged replaying process");
+      ReportFatalError(Nothing(), "Hung replaying process");
     }
+    break;
   }
   case MessageType::SetIsActive: {
     const SetIsActiveMessage& nmsg = (const SetIsActiveMessage&) *aMsg;
     PauseMainThreadAndInvokeCallback([=]() { SetIsActiveChild(nmsg.mActive); });
     break;
   }
   case MessageType::SetAllowIntentionalCrashes: {
     const SetAllowIntentionalCrashesMessage& nmsg = (const SetAllowIntentionalCrashesMessage&) *aMsg;
@@ -345,16 +346,20 @@ CreateCheckpoint()
   if (!HasDivergedFromRecording()) {
     NewCheckpoint(/* aTemporary = */ false);
   }
 }
 
 void
 ReportFatalError(const Maybe<MinidumpInfo>& aMinidump, const char* aFormat, ...)
 {
+  // Notify the middleman that we are crashing and are going to try to write a
+  // minidump.
+  gChannel->SendMessage(BeginFatalErrorMessage());
+
   // Unprotect any memory which might be written while producing the minidump.
   UnrecoverableSnapshotFailure();
 
   AutoEnsurePassThroughThreadEvents pt;
 
 #ifdef MOZ_CRASHREPORTER
   MinidumpInfo info = aMinidump.isSome()
                       ? aMinidump.ref()
--- a/toolkit/recordreplay/ipc/ChildProcess.cpp
+++ b/toolkit/recordreplay/ipc/ChildProcess.cpp
@@ -35,16 +35,18 @@ ChildProcessInfo::ChildProcessInfo(Uniqu
   , mRecording(aRecordingProcessData.isSome())
   , mRecoveryStage(RecoveryStage::None)
   , mPaused(false)
   , mPausedMessage(nullptr)
   , mLastCheckpoint(CheckpointId::Invalid)
   , mNumRecoveredMessages(0)
   , mRole(std::move(aRole))
   , mPauseNeeded(false)
+  , mHasBegunFatalError(false)
+  , mHasFatalError(false)
 {
   MOZ_RELEASE_ASSERT(NS_IsMainThread());
 
   static bool gFirst = false;
   if (!gFirst) {
     gFirst = true;
     gChildrenAreDebugging = !!getenv("WAIT_AT_START");
   }
@@ -194,17 +196,21 @@ ChildProcessInfo::OnIncomingMessage(size
   MOZ_RELEASE_ASSERT(NS_IsMainThread());
 
   // Ignore messages from channels for subprocesses we terminated already.
   if (aChannelId != mChannel->GetId()) {
     return;
   }
 
   // Always handle fatal errors in the same way.
-  if (aMsg.mType == MessageType::FatalError) {
+  if (aMsg.mType == MessageType::BeginFatalError) {
+    mHasBegunFatalError = true;
+    return;
+  } else if (aMsg.mType == MessageType::FatalError) {
+    mHasFatalError = true;
     const FatalErrorMessage& nmsg = static_cast<const FatalErrorMessage&>(aMsg);
     OnCrash(nmsg.Error());
     return;
   }
 
   mLastMessageTime = TimeStamp::Now();
 
   if (IsRecovering()) {
@@ -522,22 +528,33 @@ ChildProcessInfo::LaunchSubprocess(const
   SendMessage(*gIntroductionMessage);
 }
 
 void
 ChildProcessInfo::OnCrash(const char* aWhy)
 {
   MOZ_RELEASE_ASSERT(NS_IsMainThread());
 
-  // If a child process crashes or hangs then annotate the crash report and
-  // shut down cleanly so that we don't mask the report with our own crash.
-  // We want the crash to happen quickly so the user doesn't get a hanged tab.
+  // If a child process crashes or hangs then annotate the crash report.
   CrashReporter::AnnotateCrashReport(CrashReporter::Annotation::RecordReplayError,
                                      nsAutoCString(aWhy));
-  Shutdown();
+
+  // If we received a FatalError message then the child generated a minidump.
+  // Shut down cleanly so that we don't mask the report with our own crash.
+  if (mHasFatalError) {
+    Shutdown();
+  }
+
+  // Indicate when we crash if the child tried to send us a fatal error message
+  // but had a problem either unprotecting system memory or generating the
+  // minidump.
+  MOZ_RELEASE_ASSERT(!mHasBegunFatalError);
+
+  // The child crashed without producing a minidump, produce one ourselves.
+  MOZ_CRASH("Unexpected child crash");
 }
 
 ///////////////////////////////////////////////////////////////////////////////
 // Handling Channel Messages
 ///////////////////////////////////////////////////////////////////////////////
 
 // When messages are received from child processes, we want their handler to
 // execute on the main thread. The main thread might be blocked in WaitUntil,
@@ -608,17 +625,17 @@ ChildProcessInfo::WaitUntil(const std::f
             // wait another HangSeconds before hitting the restart case below.
             // Use SendMessageRaw to avoid problems if we are recovering.
             CrashReporter::AnnotateCrashReport(CrashReporter::Annotation::RecordReplayHang,
                                                true);
             SendMessageRaw(TerminateMessage());
             sentTerminateMessage = true;
           } else {
             // The child is still non-responsive after sending the terminate
-            // message, fail without producing a minidump.
+            // message.
             OnCrash("Child process non-responsive");
           }
         }
         gMonitor->WaitUntil(deadline);
       }
     }
   }
 }
--- a/toolkit/recordreplay/ipc/ParentInternal.h
+++ b/toolkit/recordreplay/ipc/ParentInternal.h
@@ -269,16 +269,21 @@ class ChildProcessInfo
   InfallibleVector<size_t> mShouldSaveCheckpoints;
 
   // Sorted major checkpoints for this process. See ParentIPC.cpp.
   InfallibleVector<size_t> mMajorCheckpoints;
 
   // Whether we need this child to pause while the recording is updated.
   bool mPauseNeeded;
 
+  // Flags for whether we have received messages from the child indicating it
+  // is crashing.
+  bool mHasBegunFatalError;
+  bool mHasFatalError;
+
   void OnIncomingMessage(size_t aChannelId, const Message& aMsg);
   void OnIncomingRecoveryMessage(const Message& aMsg);
   void SendNextRecoveryMessage();
   void SendMessageRaw(const Message& aMsg);
 
   static void MaybeProcessPendingMessageRunnable();
   void ReceiveChildMessageOnMainThread(size_t aChannelId, Message* aMsg);