Bug 1482275 Part 1 - Trigger crashes in hanged replaying processes, r=froydnj.
author Brian Hackett <bhackett1024@gmail.com>
Tue, 14 Aug 2018 00:48:28 +0000
changeset 431432 51d56958d3d0016f38591a340eef901845cd2d7e
parent 431431 7846bdd3762cf494ec24efc9cfc3472dc715ce4f
child 431433 d492e85d1040e12a5b34c25a45156e02d91d22fe
push id 34443
push user csabou@mozilla.com
push date Wed, 15 Aug 2018 00:53:32 +0000
treeherder mozilla-central@b80906e2fbc9 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers froydnj
bugs 1482275
milestone 63.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1482275 Part 1 - Trigger crashes in hanged replaying processes, r=froydnj.
toolkit/crashreporter/CrashAnnotations.yaml
toolkit/recordreplay/ipc/Channel.h
toolkit/recordreplay/ipc/ChildIPC.cpp
toolkit/recordreplay/ipc/ChildProcess.cpp
--- a/toolkit/crashreporter/CrashAnnotations.yaml
+++ b/toolkit/crashreporter/CrashAnnotations.yaml
@@ -578,16 +578,22 @@ ProxyStreamValid:
     Set to "false" when encountering an invalid IPC proxy stream.
   type: string
 
 RecordReplayError:
   description: >
     Any fatal error that occurred while recording/replaying a tab.
   type: string
 
+RecordReplayHang:
+  description: >
+    The presence of this annotation indicates that this crash was generated in
+    response to a hang in a replaying process.
+  type: boolean
+
 ReleaseChannel:
   description: >
     Application release channel (e.g. default, beta, ...)
   type: string
   ping: true
 
 RemoteType:
   description: >
--- a/toolkit/recordreplay/ipc/Channel.h
+++ b/toolkit/recordreplay/ipc/Channel.h
@@ -49,17 +49,18 @@ namespace recordreplay {
 // flushing a new index to the file.
 
 #define ForEachMessageType(_Macro)                             \
   /* Messages sent from the middleman to the child process. */ \
                                                                \
   /* Sent at startup. */                                       \
   _Macro(Introduction)                                         \
                                                                \
-  /* Sent to recording processes when exiting. */              \
+  /* Sent to recording processes when exiting, or to force a hanged replaying */ \
+  /* process to crash. */                                      \
   _Macro(Terminate)                                            \
                                                                \
   /* Flush the current recording to disk. */                   \
   _Macro(FlushRecording)                                       \
                                                                \
   /* Poke a child that is recording to create an artificial checkpoint, rather than */ \
   /* (potentially) idling indefinitely. This has no effect on a replaying process. */ \
   _Macro(CreateCheckpoint)                                     \
--- a/toolkit/recordreplay/ipc/ChildIPC.cpp
+++ b/toolkit/recordreplay/ipc/ChildIPC.cpp
@@ -81,19 +81,27 @@ ChannelMessageHandler(Message* aMsg)
     // paint and finished initializing.
     if (navigation::IsInitialized()) {
       uint8_t data = 0;
       DirectWrite(gCheckpointWriteFd, &data, 1);
     }
     break;
   }
   case MessageType::Terminate: {
-    PrintSpew("Terminate message received, exiting...\n");
-    MOZ_RELEASE_ASSERT(IsRecording());
-    _exit(0);
+    // Terminate messages behave differently in recording vs. replaying
+    // processes. When sent to a recording process (which the middleman manages
+    // directly) they signal that a clean shutdown is needed, while when sent
+    // to a replaying process (which the UI process manages) they signal that
+    // the process should crash, since it seems to be hanged.
+    if (IsRecording()) {
+      PrintSpew("Terminate message received, exiting...\n");
+      _exit(0);
+    } else {
+      MOZ_CRASH("Hanged replaying process");
+    }
   }
   case MessageType::SetIsActive: {
     const SetIsActiveMessage& nmsg = (const SetIsActiveMessage&) *aMsg;
     PauseMainThreadAndInvokeCallback([=]() { SetIsActiveChild(nmsg.mActive); });
     break;
   }
   case MessageType::SetAllowIntentionalCrashes: {
     const SetAllowIntentionalCrashesMessage& nmsg = (const SetAllowIntentionalCrashesMessage&) *aMsg;
--- a/toolkit/recordreplay/ipc/ChildProcess.cpp
+++ b/toolkit/recordreplay/ipc/ChildProcess.cpp
@@ -638,27 +638,40 @@ ChildProcessInfo::MaybeProcessPendingMes
 // considering that child to be hung.
 static const size_t HangSeconds = 5;
 
 void
 ChildProcessInfo::WaitUntil(const std::function<bool()>& aCallback)
 {
   MOZ_RELEASE_ASSERT(NS_IsMainThread());
 
+  bool sentTerminateMessage = false;
   while (!aCallback()) {
     MonitorAutoLock lock(*gMonitor);
     if (!MaybeProcessPendingMessage(this)) {
       if (gChildrenAreDebugging) {
         // Don't watch for hangs when children are being debugged.
         gMonitor->Wait();
       } else {
         TimeStamp deadline = mLastMessageTime + TimeDuration::FromSeconds(HangSeconds);
         if (TimeStamp::Now() >= deadline) {
           MonitorAutoUnlock unlock(*gMonitor);
-          AttemptRestart("Child process non-responsive");
+          if (!sentTerminateMessage) {
+            // Try to get the child to crash, so that we can get a minidump.
+            // Sending the message will reset mLastMessageTime so we get to
+            // wait another HangSeconds before hitting the restart case below.
+            CrashReporter::AnnotateCrashReport(CrashReporter::Annotation::RecordReplayHang,
+                                               true);
+            SendMessage(TerminateMessage());
+            sentTerminateMessage = true;
+          } else {
+            // The child is still non-responsive after sending the terminate
+            // message, fail without producing a minidump.
+            AttemptRestart("Child process non-responsive");
+          }
         }
         gMonitor->WaitUntil(deadline);
       }
     }
   }
 }
 
 // Runnable created on the main thread to handle any tasks sent by the replay