Bug 943174 - Avoid waitpid on already-reaped content processes. r=bsmedberg a=lsblakk
authorJed Davis <jld@mozilla.com>
Wed, 09 Apr 2014 21:49:09 -0700
changeset 185455 7906c1b48e0d53f3853911f7ea2e13c83f7e0ffb
parent 185454 eef9ddcce2046e863b9565a3ede048fe1099db3f
child 185456 77cb0b448bb28d0ea7a8001833d2e1d79d413eb2
push idunknown
push userunknown
push dateunknown
reviewersbsmedberg, lsblakk
bugs943174, 989042
milestone30.0a2
Bug 943174 - Avoid waitpid on already-reaped content processes. r=bsmedberg a=lsblakk Includes fix for bug 989042 (BSD build breakage caused by m-c patch for bug 943174).
dom/ipc/ContentParent.cpp
ipc/chromium/src/base/process_util_posix.cc
ipc/glue/GeckoChildProcessHost.cpp
ipc/glue/GeckoChildProcessHost.h
--- a/dom/ipc/ContentParent.cpp
+++ b/dom/ipc/ContentParent.cpp
@@ -10,16 +10,21 @@
 
 #include "ContentParent.h"
 
 #if defined(ANDROID) || defined(LINUX)
 # include <sys/time.h>
 # include <sys/resource.h>
 #endif
 
+#ifdef MOZ_WIDGET_GONK
+#include <sys/types.h>
+#include <sys/wait.h>
+#endif
+
 #include "chrome/common/process_watcher.h"
 
 #include "AppProcessChecker.h"
 #include "AudioChannelService.h"
 #include "CrashReporterParent.h"
 #include "IHistory.h"
 #include "IDBFactory.h"
 #include "IndexedDBParent.h"
@@ -912,22 +917,27 @@ bool
 ContentParent::SetPriorityAndCheckIsAlive(ProcessPriority aPriority)
 {
     ProcessPriorityManager::SetProcessPriority(this, aPriority);
 
     // Now that we've set this process's priority, check whether the process is
     // still alive.  Hopefully we've set the priority to FOREGROUND*, so the
     // process won't unexpectedly crash after this point!
     //
-    // It's not legal to call DidProcessCrash on Windows if the process has not
-    // terminated yet, so we have to skip this check here.
-#ifndef XP_WIN
-    bool exited = false;
-    base::DidProcessCrash(&exited, mSubprocess->GetChildProcessHandle());
-    if (exited) {
+    // Bug 943174: use waitid() with WNOWAIT so that, if the process
+    // did exit, we won't consume its zombie and confuse the
+    // GeckoChildProcessHost dtor.  Also, if the process isn't a
+    // direct child because of Nuwa this will fail with ECHILD, and we
+    // need to assume the child is alive in that case rather than
+    // assuming it's dead (as is otherwise a reasonable fallback).
+#ifdef MOZ_WIDGET_GONK
+    siginfo_t info;
+    info.si_pid = 0;
+    if (waitid(P_PID, Pid(), &info, WNOWAIT | WNOHANG | WEXITED) == 0
+        && info.si_pid != 0) {
         return false;
     }
 #endif
 
     return true;
 }
 
 // Helper for ContentParent::TransformPreallocatedIntoApp.
@@ -2385,16 +2395,17 @@ ContentParent::KillHard()
     mForceKillTask = nullptr;
     // This ensures the process is eventually killed, but doesn't
     // immediately KILLITWITHFIRE because we want to get a minidump if
     // possible.  After a timeout though, the process is forceably
     // killed.
     if (!KillProcess(OtherProcess(), 1, false)) {
         NS_WARNING("failed to kill subprocess!");
     }
+    mSubprocess->SetAlreadyDead();
     XRE_GetIOMessageLoop()->PostTask(
         FROM_HERE,
         NewRunnableFunction(&ProcessWatcher::EnsureProcessTerminated,
                             OtherProcess(), /*force=*/true));
     //We do clean-up here 
     MessageLoop::current()->PostDelayedTask(
         FROM_HERE,
         NewRunnableMethod(this, &ContentParent::ShutDownProcess,
--- a/ipc/chromium/src/base/process_util_posix.cc
+++ b/ipc/chromium/src/base/process_util_posix.cc
@@ -243,22 +243,29 @@ ProcessMetrics* ProcessMetrics::CreatePr
 }
 
 ProcessMetrics::~ProcessMetrics() { }
 
 bool DidProcessCrash(bool* child_exited, ProcessHandle handle) {
   int status;
   const int result = HANDLE_EINTR(waitpid(handle, &status, WNOHANG));
   if (result == -1) {
-    // The dead process originally spawned from Nuwa might be taken as not
-    // crashed because the above waitpid() call returns -1 and ECHILD. The
-    // caller shouldn't behave incorrectly because of this false negative.
+    // This shouldn't happen, but sometimes it does.  The error is
+    // probably ECHILD and the reason is probably that a pid was
+    // waited on again after a previous wait reclaimed its zombie.
+    // (It could also occur if the process isn't a direct child, but
+    // don't do that.)  This is bad, because it risks interfering with
+    // an unrelated child process if the pid is reused.
+    //
+    // So, lacking reliable information, we indicate that the process
+    // is dead, in the hope that the caller will give up and stop
+    // calling us.  See also bug 943174 and bug 933680.
     CHROMIUM_LOG(ERROR) << "waitpid failed pid:" << handle << " errno:" << errno;
     if (child_exited)
-      *child_exited = false;
+      *child_exited = true;
     return false;
   } else if (result == 0) {
     // the child hasn't exited yet.
     if (child_exited)
       *child_exited = false;
     return false;
   }
 
--- a/ipc/glue/GeckoChildProcessHost.cpp
+++ b/ipc/glue/GeckoChildProcessHost.cpp
@@ -380,16 +380,22 @@ GeckoChildProcessHost::Join()
   AssertIOThread();
 
   if (!mChildProcessHandle) {
     return;
   }
 
   // If this fails, there's nothing we can do.
   base::KillProcess(mChildProcessHandle, 0, /*wait*/true);
+  SetAlreadyDead();
+}
+
+void
+GeckoChildProcessHost::SetAlreadyDead()
+{
   mChildProcessHandle = 0;
 }
 
 int32_t GeckoChildProcessHost::mChildCounter = 0;
 
 //
 // Wrapper function for handling GECKO_SEPARATE_NSPR_LOGS
 //
@@ -891,16 +897,20 @@ GeckoExistingProcessHost(GeckoProcessTyp
   , mExistingFileDescriptor(aFileDescriptor)
 {
   NS_ASSERTION(aFileDescriptor.IsValid(),
                "Expected file descriptor to be valid");
 }
 
 GeckoExistingProcessHost::~GeckoExistingProcessHost()
 {
+  // Bug 943174: If we don't do this, ~GeckoChildProcessHost will try
+  // to wait on a process that isn't a direct child, and bad things
+  // will happen.
+  SetAlreadyDead();
 }
 
 bool
 GeckoExistingProcessHost::PerformAsyncLaunch(StringVector aExtraOpts,
                                              base::ProcessArchitecture aArch)
 {
   SetHandle(mExistingProcessHandle);
 
--- a/ipc/glue/GeckoChildProcessHost.h
+++ b/ipc/glue/GeckoChildProcessHost.h
@@ -118,16 +118,19 @@ public:
 #endif
 
   /**
    * Must run on the IO thread.  Cause the OS process to exit and
    * ensure its OS resources are cleaned up.
    */
   void Join();
 
+  // For bug 943174: Skip the EnsureProcessTerminated call in the destructor.
+  void SetAlreadyDead();
+
   void SetSandboxEnabled(bool aSandboxEnabled) {
     mSandboxEnabled = aSandboxEnabled;
   }
 
 protected:
   GeckoProcessType mProcessType;
   bool mSandboxEnabled;
   ChildPrivileges mPrivileges;