Bug 1487287 - Move child process launch off the I/O thread. r=mccr8
authorJed Davis <jld@mozilla.com>
Tue, 05 Feb 2019 00:15:22 +0000
changeset 517640 073aa330041c00d056c52ccbe26f57f65a25cf91
parent 517639 242953b250a5109cd71e2ec1226936e31150c6ef
child 517641 cc2d1c3652f540f116e81e828be73b1d20a65230
push id2032
push userffxbld-merge
push dateMon, 13 May 2019 09:36:57 +0000
treeherdermozilla-release@455c1065dcbe [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersmccr8
bugs1487287
milestone67.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1487287 - Move child process launch off the I/O thread. r=mccr8 Launching processes takes enough time that we should avoid blocking the parent process's IPC I/O thread for it; it's less bad for responsiveness than blocking the main thread, but it's not good. On Windows we need to use a dedicated thread, because the sandbox isn't thread-safe and it asserts that the same thread is used for every launch. Otherwise, a thread pool is used. (Or, in the Web Replay middleman process, where there isn't enough of XPCOM for any of this, launching the actual content processes remains on the I/O thread.) Depends on D18011 Differential Revision: https://phabricator.services.mozilla.com/D8946
ipc/glue/GeckoChildProcessHost.cpp
ipc/glue/GeckoChildProcessHost.h
--- a/ipc/glue/GeckoChildProcessHost.cpp
+++ b/ipc/glue/GeckoChildProcessHost.cpp
@@ -28,22 +28,26 @@
 #  include "nsAppDirectoryServiceDefs.h"
 #endif
 
 #include "nsExceptionHandler.h"
 
 #include "nsDirectoryServiceDefs.h"
 #include "nsIFile.h"
 #include "nsPrintfCString.h"
+#include "nsIObserverService.h"
 
-#include "mozilla/ClearOnShutdown.h"
 #include "mozilla/ipc/BrowserProcessSubThread.h"
 #include "mozilla/ipc/EnvironmentMap.h"
 #include "mozilla/Omnijar.h"
+#include "mozilla/RecordReplay.h"
 #include "mozilla/Scoped.h"
+#include "mozilla/Services.h"
+#include "mozilla/SharedThreadPool.h"
+#include "mozilla/StaticMutex.h"
 #include "mozilla/Telemetry.h"
 #include "ProtocolUtils.h"
 #include <sys/stat.h>
 
 #ifdef XP_WIN
 #  include "nsIWinTaskbar.h"
 #  include <stdlib.h>
 #  define NS_TASKBAR_CONTRACTID "@mozilla.org/windows-taskbar;1"
@@ -62,16 +66,17 @@
 #include "nsTArray.h"
 #include "nsClassHashtable.h"
 #include "nsHashKeys.h"
 #include "nsNativeCharsetUtils.h"
 #include "nscore.h"  // for NS_FREE_PERMANENT_DATA
 #include "private/pprio.h"
 
 using mozilla::MonitorAutoLock;
+using mozilla::StaticMutexAutoLock;
 using mozilla::ipc::GeckoChildProcessHost;
 
 namespace mozilla {
 MOZ_TYPE_SPECIFIC_SCOPED_POINTER_TEMPLATE(ScopedPRFileDesc, PRFileDesc,
                                           PR_Close)
 }
 
 using mozilla::ScopedPRFileDesc;
@@ -476,41 +481,150 @@ void GeckoChildProcessHost::GetChildLogN
     buffer.Append(origLogName);
   }
 
   // Append child-specific postfix to name
   buffer.AppendLiteral(".child-");
   buffer.AppendInt(mChildCounter);
 }
 
-bool GeckoChildProcessHost::RunPerformAsyncLaunch(
-    std::vector<std::string> aExtraOpts) {
-  InitializeChannel();
+namespace {
+// Windows needs a single dedicated thread for process launching,
+// because of thread-safety restrictions/assertions in the sandbox
+// code.  (This implementation isn't itself Windows-specific, so
+// the ifdef can be changed to test on other platforms.)
+#ifdef XP_WIN
+
+static mozilla::StaticMutex gIPCLaunchThreadMutex;
+static mozilla::StaticRefPtr<nsIThread> gIPCLaunchThread;
+
+class IPCLaunchThreadObserver final : public nsIObserver {
+ public:
+  NS_DECL_ISUPPORTS
+  NS_DECL_NSIOBSERVER
+ protected:
+  virtual ~IPCLaunchThreadObserver() = default;
+};
+
+NS_IMPL_ISUPPORTS(IPCLaunchThreadObserver, nsIObserver, nsISupports)
+
+NS_IMETHODIMP
+IPCLaunchThreadObserver::Observe(nsISupports* aSubject, const char* aTopic,
+                                 const char16_t* aData) {
+  MOZ_RELEASE_ASSERT(strcmp(aTopic, "xpcom-shutdown-threads") == 0);
+  StaticMutexAutoLock lock(gIPCLaunchThreadMutex);
+
+  nsresult rv = NS_OK;
+  if (gIPCLaunchThread) {
+    rv = gIPCLaunchThread->Shutdown();
+    gIPCLaunchThread = nullptr;
+  }
+  mozilla::Unused << NS_WARN_IF(NS_FAILED(rv));
+  return rv;
+}
 
-  bool ok = PerformAsyncLaunch(aExtraOpts);
-  if (!ok) {
-    CHROMIUM_LOG(ERROR) << "Failed to launch "
-                        << XRE_ChildProcessTypeToString(mProcessType)
-                        << " subprocess";
-    Telemetry::Accumulate(
-        Telemetry::SUBPROCESS_LAUNCH_FAILURE,
-        nsDependentCString(XRE_ChildProcessTypeToString(mProcessType)));
-    // WaitUntilConnected might be waiting for us to signal.
-    // If something failed let's set the error state and notify.
+static nsCOMPtr<nsIEventTarget> GetIPCLauncher() {
+  StaticMutexAutoLock lock(gIPCLaunchThreadMutex);
+  if (!gIPCLaunchThread) {
+    nsCOMPtr<nsIThread> thread;
+    nsresult rv = NS_NewNamedThread(NS_LITERAL_CSTRING("IPC Launch"),
+                                    getter_AddRefs(thread));
+    if (!NS_WARN_IF(NS_FAILED(rv))) {
+      NS_DispatchToMainThread(
+          NS_NewRunnableFunction("GeckoChildProcessHost::GetIPCLauncher", [] {
+            nsCOMPtr<nsIObserverService> obsService =
+                mozilla::services::GetObserverService();
+            nsCOMPtr<nsIObserver> obs = new IPCLaunchThreadObserver();
+            obsService->AddObserver(obs, "xpcom-shutdown-threads", false);
+          }));
+      gIPCLaunchThread = thread.forget();
+    }
+  }
+
+  nsCOMPtr<nsIEventTarget> thread = gIPCLaunchThread.get();
+  return thread;
+}
+
+#else  // XP_WIN
+
+// Non-Windows platforms can use an on-demand thread pool.
+
+static nsCOMPtr<nsIEventTarget> GetIPCLauncher() {
+  nsCOMPtr<nsIEventTarget> pool =
+      mozilla::SharedThreadPool::Get(NS_LITERAL_CSTRING("IPC Launch"));
+  return pool;
+}
+
+#endif  // XP_WIN
+}  // anonymous namespace
+
+void GeckoChildProcessHost::RunPerformAsyncLaunch(
+    std::vector<std::string> aExtraOpts) {
+  // Warning: rejecting the promise allows `this` to be deleted.  Do
+  // not use `this` after calling the `fail` function (including
+  // destructors of AutoLock objects).
+  //
+  // (Deletion happens on the I/O thread, so it's safe to access
+  // `this` afterwards from RunPerformAsyncLaunch itself, but not from
+  // the launchWrapper closure.  For simplicity, it's just treated
+  // like `delete this` everywhere.)
+  auto fail = [this] {
     {
       MonitorAutoLock lock(mMonitor);
       mProcessState = PROCESS_ERROR;
       lock.Notify();
     }
-    // Warning: rejecting the promise allows `this` to be deleted.
-    // (Deletion happens on the I/O thread, so currently it won't happen
-    // until this method returns, but the next patch will change that.)
     mHandlePromise->Reject(LaunchError{}, __func__);
+  };
+
+  // This (probably?) needs to happen on the I/O thread.
+  InitializeChannel();
+
+  // But the rest of this doesn't, and shouldn't block IPC messages:
+  auto launchWrapper = [this, fail, aExtraOpts = std::move(aExtraOpts)]() {
+    bool ok = PerformAsyncLaunch(aExtraOpts);
+
+    if (!ok) {
+      // WaitUntilConnected might be waiting for us to signal.
+      // If something failed let's set the error state and notify.
+      CHROMIUM_LOG(ERROR) << "Failed to launch "
+                          << XRE_ChildProcessTypeToString(mProcessType)
+                          << " subprocess";
+      Telemetry::Accumulate(
+          Telemetry::SUBPROCESS_LAUNCH_FAILURE,
+          nsDependentCString(XRE_ChildProcessTypeToString(mProcessType)));
+      fail();
+    }
+  };
+
+  // The Web Replay middleman process launches the actual content
+  // processes, and doesn't initialize enough of XPCOM to use thread
+  // pools.
+  if (!mozilla::recordreplay::IsMiddleman()) {
+    auto launcher = GetIPCLauncher();
+    MOZ_DIAGNOSTIC_ASSERT(launcher != nullptr);
+    // Creating a thread pool shouldn't normally fail, but in case it
+    // does, use the fallback we already have for the middleman case.
+    if (launcher != nullptr) {
+      nsresult rv = launcher->Dispatch(
+          NS_NewRunnableFunction(
+              "ipc::GeckoChildProcessHost::PerformAsyncLaunch", launchWrapper),
+          NS_DISPATCH_NORMAL);
+      if (NS_FAILED(rv)) {
+        CHROMIUM_LOG(ERROR) << "Failed to dispatch launch task for "
+                            << XRE_ChildProcessTypeToString(mProcessType)
+                            << " process; launching during shutdown?";
+        fail();
+      }
+      return;
+    }
   }
-  return ok;
+
+  // Fall back to launching on the I/O thread.
+  launchWrapper();
 }
 
 void
 #if defined(XP_WIN)
 AddAppDirToCommandLine(CommandLine& aCmdLine)
 #else
 AddAppDirToCommandLine(std::vector<std::string>& aCmdLine)
 #endif
@@ -1109,17 +1223,22 @@ bool GeckoChildProcessHost::PerformAsync
     MOZ_CRASH("cannot open handle to child process");
   }
 
   CrashReporter::RegisterChildCrashAnnotationFileDescriptor(
       base::GetProcId(process), crashAnnotationReadPipe.forget());
 
   {
     MonitorAutoLock lock(mMonitor);
-    mProcessState = PROCESS_CREATED;
+    // This runs on a launch thread, but the OnChannel{Connected,Error}
+    // callbacks run on the I/O thread, so it's possible that the state already
+    // advanced beyond PROCESS_CREATED.
+    if (mProcessState < PROCESS_CREATED) {
+      mProcessState = PROCESS_CREATED;
+    }
     lock.Notify();
   }
 
   mLaunchOptions = nullptr;
 
   Telemetry::AccumulateTimeDelta(Telemetry::CHILD_PROCESS_LAUNCH_MS, startTS);
 
   // Warning: resolving the promise allows `this` to be deleted.
@@ -1136,17 +1255,16 @@ bool GeckoChildProcessHost::OpenPrivileg
   return base::OpenPrivilegedProcessHandle(aPid, &mChildProcessHandle);
 }
 
 void GeckoChildProcessHost::OnChannelConnected(int32_t peer_pid) {
   if (!OpenPrivilegedHandle(peer_pid)) {
     MOZ_CRASH("can't open handle to child process");
   }
   MonitorAutoLock lock(mMonitor);
-  MOZ_DIAGNOSTIC_ASSERT(mProcessState == PROCESS_CREATED);
   mProcessState = PROCESS_CONNECTED;
   lock.Notify();
 }
 
 void GeckoChildProcessHost::OnMessageReceived(IPC::Message&& aMsg) {
   // We never process messages ourself, just save them up for the next
   // listener.
   mQueue.push(std::move(aMsg));
@@ -1154,17 +1272,16 @@ void GeckoChildProcessHost::OnMessageRec
 
 void GeckoChildProcessHost::OnChannelError() {
   // Update the process state to an error state if we have a channel
   // error before we're connected. This fixes certain failures,
   // but does not address the full range of possible issues described
   // in the FIXME comment below.
   MonitorAutoLock lock(mMonitor);
   if (mProcessState < PROCESS_CONNECTED) {
-    MOZ_DIAGNOSTIC_ASSERT(mProcessState == PROCESS_CREATED);
     mProcessState = PROCESS_ERROR;
     lock.Notify();
   }
   // FIXME/bug 773925: save up this error for the next listener.
 }
 
 RefPtr<GeckoChildProcessHost::HandlePromise>
 GeckoChildProcessHost::WhenProcessHandleReady() {
--- a/ipc/glue/GeckoChildProcessHost.h
+++ b/ipc/glue/GeckoChildProcessHost.h
@@ -185,23 +185,23 @@ class GeckoChildProcessHost : public Chi
 #endif
   RefPtr<HandlePromise::Private> mHandlePromise;
 
   bool OpenPrivilegedHandle(base::ProcessId aPid);
 
  private:
   DISALLOW_EVIL_CONSTRUCTORS(GeckoChildProcessHost);
 
-  // Does the actual work for AsyncLaunch, on the IO thread.
-  // (TODO, bug 1487287: move this to its own thread(s).)
+  // Does the actual work for AsyncLaunch; run in a thread pool
+  // (or, on Windows, a dedicated thread).
   bool PerformAsyncLaunch(StringVector aExtraOpts);
 
-  // Also called on the I/O thread; creates channel, launches, and
-  // consolidates error handling.
-  bool RunPerformAsyncLaunch(StringVector aExtraOpts);
+  // Called on the I/O thread; creates channel, dispatches
+  // PerformAsyncLaunch, and consolidates error handling.
+  void RunPerformAsyncLaunch(StringVector aExtraOpts);
 
   enum class BinaryPathType { Self, PluginContainer };
 
   static BinaryPathType GetPathToBinary(FilePath& exePath,
                                         GeckoProcessType processType);
 
   // The buffer is passed to preserve its lifetime until we are done
   // with launching the sub-process.