Bug 821192 - Ensure that content processes don't see an inconsistent app dir. r=bent, r=dhylands, a=blocking-basecamp
authorChris Jones <jones.chris.g@gmail.com>
Fri, 28 Dec 2012 01:45:16 -0800
changeset 119137 0a687d4da87ad077383d5e3397026ad5f98d66f3
parent 119136 0687a40d2076b9f5bd29090fc5f39de4f7f6f8a8
child 119138 b2296f9aaa555fa8abcad2920932ff08f858fd3a
push id3074
push userryanvm@gmail.com
push dateSat, 29 Dec 2012 15:50:53 +0000
treeherdermozilla-aurora@b2d7c39b98d3 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersbent, dhylands, blocking-basecamp
bugs821192
milestone19.0a2
Bug 821192 - Ensure that content processes don't see an inconsistent app dir. r=bent, r=dhylands, a=blocking-basecamp Bug 821192, part 1: Fix the watchdog timeout code. r=dhylands Bug 821192, part 2: Add an interface to join all live content processes. r=bent Bug 821192, part 3: Join all subprocesses before restarting the main process, when we're e.g. about to apply an update. r=dhylands
dom/ipc/ContentParent.cpp
dom/ipc/ContentParent.h
dom/power/PowerManagerService.cpp
hal/linux/LinuxPower.cpp
ipc/glue/GeckoChildProcessHost.cpp
ipc/glue/GeckoChildProcessHost.h
--- a/dom/ipc/ContentParent.cpp
+++ b/dom/ipc/ContentParent.cpp
@@ -161,16 +161,20 @@ MemoryReportRequestParent::~MemoryReport
 {
     MOZ_COUNT_DTOR(MemoryReportRequestParent);
 }
 
 nsDataHashtable<nsStringHashKey, ContentParent*>* ContentParent::gAppContentParents;
 nsTArray<ContentParent*>* ContentParent::gNonAppContentParents;
 nsTArray<ContentParent*>* ContentParent::gPrivateContent;
 
+// This is true when subprocess launching is enabled.  This is the
+// case between StartUp() and ShutDown() or JoinAllSubprocesses().
+static bool sCanLaunchSubprocesses;
+
 // The first content child has ID 1, so the chrome process can have ID 0.
 static uint64_t gContentChildID = 1;
 
 // Try to keep an app process always preallocated, to get
 // initialization off the critical path of app startup.
 static bool sKeepAppProcessPreallocated;
 static StaticRefPtr<ContentParent> sPreallocatedAppProcess;
 static CancelableTask* sPreallocateAppProcessTask;
@@ -254,23 +258,74 @@ ContentParent::StartUp()
             "dom.ipc.processPrelaunch.delayMs", 1000);
 
         MOZ_ASSERT(!sPreallocateAppProcessTask);
 
         // Let's not slow down the main process initialization. Wait until
         // the main process goes idle before we preallocate a process
         MessageLoop::current()->PostIdleTask(FROM_HERE, NewRunnableFunction(FirstIdle));
     }
+
+    sCanLaunchSubprocesses = true;
 }
 
 /*static*/ void
 ContentParent::ShutDown()
 {
     // No-op for now.  We rely on normal process shutdown and
     // ClearOnShutdown() to clean up our state.
+    sCanLaunchSubprocesses = false;
+}
+
+/*static*/ void
+ContentParent::JoinProcessesIOThread(const nsTArray<ContentParent*>* aProcesses,
+                                     Monitor* aMonitor, bool* aDone)
+{
+    const nsTArray<ContentParent*>& processes = *aProcesses;
+    for (uint32_t i = 0; i < processes.Length(); ++i) {
+        if (GeckoChildProcessHost* process = processes[i]->mSubprocess) {
+            process->Join();
+        }
+    }
+    {
+        MonitorAutoLock lock(*aMonitor);
+        *aDone = true;
+        lock.Notify();
+    }
+    // Don't touch any arguments to this function from now on.
+}
+
+/*static*/ void
+ContentParent::JoinAllSubprocesses()
+{
+    MOZ_ASSERT(NS_IsMainThread());
+
+    nsAutoTArray<ContentParent*, 8> processes;
+    GetAll(processes);
+    if (processes.IsEmpty()) {
+        printf_stderr("There are no live subprocesses.");
+        return;
+    }
+
+    printf_stderr("Subprocesses are still alive.  Doing emergency join.\n");
+
+    bool done = false;
+    Monitor monitor("mozilla.dom.ContentParent.JoinAllSubprocesses");
+    XRE_GetIOMessageLoop()->PostTask(FROM_HERE,
+                                     NewRunnableFunction(
+                                         &ContentParent::JoinProcessesIOThread,
+                                         &processes, &monitor, &done));
+    {
+        MonitorAutoLock lock(monitor);
+        while (!done) {
+            lock.Wait();
+        }
+    }
+
+    sCanLaunchSubprocesses = false;
 }
 
 /*static*/ ContentParent*
 ContentParent::GetNewOrUsed(bool aForBrowserElement)
 {
     if (!gNonAppContentParents)
         gNonAppContentParents = new nsTArray<ContentParent*>();
 
@@ -322,16 +377,20 @@ PrivilegesForApp(mozIApplication* aApp)
         }
     }
     return base::PRIVILEGES_DEFAULT;
 }
 
 /*static*/ TabParent*
 ContentParent::CreateBrowserOrApp(const TabContext& aContext)
 {
+    if (!sCanLaunchSubprocesses) {
+        return nullptr;
+    }
+
     if (aContext.IsBrowserElement() || !aContext.HasOwnApp()) {
         if (ContentParent* cp = GetNewOrUsed(aContext.IsBrowserElement())) {
             nsRefPtr<TabParent> tp(new TabParent(aContext));
             PBrowserParent* browser = cp->SendPBrowserConstructor(
                 tp.forget().get(), // DeallocPBrowserParent() releases this ref.
                 aContext.AsIPCTabContext(),
                 /* chromeFlags */ 0);
             return static_cast<TabParent*>(browser);
--- a/dom/ipc/ContentParent.h
+++ b/dom/ipc/ContentParent.h
@@ -66,16 +66,23 @@ class ContentParent : public PContentPar
 public:
     /**
      * Start up the content-process machinery.  This might include
      * scheduling pre-launch tasks.
      */
     static void StartUp();
     /** Shut down the content-process machinery. */
     static void ShutDown();
+    /**
+     * Ensure that all subprocesses are terminated and their OS
+     * resources have been reaped.  This is synchronous and can be
+     * very expensive in general.  It also bypasses the normal
+     * shutdown process.
+     */
+    static void JoinAllSubprocesses();
 
     static ContentParent* GetNewOrUsed(bool aForBrowserElement = false);
 
     /**
      * Get or create a content process for the given TabContext.
      */
     static TabParent* CreateBrowserOrApp(const TabContext& aContext);
 
@@ -131,16 +138,19 @@ protected:
 
 private:
     typedef base::ChildPrivileges ChildOSPrivileges;
 
     static nsDataHashtable<nsStringHashKey, ContentParent*> *gAppContentParents;
     static nsTArray<ContentParent*>* gNonAppContentParents;
     static nsTArray<ContentParent*>* gPrivateContent;
 
+    static void JoinProcessesIOThread(const nsTArray<ContentParent*>* aProcesses,
+                                      Monitor* aMonitor, bool* aDone);
+
     static void PreallocateAppProcess();
     static void DelayedPreallocateAppProcess();
     static void ScheduleDelayedPreallocateAppProcess();
     static already_AddRefed<ContentParent> MaybeTakePreallocatedAppProcess();
     static void FirstIdle();
 
     // Hide the raw constructor methods since we don't want client code
     // using them.
--- a/dom/power/PowerManagerService.cpp
+++ b/dom/power/PowerManagerService.cpp
@@ -1,13 +1,14 @@
 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+#include "mozilla/dom/ContentParent.h"
 #include "mozilla/Hal.h"
 #include "mozilla/HalWakeLock.h"
 #include "mozilla/ClearOnShutdown.h"
 #include "mozilla/Preferences.h"
 #include "mozilla/Services.h"
 #include "nsIDOMWakeLockListener.h"
 #include "nsIDOMWindow.h"
 #include "nsIObserverService.h"
@@ -134,16 +135,22 @@ PowerManagerService::PowerOff()
 
 NS_IMETHODIMP
 PowerManagerService::Restart()
 {
   // FIXME/bug 796826 this implementation is currently gonk-specific,
   // because it relies on the Gonk to initialize the Gecko processes to
   // restart B2G. It's better to do it here to have a real "restart".
   StartForceQuitWatchdog(eHalShutdownMode_Restart, mWatchdogTimeoutSecs);
+  // Ensure all content processes are dead before we continue
+  // restarting.  This code is used to restart to apply updates, and
+  // if we don't join all the subprocesses, race conditions can cause
+  // them to see an inconsistent view of the application directory.
+  ContentParent::JoinAllSubprocesses();
+
   // To synchronize any unsaved user data before restarting.
   SyncProfile();
 #ifdef XP_UNIX
   sync();
 #endif
   _exit(0);
   MOZ_NOT_REACHED();
   return NS_OK;
--- a/hal/linux/LinuxPower.cpp
+++ b/hal/linux/LinuxPower.cpp
@@ -66,17 +66,26 @@ QuitHard(hal::ShutdownMode aMode)
 // Function to complusively shut down the system with a given mode when timeout.
 static void*
 ForceQuitWatchdog(void* aParamPtr)
 {
   watchdogParam_t* paramPtr = reinterpret_cast<watchdogParam_t*>(aParamPtr);
   if (paramPtr->timeoutSecs > 0 && paramPtr->timeoutSecs <= 30) {
     // If we shut down normally before the timeout, this thread will
     // be harmlessly reaped by the OS.
-    sleep(paramPtr->timeoutSecs);
+    TimeStamp deadline =
+      (TimeStamp::Now() + TimeDuration::FromSeconds(paramPtr->timeoutSecs));
+    while (true) {
+      TimeDuration remaining = (deadline - TimeStamp::Now());
+      int sleepSeconds = int(remaining.ToSeconds());
+      if (sleepSeconds <= 0) {
+        break;
+      }
+      sleep(sleepSeconds);
+    }
   }
   hal::ShutdownMode mode = paramPtr->mode;
   delete paramPtr;
   QuitHard(mode);
   return nullptr;
 }
 
 void
--- a/ipc/glue/GeckoChildProcessHost.cpp
+++ b/ipc/glue/GeckoChildProcessHost.cpp
@@ -359,16 +359,30 @@ GeckoChildProcessHost::InitializeChannel
 {
   CreateChannel();
 
   MonitorAutoLock lock(mMonitor);
   mProcessState = CHANNEL_INITIALIZED;
   lock.Notify();
 }
 
+void
+GeckoChildProcessHost::Join()
+{
+  AssertIOThread();
+
+  if (!mChildProcessHandle) {
+    return;
+  }
+
+  // If this fails, there's nothing we can do.
+  base::KillProcess(mChildProcessHandle, 0, /*wait*/true);
+  mChildProcessHandle = 0;
+}
+
 int32_t GeckoChildProcessHost::mChildCounter = 0;
 
 //
 // Wrapper function for handling GECKO_SEPARATE_NSPR_LOGS
 //
 bool
 GeckoChildProcessHost::PerformAsyncLaunch(std::vector<std::string> aExtraOpts, base::ProcessArchitecture arch)
 {
--- a/ipc/glue/GeckoChildProcessHost.h
+++ b/ipc/glue/GeckoChildProcessHost.h
@@ -91,16 +91,21 @@ public:
   }
 
 #ifdef XP_MACOSX
   task_t GetChildTask() {
     return mChildTask;
   }
 #endif
 
+  /**
+   * Must run on the IO thread.  Cause the OS process to exit and
+   * ensure its OS resources are cleaned up.
+   */
+  void Join();
 
 protected:
   GeckoProcessType mProcessType;
   ChildPrivileges mPrivileges;
   Monitor mMonitor;
   FilePath mProcessPath;
   // This value must be accessed while holding mMonitor.
   enum {