Bug 828114: Set a timer to ensure content processes are killed if their tabs take a long time to shut down. r=jlebar a=blocking-basecamp
authorChris Jones <jones.chris.g@gmail.com>
Thu, 10 Jan 2013 14:22:14 +0100
changeset 118393 b8bae43380b66e7c7416d5e75cee834906b52bdc
parent 118392 bf0d25a344fb6c36ca74e8e2388304b2e7ef5c25
child 118394 a051840bec181e5487c4363eaf2cab52040ddbd5
push id24166
push userMs2ger@gmail.com
push dateFri, 11 Jan 2013 13:57:41 +0000
treeherdermozilla-central@63c4b0f66a0c [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersjlebar, blocking-basecamp
bugs828114
milestone21.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 828114: Set a timer to ensure content processes are killed if their tabs take a long time to shut down. r=jlebar a=blocking-basecamp
dom/ipc/ContentParent.cpp
dom/ipc/ContentParent.h
dom/ipc/TabParent.cpp
dom/ipc/TabParent.h
modules/libpref/src/init/all.js
--- a/dom/ipc/ContentParent.cpp
+++ b/dom/ipc/ContentParent.cpp
@@ -642,16 +642,21 @@ struct DelayedDeleteContentParentTask : 
     nsRefPtr<ContentParent> mObj;
 };
 
 }
 
 void
 ContentParent::ActorDestroy(ActorDestroyReason why)
 {
+    if (mForceKillTask) {
+        mForceKillTask->Cancel();
+        mForceKillTask = nullptr;
+    }
+
     nsRefPtr<nsFrameMessageManager> ppm = mMessageManager;
     if (ppm) {
       ppm->ReceiveMessage(static_cast<nsIContentFrameMessageManager*>(ppm.get()),
                           CHILD_PROCESS_SHUTDOWN_MESSAGE, false,
                           nullptr, nullptr, nullptr);
     }
     nsCOMPtr<nsIThreadObserver>
         kungFuDeathGrip(static_cast<nsIThreadObserver*>(this));
@@ -748,25 +753,56 @@ ContentParent::ActorDestroy(ActorDestroy
     // all hell breaks loose.
     //
     // This runnable ensures that a reference to |this| lives on at
     // least until after the current task finishes running.
     NS_DispatchToCurrentThread(new DelayedDeleteContentParentTask(this));
 }
 
 void
-ContentParent::NotifyTabDestroyed(PBrowserParent* aTab)
+ContentParent::NotifyTabDestroying(PBrowserParent* aTab)
 {
     // There can be more than one PBrowser for a given app process
+    // because of popup windows.  PBrowsers can also destroy
+    // concurrently.  When all the PBrowsers are destroying, kick off
+    // another task to ensure the child process *really* shuts down,
+    // even if the PBrowsers themselves never finish destroying.
+    int32_t numLiveTabs = ManagedPBrowserParent().Length();
+    ++mNumDestroyingTabs;
+    if (mNumDestroyingTabs != numLiveTabs) {
+        return;
+    }
+
+    // We're dying now, so prevent this content process from being
+    // recycled during its shutdown procedure.
+    MarkAsDead();
+
+    MOZ_ASSERT(!mForceKillTask);
+    int32_t timeoutSecs =
+        Preferences::GetInt("dom.ipc.tabs.shutdownTimeoutSecs", 5);
+    if (timeoutSecs > 0) {
+        MessageLoop::current()->PostDelayedTask(
+            FROM_HERE,
+            mForceKillTask = NewRunnableMethod(this, &ContentParent::KillHard),
+            timeoutSecs * 1000);
+    }
+}
+
+void
+ContentParent::NotifyTabDestroyed(PBrowserParent* aTab,
+                                  bool aNotifiedDestroying)
+{
+    if (aNotifiedDestroying) {
+        --mNumDestroyingTabs;
+    }
+
+    // There can be more than one PBrowser for a given app process
     // because of popup windows.  When the last one closes, shut
     // us down.
     if (ManagedPBrowserParent().Length() == 1) {
-        // Prevent this content process from being recycled, since
-        // it's dying.
-        MarkAsDead();
         MessageLoop::current()->PostTask(
             FROM_HERE,
             NewRunnableMethod(this, &ContentParent::ShutDownProcess));
     }
 }
 
 TestShellParent*
 ContentParent::CreateTestShell()
@@ -793,16 +829,18 @@ ContentParent::ContentParent(const nsASt
                              ChildOSPrivileges aOSPrivileges)
     : mSubprocess(nullptr)
     , mOSPrivileges(aOSPrivileges)
     , mChildID(CONTENT_PARENT_UNKNOWN_CHILD_ID)
     , mGeolocationWatchID(-1)
     , mRunToCompletionDepth(0)
     , mShouldCallUnblockChild(false)
     , mAppManifestURL(aAppManifestURL)
+    , mForceKillTask(nullptr)
+    , mNumDestroyingTabs(0)
     , mIsAlive(true)
     , mIsDestroyed(false)
     , mSendPermissionUpdates(false)
     , mIsForBrowser(aIsForBrowser)
 {
     // From this point on, NS_WARNING, NS_ASSERTION, etc. should print out the
     // PID along with the warning.
     nsDebugImpl::SetMultiprocessMode("Parent");
@@ -851,16 +889,20 @@ ContentParent::ContentParent(const nsASt
 
         //Sending all information to content process
         unused << SendAppInfo(version, buildID);
     }
 }
 
 ContentParent::~ContentParent()
 {
+    if (mForceKillTask) {
+        mForceKillTask->Cancel();
+    }
+
     if (OtherProcess())
         base::CloseProcessHandle(OtherProcess());
 
     NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
 
     // We should be removed from all these lists in ActorDestroy.
     MOZ_ASSERT(!gPrivateContent || !gPrivateContent->Contains(this));
     if (mAppManifestURL.IsEmpty()) {
@@ -1522,16 +1564,17 @@ ContentParent::GetOrCreateActorForBlob(n
   }
 
   return actor;
 }
 
 void
 ContentParent::KillHard()
 {
+    mForceKillTask = nullptr;
     // This ensures the process is eventually killed, but doesn't
     // immediately KILLITWITHFIRE because we want to get a minidump if
     // possible.  After a timeout though, the process is forceably
     // killed.
     if (!KillProcess(OtherProcess(), 1, false)) {
         NS_WARNING("failed to kill subprocess!");
     }
     XRE_GetIOMessageLoop()->PostTask(
--- a/dom/ipc/ContentParent.h
+++ b/dom/ipc/ContentParent.h
@@ -100,18 +100,21 @@ public:
     /**
      * MessageManagerCallback methods that we override.
      */
     virtual bool DoSendAsyncMessage(const nsAString& aMessage,
                                     const mozilla::dom::StructuredCloneData& aData);
     virtual bool CheckPermission(const nsAString& aPermission);
     virtual bool CheckManifestURL(const nsAString& aManifestURL);
 
+    /** Notify that a tab is beginning its destruction sequence. */
+    void NotifyTabDestroying(PBrowserParent* aTab);
     /** Notify that a tab was destroyed during normal operation. */
-    void NotifyTabDestroyed(PBrowserParent* aTab);
+    void NotifyTabDestroyed(PBrowserParent* aTab,
+                            bool aNotifiedDestroying);
 
     TestShellParent* CreateTestShell();
     bool DestroyTestShell(TestShellParent* aTestShell);
     TestShellParent* GetTestShellSingleton();
 
     void ReportChildAlreadyBlocked();
     bool RequestRunToCompletion();
 
@@ -352,16 +355,25 @@ private:
     // registered in the child process.  To update this, one
     // can broadcast the topic "child-memory-reporter-request" using
     // the nsIObserverService.
     nsCOMArray<nsIMemoryReporter> mMemoryReporters;
 
     const nsString mAppManifestURL;
     nsRefPtr<nsFrameMessageManager> mMessageManager;
 
+    // After we initiate shutdown, we also start a timer to ensure
+    // that even content processes that are 100% blocked (say from
+    // SIGSTOP), are still killed eventually.  This task enforces that
+    // timer.
+    CancelableTask* mForceKillTask;
+    // How many tabs we're waiting to finish their destruction
+    // sequence.  Precisely, how many TabParents have called
+    // NotifyTabDestroying() but not called NotifyTabDestroyed().
+    int32_t mNumDestroyingTabs;
     // True only while this is ready to be used to host remote tabs.
     // This must not be used for new purposes after mIsAlive goes to
     // false, but some previously scheduled IPC traffic may still pass
     // through.
     bool mIsAlive;
     // True after the OS-level shutdown sequence has been initiated.
     // After going true, any use of this at all, including lingering
     // IPC traffic passing through, will cause assertions to fail.
--- a/dom/ipc/TabParent.cpp
+++ b/dom/ipc/TabParent.cpp
@@ -82,16 +82,17 @@ TabParent::TabParent(const TabContext& a
   , mIMEComposing(false)
   , mIMECompositionEnding(false)
   , mIMECompositionStart(0)
   , mIMESeqno(0)
   , mEventCaptureDepth(0)
   , mDimensions(0, 0)
   , mDPI(0)
   , mShown(false)
+  , mMarkedDestroying(false)
   , mIsDestroyed(false)
 {
 }
 
 TabParent::~TabParent()
 {
 }
 
@@ -119,23 +120,27 @@ TabParent::Destroy()
   for (uint32_t i = 0; i < idbParents.Length(); ++i) {
     static_cast<IndexedDBParent*>(idbParents[i])->Disconnect();
   }
 
   if (RenderFrameParent* frame = GetRenderFrame()) {
     frame->Destroy();
   }
   mIsDestroyed = true;
+
+  ContentParent* cp = static_cast<ContentParent*>(Manager());
+  cp->NotifyTabDestroying(this);
+  mMarkedDestroying = true;
 }
 
 bool
 TabParent::Recv__delete__()
 {
   ContentParent* cp = static_cast<ContentParent*>(Manager());
-  cp->NotifyTabDestroyed(this);
+  cp->NotifyTabDestroyed(this, mMarkedDestroying);
   return true;
 }
 
 void
 TabParent::ActorDestroy(ActorDestroyReason why)
 {
   if (sEventCapturer == this) {
     sEventCapturer = nullptr;
--- a/dom/ipc/TabParent.h
+++ b/dom/ipc/TabParent.h
@@ -290,16 +290,19 @@ private:
     // notify it of input events targeting us.
     bool UseAsyncPanZoom();
     // If we have a render frame currently, notify it that we're about
     // to dispatch |aEvent| to our child.  If there's a relevant
     // transform in place, |aOutEvent| is the transformed |aEvent| to
     // dispatch to content.
     void MaybeForwardEventToRenderFrame(const nsInputEvent& aEvent,
                                         nsInputEvent* aOutEvent);
+    // When true, we've initiated normal shutdown and notified our
+    // managing PContent.
+    bool mMarkedDestroying;
     // When true, the TabParent is invalid and we should not send IPC messages
     // anymore.
     bool mIsDestroyed;
 };
 
 } // namespace dom
 } // namespace mozilla
 
--- a/modules/libpref/src/init/all.js
+++ b/modules/libpref/src/init/all.js
@@ -1757,25 +1757,30 @@ pref("dom.ipc.plugins.parentTimeoutSecs"
 pref("dom.ipc.plugins.processLaunchTimeoutSecs", 45);
 #ifdef XP_WIN
 // How long a plugin is allowed to process a synchronous IPC message 
 // before we display the plugin hang UI
 pref("dom.ipc.plugins.hangUITimeoutSecs", 5);
 // Minimum time that the plugin hang UI will be displayed
 pref("dom.ipc.plugins.hangUIMinDisplaySecs", 10);
 #endif
+// How long a content process can take before closing its IPC channel
+// after shutdown is initiated.  If the process exceeds the timeout,
+// we fear the worst and kill it.
+pref("dom.ipc.tabs.shutdownTimeoutSecs", 5);
 #else
 // No timeout in DEBUG builds
 pref("dom.ipc.plugins.timeoutSecs", 0);
 pref("dom.ipc.plugins.processLaunchTimeoutSecs", 0);
 pref("dom.ipc.plugins.parentTimeoutSecs", 0);
 #ifdef XP_WIN
 pref("dom.ipc.plugins.hangUITimeoutSecs", 0);
 pref("dom.ipc.plugins.hangUIMinDisplaySecs", 0);
 #endif
+pref("dom.ipc.tabs.shutdownTimeoutSecs", 0);
 #endif
 
 #ifdef XP_WIN
 // Disable oopp for java on windows. They run their own
 // process isolation which conflicts with our implementation.
 pref("dom.ipc.plugins.java.enabled", false);
 #endif