Bug 1240985 - Fix some timeout/cancel interactions (r=dvander)
authorBill McCloskey <billm@mozilla.com>
Fri, 22 Jan 2016 17:52:23 -0800
changeset 282000 3e614d3776e0ae263746e9f5c0006c964bdb6dff
parent 281999 3274c88c07a24be53588e6ae97f62d8949d8ef50
child 282001 cbdcef6d6e013e038e0ad70150f599ffba8445e1
push id29950
push usercbook@mozilla.com
push dateThu, 28 Jan 2016 11:14:03 +0000
treeherdermozilla-central@2b73b0a4d52b [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersdvander
bugs1240985
milestone47.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1240985 - Fix some timeout/cancel interactions (r=dvander)
ipc/glue/MessageChannel.cpp
ipc/glue/MessageChannel.h
--- a/ipc/glue/MessageChannel.cpp
+++ b/ipc/glue/MessageChannel.cpp
@@ -597,30 +597,19 @@ MessageChannel::MaybeInterceptSpecialIOM
             mChannelState = ChannelClosing;
             if (LoggingEnabled()) {
                 printf("NOTE: %s process received `Goodbye', closing down\n",
                        (mSide == ChildSide) ? "child" : "parent");
             }
             return true;
         } else if (CANCEL_MESSAGE_TYPE == aMsg.type()) {
             IPC_LOG("Cancel from message");
-
-            if (aMsg.transaction_id() == mTimedOutMessageSeqno) {
-                // An unusual case: We timed out a transaction which the other
-                // side then cancelled. In this case we just leave the timedout
-                // state and try to forget this ever happened.
-                mTimedOutMessageSeqno = 0;
-                return true;
-            } else {
-                MOZ_RELEASE_ASSERT(mCurrentTransaction == aMsg.transaction_id());
-                CancelCurrentTransactionInternal();
-                NotifyWorkerThread();
-                IPC_LOG("Notified");
-                return true;
-            }
+            CancelTransaction(aMsg.transaction_id());
+            NotifyWorkerThread();
+            return true;
         }
     }
     return false;
 }
 
 bool
 MessageChannel::ShouldDeferMessage(const Message& aMsg)
 {
@@ -940,17 +929,17 @@ MessageChannel::Send(Message* aMsg, Mess
 
     if (mCurrentTransaction &&
         (msg->priority() < DispatchingSyncMessagePriority() ||
          msg->priority() < AwaitingSyncReplyPriority()))
     {
         MOZ_ASSERT(DispatchingSyncMessage() || DispatchingAsyncMessage());
         IPC_LOG("Cancel from Send");
         CancelMessage *cancel = new CancelMessage(mCurrentTransaction);
-        CancelCurrentTransactionInternal();
+        CancelTransaction(mCurrentTransaction);
         mLink->SendMessage(cancel);
     }
 
     IPC_ASSERT(msg->is_sync(), "can only Send() sync messages here");
 
     if (mCurrentTransaction) {
         IPC_ASSERT(msg->priority() >= DispatchingSyncMessagePriority(),
                    "can't send sync message of a lesser priority than what's being dispatched");
@@ -2105,44 +2094,84 @@ MessageChannel::GetTopmostMessageRouting
     if (mCxxStackFrames.empty()) {
         return MSG_ROUTING_NONE;
     }
     const InterruptFrame& frame = mCxxStackFrames.back();
     return frame.GetRoutingId();
 }
 
 void
-MessageChannel::CancelCurrentTransactionInternal()
+MessageChannel::CancelTransaction(int transaction)
 {
     mMonitor->AssertCurrentThreadOwns();
 
     // When we cancel a transaction, we need to behave as if there's no longer
     // any IPC on the stack. Anything we were dispatching or sending will get
     // canceled. Consequently, we have to update the state variables below.
     //
     // We also need to ensure that when any IPC functions on the stack return,
     // they don't reset these values using an RAII class like AutoSetValue. To
     // avoid that, these RAII classes check if the variable they set has been
     // tampered with (by us). If so, they don't reset the variable to the old
     // value.
 
-    IPC_LOG("CancelInternal: current xid=%d", mCurrentTransaction);
+    IPC_LOG("CancelTransaction: xid=%d prios=%d", transaction, mPendingSendPriorities);
+
+    // An unusual case: We timed out a transaction which the other side then
+    // cancelled. In this case we just leave the timedout state and try to
+    // forget this ever happened.
+    if (transaction == mTimedOutMessageSeqno) {
+        IPC_LOG("Cancelled timed out message %d", mTimedOutMessageSeqno);
+        mTimedOutMessageSeqno = 0;
 
-    MOZ_ASSERT(mCurrentTransaction);
-    mCurrentTransaction = 0;
+        // Normally mCurrentTransaction == 0 here. But it can be non-zero if:
+        // 1. Parent sends hi prio message H.
+        // 2. Parent times out H.
+        // 3. Child dispatches H and sends nested message H' (same transaction).
+        // 4. Parent dispatches H' and cancels.
+        MOZ_ASSERT_IF(mCurrentTransaction, mCurrentTransaction == transaction);
+        mCurrentTransaction = 0;
+
+        // During a timeout Send should always fail.
+        MOZ_ASSERT(!mAwaitingSyncReply);
+    } else {
+        MOZ_ASSERT(mCurrentTransaction == transaction);
+        mCurrentTransaction = 0;
 
-    mAwaitingSyncReply = false;
-    mAwaitingSyncReplyPriority = 0;
+        mAwaitingSyncReply = false;
+        mAwaitingSyncReplyPriority = 0;
+    }
+
+    DebugOnly<bool> foundSync = false;
+    for (MessageQueue::iterator it = mPending.begin(); it != mPending.end(); ) {
+        Message &msg = *it;
 
-    for (size_t i = 0; i < mPending.size(); i++) {
+        // If there was a race between the parent and the child, then we may
+        // have a queued sync message. We want to drop this message from the
+        // queue since it will get cancelled along with the transaction being
+        // cancelled. We don't bother doing this for normal priority messages
+        // because the child is just going to crash in that case, and we want to
+        // avoid processing messages out of order in the short time before it
+        // crashes.
+        if (msg.is_sync() && msg.priority() != IPC::Message::PRIORITY_NORMAL) {
+            MOZ_ASSERT(!foundSync);
+            MOZ_ASSERT(msg.transaction_id() != transaction);
+            IPC_LOG("Removing msg from queue seqno=%d xid=%d", msg.seqno(), msg.transaction_id());
+            foundSync = true;
+            it = mPending.erase(it);
+            continue;
+        }
+
         // There may be messages in the queue that we expected to process from
         // ProcessPendingRequests. However, Send will no longer call that
         // function once it's been canceled. So we may need to process these
         // messages in the normal event loop instead.
         mWorkerLoop->PostTask(FROM_HERE, new DequeueTask(mDequeueOneTask));
+
+        it++;
     }
 
     // We could also zero out mDispatchingSyncMessage here. However, that would
     // cause a race because mDispatchingSyncMessage is a worker-thread-only
     // field and we can be called on the I/O thread. Luckily, we can check to
     // see if mCurrentTransaction is 0 before examining DispatchSyncMessage.
 }
 
@@ -2156,17 +2185,17 @@ MessageChannel::CancelCurrentTransaction
         {
             MOZ_CRASH("Intentional crash: we're running a nested event loop "
                       "while processing an urgent message");
         }
 
         IPC_LOG("Cancel requested: current xid=%d", mCurrentTransaction);
         MOZ_ASSERT(DispatchingSyncMessage());
         CancelMessage *cancel = new CancelMessage(mCurrentTransaction);
-        CancelCurrentTransactionInternal();
+        CancelTransaction(mCurrentTransaction);
         mLink->SendMessage(cancel);
     }
 }
 
 void
 CancelCPOWs()
 {
     if (gParentProcessBlocker) {
--- a/ipc/glue/MessageChannel.h
+++ b/ipc/glue/MessageChannel.h
@@ -303,17 +303,17 @@ class MessageChannel : HasResultCodes
     // necessarily.
     bool WaitForSyncNotify(bool aHandleWindowsMessages);
     bool WaitForInterruptNotify();
 
     bool WaitResponse(bool aWaitTimedOut);
 
     bool ShouldContinueFromTimeout();
 
-    void CancelCurrentTransactionInternal();
+    void CancelTransaction(int transaction);
 
     // The "remote view of stack depth" can be different than the
     // actual stack depth when there are out-of-turn replies.  When we
     // receive one, our actual Interrupt stack depth doesn't decrease, but
     // the other side (that sent the reply) thinks it has.  So, the
     // "view" returned here is |stackDepth| minus the number of
     // out-of-turn replies.
     //