Bug 1038854 - Synchronize mutexes in pthread_cond_wait after recreating threads to avoid race conditions. Parts by Ting-Yu, r=khuey. Parts by Kyle Huey, r=thinker, a=bajaj
authorTing-Yu Chou <janus926@gmail.com>
Mon, 04 Aug 2014 15:36:00 -0400
changeset 172454 5ec370c6581bc7af478995fbb3e0374c9e200f0b
parent 172453 29342bbb26a15d736a7ea631125cce590f27905b
child 172455 8160021ea49f6bac8328e3d5583dcf470a0efc2b
push id729
push userryanvm@gmail.com
push dateTue, 19 Aug 2014 23:29:50 +0000
reviewerskhuey, thinker, bajaj
bugs1038854
milestone28.0
Bug 1038854 - Synchronize mutexes in pthread_cond_wait after recreating threads to avoid race conditions. Parts by Ting-Yu, r=khuey. Parts by Kyle Huey, r=thinker, a=bajaj
mozglue/build/Nuwa.cpp
--- a/mozglue/build/Nuwa.cpp
+++ b/mozglue/build/Nuwa.cpp
@@ -163,17 +163,31 @@ struct thread_info : public mozilla::Lin
 
   // The thread specific function to recreate the new thread. It's executed
   // after the thread is recreated.
   void (*recrFunc)(void *arg);
   void *recrArg;
 
   TLSInfoList tlsInfo;
 
-  pthread_mutex_t *reacquireMutex;
+  /**
+   * We must ensure that the recreated thread has entered pthread_cond_wait() or
+   * similar functions before proceeding to recreate the next one. Otherwise, if
+   * the next thread depends on the same mutex, it may be used in an incorrect
+   * state.  To do this, the main thread must unconditionally acquire the mutex.
+   * The mutex is unconditionally released when the recreated thread enters
+   * pthread_cond_wait().  The recreated thread may have locked the mutex itself
+   * (if the pthread_mutex_trylock succeeded) or another thread may have already
+   * held the lock.  If the recreated thread did lock the mutex we must balance
+   * that with another unlock on the main thread, which is signaled by
+   * condMutexNeedsBalancing.
+   */
+  pthread_mutex_t *condMutex;
+  bool condMutexNeedsBalancing;
+
   void *stk;
 
   pid_t origNativeThreadID;
   pid_t recreatedNativeThreadID;
   char nativeThreadName[NATIVE_THREAD_NAME_LENGTH];
 };
 
 typedef struct thread_info thread_info_t;
@@ -486,17 +500,18 @@ static thread_info_t *
 thread_info_new(void) {
   /* link tinfo to sAllThreads */
   thread_info_t *tinfo = new thread_info_t();
   tinfo->flags = 0;
   tinfo->recrFunc = nullptr;
   tinfo->recrArg = nullptr;
   tinfo->recreatedThreadID = 0;
   tinfo->recreatedNativeThreadID = 0;
-  tinfo->reacquireMutex = nullptr;
+  tinfo->condMutex = nullptr;
+  tinfo->condMutexNeedsBalancing = false;
   tinfo->stk = malloc(NUWA_STACK_SIZE + PAGE_SIZE);
 
   // We use a smaller stack size. Add protection to stack overflow: mprotect()
   // stack top (the page at the lowest address) so we crash instead of corrupt
   // other content that is malloc()'d.
   unsigned long long pageGuard = ((unsigned long long)tinfo->stk);
   pageGuard &= PAGE_ALIGN_MASK;
   if (pageGuard != (unsigned long long) tinfo->stk) {
@@ -973,29 +988,42 @@ extern "C" MFBT_API int
   THREAD_FREEZE_POINT1_VIP();
   if (freezePoint2) {
     RECREATE_CONTINUE();
     RECREATE_PASS_VIP();
     RECREATE_GATE_VIP();
     return rv;
   }
   if (recreated && mtx) {
-    if (!freezePoint1 && pthread_mutex_trylock(mtx)) {
+    if (!freezePoint1) {
+      tinfo->condMutex = mtx;
       // The thread was frozen in pthread_cond_wait() after releasing mtx in the
       // Nuwa process. In recreating this thread, We failed to reacquire mtx
       // with the pthread_mutex_trylock() call, that is, mtx was acquired by
       // another thread. Because of this, we need the main thread's help to
       // reacquire mtx so that it will be in a valid state.
-      tinfo->reacquireMutex = mtx;
+      if (!pthread_mutex_trylock(mtx)) {
+        tinfo->condMutexNeedsBalancing = true;
+      }
     }
     RECREATE_CONTINUE();
     RECREATE_PASS_VIP();
   }
   rv = REAL(pthread_cond_wait)(cond, mtx);
   if (recreated && mtx) {
+    // We have reacquired mtx. The main thread also wants to acquire mtx to
+    // synchronize with us. If the main thread didn't get a chance to acquire
+    // mtx let it do that now. The main thread clears condMutex after acquiring
+    // it to signal us.
+    if (tinfo->condMutex) {
+      // We need mtx to end up locked, so tell the main thread not to unlock
+      // mtx after it locks it.
+      tinfo->condMutexNeedsBalancing = false;
+      pthread_mutex_unlock(mtx);
+    }
     // We still need to be gated as not to acquire another mutex associated with
     // another VIP thread and interfere with it.
     RECREATE_GATE_VIP();
   }
   THREAD_FREEZE_POINT2_VIP();
 
   return rv;
 }
@@ -1009,24 +1037,31 @@ extern "C" MFBT_API int
   THREAD_FREEZE_POINT1_VIP();
   if (freezePoint2) {
     RECREATE_CONTINUE();
     RECREATE_PASS_VIP();
     RECREATE_GATE_VIP();
     return rv;
   }
   if (recreated && mtx) {
-    if (!freezePoint1 && pthread_mutex_trylock(mtx)) {
-      tinfo->reacquireMutex = mtx;
+    if (!freezePoint1) {
+      tinfo->condMutex = mtx;
+      if (!pthread_mutex_trylock(mtx)) {
+        tinfo->condMutexNeedsBalancing = true;
+      }
     }
     RECREATE_CONTINUE();
     RECREATE_PASS_VIP();
   }
   rv = REAL(pthread_cond_timedwait)(cond, mtx, abstime);
   if (recreated && mtx) {
+    if (tinfo->condMutex) {
+      tinfo->condMutexNeedsBalancing = false;
+      pthread_mutex_unlock(mtx);
+    }
     RECREATE_GATE_VIP();
   }
   THREAD_FREEZE_POINT2_VIP();
 
   return rv;
 }
 
 extern "C" int __pthread_cond_timedwait(pthread_cond_t *cond,
@@ -1044,24 +1079,31 @@ extern "C" MFBT_API int
   THREAD_FREEZE_POINT1_VIP();
   if (freezePoint2) {
     RECREATE_CONTINUE();
     RECREATE_PASS_VIP();
     RECREATE_GATE_VIP();
     return rv;
   }
   if (recreated && mtx) {
-    if (!freezePoint1 && pthread_mutex_trylock(mtx)) {
-      tinfo->reacquireMutex = mtx;
+    if (!freezePoint1) {
+      tinfo->condMutex = mtx;
+      if (!pthread_mutex_trylock(mtx)) {
+        tinfo->condMutexNeedsBalancing = true;
+      }
     }
     RECREATE_CONTINUE();
     RECREATE_PASS_VIP();
   }
   rv = REAL(__pthread_cond_timedwait)(cond, mtx, abstime, clock);
   if (recreated && mtx) {
+    if (tinfo->condMutex) {
+      tinfo->condMutexNeedsBalancing = false;
+      pthread_mutex_unlock(mtx);
+    }
     RECREATE_GATE_VIP();
   }
   THREAD_FREEZE_POINT2_VIP();
 
   return rv;
 }
 
 extern "C" MFBT_API int
@@ -1360,18 +1402,27 @@ RecreateThreads() {
   pthread_mutex_unlock(&sThreadCountLock);
 
   RECREATE_START();
   while (tinfo != nullptr) {
     if (tinfo->flags & TINFO_FLAG_NUWA_SUPPORT) {
       RECREATE_BEFORE(tinfo);
       thread_recreate(tinfo);
       RECREATE_WAIT();
-      if (tinfo->reacquireMutex) {
-        REAL(pthread_mutex_lock)(tinfo->reacquireMutex);
+      if (tinfo->condMutex) {
+        // Synchronize with the recreated thread in pthread_cond_wait().
+        REAL(pthread_mutex_lock)(tinfo->condMutex);
+        // Tell the other thread that we have successfully locked the mutex.
+        // NB: condMutex can only be touched while it is held, so we must clear
+        // it here and store the mutex locally.
+        pthread_mutex_t *mtx = tinfo->condMutex;
+        tinfo->condMutex = nullptr;
+        if (tinfo->condMutexNeedsBalancing) {
+          pthread_mutex_unlock(mtx);
+        }
       }
     } else if(!(tinfo->flags & TINFO_FLAG_NUWA_SKIP)) {
       // An unmarked thread is found other than the main thread.
 
       // All threads should be marked as one of SUPPORT or SKIP, or
       // abort the process to make sure all threads in the Nuwa
       // process are Nuwa-aware.
       abort();