Bug 1273475 - Fix deadlock and potential crash when PSM shuts down NSS. r=ttaubert, r=jcj, a=lizzard
authorDavid Keeler <dkeeler@mozilla.com>
Tue, 30 Aug 2016 16:22:30 -0700
changeset 342525 2406bffc63707c295244333c34bd6e81750f4157
parent 342524 c8383470a0c4c93462dd1eccaa2944af134527b3
child 342526 e2d17f2657b8b8239f2e8c2e72ce54e7a2c34d9a
push id1183
push userraliiev@mozilla.com
push dateMon, 05 Sep 2016 20:01:49 +0000
treeherdermozilla-release@3148731bed45 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersttaubert, jcj, lizzard
bugs1273475
milestone49.0
Bug 1273475 - Fix deadlock and potential crash when PSM shuts down NSS. r=ttaubert, r=jcj, a=lizzard This fixes two issues: 1. nsNSSShutDownList::evaporateAllNSSResources could deadlock by acquiring sListLock and then the singleton's mNSSActivityStateLock in nsNSSActivityState::restrictActivityToCurrentThread. 2. Calling UnloadLoadableRoots before nsNSSShutDownList::evaporateAllNSSResources could result in removing modules that were still in use, causing assertion failures and potential crashes. MozReview-Commit-ID: 8ZgZTVw7sWh
security/manager/ssl/nsNSSComponent.cpp
security/manager/ssl/nsNSSShutDown.cpp
security/manager/ssl/tests/unit/test_nss_shutdown.js
security/manager/ssl/tests/unit/xpcshell.ini
--- a/security/manager/ssl/nsNSSComponent.cpp
+++ b/security/manager/ssl/nsNSSComponent.cpp
@@ -253,16 +253,17 @@ nsNSSComponent::nsNSSComponent()
   :mutex("nsNSSComponent.mutex"),
    mNSSInitialized(false),
 #ifndef MOZ_NO_SMART_CARDS
    mThreadList(nullptr),
 #endif
    mCertVerificationThread(nullptr)
 {
   MOZ_LOG(gPIPNSSLog, LogLevel::Debug, ("nsNSSComponent::ctor\n"));
+  MOZ_ASSERT(NS_IsMainThread());
 
   NS_ASSERTION( (0 == mInstanceCount), "nsNSSComponent is a singleton, but instantiated multiple times!");
   ++mInstanceCount;
 }
 
 void
 nsNSSComponent::deleteBackgroundThreads()
 {
@@ -288,16 +289,17 @@ nsNSSComponent::createBackgroundThreads(
     delete mCertVerificationThread;
     mCertVerificationThread = nullptr;
   }
 }
 
 nsNSSComponent::~nsNSSComponent()
 {
   MOZ_LOG(gPIPNSSLog, LogLevel::Debug, ("nsNSSComponent::dtor\n"));
+  MOZ_ASSERT(NS_IsMainThread());
   NS_ASSERTION(!mCertVerificationThread,
                "Cert verification thread should have been cleaned up.");
 
   deleteBackgroundThreads();
 
   // All cleanup code requiring services needs to happen in xpcom_shutdown
 
   ShutdownNSS();
@@ -1865,16 +1867,17 @@ nsNSSComponent::InitializeNSS()
 
 void
 nsNSSComponent::ShutdownNSS()
 {
   // Can be called both during init and profile change,
   // needs mutex protection.
 
   MOZ_LOG(gPIPNSSLog, LogLevel::Debug, ("nsNSSComponent::ShutdownNSS\n"));
+  MOZ_ASSERT(NS_IsMainThread());
 
   MutexAutoLock lock(mutex);
 
   if (mNSSInitialized) {
     mNSSInitialized = false;
 
     PK11_SetPasswordFunc((PK11PasswordFunc)nullptr);
 
@@ -1887,37 +1890,41 @@ nsNSSComponent::ShutdownNSS()
 
 #ifndef MOZ_NO_SMART_CARDS
     ShutdownSmartCardThreads();
 #endif
     SSL_ClearSessionCache();
     // TLSServerSocket may be run with the session cache enabled. This ensures
     // those resources are cleaned up.
     Unused << SSL_ShutdownServerSessionIDCache();
-    UnloadLoadableRoots();
 #ifndef MOZ_NO_EV_CERTS
     CleanupIdentityInfo();
 #endif
-    MOZ_LOG(gPIPNSSLog, LogLevel::Debug, ("evaporating psm resources\n"));
-    nsNSSShutDownList::evaporateAllNSSResources();
+    MOZ_LOG(gPIPNSSLog, LogLevel::Debug, ("evaporating psm resources"));
+    if (NS_FAILED(nsNSSShutDownList::evaporateAllNSSResources())) {
+      MOZ_LOG(gPIPNSSLog, LogLevel::Error, ("failed to evaporate resources"));
+      return;
+    }
+    UnloadLoadableRoots();
     EnsureNSSInitialized(nssShutdown);
     if (SECSuccess != ::NSS_Shutdown()) {
-      MOZ_LOG(gPIPNSSLog, LogLevel::Error, ("NSS SHUTDOWN FAILURE\n"));
-    }
-    else {
-      MOZ_LOG(gPIPNSSLog, LogLevel::Debug, ("NSS shutdown =====>> OK <<=====\n"));
+      MOZ_LOG(gPIPNSSLog, LogLevel::Error, ("NSS SHUTDOWN FAILURE"));
+    } else {
+      MOZ_LOG(gPIPNSSLog, LogLevel::Debug, ("NSS shutdown =====>> OK <<====="));
     }
   }
 }
 
 nsresult
 nsNSSComponent::Init()
 {
-  // No mutex protection.
-  // Assume Init happens before any concurrency on "this" can start.
+  MOZ_ASSERT(NS_IsMainThread());
+  if (!NS_IsMainThread()) {
+    return NS_ERROR_NOT_SAME_THREAD;
+  }
 
   nsresult rv = NS_OK;
 
   MOZ_LOG(gPIPNSSLog, LogLevel::Debug, ("Beginning NSS initialization\n"));
 
   rv = InitializePIPNSSBundle();
   if (NS_FAILED(rv)) {
     MOZ_LOG(gPIPNSSLog, LogLevel::Error, ("Unable to create pipnss bundle.\n"));
--- a/security/manager/ssl/nsNSSShutDown.cpp
+++ b/security/manager/ssl/nsNSSShutDown.cpp
@@ -122,28 +122,49 @@ nsresult nsNSSShutDownList::doPK11Logout
     }
   }
 
   return NS_OK;
 }
 
 nsresult nsNSSShutDownList::evaporateAllNSSResources()
 {
+  MOZ_ASSERT(NS_IsMainThread());
+  if (!NS_IsMainThread()) {
+    return NS_ERROR_NOT_SAME_THREAD;
+  }
+
   StaticMutexAutoLock lock(sListLock);
+  // Other threads can acquire an nsNSSShutDownPreventionLock and cause this
+  // thread to block when it calls restructActivityToCurrentThread, below. If
+  // those other threads then attempt to create an object that must be
+  // remembered by the shut down list, they will call
+  // nsNSSShutDownList::remember, which attempts to acquire sListLock.
+  // Consequently, holding sListLock while we're in
+  // restrictActivityToCurrentThread would result in deadlock. sListLock
+  // protects the singleton, so if we enforce that the singleton only be created
+  // and destroyed on the main thread, and if we similarly enforce that this
+  // function is only called on the main thread, what we can do is check that
+  // the singleton hasn't already gone away and then we don't actually have to
+  // hold sListLock while calling restrictActivityToCurrentThread.
   if (!singleton) {
     return NS_OK;
   }
 
-  PRStatus rv = singleton->mActivityState.restrictActivityToCurrentThread();
-  if (rv != PR_SUCCESS) {
-    MOZ_LOG(gPIPNSSLog, LogLevel::Debug, ("failed to restrict activity to current thread\n"));
-    return NS_ERROR_FAILURE;
+  {
+    StaticMutexAutoUnlock unlock(sListLock);
+    PRStatus rv = singleton->mActivityState.restrictActivityToCurrentThread();
+    if (rv != PR_SUCCESS) {
+      MOZ_LOG(gPIPNSSLog, LogLevel::Debug,
+              ("failed to restrict activity to current thread"));
+      return NS_ERROR_FAILURE;
+    }
   }
 
-  MOZ_LOG(gPIPNSSLog, LogLevel::Debug, ("now evaporating NSS resources\n"));
+  MOZ_LOG(gPIPNSSLog, LogLevel::Debug, ("now evaporating NSS resources"));
 
   // Never free more than one entry, because other threads might be calling
   // us and remove themselves while we are iterating over the list,
   // and the behaviour of changing the list while iterating is undefined.
   while (singleton) {
     auto iter = singleton->mObjects.Iter();
     if (iter.Done()) {
       break;
@@ -186,16 +207,17 @@ bool nsNSSShutDownList::construct(const 
     singleton = new nsNSSShutDownList();
   }
 
   return !!singleton;
 }
 
 void nsNSSShutDownList::shutdown()
 {
+  MOZ_ASSERT(NS_IsMainThread());
   StaticMutexAutoLock lock(sListLock);
   sInShutdown = true;
 
   if (singleton) {
     delete singleton;
   }
 }
 
new file mode 100644
--- /dev/null
+++ b/security/manager/ssl/tests/unit/test_nss_shutdown.js
@@ -0,0 +1,44 @@
+/* -*- indent-tabs-mode: nil; js-indent-level: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+"use strict";
+
+// This test attempts to ensure that PSM doesn't deadlock or crash when shutting
+// down NSS while a background thread is attempting to use NSS.
+// Uses test_signed_apps/valid_app_1.zip from test_signed_apps.js.
+
+function startAsyncNSSOperation(certdb, appFile) {
+  return new Promise((resolve, reject) => {
+    certdb.openSignedAppFileAsync(Ci.nsIX509CertDB.AppXPCShellRoot, appFile,
+      function(rv, aZipReader, aSignerCert) {
+        // rv will either indicate success (if NSS hasn't been shut down yet) or
+        // it will be some error code that varies depending on when NSS got shut
+        // down. As such, there's nothing really to check here. Just resolve the
+        // promise to continue execution.
+        resolve();
+      });
+  });
+}
+
+add_task(function* () {
+  do_get_profile();
+  let psm = Cc["@mozilla.org/psm;1"]
+              .getService(Ci.nsISupports)
+              .QueryInterface(Ci.nsIObserver);
+  let certdb = Cc["@mozilla.org/security/x509certdb;1"]
+                 .getService(Ci.nsIX509CertDB);
+  let appFile = do_get_file("test_signed_apps/valid_app_1.zip");
+
+  let promises = [];
+  for (let i = 0; i < 25; i++) {
+    promises.push(startAsyncNSSOperation(certdb, appFile));
+  }
+  // Trick PSM into thinking it should shut down NSS. If this test doesn't
+  // hang or crash, we're good.
+  psm.observe(null, "profile-before-change", null);
+  for (let i = 0; i < 25; i++) {
+    promises.push(startAsyncNSSOperation(certdb, appFile));
+  }
+  yield Promise.all(promises);
+});
--- a/security/manager/ssl/tests/unit/xpcshell.ini
+++ b/security/manager/ssl/tests/unit/xpcshell.ini
@@ -81,16 +81,17 @@ run-sequentially = hardcoded ports
 [test_local_cert.js]
 [test_logoutAndTeardown.js]
 run-sequentially = hardcoded ports
 [test_name_constraints.js]
 [test_nsCertType.js]
 run-sequentially = hardcoded ports
 [test_nsIX509Cert_utf8.js]
 [test_nsIX509CertValidity.js]
+[test_nss_shutdown.js]
 [test_ocsp_caching.js]
 run-sequentially = hardcoded ports
 [test_ocsp_enabled_pref.js]
 run-sequentially = hardcoded ports
 [test_ocsp_fetch_method.js]
 # OCSP requests in this test time out on slow B2G Emulator debug builds.
 # See Bug 1147725.
 skip-if = toolkit == 'gonk' && debug