Bug 1075991 - Tracking cause of inappropriate TLS version fallback, r=keeler
authorMartin Thomson <martin.thomson@gmail.com>
Fri, 03 Oct 2014 11:01:24 -0700
changeset 231989 59ef591ec31895fff8d6ca429832e93e94462356
parent 231988 b1a9b31ab26e28c99b9583a31e5ea5c97f16aa68
child 231990 cb3fd4a4b7c55208b7f657f5e5bd55bc975292bc
push id4187
push userbhearsum@mozilla.com
push dateFri, 28 Nov 2014 15:29:12 +0000
treeherdermozilla-beta@f23cc6a30c11 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewerskeeler
bugs1075991
milestone35.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1075991 - Tracking cause of inappropriate TLS version fallback, r=keeler
security/manager/ssl/src/nsNSSIOLayer.cpp
toolkit/components/telemetry/Histograms.json
--- a/security/manager/ssl/src/nsNSSIOLayer.cpp
+++ b/security/manager/ssl/src/nsNSSIOLayer.cpp
@@ -1039,79 +1039,98 @@ class SSLErrorRunnable : public SyncRunn
 
   RefPtr<nsNSSSocketInfo> mInfoObject;
   ::mozilla::psm::SSLErrorMessageType mErrType;
   const PRErrorCode mErrorCode;
 };
 
 namespace {
 
+uint32_t tlsIntoleranceTelemetryBucket(PRErrorCode err)
+{
+  // returns a numeric code for where we track various errors in telemetry
+  // only errors that cause version fallback are tracked,
+  // so this is also used to determine which errors can cause version fallback
+  switch (err) {
+    case SSL_ERROR_BAD_MAC_ALERT: return 1;
+    case SSL_ERROR_BAD_MAC_READ: return 2;
+    case SSL_ERROR_HANDSHAKE_FAILURE_ALERT: return 3;
+    case SSL_ERROR_HANDSHAKE_UNEXPECTED_ALERT: return 4;
+    case SSL_ERROR_CLIENT_KEY_EXCHANGE_FAILURE: return 5;
+    case SSL_ERROR_ILLEGAL_PARAMETER_ALERT: return 6;
+    case SSL_ERROR_NO_CYPHER_OVERLAP: return 7;
+    case SSL_ERROR_BAD_SERVER: return 8;
+    case SSL_ERROR_BAD_BLOCK_PADDING: return 9;
+    case SSL_ERROR_UNSUPPORTED_VERSION: return 10;
+    case SSL_ERROR_PROTOCOL_VERSION_ALERT: return 11;
+    case SSL_ERROR_RX_MALFORMED_FINISHED: return 12;
+    case SSL_ERROR_BAD_HANDSHAKE_HASH_VALUE: return 13;
+    case SSL_ERROR_DECODE_ERROR_ALERT: return 14;
+    case SSL_ERROR_RX_UNKNOWN_ALERT: return 15;
+    case PR_CONNECT_RESET_ERROR: return 16;
+    case PR_END_OF_FILE_ERROR: return 17;
+    default: return 0;
+  }
+}
+
 bool
 retryDueToTLSIntolerance(PRErrorCode err, nsNSSSocketInfo* socketInfo)
 {
   // This function is supposed to decide which error codes should
   // be used to conclude server is TLS intolerant.
   // Note this only happens during the initial SSL handshake.
 
   SSLVersionRange range = socketInfo->GetTLSVersionRange();
 
-  uint32_t reason;
-  switch (err) {
-    case SSL_ERROR_INAPPROPRIATE_FALLBACK_ALERT:
-      // This is a clear signal that we've fallen back too many versions.  Treat
-      // this as a hard failure now, but also mark the next higher version as
-      // being tolerant so that later attempts don't use this version (i.e.,
-      // range.max), which makes the error unrecoverable without a full restart.
-      socketInfo->SharedState().IOLayerHelpers()
-        .rememberTolerantAtVersion(socketInfo->GetHostName(),
-                                   socketInfo->GetPort(),
-                                   range.max + 1);
-      return false;
-
-    case SSL_ERROR_BAD_MAC_ALERT: reason = 1; break;
-    case SSL_ERROR_BAD_MAC_READ: reason = 2; break;
-    case SSL_ERROR_HANDSHAKE_FAILURE_ALERT: reason = 3; break;
-    case SSL_ERROR_HANDSHAKE_UNEXPECTED_ALERT: reason = 4; break;
-    case SSL_ERROR_CLIENT_KEY_EXCHANGE_FAILURE: reason = 5; break;
-    case SSL_ERROR_ILLEGAL_PARAMETER_ALERT: reason = 6; break;
-    case SSL_ERROR_NO_CYPHER_OVERLAP: reason = 7; break;
-    case SSL_ERROR_BAD_SERVER: reason = 8; break;
-    case SSL_ERROR_BAD_BLOCK_PADDING: reason = 9; break;
-    case SSL_ERROR_UNSUPPORTED_VERSION: reason = 10; break;
-    case SSL_ERROR_PROTOCOL_VERSION_ALERT: reason = 11; break;
-    case SSL_ERROR_RX_MALFORMED_FINISHED: reason = 12; break;
-    case SSL_ERROR_BAD_HANDSHAKE_HASH_VALUE: reason = 13; break;
-    case SSL_ERROR_DECODE_ERROR_ALERT: reason = 14; break;
-    case SSL_ERROR_RX_UNKNOWN_ALERT: reason = 15; break;
-
-    case PR_CONNECT_RESET_ERROR: reason = 16; goto conditional;
-    case PR_END_OF_FILE_ERROR: reason = 17; goto conditional;
-
-      // When not using a proxy we'll see a connection reset error.
-      // When using a proxy, we'll see an end of file error.
-      // In addition check for some error codes where it is reasonable
-      // to retry without TLS.
-
-      // Don't allow STARTTLS connections to fall back on connection resets or
-      // EOF. Also, don't fall back from TLS 1.0 to SSL 3.0 for connection
-      // resets, because connection resets have too many false positives,
-      // and we want to maximize how often we send TLS 1.0+ with extensions
-      // if at all reasonable. Unfortunately, it appears we have to allow
-      // fallback from TLS 1.2 and TLS 1.1 for connection resets due to bad
-      // servers and possibly bad intermediaries.
-    conditional:
-      if ((err == PR_CONNECT_RESET_ERROR &&
-           range.max <= SSL_LIBRARY_VERSION_TLS_1_0) ||
-          socketInfo->GetForSTARTTLS()) {
-        return false;
-      }
-      break;
-
-    default:
-      return false;
+  if (err == SSL_ERROR_INAPPROPRIATE_FALLBACK_ALERT) {
+    // This is a clear signal that we've fallen back too many versions.  Treat
+    // this as a hard failure now, but also mark the next higher version as
+    // being tolerant so that later attempts don't use this version (i.e.,
+    // range.max), which makes the error unrecoverable without a full restart.
+
+    // First, track the original cause of the version fallback.  This uses the
+    // same buckets as the telemetry below, except that bucket 0 will include
+    // all cases where there wasn't an original reason.
+    PRErrorCode originalReason = socketInfo->SharedState().IOLayerHelpers()
+      .getIntoleranceReason(socketInfo->GetHostName(), socketInfo->GetPort());
+    Telemetry::Accumulate(Telemetry::SSL_VERSION_FALLBACK_INAPPROPRIATE,
+                          tlsIntoleranceTelemetryBucket(originalReason));
+
+    socketInfo->SharedState().IOLayerHelpers()
+      .rememberTolerantAtVersion(socketInfo->GetHostName(),
+                                 socketInfo->GetPort(),
+                                 range.max + 1);
+
+    return false;
+  }
+
+  // When not using a proxy we'll see a connection reset error.
+  // When using a proxy, we'll see an end of file error.
+  // In addition check for some error codes where it is reasonable
+  // to retry without TLS.
+
+  // Don't allow STARTTLS connections to fall back on connection resets or
+  // EOF. Also, don't fall back from TLS 1.0 to SSL 3.0 for connection
+  // resets, because connection resets have too many false positives,
+  // and we want to maximize how often we send TLS 1.0+ with extensions
+  // if at all reasonable. Unfortunately, it appears we have to allow
+  // fallback from TLS 1.2 and TLS 1.1 for connection resets due to bad
+  // servers and possibly bad intermediaries.
+  if (err == PR_CONNECT_RESET_ERROR &&
+      range.max <= SSL_LIBRARY_VERSION_TLS_1_0) {
+    return false;
+  }
+  if ((err == PR_CONNECT_RESET_ERROR || err == PR_END_OF_FILE_ERROR)
+      && socketInfo->GetForSTARTTLS()) {
+    return false;
+  }
+
+  uint32_t reason = tlsIntoleranceTelemetryBucket(err);
+  if (reason == 0) {
+    return false;
   }
 
   Telemetry::ID pre;
   Telemetry::ID post;
   switch (range.max) {
     case SSL_LIBRARY_VERSION_TLS_1_2:
       pre = Telemetry::SSL_TLS12_INTOLERANCE_REASON_PRE;
       post = Telemetry::SSL_TLS12_INTOLERANCE_REASON_POST;
--- a/toolkit/components/telemetry/Histograms.json
+++ b/toolkit/components/telemetry/Histograms.json
@@ -6394,16 +6394,22 @@
     "description": "detected symptom of SSL 3.0 intolerance, before considering historical info"
   },
   "SSL_SSL30_INTOLERANCE_REASON_POST": {
     "expires_in_version": "never",
     "kind": "enumerated",
     "n_values": 64,
     "description": "detected symptom of SSL 3.0 intolerance, after considering historical info"
   },
+  "SSL_VERSION_FALLBACK_INAPPROPRIATE": {
+    "expires_in_version": "never",
+    "kind": "enumerated",
+    "n_values": 64,
+    "description": "TLS/SSL version intolerance was falsely detected, server rejected handshake"
+  },
   "SSL_CIPHER_SUITE_FULL": {
     "expires_in_version": "never",
     "kind": "enumerated",
     "n_values": 128,
     "description": "Negotiated cipher suite in full handshake (see key in HandshakeCallback in nsNSSCallbacks.cpp)"
   },
   "SSL_CIPHER_SUITE_RESUMED": {
     "expires_in_version": "never",