Bug 707275, Part 1: Add telemetry for TLS intolerance, r=keeler
authorBrian Smith <brian@briansmith.org>
Sun, 17 Nov 2013 13:47:23 -0800 (2013-11-17)
changeset 155079 2f3ce72ec2d8e48ae2b816fd8248b10e917b7295
parent 155078 db7cbf61b0019782c9a270248ca5bc1f6899d374
child 155080 53dffb3da44637c3aff3db9634f712f1f0985834
push id25662
push useremorley@mozilla.com
push dateMon, 18 Nov 2013 10:53:03 +0000 (2013-11-18)
treeherdermozilla-central@59f6274ce8f1 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewerskeeler
bugs707275
milestone28.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 707275, Part 1: Add telemetry for TLS intolerance, r=keeler
security/manager/ssl/src/nsNSSIOLayer.cpp
toolkit/components/telemetry/Histograms.json
--- a/security/manager/ssl/src/nsNSSIOLayer.cpp
+++ b/security/manager/ssl/src/nsNSSIOLayer.cpp
@@ -35,16 +35,17 @@
 #include "nsIConsoleService.h"
 #include "PSMRunnable.h"
 #include "ScopedNSSTypes.h"
 #include "SharedSSLState.h"
 #include "mozilla/Preferences.h"
 #include "nsContentUtils.h"
 
 #include "ssl.h"
+#include "sslproto.h"
 #include "secerr.h"
 #include "sslerr.h"
 #include "secder.h"
 #include "keyhi.h"
 
 using namespace mozilla;
 using namespace mozilla::psm;
 
@@ -882,71 +883,16 @@ nsDumpBuffer(unsigned char *buf, int len
   PR_LOG(gPIPNSSLog, PR_LOG_DEBUG, ("%s%s\n", hexbuf, chrbuf));
 }
 
 #define DEBUG_DUMP_BUFFER(buf,len) nsDumpBuffer(buf,len)
 #else
 #define DEBUG_DUMP_BUFFER(buf,len)
 #endif
 
-static bool
-isNonSSLErrorThatWeAllowToRetry(int32_t err, bool withInitialCleartext)
-{
-  switch (err)
-  {
-    case PR_CONNECT_RESET_ERROR:
-      if (!withInitialCleartext)
-        return true;
-      break;
-    
-    case PR_END_OF_FILE_ERROR:
-      return true;
-  }
-
-  return false;
-}
-
-static bool
-isTLSIntoleranceError(int32_t err, bool withInitialCleartext)
-{
-  // This function is supposed to decide, which error codes should
-  // be used to conclude server is TLS intolerant.
-  // Note this only happens during the initial SSL handshake.
-  // 
-  // When not using a proxy we'll see a connection reset error.
-  // When using a proxy, we'll see an end of file error.
-  // In addition check for some error codes where it is reasonable
-  // to retry without TLS.
-
-  if (isNonSSLErrorThatWeAllowToRetry(err, withInitialCleartext))
-    return true;
-
-  switch (err)
-  {
-    case SSL_ERROR_BAD_MAC_ALERT:
-    case SSL_ERROR_BAD_MAC_READ:
-    case SSL_ERROR_HANDSHAKE_FAILURE_ALERT:
-    case SSL_ERROR_HANDSHAKE_UNEXPECTED_ALERT:
-    case SSL_ERROR_CLIENT_KEY_EXCHANGE_FAILURE:
-    case SSL_ERROR_ILLEGAL_PARAMETER_ALERT:
-    case SSL_ERROR_NO_CYPHER_OVERLAP:
-    case SSL_ERROR_BAD_SERVER:
-    case SSL_ERROR_BAD_BLOCK_PADDING:
-    case SSL_ERROR_UNSUPPORTED_VERSION:
-    case SSL_ERROR_PROTOCOL_VERSION_ALERT:
-    case SSL_ERROR_RX_MALFORMED_FINISHED:
-    case SSL_ERROR_BAD_HANDSHAKE_HASH_VALUE:
-    case SSL_ERROR_DECODE_ERROR_ALERT:
-    case SSL_ERROR_RX_UNKNOWN_ALERT:
-      return true;
-  }
-  
-  return false;
-}
-
 class SSLErrorRunnable : public SyncRunnableBase
 {
  public:
   SSLErrorRunnable(nsNSSSocketInfo * infoObject, 
                    ::mozilla::psm::SSLErrorMessageType errtype, 
                    PRErrorCode errorCode)
     : mInfoObject(infoObject)
     , mErrType(errtype)
@@ -961,20 +907,110 @@ class SSLErrorRunnable : public SyncRunn
   
   RefPtr<nsNSSSocketInfo> mInfoObject;
   ::mozilla::psm::SSLErrorMessageType mErrType;
   const PRErrorCode mErrorCode;
 };
 
 namespace {
 
+bool
+retryDueToTLSIntolerance(PRErrorCode err, nsNSSSocketInfo* socketInfo)
+{
+  // This function is supposed to decide which error codes should
+  // be used to conclude server is TLS intolerant.
+  // Note this only happens during the initial SSL handshake.
+
+  uint32_t reason;
+
+  switch (err)
+  {
+    case SSL_ERROR_BAD_MAC_ALERT: reason = 1; break;
+    case SSL_ERROR_BAD_MAC_READ: reason = 2; break;
+    case SSL_ERROR_HANDSHAKE_FAILURE_ALERT: reason = 3; break;
+    case SSL_ERROR_HANDSHAKE_UNEXPECTED_ALERT: reason = 4; break;
+    case SSL_ERROR_CLIENT_KEY_EXCHANGE_FAILURE: reason = 5; break;
+    case SSL_ERROR_ILLEGAL_PARAMETER_ALERT: reason = 6; break;
+    case SSL_ERROR_NO_CYPHER_OVERLAP: reason = 7; break;
+    case SSL_ERROR_BAD_SERVER: reason = 8; break;
+    case SSL_ERROR_BAD_BLOCK_PADDING: reason = 9; break;
+    case SSL_ERROR_UNSUPPORTED_VERSION: reason = 10; break;
+    case SSL_ERROR_PROTOCOL_VERSION_ALERT: reason = 11; break;
+    case SSL_ERROR_RX_MALFORMED_FINISHED: reason = 12; break;
+    case SSL_ERROR_BAD_HANDSHAKE_HASH_VALUE: reason = 13; break;
+    case SSL_ERROR_DECODE_ERROR_ALERT: reason = 14; break;
+    case SSL_ERROR_RX_UNKNOWN_ALERT: reason = 15; break;
+
+    case PR_CONNECT_RESET_ERROR: reason = 16; goto conditional;
+    case PR_END_OF_FILE_ERROR: reason = 17; goto conditional;
+
+      // When not using a proxy we'll see a connection reset error.
+      // When using a proxy, we'll see an end of file error.
+      // In addition check for some error codes where it is reasonable
+      // to retry without TLS.
+
+      // Don't allow STARTTLS connections to fall back on connection resets or
+      // EOF.
+    conditional:
+      if (socketInfo->GetHasCleartextPhase()) {
+        return false;
+      }
+      break;
+
+    default:
+      return false;
+  }
+
+  Telemetry::ID pre;
+  Telemetry::ID post;
+  SSLVersionRange range = socketInfo->GetTLSVersionRange();
+  switch (range.max) {
+    case SSL_LIBRARY_VERSION_TLS_1_2:
+      pre = Telemetry::SSL_TLS12_INTOLERANCE_REASON_PRE;
+      post = Telemetry::SSL_TLS12_INTOLERANCE_REASON_POST;
+      break;
+    case SSL_LIBRARY_VERSION_TLS_1_1:
+      pre = Telemetry::SSL_TLS11_INTOLERANCE_REASON_PRE;
+      post = Telemetry::SSL_TLS11_INTOLERANCE_REASON_POST;
+      break;
+    case SSL_LIBRARY_VERSION_TLS_1_0:
+      pre = Telemetry::SSL_TLS10_INTOLERANCE_REASON_PRE;
+      post = Telemetry::SSL_TLS10_INTOLERANCE_REASON_POST;
+      break;
+    case SSL_LIBRARY_VERSION_3_0:
+      pre = Telemetry::SSL_SSL30_INTOLERANCE_REASON_PRE;
+      post = Telemetry::SSL_SSL30_INTOLERANCE_REASON_POST;
+      break;
+    default:
+      MOZ_CRASH("impossible TLS version");
+      return false;
+  }
+
+  // The difference between _PRE and _POST represents how often we avoided
+  // TLS intolerance fallback due to remembered tolerance.
+  Telemetry::Accumulate(pre, reason);
+
+  if (!socketInfo->SharedState().IOLayerHelpers()
+                 .rememberIntolerantAtVersion(socketInfo->GetHostName(),
+                                              socketInfo->GetPort(),
+                                              range.min, range.max)) {
+    return false;
+  }
+
+  Telemetry::Accumulate(post, reason);
+
+  return true;
+}
+
 int32_t checkHandshake(int32_t bytesTransfered, bool wasReading,
                        PRFileDesc* ssl_layer_fd,
                        nsNSSSocketInfo *socketInfo)
 {
+  PRErrorCode err = PR_GetError();
+
   // This is where we work around all of those SSL servers that don't 
   // conform to the SSL spec and shutdown a connection when we request
   // SSL v3.1 (aka TLS).  The spec says the client says what version
   // of the protocol we're willing to perform, in our case SSL v3.1
   // In its response, the server says which version it wants to perform.
   // Many servers out there only know how to do v3.0.  Next, we're supposed
   // to send back the version of the protocol we requested (ie v3.1).  At
   // this point many servers's implementations are broken and they shut
@@ -989,32 +1025,23 @@ int32_t checkHandshake(int32_t bytesTran
   // This depends on the fact that Cert UI will not be shown again,
   // should the user override the bad cert.
 
   bool handleHandshakeResultNow = socketInfo->IsHandshakePending();
 
   bool wantRetry = false;
 
   if (0 > bytesTransfered) {
-    int32_t err = PR_GetError();
-
     if (handleHandshakeResultNow) {
       if (PR_WOULD_BLOCK_ERROR == err) {
+        PR_SetError(err, 0);
         return bytesTransfered;
       }
 
-      if (!wantRetry // no decision yet
-          && isTLSIntoleranceError(err, socketInfo->GetHasCleartextPhase()))
-      {
-        SSLVersionRange range = socketInfo->GetTLSVersionRange();
-        wantRetry = socketInfo->SharedState().IOLayerHelpers()
-                      .rememberIntolerantAtVersion(socketInfo->GetHostName(),
-                                                   socketInfo->GetPort(),
-                                                   range.min, range.max);
-      }
+      wantRetry = retryDueToTLSIntolerance(err, socketInfo);
     }
     
     // This is the common place where we trigger non-cert-errors on a SSL
     // socket. This might be reached at any time of the connection.
     //
     // The socketInfo->GetErrorCode() check is here to ensure we don't try to
     // do the synchronous dispatch to the main thread unnecessarily after we've
     // already handled a certificate error. (SSLErrorRunnable calls
@@ -1028,45 +1055,41 @@ int32_t checkHandshake(int32_t bytesTran
                                                              err));
       (void) runnable->DispatchToMainThreadAndWait();
     }
   }
   else if (wasReading && 0 == bytesTransfered) // zero bytes on reading, socket closed
   {
     if (handleHandshakeResultNow)
     {
-      if (!wantRetry // no decision yet
-          && !socketInfo->GetHasCleartextPhase()) // mirror PR_CONNECT_RESET_ERROR treament
-      {
-        SSLVersionRange range = socketInfo->GetTLSVersionRange();
-        wantRetry = socketInfo->SharedState().IOLayerHelpers()
-                        .rememberIntolerantAtVersion(socketInfo->GetHostName(),
-                                                     socketInfo->GetPort(),
-                                                     range.min, range.max);
-      }
+      wantRetry = retryDueToTLSIntolerance(PR_END_OF_FILE_ERROR, socketInfo);
     }
   }
 
   if (wantRetry) {
     PR_LOG(gPIPNSSLog, PR_LOG_DEBUG,
            ("[%p] checkHandshake: will retry with lower max TLS version\n",
             ssl_layer_fd));
     // We want to cause the network layer to retry the connection.
-    PR_SetError(PR_CONNECT_RESET_ERROR, 0);
+    err = PR_CONNECT_RESET_ERROR;
     if (wasReading)
       bytesTransfered = -1;
   }
 
   // TLS intolerant servers only cause the first transfer to fail, so let's 
   // set the HandshakePending attribute to false so that we don't try the logic
   // above again in a subsequent transfer.
   if (handleHandshakeResultNow) {
     socketInfo->SetHandshakeNotPending();
   }
   
+  if (bytesTransfered < 0) {
+    PR_SetError(err, 0);
+  }
+
   return bytesTransfered;
 }
 
 }
 
 static int16_t
 nsSSLIOLayerPoll(PRFileDesc * fd, int16_t in_flags, int16_t *out_flags)
 {
--- a/toolkit/components/telemetry/Histograms.json
+++ b/toolkit/components/telemetry/Histograms.json
@@ -4490,10 +4490,50 @@
     "description": "Time spent to find out a cache entry is missing"
   },
   "NETWORK_CACHE_V1_HIT_TIME_MS": {
     "kind": "exponential",
     "high": "10000",
     "n_buckets": 50,
     "extended_statistics_ok": true,
     "description": "Time spent to open an existing cache entry"
+  },
+  "SSL_TLS12_INTOLERANCE_REASON_PRE": {
+    "kind": "enumerated",
+    "n_values": 64,
+    "description": "detected symptom of TLS 1.2 intolerance, before considering historical info"
+  },
+  "SSL_TLS12_INTOLERANCE_REASON_POST": {
+    "kind": "enumerated",
+    "n_values": 64,
+    "description": "detected symptom of TLS 1.2 intolerance, after considering historical info"
+  },
+  "SSL_TLS11_INTOLERANCE_REASON_PRE": {
+    "kind": "enumerated",
+    "n_values": 64,
+    "description": "detected symptom of TLS 1.1 intolerance, before considering historical info"
+  },
+  "SSL_TLS11_INTOLERANCE_REASON_POST": {
+    "kind": "enumerated",
+    "n_values": 64,
+    "description": "detected symptom of TLS 1.1 intolerance, after considering historical info"
+  },
+  "SSL_TLS10_INTOLERANCE_REASON_PRE": {
+    "kind": "enumerated",
+    "n_values": 64,
+    "description": "detected symptom of TLS 1.0 intolerance, before considering historical info"
+  },
+  "SSL_TLS10_INTOLERANCE_REASON_POST": {
+    "kind": "enumerated",
+    "n_values": 64,
+    "description": "detected symptom of TLS 1.0 intolerance, after considering historical info"
+  },
+  "SSL_SSL30_INTOLERANCE_REASON_PRE": {
+    "kind": "enumerated",
+    "n_values": 64,
+    "description": "detected symptom of SSL 3.0 intolerance, before considering historical info"
+  },
+  "SSL_SSL30_INTOLERANCE_REASON_POST": {
+    "kind": "enumerated",
+    "n_values": 64,
+    "description": "detected symptom of SSL 3.0 intolerance, after considering historical info"
   }
 }