bug 686312 - websockets should not reject non character utf-8 sequence as invalid r=dbaron
authorPatrick McManus <mcmanus@ducksong.com>
Tue, 20 Sep 2011 18:13:43 -0400
changeset 77228 f98f144896ad67aa57e737b8b74eaa95d42ad711
parent 77227 a84273cf364468ced0be05f8ea860c70a95c53c5
child 77229 ef007a375b3475e4f622b06453affcaba41a0f85
push id21187
push usermak77@bonardo.net
push dateWed, 21 Sep 2011 08:36:41 +0000
treeherdermozilla-central@3178f1c42505 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersdbaron
bugs686312
milestone9.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
bug 686312 - websockets should not reject non character utf-8 sequence as invalid r=dbaron
content/base/test/file_websocket_wsh.py
content/base/test/test_websocket.html
netwerk/protocol/websocket/WebSocketChannel.cpp
xpcom/string/public/nsReadableUtils.h
xpcom/string/src/nsReadableUtils.cpp
--- a/content/base/test/file_websocket_wsh.py
+++ b/content/base/test/file_websocket_wsh.py
@@ -100,10 +100,18 @@ def web_socket_transfer_data(request):
     while not request.client_terminated:
       msgutil.receive_message(request)
     global test37code
     test37code = request.ws_close_code
     global test37reason
     test37reason = request.ws_close_reason
   elif request.ws_protocol == "test-37c":
     request.ws_stream.close_connection(test37code, test37reason)
+  elif request.ws_protocol == "test-42":
+    # Echo back 3 messages
+    msgutil.send_message(request,
+                         msgutil.receive_message(request))
+    msgutil.send_message(request, 
+                         msgutil.receive_message(request))
+    msgutil.send_message(request, 
+                         msgutil.receive_message(request))
   while not request.client_terminated:
     msgutil.receive_message(request)
--- a/content/base/test/test_websocket.html
+++ b/content/base/test/test_websocket.html
@@ -57,20 +57,22 @@
  * 33. default close code test
  * 34. test for receiving custom close code and reason
  * 35. test for sending custom close code and reason
  * 36. negative test for sending out of range close code
  * 37. negative test for too long of a close reason
  * 38. ensure extensions attribute is defined
  * 39. a basic wss:// connectivity test
  * 40. negative test for wss:// with no cert
+ * 41. HSTS
+ * 42. non-char utf-8 sequences
  */
 
 var first_test = 1;
-var last_test = 40;
+var last_test = 42;
 
 var current_test = first_test;
 
 var all_ws = [];
 
 function shouldNotOpen(e)
 {
   var ws = e.target;
@@ -1118,16 +1120,54 @@ function test40()
   ws.onclose = function(e)
   {
     ok(true, "test 40 close");
     ok(status_test40 == "started", "test 40 did not open"); 
     doTest(41);
   };
 }
 
+function test41()
+{
+ // reserve test41 for HSTS - bug 664284
+ doTest(42);
+}
+
+function test42()
+{
+// test some utf-8 non-characters. They should be allowed in the
+// websockets context. Test via round trip echo.
+  var ws = CreateTestWS("ws://mochi.test:8888/tests/content/base/test/file_websocket", "test-42");
+  var data = ["U+FFFE ￾",
+              "U+FFFF ￿",
+              "U+10FFFF 􏿿"];
+  var index = 0;
+
+  ws.onopen = function()
+  {
+    ws.send(data[0]);
+    ws.send(data[1]);
+    ws.send(data[2]);
+  }
+
+  ws.onmessage = function(e)
+  {
+    ok(e.data == data[index], "bad received message in test-42! index="+index);
+    index++;
+    if (index == 3)
+      ws.close();
+  }
+
+  ws.onclose = function(e)
+  {
+    doTest(43);
+  }
+}
+
+
 var ranAllTests = false;
 
 function maybeFinished()
 {
   if (!ranAllTests)
     return;
 
   if (waitTest2Part1 || waitTest2Part2 || waitTest9 || waitTest10 ||
--- a/netwerk/protocol/websocket/WebSocketChannel.cpp
+++ b/netwerk/protocol/websocket/WebSocketChannel.cpp
@@ -924,17 +924,17 @@ WebSocketChannel::ProcessInput(PRUint8 *
       LOG(("WebSocketChannel:: text frame received\n"));
       if (mListener) {
         nsCString utf8Data((const char *)payload, payloadLength);
 
         // Section 8.1 says to replace received non utf-8 sequences
         // (which are non-conformant to send) with u+fffd,
         // but secteam feels that silently rewriting messages is
         // inappropriate - so we will fail the connection instead.
-        if (!IsUTF8(utf8Data)) {
+        if (!IsUTF8(utf8Data, PR_FALSE)) {
           LOG(("WebSocketChannel:: text frame invalid utf-8\n"));
           AbortSession(NS_ERROR_ILLEGAL_VALUE);
           return NS_ERROR_ILLEGAL_VALUE;
         }
 
         NS_DispatchToMainThread(new CallOnMessageAvailable(mListener, mContext,
                                                            utf8Data, -1));
       }
@@ -961,17 +961,17 @@ WebSocketChannel::ProcessInput(PRUint8 *
             mServerCloseReason.SetLength(msglen);
             memcpy(mServerCloseReason.BeginWriting(),
                    (const char *)payload + 2, msglen);
 
             // section 8.1 says to replace received non utf-8 sequences
             // (which are non-conformant to send) with u+fffd,
             // but secteam feels that silently rewriting messages is
             // inappropriate - so we will fail the connection instead.
-            if (!IsUTF8(mServerCloseReason)) {
+            if (!IsUTF8(mServerCloseReason, PR_FALSE)) {
               LOG(("WebSocketChannel:: close frame invalid utf-8\n"));
               AbortSession(NS_ERROR_ILLEGAL_VALUE);
               return NS_ERROR_ILLEGAL_VALUE;
             }
 
             LOG(("WebSocketChannel:: close msg %s\n",
                  mServerCloseReason.get()));
           }
--- a/xpcom/string/public/nsReadableUtils.h
+++ b/xpcom/string/public/nsReadableUtils.h
@@ -236,26 +236,39 @@ PRBool IsASCII( const nsACString& aStrin
    * Returns |PR_TRUE| if |aString| is a valid UTF-8 string.
    * XXX This is not bullet-proof and nor an all-purpose UTF-8 validator. 
    * It is mainly written to replace and roughly equivalent to
    *
    *    str.Equals(NS_ConvertUTF16toUTF8(NS_ConvertUTF8toUTF16(str)))
    *
    * (see bug 191541)
    * As such,  it does not check for non-UTF-8 7bit encodings such as 
-   * ISO-2022-JP and HZ. However, it filters out  UTF-8 representation
-   * of surrogate codepoints and non-characters ( 0xFFFE and 0xFFFF
-   * in planes 0 through 16.) as well as overlong UTF-8 sequences. 
-   * Also note that it regards UTF-8 sequences corresponding to 
-   * codepoints above 0x10FFFF as invalid in accordance with 
-   * http://www.ietf.org/internet-drafts/draft-yergeau-rfc2279bis-04.txt
+   * ISO-2022-JP and HZ. 
+   *
+   * It rejects sequences with the following errors:
+   *
+   * byte sequences that cannot be decoded into characters according to
+   *   UTF-8's rules (including cases where the input is part of a valid
+   *   UTF-8 sequence but starts or ends mid-character)
+   * overlong sequences (i.e., cases where a character was encoded
+   *   non-canonically by using more bytes than necessary)
+   * surrogate codepoints (i.e., the codepoints reserved for
+       representing astral characters in UTF-16)
+   * codepoints above the unicode range (i.e., outside the first 17
+   *   planes; higher than U+10FFFF), in accordance with
+   *   http://tools.ietf.org/html/rfc3629
+   * when aRejectNonChar is true (the default), any codepoint whose low
+   *   16 bits are 0xFFFE or 0xFFFF
+
    *
    * @param aString an 8-bit wide string to scan
+   * @param aRejectNonChar a boolean to control the rejection of utf-8
+   *        non characters
    */
-PRBool IsUTF8( const nsACString& aString );
+PRBool IsUTF8( const nsACString& aString, PRBool aRejectNonChar = PR_TRUE );
 
 PRBool ParseString(const nsACString& aAstring, char aDelimiter, 
                           nsTArray<nsCString>& aArray);
 
   /**
    * Converts case in place in the argument string.
    */
 void ToUpperCase( nsACString& );
--- a/xpcom/string/src/nsReadableUtils.cpp
+++ b/xpcom/string/src/nsReadableUtils.cpp
@@ -466,17 +466,17 @@ IsASCII( const nsACString& aString )
         if ( *c++ & NOT_ASCII )
           return PR_FALSE;
       }
 
     return PR_TRUE;
   }
 
 PRBool
-IsUTF8( const nsACString& aString )
+IsUTF8( const nsACString& aString, PRBool aRejectNonChar )
   {
     nsReadingIterator<char> done_reading;
     aString.EndReading(done_reading);
 
     PRInt32 state = 0;
     PRBool overlong = PR_FALSE;
     PRBool surrogate = PR_FALSE;
     PRBool nonchar = PR_FALSE;
@@ -534,16 +534,19 @@ IsUTF8( const nsACString& aString )
                     surrogate = PR_TRUE;
                     slower = 0x90;
                   }
               }
             else
               return PR_FALSE; // Not UTF-8 string
           }
           
+        if (nonchar && !aRejectNonChar)
+          nonchar = PR_FALSE;
+
         while ( ptr < end && state )
           {
             c = *ptr++;
             --state;
 
             // non-character : EF BF [BE-BF] or F[0-7] [89AB]F BF [BE-BF]
             if ( nonchar &&  
                  ( ( !state && c < 0xBE ) ||