Bug 1206283 - Improve detection of invalid UTF-16 sequences and add some tests. r=ttaubert f=keeler
author Jed Davis <jld@mozilla.com>
Fri, 05 Feb 2016 11:15:14 +0100
changeset 11856 1ba7cd83c6729baf4660f8bb3258ec6342a353f5
parent 11855 5fde729fdbff53653c4ede1dfd0cbe4e5ddad4f0
child 11857 5cbc92f72be32be8729c8331159fb0a149170941
push id 966
push user ttaubert@mozilla.com
push date Fri, 05 Feb 2016 10:22:44 +0000
reviewers ttaubert
bugs 1206283
Bug 1206283 - Improve detection of invalid UTF-16 sequences and add some tests. r=ttaubert f=keeler
lib/util/utf8.c
--- a/lib/util/utf8.c
+++ b/lib/util/utf8.c
@@ -317,17 +317,17 @@ sec_port_ucs2_utf8_conversion_function
       *outBufLen = 0;
       return PR_FALSE;
     }
 
     for( i = 0; i < inBufLen; i += 2 ) {
       if( (inBuf[i+H_0] == 0x00) && ((inBuf[i+H_1] & 0x80) == 0x00) ) len += 1;
       else if( inBuf[i+H_0] < 0x08 ) len += 2;
       else if( ((inBuf[i+0+H_0] & 0xDC) == 0xD8) ) {
-        if( ((inBuf[i+2+H_0] & 0xDC) == 0xDC) && ((inBufLen - i) > 2) ) {
+        if( ((inBufLen - i) > 2) && ((inBuf[i+2+H_0] & 0xDC) == 0xDC) ) {
           i += 2;
           len += 4;
         } else {
           return PR_FALSE;
         }
       }
       else len += 3;
     }
@@ -354,17 +354,17 @@ sec_port_ucs2_utf8_conversion_function
         outBuf[len+0] = 0xC0 | ((inBuf[i+H_0] & 0x07) << 2) 
                              | ((inBuf[i+H_1] & 0xC0) >> 6);
         outBuf[len+1] = 0x80 | ((inBuf[i+H_1] & 0x3F) >> 0);
 
         len += 2;
       } else if( (inBuf[i+H_0] & 0xDC) == 0xD8 ) {
         int abcde, BCDE;
 
-        PORT_Assert(((inBuf[i+2+H_0] & 0xDC) == 0xDC) && ((inBufLen - i) > 2));
+        PORT_Assert(((inBufLen - i) > 2) && ((inBuf[i+2+H_0] & 0xDC) == 0xDC) );
 
         /* D800-DBFF DC00-DFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
         /* 110110BC DEfghijk 110111lm nopqrstu ->
            { Let abcde = BCDE + 1 }
            11110abc 10defghi 10jklmno 10pqrstu */
 
         BCDE = ((inBuf[i+H_0] & 0x03) << 2) | ((inBuf[i+H_1] & 0xC0) >> 6);
         abcde = BCDE + 1;
@@ -1148,16 +1148,26 @@ char *utf8_bad[] = {
   "\xC3",
   "\xC3\xC3\x80",
   "\xED\xA0\x80",
   "\xED\xBF\x80",
   "\xED\xBF\xBF",
   "\xED\xA0\x80\xE0\xBF\xBF",
 };
 
+/* Ill-formed UTF-16 sequences used as negative test inputs.  Each row is a
+ * run of 16-bit code units in host byte order, terminated by a 0 code unit;
+ * test_utf16_bad_chars() byte-swaps them with htons() and expects the
+ * converter to reject every row. */
+uint16_t utf16_bad[][3] = {
+  /* leading surrogate not followed by trailing surrogate */
+  { 0xD800, 0, 0 },          /* input ends right after the leading surrogate */
+  { 0xD800, 0x41, 0 },       /* followed by U+0041 */
+  { 0xD800, 0xfe, 0 },       /* followed by U+00FE */
+  { 0xD800, 0x3bb, 0 },      /* followed by U+03BB */
+  { 0xD800, 0xD800, 0 },     /* followed by a second leading surrogate */
+};
+
 static void
 dump_utf8
 (
   char *word,
   unsigned char *utf8,
   char *end
 )
 {
@@ -1446,16 +1456,48 @@ test_utf8_bad_chars
     }
 
   }
 
   return rv;
 }
 
 static PRBool
+test_utf16_bad_chars(void)
+{
+  PRBool rv = PR_TRUE;
+  int i;
+
+  for( i = 0; i < sizeof(utf16_bad)/sizeof(utf16_bad[0]); ++i ) {
+    PRBool result;
+    unsigned char destbuf[18];
+    unsigned int j, len, destlen;
+    uint16_t *buf;
+
+    for( len = 0; utf16_bad[i][len] != 0; ++len )
+      /* nothing */;
+
+    buf = malloc(sizeof(uint16_t) * len);
+    for( j = 0; j < len; ++j )
+      buf[j] = htons(utf16_bad[i][j]);
+
+    result = sec_port_ucs2_utf8_conversion_function(PR_FALSE,
+        (unsigned char *)buf, sizeof(uint16_t) * len, destbuf, sizeof(destbuf),
+        &destlen);
+    if( result ) {
+      fprintf(stdout, "Failed to detect bad UTF-16 string conversion for "
+          "{0x%x,0x%x} (UTF-8 len = %u)\n", utf16_bad[i][0], utf16_bad[i][1],
+          destlen);
+      rv = PR_FALSE;
+    }
+    free(buf);
+  }
+}
+
+static PRBool
 test_iso88591_chars
 (
   void
 )
 {
   PRBool rv = PR_TRUE;
   int i;
 
@@ -1800,16 +1842,17 @@ main
 )
 {
   byte_order();
 
   if( test_ucs4_chars() &&
       test_ucs2_chars() &&
       test_utf16_chars() &&
       test_utf8_bad_chars() &&
+      test_utf16_bad_chars() &&
       test_iso88591_chars() &&
       test_zeroes() &&
       test_multichars() &&
       PR_TRUE ) {
     fprintf(stderr, "PASS\n");
     return 1;
   } else {
     fprintf(stderr, "FAIL\n");