Bug 1469021 - Unicode-escape unpaired surrogates in JSON-quoted strings so that JSON-quoted strings are always valid Unicode without any embedded unpaired surrogates. r=anba
author Jeff Walden <jwalden@mit.edu>
Thu, 09 Aug 2018 21:04:23 -0700
changeset 439721 55c19b23576a
parent 439720 9dd02ae2ee0a
child 439722 cb4aa4bcb7fa
push id 108651
push user jwalden@mit.edu
push date Fri, 05 Oct 2018 02:51:08 +0000
treeherder mozilla-inbound@e8aec0dccbb8 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers anba
bugs 1469021
milestone 64.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1469021 - Unicode-escape unpaired surrogates in JSON-quoted strings so that JSON-quoted strings are always valid Unicode without any embedded unpaired surrogates. r=anba
js/src/builtin/JSON.cpp
js/src/tests/non262/JSON/stringify-special-escapes.js
js/src/tests/non262/RegExp/split-trace.js
--- a/js/src/builtin/JSON.cpp
+++ b/js/src/builtin/JSON.cpp
@@ -67,36 +67,76 @@ InfallibleQuote(RangedPtr<const SrcCharT
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,  '\\', // rest are all zeros
     };
 
     /* Step 1. */
     *dstPtr++ = '"';
 
+    auto ToLowerHex = [](uint8_t u) {
+        MOZ_ASSERT(u <= 0xF);
+        return "0123456789abcdef"[u];
+    };
+
     /* Step 2. */
     while (srcBegin != srcEnd) {
-        SrcCharT c = *srcBegin++;
-        size_t escapeIndex = c % sizeof(escapeLookup);
-        Latin1Char escaped = escapeLookup[escapeIndex];
-        if (MOZ_LIKELY((escapeIndex != size_t(c)) || !escaped)) {
+        const SrcCharT c = *srcBegin++;
+
+        // Handle the Latin-1 cases.
+        if (MOZ_LIKELY(c < sizeof(escapeLookup))) {
+            Latin1Char escaped = escapeLookup[c];
+
+            // Directly copy non-escaped code points.
+            if (escaped == 0) {
+                *dstPtr++ = c;
+                continue;
+            }
+
+            // Escape the rest, elaborating Unicode escapes when needed.
+            *dstPtr++ = '\\';
+            *dstPtr++ = escaped;
+            if (escaped == 'u') {
+                *dstPtr++ = '0';
+                *dstPtr++ = '0';
+
+                uint8_t x = c >> 4;
+                MOZ_ASSERT(x < 10);
+                *dstPtr++ = '0' + x;
+
+                *dstPtr++ = ToLowerHex(c & 0xF);
+            }
+
+            continue;
+        }
+
+        // Non-ASCII non-surrogates are directly copied.
+        if (!unicode::IsSurrogate(c)) {
             *dstPtr++ = c;
             continue;
         }
+
+        // So too for complete surrogate pairs.
+        if (MOZ_LIKELY(unicode::IsLeadSurrogate(c) &&
+                       srcBegin < srcEnd &&
+                       unicode::IsTrailSurrogate(*srcBegin)))
+        {
+            *dstPtr++ = c;
+            *dstPtr++ = *srcBegin++;
+            continue;
+        }
+
+        // But lone surrogates are Unicode-escaped.
+        char32_t as32 = char32_t(c);
         *dstPtr++ = '\\';
-        *dstPtr++ = escaped;
-        if (escaped == 'u') {
-            MOZ_ASSERT(c < ' ');
-            MOZ_ASSERT((c >> 4) < 10);
-            uint8_t x = c >> 4, y = c % 16;
-            *dstPtr++ = '0';
-            *dstPtr++ = '0';
-            *dstPtr++ = '0' + x;
-            *dstPtr++ = y < 10 ? '0' + y : 'a' + (y - 10);
-        }
+        *dstPtr++ = 'u';
+        *dstPtr++ = ToLowerHex(as32 >> 12);
+        *dstPtr++ = ToLowerHex((as32 >> 8) & 0xF);
+        *dstPtr++ = ToLowerHex((as32 >> 4) & 0xF);
+        *dstPtr++ = ToLowerHex(as32 & 0xF);
     }
 
     /* Steps 3-4. */
     *dstPtr++ = '"';
     return dstPtr;
 }
 
 template <typename SrcCharT, typename CharVectorT>
--- a/js/src/tests/non262/JSON/stringify-special-escapes.js
+++ b/js/src/tests/non262/JSON/stringify-special-escapes.js
@@ -214,14 +214,64 @@ assertEq(JSON.stringify("\\u0019Q"), '"\
 assertEq(JSON.stringify("\\u001AQ"), '"\\\\u001AQ"');
 assertEq(JSON.stringify("\\u001BQ"), '"\\\\u001BQ"');
 assertEq(JSON.stringify("\\u001CQ"), '"\\\\u001CQ"');
 assertEq(JSON.stringify("\\u001DQ"), '"\\\\u001DQ"');
 assertEq(JSON.stringify("\\u001EQ"), '"\\\\u001EQ"');
 assertEq(JSON.stringify("\\u001FQ"), '"\\\\u001FQ"');
 assertEq(JSON.stringify("\\u0020Q"), '"\\\\u0020Q"');
 
+// https://tc39.github.io/proposal-well-formed-stringify/
+
+assertEq(JSON.stringify("\ud7ff"), '"\ud7ff"');
+assertEq(JSON.stringify("\ud800"), '"\\ud800"');
+assertEq(JSON.stringify("\ud937"), '"\\ud937"');
+assertEq(JSON.stringify("\uda20"), '"\\uda20"');
+assertEq(JSON.stringify("\udbff"), '"\\udbff"');
+
+assertEq(JSON.stringify("\udc00"), '"\\udc00"');
+assertEq(JSON.stringify("\udddd"), '"\\udddd"');
+assertEq(JSON.stringify("\udeaf"), '"\\udeaf"');
+assertEq(JSON.stringify("\udfff"), '"\\udfff"');
+assertEq(JSON.stringify("\ue000"), '"\ue000"');
+
+assertEq(JSON.stringify("\ud7ffa"), '"\ud7ffa"');
+assertEq(JSON.stringify("\ud800a"), '"\\ud800a"');
+assertEq(JSON.stringify("\ud937a"), '"\\ud937a"');
+assertEq(JSON.stringify("\uda20a"), '"\\uda20a"');
+assertEq(JSON.stringify("\udbffa"), '"\\udbffa"');
+
+assertEq(JSON.stringify("\udc00a"), '"\\udc00a"');
+assertEq(JSON.stringify("\udddda"), '"\\udddda"');
+assertEq(JSON.stringify("\udeafa"), '"\\udeafa"');
+assertEq(JSON.stringify("\udfffa"), '"\\udfffa"');
+assertEq(JSON.stringify("\ue000a"), '"\ue000a"');
+
+assertEq(JSON.stringify("\ud7ff\ud800"), '"\ud7ff\\ud800"');
+assertEq(JSON.stringify("\ud800\ud800"), '"\\ud800\\ud800"');
+assertEq(JSON.stringify("\ud937\ud800"), '"\\ud937\\ud800"');
+assertEq(JSON.stringify("\uda20\ud800"), '"\\uda20\\ud800"');
+assertEq(JSON.stringify("\udbff\ud800"), '"\\udbff\\ud800"');
+
+assertEq(JSON.stringify("\udc00\ud800"), '"\\udc00\\ud800"');
+assertEq(JSON.stringify("\udddd\ud800"), '"\\udddd\\ud800"');
+assertEq(JSON.stringify("\udeaf\ud800"), '"\\udeaf\\ud800"');
+assertEq(JSON.stringify("\udfff\ud800"), '"\\udfff\\ud800"');
+assertEq(JSON.stringify("\ue000\ud800"), '"\ue000\\ud800"');
+
+assertEq(JSON.stringify("\ud7ff\udc00"), '"\ud7ff\\udc00"');
+assertEq(JSON.stringify("\ud800\udc00"), '"\ud800\udc00"');
+assertEq(JSON.stringify("\ud937\udc00"), '"\ud937\udc00"');
+assertEq(JSON.stringify("\uda20\udc00"), '"\uda20\udc00"');
+assertEq(JSON.stringify("\udbff\udc00"), '"\udbff\udc00"');
+
+assertEq(JSON.stringify("\udc00\udc00"), '"\\udc00\\udc00"');
+assertEq(JSON.stringify("\udddd\udc00"), '"\\udddd\\udc00"');
+assertEq(JSON.stringify("\udeaf\udc00"), '"\\udeaf\\udc00"');
+assertEq(JSON.stringify("\udfff\udc00"), '"\\udfff\\udc00"');
+assertEq(JSON.stringify("\ue000\udc00"), '"\ue000\\udc00"');
+
 /******************************************************************************/
 
 if (typeof reportCompare === "function")
   reportCompare(true, true);
 
 print("Tests complete");
--- a/js/src/tests/non262/RegExp/split-trace.js
+++ b/js/src/tests/non262/RegExp/split-trace.js
@@ -176,17 +176,17 @@ reset();
 flags = "u";
 expectedFlags = "uy";
 target = "-\uD83D\uDC38\uDC38\uD83D";
 execResult        = [    null, null, null, null ];
 lastIndexResult   = [ ,  ,     ,     ,     ,    ];
 lastIndexExpected = [ 0, 1,    3,    4,         ];
 ret = RegExp.prototype[Symbol.split].call(myRegExp, target);
 assertEq(arraySetterObserved, false);
-assertEq(JSON.stringify(ret), `["-\uD83D\uDC38\uDC38\uD83D"]`);
+assertEq(JSON.stringify(ret), `["-\uD83D\uDC38\\udc38\\ud83d"]`);
 assertEq(log,
          "get:constructor," +
          "get:species," +
          "get:flags," +
          "call:constructor," +
          "set:lastIndex,get:exec,call:exec," +
          "set:lastIndex,get:exec,call:exec," +
          "set:lastIndex,get:exec,call:exec," +
@@ -198,17 +198,17 @@ flags = "u";
 expectedFlags = "uy";
 target = "-\uD83D\uDC38\uDC38\uD83D";
 var E = P(["", "X"]);
 execResult        = [    E, E, E, E, E, E, E ];
 lastIndexResult   = [ ,  0, 1, 1, 3, 3, 4, 4 ];
 lastIndexExpected = [ 0, 1, 1, 3, 3, 4, 4,   ];
 ret = RegExp.prototype[Symbol.split].call(myRegExp, target);
 assertEq(arraySetterObserved, false);
-assertEq(JSON.stringify(ret), `["-","X","\uD83D\uDC38","X","\uDC38","X","\uD83D"]`);
+assertEq(JSON.stringify(ret), `["-","X","\uD83D\uDC38","X","\\udc38","X","\\ud83d"]`);
 assertEq(log,
          "get:constructor," +
          "get:species," +
          "get:flags," +
          "call:constructor," +
          "set:lastIndex,get:exec,call:exec,get:lastIndex," +
          "set:lastIndex,get:exec,call:exec,get:lastIndex," +
          "get:result[length]," +