Bug 1469021 - Unicode-escape unpaired surrogates in JSON-quoted strings so that JSON-quoted strings are always valid Unicode without any embedded unpaired surrogates. r=anba
authorJeff Walden <jwalden@mit.edu>
Thu, 09 Aug 2018 21:04:23 -0700
changeset 495499 55c19b23576a09a2735c1f6cfb1f61acd61ab470
parent 495477 9dd02ae2ee0ae93d12ea26e7a1d405427f090dc6
child 495500 cb4aa4bcb7fa800d8aed782c4e5dec55a694cfe0
push id9984
push userffxbld-merge
push dateMon, 15 Oct 2018 21:07:35 +0000
treeherdermozilla-beta@183d27ea8570 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersanba
bugs1469021
milestone64.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1469021 - Unicode-escape unpaired surrogates in JSON-quoted strings so that JSON-quoted strings are always valid Unicode without any embedded unpaired surrogates. r=anba
js/src/builtin/JSON.cpp
js/src/tests/non262/JSON/stringify-special-escapes.js
js/src/tests/non262/RegExp/split-trace.js
--- a/js/src/builtin/JSON.cpp
+++ b/js/src/builtin/JSON.cpp
@@ -67,36 +67,76 @@ InfallibleQuote(RangedPtr<const SrcCharT
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,  '\\', // rest are all zeros
     };
 
     /* Step 1. */
     *dstPtr++ = '"';
 
+    auto ToLowerHex = [](uint8_t u) {
+        MOZ_ASSERT(u <= 0xF);
+        return "0123456789abcdef"[u];
+    };
+
     /* Step 2. */
     while (srcBegin != srcEnd) {
-        SrcCharT c = *srcBegin++;
-        size_t escapeIndex = c % sizeof(escapeLookup);
-        Latin1Char escaped = escapeLookup[escapeIndex];
-        if (MOZ_LIKELY((escapeIndex != size_t(c)) || !escaped)) {
+        const SrcCharT c = *srcBegin++;
+
+        // Handle the Latin-1 cases.
+        if (MOZ_LIKELY(c < sizeof(escapeLookup))) {
+            Latin1Char escaped = escapeLookup[c];
+
+            // Directly copy non-escaped code points.
+            if (escaped == 0) {
+                *dstPtr++ = c;
+                continue;
+            }
+
+            // Escape the rest, elaborating Unicode escapes when needed.
+            *dstPtr++ = '\\';
+            *dstPtr++ = escaped;
+            if (escaped == 'u') {
+                *dstPtr++ = '0';
+                *dstPtr++ = '0';
+
+                uint8_t x = c >> 4;
+                MOZ_ASSERT(x < 10);
+                *dstPtr++ = '0' + x;
+
+                *dstPtr++ = ToLowerHex(c & 0xF);
+            }
+
+            continue;
+        }
+
+        // Non-ASCII non-surrogates are directly copied.
+        if (!unicode::IsSurrogate(c)) {
             *dstPtr++ = c;
             continue;
         }
+
+        // So too for complete surrogate pairs.
+        if (MOZ_LIKELY(unicode::IsLeadSurrogate(c) &&
+                       srcBegin < srcEnd &&
+                       unicode::IsTrailSurrogate(*srcBegin)))
+        {
+            *dstPtr++ = c;
+            *dstPtr++ = *srcBegin++;
+            continue;
+        }
+
+        // But lone surrogates are Unicode-escaped.
+        char32_t as32 = char32_t(c);
         *dstPtr++ = '\\';
-        *dstPtr++ = escaped;
-        if (escaped == 'u') {
-            MOZ_ASSERT(c < ' ');
-            MOZ_ASSERT((c >> 4) < 10);
-            uint8_t x = c >> 4, y = c % 16;
-            *dstPtr++ = '0';
-            *dstPtr++ = '0';
-            *dstPtr++ = '0' + x;
-            *dstPtr++ = y < 10 ? '0' + y : 'a' + (y - 10);
-        }
+        *dstPtr++ = 'u';
+        *dstPtr++ = ToLowerHex(as32 >> 12);
+        *dstPtr++ = ToLowerHex((as32 >> 8) & 0xF);
+        *dstPtr++ = ToLowerHex((as32 >> 4) & 0xF);
+        *dstPtr++ = ToLowerHex(as32 & 0xF);
     }
 
     /* Steps 3-4. */
     *dstPtr++ = '"';
     return dstPtr;
 }
 
 template <typename SrcCharT, typename CharVectorT>
--- a/js/src/tests/non262/JSON/stringify-special-escapes.js
+++ b/js/src/tests/non262/JSON/stringify-special-escapes.js
@@ -214,14 +214,64 @@ assertEq(JSON.stringify("\\u0019Q"), '"\
 assertEq(JSON.stringify("\\u001AQ"), '"\\\\u001AQ"');
 assertEq(JSON.stringify("\\u001BQ"), '"\\\\u001BQ"');
 assertEq(JSON.stringify("\\u001CQ"), '"\\\\u001CQ"');
 assertEq(JSON.stringify("\\u001DQ"), '"\\\\u001DQ"');
 assertEq(JSON.stringify("\\u001EQ"), '"\\\\u001EQ"');
 assertEq(JSON.stringify("\\u001FQ"), '"\\\\u001FQ"');
 assertEq(JSON.stringify("\\u0020Q"), '"\\\\u0020Q"');
 
+// https://tc39.github.io/proposal-well-formed-stringify/
+
+assertEq(JSON.stringify("\ud7ff"), '"\ud7ff"');
+assertEq(JSON.stringify("\ud800"), '"\\ud800"');
+assertEq(JSON.stringify("\ud937"), '"\\ud937"');
+assertEq(JSON.stringify("\uda20"), '"\\uda20"');
+assertEq(JSON.stringify("\udbff"), '"\\udbff"');
+
+assertEq(JSON.stringify("\udc00"), '"\\udc00"');
+assertEq(JSON.stringify("\udddd"), '"\\udddd"');
+assertEq(JSON.stringify("\udeaf"), '"\\udeaf"');
+assertEq(JSON.stringify("\udfff"), '"\\udfff"');
+assertEq(JSON.stringify("\ue000"), '"\ue000"');
+
+assertEq(JSON.stringify("\ud7ffa"), '"\ud7ffa"');
+assertEq(JSON.stringify("\ud800a"), '"\\ud800a"');
+assertEq(JSON.stringify("\ud937a"), '"\\ud937a"');
+assertEq(JSON.stringify("\uda20a"), '"\\uda20a"');
+assertEq(JSON.stringify("\udbffa"), '"\\udbffa"');
+
+assertEq(JSON.stringify("\udc00a"), '"\\udc00a"');
+assertEq(JSON.stringify("\udddda"), '"\\udddda"');
+assertEq(JSON.stringify("\udeafa"), '"\\udeafa"');
+assertEq(JSON.stringify("\udfffa"), '"\\udfffa"');
+assertEq(JSON.stringify("\ue000a"), '"\ue000a"');
+
+assertEq(JSON.stringify("\ud7ff\ud800"), '"\ud7ff\\ud800"');
+assertEq(JSON.stringify("\ud800\ud800"), '"\\ud800\\ud800"');
+assertEq(JSON.stringify("\ud937\ud800"), '"\\ud937\\ud800"');
+assertEq(JSON.stringify("\uda20\ud800"), '"\\uda20\\ud800"');
+assertEq(JSON.stringify("\udbff\ud800"), '"\\udbff\\ud800"');
+
+assertEq(JSON.stringify("\udc00\ud800"), '"\\udc00\\ud800"');
+assertEq(JSON.stringify("\udddd\ud800"), '"\\udddd\\ud800"');
+assertEq(JSON.stringify("\udeaf\ud800"), '"\\udeaf\\ud800"');
+assertEq(JSON.stringify("\udfff\ud800"), '"\\udfff\\ud800"');
+assertEq(JSON.stringify("\ue000\ud800"), '"\ue000\\ud800"');
+
+assertEq(JSON.stringify("\ud7ff\udc00"), '"\ud7ff\\udc00"');
+assertEq(JSON.stringify("\ud800\udc00"), '"\ud800\udc00"');
+assertEq(JSON.stringify("\ud937\udc00"), '"\ud937\udc00"');
+assertEq(JSON.stringify("\uda20\udc00"), '"\uda20\udc00"');
+assertEq(JSON.stringify("\udbff\udc00"), '"\udbff\udc00"');
+
+assertEq(JSON.stringify("\udc00\udc00"), '"\\udc00\\udc00"');
+assertEq(JSON.stringify("\udddd\udc00"), '"\\udddd\\udc00"');
+assertEq(JSON.stringify("\udeaf\udc00"), '"\\udeaf\\udc00"');
+assertEq(JSON.stringify("\udfff\udc00"), '"\\udfff\\udc00"');
+assertEq(JSON.stringify("\ue000\udc00"), '"\ue000\\udc00"');
+
 /******************************************************************************/
 
 if (typeof reportCompare === "function")
   reportCompare(true, true);
 
 print("Tests complete");
--- a/js/src/tests/non262/RegExp/split-trace.js
+++ b/js/src/tests/non262/RegExp/split-trace.js
@@ -176,17 +176,17 @@ reset();
 flags = "u";
 expectedFlags = "uy";
 target = "-\uD83D\uDC38\uDC38\uD83D";
 execResult        = [    null, null, null, null ];
 lastIndexResult   = [ ,  ,     ,     ,     ,    ];
 lastIndexExpected = [ 0, 1,    3,    4,         ];
 ret = RegExp.prototype[Symbol.split].call(myRegExp, target);
 assertEq(arraySetterObserved, false);
-assertEq(JSON.stringify(ret), `["-\uD83D\uDC38\uDC38\uD83D"]`);
+assertEq(JSON.stringify(ret), `["-\uD83D\uDC38\\udc38\\ud83d"]`);
 assertEq(log,
          "get:constructor," +
          "get:species," +
          "get:flags," +
          "call:constructor," +
          "set:lastIndex,get:exec,call:exec," +
          "set:lastIndex,get:exec,call:exec," +
          "set:lastIndex,get:exec,call:exec," +
@@ -198,17 +198,17 @@ flags = "u";
 expectedFlags = "uy";
 target = "-\uD83D\uDC38\uDC38\uD83D";
 var E = P(["", "X"]);
 execResult        = [    E, E, E, E, E, E, E ];
 lastIndexResult   = [ ,  0, 1, 1, 3, 3, 4, 4 ];
 lastIndexExpected = [ 0, 1, 1, 3, 3, 4, 4,   ];
 ret = RegExp.prototype[Symbol.split].call(myRegExp, target);
 assertEq(arraySetterObserved, false);
-assertEq(JSON.stringify(ret), `["-","X","\uD83D\uDC38","X","\uDC38","X","\uD83D"]`);
+assertEq(JSON.stringify(ret), `["-","X","\uD83D\uDC38","X","\\udc38","X","\\ud83d"]`);
 assertEq(log,
          "get:constructor," +
          "get:species," +
          "get:flags," +
          "call:constructor," +
          "set:lastIndex,get:exec,call:exec,get:lastIndex," +
          "set:lastIndex,get:exec,call:exec,get:lastIndex," +
          "get:result[length]," +