Bug 1469021 - Unicode-escape unpaired surrogates in JSON-quoted strings so that JSON-quoted strings are always valid Unicode without any embedded unpaired surrogates. r=anba
author: Jeff Walden <jwalden@mit.edu>
Thu, 09 Aug 2018 21:04:23 -0700
changeset 439763 55c19b23576a
parent 439741 9dd02ae2ee0a
child 439764 cb4aa4bcb7fa
push id: 34787
push user: csabou@mozilla.com
push date: Fri, 05 Oct 2018 10:08:34 +0000
treeherder: mozilla-central@863c5a0642a8 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: anba
bugs: 1469021
milestone: 64.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1469021 - Unicode-escape unpaired surrogates in JSON-quoted strings so that JSON-quoted strings are always valid Unicode without any embedded unpaired surrogates. r=anba
js/src/builtin/JSON.cpp
js/src/tests/non262/JSON/stringify-special-escapes.js
js/src/tests/non262/RegExp/split-trace.js
--- a/js/src/builtin/JSON.cpp
+++ b/js/src/builtin/JSON.cpp
@@ -67,36 +67,76 @@ InfallibleQuote(RangedPtr<const SrcCharT
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,  '\\', // rest are all zeros
     };
 
     /* Step 1. */
     *dstPtr++ = '"';
 
+    auto ToLowerHex = [](uint8_t u) {
+        MOZ_ASSERT(u <= 0xF);
+        return "0123456789abcdef"[u];
+    };
+
     /* Step 2. */
     while (srcBegin != srcEnd) {
-        SrcCharT c = *srcBegin++;
-        size_t escapeIndex = c % sizeof(escapeLookup);
-        Latin1Char escaped = escapeLookup[escapeIndex];
-        if (MOZ_LIKELY((escapeIndex != size_t(c)) || !escaped)) {
+        const SrcCharT c = *srcBegin++;
+
+        // Handle the Latin-1 cases.
+        if (MOZ_LIKELY(c < sizeof(escapeLookup))) {
+            Latin1Char escaped = escapeLookup[c];
+
+            // Directly copy non-escaped code points.
+            if (escaped == 0) {
+                *dstPtr++ = c;
+                continue;
+            }
+
+            // Escape the rest, elaborating Unicode escapes when needed.
+            *dstPtr++ = '\\';
+            *dstPtr++ = escaped;
+            if (escaped == 'u') {
+                *dstPtr++ = '0';
+                *dstPtr++ = '0';
+
+                uint8_t x = c >> 4;
+                MOZ_ASSERT(x < 10);
+                *dstPtr++ = '0' + x;
+
+                *dstPtr++ = ToLowerHex(c & 0xF);
+            }
+
+            continue;
+        }
+
+        // Non-ASCII non-surrogates are directly copied.
+        if (!unicode::IsSurrogate(c)) {
             *dstPtr++ = c;
             continue;
         }
+
+        // So too for complete surrogate pairs.
+        if (MOZ_LIKELY(unicode::IsLeadSurrogate(c) &&
+                       srcBegin < srcEnd &&
+                       unicode::IsTrailSurrogate(*srcBegin)))
+        {
+            *dstPtr++ = c;
+            *dstPtr++ = *srcBegin++;
+            continue;
+        }
+
+        // But lone surrogates are Unicode-escaped.
+        char32_t as32 = char32_t(c);
         *dstPtr++ = '\\';
-        *dstPtr++ = escaped;
-        if (escaped == 'u') {
-            MOZ_ASSERT(c < ' ');
-            MOZ_ASSERT((c >> 4) < 10);
-            uint8_t x = c >> 4, y = c % 16;
-            *dstPtr++ = '0';
-            *dstPtr++ = '0';
-            *dstPtr++ = '0' + x;
-            *dstPtr++ = y < 10 ? '0' + y : 'a' + (y - 10);
-        }
+        *dstPtr++ = 'u';
+        *dstPtr++ = ToLowerHex(as32 >> 12);
+        *dstPtr++ = ToLowerHex((as32 >> 8) & 0xF);
+        *dstPtr++ = ToLowerHex((as32 >> 4) & 0xF);
+        *dstPtr++ = ToLowerHex(as32 & 0xF);
     }
 
     /* Steps 3-4. */
     *dstPtr++ = '"';
     return dstPtr;
 }
 
 template <typename SrcCharT, typename CharVectorT>
--- a/js/src/tests/non262/JSON/stringify-special-escapes.js
+++ b/js/src/tests/non262/JSON/stringify-special-escapes.js
@@ -214,14 +214,64 @@ assertEq(JSON.stringify("\\u0019Q"), '"\
 assertEq(JSON.stringify("\\u001AQ"), '"\\\\u001AQ"');
 assertEq(JSON.stringify("\\u001BQ"), '"\\\\u001BQ"');
 assertEq(JSON.stringify("\\u001CQ"), '"\\\\u001CQ"');
 assertEq(JSON.stringify("\\u001DQ"), '"\\\\u001DQ"');
 assertEq(JSON.stringify("\\u001EQ"), '"\\\\u001EQ"');
 assertEq(JSON.stringify("\\u001FQ"), '"\\\\u001FQ"');
 assertEq(JSON.stringify("\\u0020Q"), '"\\\\u0020Q"');
 
+// https://tc39.github.io/proposal-well-formed-stringify/
+
+assertEq(JSON.stringify("\ud7ff"), '"\ud7ff"');
+assertEq(JSON.stringify("\ud800"), '"\\ud800"');
+assertEq(JSON.stringify("\ud937"), '"\\ud937"');
+assertEq(JSON.stringify("\uda20"), '"\\uda20"');
+assertEq(JSON.stringify("\udbff"), '"\\udbff"');
+
+assertEq(JSON.stringify("\udc00"), '"\\udc00"');
+assertEq(JSON.stringify("\udddd"), '"\\udddd"');
+assertEq(JSON.stringify("\udeaf"), '"\\udeaf"');
+assertEq(JSON.stringify("\udfff"), '"\\udfff"');
+assertEq(JSON.stringify("\ue000"), '"\ue000"');
+
+assertEq(JSON.stringify("\ud7ffa"), '"\ud7ffa"');
+assertEq(JSON.stringify("\ud800a"), '"\\ud800a"');
+assertEq(JSON.stringify("\ud937a"), '"\\ud937a"');
+assertEq(JSON.stringify("\uda20a"), '"\\uda20a"');
+assertEq(JSON.stringify("\udbffa"), '"\\udbffa"');
+
+assertEq(JSON.stringify("\udc00a"), '"\\udc00a"');
+assertEq(JSON.stringify("\udddda"), '"\\udddda"');
+assertEq(JSON.stringify("\udeafa"), '"\\udeafa"');
+assertEq(JSON.stringify("\udfffa"), '"\\udfffa"');
+assertEq(JSON.stringify("\ue000a"), '"\ue000a"');
+
+assertEq(JSON.stringify("\ud7ff\ud800"), '"\ud7ff\\ud800"');
+assertEq(JSON.stringify("\ud800\ud800"), '"\\ud800\\ud800"');
+assertEq(JSON.stringify("\ud937\ud800"), '"\\ud937\\ud800"');
+assertEq(JSON.stringify("\uda20\ud800"), '"\\uda20\\ud800"');
+assertEq(JSON.stringify("\udbff\ud800"), '"\\udbff\\ud800"');
+
+assertEq(JSON.stringify("\udc00\ud800"), '"\\udc00\\ud800"');
+assertEq(JSON.stringify("\udddd\ud800"), '"\\udddd\\ud800"');
+assertEq(JSON.stringify("\udeaf\ud800"), '"\\udeaf\\ud800"');
+assertEq(JSON.stringify("\udfff\ud800"), '"\\udfff\\ud800"');
+assertEq(JSON.stringify("\ue000\ud800"), '"\ue000\\ud800"');
+
+assertEq(JSON.stringify("\ud7ff\udc00"), '"\ud7ff\\udc00"');
+assertEq(JSON.stringify("\ud800\udc00"), '"\ud800\udc00"');
+assertEq(JSON.stringify("\ud937\udc00"), '"\ud937\udc00"');
+assertEq(JSON.stringify("\uda20\udc00"), '"\uda20\udc00"');
+assertEq(JSON.stringify("\udbff\udc00"), '"\udbff\udc00"');
+
+assertEq(JSON.stringify("\udc00\udc00"), '"\\udc00\\udc00"');
+assertEq(JSON.stringify("\udddd\udc00"), '"\\udddd\\udc00"');
+assertEq(JSON.stringify("\udeaf\udc00"), '"\\udeaf\\udc00"');
+assertEq(JSON.stringify("\udfff\udc00"), '"\\udfff\\udc00"');
+assertEq(JSON.stringify("\ue000\udc00"), '"\ue000\\udc00"');
+
 /******************************************************************************/
 
 if (typeof reportCompare === "function")
   reportCompare(true, true);
 
 print("Tests complete");
--- a/js/src/tests/non262/RegExp/split-trace.js
+++ b/js/src/tests/non262/RegExp/split-trace.js
@@ -176,17 +176,17 @@ reset();
 flags = "u";
 expectedFlags = "uy";
 target = "-\uD83D\uDC38\uDC38\uD83D";
 execResult        = [    null, null, null, null ];
 lastIndexResult   = [ ,  ,     ,     ,     ,    ];
 lastIndexExpected = [ 0, 1,    3,    4,         ];
 ret = RegExp.prototype[Symbol.split].call(myRegExp, target);
 assertEq(arraySetterObserved, false);
-assertEq(JSON.stringify(ret), `["-\uD83D\uDC38\uDC38\uD83D"]`);
+assertEq(JSON.stringify(ret), `["-\uD83D\uDC38\\udc38\\ud83d"]`);
 assertEq(log,
          "get:constructor," +
          "get:species," +
          "get:flags," +
          "call:constructor," +
          "set:lastIndex,get:exec,call:exec," +
          "set:lastIndex,get:exec,call:exec," +
          "set:lastIndex,get:exec,call:exec," +
@@ -198,17 +198,17 @@ flags = "u";
 expectedFlags = "uy";
 target = "-\uD83D\uDC38\uDC38\uD83D";
 var E = P(["", "X"]);
 execResult        = [    E, E, E, E, E, E, E ];
 lastIndexResult   = [ ,  0, 1, 1, 3, 3, 4, 4 ];
 lastIndexExpected = [ 0, 1, 1, 3, 3, 4, 4,   ];
 ret = RegExp.prototype[Symbol.split].call(myRegExp, target);
 assertEq(arraySetterObserved, false);
-assertEq(JSON.stringify(ret), `["-","X","\uD83D\uDC38","X","\uDC38","X","\uD83D"]`);
+assertEq(JSON.stringify(ret), `["-","X","\uD83D\uDC38","X","\\udc38","X","\\ud83d"]`);
 assertEq(log,
          "get:constructor," +
          "get:species," +
          "get:flags," +
          "call:constructor," +
          "set:lastIndex,get:exec,call:exec,get:lastIndex," +
          "set:lastIndex,get:exec,call:exec,get:lastIndex," +
          "get:result[length]," +