Bug 1659104 - Part 1 - Add ParserAtom support for Char16LE strings. r=tcampbell
author Kannan Vijayan <kvijayan@mozilla.com>
Thu, 24 Sep 2020 17:47:53 +0000
changeset 550243 3b7aed9c0a06129b270943a8502f9db943f1ab50
parent 550242 47cdef9c2e5ccfeea61683e5b726d32ffbe5efe5
child 550244 b87a709b879d95ff7ee26dd0dec9231ad00b8780
push id 37809
push user apavel@mozilla.com
push date Fri, 25 Sep 2020 03:37:48 +0000
treeherder mozilla-central@4846ccf88574 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers tcampbell
bugs 1659104
milestone 83.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1659104 - Part 1 - Add ParserAtom support for Char16LE strings. r=tcampbell
Differential Revision: https://phabricator.services.mozilla.com/D90510
js/src/frontend/ParserAtom.cpp
js/src/frontend/ParserAtom.h
js/src/jsapi-tests/testParserAtom.cpp
--- a/js/src/frontend/ParserAtom.cpp
+++ b/js/src/frontend/ParserAtom.cpp
@@ -70,16 +70,48 @@ class InflatedChar16Sequence<const Parse
     }
     return hash;
   }
 };
 
 }  // namespace js
 
 namespace js {
+
+template <>
+class InflatedChar16Sequence<LittleEndianChars> {  // Forward cursor over LittleEndianChars.
+ private:
+  LittleEndianChars chars_;  // Source characters, read by index.
+  size_t idx_;               // Index of the next unit next() will return.
+  size_t len_;               // Total number of units in the sequence.
+
+ public:
+  InflatedChar16Sequence(LittleEndianChars chars, size_t length)
+      : chars_(chars), idx_(0), len_(length) {}  // Cursor starts at index 0.
+
+  bool hasMore() { return idx_ < len_; }  // True while unread units remain.
+
+  char16_t next() {
+    MOZ_ASSERT(hasMore());  // Callers are expected to check hasMore() first.
+    return chars_[idx_++];  // Yield the current unit, then advance the cursor.
+  }
+
+  HashNumber computeHash() const {
+    auto copy = *this;  // Iterate a copy so this cursor's position is untouched.
+    HashNumber hash = 0;
+    while (copy.hasMore()) {
+      hash = mozilla::AddToHash(hash, copy.next());  // Fold in each remaining unit.
+    }
+    return hash;
+  }
+};
+
+}  // namespace js
+
+namespace js {
 namespace frontend {
 
 static JS::OOM PARSER_ATOMS_OOM;
 
 static JSAtom* GetWellKnownAtom(JSContext* cx, WellKnownAtomId kind) {
 #define ASSERT_OFFSET_(idpart, id, text)       \
   static_assert(offsetof(JSAtomState, id) ==   \
                 int32_t(WellKnownAtomId::id) * \
@@ -308,16 +340,56 @@ JS::Result<const ParserAtom*, OOM> Parse
   auto addPtr = entrySet_.lookupForAdd(lookup);
   if (addPtr) {
     return (*addPtr)->asAtom();
   }
 
   return internLatin1Seq(cx, addPtr, lookup.hash(), latin1Ptr, length);
 }
 
+// For XDR: interns `length` char16_t units read through `twoByteLE`.
+JS::Result<const ParserAtom*, OOM> ParserAtomsTable::internChar16LE(
+    JSContext* cx, LittleEndianChars twoByteLE, uint32_t length) {
+  // Check for tiny strings which are abundant in minified code.
+  if (const ParserAtom* tiny = wellKnownTable_.lookupTiny(twoByteLE, length)) {
+    return tiny;
+  }
+
+  InflatedChar16Sequence<LittleEndianChars> seq(twoByteLE, length);
+
+  // Check for well-known atom.
+  SpecificParserAtomLookup<LittleEndianChars> lookup(seq);
+  if (const ParserAtom* wk = wellKnownTable_.lookupChar16Seq(lookup)) {
+    return wk;
+  }
+
+  // An XDR interning is guaranteed to be unique: there should be no
+  // existing atom with the same contents, except for well-known atoms.
+  EntrySet::AddPtr addPtr = entrySet_.lookupForAdd(lookup);
+  MOZ_ASSERT(!addPtr);  // NOTE(review): assert-only; a found entry is not handled below - confirm.
+
+  // Compute the target encoding: any unit above MAX_LATIN1_CHAR means wide.
+  // NOTE: Length in code units will be the same, even if we deflate to Latin1.
+  bool wide = false;
+  InflatedChar16Sequence<LittleEndianChars> seqCopy = seq;  // Scan a copy so `seq` is not advanced.
+  while (seqCopy.hasMore()) {
+    char16_t ch = seqCopy.next();
+    if (ch > MAX_LATIN1_CHAR) {
+      wide = true;  // Selects the char16_t instantiation below.
+      break;
+    }
+  }
+
+  // Add new entry, reusing the hash already computed by `lookup`.
+  return wide
+             ? internChar16Seq<char16_t>(cx, addPtr, lookup.hash(), seq, length)
+             : internChar16Seq<Latin1Char>(cx, addPtr, lookup.hash(), seq,
+                                           length);
+}
+
 JS::Result<const ParserAtom*, OOM> ParserAtomsTable::internUtf8(
     JSContext* cx, const mozilla::Utf8Unit* utf8Ptr, uint32_t nbyte) {
   // Check for tiny strings which are abundant in minified code.
   // NOTE: The tiny atoms are all ASCII-only so we can directly look at the
   //        UTF-8 data without worrying about surrogates.
   if (const ParserAtom* tiny = wellKnownTable_.lookupTiny(
           reinterpret_cast<const Latin1Char*>(utf8Ptr), nbyte)) {
     return tiny;
--- a/js/src/frontend/ParserAtom.h
+++ b/js/src/frontend/ParserAtom.h
@@ -461,16 +461,21 @@ class ParserAtomsTable {
 
   JS::Result<const ParserAtom*, OOM> internUtf8(
       JSContext* cx, const mozilla::Utf8Unit* utf8Ptr, uint32_t nbyte);
 
   JS::Result<const ParserAtom*, OOM> internChar16(JSContext* cx,
                                                   const char16_t* char16Ptr,
                                                   uint32_t length);
 
+  // Interns `length` units read via LittleEndianChars; only exists for XDR.
+  JS::Result<const ParserAtom*, OOM> internChar16LE(JSContext* cx,
+                                                    LittleEndianChars twoByteLE,
+                                                    uint32_t length);
+
   JS::Result<const ParserAtom*, OOM> internJSAtom(
       JSContext* cx, CompilationInfo& compilationInfo, JSAtom* atom);
 
   JS::Result<const ParserAtom*, OOM> concatAtoms(
       JSContext* cx, mozilla::Range<const ParserAtom*> atoms);
 };
 
 template <typename CharT>
--- a/js/src/jsapi-tests/testParserAtom.cpp
+++ b/js/src/jsapi-tests/testParserAtom.cpp
@@ -15,30 +15,32 @@
 
 // Test empty strings behave consistently.
 BEGIN_TEST(testParserAtom_empty) {
   using js::frontend::ParserAtom;
   using js::frontend::ParserAtomsTable;
 
   ParserAtomsTable atomTable(cx->runtime());
 
-  constexpr size_t len = 0;
-
   const char ascii[] = {};
   const JS::Latin1Char latin1[] = {};
   const mozilla::Utf8Unit utf8[] = {};
   const char16_t char16[] = {};
 
+  const uint8_t bytes[] = {};
+  const js::LittleEndianChars leTwoByte(bytes);
+
   // Check that the well-known empty atom matches for different entry points.
   const ParserAtom* ref = cx->parserNames().empty;
   CHECK(ref);
-  CHECK(atomTable.internAscii(cx, ascii, len).unwrap() == ref);
-  CHECK(atomTable.internLatin1(cx, latin1, len).unwrap() == ref);
-  CHECK(atomTable.internUtf8(cx, utf8, len).unwrap() == ref);
-  CHECK(atomTable.internChar16(cx, char16, len).unwrap() == ref);
+  CHECK(atomTable.internAscii(cx, ascii, 0).unwrap() == ref);
+  CHECK(atomTable.internLatin1(cx, latin1, 0).unwrap() == ref);
+  CHECK(atomTable.internUtf8(cx, utf8, 0).unwrap() == ref);
+  CHECK(atomTable.internChar16(cx, char16, 0).unwrap() == ref);
+  CHECK(atomTable.internChar16LE(cx, leTwoByte, 0).unwrap() == ref);
 
   // Check concatenation works on empty atoms.
   const ParserAtom* concat[] = {
       cx->parserNames().empty,
       cx->parserNames().empty,
   };
   mozilla::Range<const ParserAtom*> concatRange(concat, 2);
   CHECK(atomTable.concatAtoms(cx, concatRange).unwrap() == ref);
@@ -55,22 +57,26 @@ BEGIN_TEST(testParserAtom_tiny1) {
   ParserAtomsTable atomTable(cx->runtime());
 
   char16_t a = 'a';
   const char ascii[] = {'a'};
   JS::Latin1Char latin1[] = {'a'};
   const mozilla::Utf8Unit utf8[] = {mozilla::Utf8Unit('a')};
   char16_t char16[] = {'a'};
 
+  const uint8_t bytes[] = {'a', 0};
+  const js::LittleEndianChars leTwoByte(bytes);
+
   const ParserAtom* ref = cx->parserNames().lookupTiny(&a, 1);
   CHECK(ref);
   CHECK(atomTable.internAscii(cx, ascii, 1).unwrap() == ref);
   CHECK(atomTable.internLatin1(cx, latin1, 1).unwrap() == ref);
   CHECK(atomTable.internUtf8(cx, utf8, 1).unwrap() == ref);
   CHECK(atomTable.internChar16(cx, char16, 1).unwrap() == ref);
+  CHECK(atomTable.internChar16LE(cx, leTwoByte, 1).unwrap() == ref);
 
   const ParserAtom* concat[] = {
       ref,
       cx->parserNames().empty,
   };
   mozilla::Range<const ParserAtom*> concatRange(concat, 2);
   CHECK(atomTable.concatAtoms(cx, concatRange).unwrap() == ref);
 
@@ -91,22 +97,26 @@ BEGIN_TEST(testParserAtom_tiny2) {
   ParserAtomsTable atomTable(cx->runtime());
 
   const char ascii[] = {'a', '0'};
   JS::Latin1Char latin1[] = {'a', '0'};
   const mozilla::Utf8Unit utf8[] = {mozilla::Utf8Unit('a'),
                                     mozilla::Utf8Unit('0')};
   char16_t char16[] = {'a', '0'};
 
+  const uint8_t bytes[] = {'a', 0, '0', 0};
+  const js::LittleEndianChars leTwoByte(bytes);
+
   const ParserAtom* ref = cx->parserNames().lookupTiny(ascii, 2);
   CHECK(ref);
   CHECK(atomTable.internAscii(cx, ascii, 2).unwrap() == ref);
   CHECK(atomTable.internLatin1(cx, latin1, 2).unwrap() == ref);
   CHECK(atomTable.internUtf8(cx, utf8, 2).unwrap() == ref);
   CHECK(atomTable.internChar16(cx, char16, 2).unwrap() == ref);
+  CHECK(atomTable.internChar16LE(cx, leTwoByte, 2).unwrap() == ref);
 
   const ParserAtom* concat[] = {
       cx->parserNames().lookupTiny(ascii + 0, 1),
       cx->parserNames().lookupTiny(ascii + 1, 1),
   };
   mozilla::Range<const ParserAtom*> concatRange(concat, 2);
   CHECK(atomTable.concatAtoms(cx, concatRange).unwrap() == ref);