Bug 1659104 - Part 1 - Add ParserAtom support for Char16LE strings. r=tcampbell
☠☠ backed out by 2f4118bd48c5 ☠ ☠
author Kannan Vijayan <kvijayan@mozilla.com>
Tue, 22 Sep 2020 15:36:01 +0000
changeset 549641 ffac61e233f765cdff22b6ee348f1f0538b3190c
parent 549640 f8a6dd6afdfbf44d4877b693c92c62ce59aa9534
child 549642 55f9807037732ec8be73cdccae25d18f663ed860
push id 37805
push user abutkovits@mozilla.com
push date Wed, 23 Sep 2020 09:59:09 +0000
treeherder mozilla-central@7927a1705247 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers tcampbell
bugs 1659104
milestone 83.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1659104 - Part 1 - Add ParserAtom support for Char16LE strings. r=tcampbell Differential Revision: https://phabricator.services.mozilla.com/D90510
js/src/frontend/ParserAtom.cpp
js/src/frontend/ParserAtom.h
js/src/jsapi-tests/testParserAtom.cpp
--- a/js/src/frontend/ParserAtom.cpp
+++ b/js/src/frontend/ParserAtom.cpp
@@ -16,16 +16,48 @@
 #include "vm/Printer.h"
 #include "vm/Runtime.h"
 #include "vm/StringType.h"
 
 using namespace js;
 using namespace js::frontend;
 
 namespace js {
+
+template <>
+class InflatedChar16Sequence<LittleEndianChars> {  // Forward reader of char16_t units from LittleEndianChars.
+ private:
+  LittleEndianChars chars_;  // Underlying units, read via operator[].
+  size_t idx_;  // Index of the next unit to return.
+  size_t len_;  // Number of units in the sequence.
+
+ public:
+  InflatedChar16Sequence(LittleEndianChars chars, size_t length)
+      : chars_(chars), idx_(0), len_(length) {}  // Reading starts at unit 0.
+
+  bool hasMore() { return idx_ < len_; }  // True while units remain.
+
+  char16_t next() {  // Returns the current unit and advances past it.
+    MOZ_ASSERT(hasMore());
+    return chars_[idx_++];
+  }
+
+  HashNumber computeHash() const {  // Hashes the units not yet consumed.
+    auto copy = *this;  // Iterate a copy: hasMore()/next() are non-const.
+    HashNumber hash = 0;
+    while (copy.hasMore()) {
+      hash = mozilla::AddToHash(hash, copy.next());
+    }
+    return hash;
+  }
+};
+
+}  // namespace js
+
+namespace js {
 namespace frontend {
 
 static JS::OOM PARSER_ATOMS_OOM;
 
 static JSAtom* GetWellKnownAtom(JSContext* cx, WellKnownAtomId kind) {
 #define ASSERT_OFFSET_(idpart, id, text)       \
   static_assert(offsetof(JSAtomState, id) ==   \
                 int32_t(WellKnownAtomId::id) * \
@@ -319,16 +351,52 @@ JS::Result<const ParserAtom*, OOM> Parse
   AddPtr addPtr = lookupForAdd(cx, seq);
   if (addPtr) {
     return addPtr.get()->asAtom();
   }
 
   return internLatin1Seq(cx, addPtr, latin1Ptr, length);
 }
 
+JS::Result<const ParserAtom*, OOM> ParserAtomsTable::internChar16LE(
+    JSContext* cx, LittleEndianChars twoByteLE, uint32_t length) {
+  // Check for tiny strings which are abundant in minified code.
+  if (const ParserAtom* tiny = wellKnownTable_.lookupTiny(twoByteLE, length)) {
+    return tiny;
+  }
+
+  InflatedChar16Sequence<LittleEndianChars> seq(twoByteLE, length);
+
+  // An XDR interning is guaranteed to be unique: there should be no
+  // existing atom with the same contents, except for well-known atoms.
+  //
+  // However, using the existing path which checks for existing atoms
+  // is a lot simpler as it handles well-known atom checks seamlessly.
+  AddPtr addPtr = lookupForAdd(cx, seq);
+  if (addPtr) {
+    return addPtr.get()->asAtom();  // Lookup hit: return the found atom.
+  }
+
+  // Compute the target encoding.
+  // NOTE: Length in code-points will be the same, even if we deflate to Latin1.
+  bool wide = false;
+  InflatedChar16Sequence<LittleEndianChars> seqCopy = seq;  // Scan a copy; `seq` itself is not advanced.
+  while (seqCopy.hasMore()) {
+    char16_t ch = seqCopy.next();
+    if (ch > MAX_LATIN1_CHAR) {
+      wide = true;  // One unit above MAX_LATIN1_CHAR selects char16_t storage.
+      break;
+    }
+  }
+
+  // Otherwise, add a new entry: char16_t if `wide`, else Latin1Char.
+  return wide ? internChar16Seq<char16_t>(cx, addPtr, seq, length)
+              : internChar16Seq<Latin1Char>(cx, addPtr, seq, length);
+}
+
 JS::Result<const ParserAtom*, OOM> ParserAtomsTable::internUtf8(
     JSContext* cx, const mozilla::Utf8Unit* utf8Ptr, uint32_t nbyte) {
   // Check for tiny strings which are abundant in minified code.
   // NOTE: The tiny atoms are all ASCII-only so we can directly look at the
   //        UTF-8 data without worrying about surrogates.
   if (const ParserAtom* tiny = wellKnownTable_.lookupTiny(
           reinterpret_cast<const Latin1Char*>(utf8Ptr), nbyte)) {
     return tiny;
--- a/js/src/frontend/ParserAtom.h
+++ b/js/src/frontend/ParserAtom.h
@@ -432,33 +432,40 @@ class WellKnownParserAtoms {
 
   bool init(JSContext* cx);
 
   template <typename CharT>
   const ParserAtom* lookupChar16Seq(
       const SpecificParserAtomLookup<CharT>& lookup) const;
 
   // Fast-path tiny strings since they are abundant in minified code.
-  template <typename CharT>
-  const ParserAtom* lookupTiny(const CharT* charPtr, uint32_t length) const {
+  template <typename CharsT>
+  const ParserAtom* lookupTiny(CharsT chars, size_t length) const {
+    static_assert(std::is_same_v<CharsT, const Latin1Char*> ||
+                      std::is_same_v<CharsT, const char16_t*> ||
+                      std::is_same_v<CharsT, const char*> ||
+                      std::is_same_v<CharsT, char16_t*> ||
+                      std::is_same_v<CharsT, LittleEndianChars>,
+                  "This assert mostly explicitly documents the calling types, "
+                  "and forces that to be updated if new types show up.");
     switch (length) {
       case 0:
         return empty;
 
       case 1: {
-        if (char16_t(charPtr[0]) < ASCII_STATIC_LIMIT) {
-          return getLength1String(charPtr[0]);
+        if (char16_t(chars[0]) < ASCII_STATIC_LIMIT) {
+          return getLength1String(chars[0]);
         }
         break;
       }
 
       case 2:
-        if (StaticStrings::fitsInSmallChar(charPtr[0]) &&
-            StaticStrings::fitsInSmallChar(charPtr[1])) {
-          return getLength2String(charPtr[0], charPtr[1]);
+        if (StaticStrings::fitsInSmallChar(chars[0]) &&
+            StaticStrings::fitsInSmallChar(chars[1])) {
+          return getLength2String(chars[0], chars[1]);
         }
         break;
     }
 
     // No match on tiny Atoms
     return nullptr;
   }
 };
@@ -537,16 +544,21 @@ class ParserAtomsTable {
 
   JS::Result<const ParserAtom*, OOM> internUtf8(
       JSContext* cx, const mozilla::Utf8Unit* utf8Ptr, uint32_t nbyte);
 
   JS::Result<const ParserAtom*, OOM> internChar16(JSContext* cx,
                                                   const char16_t* char16Ptr,
                                                   uint32_t length);
 
+  // Interns `length` units from LittleEndianChars; only exists for XDR support.
+  JS::Result<const ParserAtom*, OOM> internChar16LE(JSContext* cx,
+                                                    LittleEndianChars twoByteLE,
+                                                    uint32_t length);
+
   JS::Result<const ParserAtom*, OOM> internJSAtom(
       JSContext* cx, CompilationInfo& compilationInfo, JSAtom* atom);
 
   JS::Result<const ParserAtom*, OOM> concatAtoms(
       JSContext* cx, mozilla::Range<const ParserAtom*> atoms);
 };
 
 template <typename CharT>
--- a/js/src/jsapi-tests/testParserAtom.cpp
+++ b/js/src/jsapi-tests/testParserAtom.cpp
@@ -11,30 +11,32 @@
 
 // Test empty strings behave consistently.
 BEGIN_TEST(testParserAtom_empty) {
   using js::frontend::ParserAtom;
   using js::frontend::ParserAtomsTable;
 
   ParserAtomsTable atomTable(cx->runtime());
 
-  constexpr size_t len = 0;
-
   const char ascii[] = {};
   const JS::Latin1Char latin1[] = {};
   const mozilla::Utf8Unit utf8[] = {};
   const char16_t char16[] = {};
 
+  const uint8_t bytes[] = {};
+  const js::LittleEndianChars leTwoByte(bytes);
+
   // Check that the well-known empty atom matches for different entry points.
   const ParserAtom* ref = cx->parserNames().empty;
   CHECK(ref);
-  CHECK(atomTable.internAscii(cx, ascii, len).unwrap() == ref);
-  CHECK(atomTable.internLatin1(cx, latin1, len).unwrap() == ref);
-  CHECK(atomTable.internUtf8(cx, utf8, len).unwrap() == ref);
-  CHECK(atomTable.internChar16(cx, char16, len).unwrap() == ref);
+  CHECK(atomTable.internAscii(cx, ascii, 0).unwrap() == ref);
+  CHECK(atomTable.internLatin1(cx, latin1, 0).unwrap() == ref);
+  CHECK(atomTable.internUtf8(cx, utf8, 0).unwrap() == ref);
+  CHECK(atomTable.internChar16(cx, char16, 0).unwrap() == ref);
+  CHECK(atomTable.internChar16LE(cx, leTwoByte, 0).unwrap() == ref);
 
   // Check concatenation works on empty atoms.
   const ParserAtom* concat[] = {
       cx->parserNames().empty,
       cx->parserNames().empty,
   };
   mozilla::Range<const ParserAtom*> concatRange(concat, 2);
   CHECK(atomTable.concatAtoms(cx, concatRange).unwrap() == ref);
@@ -51,22 +53,26 @@ BEGIN_TEST(testParserAtom_tiny1) {
   ParserAtomsTable atomTable(cx->runtime());
 
   char16_t a = 'a';
   const char ascii[] = {'a'};
   JS::Latin1Char latin1[] = {'a'};
   const mozilla::Utf8Unit utf8[] = {mozilla::Utf8Unit('a')};
   char16_t char16[] = {'a'};
 
+  const uint8_t bytes[] = {'a', 0};
+  const js::LittleEndianChars leTwoByte(bytes);
+
   const ParserAtom* ref = cx->parserNames().lookupTiny(&a, 1);
   CHECK(ref);
   CHECK(atomTable.internAscii(cx, ascii, 1).unwrap() == ref);
   CHECK(atomTable.internLatin1(cx, latin1, 1).unwrap() == ref);
   CHECK(atomTable.internUtf8(cx, utf8, 1).unwrap() == ref);
   CHECK(atomTable.internChar16(cx, char16, 1).unwrap() == ref);
+  CHECK(atomTable.internChar16LE(cx, leTwoByte, 1).unwrap() == ref);
 
   const ParserAtom* concat[] = {
       ref,
       cx->parserNames().empty,
   };
   mozilla::Range<const ParserAtom*> concatRange(concat, 2);
   CHECK(atomTable.concatAtoms(cx, concatRange).unwrap() == ref);
 
@@ -87,22 +93,26 @@ BEGIN_TEST(testParserAtom_tiny2) {
   ParserAtomsTable atomTable(cx->runtime());
 
   const char ascii[] = {'a', '0'};
   JS::Latin1Char latin1[] = {'a', '0'};
   const mozilla::Utf8Unit utf8[] = {mozilla::Utf8Unit('a'),
                                     mozilla::Utf8Unit('0')};
   char16_t char16[] = {'a', '0'};
 
+  const uint8_t bytes[] = {'a', 0, '0', 0};
+  const js::LittleEndianChars leTwoByte(bytes);
+
   const ParserAtom* ref = cx->parserNames().lookupTiny(ascii, 2);
   CHECK(ref);
   CHECK(atomTable.internAscii(cx, ascii, 2).unwrap() == ref);
   CHECK(atomTable.internLatin1(cx, latin1, 2).unwrap() == ref);
   CHECK(atomTable.internUtf8(cx, utf8, 2).unwrap() == ref);
   CHECK(atomTable.internChar16(cx, char16, 2).unwrap() == ref);
+  CHECK(atomTable.internChar16LE(cx, leTwoByte, 2).unwrap() == ref);
 
   const ParserAtom* concat[] = {
       cx->parserNames().lookupTiny(ascii + 0, 1),
       cx->parserNames().lookupTiny(ascii + 1, 1),
   };
   mozilla::Range<const ParserAtom*> concatRange(concat, 2);
   CHECK(atomTable.concatAtoms(cx, concatRange).unwrap() == ref);