author | Kannan Vijayan <kvijayan@mozilla.com> |
Thu, 24 Sep 2020 17:47:53 +0000 | |
changeset 550243 | 3b7aed9c0a06129b270943a8502f9db943f1ab50 |
parent 550242 | 47cdef9c2e5ccfeea61683e5b726d32ffbe5efe5 |
child 550244 | b87a709b879d95ff7ee26dd0dec9231ad00b8780 |
push id | 37809 |
push user | apavel@mozilla.com |
push date | Fri, 25 Sep 2020 03:37:48 +0000 |
treeherder | mozilla-central@4846ccf88574 [default view] [failures only] |
perfherder | [talos] [build metrics] [platform microbench] (compared to previous push) |
reviewers | tcampbell |
bugs | 1659104 |
milestone | 83.0a1 |
first release with | nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
|
last release without | nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
|
--- a/js/src/frontend/ParserAtom.cpp +++ b/js/src/frontend/ParserAtom.cpp @@ -70,16 +70,48 @@ class InflatedChar16Sequence<const Parse } return hash; } }; } // namespace js namespace js { + +template <> +class InflatedChar16Sequence<LittleEndianChars> { + private: + LittleEndianChars chars_; + size_t idx_; + size_t len_; + + public: + InflatedChar16Sequence(LittleEndianChars chars, size_t length) + : chars_(chars), idx_(0), len_(length) {} + + bool hasMore() { return idx_ < len_; } + + char16_t next() { + MOZ_ASSERT(hasMore()); + return chars_[idx_++]; + } + + HashNumber computeHash() const { + auto copy = *this; + HashNumber hash = 0; + while (copy.hasMore()) { + hash = mozilla::AddToHash(hash, copy.next()); + } + return hash; + } +}; + +} // namespace js + +namespace js { namespace frontend { static JS::OOM PARSER_ATOMS_OOM; static JSAtom* GetWellKnownAtom(JSContext* cx, WellKnownAtomId kind) { #define ASSERT_OFFSET_(idpart, id, text) \ static_assert(offsetof(JSAtomState, id) == \ int32_t(WellKnownAtomId::id) * \ @@ -308,16 +340,56 @@ JS::Result<const ParserAtom*, OOM> Parse auto addPtr = entrySet_.lookupForAdd(lookup); if (addPtr) { return (*addPtr)->asAtom(); } return internLatin1Seq(cx, addPtr, lookup.hash(), latin1Ptr, length); } +// For XDR +JS::Result<const ParserAtom*, OOM> ParserAtomsTable::internChar16LE( + JSContext* cx, LittleEndianChars twoByteLE, uint32_t length) { + // Check for tiny strings which are abundant in minified code. + if (const ParserAtom* tiny = wellKnownTable_.lookupTiny(twoByteLE, length)) { + return tiny; + } + + InflatedChar16Sequence<LittleEndianChars> seq(twoByteLE, length); + + // Check for well-known atom. + SpecificParserAtomLookup<LittleEndianChars> lookup(seq); + if (const ParserAtom* wk = wellKnownTable_.lookupChar16Seq(lookup)) { + return wk; + } + + // An XDR interning is guaranteed to be unique: there should be no + // existing atom with the same contents, except for well-known atoms. + EntrySet::AddPtr addPtr = entrySet_.lookupForAdd(lookup); + MOZ_ASSERT(!addPtr); + + // Compute the target encoding. + // NOTE: Length in code-points will be same, even if we deflate to Latin1. + bool wide = false; + InflatedChar16Sequence<LittleEndianChars> seqCopy = seq; + while (seqCopy.hasMore()) { + char16_t ch = seqCopy.next(); + if (ch > MAX_LATIN1_CHAR) { + wide = true; + break; + } + } + + // Add new entry. + return wide + ? internChar16Seq<char16_t>(cx, addPtr, lookup.hash(), seq, length) + : internChar16Seq<Latin1Char>(cx, addPtr, lookup.hash(), seq, + length); +} + JS::Result<const ParserAtom*, OOM> ParserAtomsTable::internUtf8( JSContext* cx, const mozilla::Utf8Unit* utf8Ptr, uint32_t nbyte) { // Check for tiny strings which are abundant in minified code. // NOTE: The tiny atoms are all ASCII-only so we can directly look at the // UTF-8 data without worrying about surrogates. if (const ParserAtom* tiny = wellKnownTable_.lookupTiny( reinterpret_cast<const Latin1Char*>(utf8Ptr), nbyte)) { return tiny;
--- a/js/src/frontend/ParserAtom.h +++ b/js/src/frontend/ParserAtom.h @@ -461,16 +461,21 @@ class ParserAtomsTable { JS::Result<const ParserAtom*, OOM> internUtf8( JSContext* cx, const mozilla::Utf8Unit* utf8Ptr, uint32_t nbyte); JS::Result<const ParserAtom*, OOM> internChar16(JSContext* cx, const char16_t* char16Ptr, uint32_t length); + // This only exists for XDR support. + JS::Result<const ParserAtom*, OOM> internChar16LE(JSContext* cx, + LittleEndianChars twoByteLE, + uint32_t length); + JS::Result<const ParserAtom*, OOM> internJSAtom( JSContext* cx, CompilationInfo& compilationInfo, JSAtom* atom); JS::Result<const ParserAtom*, OOM> concatAtoms( JSContext* cx, mozilla::Range<const ParserAtom*> atoms); }; template <typename CharT>
--- a/js/src/jsapi-tests/testParserAtom.cpp +++ b/js/src/jsapi-tests/testParserAtom.cpp @@ -15,30 +15,32 @@ // Test empty strings behave consistently. BEGIN_TEST(testParserAtom_empty) { using js::frontend::ParserAtom; using js::frontend::ParserAtomsTable; ParserAtomsTable atomTable(cx->runtime()); - constexpr size_t len = 0; - const char ascii[] = {}; const JS::Latin1Char latin1[] = {}; const mozilla::Utf8Unit utf8[] = {}; const char16_t char16[] = {}; + const uint8_t bytes[] = {}; + const js::LittleEndianChars leTwoByte(bytes); + // Check that the well-known empty atom matches for different entry points. const ParserAtom* ref = cx->parserNames().empty; CHECK(ref); - CHECK(atomTable.internAscii(cx, ascii, len).unwrap() == ref); - CHECK(atomTable.internLatin1(cx, latin1, len).unwrap() == ref); - CHECK(atomTable.internUtf8(cx, utf8, len).unwrap() == ref); - CHECK(atomTable.internChar16(cx, char16, len).unwrap() == ref); + CHECK(atomTable.internAscii(cx, ascii, 0).unwrap() == ref); + CHECK(atomTable.internLatin1(cx, latin1, 0).unwrap() == ref); + CHECK(atomTable.internUtf8(cx, utf8, 0).unwrap() == ref); + CHECK(atomTable.internChar16(cx, char16, 0).unwrap() == ref); + CHECK(atomTable.internChar16LE(cx, leTwoByte, 0).unwrap() == ref); // Check concatenation works on empty atoms. const ParserAtom* concat[] = { cx->parserNames().empty, cx->parserNames().empty, }; mozilla::Range<const ParserAtom*> concatRange(concat, 2); CHECK(atomTable.concatAtoms(cx, concatRange).unwrap() == ref); @@ -55,22 +57,26 @@ BEGIN_TEST(testParserAtom_tiny1) { ParserAtomsTable atomTable(cx->runtime()); char16_t a = 'a'; const char ascii[] = {'a'}; JS::Latin1Char latin1[] = {'a'}; const mozilla::Utf8Unit utf8[] = {mozilla::Utf8Unit('a')}; char16_t char16[] = {'a'}; + const uint8_t bytes[] = {'a', 0}; + const js::LittleEndianChars leTwoByte(bytes); + const ParserAtom* ref = cx->parserNames().lookupTiny(&a, 1); CHECK(ref); CHECK(atomTable.internAscii(cx, ascii, 1).unwrap() == ref); CHECK(atomTable.internLatin1(cx, latin1, 1).unwrap() == ref); CHECK(atomTable.internUtf8(cx, utf8, 1).unwrap() == ref); CHECK(atomTable.internChar16(cx, char16, 1).unwrap() == ref); + CHECK(atomTable.internChar16LE(cx, leTwoByte, 1).unwrap() == ref); const ParserAtom* concat[] = { ref, cx->parserNames().empty, }; mozilla::Range<const ParserAtom*> concatRange(concat, 2); CHECK(atomTable.concatAtoms(cx, concatRange).unwrap() == ref); @@ -91,22 +97,26 @@ BEGIN_TEST(testParserAtom_tiny2) { ParserAtomsTable atomTable(cx->runtime()); const char ascii[] = {'a', '0'}; JS::Latin1Char latin1[] = {'a', '0'}; const mozilla::Utf8Unit utf8[] = {mozilla::Utf8Unit('a'), mozilla::Utf8Unit('0')}; char16_t char16[] = {'a', '0'}; + const uint8_t bytes[] = {'a', 0, '0', 0}; + const js::LittleEndianChars leTwoByte(bytes); + const ParserAtom* ref = cx->parserNames().lookupTiny(ascii, 2); CHECK(ref); CHECK(atomTable.internAscii(cx, ascii, 2).unwrap() == ref); CHECK(atomTable.internLatin1(cx, latin1, 2).unwrap() == ref); CHECK(atomTable.internUtf8(cx, utf8, 2).unwrap() == ref); CHECK(atomTable.internChar16(cx, char16, 2).unwrap() == ref); + CHECK(atomTable.internChar16LE(cx, leTwoByte, 2).unwrap() == ref); const ParserAtom* concat[] = { cx->parserNames().lookupTiny(ascii + 0, 1), cx->parserNames().lookupTiny(ascii + 1, 1), }; mozilla::Range<const ParserAtom*> concatRange(concat, 2); CHECK(atomTable.concatAtoms(cx, concatRange).unwrap() == ref);