Bug 1666274 - Support constexpr initialization of ParserAtomEntry r=djvj
authorTed Campbell <tcampbell@mozilla.com>
Wed, 23 Sep 2020 19:47:14 +0000
changeset 550076 9c9de11d4a7e79fcc95f094d0b159407cd39cddf
parent 550075 7302dce9d2ced15b9391677a42aea1fc7937d1e3
child 550077 09648c7997b634d2ae3cd2a0006f0d7085f81f10
push id127066
push usertcampbell@mozilla.com
push dateWed, 23 Sep 2020 22:23:14 +0000
treeherderautoland@09648c7997b6 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersdjvj
bugs1666274
milestone83.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1666274 - Support constexpr initialization of ParserAtomEntry r=djvj Now that all allocations are inline we can remove the tagged buffer pointer and have it implicitly computed from `this` instead. Update the atomIndex to avoid using a Variant and instead manage the type tag explicitly. We also remove the use of mutable keyword to allow constexpr atoms to later be stored in the .rodata section. Add a StaticParserAtomEntry type to use for this initialization. Differential Revision: https://phabricator.services.mozilla.com/D91086
js/src/frontend/ParserAtom.cpp
js/src/frontend/ParserAtom.h
--- a/js/src/frontend/ParserAtom.cpp
+++ b/js/src/frontend/ParserAtom.cpp
@@ -104,26 +104,28 @@ mozilla::GenericErrorResult<OOM> RaisePa
   js::ReportOutOfMemory(cx);
   return mozilla::Err(PARSER_ATOMS_OOM);
 }
 
 template <typename CharT, typename SeqCharT>
 /* static */ JS::Result<UniquePtr<ParserAtomEntry>, OOM>
 ParserAtomEntry::allocate(JSContext* cx, InflatedChar16Sequence<SeqCharT> seq,
                           uint32_t length, HashNumber hash) {
-  ParserAtomEntry* uninitEntry =
-      cx->pod_malloc_with_extra<ParserAtomEntry, CharT>(length);
-  if (!uninitEntry) {
+  constexpr size_t HeaderSize = sizeof(ParserAtomEntry);
+  uint8_t* raw = cx->pod_malloc<uint8_t>(HeaderSize + (sizeof(CharT) * length));
+  if (!raw) {
     return RaiseParserAtomsOOMError(cx);
   }
 
-  CharT* entryBuf = ParserAtomEntry::inlineBufferPtr<CharT>(
-      reinterpret_cast<ParserAtomEntry*>(uninitEntry));
-  UniquePtr<ParserAtomEntry> entry(new (uninitEntry)
-                                       ParserAtomEntry(entryBuf, length, hash));
+  constexpr bool hasTwoByteChars = (sizeof(CharT) == 2);
+  static_assert(sizeof(CharT) == 1 || sizeof(CharT) == 2,
+                "CharT should be 1 or 2 byte type");
+  UniquePtr<ParserAtomEntry> entry(
+      new (raw) ParserAtomEntry(length, hash, hasTwoByteChars));
+  CharT* entryBuf = entry->chars<CharT>();
   drainChar16Seq(entryBuf, seq, length);
   return entry;
 }
 
 bool ParserAtomEntry::equalsJSAtom(JSAtom* other) const {
   // Compare hashes and lengths first.
   if (hash_ != other->hash() || length_ != other->length()) {
     return false;
@@ -176,45 +178,51 @@ bool ParserAtomEntry::isIndex(uint32_t* 
            js::CheckStringIsIndex(latin1Chars(), len, indexp);
   }
   return mozilla::IsAsciiDigit(*twoByteChars()) &&
          js::CheckStringIsIndex(twoByteChars(), len, indexp);
 }
 
 JS::Result<JSAtom*, OOM> ParserAtomEntry::toJSAtom(
     JSContext* cx, CompilationInfo& compilationInfo) const {
-  if (atomIndex_.is<AtomIndex>()) {
-    return compilationInfo.input.atoms[atomIndex_.as<AtomIndex>()];
-  }
-  if (atomIndex_.is<WellKnownAtomId>()) {
-    return GetWellKnownAtom(cx, atomIndex_.as<WellKnownAtomId>());
-  }
-  if (atomIndex_.is<StaticParserString1>()) {
-    char16_t ch = static_cast<char16_t>(atomIndex_.as<StaticParserString1>());
-    return cx->staticStrings().getUnit(ch);
-  }
-  if (atomIndex_.is<StaticParserString2>()) {
-    size_t index = static_cast<size_t>(atomIndex_.as<StaticParserString2>());
-    return cx->staticStrings().getLength2FromIndex(index);
+  switch (atomIndexKind_) {
+    case AtomIndexKind::AtomIndex:
+      return compilationInfo.input.atoms[atomIndex_];
+
+    case AtomIndexKind::WellKnown:
+      return GetWellKnownAtom(cx, WellKnownAtomId(atomIndex_));
+
+    case AtomIndexKind::Static1: {
+      char16_t ch = static_cast<char16_t>(atomIndex_);
+      return cx->staticStrings().getUnit(ch);
+    }
+
+    case AtomIndexKind::Static2:
+      return cx->staticStrings().getLength2FromIndex(atomIndex_);
+
+    case AtomIndexKind::Unresolved:
+      break;
   }
 
   JSAtom* atom;
   if (hasLatin1Chars()) {
     atom = AtomizeChars(cx, latin1Chars(), length());
   } else {
     atom = AtomizeChars(cx, twoByteChars(), length());
   }
   if (!atom) {
     return RaiseParserAtomsOOMError(cx);
   }
   auto index = compilationInfo.input.atoms.length();
   if (!compilationInfo.input.atoms.append(atom)) {
     return RaiseParserAtomsOOMError(cx);
   }
-  atomIndex_ = mozilla::AsVariant(AtomIndex(index));
+
+  const_cast<ParserAtomEntry*>(this)->setAtomIndex(AtomIndex(index));
+
   return atom;
 }
 
 bool ParserAtomEntry::toNumber(JSContext* cx, double* result) const {
   return hasLatin1Chars() ? CharsToNumber(cx, latin1Chars(), length(), result)
                           : CharsToNumber(cx, twoByteChars(), length(), result);
 }
 
@@ -404,35 +412,30 @@ JS::Result<const ParserAtom*, OOM> Parse
             ? internLatin1(cx, atom->latin1Chars(nogc), atom->length())
             : internChar16(cx, atom->twoByteChars(nogc), atom->length());
     if (result.isErr()) {
       return result;
     }
     id = result.unwrap();
   }
 
-  if (id->atomIndex_.is<mozilla::Nothing>()) {
+  if (id->atomIndexKind_ == ParserAtomEntry::AtomIndexKind::Unresolved) {
     MOZ_ASSERT(id->equalsJSAtom(atom));
 
     auto index = AtomIndex(compilationInfo.input.atoms.length());
     if (!compilationInfo.input.atoms.append(atom)) {
       return RaiseParserAtomsOOMError(cx);
     }
-    id->setAtomIndex(index);
-  } else {
-#ifdef DEBUG
-    if (id->atomIndex_.is<AtomIndex>()) {
-      MOZ_ASSERT(compilationInfo.input.atoms[id->atomIndex_.as<AtomIndex>()] ==
-                 atom);
-    } else if (id->atomIndex_.is<WellKnownAtomId>()) {
-      MOZ_ASSERT(GetWellKnownAtom(cx, id->atomIndex_.as<WellKnownAtomId>()) ==
-                 atom);
-    }
-#endif
+
+    const_cast<ParserAtom*>(id)->setAtomIndex(AtomIndex(index));
   }
+
+  // We should (infallibly) map back to the same JSAtom.
+  MOZ_ASSERT(id->toJSAtom(cx, compilationInfo).unwrap() == atom);
+
   return id;
 }
 
 JS::Result<const ParserAtom*, OOM> ParserAtomsTable::concatAtoms(
     JSContext* cx, mozilla::Range<const ParserAtom*> atoms) {
   MOZ_ASSERT(atoms.length() >= 2,
              "concatAtoms should only be used for multiple inputs");
 
--- a/js/src/frontend/ParserAtom.h
+++ b/js/src/frontend/ParserAtom.h
@@ -60,122 +60,20 @@ enum class StaticParserString2 : uint16_
 /**
  * A ParserAtomEntry is an in-parser representation of an interned atomic
  * string.  It mostly mirrors the information carried by a JSAtom*.
  *
  * The atom contents are stored in one of two locations:
  *  1. Inline Latin1Char storage (immediately after the ParserAtomEntry memory).
  *  2. Inline char16_t storage (immediately after the ParserAtomEntry memory).
  */
-class alignas(alignof(void*)) ParserAtomEntry {
+class alignas(alignof(uint32_t)) ParserAtomEntry {
   friend class ParserAtomsTable;
   friend class WellKnownParserAtoms;
 
-  /**
-   * This single-word variant struct multiplexes between four representations.
-   *    1. An owned pointer to a heap-allocated Latin1Char buffer.
-   *    2. An owned pointer to a heap-allocated char16_t buffer.
-   *    3. A weak Latin1Char pointer to the inline buffer in an entry.
-   *    4. A weak char16_t pointer to the inline buffer in an entry.
-   *
-   * The lowest bit of the tagged pointer is used to distinguish between
-   * character widths.
-   *
-   * The second lowest bit of the tagged pointer is used to distinguish
-   * between heap-ptr contents and inline contents.
-   */
-  struct ContentPtrVariant {
-    uintptr_t taggedPtr;
-
-    static const uintptr_t CHARTYPE_MASK = 0x1;
-    static const uintptr_t CHARTYPE_LATIN1 = 0x0;
-    static const uintptr_t CHARTYPE_TWO_BYTE = 0x1;
-
-    static const uintptr_t LOCATION_MASK = 0x2;
-    static const uintptr_t LOCATION_INLINE = 0x0;
-    static const uintptr_t LOCATION_HEAP = 0x2;
-
-    static const uintptr_t LOWBITS_MASK = CHARTYPE_MASK | LOCATION_MASK;
-
-    // A tagged ptr representation for no contents.  The taggedPtr
-    // field is set to when contents are moved out of a ParserAtomEntry,
-    // so that the original atom (moved from) does not try to destroy/free
-    // its contents.
-    static const uintptr_t EMPTY_TAGGED_PTR =
-        CHARTYPE_LATIN1 | LOCATION_INLINE | (0x0 /* nullptr */);
-
-    template <typename CharT>
-    static uintptr_t Tag(const CharT* ptr, bool isInline) {
-      static_assert(std::is_same_v<CharT, Latin1Char> ||
-                    std::is_same_v<CharT, char16_t>);
-      return uintptr_t(ptr) |
-             (std::is_same_v<CharT, Latin1Char> ? CHARTYPE_LATIN1
-                                                : CHARTYPE_TWO_BYTE) |
-             (isInline ? LOCATION_INLINE : LOCATION_HEAP);
-    }
-
-    // The variant owns data, so move semantics apply.
-    ContentPtrVariant(const ContentPtrVariant& other) = delete;
-
-    // Raw pointer constructor.
-    template <typename CharT>
-    ContentPtrVariant(const CharT* weakContents, bool isInline)
-        : taggedPtr(Tag(weakContents, isInline)) {
-      static_assert(std::is_same_v<CharT, Latin1Char> ||
-                    std::is_same_v<CharT, char16_t>);
-      MOZ_ASSERT((reinterpret_cast<uintptr_t>(weakContents) & LOWBITS_MASK) ==
-                 0);
-    }
-
-    // Owned pointer construction.
-    template <typename CharT>
-    explicit ContentPtrVariant(
-        mozilla::UniquePtr<CharT[], JS::FreePolicy> owned)
-        : ContentPtrVariant(owned.release(), false) {}
-
-    // Move construction.
-    // Clear the other variant's contents to not free content after move.
-    ContentPtrVariant(ContentPtrVariant&& other) : taggedPtr(other.taggedPtr) {
-      other.taggedPtr = EMPTY_TAGGED_PTR;
-    }
-
-    ~ContentPtrVariant() {
-      if (isInline()) {
-        return;
-      }
-
-      // Re-construct UniqueChars<CharT[]> and destroy them.
-      if (hasCharType<Latin1Char>()) {
-        UniqueLatin1Chars chars(getUnchecked<Latin1Char>());
-      } else {
-        UniqueTwoByteChars chars(getUnchecked<char16_t>());
-      }
-    }
-
-    template <typename T>
-    const T* getUnchecked() const {
-      return reinterpret_cast<const T*>(taggedPtr & ~LOWBITS_MASK);
-    }
-    template <typename T>
-    T* getUnchecked() {
-      return reinterpret_cast<T*>(taggedPtr & ~LOWBITS_MASK);
-    }
-    template <typename CharT>
-    bool hasCharType() const {
-      static_assert(std::is_same_v<CharT, Latin1Char> ||
-                    std::is_same_v<CharT, char16_t>);
-      return (taggedPtr & CHARTYPE_MASK) ==
-             (std::is_same_v<CharT, Latin1Char> ? CHARTYPE_LATIN1
-                                                : CHARTYPE_TWO_BYTE);
-    }
-    bool isInline() const {
-      return (taggedPtr & LOCATION_MASK) == LOCATION_INLINE;
-    }
-  };
-
   static const uint16_t MAX_LATIN1_CHAR = 0xff;
 
   // Helper routine to read some sequence of two-byte chars, and write them
   // into a target buffer of a particular character width.
   //
   // The characters in the sequence must have been verified prior
   template <typename CharT, typename SeqCharT>
   static void drainChar16Seq(CharT* buf, InflatedChar16Sequence<SeqCharT> seq,
@@ -191,60 +89,48 @@ class alignas(alignof(void*)) ParserAtom
       }
       MOZ_ASSERT(cur < (buf + length));
       *cur = ch;
       cur++;
     }
   }
 
  private:
-  // Owned characters, either 8-bit Latin1Char, or 16-bit char16_t
-  ContentPtrVariant variant_;
+  // The JSAtom-compatible hash of the string.
+  HashNumber hash_ = 0;
 
   // The length of the buffer in chars_.
-  uint32_t length_;
-
-  // The JSAtom-compatible hash of the string.
-  HashNumber hash_;
+  uint32_t length_ = 0;
 
-  // Used to dynamically optimize the mapping of ParserAtoms to JSAtom*s.
-  //
-  // If this ParserAtomEntry is a part of WellKnownParserAtoms, this should
-  // hold WellKnownAtomId that maps to an item in cx->names().
-  //
-  // Otherwise, this should hold AtomIndex into CompilationInfo.atoms,
-  // or empty if the JSAtom isn't yet allocated.
-  using AtomIndexType =
-      mozilla::Variant<mozilla::Nothing, AtomIndex, WellKnownAtomId,
-                       StaticParserString1, StaticParserString2>;
-  mutable AtomIndexType atomIndex_ = AtomIndexType(mozilla::Nothing());
+  // Mapping into from ParserAtoms to JSAtoms.
+  enum class AtomIndexKind : uint8_t {
+    Unresolved,  // Not yet resolved
+    AtomIndex,   // Index into CompilationInfo atoms map
+    WellKnown,   // WellKnownAtomId to index into cx->names() set
+    Static1,     // Index into StaticStrings length-1 set
+    Static2,     // Index into StaticStrings length-2 set
+  };
+  uint32_t atomIndex_ = 0;
+  AtomIndexKind atomIndexKind_ = AtomIndexKind::Unresolved;
+
+  // Encoding type.
+  bool hasTwoByteChars_ = false;
 
   // End of fields.
 
   static const uint32_t MAX_LENGTH = JSString::MAX_LENGTH;
 
-  template <typename CharT>
-  ParserAtomEntry(mozilla::UniquePtr<CharT[], JS::FreePolicy> chars,
-                  uint32_t length, HashNumber hash)
-      : variant_(std::move(chars)), length_(length), hash_(hash) {}
+  ParserAtomEntry(uint32_t length, HashNumber hash, bool hasTwoByteChars)
+      : hash_(hash), length_(length), hasTwoByteChars_(hasTwoByteChars) {}
 
-  template <typename CharT>
-  ParserAtomEntry(const CharT* chars, uint32_t length, HashNumber hash)
-      : variant_(chars, /* isInline = */ true), length_(length), hash_(hash) {}
+ protected:
+  // The constexpr constructor is used by StaticParserAtomEntry.
+  constexpr ParserAtomEntry() = default;
 
  public:
-  template <typename CharT>
-  static CharT* inlineBufferPtr(ParserAtomEntry* entry) {
-    return reinterpret_cast<CharT*>(entry + 1);
-  }
-  template <typename CharT>
-  static const CharT* inlineBufferPtr(const ParserAtomEntry* entry) {
-    return reinterpret_cast<const CharT*>(entry + 1);
-  }
-
   // ParserAtomEntries may own their content buffers in variant_, and thus
   // cannot be copy-constructed - as a new chars would need to be allocated.
   ParserAtomEntry(const ParserAtomEntry&) = delete;
   ParserAtomEntry(ParserAtomEntry&& other) = delete;
 
   template <typename CharT, typename SeqCharT>
   static JS::Result<UniquePtr<ParserAtomEntry>, OOM> allocate(
       JSContext* cx, InflatedChar16Sequence<SeqCharT> seq, uint32_t length,
@@ -253,27 +139,33 @@ class alignas(alignof(void*)) ParserAtom
   ParserAtom* asAtom() { return reinterpret_cast<ParserAtom*>(this); }
   const ParserAtom* asAtom() const {
     return reinterpret_cast<const ParserAtom*>(this);
   }
 
   inline ParserName* asName();
   inline const ParserName* asName() const;
 
-  bool hasLatin1Chars() const { return variant_.hasCharType<Latin1Char>(); }
-  bool hasTwoByteChars() const { return variant_.hasCharType<char16_t>(); }
+  bool hasLatin1Chars() const { return !hasTwoByteChars_; }
+  bool hasTwoByteChars() const { return hasTwoByteChars_; }
+
+  template <typename CharT>
+  const CharT* chars() const {
+    MOZ_ASSERT(sizeof(CharT) == (hasTwoByteChars() ? 2 : 1));
+    return reinterpret_cast<const CharT*>(this + 1);
+  }
 
-  const Latin1Char* latin1Chars() const {
-    MOZ_ASSERT(hasLatin1Chars());
-    return variant_.getUnchecked<Latin1Char>();
+  template <typename CharT>
+  CharT* chars() {
+    MOZ_ASSERT(sizeof(CharT) == (hasTwoByteChars() ? 2 : 1));
+    return reinterpret_cast<CharT*>(this + 1);
   }
-  const char16_t* twoByteChars() const {
-    MOZ_ASSERT(hasTwoByteChars());
-    return variant_.getUnchecked<char16_t>();
-  }
+
+  const Latin1Char* latin1Chars() const { return chars<Latin1Char>(); }
+  const char16_t* twoByteChars() const { return chars<char16_t>(); }
   mozilla::Range<const Latin1Char> latin1Range() const {
     return mozilla::Range(latin1Chars(), length_);
   }
   mozilla::Range<const char16_t> twoByteRange() const {
     return mozilla::Range(twoByteChars(), length_);
   }
 
   bool isIndex(uint32_t* indexp) const;
@@ -285,29 +177,41 @@ class alignas(alignof(void*)) ParserAtom
   HashNumber hash() const { return hash_; }
   uint32_t length() const { return length_; }
 
   bool equalsJSAtom(JSAtom* other) const;
 
   template <typename CharT>
   bool equalsSeq(HashNumber hash, InflatedChar16Sequence<CharT> seq) const;
 
-  void setAtomIndex(AtomIndex index) const {
-    atomIndex_ = mozilla::AsVariant(index);
+ private:
+  void setAtomIndex(AtomIndex index) {
+    atomIndex_ = index;
+    atomIndexKind_ = AtomIndexKind::AtomIndex;
   }
-  void setWellKnownAtomId(WellKnownAtomId kind) const {
-    atomIndex_ = mozilla::AsVariant(kind);
+  constexpr void setWellKnownAtomId(WellKnownAtomId kind) {
+    atomIndex_ = static_cast<uint32_t>(kind);
+    atomIndexKind_ = AtomIndexKind::WellKnown;
   }
-  void setStaticParserString1(StaticParserString1 s) const {
-    atomIndex_ = mozilla::AsVariant(s);
+  constexpr void setStaticParserString1(StaticParserString1 s) {
+    atomIndex_ = static_cast<uint32_t>(s);
+    atomIndexKind_ = AtomIndexKind::Static1;
   }
-  void setStaticParserString2(StaticParserString2 s) const {
-    atomIndex_ = mozilla::AsVariant(s);
+  constexpr void setStaticParserString2(StaticParserString2 s) {
+    atomIndex_ = static_cast<uint32_t>(s);
+    atomIndexKind_ = AtomIndexKind::Static2;
+  }
+  constexpr void setHashAndLength(HashNumber hash, uint32_t length,
+                                  bool hasTwoByteChars = false) {
+    hash_ = hash;
+    length_ = length;
+    hasTwoByteChars_ = hasTwoByteChars;
   }
 
+ public:
   // Convert this entry to a js-atom.  The first time this method is called
   // the entry will cache the JSAtom pointer to return later.
   JS::Result<JSAtom*, OOM> toJSAtom(JSContext* cx,
                                     CompilationInfo& compilationInfo) const;
 
   // Convert this entry to a number.
   bool toNumber(JSContext* cx, double* result) const;
 
@@ -333,16 +237,40 @@ inline ParserName* ParserAtomEntry::asNa
   MOZ_ASSERT(!isIndex());
   return static_cast<ParserName*>(this);
 }
 inline const ParserName* ParserAtomEntry::asName() const {
   MOZ_ASSERT(!isIndex());
   return static_cast<const ParserName*>(this);
 }
 
+// A ParserAtomEntry with explicit inline storage. This is compatible with
+// constexpr to have builtin atoms. Care must be taken to ensure these atoms are
+// unique.
+template <size_t Length>
+class StaticParserAtomEntry : public ParserAtomEntry {
+  alignas(alignof(ParserAtomEntry)) char storage_[Length] = {};
+
+ public:
+  constexpr StaticParserAtomEntry() = default;
+
+  constexpr char* storage() {
+    static_assert(
+        offsetof(StaticParserAtomEntry, storage_) == sizeof(ParserAtomEntry),
+        "StaticParserAtomEntry storage should follow ParserAtomEntry");
+    return storage_;
+  }
+};
+
+template <>
+class StaticParserAtomEntry<0> : public ParserAtomEntry {
+ public:
+  constexpr StaticParserAtomEntry() = default;
+};
+
 /**
  * A lookup structure that allows for querying ParserAtoms in
  * a hashtable using a flexible input type that supports string
  * representations of various forms.
  */
 class ParserAtomLookup {
  protected:
   HashNumber hash_;