Bug 1666274 - [Beta82] Support constexpr initialization of ParserAtomEntry. r=djvj, a=jcristau
authorTed Campbell <tcampbell@mozilla.com>
Tue, 22 Sep 2020 20:54:18 +0000
changeset 614820 d3fe34f9b009b6264f5aec394adc0520afe03926
parent 614819 b4d8caa99362c45011c138d348734f3d7f4e900c
child 614821 7dd5c43f1b5c1b579be8b47092567ca8ed7e185c
push id14029
push userjcristau@mozilla.com
push dateSat, 26 Sep 2020 07:33:07 +0000
treeherdermozilla-beta@7dd5c43f1b5c [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersdjvj, jcristau
bugs1666274
milestone82.0
Bug 1666274 - [Beta82] Support constexpr initialization of ParserAtomEntry. r=djvj, a=jcristau Now that all allocations are inline we can remove the tagged buffer pointer and have it implicitly computed from `this` instead. Update the atomIndex to avoid using a Variant and instead manage the type tag explicitly. We also remove the use of mutable keyword to allow constexpr atoms to later be stored in the .rodata section. Add a StaticParserAtomEntry type to use for this initialization. Depends on D91425 Differential Revision: https://phabricator.services.mozilla.com/D91426
js/src/frontend/ParserAtom.cpp
js/src/frontend/ParserAtom.h
--- a/js/src/frontend/ParserAtom.cpp
+++ b/js/src/frontend/ParserAtom.cpp
@@ -104,26 +104,28 @@ mozilla::GenericErrorResult<OOM&> RaiseP
   js::ReportOutOfMemory(cx);
   return mozilla::Err(PARSER_ATOMS_OOM);
 }
 
 template <typename CharT, typename SeqCharT>
 /* static */ JS::Result<UniquePtr<ParserAtomEntry>, OOM&>
 ParserAtomEntry::allocate(JSContext* cx, InflatedChar16Sequence<SeqCharT> seq,
                           uint32_t length, HashNumber hash) {
-  ParserAtomEntry* uninitEntry =
-      cx->pod_malloc_with_extra<ParserAtomEntry, CharT>(length);
-  if (!uninitEntry) {
+  constexpr size_t HeaderSize = sizeof(ParserAtomEntry);
+  uint8_t* raw = cx->pod_malloc<uint8_t>(HeaderSize + (sizeof(CharT) * length));
+  if (!raw) {
     return RaiseParserAtomsOOMError(cx);
   }
 
-  CharT* entryBuf = ParserAtomEntry::inlineBufferPtr<CharT>(
-      reinterpret_cast<ParserAtomEntry*>(uninitEntry));
-  UniquePtr<ParserAtomEntry> entry(new (uninitEntry)
-                                       ParserAtomEntry(entryBuf, length, hash));
+  constexpr bool hasTwoByteChars = (sizeof(CharT) == 2);
+  static_assert(sizeof(CharT) == 1 || sizeof(CharT) == 2,
+                "CharT should be 1 or 2 byte type");
+  UniquePtr<ParserAtomEntry> entry(
+      new (raw) ParserAtomEntry(length, hash, hasTwoByteChars));
+  CharT* entryBuf = entry->chars<CharT>();
   drainChar16Seq(entryBuf, seq, length);
   return entry;
 }
 
 bool ParserAtomEntry::equalsJSAtom(JSAtom* other) const {
   // Compare hashes and lengths first.
   if (hash_ != other->hash() || length_ != other->length()) {
     return false;
@@ -176,45 +178,51 @@ bool ParserAtomEntry::isIndex(uint32_t* 
            js::CheckStringIsIndex(latin1Chars(), len, indexp);
   }
   return mozilla::IsAsciiDigit(*twoByteChars()) &&
          js::CheckStringIsIndex(twoByteChars(), len, indexp);
 }
 
 JS::Result<JSAtom*, OOM&> ParserAtomEntry::toJSAtom(
     JSContext* cx, CompilationInfo& compilationInfo) const {
-  if (atomIndex_.is<AtomIndex>()) {
-    return compilationInfo.input.atoms[atomIndex_.as<AtomIndex>()];
-  }
-  if (atomIndex_.is<WellKnownAtomId>()) {
-    return GetWellKnownAtom(cx, atomIndex_.as<WellKnownAtomId>());
-  }
-  if (atomIndex_.is<StaticParserString1>()) {
-    char16_t ch = static_cast<char16_t>(atomIndex_.as<StaticParserString1>());
-    return cx->staticStrings().getUnit(ch);
-  }
-  if (atomIndex_.is<StaticParserString2>()) {
-    size_t index = static_cast<size_t>(atomIndex_.as<StaticParserString2>());
-    return cx->staticStrings().getLength2FromIndex(index);
+  switch (atomIndexKind_) {
+    case AtomIndexKind::AtomIndex:
+      return compilationInfo.input.atoms[atomIndex_];
+
+    case AtomIndexKind::WellKnown:
+      return GetWellKnownAtom(cx, WellKnownAtomId(atomIndex_));
+
+    case AtomIndexKind::Static1: {
+      char16_t ch = static_cast<char16_t>(atomIndex_);
+      return cx->staticStrings().getUnit(ch);
+    }
+
+    case AtomIndexKind::Static2:
+      return cx->staticStrings().getLength2FromIndex(atomIndex_);
+
+    case AtomIndexKind::Unresolved:
+      break;
   }
 
   JSAtom* atom;
   if (hasLatin1Chars()) {
     atom = AtomizeChars(cx, latin1Chars(), length());
   } else {
     atom = AtomizeChars(cx, twoByteChars(), length());
   }
   if (!atom) {
     return RaiseParserAtomsOOMError(cx);
   }
   auto index = compilationInfo.input.atoms.length();
   if (!compilationInfo.input.atoms.append(atom)) {
     return RaiseParserAtomsOOMError(cx);
   }
-  atomIndex_ = mozilla::AsVariant(AtomIndex(index));
+
+  const_cast<ParserAtomEntry*>(this)->setAtomIndex(AtomIndex(index));
+
   return atom;
 }
 
 bool ParserAtomEntry::toNumber(JSContext* cx, double* result) const {
   return hasLatin1Chars() ? CharsToNumber(cx, latin1Chars(), length(), result)
                           : CharsToNumber(cx, twoByteChars(), length(), result);
 }
 
@@ -404,35 +412,30 @@ JS::Result<const ParserAtom*, OOM&> Pars
             ? internLatin1(cx, atom->latin1Chars(nogc), atom->length())
             : internChar16(cx, atom->twoByteChars(nogc), atom->length());
     if (result.isErr()) {
       return result;
     }
     id = result.unwrap();
   }
 
-  if (id->atomIndex_.is<mozilla::Nothing>()) {
+  if (id->atomIndexKind_ == ParserAtomEntry::AtomIndexKind::Unresolved) {
     MOZ_ASSERT(id->equalsJSAtom(atom));
 
     auto index = AtomIndex(compilationInfo.input.atoms.length());
     if (!compilationInfo.input.atoms.append(atom)) {
       return RaiseParserAtomsOOMError(cx);
     }
-    id->setAtomIndex(index);
-  } else {
-#ifdef DEBUG
-    if (id->atomIndex_.is<AtomIndex>()) {
-      MOZ_ASSERT(compilationInfo.input.atoms[id->atomIndex_.as<AtomIndex>()] ==
-                 atom);
-    } else if (id->atomIndex_.is<WellKnownAtomId>()) {
-      MOZ_ASSERT(GetWellKnownAtom(cx, id->atomIndex_.as<WellKnownAtomId>()) ==
-                 atom);
-    }
-#endif
+
+    const_cast<ParserAtom*>(id)->setAtomIndex(AtomIndex(index));
   }
+
+  // We should (infallibly) map back to the same JSAtom.
+  MOZ_ASSERT(id->toJSAtom(cx, compilationInfo).unwrap() == atom);
+
   return id;
 }
 
 JS::Result<const ParserAtom*, OOM&> ParserAtomsTable::concatAtoms(
     JSContext* cx, mozilla::Range<const ParserAtom*> atoms) {
   MOZ_ASSERT(atoms.length() >= 2,
              "concatAtoms should only be used for multiple inputs");
 
--- a/js/src/frontend/ParserAtom.h
+++ b/js/src/frontend/ParserAtom.h
@@ -60,122 +60,20 @@ enum class StaticParserString2 : uint16_
 /**
  * A ParserAtomEntry is an in-parser representation of an interned atomic
  * string.  It mostly mirrors the information carried by a JSAtom*.
  *
  * The atom contents are stored in one of two locations:
  *  1. Inline Latin1Char storage (immediately after the ParserAtomEntry memory).
  *  2. Inline char16_t storage (immediately after the ParserAtomEntry memory).
  */
-class alignas(alignof(void*)) ParserAtomEntry {
+class alignas(alignof(uint32_t)) ParserAtomEntry {
   friend class ParserAtomsTable;
   friend class WellKnownParserAtoms;
 
-  /**
-   * This single-word variant struct multiplexes between four representations.
-   *    1. An owned pointer to a heap-allocated Latin1Char buffer.
-   *    2. An owned pointer to a heap-allocated char16_t buffer.
-   *    3. A weak Latin1Char pointer to the inline buffer in an entry.
-   *    4. A weak char16_t pointer to the inline buffer in an entry.
-   *
-   * The lowest bit of the tagged pointer is used to distinguish between
-   * character widths.
-   *
-   * The second lowest bit of the tagged pointer is used to distinguish
-   * between heap-ptr contents and inline contents.
-   */
-  struct ContentPtrVariant {
-    uintptr_t taggedPtr;
-
-    static const uintptr_t CHARTYPE_MASK = 0x1;
-    static const uintptr_t CHARTYPE_LATIN1 = 0x0;
-    static const uintptr_t CHARTYPE_TWO_BYTE = 0x1;
-
-    static const uintptr_t LOCATION_MASK = 0x2;
-    static const uintptr_t LOCATION_INLINE = 0x0;
-    static const uintptr_t LOCATION_HEAP = 0x2;
-
-    static const uintptr_t LOWBITS_MASK = CHARTYPE_MASK | LOCATION_MASK;
-
-    // A tagged ptr representation for no contents.  The taggedPtr
-    // field is set to when contents are moved out of a ParserAtomEntry,
-    // so that the original atom (moved from) does not try to destroy/free
-    // its contents.
-    static const uintptr_t EMPTY_TAGGED_PTR =
-        CHARTYPE_LATIN1 | LOCATION_INLINE | (0x0 /* nullptr */);
-
-    template <typename CharT>
-    static uintptr_t Tag(const CharT* ptr, bool isInline) {
-      static_assert(std::is_same_v<CharT, Latin1Char> ||
-                    std::is_same_v<CharT, char16_t>);
-      return uintptr_t(ptr) |
-             (std::is_same_v<CharT, Latin1Char> ? CHARTYPE_LATIN1
-                                                : CHARTYPE_TWO_BYTE) |
-             (isInline ? LOCATION_INLINE : LOCATION_HEAP);
-    }
-
-    // The variant owns data, so move semantics apply.
-    ContentPtrVariant(const ContentPtrVariant& other) = delete;
-
-    // Raw pointer constructor.
-    template <typename CharT>
-    ContentPtrVariant(const CharT* weakContents, bool isInline)
-        : taggedPtr(Tag(weakContents, isInline)) {
-      static_assert(std::is_same_v<CharT, Latin1Char> ||
-                    std::is_same_v<CharT, char16_t>);
-      MOZ_ASSERT((reinterpret_cast<uintptr_t>(weakContents) & LOWBITS_MASK) ==
-                 0);
-    }
-
-    // Owned pointer construction.
-    template <typename CharT>
-    explicit ContentPtrVariant(
-        mozilla::UniquePtr<CharT[], JS::FreePolicy> owned)
-        : ContentPtrVariant(owned.release(), false) {}
-
-    // Move construction.
-    // Clear the other variant's contents to not free content after move.
-    ContentPtrVariant(ContentPtrVariant&& other) : taggedPtr(other.taggedPtr) {
-      other.taggedPtr = EMPTY_TAGGED_PTR;
-    }
-
-    ~ContentPtrVariant() {
-      if (isInline()) {
-        return;
-      }
-
-      // Re-construct UniqueChars<CharT[]> and destroy them.
-      if (hasCharType<Latin1Char>()) {
-        UniqueLatin1Chars chars(getUnchecked<Latin1Char>());
-      } else {
-        UniqueTwoByteChars chars(getUnchecked<char16_t>());
-      }
-    }
-
-    template <typename T>
-    const T* getUnchecked() const {
-      return reinterpret_cast<const T*>(taggedPtr & ~LOWBITS_MASK);
-    }
-    template <typename T>
-    T* getUnchecked() {
-      return reinterpret_cast<T*>(taggedPtr & ~LOWBITS_MASK);
-    }
-    template <typename CharT>
-    bool hasCharType() const {
-      static_assert(std::is_same_v<CharT, Latin1Char> ||
-                    std::is_same_v<CharT, char16_t>);
-      return (taggedPtr & CHARTYPE_MASK) ==
-             (std::is_same_v<CharT, Latin1Char> ? CHARTYPE_LATIN1
-                                                : CHARTYPE_TWO_BYTE);
-    }
-    bool isInline() const {
-      return (taggedPtr & LOCATION_MASK) == LOCATION_INLINE;
-    }
-  };
-
   static const uint16_t MAX_LATIN1_CHAR = 0xff;
 
   // Helper routine to read some sequence of two-byte chars, and write them
   // into a target buffer of a particular character width.
   //
   // The characters in the sequence must have been verified prior
   template <typename CharT, typename SeqCharT>
   static void drainChar16Seq(CharT* buf, InflatedChar16Sequence<SeqCharT> seq,
@@ -191,60 +89,48 @@ class alignas(alignof(void*)) ParserAtom
       }
       MOZ_ASSERT(cur < (buf + length));
       *cur = ch;
       cur++;
     }
   }
 
  private:
-  // Owned characters, either 8-bit Latin1Char, or 16-bit char16_t
-  ContentPtrVariant variant_;
+  // The JSAtom-compatible hash of the string.
+  HashNumber hash_ = 0;
 
   // The length of the buffer in chars_.
-  uint32_t length_;
-
-  // The JSAtom-compatible hash of the string.
-  HashNumber hash_;
+  uint32_t length_ = 0;
 
-  // Used to dynamically optimize the mapping of ParserAtoms to JSAtom*s.
-  //
-  // If this ParserAtomEntry is a part of WellKnownParserAtoms, this should
-  // hold WellKnownAtomId that maps to an item in cx->names().
-  //
-  // Otherwise, this should hold AtomIndex into CompilationInfo.atoms,
-  // or empty if the JSAtom isn't yet allocated.
-  using AtomIndexType =
-      mozilla::Variant<mozilla::Nothing, AtomIndex, WellKnownAtomId,
-                       StaticParserString1, StaticParserString2>;
-  mutable AtomIndexType atomIndex_ = AtomIndexType(mozilla::Nothing());
+  // Mapping into from ParserAtoms to JSAtoms.
+  enum class AtomIndexKind : uint8_t {
+    Unresolved,  // Not yet resolved
+    AtomIndex,   // Index into CompilationInfo atoms map
+    WellKnown,   // WellKnownAtomId to index into cx->names() set
+    Static1,     // Index into StaticStrings length-1 set
+    Static2,     // Index into StaticStrings length-2 set
+  };
+  uint32_t atomIndex_ = 0;
+  AtomIndexKind atomIndexKind_ = AtomIndexKind::Unresolved;
+
+  // Encoding type.
+  bool hasTwoByteChars_ = false;
 
   // End of fields.
 
   static const uint32_t MAX_LENGTH = JSString::MAX_LENGTH;
 
-  template <typename CharT>
-  ParserAtomEntry(mozilla::UniquePtr<CharT[], JS::FreePolicy> chars,
-                  uint32_t length, HashNumber hash)
-      : variant_(std::move(chars)), length_(length), hash_(hash) {}
+  ParserAtomEntry(uint32_t length, HashNumber hash, bool hasTwoByteChars)
+      : hash_(hash), length_(length), hasTwoByteChars_(hasTwoByteChars) {}
 
-  template <typename CharT>
-  ParserAtomEntry(const CharT* chars, uint32_t length, HashNumber hash)
-      : variant_(chars, /* isInline = */ true), length_(length), hash_(hash) {}
+ protected:
+  // The constexpr constructor is used by StaticParserAtomEntry.
+  constexpr ParserAtomEntry() = default;
 
  public:
-  template <typename CharT>
-  static CharT* inlineBufferPtr(ParserAtomEntry* entry) {
-    return reinterpret_cast<CharT*>(entry + 1);
-  }
-  template <typename CharT>
-  static const CharT* inlineBufferPtr(const ParserAtomEntry* entry) {
-    return reinterpret_cast<const CharT*>(entry + 1);
-  }
-
   // ParserAtomEntries may own their content buffers in variant_, and thus
   // cannot be copy-constructed - as a new chars would need to be allocated.
   ParserAtomEntry(const ParserAtomEntry&) = delete;
   ParserAtomEntry(ParserAtomEntry&& other) = delete;
 
   template <typename CharT, typename SeqCharT>
   static JS::Result<UniquePtr<ParserAtomEntry>, OOM&> allocate(
       JSContext* cx, InflatedChar16Sequence<SeqCharT> seq, uint32_t length,
@@ -253,27 +139,33 @@ class alignas(alignof(void*)) ParserAtom
   ParserAtom* asAtom() { return reinterpret_cast<ParserAtom*>(this); }
   const ParserAtom* asAtom() const {
     return reinterpret_cast<const ParserAtom*>(this);
   }
 
   inline ParserName* asName();
   inline const ParserName* asName() const;
 
-  bool hasLatin1Chars() const { return variant_.hasCharType<Latin1Char>(); }
-  bool hasTwoByteChars() const { return variant_.hasCharType<char16_t>(); }
+  bool hasLatin1Chars() const { return !hasTwoByteChars_; }
+  bool hasTwoByteChars() const { return hasTwoByteChars_; }
+
+  template <typename CharT>
+  const CharT* chars() const {
+    MOZ_ASSERT(sizeof(CharT) == (hasTwoByteChars() ? 2 : 1));
+    return reinterpret_cast<const CharT*>(this + 1);
+  }
 
-  const Latin1Char* latin1Chars() const {
-    MOZ_ASSERT(hasLatin1Chars());
-    return variant_.getUnchecked<Latin1Char>();
+  template <typename CharT>
+  CharT* chars() {
+    MOZ_ASSERT(sizeof(CharT) == (hasTwoByteChars() ? 2 : 1));
+    return reinterpret_cast<CharT*>(this + 1);
   }
-  const char16_t* twoByteChars() const {
-    MOZ_ASSERT(hasTwoByteChars());
-    return variant_.getUnchecked<char16_t>();
-  }
+
+  const Latin1Char* latin1Chars() const { return chars<Latin1Char>(); }
+  const char16_t* twoByteChars() const { return chars<char16_t>(); }
   mozilla::Range<const Latin1Char> latin1Range() const {
     return mozilla::Range(latin1Chars(), length_);
   }
   mozilla::Range<const char16_t> twoByteRange() const {
     return mozilla::Range(twoByteChars(), length_);
   }
 
   bool isIndex(uint32_t* indexp) const;
@@ -285,29 +177,41 @@ class alignas(alignof(void*)) ParserAtom
   HashNumber hash() const { return hash_; }
   uint32_t length() const { return length_; }
 
   bool equalsJSAtom(JSAtom* other) const;
 
   template <typename CharT>
   bool equalsSeq(HashNumber hash, InflatedChar16Sequence<CharT> seq) const;
 
-  void setAtomIndex(AtomIndex index) const {
-    atomIndex_ = mozilla::AsVariant(index);
+ private:
+  void setAtomIndex(AtomIndex index) {
+    atomIndex_ = index;
+    atomIndexKind_ = AtomIndexKind::AtomIndex;
   }
-  void setWellKnownAtomId(WellKnownAtomId kind) const {
-    atomIndex_ = mozilla::AsVariant(kind);
+  constexpr void setWellKnownAtomId(WellKnownAtomId kind) {
+    atomIndex_ = static_cast<uint32_t>(kind);
+    atomIndexKind_ = AtomIndexKind::WellKnown;
   }
-  void setStaticParserString1(StaticParserString1 s) const {
-    atomIndex_ = mozilla::AsVariant(s);
+  constexpr void setStaticParserString1(StaticParserString1 s) {
+    atomIndex_ = static_cast<uint32_t>(s);
+    atomIndexKind_ = AtomIndexKind::Static1;
   }
-  void setStaticParserString2(StaticParserString2 s) const {
-    atomIndex_ = mozilla::AsVariant(s);
+  constexpr void setStaticParserString2(StaticParserString2 s) {
+    atomIndex_ = static_cast<uint32_t>(s);
+    atomIndexKind_ = AtomIndexKind::Static2;
+  }
+  constexpr void setHashAndLength(HashNumber hash, uint32_t length,
+                                  bool hasTwoByteChars = false) {
+    hash_ = hash;
+    length_ = length;
+    hasTwoByteChars_ = hasTwoByteChars;
   }
 
+ public:
   // Convert this entry to a js-atom.  The first time this method is called
   // the entry will cache the JSAtom pointer to return later.
   JS::Result<JSAtom*, OOM&> toJSAtom(JSContext* cx,
                                      CompilationInfo& compilationInfo) const;
 
   // Convert this entry to a number.
   bool toNumber(JSContext* cx, double* result) const;
 
@@ -333,16 +237,40 @@ inline ParserName* ParserAtomEntry::asNa
   MOZ_ASSERT(!isIndex());
   return static_cast<ParserName*>(this);
 }
 inline const ParserName* ParserAtomEntry::asName() const {
   MOZ_ASSERT(!isIndex());
   return static_cast<const ParserName*>(this);
 }
 
+// A ParserAtomEntry with explicit inline storage. This is compatible with
+// constexpr to have builtin atoms. Care must be taken to ensure these atoms are
+// unique.
+template <size_t Length>
+class StaticParserAtomEntry : public ParserAtomEntry {
+  alignas(alignof(ParserAtomEntry)) char storage_[Length] = {};
+
+ public:
+  constexpr StaticParserAtomEntry() = default;
+
+  constexpr char* storage() {
+    static_assert(
+        offsetof(StaticParserAtomEntry, storage_) == sizeof(ParserAtomEntry),
+        "StaticParserAtomEntry storage should follow ParserAtomEntry");
+    return storage_;
+  }
+};
+
+template <>
+class StaticParserAtomEntry<0> : public ParserAtomEntry {
+ public:
+  constexpr StaticParserAtomEntry() = default;
+};
+
 /**
  * A lookup structure that allows for querying ParserAtoms in
  * a hashtable using a flexible input type that supports string
  * representations of various forms.
  */
 class ParserAtomLookup {
  protected:
   HashNumber hash_;