Bug 1711063 - Part 4: Add the ability to pass an allocation site when assembling inline allocations r=jandem
authorJon Coppeard <jcoppeard@mozilla.com>
Tue, 01 Jun 2021 09:57:00 +0000
changeset 654062 e03161d4969667607aa141682853f412f19221ca
parent 654061 4c02be7ed57c3f1b77156efb822a75e03311e501
child 654063 1eb11eaf408932283ba9cd54be2eefb7b3050d13
push id2623
push userffxbld-merge
push dateMon, 02 Aug 2021 14:47:51 +0000
treeherdermozilla-release@8500ce65f7c6 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersjandem
bugs1711063
milestone91.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1711063 - Part 4: Add the ability to pass an allocation site when assembling inline allocations r=jandem. For baseline allocations we still want to track the allocations made and update the AllocSite, whereas for optimized code we want to ignore this entirely and use the catch-all optimized site for tracked allocation sites. To allow this, the patch adds AllocSiteInput, which can either be a register holding an AllocSite pointer or an immediate indicating which catch-all allocation site to use. Differential Revision: https://phabricator.services.mozilla.com/D115241
js/src/gc/Nursery.cpp
js/src/gc/Nursery.h
js/src/gc/Pretenuring.h
js/src/jit/CodeGenerator.cpp
js/src/jit/CompileWrappers.cpp
js/src/jit/CompileWrappers.h
js/src/jit/MacroAssembler.cpp
js/src/jit/MacroAssembler.h
--- a/js/src/gc/Nursery.cpp
+++ b/js/src/gc/Nursery.cpp
@@ -467,20 +467,21 @@ Cell* js::Nursery::allocateCell(gc::Allo
     return nullptr;
   }
 
   new (ptr) NurseryCellHeader(site, kind);
 
   auto cell =
       reinterpret_cast<Cell*>(uintptr_t(ptr) + sizeof(NurseryCellHeader));
 
+  // Update the allocation site. This code is also inlined in
+  // MacroAssembler::updateAllocSite.
   if (!site->isInAllocatedList()) {
     pretenuringNursery.insertIntoAllocatedList(site);
   }
-
   site->incAllocCount();
 
   gcprobes::NurseryAlloc(cell, kind);
   return cell;
 }
 
 Cell* js::Nursery::allocateString(gc::AllocSite* site, size_t size) {
   Cell* cell = allocateCell(site, size, JS::TraceKind::String);
--- a/js/src/gc/Nursery.h
+++ b/js/src/gc/Nursery.h
@@ -394,16 +394,19 @@ class Nursery {
   void* addressOfPosition() const { return (void**)&position_; }
   const void* addressOfCurrentEnd() const { return (void**)&currentEnd_; }
   const void* addressOfCurrentStringEnd() const {
     return (void*)&currentStringEnd_;
   }
   const void* addressOfCurrentBigIntEnd() const {
     return (void*)&currentBigIntEnd_;
   }
+  void* addressOfNurseryAllocatedSites() {
+    return pretenuringNursery.addressOfAllocatedSites();
+  }
 
   void requestMinorGC(JS::GCReason reason) const;
 
   bool minorGCRequested() const {
     return minorGCTriggerReason_ != JS::GCReason::NO_REASON;
   }
   JS::GCReason minorGCTriggerReason() const { return minorGCTriggerReason_; }
   void clearMinorGCRequest() {
--- a/js/src/gc/Pretenuring.h
+++ b/js/src/gc/Pretenuring.h
@@ -25,16 +25,18 @@
 #include "js/TypeDecls.h"
 
 namespace js {
 namespace gc {
 
 class GCRuntime;
 class PretenuringNursery;
 
+enum class CatchAllAllocSite { Unknown, Optimized };
+
 // Information about an allocation site.
 //
 // Nursery cells contain a pointer to one of these in their cell header (stored
 // before the cell). The site can relate to either for a specific bytecode
 // instruction or can be a catch-all instance for unknown sites or optimized
 // code.
 class AllocSite {
  public:
@@ -99,16 +101,26 @@ class AllocSite {
   void incTenuredCount() { nurseryTenuredCount++; }
 
   void updateStateOnMinorGC(double promotionRate);
 
   static void printInfoHeader();
   static void printInfoFooter(size_t sitesActive);
   void printInfo(bool hasPromotionRate, double promotionRate) const;
 
+  static constexpr size_t offsetOfState() {
+    return offsetof(AllocSite, state_);
+  }
+  static constexpr size_t offsetOfNurseryAllocCount() {
+    return offsetof(AllocSite, nurseryAllocCount);
+  }
+  static constexpr size_t offsetOfNextNurseryAllocated() {
+    return offsetof(AllocSite, nextNurseryAllocated);
+  }
+
  private:
   const char* stateName() const;
 };
 
 // Pretenuring information stored per zone.
 class PretenuringZone {
  public:
   explicit PretenuringZone(JS::Zone* zone)
@@ -138,14 +150,16 @@ class PretenuringNursery {
 
   void insertIntoAllocatedList(AllocSite* site) {
     MOZ_ASSERT(!site->isInAllocatedList());
     site->nextNurseryAllocated = allocatedSites;
     allocatedSites = site;
   }
 
   void doPretenuring(GCRuntime* gc, bool reportInfo);
+
+  void* addressOfAllocatedSites() { return &allocatedSites; }
 };
 
 }  // namespace gc
 }  // namespace js
 
 #endif /* gc_Pretenuring_h */
--- a/js/src/jit/CodeGenerator.cpp
+++ b/js/src/jit/CodeGenerator.cpp
@@ -7069,17 +7069,18 @@ void CodeGenerator::visitNewPlainObject(
   OutOfLineCode* ool = oolCallVM<Fn, NewPlainObject>(
       lir,
       ArgList(ImmGCPtr(shape), Imm32(int32_t(allocKind)), Imm32(initialHeap)),
       StoreRegisterTo(objReg));
 
   masm.movePtr(ImmGCPtr(shape), shapeReg);
   masm.createPlainGCObject(objReg, shapeReg, temp0Reg, temp1Reg,
                            mir->numFixedSlots(), mir->numDynamicSlots(),
-                           allocKind, initialHeap, ool->entry());
+                           allocKind, initialHeap, ool->entry(),
+                           AllocSiteInput(gc::CatchAllAllocSite::Optimized));
 
   masm.bind(ool->rejoin());
 }
 
 void CodeGenerator::visitNewArrayObject(LNewArrayObject* lir) {
   Register objReg = ToRegister(lir->output());
   Register temp0Reg = ToRegister(lir->temp0());
   Register shapeReg = ToRegister(lir->temp1());
@@ -7098,19 +7099,20 @@ void CodeGenerator::visitNewArrayObject(
   const Shape* shape = mir->shape();
 
   using Fn = ArrayObject* (*)(JSContext*, uint32_t, NewObjectKind);
   OutOfLineCode* ool = oolCallVM<Fn, NewArrayOperation>(
       lir, ArgList(Imm32(arrayLength), Imm32(GenericObject)),
       StoreRegisterTo(objReg));
 
   masm.movePtr(ImmPtr(shape), shapeReg);
-  masm.createArrayWithFixedElements(objReg, shapeReg, temp0Reg, arrayLength,
-                                    arrayCapacity, allocKind,
-                                    mir->initialHeap(), ool->entry());
+  masm.createArrayWithFixedElements(
+      objReg, shapeReg, temp0Reg, arrayLength, arrayCapacity, allocKind,
+      mir->initialHeap(), ool->entry(),
+      AllocSiteInput(gc::CatchAllAllocSite::Optimized));
   masm.bind(ool->rejoin());
 }
 
 void CodeGenerator::visitNewNamedLambdaObject(LNewNamedLambdaObject* lir) {
   Register objReg = ToRegister(lir->output());
   Register tempReg = ToRegister(lir->temp());
   const CompileInfo& info = lir->mir()->block()->info();
 
--- a/js/src/jit/CompileWrappers.cpp
+++ b/js/src/jit/CompileWrappers.cpp
@@ -147,31 +147,37 @@ const void* CompileZone::addressOfBigInt
   // allocatable things.
   return zone()->runtimeFromAnyThread()->gc.addressOfBigIntNurseryCurrentEnd();
 }
 
 uint32_t* CompileZone::addressOfNurseryAllocCount() {
   return zone()->runtimeFromAnyThread()->gc.addressOfNurseryAllocCount();
 }
 
+void* CompileZone::addressOfNurseryAllocatedSites() {
+  JSRuntime* rt = zone()->runtimeFromAnyThread();
+  return rt->gc.nursery().addressOfNurseryAllocatedSites();
+}
+
 bool CompileZone::canNurseryAllocateStrings() {
   return zone()->runtimeFromAnyThread()->gc.nursery().canAllocateStrings() &&
          zone()->allocNurseryStrings;
 }
 
 bool CompileZone::canNurseryAllocateBigInts() {
   return zone()->runtimeFromAnyThread()->gc.nursery().canAllocateBigInts() &&
          zone()->allocNurseryBigInts;
 }
 
-uintptr_t CompileZone::nurseryCellHeader(JS::TraceKind kind) {
-  gc::AllocSite* site = kind == JS::TraceKind::Object
+uintptr_t CompileZone::nurseryCellHeader(JS::TraceKind traceKind,
+                                         gc::CatchAllAllocSite siteKind) {
+  gc::AllocSite* site = siteKind == gc::CatchAllAllocSite::Optimized
                             ? zone()->optimizedAllocSite()
                             : zone()->unknownAllocSite();
-  return gc::NurseryCellHeader::MakeValue(site, kind);
+  return gc::NurseryCellHeader::MakeValue(site, traceKind);
 }
 
 JS::Realm* CompileRealm::realm() { return reinterpret_cast<JS::Realm*>(this); }
 
 /* static */
 CompileRealm* CompileRealm::get(JS::Realm* realm) {
   return reinterpret_cast<CompileRealm*>(realm);
 }
--- a/js/src/jit/CompileWrappers.h
+++ b/js/src/jit/CompileWrappers.h
@@ -4,16 +4,17 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #ifndef jit_CompileWrappers_h
 #define jit_CompileWrappers_h
 
 #include <stdint.h>
 
+#include "gc/Pretenuring.h"
 #include "js/TypeDecls.h"
 
 struct JSAtomState;
 
 namespace mozilla::non_crypto {
 class XorShift128PlusRNG;
 }
 
@@ -30,16 +31,18 @@ class PropertyName;
 class StaticStrings;
 struct WellKnownSymbols;
 
 using DOMCallbacks = struct JSDOMCallbacks;
 
 namespace gc {
 
 enum class AllocKind : uint8_t;
+
+class AllocSite;
 class FreeSpan;
 
 }  // namespace gc
 
 namespace jit {
 
 class JitRuntime;
 
@@ -105,20 +108,23 @@ class CompileZone {
   void* addressOfStringNurseryPosition();
   void* addressOfBigIntNurseryPosition();
   const void* addressOfNurseryCurrentEnd();
   const void* addressOfStringNurseryCurrentEnd();
   const void* addressOfBigIntNurseryCurrentEnd();
 
   uint32_t* addressOfNurseryAllocCount();
 
+  void* addressOfNurseryAllocatedSites();
+
   bool canNurseryAllocateStrings();
   bool canNurseryAllocateBigInts();
 
-  uintptr_t nurseryCellHeader(JS::TraceKind kind);
+  uintptr_t nurseryCellHeader(JS::TraceKind traceKind,
+                              gc::CatchAllAllocSite siteKind);
 };
 
 class JitRealm;
 
 class CompileRealm {
   JS::Realm* realm();
 
  public:
--- a/js/src/jit/MacroAssembler.cpp
+++ b/js/src/jit/MacroAssembler.cpp
@@ -299,39 +299,49 @@ bool MacroAssembler::shouldNurseryAlloca
   // initializing writes.
   return IsNurseryAllocable(allocKind) && initialHeap != gc::TenuredHeap;
 }
 
 // Inline version of Nursery::allocateObject. If the object has dynamic slots,
 // this fills in the slots_ pointer.
 void MacroAssembler::nurseryAllocateObject(Register result, Register temp,
                                            gc::AllocKind allocKind,
-                                           size_t nDynamicSlots, Label* fail) {
+                                           size_t nDynamicSlots, Label* fail,
+                                           const AllocSiteInput& allocSite) {
   MOZ_ASSERT(IsNurseryAllocable(allocKind));
 
   // We still need to allocate in the nursery, per the comment in
   // shouldNurseryAllocate; however, we need to insert into the
   // mallocedBuffers set, so bail to do the nursery allocation in the
   // interpreter.
   if (nDynamicSlots >= Nursery::MaxNurseryBufferSize / sizeof(Value)) {
     jump(fail);
     return;
   }
 
+  // Check whether this allocation site needs pretenuring. This dynamic check
+  // only happens for baseline code.
+  if (allocSite.is<Register>()) {
+    Register site = allocSite.as<Register>();
+    branch32(Assembler::Equal, Address(site, gc::AllocSite::offsetOfState()),
+             Imm32(int32_t(gc::AllocSite::State::LongLived)), fail);
+  }
+
   // No explicit check for nursery.isEnabled() is needed, as the comparison
   // with the nursery's end will always fail in such cases.
   CompileZone* zone = GetJitContext()->realm()->zone();
   size_t thingSize = gc::Arena::thingSize(allocKind);
   size_t totalSize = thingSize + ObjectSlots::allocSize(nDynamicSlots);
   MOZ_ASSERT(totalSize < INT32_MAX);
   MOZ_ASSERT(totalSize % gc::CellAlignBytes == 0);
 
-  bumpPointerAllocate(
-      result, temp, fail, zone, zone->addressOfNurseryPosition(),
-      zone->addressOfNurseryCurrentEnd(), JS::TraceKind::Object, totalSize);
+  bumpPointerAllocate(result, temp, fail, zone,
+                      zone->addressOfNurseryPosition(),
+                      zone->addressOfNurseryCurrentEnd(), JS::TraceKind::Object,
+                      totalSize, allocSite);
 
   if (nDynamicSlots) {
     store32(Imm32(nDynamicSlots),
             Address(result, thingSize + ObjectSlots::offsetOfCapacity()));
     store32(
         Imm32(0),
         Address(result, thingSize + ObjectSlots::offsetOfDictionarySlotSpan()));
     computeEffectiveAddress(
@@ -397,40 +407,42 @@ void MacroAssembler::callFreeStub(Regist
   call(GetJitContext()->runtime->jitRuntime()->freeStub());
   pop(regSlots);
 }
 
 // Inlined equivalent of gc::AllocateObject, without failure case handling.
 void MacroAssembler::allocateObject(Register result, Register temp,
                                     gc::AllocKind allocKind,
                                     uint32_t nDynamicSlots,
-                                    gc::InitialHeap initialHeap, Label* fail) {
+                                    gc::InitialHeap initialHeap, Label* fail,
+                                    const AllocSiteInput& allocSite) {
   MOZ_ASSERT(gc::IsObjectAllocKind(allocKind));
 
   checkAllocatorState(fail);
 
   if (shouldNurseryAllocate(allocKind, initialHeap)) {
     MOZ_ASSERT(initialHeap == gc::DefaultHeap);
-    return nurseryAllocateObject(result, temp, allocKind, nDynamicSlots, fail);
+    return nurseryAllocateObject(result, temp, allocKind, nDynamicSlots, fail,
+                                 allocSite);
   }
 
   // Fall back to calling into the VM to allocate objects in the tenured heap
   // that have dynamic slots.
   if (nDynamicSlots) {
     jump(fail);
     return;
   }
 
   return freeListAllocate(result, temp, allocKind, fail);
 }
 
 void MacroAssembler::createGCObject(Register obj, Register temp,
                                     const TemplateObject& templateObj,
                                     gc::InitialHeap initialHeap, Label* fail,
-                                    bool initContents) {
+                                    bool initContents /* = true */) {
   gc::AllocKind allocKind = templateObj.getAllocKind();
   MOZ_ASSERT(gc::IsObjectAllocKind(allocKind));
 
   uint32_t nDynamicSlots = 0;
   if (templateObj.isNativeObject()) {
     const TemplateNativeObject& ntemplate =
         templateObj.asTemplateNativeObject();
     nDynamicSlots = ntemplate.numDynamicSlots();
@@ -438,22 +450,23 @@ void MacroAssembler::createGCObject(Regi
 
   allocateObject(obj, temp, allocKind, nDynamicSlots, initialHeap, fail);
   initGCThing(obj, temp, templateObj, initContents);
 }
 
 void MacroAssembler::createPlainGCObject(
     Register result, Register shape, Register temp, Register temp2,
     uint32_t numFixedSlots, uint32_t numDynamicSlots, gc::AllocKind allocKind,
-    gc::InitialHeap initialHeap, Label* fail) {
+    gc::InitialHeap initialHeap, Label* fail, const AllocSiteInput& allocSite) {
   MOZ_ASSERT(gc::IsObjectAllocKind(allocKind));
   MOZ_ASSERT(shape != temp, "shape can overlap with temp2, but not temp");
 
   // Allocate object.
-  allocateObject(result, temp, allocKind, numDynamicSlots, initialHeap, fail);
+  allocateObject(result, temp, allocKind, numDynamicSlots, initialHeap, fail,
+                 allocSite);
 
   // Initialize shape field.
   storePtr(shape, Address(result, JSObject::offsetOfShape()));
 
   // If the object has dynamic slots, allocateObject will initialize
   // the slots field. If not, we must initialize it now.
   if (numDynamicSlots == 0) {
     storePtr(ImmPtr(emptyObjectSlots),
@@ -473,29 +486,29 @@ void MacroAssembler::createPlainGCObject
     loadPtr(Address(result, NativeObject::offsetOfSlots()), temp2);
     fillSlotsWithUndefined(Address(temp2, 0), temp, 0, numDynamicSlots);
   }
 }
 
 void MacroAssembler::createArrayWithFixedElements(
     Register result, Register shape, Register temp, uint32_t arrayLength,
     uint32_t arrayCapacity, gc::AllocKind allocKind,
-    gc::InitialHeap initialHeap, Label* fail) {
+    gc::InitialHeap initialHeap, Label* fail, const AllocSiteInput& allocSite) {
   MOZ_ASSERT(gc::IsObjectAllocKind(allocKind));
   MOZ_ASSERT(shape != temp, "shape can overlap with temp2, but not temp");
   MOZ_ASSERT(result != temp);
 
   // This only supports allocating arrays with fixed elements and does not
   // support any dynamic slots or elements.
   MOZ_ASSERT(arrayCapacity >= arrayLength);
   MOZ_ASSERT(gc::GetGCKindSlots(allocKind) >=
              arrayCapacity + ObjectElements::VALUES_PER_HEADER);
 
   // Allocate object.
-  allocateObject(result, temp, allocKind, 0, initialHeap, fail);
+  allocateObject(result, temp, allocKind, 0, initialHeap, fail, allocSite);
 
   // Initialize shape field.
   storePtr(shape, Address(result, JSObject::offsetOfShape()));
 
   // There are no dynamic slots.
   storePtr(ImmPtr(emptyObjectSlots),
            Address(result, NativeObject::offsetOfSlots()));
 
@@ -547,18 +560,18 @@ void MacroAssembler::nurseryAllocateBigI
                       zone->addressOfBigIntNurseryPosition(),
                       zone->addressOfBigIntNurseryCurrentEnd(),
                       JS::TraceKind::BigInt, thingSize);
 }
 
 void MacroAssembler::bumpPointerAllocate(Register result, Register temp,
                                          Label* fail, CompileZone* zone,
                                          void* posAddr, const void* curEndAddr,
-                                         JS::TraceKind traceKind,
-                                         uint32_t size) {
+                                         JS::TraceKind traceKind, uint32_t size,
+                                         const AllocSiteInput& allocSite) {
   uint32_t totalSize = size + Nursery::nurseryCellHeaderSize();
   MOZ_ASSERT(totalSize < INT32_MAX, "Nursery allocation too large");
   MOZ_ASSERT(totalSize % gc::CellAlignBytes == 0);
 
   // The position (allocation pointer) and the end pointer are stored
   // very close to each other -- specifically, easily within a 32 bit offset.
   // Use relative offsets between them, to avoid 64-bit immediate loads.
   //
@@ -571,32 +584,64 @@ void MacroAssembler::bumpPointerAllocate
   CheckedInt<int32_t> endOffset =
       (CheckedInt<uintptr_t>(uintptr_t(curEndAddr)) -
        CheckedInt<uintptr_t>(uintptr_t(posAddr)))
           .toChecked<int32_t>();
   MOZ_ASSERT(endOffset.isValid(), "Position and end pointers must be nearby");
   branchPtr(Assembler::Below, Address(temp, endOffset.value()), result, fail);
   storePtr(result, Address(temp, 0));
   subPtr(Imm32(size), result);
-  storePtr(ImmWord(zone->nurseryCellHeader(traceKind)),
-           Address(result, -js::Nursery::nurseryCellHeaderSize()));
 
   if (GetJitContext()->runtime->geckoProfiler().enabled()) {
     uint32_t* countAddress = zone->addressOfNurseryAllocCount();
     CheckedInt<int32_t> counterOffset =
         (CheckedInt<uintptr_t>(uintptr_t(countAddress)) -
          CheckedInt<uintptr_t>(uintptr_t(posAddr)))
             .toChecked<int32_t>();
     if (counterOffset.isValid()) {
       add32(Imm32(1), Address(temp, counterOffset.value()));
     } else {
       movePtr(ImmPtr(countAddress), temp);
       add32(Imm32(1), Address(temp, 0));
     }
   }
+
+  if (allocSite.is<gc::CatchAllAllocSite>()) {
+    // No allocation site supplied. This is the case when called from Warp, or
+    // from places that don't support pretenuring.
+    gc::CatchAllAllocSite siteKind = allocSite.as<gc::CatchAllAllocSite>();
+    storePtr(ImmWord(zone->nurseryCellHeader(traceKind, siteKind)),
+             Address(result, -js::Nursery::nurseryCellHeaderSize()));
+  } else {
+    // Update allocation site and store pointer in the nursery cell header. This
+    // is only used from baseline.
+    Register site = allocSite.as<Register>();
+    updateAllocSite(temp, result, zone, site);
+    // See NurseryCellHeader::MakeValue.
+    orPtr(Imm32(int32_t(traceKind)), site);
+    storePtr(site, Address(result, -js::Nursery::nurseryCellHeaderSize()));
+  }
+}
+
+// Update the allocation site in the same way as Nursery::allocateCell.
+void MacroAssembler::updateAllocSite(Register temp, Register result,
+                                     CompileZone* zone, Register site) {
+  Label done;
+
+  add32(Imm32(1), Address(site, gc::AllocSite::offsetOfNurseryAllocCount()));
+
+  branchPtr(Assembler::NotEqual,
+            Address(site, gc::AllocSite::offsetOfNextNurseryAllocated()),
+            ImmPtr(nullptr), &done);
+
+  loadPtr(AbsoluteAddress(zone->addressOfNurseryAllocatedSites()), temp);
+  storePtr(temp, Address(site, gc::AllocSite::offsetOfNextNurseryAllocated()));
+  storePtr(site, AbsoluteAddress(zone->addressOfNurseryAllocatedSites()));
+
+  bind(&done);
 }
 
 // Inlined equivalent of gc::AllocateString, jumping to fail if nursery
 // allocation requested but unsuccessful.
 void MacroAssembler::allocateString(Register result, Register temp,
                                     gc::AllocKind allocKind,
                                     gc::InitialHeap initialHeap, Label* fail) {
   MOZ_ASSERT(allocKind == gc::AllocKind::STRING ||
--- a/js/src/jit/MacroAssembler.h
+++ b/js/src/jit/MacroAssembler.h
@@ -6,16 +6,17 @@
 
 #ifndef jit_MacroAssembler_h
 #define jit_MacroAssembler_h
 
 #include "mozilla/EndianUtils.h"
 #include "mozilla/MacroForEach.h"
 #include "mozilla/MathAlgorithms.h"
 #include "mozilla/Maybe.h"
+#include "mozilla/Variant.h"
 
 #if defined(JS_CODEGEN_X86)
 #  include "jit/x86/MacroAssembler-x86.h"
 #elif defined(JS_CODEGEN_X64)
 #  include "jit/x64/MacroAssembler-x64.h"
 #elif defined(JS_CODEGEN_ARM)
 #  include "jit/arm/MacroAssembler-arm.h"
 #elif defined(JS_CODEGEN_ARM64)
@@ -262,16 +263,27 @@ static inline DynFn DynamicFunction(Sig 
 
 enum class CharEncoding { Latin1, TwoByte };
 
 constexpr uint32_t WasmCallerTLSOffsetBeforeCall =
     wasm::FrameWithTls::callerTLSOffset() + ShadowStackSpace;
 constexpr uint32_t WasmCalleeTLSOffsetBeforeCall =
     wasm::FrameWithTls::calleeTLSOffset() + ShadowStackSpace;
 
+// Allocation sites may be passed to GC thing allocation methods either via a
+// register (for baseline compilation) or an enum indicating one of the
+// catch-all allocation sites (for optimized compilation).
+struct AllocSiteInput
+    : public mozilla::Variant<Register, gc::CatchAllAllocSite> {
+  using Base = mozilla::Variant<Register, gc::CatchAllAllocSite>;
+  AllocSiteInput() : Base(gc::CatchAllAllocSite::Unknown) {}
+  explicit AllocSiteInput(gc::CatchAllAllocSite catchAll) : Base(catchAll) {}
+  explicit AllocSiteInput(Register reg) : Base(reg) {}
+};
+
 // [SMDOC] Code generation invariants (incomplete)
 //
 // ## 64-bit GPRs carrying 32-bit values
 //
 // At least at the end of every JS or Wasm operation (= SpiderMonkey bytecode or
 // Wasm bytecode; this is necessarily a little vague), if a 64-bit GPR has a
 // 32-bit value, then the upper 32 bits of the register may be predictable in
 // accordance with platform-specific rules, as follows.
@@ -4662,29 +4674,34 @@ class MacroAssembler : public MacroAssem
     bind(&done);
   }
 
   // Inline allocation.
  private:
   void checkAllocatorState(Label* fail);
   bool shouldNurseryAllocate(gc::AllocKind allocKind,
                              gc::InitialHeap initialHeap);
-  void nurseryAllocateObject(Register result, Register temp,
-                             gc::AllocKind allocKind, size_t nDynamicSlots,
-                             Label* fail);
+  void nurseryAllocateObject(
+      Register result, Register temp, gc::AllocKind allocKind,
+      size_t nDynamicSlots, Label* fail,
+      const AllocSiteInput& allocSite = AllocSiteInput());
   void bumpPointerAllocate(Register result, Register temp, Label* fail,
                            CompileZone* zone, void* posAddr,
                            const void* curEddAddr, JS::TraceKind traceKind,
-                           uint32_t size);
+                           uint32_t size,
+                           const AllocSiteInput& allocSite = AllocSiteInput());
+  void updateAllocSite(Register temp, Register result, CompileZone* zone,
+                       Register site);
 
   void freeListAllocate(Register result, Register temp, gc::AllocKind allocKind,
                         Label* fail);
   void allocateObject(Register result, Register temp, gc::AllocKind allocKind,
                       uint32_t nDynamicSlots, gc::InitialHeap initialHeap,
-                      Label* fail);
+                      Label* fail,
+                      const AllocSiteInput& allocSite = AllocSiteInput());
   void nurseryAllocateString(Register result, Register temp,
                              gc::AllocKind allocKind, Label* fail);
   void allocateString(Register result, Register temp, gc::AllocKind allocKind,
                       gc::InitialHeap initialHeap, Label* fail);
   void nurseryAllocateBigInt(Register result, Register temp, Label* fail);
   void copySlotsFromTemplate(Register obj,
                              const TemplateNativeObject& templateObj,
                              uint32_t start, uint32_t end);
@@ -4703,23 +4720,24 @@ class MacroAssembler : public MacroAssem
   void createGCObject(Register result, Register temp,
                       const TemplateObject& templateObj,
                       gc::InitialHeap initialHeap, Label* fail,
                       bool initContents = true);
 
   void createPlainGCObject(Register result, Register shape, Register temp,
                            Register temp2, uint32_t numFixedSlots,
                            uint32_t numDynamicSlots, gc::AllocKind allocKind,
-                           gc::InitialHeap initialHeap, Label* fail);
-
-  void createArrayWithFixedElements(Register result, Register shape,
-                                    Register temp, uint32_t arrayLength,
-                                    uint32_t arrayCapacity,
-                                    gc::AllocKind allocKind,
-                                    gc::InitialHeap initialHeap, Label* fail);
+                           gc::InitialHeap initialHeap, Label* fail,
+                           const AllocSiteInput& allocSite = AllocSiteInput());
+
+  void createArrayWithFixedElements(
+      Register result, Register shape, Register temp, uint32_t arrayLength,
+      uint32_t arrayCapacity, gc::AllocKind allocKind,
+      gc::InitialHeap initialHeap, Label* fail,
+      const AllocSiteInput& allocSite = AllocSiteInput());
 
   void initGCThing(Register obj, Register temp,
                    const TemplateObject& templateObj, bool initContents = true);
 
   enum class TypedArrayLength { Fixed, Dynamic };
 
   void initTypedArraySlots(Register obj, Register temp, Register lengthReg,
                            LiveRegisterSet liveRegs, Label* fail,