Bug 1754377 - Dual-path call_indirect code. r=rhunt
author: Lars T Hansen <lhansen@mozilla.com>
date: Tue, 15 Feb 2022 07:03:29 +0000
changeset 607744 9700c43270312e2d3a1159be5ec7f5f6503f0d79
parent 607743 e644785b83a6b4117f60925e75842afa83539c8f
child 607745 f1feee3d48d17822bf99b467eeeff9cbcae78594
push id: 39292
push user: csabou@mozilla.com
push date: Tue, 15 Feb 2022 21:00:51 +0000
treeherder: mozilla-central@950c1613c97d
reviewers: rhunt
bugs: 1754377
milestone: 99.0a1
Bug 1754377 - Dual-path call_indirect code. r=rhunt

This changes MacroAssembler::wasmCallIndirect to implement dual-path call code for call_indirect: if the caller's tls equals the callee's tls, no context switch is needed and a fast call can be used; otherwise, a slow call with a context switch must be used. This speeds up call_indirect significantly in the vast majority of cases, at a small cost in code size.

As a result of this, wasmCallIndirect has two call instructions and therefore two safepoints, and this complication bubbles up to the baseline compiler, the code generator, and lowering. The main issue is that a LIR node has only one safepoint, so we must generate a second, synthetic LIR node for the second safepoint.

Ideally the slow path would move out-of-line, but that will be in a different patch, because I want to do tail calls first. It also becomes hairy in the presence of exception handling, as the exception region for the call will then be split.

Drive-by fix: the InterModule attribute in the baseline compiler is not really about whether a call is inter-module, but about whether the register state and realm must be restored after a call. The change to call_indirect exposes this incorrectness: such calls may be inter-module, but the compiler never needs to restore the register state or the realm; the macroassembler does this, as needed, on the slow path.

Drive-by fix: minor cleanup of the emitted code, notably better pointer scaling on ARM64.

Drive-by fix: remove some redundant parameters in lowering, to reduce confusion about whether a MIR node is updated for some LIR operations.

Drive-by fix: the immediate TypeIdDesc value is restricted to u32, so declare the accessor appropriately. This saves clients from (silently) chopping a size_t, which is bad form.

Differential Revision: https://phabricator.services.mozilla.com/D138052
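For orientation, here is a sketch of the control flow the new MacroAssembler::wasmCallIndirect emits. This is illustrative pseudocode only, not the literal generated code; register and call-site names follow the patch below, everything else is schematic:

    // Bounds check, signature id, and table element address.
    if (needsBoundsCheck && index >= table.length)
      trap(OutOfBounds);
    WasmTableCallSigReg = expectedFuncTypeId;  // signature checked by the callee
    elem = table.base + index * sizeof(FunctionTableElem);
    newTls = elem->tls;
    if (newTls == WasmTlsReg)
      goto fast;                               // same instance: no context switch
    // Slow path: save caller tls, install callee tls, switch context, call,
    // then restore the caller's context.
    frame.callerTls = WasmTlsReg;
    WasmTlsReg = newTls;
    frame.calleeTls = WasmTlsReg;
    if (WasmTlsReg == null)
      trap(IndirectCallToNull);
    loadPinnedRegsFromTls(); switchRealm();
    call elem->code;                           // CallSiteDesc::Indirect
    WasmTlsReg = frame.callerTls;
    loadPinnedRegsFromTls(); switchRealm();
    goto done;
   fast:
    // Fast path: the context is already correct, and elem->code cannot be
    // null here (a null entry's tls would not have matched the caller's).
    call elem->code;                           // CallSiteDesc::IndirectFast
   done:

Each call instruction is a distinct call site, which is why there are two return offsets (fastCallOffset and slowCallOffset) and two safepoints threaded through the compilers.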
js/src/jit/CodeGenerator.cpp
js/src/jit/LIROps.yaml
js/src/jit/Lowering.cpp
js/src/jit/MacroAssembler.cpp
js/src/jit/MacroAssembler.h
js/src/jit/arm/MacroAssembler-arm.cpp
js/src/jit/arm64/MacroAssembler-arm64.cpp
js/src/jit/mips-shared/MacroAssembler-mips-shared.cpp
js/src/jit/shared/LIR-shared.h
js/src/jit/shared/Lowering-shared.cpp
js/src/jit/shared/Lowering-shared.h
js/src/jit/x86-shared/MacroAssembler-x86-shared.cpp
js/src/wasm/WasmBCClass.h
js/src/wasm/WasmBCDefs.h
js/src/wasm/WasmBaselineCompile.cpp
js/src/wasm/WasmBuiltins.cpp
js/src/wasm/WasmCodegenTypes.h
js/src/wasm/WasmGenerator.cpp
js/src/wasm/WasmTypeDef.h
--- a/js/src/jit/CodeGenerator.cpp
+++ b/js/src/jit/CodeGenerator.cpp
@@ -8046,31 +8046,37 @@ void CodeGenerator::visitWasmCall(LWasmC
   // TLS and pinned regs. The only case where we don't have to reload
   // the TLS and pinned regs is when the callee preserves them.
   bool reloadRegs = true;
   bool switchRealm = true;
 
   const wasm::CallSiteDesc& desc = mir->desc();
   const wasm::CalleeDesc& callee = mir->callee();
   CodeOffset retOffset;
+  CodeOffset secondRetOffset;
   switch (callee.which()) {
     case wasm::CalleeDesc::Func:
       retOffset = masm.call(desc, callee.funcIndex());
       reloadRegs = false;
       switchRealm = false;
       break;
     case wasm::CalleeDesc::Import:
       retOffset = masm.wasmCallImport(desc, callee);
       break;
     case wasm::CalleeDesc::AsmJSTable:
       retOffset = masm.asmCallIndirect(desc, callee);
       break;
     case wasm::CalleeDesc::WasmTable:
-      retOffset = masm.wasmCallIndirect(desc, callee, lir->needsBoundsCheck(),
-                                        lir->tableSize());
+      masm.wasmCallIndirect(desc, callee, lir->needsBoundsCheck(),
+                            lir->tableSize(), &retOffset, &secondRetOffset);
+      // Register reloading and realm switching are handled dynamically inside
+      // wasmCallIndirect.  There are two return offsets, one for each call
+      // instruction (fast path and slow path).
+      reloadRegs = false;
+      switchRealm = false;
       break;
     case wasm::CalleeDesc::Builtin:
       retOffset = masm.call(desc, callee.builtin());
       reloadRegs = false;
       switchRealm = false;
       break;
     case wasm::CalleeDesc::BuiltinInstanceMethod:
       retOffset = masm.wasmCallBuiltinInstanceMethod(
@@ -8080,20 +8086,28 @@ void CodeGenerator::visitWasmCall(LWasmC
       break;
   }
 
   // Note the assembler offset for the associated LSafePoint.
   markSafepointAt(retOffset.offset(), lir);
 
   // Now that all the outbound in-memory args are on the stack, note the
   // required lower boundary point of the associated StackMap.
-  lir->safepoint()->setFramePushedAtStackMapBase(
-      masm.framePushed() - mir->stackArgAreaSizeUnaligned());
+  uint32_t framePushedAtStackMapBase =
+      masm.framePushed() - mir->stackArgAreaSizeUnaligned();
+  lir->safepoint()->setFramePushedAtStackMapBase(framePushedAtStackMapBase);
   MOZ_ASSERT(!lir->safepoint()->isWasmTrap());
 
+  // Note the assembler offset and framePushed for use by the adjunct
+  // LSafepoint; see the visitor for LWasmCallIndirectAdjunctSafepoint below.
+  if (callee.which() == wasm::CalleeDesc::WasmTable) {
+    lir->adjunctSafepoint()->recordSafepointInfo(secondRetOffset,
+                                                 framePushedAtStackMapBase);
+  }
+
   if (reloadRegs) {
     masm.loadPtr(Address(masm.getStackPointer(), WasmCallerTlsOffsetBeforeCall),
                  WasmTlsReg);
     masm.loadWasmPinnedRegsFromTls();
     if (switchRealm) {
       masm.switchToWasmTlsRealm(ABINonArgReturnReg0, ABINonArgReturnReg1);
     }
   } else {
@@ -8117,16 +8131,23 @@ void CodeGenerator::visitWasmCall(LWasmC
     tryNote.framePushed = masm.framePushed();
 
     // Required by WasmTryNote.
     MOZ_ASSERT(tryNote.end > tryNote.begin);
   }
 #endif
 }
 
+void CodeGenerator::visitWasmCallIndirectAdjunctSafepoint(
+    LWasmCallIndirectAdjunctSafepoint* lir) {
+  markSafepointAt(lir->safepointLocation().offset(), lir);
+  lir->safepoint()->setFramePushedAtStackMapBase(
+      lir->framePushedAtStackMapBase());
+}
+
 void CodeGenerator::visitWasmLoadSlot(LWasmLoadSlot* ins) {
   MIRType type = ins->type();
   Register container = ToRegister(ins->containerRef());
   Address addr(container, ins->offset());
   AnyRegister dst = ToAnyRegister(ins->output());
 
   switch (type) {
     case MIRType::Int32:
--- a/js/src/jit/LIROps.yaml
+++ b/js/src/jit/LIROps.yaml
@@ -2762,16 +2762,19 @@
 - name: WasmStackArgI64
   operands:
     arg: Int64
   mir_op: WasmStackArg
 
 - name: WasmNullConstant
   result_type: WordSized
 
+- name: WasmCallIndirectAdjunctSafepoint
+  gen_boilerplate: false
+
 - name: WasmCall
   gen_boilerplate: false
 
 - name: WasmRegisterResult
   gen_boilerplate: false
 
 - name: WasmRegisterPairResult
   gen_boilerplate: false
--- a/js/src/jit/Lowering.cpp
+++ b/js/src/jit/Lowering.cpp
@@ -3128,18 +3128,17 @@ void LIRGenerator::visitInterruptCheck(M
   add(lir, ins);
   assignSafepoint(lir, ins);
 }
 
 void LIRGenerator::visitWasmInterruptCheck(MWasmInterruptCheck* ins) {
   auto* lir =
       new (alloc()) LWasmInterruptCheck(useRegisterAtStart(ins->tlsPtr()));
   add(lir, ins);
-
-  assignWasmSafepoint(lir, ins);
+  assignWasmSafepoint(lir);
 }
 
 void LIRGenerator::visitWasmTrap(MWasmTrap* ins) {
   add(new (alloc()) LWasmTrap, ins);
 }
 
 void LIRGenerator::visitWasmReinterpret(MWasmReinterpret* ins) {
   if (ins->type() == MIRType::Int64) {
@@ -5383,18 +5382,28 @@ void LIRGenerator::visitWasmCall(MWasmCa
 
   if (ins->callee().isTable()) {
     MDefinition* index = ins->getOperand(ins->numArgs());
     lir->setOperand(ins->numArgs(),
                     useFixedAtStart(index, WasmTableCallIndexReg));
   }
 
   add(lir, ins);
-
-  assignWasmSafepoint(lir, ins);
+  assignWasmSafepoint(lir);
+
+  // WasmCall with WasmTable has two call instructions, and they both need a
+  // safepoint associated with them.  Create a second safepoint here; the node
+  // otherwise does nothing, and codegen for it only marks the safepoint at the
+  // node.
+  if (ins->callee().which() == wasm::CalleeDesc::WasmTable) {
+    auto* adjunctSafepoint = new (alloc()) LWasmCallIndirectAdjunctSafepoint();
+    add(adjunctSafepoint);
+    assignWasmSafepoint(adjunctSafepoint);
+    lir->setAdjunctSafepoint(adjunctSafepoint);
+  }
 }
 
 void LIRGenerator::visitSetDOMProperty(MSetDOMProperty* ins) {
   MDefinition* val = ins->value();
 
   Register cxReg, objReg, privReg, valueReg;
   GetTempRegForIntArg(0, 0, &cxReg);
   GetTempRegForIntArg(1, 0, &objReg);
--- a/js/src/jit/MacroAssembler.cpp
+++ b/js/src/jit/MacroAssembler.cpp
@@ -3880,18 +3880,18 @@ CodeOffset MacroAssembler::wasmCallBuilt
 
   return ret;
 }
 
 CodeOffset MacroAssembler::asmCallIndirect(const wasm::CallSiteDesc& desc,
                                            const wasm::CalleeDesc& callee) {
   MOZ_ASSERT(callee.which() == wasm::CalleeDesc::AsmJSTable);
 
-  Register scratch = WasmTableCallScratchReg0;
-  Register index = WasmTableCallIndexReg;
+  const Register scratch = WasmTableCallScratchReg0;
+  const Register index = WasmTableCallIndexReg;
 
   // Optimization opportunity: when offsetof(FunctionTableElem, code) == 0, as
   // it is at present, we can probably generate better code here by folding
   // the address computation into the load.
 
   static_assert(sizeof(wasm::FunctionTableElem) == 8 ||
                     sizeof(wasm::FunctionTableElem) == 16,
                 "elements of function tables are two words");
@@ -3908,82 +3908,159 @@ CodeOffset MacroAssembler::asmCallIndire
   loadPtr(Address(scratch, offsetof(wasm::FunctionTableElem, code)), scratch);
   storePtr(WasmTlsReg,
            Address(getStackPointer(), WasmCallerTlsOffsetBeforeCall));
   storePtr(WasmTlsReg,
            Address(getStackPointer(), WasmCalleeTlsOffsetBeforeCall));
   return call(desc, scratch);
 }
 
-CodeOffset MacroAssembler::wasmCallIndirect(
-    const wasm::CallSiteDesc& desc, const wasm::CalleeDesc& callee,
-    bool needsBoundsCheck, mozilla::Maybe<uint32_t> tableSize) {
+// In principle, call_indirect requires an expensive context switch to the
+// callee's instance and realm before the call and an almost equally expensive
+// switch back to the caller's ditto after.  However, if the caller's tls is the
+// same as the callee's tls then no context switch is required, and it only
+// takes a compare-and-branch at run-time to test this - all values are in
+// registers already.  We therefore generate two call paths, one for the fast
+// call without the context switch (which additionally avoids a null check) and
+// one for the slow call with the context switch.
+
+void MacroAssembler::wasmCallIndirect(const wasm::CallSiteDesc& desc,
+                                      const wasm::CalleeDesc& callee,
+                                      bool needsBoundsCheck,
+                                      mozilla::Maybe<uint32_t> tableSize,
+                                      CodeOffset* fastCallOffset,
+                                      CodeOffset* slowCallOffset) {
+  static_assert(sizeof(wasm::FunctionTableElem) == 2 * sizeof(void*),
+                "Exactly two pointers or index scaling won't work correctly");
   MOZ_ASSERT(callee.which() == wasm::CalleeDesc::WasmTable);
 
-  Register scratch = WasmTableCallScratchReg0;
-  Register index = WasmTableCallIndexReg;
-
-  // Write the functype-id into the ABI functype-id register.
-  wasm::TypeIdDesc funcTypeId = callee.wasmTableSigId();
-  switch (funcTypeId.kind()) {
-    case wasm::TypeIdDescKind::Global:
-      loadWasmGlobalPtr(funcTypeId.globalDataOffset(), WasmTableCallSigReg);
-      break;
-    case wasm::TypeIdDescKind::Immediate:
-      move32(Imm32(funcTypeId.immediate()), WasmTableCallSigReg);
-      break;
-    case wasm::TypeIdDescKind::None:
-      break;
-  }
-
+  const int shift = sizeof(wasm::FunctionTableElem) == 8 ? 3 : 4;
   wasm::BytecodeOffset trapOffset(desc.lineOrBytecode());
-
-  // WebAssembly throws if the index is out-of-bounds.
+  const Register calleeScratch = WasmTableCallScratchReg0;
+  const Register index = WasmTableCallIndexReg;
+
+  // Check the table index and throw if out-of-bounds.
+  //
+  // Frequently the table size is known, so optimize for that.  Otherwise
+  // compare with a memory operand when that's possible.  (There's little sense
+  // in hoisting the load of the bound into a register at a higher level and
+  // reusing that register, because a hoisted value would either have to be
+  // spilled and re-loaded before the next call_indirect, or would be abandoned
+  // because we could not trust that a hoisted value would not have changed.)
+
   if (needsBoundsCheck) {
     Label ok;
     if (tableSize.isSome()) {
       branch32(Assembler::Condition::Below, index, Imm32(*tableSize), &ok);
     } else {
       branch32(Assembler::Condition::Above,
                Address(WasmTlsReg, offsetof(wasm::TlsData, globalArea) +
                                        callee.tableLengthGlobalDataOffset()),
                index, &ok);
     }
     wasmTrap(wasm::Trap::OutOfBounds, trapOffset);
     bind(&ok);
   }
 
-  // Load the base pointer of the table.
-  loadWasmGlobalPtr(callee.tableFunctionBaseGlobalDataOffset(), scratch);
-
-  // Load the callee from the table.
-  if (sizeof(wasm::FunctionTableElem) == 8) {
-    computeEffectiveAddress(BaseIndex(scratch, index, TimesEight), scratch);
-  } else {
-    lshift32(Imm32(4), index);
-    addPtr(index, scratch);
+  // Write the functype-id into the ABI functype-id register.
+
+  const wasm::TypeIdDesc funcTypeId = callee.wasmTableSigId();
+  switch (funcTypeId.kind()) {
+    case wasm::TypeIdDescKind::Global:
+      loadWasmGlobalPtr(funcTypeId.globalDataOffset(), WasmTableCallSigReg);
+      break;
+    case wasm::TypeIdDescKind::Immediate:
+      move32(Imm32(funcTypeId.immediate()), WasmTableCallSigReg);
+      break;
+    case wasm::TypeIdDescKind::None:
+      break;
   }
 
+  // Load the base pointer of the table and compute the address of the callee in
+  // the table.
+
+  loadWasmGlobalPtr(callee.tableFunctionBaseGlobalDataOffset(), calleeScratch);
+  shiftIndex32AndAdd(index, shift, calleeScratch);
+
+  // Load the callee tls and decide whether to take the fast path or the slow
+  // path.
+
+  Label fastCall;
+  Label done;
+  const Register newTlsTemp = WasmTableCallScratchReg1;
+  loadPtr(Address(calleeScratch, offsetof(wasm::FunctionTableElem, tls)),
+          newTlsTemp);
+  branchPtr(Assembler::Equal, WasmTlsReg, newTlsTemp, &fastCall);
+
+  // Slow path: Save context, check for null, setup new context, call, restore
+  // context.
+  //
+  // TODO: The slow path could usefully be out-of-line and the test above would
+  // just fall through to the fast path.  This keeps the fast-path code dense,
+  // and has correct static prediction for the branch (forward conditional
+  // branches predicted not taken, normally).
+
   storePtr(WasmTlsReg,
            Address(getStackPointer(), WasmCallerTlsOffsetBeforeCall));
-  loadPtr(Address(scratch, offsetof(wasm::FunctionTableElem, tls)), WasmTlsReg);
+  movePtr(newTlsTemp, WasmTlsReg);
   storePtr(WasmTlsReg,
            Address(getStackPointer(), WasmCalleeTlsOffsetBeforeCall));
 
+  // TODO: The null test cannot quite go away because we need a valid(ish) tls
+  // to load the target state and switch realms.  A couple of possibilities:
+  //
+  // - We could have a dummy tls in the null tls slots, and then catch the
+  //   signal when we later jump to null.  Not clear what the implications would
+  //   be for the realm switch, since *cx would already be updated.
+  //
+  // - If the newTls is null, then the loading of the HeapReg from it (except on
+  //   x86-32) will cause a trap, and that trap will happen before the realm
+  //   switch, and that may be good enough.
+
   Label nonNull;
   branchTestPtr(Assembler::NonZero, WasmTlsReg, WasmTlsReg, &nonNull);
   wasmTrap(wasm::Trap::IndirectCallToNull, trapOffset);
   bind(&nonNull);
 
   loadWasmPinnedRegsFromTls();
   switchToWasmTlsRealm(index, WasmTableCallScratchReg1);
 
-  loadPtr(Address(scratch, offsetof(wasm::FunctionTableElem, code)), scratch);
-
-  return call(desc, scratch);
+  loadPtr(Address(calleeScratch, offsetof(wasm::FunctionTableElem, code)),
+          calleeScratch);
+
+  *slowCallOffset = call(desc, calleeScratch);
+
+  // Restore registers and realm and join up with the fast path.
+
+  loadPtr(Address(getStackPointer(), WasmCallerTlsOffsetBeforeCall),
+          WasmTlsReg);
+  loadWasmPinnedRegsFromTls();
+  switchToWasmTlsRealm(ABINonArgReturnReg0, ABINonArgReturnReg1);
+  jump(&done);
+
+  // Fast path: just load the code pointer and go.  The tls and heap register
+  // are the same as in the caller, and nothing will be null.
+  //
+  // (In particular, the code pointer will not be null: if it were, the tls
+  // would have been null, and then it would not have been equivalent to our
+  // current tls.  So no null check is needed on the fast path.)
+
+  bind(&fastCall);
+
+  loadPtr(Address(calleeScratch, offsetof(wasm::FunctionTableElem, code)),
+          calleeScratch);
+
+  // We use a different type of call site for the fast call since the Tls slots
+  // in the frame do not have valid values.
+
+  wasm::CallSiteDesc newDesc(desc.lineOrBytecode(),
+                             wasm::CallSiteDesc::IndirectFast);
+  *fastCallOffset = call(newDesc, calleeScratch);
+
+  bind(&done);
 }
 
 void MacroAssembler::nopPatchableToCall(const wasm::CallSiteDesc& desc) {
   CodeOffset offset = nopPatchableToCall();
   append(desc, offset);
 }
 
 void MacroAssembler::emitPreBarrierFastPath(JSRuntime* rt, MIRType type,
--- a/js/src/jit/MacroAssembler.h
+++ b/js/src/jit/MacroAssembler.h
@@ -3793,43 +3793,57 @@ class MacroAssembler : public MacroAssem
 
   void loadWasmGlobalPtr(uint32_t globalDataOffset, Register dest);
 
   // This function takes care of loading the callee's TLS and pinned regs but
   // it is the caller's responsibility to save/restore TLS or pinned regs.
   CodeOffset wasmCallImport(const wasm::CallSiteDesc& desc,
                             const wasm::CalleeDesc& callee);
 
-  // WasmTableCallIndexReg must contain the index of the indirect call.
-  // This is for wasm calls only.
-  CodeOffset wasmCallIndirect(const wasm::CallSiteDesc& desc,
-                              const wasm::CalleeDesc& callee,
-                              bool needsBoundsCheck,
-                              mozilla::Maybe<uint32_t> tableSize);
+  // WasmTableCallIndexReg must contain the index of the indirect call.  This is
+  // for wasm calls only.
+  //
+  // Indirect calls use a dual-path mechanism where a run-time test determines
+  // whether a context switch is needed (slow path) or not (fast path).  This
+  // gives rise to two call instructions, both of which need safepoints.  As
+  // per normal, the call offsets are the code offsets at the end of the call
+  // instructions (the return points).
+  void wasmCallIndirect(const wasm::CallSiteDesc& desc,
+                        const wasm::CalleeDesc& callee, bool needsBoundsCheck,
+                        mozilla::Maybe<uint32_t> tableSize,
+                        CodeOffset* fastCallOffset, CodeOffset* slowCallOffset);
 
   // WasmTableCallIndexReg must contain the index of the indirect call.
   // This is for asm.js calls only.
   CodeOffset asmCallIndirect(const wasm::CallSiteDesc& desc,
                              const wasm::CalleeDesc& callee);
 
   // This function takes care of loading the pointer to the current instance
   // as the implicit first argument. It preserves TLS and pinned registers.
   // (TLS & pinned regs are non-volatile registers in the system ABI).
   CodeOffset wasmCallBuiltinInstanceMethod(const wasm::CallSiteDesc& desc,
                                            const ABIArg& instanceArg,
                                            wasm::SymbolicAddress builtin,
                                            wasm::FailureMode failureMode);
 
+  // Compute ptr += (indexTemp32 << shift) where shift can be any value < 32.
+  // May destroy indexTemp32.  The value of indexTemp32 must be positive, and it
+  // is implementation-defined what happens if bits are lost or the value
+  // becomes negative through the shift.  On 64-bit systems, the high 32 bits of
+  // indexTemp32 must be zero, not garbage.
+  void shiftIndex32AndAdd(Register indexTemp32, int shift,
+                          Register pointer) PER_SHARED_ARCH;
+
   // The System ABI frequently states that the high bits of a 64-bit register
   // that holds a 32-bit return value are unpredictable, and C++ compilers will
   // indeed generate code that leaves garbage in the upper bits.
   //
   // Adjust the contents of the 64-bit register `r` to conform to our internal
   // convention, which requires predictable high bits.  In practice, this means
-  // that the 32-bit valuewill be zero-extended or sign-extended to 64 bits as
+  // that the 32-bit value will be zero-extended or sign-extended to 64 bits as
   // appropriate for the platform.
   void widenInt32(Register r) DEFINED_ON(arm64, x64, mips64);
 
   // As enterFakeExitFrame(), but using register conventions appropriate for
   // wasm stubs.
   void enterFakeExitFrameForWasm(Register cxreg, Register scratch,
                                  ExitFrameType type) PER_SHARED_ARCH;
 
--- a/js/src/jit/arm/MacroAssembler-arm.cpp
+++ b/js/src/jit/arm/MacroAssembler-arm.cpp
@@ -5971,16 +5971,27 @@ void MacroAssembler::nearbyIntFloat32(Ro
   MOZ_CRASH("not supported on this platform");
 }
 
 void MacroAssembler::copySignDouble(FloatRegister lhs, FloatRegister rhs,
                                     FloatRegister output) {
   MOZ_CRASH("not supported on this platform");
 }
 
+void MacroAssembler::shiftIndex32AndAdd(Register indexTemp32, int shift,
+                                        Register pointer) {
+  if (IsShiftInScaleRange(shift)) {
+    computeEffectiveAddress(
+        BaseIndex(pointer, indexTemp32, ShiftToScale(shift)), pointer);
+    return;
+  }
+  lshift32(Imm32(shift), indexTemp32);
+  addPtr(indexTemp32, pointer);
+}
+
 //}}} check_macroassembler_style
 
 void MacroAssemblerARM::wasmTruncateToInt32(FloatRegister input,
                                             Register output, MIRType fromType,
                                             bool isUnsigned, bool isSaturating,
                                             Label* oolEntry) {
   ScratchDoubleScope scratchScope(asMasm());
   ScratchRegisterScope scratchReg(asMasm());
--- a/js/src/jit/arm64/MacroAssembler-arm64.cpp
+++ b/js/src/jit/arm64/MacroAssembler-arm64.cpp
@@ -3138,12 +3138,18 @@ void MacroAssembler::copySignFloat32(Flo
     moveFloat32(lhs, output);
   }
 
   bit(ARMFPRegister(output.encoding(), vixl::VectorFormat::kFormat8B),
       ARMFPRegister(rhs.encoding(), vixl::VectorFormat::kFormat8B),
       ARMFPRegister(scratch.encoding(), vixl::VectorFormat::kFormat8B));
 }
 
+void MacroAssembler::shiftIndex32AndAdd(Register indexTemp32, int shift,
+                                        Register pointer) {
+  Add(ARMRegister(pointer, 64), ARMRegister(pointer, 64),
+      Operand(ARMRegister(indexTemp32, 64), vixl::LSL, shift));
+}
+
 //}}} check_macroassembler_style
 
 }  // namespace jit
 }  // namespace js
--- a/js/src/jit/mips-shared/MacroAssembler-mips-shared.cpp
+++ b/js/src/jit/mips-shared/MacroAssembler-mips-shared.cpp
@@ -3340,9 +3340,20 @@ void MacroAssembler::nearbyIntFloat32(Ro
   MOZ_CRASH("not supported on this platform");
 }
 
 void MacroAssembler::copySignDouble(FloatRegister lhs, FloatRegister rhs,
                                     FloatRegister output) {
   MOZ_CRASH("not supported on this platform");
 }
 
+void MacroAssembler::shiftIndex32AndAdd(Register indexTemp32, int shift,
+                                        Register pointer) {
+  if (IsShiftInScaleRange(shift)) {
+    computeEffectiveAddress(
+        BaseIndex(pointer, indexTemp32, ShiftToScale(shift)), pointer);
+    return;
+  }
+  lshift32(Imm32(shift), indexTemp32);
+  addPtr(indexTemp32, pointer);
+}
+
 //}}} check_macroassembler_style
--- a/js/src/jit/shared/LIR-shared.h
+++ b/js/src/jit/shared/LIR-shared.h
@@ -3159,44 +3159,86 @@ class LWasmDerivedPointer : public LInst
 
 class LWasmParameterI64 : public LInstructionHelper<INT64_PIECES, 0, 0> {
  public:
   LIR_HEADER(WasmParameterI64);
 
   LWasmParameterI64() : LInstructionHelper(classOpcode) {}
 };
 
+// This is used only with LWasmCall.
+class LWasmCallIndirectAdjunctSafepoint : public LInstructionHelper<0, 0, 0> {
+  CodeOffset offs_;
+  uint32_t framePushedAtStackMapBase_;
+
+ public:
+  LIR_HEADER(WasmCallIndirectAdjunctSafepoint);
+
+  LWasmCallIndirectAdjunctSafepoint()
+      : LInstructionHelper(classOpcode),
+        offs_(0),
+        framePushedAtStackMapBase_(0) {}
+
+  CodeOffset safepointLocation() const {
+    MOZ_ASSERT(offs_.offset() != 0);
+    return offs_;
+  }
+  uint32_t framePushedAtStackMapBase() const {
+    MOZ_ASSERT(offs_.offset() != 0);
+    return framePushedAtStackMapBase_;
+  }
+  void recordSafepointInfo(CodeOffset offs, uint32_t framePushed) {
+    offs_ = offs;
+    framePushedAtStackMapBase_ = framePushed;
+  }
+};
+
+// LWasmCall may be generated into two function calls in the case of
+// call_indirect, one for the fast path and one for the slow path.  In that
+// case, the node carries a pointer to a companion node, the "adjunct
+// safepoint", representing the safepoint for the second of the two calls.  The
+// dual-call construction is only meaningful for wasm because wasm has no
+// invalidation of code; this is not a pattern to be used generally.
 class LWasmCall : public LVariadicInstruction<0, 0> {
   bool needsBoundsCheck_;
   mozilla::Maybe<uint32_t> tableSize_;
+  LWasmCallIndirectAdjunctSafepoint* adjunctSafepoint_;
 
  public:
   LIR_HEADER(WasmCall);
 
   LWasmCall(uint32_t numOperands, bool needsBoundsCheck,
             mozilla::Maybe<uint32_t> tableSize = mozilla::Nothing())
       : LVariadicInstruction(classOpcode, numOperands),
         needsBoundsCheck_(needsBoundsCheck),
-        tableSize_(tableSize) {
+        tableSize_(tableSize),
+        adjunctSafepoint_(nullptr) {
     this->setIsCall();
   }
 
   MWasmCall* mir() const { return mir_->toWasmCall(); }
 
   static bool isCallPreserved(AnyRegister reg) {
     // All MWasmCalls preserve the TLS register:
     //  - internal/indirect calls do by the internal wasm ABI
     //  - import calls do by explicitly saving/restoring at the callsite
     //  - builtin calls do because the TLS reg is non-volatile
     // See also CodeGeneratorShared::emitWasmCall.
     return !reg.isFloat() && reg.gpr() == WasmTlsReg;
   }
 
   bool needsBoundsCheck() const { return needsBoundsCheck_; }
   mozilla::Maybe<uint32_t> tableSize() const { return tableSize_; }
+  LWasmCallIndirectAdjunctSafepoint* adjunctSafepoint() const {
+    MOZ_ASSERT(adjunctSafepoint_ != nullptr);
+    return adjunctSafepoint_;
+  }
+  void setAdjunctSafepoint(LWasmCallIndirectAdjunctSafepoint* asp) {
+    adjunctSafepoint_ = asp;
+  }
 };
 
 class LWasmRegisterResult : public LInstructionHelper<1, 0, 0> {
  public:
   LIR_HEADER(WasmRegisterResult);
 
   LWasmRegisterResult() : LInstructionHelper(classOpcode) {}
 
--- a/js/src/jit/shared/Lowering-shared.cpp
+++ b/js/src/jit/shared/Lowering-shared.cpp
@@ -301,18 +301,17 @@ void LIRGeneratorShared::assignSafepoint
   osiPoint_ = new (alloc()) LOsiPoint(ins->safepoint(), postSnapshot);
 
   if (!lirGraph_.noteNeedsSafepoint(ins)) {
     abort(AbortReason::Alloc, "noteNeedsSafepoint failed");
     return;
   }
 }
 
-void LIRGeneratorShared::assignWasmSafepoint(LInstruction* ins,
-                                             MInstruction* mir) {
+void LIRGeneratorShared::assignWasmSafepoint(LInstruction* ins) {
   MOZ_ASSERT(!osiPoint_);
   MOZ_ASSERT(!ins->safepoint());
 
   ins->initSafepoint(alloc());
 
   if (!lirGraph_.noteNeedsSafepoint(ins)) {
     abort(AbortReason::Alloc, "noteNeedsSafepoint failed");
     return;
--- a/js/src/jit/shared/Lowering-shared.h
+++ b/js/src/jit/shared/Lowering-shared.h
@@ -344,17 +344,17 @@ class LIRGeneratorShared {
 
   // Marks this instruction as needing to call into either the VM or GC. This
   // function may build a snapshot that captures the result of its own
   // instruction, and as such, should generally be called after define*().
   void assignSafepoint(LInstruction* ins, MInstruction* mir,
                        BailoutKind kind = BailoutKind::DuringVMCall);
 
   // Marks this instruction as needing a wasm safepoint.
-  void assignWasmSafepoint(LInstruction* ins, MInstruction* mir);
+  void assignWasmSafepoint(LInstruction* ins);
 
   inline void lowerConstantDouble(double d, MInstruction* mir);
   inline void lowerConstantFloat32(float f, MInstruction* mir);
 
   bool canSpecializeWasmCompareAndSelect(MCompare::CompareType compTy,
                                          MIRType insTy);
   void lowerWasmCompareAndSelect(MWasmSelect* ins, MDefinition* lhs,
                                  MDefinition* rhs, MCompare::CompareType compTy,
--- a/js/src/jit/x86-shared/MacroAssembler-x86-shared.cpp
+++ b/js/src/jit/x86-shared/MacroAssembler-x86-shared.cpp
@@ -2136,9 +2136,20 @@ void MacroAssembler::copySignFloat32(Flo
     float keepSignMask = mozilla::BitwiseCast<float>(INT32_MIN);
     loadConstantFloat32(keepSignMask, scratch);
     vandps(rhs, scratch, scratch);
   }
 
   vorps(scratch, output, output);
 }
 
+void MacroAssembler::shiftIndex32AndAdd(Register indexTemp32, int shift,
+                                        Register pointer) {
+  if (IsShiftInScaleRange(shift)) {
+    computeEffectiveAddress(
+        BaseIndex(pointer, indexTemp32, ShiftToScale(shift)), pointer);
+    return;
+  }
+  lshift32(Imm32(shift), indexTemp32);
+  addPtr(indexTemp32, pointer);
+}
+
 //}}} check_macroassembler_style
--- a/js/src/wasm/WasmBCClass.h
+++ b/js/src/wasm/WasmBCClass.h
@@ -125,28 +125,28 @@ struct AccessCheck {
   bool omitAlignmentCheck;
   bool onlyPointerAlignment;
 };
 
 // Encapsulate all the information about a function call.
 struct FunctionCall {
   explicit FunctionCall(uint32_t lineOrBytecode)
       : lineOrBytecode(lineOrBytecode),
-        isInterModule(false),
+        restoreRegisterStateAndRealm(false),
         usesSystemAbi(false),
 #ifdef JS_CODEGEN_ARM
         hardFP(true),
 #endif
         frameAlignAdjustment(0),
         stackArgAreaSize(0) {
   }
 
   uint32_t lineOrBytecode;
   WasmABIArgGenerator abi;
-  bool isInterModule;
+  bool restoreRegisterStateAndRealm;
   bool usesSystemAbi;
 #ifdef JS_CODEGEN_ARM
   bool hardFP;
 #endif
   size_t frameAlignAdjustment;
   size_t stackArgAreaSize;
 };
 
@@ -908,28 +908,30 @@ struct BaseCompiler final {
 
   // Move return values to memory before returning, as appropriate
   void popStackReturnValues(const ResultType& resultType);
 
   //////////////////////////////////////////////////////////////////////
   //
   // Calls.
 
-  void beginCall(FunctionCall& call, UseABI useABI, InterModule interModule);
+  void beginCall(FunctionCall& call, UseABI useABI,
+                 RestoreRegisterStateAndRealm restoreRegisterStateAndRealm);
   void endCall(FunctionCall& call, size_t stackSpace);
   void startCallArgs(size_t stackArgAreaSizeUnaligned, FunctionCall* call);
   ABIArg reservePointerArgument(FunctionCall* call);
   void passArg(ValType type, const Stk& arg, FunctionCall* call);
   CodeOffset callDefinition(uint32_t funcIndex, const FunctionCall& call);
   CodeOffset callSymbolic(SymbolicAddress callee, const FunctionCall& call);
 
   // Precondition for the call*() methods: sync()
 
-  CodeOffset callIndirect(uint32_t funcTypeIndex, uint32_t tableIndex,
-                          const Stk& indexVal, const FunctionCall& call);
+  void callIndirect(uint32_t funcTypeIndex, uint32_t tableIndex,
+                    const Stk& indexVal, const FunctionCall& call,
+                    CodeOffset* fastCallOffset, CodeOffset* slowCallOffset);
   CodeOffset callImport(unsigned globalDataOffset, const FunctionCall& call);
   CodeOffset builtinCall(SymbolicAddress builtin, const FunctionCall& call);
   CodeOffset builtinInstanceMethodCall(const SymbolicAddressSignature& builtin,
                                        const ABIArg& instanceArg,
                                        const FunctionCall& call);
   [[nodiscard]] bool pushCallResults(const FunctionCall& call, ResultType type,
                                      const StackResultsLoc& loc);
 
--- a/js/src/wasm/WasmBCDefs.h
+++ b/js/src/wasm/WasmBCDefs.h
@@ -76,39 +76,41 @@ using IsKnownNotZero = bool;
 using IsUnsigned = bool;
 using IsRemainder = bool;
 using NeedsBoundsCheck = bool;
 using WantResult = bool;
 using ZeroOnOverflow = bool;
 
 class BaseStackFrame;
 
-// Two flags, useABI and interModule, control how calls are made.
+// Two flags, useABI and restoreRegisterStateAndRealm, control how calls are
+// made.
 //
 // UseABI::Wasm implies that the Tls/Heap/Global registers are nonvolatile,
-// except when InterModule::True is also set, when they are volatile.
+// except when RestoreRegisterStateAndRealm::True is also set, when they are
+// volatile.
 //
 // UseABI::Builtin implies that the Tls/Heap/Global registers are volatile.
-// In this case, we require InterModule::False.  The calling convention
-// is otherwise like UseABI::Wasm.
+// In this case, we require RestoreRegisterStateAndRealm::False.  The calling
+// convention is otherwise like UseABI::Wasm.
 //
 // UseABI::System implies that the Tls/Heap/Global registers are volatile.
 // Additionally, the parameter passing mechanism may be slightly different from
 // the UseABI::Wasm convention.
 //
 // When the Tls/Heap/Global registers are not volatile, the baseline compiler
 // will restore the Tls register from its save slot before the call, since the
 // baseline compiler uses the Tls register for other things.
 //
 // When those registers are volatile, the baseline compiler will reload them
 // after the call (it will restore the Tls register from the save slot and load
 // the other two from the Tls data).
 
 enum class UseABI { Wasm, Builtin, System };
-enum class InterModule { False = false, True = true };
+enum class RestoreRegisterStateAndRealm { False = false, True = true };
 enum class RhsDestOp { True = true };
 
 // Compiler configuration.
 //
 // The following internal configuration #defines are used.  The configuration is
 // partly below in this file, partly in WasmBCRegDefs.h.
 //
 // RABALDR_HAS_HEAPREG
--- a/js/src/wasm/WasmBaselineCompile.cpp
+++ b/js/src/wasm/WasmBaselineCompile.cpp
@@ -1085,21 +1085,25 @@ void BaseCompiler::shuffleStackResultsBe
 
   fr.popStackBeforeBranch(destHeight, stackResultBytes);
 }
 
 //////////////////////////////////////////////////////////////////////////////
 //
 // Function calls.
 
-void BaseCompiler::beginCall(FunctionCall& call, UseABI useABI,
-                             InterModule interModule) {
-  MOZ_ASSERT_IF(useABI == UseABI::Builtin, interModule == InterModule::False);
-
-  call.isInterModule = interModule == InterModule::True;
+void BaseCompiler::beginCall(
+    FunctionCall& call, UseABI useABI,
+    RestoreRegisterStateAndRealm restoreRegisterStateAndRealm) {
+  MOZ_ASSERT_IF(
+      useABI == UseABI::Builtin,
+      restoreRegisterStateAndRealm == RestoreRegisterStateAndRealm::False);
+
+  call.restoreRegisterStateAndRealm =
+      restoreRegisterStateAndRealm == RestoreRegisterStateAndRealm::True;
   call.usesSystemAbi = useABI == UseABI::System;
 
   if (call.usesSystemAbi) {
     // Call-outs need to use the appropriate system ABI.
 #if defined(JS_CODEGEN_ARM)
     call.hardFP = UseHardFpABI();
     call.abi.setUseHardFp(call.hardFP);
 #endif
@@ -1118,17 +1122,17 @@ void BaseCompiler::beginCall(FunctionCal
 
 void BaseCompiler::endCall(FunctionCall& call, size_t stackSpace) {
   size_t adjustment = call.stackArgAreaSize + call.frameAlignAdjustment;
   fr.freeArgAreaAndPopBytes(adjustment, stackSpace);
 
   MOZ_ASSERT(stackMapGenerator_.framePushedExcludingOutboundCallArgs.isSome());
   stackMapGenerator_.framePushedExcludingOutboundCallArgs.reset();
 
-  if (call.isInterModule) {
+  if (call.restoreRegisterStateAndRealm) {
     fr.loadTlsPtr(WasmTlsReg);
     masm.loadWasmPinnedRegsFromTls();
     masm.switchToWasmTlsRealm(ABINonArgReturnReg0, ABINonArgReturnReg1);
   } else if (call.usesSystemAbi) {
     // On x86 there are no pinned registers, so don't waste time
     // reloading the Tls.
 #ifndef JS_CODEGEN_X86
     fr.loadTlsPtr(WasmTlsReg);
@@ -1344,30 +1348,31 @@ CodeOffset BaseCompiler::callDefinition(
 CodeOffset BaseCompiler::callSymbolic(SymbolicAddress callee,
                                       const FunctionCall& call) {
   CallSiteDesc desc(call.lineOrBytecode, CallSiteDesc::Symbolic);
   return masm.call(desc, callee);
 }
 
 // Precondition: sync()
 
-CodeOffset BaseCompiler::callIndirect(uint32_t funcTypeIndex,
-                                      uint32_t tableIndex, const Stk& indexVal,
-                                      const FunctionCall& call) {
+void BaseCompiler::callIndirect(uint32_t funcTypeIndex, uint32_t tableIndex,
+                                const Stk& indexVal, const FunctionCall& call,
+                                CodeOffset* fastCallOffset,
+                                CodeOffset* slowCallOffset) {
   const TypeIdDesc& funcTypeId = moduleEnv_.typeIds[funcTypeIndex];
   MOZ_ASSERT(funcTypeId.kind() != TypeIdDescKind::None);
 
   const TableDesc& table = moduleEnv_.tables[tableIndex];
 
   loadI32(indexVal, RegI32(WasmTableCallIndexReg));
 
   CallSiteDesc desc(call.lineOrBytecode, CallSiteDesc::Indirect);
   CalleeDesc callee = CalleeDesc::wasmTable(table, funcTypeId);
-  return masm.wasmCallIndirect(desc, callee, NeedsBoundsCheck(true),
-                               mozilla::Nothing());
+  masm.wasmCallIndirect(desc, callee, NeedsBoundsCheck(true),
+                        mozilla::Nothing(), fastCallOffset, slowCallOffset);
 }
 
 // Precondition: sync()
 
 CodeOffset BaseCompiler::callImport(unsigned globalDataOffset,
                                     const FunctionCall& call) {
   CallSiteDesc desc(call.lineOrBytecode, CallSiteDesc::Import);
   CalleeDesc callee = CalleeDesc::import(globalDataOffset);
@@ -4314,17 +4319,18 @@ bool BaseCompiler::emitCall() {
   ResultType resultType(ResultType::Vector(funcType.results()));
   StackResultsLoc results;
   if (!pushStackResultsForCall(resultType, RegPtr(ABINonArgReg0), &results)) {
     return false;
   }
 
   FunctionCall baselineCall(lineOrBytecode);
   beginCall(baselineCall, UseABI::Wasm,
-            import ? InterModule::True : InterModule::False);
+            import ? RestoreRegisterStateAndRealm::True
+                   : RestoreRegisterStateAndRealm::False);
 
   if (!emitCallArgs(funcType.args(), results, &baselineCall,
                     CalleeOnStack::False)) {
     return false;
   }
 
   CodeOffset raOffset;
   if (import) {
@@ -4374,27 +4380,34 @@ bool BaseCompiler::emitCallIndirect() {
 
   ResultType resultType(ResultType::Vector(funcType.results()));
   StackResultsLoc results;
   if (!pushStackResultsForCall(resultType, RegPtr(ABINonArgReg0), &results)) {
     return false;
   }
 
   FunctionCall baselineCall(lineOrBytecode);
-  beginCall(baselineCall, UseABI::Wasm, InterModule::True);
+  // State and realm are restored as needed by callIndirect (really by
+  // MacroAssembler::wasmCallIndirect).
+  beginCall(baselineCall, UseABI::Wasm, RestoreRegisterStateAndRealm::False);
 
   if (!emitCallArgs(funcType.args(), results, &baselineCall,
                     CalleeOnStack::True)) {
     return false;
   }
 
   const Stk& callee = peek(results.count());
-  CodeOffset raOffset =
-      callIndirect(funcTypeIndex, tableIndex, callee, baselineCall);
-  if (!createStackMap("emitCallIndirect", raOffset)) {
+  CodeOffset fastCallOffset;
+  CodeOffset slowCallOffset;
+  callIndirect(funcTypeIndex, tableIndex, callee, baselineCall, &fastCallOffset,
+               &slowCallOffset);
+  if (!createStackMap("emitCallIndirect", fastCallOffset)) {
+    return false;
+  }
+  if (!createStackMap("emitCallIndirect", slowCallOffset)) {
     return false;
   }
 
   popStackResultsAfterCall(results, stackArgBytes);
 
   endCall(baselineCall, stackArgBytes);
 
   popValueStackBy(numArgs);
@@ -4441,17 +4454,17 @@ bool BaseCompiler::emitUnaryMathBuiltinC
 
   ValTypeVector& signature = operandType == ValType::F32 ? SigF_ : SigD_;
   ValType retType = operandType;
   uint32_t numArgs = signature.length();
   size_t stackSpace = stackConsumed(numArgs);
   StackResultsLoc noStackResults;
 
   FunctionCall baselineCall(lineOrBytecode);
-  beginCall(baselineCall, UseABI::Builtin, InterModule::False);
+  beginCall(baselineCall, UseABI::Builtin, RestoreRegisterStateAndRealm::False);
 
   if (!emitCallArgs(signature, noStackResults, &baselineCall,
                     CalleeOnStack::False)) {
     return false;
   }
 
   CodeOffset raOffset = builtinCall(callee, baselineCall);
   if (!createStackMap("emitUnaryMathBuiltin[..]", raOffset)) {
@@ -5191,17 +5204,17 @@ bool BaseCompiler::emitInstanceCall(uint
   MOZ_ASSERT(argTypes[0] == MIRType::Pointer);
 
   sync();
 
   uint32_t numNonInstanceArgs = builtin.numArgs - 1 /* instance */;
   size_t stackSpace = stackConsumed(numNonInstanceArgs);
 
   FunctionCall baselineCall(lineOrBytecode);
-  beginCall(baselineCall, UseABI::System, InterModule::True);
+  beginCall(baselineCall, UseABI::System, RestoreRegisterStateAndRealm::True);
 
   ABIArg instanceArg = reservePointerArgument(&baselineCall);
 
   startCallArgs(StackArgAreaSizeUnaligned(builtin), &baselineCall);
   for (uint32_t i = 1; i < builtin.numArgs; i++) {
     ValType t;
     switch (argTypes[i]) {
       case MIRType::Int32:
--- a/js/src/wasm/WasmBuiltins.cpp
+++ b/js/src/wasm/WasmBuiltins.cpp
@@ -558,16 +558,18 @@ bool wasm::HandleThrow(JSContext* cx, Wa
       if (tryNote) {
         cx->clearPendingException();
         RootedAnyRef ref(cx, AnyRef::null());
         if (!BoxAnyRef(cx, exn, &ref)) {
           MOZ_ASSERT(cx->isThrowingOutOfMemory());
           continue;
         }
 
+        MOZ_ASSERT(iter.tls() == iter.instance()->tlsData());
+
         iter.tls()->pendingException = ref.get().asJSObject();
 
         rfe->kind = ResumeFromException::RESUME_WASM_CATCH;
         rfe->framePointer = (uint8_t*)iter.frame();
         rfe->tlsData = iter.instance()->tlsData();
 
         size_t offsetAdjustment = 0;
         rfe->stackPointer =
--- a/js/src/wasm/WasmCodegenTypes.h
+++ b/js/src/wasm/WasmCodegenTypes.h
@@ -404,23 +404,24 @@ class CallSiteDesc {
   uint32_t lineOrBytecode_ : LINE_OR_BYTECODE_BITS_SIZE;
   uint32_t kind_ : 3;
 
  public:
   static constexpr uint32_t MAX_LINE_OR_BYTECODE_VALUE =
       (1 << LINE_OR_BYTECODE_BITS_SIZE) - 1;
 
   enum Kind {
-    Func,        // pc-relative call to a specific function
-    Import,      // wasm import call
-    Indirect,    // wasm indirect call
-    Symbolic,    // call to a single symbolic callee
-    EnterFrame,  // call to a enter frame handler
-    LeaveFrame,  // call to a leave frame handler
-    Breakpoint   // call to instruction breakpoint
+    Func,          // pc-relative call to a specific function
+    Import,        // wasm import call
+    Indirect,      // dynamic callee called via register, context on stack
+    IndirectFast,  // dynamically determined to be same-instance
+    Symbolic,      // call to a single symbolic callee
+    EnterFrame,    // call to an enter frame handler
+    LeaveFrame,    // call to a leave frame handler
+    Breakpoint     // call to instruction breakpoint
   };
   CallSiteDesc() : lineOrBytecode_(0), kind_(0) {}
   explicit CallSiteDesc(Kind kind) : lineOrBytecode_(0), kind_(kind) {
     MOZ_ASSERT(kind == Kind(kind_));
   }
   CallSiteDesc(uint32_t lineOrBytecode, Kind kind)
       : lineOrBytecode_(lineOrBytecode), kind_(kind) {
     MOZ_ASSERT(kind == Kind(kind_));
--- a/js/src/wasm/WasmGenerator.cpp
+++ b/js/src/wasm/WasmGenerator.cpp
@@ -474,16 +474,17 @@ bool ModuleGenerator::linkCallSites() {
   for (; lastPatchedCallSite_ < metadataTier_->callSites.length();
        lastPatchedCallSite_++) {
     const CallSite& callSite = metadataTier_->callSites[lastPatchedCallSite_];
     const CallSiteTarget& target = callSiteTargets_[lastPatchedCallSite_];
     uint32_t callerOffset = callSite.returnAddressOffset();
     switch (callSite.kind()) {
       case CallSiteDesc::Import:
       case CallSiteDesc::Indirect:
+      case CallSiteDesc::IndirectFast:
       case CallSiteDesc::Symbolic:
         break;
       case CallSiteDesc::Func: {
         if (funcIsCompiled(target.funcIndex())) {
           uint32_t calleeOffset =
               funcCodeRange(target.funcIndex()).funcUncheckedCallEntry();
           if (InRange(callerOffset, calleeOffset)) {
             masm_.patchCall(callerOffset, calleeOffset);
--- a/js/src/wasm/WasmTypeDef.h
+++ b/js/src/wasm/WasmTypeDef.h
@@ -824,17 +824,17 @@ class TypeIdDesc {
   static bool isGlobal(const TypeDef& type);
 
   TypeIdDesc() : kind_(TypeIdDescKind::None), bits_(0) {}
   static TypeIdDesc global(const TypeDef& type, uint32_t globalDataOffset);
   static TypeIdDesc immediate(const TypeDef& type);
 
   bool isGlobal() const { return kind_ == TypeIdDescKind::Global; }
 
-  size_t immediate() const {
+  uint32_t immediate() const {
     MOZ_ASSERT(kind_ == TypeIdDescKind::Immediate);
     return bits_;
   }
   uint32_t globalDataOffset() const {
     MOZ_ASSERT(kind_ == TypeIdDescKind::Global);
     return bits_;
   }
 };