author | Lars T Hansen <lhansen@mozilla.com> |
date | Tue, 15 Feb 2022 07:03:29 +0000 |
changeset 607744 | 9700c43270312e2d3a1159be5ec7f5f6503f0d79 |
parent 607743 | e644785b83a6b4117f60925e75842afa83539c8f |
child 607745 | f1feee3d48d17822bf99b467eeeff9cbcae78594 |
push id | 39292 |
push user | csabou@mozilla.com |
push date | Tue, 15 Feb 2022 21:00:51 +0000 |
treeherder | mozilla-central@950c1613c97d |
reviewers | rhunt |
bugs | 1754377 |
milestone | 99.0a1 |
first release with | nightly linux32, nightly linux64, nightly mac, nightly win32, nightly win64 |
last release without | nightly linux32, nightly linux64, nightly mac, nightly win32, nightly win64 |
--- a/js/src/jit/CodeGenerator.cpp +++ b/js/src/jit/CodeGenerator.cpp @@ -8046,31 +8046,37 @@ void CodeGenerator::visitWasmCall(LWasmC // TLS and pinned regs. The only case where where we don't have to reload // the TLS and pinned regs is when the callee preserves them. bool reloadRegs = true; bool switchRealm = true; const wasm::CallSiteDesc& desc = mir->desc(); const wasm::CalleeDesc& callee = mir->callee(); CodeOffset retOffset; + CodeOffset secondRetOffset; switch (callee.which()) { case wasm::CalleeDesc::Func: retOffset = masm.call(desc, callee.funcIndex()); reloadRegs = false; switchRealm = false; break; case wasm::CalleeDesc::Import: retOffset = masm.wasmCallImport(desc, callee); break; case wasm::CalleeDesc::AsmJSTable: retOffset = masm.asmCallIndirect(desc, callee); break; case wasm::CalleeDesc::WasmTable: - retOffset = masm.wasmCallIndirect(desc, callee, lir->needsBoundsCheck(), - lir->tableSize()); + masm.wasmCallIndirect(desc, callee, lir->needsBoundsCheck(), + lir->tableSize(), &retOffset, &secondRetOffset); + // Register reloading and realm switching are handled dynamically inside + // wasmCallIndirect. There are two return offsets, one for each call + // instruction (fast path and slow path). + reloadRegs = false; + switchRealm = false; break; case wasm::CalleeDesc::Builtin: retOffset = masm.call(desc, callee.builtin()); reloadRegs = false; switchRealm = false; break; case wasm::CalleeDesc::BuiltinInstanceMethod: retOffset = masm.wasmCallBuiltinInstanceMethod( @@ -8080,20 +8086,28 @@ void CodeGenerator::visitWasmCall(LWasmC break; } // Note the assembler offset for the associated LSafePoint. markSafepointAt(retOffset.offset(), lir); // Now that all the outbound in-memory args are on the stack, note the // required lower boundary point of the associated StackMap. - lir->safepoint()->setFramePushedAtStackMapBase( - masm.framePushed() - mir->stackArgAreaSizeUnaligned()); + uint32_t framePushedAtStackMapBase = + masm.framePushed() - mir->stackArgAreaSizeUnaligned(); + lir->safepoint()->setFramePushedAtStackMapBase(framePushedAtStackMapBase); MOZ_ASSERT(!lir->safepoint()->isWasmTrap()); + // Note the assembler offset and framePushed for use by the adjunct + // LSafePoint, see visitor for LWasmCallIndirectAdjunctSafepoint below. + if (callee.which() == wasm::CalleeDesc::WasmTable) { + lir->adjunctSafepoint()->recordSafepointInfo(secondRetOffset, + framePushedAtStackMapBase); + } + if (reloadRegs) { masm.loadPtr(Address(masm.getStackPointer(), WasmCallerTlsOffsetBeforeCall), WasmTlsReg); masm.loadWasmPinnedRegsFromTls(); if (switchRealm) { masm.switchToWasmTlsRealm(ABINonArgReturnReg0, ABINonArgReturnReg1); } } else { @@ -8117,16 +8131,23 @@ void CodeGenerator::visitWasmCall(LWasmC tryNote.framePushed = masm.framePushed(); // Required by WasmTryNote. MOZ_ASSERT(tryNote.end > tryNote.begin); } #endif } +void CodeGenerator::visitWasmCallIndirectAdjunctSafepoint( + LWasmCallIndirectAdjunctSafepoint* lir) { + markSafepointAt(lir->safepointLocation().offset(), lir); + lir->safepoint()->setFramePushedAtStackMapBase( + lir->framePushedAtStackMapBase()); +} + void CodeGenerator::visitWasmLoadSlot(LWasmLoadSlot* ins) { MIRType type = ins->type(); Register container = ToRegister(ins->containerRef()); Address addr(container, ins->offset()); AnyRegister dst = ToAnyRegister(ins->output()); switch (type) { case MIRType::Int32:
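For the WasmTable case in the hunk above, codegen records one safepoint per emitted call instruction: the ordinary safepoint at the fast-path return offset and the adjunct safepoint at the slow-path return offset, both sharing the same framePushed value. The following is a minimal illustrative sketch of that bookkeeping; the types and numbers are simplified stand-ins, not the real jit classes.

// Illustrative sketch only: simplified stand-ins for the real LIR/codegen
// classes, showing how the two return offsets of a call_indirect are recorded.
#include <cassert>
#include <cstdint>

struct AdjunctSafepoint {
  uint32_t returnOffset = 0;               // slow-path return point
  uint32_t framePushedAtStackMapBase = 0;
  void recordSafepointInfo(uint32_t offs, uint32_t framePushed) {
    returnOffset = offs;
    framePushedAtStackMapBase = framePushed;
  }
};

int main() {
  // Values a codegen pass might compute; purely illustrative numbers.
  uint32_t framePushed = 160;
  uint32_t stackArgAreaSizeUnaligned = 32;
  uint32_t framePushedAtStackMapBase = framePushed - stackArgAreaSizeUnaligned;

  uint32_t fastRetOffset = 0x48;           // primary safepoint marked here
  uint32_t slowRetOffset = 0x7c;           // adjunct safepoint marked here later

  AdjunctSafepoint adjunct;
  adjunct.recordSafepointInfo(slowRetOffset, framePushedAtStackMapBase);

  assert(adjunct.returnOffset == slowRetOffset);
  assert(adjunct.framePushedAtStackMapBase == 128);
  (void)fastRetOffset;
  return 0;
}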
--- a/js/src/jit/LIROps.yaml
+++ b/js/src/jit/LIROps.yaml
@@ -2762,16 +2762,19 @@
 - name: WasmStackArgI64
   operands:
     arg: Int64
   mir_op: WasmStackArg
 
 - name: WasmNullConstant
   result_type: WordSized
 
+- name: WasmCallIndirectAdjunctSafepoint
+  gen_boilerplate: false
+
 - name: WasmCall
   gen_boilerplate: false
 
 - name: WasmRegisterResult
   gen_boilerplate: false
 
 - name: WasmRegisterPairResult
   gen_boilerplate: false
--- a/js/src/jit/Lowering.cpp
+++ b/js/src/jit/Lowering.cpp
@@ -3128,18 +3128,17 @@ void LIRGenerator::visitInterruptCheck(M
   add(lir, ins);
   assignSafepoint(lir, ins);
 }
 
 void LIRGenerator::visitWasmInterruptCheck(MWasmInterruptCheck* ins) {
   auto* lir =
       new (alloc()) LWasmInterruptCheck(useRegisterAtStart(ins->tlsPtr()));
   add(lir, ins);
-
-  assignWasmSafepoint(lir, ins);
+  assignWasmSafepoint(lir);
 }
 
 void LIRGenerator::visitWasmTrap(MWasmTrap* ins) {
   add(new (alloc()) LWasmTrap, ins);
 }
 
 void LIRGenerator::visitWasmReinterpret(MWasmReinterpret* ins) {
   if (ins->type() == MIRType::Int64) {
@@ -5383,18 +5382,28 @@ void LIRGenerator::visitWasmCall(MWasmCa
   if (ins->callee().isTable()) {
     MDefinition* index = ins->getOperand(ins->numArgs());
     lir->setOperand(ins->numArgs(),
                     useFixedAtStart(index, WasmTableCallIndexReg));
   }
 
   add(lir, ins);
-
-  assignWasmSafepoint(lir, ins);
+  assignWasmSafepoint(lir);
+
+  // WasmCall with WasmTable has two call instructions, and they both need a
+  // safepoint associated with them. Create a second safepoint here; the node
+  // otherwise does nothing, and codegen for it only marks the safepoint at the
+  // node.
+  if (ins->callee().which() == wasm::CalleeDesc::WasmTable) {
+    auto* adjunctSafepoint = new (alloc()) LWasmCallIndirectAdjunctSafepoint();
+    add(adjunctSafepoint);
+    assignWasmSafepoint(adjunctSafepoint);
+    lir->setAdjunctSafepoint(adjunctSafepoint);
+  }
 }
 
 void LIRGenerator::visitSetDOMProperty(MSetDOMProperty* ins) {
   MDefinition* val = ins->value();
 
   Register cxReg, objReg, privReg, valueReg;
   GetTempRegForIntArg(0, 0, &cxReg);
   GetTempRegForIntArg(1, 0, &objReg);
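On the lowering side, the second safepoint is carried by a companion node emitted right after the call, as the hunk above shows. The stand-alone sketch below models that wiring with hypothetical, much-simplified types; it is not the real LIR machinery.

// Rough model of the lowering-side wiring; the types are hypothetical
// stand-ins for the real LIR node classes.
#include <cassert>
#include <vector>

struct LNode {
  bool hasSafepoint = false;
  void initSafepoint() { hasSafepoint = true; }
};

struct LAdjunctSafepoint : LNode {};

struct LCall : LNode {
  bool isTableCall = false;
  LAdjunctSafepoint* adjunct = nullptr;
};

int main() {
  std::vector<LNode*> graph;

  LCall call;
  call.isTableCall = true;
  graph.push_back(&call);
  call.initSafepoint();                  // safepoint for the fast-path call

  LAdjunctSafepoint adjunct;
  if (call.isTableCall) {
    graph.push_back(&adjunct);           // companion node right after the call
    adjunct.initSafepoint();             // safepoint for the slow-path call
    call.adjunct = &adjunct;             // codegen reaches it via the call node
  }

  assert(call.adjunct && call.adjunct->hasSafepoint);
  return 0;
}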
--- a/js/src/jit/MacroAssembler.cpp +++ b/js/src/jit/MacroAssembler.cpp @@ -3880,18 +3880,18 @@ CodeOffset MacroAssembler::wasmCallBuilt return ret; } CodeOffset MacroAssembler::asmCallIndirect(const wasm::CallSiteDesc& desc, const wasm::CalleeDesc& callee) { MOZ_ASSERT(callee.which() == wasm::CalleeDesc::AsmJSTable); - Register scratch = WasmTableCallScratchReg0; - Register index = WasmTableCallIndexReg; + const Register scratch = WasmTableCallScratchReg0; + const Register index = WasmTableCallIndexReg; // Optimization opportunity: when offsetof(FunctionTableElem, code) == 0, as // it is at present, we can probably generate better code here by folding // the address computation into the load. static_assert(sizeof(wasm::FunctionTableElem) == 8 || sizeof(wasm::FunctionTableElem) == 16, "elements of function tables are two words"); @@ -3908,82 +3908,159 @@ CodeOffset MacroAssembler::asmCallIndire loadPtr(Address(scratch, offsetof(wasm::FunctionTableElem, code)), scratch); storePtr(WasmTlsReg, Address(getStackPointer(), WasmCallerTlsOffsetBeforeCall)); storePtr(WasmTlsReg, Address(getStackPointer(), WasmCalleeTlsOffsetBeforeCall)); return call(desc, scratch); } -CodeOffset MacroAssembler::wasmCallIndirect( - const wasm::CallSiteDesc& desc, const wasm::CalleeDesc& callee, - bool needsBoundsCheck, mozilla::Maybe<uint32_t> tableSize) { +// In principle, call_indirect requires an expensive context switch to the +// callee's instance and realm before the call and an almost equally expensive +// switch back to the caller's ditto after. However, if the caller's tls is the +// same as the callee's tls then no context switch is required, and it only +// takes a compare-and-branch at run-time to test this - all values are in +// registers already. We therefore generate two call paths, one for the fast +// call without the context switch (which additionally avoids a null check) and +// one for the slow call with the context switch. + +void MacroAssembler::wasmCallIndirect(const wasm::CallSiteDesc& desc, + const wasm::CalleeDesc& callee, + bool needsBoundsCheck, + mozilla::Maybe<uint32_t> tableSize, + CodeOffset* fastCallOffset, + CodeOffset* slowCallOffset) { + static_assert(sizeof(wasm::FunctionTableElem) == 2 * sizeof(void*), + "Exactly two pointers or index scaling won't work correctly"); MOZ_ASSERT(callee.which() == wasm::CalleeDesc::WasmTable); - Register scratch = WasmTableCallScratchReg0; - Register index = WasmTableCallIndexReg; - - // Write the functype-id into the ABI functype-id register. - wasm::TypeIdDesc funcTypeId = callee.wasmTableSigId(); - switch (funcTypeId.kind()) { - case wasm::TypeIdDescKind::Global: - loadWasmGlobalPtr(funcTypeId.globalDataOffset(), WasmTableCallSigReg); - break; - case wasm::TypeIdDescKind::Immediate: - move32(Imm32(funcTypeId.immediate()), WasmTableCallSigReg); - break; - case wasm::TypeIdDescKind::None: - break; - } - + const int shift = sizeof(wasm::FunctionTableElem) == 8 ? 3 : 4; wasm::BytecodeOffset trapOffset(desc.lineOrBytecode()); - - // WebAssembly throws if the index is out-of-bounds. + const Register calleeScratch = WasmTableCallScratchReg0; + const Register index = WasmTableCallIndexReg; + + // Check the table index and throw if out-of-bounds. + // + // Frequently the table size is known, so optimize for that. Otherwise + // compare with a memory operand when that's possible. 
(There's little sense + // in hoisting the load of the bound into a register at a higher level and + // reusing that register, because a hoisted value would either have to be + // spilled and re-loaded before the next call_indirect, or would be abandoned + // because we could not trust that a hoisted value would not have changed.) + if (needsBoundsCheck) { Label ok; if (tableSize.isSome()) { branch32(Assembler::Condition::Below, index, Imm32(*tableSize), &ok); } else { branch32(Assembler::Condition::Above, Address(WasmTlsReg, offsetof(wasm::TlsData, globalArea) + callee.tableLengthGlobalDataOffset()), index, &ok); } wasmTrap(wasm::Trap::OutOfBounds, trapOffset); bind(&ok); } - // Load the base pointer of the table. - loadWasmGlobalPtr(callee.tableFunctionBaseGlobalDataOffset(), scratch); - - // Load the callee from the table. - if (sizeof(wasm::FunctionTableElem) == 8) { - computeEffectiveAddress(BaseIndex(scratch, index, TimesEight), scratch); - } else { - lshift32(Imm32(4), index); - addPtr(index, scratch); + // Write the functype-id into the ABI functype-id register. + + const wasm::TypeIdDesc funcTypeId = callee.wasmTableSigId(); + switch (funcTypeId.kind()) { + case wasm::TypeIdDescKind::Global: + loadWasmGlobalPtr(funcTypeId.globalDataOffset(), WasmTableCallSigReg); + break; + case wasm::TypeIdDescKind::Immediate: + move32(Imm32(funcTypeId.immediate()), WasmTableCallSigReg); + break; + case wasm::TypeIdDescKind::None: + break; } + // Load the base pointer of the table and compute the address of the callee in + // the table. + + loadWasmGlobalPtr(callee.tableFunctionBaseGlobalDataOffset(), calleeScratch); + shiftIndex32AndAdd(index, shift, calleeScratch); + + // Load the callee tls and decide whether to take the fast path or the slow + // path. + + Label fastCall; + Label done; + const Register newTlsTemp = WasmTableCallScratchReg1; + loadPtr(Address(calleeScratch, offsetof(wasm::FunctionTableElem, tls)), + newTlsTemp); + branchPtr(Assembler::Equal, WasmTlsReg, newTlsTemp, &fastCall); + + // Slow path: Save context, check for null, setup new context, call, restore + // context. + // + // TODO: The slow path could usefully be out-of-line and the test above would + // just fall through to the fast path. This keeps the fast-path code dense, + // and has correct static prediction for the branch (forward conditional + // branches predicted not taken, normally). + storePtr(WasmTlsReg, Address(getStackPointer(), WasmCallerTlsOffsetBeforeCall)); - loadPtr(Address(scratch, offsetof(wasm::FunctionTableElem, tls)), WasmTlsReg); + movePtr(newTlsTemp, WasmTlsReg); storePtr(WasmTlsReg, Address(getStackPointer(), WasmCalleeTlsOffsetBeforeCall)); + // TODO: The null test cannot quite go away because we need a valid(ish) tls + // to load the target state and switch realms. A couple of possibilities: + // + // - We could have a dummy tls in the null tls slots, and then catch the + // signal when we later jump to null. Not clear what the implications would + // be for the realm switch, since *cx would already be updated. + // + // - If the newTls is null, then the loading of the HeapReg from it (except on + // x86-32) will cause a trap, and that trap will happen before the realm + // switch, and that may be good enough. 
+ Label nonNull; branchTestPtr(Assembler::NonZero, WasmTlsReg, WasmTlsReg, &nonNull); wasmTrap(wasm::Trap::IndirectCallToNull, trapOffset); bind(&nonNull); loadWasmPinnedRegsFromTls(); switchToWasmTlsRealm(index, WasmTableCallScratchReg1); - loadPtr(Address(scratch, offsetof(wasm::FunctionTableElem, code)), scratch); - - return call(desc, scratch); + loadPtr(Address(calleeScratch, offsetof(wasm::FunctionTableElem, code)), + calleeScratch); + + *slowCallOffset = call(desc, calleeScratch); + + // Restore registers and realm and join up with the fast path. + + loadPtr(Address(getStackPointer(), WasmCallerTlsOffsetBeforeCall), + WasmTlsReg); + loadWasmPinnedRegsFromTls(); + switchToWasmTlsRealm(ABINonArgReturnReg0, ABINonArgReturnReg1); + jump(&done); + + // Fast path: just load the code pointer and go. The tls and heap register + // are the same as in the caller, and nothing will be null. + // + // (In particular, the code pointer will not be null: if it were, the tls + // would have been null, and then it would not have been equivalent to our + // current tls. So no null check is needed on the fast path.) + + bind(&fastCall); + + loadPtr(Address(calleeScratch, offsetof(wasm::FunctionTableElem, code)), + calleeScratch); + + // We use a different type of call site for the fast call since the Tls slots + // in the frame do not have valid values. + + wasm::CallSiteDesc newDesc(desc.lineOrBytecode(), + wasm::CallSiteDesc::IndirectFast); + *fastCallOffset = call(newDesc, calleeScratch); + + bind(&done); } void MacroAssembler::nopPatchableToCall(const wasm::CallSiteDesc& desc) { CodeOffset offset = nopPatchableToCall(); append(desc, offset); } void MacroAssembler::emitPreBarrierFastPath(JSRuntime* rt, MIRType type,
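The comment at the top of wasmCallIndirect above describes the fast-path/slow-path split. The following is a stand-alone C++ model of the emitted control flow, a sketch under assumed, simplified types; the real code emits machine code and also checks the function signature, which is omitted here.

// Stand-alone model of the dual-path call_indirect decision; all types and
// names are illustrative, and the signature check is omitted.
#include <cassert>
#include <cstdint>
#include <stdexcept>

struct TlsData {};                             // per-instance context
struct FunctionTableElem { TlsData* tls; void (*code)(); };

static int gCalls = 0;
static void someWasmFunction() { gCalls++; }

void callIndirect(FunctionTableElem* table, uint32_t tableLength,
                  uint32_t index, TlsData* callerTls) {
  if (index >= tableLength) {
    throw std::runtime_error("Trap: OutOfBounds");
  }
  FunctionTableElem& elem = table[index];
  if (elem.tls == callerTls) {
    // Fast path: the callee shares the caller's tls, so no context/realm
    // switch and no null check (a null tls can never equal the caller's
    // live tls).
    elem.code();                               // CallSiteDesc::IndirectFast
  } else {
    // Slow path: null check, switch to the callee's context, call, then
    // restore the caller's context (pinned registers and realm).
    if (!elem.tls) {
      throw std::runtime_error("Trap: IndirectCallToNull");
    }
    elem.code();                               // CallSiteDesc::Indirect
    // ...restore callerTls, pinned registers, and realm here.
  }
}

int main() {
  TlsData caller, other;
  FunctionTableElem table[2] = {{&caller, someWasmFunction},
                                {&other, someWasmFunction}};
  callIndirect(table, 2, 0, &caller);          // takes the fast path
  callIndirect(table, 2, 1, &caller);          // takes the slow path
  assert(gCalls == 2);
  return 0;
}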
--- a/js/src/jit/MacroAssembler.h +++ b/js/src/jit/MacroAssembler.h @@ -3793,43 +3793,57 @@ class MacroAssembler : public MacroAssem void loadWasmGlobalPtr(uint32_t globalDataOffset, Register dest); // This function takes care of loading the callee's TLS and pinned regs but // it is the caller's responsibility to save/restore TLS or pinned regs. CodeOffset wasmCallImport(const wasm::CallSiteDesc& desc, const wasm::CalleeDesc& callee); - // WasmTableCallIndexReg must contain the index of the indirect call. - // This is for wasm calls only. - CodeOffset wasmCallIndirect(const wasm::CallSiteDesc& desc, - const wasm::CalleeDesc& callee, - bool needsBoundsCheck, - mozilla::Maybe<uint32_t> tableSize); + // WasmTableCallIndexReg must contain the index of the indirect call. This is + // for wasm calls only. + // + // Indirect calls use a dual-path mechanism where a run-time test determines + // whether a context switch is needed (slow path) or not (fast path). This + // gives rise to two call instructions, both of which need safe points. As + // per normal, the call offsets are the code offsets at the end of the call + // instructions (the return points). + void wasmCallIndirect(const wasm::CallSiteDesc& desc, + const wasm::CalleeDesc& callee, bool needsBoundsCheck, + mozilla::Maybe<uint32_t> tableSize, + CodeOffset* fastCallOffset, CodeOffset* slowCallOffset); // WasmTableCallIndexReg must contain the index of the indirect call. // This is for asm.js calls only. CodeOffset asmCallIndirect(const wasm::CallSiteDesc& desc, const wasm::CalleeDesc& callee); // This function takes care of loading the pointer to the current instance // as the implicit first argument. It preserves TLS and pinned registers. // (TLS & pinned regs are non-volatile registers in the system ABI). CodeOffset wasmCallBuiltinInstanceMethod(const wasm::CallSiteDesc& desc, const ABIArg& instanceArg, wasm::SymbolicAddress builtin, wasm::FailureMode failureMode); + // Compute ptr += (indexTemp32 << shift) where shift can be any value < 32. + // May destroy indexTemp32. The value of indexTemp32 must be positive, and it + // is implementation-defined what happens if bits are lost or the value + // becomes negative through the shift. On 64-bit systems, the high 32 bits of + // indexTemp32 must be zero, not garbage. + void shiftIndex32AndAdd(Register indexTemp32, int shift, + Register pointer) PER_SHARED_ARCH; + // The System ABI frequently states that the high bits of a 64-bit register // that holds a 32-bit return value are unpredictable, and C++ compilers will // indeed generate code that leaves garbage in the upper bits. // // Adjust the contents of the 64-bit register `r` to conform to our internal // convention, which requires predictable high bits. In practice, this means - // that the 32-bit valuewill be zero-extended or sign-extended to 64 bits as + // that the 32-bit value will be zero-extended or sign-extended to 64 bits as // appropriate for the platform. void widenInt32(Register r) DEFINED_ON(arm64, x64, mips64); // As enterFakeExitFrame(), but using register conventions appropriate for // wasm stubs. void enterFakeExitFrameForWasm(Register cxreg, Register scratch, ExitFrameType type) PER_SHARED_ARCH;
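The new shiftIndex32AndAdd computes pointer += (index << shift); for wasm table calls the element is two pointers, so shift is 3 (8-byte elements on 32-bit targets) or 4 (16-byte elements on 64-bit targets). A tiny worked example of that arithmetic, with made-up addresses:

// Worked example of the scaled-index arithmetic used to address a table
// element; the base address and index are made up.
#include <cassert>
#include <cstdint>

int main() {
  uint64_t tableBase = 0x10000;                // loaded from the Tls global area
  uint32_t index = 5;                          // value in WasmTableCallIndexReg
  int shift = 4;                               // sizeof(FunctionTableElem) == 16

  uint64_t elemAddr = tableBase + (static_cast<uint64_t>(index) << shift);
  assert(elemAddr == tableBase + index * 16u);
  assert(elemAddr == 0x10050);
  return 0;
}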
--- a/js/src/jit/arm/MacroAssembler-arm.cpp
+++ b/js/src/jit/arm/MacroAssembler-arm.cpp
@@ -5971,16 +5971,27 @@ void MacroAssembler::nearbyIntFloat32(Ro
   MOZ_CRASH("not supported on this platform");
 }
 
 void MacroAssembler::copySignDouble(FloatRegister lhs, FloatRegister rhs,
                                     FloatRegister output) {
   MOZ_CRASH("not supported on this platform");
 }
 
+void MacroAssembler::shiftIndex32AndAdd(Register indexTemp32, int shift,
+                                        Register pointer) {
+  if (IsShiftInScaleRange(shift)) {
+    computeEffectiveAddress(
+        BaseIndex(pointer, indexTemp32, ShiftToScale(shift)), pointer);
+    return;
+  }
+  lshift32(Imm32(shift), indexTemp32);
+  addPtr(indexTemp32, pointer);
+}
+
 //}}} check_macroassembler_style
 
 void MacroAssemblerARM::wasmTruncateToInt32(FloatRegister input,
                                             Register output, MIRType fromType,
                                             bool isUnsigned, bool isSaturating,
                                             Label* oolEntry) {
   ScratchDoubleScope scratchScope(asMasm());
   ScratchRegisterScope scratchReg(asMasm());
--- a/js/src/jit/arm64/MacroAssembler-arm64.cpp
+++ b/js/src/jit/arm64/MacroAssembler-arm64.cpp
@@ -3138,12 +3138,18 @@ void MacroAssembler::copySignFloat32(Flo
     moveFloat32(lhs, output);
   }
   bit(ARMFPRegister(output.encoding(), vixl::VectorFormat::kFormat8B),
      ARMFPRegister(rhs.encoding(), vixl::VectorFormat::kFormat8B),
      ARMFPRegister(scratch.encoding(), vixl::VectorFormat::kFormat8B));
 }
 
+void MacroAssembler::shiftIndex32AndAdd(Register indexTemp32, int shift,
+                                        Register pointer) {
+  Add(ARMRegister(pointer, 64), ARMRegister(pointer, 64),
+      Operand(ARMRegister(indexTemp32, 64), vixl::LSL, shift));
+}
+
 //}}} check_macroassembler_style
 
 }  // namespace jit
 }  // namespace js
--- a/js/src/jit/mips-shared/MacroAssembler-mips-shared.cpp
+++ b/js/src/jit/mips-shared/MacroAssembler-mips-shared.cpp
@@ -3340,9 +3340,20 @@ void MacroAssembler::nearbyIntFloat32(Ro
   MOZ_CRASH("not supported on this platform");
 }
 
 void MacroAssembler::copySignDouble(FloatRegister lhs, FloatRegister rhs,
                                     FloatRegister output) {
   MOZ_CRASH("not supported on this platform");
 }
 
+void MacroAssembler::shiftIndex32AndAdd(Register indexTemp32, int shift,
+                                        Register pointer) {
+  if (IsShiftInScaleRange(shift)) {
+    computeEffectiveAddress(
+        BaseIndex(pointer, indexTemp32, ShiftToScale(shift)), pointer);
+    return;
+  }
+  lshift32(Imm32(shift), indexTemp32);
+  addPtr(indexTemp32, pointer);
+}
+
 //}}} check_macroassembler_style
--- a/js/src/jit/shared/LIR-shared.h +++ b/js/src/jit/shared/LIR-shared.h @@ -3159,44 +3159,86 @@ class LWasmDerivedPointer : public LInst class LWasmParameterI64 : public LInstructionHelper<INT64_PIECES, 0, 0> { public: LIR_HEADER(WasmParameterI64); LWasmParameterI64() : LInstructionHelper(classOpcode) {} }; +// This is used only with LWasmCall. +class LWasmCallIndirectAdjunctSafepoint : public LInstructionHelper<0, 0, 0> { + CodeOffset offs_; + uint32_t framePushedAtStackMapBase_; + + public: + LIR_HEADER(WasmCallIndirectAdjunctSafepoint); + + LWasmCallIndirectAdjunctSafepoint() + : LInstructionHelper(classOpcode), + offs_(0), + framePushedAtStackMapBase_(0) {} + + CodeOffset safepointLocation() const { + MOZ_ASSERT(offs_.offset() != 0); + return offs_; + } + uint32_t framePushedAtStackMapBase() const { + MOZ_ASSERT(offs_.offset() != 0); + return framePushedAtStackMapBase_; + } + void recordSafepointInfo(CodeOffset offs, uint32_t framePushed) { + offs_ = offs; + framePushedAtStackMapBase_ = framePushed; + } +}; + +// LWasmCall may be generated into two function calls in the case of +// call_indirect, one for the fast path and one for the slow path. In that +// case, the node carries a pointer to a companion node, the "adjunct +// safepoint", representing the safepoint for the second of the two calls. The +// dual-call construction is only meaningful for wasm because wasm has no +// invalidation of code; this is not a pattern to be used generally. class LWasmCall : public LVariadicInstruction<0, 0> { bool needsBoundsCheck_; mozilla::Maybe<uint32_t> tableSize_; + LWasmCallIndirectAdjunctSafepoint* adjunctSafepoint_; public: LIR_HEADER(WasmCall); LWasmCall(uint32_t numOperands, bool needsBoundsCheck, mozilla::Maybe<uint32_t> tableSize = mozilla::Nothing()) : LVariadicInstruction(classOpcode, numOperands), needsBoundsCheck_(needsBoundsCheck), - tableSize_(tableSize) { + tableSize_(tableSize), + adjunctSafepoint_(nullptr) { this->setIsCall(); } MWasmCall* mir() const { return mir_->toWasmCall(); } static bool isCallPreserved(AnyRegister reg) { // All MWasmCalls preserve the TLS register: // - internal/indirect calls do by the internal wasm ABI // - import calls do by explicitly saving/restoring at the callsite // - builtin calls do because the TLS reg is non-volatile // See also CodeGeneratorShared::emitWasmCall. return !reg.isFloat() && reg.gpr() == WasmTlsReg; } bool needsBoundsCheck() const { return needsBoundsCheck_; } mozilla::Maybe<uint32_t> tableSize() const { return tableSize_; } + LWasmCallIndirectAdjunctSafepoint* adjunctSafepoint() const { + MOZ_ASSERT(adjunctSafepoint_ != nullptr); + return adjunctSafepoint_; + } + void setAdjunctSafepoint(LWasmCallIndirectAdjunctSafepoint* asp) { + adjunctSafepoint_ = asp; + } }; class LWasmRegisterResult : public LInstructionHelper<1, 0, 0> { public: LIR_HEADER(WasmRegisterResult); LWasmRegisterResult() : LInstructionHelper(classOpcode) {}
--- a/js/src/jit/shared/Lowering-shared.cpp
+++ b/js/src/jit/shared/Lowering-shared.cpp
@@ -301,18 +301,17 @@ void LIRGeneratorShared::assignSafepoint
   osiPoint_ = new (alloc()) LOsiPoint(ins->safepoint(), postSnapshot);
 
   if (!lirGraph_.noteNeedsSafepoint(ins)) {
     abort(AbortReason::Alloc, "noteNeedsSafepoint failed");
     return;
   }
 }
 
-void LIRGeneratorShared::assignWasmSafepoint(LInstruction* ins,
-                                             MInstruction* mir) {
+void LIRGeneratorShared::assignWasmSafepoint(LInstruction* ins) {
   MOZ_ASSERT(!osiPoint_);
   MOZ_ASSERT(!ins->safepoint());
 
   ins->initSafepoint(alloc());
 
   if (!lirGraph_.noteNeedsSafepoint(ins)) {
     abort(AbortReason::Alloc, "noteNeedsSafepoint failed");
     return;
--- a/js/src/jit/shared/Lowering-shared.h
+++ b/js/src/jit/shared/Lowering-shared.h
@@ -344,17 +344,17 @@ class LIRGeneratorShared {
   // Marks this instruction as needing to call into either the VM or GC. This
   // function may build a snapshot that captures the result of its own
   // instruction, and as such, should generally be called after define*().
   void assignSafepoint(LInstruction* ins, MInstruction* mir,
                        BailoutKind kind = BailoutKind::DuringVMCall);
 
   // Marks this instruction as needing a wasm safepoint.
-  void assignWasmSafepoint(LInstruction* ins, MInstruction* mir);
+  void assignWasmSafepoint(LInstruction* ins);
 
   inline void lowerConstantDouble(double d, MInstruction* mir);
   inline void lowerConstantFloat32(float f, MInstruction* mir);
 
   bool canSpecializeWasmCompareAndSelect(MCompare::CompareType compTy,
                                          MIRType insTy);
   void lowerWasmCompareAndSelect(MWasmSelect* ins, MDefinition* lhs,
                                  MDefinition* rhs, MCompare::CompareType compTy,
--- a/js/src/jit/x86-shared/MacroAssembler-x86-shared.cpp
+++ b/js/src/jit/x86-shared/MacroAssembler-x86-shared.cpp
@@ -2136,9 +2136,20 @@ void MacroAssembler::copySignFloat32(Flo
     float keepSignMask = mozilla::BitwiseCast<float>(INT32_MIN);
     loadConstantFloat32(keepSignMask, scratch);
     vandps(rhs, scratch, scratch);
   }
   vorps(scratch, output, output);
 }
 
+void MacroAssembler::shiftIndex32AndAdd(Register indexTemp32, int shift,
+                                        Register pointer) {
+  if (IsShiftInScaleRange(shift)) {
+    computeEffectiveAddress(
+        BaseIndex(pointer, indexTemp32, ShiftToScale(shift)), pointer);
+    return;
+  }
+  lshift32(Imm32(shift), indexTemp32);
+  addPtr(indexTemp32, pointer);
+}
+
 //}}} check_macroassembler_style
--- a/js/src/wasm/WasmBCClass.h +++ b/js/src/wasm/WasmBCClass.h @@ -125,28 +125,28 @@ struct AccessCheck { bool omitAlignmentCheck; bool onlyPointerAlignment; }; // Encapsulate all the information about a function call. struct FunctionCall { explicit FunctionCall(uint32_t lineOrBytecode) : lineOrBytecode(lineOrBytecode), - isInterModule(false), + restoreRegisterStateAndRealm(false), usesSystemAbi(false), #ifdef JS_CODEGEN_ARM hardFP(true), #endif frameAlignAdjustment(0), stackArgAreaSize(0) { } uint32_t lineOrBytecode; WasmABIArgGenerator abi; - bool isInterModule; + bool restoreRegisterStateAndRealm; bool usesSystemAbi; #ifdef JS_CODEGEN_ARM bool hardFP; #endif size_t frameAlignAdjustment; size_t stackArgAreaSize; }; @@ -908,28 +908,30 @@ struct BaseCompiler final { // Move return values to memory before returning, as appropriate void popStackReturnValues(const ResultType& resultType); ////////////////////////////////////////////////////////////////////// // // Calls. - void beginCall(FunctionCall& call, UseABI useABI, InterModule interModule); + void beginCall(FunctionCall& call, UseABI useABI, + RestoreRegisterStateAndRealm restoreRegisterStateAndRealm); void endCall(FunctionCall& call, size_t stackSpace); void startCallArgs(size_t stackArgAreaSizeUnaligned, FunctionCall* call); ABIArg reservePointerArgument(FunctionCall* call); void passArg(ValType type, const Stk& arg, FunctionCall* call); CodeOffset callDefinition(uint32_t funcIndex, const FunctionCall& call); CodeOffset callSymbolic(SymbolicAddress callee, const FunctionCall& call); // Precondition for the call*() methods: sync() - CodeOffset callIndirect(uint32_t funcTypeIndex, uint32_t tableIndex, - const Stk& indexVal, const FunctionCall& call); + void callIndirect(uint32_t funcTypeIndex, uint32_t tableIndex, + const Stk& indexVal, const FunctionCall& call, + CodeOffset* fastCallOffset, CodeOffset* slowCallOffset); CodeOffset callImport(unsigned globalDataOffset, const FunctionCall& call); CodeOffset builtinCall(SymbolicAddress builtin, const FunctionCall& call); CodeOffset builtinInstanceMethodCall(const SymbolicAddressSignature& builtin, const ABIArg& instanceArg, const FunctionCall& call); [[nodiscard]] bool pushCallResults(const FunctionCall& call, ResultType type, const StackResultsLoc& loc);
--- a/js/src/wasm/WasmBCDefs.h
+++ b/js/src/wasm/WasmBCDefs.h
@@ -76,39 +76,41 @@ using IsKnownNotZero = bool;
 using IsUnsigned = bool;
 using IsRemainder = bool;
 using NeedsBoundsCheck = bool;
 using WantResult = bool;
 using ZeroOnOverflow = bool;
 
 class BaseStackFrame;
 
-// Two flags, useABI and interModule, control how calls are made.
+// Two flags, useABI and restoreRegisterStateAndRealm, control how calls are
+// made.
 //
 // UseABI::Wasm implies that the Tls/Heap/Global registers are nonvolatile,
-// except when InterModule::True is also set, when they are volatile.
+// except when RestoreRegisterStateAndRealm::True is also set, when they are
+// volatile.
 //
-// UseABI::Builtin implies that the Tls/Heap/Global registers are volatile.
-// In this case, we require InterModule::False.  The calling convention
-// is otherwise like UseABI::Wasm.
+// UseABI::Builtin implies that the Tls/Heap/Global registers are volatile.
+// In this case, we require RestoreRegisterStateAndRealm::False.  The calling
+// convention is otherwise like UseABI::Wasm.
 //
 // UseABI::System implies that the Tls/Heap/Global registers are volatile.
 // Additionally, the parameter passing mechanism may be slightly different from
 // the UseABI::Wasm convention.
 //
 // When the Tls/Heap/Global registers are not volatile, the baseline compiler
 // will restore the Tls register from its save slot before the call, since the
 // baseline compiler uses the Tls register for other things.
 //
 // When those registers are volatile, the baseline compiler will reload them
 // after the call (it will restore the Tls register from the save slot and load
 // the other two from the Tls data).
 
 enum class UseABI { Wasm, Builtin, System };
-enum class InterModule { False = false, True = true };
+enum class RestoreRegisterStateAndRealm { False = false, True = true };
 enum class RhsDestOp { True = true };
 
 // Compiler configuration.
 //
 // The following internal configuration #defines are used. The configuration is
 // partly below in this file, partly in WasmBCRegDefs.h.
 //
 // RABALDR_HAS_HEAPREG
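A condensed model of the rules in the comment above (when the baseline compiler must restore register state after a call), written as an executable check. The helper name and its return-value interpretation are assumptions made here for illustration only; note that the System case reloads Tls and pinned registers but does not switch realms.

// Condensed, illustrative model of the UseABI / RestoreRegisterStateAndRealm
// rules; the helper below is not part of the real code.
#include <cassert>

enum class UseABI { Wasm, Builtin, System };
enum class RestoreRegisterStateAndRealm { False = false, True = true };

// Returns true when the caller has to reload Tls/pinned registers after the
// call (the True case additionally switches back to the caller's realm).
bool mustRestoreAfterCall(UseABI abi, RestoreRegisterStateAndRealm restore) {
  // Builtin calls never request the full restore (asserted in beginCall).
  assert(!(abi == UseABI::Builtin &&
           restore == RestoreRegisterStateAndRealm::True));
  return restore == RestoreRegisterStateAndRealm::True ||
         abi == UseABI::System;
}

int main() {
  // Same-module wasm call: registers are preserved, nothing to restore.
  assert(!mustRestoreAfterCall(UseABI::Wasm,
                               RestoreRegisterStateAndRealm::False));
  // Import or instance call: full restore of register state and realm.
  assert(mustRestoreAfterCall(UseABI::Wasm,
                              RestoreRegisterStateAndRealm::True));
  // System-ABI call-out: the registers are volatile and must be reloaded.
  assert(mustRestoreAfterCall(UseABI::System,
                              RestoreRegisterStateAndRealm::False));
  return 0;
}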
--- a/js/src/wasm/WasmBaselineCompile.cpp +++ b/js/src/wasm/WasmBaselineCompile.cpp @@ -1085,21 +1085,25 @@ void BaseCompiler::shuffleStackResultsBe fr.popStackBeforeBranch(destHeight, stackResultBytes); } ////////////////////////////////////////////////////////////////////////////// // // Function calls. -void BaseCompiler::beginCall(FunctionCall& call, UseABI useABI, - InterModule interModule) { - MOZ_ASSERT_IF(useABI == UseABI::Builtin, interModule == InterModule::False); - - call.isInterModule = interModule == InterModule::True; +void BaseCompiler::beginCall( + FunctionCall& call, UseABI useABI, + RestoreRegisterStateAndRealm restoreRegisterStateAndRealm) { + MOZ_ASSERT_IF( + useABI == UseABI::Builtin, + restoreRegisterStateAndRealm == RestoreRegisterStateAndRealm::False); + + call.restoreRegisterStateAndRealm = + restoreRegisterStateAndRealm == RestoreRegisterStateAndRealm::True; call.usesSystemAbi = useABI == UseABI::System; if (call.usesSystemAbi) { // Call-outs need to use the appropriate system ABI. #if defined(JS_CODEGEN_ARM) call.hardFP = UseHardFpABI(); call.abi.setUseHardFp(call.hardFP); #endif @@ -1118,17 +1122,17 @@ void BaseCompiler::beginCall(FunctionCal void BaseCompiler::endCall(FunctionCall& call, size_t stackSpace) { size_t adjustment = call.stackArgAreaSize + call.frameAlignAdjustment; fr.freeArgAreaAndPopBytes(adjustment, stackSpace); MOZ_ASSERT(stackMapGenerator_.framePushedExcludingOutboundCallArgs.isSome()); stackMapGenerator_.framePushedExcludingOutboundCallArgs.reset(); - if (call.isInterModule) { + if (call.restoreRegisterStateAndRealm) { fr.loadTlsPtr(WasmTlsReg); masm.loadWasmPinnedRegsFromTls(); masm.switchToWasmTlsRealm(ABINonArgReturnReg0, ABINonArgReturnReg1); } else if (call.usesSystemAbi) { // On x86 there are no pinned registers, so don't waste time // reloading the Tls. 
#ifndef JS_CODEGEN_X86 fr.loadTlsPtr(WasmTlsReg); @@ -1344,30 +1348,31 @@ CodeOffset BaseCompiler::callDefinition( CodeOffset BaseCompiler::callSymbolic(SymbolicAddress callee, const FunctionCall& call) { CallSiteDesc desc(call.lineOrBytecode, CallSiteDesc::Symbolic); return masm.call(desc, callee); } // Precondition: sync() -CodeOffset BaseCompiler::callIndirect(uint32_t funcTypeIndex, - uint32_t tableIndex, const Stk& indexVal, - const FunctionCall& call) { +void BaseCompiler::callIndirect(uint32_t funcTypeIndex, uint32_t tableIndex, + const Stk& indexVal, const FunctionCall& call, + CodeOffset* fastCallOffset, + CodeOffset* slowCallOffset) { const TypeIdDesc& funcTypeId = moduleEnv_.typeIds[funcTypeIndex]; MOZ_ASSERT(funcTypeId.kind() != TypeIdDescKind::None); const TableDesc& table = moduleEnv_.tables[tableIndex]; loadI32(indexVal, RegI32(WasmTableCallIndexReg)); CallSiteDesc desc(call.lineOrBytecode, CallSiteDesc::Indirect); CalleeDesc callee = CalleeDesc::wasmTable(table, funcTypeId); - return masm.wasmCallIndirect(desc, callee, NeedsBoundsCheck(true), - mozilla::Nothing()); + masm.wasmCallIndirect(desc, callee, NeedsBoundsCheck(true), + mozilla::Nothing(), fastCallOffset, slowCallOffset); } // Precondition: sync() CodeOffset BaseCompiler::callImport(unsigned globalDataOffset, const FunctionCall& call) { CallSiteDesc desc(call.lineOrBytecode, CallSiteDesc::Import); CalleeDesc callee = CalleeDesc::import(globalDataOffset); @@ -4314,17 +4319,18 @@ bool BaseCompiler::emitCall() { ResultType resultType(ResultType::Vector(funcType.results())); StackResultsLoc results; if (!pushStackResultsForCall(resultType, RegPtr(ABINonArgReg0), &results)) { return false; } FunctionCall baselineCall(lineOrBytecode); beginCall(baselineCall, UseABI::Wasm, - import ? InterModule::True : InterModule::False); + import ? RestoreRegisterStateAndRealm::True + : RestoreRegisterStateAndRealm::False); if (!emitCallArgs(funcType.args(), results, &baselineCall, CalleeOnStack::False)) { return false; } CodeOffset raOffset; if (import) { @@ -4374,27 +4380,34 @@ bool BaseCompiler::emitCallIndirect() { ResultType resultType(ResultType::Vector(funcType.results())); StackResultsLoc results; if (!pushStackResultsForCall(resultType, RegPtr(ABINonArgReg0), &results)) { return false; } FunctionCall baselineCall(lineOrBytecode); - beginCall(baselineCall, UseABI::Wasm, InterModule::True); + // State and realm are restored as needed by by callIndirect (really by + // MacroAssembler::wasmCallIndirect). + beginCall(baselineCall, UseABI::Wasm, RestoreRegisterStateAndRealm::False); if (!emitCallArgs(funcType.args(), results, &baselineCall, CalleeOnStack::True)) { return false; } const Stk& callee = peek(results.count()); - CodeOffset raOffset = - callIndirect(funcTypeIndex, tableIndex, callee, baselineCall); - if (!createStackMap("emitCallIndirect", raOffset)) { + CodeOffset fastCallOffset; + CodeOffset slowCallOffset; + callIndirect(funcTypeIndex, tableIndex, callee, baselineCall, &fastCallOffset, + &slowCallOffset); + if (!createStackMap("emitCallIndirect", fastCallOffset)) { + return false; + } + if (!createStackMap("emitCallIndirect", slowCallOffset)) { return false; } popStackResultsAfterCall(results, stackArgBytes); endCall(baselineCall, stackArgBytes); popValueStackBy(numArgs); @@ -4441,17 +4454,17 @@ bool BaseCompiler::emitUnaryMathBuiltinC ValTypeVector& signature = operandType == ValType::F32 ? 
SigF_ : SigD_; ValType retType = operandType; uint32_t numArgs = signature.length(); size_t stackSpace = stackConsumed(numArgs); StackResultsLoc noStackResults; FunctionCall baselineCall(lineOrBytecode); - beginCall(baselineCall, UseABI::Builtin, InterModule::False); + beginCall(baselineCall, UseABI::Builtin, RestoreRegisterStateAndRealm::False); if (!emitCallArgs(signature, noStackResults, &baselineCall, CalleeOnStack::False)) { return false; } CodeOffset raOffset = builtinCall(callee, baselineCall); if (!createStackMap("emitUnaryMathBuiltin[..]", raOffset)) { @@ -5191,17 +5204,17 @@ bool BaseCompiler::emitInstanceCall(uint MOZ_ASSERT(argTypes[0] == MIRType::Pointer); sync(); uint32_t numNonInstanceArgs = builtin.numArgs - 1 /* instance */; size_t stackSpace = stackConsumed(numNonInstanceArgs); FunctionCall baselineCall(lineOrBytecode); - beginCall(baselineCall, UseABI::System, InterModule::True); + beginCall(baselineCall, UseABI::System, RestoreRegisterStateAndRealm::True); ABIArg instanceArg = reservePointerArgument(&baselineCall); startCallArgs(StackArgAreaSizeUnaligned(builtin), &baselineCall); for (uint32_t i = 1; i < builtin.numArgs; i++) { ValType t; switch (argTypes[i]) { case MIRType::Int32:
--- a/js/src/wasm/WasmBuiltins.cpp
+++ b/js/src/wasm/WasmBuiltins.cpp
@@ -558,16 +558,18 @@ bool wasm::HandleThrow(JSContext* cx, Wa
     if (tryNote) {
       cx->clearPendingException();
       RootedAnyRef ref(cx, AnyRef::null());
       if (!BoxAnyRef(cx, exn, &ref)) {
         MOZ_ASSERT(cx->isThrowingOutOfMemory());
         continue;
       }
 
+      MOZ_ASSERT(iter.tls() == iter.instance()->tlsData());
+
       iter.tls()->pendingException = ref.get().asJSObject();
 
       rfe->kind = ResumeFromException::RESUME_WASM_CATCH;
       rfe->framePointer = (uint8_t*)iter.frame();
       rfe->tlsData = iter.instance()->tlsData();
 
       size_t offsetAdjustment = 0;
       rfe->stackPointer =
--- a/js/src/wasm/WasmCodegenTypes.h
+++ b/js/src/wasm/WasmCodegenTypes.h
@@ -404,23 +404,24 @@ class CallSiteDesc {
   uint32_t lineOrBytecode_ : LINE_OR_BYTECODE_BITS_SIZE;
   uint32_t kind_ : 3;
 
  public:
   static constexpr uint32_t MAX_LINE_OR_BYTECODE_VALUE =
       (1 << LINE_OR_BYTECODE_BITS_SIZE) - 1;
 
   enum Kind {
-    Func,        // pc-relative call to a specific function
-    Import,      // wasm import call
-    Indirect,    // wasm indirect call
-    Symbolic,    // call to a single symbolic callee
-    EnterFrame,  // call to a enter frame handler
-    LeaveFrame,  // call to a leave frame handler
-    Breakpoint   // call to instruction breakpoint
+    Func,          // pc-relative call to a specific function
+    Import,        // wasm import call
+    Indirect,      // dynamic callee called via register, context on stack
+    IndirectFast,  // dynamically determined to be same-instance
+    Symbolic,      // call to a single symbolic callee
+    EnterFrame,    // call to a enter frame handler
+    LeaveFrame,    // call to a leave frame handler
+    Breakpoint     // call to instruction breakpoint
   };
   CallSiteDesc() : lineOrBytecode_(0), kind_(0) {}
   explicit CallSiteDesc(Kind kind) : lineOrBytecode_(0), kind_(kind) {
     MOZ_ASSERT(kind == Kind(kind_));
   }
   CallSiteDesc(uint32_t lineOrBytecode, Kind kind)
       : lineOrBytecode_(lineOrBytecode), kind_(kind) {
     MOZ_ASSERT(kind == Kind(kind_));
--- a/js/src/wasm/WasmGenerator.cpp
+++ b/js/src/wasm/WasmGenerator.cpp
@@ -474,16 +474,17 @@ bool ModuleGenerator::linkCallSites() {
   for (; lastPatchedCallSite_ < metadataTier_->callSites.length();
        lastPatchedCallSite_++) {
     const CallSite& callSite = metadataTier_->callSites[lastPatchedCallSite_];
     const CallSiteTarget& target = callSiteTargets_[lastPatchedCallSite_];
     uint32_t callerOffset = callSite.returnAddressOffset();
     switch (callSite.kind()) {
       case CallSiteDesc::Import:
       case CallSiteDesc::Indirect:
+      case CallSiteDesc::IndirectFast:
       case CallSiteDesc::Symbolic:
         break;
       case CallSiteDesc::Func: {
         if (funcIsCompiled(target.funcIndex())) {
           uint32_t calleeOffset =
               funcCodeRange(target.funcIndex()).funcUncheckedCallEntry();
           if (InRange(callerOffset, calleeOffset)) {
             masm_.patchCall(callerOffset, calleeOffset);
--- a/js/src/wasm/WasmTypeDef.h
+++ b/js/src/wasm/WasmTypeDef.h
@@ -824,17 +824,17 @@ class TypeIdDesc {
   static bool isGlobal(const TypeDef& type);
 
   TypeIdDesc() : kind_(TypeIdDescKind::None), bits_(0) {}
   static TypeIdDesc global(const TypeDef& type, uint32_t globalDataOffset);
   static TypeIdDesc immediate(const TypeDef& type);
 
   bool isGlobal() const { return kind_ == TypeIdDescKind::Global; }
 
-  size_t immediate() const {
+  uint32_t immediate() const {
     MOZ_ASSERT(kind_ == TypeIdDescKind::Immediate);
     return bits_;
   }
   uint32_t globalDataOffset() const {
     MOZ_ASSERT(kind_ == TypeIdDescKind::Global);
     return bits_;
   }
 };