author | Andy Wingo <wingo@igalia.com> |
date | Tue, 22 Oct 2019 15:30:10 +0000 |
changeset 498684 | a9d2b57a99be08cf6942e5dd8c300e75ba0b7501 |
parent 498683 | 8e233a18ab9ae1511789985097376b6ea1903508 |
child 498685 | e4aa69b40591d7b91661b01371da50cb059608a1 |
push id | 98604 |
push user | jdemooij@mozilla.com |
push date | Wed, 23 Oct 2019 08:26:28 +0000 |
treeherder | autoland@a9d2b57a99be |
reviewers | luke, lth |
bugs | 1578418 |
milestone | 72.0a1 |
first release with | nightly linux32, nightly linux64, nightly mac, nightly win32, nightly win64 |
last release without | nightly linux32, nightly linux64, nightly mac, nightly win32, nightly win64 |
--- a/js/src/wasm/WasmBaselineCompile.cpp +++ b/js/src/wasm/WasmBaselineCompile.cpp @@ -158,17 +158,16 @@ namespace wasm { using namespace js::jit; using HandleNaNSpecially = bool; using InvertBranch = bool; using IsKnownNotZero = bool; using IsUnsigned = bool; using NeedsBoundsCheck = bool; -using PopStack = bool; using WantResult = bool; using ZeroOnOverflow = bool; class BaseStackFrame; // Two flags, useABI and interModule, control how calls are made. // // UseABI::Wasm implies that the Tls/Heap/Global registers are nonvolatile, @@ -223,19 +222,20 @@ static constexpr FloatRegister RabaldrSc static_assert(RabaldrScratchF32 != ScratchFloat32Reg, "Too busy"); static_assert(RabaldrScratchF64 != ScratchDoubleReg, "Too busy"); #endif #ifdef JS_CODEGEN_X86 // The selection of EBX here steps gingerly around: the need for EDX // to be allocatable for multiply/divide; ECX to be allocatable for -// shift/rotate; EAX (= ReturnReg) to be allocatable as the joinreg; -// EBX not being one of the WasmTableCall registers; and needing a -// temp register for load/store that has a single-byte persona. +// shift/rotate; EAX (= ReturnReg) to be allocatable as the result +// register; EBX not being one of the WasmTableCall registers; and +// needing a temp register for load/store that has a single-byte +// persona. // // The compiler assumes that RabaldrScratchI32 has a single-byte // persona. Code for 8-byte atomic operations assumes that // RabaldrScratchI32 is in fact ebx. # define RABALDR_SCRATCH_I32 static const Register RabaldrScratchI32 = ebx; @@ -479,16 +479,18 @@ struct SpecificRegs { class BaseCompilerInterface { public: // Spill all spillable registers. // // TODO / OPTIMIZE (Bug 1316802): It's possible to do better here by // spilling only enough registers to satisfy current needs. virtual void sync() = 0; + virtual void saveTempPtr(RegPtr r) = 0; + virtual void restoreTempPtr(RegPtr r) = 0; }; // Register allocator. class BaseRegAlloc { // Notes on float register allocation. // // The general rule in SpiderMonkey is that float registers can alias double @@ -762,16 +764,30 @@ class BaseRegAlloc { void needPtr(RegPtr specific) { if (!isAvailablePtr(specific)) { bc.sync(); } allocGPR(specific); } + // Use when you need a register for a short time but explicitly want to avoid + // a full sync(). 
+ MOZ_MUST_USE RegPtr needTempPtr(RegPtr fallback, bool* saved) { + if (hasGPR()) { + *saved = false; + return RegPtr(allocGPR()); + } + *saved = true; + bc.saveTempPtr(fallback); + MOZ_ASSERT(isAvailablePtr(fallback)); + allocGPR(fallback); + return RegPtr(fallback); + } + MOZ_MUST_USE RegF32 needF32() { if (!hasFPU<MIRType::Float32>()) { bc.sync(); } return RegF32(allocFPU<MIRType::Float32>()); } void needF32(RegF32 specific) { @@ -800,16 +816,24 @@ class BaseRegAlloc { void freeI64(RegI64 r) { freeInt64(r); } void freePtr(RegPtr r) { freeGPR(r); } void freeF64(RegF64 r) { freeFPU(r); } void freeF32(RegF32 r) { freeFPU(r); } + void freeTempPtr(RegPtr r, bool saved) { + freePtr(r); + if (saved) { + bc.restoreTempPtr(r); + MOZ_ASSERT(!isAvailablePtr(r)); + } + } + #ifdef JS_CODEGEN_ARM MOZ_MUST_USE RegI64 needI64Pair() { if (!hasGPRPair()) { bc.sync(); } Register low, high; allocGPRPair(&low, &high); return RegI64(Register64(high, low)); @@ -1089,16 +1113,21 @@ class StackHeight { friend class BaseStackFrameAllocator; uint32_t height; public: explicit StackHeight(uint32_t h) : height(h) {} static StackHeight Invalid() { return StackHeight(UINT32_MAX); } bool isValid() const { return height != UINT32_MAX; } + bool operator==(StackHeight rhs) const { + MOZ_ASSERT(isValid() && rhs.isValid()); + return height == rhs.height; + } + bool operator!=(StackHeight rhs) const { return !(*this == rhs); } }; // Abstraction of the baseline compiler's stack frame (except for the Frame / // DebugFrame parts). See comments above for more. Remember, "below" on the // stack means at lower addresses. // // The abstraction is split into two parts: BaseStackFrameAllocator is // responsible for allocating and deallocating space on the stack and for @@ -1238,16 +1267,23 @@ class BaseStackFrameAllocator { // // The Dynamic area - the dynamic part of the frame, for spilling and saving // intermediate values. // Offset off of sp_ for the slot at stack area location `offset`. int32_t stackOffset(int32_t offset) { return masm.framePushed() - offset; } + uint32_t computeHeightWithStackResults(StackHeight stackBase, + uint32_t stackResultBytes) { + MOZ_ASSERT(stackResultBytes); + MOZ_ASSERT(currentStackHeight() >= stackBase.height); + return stackBase.height + stackResultBytes; + } + #ifdef RABALDR_CHUNKY_STACK void pushChunkyBytes(uint32_t bytes) { MOZ_ASSERT(bytes <= ChunkSize); checkChunkyInvariants(); if (masm.framePushed() - currentStackHeight_ < bytes) { masm.reserveStack(ChunkSize); } currentStackHeight_ += bytes; @@ -1330,56 +1366,61 @@ class BaseStackFrameAllocator { uint32_t dynamicHeight() const { return currentStackHeight() - localSize_; } // Before branching to an outer control label, pop the execution stack to // the level expected by that region, but do not update masm.framePushed() // as that will happen as compilation leaves the block. // // Note these operate directly on the stack pointer register. 
- void popStackBeforeBranch(StackHeight destStackHeight) { + void popStackBeforeBranch(StackHeight destStackHeight, + uint32_t stackResultBytes) { uint32_t framePushedHere = masm.framePushed(); - uint32_t framePushedThere = framePushedForHeight(destStackHeight); + StackHeight heightThere = + StackHeight(destStackHeight.height + stackResultBytes); + uint32_t framePushedThere = framePushedForHeight(heightThere); if (framePushedHere > framePushedThere) { masm.addToStackPtr(Imm32(framePushedHere - framePushedThere)); } } - bool willPopStackBeforeBranch(StackHeight destStackHeight) { - uint32_t framePushedHere = masm.framePushed(); - uint32_t framePushedThere = framePushedForHeight(destStackHeight); - return framePushedHere > framePushedThere; - } - - // Before exiting a nested control region, pop the execution stack - // to the level expected by the nesting region, and free the - // stack. - // - // Note this operates on the stack height, which is not the same as the - // stack pointer on chunky-stack systems; the stack pointer may or may not - // change on such systems. - - void popStackOnBlockExit(StackHeight destStackHeight, bool deadCode) { - uint32_t stackHeightHere = currentStackHeight(); - uint32_t stackHeightThere = destStackHeight.height; - if (stackHeightHere > stackHeightThere) { -#ifdef RABALDR_CHUNKY_STACK - if (deadCode) { - setStackHeight(destStackHeight); - } else { - popChunkyBytes(stackHeightHere - stackHeightThere); - } -#else - if (deadCode) { - masm.setFramePushed(stackHeightThere); - } else { - masm.freeStack(stackHeightHere - stackHeightThere); - } -#endif - } + void popStackBeforeBranch(StackHeight destStackHeight, ResultType type) { + popStackBeforeBranch(destStackHeight, + ABIResultIter::MeasureStackBytes(type)); + } + + // Given that there are |stackParamSize| bytes on the dynamic stack + // corresponding to the stack results, return the stack height once these + // parameters are popped. + + StackHeight stackResultsBase(uint32_t stackParamSize) { + return StackHeight(currentStackHeight() - stackParamSize); + } + + // For most of WebAssembly, adjacent instructions have fallthrough control + // flow between them, which allows us to simply thread the current stack + // height through the compiler. There are two exceptions to this rule: when + // leaving a block via dead code, and when entering the "else" arm of an "if". + // In these cases, the stack height is the block entry height, plus any stack + // values (results in the block exit case, parameters in the else entry case). + + void resetStackHeight(StackHeight destStackHeight, ResultType type) { + uint32_t height = destStackHeight.height; + height += ABIResultIter::MeasureStackBytes(type); + setStackHeight(StackHeight(height)); + } + + // Return offset of stack result. + + uint32_t locateStackResult(const ABIResult& result, StackHeight stackBase, + uint32_t stackResultBytes) { + MOZ_ASSERT(result.onStack()); + MOZ_ASSERT(result.stackOffset() + result.size() <= stackResultBytes); + uint32_t end = computeHeightWithStackResults(stackBase, stackResultBytes); + return end - result.stackOffset(); } public: ////////////////////////////////////////////////////////////////////// // // The Argument area - for outgoing calls. // // We abstract these operations as an optimization: we can merge the freeing @@ -1611,32 +1652,20 @@ class BaseStackFrame final : public Base // Offset off of sp_ for a local with offset `offset` from Frame. 
int32_t localOffset(int32_t offset) { return masm.framePushed() - offset; } public: /////////////////////////////////////////////////////////////////////////// // // Dynamic area - // Sizes of items in the stack area. - // - // The size values come from the implementations of Push() in - // MacroAssembler-x86-shared.cpp and MacroAssembler-arm-shared.cpp, and from - // VFPRegister::size() in Architecture-arm.h. - // - // On ARM unlike on x86 we push a single for float. - - static const size_t StackSizeOfPtr = sizeof(intptr_t); - static const size_t StackSizeOfInt64 = sizeof(int64_t); -#if defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_MIPS32) - static const size_t StackSizeOfFloat = sizeof(float); -#else - static const size_t StackSizeOfFloat = sizeof(double); -#endif - static const size_t StackSizeOfDouble = sizeof(double); + static const size_t StackSizeOfPtr = ABIResult::StackSizeOfPtr; + static const size_t StackSizeOfInt64 = ABIResult::StackSizeOfInt64; + static const size_t StackSizeOfFloat = ABIResult::StackSizeOfFloat; + static const size_t StackSizeOfDouble = ABIResult::StackSizeOfDouble; uint32_t pushPtr(Register r) { DebugOnly<uint32_t> stackBefore = currentStackHeight(); #ifdef RABALDR_CHUNKY_STACK pushChunkyBytes(StackSizeOfPtr); masm.storePtr(r, Address(sp_, stackOffset(currentStackHeight()))); #else masm.Push(r); @@ -1742,16 +1771,112 @@ class BaseStackFrame final : public Base void loadStackF64(int32_t offset, RegF64 dest) { masm.loadDouble(Address(sp_, stackOffset(offset)), dest); } void loadStackF32(int32_t offset, RegF32 dest) { masm.loadFloat32(Address(sp_, stackOffset(offset)), dest); } + + uint32_t prepareStackResultArea(StackHeight stackBase, + uint32_t stackResultBytes) { + uint32_t end = computeHeightWithStackResults(stackBase, stackResultBytes); + if (currentStackHeight() < end) { + uint32_t bytes = end - currentStackHeight(); +#ifdef RABALDR_CHUNKY_STACK + pushChunkyBytes(bytes); +#else + masm.reserveStack(bytes); +#endif + maxFramePushed_ = Max(maxFramePushed_, masm.framePushed()); + } + return end; + } + + void finishStackResultArea(StackHeight stackBase, uint32_t stackResultBytes) { + uint32_t end = computeHeightWithStackResults(stackBase, stackResultBytes); + MOZ_ASSERT(currentStackHeight() >= end); + popBytes(currentStackHeight() - end); + } + + void shuffleStackResultsTowardFP(uint32_t srcHeight, uint32_t destHeight, + uint32_t bytes, Register temp) { + MOZ_ASSERT(destHeight < srcHeight); + MOZ_ASSERT(bytes % sizeof(uint32_t) == 0); + uint32_t destOffset = stackOffset(destHeight); + uint32_t srcOffset = stackOffset(srcHeight); + MOZ_ASSERT(destOffset >= bytes); + MOZ_ASSERT(srcOffset >= bytes); + while (bytes >= sizeof(intptr_t)) { + destOffset -= sizeof(intptr_t); + srcOffset -= sizeof(intptr_t); + bytes -= sizeof(intptr_t); + masm.loadPtr(Address(sp_, srcOffset), temp); + masm.storePtr(temp, Address(sp_, destOffset)); + } + if (bytes) { + MOZ_ASSERT(bytes == sizeof(uint32_t)); + destOffset -= sizeof(uint32_t); + srcOffset -= sizeof(uint32_t); + masm.load32(Address(sp_, srcOffset), temp); + masm.store32(temp, Address(sp_, destOffset)); + } + } + + void shuffleStackResultsTowardFP(StackHeight srcHeight, + StackHeight destHeight, uint32_t bytes, + Register temp) { + MOZ_ASSERT(srcHeight.isValid()); + MOZ_ASSERT(destHeight.isValid()); + uint32_t src = computeHeightWithStackResults(srcHeight, bytes); + uint32_t dest = computeHeightWithStackResults(destHeight, bytes); + MOZ_ASSERT(src <= currentStackHeight()); + MOZ_ASSERT(dest <= currentStackHeight()); + 
shuffleStackResultsTowardFP(src - bytes, dest - bytes, bytes, temp); + } + + void shuffleStackResultsTowardSP(uint32_t srcHeight, uint32_t destHeight, + uint32_t bytes, Register temp) { + MOZ_ASSERT(destHeight > srcHeight); + MOZ_ASSERT(bytes % sizeof(uint32_t) == 0); + uint32_t destOffset = stackOffset(destHeight); + uint32_t srcOffset = stackOffset(srcHeight); + MOZ_ASSERT(destOffset >= bytes); + MOZ_ASSERT(srcOffset >= bytes); + while (bytes >= sizeof(intptr_t)) { + masm.loadPtr(Address(sp_, srcOffset - bytes), temp); + masm.storePtr(temp, Address(sp_, destOffset - bytes)); + bytes -= sizeof(intptr_t); + } + if (bytes) { + MOZ_ASSERT(bytes == sizeof(uint32_t)); + masm.load32(Address(sp_, srcOffset - bytes), temp); + masm.store32(temp, Address(sp_, destOffset - bytes)); + } + } + + void storeImmediateToStack(int32_t imm, uint32_t destHeight, Register temp) { + masm.move32(Imm32(imm), temp); + masm.store32(temp, Address(sp_, stackOffset(destHeight))); + } + + void storeImmediateToStack(int64_t imm, uint32_t destHeight, Register temp) { +#ifdef JS_PUNBOX64 + masm.move64(Imm64(imm), Register64(temp)); + masm.store64(Register64(temp), Address(sp_, stackOffset(destHeight))); +#else + union { + int64_t i64; + int32_t i32[2]; + } bits = {.i64 = imm}; + storeImmediateToStack(bits.i32[0], destHeight, temp); + storeImmediateToStack(bits.i32[1], destHeight - sizeof(int32_t), temp); +#endif + } }; void BaseStackFrame::zeroLocals(BaseRegAlloc* ra) { MOZ_ASSERT(varLow_ != UINT32_MAX); if (varLow_ == varHigh_) { return; } @@ -1921,16 +2046,43 @@ struct Stk { MOZ_ASSERT(k > MemLast && k <= LocalLast); } static Stk StkRef(intptr_t v) { Stk s; s.kind_ = ConstRef; s.refval_ = v; return s; } + static Stk StackResult(ValType type, uint32_t offs) { + Kind k; + switch (type.code()) { + case ValType::I32: + k = Stk::MemI32; + break; + case ValType::I64: + k = Stk::MemI64; + break; + case ValType::F32: + k = Stk::MemF32; + break; + case ValType::F64: + k = Stk::MemF64; + break; + case ValType::FuncRef: + case ValType::AnyRef: + case ValType::Ref: + k = Stk::MemRef; + break; + case ValType::NullRef: + MOZ_CRASH("unexpected nullref stack result"); + } + Stk s; + s.setOffs(k, offs); + return s; + } void setOffs(Kind k, uint32_t v) { MOZ_ASSERT(k <= MemLast); kind_ = k; offs_ = v; } Kind kind() const { return kind_; } @@ -2560,26 +2712,16 @@ class BaseCompiler final : public BaseCo BaseStackFrame::LocalVector localInfo_; Vector<OutOfLineCode*, 8, SystemAllocPolicy> outOfLine_; // On specific platforms we sometimes need to use specific registers. SpecificRegs specific_; - // The join registers are used to carry values out of blocks. - // JoinRegI32 and joinRegI64_ must overlap: emitBrIf and - // emitBrTable assume that. - - RegI32 joinRegI32_; - RegI64 joinRegI64_; - RegPtr joinRegPtr_; - RegF32 joinRegF32_; - RegF64 joinRegF64_; - // There are more members scattered throughout. public: BaseCompiler(const ModuleEnvironment& env, const FuncCompileInput& input, const ValTypeVector& locals, const MachineState& trapExitLayout, size_t trapExitLayoutNumWords, Decoder& decoder, StkVector& stkSource, TempAllocator* alloc, MacroAssembler* masm, StackMaps* stackMaps); @@ -2782,116 +2924,165 @@ class BaseCompiler final : public BaseCo } void moveF32(RegF32 src, RegF32 dest) { if (src != dest) { masm.moveFloat32(src, dest); } } - void maybeReserveJoinRegI(ResultType type) { + //////////////////////////////////////////////////////////////////////////// + // + // Block parameters and results. 
+ // + // Blocks may have multiple parameters and multiple results. Blocks can also + // be the target of branches: the entry for loops, and the exit for + // non-loops. + // + // Passing multiple values to a non-branch target (i.e., the entry of a + // "block") falls out naturally: any items on the value stack can flow + // directly from one block to another. + // + // However, for branch targets, we need to allocate well-known locations for + // the branch values. The approach taken in the baseline compiler is to + // allocate registers to the top N values (currently N=1), and then stack + // locations for the rest. + // + + enum class RegKind { All, OnlyGPRs }; + + inline void needResultRegisters(ResultType type, RegKind which) { if (type.empty()) { return; } - MOZ_ASSERT(type.length() == 1, "multi-value joins unimplemented"); - switch (type[0].code()) { - case ValType::I32: - needI32(joinRegI32_); - break; - case ValType::I64: - needI64(joinRegI64_); - break; - case ValType::F32: - case ValType::F64: - break; - case ValType::FuncRef: - case ValType::AnyRef: - case ValType::NullRef: - case ValType::Ref: - needRef(joinRegPtr_); - break; - } - } - - void maybeUnreserveJoinRegI(ResultType type) { + + for (ABIResultIter iter(type); !iter.done(); iter.next()) { + ABIResult result = iter.cur(); + // Register results are visited first; when we see a stack result we're + // done. + if (!result.inRegister()) { + return; + } + switch (result.type().code()) { + case ValType::I32: + needI32(RegI32(result.gpr())); + break; + case ValType::I64: + needI64(RegI64(result.gpr64())); + break; + case ValType::F32: + if (which == RegKind::All) { + needF32(RegF32(result.fpr())); + } + break; + case ValType::F64: + if (which == RegKind::All) { + needF64(RegF64(result.fpr())); + } + break; + case ValType::FuncRef: + case ValType::AnyRef: + case ValType::Ref: + needRef(RegPtr(result.gpr())); + break; + case ValType::NullRef: + MOZ_CRASH("unexpected nullref result"); + } + } + } + + inline void freeResultRegisters(ResultType type, RegKind which) { if (type.empty()) { return; } - MOZ_ASSERT(type.length() == 1, "multi-value joins unimplemented"); - switch (type[0].code()) { - case ValType::I32: - freeI32(joinRegI32_); - break; - case ValType::I64: - freeI64(joinRegI64_); - break; - case ValType::F32: - case ValType::F64: - break; - case ValType::FuncRef: - case ValType::AnyRef: - case ValType::NullRef: - case ValType::Ref: - freeRef(joinRegPtr_); - break; - } - } - - void maybeReserveJoinReg(ResultType type) { - if (type.empty()) { - return; - } - MOZ_ASSERT(type.length() == 1, "multi-value joins unimplemented"); - switch (type[0].code()) { - case ValType::I32: - needI32(joinRegI32_); - break; - case ValType::I64: - needI64(joinRegI64_); - break; - case ValType::F32: - needF32(joinRegF32_); - break; - case ValType::F64: - needF64(joinRegF64_); - break; - case ValType::Ref: - case ValType::NullRef: - case ValType::FuncRef: - case ValType::AnyRef: - needRef(joinRegPtr_); - break; - } - } - - void maybeUnreserveJoinReg(ResultType type) { - if (type.empty()) { - return; - } - MOZ_ASSERT(type.length() == 1, "multi-value joins unimplemented"); - switch (type[0].code()) { - case ValType::I32: - freeI32(joinRegI32_); - break; - case ValType::I64: - freeI64(joinRegI64_); - break; - case ValType::F32: - freeF32(joinRegF32_); - break; - case ValType::F64: - freeF64(joinRegF64_); - break; - case ValType::Ref: - case ValType::NullRef: - case ValType::FuncRef: - case ValType::AnyRef: - freeRef(joinRegPtr_); - break; - } + + for 
(ABIResultIter iter(type); !iter.done(); iter.next()) { + ABIResult result = iter.cur(); + // Register results are visited first; when we see a stack result we're + // done. + if (!result.inRegister()) { + return; + } + switch (result.type().code()) { + case ValType::I32: + freeI32(RegI32(result.gpr())); + break; + case ValType::I64: + freeI64(RegI64(result.gpr64())); + break; + case ValType::F32: + if (which == RegKind::All) { + freeF32(RegF32(result.fpr())); + } + break; + case ValType::F64: + if (which == RegKind::All) { + freeF64(RegF64(result.fpr())); + } + break; + case ValType::FuncRef: + case ValType::AnyRef: + case ValType::Ref: + freeRef(RegPtr(result.gpr())); + break; + case ValType::NullRef: + MOZ_CRASH("unexpected nullref result"); + } + } + } + + void needIntegerResultRegisters(ResultType type) { + needResultRegisters(type, RegKind::OnlyGPRs); + } + void freeIntegerResultRegisters(ResultType type) { + freeResultRegisters(type, RegKind::OnlyGPRs); + } + + void needResultRegisters(ResultType type) { + needResultRegisters(type, RegKind::All); + } + void freeResultRegisters(ResultType type) { + freeResultRegisters(type, RegKind::All); + } + + void assertResultRegistersAvailable(ResultType type) { +#ifdef DEBUG + for (ABIResultIter iter(type); !iter.done(); iter.next()) { + ABIResult result = iter.cur(); + if (!result.inRegister()) { + return; + } + switch (result.type().code()) { + case ValType::I32: + MOZ_ASSERT(isAvailableI32(RegI32(result.gpr()))); + break; + case ValType::I64: + MOZ_ASSERT(isAvailableI64(RegI64(result.gpr64()))); + break; + case ValType::F32: + MOZ_ASSERT(isAvailableF32(RegF32(result.fpr()))); + break; + case ValType::F64: + MOZ_ASSERT(isAvailableF64(RegF64(result.fpr()))); + break; + case ValType::FuncRef: + case ValType::AnyRef: + case ValType::Ref: + MOZ_ASSERT(isAvailableRef(RegPtr(result.gpr()))); + break; + case ValType::NullRef: + MOZ_CRASH("unexpected nullref result"); + } + } +#endif + } + + void captureResultRegisters(ResultType type) { + assertResultRegistersAvailable(type); + needResultRegisters(type); } //////////////////////////////////////////////////////////// // // Value stack and spilling. // // The value stack facilitates some on-the-fly register allocation // and immediate-constant use. It tracks constants, latent @@ -3278,16 +3469,30 @@ class BaseCompiler final : public BaseCo } default: { break; } } } } + void saveTempPtr(RegPtr r) final { + MOZ_ASSERT(!ra.isAvailablePtr(r)); + fr.pushPtr(r); + ra.freePtr(r); + MOZ_ASSERT(ra.isAvailablePtr(r)); + } + + void restoreTempPtr(RegPtr r) final { + MOZ_ASSERT(ra.isAvailablePtr(r)); + ra.needPtr(r); + fr.popPtr(r); + MOZ_ASSERT(!ra.isAvailablePtr(r)); + } + // Various methods for creating a stack map. Stack maps are indexed by the // lowest address of the instruction immediately *after* the instruction of // interest. In practice that means either: the return point of a call, the // instruction immediately after a trap instruction (the "resume" // instruction), or the instruction immediately following a no-op (when // debugging is enabled). // Create a vanilla stack map. @@ -3755,161 +3960,318 @@ class BaseCompiler final : public BaseCo Stk& v = stk_.back(); if (v.kind() != Stk::LocalI32) { return false; } *local = v.slot(); return true; } - // TODO / OPTIMIZE (Bug 1316818): At the moment we use ReturnReg - // for JoinReg. 
It is possible other choices would lead to better - // register allocation, as ReturnReg is often first in the - // register set and will be heavily wanted by the register - // allocator that uses takeFirst(). + // TODO / OPTIMIZE (Bug 1316818): At the moment we use the Wasm + // inter-procedure ABI for block returns, which allocates ReturnReg as the + // single block result register. It is possible other choices would lead to + // better register allocation, as ReturnReg is often first in the register set + // and will be heavily wanted by the register allocator that uses takeFirst(). // // Obvious options: // - pick a register at the back of the register set // - pick a random register per block (different blocks have // different join regs) - // - // On the other hand, we sync() before every block and only the - // JoinReg is live out of the block. But on the way out, we - // currently pop the JoinReg before freeing regs to be discarded, - // so there is a real risk of some pointless shuffling there. If - // we instead integrate the popping of the join reg into the - // popping of the stack we can just use the JoinReg as it will - // become available in that process. - - MOZ_MUST_USE Maybe<AnyReg> popJoinRegUnlessVoid(ResultType type) { - if (type.empty()) { - return Nothing(); - } - MOZ_ASSERT(type.length() == 1, "multi-value return unimplemented"); - switch (type[0].code()) { - case ValType::I32: { - DebugOnly<Stk::Kind> k(stk_.back().kind()); - MOZ_ASSERT(k == Stk::RegisterI32 || k == Stk::ConstI32 || - k == Stk::MemI32 || k == Stk::LocalI32); - return Some(AnyReg(popI32(joinRegI32_))); - } - case ValType::I64: { - DebugOnly<Stk::Kind> k(stk_.back().kind()); - MOZ_ASSERT(k == Stk::RegisterI64 || k == Stk::ConstI64 || - k == Stk::MemI64 || k == Stk::LocalI64); - return Some(AnyReg(popI64(joinRegI64_))); - } - case ValType::F64: { - DebugOnly<Stk::Kind> k(stk_.back().kind()); - MOZ_ASSERT(k == Stk::RegisterF64 || k == Stk::ConstF64 || - k == Stk::MemF64 || k == Stk::LocalF64); - return Some(AnyReg(popF64(joinRegF64_))); - } - case ValType::F32: { - DebugOnly<Stk::Kind> k(stk_.back().kind()); - MOZ_ASSERT(k == Stk::RegisterF32 || k == Stk::ConstF32 || - k == Stk::MemF32 || k == Stk::LocalF32); - return Some(AnyReg(popF32(joinRegF32_))); - } - case ValType::Ref: - case ValType::NullRef: - case ValType::FuncRef: - case ValType::AnyRef: { - DebugOnly<Stk::Kind> k(stk_.back().kind()); - MOZ_ASSERT(k == Stk::RegisterRef || k == Stk::ConstRef || - k == Stk::MemRef || k == Stk::LocalRef); - return Some(AnyReg(popRef(joinRegPtr_))); - } - } - MOZ_CRASH("Compiler bug: unexpected expression type"); - } - - // If we ever start not sync-ing on entry to Block (but instead try to sync - // lazily) then this may start asserting because it does not spill the - // joinreg if the joinreg is already allocated. Note, it *can't* spill the - // joinreg in the contexts it's being used, so some other solution will need - // to be found. - - MOZ_MUST_USE Maybe<AnyReg> captureJoinRegUnlessVoid(ResultType type) { + + void popRegisterResults(ABIResultIter& iter) { + // Pop register results. Note that in the single-value case, popping to a + // register may cause a sync(); for multi-value we sync'd already. + for (; !iter.done(); iter.next()) { + const ABIResult& result = iter.cur(); + if (!result.inRegister()) { + // TODO / OPTIMIZE: We sync here to avoid solving the general parallel + // move problem in popStackResults. 
However we could avoid syncing the + // values that are going to registers anyway, if they are already in + // registers. + sync(); + break; + } + switch (result.type().code()) { + case ValType::I32: + popI32(RegI32(result.gpr())); + break; + case ValType::I64: + popI64(RegI64(result.gpr64())); + break; + case ValType::F32: + popF32(RegF32(result.fpr())); + break; + case ValType::F64: + popF64(RegF64(result.fpr())); + break; + case ValType::FuncRef: + case ValType::AnyRef: + case ValType::Ref: + popRef(RegPtr(result.gpr())); + break; + default: + MOZ_CRASH("bad result type"); + } + } + } + + void popStackResults(ABIResultIter& iter, StackHeight stackBase) { + MOZ_ASSERT(!iter.done()); + + // The iterator should be advanced beyond register results, and register + // results should be popped already from the value stack. + uint32_t alreadyPopped = iter.index(); + + // At this point, only stack arguments are remaining. Iterate through them + // to measure how much stack space they will take up. + for (; !iter.done(); iter.next()) { + MOZ_ASSERT(iter.cur().onStack()); + } + + // Calculate the space needed to store stack results, in bytes. + uint32_t stackResultBytes = iter.stackBytesConsumedSoFar(); + MOZ_ASSERT(stackResultBytes); + + // Compute the stack height including the stack results. Note that it's + // possible that this call expands the stack, for example if some of the + // results are supplied by constants and so are not already on the machine + // stack. + uint32_t endHeight = fr.prepareStackResultArea(stackBase, stackResultBytes); + + // Find a free GPR to use when shuffling stack values. If none is + // available, push ReturnReg and restore it after we're done. + bool saved = false; + RegPtr temp = ra.needTempPtr(RegPtr(ReturnReg), &saved); + + // The sequence of Stk values is in the same order on the machine stack as + // the result locations, but there is a complication: constant values are + // not actually pushed on the machine stack. (At this point registers and + // locals have been spilled already.) So, moving the Stk values into place + // isn't simply a shuffle-down or shuffle-up operation. There is a part of + // the Stk sequence that shuffles toward the FP, a part that's already in + // place, and a part that shuffles toward the SP. After shuffling, we have + // to materialize the constants. + + // Shuffle mem values toward the frame pointer, copying deepest values + // first. Stop when we run out of results, get to a register result, or + // find a Stk value that is closer to the FP than the result. + for (iter.switchToPrev(); !iter.done(); iter.prev()) { + const ABIResult& result = iter.cur(); + if (!result.onStack()) { + break; + } + MOZ_ASSERT(result.stackOffset() < stackResultBytes); + uint32_t destHeight = endHeight - result.stackOffset(); + uint32_t stkBase = stk_.length() - (iter.count() - alreadyPopped); + Stk& v = stk_[stkBase + iter.index()]; + if (v.isMem()) { + uint32_t srcHeight = v.offs(); + if (srcHeight <= destHeight) { + break; + } + fr.shuffleStackResultsTowardFP(srcHeight, destHeight, result.size(), + temp); + } + } + + // Reset iterator and skip register results. + for (iter.reset(); !iter.done(); iter.next()) { + if (iter.cur().onStack()) { + break; + } + } + + // Revisit top stack values, shuffling mem values toward the stack pointer, + // copying shallowest values first. 
+ for (; !iter.done(); iter.next()) { + const ABIResult& result = iter.cur(); + MOZ_ASSERT(result.onStack()); + MOZ_ASSERT(result.stackOffset() < stackResultBytes); + uint32_t destHeight = endHeight - result.stackOffset(); + Stk& v = stk_[stk_.length() - (iter.index() - alreadyPopped) - 1]; + if (v.isMem()) { + uint32_t srcHeight = v.offs(); + if (srcHeight >= destHeight) { + break; + } + fr.shuffleStackResultsTowardSP(srcHeight, destHeight, result.size(), + temp); + } + } + + // Reset iterator and skip register results, which are already popped off + // the value stack. + for (iter.reset(); !iter.done(); iter.next()) { + if (iter.cur().onStack()) { + break; + } + } + + // Materialize constants and pop the remaining items from the value stack. + for (; !iter.done(); iter.next()) { + const ABIResult& result = iter.cur(); + uint32_t resultHeight = endHeight - result.stackOffset(); + Stk& v = stk_.back(); + switch (v.kind()) { + case Stk::ConstI32: + case Stk::ConstF32: + // Rely on the fact that Stk stores its immediate values in a union, + // and that the bits of an f32 will be in the i32. + fr.storeImmediateToStack(v.i32val_, resultHeight, temp); + break; + case Stk::ConstI64: + case Stk::ConstF64: + // Likewise, rely on f64 bits being punned to i64. + fr.storeImmediateToStack(v.i64val_, resultHeight, temp); + break; + case Stk::ConstRef: + if (sizeof(intptr_t) == sizeof(int32_t)) { + fr.storeImmediateToStack(int32_t(v.refval_), resultHeight, temp); + } else { + fr.storeImmediateToStack(int64_t(v.refval_), resultHeight, temp); + } + break; + case Stk::MemRef: + // Update bookkeeping as we pop the Stk entry. + stackMapGenerator_.memRefsOnStk--; + break; + default: + MOZ_ASSERT(v.isMem()); + break; + } + stk_.popBack(); + } + + ra.freeTempPtr(temp, saved); + + // This will pop the stack if needed. 
+ fr.finishStackResultArea(stackBase, stackResultBytes); + } + + enum class ContinuationKind { Fallthrough, Jump }; + + void popBlockResults(ResultType type, StackHeight stackBase, + ContinuationKind kind) { if (type.empty()) { - return Nothing(); - } - MOZ_ASSERT(type.length() == 1, "multi-value return unimplemented"); - switch (type[0].code()) { - case ValType::I32: - MOZ_ASSERT(isAvailableI32(joinRegI32_)); - needI32(joinRegI32_); - return Some(AnyReg(joinRegI32_)); - case ValType::I64: - MOZ_ASSERT(isAvailableI64(joinRegI64_)); - needI64(joinRegI64_); - return Some(AnyReg(joinRegI64_)); - case ValType::F32: - MOZ_ASSERT(isAvailableF32(joinRegF32_)); - needF32(joinRegF32_); - return Some(AnyReg(joinRegF32_)); - case ValType::F64: - MOZ_ASSERT(isAvailableF64(joinRegF64_)); - needF64(joinRegF64_); - return Some(AnyReg(joinRegF64_)); - case ValType::Ref: - case ValType::NullRef: - case ValType::FuncRef: - case ValType::AnyRef: - MOZ_ASSERT(isAvailableRef(joinRegPtr_)); - needRef(joinRegPtr_); - return Some(AnyReg(joinRegPtr_)); - } - MOZ_CRASH("Compiler bug: unexpected type"); - } - - void pushJoinRegUnlessVoid(const Maybe<AnyReg>& r) { - if (!r) { + return; + } + + ABIResultIter iter(type); + popRegisterResults(iter); + if (!iter.done()) { + popStackResults(iter, stackBase); + } else if (kind == ContinuationKind::Jump) { + fr.popStackBeforeBranch(stackBase, type); + } + } + + Stk captureStackResult(const ABIResult& result, uint32_t stackResultBytes) { + MOZ_ASSERT(result.onStack()); + uint32_t offs = fr.locateStackResult(result, controlItem().stackHeight, + stackResultBytes); + return Stk::StackResult(result.type(), offs); + } + + void pushBlockResults(ResultType type) { + if (type.empty()) { return; } - switch (r->tag) { - case AnyReg::I32: - pushI32(r->i32()); - break; - case AnyReg::I64: - pushI64(r->i64()); - break; - case AnyReg::F64: - pushF64(r->f64()); - break; - case AnyReg::F32: - pushF32(r->f32()); - break; - case AnyReg::REF: - pushRef(r->ref()); - break; - } - } - - void freeJoinRegUnlessVoid(const Maybe<AnyReg>& r) { - if (!r) { - return; - } - switch (r->tag) { - case AnyReg::I32: - freeI32(r->i32()); - break; - case AnyReg::I64: - freeI64(r->i64()); - break; - case AnyReg::F64: - freeF64(r->f64()); - break; - case AnyReg::F32: - freeF32(r->f32()); - break; - case AnyReg::REF: - freeRef(r->ref()); - break; - } + + // We need to push the results in reverse order, so first iterate through + // all results to determine the locations of stack result types. 
+ ABIResultIter iter(type); + while (!iter.done()) { + iter.next(); + } + uint32_t stackResultBytes = iter.stackBytesConsumedSoFar(); + + for (iter.switchToPrev(); !iter.done(); iter.prev()) { + const ABIResult& result = iter.cur(); + if (!result.onStack()) { + break; + } + Stk v = captureStackResult(result, stackResultBytes); + push(v); + if (v.kind() == Stk::MemRef) { + stackMapGenerator_.memRefsOnStk++; + } + } + + for (; !iter.done(); iter.prev()) { + const ABIResult& result = iter.cur(); + MOZ_ASSERT(result.inRegister()); + switch (result.type().code()) { + case ValType::I32: + pushI32(RegI32(result.gpr())); + break; + case ValType::I64: + pushI64(RegI64(result.gpr64())); + break; + case ValType::F32: + pushF32(RegF32(result.fpr())); + break; + case ValType::F64: + pushF64(RegF64(result.fpr())); + break; + case ValType::FuncRef: + case ValType::AnyRef: + case ValType::Ref: + pushRef(RegPtr(result.gpr())); + break; + case ValType::NullRef: + MOZ_CRASH("unexpected nullref result"); + } + } + } + + // A combination of popBlockResults + pushBlockResults, to shuffle the top + // stack values into the expected block result locations for the given type. + StackHeight topBlockResults(ResultType type) { + if (type.empty()) { + return fr.stackHeight(); + } + StackHeight base = fr.stackResultsBase(stackConsumed(type.length())); + popBlockResults(type, base, ContinuationKind::Fallthrough); + pushBlockResults(type); + return base; + } + + // Conditional branches with fallthrough are preceded by a topBlockResults, so + // we know that there are no stack results that need to be materialized. In + // that case, we can just shuffle the whole block down before popping the + // stack. + void shuffleStackResultsBeforeBranch(StackHeight srcHeight, + StackHeight destHeight, + ResultType type) { + uint32_t stackResultBytes = 0; + + if (ABIResultIter::HasStackResults(type)) { + MOZ_ASSERT(stk_.length() >= type.length()); + ABIResultIter iter(type); + for (ABIResultIter iter(type); !iter.done(); iter.next()) { +#ifdef DEBUG + const ABIResult& result = iter.cur(); + const Stk& v = stk_[stk_.length() - iter.index() - 1]; + MOZ_ASSERT(v.isMem() == result.onStack()); +#endif + } + stackResultBytes = iter.stackBytesConsumedSoFar(); + + if (stackResultBytes) { + // Find a free GPR to use when shuffling stack values. If none is + // available, push ReturnReg and restore it after we're done. + bool saved = false; + RegPtr temp = ra.needTempPtr(RegPtr(ReturnReg), &saved); + fr.shuffleStackResultsTowardFP(srcHeight, destHeight, stackResultBytes, + temp); + ra.freeTempPtr(temp, saved); + } + } + + fr.popStackBeforeBranch(destHeight, stackResultBytes); } // Return the amount of execution stack consumed by the top numval // values on the value stack. size_t stackConsumed(size_t numval) { size_t size = 0; MOZ_ASSERT(numval <= stk_.length()); @@ -4044,22 +4406,24 @@ class BaseCompiler final : public BaseCo } #endif //////////////////////////////////////////////////////////// // // Control stack - void initControl(Control& item) { + void initControl(Control& item, ResultType params) { // Make sure the constructor was run properly MOZ_ASSERT(!item.stackHeight.isValid() && item.stackSize == UINT32_MAX); - item.stackHeight = fr.stackHeight(); - item.stackSize = stk_.length(); + uint32_t paramCount = deadCode_ ? 
0 : params.length(); + uint32_t stackParamSize = stackConsumed(paramCount); + item.stackHeight = fr.stackResultsBase(stackParamSize); + item.stackSize = stk_.length() - paramCount; item.deadOnArrival = deadCode_; item.bceSafeOnEntry = bceSafe_; } Control& controlItem() { return iter_.controlItem(); } Control& controlItem(uint32_t relativeDepth) { return iter_.controlItem(relativeDepth); @@ -4844,23 +5208,16 @@ class BaseCompiler final : public BaseCo RegPtr captureReturnedRef() { RegPtr r = RegPtr(ReturnReg); MOZ_ASSERT(isAvailableRef(r)); needRef(r); return r; } - void returnCleanup(bool popStack) { - if (popStack) { - fr.popStackBeforeBranch(controlOutermost().stackHeight); - } - masm.jump(&returnLabel_); - } - void checkDivideByZeroI32(RegI32 rhs) { Label nonZero; masm.branchTest32(Assembler::NonZero, rhs, rhs, &nonZero); trap(Trap::IntegerDivideByZero); masm.bind(&nonZero); } void checkDivideByZeroI64(RegI64 r) { @@ -6589,18 +6946,19 @@ class BaseCompiler final : public BaseCo } f32; struct { RegF64 lhs; RegF64 rhs; } f64; }; Label* const label; // The target of the branch, never NULL - const StackHeight stackHeight; // The value to pop to along the taken edge, - // unless !hasPop() + const StackHeight stackHeight; // The stack base above which to place + // stack-spilled block results, if + // hasBlockResults(). const bool invertBranch; // If true, invert the sense of the branch const ResultType resultType; // The result propagated along the edges explicit BranchState(Label* label) : label(label), stackHeight(StackHeight::Invalid()), invertBranch(false), resultType(ResultType::Empty()) {} @@ -6613,17 +6971,17 @@ class BaseCompiler final : public BaseCo BranchState(Label* label, StackHeight stackHeight, bool invertBranch, ResultType resultType) : label(label), stackHeight(stackHeight), invertBranch(invertBranch), resultType(resultType) {} - bool hasPop() const { return stackHeight.isValid(); } + bool hasBlockResults() const { return stackHeight.isValid(); } }; void setLatentCompare(Assembler::Condition compareOp, ValType operandType) { latentOp_ = LatentOp::Compare; latentType_ = operandType; latentIntCmp_ = compareOp; } @@ -6673,32 +7031,35 @@ class BaseCompiler final : public BaseCo // Cond is either Assembler::Condition or Assembler::DoubleCondition. // // Lhs is RegI32, RegI64, or RegF32, or RegF64. // // Rhs is either the same as Lhs, or an immediate expression compatible with // Lhs "when applicable". template <typename Cond, typename Lhs, typename Rhs> - void jumpConditionalWithJoinReg(BranchState* b, Cond cond, Lhs lhs, Rhs rhs) { - Maybe<AnyReg> r = popJoinRegUnlessVoid(b->resultType); - - if (b->hasPop() && fr.willPopStackBeforeBranch(b->stackHeight)) { - Label notTaken; - branchTo(b->invertBranch ? cond : Assembler::InvertCondition(cond), lhs, - rhs, ¬Taken); - fr.popStackBeforeBranch(b->stackHeight); - masm.jump(b->label); - masm.bind(¬Taken); - } else { - branchTo(b->invertBranch ? Assembler::InvertCondition(cond) : cond, lhs, - rhs, b->label); - } - - pushJoinRegUnlessVoid(r); + void jumpConditionalWithResults(BranchState* b, Cond cond, Lhs lhs, Rhs rhs) { + if (b->hasBlockResults()) { + StackHeight resultsBase = topBlockResults(b->resultType); + if (b->stackHeight != resultsBase) { + Label notTaken; + branchTo(b->invertBranch ? cond : Assembler::InvertCondition(cond), lhs, + rhs, ¬Taken); + + // Shuffle stack args. 
+ shuffleStackResultsBeforeBranch(resultsBase, b->stackHeight, + b->resultType); + masm.jump(b->label); + masm.bind(¬Taken); + return; + } + } + + branchTo(b->invertBranch ? Assembler::InvertCondition(cond) : cond, lhs, + rhs, b->label); } // sniffConditionalControl{Cmp,Eqz} may modify the latentWhatever_ state in // the BaseCompiler so that a subsequent conditional branch can be compiled // optimally. emitBranchSetup() and emitBranchPerform() will consume that // state. If the latter methods are not called because deadCode_ is true // then the compiler MUST instead call resetLatentOp() to reset the state. @@ -6740,21 +7101,20 @@ class BaseCompiler final : public BaseCo MOZ_MUST_USE bool emitStore(ValType resultType, Scalar::Type viewType); MOZ_MUST_USE bool storeCommon(MemoryAccessDesc* access, ValType resultType); MOZ_MUST_USE bool emitSelect(bool typed); template <bool isSetLocal> MOZ_MUST_USE bool emitSetOrTeeLocal(uint32_t slot); void endBlock(ResultType type); - void endLoop(ResultType type); - void endIfThen(); + void endIfThen(ResultType type); void endIfThenElse(ResultType type); - void doReturn(bool popStack); + void doReturn(ContinuationKind kind); void pushReturnValueOfCall(const FunctionCall& call, ValType type); void pushReturnValueOfCall(const FunctionCall& call, MIRType type); void emitCompareI32(Assembler::Condition compareOp, ValType compareType); void emitCompareI64(Assembler::Condition compareOp, ValType compareType); void emitCompareF32(Assembler::DoubleCondition compareOp, ValType compareType); void emitCompareF64(Assembler::DoubleCondition compareOp, @@ -8015,17 +8375,20 @@ bool BaseCompiler::sniffConditionalContr setLatentEqz(operandType); return true; default: return false; } } void BaseCompiler::emitBranchSetup(BranchState* b) { - maybeReserveJoinReg(b->resultType); + // Avoid allocating operands to latentOp_ to result registers. + if (b->hasBlockResults()) { + needResultRegisters(b->resultType); + } // Set up fields so that emitBranchPerform() need not switch on latentOp_. 
switch (latentOp_) { case LatentOp::None: { latentIntCmp_ = Assembler::NotEqual; latentType_ = ValType::I32; b->i32.lhs = popI32(); b->i32.rhsImm = true; @@ -8082,51 +8445,53 @@ void BaseCompiler::emitBranchSetup(Branc default: { MOZ_CRASH("Unexpected type for LatentOp::Eqz"); } } break; } } - maybeUnreserveJoinReg(b->resultType); + if (b->hasBlockResults()) { + freeResultRegisters(b->resultType); + } } void BaseCompiler::emitBranchPerform(BranchState* b) { switch (latentType_.code()) { case ValType::I32: { if (b->i32.rhsImm) { - jumpConditionalWithJoinReg(b, latentIntCmp_, b->i32.lhs, + jumpConditionalWithResults(b, latentIntCmp_, b->i32.lhs, Imm32(b->i32.imm)); } else { - jumpConditionalWithJoinReg(b, latentIntCmp_, b->i32.lhs, b->i32.rhs); + jumpConditionalWithResults(b, latentIntCmp_, b->i32.lhs, b->i32.rhs); freeI32(b->i32.rhs); } freeI32(b->i32.lhs); break; } case ValType::I64: { if (b->i64.rhsImm) { - jumpConditionalWithJoinReg(b, latentIntCmp_, b->i64.lhs, + jumpConditionalWithResults(b, latentIntCmp_, b->i64.lhs, Imm64(b->i64.imm)); } else { - jumpConditionalWithJoinReg(b, latentIntCmp_, b->i64.lhs, b->i64.rhs); + jumpConditionalWithResults(b, latentIntCmp_, b->i64.lhs, b->i64.rhs); freeI64(b->i64.rhs); } freeI64(b->i64.lhs); break; } case ValType::F32: { - jumpConditionalWithJoinReg(b, latentDoubleCmp_, b->f32.lhs, b->f32.rhs); + jumpConditionalWithResults(b, latentDoubleCmp_, b->f32.lhs, b->f32.rhs); freeF32(b->f32.lhs); freeF32(b->f32.rhs); break; } case ValType::F64: { - jumpConditionalWithJoinReg(b, latentDoubleCmp_, b->f64.lhs, b->f64.rhs); + jumpConditionalWithResults(b, latentDoubleCmp_, b->f64.lhs, b->f64.rhs); freeF64(b->f64.lhs); freeF64(b->f64.rhs); break; } default: { MOZ_CRASH("Unexpected type for LatentOp::Compare"); } } @@ -8142,75 +8507,78 @@ void BaseCompiler::emitBranchPerform(Bra // branching out of the block or falling out at the end be sure to // pop the appropriate stacks back to where they were on entry, while // preserving the exit value. // - A continue branch in a loop is much like an exit branch, but the branch // value must not be preserved. // - The exit value is always in a designated join register (type dependent). bool BaseCompiler::emitBlock() { - if (!iter_.readBlock()) { + ResultType params; + if (!iter_.readBlock(¶ms)) { return false; } if (!deadCode_) { sync(); // Simplifies branching out from block } - initControl(controlItem()); + initControl(controlItem(), params); return true; } void BaseCompiler::endBlock(ResultType type) { Control& block = controlItem(); - // Save the value. - Maybe<AnyReg> r; - if (!deadCode_) { - r = popJoinRegUnlessVoid(type); + if (deadCode_) { + // Block does not fall through; reset stack. + fr.resetStackHeight(block.stackHeight, type); + popValueStackTo(block.stackSize); + } else { + // If the block label is used, we have a control join, so we need to shuffle + // fallthrough values into place. Otherwise if it's not a control join, we + // can leave the value stack alone. + MOZ_ASSERT(stk_.length() == block.stackSize + type.length()); + if (block.label.used()) { + popBlockResults(type, block.stackHeight, ContinuationKind::Fallthrough); + } block.bceSafeOnExit &= bceSafe_; } - // Leave the block. - fr.popStackOnBlockExit(block.stackHeight, deadCode_); - popValueStackTo(block.stackSize); - // Bind after cleanup: branches out will have popped the stack. 
if (block.label.used()) { masm.bind(&block.label); - // No value was provided by the fallthrough but the branch out will - // have stored one in joinReg, so capture that. if (deadCode_) { - r = captureJoinRegUnlessVoid(type); - } - deadCode_ = false; + captureResultRegisters(type); + deadCode_ = false; + } + pushBlockResults(type); } bceSafe_ = block.bceSafeOnExit; - - // Retain the value stored in joinReg by all paths, if there are any. - if (!deadCode_) { - pushJoinRegUnlessVoid(r); - } } bool BaseCompiler::emitLoop() { - if (!iter_.readLoop()) { + ResultType params; + if (!iter_.readLoop(¶ms)) { return false; } if (!deadCode_) { sync(); // Simplifies branching out from block } - initControl(controlItem()); + initControl(controlItem(), params); bceSafe_ = 0; if (!deadCode_) { + // Loop entry is a control join, so shuffle the entry parameters into the + // well-known locations. + topBlockResults(params); masm.nopAlign(CodeAlignment); masm.bind(&controlItem(0).label); if (!addInterruptCheck()) { return false; } } return true; @@ -8226,172 +8594,209 @@ bool BaseCompiler::emitLoop() { // (begin (br 1) (unreachable)) // (begin (unreachable))) // (i32.const 1)) // // The branch causes neither of the unreachable expressions to be // evaluated. bool BaseCompiler::emitIf() { + ResultType params; Nothing unused_cond; - if (!iter_.readIf(&unused_cond)) { + if (!iter_.readIf(¶ms, &unused_cond)) { return false; } BranchState b(&controlItem().otherLabel, InvertBranch(true)); if (!deadCode_) { emitBranchSetup(&b); sync(); + // Because params can flow immediately to results in the case of an empty + // "then" or "else" block, and the result of an if/then is a join in + // general, we shuffle params eagerly to the result allocations. + topBlockResults(params); } else { resetLatentOp(); } - initControl(controlItem()); + initControl(controlItem(), params); if (!deadCode_) { emitBranchPerform(&b); } return true; } -void BaseCompiler::endIfThen() { +void BaseCompiler::endIfThen(ResultType type) { Control& ifThen = controlItem(); - fr.popStackOnBlockExit(ifThen.stackHeight, deadCode_); - popValueStackTo(ifThen.stackSize); + // The parameters to the "if" logically flow to both the "then" and "else" + // blocks, but the "else" block is empty. Since we know that the "if" + // type-checks, that means that the "else" parameters are the "else" results, + // and that the "if"'s result type is the same as its parameter type. + + if (deadCode_) { + // "then" arm does not fall through; reset stack. + fr.resetStackHeight(ifThen.stackHeight, type); + popValueStackTo(ifThen.stackSize); + if (!ifThen.deadOnArrival) { + captureResultRegisters(type); + } + } else { + MOZ_ASSERT(stk_.length() == ifThen.stackSize + type.length()); + // Assume we have a control join, so place results in block result + // allocations. 
+ popBlockResults(type, ifThen.stackHeight, ContinuationKind::Fallthrough); + MOZ_ASSERT(!ifThen.deadOnArrival); + } if (ifThen.otherLabel.used()) { masm.bind(&ifThen.otherLabel); } if (ifThen.label.used()) { masm.bind(&ifThen.label); } if (!deadCode_) { ifThen.bceSafeOnExit &= bceSafe_; } deadCode_ = ifThen.deadOnArrival; + if (!deadCode_) { + pushBlockResults(type); + } bceSafe_ = ifThen.bceSafeOnExit & ifThen.bceSafeOnEntry; } bool BaseCompiler::emitElse() { - ResultType thenType; + ResultType params, results; NothingVector unused_thenValues; - if (!iter_.readElse(&thenType, &unused_thenValues)) { + if (!iter_.readElse(¶ms, &results, &unused_thenValues)) { return false; } Control& ifThenElse = controlItem(0); // See comment in endIfThenElse, below. // Exit the "then" branch. ifThenElse.deadThenBranch = deadCode_; - Maybe<AnyReg> r; - if (!deadCode_) { - r = popJoinRegUnlessVoid(thenType); - } - - fr.popStackOnBlockExit(ifThenElse.stackHeight, deadCode_); - popValueStackTo(ifThenElse.stackSize); + if (deadCode_) { + fr.resetStackHeight(ifThenElse.stackHeight, results); + popValueStackTo(ifThenElse.stackSize); + } else { + MOZ_ASSERT(stk_.length() == ifThenElse.stackSize + results.length()); + popBlockResults(results, ifThenElse.stackHeight, ContinuationKind::Jump); + freeResultRegisters(results); + MOZ_ASSERT(!ifThenElse.deadOnArrival); + } if (!deadCode_) { masm.jump(&ifThenElse.label); } if (ifThenElse.otherLabel.used()) { masm.bind(&ifThenElse.otherLabel); } // Reset to the "else" branch. if (!deadCode_) { - freeJoinRegUnlessVoid(r); ifThenElse.bceSafeOnExit &= bceSafe_; } deadCode_ = ifThenElse.deadOnArrival; bceSafe_ = ifThenElse.bceSafeOnEntry; + fr.resetStackHeight(ifThenElse.stackHeight, params); + + if (!deadCode_) { + pushBlockResults(params); + } + return true; } void BaseCompiler::endIfThenElse(ResultType type) { Control& ifThenElse = controlItem(); // The expression type is not a reliable guide to what we'll find // on the stack, we could have (if E (i32.const 1) (unreachable)) // in which case the "else" arm is AnyType but the type of the // full expression is I32. So restore whatever's there, not what // we want to find there. The "then" arm has the same constraint. - Maybe<AnyReg> r; - if (!deadCode_) { - r = popJoinRegUnlessVoid(type); + if (deadCode_) { + // "then" arm does not fall through; reset stack. + fr.resetStackHeight(ifThenElse.stackHeight, type); + popValueStackTo(ifThenElse.stackSize); + } else { + MOZ_ASSERT(stk_.length() == ifThenElse.stackSize + type.length()); + // Assume we have a control join, so place results in block result + // allocations. + popBlockResults(type, ifThenElse.stackHeight, + ContinuationKind::Fallthrough); ifThenElse.bceSafeOnExit &= bceSafe_; - } - - fr.popStackOnBlockExit(ifThenElse.stackHeight, deadCode_); - popValueStackTo(ifThenElse.stackSize); + MOZ_ASSERT(!ifThenElse.deadOnArrival); + } if (ifThenElse.label.used()) { masm.bind(&ifThenElse.label); } bool joinLive = !ifThenElse.deadOnArrival && (!ifThenElse.deadThenBranch || !deadCode_ || ifThenElse.label.bound()); if (joinLive) { - // No value was provided by the "then" path but capture the one + // No values were provided by the "then" path, but capture the values // provided by the "else" path. 
if (deadCode_) { - r = captureJoinRegUnlessVoid(type); + captureResultRegisters(type); } deadCode_ = false; } bceSafe_ = ifThenElse.bceSafeOnExit; if (!deadCode_) { - pushJoinRegUnlessVoid(r); + pushBlockResults(type); } } bool BaseCompiler::emitEnd() { LabelKind kind; ResultType type; NothingVector unused_values; if (!iter_.readEnd(&kind, &type, &unused_values)) { return false; } switch (kind) { case LabelKind::Body: endBlock(type); + doReturn(ContinuationKind::Fallthrough); iter_.popEnd(); MOZ_ASSERT(iter_.controlStackEmpty()); - doReturn(PopStack(false)); return iter_.readFunctionEnd(iter_.end()); case LabelKind::Block: endBlock(type); break; case LabelKind::Loop: // The end of a loop isn't a branch target, so we can just leave its - // results on the stack to be consumed by the outer block. + // results on the expression stack to be consumed by the outer block. break; case LabelKind::Then: - endIfThen(); + endIfThen(type); break; case LabelKind::Else: endIfThenElse(type); break; } iter_.popEnd(); @@ -8408,28 +8813,26 @@ bool BaseCompiler::emitBr() { if (deadCode_) { return true; } Control& target = controlItem(relativeDepth); target.bceSafeOnExit &= bceSafe_; - // Save any value in the designated join register, where the - // normal block exit code will also leave it. - - Maybe<AnyReg> r = popJoinRegUnlessVoid(type); - - fr.popStackBeforeBranch(target.stackHeight); + // Save any values in the designated join registers, as if the target block + // returned normally. + + popBlockResults(type, target.stackHeight, ContinuationKind::Jump); masm.jump(&target.label); - // The register holding the join value is free for the remainder - // of this block. - - freeJoinRegUnlessVoid(r); + // The registers holding the join values are free for the remainder of this + // block. + + freeResultRegisters(type); deadCode_ = true; return true; } bool BaseCompiler::emitBrIf() { uint32_t relativeDepth; @@ -8454,50 +8857,51 @@ bool BaseCompiler::emitBrIf() { emitBranchPerform(&b); return true; } bool BaseCompiler::emitBrTable() { Uint32Vector depths; uint32_t defaultDepth; - ResultType type; + ResultType branchParams; NothingVector unused_values; Nothing unused_index; - // N.B., `type' gets set to the type of the default branch target. In the - // presence of subtyping, it could be that the different branch targets have - // different types. Here we rely on the assumption that the value + // N.B., `branchParams' gets set to the type of the default branch target. In + // the presence of subtyping, it could be that the different branch targets + // have different types. Here we rely on the assumption that the value // representations (e.g. Stk value types) of all branch target types are the // same, in the baseline compiler. Notably, this means that all Ref types // should be represented the same. - if (!iter_.readBrTable(&depths, &defaultDepth, &type, &unused_values, + if (!iter_.readBrTable(&depths, &defaultDepth, &branchParams, &unused_values, &unused_index)) { return false; } if (deadCode_) { return true; } - // Don't use joinReg for rc - maybeReserveJoinRegI(type); + // Don't use param registers for rc + needIntegerResultRegisters(branchParams); // Table switch value always on top. 
RegI32 rc = popI32(); - maybeUnreserveJoinRegI(type); - - Maybe<AnyReg> r = popJoinRegUnlessVoid(type); + freeIntegerResultRegisters(branchParams); + + StackHeight resultsBase = topBlockResults(branchParams); Label dispatchCode; masm.branch32(Assembler::Below, rc, Imm32(depths.length()), &dispatchCode); // This is the out-of-range stub. rc is dead here but we don't need it. - fr.popStackBeforeBranch(controlItem(defaultDepth).stackHeight); + shuffleStackResultsBeforeBranch( + resultsBase, controlItem(defaultDepth).stackHeight, branchParams); controlItem(defaultDepth).bceSafeOnExit &= bceSafe_; masm.jump(&controlItem(defaultDepth).label); // Emit stubs. rc is dead in all of these but we don't need it. // // The labels in the vector are in the TempAllocator and will // be freed by and by. // @@ -8507,17 +8911,18 @@ bool BaseCompiler::emitBrTable() { LabelVector stubs; if (!stubs.reserve(depths.length())) { return false; } for (uint32_t depth : depths) { stubs.infallibleEmplaceBack(NonAssertingLabel()); masm.bind(&stubs.back()); - fr.popStackBeforeBranch(controlItem(depth).stackHeight); + shuffleStackResultsBeforeBranch(resultsBase, controlItem(depth).stackHeight, + branchParams); controlItem(depth).bceSafeOnExit &= bceSafe_; masm.jump(&controlItem(depth).label); } // Emit table. Label theTable; jumpTable(stubs, &theTable); @@ -8526,17 +8931,17 @@ bool BaseCompiler::emitBrTable() { tableSwitch(&theTable, rc, &dispatchCode); deadCode_ = true; // Clean up. freeI32(rc); - freeJoinRegUnlessVoid(r); + popValueStackBy(branchParams.length()); return true; } bool BaseCompiler::emitDrop() { if (!iter_.readDrop()) { return false; } @@ -8544,76 +8949,39 @@ bool BaseCompiler::emitDrop() { if (deadCode_) { return true; } dropValue(); return true; } -void BaseCompiler::doReturn(bool popStack) { +void BaseCompiler::doReturn(ContinuationKind kind) { if (deadCode_) { return; } - Maybe<ValType> type = funcType().ret(); - if (!type) { - returnCleanup(popStack); - return; - } - switch (type.ref().code()) { - case ValType::I32: { - RegI32 rv = popI32(RegI32(ReturnReg)); - returnCleanup(popStack); - freeI32(rv); - break; - } - case ValType::I64: { - RegI64 rv = popI64(RegI64(ReturnReg64)); - returnCleanup(popStack); - freeI64(rv); - break; - } - case ValType::F64: { - RegF64 rv = popF64(RegF64(ReturnDoubleReg)); - returnCleanup(popStack); - freeF64(rv); - break; - } - case ValType::F32: { - RegF32 rv = popF32(RegF32(ReturnFloat32Reg)); - returnCleanup(popStack); - freeF32(rv); - break; - } - case ValType::Ref: - case ValType::NullRef: - case ValType::FuncRef: - case ValType::AnyRef: { - RegPtr rv = popRef(RegPtr(ReturnReg)); - returnCleanup(popStack); - freeRef(rv); - break; - } - default: { - MOZ_CRASH("Function return type"); - } - } + + StackHeight height = controlOutermost().stackHeight; + ResultType type = ResultType::Vector(funcType().results()); + popBlockResults(type, height, kind); + masm.jump(&returnLabel_); + freeResultRegisters(type); } bool BaseCompiler::emitReturn() { NothingVector unused_values; if (!iter_.readReturn(&unused_values)) { return false; } if (deadCode_) { return true; } - doReturn(PopStack(true)); + doReturn(ContinuationKind::Jump); deadCode_ = true; return true; } bool BaseCompiler::emitCallArgs(const ValTypeVector& argTypes, FunctionCall* baselineCall) { MOZ_ASSERT(!deadCode_); @@ -10830,17 +11198,17 @@ bool BaseCompiler::emitStructNarrow() { bool BaseCompiler::emitBody() { MOZ_ASSERT(stackMapGenerator_.framePushedAtEntryToBody.isSome()); if (!iter_.readFunctionStart(func_.index)) { 
return false; } - initControl(controlItem()); + initControl(controlItem(), ResultType::Empty()); uint32_t overhead = 0; for (;;) { Nothing unused_a, unused_b; #ifdef DEBUG performRegisterLeakCheck(); @@ -11889,21 +12257,16 @@ BaseCompiler::BaseCompiler(const ModuleE latentType_(ValType::I32), latentIntCmp_(Assembler::Equal), latentDoubleCmp_(Assembler::DoubleEqual), masm(*masm), ra(*this), fr(*masm), stackMapGenerator_(stackMaps, trapExitLayout, trapExitLayoutNumWords, *masm), - joinRegI32_(RegI32(ReturnReg)), - joinRegI64_(RegI64(ReturnReg64)), - joinRegPtr_(RegPtr(ReturnReg)), - joinRegF32_(RegF32(ReturnFloat32Reg)), - joinRegF64_(RegF64(ReturnDoubleReg)), stkSource_(stkSource) { // Our caller, BaselineCompileFunctions, will lend us the vector contents to // use for the eval stack. To get hold of those contents, we'll temporarily // install an empty one in its place. MOZ_ASSERT(stk_.empty()); stk_.swap(stkSource_); // Assuming that previously processed wasm functions are well formed, the
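In the baseline-compiler hunks above, the single per-type join register is replaced by general block results: before a branch (br, br_table, or the implicit jump in doReturn), the branch's operands are moved down to the stack height the target block expects and anything between is discarded; on fallthrough the same values simply become the block's results. Below is a minimal standalone sketch of that reshuffling, using a plain vector of abstract values in place of the compiler's evaluation stack and machine state; shuffleResultsBeforeBranch is a toy stand-in for the patch's shuffleStackResultsBeforeBranch/popBlockResults, not the real implementation.

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

// Toy model: the evaluation stack is a vector of abstract values.  Before
// branching to a block whose expected stack height is `targetHeight`, the
// `numResults` branch operands on top must end up directly above that
// height; intermediate values below them are discarded.
static void shuffleResultsBeforeBranch(std::vector<int>& stack,
                                       size_t targetHeight,
                                       size_t numResults) {
  assert(stack.size() >= targetHeight + numResults);
  // Slide the top numResults values down onto the target height, then drop
  // whatever was in between.
  std::move(stack.end() - numResults, stack.end(),
            stack.begin() + targetHeight);
  stack.resize(targetHeight + numResults);
}

int main() {
  // Block entered at height 1; two intermediate values (8, 9) were pushed;
  // the branch carries a single result (42).
  std::vector<int> stack = {7, 8, 9, 42};
  shuffleResultsBeforeBranch(stack, /*targetHeight=*/1, /*numResults=*/1);
  assert(stack.size() == 2 && stack[0] == 7 && stack[1] == 42);
  return 0;
}

In the real compiler the moved values may live in registers or frame slots, so the shuffle also has to avoid clobbering; that is why emitBrTable reserves the integer result registers around popping the switch index, so rc does not land in one of them.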
--- a/js/src/wasm/WasmIonCompile.cpp +++ b/js/src/wasm/WasmIonCompile.cpp @@ -1759,54 +1759,58 @@ static bool EmitF64Const(FunctionCompile return false; } f.iter().setResult(f.constant(f64)); return true; } static bool EmitBlock(FunctionCompiler& f) { - return f.iter().readBlock() && f.startBlock(); + ResultType params; + return f.iter().readBlock(&params) && f.startBlock(); } static bool EmitLoop(FunctionCompiler& f) { - if (!f.iter().readLoop()) { + ResultType params; + if (!f.iter().readLoop(&params)) { return false; } MBasicBlock* loopHeader; if (!f.startLoop(&loopHeader)) { return false; } f.addInterruptCheck(); f.iter().controlItem() = loopHeader; return true; } static bool EmitIf(FunctionCompiler& f) { + ResultType params; MDefinition* condition = nullptr; - if (!f.iter().readIf(&condition)) { + if (!f.iter().readIf(&params, &condition)) { return false; } MBasicBlock* elseBlock; if (!f.branchAndStartThen(condition, &elseBlock)) { return false; } f.iter().controlItem() = elseBlock; return true; } static bool EmitElse(FunctionCompiler& f) { - ResultType thenType; + ResultType paramType; + ResultType resultType; DefVector thenValues; - if (!f.iter().readElse(&thenType, &thenValues)) { + if (!f.iter().readElse(&paramType, &resultType, &thenValues)) { return false; } if (!f.pushDefs(thenValues)) { return false; } if (!f.switchToElse(f.iter().controlItem(), &f.iter().controlItem())) {
--- a/js/src/wasm/WasmOpIter.h +++ b/js/src/wasm/WasmOpIter.h @@ -715,20 +715,21 @@ class MOZ_STACK_CLASS OpIter : private P // ------------------------------------------------------------------------ // Decoding and validation interface. MOZ_MUST_USE bool readOp(OpBytes* op); MOZ_MUST_USE bool readFunctionStart(uint32_t funcIndex); MOZ_MUST_USE bool readFunctionEnd(const uint8_t* bodyEnd); MOZ_MUST_USE bool readReturn(ValueVector* values); - MOZ_MUST_USE bool readBlock(); - MOZ_MUST_USE bool readLoop(); - MOZ_MUST_USE bool readIf(Value* condition); - MOZ_MUST_USE bool readElse(ResultType* thenType, ValueVector* thenValues); + MOZ_MUST_USE bool readBlock(ResultType* paramType); + MOZ_MUST_USE bool readLoop(ResultType* paramType); + MOZ_MUST_USE bool readIf(ResultType* paramType, Value* condition); + MOZ_MUST_USE bool readElse(ResultType* paramType, ResultType* resultType, + ValueVector* thenValues); MOZ_MUST_USE bool readEnd(LabelKind* kind, ResultType* type, ValueVector* values); void popEnd(); MOZ_MUST_USE bool readBr(uint32_t* relativeDepth, ResultType* type, ValueVector* values); MOZ_MUST_USE bool readBrIf(uint32_t* relativeDepth, ResultType* type, ValueVector* values, Value* condition); MOZ_MUST_USE bool readBrTable(Uint32Vector* depths, uint32_t* defaultDepth, @@ -1257,71 +1258,76 @@ inline bool OpIter<Policy>::readReturn(V return false; } afterUnconditionalBranch(); return true; } template <typename Policy> -inline bool OpIter<Policy>::readBlock() { +inline bool OpIter<Policy>::readBlock(ResultType* paramType) { MOZ_ASSERT(Classify(op_) == OpKind::Block); BlockType type; if (!readBlockType(&type)) { return false; } + *paramType = type.params(); return pushControl(LabelKind::Block, type); } template <typename Policy> -inline bool OpIter<Policy>::readLoop() { +inline bool OpIter<Policy>::readLoop(ResultType* paramType) { MOZ_ASSERT(Classify(op_) == OpKind::Loop); BlockType type; if (!readBlockType(&type)) { return false; } + *paramType = type.params(); return pushControl(LabelKind::Loop, type); } template <typename Policy> -inline bool OpIter<Policy>::readIf(Value* condition) { +inline bool OpIter<Policy>::readIf(ResultType* paramType, Value* condition) { MOZ_ASSERT(Classify(op_) == OpKind::If); BlockType type; if (!readBlockType(&type)) { return false; } if (!popWithType(ValType::I32, condition)) { return false; } if (!pushControl(LabelKind::Then, type)) { return false; } + *paramType = type.params(); size_t paramsLength = type.params().length(); return thenParamStack_.append(valueStack_.end() - paramsLength, paramsLength); } template <typename Policy> -inline bool OpIter<Policy>::readElse(ResultType* thenType, +inline bool OpIter<Policy>::readElse(ResultType* paramType, + ResultType* resultType, ValueVector* values) { MOZ_ASSERT(Classify(op_) == OpKind::Else); Control& block = controlStack_.back(); if (block.kind() != LabelKind::Then) { return fail("else can only be used within an if"); } - if (!checkStackAtEndOfBlock(thenType, values)) { + *paramType = block.type().params(); + if (!checkStackAtEndOfBlock(resultType, values)) { return false; } // Restore to the entry state of the then block. Since the then block may // have clobbered any value in the block's params, we must restore from a // snapshot.
valueStack_.shrinkTo(block.valueStackBase()); size_t thenParamsLength = block.type().params().length(); @@ -1342,17 +1348,18 @@ inline bool OpIter<Policy>::readEnd(Labe if (!checkStackAtEndOfBlock(type, values)) { return false; } Control& block = controlStack_.back(); // If an `if` block ends with `end` instead of `else`, then we must // additionally validate that the then-block doesn't push anything. - if (block.kind() == LabelKind::Then && !block.resultType().empty()) { + if (block.kind() == LabelKind::Then && + block.type().params() != block.type().results()) { return fail("if without else with a result value"); } *kind = block.kind(); return true; } template <typename Policy>
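The readEnd change above generalizes the old "if without else with a result value" rule for multi-value block types: an `if` with no `else` is now valid exactly when the block's parameter types equal its result types, since the implicit empty else arm can do nothing but pass its parameters through as results. A minimal standalone sketch of that check, using a made-up ValKind enum in place of SpiderMonkey's ValType/ResultType:

#include <cassert>
#include <vector>

enum class ValKind { I32, I64, F32, F64, AnyRef };

// An `if` without an `else` behaves as if it had an empty else arm.  An
// empty arm with type [params] -> [results] only validates when
// params == results, since it can do nothing but leave its parameters on
// the stack.
static bool ifWithoutElseIsValid(const std::vector<ValKind>& params,
                                 const std::vector<ValKind>& results) {
  return params == results;
}

int main() {
  // (if (result i32) ...) with no else: invalid, the empty else arm cannot
  // produce an i32 from nothing.
  assert(!ifWithoutElseIsValid({}, {ValKind::I32}));
  // (if (param i32) (result i32) ...) with no else: valid, the implicit
  // else passes its i32 parameter through as the result.
  assert(ifWithoutElseIsValid({ValKind::I32}, {ValKind::I32}));
  return 0;
}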
--- a/js/src/wasm/WasmStubs.cpp +++ b/js/src/wasm/WasmStubs.cpp @@ -34,16 +34,103 @@ using namespace js::jit; using namespace js::wasm; using mozilla::ArrayLength; typedef Vector<jit::MIRType, 8, SystemAllocPolicy> MIRTypeVector; typedef jit::ABIArgIter<MIRTypeVector> ABIArgMIRTypeIter; typedef jit::ABIArgIter<ValTypeVector> ABIArgValTypeIter; +/*****************************************************************************/ +// ABIResultIter implementation + +static uint32_t ResultStackSize(ValType type) { + switch (type.code()) { + case ValType::I32: + return ABIResult::StackSizeOfInt32; + case ValType::I64: + return ABIResult::StackSizeOfInt64; + case ValType::F32: + return ABIResult::StackSizeOfFloat; + case ValType::F64: + return ABIResult::StackSizeOfDouble; + case ValType::Ref: + case ValType::FuncRef: + case ValType::AnyRef: + return ABIResult::StackSizeOfPtr; + case ValType::NullRef: + default: + MOZ_CRASH("Unexpected result type"); + } +} + +uint32_t ABIResult::size() const { return ResultStackSize(type()); } + +void ABIResultIter::settleRegister(ValType type) { + MOZ_ASSERT(!done()); + MOZ_ASSERT(index() < RegisterResultCount); + static_assert(RegisterResultCount == 1, "expected a single register result"); + + switch (type.code()) { + case ValType::I32: + cur_ = ABIResult(type, ReturnReg); + break; + case ValType::I64: + cur_ = ABIResult(type, ReturnReg64); + break; + case ValType::F32: + cur_ = ABIResult(type, ReturnFloat32Reg); + break; + case ValType::F64: + cur_ = ABIResult(type, ReturnDoubleReg); + break; + case ValType::Ref: + case ValType::FuncRef: + case ValType::AnyRef: + cur_ = ABIResult(type, ReturnReg); + break; + case ValType::NullRef: + default: + MOZ_CRASH("Unexpected result type"); + } +} + +void ABIResultIter::settleNext() { + MOZ_ASSERT(direction_ == Next); + MOZ_ASSERT(!done()); + + uint32_t typeIndex = count_ - index_ - 1; + ValType type = type_[typeIndex]; + + if (index_ < RegisterResultCount) { + settleRegister(type); + return; + } + + cur_ = ABIResult(type, nextStackOffset_); + nextStackOffset_ += ResultStackSize(type); +} + +void ABIResultIter::settlePrev() { + MOZ_ASSERT(direction_ == Prev); + MOZ_ASSERT(!done()); + uint32_t typeIndex = index_; + ValType type = type_[typeIndex]; + + if (count_ - index_ - 1 < RegisterResultCount) { + settleRegister(type); + return; + } + + uint32_t size = ResultStackSize(type); + MOZ_ASSERT(nextStackOffset_ >= size); + nextStackOffset_ -= size; + cur_ = ABIResult(type, nextStackOffset_); +} + #ifdef WASM_CODEGEN_DEBUG template <class Closure> static void GenPrint(DebugChannel channel, MacroAssembler& masm, const Maybe<Register>& taken, Closure passArgAndCall) { if (!IsCodegenDebugEnabled(channel)) { return; }
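settleNext and settlePrev walk the same result sequence in opposite directions while keeping nextStackOffset_ consistent: in the Next direction, iteration step i visits result count - i - 1, so the walk starts at the last result, which gets the return register, and then hands out stack offsets that grow as it moves toward result 1. Here is a standalone sketch of that assignment; ValKind, Loc, assignLocations and the slot sizes are made up for illustration, standing in for ValType, ABIResult and the platform-dependent StackSizeOf* constants.

#include <cassert>
#include <cstdint>
#include <vector>

enum class ValKind { I32, I64, F32, F64 };

// Stand-in slot sizes; the real values (StackSizeOfInt32 etc.) depend on
// the target architecture.
static uint32_t slotSize(ValKind k) {
  switch (k) {
    case ValKind::I32: return 4;
    case ValKind::I64: return 8;
    case ValKind::F32: return 4;
    case ValKind::F64: return 8;
  }
  return 0;
}

struct Loc {
  bool inRegister;
  uint32_t stackOffset;  // Meaningful only when !inRegister.
};

// Model of the Next-direction walk: visit results from the last one down to
// the first.  The first visit gets the single register result; later ones
// get stack offsets growing away from the stack pointer.
static std::vector<Loc> assignLocations(const std::vector<ValKind>& results) {
  constexpr size_t kRegisterResultCount = 1;
  std::vector<Loc> locs(results.size());
  uint32_t nextStackOffset = 0;
  for (size_t i = 0; i < results.size(); i++) {
    size_t typeIndex = results.size() - i - 1;  // last result first
    if (i < kRegisterResultCount) {
      locs[typeIndex] = {true, 0};
    } else {
      locs[typeIndex] = {false, nextStackOffset};
      nextStackOffset += slotSize(results[typeIndex]);
    }
  }
  return locs;
}

int main() {
  // Results (f64, i32, i64): the i64 (last) is the register result, the i32
  // sits at offset 0, and the f64 just past the i32 slot.
  std::vector<Loc> locs =
      assignLocations({ValKind::F64, ValKind::I32, ValKind::I64});
  assert(locs[2].inRegister);
  assert(!locs[1].inRegister && locs[1].stackOffset == 0);
  assert(!locs[0].inRegister && locs[0].stackOffset == 4);
  return 0;
}

This mirrors the "reverse order" allocation described in WasmStubs.h: earlier results end up farther from the stack pointer than later ones.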
--- a/js/src/wasm/WasmStubs.h +++ b/js/src/wasm/WasmStubs.h @@ -15,20 +15,237 @@ * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef wasm_stubs_h #define wasm_stubs_h #include "wasm/WasmGenerator.h" +#include "wasm/WasmOpIter.h" namespace js { namespace wasm { +// ValType and location for a single result: either in a register or on the +// stack. + +class ABIResult { + ValType type_; + enum class Location { Gpr, Gpr64, Fpr, Stack } loc_; + union { + Register gpr_; + Register64 gpr64_; + FloatRegister fpr_; + uint32_t stackOffset_; + }; + + void validate() { +#ifdef DEBUG + if (onStack()) { + return; + } + MOZ_ASSERT(inRegister()); + switch (type_.code()) { + case ValType::I32: + MOZ_ASSERT(loc_ == Location::Gpr); + break; + case ValType::I64: + MOZ_ASSERT(loc_ == Location::Gpr64); + break; + case ValType::F32: + case ValType::F64: + MOZ_ASSERT(loc_ == Location::Fpr); + break; + case ValType::AnyRef: + case ValType::FuncRef: + case ValType::Ref: + MOZ_ASSERT(loc_ == Location::Gpr); + break; + default: + MOZ_CRASH("bad value type"); + } +#endif + } + + friend class ABIResultIter; + ABIResult(){}; + + public: + // Sizes of items in the stack area. + // + // The size values come from the implementations of Push() in + // MacroAssembler-x86-shared.cpp and MacroAssembler-arm-shared.cpp, and from + // VFPRegister::size() in Architecture-arm.h. + // + // On ARM unlike on x86 we push a single for float. + + static constexpr size_t StackSizeOfPtr = sizeof(intptr_t); + static constexpr size_t StackSizeOfInt32 = StackSizeOfPtr; + static constexpr size_t StackSizeOfInt64 = sizeof(int64_t); +#if defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_MIPS32) + static constexpr size_t StackSizeOfFloat = sizeof(float); +#else + static constexpr size_t StackSizeOfFloat = sizeof(double); +#endif + static constexpr size_t StackSizeOfDouble = sizeof(double); + + ABIResult(ValType type, Register gpr) + : type_(type), loc_(Location::Gpr), gpr_(gpr) { + validate(); + } + ABIResult(ValType type, Register64 gpr64) + : type_(type), loc_(Location::Gpr64), gpr64_(gpr64) { + validate(); + } + ABIResult(ValType type, FloatRegister fpr) + : type_(type), loc_(Location::Fpr), fpr_(fpr) { + validate(); + } + ABIResult(ValType type, uint32_t stackOffset) + : type_(type), loc_(Location::Stack), stackOffset_(stackOffset) { + validate(); + } + + ValType type() const { return type_; } + bool onStack() const { return loc_ == Location::Stack; } + bool inRegister() const { return !onStack(); } + Register gpr() const { + MOZ_ASSERT(loc_ == Location::Gpr); + return gpr_; + } + Register64 gpr64() const { + MOZ_ASSERT(loc_ == Location::Gpr64); + return gpr64_; + } + FloatRegister fpr() const { + MOZ_ASSERT(loc_ == Location::Fpr); + return fpr_; + } + // Offset from SP. + uint32_t stackOffset() const { + MOZ_ASSERT(loc_ == Location::Stack); + return stackOffset_; + } + uint32_t size() const; +}; + +// Just as WebAssembly functions can take multiple arguments, they can also +// return multiple results. As with a call, a limited number of results will be +// located in registers, and the rest will be stored in a stack area. The +// |ABIResultIter| computes result locations, given a |ResultType|. +// +// Recall that a |ResultType| represents a sequence of value types t1..tN, +// indexed from 1 to N. In principle it doesn't matter how we decide which +// results get to be in registers and which go to the stack. 
To better +// harmonize with WebAssembly's abstract stack machine, whose properties are +// taken advantage of by the baseline compiler, our strategy is to start +// allocating result locations in "reverse" order: from result N down to 1. +// +// If a result with index I is in a register, then all results with index J > I +// are also in registers. If a result I is on the stack, then all results with +// index K < I are also on the stack, farther away from the stack pointer than +// result I. +// +// Currently only a single result is ever stored in a register, though this may +// change in the future on register-rich platforms. +// +// NB: The baseline compiler also uses this ABI for locations of block +// parameters and return values, within individual WebAssembly functions. + +class ABIResultIter { + ResultType type_; + uint32_t count_; + uint32_t index_; + uint32_t nextStackOffset_; + enum { Next, Prev } direction_; + ABIResult cur_; + + void settleRegister(ValType type); + void settleNext(); + void settlePrev(); + + static constexpr size_t RegisterResultCount = 1; + + public: + explicit ABIResultIter(const ResultType& type) + : type_(type), count_(type.length()) { + reset(); + } + + void reset() { + index_ = nextStackOffset_ = 0; + direction_ = Next; + if (!done()) { + settleNext(); + } + } + bool done() const { return index_ == count_; } + uint32_t index() const { return index_; } + uint32_t count() const { return count_; } + uint32_t remaining() const { return count_ - index_; } + void switchToNext() { + MOZ_ASSERT(direction_ == Prev); + if (!done() && cur().onStack()) { + nextStackOffset_ += cur().size(); + } + index_ = count_ - index_; + direction_ = Next; + if (!done()) { + settleNext(); + } + } + void switchToPrev() { + MOZ_ASSERT(direction_ == Next); + if (!done() && cur().onStack()) { + nextStackOffset_ -= cur().size(); + } + index_ = count_ - index_; + direction_ = Prev; + if (!done()) settlePrev(); + } + void next() { + MOZ_ASSERT(direction_ == Next); + MOZ_ASSERT(!done()); + index_++; + if (!done()) { + settleNext(); + } + } + void prev() { + MOZ_ASSERT(direction_ == Prev); + MOZ_ASSERT(!done()); + index_++; + if (!done()) { + settlePrev(); + } + } + const ABIResult& cur() const { + MOZ_ASSERT(!done()); + return cur_; + } + + uint32_t stackBytesConsumedSoFar() const { return nextStackOffset_; } + + static inline bool HasStackResults(const ResultType& type) { + return type.length() > RegisterResultCount; + } + + static uint32_t MeasureStackBytes(const ResultType& type) { + if (!HasStackResults(type)) { + return 0; + } + ABIResultIter iter(type); + while (!iter.done()) { + iter.next(); + } + return iter.stackBytesConsumedSoFar(); + } +}; + extern bool GenerateBuiltinThunk(jit::MacroAssembler& masm, jit::ABIFunctionType abiType, ExitReason exitReason, void* funcPtr, CallableOffsets* offsets); extern bool GenerateImportFunctions(const ModuleEnvironment& env, const FuncImportVector& imports, CompiledCode* code);
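With RegisterResultCount fixed at 1, HasStackResults simply means "more than one result", and MeasureStackBytes amounts to summing the slot sizes of every result except the last, register-allocated one. A standalone sketch under those assumptions follows; measureStackBytes, hasStackResults, ValKind and the 4/8-byte slot sizes are illustrative stand-ins, not the real StackSizeOf* constants, which vary by platform.

#include <cassert>
#include <cstdint>
#include <vector>

enum class ValKind { I32, I64, F32, F64 };

// Placeholder slot sizes for this sketch.
static uint32_t slotSize(ValKind k) {
  return (k == ValKind::I64 || k == ValKind::F64) ? 8 : 4;
}

static bool hasStackResults(const std::vector<ValKind>& results) {
  return results.size() > 1;  // one register result
}

static uint32_t measureStackBytes(const std::vector<ValKind>& results) {
  if (!hasStackResults(results)) {
    return 0;
  }
  uint32_t bytes = 0;
  // Every result except the last (which lives in the return register)
  // contributes a stack slot.
  for (size_t i = 0; i + 1 < results.size(); i++) {
    bytes += slotSize(results[i]);
  }
  return bytes;
}

int main() {
  assert(measureStackBytes({ValKind::I32}) == 0);  // single register result
  assert(measureStackBytes({ValKind::F64, ValKind::I32}) == 8);
  assert(measureStackBytes({ValKind::I64, ValKind::F32, ValKind::I32}) == 12);
  return 0;
}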
--- a/js/src/wasm/WasmValidate.cpp +++ b/js/src/wasm/WasmValidate.cpp @@ -573,23 +573,23 @@ static bool DecodeFunctionBodyExprs(cons if (!env.refTypesEnabled()) { return iter.unrecognizedOpcode(&op); } StackType unused; CHECK(iter.readSelect(/*typed*/ true, &unused, &nothing, &nothing, &nothing)); } case uint16_t(Op::Block): - CHECK(iter.readBlock()); + CHECK(iter.readBlock(&unusedType)); case uint16_t(Op::Loop): - CHECK(iter.readLoop()); + CHECK(iter.readLoop(&unusedType)); case uint16_t(Op::If): - CHECK(iter.readIf(&nothing)); + CHECK(iter.readIf(&unusedType, &nothing)); case uint16_t(Op::Else): - CHECK(iter.readElse(&unusedType, &nothings)); + CHECK(iter.readElse(&unusedType, &unusedType, &nothings)); case uint16_t(Op::I32Clz): case uint16_t(Op::I32Ctz): case uint16_t(Op::I32Popcnt): CHECK(iter.readUnary(ValType::I32, &nothing)); case uint16_t(Op::I64Clz): case uint16_t(Op::I64Ctz): case uint16_t(Op::I64Popcnt): CHECK(iter.readUnary(ValType::I64, &nothing));