/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
* vim: set ts=8 sts=2 et sw=2 tw=80:
*
* Copyright 2016 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* [SMDOC] WebAssembly baseline compiler (RabaldrMonkey)
*
* General assumptions for 32-bit vs 64-bit code:
*
* - A 32-bit register can be extended in-place to a 64-bit register on 64-bit
* systems.
*
* - Code that knows that Register64 has a '.reg' member on 64-bit systems and
* '.high' and '.low' members on 32-bit systems, or knows the implications
* thereof, is #ifdef JS_PUNBOX64. All other code is #if(n)?def JS_64BIT.
*
*
* Coding standards:
*
* - In "small" code generating functions (eg emitMultiplyF64, emitQuotientI32,
* and surrounding functions; most functions fall into this class) where the
* meaning is obvious:
*
* - if there is a single source + destination register, it is called 'r'
* - if there is one source and a different destination, they are called 'rs'
* and 'rd'
* - if there is one source + destination register and another source register
* they are called 'r' and 'rs'
* - if there are two source registers and a destination register they are
* called 'rs0', 'rs1', and 'rd'.
*
* - Generic temp registers are named /temp[0-9]?/ not /tmp[0-9]?/.
*
* - Registers can be named non-generically for their function ('rp' for the
* 'pointer' register and 'rv' for the 'value' register are typical) and those
* names may or may not have an 'r' prefix.
*
* - "Larger" code generating functions make their own rules.
*
*
* General status notes:
*
* "FIXME" indicates a known or suspected bug. Always has a bug#.
*
* "TODO" indicates an opportunity for a general improvement, with an additional
* tag to indicate the area of improvement. Usually has a bug#.
*
* There are lots of machine dependencies here but they are pretty well isolated
* to a segment of the compiler. Many dependencies will eventually be factored
* into the MacroAssembler layer and shared with other code generators.
*
*
* High-value compiler performance improvements:
*
* - (Bug 1316802) The specific-register allocator (the needI32(r), needI64(r)
* etc methods) can avoid syncing the value stack if the specific register is
* in use but there is a free register to shuffle the specific register into.
* (This will also improve the generated code.) The sync happens often enough
* here to show up in profiles, because it is triggered by integer multiply
* and divide.
*
*
* High-value code generation improvements:
*
* - (Bug 1316804) brTable pessimizes by always dispatching to code that pops
* the stack and then jumps to the code for the target case. If no cleanup is
* needed we could just branch conditionally to the target; if the same amount
* of cleanup is needed for all cases then the cleanup can be done before the
* dispatch. Both are highly likely.
*
* - (Bug 1316806) Register management around calls: At the moment we sync the
* value stack unconditionally (this is simple) but there are probably many
* common cases where we could instead save/restore live caller-saves
* registers and perform parallel assignment into argument registers. This
* may be important if we keep some locals in registers.
*
* - (Bug 1316808) Allocate some locals to registers on machines where there are
* enough registers. This is probably hard to do well in a one-pass compiler
* but it might be that just keeping register arguments and the first few
* locals in registers is a viable strategy; another (more general) strategy
* is caching locals in registers in straight-line code. Such caching could
* also track constant values in registers, if that is deemed valuable. A
* combination of techniques may be desirable: parameters and the first few
* locals could be cached on entry to the function but not statically assigned
* to registers throughout.
*
* (On a large corpus of code it should be possible to compute, for every
* signature comprising the types of parameters and locals, and using a static
* weight for loops, a list in priority order of the parameters and locals
* that should be assigned to registers. Or something like that. Wasm makes
* this simple. Static assignments are desirable because they are not flushed
* to memory by the pre-block sync() call.)
*/
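// Illustrative sketch (not part of the compiler proper): under the naming
// conventions above, a typical "small" code generating function for a
// commutative i32 operator looks roughly like this, assuming the usual
// pop/push/free helpers that appear later in this file:
//
//   void BaseCompiler::emitAddI32() {
//     int32_t c;
//     if (popConstI32(&c)) {
//       RegI32 r = popI32();       // single source + destination: 'r'
//       masm.add32(Imm32(c), r);
//       pushI32(r);
//     } else {
//       RegI32 r, rs;              // 'r': source + destination; 'rs': other source
//       pop2xI32(&r, &rs);
//       masm.add32(rs, r);
//       freeI32(rs);
//       pushI32(r);
//     }
//   }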
#include "wasm/WasmBaselineCompile.h"
#include "mozilla/MathAlgorithms.h"
#include "mozilla/Maybe.h"
#include <algorithm>
#include <utility>
#include "jit/AtomicOp.h"
#include "jit/IonTypes.h"
#include "jit/JitAllocPolicy.h"
#include "jit/Label.h"
#include "jit/MIR.h"
#include "jit/RegisterAllocator.h"
#include "jit/Registers.h"
#include "jit/RegisterSets.h"
#if defined(JS_CODEGEN_ARM)
# include "jit/arm/Assembler-arm.h"
#endif
#if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86)
# include "jit/x86-shared/Architecture-x86-shared.h"
# include "jit/x86-shared/Assembler-x86-shared.h"
#endif
#if defined(JS_CODEGEN_MIPS32)
# include "jit/mips-shared/Assembler-mips-shared.h"
# include "jit/mips32/Assembler-mips32.h"
#endif
#if defined(JS_CODEGEN_MIPS64)
# include "jit/mips-shared/Assembler-mips-shared.h"
# include "jit/mips64/Assembler-mips64.h"
#endif
#include "js/ScalarType.h" // js::Scalar::Type
#include "util/Memory.h"
#include "wasm/WasmGC.h"
#include "wasm/WasmGenerator.h"
#include "wasm/WasmInstance.h"
#include "wasm/WasmOpIter.h"
#include "wasm/WasmSignalHandlers.h"
#include "wasm/WasmStubs.h"
#include "wasm/WasmValidate.h"
#include "jit/MacroAssembler-inl.h"
using mozilla::DebugOnly;
using mozilla::FloorLog2;
using mozilla::IsPowerOfTwo;
using mozilla::Maybe;
namespace js {
namespace wasm {
using namespace js::jit;
using HandleNaNSpecially = bool;
using InvertBranch = bool;
using IsKnownNotZero = bool;
using IsUnsigned = bool;
using NeedsBoundsCheck = bool;
using WantResult = bool;
using ZeroOnOverflow = bool;
class BaseStackFrame;
// Two flags, useABI and interModule, control how calls are made.
//
// UseABI::Wasm implies that the Tls/Heap/Global registers are nonvolatile,
// except when InterModule::True is also set, when they are volatile.
//
// UseABI::Builtin implies that the Tls/Heap/Global registers are volatile.
// In this case, we require InterModule::False. The calling convention
// is otherwise like UseABI::Wasm.
//
// UseABI::System implies that the Tls/Heap/Global registers are volatile.
// Additionally, the parameter passing mechanism may be slightly different from
// the UseABI::Wasm convention.
//
// When the Tls/Heap/Global registers are not volatile, the baseline compiler
// will restore the Tls register from its save slot before the call, since the
// baseline compiler uses the Tls register for other things.
//
// When those registers are volatile, the baseline compiler will reload them
// after the call (it will restore the Tls register from the save slot and load
// the other two from the Tls data).
enum class UseABI { Wasm, Builtin, System };
enum class InterModule { False = false, True = true };
enum class RhsDestOp { True = true };
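// For example (an informal reading of the rules above; the exact flags for
// each kind of call are chosen at the call sites in BaseCompiler):
//
//   - intra-module wasm call:        UseABI::Wasm,    InterModule::False
//       registers are nonvolatile; Tls is restored from its save slot before
//       the call.
//   - call to an imported function:  UseABI::Wasm,    InterModule::True
//       registers are volatile; Tls is restored and Heap/Global are reloaded
//       after the call.
//   - builtin callout:               UseABI::Builtin, InterModule::False
//       (InterModule::True is not permitted with UseABI::Builtin.)
//   - system-ABI callout:            UseABI::System
//       registers are volatile and the parameter passing may differ slightly.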
#if defined(JS_CODEGEN_NONE)
# define RABALDR_SCRATCH_I32
# define RABALDR_SCRATCH_F32
# define RABALDR_SCRATCH_F64
static constexpr Register RabaldrScratchI32 = Register::Invalid();
static constexpr FloatRegister RabaldrScratchF32 = InvalidFloatReg;
static constexpr FloatRegister RabaldrScratchF64 = InvalidFloatReg;
#endif
#ifdef JS_CODEGEN_ARM64
# define RABALDR_CHUNKY_STACK
# define RABALDR_SIDEALLOC_V128
# define RABALDR_SCRATCH_I32
# define RABALDR_SCRATCH_F32
# define RABALDR_SCRATCH_F64
# define RABALDR_SCRATCH_V128
# define RABALDR_SCRATCH_F32_ALIASES_F64
static constexpr Register RabaldrScratchI32{Registers::x15};
// Note, the float scratch regs cannot be registers that are used for parameter
// passing in any ABI we use. Argregs tend to be low-numbered; register 30
// should be safe.
static constexpr FloatRegister RabaldrScratchF32{FloatRegisters::s30,
FloatRegisters::Single};
static constexpr FloatRegister RabaldrScratchF64{FloatRegisters::d30,
FloatRegisters::Double};
# ifdef ENABLE_WASM_SIMD
static constexpr FloatRegister RabaldrScratchV128{FloatRegisters::d30,
FloatRegisters::Simd128};
# endif
static_assert(RabaldrScratchF32 != ScratchFloat32Reg, "Too busy");
static_assert(RabaldrScratchF64 != ScratchDoubleReg, "Too busy");
# ifdef ENABLE_WASM_SIMD
static_assert(RabaldrScratchV128 != ScratchSimd128Reg, "Too busy");
# endif
#endif
#ifdef JS_CODEGEN_X86
// The selection of EBX here steps gingerly around: the need for EDX
// to be allocatable for multiply/divide; ECX to be allocatable for
// shift/rotate; EAX (= ReturnReg) to be allocatable as the result
// register; EBX not being one of the WasmTableCall registers; and
// needing a temp register for load/store that has a single-byte
// persona.
//
// The compiler assumes that RabaldrScratchI32 has a single-byte
// persona. Code for 8-byte atomic operations assumes that
// RabaldrScratchI32 is in fact ebx.
# define RABALDR_SCRATCH_I32
static constexpr Register RabaldrScratchI32 = ebx;
# define RABALDR_INT_DIV_I64_CALLOUT
#endif
#ifdef JS_CODEGEN_ARM
// We use our own scratch register, because the macro assembler uses
// the regular scratch register(s) pretty liberally. We could
// work around that in several cases but the mess does not seem
// worth it yet. CallTempReg2 seems safe.
# define RABALDR_SCRATCH_I32
static constexpr Register RabaldrScratchI32 = CallTempReg2;
# define RABALDR_INT_DIV_I64_CALLOUT
# define RABALDR_I64_TO_FLOAT_CALLOUT
# define RABALDR_FLOAT_TO_I64_CALLOUT
#endif
#ifdef JS_CODEGEN_MIPS32
# define RABALDR_SCRATCH_I32
static constexpr Register RabaldrScratchI32 = CallTempReg2;
# define RABALDR_INT_DIV_I64_CALLOUT
# define RABALDR_I64_TO_FLOAT_CALLOUT
# define RABALDR_FLOAT_TO_I64_CALLOUT
#endif
#ifdef JS_CODEGEN_MIPS64
# define RABALDR_SCRATCH_I32
static constexpr Register RabaldrScratchI32 = CallTempReg2;
#endif
#ifdef RABALDR_SCRATCH_F32_ALIASES_F64
# if !defined(RABALDR_SCRATCH_F32) || !defined(RABALDR_SCRATCH_F64)
# error "Bad configuration"
# endif
#endif
template <MIRType t>
struct RegTypeOf {
#ifdef ENABLE_WASM_SIMD
static_assert(t == MIRType::Float32 || t == MIRType::Double ||
t == MIRType::Simd128,
"Float mask type");
#else
static_assert(t == MIRType::Float32 || t == MIRType::Double,
"Float mask type");
#endif
};
template <>
struct RegTypeOf<MIRType::Float32> {
static constexpr RegTypeName value = RegTypeName::Float32;
};
template <>
struct RegTypeOf<MIRType::Double> {
static constexpr RegTypeName value = RegTypeName::Float64;
};
#ifdef ENABLE_WASM_SIMD
template <>
struct RegTypeOf<MIRType::Simd128> {
static constexpr RegTypeName value = RegTypeName::Vector128;
};
#endif
// The strongly typed register wrappers are especially useful to distinguish
// float registers from double registers, but they also clearly distinguish
// 32-bit registers from 64-bit register pairs on 32-bit systems.
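// For example (illustrative only): the explicit constructors and kind
// assertions mean that mixing the wrappers up is caught either at compile
// time or immediately in debug builds:
//
//   RegF32 s = needF32();
//   masm.storeFloat32(s, addr);     // ok: converts to FloatRegister
//   // RegF64 d = s;                // does not compile (explicit ctor)
//   // RegF64 d{FloatRegister(s)};  // compiles, but asserts isDouble()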
struct RegI32 : public Register {
RegI32() : Register(Register::Invalid()) {}
explicit RegI32(Register reg) : Register(reg) {
MOZ_ASSERT(reg != Invalid());
}
bool isInvalid() const { return *this == Invalid(); }
bool isValid() const { return !isInvalid(); }
static RegI32 Invalid() { return RegI32(); }
};
struct RegI64 : public Register64 {
RegI64() : Register64(Register64::Invalid()) {}
explicit RegI64(Register64 reg) : Register64(reg) {
MOZ_ASSERT(reg != Invalid());
}
bool isInvalid() const { return *this == Invalid(); }
bool isValid() const { return !isInvalid(); }
static RegI64 Invalid() { return RegI64(); }
};
struct RegPtr : public Register {
RegPtr() : Register(Register::Invalid()) {}
explicit RegPtr(Register reg) : Register(reg) {
MOZ_ASSERT(reg != Invalid());
}
bool isInvalid() const { return *this == Invalid(); }
bool isValid() const { return !isInvalid(); }
static RegPtr Invalid() { return RegPtr(); }
};
struct RegF32 : public FloatRegister {
RegF32() : FloatRegister() {}
explicit RegF32(FloatRegister reg) : FloatRegister(reg) {
MOZ_ASSERT(isSingle());
}
bool isValid() const { return !isInvalid(); }
static RegF32 Invalid() { return RegF32(); }
};
struct RegF64 : public FloatRegister {
RegF64() : FloatRegister() {}
explicit RegF64(FloatRegister reg) : FloatRegister(reg) {
MOZ_ASSERT(isDouble());
}
bool isValid() const { return !isInvalid(); }
static RegF64 Invalid() { return RegF64(); }
};
#ifdef ENABLE_WASM_SIMD
# ifdef RABALDR_SIDEALLOC_V128
class RegV128 {
// fpr_ is either invalid or a double that aliases the simd register, see
// comments below at BaseRegAlloc.
FloatRegister fpr_;
public:
RegV128() : fpr_(FloatRegister()) {}
explicit RegV128(FloatRegister reg)
: fpr_(FloatRegister(reg.encoding(), FloatRegisters::Double)) {
MOZ_ASSERT(reg.isSimd128());
}
static RegV128 fromDouble(FloatRegister reg) {
MOZ_ASSERT(reg.isDouble());
return RegV128(FloatRegister(reg.encoding(), FloatRegisters::Simd128));
}
FloatRegister asDouble() const { return fpr_; }
bool isInvalid() const { return fpr_.isInvalid(); }
bool isValid() const { return !isInvalid(); }
static RegV128 Invalid() { return RegV128(); }
operator FloatRegister() const {
return FloatRegister(fpr_.encoding(), FloatRegisters::Simd128);
}
bool operator==(const RegV128& that) const {
return asDouble() == that.asDouble();
}
bool operator!=(const RegV128& that) const {
return asDouble() != that.asDouble();
}
};
# else
struct RegV128 : public FloatRegister {
RegV128() : FloatRegister() {}
explicit RegV128(FloatRegister reg) : FloatRegister(reg) {
MOZ_ASSERT(isSimd128());
}
bool isValid() const { return !isInvalid(); }
static RegV128 Invalid() { return RegV128(); }
};
# endif
#endif
struct AnyReg {
union {
RegI32 i32_;
RegI64 i64_;
RegPtr ref_;
RegF32 f32_;
RegF64 f64_;
#ifdef ENABLE_WASM_SIMD
RegV128 v128_;
#endif
};
enum {
I32,
I64,
REF,
F32,
F64,
#ifdef ENABLE_WASM_SIMD
V128
#endif
} tag;
explicit AnyReg(RegI32 r) {
tag = I32;
i32_ = r;
}
explicit AnyReg(RegI64 r) {
tag = I64;
i64_ = r;
}
explicit AnyReg(RegF32 r) {
tag = F32;
f32_ = r;
}
explicit AnyReg(RegF64 r) {
tag = F64;
f64_ = r;
}
#ifdef ENABLE_WASM_SIMD
explicit AnyReg(RegV128 r) {
tag = V128;
v128_ = r;
}
#endif
explicit AnyReg(RegPtr r) {
tag = REF;
ref_ = r;
}
RegI32 i32() const {
MOZ_ASSERT(tag == I32);
return i32_;
}
RegI64 i64() const {
MOZ_ASSERT(tag == I64);
return i64_;
}
RegF32 f32() const {
MOZ_ASSERT(tag == F32);
return f32_;
}
RegF64 f64() const {
MOZ_ASSERT(tag == F64);
return f64_;
}
#ifdef ENABLE_WASM_SIMD
RegV128 v128() const {
MOZ_ASSERT(tag == V128);
return v128_;
}
#endif
RegPtr ref() const {
MOZ_ASSERT(tag == REF);
return ref_;
}
AnyRegister any() const {
switch (tag) {
case F32:
return AnyRegister(f32_);
case F64:
return AnyRegister(f64_);
#ifdef ENABLE_WASM_SIMD
case V128:
return AnyRegister(v128_);
#endif
case I32:
return AnyRegister(i32_);
case I64:
#ifdef JS_PUNBOX64
return AnyRegister(i64_.reg);
#else
// The compiler is written so that this is never needed: any() is
// called on arbitrary registers for asm.js but asm.js does not have
// 64-bit ints. For wasm, any() is called on arbitrary registers
// only on 64-bit platforms.
MOZ_CRASH("AnyReg::any() on 32-bit platform");
#endif
case REF:
MOZ_CRASH("AnyReg::any() not implemented for ref types");
default:
MOZ_CRASH();
}
// Work around GCC 5 analysis/warning bug.
MOZ_CRASH("AnyReg::any(): impossible case");
}
};
// Platform-specific registers.
//
// All platforms must define struct SpecificRegs. All 32-bit platforms must
// have an abiReturnRegI64 member in that struct.
#if defined(JS_CODEGEN_X64)
struct SpecificRegs {
RegI32 eax, ecx, edx, edi, esi;
RegI64 rax, rcx, rdx;
SpecificRegs()
: eax(RegI32(js::jit::eax)),
ecx(RegI32(js::jit::ecx)),
edx(RegI32(js::jit::edx)),
edi(RegI32(js::jit::edi)),
esi(RegI32(js::jit::esi)),
rax(RegI64(Register64(js::jit::rax))),
rcx(RegI64(Register64(js::jit::rcx))),
rdx(RegI64(Register64(js::jit::rdx))) {}
};
#elif defined(JS_CODEGEN_X86)
struct SpecificRegs {
RegI32 eax, ecx, edx, edi, esi;
RegI64 ecx_ebx, edx_eax, abiReturnRegI64;
SpecificRegs()
: eax(RegI32(js::jit::eax)),
ecx(RegI32(js::jit::ecx)),
edx(RegI32(js::jit::edx)),
edi(RegI32(js::jit::edi)),
esi(RegI32(js::jit::esi)),
ecx_ebx(RegI64(Register64(js::jit::ecx, js::jit::ebx))),
edx_eax(RegI64(Register64(js::jit::edx, js::jit::eax))),
abiReturnRegI64(edx_eax) {}
};
#elif defined(JS_CODEGEN_ARM)
struct SpecificRegs {
RegI64 abiReturnRegI64;
SpecificRegs() : abiReturnRegI64(ReturnReg64) {}
};
#elif defined(JS_CODEGEN_ARM64)
struct SpecificRegs {};
#elif defined(JS_CODEGEN_MIPS32)
struct SpecificRegs {
RegI64 abiReturnRegI64;
SpecificRegs() : abiReturnRegI64(ReturnReg64) {}
};
#elif defined(JS_CODEGEN_MIPS64)
struct SpecificRegs {};
#else
struct SpecificRegs {
# ifndef JS_64BIT
RegI64 abiReturnRegI64;
# endif
SpecificRegs() { MOZ_CRASH("BaseCompiler porting interface: SpecificRegs"); }
};
#endif
class BaseCompilerInterface {
public:
// Spill all spillable registers.
//
// TODO / OPTIMIZE (Bug 1316802): It's possible to do better here by
// spilling only enough registers to satisfy current needs.
virtual void sync() = 0;
virtual void saveTempPtr(RegPtr r) = 0;
virtual void restoreTempPtr(RegPtr r) = 0;
};
// Register allocator.
class BaseRegAlloc {
// Notes on float register allocation.
//
// The general rule in SpiderMonkey is that float registers can alias double
// registers, but there are predicates to handle exceptions to that rule:
// hasUnaliasedDouble() and hasMultiAlias(). The way aliasing actually
// works is platform dependent and exposed through the aliased(n, &r)
// predicate, etc.
//
// - hasUnaliasedDouble(): on ARM VFPv3-D32 there are double registers that
// cannot be treated as float.
// - hasMultiAlias(): on ARM and MIPS a double register aliases two float
// registers.
//
// On some platforms (x86, x64, ARM64) but not all (ARM)
// ScratchFloat32Register is the same as ScratchDoubleRegister.
//
// It's a basic invariant of the AllocatableRegisterSet that it deals
// properly with aliasing of registers: if s0 or s1 are allocated then d0 is
// not allocatable; if s0 and s1 are freed individually then d0 becomes
// allocatable.
//
// On platforms with RABALDR_SIDEALLOC_V128, the register set does not
// represent SIMD registers. Instead, we allocate and free these registers as
// doubles and change the kind to Simd128 while the register is exposed to
// masm. (This is the case on ARM64 for now, and is a consequence of needing
// more than 64 bits for FloatRegisters::SetType to represent SIMD registers.
// See lengthy comment in Architecture-arm64.h.)
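// For example, on ARM64 needV128() below allocates the underlying register as
// a Double and rewraps it:
//
//   RegV128 v = RegV128::fromDouble(allocFPU<MIRType::Double>());
//
// When `v` is handed to masm it converts back to a Simd128-kind FloatRegister
// via RegV128::operator FloatRegister(), and freeV128() goes through
// v.asDouble(), so availFPU only ever sees Double-kind registers.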
BaseCompilerInterface* bc;
AllocatableGeneralRegisterSet availGPR;
AllocatableFloatRegisterSet availFPU;
#ifdef DEBUG
// The registers available after removing ScratchReg, HeapReg, etc.
AllocatableGeneralRegisterSet allGPR;
AllocatableFloatRegisterSet allFPU;
uint32_t scratchTaken;
#endif
#ifdef JS_CODEGEN_X86
AllocatableGeneralRegisterSet singleByteRegs;
#endif
bool hasGPR() { return !availGPR.empty(); }
bool hasGPR64() {
#ifdef JS_PUNBOX64
return !availGPR.empty();
#else
if (availGPR.empty()) {
return false;
}
Register r = allocGPR();
bool available = !availGPR.empty();
freeGPR(r);
return available;
#endif
}
template <MIRType t>
bool hasFPU() {
#ifdef RABALDR_SIDEALLOC_V128
// Workaround for GCC problem, bug 1677690
if constexpr (t == MIRType::Simd128) {
MOZ_CRASH("Should not happen");
} else
#endif
{
return availFPU.hasAny<RegTypeOf<t>::value>();
}
}
bool isAvailableGPR(Register r) { return availGPR.has(r); }
bool isAvailableFPU(FloatRegister r) {
#ifdef RABALDR_SIDEALLOC_V128
MOZ_ASSERT(!r.isSimd128());
#endif
return availFPU.has(r);
}
void allocGPR(Register r) {
MOZ_ASSERT(isAvailableGPR(r));
availGPR.take(r);
}
Register allocGPR() {
MOZ_ASSERT(hasGPR());
return availGPR.takeAny();
}
void allocInt64(Register64 r) {
#ifdef JS_PUNBOX64
allocGPR(r.reg);
#else
allocGPR(r.low);
allocGPR(r.high);
#endif
}
Register64 allocInt64() {
MOZ_ASSERT(hasGPR64());
#ifdef JS_PUNBOX64
return Register64(availGPR.takeAny());
#else
Register high = availGPR.takeAny();
Register low = availGPR.takeAny();
return Register64(high, low);
#endif
}
#ifdef JS_CODEGEN_ARM
// r12 is normally the ScratchRegister and r13 is always the stack pointer,
// so the highest possible pair has r10 as the even-numbered register.
static constexpr uint32_t PAIR_LIMIT = 10;
bool hasGPRPair() {
for (uint32_t i = 0; i <= PAIR_LIMIT; i += 2) {
if (isAvailableGPR(Register::FromCode(i)) &&
isAvailableGPR(Register::FromCode(i + 1))) {
return true;
}
}
return false;
}
void allocGPRPair(Register* low, Register* high) {
MOZ_ASSERT(hasGPRPair());
for (uint32_t i = 0; i <= PAIR_LIMIT; i += 2) {
if (isAvailableGPR(Register::FromCode(i)) &&
isAvailableGPR(Register::FromCode(i + 1))) {
*low = Register::FromCode(i);
*high = Register::FromCode(i + 1);
allocGPR(*low);
allocGPR(*high);
return;
}
}
MOZ_CRASH("No pair");
}
#endif
void allocFPU(FloatRegister r) {
#ifdef RABALDR_SIDEALLOC_V128
MOZ_ASSERT(!r.isSimd128());
#endif
MOZ_ASSERT(isAvailableFPU(r));
availFPU.take(r);
}
template <MIRType t>
FloatRegister allocFPU() {
#ifdef RABALDR_SIDEALLOC_V128
// Workaround for GCC problem, bug 1677690
if constexpr (t == MIRType::Simd128) {
MOZ_CRASH("Should not happen");
} else
#endif
{
return availFPU.takeAny<RegTypeOf<t>::value>();
}
}
void freeGPR(Register r) { availGPR.add(r); }
void freeInt64(Register64 r) {
#ifdef JS_PUNBOX64
freeGPR(r.reg);
#else
freeGPR(r.low);
freeGPR(r.high);
#endif
}
void freeFPU(FloatRegister r) {
#ifdef RABALDR_SIDEALLOC_V128
MOZ_ASSERT(!r.isSimd128());
#endif
availFPU.add(r);
}
public:
explicit BaseRegAlloc()
: bc(nullptr),
availGPR(GeneralRegisterSet::All()),
availFPU(FloatRegisterSet::All())
#ifdef DEBUG
,
scratchTaken(0)
#endif
#ifdef JS_CODEGEN_X86
,
singleByteRegs(GeneralRegisterSet(Registers::SingleByteRegs))
#endif
{
RegisterAllocator::takeWasmRegisters(availGPR);
// Allocate any private scratch registers.
#if defined(RABALDR_SCRATCH_I32)
if (RabaldrScratchI32 != RegI32::Invalid()) {
availGPR.take(RabaldrScratchI32);
}
#endif
#ifdef RABALDR_SCRATCH_F32_ALIASES_F64
static_assert(RabaldrScratchF32 != InvalidFloatReg, "Float reg definition");
static_assert(RabaldrScratchF64 != InvalidFloatReg, "Float reg definition");
#endif
#if defined(RABALDR_SCRATCH_F32) && !defined(RABALDR_SCRATCH_F32_ALIASES_F64)
if (RabaldrScratchF32 != RegF32::Invalid()) {
availFPU.take(RabaldrScratchF32);
}
#endif
#if defined(RABALDR_SCRATCH_F64)
# ifdef RABALDR_SCRATCH_F32_ALIASES_F64
MOZ_ASSERT(availFPU.has(RabaldrScratchF32));
# endif
if (RabaldrScratchF64 != RegF64::Invalid()) {
availFPU.take(RabaldrScratchF64);
}
# ifdef RABALDR_SCRATCH_F32_ALIASES_F64
MOZ_ASSERT(!availFPU.has(RabaldrScratchF32));
# endif
#endif
#ifdef DEBUG
allGPR = availGPR;
allFPU = availFPU;
#endif
}
void init(BaseCompilerInterface* bc) { this->bc = bc; }
enum class ScratchKind { I32 = 1, F32 = 2, F64 = 4, V128 = 8 };
#ifdef DEBUG
bool isScratchRegisterTaken(ScratchKind s) const {
return (scratchTaken & uint32_t(s)) != 0;
}
void setScratchRegisterTaken(ScratchKind s, bool state) {
if (state) {
scratchTaken |= uint32_t(s);
} else {
scratchTaken &= ~uint32_t(s);
}
}
#endif
#ifdef JS_CODEGEN_X86
bool isSingleByteI32(Register r) { return singleByteRegs.has(r); }
#endif
bool isAvailableI32(RegI32 r) { return isAvailableGPR(r); }
bool isAvailableI64(RegI64 r) {
#ifdef JS_PUNBOX64
return isAvailableGPR(r.reg);
#else
return isAvailableGPR(r.low) && isAvailableGPR(r.high);
#endif
}
bool isAvailablePtr(RegPtr r) { return isAvailableGPR(r); }
bool isAvailableF32(RegF32 r) { return isAvailableFPU(r); }
bool isAvailableF64(RegF64 r) { return isAvailableFPU(r); }
#ifdef ENABLE_WASM_SIMD
# ifdef RABALDR_SIDEALLOC_V128
bool isAvailableV128(RegV128 r) { return isAvailableFPU(r.asDouble()); }
# else
bool isAvailableV128(RegV128 r) { return isAvailableFPU(r); }
# endif
#endif
// TODO / OPTIMIZE (Bug 1316802): Do not sync everything on allocation
// failure, only as much as we need.
[[nodiscard]] RegI32 needI32() {
if (!hasGPR()) {
bc->sync();
}
return RegI32(allocGPR());
}
void needI32(RegI32 specific) {
if (!isAvailableI32(specific)) {
bc->sync();
}
allocGPR(specific);
}
[[nodiscard]] RegI64 needI64() {
if (!hasGPR64()) {
bc->sync();
}
return RegI64(allocInt64());
}
void needI64(RegI64 specific) {
if (!isAvailableI64(specific)) {
bc->sync();
}
allocInt64(specific);
}
[[nodiscard]] RegPtr needPtr() {
if (!hasGPR()) {
bc->sync();
}
return RegPtr(allocGPR());
}
void needPtr(RegPtr specific) {
if (!isAvailablePtr(specific)) {
bc->sync();
}
allocGPR(specific);
}
// Use when you need a register for a short time but explicitly want to avoid
// a full sync().
[[nodiscard]] RegPtr needTempPtr(RegPtr fallback, bool* saved) {
if (hasGPR()) {
*saved = false;
return RegPtr(allocGPR());
}
*saved = true;
bc->saveTempPtr(fallback);
MOZ_ASSERT(isAvailablePtr(fallback));
allocGPR(fallback);
return RegPtr(fallback);
}
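// Illustrative pairing with freeTempPtr() below (a sketch; `fallback` stands
// for whatever register the caller is prepared to sacrifice):
//
//   bool saved;
//   RegPtr temp = needTempPtr(fallback, &saved);
//   ... brief use of temp, with no intervening allocation ...
//   freeTempPtr(temp, saved);
//
// If the fallback had to be commandeered, freeTempPtr() has the compiler
// restore its previous contents via restoreTempPtr().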
[[nodiscard]] RegF32 needF32() {
if (!hasFPU<MIRType::Float32>()) {
bc->sync();
}
return RegF32(allocFPU<MIRType::Float32>());
}
void needF32(RegF32 specific) {
if (!isAvailableF32(specific)) {
bc->sync();
}
allocFPU(specific);
}
[[nodiscard]] RegF64 needF64() {
if (!hasFPU<MIRType::Double>()) {
bc->sync();
}
return RegF64(allocFPU<MIRType::Double>());
}
void needF64(RegF64 specific) {
if (!isAvailableF64(specific)) {
bc->sync();
}
allocFPU(specific);
}
#ifdef ENABLE_WASM_SIMD
[[nodiscard]] RegV128 needV128() {
# ifdef RABALDR_SIDEALLOC_V128
if (!hasFPU<MIRType::Double>()) {
bc->sync();
}
return RegV128::fromDouble(allocFPU<MIRType::Double>());
# else
if (!hasFPU<MIRType::Simd128>()) {
bc->sync();
}
return RegV128(allocFPU<MIRType::Simd128>());
# endif
}
void needV128(RegV128 specific) {
# ifdef RABALDR_SIDEALLOC_V128
if (!isAvailableV128(specific)) {
bc->sync();
}
allocFPU(specific.asDouble());
# else
if (!isAvailableV128(specific)) {
bc->sync();
}
allocFPU(specific);
# endif
}
#endif
void freeI32(RegI32 r) { freeGPR(r); }
void freeI64(RegI64 r) { freeInt64(r); }
void freePtr(RegPtr r) { freeGPR(r); }
void freeF64(RegF64 r) { freeFPU(r); }
void freeF32(RegF32 r) { freeFPU(r); }
#ifdef ENABLE_WASM_SIMD
void freeV128(RegV128 r) {
# ifdef RABALDR_SIDEALLOC_V128
freeFPU(r.asDouble());
# else
freeFPU(r);
# endif
}
#endif
void freeTempPtr(RegPtr r, bool saved) {
freePtr(r);
if (saved) {
bc->restoreTempPtr(r);
MOZ_ASSERT(!isAvailablePtr(r));
}
}
#ifdef JS_CODEGEN_ARM
[[nodiscard]] RegI64 needI64Pair() {
if (!hasGPRPair()) {
bc->sync();
}
Register low, high;
allocGPRPair(&low, &high);
return RegI64(Register64(high, low));
}
#endif
#ifdef DEBUG
friend class LeakCheck;
class MOZ_RAII LeakCheck {
private:
const BaseRegAlloc& ra;
AllocatableGeneralRegisterSet knownGPR_;
AllocatableFloatRegisterSet knownFPU_;
public:
explicit LeakCheck(const BaseRegAlloc& ra) : ra(ra) {
knownGPR_ = ra.availGPR;
knownFPU_ = ra.availFPU;
}
~LeakCheck() {
MOZ_ASSERT(knownGPR_.bits() == ra.allGPR.bits());
MOZ_ASSERT(knownFPU_.bits() == ra.allFPU.bits());
}
void addKnownI32(RegI32 r) { knownGPR_.add(r); }
void addKnownI64(RegI64 r) {
# ifdef JS_PUNBOX64
knownGPR_.add(r.reg);
# else
knownGPR_.add(r.high);
knownGPR_.add(r.low);
# endif
}
void addKnownF32(RegF32 r) { knownFPU_.add(r); }
void addKnownF64(RegF64 r) { knownFPU_.add(r); }
# ifdef ENABLE_WASM_SIMD
void addKnownV128(RegV128 r) {
# ifdef RABALDR_SIDEALLOC_V128
knownFPU_.add(r.asDouble());
# else
knownFPU_.add(r);
# endif
}
# endif
void addKnownRef(RegPtr r) { knownGPR_.add(r); }
};
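// Debug-only usage sketch: at a point where only known registers should be
// live (eg a control-flow join), construct a LeakCheck, report the registers
// that are legitimately held, and let the destructor assert that nothing else
// is outstanding:
//
//   BaseRegAlloc::LeakCheck check(ra);
//   check.addKnownI32(someLiveReg);
//   // ~LeakCheck verifies knownGPR_/knownFPU_ against allGPR/allFPU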
#endif
};
// Scratch register abstractions.
//
// We define our own scratch registers when the platform doesn't provide what we
// need. A notable use case is that we will need a private scratch register
// when the platform masm uses its scratch register very frequently (eg, ARM).
class BaseScratchRegister {
#ifdef DEBUG
BaseRegAlloc& ra;
BaseRegAlloc::ScratchKind kind_;
public:
explicit BaseScratchRegister(BaseRegAlloc& ra, BaseRegAlloc::ScratchKind kind)
: ra(ra), kind_(kind) {
MOZ_ASSERT(!ra.isScratchRegisterTaken(kind_));
ra.setScratchRegisterTaken(kind_, true);
}
~BaseScratchRegister() {
MOZ_ASSERT(ra.isScratchRegisterTaken(kind_));
ra.setScratchRegisterTaken(kind_, false);
}
#else
public:
explicit BaseScratchRegister(BaseRegAlloc& ra,
BaseRegAlloc::ScratchKind kind) {}
#endif
};
#ifdef ENABLE_WASM_SIMD
# ifdef RABALDR_SCRATCH_V128
class ScratchV128 : public BaseScratchRegister {
public:
explicit ScratchV128(BaseRegAlloc& ra)
: BaseScratchRegister(ra, BaseRegAlloc::ScratchKind::V128) {}
operator RegV128() const { return RegV128(RabaldrScratchV128); }
};
# else
class ScratchV128 : public ScratchSimd128Scope {
public:
explicit ScratchV128(MacroAssembler& m) : ScratchSimd128Scope(m) {}
operator RegV128() const { return RegV128(FloatRegister(*this)); }
};
# endif
#endif
#ifdef RABALDR_SCRATCH_F64
class ScratchF64 : public BaseScratchRegister {
public:
explicit ScratchF64(BaseRegAlloc& ra)
: BaseScratchRegister(ra, BaseRegAlloc::ScratchKind::F64) {}
operator RegF64() const { return RegF64(RabaldrScratchF64); }
};
#else
class ScratchF64 : public ScratchDoubleScope {
public:
explicit ScratchF64(MacroAssembler& m) : ScratchDoubleScope(m) {}
operator RegF64() const { return RegF64(FloatRegister(*this)); }
};
#endif
#ifdef RABALDR_SCRATCH_F32
class ScratchF32 : public BaseScratchRegister {
public:
explicit ScratchF32(BaseRegAlloc& ra)
: BaseScratchRegister(ra, BaseRegAlloc::ScratchKind::F32) {}
operator RegF32() const { return RegF32(RabaldrScratchF32); }
};
#else
class ScratchF32 : public ScratchFloat32Scope {
public:
explicit ScratchF32(MacroAssembler& m) : ScratchFloat32Scope(m) {}
operator RegF32() const { return RegF32(FloatRegister(*this)); }
};
#endif
#ifdef RABALDR_SCRATCH_I32
template <class RegType>
class ScratchGPR : public BaseScratchRegister {
public:
explicit ScratchGPR(BaseRegAlloc& ra)
: BaseScratchRegister(ra, BaseRegAlloc::ScratchKind::I32) {}
operator RegType() const { return RegType(RabaldrScratchI32); }
};
#else
template <class RegType>
class ScratchGPR : public ScratchRegisterScope {
public:
explicit ScratchGPR(MacroAssembler& m) : ScratchRegisterScope(m) {}
operator RegType() const { return RegType(Register(*this)); }
};
#endif
using ScratchI32 = ScratchGPR<RegI32>;
using ScratchPtr = ScratchGPR<RegPtr>;
#if defined(JS_CODEGEN_X86)
// ScratchEBX is a mnemonic device: For some atomic ops we really need EBX,
// no other register will do. And we would normally have to allocate that
// register using ScratchI32 since normally the scratch register is EBX.
// But the whole point of ScratchI32 is to hide that relationship. By using
// the ScratchEBX alias, we document that at that point we require the
// scratch register to be EBX.
using ScratchEBX = ScratchI32;
// ScratchI8 is a mnemonic device: For some ops we need a register with a
// byte subregister.
using ScratchI8 = ScratchI32;
#endif
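// Illustrative use (a sketch): a scratch register is claimed for a short,
// RAII-delimited span and must not escape it. Depending on the platform the
// constructor takes the BaseRegAlloc (private scratch) or the MacroAssembler
// (platform scratch); BaseCompiler call sites typically just pass `*this`,
// which converts to whichever is required.
//
//   {
//     ScratchI32 scratch(*this);
//     masm.move32(Imm32(0), scratch);
//     // ... no register allocation may happen while `scratch` is live ...
//   }  // scratch released here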
// The stack frame.
//
// The stack frame has four parts ("below" means at lower addresses):
//
// - the Frame element;
// - the Local area, including the DebugFrame element and possibly a spilled
// pointer to stack results, if any; allocated below the header with various
// forms of alignment;
// - the Dynamic area, comprising the temporary storage the compiler uses for
// register spilling, allocated below the Local area;
// - the Arguments area, comprising memory allocated for outgoing calls,
// allocated below the Dynamic area.
//
// +==============================+
// | Incoming stack arg |
// | ... |
// ------------- +==============================+
// | Frame (fixed size) |
// ------------- +==============================+ <-------------------- FP
// ^ | DebugFrame (optional) | ^ ^ ^^
// localSize | Register arg local | | | ||
// | | ... | | | framePushed
// | | Register stack result ptr?| | | ||
// | | Non-arg local | | | ||
// | | ... | | | ||
// | | (padding) | | | ||
// | | Tls pointer | | | ||
// | +------------------------------+ | | ||
// v | (padding) | | v ||
// ------------- +==============================+ currentStackHeight ||
// ^ | Dynamic (variable size) | | ||
// dynamicSize | ... | | ||
// v | ... | v ||
// ------------- | (free space, sometimes) | --------- v|
// +==============================+ <----- SP not-during calls
// | Arguments (sometimes) | |
// | ... | v
// +==============================+ <----- SP during calls
//
// The Frame is addressed off the stack pointer. masm.framePushed() is always
// correct, and masm.getStackPointer() + masm.framePushed() always addresses the
// Frame, with the DebugFrame optionally below it.
//
// The Local area (including the DebugFrame and, if needed, the spilled value of
// the stack results area pointer) is laid out by BaseLocalIter and is allocated
// and deallocated by standard prologue and epilogue functions that manipulate
// the stack pointer, but it is accessed via BaseStackFrame.
//
// The Dynamic area is maintained by and accessed via BaseStackFrame. On some
// systems (such as ARM64), the Dynamic memory may be allocated in chunks
// because the SP needs a specific alignment, and in this case there will
// normally be some free space directly above the SP. The stack height does not
// include the free space, it reflects the logically used space only.
//
// The Dynamic area is where space for stack results is allocated when calling
// functions that return results on the stack. If a function has stack results,
// a pointer to the low address of the stack result area is passed as an
// additional argument, according to the usual ABI. See
// ABIResultIter::HasStackResults.
//
// The Arguments area is allocated and deallocated via BaseStackFrame (see
// comments later) but is accessed directly off the stack pointer.
// BaseLocalIter iterates over a vector of types of locals and provides offsets
// from the Frame address for those locals, and associated data.
//
// The implementation of BaseLocalIter is the property of the BaseStackFrame.
// But it is also exposed for eg the debugger to use.
BaseLocalIter::BaseLocalIter(const ValTypeVector& locals,
const ArgTypeVector& args, bool debugEnabled)
: locals_(locals),
args_(args),
argsIter_(args_),
index_(0),
nextFrameSize_(debugEnabled ? DebugFrame::offsetOfFrame() : 0),
frameOffset_(INT32_MAX),
stackResultPointerOffset_(INT32_MAX),
mirType_(MIRType::Undefined),
done_(false) {
MOZ_ASSERT(args.lengthWithoutStackResults() <= locals.length());
settle();
}
int32_t BaseLocalIter::pushLocal(size_t nbytes) {
MOZ_ASSERT(nbytes % 4 == 0 && nbytes <= 16);
nextFrameSize_ = AlignBytes(frameSize_, nbytes) + nbytes;
return nextFrameSize_; // Locals grow down so capture base address.
}
void BaseLocalIter::settle() {
MOZ_ASSERT(!done_);
frameSize_ = nextFrameSize_;
if (!argsIter_.done()) {
mirType_ = argsIter_.mirType();
MIRType concreteType = mirType_;
switch (mirType_) {
case MIRType::StackResults:
// The pointer to stack results is handled like any other argument:
// either addressed in place if it is passed on the stack, or we spill
// it in the frame if it's in a register.
MOZ_ASSERT(args_.isSyntheticStackResultPointerArg(index_));
concreteType = MIRType::Pointer;
[[fallthrough]];
case MIRType::Int32:
case MIRType::Int64:
case MIRType::Double:
case MIRType::Float32:
case MIRType::RefOrNull:
#ifdef ENABLE_WASM_SIMD
case MIRType::Simd128:
#endif
if (argsIter_->argInRegister()) {
frameOffset_ = pushLocal(MIRTypeToSize(concreteType));
} else {
frameOffset_ = -(argsIter_->offsetFromArgBase() + sizeof(Frame));
}
break;
default:
MOZ_CRASH("Argument type");
}
if (mirType_ == MIRType::StackResults) {
stackResultPointerOffset_ = frameOffset();
// Advance past the synthetic stack result pointer argument and fall
// through to the next case.
argsIter_++;
frameSize_ = nextFrameSize_;
MOZ_ASSERT(argsIter_.done());
} else {
return;
}
}
if (index_ < locals_.length()) {
switch (locals_[index_].kind()) {
case ValType::I32:
case ValType::I64:
case ValType::F32:
case ValType::F64:
#ifdef ENABLE_WASM_SIMD
case ValType::V128:
#endif
case ValType::Ref:
// TODO/AnyRef-boxing: With boxed immediates and strings, the
// debugger must be made aware that AnyRef != Pointer.
ASSERT_ANYREF_IS_JSOBJECT;
mirType_ = ToMIRType(locals_[index_]);
frameOffset_ = pushLocal(MIRTypeToSize(mirType_));
break;
default:
MOZ_CRASH("Compiler bug: Unexpected local type");
}
return;
}
done_ = true;
}
void BaseLocalIter::operator++(int) {
MOZ_ASSERT(!done_);
index_++;
if (!argsIter_.done()) {
argsIter_++;
}
settle();
}
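// Worked example (a sketch, assuming a 64-bit target where both arguments
// arrive in registers and debugging is disabled): for a function taking
// (i32, f64) with one additional i32 local, pushLocal() yields
//
//   arg 0 (i32):   AlignBytes(0, 4)  + 4 = 4
//   arg 1 (f64):   AlignBytes(4, 8)  + 8 = 16
//   local (i32):   AlignBytes(16, 4) + 4 = 20
//
// so the locals occupy frame offsets (0,4], (8,16] and (16,20], and the final
// frameSize() is 20 before BaseStackFrame::setupLocals() adds the TLS slot and
// alignment padding.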
// Abstraction of the height of the stack frame, to avoid type confusion.
class StackHeight {
friend class BaseStackFrameAllocator;
uint32_t height;
public:
explicit StackHeight(uint32_t h) : height(h) {}
static StackHeight Invalid() { return StackHeight(UINT32_MAX); }
bool isValid() const { return height != UINT32_MAX; }
bool operator==(StackHeight rhs) const {
MOZ_ASSERT(isValid() && rhs.isValid());
return height == rhs.height;
}
bool operator!=(StackHeight rhs) const { return !(*this == rhs); }
};
// Abstraction for where multi-value results go on the machine stack.
class StackResultsLoc {
uint32_t bytes_;
size_t count_;
Maybe<uint32_t> height_;
public:
StackResultsLoc() : bytes_(0), count_(0){};
StackResultsLoc(uint32_t bytes, size_t count, uint32_t height)
: bytes_(bytes), count_(count), height_(Some(height)) {
MOZ_ASSERT(bytes != 0);
MOZ_ASSERT(count != 0);
MOZ_ASSERT(height != 0);
}
uint32_t bytes() const { return bytes_; }
uint32_t count() const { return count_; }
uint32_t height() const { return height_.value(); }
bool hasStackResults() const { return bytes() != 0; }
StackResults stackResults() const {
return hasStackResults() ? StackResults::HasStackResults
: StackResults::NoStackResults;
}
};
// Abstraction of the baseline compiler's stack frame (except for the Frame /
// DebugFrame parts). See comments above for more. Remember, "below" on the
// stack means at lower addresses.
//
// The abstraction is split into two parts: BaseStackFrameAllocator is
// responsible for allocating and deallocating space on the stack and for
// performing computations that are affected by how the allocation is performed;
// BaseStackFrame then provides a pleasant interface for stack frame management.
class BaseStackFrameAllocator {
MacroAssembler& masm;
#ifdef RABALDR_CHUNKY_STACK
// On platforms that require the stack pointer to be aligned on a boundary
// greater than the typical stack item (eg, ARM64 requires 16-byte alignment
// but items are 8 bytes), allocate stack memory in chunks, and use a
// separate stack height variable to track the effective stack pointer
// within the allocated area. Effectively, there's a variable amount of
// free space directly above the stack pointer. See diagram above.
// The following must be true in order for the stack height to be
// predictable at control flow joins:
//
// - The Local area is always aligned according to WasmStackAlignment, ie,
// masm.framePushed() % WasmStackAlignment is zero after allocating
// locals.
//
// - ChunkSize is always a multiple of WasmStackAlignment.
//
// - Pushing and popping are always in units of ChunkSize (hence preserving
// alignment).
//
// - The free space on the stack (masm.framePushed() - currentStackHeight_)
// is a predictable (nonnegative) amount.
// As an optimization, we pre-allocate some space on the stack; the size of
// this allocation is InitialChunk, and it must be a multiple of ChunkSize.
// It is allocated as part of the function prologue and deallocated as part
// of the epilogue, along with the locals.
//
// If ChunkSize is too large then we risk overflowing the stack on simple
// recursions with few live values where stack overflow should not be a
// risk; if it is too small we spend too much time adjusting the stack
// pointer.
//
// Good values for ChunkSize are the subject of future empirical analysis;
// eight words is just an educated guess.
static constexpr uint32_t ChunkSize = 8 * sizeof(void*);
static constexpr uint32_t InitialChunk = ChunkSize;
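// Worked example (assuming a 64-bit target, so ChunkSize is 64 bytes): if
// currentStackHeight_ equals masm.framePushed() (no free space), then
// pushChunkyBytes(8) reserves one 64-byte chunk and records 8 bytes as used,
// leaving 56 bytes of free space above the logical stack top; the next seven
// 8-byte pushes are satisfied from that free space without adjusting SP.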
// The current logical height of the frame is
// currentStackHeight_ = localSize_ + dynamicSize
// where dynamicSize is not accounted for explicitly and localSize_ also
// includes size for the DebugFrame.
//
// The allocated size of the frame, provided by masm.framePushed(), is usually
// larger than currentStackHeight_, notably at the beginning of execution when
// we've allocated InitialChunk extra space.
uint32_t currentStackHeight_;
#endif
// Size of the Local area in bytes (stable after BaseCompiler::init() has
// called BaseStackFrame::setupLocals(), which in turn calls
// BaseStackFrameAllocator::setLocalSize()), always rounded to the proper
// stack alignment. The Local area is then allocated in beginFunction(),
// following the allocation of the Header. See onFixedStackAllocated()
// below.
uint32_t localSize_;
protected:
///////////////////////////////////////////////////////////////////////////
//
// Initialization
explicit BaseStackFrameAllocator(MacroAssembler& masm)
: masm(masm),
#ifdef RABALDR_CHUNKY_STACK
currentStackHeight_(0),
#endif
localSize_(UINT32_MAX) {
}
protected:
//////////////////////////////////////////////////////////////////////
//
// The Local area - the static part of the frame.
// Record the size of the Local area, once it is known.
void setLocalSize(uint32_t localSize) {
MOZ_ASSERT(localSize == AlignBytes(localSize, sizeof(void*)),
"localSize_ should be aligned to at least a pointer");
MOZ_ASSERT(localSize_ == UINT32_MAX);
localSize_ = localSize;
}
// Record the current stack height, after it has become stable in
// beginFunction(). See also BaseStackFrame::onFixedStackAllocated().
void onFixedStackAllocated() {
MOZ_ASSERT(localSize_ != UINT32_MAX);
#ifdef RABALDR_CHUNKY_STACK
currentStackHeight_ = localSize_;
#endif
}
public:
// The fixed amount of memory, in bytes, allocated on the stack below the
// Header for purposes such as locals and other fixed values. Includes all
// necessary alignment, and on ARM64 also the initial chunk for the working
// stack memory.
uint32_t fixedAllocSize() const {
MOZ_ASSERT(localSize_ != UINT32_MAX);
#ifdef RABALDR_CHUNKY_STACK
return localSize_ + InitialChunk;
#else
return localSize_;
#endif
}
#ifdef RABALDR_CHUNKY_STACK
// The allocated frame size is frequently larger than the logical stack
// height; we round up to a chunk boundary, and special case the initial
// chunk.
uint32_t framePushedForHeight(uint32_t logicalHeight) {
if (logicalHeight <= fixedAllocSize()) {
return fixedAllocSize();
}
return fixedAllocSize() +
AlignBytes(logicalHeight - fixedAllocSize(), ChunkSize);
}
#endif
protected:
//////////////////////////////////////////////////////////////////////
//
// The Dynamic area - the dynamic part of the frame, for spilling and saving
// intermediate values.
// Offset off of sp_ for the slot at stack area location `offset`.
int32_t stackOffset(int32_t offset) {
MOZ_ASSERT(offset > 0);
return masm.framePushed() - offset;
}
uint32_t computeHeightWithStackResults(StackHeight stackBase,
uint32_t stackResultBytes) {
MOZ_ASSERT(stackResultBytes);
MOZ_ASSERT(currentStackHeight() >= stackBase.height);
return stackBase.height + stackResultBytes;
}
#ifdef RABALDR_CHUNKY_STACK
void pushChunkyBytes(uint32_t bytes) {
checkChunkyInvariants();
uint32_t freeSpace = masm.framePushed() - currentStackHeight_;
if (freeSpace < bytes) {
uint32_t bytesToReserve = AlignBytes(bytes - freeSpace, ChunkSize);
MOZ_ASSERT(bytesToReserve + freeSpace >= bytes);
masm.reserveStack(bytesToReserve);
}
currentStackHeight_ += bytes;
checkChunkyInvariants();
}
void popChunkyBytes(uint32_t bytes) {
checkChunkyInvariants();
currentStackHeight_ -= bytes;
// Sometimes, popChunkyBytes() is used to pop a larger area, as when we drop
// values consumed by a call, and we may need to drop several chunks. But
// never drop the initial chunk. Crucially, the amount we drop is always an
// integral number of chunks.
uint32_t freeSpace = masm.framePushed() - currentStackHeight_;
if (freeSpace >= ChunkSize) {
uint32_t targetAllocSize = framePushedForHeight(currentStackHeight_);
uint32_t amountToFree = masm.framePushed() - targetAllocSize;
MOZ_ASSERT(amountToFree % ChunkSize == 0);
if (amountToFree) {
masm.freeStack(amountToFree);
}
}
checkChunkyInvariants();
}
#endif
uint32_t currentStackHeight() const {
#ifdef RABALDR_CHUNKY_STACK
return currentStackHeight_;
#else
return masm.framePushed();
#endif
}
private:
#ifdef RABALDR_CHUNKY_STACK
void checkChunkyInvariants() {
MOZ_ASSERT(masm.framePushed() >= fixedAllocSize());
MOZ_ASSERT(masm.framePushed() >= currentStackHeight_);
MOZ_ASSERT(masm.framePushed() == fixedAllocSize() ||
masm.framePushed() - currentStackHeight_ < ChunkSize);
MOZ_ASSERT((masm.framePushed() - localSize_) % ChunkSize == 0);
}
#endif
// For a given stack height, return the appropriate size of the allocated
// frame.
uint32_t framePushedForHeight(StackHeight stackHeight) {
#ifdef RABALDR_CHUNKY_STACK
// A more complicated adjustment is needed.
return framePushedForHeight(stackHeight.height);
#else
// The allocated frame size equals the stack height.
return stackHeight.height;
#endif
}
public:
// The current height of the stack area, not necessarily zero-based, in a
// type-safe way.
StackHeight stackHeight() const { return StackHeight(currentStackHeight()); }
// Set the frame height to a previously recorded value.
void setStackHeight(StackHeight amount) {
#ifdef RABALDR_CHUNKY_STACK
currentStackHeight_ = amount.height;
masm.setFramePushed(framePushedForHeight(amount));
checkChunkyInvariants();
#else
masm.setFramePushed(amount.height);
#endif
}
// The current height of the dynamic part of the stack area (ie, the backing
// store for the evaluation stack), zero-based.
uint32_t dynamicHeight() const { return currentStackHeight() - localSize_; }
// Before branching to an outer control label, pop the execution stack to
// the level expected by that region, but do not update masm.framePushed()
// as that will happen as compilation leaves the block.
//
// Note these operate directly on the stack pointer register.
void popStackBeforeBranch(StackHeight destStackHeight,
uint32_t stackResultBytes) {
uint32_t framePushedHere = masm.framePushed();
StackHeight heightThere =
StackHeight(destStackHeight.height + stackResultBytes);
uint32_t framePushedThere = framePushedForHeight(heightThere);
if (framePushedHere > framePushedThere) {
masm.addToStackPtr(Imm32(framePushedHere - framePushedThere));
}
}
void popStackBeforeBranch(StackHeight destStackHeight, ResultType type) {
popStackBeforeBranch(destStackHeight,
ABIResultIter::MeasureStackBytes(type));
}
// Given that there are |stackParamSize| bytes on the dynamic stack
// corresponding to the stack results, return the stack height once these
// parameters are popped.
StackHeight stackResultsBase(uint32_t stackParamSize) {
return StackHeight(currentStackHeight() - stackParamSize);
}
// For most of WebAssembly, adjacent instructions have fallthrough control
// flow between them, which allows us to simply thread the current stack
// height through the compiler. There are two exceptions to this rule: when
// leaving a block via dead code, and when entering the "else" arm of an "if".
// In these cases, the stack height is the block entry height, plus any stack
// values (results in the block exit case, parameters in the else entry case).
void resetStackHeight(StackHeight destStackHeight, ResultType type) {
uint32_t height = destStackHeight.height;
height += ABIResultIter::MeasureStackBytes(type);
setStackHeight(StackHeight(height));
}
// Return offset of stack result.
uint32_t locateStackResult(const ABIResult& result, StackHeight stackBase,
uint32_t stackResultBytes) {
MOZ_ASSERT(result.onStack());
MOZ_ASSERT(result.stackOffset() + result.size() <= stackResultBytes);
uint32_t end = computeHeightWithStackResults(stackBase, stackResultBytes);
return end - result.stackOffset();
}
public:
//////////////////////////////////////////////////////////////////////
//
// The Argument area - for outgoing calls.
//
// We abstract these operations as an optimization: we can merge the freeing
// of the argument area and dropping values off the stack after a call. But
// they always amount to manipulating the real stack pointer by some amount.
//
// Note that we do not update currentStackHeight_ for this; the frame does
// not know about outgoing arguments. But we do update framePushed(), so we
// can still index into the frame below the outgoing arguments area.
// This is always equivalent to a masm.reserveStack() call.
void allocArgArea(size_t argSize) {
if (argSize) {
masm.reserveStack(argSize);
}
}
// This frees the argument area allocated by allocArgArea(), and `argSize`
// must be equal to the `argSize` argument to allocArgArea(). In addition
// we drop some values from the frame, corresponding to the values that were
// consumed by the call.
void freeArgAreaAndPopBytes(size_t argSize, size_t dropSize) {
#ifdef RABALDR_CHUNKY_STACK
// Freeing the outgoing arguments and freeing the consumed values have
// different semantics here, which is why the operation is split.
if (argSize) {
masm.freeStack(argSize);
}
popChunkyBytes(dropSize);
#else
if (argSize + dropSize) {
masm.freeStack(argSize + dropSize);
}
#endif
}
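// Typical pairing at a call site (an illustrative sketch; the real sequence in
// BaseCompiler also deals with syncing, argument marshaling, and results;
// `argSize` and `dropSize` are placeholder names):
//
//   allocArgArea(argSize);                       // reserve outgoing-arg memory
//   ... store stack arguments, emit the call ...
//   freeArgAreaAndPopBytes(argSize, dropSize);   // free args and drop consumed
//                                                // operand-stack values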
};
class BaseStackFrame final : public BaseStackFrameAllocator {
MacroAssembler& masm;
// The largest observed value of masm.framePushed(), ie, the size of the
// stack frame. Read this for its true value only when code generation is
// finished.
uint32_t maxFramePushed_;
// Patch point where we check for stack overflow.
CodeOffset stackAddOffset_;
// Low byte offset of pointer to stack results, if any.
Maybe<int32_t> stackResultsPtrOffset_;
// The offset of TLS pointer.
uint32_t tlsPointerOffset_;
// Low byte offset of local area for true locals (not parameters).
uint32_t varLow_;
// High byte offset + 1 of local area for true locals.
uint32_t varHigh_;
// The stack pointer, cached for brevity.
RegisterOrSP sp_;
public:
explicit BaseStackFrame(MacroAssembler& masm)
: BaseStackFrameAllocator(masm),
masm(masm),
maxFramePushed_(0),
stackAddOffset_(0),
tlsPointerOffset_(UINT32_MAX),
varLow_(UINT32_MAX),
varHigh_(UINT32_MAX),
sp_(masm.getStackPointer()) {}
///////////////////////////////////////////////////////////////////////////
//
// Stack management and overflow checking
// This must be called once beginFunction has allocated space for the Header
// (the Frame and DebugFrame) and the Local area, and will record the current
// frame size for internal use by the stack abstractions.
void onFixedStackAllocated() {
maxFramePushed_ = masm.framePushed();
BaseStackFrameAllocator::onFixedStackAllocated();
}
// We won't know until after we've generated code how big the frame will be
// (we may need arbitrary spill slots and outgoing param slots) so emit a
// patchable add that is patched in endFunction().
//
// Note the platform scratch register may be used by branchPtr(), so
// generally tmp must be something else.
void checkStack(Register tmp, BytecodeOffset trapOffset) {
stackAddOffset_ = masm.sub32FromStackPtrWithPatch(tmp);
Label ok;
masm.branchPtr(Assembler::Below,
Address(WasmTlsReg, offsetof(wasm::TlsData, stackLimit)),
tmp, &ok);
masm.wasmTrap(Trap::StackOverflow, trapOffset);
masm.bind(&ok);
}
void patchCheckStack() {
masm.patchSub32FromStackPtr(stackAddOffset_,
Imm32(int32_t(maxFramePushed_)));
}
// Very large frames are implausible, probably an attack.
bool checkStackHeight() {
// 512KiB should be enough, considering how Rabaldr uses the stack and
// what the standard limits are:
//
// - 1,000 parameters
// - 50,000 locals
// - 10,000 values on the eval stack (not an official limit)
//
// At sizeof(int64) bytes per slot this works out to about 480KiB.
return maxFramePushed_ <= 512 * 1024;
}
///////////////////////////////////////////////////////////////////////////
//
// Local area
struct Local {
// Type of the value.
const MIRType type;
// Byte offset from Frame "into" the locals, ie positive for true locals
// and negative for incoming args that read directly from the arg area.
// It assumes the stack is growing down and that locals are on the stack
// at lower addresses than Frame, and is the offset from Frame of the
// lowest-addressed byte of the local.
const int32_t offs;
Local(MIRType type, int32_t offs) : type(type), offs(offs) {}
bool isStackArgument() const { return offs < 0; }
};
// Profiling shows that the number of parameters and locals frequently
// touches or exceeds 8. So 16 seems like a reasonable starting point.
using LocalVector = Vector<Local, 16, SystemAllocPolicy>;
// Initialize `localInfo` based on the types of `locals` and `args`.
[[nodiscard]] bool setupLocals(const ValTypeVector& locals,
const ArgTypeVector& args, bool debugEnabled,
LocalVector* localInfo) {
if (!localInfo->reserve(locals.length())) {
return false;
}
DebugOnly<uint32_t> index = 0;
BaseLocalIter i(locals, args, debugEnabled);
for (; !i.done() && i.index() < args.lengthWithoutStackResults(); i++) {
MOZ_ASSERT(i.isArg());
MOZ_ASSERT(i.index() == index);
localInfo->infallibleEmplaceBack(i.mirType(), i.frameOffset());
index++;
}
varLow_ = i.frameSize();
for (; !i.done(); i++) {
MOZ_ASSERT(!i.isArg());
MOZ_ASSERT(i.index() == index);
localInfo->infallibleEmplaceBack(i.mirType(), i.frameOffset());
index++;
}
varHigh_ = i.frameSize();
// Reserve an additional stack slot for the TLS pointer.
const uint32_t pointerAlignedVarHigh = AlignBytes(varHigh_, sizeof(void*));
const uint32_t localSize = pointerAlignedVarHigh + sizeof(void*);
tlsPointerOffset_ = localSize;
setLocalSize(AlignBytes(localSize, WasmStackAlignment));
if (args.hasSyntheticStackResultPointerArg()) {
stackResultsPtrOffset_ = Some(i.stackResultPointerOffset());
}
return true;
}
void zeroLocals(BaseRegAlloc* ra);
Address addressOfLocal(const Local& local, uint32_t additionalOffset = 0) {
if (local.isStackArgument()) {
return Address(FramePointer,
stackArgumentOffsetFromFp(local) + additionalOffset);
}
return Address(sp_, localOffsetFromSp(local) + additionalOffset);
}
void loadLocalI32(const Local& src, RegI32 dest) {
masm.load32(addressOfLocal(src), dest);
}
#ifndef JS_PUNBOX64
void loadLocalI64Low(const Local& src, RegI32 dest) {
masm.load32(addressOfLocal(src, INT64LOW_OFFSET), dest);
}
void loadLocalI64High(const Local& src, RegI32 dest) {
masm.load32(addressOfLocal(src, INT64HIGH_OFFSET), dest);
}
#endif
void loadLocalI64(const Local& src, RegI64 dest) {
masm.load64(addressOfLocal(src), dest);
}
void loadLocalPtr(const Local& src, RegPtr dest) {
masm.loadPtr(addressOfLocal(src), dest);
}
void loadLocalF64(const Local& src, RegF64 dest) {
masm.loadDouble(addressOfLocal(src), dest);
}
void loadLocalF32(const Local& src, RegF32 dest) {
masm.loadFloat32(addressOfLocal(src), dest);
}
#ifdef ENABLE_WASM_SIMD
void loadLocalV128(const Local& src, RegV128 dest) {
masm.loadUnalignedSimd128(addressOfLocal(src), dest);
}
#endif
void storeLocalI32(RegI32 src, const Local& dest) {
masm.store32(src, addressOfLocal(dest));
}
void storeLocalI64(RegI64 src, const Local& dest) {
masm.store64(src, addressOfLocal(dest));
}
void storeLocalPtr(Register src, const Local& dest) {
masm.storePtr(src, addressOfLocal(dest));
}
void storeLocalF64(RegF64 src, const Local& dest) {
masm.storeDouble(src, addressOfLocal(dest));
}
void storeLocalF32(RegF32 src, const Local& dest) {
masm.storeFloat32(src, addressOfLocal(dest));
}
#ifdef ENABLE_WASM_SIMD
void storeLocalV128(RegV128 src, const Local& dest) {
masm.storeUnalignedSimd128(src, addressOfLocal(dest));
}
#endif
// Offset off of sp_ for `local`.
int32_t localOffsetFromSp(const Local& local) {
MOZ_ASSERT(!local.isStackArgument());
return localOffset(local.offs);
}
// Offset off of frame pointer for `stack argument`.
int32_t stackArgumentOffsetFromFp(const Local& local) {
MOZ_ASSERT(local.isStackArgument());
return -local.offs;
}
// The incoming stack result area pointer is for stack results of the function
// being compiled.
void loadIncomingStackResultAreaPtr(RegPtr reg) {
const int32_t offset = stackResultsPtrOffset_.value();
Address src = offset < 0 ? Address(FramePointer, -offset)
: Address(sp_, stackOffset(offset));
masm.loadPtr(src, reg);
}
void storeIncomingStackResultAreaPtr(RegPtr reg) {
// If we get here, that means the pointer to the stack results area was
// passed in as a register, and therefore it will be spilled below the
// frame, so the offset is a positive height.
MOZ_ASSERT(stackResultsPtrOffset_.value() > 0);
masm.storePtr(reg,
Address(sp_, stackOffset(stackResultsPtrOffset_.value())));
}
void loadTlsPtr(Register dst) {
masm.loadPtr(Address(sp_, stackOffset(tlsPointerOffset_)), dst);
}
void storeTlsPtr(Register tls) {
masm.storePtr(tls, Address(sp_, stackOffset(tlsPointerOffset_)));
}
int32_t getTlsPtrOffset() { return stackOffset(tlsPointerOffset_); }
// An outgoing stack result area pointer is for stack results of callees of
// the function being compiled.
void computeOutgoingStackResultAreaPtr(const StackResultsLoc& results,
RegPtr dest) {
MOZ_ASSERT(results.height() <= masm.framePushed());
uint32_t offsetFromSP = masm.framePushed() - results.height();
masm.moveStackPtrTo(dest);
if (offsetFromSP) {
masm.addPtr(Imm32(offsetFromSP), dest);
}
}
private:
// Offset off of sp_ for a local with offset `offset` from Frame.
int32_t localOffset(int32_t offset) { return masm.framePushed() - offset; }
public:
///////////////////////////////////////////////////////////////////////////
//
// Dynamic area
static constexpr size_t StackSizeOfPtr = ABIResult::StackSizeOfPtr;
static constexpr size_t StackSizeOfInt64 = ABIResult::StackSizeOfInt64;
static constexpr size_t StackSizeOfFloat = ABIResult::StackSizeOfFloat;
static constexpr size_t StackSizeOfDouble = ABIResult::StackSizeOfDouble;
#ifdef ENABLE_WASM_SIMD
static constexpr size_t StackSizeOfV128 = ABIResult::StackSizeOfV128;
#endif
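// Note: in the RABALDR_CHUNKY_STACK configuration the push/pop helpers below
// adjust the stack via pushChunkyBytes/popChunkyBytes and then store/load at
// the current logical height, rather than using the machine push/pop;
// presumably this is so SP can be kept suitably aligned at all times.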
uint32_t pushPtr(Register r) {
DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
pushChunkyBytes(StackSizeOfPtr);
masm.storePtr(r, Address(sp_, stackOffset(currentStackHeight())));
#else
masm.Push(r);
#endif
maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
MOZ_ASSERT(stackBefore + StackSizeOfPtr == currentStackHeight());
return currentStackHeight();
}
uint32_t pushFloat32(FloatRegister r) {
DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
pushChunkyBytes(StackSizeOfFloat);
masm.storeFloat32(r, Address(sp_, stackOffset(currentStackHeight())));
#else
masm.Push(r);
#endif
maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
MOZ_ASSERT(stackBefore + StackSizeOfFloat == currentStackHeight());
return currentStackHeight();
}
#ifdef ENABLE_WASM_SIMD
uint32_t pushV128(RegV128 r) {
DebugOnly<uint32_t> stackBefore = currentStackHeight();
# ifdef RABALDR_CHUNKY_STACK
pushChunkyBytes(StackSizeOfV128);
# else
masm.adjustStack(-(int)StackSizeOfV128);
# endif
masm.storeUnalignedSimd128(r,
Address(sp_, stackOffset(currentStackHeight())));
maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
MOZ_ASSERT(stackBefore + StackSizeOfV128 == currentStackHeight());
return currentStackHeight();
}
#endif
uint32_t pushDouble(FloatRegister r) {
DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
pushChunkyBytes(StackSizeOfDouble);
masm.storeDouble(r, Address(sp_, stackOffset(currentStackHeight())));
#else
masm.Push(r);
#endif
maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
MOZ_ASSERT(stackBefore + StackSizeOfDouble == currentStackHeight());
return currentStackHeight();
}
void popPtr(Register r) {
DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
masm.loadPtr(Address(sp_, stackOffset(currentStackHeight())), r);
popChunkyBytes(StackSizeOfPtr);
#else
masm.Pop(r);
#endif
MOZ_ASSERT(stackBefore - StackSizeOfPtr == currentStackHeight());
}
void popFloat32(FloatRegister r) {
DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
masm.loadFloat32(Address(sp_, stackOffset(currentStackHeight())), r);
popChunkyBytes(StackSizeOfFloat);
#else
masm.Pop(r);
#endif
MOZ_ASSERT(stackBefore - StackSizeOfFloat == currentStackHeight());
}
void popDouble(FloatRegister r) {
DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
masm.loadDouble(Address(sp_, stackOffset(currentStackHeight())), r);
popChunkyBytes(StackSizeOfDouble);
#else
masm.Pop(r);
#endif
MOZ_ASSERT(stackBefore - StackSizeOfDouble == currentStackHeight());
}
#ifdef ENABLE_WASM_SIMD
void popV128(RegV128 r) {
DebugOnly<uint32_t> stackBefore = currentStackHeight();
masm.loadUnalignedSimd128(Address(sp_, stackOffset(currentStackHeight())),
r);
# ifdef RABALDR_CHUNKY_STACK
popChunkyBytes(StackSizeOfV128);
# else
masm.adjustStack((int)StackSizeOfV128);
# endif
MOZ_ASSERT(stackBefore - StackSizeOfV128 == currentStackHeight());
}
#endif
void popBytes(size_t bytes) {
if (bytes > 0) {
#ifdef RABALDR_CHUNKY_STACK
popChunkyBytes(bytes);
#else
masm.freeStack(bytes);
#endif
}
}
void loadStackI32(int32_t offset, RegI32 dest) {
masm.load32(Address(sp_, stackOffset(offset)), dest);
}
void loadStackI64(int32_t offset, RegI64 dest) {
masm.load64(Address(sp_, stackOffset(offset)), dest);
}
#ifndef JS_PUNBOX64
void loadStackI64Low(int32_t offset, RegI32 dest) {
masm.load32(Address(sp_, stackOffset(offset - INT64LOW_OFFSET)), dest);
}
void loadStackI64High(int32_t offset, RegI32 dest) {
masm.load32(Address(sp_, stackOffset(offset - INT64HIGH_OFFSET)), dest);
}
#endif
// Disambiguation: this loads a "Ptr" value from the stack; it does not load
// the "StackPtr".
void loadStackPtr(int32_t offset, RegPtr dest) {
masm.loadPtr(Address(sp_, stackOffset(offset)), dest);
}
void loadStackF64(int32_t offset, RegF64 dest) {
masm.loadDouble(Address(sp_, stackOffset(offset)), dest);
}
void loadStackF32(int32_t offset, RegF32 dest) {
masm.loadFloat32(Address(sp_, stackOffset(offset)), dest);
}
#ifdef ENABLE_WASM_SIMD
void loadStackV128(int32_t offset, RegV128 dest) {
masm.loadUnalignedSimd128(Address(sp_, stackOffset(offset)), dest);
}
#endif
uint32_t prepareStackResultArea(StackHeight stackBase,
uint32_t stackResultBytes) {
uint32_t end = computeHeightWithStackResults(stackBase, stackResultBytes);
if (currentStackHeight() < end) {
uint32_t bytes = end - currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
pushChunkyBytes(bytes);
#else
masm.reserveStack(bytes);
#endif
maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
}
return end;
}
void finishStackResultArea(StackHeight stackBase, uint32_t stackResultBytes) {
uint32_t end = computeHeightWithStackResults(stackBase, stackResultBytes);
MOZ_ASSERT(currentStackHeight() >= end);
popBytes(currentStackHeight() - end);
}
// |srcHeight| and |destHeight| are stack heights *including* |bytes|.
void shuffleStackResultsTowardFP(uint32_t srcHeight, uint32_t destHeight,
uint32_t bytes, Register temp) {
MOZ_ASSERT(destHeight < srcHeight);
MOZ_ASSERT(bytes % sizeof(uint32_t) == 0);
uint32_t destOffset = stackOffset(destHeight) + bytes;
uint32_t srcOffset = stackOffset(srcHeight) + bytes;
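// The destination lies at higher addresses (nearer the frame pointer), so
// copy word by word from the high-address end downward, memmove-style; this
// is safe even if the source and destination regions overlap. Any 32-bit
// remainder is handled after the loop.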
while (bytes >= sizeof(intptr_t)) {
destOffset -= sizeof(intptr_t);
srcOffset -= sizeof(intptr_t);
bytes -= sizeof(intptr_t);
masm.loadPtr(Address(sp_, srcOffset), temp);
masm.storePtr(temp, Address(sp_, destOffset));
}
if (bytes) {
MOZ_ASSERT(bytes == sizeof(uint32_t));
destOffset -= sizeof(uint32_t);
srcOffset -= sizeof(uint32_t);
masm.load32(Address(sp_, srcOffset), temp);
masm.store32(temp, Address(sp_, destOffset));
}
}
// Unlike the overload that operates on raw heights, |srcHeight| and
// |destHeight| are stack heights *not including* |bytes|.
void shuffleStackResultsTowardFP(StackHeight srcHeight,
StackHeight destHeight, uint32_t bytes,
Register temp) {
MOZ_ASSERT(srcHeight.isValid());
MOZ_ASSERT(destHeight.isValid());
uint32_t src = computeHeightWithStackResults(srcHeight, bytes);
uint32_t dest = computeHeightWithStackResults(destHeight, bytes);
MOZ_ASSERT(src <= currentStackHeight());
MOZ_ASSERT(dest <= currentStackHeight());
shuffleStackResultsTowardFP(src, dest, bytes, temp);
}
// |srcHeight| and |destHeight| are stack heights *including* |bytes|.
void shuffleStackResultsTowardSP(uint32_t srcHeight, uint32_t destHeight,
uint32_t bytes, Register temp) {
MOZ_ASSERT(destHeight > srcHeight);
MOZ_ASSERT(bytes % sizeof(uint32_t) == 0);
uint32_t destOffset = stackOffset(destHeight);
uint32_t srcOffset = stackOffset(srcHeight);
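// Here the destination lies at lower addresses (nearer SP), so copy upward
// from the low-address end; again this is safe for overlapping regions.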
while (bytes >= sizeof(intptr_t)) {
masm.loadPtr(Address(sp_, srcOffset), temp);
masm.storePtr(temp, Address(sp_, destOffset));
destOffset += sizeof(intptr_t);
srcOffset += sizeof(intptr_t);
bytes -= sizeof(intptr_t);
}
if (bytes) {
MOZ_ASSERT(bytes == sizeof(uint32_t));
masm.load32(Address(sp_, srcOffset), temp);
masm.store32(temp, Address(sp_, destOffset));
}
}
// Copy results from the top of the current stack frame to an area of memory,
// and pop the stack accordingly. `dest` is the address of the low byte of
// that memory.
void popStackResultsToMemory(Register dest, uint32_t bytes, Register temp) {
MOZ_ASSERT(bytes <= currentStackHeight());
MOZ_ASSERT(bytes % sizeof(uint32_t) == 0);
uint32_t bytesToPop = bytes;
uint32_t srcOffset = stackOffset(currentStackHeight());
uint32_t destOffset = 0;
while (bytes >= sizeof(intptr_t)) {
masm.loadPtr(Address(sp_, srcOffset), temp);
masm.storePtr(temp, Address(dest, destOffset));
destOffset += sizeof(intptr_t);
srcOffset += sizeof(intptr_t);
bytes -= sizeof(intptr_t);
}
if (bytes) {
MOZ_ASSERT(bytes == sizeof(uint32_t));
masm.load32(Address(sp_, srcOffset), temp);
masm.store32(temp, Address(dest, destOffset));
}
popBytes(bytesToPop);
}
private:
void store32BitsToStack(int32_t imm, uint32_t destHeight, Register temp) {
masm.move32(Imm32(imm), temp);
masm.store32(temp, Address(sp_, stackOffset(destHeight)));
}
void store64BitsToStack(int64_t imm, uint32_t destHeight, Register temp) {
#ifdef JS_PUNBOX64
masm.move64(Imm64(imm), Register64(temp));
masm.store64(Register64(temp), Address(sp_, stackOffset(destHeight)));
#else
union {
int64_t i64;
int32_t i32[2];
} bits = {.i64 = imm};
static_assert(sizeof(bits) == 8);
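// Without 64-bit registers, split the immediate via the union and store it
// as two adjacent 32-bit words; the smaller height (destHeight - 4) denotes
// the higher address.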
store32BitsToStack(bits.i32[0], destHeight, temp);
store32BitsToStack(bits.i32[1], destHeight - sizeof(int32_t), temp);
#endif
}
public:
void storeImmediatePtrToStack(intptr_t imm, uint32_t destHeight,
Register temp) {
#ifdef JS_PUNBOX64
static_assert(StackSizeOfPtr == 8);
store64BitsToStack(imm, destHeight, temp);
#else
static_assert(StackSizeOfPtr == 4);
store32BitsToStack(int32_t(imm), destHeight, temp);
#endif
}
void storeImmediateI64ToStack(int64_t imm, uint32_t destHeight,
Register temp) {
store64BitsToStack(imm, destHeight, temp);
}
void storeImmediateF32ToStack(float imm, uint32_t destHeight, Register temp) {
union {
int32_t i32;
float f32;
} bits = {.f32 = imm};
static_assert(sizeof(bits) == 4);
// Do not store 4 bytes if StackSizeOfFloat == 8. It's probably OK to do
// so, but it costs little to store something predictable.
if (StackSizeOfFloat == 4) {
store32BitsToStack(bits.i32, destHeight, temp);
} else {
store64BitsToStack(uint32_t(bits.i32), destHeight, temp);
}
}
void storeImmediateF64ToStack(double imm, uint32_t destHeight,
Register temp) {
union {
int64_t i64;
double f64;
} bits = {.f64 = imm};
static_assert(sizeof(bits) == 8);
store64BitsToStack(bits.i64, destHeight, temp);
}
#ifdef ENABLE_WASM_SIMD
void storeImmediateV128ToStack(V128 imm, uint32_t destHeight, Register temp) {
union {
int32_t i32[4];
uint8_t bytes[16];
} bits;
static_assert(sizeof(bits) == 16);
memcpy(bits.bytes, imm.bytes, 16);
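// Store the vector as four 32-bit words at decreasing heights, i.e. at
// increasing addresses.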
for (unsigned i = 0; i < 4; i++) {
store32BitsToStack(bits.i32[i], destHeight - i * sizeof(int32_t), temp);
}
}
#endif
};
void BaseStackFrame::zeroLocals(BaseRegAlloc* ra) {
MOZ_ASSERT(varLow_ != UINT32_MAX);
if (varLow_ == varHigh_) {
return;
}
static const uint32_t wordSize = sizeof(void*);
// The adjustments to 'low' by the size of the item being stored compensate
// for the fact that locals offsets are the offsets from Frame to the bytes
// directly "above" the locals in the locals area. See comment at Local.
// On 64-bit systems we may have 32-bit alignment for the local area as it
// may be preceded by parameters and prologue/debug data.
uint32_t low = varLow_;
if (low % wordSize) {
masm.store32(Imm32(0), Address(sp_, localOffset(low + 4)));
low += 4;
}
MOZ_ASSERT(low % wordSize == 0);
const uint32_t high = AlignBytes(varHigh_, wordSize);
// An UNROLL_LIMIT of 16 is chosen so that we only need an 8-bit signed
// immediate to represent the offset in the store instructions in the loop
// on x64.
const uint32_t UNROLL_LIMIT = 16;
const uint32_t initWords = (high - low) / wordSize;
const uint32_t tailWords = initWords % UNROLL_LIMIT;
const uint32_t loopHigh = high - (tailWords * wordSize);
// With only one word to initialize, just store an immediate zero.
if (initWords == 1) {
masm.storePtr(ImmWord(0), Address(sp_, localOffset(low + wordSize)));
return;
}
// For other cases, it's best to have a zero in a register.
//
// One can do more here with SIMD registers (store 16 bytes at a time) or
// with instructions like STRD on ARM (store 8 bytes at a time), but that's
// for another day.
RegI32 zero = ra->needI32();
masm.mov(ImmWord(0), zero);
// For the general case we want to have a loop body of UNROLL_LIMIT stores
// and then a tail of less than UNROLL_LIMIT stores. When initWords is less
// than 2*UNROLL_LIMIT the loop trip count is at most 1 and there is no
// benefit to having the pointer calculations and the compare-and-branch.
// So we completely unroll when we have initWords < 2 * UNROLL_LIMIT. (In
// this case we'll end up using 32-bit offsets on x64 for up to half of the
// stores, though.)
// Fully-unrolled case.
if (initWords < 2 * UNROLL_LIMIT) {
for (uint32_t i = low; i < high; i += wordSize) {
masm.storePtr(zero, Address(sp_, localOffset(i + wordSize)));
}
ra->freeI32(zero);
return;
}
// Unrolled loop with a tail. Stores will use negative offsets. That's OK
// for x86 and ARM, at least.
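// For example (hypothetical numbers): with initWords == 70 we get
// tailWords == 70 % 16 == 6, so the loop below runs 4 times (64 words) and
// the tail stores the remaining 6 words.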
// Compute pointer to the highest-addressed slot on the frame.
RegI32 p = ra->needI32();
masm.computeEffectiveAddress(Address(sp_, localOffset(low + wordSize)), p);
// Compute pointer to the lowest-addressed slot on the frame that will be
// initialized by the loop body.
RegI32 lim = ra->needI32();
masm.computeEffectiveAddress(Address(sp_, localOffset(loopHigh + wordSize)),
lim);
// The loop body. Eventually we'll have p == lim and exit the loop.
Label again;
masm.bind(&again);
for (uint32_t i = 0; i < UNROLL_LIMIT; ++i) {
masm.storePtr(zero, Address(p, -(wordSize * i)));
}
masm.subPtr(Imm32(UNROLL_LIMIT * wordSize), p);
masm.branchPtr(Assembler::LessThan, lim, p, &again);
// The tail.
for (uint32_t i = 0; i < tailWords; ++i) {
masm.storePtr(zero, Address(p, -(wordSize * i)));
}
ra->freeI32(p);
ra->freeI32(lim);
ra->freeI32(zero);
}
// Value stack: stack elements
struct Stk {
private:
Stk() : kind_(Unknown), i64val_(0) {}
public:
enum Kind {
// The Mem opcodes are all clustered at the beginning to
// allow for a quick test within sync().
MemI32, // 32-bit integer stack value ("offs")
MemI64, // 64-bit integer stack value ("offs")
MemF32, // 32-bit floating stack value ("offs")
MemF64, // 64-bit floating stack value ("offs")
#ifdef ENABLE_WASM_SIMD
MemV128, // 128-bit vector stack value ("offs")
#endif
MemRef, // reftype (pointer wide) stack value ("offs")
// The Local opcodes follow the Mem opcodes for a similar
// quick test within hasLocal().
LocalI32, // Local int32 var ("slot")
LocalI64, // Local int64 var ("slot")
LocalF32, // Local float32 var ("slot")
LocalF64, // Local double var ("slot")
#ifdef ENABLE_WASM_SIMD
LocalV128, // Local v128 var ("slot")
#endif
LocalRef, // Local reftype (pointer wide) var ("slot")
RegisterI32, // 32-bit integer register ("i32reg")
RegisterI64, // 64-bit integer register ("i64reg")
RegisterF32, // 32-bit floating register ("f32reg")
RegisterF64, // 64-bit floating register ("f64reg")
#ifdef ENABLE_WASM_SIMD
RegisterV128, // 128-bit vector register ("v128reg")
#endif
RegisterRef, // reftype (pointer wide) register ("refReg")
ConstI32, // 32-bit integer constant ("i32val")
ConstI64, // 64-bit integer constant ("i64val")
ConstF32, // 32-bit floating constant ("f32val")
ConstF64, // 64-bit floating constant ("f64val")
#ifdef ENABLE_WASM_SIMD
ConstV128, // 128-bit vector constant ("v128val")
#endif
ConstRef, // reftype (pointer wide) constant ("refval")
Unknown,
};
Kind kind_;
static const Kind MemLast = MemRef;
static const Kind LocalLast = LocalRef;
union {
RegI32 i32reg_;
RegI64 i64reg_;
RegPtr refReg_;
RegF32 f32reg_;
RegF64 f64reg_;
#ifdef ENABLE_WASM_SIMD
RegV128 v128reg_;
#endif
int32_t i32val_;
int64_t i64val_;
intptr_t refval_;
float f32val_;
double f64val_;
#ifdef ENABLE_WASM_SIMD
V128 v128val_;
#endif
uint32_t slot_;
uint32_t offs_;
};
explicit Stk(RegI32 r) : kind_(RegisterI32), i32reg_(r) {}
explicit Stk(RegI64 r) : kind_(RegisterI64), i64reg_(r) {}
explicit Stk(RegPtr r) : kind_(RegisterRef), refReg_(r) {}
explicit Stk(RegF32 r) : kind_(RegisterF32), f32reg_(r) {}
explicit Stk(RegF64 r) : kind_(RegisterF64), f64reg_(r) {}
#ifdef ENABLE_WASM_SIMD
explicit Stk(RegV128 r) : kind_(RegisterV128), v128reg_(r) {}
#endif
explicit Stk(int32_t v) : kind_(ConstI32), i32val_(v) {}
explicit Stk(int64_t v) : kind_(ConstI64), i64val_(v) {}
explicit Stk(float v) : kind_(ConstF32), f32val_(v) {}
explicit Stk(double v) : kind_(ConstF64), f64val_(v) {}
#ifdef ENABLE_WASM_SIMD
explicit Stk(V128 v) : kind_(ConstV128), v128val_(v) {}
#endif
explicit Stk(Kind k, uint32_t v) : kind_(k), slot_(v) {
MOZ_ASSERT(k > MemLast && k <= LocalLast);
}
static Stk StkRef(intptr_t v) {
Stk s;
s.kind_ = ConstRef;
s.refval_ = v;
return s;
}
static Stk StackResult(ValType type, uint32_t offs) {
Kind k;
switch (type.kind()) {
case ValType::I32:
k = Stk::MemI32;
break;
case ValType::I64:
k = Stk::MemI64;
break;
case ValType::V128:
#ifdef ENABLE_WASM_SIMD
k = Stk::MemV128;
break;
#else
MOZ_CRASH("No SIMD");
#endif
case ValType::F32:
k = Stk::MemF32;
break;
case ValType::F64:
k = Stk::MemF64;
break;
case ValType::Ref:
k = Stk::MemRef;
break;
}
Stk s;
s.setOffs(k, offs);
return s;
}
void setOffs(Kind k, uint32_t v) {
MOZ_ASSERT(k <= MemLast);
kind_ = k;
offs_ = v;
}
Kind kind() const { return kind_; }
bool isMem() const { return kind_ <= MemLast; }
RegI32 i32reg() const {
MOZ_ASSERT(kind_ == RegisterI32);
return i32reg_;
}
RegI64 i64reg() const {
MOZ_ASSERT(kind_ == RegisterI64);
return i64reg_;
}
RegPtr refReg() const {
MOZ_ASSERT(kind_ == RegisterRef);
return refReg_;
}
RegF32 f32reg() const {
MOZ_ASSERT(kind_ == RegisterF32);
return f32reg_;
}
RegF64 f64reg() const {
MOZ_ASSERT(kind_ == RegisterF64);
return f64reg_;
}
#ifdef ENABLE_WASM_SIMD
RegV128 v128reg() const {
MOZ_ASSERT(kind_ == RegisterV128);
return v128reg_;
}
#endif
int32_t i32val() const {
MOZ_ASSERT(kind_ == ConstI32);
return i32val_;
}
int64_t i64val() const {
MOZ_ASSERT(kind_ == ConstI64);
return i64val_;
}
intptr_t refval() const {
MOZ_ASSERT(kind_ == ConstRef);
return refval_;
}
// For these two, use an out-param instead of simply returning, to
// use the normal stack and not the x87 FP stack (which has an effect on
// NaNs with the signaling bit set).
void f32val(float* out) const {
MOZ_ASSERT(kind_ == ConstF32);
*out = f32val_;
}
void f64val(double* out) const {
MOZ_ASSERT(kind_ == ConstF64);
*out = f64val_;
}
#ifdef ENABLE_WASM_SIMD
// For SIMD, do the same as for floats since we're using float registers to
// hold vectors; this is just conservative.
void v128val(V128* out) const {
MOZ_ASSERT(kind_ == ConstV128);
*out = v128val_;
}
#endif
uint32_t slot() const {
MOZ_ASSERT(kind_ > MemLast && kind_ <= LocalLast);
return slot_;
}
uint32_t offs() const {
MOZ_ASSERT(isMem());
return offs_;
}
};
typedef Vector<Stk, 0, SystemAllocPolicy> StkVector;
// MachineStackTracker, used for stack-slot pointerness tracking.
class MachineStackTracker {
// Simulates the machine's stack, with one bool per word. Index zero in
// this vector corresponds to the highest address in the machine stack. The
// last entry corresponds to what SP currently points at. This all assumes
// a grow-down stack.
//
// numPtrs_ contains the number of "true" values in vec_, and is therefore
// redundant. But it serves as a constant-time way to detect the common
// case where vec_ holds no "true" values.
size_t numPtrs_;
Vector<bool, 64, SystemAllocPolicy> vec_;
public:
MachineStackTracker() : numPtrs_(0) {}
~MachineStackTracker() {
#ifdef DEBUG
size_t n = 0;
for (bool b : vec_) {
n += (b ? 1 : 0);
}
MOZ_ASSERT(n == numPtrs_);
#endif
}
// Clone this MachineStackTracker, writing the result at |dst|.
[[nodiscard]] bool cloneTo(MachineStackTracker* dst) {
MOZ_ASSERT(dst->vec_.empty());
if (!dst->vec_.appendAll(vec_)) {
return false;
}
dst->numPtrs_ = numPtrs_;
return true;
}
// Notionally push |n| non-pointers on the stack.
[[nodiscard]] bool pushNonGCPointers(size_t n) {
return vec_.appendN(false, n);
}
// Mark the stack slot |offsetFromSP| up from the bottom as holding a
// pointer.
void setGCPointer(size_t offsetFromSP) {
// offsetFromSP == 0 denotes the most recently pushed item, == 1 the
// second most recently pushed item, etc.
MOZ_ASSERT(offsetFromSP < vec_.length());
size_t offsetFromTop = vec_.length() - 1 - offsetFromSP;
numPtrs_ = numPtrs_ + 1 - (vec_[offsetFromTop] ? 1 : 0);
vec_[offsetFromTop] = true;
}
// Query the pointerness of the slot |offsetFromSP| up from the bottom.
bool isGCPointer(size_t offsetFromSP) {
MOZ_ASSERT(offsetFromSP < vec_.length());
size_t offsetFromTop = vec_.length() - 1 - offsetFromSP;
return vec_[offsetFromTop];
}
// Return the number of words tracked by this MachineStackTracker.
size_t length() { return vec_.length(); }
// Return the number of pointer-typed words tracked by this
// MachineStackTracker.
size_t numPtrs() {
MOZ_ASSERT(numPtrs_ <= length());
return numPtrs_;
}
// Discard all contents, but (per mozilla::Vector::clear semantics) don't
// free or reallocate any dynamic storage associated with |vec_|.
void clear() {
vec_.clear();
numPtrs_ = 0;
}
};
// StackMapGenerator, which carries all state needed to create stack maps.
enum class HasDebugFrame { No, Yes };
struct StackMapGenerator {
private:
// --- These are constant for the life of the function's compilation ---
// For generating stack maps, we'll need to know the offsets of registers
// as saved by the trap exit stub.
const MachineState& trapExitLayout_;
const size_t trapExitLayoutNumWords_;
// Completed stackmaps are added here
StackMaps* stackMaps_;
// Needed so we can get the current assembler offset when creating stack maps
const MacroAssembler& masm_;
public:
// --- These are constant once we've completed beginFunction() ---
// The number of words of arguments passed to this function in memory.
size_t numStackArgWords;
MachineStackTracker machineStackTracker; // tracks machine stack pointerness
// This holds masm.framePushed at entry to the function's body. It is a
// Maybe because createStackMap needs to know whether or not we're still
// in the prologue. It makes a Nothing-to-Some transition just once per
// function.
Maybe<uint32_t> framePushedAtEntryToBody;
// --- These can change at any point ---
// This holds masm.framePushed as it would be for a function call
// instruction, but excluding the stack area used to pass arguments in
// memory. That is, for an upcoming function call, this will hold
//
// masm.framePushed() at the call instruction -
// StackArgAreaSizeUnaligned(argumentTypes)
//
// This value denotes the lowest-addressed stack word covered by the current
// function's stackmap. Words below this point form the highest-addressed
// area of the callee's stackmap. Note that all alignment padding above the
// arguments-in-memory themselves belongs to the caller's stack map, which
// is why this is defined in terms of StackArgAreaSizeUnaligned() rather than
// StackArgAreaSizeAligned().
//
// When not inside a function call setup/teardown sequence, it is Nothing.
// It can make Nothing-to/from-Some transitions arbitrarily as we progress
// through the function body.
Maybe<uint32_t> framePushedExcludingOutboundCallArgs;
// The number of memory-resident, ref-typed entries on the containing
// BaseCompiler::stk_.
size_t memRefsOnStk;
// This is a copy of machineStackTracker that is used only within individual
// calls to createStackMap. It is here only to avoid possible heap allocation
// costs resulting from making it local to createStackMap().
MachineStackTracker augmentedMst;
StackMapGenerator(StackMaps* stackMaps, const MachineState& trapExitLayout,
const size_t trapExitLayoutNumWords,
const MacroAssembler& masm)
: trapExitLayout_(trapExitLayout),
trapExitLayoutNumWords_(trapExitLayoutNumWords),
stackMaps_(stackMaps),
masm_(masm),
numStackArgWords(0),
memRefsOnStk(0) {}
// At the beginning of a function, we may have live roots in registers (as
// arguments) at the point where we perform a stack overflow check. This
// method generates the "extra" stackmap entries to describe that, in the
// case that the check fails and we wind up calling into the wasm exit
// stub, as generated by GenerateTrapExit().
//
// The resulting map must correspond precisely with the stack layout
// created for the integer registers as saved by (code generated by)
// GenerateTrapExit(). To do that we use trapExitLayout_ and
// trapExitLayoutNumWords_, which together comprise a description of the
// layout and are created by GenerateTrapExitMachineState().
[[nodiscard]] bool generateStackmapEntriesForTrapExit(
const ArgTypeVector& args, ExitStubMapVector* extras) {
return GenerateStackmapEntriesForTrapExit(args, trapExitLayout_,
trapExitLayoutNumWords_, extras);
}
// Creates a stackmap associated with the instruction denoted by
// |assemblerOffset|, incorporating pointers from the current operand
// stack |stk|, incorporating possible extra pointers in |extra| at the
// lower addressed end, and possibly with the associated frame having a
// ref-typed DebugFrame as indicated by |debugFrame|.
[[nodiscard]] bool createStackMap(const char* who,
const ExitStubMapVector& extras,
uint32_t assemblerOffset,
HasDebugFrame debugFrame,
const StkVector& stk) {
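// In outline: start from the prologue-time machineStackTracker, extend it
// to cover everything the body has pushed so far (excluding any outbound
// call arguments), mark the memory-resident ref-typed operand stack
// entries, and then serialize the result (exit stub extras first, then the
// main map) into a StackMap.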
size_t countedPointers = machineStackTracker.numPtrs() + memRefsOnStk;
#ifndef DEBUG
// An important optimization. If there are obviously no pointers, as
// we expect in the majority of cases, exit quickly.
if (countedPointers == 0 && debugFrame == HasDebugFrame::No) {
// We can skip creating the map if there are no |true| elements in
// |extras|.
bool extrasHasRef = false;
for (bool b : extras) {
if (b) {
extrasHasRef = true;
break;
}
}
if (!extrasHasRef) {
return true;
}
}
#else
// In the debug case, create the stack map regardless, and cross-check
// the pointer-counting below. We expect the final map to have
// |countedPointers| in total. This doesn't include those in the
// DebugFrame, but they do not appear in the map's bitmap. Note that
// |countedPointers| is debug-only from this point onwards.
for (bool b : extras) {
countedPointers += (b ? 1 : 0);
}
#endif
// Start with the frame-setup map, and add operand-stack information to
// that. augmentedMst holds live data only within individual calls to
// createStackMap.
augmentedMst.clear();
if (!machineStackTracker.cloneTo(&augmentedMst)) {
return false;
}
// At this point, augmentedMst only contains entries covering the
// incoming argument area (if any) and for the area allocated by this
// function's prologue. We now need to calculate how far the machine's
// stack pointer is below where it was at the start of the body. But we
// must take care not to include any words pushed as arguments to an
// upcoming function call, since those words "belong" to the stackmap of
// the callee, not to the stackmap of this function. Note however that
// any alignment padding pushed prior to pushing the args *does* belong to
// this function.
//
// That padding is taken into account at the point where
// framePushedExcludingOutboundCallArgs is set, viz, in startCallArgs(),
// and comprises two components:
//
// * call->frameAlignAdjustment
// * the padding applied to the stack arg area itself. That is:
// StackArgAreaSizeAligned(argTys) - StackArgAreaSizeUnaligned(argTys)
Maybe<uint32_t> framePushedExcludingArgs;
if (framePushedAtEntryToBody.isNothing()) {
// Still in the prologue. framePushedExcludingArgs remains Nothing.
MOZ_ASSERT(framePushedExcludingOutboundCallArgs.isNothing());
} else {
// In the body.
MOZ_ASSERT(masm_.framePushed() >= framePushedAtEntryToBody.value());
if (framePushedExcludingOutboundCallArgs.isSome()) {
// In the body, and we've potentially pushed some args onto the stack.
// We must ignore them when sizing the stackmap.
MOZ_ASSERT(masm_.framePushed() >=
framePushedExcludingOutboundCallArgs.value());
MOZ_ASSERT(framePushedExcludingOutboundCallArgs.value() >=
framePushedAtEntryToBody.value());
framePushedExcludingArgs =
Some(framePushedExcludingOutboundCallArgs.value());
} else {
// In the body, but not with call args on the stack. The stackmap
// must be sized so as to extend all the way "down" to
// masm_.framePushed().
framePushedExcludingArgs = Some(masm_.framePushed());
}
}
if (framePushedExcludingArgs.isSome()) {
uint32_t bodyPushedBytes =
framePushedExcludingArgs.value() - framePushedAtEntryToBody.value();
MOZ_ASSERT(0 == bodyPushedBytes % sizeof(void*));
if (!augmentedMst.pushNonGCPointers(bodyPushedBytes / sizeof(void*))) {
return false;
}
}
// Scan the operand stack, marking pointers in the just-added new
// section.
MOZ_ASSERT_IF(framePushedAtEntryToBody.isNothing(), stk.empty());
MOZ_ASSERT_IF(framePushedExcludingArgs.isNothing(), stk.empty());
for (const Stk& v : stk) {
#ifndef DEBUG
// We don't track roots in registers, per rationale below, so if this
// doesn't hold, something is seriously wrong, and we're likely to get a
// GC-related crash.
MOZ_RELEASE_ASSERT(v.kind() != Stk::RegisterRef);
if (v.kind() != Stk::MemRef) {
continue;
}
#else
// Take the opportunity to check everything we reasonably can about
// operand stack elements.
switch (v.kind()) {
case Stk::MemI32:
case Stk::MemI64:
case Stk::MemF32:
case Stk::MemF64:
case Stk::ConstI32:
case Stk::ConstI64:
case Stk::ConstF32:
case Stk::ConstF64:
# ifdef ENABLE_WASM_SIMD
case Stk::MemV128:
case Stk::ConstV128:
# endif
// All of these have uninteresting type.
continue;
case Stk::LocalI32:
case Stk::LocalI64:
case Stk::LocalF32:
case Stk::LocalF64:
# ifdef ENABLE_WASM_SIMD
case Stk::LocalV128:
# endif
// These also have uninteresting type. Check that they live in the
// section of stack set up by beginFunction(). The unguarded use of
// |value()| here is safe due to the assertion above this loop.
MOZ_ASSERT(v.offs() <= framePushedAtEntryToBody.value());
continue;
case Stk::RegisterI32:
case Stk::RegisterI64:
case Stk::RegisterF32:
case Stk::RegisterF64:
# ifdef ENABLE_WASM_SIMD
case Stk::RegisterV128:
# endif
// These also have uninteresting type, but more to the point: all
// registers holding live values should have been flushed to the
// machine stack immediately prior to the instruction to which this
// stackmap pertains. So these can't happen.
MOZ_CRASH("createStackMap: operand stack has Register-non-Ref");
case Stk::MemRef:
// This is the only case we care about. We'll handle it after the
// switch.
break;
case Stk::LocalRef:
// We need the stackmap to mention this pointer, but it should
// already be in the machineStackTracker section created by
// beginFunction().
MOZ_ASSERT(v.offs() <= framePushedAtEntryToBody.value());
continue;
case Stk::ConstRef:
// This can currently only be a null pointer.
MOZ_ASSERT(v.refval() == 0);
continue;
case Stk::RegisterRef:
// This can't happen, per rationale above.
MOZ_CRASH("createStackMap: operand stack contains RegisterRef");
default:
MOZ_CRASH("createStackMap: unknown operand stack element");
}
#endif
// v.offs() holds masm.framePushed() at the point immediately after it
// was pushed on the stack. Since it's still on the stack,
// masm.framePushed() can't be less.
MOZ_ASSERT(v.offs() <= framePushedExcludingArgs.value());
uint32_t offsFromMapLowest = framePushedExcludingArgs.value() - v.offs();
MOZ_ASSERT(0 == offsFromMapLowest % sizeof(void*));
augmentedMst.setGCPointer(offsFromMapLowest / sizeof(void*));
}
// Create the final StackMap. The initial map is zeroed out, so there's
// no need to write zero bits in it.
const uint32_t extraWords = extras.length();
const uint32_t augmentedMstWords = augmentedMst.length();
const uint32_t numMappedWords = extraWords + augmentedMstWords;
StackMap* stackMap = StackMap::create(numMappedWords);
if (!stackMap) {
return false;
}
{
// First the exit stub extra words, if any.
uint32_t i = 0;
for (bool b : extras) {
if (b) {
stackMap->setBit(i);
}
i++;
}
}
// Followed by the "main" part of the map.
for (uint32_t i = 0; i < augmentedMstWords; i++) {
if (augmentedMst.isGCPointer(i)) {
stackMap->setBit(extraWords + i);
}
}
stackMap->setExitStubWords(extraWords);
// Record in the map how far down from the highest address the Frame* is.
// Take the opportunity to check that we haven't marked any part of the
// Frame itself as a pointer.
stackMap->setFrameOffsetFromTop(numStackArgWords +
sizeof(Frame) / sizeof(void*));
#ifdef DEBUG
for (uint32_t i = 0; i < sizeof(Frame) / sizeof(void*); i++) {
MOZ_ASSERT(stackMap->getBit(stackMap->numMappedWords -
stackMap->frameOffsetFromTop + i) == 0);
}
#endif
// Note the presence of a ref-typed DebugFrame, if any.
if (debugFrame == HasDebugFrame::Yes) {
stackMap->setHasDebugFrame();
}
// Add the completed map to the running collection thereof.
if (!stackMaps_->add((uint8_t*)(uintptr_t)assemblerOffset, stackMap)) {
stackMap->destroy();
return false;
}
#ifdef DEBUG
{
// Crosscheck the map pointer counting.
uint32_t nw = stackMap->numMappedWords;
uint32_t np = 0;
for (uint32_t i = 0; i < nw; i++) {
np += stackMap->getBit(i);
}
MOZ_ASSERT(size_t(np) == countedPointers);
}
#endif
return true;
}
};
// The baseline compiler proper.
class BaseCompiler final : public BaseCompilerInterface {
using Local = BaseStackFrame::Local;
using LabelVector = Vector<NonAssertingLabel, 8, SystemAllocPolicy>;
// Bit set used for simple bounds check elimination. Capping this at 64
// locals makes sense; even 32 locals would probably be OK in practice.
//
// For more information about BCE, see the block comment above
// popMemoryAccess(), below.
using BCESet = uint64_t;
// Information stored in the control node for generating exception handling
// landing pads.
struct CatchInfo {
uint32_t eventIndex; // Index for the associated exception.
NonAssertingLabel label; // The entry label for the handler.
explicit CatchInfo(uint32_t eventIndex_) : eventIndex(eventIndex_) {}
};
using CatchInfoVector = Vector<CatchInfo, 0, SystemAllocPolicy>;
// Control node, representing labels and stack heights at join points.
struct Control {
NonAssertingLabel label; // The "exit" label
NonAssertingLabel otherLabel; // Used for the "else" branch of if-then-else
StackHeight stackHeight; // From BaseStackFrame
uint32_t stackSize; // Value stack height
BCESet bceSafeOnEntry; // Bounds check info flowing into the item
BCESet bceSafeOnExit; // Bounds check info flowing out of the item
bool deadOnArrival; // deadCode_ was set on entry to the region
bool deadThenBranch; // deadCode_ was set on exit from "then"
size_t tryNoteIndex; // For tracking try branch code ranges.
CatchInfoVector catchInfos; // Used for try-catch handlers.
Control()
: stackHeight(StackHeight::Invalid()),
stackSize(UINT32_MAX),
bceSafeOnEntry(0),
bceSafeOnExit(~BCESet(0)),
deadOnArrival(false),
deadThenBranch(false) {}
};
class NothingVector {
Nothing unused_;
public:
bool resize(size_t length) { return true; }
Nothing& operator[](size_t) { return unused_; }
Nothing& back() { return unused_; }
};
struct BaseCompilePolicy {
// The baseline compiler tracks values on a stack of its own -- it
// needs to scan that stack for spilling -- and thus has no need
// for the values maintained by the iterator.
using Value = Nothing;
using ValueVector = NothingVector;
// The baseline compiler uses the iterator's control stack, attaching
// its own control information.
using ControlItem = Control;
};
using BaseOpIter = OpIter<BaseCompilePolicy>;
// The baseline compiler will use OOL code more sparingly than
// Baldr since our code is not high performance and frills like
// code density and branch prediction friendliness will be less
// important.
class OutOfLineCode : public TempObject {
private:
NonAssertingLabel entry_;
NonAssertingLabel rejoin_;
StackHeight stackHeight_;
public:
OutOfLineCode() : stackHeight_(StackHeight::Invalid()) {}
Label* entry() { return &entry_; }
Label* rejoin() { return &rejoin_; }
void setStackHeight(StackHeight stackHeight) {
MOZ_ASSERT(!stackHeight_.isValid());
stackHeight_ = stackHeight;
}
void bind(BaseStackFrame* fr, MacroAssembler* masm) {
MOZ_ASSERT(stackHeight_.isValid());
masm->bind(&entry_);
fr->setStackHeight(stackHeight_);
}
// The generate() method must be careful about register use
// because it will be invoked when there is a register
// assignment in the BaseCompiler that does not correspond
// to the available registers when the generated OOL code is
// executed. The register allocator *must not* be called.
//
// The best strategy is for the creator of the OOL object to
// allocate all temps that the OOL code will need.
//
// Input, output, and temp registers are embedded in the OOL
// object and are known to the code generator.
//
// Scratch registers are available to use in OOL code.
//
// All other registers must be explicitly saved and restored
// by the OOL code before being used.
virtual void generate(MacroAssembler* masm) = 0;
};
enum class LatentOp { None, Compare, Eqz };
struct AccessCheck {
AccessCheck()
: omitBoundsCheck(false),
omitAlignmentCheck(false),
onlyPointerAlignment(false) {}
// If `omitAlignmentCheck` is true then neither the pointer nor the
// offset needs to be checked. Otherwise, if `onlyPointerAlignment` is
// true then only the pointer needs to be checked. Otherwise, check the
// sum of the pointer and the offset.
bool omitBoundsCheck;
bool omitAlignmentCheck;
bool onlyPointerAlignment;
};
const ModuleEnvironment& moduleEnv_;
const CompilerEnvironment& compilerEnv_;
BaseOpIter iter_;
const FuncCompileInput& func_;
size_t lastReadCallSite_;
TempAllocator::Fallible alloc_;
const ValTypeVector& locals_; // Types of parameters and locals
bool deadCode_; // Flag indicating we should decode & discard the opcode
BCESet
bceSafe_; // Locals that have been bounds checked and not updated since
ValTypeVector SigD_;
ValTypeVector SigF_;
NonAssertingLabel returnLabel_;
LatentOp latentOp_; // Latent operation for branch (seen next)
ValType latentType_; // Operand type, if latentOp_ != LatentOp::None
Assembler::Condition
latentIntCmp_; // Comparison operator, if latentOp_ == Compare, int types
Assembler::DoubleCondition
latentDoubleCmp_; // Comparison operator, if latentOp_ == Compare, float
// types
FuncOffsets offsets_;
MacroAssembler& masm; // No '_' suffix - too tedious...
BaseRegAlloc ra; // Ditto
BaseStackFrame fr;
StackMapGenerator stackMapGenerator_;
BaseStackFrame::LocalVector localInfo_;
Vector<OutOfLineCode*, 8, SystemAllocPolicy> outOfLine_;
// On specific platforms we sometimes need to use specific registers.
SpecificRegs specific_;
// There are more members scattered throughout.
public:
BaseCompiler(const ModuleEnvironment& moduleEnv,
const CompilerEnvironment& compilerEnv,
const FuncCompileInput& input, const ValTypeVector& locals,
const MachineState& trapExitLayout,
size_t trapExitLayoutNumWords, Decoder& decoder,
StkVector& stkSource, TempAllocator* alloc, MacroAssembler* masm,
StackMaps* stackMaps);
~BaseCompiler();
[[nodiscard]] bool init();
FuncOffsets finish();
[[nodiscard]] bool emitFunction();
void emitInitStackLocals();
const FuncType& funcType() const {
return *moduleEnv_.funcs[func_.index].type;
}
const TypeIdDesc& funcTypeId() const {
return *moduleEnv_.funcs[func_.index].typeId;
}
// Used by some of the ScratchRegister implementations.
operator MacroAssembler&() const { return masm; }
operator BaseRegAlloc&() { return ra; }
bool usesSharedMemory() const { return moduleEnv_.usesSharedMemory(); }
private:
////////////////////////////////////////////////////////////
//
// Out of line code management.
[[nodiscard]] OutOfLineCode* addOutOfLineCode(OutOfLineCode* ool) {
if (!ool || !outOfLine_.append(ool)) {
return nullptr;
}
ool->setStackHeight(fr.stackHeight());
return ool;
}
[[nodiscard]] bool generateOutOfLineCode() {
for (uint32_t i = 0; i < outOfLine_.length(); i++) {
OutOfLineCode* ool = outOfLine_[i];
ool->bind(&fr, &masm);
ool->generate(&masm);
}
return !masm.oom();
}
// Utility.
const Local& localFromSlot(uint32_t slot, MIRType type) {
MOZ_ASSERT(localInfo_[slot].type == type);
return localInfo_[slot];
}
////////////////////////////////////////////////////////////
//
// High-level register management.
bool isAvailableI32(RegI32 r) { return ra.isAvailableI32(r); }
bool isAvailableI64(RegI64 r) { return ra.isAvailableI64(r); }
bool isAvailableRef(RegPtr r) { return ra.isAvailablePtr(r); }
bool isAvailableF32(RegF32 r) { return ra.isAvailableF32(r); }
bool isAvailableF64(RegF64 r) { return ra.isAvailableF64(r); }
#ifdef ENABLE_WASM_SIMD
bool isAvailableV128(RegV128 r) { return ra.isAvailableV128(r); }
#endif
[[nodiscard]] RegI32 needI32() { return ra.needI32(); }
[[nodiscard]] RegI64 needI64() { return ra.needI64(); }
[[nodiscard]] RegPtr needRef() { return ra.needPtr(); }
[[nodiscard]] RegF32 needF32() { return ra.needF32(); }
[[nodiscard]] RegF64 needF64() { return ra.needF64(); }
#ifdef ENABLE_WASM_SIMD
[[nodiscard]] RegV128 needV128() { return ra.needV128(); }
#endif
void needI32(RegI32 specific) { ra.needI32(specific); }
void needI64(RegI64 specific) { ra.needI64(specific); }
void needRef(RegPtr specific) { ra.needPtr(specific); }
void needF32(RegF32 specific) { ra.needF32(specific); }
void needF64(RegF64 specific) { ra.needF64(specific); }
#ifdef ENABLE_WASM_SIMD
void needV128(RegV128 specific) { ra.needV128(specific); }
#endif
#if defined(JS_CODEGEN_ARM)
[[nodiscard]] RegI64 needI64Pair() { return ra.needI64Pair(); }
#endif
void freeI32(RegI32 r) { ra.freeI32(r); }
void freeI64(RegI64 r) { ra.freeI64(r); }
void freeRef(RegPtr r) { ra.freePtr(r); }
void freeF32(RegF32 r) { ra.freeF32(r); }
void freeF64(RegF64 r) { ra.freeF64(r); }
#ifdef ENABLE_WASM_SIMD
void freeV128(RegV128 r) { ra.freeV128(r); }
#endif
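// Free an I64 register while keeping one of its 32-bit halves allocated.
// On 64-bit targets the I64 and the I32 are the same register, so there is
// nothing to free.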
void freeI64Except(RegI64 r, RegI32 except) {
#ifdef JS_PUNBOX64
MOZ_ASSERT(r.reg == except);
#else
MOZ_ASSERT(r.high == except || r.low == except);
freeI64(r);
needI32(except);
#endif
}
void maybeFreeI32(RegI32 r) {
if (r.isValid()) {
freeI32(r);
}
}
void maybeFreeI64(RegI64 r) {
if (r.isValid()) {
freeI64(r);
}
}
void maybeFreeF64(RegF64 r) {
if (r.isValid()) {
freeF64(r);
}
}
void needI32NoSync(RegI32 r) {
MOZ_ASSERT(isAvailableI32(r));
needI32(r);
}
// TODO / OPTIMIZE: need2xI32() can be optimized along with needI32()
// to avoid sync(). (Bug 1316802)
void need2xI32(RegI32 r0, RegI32 r1) {
needI32(r0);
needI32(r1);
}
void need2xI64(RegI64 r0, RegI64 r1) {
needI64(r0);
needI64(r1);
}
RegI32 fromI64(RegI64 r) { return RegI32(lowPart(r)); }
#ifdef JS_PUNBOX64
RegI64 fromI32(RegI32 r) { return RegI64(Register64(r)); }
#endif
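// widenI32 reuses its argument as the low half and, on 32-bit targets,
// allocates a register for the high half; narrowI64 is the inverse and
// frees the high half. On 64-bit targets both are just renamings of the
// same underlying register.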
RegI64 widenI32(RegI32 r) {
MOZ_ASSERT(!isAvailableI32(r));
#ifdef JS_PUNBOX64
return fromI32(r);
#else
RegI32 high = needI32();
return RegI64(Register64(high, r));
#endif
}
RegI32 narrowI64(RegI64 r) {
#ifdef JS_PUNBOX64
return RegI32(r.reg);
#else
freeI32(RegI32(r.high));
return RegI32(r.low);
#endif
}
RegI32 narrowPtr(RegPtr r) { return RegI32(r); }
RegI32 lowPart(RegI64 r) {
#ifdef JS_PUNBOX64
return RegI32(r.reg);
#else
return RegI32(r.low);
#endif
}
RegI32 maybeHighPart(RegI64 r) {
#ifdef JS_PUNBOX64
return RegI32::Invalid();
#else
return RegI32(r.high);
#endif
}
void maybeClearHighPart(RegI64 r) {
#if !defined(JS_PUNBOX64)
moveImm32(0, RegI32(r.high));
#endif
}
void moveI32(RegI32 src, RegI32 dest) {
if (src != dest) {
masm.move32(src, dest);
}
}
void moveI64(RegI64 src, RegI64 dest) {
if (src != dest) {
masm.move64(src, dest);
}
}
void moveRef(RegPtr src, RegPtr dest) {
if (src != dest) {
masm.movePtr(src, dest);
}
}
void moveF64(RegF64 src, RegF64 dest) {
if (src != dest) {
masm.moveDouble(src, dest);
}
}
void moveF32(RegF32 src, RegF32 dest) {
if (src != dest) {
masm.moveFloat32(src, dest);
}
}
#ifdef ENABLE_WASM_SIMD
void moveV128(RegV128 src, RegV128 dest) {
if (src != dest) {
masm.moveSimd128(src, dest);
}
}
#endif
////////////////////////////////////////////////////////////////////////////
//
// Block parameters and results.
//
// Blocks may have multiple parameters and multiple results. Blocks can also
// be the target of branches: the entry for loops, and the exit for
// non-loops.
//
// Passing multiple values to a non-branch target (i.e., the entry of a
// "block") falls out naturally: any items on the value stack can flow
// directly from one block to another.
//
// However, for branch targets, we need to allocate well-known locations for
// the branch values. The approach taken in the baseline compiler is to
// allocate registers to the top N values (currently N=1), and then stack
// locations for the rest.
//
enum class RegKind { All, OnlyGPRs };
inline void needResultRegisters(ResultType type, RegKind which) {
if (type.empty()) {
return;
}
for (ABIResultIter iter(type); !iter.done(); iter.next()) {
ABIResult result = iter.cur();
// Register results are visited first; when we see a stack result we're
// done.
if (!result.inRegister()) {
return;
}
switch (result.type().kind()) {
case ValType::I32:
needI32(RegI32(result.gpr()));
break;
case ValType::I64:
needI64(RegI64(result.gpr64()));
break;
case ValType::V128:
#ifdef ENABLE_WASM_SIMD
if (which == RegKind::All) {
needV128(RegV128(result.fpr()));
}
break;
#else
MOZ_CRASH("No SIMD support");
#endif
case ValType::F32:
if (which == RegKind::All) {
needF32(RegF32(result.fpr()));
}
break;
case ValType::F64:
if (which == RegKind::All) {
needF64(RegF64(result.fpr()));
}
break;
case ValType::Ref:
needRef(RegPtr(result.gpr()));
break;
}
}
}
#ifdef JS_CODEGEN_X64
inline void maskResultRegisters(ResultType type) {
MOZ_ASSERT(JitOptions.spectreIndexMasking);
if (type.empty()) {
return;
}
for (ABIResultIter iter(type); !iter.done(); iter.next()) {
ABIResult result = iter.cur();
if (result.inRegister() && result.type().kind() == ValType::I32) {
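// A 32-bit move of a register to itself zero-extends it on x64, clearing
// the upper bits as a Spectre index-masking mitigation.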
masm.movl(result.gpr(), result.gpr());
}
}
}
#endif
inline void freeResultRegisters(ResultType type, RegKind which) {
if (type.empty()) {
return;
}
for (ABIResultIter iter(type); !iter.done(); iter.next()) {
ABIResult result = iter.cur();
// Register results are visited first; when we see a stack result we're
// done.
if (!result.inRegister()) {
return;
}
switch (result.type().kind()) {
case ValType::I32:
freeI32(RegI32(result.gpr()));
break;
case ValType::I64:
freeI64(RegI64(result.gpr64()));
break;
case ValType::V128:
#ifdef ENABLE_WASM_SIMD
if (which == RegKind::All) {
freeV128(RegV128(result.fpr()));
}
break;
#else
MOZ_CRASH("No SIMD support");
#endif
case ValType::F32:
if (which == RegKind::All) {
freeF32(RegF32(result.fpr()));
}
break;
case ValType::F64:
if (which == RegKind::All) {
freeF64(RegF64(result.fpr()));
}
break;
case ValType::Ref:
freeRef(RegPtr(result.gpr()));
break;
}
}
}
void needIntegerResultRegisters(ResultType type) {
needResultRegisters(type, RegKind::OnlyGPRs);
}
void freeIntegerResultRegisters(ResultType type) {
freeResultRegisters(type, RegKind::OnlyGPRs);
}
void needResultRegisters(ResultType type) {
needResultRegisters(type, RegKind::All);
}
void freeResultRegisters(ResultType type) {
freeResultRegisters(type, RegKind::All);
}
void assertResultRegistersAvailable(ResultType type) {
#ifdef DEBUG
for (ABIResultIter iter(type); !iter.done(); iter.next()) {
ABIResult result = iter.cur();
if (!result.inRegister()) {
return;
}
switch (result.type().kind()) {
case ValType::I32:
MOZ_ASSERT(isAvailableI32(RegI32(result.gpr())));
break;
case ValType::I64:
MOZ_ASSERT(isAvailableI64(RegI64(result.gpr64())));
break;
case ValType::V128:
# ifdef ENABLE_WASM_SIMD
MOZ_ASSERT(isAvailableV128(RegV128(result.fpr())));
break;
# else
MOZ_CRASH("No SIMD support");
# endif
case ValType::F32:
MOZ_ASSERT(isAvailableF32(RegF32(result.fpr())));
break;
case ValType::F64:
MOZ_ASSERT(isAvailableF64(RegF64(result.fpr())));
break;
case ValType::Ref:
MOZ_ASSERT(isAvailableRef(RegPtr(result.gpr())));
break;
}
}
#endif
}
void captureResultRegisters(ResultType type) {
assertResultRegistersAvailable(type);
needResultRegisters(type);
}
void captureCallResultRegisters(ResultType type) {
captureResultRegisters(type);
#ifdef JS_CODEGEN_X64
if (JitOptions.spectreIndexMasking) {
maskResultRegisters(type);
}
#endif
}
////////////////////////////////////////////////////////////
//
// Value stack and spilling.
//
// The value stack facilitates some on-the-fly register allocation
// and immediate-constant use. It tracks constants, latent
// references to locals, register contents, and values on the CPU
// stack.
//
// The stack can be flushed to memory using sync(). This is handy
// to avoid problems with control flow and messy register usage
// patterns.
// This is the value stack actually used during compilation. It is a
// StkVector rather than a StkVector& since constantly dereferencing a
// StkVector& adds about 0.5% or more to the compiler's dynamic instruction
// count.
StkVector stk_;
static constexpr size_t MaxPushesPerOpcode = 10;
// BaselineCompileFunctions() "lends" us the StkVector to use in this
// BaseCompiler object, and that is installed in |stk_| in our constructor.
// This is so as to avoid having to malloc/free the vector's contents at
// each creation/destruction of a BaseCompiler object. It does however mean
// that we need to hold on to a reference to BaselineCompileFunctions()'s
// vector, so we can swap (give) its contents back when this BaseCompiler
// object is destroyed. This significantly reduces the heap turnover of the
// baseline compiler. See bug 1532592.
StkVector& stkSource_;
#ifdef DEBUG
size_t countMemRefsOnStk() {
size_t nRefs = 0;
for (Stk& v : stk_) {
if (v.kind() == Stk::MemRef) {
nRefs++;
}
}
return nRefs;
}
#endif
template <typename T>
void push(T item) {
// None of the single-arg Stk constructors create a Stk::MemRef, so
// there's no need to increment stackMapGenerator_.memRefsOnStk here.
stk_.infallibleEmplaceBack(Stk(item));
}
void pushConstRef(intptr_t v) { stk_.infallibleEmplaceBack(Stk::StkRef(v)); }
void loadConstI32(const Stk& src, RegI32 dest) {
moveImm32(src.i32val(), dest);
}
void loadMemI32(const Stk& src, RegI32 dest) {
fr.loadStackI32(src.offs(), dest);
}
void loadLocalI32(const Stk& src, RegI32 dest) {
fr.loadLocalI32(localFromSlot(src.slot(), MIRType::Int32), dest);
}
void loadRegisterI32(const Stk& src, RegI32 dest) {
moveI32(src.i32reg(), dest);
}
void loadConstI64(const Stk& src, RegI64 dest) {
moveImm64(src.i64val(), dest);
}
void loadMemI64(const Stk& src, RegI64 dest) {
fr.loadStackI64(src.offs(), dest);
}
void loadLocalI64(const Stk& src, RegI64 dest) {
fr.loadLocalI64(localFromSlot(src.slot(), MIRType::Int64), dest);
}
void loadRegisterI64(const Stk& src, RegI64 dest) {
moveI64(src.i64reg(), dest);
}
void loadConstRef(const Stk& src, RegPtr dest) {
moveImmRef(src.refval(), dest);
}
void loadMemRef(const Stk& src, RegPtr dest) {
fr.loadStackPtr(src.offs(), dest);
}
void loadLocalRef(const Stk& src, RegPtr dest) {
fr.loadLocalPtr(localFromSlot(src.slot(), MIRType::RefOrNull), dest);
}
void loadRegisterRef(const Stk& src, RegPtr dest) {
moveRef(src.refReg(), dest);
}
void loadConstF64(const Stk& src, RegF64 dest) {
double d;
src.f64val(&d);
masm.loadConstantDouble(d, dest);
}
void loadMemF64(const Stk& src, RegF64 dest) {
fr.loadStackF64(src.offs(), dest);
}
void loadLocalF64(const Stk& src, RegF64 dest) {
fr.loadLocalF64(localFromSlot(src.slot(), MIRType::Double), dest);
}
void loadRegisterF64(const Stk& src, RegF64 dest) {
moveF64(src.f64reg(), dest);
}
void loadConstF32(const Stk& src, RegF32 dest) {
float f;
src.f32val(&f);
masm.loadConstantFloat32(f, dest);
}
void loadMemF32(const Stk& src, RegF32 dest) {
fr.loadStackF32(src.offs(), dest);
}
void loadLocalF32(const Stk& src, RegF32 dest) {
fr.loadLocalF32(localFromSlot(src.slot(), MIRType::Float32), dest);
}
void loadRegisterF32(const Stk& src, RegF32 dest) {
moveF32(src.f32reg(), dest);
}
#ifdef ENABLE_WASM_SIMD
void loadConstV128(const Stk& src, RegV128 dest) {
V128 f;
src.v128val(&f);
masm.loadConstantSimd128(SimdConstant::CreateX16((int8_t*)f.bytes), dest);
}
void loadMemV128(const Stk& src, RegV128 dest) {
fr.loadStackV128(src.offs(), dest);
}
void loadLocalV128(const Stk& src, RegV128 dest) {
fr.loadLocalV128(localFromSlot(src.slot(), MIRType::Simd128), dest);
}
void loadRegisterV128(const Stk& src, RegV128 dest) {
moveV128(src.v128reg(), dest);
}
#endif
void loadI32(const Stk& src, RegI32 dest) {
switch (src.kind()) {
case Stk::ConstI32:
loadConstI32(src, dest);
break;
case Stk::MemI32:
loadMemI32(src, dest);
break;
case Stk::LocalI32:
loadLocalI32(src, dest);
break;
case Stk::RegisterI32:
loadRegisterI32(src, dest);
break;
default:
MOZ_CRASH("Compiler bug: Expected I32 on stack");
}
}
void loadI64(const Stk& src, RegI64 dest) {
switch (src.kind()) {
case Stk::ConstI64:
loadConstI64(src, dest);
break;
case Stk::MemI64:
loadMemI64(src, dest);
break;
case Stk::LocalI64:
loadLocalI64(src, dest);
break;
case Stk::RegisterI64:
loadRegisterI64(src, dest);
break;
default:
MOZ_CRASH("Compiler bug: Expected I64 on stack");
}
}
#if !defined(JS_PUNBOX64)
void loadI64Low(const Stk& src, RegI32 dest) {
switch (src.kind()) {
case Stk::ConstI64:
moveImm32(int32_t(src.i64val()), dest);
break;
case Stk::MemI64:
fr.loadStackI64Low(src.offs(), dest);
break;
case Stk::LocalI64:
fr.loadLocalI64Low(localFromSlot(src.slot(), MIRType::Int64), dest);
break;
case Stk::RegisterI64:
moveI32(RegI32(src.i64reg().low), dest);
break;
default:
MOZ_CRASH("Compiler bug: Expected I64 on stack");
}
}
void loadI64High(const Stk& src, RegI32 dest) {
switch (src.kind()) {
case Stk::ConstI64:
moveImm32(int32_t(src.i64val() >> 32), dest);
break;
case Stk::MemI64:
fr.loadStackI64High(src.offs(), dest);
break;
case Stk::LocalI64:
fr.loadLocalI64High(localFromSlot(src.slot(), MIRType::Int64), dest);
break;
case Stk::RegisterI64:
moveI32(RegI32(src.i64reg().high), dest);
break;
default:
MOZ_CRASH("Compiler bug: Expected I64 on stack");
}
}
#endif
void loadF64(const Stk& src, RegF64 dest) {
switch (src.kind()) {
case Stk::ConstF64:
loadConstF64(src, dest);
break;
case Stk::MemF64:
loadMemF64(src, dest);
break;
case Stk::LocalF64:
loadLocalF64(src, dest);
break;
case Stk::RegisterF64:
loadRegisterF64(src, dest);
break;
default:
MOZ_CRASH("Compiler bug: expected F64 on stack");
}
}
void loadF32(const Stk& src, RegF32 dest) {
switch (src.kind()) {
case Stk::ConstF32:
loadConstF32(src, dest);
break;
case Stk::MemF32:
loadMemF32(src, dest);
break;
case Stk::LocalF32:
loadLocalF32(src, dest);
break;
case Stk::RegisterF32:
loadRegisterF32(src, dest);
break;
default:
MOZ_CRASH("Compiler bug: expected F32 on stack");
}
}
#ifdef ENABLE_WASM_SIMD
void loadV128(const Stk& src, RegV128 dest) {
switch (src.kind()) {
case Stk::ConstV128:
loadConstV128(src, dest);
break;
case Stk::MemV128:
loadMemV128(src, dest);
break;
case Stk::LocalV128:
loadLocalV128(src, dest);
break;
case Stk::RegisterV128:
loadRegisterV128(src, dest);
break;
default:
MOZ_CRASH("Compiler bug: expected V128 on stack");
}
}
#endif
void loadRef(const Stk& src, RegPtr dest) {
switch (src.kind()) {
case Stk::ConstRef:
loadConstRef(src, dest);
break;
case Stk::MemRef:
loadMemRef(src, dest);
break;
case Stk::LocalRef:
loadLocalRef(src, dest);
break;
case Stk::RegisterRef:
loadRegisterRef(src, dest);
break;
default:
MOZ_CRASH("Compiler bug: expected ref on stack");
}
}
// Flush all local and register value stack elements to memory.
//
// TODO / OPTIMIZE: As this is fairly expensive and causes worse
// code to be emitted subsequently, it is useful to avoid calling
// it. (Bug 1316802)
//
// Some optimization has been done already. Remaining
// opportunities:
//
// - It would be interesting to see if we can specialize it
// before calls with particularly simple signatures, or where
// we can do parallel assignment of register arguments, or
// similar. See notes in emitCall().
//
// - Operations that need specific registers: multiply, quotient,
// remainder, will tend to sync because the registers we need
// will tend to be allocated. We may be able to avoid that by
// prioritizing registers differently (takeLast instead of
// takeFirst) but we may also be able to allocate an unused
// register on demand to free up one we need, thus avoiding the
// sync. That type of fix would go into needI32().
void sync() final {
size_t start = 0;
size_t lim = stk_.length();
for (size_t i = lim; i > 0; i--) {
// Memory opcodes are first in the enum, so a single check against MemLast
// is fine.
if (stk_[i - 1].kind() <= Stk::MemLast) {
start = i;
break;
}
}
for (size_t i = start; i < lim; i++) {
Stk& v = stk_[i];
switch (v.kind()) {
case Stk::LocalI32: {
ScratchI32 scratch(*this);
loadLocalI32(v, scratch);
uint32_t offs = fr.pushPtr(scratch);
v.setOffs(Stk::MemI32, offs);
break;
}
case Stk::RegisterI32: {
uint32_t offs = fr.pushPtr(v.i32reg());
freeI32(v.i32reg());
v.setOffs(Stk::MemI32, offs);
break;
}
case Stk::LocalI64: {
ScratchI32 scratch(*this);
#ifdef JS_PUNBOX64
loadI64(v, fromI32(scratch));
uint32_t offs = fr.pushPtr(scratch);
#else
fr.loadLocalI64High(localFromSlot(v.slot(), MIRType::Int64), scratch);
fr.pushPtr(scratch);
fr.loadLocalI64Low(localFromSlot(v.slot(), MIRType::Int64), scratch);
uint32_t offs = fr.pushPtr(scratch);
#endif
v.setOffs(Stk::MemI64, offs);
break;
}
case Stk::RegisterI64: {
#ifdef JS_PUNBOX64
uint32_t offs = fr.pushPtr(v.i64reg().reg);
freeI64(v.i64reg());
#else
fr.pushPtr(v.i64reg().high);
uint32_t offs = fr.pushPtr(v.i64reg().low);
freeI64(v.i64reg());
#endif
v.setOffs(Stk::MemI64, offs);
break;
}
case Stk::LocalF64: {
ScratchF64 scratch(*this);
loadF64(v, scratch);
uint32_t offs = fr.pushDouble(scratch);
v.setOffs(Stk::MemF64, offs);
break;
}
case Stk::RegisterF64: {
uint32_t offs = fr.pushDouble(v.f64reg());
freeF64(v.f64reg());
v.setOffs(Stk::MemF64, offs);
break;
}
case Stk::LocalF32: {
ScratchF32 scratch(*this);
loadF32(v, scratch);
uint32_t offs = fr.pushFloat32(scratch);
v.setOffs(Stk::MemF32, offs);
break;
}
case Stk::RegisterF32: {
uint32_t offs = fr.pushFloat32(v.f32reg());
freeF32(v.f32reg());
v.setOffs(Stk::MemF32, offs);
break;
}
#ifdef ENABLE_WASM_SIMD
case Stk::LocalV128: {
ScratchV128 scratch(*this);
loadV128(v, scratch);
uint32_t offs = fr.pushV128(scratch);
v.setOffs(Stk::MemV128, offs);
break;
}
case Stk::RegisterV128: {
uint32_t offs = fr.pushV128(v.v128reg());
freeV128(v.v128reg());
v.setOffs(Stk::MemV128, offs);
break;
}
#endif
case Stk::LocalRef: {
ScratchPtr scratch(*this);
loadLocalRef(v, scratch);
uint32_t offs = fr.pushPtr(scratch);
v.setOffs(Stk::MemRef, offs);
stackMapGenerator_.memRefsOnStk++;
break;
}
case Stk::RegisterRef: {
uint32_t offs = fr.pushPtr(v.refReg());
freeRef(v.refReg());
v.setOffs(Stk::MemRef, offs);
stackMapGenerator_.memRefsOnStk++;
break;
}
default: {
break;
}
}
}
}
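// Spill a temporary pointer register to the machine stack and release it to
// the allocator; restoreTempPtr() reverses this, re-claiming the register
// and popping the saved value back into it.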
void saveTempPtr(RegPtr r) final {
MOZ_ASSERT(!ra.isAvailablePtr(r));
fr.pushPtr(r);
ra.freePtr(r);
MOZ_ASSERT(ra.isAvailablePtr(r));
}
void restoreTempPtr(RegPtr r) final {
MOZ_ASSERT(ra.isAvailablePtr(r));
ra.needPtr(r);
fr.popPtr(r);
MOZ_ASSERT(!ra.isAvailablePtr(r));
}
// Various methods for creating a stack map. Stack maps are indexed by the
// lowest address of the instruction immediately *after* the instruction of
// interest. In practice that means either: the return point of a call, the
// instruction immediately after a trap instruction (the "resume"
// instruction), or the instruction immediately following a no-op (when
// debugging is enabled).
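//
// For example, for a call the map is keyed by the return point, i.e. by
// masm.currentOffset() taken immediately after the call instruction has
// been emitted.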
// Create a vanilla stack map.
[[nodiscard]] bool createStackMap(const char* who) {
const ExitStubMapVector noExtras;
return createStackMap(who, noExtras, masm.currentOffset());
}
// Create a stack map as vanilla, but for a custom assembler offset.
[[nodiscard]] bool createStackMap(const char* who,
CodeOffset assemblerOffset) {
const ExitStubMapVector noExtras;
return createStackMap(who, noExtras, assemblerOffset.offset());
}
// The most general stack map construction.
[[nodiscard]] bool createStackMap(const char* who,
const ExitStubMapVector& extras,
uint32_t assemblerOffset) {
auto debugFrame =
compilerEnv_.debugEnabled() ? HasDebugFrame::Yes : HasDebugFrame::No;
return stackMapGenerator_.createStackMap(who, extras, assemblerOffset,
debugFrame, stk_);
}
// This is an optimization used to avoid calling sync() for setLocal(): if
// the local has no unresolved (Local*) entries on the value stack then we
// can skip the sync.
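// For example, a local.set of a slot that still has pending local.get
// entries on the value stack must sync, so that those pending reads capture
// the old value before it is overwritten.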
bool hasLocal(uint32_t slot) {
for (size_t i = stk_.length(); i > 0; i--) {
// Memory opcodes are first in the enum, so a single check against MemLast
// is fine.
Stk::Kind kind = stk_[i - 1].kind();
if (kind <= Stk::MemLast) {
return false;
}
// Local opcodes follow memory opcodes in the enum, so a single check
// against LocalLast is sufficient.
if (kind <= Stk::LocalLast && stk_[i - 1].slot() == slot) {
return true;
}
}
return false;
}
void syncLocal(uint32_t slot) {
if (hasLocal(slot)) {
sync(); // TODO / OPTIMIZE: Improve this? (Bug 1316817)
}
}
// Push the register r onto the stack.
void pushI32(RegI32 r) {
MOZ_ASSERT(!isAvailableI32(r));
push(Stk(r));
}
void pushI64(RegI64 r) {
MOZ_ASSERT(!isAvailableI64(r));
push(Stk(r));
}
void pushRef(RegPtr r) {
MOZ_ASSERT(!isAvailableRef(r));
push(Stk(r));
}
void pushF64(RegF64 r) {
MOZ_ASSERT(!isAvailableF64(r));
push(Stk(r));
}
void pushF32(RegF32 r) {
MOZ_ASSERT(!isAvailableF32(r));
push(Stk(r));
}
#ifdef ENABLE_WASM_SIMD
void pushV128(RegV128 r) {
MOZ_ASSERT(!isAvailableV128(r));
push(Stk(r));
}
#endif
// Push the value onto the stack.
void pushI32(int32_t v) { push(Stk(v)); }
void pushI64(int64_t v) { push(Stk(v)); }
void pushRef(intptr_t v) { pushConstRef(v); }
void pushF64(double v) { push(Stk(v)); }
void pushF32(float v) { push(Stk(v)); }
#ifdef ENABLE_WASM_SIMD
void pushV128(V128 v) { push(Stk(v)); }
#endif
// Push the local slot onto the stack. The slot will not be read
// here; it will be read when it is consumed, or when a side
// effect to the slot forces its value to be saved.
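// For example, local.get only records the slot here; the actual read is
// deferred until the entry is popped, or until a sync() spills it.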
void pushLocalI32(uint32_t slot) {
stk_.infallibleEmplaceBack(Stk(Stk::LocalI32, slot));
}
void pushLocalI64(uint32_t slot) {
stk_.infallibleEmplaceBack(Stk(Stk::LocalI64, slot));
}
void pushLocalRef(uint32_t slot) {
stk_.infallibleEmplaceBack(Stk(Stk::LocalRef, slot));
}
void pushLocalF64(uint32_t slot) {
stk_.infallibleEmplaceBack(Stk(Stk::LocalF64, slot));
}
void pushLocalF32(uint32_t slot) {
stk_.infallibleEmplaceBack(Stk(Stk::LocalF32, slot));
}
#ifdef ENABLE_WASM_SIMD
void pushLocalV128(uint32_t slot) {
stk_.infallibleEmplaceBack(Stk(Stk::LocalV128, slot));
}
#endif
// Call only from other popI32() variants.
// v must be the stack top. May pop the CPU stack.
void popI32(const Stk& v, RegI32 dest) {
MOZ_ASSERT(&v == &stk_.back());
switch (v.kind()) {
case Stk::ConstI32:
loadConstI32(v, dest);
break;
case Stk::LocalI32:
loadLocalI32(v, dest);
break;
case Stk::MemI32:
fr.popPtr(dest);
break;
case Stk::RegisterI32:
loadRegisterI32(v, dest);
break;
default:
MOZ_CRASH("Compiler bug: expected int on stack");
}
}
[[nodiscard]] RegI32 popI32() {
Stk& v = stk_.back();
RegI32 r;
if (v.kind() == Stk::RegisterI32) {
r = v.i32reg();
} else {
popI32(v, (r = needI32()));
}
stk_.popBack();
return r;
}
RegI32 popI32(RegI32 specific) {
Stk& v = stk_.back();
if (!(v.kind() == Stk::RegisterI32 && v.i32reg() == specific)) {
needI32(specific);
popI32(v, specific);
if (v.kind() == Stk::RegisterI32) {
freeI32(v.i32reg());
}
}
stk_.popBack();
return specific;
}
#ifdef ENABLE_WASM_SIMD
// Call only from other popV128() variants.
// v must be the stack top. May pop the CPU stack.
void popV128(const Stk& v, RegV128 dest) {
MOZ_ASSERT(&v == &stk_.back());
switch (v.kind()) {
case Stk::ConstV128:
loadConstV128(v, dest);
break;
case Stk::LocalV128:
loadLocalV128(v, dest);
break;
case Stk::MemV128:
fr.popV128(dest);
break;
case Stk::RegisterV128:
loadRegisterV128(v, dest);
break;
default:
MOZ_CRASH("Compiler bug: expected int on stack");
}
}
[[nodiscard]] RegV128 popV128() {
Stk& v = stk_.back();
RegV128 r;
if (v.kind() == Stk::RegisterV128) {
r = v.v128reg();
} else {
popV128(v, (r = needV128()));
}
stk_.popBack();
return r;
}
RegV128 popV128(RegV128 specific) {
Stk& v = stk_.back();
if (!(v.kind() == Stk::RegisterV128 && v.v128reg() == specific)) {
needV128(specific);
popV128(v, specific);
if (v.kind() == Stk::RegisterV128) {
freeV128(v.v128reg());
}
}
stk_.popBack();
return specific;
}
#endif
// Call only from other popI64() variants.
// v must be the stack top. May pop the CPU stack.
void popI64(const Stk& v, RegI64 dest) {
MOZ_ASSERT(&v == &stk_.back());
switch (v.kind()) {
case Stk::ConstI64:
loadConstI64(v, dest);
break;
case Stk::LocalI64:
loadLocalI64(v, dest);
break;
case Stk::MemI64:
#ifdef JS_PUNBOX64
fr.popPtr(dest.reg);
#else
fr.popPtr(dest.low);
fr.popPtr(dest.high);
#endif
break;
case Stk::RegisterI64:
loadRegisterI64(v, dest);
break;
default:
MOZ_CRASH("Compiler bug: expected long on stack");
}
}
[[nodiscard]] RegI64 popI64() {
Stk& v = stk_.back();
RegI64 r;
if (v.kind() == Stk::RegisterI64) {
r = v.i64reg();
} else {
popI64(v, (r = needI64()));
}
stk_.popBack();
return r;
}
// Note, the stack top can be in one half of "specific" on 32-bit
// systems. We can optimize, but for simplicity, if the register
// does not match exactly, then just force the stack top to memory
// and then read it back in.
RegI64 popI64(RegI64 specific) {
Stk& v = stk_.back();
if (!(v.kind() == Stk::RegisterI64 && v.i64reg() == specific)) {
needI64(specific);
popI64(v, specific);
if (v.kind() == Stk::RegisterI64) {
freeI64(v.i64reg());
}
}
stk_.popBack();
return specific;
}
// Call only from other popRef() variants.
// v must be the stack top. May pop the CPU stack.
void popRef(const Stk& v, RegPtr dest) {
MOZ_ASSERT(&v == &stk_.back());
switch (v.kind()) {
case Stk::ConstRef:
loadConstRef(v, dest);
break;
case Stk::LocalRef:
loadLocalRef(v, dest);
break;
case Stk::MemRef:
fr.popPtr(dest);
break;
case Stk::RegisterRef:
loadRegisterRef(v, dest);
break;
default:
MOZ_CRASH("Compiler bug: expected ref on stack");
}
}
RegPtr popRef(RegPtr specific) {
Stk& v = stk_.back();
if (!(v.kind() == Stk::RegisterRef && v.refReg() == specific)) {
needRef(specific);
popRef(v, specific);
if (v.kind() == Stk::RegisterRef) {
freeRef(v.refReg());
}
}
stk_.popBack();
if (v.kind() == Stk::MemRef) {
stackMapGenerator_.memRefsOnStk--;
}
return specific;
}
[[nodiscard]] RegPtr popRef() {
Stk& v = stk_.back();
RegPtr r;
if (v.kind() == Stk::RegisterRef) {
r = v.refReg();
} else {
popRef(v, (r = needRef()));
}
stk_.popBack();
if (v.kind() == Stk::MemRef) {
stackMapGenerator_.memRefsOnStk--;
}
return r;
}
// Call only from other popF64() variants.
// v must be the stack top. May pop the CPU stack.
void popF64(const Stk& v, RegF64 dest) {
MOZ_ASSERT(&v == &stk_.back());
switch (v.kind()) {
case Stk::ConstF64:
loadConstF64(v, dest);
break;
case Stk::LocalF64:
loadLocalF64(v, dest);
break;
case Stk::MemF64:
fr.popDouble(dest);
break;
case Stk::RegisterF64:
loadRegisterF64(v, dest);
break;
default:
MOZ_CRASH("Compiler bug: expected double on stack");
}
}
[[nodiscard]] RegF64 popF64() {
Stk& v = stk_.back();
RegF64 r;
if (v.kind() == Stk::RegisterF64) {
r = v.f64reg();
} else {
popF64(v, (r = needF64()));
}
stk_.popBack();
return r;
}
RegF64 popF64(RegF64 specific) {
Stk& v = stk_.back();
if (!(v.kind() == Stk::RegisterF64 && v.f64reg() == specific)) {
needF64(specific);
popF64(v, specific);
if (v.kind() == Stk::RegisterF64) {
freeF64(v.f64reg());
}
}
stk_.popBack();
return specific;
}
// Call only from other popF32() variants.
// v must be the stack top. May pop the CPU stack.
void popF32(const Stk& v, RegF32 dest) {
MOZ_ASSERT(&v == &stk_.back());
switch (v.kind()) {
case Stk::ConstF32:
loadConstF32(v, dest);
break;
case Stk::LocalF32:
loadLocalF32(v, dest);
break;
case Stk::MemF32:
fr.popFloat32(dest);
break;
case Stk::RegisterF32:
loadRegisterF32(v, dest);
break;
default:
MOZ_CRASH("Compiler bug: expected float on stack");
}
}
[[nodiscard]] RegF32 popF32() {
Stk& v = stk_.back();
RegF32 r;
if (v.kind() == Stk::RegisterF32) {
r = v.f32reg();
} else {
popF32(v, (r = needF32()));
}
stk_.popBack();
return r;
}
RegF32 popF32(RegF32 specific) {
Stk& v = stk_.back();
if (!(v.kind() == Stk::RegisterF32 && v.f32reg() == specific)) {
needF32(specific);
popF32(v, specific);
if (v.kind() == Stk::RegisterF32) {
freeF32(v.f32reg());
}
}
stk_.popBack();
return specific;
}
[[nodiscard]] bool popConstI32(int32_t* c) {
Stk& v = stk_.back();
if (v.kind() != Stk::ConstI32) {
return false;
}
*c = v.i32val();
stk_.popBack();
return true;
}
[[nodiscard]] bool popConstI64(int64_t* c) {
Stk& v = stk_.back();
if (v.kind() != Stk::ConstI64) {
return false;
}
*c = v.i64val();
stk_.popBack();
return true;
}
[[nodiscard]] bool peekConstI32(int32_t* c) {
Stk& v = stk_.back();
if (v.kind() != Stk::ConstI32) {
return false;
}
*c = v.i32val();
return true;
}
[[nodiscard]] bool peekConstI64(int64_t* c) {
Stk& v = stk_.back();
if (v.kind() != Stk::ConstI64) {
return false;
}
*c = v.i64val();
return true;
}
[[nodiscard]] bool peek2xI32(int32_t* c0, int32_t* c1) {
MOZ_ASSERT(stk_.length() >= 2);
const Stk& v0 = *(stk_.end() - 1);
const Stk& v1 = *(stk_.end() - 2);
if (v0.kind() != Stk::ConstI32 || v1.kind() != Stk::ConstI32) {
return false;
}
*c0 = v0.i32val();
*c1 = v1.i32val();
return true;
}
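// If the top of the value stack is a constant that is a power of two
// greater than `cutoff`, pop it and return both its value and its log2; the
// I32 and I64 variants below are typically used to strength-reduce division
// and remainder by constant powers of two.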
[[nodiscard]] bool popConstPositivePowerOfTwoI32(int32_t* c,
uint_fast8_t* power,
int32_t cutoff) {
Stk& v = stk_.back();
if (v.kind() != Stk::ConstI32) {
return false;
}
*c = v.i32val();
if (*c <= cutoff || !IsPowerOfTwo(static_cast<uint32_t>(*c))) {
return false;
}
*power = FloorLog2(*c);
stk_.popBack();
return true;
}
[[nodiscard]] bool popConstPositivePowerOfTwoI64(int64_t* c,
uint_fast8_t* power,
int64_t cutoff) {
Stk& v = stk_.back();
if (v.kind() != Stk::ConstI64) {
return false;
}
*c = v.i64val();
if (*c <= cutoff || !IsPowerOfTwo(static_cast<uint64_t>(*c))) {
return false;
}
*power = FloorLog2(*c);
stk_.popBack();
return true;
}
[[nodiscard]] bool peekLocalI32(uint32_t* local) {
Stk& v = stk_.back();
if (v.kind() != Stk::LocalI32) {
return false;
}
*local = v.slot();
return true;
}
// TODO / OPTIMIZE (Bug 1316818): At the moment we use the Wasm
// inter-procedure ABI for block returns, which allocates ReturnReg as the
// single block result register. It is possible other choices would lead to
// better register allocation, as ReturnReg is often first in the register set
// and will be heavily wanted by the register allocator that uses takeFirst().
//
// Obvious options:
// - pick a register at the back of the register set
// - pick a random register per block (different blocks have
// different join regs)
void popRegisterResults(ABIResultIter& iter) {
// Pop register results. Note that in the single-value case, popping to a
// register may cause a sync(); for multi-value we sync'd already.
for (; !iter.done(); iter.next()) {
const ABIResult& result = iter.cur();
if (!result.inRegister()) {
// TODO / OPTIMIZE: We sync here to avoid solving the general parallel
// move problem in popStackResults. However we could avoid syncing the
// values that are going to registers anyway, if they are already in
// registers.
sync();
break;
}
switch (result.type().kind()) {
case ValType::I32:
popI32(RegI32(result.gpr()));
break;
case ValType::I64:
popI64(RegI64(result.gpr64()));
break;
case ValType::F32:
popF32(RegF32(result.fpr()));
break;
case ValType::F64:
popF64(RegF64(result.fpr()));
break;
case ValType::Ref:
popRef(RegPtr(result.gpr()));
break;
case ValType::V128:
#ifdef ENABLE_WASM_SIMD
popV128(RegV128(result.fpr()));
#else
MOZ_CRASH("No SIMD support");
#endif
}
}
}
void popStackResults(ABIResultIter& iter, StackHeight stackBase) {
MOZ_ASSERT(!iter.done());
// The iterator should be advanced beyond register results, and register
// results should be popped already from the value stack.
uint32_t alreadyPopped = iter.index();
// At this point, only stack arguments are remaining. Iterate through them
// to measure how much stack space they will take up.
for (; !iter.done(); iter.next()) {
MOZ_ASSERT(iter.cur().onStack());
}
// Calculate the space needed to store stack results, in bytes.
uint32_t stackResultBytes = iter.stackBytesConsumedSoFar();
MOZ_ASSERT(stackResultBytes);
// Compute the stack height including the stack results. Note that it's
// possible that this call expands the stack, for example if some of the
// results are supplied by constants and so are not already on the machine
// stack.
uint32_t endHeight = fr.prepareStackResultArea(stackBase, stackResultBytes);
// Find a free GPR to use when shuffling stack values. If none is
// available, push ReturnReg and restore it after we're done.
bool saved = false;
RegPtr temp = ra.needTempPtr(RegPtr(ReturnReg), &saved);
// The sequence of Stk values is in the same order on the machine stack as
// the result locations, but there is a complication: constant values are
// not actually pushed on the machine stack. (At this point registers and
// locals have been spilled already.) So, moving the Stk values into place
// isn't simply a shuffle-down or shuffle-up operation. There is a part of
// the Stk sequence that shuffles toward the FP, a part that's already in
// place, and a part that shuffles toward the SP. After shuffling, we have
// to materialize the constants.
// Shuffle mem values toward the frame pointer, copying deepest values
// first. Stop when we run out of results, get to a register result, or
// find a Stk value that is closer to the FP than the result.
for (iter.switchToPrev(); !iter.done(); iter.prev()) {
const ABIResult& result = iter.cur();
if (!result.onStack()) {
break;
}
MOZ_ASSERT(result.stackOffset() < stackResultBytes);
uint32_t destHeight = endHeight - result.stackOffset();
uint32_t stkBase = stk_.length() - (iter.count() - alreadyPopped);
Stk& v = stk_[stkBase + iter.index()];
if (v.isMem()) {
uint32_t srcHeight = v.offs();
if (srcHeight <= destHeight) {
break;
}
fr.shuffleStackResultsTowardFP(srcHeight, destHeight, result.size(),
temp);
}
}
// Reset iterator and skip register results.
for (iter.reset(); !iter.done(); iter.next()) {
if (iter.cur().onStack()) {
break;
}
}
// Revisit top stack values, shuffling mem values toward the stack pointer,
// copying shallowest values first.
for (; !iter.done(); iter.next()) {
const ABIResult& result = iter.cur();
MOZ_ASSERT(result.onStack());
MOZ_ASSERT(result.stackOffset() < stackResultBytes);
uint32_t destHeight = endHeight - result.stackOffset();
Stk& v = stk_[stk_.length() - (iter.index() - alreadyPopped) - 1];
if (v.isMem()) {
uint32_t srcHeight = v.offs();
if (srcHeight >= destHeight) {
break;
}
fr.shuffleStackResultsTowardSP(srcHeight, destHeight, result.size(),
temp);
}
}
// Reset iterator and skip register results, which are already popped off
// the value stack.
for (iter.reset(); !iter.done(); iter.next()) {
if (iter.cur().onStack()) {
break;
}
}
// Materialize constants and pop the remaining items from the value stack.
for (; !iter.done(); iter.next()) {
const ABIResult& result = iter.cur();
uint32_t resultHeight = endHeight - result.stackOffset();
Stk& v = stk_.back();
switch (v.kind()) {
case Stk::ConstI32:
fr.storeImmediatePtrToStack(uint32_t(v.i32val_), resultHeight, temp);
break;
case Stk::ConstF32:
fr.storeImmediateF32ToStack(v.f32val_, resultHeight, temp);
break;
case Stk::ConstI64:
fr.storeImmediateI64ToStack(v.i64val_, resultHeight, temp);
break;
case Stk::ConstF64:
fr.storeImmediateF64ToStack(v.f64val_, resultHeight, temp);
break;
#ifdef ENABLE_WASM_SIMD
case Stk::ConstV128:
fr.storeImmediateV128ToStack(v.v128val_, resultHeight, temp);
break;
#endif
case Stk::ConstRef:
fr.storeImmediatePtrToStack(v.refval_, resultHeight, temp);
break;
case Stk::MemRef:
// Update bookkeeping as we pop the Stk entry.
stackMapGenerator_.memRefsOnStk--;
break;
default:
MOZ_ASSERT(v.isMem());
break;
}
stk_.popBack();
}
ra.freeTempPtr(temp, saved);
// This will pop the stack if needed.
fr.finishStackResultArea(stackBase, stackResultBytes);
}
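// How control reaches a block's continuation: by falling through or by an
// explicit jump. For a jump, the machine stack may have to be popped down
// to the continuation's height (see popBlockResults).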
enum class ContinuationKind { Fallthrough, Jump };
void popBlockResults(ResultType type, StackHeight stackBase,
ContinuationKind kind) {
if (!type.empty()) {
ABIResultIter iter(type);
popRegisterResults(iter);
if (!iter.done()) {
popStackResults(iter, stackBase);
// Because popStackResults might clobber the stack, it leaves the stack
// pointer already in the right place for the continuation, whether the
// continuation is a jump or fallthrough.
return;
}
}
// We get here if there are no stack results. For a fallthrough, the stack
// is already at the right height. For a jump, we may need to pop the stack
// pointer if the continuation's stack height is lower than the current
// stack height.
if (kind == ContinuationKind::Jump) {
fr.popStackBeforeBranch(stackBase, type);
}
}
Stk captureStackResult(const ABIResult& result, StackHeight resultsBase,
uint32_t stackResultBytes) {
MOZ_ASSERT(result.onStack());
uint32_t offs = fr.locateStackResult(result, resultsBase, stackResultBytes);
return Stk::StackResult(result.type(), offs);
}
[[nodiscard]] bool pushResults(ResultType type, StackHeight resultsBase) {
if (type.empty()) {
return true;
}
if (type.length() > 1) {
if (!stk_.reserve(stk_.length() + type.length() + MaxPushesPerOpcode)) {
return false;
}
}
// We need to push the results in reverse order, so first iterate through
// all results to determine the locations of stack result types.
ABIResultIter iter(type);
while (!iter.done()) {
iter.next();
}
uint32_t stackResultBytes = iter.stackBytesConsumedSoFar();
for (iter.switchToPrev(); !iter.done(); iter.prev()) {
const ABIResult& result = iter.cur();
if (!result.onStack()) {
break;
}
Stk v = captureStackResult(result, resultsBase, stackResultBytes);
push(v);
if (v.kind() == Stk::MemRef) {
stackMapGenerator_.memRefsOnStk++;
}
}
for (; !iter.done(); iter.prev()) {
const ABIResult& result = iter.cur();
MOZ_ASSERT(result.inRegister());
switch (result.type().kind()) {
case ValType::I32:
pushI32(RegI32(result.gpr()));
break;
case ValType::I64:
pushI64(RegI64(result.gpr64()));
break;
case ValType::V128:
#ifdef ENABLE_WASM_SIMD
pushV128(RegV128(result.fpr()));
break;
#else
MOZ_CRASH("No SIMD support");
#endif
case ValType::F32:
pushF32(RegF32(result.fpr()));
break;
case ValType::F64:
pushF64(RegF64(result.fpr()));
break;
case ValType::Ref:
pushRef(RegPtr(result.gpr()));
break;
}
}
return true;
}
[[nodiscard]] bool pushBlockResults(ResultType type) {
return pushResults(type, controlItem().stackHeight);
}
// A combination of popBlockResults + pushBlockResults, used when entering a
// block with a control-flow join (loops) or split (if) to shuffle the
// fallthrough block parameters into the locations expected by the
// continuation.
[[nodiscard]] bool topBlockParams(ResultType type) {
// This function should only be called when entering a block with a
// control-flow join at the entry, where there are no live temporaries in
// the current block.
StackHeight base = controlItem().stackHeight;
MOZ_ASSERT(fr.stackResultsBase(stackConsumed(type.length())) == base);
popBlockResults(type, base, ContinuationKind::Fallthrough);
return pushBlockResults(type);
}
// A combination of popBlockResults + pushBlockResults, used before branches
// where we don't know the target (br_if / br_table). If and when the branch
// is taken, the stack results will be shuffled down into place. For br_if
// that has fallthrough, the parameters for the untaken branch flow through to
// the continuation.
[[nodiscard]] bool topBranchParams(ResultType type, StackHeight* height) {
if (type.empty()) {
*height = fr.stackHeight();
return true;
}
// There may be temporary values that need spilling; delay computation of
// the stack results base until after the popRegisterResults(), which spills
// if needed.
ABIResultIter iter(type);
popRegisterResults(iter);
StackHeight base = fr.stackResultsBase(stackConsumed(iter.remaining()));
if (!iter.done()) {
popStackResults(iter, base);
}
if (!pushResults(type, base)) {
return false;
}
*height = base;
return true;
}
// Conditional branches with fallthrough are preceded by a topBranchParams, so
// we know that there are no stack results that need to be materialized. In
// that case, we can just shuffle the whole block down before popping the
// stack.
void shuffleStackResultsBeforeBranch(StackHeight srcHeight,
StackHeight destHeight,
ResultType type) {
uint32_t stackResultBytes = 0;
if (ABIResultIter::HasStackResults(type)) {
MOZ_ASSERT(stk_.length() >= type.length());
ABIResultIter iter(type);
for (; !iter.done(); iter.next()) {
#ifdef DEBUG
const ABIResult& result = iter.cur();
const Stk& v = stk_[stk_.length() - iter.index() - 1];
MOZ_ASSERT(v.isMem() == result.onStack());
#endif
}
stackResultBytes = iter.stackBytesConsumedSoFar();
MOZ_ASSERT(stackResultBytes > 0);
if (srcHeight != destHeight) {
// Find a free GPR to use when shuffling stack values. If none
// is available, push ReturnReg and restore it after we're done.
bool saved = false;
RegPtr temp = ra.needTempPtr(RegPtr(ReturnReg), &saved);
fr.shuffleStackResultsTowardFP(srcHeight, destHeight, stackResultBytes,
temp);
ra.freeTempPtr(temp, saved);
}
}
fr.popStackBeforeBranch(destHeight, stackResultBytes);
}
// Return the amount of execution stack consumed by the top numval
// values on the value stack.
size_t stackConsumed(size_t numval) {
size_t size = 0;
MOZ_ASSERT(numval <= stk_.length());
for (uint32_t i = stk_.length() - 1; numval > 0; numval--, i--) {
Stk& v = stk_[i];
switch (v.kind()) {
case Stk::MemRef:
size += BaseStackFrame::StackSizeOfPtr;
break;
case Stk::MemI32:
size += BaseStackFrame::StackSizeOfPtr;
break;
case Stk::MemI64:
size += BaseStackFrame::StackSizeOfInt64;
break;
case Stk::MemF64:
size += BaseStackFrame::StackSizeOfDouble;
break;
case Stk::MemF32:
size += BaseStackFrame::StackSizeOfFloat;
break;
#ifdef ENABLE_WASM_SIMD
case Stk::MemV128:
size += BaseStackFrame::StackSizeOfV128;
break;
#endif
default:
break;
}
}
return size;
}
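// Pop the value stack down to the given depth, freeing any registers held
// by the popped entries and updating the MemRef bookkeeping. This does not
// adjust the machine stack (see dropValue() for a caller that does both).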
void popValueStackTo(uint32_t stackSize) {
for (uint32_t i = stk_.length(); i > stackSize; i--) {
Stk& v = stk_[i - 1];
switch (v.kind()) {
case Stk::RegisterI32:
freeI32(v.i32reg());
break;
case Stk::RegisterI64:
freeI64(v.i64reg());
break;
case Stk::RegisterF64:
freeF64(v.f64reg());
break;
case Stk::RegisterF32:
freeF32(v.f32reg());
break;
#ifdef ENABLE_WASM_SIMD
case Stk::RegisterV128:
freeV128(v.v128reg());
break;
#endif
case Stk::RegisterRef:
freeRef(v.refReg());
break;
case Stk::MemRef:
stackMapGenerator_.memRefsOnStk--;
break;
default:
break;
}
}
stk_.shrinkTo(stackSize);
}
void popValueStackBy(uint32_t items) {
popValueStackTo(stk_.length() - items);
}
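// Drop the top value, popping its machine-stack storage (if any) as well as
// its value-stack entry.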
void dropValue() {
if (peek(0).isMem()) {
fr.popBytes(stackConsumed(1));
}
popValueStackBy(1);
}
// Peek at the stack, for calls.
Stk& peek(uint32_t relativeDepth) {
return stk_[stk_.length() - 1 - relativeDepth];
}
#ifdef DEBUG
// Check that we're not leaking registers by comparing the
// state of the stack + available registers with the set of
// all available registers.
// Call this between opcodes.
void performRegisterLeakCheck() {
BaseRegAlloc::LeakCheck check(ra);
for (size_t i = 0; i < stk_.length(); i++) {
Stk& item = stk_[i];
switch (item.kind_) {
case Stk::RegisterI32:
check.addKnownI32(item.i32reg());
break;
case Stk::RegisterI64:
check.addKnownI64(item.i64reg());
break;
case Stk::RegisterF32:
check.addKnownF32(item.f32reg());
break;
case Stk::RegisterF64:
check.addKnownF64(item.f64reg());
break;
# ifdef ENABLE_WASM_SIMD
case Stk::RegisterV128:
check.addKnownV128(item.v128reg());
break;
# endif
case Stk::RegisterRef:
check.addKnownRef(item.refReg());
break;
default:
break;
}
}
}
void assertStackInvariants() const {
if (deadCode_) {
// Nonlocal control flow can pass values in stack locations in a way that
// isn't accounted for by the value stack. In dead code, which occurs
// after unconditional non-local control flow, there is no invariant to
// assert.
return;
}
size_t size = 0;
for (const Stk& v : stk_) {
switch (v.kind()) {
case Stk::MemRef:
size += BaseStackFrame::StackSizeOfPtr;
break;
case Stk::MemI32:
size += BaseStackFrame::StackSizeOfPtr;
break;
case Stk::MemI64:
size += BaseStackFrame::StackSizeOfInt64;
break;
case Stk::MemF64:
size += BaseStackFrame::StackSizeOfDouble;
break;
case Stk::MemF32:
size += BaseStackFrame::StackSizeOfFloat;
break;
# ifdef ENABLE_WASM_SIMD
case Stk::MemV128:
size += BaseStackFrame::StackSizeOfV128;
break;
# endif
default:
MOZ_ASSERT(!v.isMem());
break;
}
}
MOZ_ASSERT(size == fr.dynamicHeight());
}
#endif
////////////////////////////////////////////////////////////
//
// Control stack
void initControl(Control& item, ResultType params) {
// Make sure the constructor was run properly
MOZ_ASSERT(!item.stackHeight.isValid() && item.stackSize == UINT32_MAX);
uint32_t paramCount = deadCode_ ? 0 : params.length();
uint32_t stackParamSize = stackConsumed(paramCount);
item.stackHeight = fr.stackResultsBase(stackParamSize);
item.stackSize = stk_.length() - paramCount;
item.deadOnArrival = deadCode_;
item.bceSafeOnEntry = bceSafe_;
}
Control& controlItem() { return iter_.controlItem(); }
Control& controlItem(uint32_t relativeDepth) {
return iter_.controlItem(relativeDepth);
}
Control& controlOutermost() { return iter_.controlOutermost(); }
////////////////////////////////////////////////////////////
//
// Labels
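// Emit a patchable nop, preceded by a Tls reload, that can later be patched
// into a call; this is how debugger breakpoints are implemented.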
void insertBreakablePoint(CallSiteDesc::Kind kind) {
fr.loadTlsPtr(WasmTlsReg);
masm.nopPatchableToCall(CallSiteDesc(iter_.lastOpcodeOffset(), kind));
}
//////////////////////////////////////////////////////////////////////
//
// Function prologue and epilogue.
[[nodiscard]] bool beginFunction() {
JitSpew(JitSpew_Codegen, "# ========================================");
JitSpew(JitSpew_Codegen, "# Emitting wasm baseline code");
JitSpew(JitSpew_Codegen,
"# beginFunction: start of function prologue for index %d",
(int)func_.index);
// Make a start on the stack map for this function. Inspect the args so
// as to determine which of them are both in-memory and pointer-typed, and
// add entries to machineStackTracker as appropriate.
ArgTypeVector args(funcType());
size_t inboundStackArgBytes = StackArgAreaSizeUnaligned(args);
MOZ_ASSERT(inboundStackArgBytes % sizeof(void*) == 0);
stackMapGenerator_.numStackArgWords = inboundStackArgBytes / sizeof(void*);
MOZ_ASSERT(stackMapGenerator_.machineStackTracker.length() == 0);
if (!stackMapGenerator_.machineStackTracker.pushNonGCPointers(
stackMapGenerator_.numStackArgWords)) {
return false;
}
// Identify GC-managed pointers passed on the stack.
for (WasmABIArgIter i(args); !i.done(); i++) {
ABIArg argLoc = *i;
if (argLoc.kind() == ABIArg::Stack &&
args[i.index()] == MIRType::RefOrNull) {
uint32_t offset = argLoc.offsetFromArgBase();
MOZ_ASSERT(offset < inboundStackArgBytes);
MOZ_ASSERT(offset % sizeof(void*) == 0);
stackMapGenerator_.machineStackTracker.setGCPointer(offset /
sizeof(void*));
}
}
GenerateFunctionPrologue(masm, *moduleEnv_.funcs[func_.index].typeId,
compilerEnv_.mode() == CompileMode::Tier1
? Some(func_.index)
: Nothing(),
&offsets_);
// GenerateFunctionPrologue pushes exactly one wasm::Frame's worth of
// stuff, and none of the values are GC pointers. Hence:
if (!stackMapGenerator_.machineStackTracker.pushNonGCPointers(
sizeof(Frame) / sizeof(void*))) {
return false;
}
// Initialize DebugFrame fields before the stack overflow trap so that
// we have the invariant that all observable Frames in a debugEnabled
// Module have valid DebugFrames.
if (compilerEnv_.debugEnabled()) {
#ifdef JS_CODEGEN_ARM64
static_assert(DebugFrame::offsetOfFrame() % WasmStackAlignment == 0,
"aligned");
#endif
masm.reserveStack(DebugFrame::offsetOfFrame());
if (!stackMapGenerator_.machineStackTracker.pushNonGCPointers(
DebugFrame::offsetOfFrame() / sizeof(void*))) {
return false;
}
masm.store32(
Imm32(func_.index),
Address(masm.getStackPointer(), DebugFrame::offsetOfFuncIndex()));
masm.store32(Imm32(0), Address(masm.getStackPointer(),
DebugFrame::offsetOfFlags()));
// No need to initialize cachedReturnJSValue_ or any ref-typed spilled
// register results, as they are traced if and only if a corresponding
// flag (hasCachedReturnJSValue or hasSpilledRefRegisterResult) is set.
}
// Generate a stack-overflow check and its associated stack map.
fr.checkStack(ABINonArgReg0, BytecodeOffset(func_.lineOrBytecode));
ExitStubMapVector extras;
if (!stackMapGenerator_.generateStackmapEntriesForTrapExit(args, &extras)) {
return false;
}
if (!createStackMap("stack check", extras, masm.currentOffset())) {
return false;
}
size_t reservedBytes = fr.fixedAllocSize() - masm.framePushed();
MOZ_ASSERT(0 == (reservedBytes % sizeof(void*)));
masm.reserveStack(reservedBytes);
fr.onFixedStackAllocated();
if (!stackMapGenerator_.machineStackTracker.pushNonGCPointers(
reservedBytes / sizeof(void*))) {
return false;
}
// Locals are stack allocated. Mark ref-typed ones in the stackmap
// accordingly.
for (const Local& l : localInfo_) {
// Locals that are stack arguments were already added to the stack map
// before pushing the frame.
if (l.type == MIRType::RefOrNull && !l.isStackArgument()) {
uint32_t offs = fr.localOffsetFromSp(l);
MOZ_ASSERT(0 == (offs % sizeof(void*)));
stackMapGenerator_.machineStackTracker.setGCPointer(offs /
sizeof(void*));
}
}
// Copy arguments from registers to stack.
for (WasmABIArgIter i(args); !i.done(); i++) {
if (args.isSyntheticStackResultPointerArg(i.index())) {
// If there are stack results and the pointer to stack results
// was passed in a register, store it to the stack.
if (i->argInRegister()) {
fr.storeIncomingStackResultAreaPtr(RegPtr(i->gpr()));
}
// If we're in a debug frame, copy the stack result pointer arg
// to a well-known place.
if (compilerEnv_.debugEnabled()) {
Register target = ABINonArgReturnReg0;
fr.loadIncomingStackResultAreaPtr(RegPtr(target));
size_t debugFrameOffset =
masm.framePushed() - DebugFrame::offsetOfFrame();
size_t debugStackResultsPointerOffset =
debugFrameOffset + DebugFrame::offsetOfStackResultsPointer();
masm.storePtr(target, Address(masm.getStackPointer(),
debugStackResultsPointerOffset));
}
continue;
}
if (!i->argInRegister()) {
continue;
}
Local& l = localInfo_[args.naturalIndex(i.index())];
switch (i.mirType()) {
case MIRType::Int32:
fr.storeLocalI32(RegI32(i->gpr()), l);
break;
case MIRType::Int64:
fr.storeLocalI64(RegI64(i->gpr64()), l);
break;
case MIRType::RefOrNull: {
DebugOnly<uint32_t> offs = fr.localOffsetFromSp(l);
MOZ_ASSERT(0 == (offs % sizeof(void*)));
fr.storeLocalPtr(RegPtr(i->gpr()), l);
// We should have just visited this local in the preceding loop.
MOZ_ASSERT(stackMapGenerator_.machineStackTracker.isGCPointer(
offs / sizeof(void*)));
break;
}
case MIRType::Double:
fr.storeLocalF64(RegF64(i->fpu()), l);
break;
case MIRType::Float32:
fr.storeLocalF32(RegF32(i->fpu()), l);
break;
#ifdef ENABLE_WASM_SIMD
case MIRType::Simd128:
fr.storeLocalV128(RegV128(i->fpu()), l);
break;
#endif
default:
MOZ_CRASH("Function argument type");
}
}
fr.zeroLocals(&ra);
fr.storeTlsPtr(WasmTlsReg);
if (compilerEnv_.debugEnabled()) {
insertBreakablePoint(CallSiteDesc::EnterFrame);
if (!createStackMap("debug: breakable point")) {
return false;
}
}
JitSpew(JitSpew_Codegen,
"# beginFunction: enter body with masm.framePushed = %u",
masm.framePushed());
MOZ_ASSERT(stackMapGenerator_.framePushedAtEntryToBody.isNothing());
stackMapGenerator_.framePushedAtEntryToBody.emplace(masm.framePushed());
return true;
}
void popStackReturnValues(const ResultType& resultType) {
uint32_t bytes = ABIResultIter::MeasureStackBytes(resultType);
if (bytes == 0) {
return;
}
Register target = ABINonArgReturnReg0;
Register temp = ABINonArgReturnReg1;
fr.loadIncomingStackResultAreaPtr(RegPtr(target));
fr.popStackResultsToMemory(target, bytes, temp);
}
void saveRegisterReturnValues(const ResultType& resultType) {
MOZ_ASSERT(compilerEnv_.debugEnabled());
size_t debugFrameOffset = masm.framePushed() - DebugFrame::offsetOfFrame();
size_t registerResultIdx = 0;
for (ABIResultIter i(resultType); !i.done(); i.next()) {
const ABIResult result = i.cur();
if (!result.inRegister()) {
#ifdef DEBUG
for (i.next(); !i.done(); i.next()) {
MOZ_ASSERT(!i.cur().inRegister());
}
#endif
break;
}
size_t resultOffset =
DebugFrame::offsetOfRegisterResult(registerResultIdx);
Address dest(masm.getStackPointer(), debugFrameOffset + resultOffset);
switch (result.type().kind()) {
case ValType::I32:
masm.store32(RegI32(result.gpr()), dest);
break;
case ValType::I64:
masm.store64(RegI64(result.gpr64()), dest);
break;
case ValType::F64:
masm.storeDouble(RegF64(result.fpr()), dest);
break;
case ValType::F32:
masm.storeFloat32(RegF32(result.fpr()), dest);
break;
case ValType::Ref: {
uint32_t flag =
DebugFrame::hasSpilledRegisterRefResultBitMask(registerResultIdx);
// Tell Instance::traceFrame that we have a pointer to trace.
masm.or32(Imm32(flag),
Address(masm.getStackPointer(),
debugFrameOffset + DebugFrame::offsetOfFlags()));
masm.storePtr(RegPtr(result.gpr()), dest);
break;
}
case ValType::V128:
#ifdef ENABLE_WASM_SIMD
masm.storeUnalignedSimd128(RegV128(result.fpr()), dest);
break;
#else
MOZ_CRASH("No SIMD support");
#endif
}
registerResultIdx++;
}
}
void restoreRegisterReturnValues(const ResultType& resultType) {
MOZ_ASSERT(compilerEnv_.debugEnabled());
size_t debugFrameOffset = masm.framePushed() - DebugFrame::offsetOfFrame();
size_t registerResultIdx = 0;
for (ABIResultIter i(resultType); !i.done(); i.next()) {
const ABIResult result = i.cur();
if (!result.inRegister()) {
#ifdef DEBUG
for (i.next(); !i.done(); i.next()) {
MOZ_ASSERT(!i.cur().inRegister());
}
#endif
break;
}
size_t resultOffset =
DebugFrame::offsetOfRegisterResult(registerResultIdx++);
Address src(masm.getStackPointer(), debugFrameOffset + resultOffset);
switch (result.type().kind()) {
case ValType::I32:
masm.load32(src, RegI32(result.gpr()));
break;
case ValType::I64:
masm.load64(src, RegI64(result.gpr64()));
break;
case ValType::F64:
masm.loadDouble(src, RegF64(result.fpr()));
break;
case ValType::F32:
masm.loadFloat32(src, RegF32(result.fpr()));
break;
case ValType::Ref:
masm.loadPtr(src, RegPtr(result.gpr()));
break;
case ValType::V128:
#ifdef ENABLE_WASM_SIMD
masm.loadUnalignedSimd128(src, RegV128(result.fpr()));
break;
#else
MOZ_CRASH("No SIMD support");
#endif
}
}
}
[[nodiscard]] bool endFunction() {
JitSpew(JitSpew_Codegen, "# endFunction: start of function epilogue");
// Always branch to returnLabel_.
masm.breakpoint();
// Patch the add in the prologue so that it checks against the correct
// frame size. Flush the constant pool in case it needs to be patched.
masm.flush();
// Precondition for patching.
if (masm.oom()) {
return false;
}
fr.patchCheckStack();
masm.bind(&returnLabel_);
ResultType resultType(ResultType::Vector(funcType().results()));
popStackReturnValues(resultType);
if (compilerEnv_.debugEnabled()) {
// Store the register return values into the DebugFrame and reload them
// afterward, so that they survive any clobbering at the breakable points
// and can be modified by the debug trap.
saveRegisterReturnValues(resultType);
insertBreakablePoint(CallSiteDesc::Breakpoint);
if (!createStackMap("debug: breakpoint")) {
return false;
}
insertBreakablePoint(CallSiteDesc::LeaveFrame);
if (!createStackMap("debug: leave frame")) {
return false;
}
restoreRegisterReturnValues(resultType);
}
// To satisfy the Tls extent invariant we need to reload WasmTlsReg because
// baseline can clobber it.
fr.loadTlsPtr(WasmTlsReg);
GenerateFunctionEpilogue(masm, fr.fixedAllocSize(), &offsets_);
#if defined(JS_ION_PERF)
// FIXME - profiling code missing. No bug for this.
// Note the end of the inline code and start of the OOL code.
// gen->perfSpewer().noteEndInlineCode(masm);
#endif
JitSpew(JitSpew_Codegen, "# endFunction: end of function epilogue");
JitSpew(JitSpew_Codegen, "# endFunction: start of OOL code");
if (!generateOutOfLineCode()) {
return false;
}
offsets_.end = masm.currentOffset();
if (!fr.checkStackHeight()) {
return false;
}
JitSpew(JitSpew_Codegen, "# endFunction: end of OOL code for index %d",
(int)func_.index);
return !masm.oom();
}
//////////////////////////////////////////////////////////////////////
//
// Calls.
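// Bookkeeping for a call in progress: the ABI argument iterator, the ABI
// flavor in use, the frame alignment adjustment, and the size of the
// outgoing stack-argument area.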
struct FunctionCall {
explicit FunctionCall(uint32_t lineOrBytecode)
: lineOrBytecode(lineOrBytecode),
isInterModule(false),
usesSystemAbi(false),
#ifdef JS_CODEGEN_ARM
hardFP(true),
#endif
frameAlignAdjustment(0),
stackArgAreaSize(0) {
}
uint32_t lineOrBytecode;
WasmABIArgGenerator abi;
bool isInterModule;
bool usesSystemAbi;
#ifdef JS_CODEGEN_ARM
bool hardFP;
#endif
size_t frameAlignAdjustment;
size_t stackArgAreaSize;
};
void beginCall(FunctionCall& call, UseABI useABI, InterModule interModule) {
MOZ_ASSERT_IF(useABI == UseABI::Builtin, interModule == InterModule::False);
call.isInterModule = interModule == InterModule::True;
call.usesSystemAbi = useABI == UseABI::System;
if (call.usesSystemAbi) {
// Call-outs need to use the appropriate system ABI.
#if defined(JS_CODEGEN_ARM)
call.hardFP = UseHardFpABI();
call.abi.setUseHardFp(call.hardFP);
#elif defined(JS_CODEGEN_MIPS32)
call.abi.enforceO32ABI();
#endif
} else {
#if defined(JS_CODEGEN_ARM)
MOZ_ASSERT(call.hardFP,
"All private ABIs pass FP arguments in registers");
#endif
}
// Use masm.framePushed() because the value we want here does not depend on
// the height of the frame's stack area, but on the actual size of the
// allocated frame.
call.frameAlignAdjustment = ComputeByteAlignment(
masm.framePushed() + sizeof(Frame), JitStackAlignment);
}
void endCall(FunctionCall& call, size_t stackSpace) {
size_t adjustment = call.stackArgAreaSize + call.frameAlignAdjustment;
fr.freeArgAreaAndPopBytes(adjustment, stackSpace);
MOZ_ASSERT(
stackMapGenerator_.framePushedExcludingOutboundCallArgs.isSome());
stackMapGenerator_.framePushedExcludingOutboundCallArgs.reset();
if (call.isInterModule) {
fr.loadTlsPtr(WasmTlsReg);
masm.loadWasmPinnedRegsFromTls();
masm.switchToWasmTlsRealm(ABINonArgReturnReg0, ABINonArgReturnReg1);
} else if (call.usesSystemAbi) {
// On x86 there are no pinned registers, so don't waste time
// reloading the Tls.
#ifndef JS_CODEGEN_X86
fr.loadTlsPtr(WasmTlsReg);
masm.loadWasmPinnedRegsFromTls();
#endif
}
}
void startCallArgs(size_t stackArgAreaSizeUnaligned, FunctionCall* call) {
size_t stackArgAreaSizeAligned =
AlignStackArgAreaSize(stackArgAreaSizeUnaligned);
MOZ_ASSERT(stackArgAreaSizeUnaligned <= stackArgAreaSizeAligned);
// Record the masm.framePushed() value at this point, before we push args
// for the call, but including the alignment space placed above the args.
// This defines the lower limit of the stackmap that will be created for
// this call.
MOZ_ASSERT(
stackMapGenerator_.framePushedExcludingOutboundCallArgs.isNothing());
stackMapGenerator_.framePushedExcludingOutboundCallArgs.emplace(
// However much we've pushed so far
masm.framePushed() +
// Extra space we'll push to get the frame aligned
call->frameAlignAdjustment +
// Extra space we'll push to get the outbound arg area 16-aligned
(stackArgAreaSizeAligned - stackArgAreaSizeUnaligned));
call->stackArgAreaSize = stackArgAreaSizeAligned;
size_t adjustment = call->stackArgAreaSize + call->frameAlignAdjustment;
fr.allocArgArea(adjustment);
}
const ABIArg reservePointerArgument(FunctionCall* call) {
return call->abi.next(MIRType::Pointer);
}
// TODO / OPTIMIZE (Bug 1316821): Note passArg is used only in one place.
// (Or it was, until Luke wandered through, but that can be fixed again.)
// I'm not saying we should manually inline it, but we could hoist the
// dispatch into the caller and have type-specific implementations of
// passArg: passArgI32(), etc. Then those might be inlined, at least in PGO
// builds.
//
// The bulk of the work here (60%) is in the next() call, though.
//
// Notably, since next() is so expensive, StackArgAreaSizeUnaligned()
// becomes expensive too.
//
// Somehow there could be a trick here where the sequence of argument types
// (read from the input stream) leads to a cached entry for
// StackArgAreaSizeUnaligned() and for how to pass arguments...
//
// But at least we could reduce the cost of StackArgAreaSizeUnaligned() by
// first reading the argument types into a (reusable) vector, then we have
// the outgoing size at low cost, and then we can pass args based on the
// info we read.
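// Pass a single argument for an outgoing call: either load it into the
// ABI-assigned register or store it into the outgoing stack-argument area.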
void passArg(ValType type, const Stk& arg, FunctionCall* call) {
switch (type.kind()) {
case ValType::I32: {
ABIArg argLoc = call->abi.next(MIRType::Int32);
if (argLoc.kind() == ABIArg::Stack) {
ScratchI32 scratch(*this);
loadI32(arg, scratch);
masm.store32(scratch, Address(masm.getStackPointer(),
argLoc.offsetFromArgBase()));
} else {
loadI32(arg, RegI32(argLoc.gpr()));
}
break;
}
case ValType::I64: {
ABIArg argLoc = call->abi.next(MIRType::Int64);
if (argLoc.kind() == ABIArg::Stack) {
ScratchI32 scratch(*this);
#ifdef JS_PUNBOX64
loadI64(arg, fromI32(scratch));
masm.storePtr(scratch, Address(masm.getStackPointer(),
argLoc.offsetFromArgBase()));
#else
loadI64Low(arg, scratch);
masm.store32(scratch, LowWord(Address(masm.getStackPointer(),
argLoc.offsetFromArgBase())));
loadI64High(arg, scratch);
masm.store32(scratch, HighWord(Address(masm.getStackPointer(),
argLoc.offsetFromArgBase())));
#endif
} else {
loadI64(arg, RegI64(argLoc.gpr64()));
}
break;
}
case ValType::V128: {
#ifdef ENABLE_WASM_SIMD
ABIArg argLoc = call->abi.next(MIRType::Simd128);
switch (argLoc.kind()) {
case ABIArg::Stack: {
ScratchV128 scratch(*this);
loadV128(arg, scratch);
masm.storeUnalignedSimd128(
(RegV128)scratch,
Address(masm.getStackPointer(), argLoc.offsetFromArgBase()));
break;
}
case ABIArg::GPR: {
MOZ_CRASH("Unexpected parameter passing discipline");
}
case ABIArg::FPU: {
loadV128(arg, RegV128(argLoc.fpu()));
break;
}
# if defined(JS_CODEGEN_REGISTER_PAIR)
case ABIArg::GPR_PAIR: {
MOZ_CRASH("Unexpected parameter passing discipline");
}
# endif
case ABIArg::Uninitialized:
MOZ_CRASH("Uninitialized ABIArg kind");
}
break;
#else
MOZ_CRASH("No SIMD support");
#endif
}
case ValType::F64: {
ABIArg argLoc = call->abi.next(MIRType::Double);
switch (argLoc.kind()) {
case ABIArg::Stack: {
ScratchF64 scratch(*this);
loadF64(arg, scratch);
masm.storeDouble(scratch, Address(masm.getStackPointer(),
argLoc.offsetFromArgBase()));
break;
}
#if defined(JS_CODEGEN_REGISTER_PAIR)
case ABIArg::GPR_PAIR: {
# if defined(JS_CODEGEN_ARM)
ScratchF64 scratch(*this);
loadF64(arg, scratch);
masm.ma_vxfer(scratch, argLoc.evenGpr(), argLoc.oddGpr());
break;
# elif defined(JS_CODEGEN_MIPS32)
ScratchF64 scratch(*this);
loadF64(arg, scratch);
MOZ_ASSERT(MOZ_LITTLE_ENDIAN());
masm.moveFromDoubleLo(scratch, argLoc.evenGpr());
masm.moveFromDoubleHi(scratch, argLoc.oddGpr());
break;
# else
MOZ_CRASH("BaseCompiler platform hook: passArg F64 pair");
# endif
}
#endif
case ABIArg::FPU: {
loadF64(arg, RegF64(argLoc.fpu()));
break;
}
case ABIArg::GPR: {
MOZ_CRASH("Unexpected parameter passing discipline");
}
case ABIArg::Uninitialized:
MOZ_CRASH("Uninitialized ABIArg kind");
}
break;
}
case ValType::F32: {
ABIArg argLoc = call->abi.next(MIRType::Float32);
switch (argLoc.kind()) {
case ABIArg::Stack: {
ScratchF32 scratch(*this);
loadF32(arg, scratch);
masm.storeFloat32(scratch, Address(masm.getStackPointer(),
argLoc.offsetFromArgBase()));
break;
}
case ABIArg::GPR: {
ScratchF32 scratch(*this);
loadF32(arg, scratch);
masm.moveFloat32ToGPR(scratch, argLoc.gpr());
break;
}
case ABIArg::FPU: {
loadF32(arg, RegF32(argLoc.fpu()));
break;
}
#if defined(JS_CODEGEN_REGISTER_PAIR)
case ABIArg::GPR_PAIR: {
MOZ_CRASH("Unexpected parameter passing discipline");
}
#endif
case ABIArg::Uninitialized:
MOZ_CRASH("Uninitialized ABIArg kind");
}
break;
}
case ValType::Ref: {
ABIArg argLoc = call->abi.next(MIRType::RefOrNull);
if (argLoc.kind() == ABIArg::Stack) {
ScratchPtr scratch(*this);
loadRef(arg, scratch);
masm.storePtr(scratch, Address(masm.getStackPointer(),
argLoc.offsetFromArgBase()));
} else {
loadRef(arg, RegPtr(argLoc.gpr()));
}
break;
}
}
}
CodeOffset callDefinition(uint32_t funcIndex, const FunctionCall& call) {
CallSiteDesc desc(call.lineOrBytecode, CallSiteDesc::Func);
return masm.call(desc, funcIndex);
}
CodeOffset callSymbolic(SymbolicAddress callee, const FunctionCall& call) {
CallSiteDesc desc(call.lineOrBytecode, CallSiteDesc::Symbolic);
return masm.call(desc, callee);
}
// Precondition: sync()
CodeOffset callIndirect(uint32_t funcTypeIndex, uint32_t tableIndex,
const Stk& indexVal, const FunctionCall& call) {
const TypeIdDesc& funcTypeId = moduleEnv_.typeIds[funcTypeIndex];
MOZ_ASSERT(funcTypeId.kind() != TypeIdDescKind::None);
const TableDesc& table = moduleEnv_.tables[tableIndex];
loadI32(indexVal, RegI32(WasmTableCallIndexReg));
CallSiteDesc desc(call.lineOrBytecode, CallSiteDesc::Dynamic);
CalleeDesc callee = CalleeDesc::wasmTable(table, funcTypeId);
return masm.wasmCallIndirect(desc, callee, NeedsBoundsCheck(true));
}
// Precondition: sync()
CodeOffset callImport(unsigned globalDataOffset, const FunctionCall& call) {
CallSiteDesc desc(call.lineOrBytecode, CallSiteDesc::Dynamic);
CalleeDesc callee = CalleeDesc::import(globalDataOffset);
return masm.wasmCallImport(desc, callee);
}
CodeOffset builtinCall(SymbolicAddress builtin, const FunctionCall& call) {
return callSymbolic(builtin, call);
}
CodeOffset builtinInstanceMethodCall(const SymbolicAddressSignature& builtin,
const ABIArg& instanceArg,
const FunctionCall& call) {
// Builtin method calls assume the TLS register has been set.
fr.loadTlsPtr(WasmTlsReg);
CallSiteDesc desc(call.lineOrBytecode, CallSiteDesc::Symbolic);
return masm.wasmCallBuiltinInstanceMethod(
desc, instanceArg, builtin.identity, builtin.failureMode);
}
[[nodiscard]] bool pushCallResults(const FunctionCall& call, ResultType type,
const StackResultsLoc& loc) {
#if defined(JS_CODEGEN_ARM)
// pushResults currently bypasses special case code in captureReturnedFxx()
// that converts GPR results to FPR results for systemABI+softFP. If we
// ever start using that combination for calls we need more code. This
// assert is stronger than we need - we only care about results in return
// registers - but that's OK.
MOZ_ASSERT(!call.usesSystemAbi || call.hardFP);
#endif
return pushResults(type, fr.stackResultsBase(loc.bytes()));
}
//////////////////////////////////////////////////////////////////////
//
// Sundry low-level code generators.
// The compiler depends on moveImm32() clearing the high bits of a 64-bit
// register on 64-bit systems, except on MIPS64, where the high bits are
// sign-extended from the lower bits.
void moveImm32(int32_t v, RegI32 dest) { masm.move32(Imm32(v), dest); }
void moveImm64(int64_t v, RegI64 dest) { masm.move64(Imm64(v), dest); }
void moveImmRef(intptr_t v, RegPtr dest) { masm.movePtr(ImmWord(v), dest); }
void moveImmF32(float f, RegF32 dest) { masm.loadConstantFloat32(f, dest); }
void moveImmF64(double d, RegF64 dest) { masm.loadConstantDouble(d, dest); }
[[nodiscard]] bool addInterruptCheck() {
ScratchI32 tmp(*this);
fr.loadTlsPtr(tmp);
masm.wasmInterruptCheck(tmp, bytecodeOffset());
return createStackMap("addInterruptCheck");
}
void jumpTable(const LabelVector& labels, Label* theTable) {
// Flush constant pools to ensure that the table is never interrupted by
// constant pool entries.
masm.flush();
#if defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64)
// Prevent nop sequences from appearing in the jump table.
AutoForbidNops afn(&masm);
#endif
masm.bind(theTable);
for (uint32_t i = 0; i < labels.length(); i++) {
CodeLabel cl;
masm.writeCodePointer(&cl);
cl.target()->bind(labels[i].offset());
masm.addCodeLabel(cl);
}
}
void tableSwitch(Label* theTable, RegI32 switchValue, Label* dispatchCode) {
masm.bind(dispatchCode);
#if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86)
ScratchI32 scratch(*this);
CodeLabel tableCl;
masm.mov(&tableCl, scratch);
tableCl.target()->bind(theTable->offset());
masm.addCodeLabel(tableCl);
masm.jmp(Operand(scratch, switchValue, ScalePointer));
#elif defined(JS_CODEGEN_ARM)
// Flush constant pools: offset must reflect the distance from the MOV
// to the start of the table; as the address of the MOV is given by the
// label, nothing must come between the bind() and the ma_mov().
AutoForbidPoolsAndNops afp(&masm,
/* number of instructions in scope = */ 5);
ScratchI32 scratch(*this);
// Compute the offset from the ma_mov instruction to the jump table.
Label here;
masm.bind(&here);
uint32_t offset = here.offset() - theTable->offset();
// Read PC+8
masm.ma_mov(pc, scratch);
// ARM scratch register is required by ma_sub.
ScratchRegisterScope arm_scratch(*this);
// Compute the absolute table base pointer into `scratch`, offset by 8
// to account for the fact that ma_mov read PC+8.
masm.ma_sub(Imm32(offset + 8), scratch, arm_scratch);
// Jump indirect via table element.
masm.ma_ldr(DTRAddr(scratch, DtrRegImmShift(switchValue, LSL, 2)), pc,
Offset, Assembler::Always);
#elif defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)
ScratchI32 scratch(*this);
CodeLabel tableCl;
masm.ma_li(scratch, &tableCl);
tableCl.target()->bind(theTable->offset());
masm.addCodeLabel(tableCl);
masm.branchToComputedAddress(BaseIndex(scratch, switchValue, ScalePointer));
#elif defined(JS_CODEGEN_ARM64)
AutoForbidPoolsAndNops afp(&masm,
/* number of instructions in scope = */ 4);
ScratchI32 scratch(*this);
ARMRegister s(scratch, 64);
ARMRegister v(switchValue, 64);
masm.Adr(s, theTable);
masm.Add(s, s, Operand(v, vixl::LSL, 3));
masm.Ldr(s, MemOperand(s, 0));
masm.Br(s);
#else
MOZ_CRASH("BaseCompiler platform hook: tableSwitch");
#endif
}
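// Capture an ABI-designated return-value register after a call, claiming it
// from the allocator. On ARM, with the system ABI and soft-FP, float
// results arrive in GPRs and are transferred into an FPR here.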
RegI32 captureReturnedI32() {
RegI32 r = RegI32(ReturnReg);
MOZ_ASSERT(isAvailableI32(r));
needI32(r);
#if defined(JS_CODEGEN_X64)
if (JitOptions.spectreIndexMasking) {
masm.movl(r, r);
}
#endif
return r;
}
RegI64 captureReturnedI64() {
RegI64 r = RegI64(ReturnReg64);
MOZ_ASSERT(isAvailableI64(r));
needI64(r);
return r;
}
RegF32 captureReturnedF32(const FunctionCall& call) {
RegF32 r = RegF32(ReturnFloat32Reg);
MOZ_ASSERT(isAvailableF32(r));
needF32(r);
#if defined(JS_CODEGEN_ARM)
if (call.usesSystemAbi && !call.hardFP) {
masm.ma_vxfer(ReturnReg, r);
}
#endif
return r;
}
RegF64 captureReturnedF64(const FunctionCall& call) {
RegF64 r = RegF64(ReturnDoubleReg);
MOZ_ASSERT(isAvailableF64(r));
needF64(r);
#if defined(JS_CODEGEN_ARM)
if (call.usesSystemAbi && !call.hardFP) {
masm.ma_vxfer(ReturnReg64.low, ReturnReg64.high, r);
}
#endif
return r;
}
#ifdef ENABLE_WASM_SIMD
RegV128 captureReturnedV128(const FunctionCall& call) {
RegV128 r = RegV128(ReturnSimd128Reg);
MOZ_ASSERT(isAvailableV128(r));
needV128(r);
return r;
}
#endif
RegPtr captureReturnedRef() {
RegPtr r = RegPtr(ReturnReg);
MOZ_ASSERT(isAvailableRef(r));
needRef(r);
return r;
}
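// Emit a check that traps with IntegerDivideByZero if the divisor is zero.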
void checkDivideByZeroI32(RegI32 rhs) {
Label nonZero;
masm.branchTest32(Assembler::NonZero, rhs, rhs, &nonZero);
trap(Trap::IntegerDivideByZero);
masm.bind(&nonZero);
}
void checkDivideByZeroI64(RegI64 r) {
Label nonZero;
ScratchI32 scratch(*this);
masm.branchTest64(Assembler::NonZero, r, r, scratch, &nonZero);
trap(Trap::IntegerDivideByZero);
masm.bind(&nonZero);
}
void checkDivideSignedOverflowI32(RegI32 rhs, RegI32 srcDest, Label* done,
bool zeroOnOverflow) {
Label notMin;
masm.branch32(Assembler::NotEqual, srcDest, Imm32(INT32_MIN), ¬Min);
if (zeroOnOverflow) {
masm.branch32(Assembler::NotEqual, rhs, Imm32(-1), ¬Min);
moveImm32(0, srcDest);
masm.jump(done);
} else {
masm.branch32(Assembler::NotEqual,