js/src/wasm/WasmBaselineCompile.cpp

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 *
 * Copyright 2016 Mozilla Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * [SMDOC] WebAssembly baseline compiler (RabaldrMonkey)
 *
 * General assumptions for 32-bit vs 64-bit code:
 *
 * - A 32-bit register can be extended in-place to a 64-bit register on 64-bit
 *   systems.
 *
 * - Code that knows that Register64 has a '.reg' member on 64-bit systems and
 *   '.high' and '.low' members on 32-bit systems, or knows the implications
 *   thereof, is #ifdef JS_PUNBOX64.  All other code is #if(n)?def JS_64BIT.
 *
 *
 * Coding standards:
 *
 * - In "small" code generating functions (eg emitMultiplyF64, emitQuotientI32,
 *   and surrounding functions; most functions fall into this class) where the
 *   meaning is obvious:
 *
 *   - if there is a single source + destination register, it is called 'r'
 *   - if there is one source and a different destination, they are called 'rs'
 *     and 'rd'
 *   - if there is one source + destination register and another source register
 *     they are called 'r' and 'rs'
 *   - if there are two source registers and a destination register they are
 *     called 'rs0', 'rs1', and 'rd'.
 *
 * - Generic temp registers are named /temp[0-9]?/ not /tmp[0-9]?/.
 *
 * - Registers can be named non-generically for their function ('rp' for the
 *   'pointer' register and 'rv' for the 'value' register are typical) and those
 *   names may or may not have an 'r' prefix.
 *
 * - "Larger" code generating functions make their own rules.
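 *
 *   As an illustration of the register naming rules for "small" functions
 *   above (an illustrative sketch; the helper names are hypothetical, not
 *   necessarily functions defined in this file):
 *
 *     void emitAddI32() {
 *       RegI32 r, rs;
 *       pop2xI32(&r, &rs);  // 'r' is source + destination, 'rs' is the
 *                           // other source
 *       masm.add32(rs, r);
 *       freeI32(rs);
 *       pushI32(r);
 *     }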
 *
 *
 * General status notes:
 *
 * "FIXME" indicates a known or suspected bug.  Always has a bug#.
 *
 * "TODO" indicates an opportunity for a general improvement, with an additional
 * tag to indicate the area of improvement.  Usually has a bug#.
 *
 * There are lots of machine dependencies here but they are pretty well isolated
 * to a segment of the compiler.  Many dependencies will eventually be factored
 * into the MacroAssembler layer and shared with other code generators.
 *
 *
 * High-value compiler performance improvements:
 *
 * - (Bug 1316802) The specific-register allocator (the needI32(r), needI64(r)
 *   etc methods) can avoid syncing the value stack if the specific register is
 *   in use but there is a free register to shuffle the specific register into.
 *   (This will also improve the generated code.)  The sync happens often enough
 *   here to show up in profiles, because it is triggered by integer multiply
 *   and divide.
 *
 *
 * High-value code generation improvements:
 *
 * - (Bug 1316804) brTable pessimizes by always dispatching to code that pops
 *   the stack and then jumps to the code for the target case.  If no cleanup is
 *   needed we could just branch conditionally to the target; if the same amount
 *   of cleanup is needed for all cases then the cleanup can be done before the
 *   dispatch.  Both are highly likely.
 *
 * - (Bug 1316806) Register management around calls: At the moment we sync the
 *   value stack unconditionally (this is simple) but there are probably many
 *   common cases where we could instead save/restore live caller-saves
 *   registers and perform parallel assignment into argument registers.  This
 *   may be important if we keep some locals in registers.
 *
 * - (Bug 1316808) Allocate some locals to registers on machines where there are
 *   enough registers.  This is probably hard to do well in a one-pass compiler
 *   but it might be that just keeping register arguments and the first few
 *   locals in registers is a viable strategy; another (more general) strategy
 *   is caching locals in registers in straight-line code.  Such caching could
 *   also track constant values in registers, if that is deemed valuable.  A
 *   combination of techniques may be desirable: parameters and the first few
 *   locals could be cached on entry to the function but not statically assigned
 *   to registers throughout.
 *
 *   (On a large corpus of code it should be possible to compute, for every
 *   signature comprising the types of parameters and locals, and using a static
 *   weight for loops, a list in priority order of which parameters and locals
 *   should be assigned to registers.  Or something like that.  Wasm makes
 *   this simple.  Static assignments are desirable because they are not flushed
 *   to memory by the pre-block sync() call.)
 */

#include "wasm/WasmBaselineCompile.h"

#include "mozilla/MathAlgorithms.h"
#include "mozilla/Maybe.h"

#include <algorithm>
#include <utility>

#include "jit/AtomicOp.h"
#include "jit/IonTypes.h"
#include "jit/JitAllocPolicy.h"
#include "jit/Label.h"
#include "jit/MacroAssembler.h"
#include "jit/MIR.h"
#include "jit/RegisterAllocator.h"
#include "jit/Registers.h"
#include "jit/RegisterSets.h"
#if defined(JS_CODEGEN_ARM)
#  include "jit/arm/Assembler-arm.h"
#endif
#if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86)
#  include "jit/x86-shared/Architecture-x86-shared.h"
#  include "jit/x86-shared/Assembler-x86-shared.h"
#endif
#if defined(JS_CODEGEN_MIPS32)
#  include "jit/mips-shared/Assembler-mips-shared.h"
#  include "jit/mips32/Assembler-mips32.h"
#endif
#if defined(JS_CODEGEN_MIPS64)
#  include "jit/mips-shared/Assembler-mips-shared.h"
#  include "jit/mips64/Assembler-mips64.h"
#endif

#include "util/Memory.h"
#include "wasm/WasmGC.h"
#include "wasm/WasmGenerator.h"
#include "wasm/WasmInstance.h"
#include "wasm/WasmOpIter.h"
#include "wasm/WasmSignalHandlers.h"
#include "wasm/WasmStubs.h"
#include "wasm/WasmValidate.h"

#include "jit/MacroAssembler-inl.h"

using mozilla::DebugOnly;
using mozilla::FloorLog2;
using mozilla::IsPowerOfTwo;
using mozilla::Maybe;

namespace js {
namespace wasm {

using namespace js::jit;

using HandleNaNSpecially = bool;
using InvertBranch = bool;
using IsKnownNotZero = bool;
using IsUnsigned = bool;
using NeedsBoundsCheck = bool;
using WantResult = bool;
using ZeroOnOverflow = bool;

class BaseStackFrame;

// Two flags, useABI and interModule, control how calls are made.
//
// UseABI::Wasm implies that the Tls/Heap/Global registers are nonvolatile,
// except when InterModule::True is also set, in which case they are volatile.
//
// UseABI::Builtin implies that the Tls/Heap/Global registers are volatile.
// In this case, we require InterModule::False.  The calling convention
// is otherwise like UseABI::Wasm.
//
// UseABI::System implies that the Tls/Heap/Global registers are volatile.
// Additionally, the parameter passing mechanism may be slightly different from
// the UseABI::Wasm convention.
//
// When the Tls/Heap/Global registers are not volatile, the baseline compiler
// will restore the Tls register from its save slot before the call, since the
// baseline compiler uses the Tls register for other things.
//
// When those registers are volatile, the baseline compiler will reload them
// after the call (it will restore the Tls register from the save slot and load
// the other two from the Tls data).

enum class UseABI { Wasm, Builtin, System };
enum class InterModule { False = false, True = true };

#if defined(JS_CODEGEN_NONE)
#  define RABALDR_SCRATCH_I32
#  define RABALDR_SCRATCH_F32
#  define RABALDR_SCRATCH_F64

static const Register RabaldrScratchI32 = Register::Invalid();
static const FloatRegister RabaldrScratchF32 = InvalidFloatReg;
static const FloatRegister RabaldrScratchF64 = InvalidFloatReg;
#endif

#ifdef JS_CODEGEN_ARM64
#  define RABALDR_CHUNKY_STACK
#  define RABALDR_SCRATCH_I32
#  define RABALDR_SCRATCH_F32
#  define RABALDR_SCRATCH_F64
#  define RABALDR_SCRATCH_F32_ALIASES_F64

static const Register RabaldrScratchI32 = Register::FromCode(15);

// Note, the float scratch regs cannot be registers that are used for parameter
// passing in any ABI we use.  Argregs tend to be low-numbered; register 30
// should be safe.

static constexpr FloatRegister RabaldrScratchF32 =
    FloatRegister(30, FloatRegisters::Single);
static constexpr FloatRegister RabaldrScratchF64 =
    FloatRegister(30, FloatRegisters::Double);

static_assert(RabaldrScratchF32 != ScratchFloat32Reg, "Too busy");
static_assert(RabaldrScratchF64 != ScratchDoubleReg, "Too busy");
#endif

#ifdef JS_CODEGEN_X86
// The selection of EBX here steps gingerly around several constraints: EDX
// must be allocatable for multiply/divide; ECX must be allocatable for
// shift/rotate; EAX (= ReturnReg) must be allocatable as the result
// register; EBX must not be one of the WasmTableCall registers; and we need
// a temp register for load/store that has a single-byte persona.
//
// The compiler assumes that RabaldrScratchI32 has a single-byte
// persona.  Code for 8-byte atomic operations assumes that
// RabaldrScratchI32 is in fact ebx.

#  define RABALDR_SCRATCH_I32
static const Register RabaldrScratchI32 = ebx;

#  define RABALDR_INT_DIV_I64_CALLOUT
#endif

#ifdef JS_CODEGEN_ARM
// We use our own scratch register, because the macro assembler uses
// the regular scratch register(s) pretty liberally.  We could
// work around that in several cases but the mess does not seem
// worth it yet.  CallTempReg2 seems safe.

#  define RABALDR_SCRATCH_I32
static const Register RabaldrScratchI32 = CallTempReg2;

#  define RABALDR_INT_DIV_I64_CALLOUT
#  define RABALDR_I64_TO_FLOAT_CALLOUT
#  define RABALDR_FLOAT_TO_I64_CALLOUT
#endif

#ifdef JS_CODEGEN_MIPS32
#  define RABALDR_SCRATCH_I32
static const Register RabaldrScratchI32 = CallTempReg2;

#  define RABALDR_INT_DIV_I64_CALLOUT
#  define RABALDR_I64_TO_FLOAT_CALLOUT
#  define RABALDR_FLOAT_TO_I64_CALLOUT
#endif

#ifdef JS_CODEGEN_MIPS64
#  define RABALDR_SCRATCH_I32
static const Register RabaldrScratchI32 = CallTempReg2;
#endif

#ifdef RABALDR_SCRATCH_F32_ALIASES_F64
#  if !defined(RABALDR_SCRATCH_F32) || !defined(RABALDR_SCRATCH_F64)
#    error "Bad configuration"
#  endif
#endif

template <MIRType t>
struct RegTypeOf {
  static_assert(t == MIRType::Float32 || t == MIRType::Double,
                "Float mask type");
};

template <>
struct RegTypeOf<MIRType::Float32> {
  static constexpr RegTypeName value = RegTypeName::Float32;
};
template <>
struct RegTypeOf<MIRType::Double> {
  static constexpr RegTypeName value = RegTypeName::Float64;
};

// The strongly typed register wrappers are especially useful to distinguish
// float registers from double registers, but they also clearly distinguish
// 32-bit registers from 64-bit register pairs on 32-bit systems.

struct RegI32 : public Register {
  RegI32() : Register(Register::Invalid()) {}
  explicit RegI32(Register reg) : Register(reg) {}
  bool isValid() const { return *this != Invalid(); }
  bool isInvalid() const { return !isValid(); }
  static RegI32 Invalid() { return RegI32(Register::Invalid()); }
};

struct RegI64 : public Register64 {
  RegI64() : Register64(Register64::Invalid()) {}
  explicit RegI64(Register64 reg) : Register64(reg) {}
  bool isValid() const { return *this != Invalid(); }
  bool isInvalid() const { return !isValid(); }
  static RegI64 Invalid() { return RegI64(Register64::Invalid()); }
};

struct RegPtr : public Register {
  RegPtr() : Register(Register::Invalid()) {}
  explicit RegPtr(Register reg) : Register(reg) {}
  bool isValid() const { return *this != Invalid(); }
  bool isInvalid() const { return !isValid(); }
  static RegPtr Invalid() { return RegPtr(Register::Invalid()); }
};

struct RegF32 : public FloatRegister {
  RegF32() : FloatRegister() {}
  explicit RegF32(FloatRegister reg) : FloatRegister(reg) {}
  bool isValid() const { return *this != Invalid(); }
  bool isInvalid() const { return !isValid(); }
  static RegF32 Invalid() { return RegF32(InvalidFloatReg); }
};

struct RegF64 : public FloatRegister {
  RegF64() : FloatRegister() {}
  explicit RegF64(FloatRegister reg) : FloatRegister(reg) {}
  bool isValid() const { return *this != Invalid(); }
  bool isInvalid() const { return !isValid(); }
  static RegF64 Invalid() { return RegF64(InvalidFloatReg); }
};
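
// Because the constructors above are explicit, the wrappers do not convert
// into one another implicitly (an illustrative sketch, not code in this
// file):
//
//   RegF32 f = ...;
//   RegF64 d = f;    // does not compile: no implicit RegF32 -> RegF64
//   RegF64 d2(f);    // compiles only because the conversion is spelled out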

struct AnyReg {
  union {
    RegI32 i32_;
    RegI64 i64_;
    RegPtr ref_;
    RegF32 f32_;
    RegF64 f64_;
  };

  enum { I32, I64, REF, F32, F64 } tag;

  explicit AnyReg(RegI32 r) {
    tag = I32;
    i32_ = r;
  }
  explicit AnyReg(RegI64 r) {
    tag = I64;
    i64_ = r;
  }
  explicit AnyReg(RegF32 r) {
    tag = F32;
    f32_ = r;
  }
  explicit AnyReg(RegF64 r) {
    tag = F64;
    f64_ = r;
  }
  explicit AnyReg(RegPtr r) {
    tag = REF;
    ref_ = r;
  }

  RegI32 i32() const {
    MOZ_ASSERT(tag == I32);
    return i32_;
  }
  RegI64 i64() const {
    MOZ_ASSERT(tag == I64);
    return i64_;
  }
  RegF32 f32() const {
    MOZ_ASSERT(tag == F32);
    return f32_;
  }
  RegF64 f64() const {
    MOZ_ASSERT(tag == F64);
    return f64_;
  }
  RegPtr ref() const {
    MOZ_ASSERT(tag == REF);
    return ref_;
  }

  AnyRegister any() const {
    switch (tag) {
      case F32:
        return AnyRegister(f32_);
      case F64:
        return AnyRegister(f64_);
      case I32:
        return AnyRegister(i32_);
      case I64:
#ifdef JS_PUNBOX64
        return AnyRegister(i64_.reg);
#else
        // The compiler is written so that this is never needed: any() is
        // called on arbitrary registers for asm.js but asm.js does not have
        // 64-bit ints.  For wasm, any() is called on arbitrary registers
        // only on 64-bit platforms.
        MOZ_CRASH("AnyReg::any() on 32-bit platform");
#endif
      case REF:
        MOZ_CRASH("AnyReg::any() not implemented for ref types");
      default:
        MOZ_CRASH();
    }
    // Work around GCC 5 analysis/warning bug.
    MOZ_CRASH("AnyReg::any(): impossible case");
  }
};
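
// A small usage sketch for AnyReg (illustrative; 'someGpr' is hypothetical):
// wrap a typed register, then recover it with the accessor matching the tag.
//
//   AnyReg any(RegI32(someGpr));
//   RegI32 r = any.i32();   // OK: tag is I32
//   any.f64();              // would hit the MOZ_ASSERT: tag mismatch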

// Platform-specific registers.
//
// All platforms must define struct SpecificRegs.  All 32-bit platforms must
// have an abiReturnRegI64 member in that struct.

#if defined(JS_CODEGEN_X64)
struct SpecificRegs {
  RegI32 eax, ecx, edx, edi, esi;
  RegI64 rax, rcx, rdx;

  SpecificRegs()
      : eax(RegI32(js::jit::eax)),
        ecx(RegI32(js::jit::ecx)),
        edx(RegI32(js::jit::edx)),
        edi(RegI32(js::jit::edi)),
        esi(RegI32(js::jit::esi)),
        rax(RegI64(Register64(js::jit::rax))),
        rcx(RegI64(Register64(js::jit::rcx))),
        rdx(RegI64(Register64(js::jit::rdx))) {}
};
#elif defined(JS_CODEGEN_X86)
struct SpecificRegs {
  RegI32 eax, ecx, edx, edi, esi;
  RegI64 ecx_ebx, edx_eax, abiReturnRegI64;

  SpecificRegs()
      : eax(RegI32(js::jit::eax)),
        ecx(RegI32(js::jit::ecx)),
        edx(RegI32(js::jit::edx)),
        edi(RegI32(js::jit::edi)),
        esi(RegI32(js::jit::esi)),
        ecx_ebx(RegI64(Register64(js::jit::ecx, js::jit::ebx))),
        edx_eax(RegI64(Register64(js::jit::edx, js::jit::eax))),
        abiReturnRegI64(edx_eax) {}
};
#elif defined(JS_CODEGEN_ARM)
struct SpecificRegs {
  RegI64 abiReturnRegI64;

  SpecificRegs() : abiReturnRegI64(ReturnReg64) {}
};
#elif defined(JS_CODEGEN_ARM64)
struct SpecificRegs {};
#elif defined(JS_CODEGEN_MIPS32)
struct SpecificRegs {
  RegI64 abiReturnRegI64;

  SpecificRegs() : abiReturnRegI64(ReturnReg64) {}
};
#elif defined(JS_CODEGEN_MIPS64)
struct SpecificRegs {};
#else
struct SpecificRegs {
#  ifndef JS_64BIT
  RegI64 abiReturnRegI64;
#  endif

  SpecificRegs() { MOZ_CRASH("BaseCompiler porting interface: SpecificRegs"); }
};
#endif

class BaseCompilerInterface {
 public:
  // Spill all spillable registers.
  //
  // TODO / OPTIMIZE (Bug 1316802): It's possible to do better here by
  // spilling only enough registers to satisfy current needs.
  virtual void sync() = 0;
  virtual void saveTempPtr(RegPtr r) = 0;
  virtual void restoreTempPtr(RegPtr r) = 0;
};

// Register allocator.

class BaseRegAlloc {
  // Notes on float register allocation.
  //
  // The general rule in SpiderMonkey is that float registers can alias double
  // registers, but there are predicates to handle exceptions to that rule:
  // hasUnaliasedDouble() and hasMultiAlias().  The way aliasing actually
  // works is platform dependent and exposed through the aliased(n, &r)
  // predicate, etc.
  //
  //  - hasUnaliasedDouble(): on ARM VFPv3-D32 there are double registers that
  //    cannot be treated as float.
  //  - hasMultiAlias(): on ARM and MIPS a double register aliases two float
  //    registers.
  //
  // On some platforms (x86, x64, ARM64) but not all (ARM)
  // ScratchFloat32Reg is the same as ScratchDoubleReg.
  //
  // It's a basic invariant of the AllocatableRegisterSet that it deals
  // properly with aliasing of registers: if s0 or s1 are allocated then d0 is
  // not allocatable; if s0 and s1 are freed individually then d0 becomes
  // allocatable.

  BaseCompilerInterface* bc;
  AllocatableGeneralRegisterSet availGPR;
  AllocatableFloatRegisterSet availFPU;
#ifdef DEBUG
  // The registers available to the compiler after removing ScratchReg,
  // HeapReg, etc.
  AllocatableGeneralRegisterSet allGPR;
  AllocatableFloatRegisterSet allFPU;
  uint32_t scratchTaken;
#endif
#ifdef JS_CODEGEN_X86
  AllocatableGeneralRegisterSet singleByteRegs;
#endif

  bool hasGPR() { return !availGPR.empty(); }

  bool hasGPR64() {
#ifdef JS_PUNBOX64
    return !availGPR.empty();
#else
    if (availGPR.empty()) {
      return false;
    }
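    // Probe for a second register: temporarily take one and check whether
    // another is still available, then give the first one back.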
    Register r = allocGPR();
    bool available = !availGPR.empty();
    freeGPR(r);
    return available;
#endif
  }

  template <MIRType t>
  bool hasFPU() {
    return availFPU.hasAny<RegTypeOf<t>::value>();
  }

  bool isAvailableGPR(Register r) { return availGPR.has(r); }

  bool isAvailableFPU(FloatRegister r) { return availFPU.has(r); }

  void allocGPR(Register r) {
    MOZ_ASSERT(isAvailableGPR(r));
    availGPR.take(r);
  }

  Register allocGPR() {
    MOZ_ASSERT(hasGPR());
    return availGPR.takeAny();
  }

  void allocInt64(Register64 r) {
#ifdef JS_PUNBOX64
    allocGPR(r.reg);
#else
    allocGPR(r.low);
    allocGPR(r.high);
#endif
  }

  Register64 allocInt64() {
    MOZ_ASSERT(hasGPR64());
#ifdef JS_PUNBOX64
    return Register64(availGPR.takeAny());
#else
    Register high = availGPR.takeAny();
    Register low = availGPR.takeAny();
    return Register64(high, low);
#endif
  }

#ifdef JS_CODEGEN_ARM
  // r12 is normally the ScratchRegister and r13 is always the stack pointer,
  // so the highest possible pair has r10 as the even-numbered register.

  static constexpr uint32_t PAIR_LIMIT = 10;

  bool hasGPRPair() {
    for (uint32_t i = 0; i <= PAIR_LIMIT; i += 2) {
      if (isAvailableGPR(Register::FromCode(i)) &&
          isAvailableGPR(Register::FromCode(i + 1))) {
        return true;
      }
    }
    return false;
  }

  void allocGPRPair(Register* low, Register* high) {
    MOZ_ASSERT(hasGPRPair());
    for (uint32_t i = 0; i <= PAIR_LIMIT; i += 2) {
      if (isAvailableGPR(Register::FromCode(i)) &&
          isAvailableGPR(Register::FromCode(i + 1))) {
        *low = Register::FromCode(i);
        *high = Register::FromCode(i + 1);
        allocGPR(*low);
        allocGPR(*high);
        return;
      }
    }
    MOZ_CRASH("No pair");
  }
#endif

  void allocFPU(FloatRegister r) {
    MOZ_ASSERT(isAvailableFPU(r));
    availFPU.take(r);
  }

  template <MIRType t>
  FloatRegister allocFPU() {
    return availFPU.takeAny<RegTypeOf<t>::value>();
  }

  void freeGPR(Register r) { availGPR.add(r); }

  void freeInt64(Register64 r) {
#ifdef JS_PUNBOX64
    freeGPR(r.reg);
#else
    freeGPR(r.low);
    freeGPR(r.high);
#endif
  }

  void freeFPU(FloatRegister r) { availFPU.add(r); }

 public:
  explicit BaseRegAlloc()
      : bc(nullptr),
        availGPR(GeneralRegisterSet::All()),
        availFPU(FloatRegisterSet::All())
#ifdef DEBUG
        ,
        scratchTaken(0)
#endif
#ifdef JS_CODEGEN_X86
        ,
        singleByteRegs(GeneralRegisterSet(Registers::SingleByteRegs))
#endif
  {
    RegisterAllocator::takeWasmRegisters(availGPR);

    // Allocate any private scratch registers.
#if defined(RABALDR_SCRATCH_I32)
    if (RabaldrScratchI32 != RegI32::Invalid()) {
      availGPR.take(RabaldrScratchI32);
    }
#endif

#ifdef RABALDR_SCRATCH_F32_ALIASES_F64
    MOZ_ASSERT(RabaldrScratchF32 != InvalidFloatReg, "Float reg definition");
    MOZ_ASSERT(RabaldrScratchF64 != InvalidFloatReg, "Float reg definition");
#endif

#if defined(RABALDR_SCRATCH_F32) && !defined(RABALDR_SCRATCH_F32_ALIASES_F64)
    if (RabaldrScratchF32 != RegF32::Invalid()) {
      availFPU.take(RabaldrScratchF32);
    }
#endif

#if defined(RABALDR_SCRATCH_F64)
#  ifdef RABALDR_SCRATCH_F32_ALIASES_F64
    MOZ_ASSERT(availFPU.has(RabaldrScratchF32));
#  endif
    if (RabaldrScratchF64 != RegF64::Invalid()) {
      availFPU.take(RabaldrScratchF64);
    }
#  ifdef RABALDR_SCRATCH_F32_ALIASES_F64
    MOZ_ASSERT(!availFPU.has(RabaldrScratchF32));
#  endif
#endif

#ifdef DEBUG
    allGPR = availGPR;
    allFPU = availFPU;
#endif
  }

  void init(BaseCompilerInterface* bc) { this->bc = bc; }

  enum class ScratchKind { I32 = 1, F32 = 2, F64 = 4 };

#ifdef DEBUG
  bool isScratchRegisterTaken(ScratchKind s) const {
    return (scratchTaken & uint32_t(s)) != 0;
  }

  void setScratchRegisterTaken(ScratchKind s, bool state) {
    if (state) {
      scratchTaken |= uint32_t(s);
    } else {
      scratchTaken &= ~uint32_t(s);
    }
  }
#endif

#ifdef JS_CODEGEN_X86
  bool isSingleByteI32(Register r) { return singleByteRegs.has(r); }
#endif

  bool isAvailableI32(RegI32 r) { return isAvailableGPR(r); }

  bool isAvailableI64(RegI64 r) {
#ifdef JS_PUNBOX64
    return isAvailableGPR(r.reg);
#else
    return isAvailableGPR(r.low) && isAvailableGPR(r.high);
#endif
  }

  bool isAvailablePtr(RegPtr r) { return isAvailableGPR(r); }

  bool isAvailableF32(RegF32 r) { return isAvailableFPU(r); }

  bool isAvailableF64(RegF64 r) { return isAvailableFPU(r); }

  // TODO / OPTIMIZE (Bug 1316802): Do not sync everything on allocation
  // failure, only as much as we need.

  MOZ_MUST_USE RegI32 needI32() {
    if (!hasGPR()) {
      bc->sync();
    }
    return RegI32(allocGPR());
  }

  void needI32(RegI32 specific) {
    if (!isAvailableI32(specific)) {
      bc->sync();
    }
    allocGPR(specific);
  }

  MOZ_MUST_USE RegI64 needI64() {
    if (!hasGPR64()) {
      bc->sync();
    }
    return RegI64(allocInt64());
  }

  void needI64(RegI64 specific) {
    if (!isAvailableI64(specific)) {
      bc->sync();
    }
    allocInt64(specific);
  }

  MOZ_MUST_USE RegPtr needPtr() {
    if (!hasGPR()) {
      bc->sync();
    }
    return RegPtr(allocGPR());
  }

  void needPtr(RegPtr specific) {
    if (!isAvailablePtr(specific)) {
      bc->sync();
    }
    allocGPR(specific);
  }

  // Use when you need a register for a short time but explicitly want to avoid
  // a full sync().
  MOZ_MUST_USE RegPtr needTempPtr(RegPtr fallback, bool* saved) {
    if (hasGPR()) {
      *saved = false;
      return RegPtr(allocGPR());
    }
    *saved = true;
    bc->saveTempPtr(fallback);
    MOZ_ASSERT(isAvailablePtr(fallback));
    allocGPR(fallback);
    return RegPtr(fallback);
  }

  MOZ_MUST_USE RegF32 needF32() {
    if (!hasFPU<MIRType::Float32>()) {
      bc->sync();
    }
    return RegF32(allocFPU<MIRType::Float32>());
  }

  void needF32(RegF32 specific) {
    if (!isAvailableF32(specific)) {
      bc->sync();
    }
    allocFPU(specific);
  }

  MOZ_MUST_USE RegF64 needF64() {
    if (!hasFPU<MIRType::Double>()) {
      bc->sync();
    }
    return RegF64(allocFPU<MIRType::Double>());
  }

  void needF64(RegF64 specific) {
    if (!isAvailableF64(specific)) {
      bc->sync();
    }
    allocFPU(specific);
  }

  void freeI32(RegI32 r) { freeGPR(r); }

  void freeI64(RegI64 r) { freeInt64(r); }

  void freePtr(RegPtr r) { freeGPR(r); }

  void freeF64(RegF64 r) { freeFPU(r); }

  void freeF32(RegF32 r) { freeFPU(r); }

  void freeTempPtr(RegPtr r, bool saved) {
    freePtr(r);
    if (saved) {
      bc->restoreTempPtr(r);
      MOZ_ASSERT(!isAvailablePtr(r));
    }
  }
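
  // Typical pairing of needTempPtr()/freeTempPtr() (an illustrative sketch;
  // `fallback` stands for a specific register the caller is prepared to have
  // saved and restored):
  //
  //   bool saved;
  //   RegPtr temp = needTempPtr(fallback, &saved);
  //   ... use temp ...
  //   freeTempPtr(temp, saved);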

#ifdef JS_CODEGEN_ARM
  MOZ_MUST_USE RegI64 needI64Pair() {
    if (!hasGPRPair()) {
      bc->sync();
    }
    Register low, high;
    allocGPRPair(&low, &high);
    return RegI64(Register64(high, low));
  }
#endif

#ifdef DEBUG
  friend class LeakCheck;

  class MOZ_RAII LeakCheck {
   private:
    const BaseRegAlloc& ra;
    AllocatableGeneralRegisterSet knownGPR_;
    AllocatableFloatRegisterSet knownFPU_;

   public:
    explicit LeakCheck(const BaseRegAlloc& ra) : ra(ra) {
      knownGPR_ = ra.availGPR;
      knownFPU_ = ra.availFPU;
    }

    ~LeakCheck() {
      MOZ_ASSERT(knownGPR_.bits() == ra.allGPR.bits());
      MOZ_ASSERT(knownFPU_.bits() == ra.allFPU.bits());
    }

    void addKnownI32(RegI32 r) { knownGPR_.add(r); }

    void addKnownI64(RegI64 r) {
#  ifdef JS_PUNBOX64
      knownGPR_.add(r.reg);
#  else
      knownGPR_.add(r.high);
      knownGPR_.add(r.low);
#  endif
    }

    void addKnownF32(RegF32 r) { knownFPU_.add(r); }

    void addKnownF64(RegF64 r) { knownFPU_.add(r); }

    void addKnownRef(RegPtr r) { knownGPR_.add(r); }
  };
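
  // Intended use of LeakCheck (an illustrative sketch; `someLiveReg` is
  // hypothetical): construct it where all registers should have been freed,
  // register any values that are legitimately still live, and let the
  // destructor assert that nothing else is outstanding.
  //
  //   BaseRegAlloc::LeakCheck check(ra);
  //   check.addKnownI32(someLiveReg);
  //   // ~LeakCheck asserts that no other registers are still allocated.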
#endif
};

// Scratch register abstractions.
//
// We define our own scratch registers when the platform doesn't provide what we
// need.  A notable use case is that we will need a private scratch register
// when the platform masm uses its scratch register very frequently (eg, ARM).

class BaseScratchRegister {
#ifdef DEBUG
  BaseRegAlloc& ra;
  BaseRegAlloc::ScratchKind kind_;

 public:
  explicit BaseScratchRegister(BaseRegAlloc& ra, BaseRegAlloc::ScratchKind kind)
      : ra(ra), kind_(kind) {
    MOZ_ASSERT(!ra.isScratchRegisterTaken(kind_));
    ra.setScratchRegisterTaken(kind_, true);
  }
  ~BaseScratchRegister() {
    MOZ_ASSERT(ra.isScratchRegisterTaken(kind_));
    ra.setScratchRegisterTaken(kind_, false);
  }
#else
 public:
  explicit BaseScratchRegister(BaseRegAlloc& ra,
                               BaseRegAlloc::ScratchKind kind) {}
#endif
};

#ifdef RABALDR_SCRATCH_F64
class ScratchF64 : public BaseScratchRegister {
 public:
  explicit ScratchF64(BaseRegAlloc& ra)
      : BaseScratchRegister(ra, BaseRegAlloc::ScratchKind::F64) {}
  operator RegF64() const { return RegF64(RabaldrScratchF64); }
};
#else
class ScratchF64 : public ScratchDoubleScope {
 public:
  explicit ScratchF64(MacroAssembler& m) : ScratchDoubleScope(m) {}
  operator RegF64() const { return RegF64(FloatRegister(*this)); }
};
#endif

#ifdef RABALDR_SCRATCH_F32
class ScratchF32 : public BaseScratchRegister {
 public:
  explicit ScratchF32(BaseRegAlloc& ra)
      : BaseScratchRegister(ra, BaseRegAlloc::ScratchKind::F32) {}
  operator RegF32() const { return RegF32(RabaldrScratchF32); }
};
#else
class ScratchF32 : public ScratchFloat32Scope {
 public:
  explicit ScratchF32(MacroAssembler& m) : ScratchFloat32Scope(m) {}
  operator RegF32() const { return RegF32(FloatRegister(*this)); }
};
#endif

#ifdef RABALDR_SCRATCH_I32
template <class RegType>
class ScratchGPR : public BaseScratchRegister {
 public:
  explicit ScratchGPR(BaseRegAlloc& ra)
      : BaseScratchRegister(ra, BaseRegAlloc::ScratchKind::I32) {}
  operator RegType() const { return RegType(RabaldrScratchI32); }
};
#else
template <class RegType>
class ScratchGPR : public ScratchRegisterScope {
 public:
  explicit ScratchGPR(MacroAssembler& m) : ScratchRegisterScope(m) {}
  operator RegType() const { return RegType(Register(*this)); }
};
#endif

using ScratchI32 = ScratchGPR<RegI32>;
using ScratchPtr = ScratchGPR<RegPtr>;
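
// Scratch registers are claimed through RAII scopes (an illustrative sketch;
// the constructor argument is the register allocator on platforms defining
// RABALDR_SCRATCH_I32 and the MacroAssembler otherwise):
//
//   {
//     ScratchI32 scratch(raOrMasm);  // 'raOrMasm' is hypothetical
//     masm.move32(Imm32(0), scratch);
//   }  // the scratch register is released here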

#if defined(JS_CODEGEN_X86)
// ScratchEBX is a mnemonic device: for some atomic ops we really need EBX and
// no other register will do.  We would normally have to allocate that
// register using ScratchI32, since on x86 the scratch register happens to be
// EBX; but the whole point of ScratchI32 is to hide that relationship.  By
// using the ScratchEBX alias, we document that at that point we require the
// scratch register to be EBX.
using ScratchEBX = ScratchI32;

// ScratchI8 is a mnemonic device: For some ops we need a register with a
// byte subregister.
using ScratchI8 = ScratchI32;
#endif

// The stack frame.
//
// The stack frame has four parts ("below" means at lower addresses):
//
//  - the Frame element;
//  - the Local area, including the DebugFrame element and possibly a spilled
//    pointer to stack results, if any; allocated below the header with various
//    forms of alignment;
//  - the Dynamic area, comprising the temporary storage the compiler uses for
//    register spilling, allocated below the Local area;
//  - the Arguments area, comprising memory allocated for outgoing calls,
//    allocated below the Dynamic area.
//
//                +==============================+
//                |    Incoming stack arg        |
//                |    ...                       |
// -------------  +==============================+
//                |    Frame (fixed size)        |
// -------------  +==============================+ <-------------------- FP
//         ^      |    DebugFrame (optional)     |    ^  ^             ^^
//   localSize    |    Register arg local        |    |  |             ||
//         |      |    ...                       |    |  |     framePushed
//         |      |    Register stack result ptr?|    |  |             ||
//         |      |    Non-arg local             |    |  |             ||
//         |      |    ...                       |    |  |             ||
//         |      +------------------------------+    |  |             ||
//         v      |    (padding)                 |    |  v             ||
// -------------  +==============================+ currentStackHeight  ||
//         ^      |    Dynamic (variable size)   |    |                ||
//  dynamicSize   |    ...                       |    |                ||
//         v      |    ...                       |    v                ||
// -------------  |    (free space, sometimes)   | ---------           v|
//                +==============================+ <----- SP not-during calls
//                |    Arguments (sometimes)     |                      |
//                |    ...                       |                      v
//                +==============================+ <----- SP during calls
//
// The Frame is addressed off the stack pointer.  masm.framePushed() is always
// correct, and masm.getStackPointer() + masm.framePushed() always addresses the
// Frame, with the DebugFrame optionally below it.
//
// The Local area (including the DebugFrame and, if needed, the spilled value of
// the stack results area pointer) is laid out by BaseLocalIter and is allocated
// and deallocated by standard prologue and epilogue functions that manipulate
// the stack pointer, but it is accessed via BaseStackFrame.
//
// The Dynamic area is maintained by and accessed via BaseStackFrame.  On some
// systems (such as ARM64), the Dynamic memory may be allocated in chunks
// because the SP needs a specific alignment, and in this case there will
// normally be some free space directly above the SP.  The stack height does not
// include the free space; it reflects only the logically used space.
//
// The Dynamic area is where space for stack results is allocated when calling
// functions that return results on the stack.  If a function has stack results,
// a pointer to the low address of the stack result area is passed as an
// additional argument, according to the usual ABI.  See
// ABIResultIter::HasStackResults.
//
// The Arguments area is allocated and deallocated via BaseStackFrame (see
// comments later) but is accessed directly off the stack pointer.

// BaseLocalIter iterates over a vector of local types and provides offsets
// from the Frame address for those locals, along with associated data.
//
// The implementation of BaseLocalIter is the property of the BaseStackFrame.
// But it is also exposed for use by other code, eg the debugger.

BaseLocalIter::BaseLocalIter(const ValTypeVector& locals,
                             const ArgTypeVector& args, bool debugEnabled)
    : locals_(locals),
      args_(args),
      argsIter_(args_),
      index_(0),
      localSize_(debugEnabled ? DebugFrame::offsetOfFrame() : 0),
      reservedSize_(localSize_),
      frameOffset_(INT32_MAX),
      stackResultPointerOffset_(INT32_MAX),
      mirType_(MIRType::Undefined),
      done_(false) {
  MOZ_ASSERT(args.lengthWithoutStackResults() <= locals.length());
  settle();
}

int32_t BaseLocalIter::pushLocal(size_t nbytes) {
  MOZ_ASSERT(nbytes % 4 == 0 && nbytes <= 16);
  localSize_ = AlignBytes(localSize_, nbytes) + nbytes;
  return localSize_;  // Locals grow down so capture base address.
}
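
// Worked example for pushLocal(): if localSize_ is currently 12 and an 8-byte
// local is pushed, the allocation is aligned up to 16 and localSize_ becomes
// 24, which is returned; the local's lowest-addressed byte then lies 24 bytes
// below the Frame, since locals grow down.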

void BaseLocalIter::settle() {
  if (!argsIter_.done()) {
    mirType_ = argsIter_.mirType();
    switch (mirType_) {
      case MIRType::Pointer:
        // The pointer to stack results is handled like any other argument:
        // either addressed in place if it is passed on the stack, or we spill
        // it in the frame if it's in a register.
        MOZ_ASSERT(args_.isSyntheticStackResultPointerArg(index_));
        [[fallthrough]];
      case MIRType::Int32:
      case MIRType::Int64:
      case MIRType::Double:
      case MIRType::Float32:
      case MIRType::RefOrNull:
        if (argsIter_->argInRegister()) {
          frameOffset_ = pushLocal(MIRTypeToSize(mirType_));
        } else {
          frameOffset_ = -(argsIter_->offsetFromArgBase() + sizeof(Frame));
        }
        break;
      default:
        MOZ_CRASH("Argument type");
    }
    if (mirType_ == MIRType::Pointer) {
      stackResultPointerOffset_ = frameOffset();
      // Advance past the synthetic stack result pointer argument and fall
      // through to the next case.
      argsIter_++;
      MOZ_ASSERT(argsIter_.done());
    } else {
      return;
    }
  }

  if (index_ < locals_.length()) {
    switch (locals_[index_].kind()) {
      case ValType::I32:
      case ValType::I64:
      case ValType::F32:
      case ValType::F64:
      case ValType::Ref:
        // TODO/AnyRef-boxing: With boxed immediates and strings, the
        // debugger must be made aware that AnyRef != Pointer.
        ASSERT_ANYREF_IS_JSOBJECT;
        mirType_ = ToMIRType(locals_[index_]);
        frameOffset_ = pushLocal(MIRTypeToSize(mirType_));
        break;
      default:
        MOZ_CRASH("Compiler bug: Unexpected local type");
    }
    return;
  }

  done_ = true;
}

void BaseLocalIter::operator++(int) {
  MOZ_ASSERT(!done_);
  index_++;
  if (!argsIter_.done()) {
    argsIter_++;
  }
  settle();
}

// Abstraction of the height of the stack frame, to avoid type confusion.

class StackHeight {
  friend class BaseStackFrameAllocator;

  uint32_t height;

 public:
  explicit StackHeight(uint32_t h) : height(h) {}
  static StackHeight Invalid() { return StackHeight(UINT32_MAX); }
  bool isValid() const { return height != UINT32_MAX; }
  bool operator==(StackHeight rhs) const {
    MOZ_ASSERT(isValid() && rhs.isValid());
    return height == rhs.height;
  }
  bool operator!=(StackHeight rhs) const { return !(*this == rhs); }
};

// Abstraction for where multi-value results go on the machine stack.

class StackResultsLoc {
  uint32_t bytes_;
  size_t count_;
  Maybe<uint32_t> height_;

 public:
  StackResultsLoc() : bytes_(0), count_(0) {}
  StackResultsLoc(uint32_t bytes, size_t count, uint32_t height)
      : bytes_(bytes), count_(count), height_(Some(height)) {
    MOZ_ASSERT(bytes != 0);
    MOZ_ASSERT(count != 0);
    MOZ_ASSERT(height != 0);
  }

  uint32_t bytes() const { return bytes_; }
  uint32_t count() const { return count_; }
  uint32_t height() const { return height_.value(); }

  bool hasStackResults() const { return bytes() != 0; }
  StackResults stackResults() const {
    return hasStackResults() ? StackResults::HasStackResults
                             : StackResults::NoStackResults;
  }
};

// Abstraction of the baseline compiler's stack frame (except for the Frame /
// DebugFrame parts).  See comments above for more.  Remember, "below" on the
// stack means at lower addresses.
//
// The abstraction is split into two parts: BaseStackFrameAllocator is
// responsible for allocating and deallocating space on the stack and for
// performing computations that are affected by how the allocation is performed;
// BaseStackFrame then provides a pleasant interface for stack frame management.

class BaseStackFrameAllocator {
  MacroAssembler& masm;

#ifdef RABALDR_CHUNKY_STACK
  // On platforms that require the stack pointer to be aligned on a boundary
  // greater than the typical stack item (eg, ARM64 requires 16-byte alignment
  // but items are 8 bytes), allocate stack memory in chunks, and use a
  // separate stack height variable to track the effective stack pointer
  // within the allocated area.  Effectively, there's a variable amount of
  // free space directly above the stack pointer.  See diagram above.

  // The following must be true in order for the stack height to be
  // predictable at control flow joins:
  //
  // - The Local area is always aligned according to WasmStackAlignment, ie,
  //   masm.framePushed() % WasmStackAlignment is zero after allocating
  //   locals.
  //
  // - ChunkSize is always a multiple of WasmStackAlignment.
  //
  // - Pushing and popping are always in units of ChunkSize (hence preserving
  //   alignment).
  //
  // - The free space on the stack (masm.framePushed() - currentStackHeight_)
  //   is a predictable (nonnegative) amount.

  // As an optimization, we pre-allocate some space on the stack; the size of
  // this allocation is InitialChunk, and it must be a multiple of ChunkSize.
  // It is allocated as part of the function prologue and deallocated as part
  // of the epilogue, along with the locals.
  //
  // If ChunkSize is too large then we risk overflowing the stack on simple
  // recursions with few live values where stack overflow should not be a
  // risk; if it is too small we spend too much time adjusting the stack
  // pointer.
  //
  // Good values for ChunkSize are the subject of future empirical analysis;
  // eight words is just an educated guess.

  static constexpr uint32_t ChunkSize = 8 * sizeof(void*);
  static constexpr uint32_t InitialChunk = ChunkSize;

  // The current logical height of the frame is
  //   currentStackHeight_ = localSize_ + dynamicSize
  // where dynamicSize is not accounted for explicitly and localSize_ also
  // includes size for the DebugFrame.
  //
  // The allocated size of the frame, provided by masm.framePushed(), is usually
  // larger than currentStackHeight_, notably at the beginning of execution when
  // we've allocated InitialChunk extra space.

  uint32_t currentStackHeight_;
#endif

  // Size of the Local area in bytes (stable after BaseCompiler::init() has
  // called BaseStackFrame::setupLocals(), which in turn calls
  // BaseStackFrameAllocator::setLocalSize()), always rounded to the proper
  // stack alignment.  The Local area is then allocated in beginFunction(),
  // following the allocation of the Header.  See onFixedStackAllocated()
  // below.

  uint32_t localSize_;

 protected:
  ///////////////////////////////////////////////////////////////////////////
  //
  // Initialization

  explicit BaseStackFrameAllocator(MacroAssembler& masm)
      : masm(masm),
#ifdef RABALDR_CHUNKY_STACK
        currentStackHeight_(0),
#endif
        localSize_(UINT32_MAX) {
  }

 protected:
  //////////////////////////////////////////////////////////////////////
  //
  // The Local area - the static part of the frame.

  // Record the size of the Local area, once it is known.

  void setLocalSize(uint32_t localSize) {
    MOZ_ASSERT(localSize == AlignBytes(localSize, sizeof(void*)),
               "localSize_ should be aligned to at least a pointer");
    MOZ_ASSERT(localSize_ == UINT32_MAX);
    localSize_ = localSize;
  }

  // Record the current stack height, after it has become stable in
  // beginFunction().  See also BaseStackFrame::onFixedStackAllocated().

  void onFixedStackAllocated() {
    MOZ_ASSERT(localSize_ != UINT32_MAX);
#ifdef RABALDR_CHUNKY_STACK
    currentStackHeight_ = localSize_;
#endif
  }

 public:
  // The fixed amount of memory, in bytes, allocated on the stack below the
  // Header for purposes such as locals and other fixed values.  Includes all
  // necessary alignment, and on ARM64 also the initial chunk for the working
  // stack memory.

  uint32_t fixedAllocSize() const {
    MOZ_ASSERT(localSize_ != UINT32_MAX);
#ifdef RABALDR_CHUNKY_STACK
    return localSize_ + InitialChunk;
#else
    return localSize_;
#endif
  }

#ifdef RABALDR_CHUNKY_STACK
  // The allocated frame size is frequently larger than the logical stack
  // height; we round up to a chunk boundary, and special case the initial
  // chunk.
  uint32_t framePushedForHeight(uint32_t logicalHeight) {
    if (logicalHeight <= fixedAllocSize()) {
      return fixedAllocSize();
    }
    return fixedAllocSize() +
           AlignBytes(logicalHeight - fixedAllocSize(), ChunkSize);
  }
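
  // Worked example with hypothetical numbers: on a 64-bit chunky-stack
  // platform ChunkSize is 8 * 8 = 64 bytes.  If fixedAllocSize() is 128 and
  // the logical height is 150, the allocated frame is rounded up to
  // 128 + AlignBytes(150 - 128, 64) = 128 + 64 = 192 bytes.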
#endif

 protected:
  //////////////////////////////////////////////////////////////////////
  //
  // The Dynamic area - the dynamic part of the frame, for spilling and saving
  // intermediate values.

  // Offset off of sp_ for the slot at stack area location `offset`.

  int32_t stackOffset(int32_t offset) { return masm.framePushed() - offset; }

  uint32_t computeHeightWithStackResults(StackHeight stackBase,
                                         uint32_t stackResultBytes) {
    MOZ_ASSERT(stackResultBytes);
    MOZ_ASSERT(currentStackHeight() >= stackBase.height);
    return stackBase.height + stackResultBytes;
  }

#ifdef RABALDR_CHUNKY_STACK
  void pushChunkyBytes(uint32_t bytes) {
    MOZ_ASSERT(bytes <= ChunkSize);
    checkChunkyInvariants();
    if (masm.framePushed() - currentStackHeight_ < bytes) {
      masm.reserveStack(ChunkSize);
    }
    currentStackHeight_ += bytes;
    checkChunkyInvariants();
  }

  void popChunkyBytes(uint32_t bytes) {
    checkChunkyInvariants();
    currentStackHeight_ -= bytes;
    // Sometimes, popChunkyBytes() is used to pop a larger area, as when we drop
    // values consumed by a call, and we may need to drop several chunks.  But
    // never drop the initial chunk.  Crucially, the amount we drop is always an
    // integral number of chunks.
    uint32_t freeSpace = masm.framePushed() - currentStackHeight_;
    if (freeSpace >= ChunkSize) {
      uint32_t targetAllocSize = framePushedForHeight(currentStackHeight_);
      uint32_t amountToFree = masm.framePushed() - targetAllocSize;
      MOZ_ASSERT(amountToFree % ChunkSize == 0);
      if (amountToFree) {
        masm.freeStack(amountToFree);
      }
    }
    checkChunkyInvariants();
  }
#endif

  uint32_t currentStackHeight() const {
#ifdef RABALDR_CHUNKY_STACK
    return currentStackHeight_;
#else
    return masm.framePushed();
#endif
  }

 private:
#ifdef RABALDR_CHUNKY_STACK
  void checkChunkyInvariants() {
    MOZ_ASSERT(masm.framePushed() >= fixedAllocSize());
    MOZ_ASSERT(masm.framePushed() >= currentStackHeight_);
    MOZ_ASSERT(masm.framePushed() == fixedAllocSize() ||
               masm.framePushed() - currentStackHeight_ < ChunkSize);
    MOZ_ASSERT((masm.framePushed() - localSize_) % ChunkSize == 0);
  }
#endif

  // For a given stack height, return the appropriate size of the allocated
  // frame.

  uint32_t framePushedForHeight(StackHeight stackHeight) {
#ifdef RABALDR_CHUNKY_STACK
    // A more complicated adjustment is needed.
    return framePushedForHeight(stackHeight.height);
#else
    // The allocated frame size equals the stack height.
    return stackHeight.height;
#endif
  }

 public:
  // The current height of the stack area, not necessarily zero-based, in a
  // type-safe way.

  StackHeight stackHeight() const { return StackHeight(currentStackHeight()); }

  // Set the frame height to a previously recorded value.

  void setStackHeight(StackHeight amount) {
#ifdef RABALDR_CHUNKY_STACK
    currentStackHeight_ = amount.height;
    masm.setFramePushed(framePushedForHeight(amount));
    checkChunkyInvariants();
#else
    masm.setFramePushed(amount.height);
#endif
  }

  // The current height of the dynamic part of the stack area (ie, the backing
  // store for the evaluation stack), zero-based.

  uint32_t dynamicHeight() const { return currentStackHeight() - localSize_; }

  // Before branching to an outer control label, pop the execution stack to
  // the level expected by that region, but do not update masm.framePushed()
  // as that will happen as compilation leaves the block.
  //
  // Note these operate directly on the stack pointer register.

  void popStackBeforeBranch(StackHeight destStackHeight,
                            uint32_t stackResultBytes) {
    uint32_t framePushedHere = masm.framePushed();
    StackHeight heightThere =
        StackHeight(destStackHeight.height + stackResultBytes);
    uint32_t framePushedThere = framePushedForHeight(heightThere);
    if (framePushedHere > framePushedThere) {
      masm.addToStackPtr(Imm32(framePushedHere - framePushedThere));
    }
  }
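
  // Worked example with hypothetical numbers (non-chunky stack): if the
  // destination height is 48, the branch carries 8 bytes of stack results,
  // and masm.framePushed() is 96, then framePushedThere is 56 and we pop
  // 96 - 56 = 40 bytes by adding 40 to the stack pointer.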

  void popStackBeforeBranch(StackHeight destStackHeight, ResultType type) {
    popStackBeforeBranch(destStackHeight,
                         ABIResultIter::MeasureStackBytes(type));
  }

  // Given that there are |stackParamSize| bytes on the dynamic stack
  // corresponding to the stack results, return the stack height once these
  // parameters are popped.

  StackHeight stackResultsBase(uint32_t stackParamSize) {
    return StackHeight(currentStackHeight() - stackParamSize);
  }

  // For most of WebAssembly, adjacent instructions have fallthrough control
  // flow between them, which allows us to simply thread the current stack
  // height through the compiler.  There are two exceptions to this rule: when
  // leaving a block via dead code, and when entering the "else" arm of an "if".
  // In these cases, the stack height is the block entry height, plus any stack
  // values (results in the block exit case, parameters in the else entry case).

  void resetStackHeight(StackHeight destStackHeight, ResultType type) {
    uint32_t height = destStackHeight.height;
    height += ABIResultIter::MeasureStackBytes(type);
    setStackHeight(StackHeight(height));
  }

  // Return offset of stack result.

  uint32_t locateStackResult(const ABIResult& result, StackHeight stackBase,
                             uint32_t stackResultBytes) {
    MOZ_ASSERT(result.onStack());
    MOZ_ASSERT(result.stackOffset() + result.size() <= stackResultBytes);
    uint32_t end = computeHeightWithStackResults(stackBase, stackResultBytes);
    return end - result.stackOffset();
  }

 public:
  //////////////////////////////////////////////////////////////////////
  //
  // The Argument area - for outgoing calls.
  //
  // We abstract these operations as an optimization: we can merge the freeing
  // of the argument area and dropping values off the stack after a call.  But
  // they always amount to manipulating the real stack pointer by some amount.
  //
  // Note that we do not update currentStackHeight_ for this; the frame does
  // not know about outgoing arguments.  But we do update framePushed(), so we
  // can still index into the frame below the outgoing arguments area.

  // This is always equivalent to a masm.reserveStack() call.

  void allocArgArea(size_t argSize) {
    if (argSize) {
      masm.reserveStack(argSize);
    }
  }

  // This frees the argument area allocated by allocArgArea(), and `argSize`
  // must be equal to the `argSize` argument to allocArgArea().  In addition
  // we drop some values from the frame, corresponding to the values that were
  // consumed by the call.

  void freeArgAreaAndPopBytes(size_t argSize, size_t dropSize) {
#ifdef RABALDR_CHUNKY_STACK
    // Freeing the outgoing arguments and freeing the consumed values have
    // different semantics here, which is why the operation is split.
    if (argSize) {
      masm.freeStack(argSize);
    }
    popChunkyBytes(dropSize);
#else
    if (argSize + dropSize) {
      masm.freeStack(argSize + dropSize);
    }
#endif
  }
};

class BaseStackFrame final : public BaseStackFrameAllocator {
  MacroAssembler& masm;

  // The largest observed value of masm.framePushed(), ie, the size of the
  // stack frame.  Read this for its true value only when code generation is
  // finished.
  uint32_t maxFramePushed_;

  // Patch point where we check for stack overflow.
  CodeOffset stackAddOffset_;

  // Low byte offset of pointer to stack results, if any.
  Maybe<int32_t> stackResultsPtrOffset_;

  // Low byte offset of local area for true locals (not parameters).
  uint32_t varLow_;

  // High byte offset + 1 of local area for true locals.
  uint32_t varHigh_;

  // The stack pointer, cached for brevity.
  RegisterOrSP sp_;

 public:
  explicit BaseStackFrame(MacroAssembler& masm)
      : BaseStackFrameAllocator(masm),
        masm(masm),
        maxFramePushed_(0),
        stackAddOffset_(0),
        varLow_(UINT32_MAX),
        varHigh_(UINT32_MAX),
        sp_(masm.getStackPointer()) {}

  ///////////////////////////////////////////////////////////////////////////
  //
  // Stack management and overflow checking

  // This must be called once beginFunction has allocated space for the Header
  // (the Frame and DebugFrame) and the Local area, and will record the current
  // frame size for internal use by the stack abstractions.

  void onFixedStackAllocated() {
    maxFramePushed_ = masm.framePushed();
    BaseStackFrameAllocator::onFixedStackAllocated();
  }

  // We won't know until after we've generated code how big the frame will be
  // (we may need arbitrary spill slots and outgoing param slots) so emit a
  // patchable add that is patched in endFunction().
  //
  // Note the platform scratch register may be used by branchPtr(), so
  // generally tmp must be something else.

  void checkStack(Register tmp, BytecodeOffset trapOffset) {
    stackAddOffset_ = masm.sub32FromStackPtrWithPatch(tmp);
    Label ok;
    masm.branchPtr(Assembler::Below,
                   Address(WasmTlsReg, offsetof(wasm::TlsData, stackLimit)),
                   tmp, &ok);
    masm.wasmTrap(Trap::StackOverflow, trapOffset);
    masm.bind(&ok);
  }

  void patchCheckStack() {
    masm.patchSub32FromStackPtr(stackAddOffset_,
                                Imm32(int32_t(maxFramePushed_)));
  }

  // Very large frames are implausible, probably an attack.

  bool checkStackHeight() {
    // 512KiB should be enough, considering how Rabaldr uses the stack and
    // what the standard limits are:
    //
    // - 1,000 parameters
    // - 50,000 locals
    // - 10,000 values on the eval stack (not an official limit)
    //
    // At sizeof(int64) bytes per slot this works out to about 480KiB.
    return maxFramePushed_ <= 512 * 1024;
  }

  ///////////////////////////////////////////////////////////////////////////
  //
  // Local area

  struct Local {
    // Type of the value.
    const MIRType type;

    // Byte offset from Frame "into" the locals, ie positive for true locals
    // and negative for incoming args that read directly from the arg area.
    // This assumes the stack grows down and that locals are on the stack at
    // lower addresses than the Frame; the value is the offset from Frame of
    // the lowest-addressed byte of the local.
    const int32_t offs;

    Local(MIRType type, int32_t offs) : type(type), offs(offs) {}
  };

  // Profiling shows that the number of parameters and locals frequently
  // touches or exceeds 8.  So 16 seems like a reasonable starting point.
  using LocalVector = Vector<Local, 16, SystemAllocPolicy>;

  // Initialize `localInfo` based on the types of `locals` and `args`.
  bool setupLocals(const ValTypeVector& locals, const ArgTypeVector& args,
                   bool debugEnabled, LocalVector* localInfo) {
    if (!localInfo->reserve(locals.length())) {
      return false;
    }

    DebugOnly<uint32_t> index = 0;
    BaseLocalIter i(locals, args, debugEnabled);
    varLow_ = i.reservedSize();
    for (; !i.done() && i.index() < args.length(); i++) {
      MOZ_ASSERT(i.isArg());
      MOZ_ASSERT(i.index() == index);
      localInfo->infallibleEmplaceBack(i.mirType(), i.frameOffset());
      varLow_ = i.currentLocalSize();
      index++;
    }

    varHigh_ = varLow_;
    for (; !i.done(); i++) {
      MOZ_ASSERT(!i.isArg());
      MOZ_ASSERT(i.index() == index);
      localInfo->infallibleEmplaceBack(i.mirType(), i.frameOffset());
      varHigh_ = i.currentLocalSize();
      index++;
    }

    setLocalSize(AlignBytes(varHigh_, WasmStackAlignment));

    if (args.hasSyntheticStackResultPointerArg()) {
      stackResultsPtrOffset_ = Some(i.stackResultPointerOffset());
    }

    return true;
  }

  void zeroLocals(BaseRegAlloc* ra);

  void loadLocalI32(const Local& src, RegI32 dest) {
    masm.load32(Address(sp_, localOffset(src)), dest);
  }

#ifndef JS_PUNBOX64
  void loadLocalI64Low(const Local& src, RegI32 dest) {
    masm.load32(Address(sp_, localOffset(src) + INT64LOW_OFFSET), dest);
  }

  void loadLocalI64High(const Local& src, RegI32 dest) {
    masm.load32(Address(sp_, localOffset(src) + INT64HIGH_OFFSET), dest);
  }
#endif

  void loadLocalI64(const Local& src, RegI64 dest) {
    masm.load64(Address(sp_, localOffset(src)), dest);
  }

  void loadLocalPtr(const Local& src, RegPtr dest) {
    masm.loadPtr(Address(sp_, localOffset(src)), dest);
  }

  void loadLocalF64(const Local& src, RegF64 dest) {
    masm.loadDouble(Address(sp_, localOffset(src)), dest);
  }

  void loadLocalF32(const Local& src, RegF32 dest) {
    masm.loadFloat32(Address(sp_, localOffset(src)), dest);
  }

  void storeLocalI32(RegI32 src, const Local& dest) {
    masm.store32(src, Address(sp_, localOffset(dest)));
  }

  void storeLocalI64(RegI64 src, const Local& dest) {
    masm.store64(src, Address(sp_, localOffset(dest)));
  }

  void storeLocalPtr(Register src, const Local& dest) {
    masm.storePtr(src, Address(sp_, localOffset(dest)));
  }

  void storeLocalF64(RegF64 src, const Local& dest) {
    masm.storeDouble(src, Address(sp_, localOffset(dest)));
  }

  void storeLocalF32(RegF32 src, const Local& dest) {
    masm.storeFloat32(src, Address(sp_, localOffset(dest)));
  }

  // Offset off of sp_ for `local`.
  int32_t localOffset(const Local& local) { return localOffset(local.offs); }

  // The incoming stack result area pointer is for stack results of the function
  // being compiled.
  void loadIncomingStackResultAreaPtr(RegPtr reg) {
    masm.loadPtr(Address(sp_, stackOffset(stackResultsPtrOffset_.value())),
                 reg);
  }
  void storeIncomingStackResultAreaPtr(RegPtr reg) {
    // If we get here, that means the pointer to the stack results area was
    // passed in as a register, and therefore it will be spilled below the
    // frame, so the offset is a positive height.
    MOZ_ASSERT(stackResultsPtrOffset_.value() > 0);
    masm.storePtr(reg,
                  Address(sp_, stackOffset(stackResultsPtrOffset_.value())));
  }

  // An outgoing stack result area pointer is for stack results of callees of
  // the function being compiled.
  void computeOutgoingStackResultAreaPtr(const StackResultsLoc& results,
                                         RegPtr dest) {
    MOZ_ASSERT(results.height() <= masm.framePushed());
    uint32_t offsetFromSP = masm.framePushed() - results.height();
    masm.movePtr(AsRegister(sp_), dest);
    masm.addPtr(Imm32(offsetFromSP), dest);
  }

 private:
  // Offset off of sp_ for a local with offset `offset` from Frame.
  int32_t localOffset(int32_t offset) { return masm.framePushed() - offset; }

 public:
  ///////////////////////////////////////////////////////////////////////////
  //
  // Dynamic area

  static const size_t StackSizeOfPtr = ABIResult::StackSizeOfPtr;
  static const size_t StackSizeOfInt64 = ABIResult::StackSizeOfInt64;
  static const size_t StackSizeOfFloat = ABIResult::StackSizeOfFloat;
  static const size_t StackSizeOfDouble = ABIResult::StackSizeOfDouble;

  uint32_t pushPtr(Register r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    pushChunkyBytes(StackSizeOfPtr);
    masm.storePtr(r, Address(sp_, stackOffset(currentStackHeight())));
#else
    masm.Push(r);
#endif
    maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
    MOZ_ASSERT(stackBefore + StackSizeOfPtr == currentStackHeight());
    return currentStackHeight();
  }

  uint32_t pushFloat32(FloatRegister r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    pushChunkyBytes(StackSizeOfFloat);
    masm.storeFloat32(r, Address(sp_, stackOffset(currentStackHeight())));
#else
    masm.Push(r);
#endif
    maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
    MOZ_ASSERT(stackBefore + StackSizeOfFloat == currentStackHeight());
    return currentStackHeight();
  }

  uint32_t pushDouble(FloatRegister r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    pushChunkyBytes(StackSizeOfDouble);
    masm.storeDouble(r, Address(sp_, stackOffset(currentStackHeight())));
#else
    masm.Push(r);
#endif
    maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
    MOZ_ASSERT(stackBefore + StackSizeOfDouble == currentStackHeight());
    return currentStackHeight();
  }

  void popPtr(Register r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    masm.loadPtr(Address(sp_, stackOffset(currentStackHeight())), r);
    popChunkyBytes(StackSizeOfPtr);
#else
    masm.Pop(r);
#endif
    MOZ_ASSERT(stackBefore - StackSizeOfPtr == currentStackHeight());
  }

  void popFloat32(FloatRegister r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    masm.loadFloat32(Address(sp_, stackOffset(currentStackHeight())), r);
    popChunkyBytes(StackSizeOfFloat);
#else
    masm.Pop(r);
#endif
    MOZ_ASSERT(stackBefore - StackSizeOfFloat == currentStackHeight());
  }

  void popDouble(FloatRegister r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    masm.loadDouble(Address(sp_, stackOffset(currentStackHeight())), r);
    popChunkyBytes(StackSizeOfDouble);
#else
    masm.Pop(r);
#endif
    MOZ_ASSERT(stackBefore - StackSizeOfDouble == currentStackHeight());
  }

  void popBytes(size_t bytes) {
    if (bytes > 0) {
#ifdef RABALDR_CHUNKY_STACK
      popChunkyBytes(bytes);
#else
      masm.freeStack(bytes);
#endif
    }
  }

  void loadStackI32(int32_t offset, RegI32 dest) {
    masm.load32(Address(sp_, stackOffset(offset)), dest);
  }

  void loadStackI64(int32_t offset, RegI64 dest) {
    masm.load64(Address(sp_, stackOffset(offset)), dest);
  }

#ifndef JS_PUNBOX64
  void loadStackI64Low(int32_t offset, RegI32 dest) {
    masm.load32(Address(sp_, stackOffset(offset - INT64LOW_OFFSET)), dest);
  }

  void loadStackI64High(int32_t offset, RegI32 dest) {
    masm.load32(Address(sp_, stackOffset(offset - INT64HIGH_OFFSET)), dest);
  }
#endif

  // Disambiguation: this loads a "Ptr" value from the stack; it does not load
  // the "StackPtr".

  void loadStackPtr(int32_t offset, RegPtr dest) {
    masm.loadPtr(Address(sp_, stackOffset(offset)), dest);
  }

  void loadStackF64(int32_t offset, RegF64 dest) {
    masm.loadDouble(Address(sp_, stackOffset(offset)), dest);
  }

  void loadStackF32(int32_t offset, RegF32 dest) {
    masm.loadFloat32(Address(sp_, stackOffset(offset)), dest);
  }

  uint32_t prepareStackResultArea(StackHeight stackBase,
                                  uint32_t stackResultBytes) {
    uint32_t end = computeHeightWithStackResults(stackBase, stackResultBytes);
    if (currentStackHeight() < end) {
      uint32_t bytes = end - currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
      pushChunkyBytes(bytes);
#else
      masm.reserveStack(bytes);
#endif
      maxFramePushed_ = std::max(maxFramePushed_, masm.framePushed());
    }
    return end;
  }

  void finishStackResultArea(StackHeight stackBase, uint32_t stackResultBytes) {
    uint32_t end = computeHeightWithStackResults(stackBase, stackResultBytes);
    MOZ_ASSERT(currentStackHeight() >= end);
    popBytes(currentStackHeight() - end);
  }

  void shuffleStackResultsTowardFP(uint32_t srcHeight, uint32_t destHeight,
                                   uint32_t bytes, Register temp) {
    MOZ_ASSERT(destHeight < srcHeight);
    MOZ_ASSERT(bytes % sizeof(uint32_t) == 0);
    uint32_t destOffset = stackOffset(destHeight) + bytes;
    uint32_t srcOffset = stackOffset(srcHeight) + bytes;
    while (bytes >= sizeof(intptr_t)) {
      destOffset -= sizeof(intptr_t);
      srcOffset -= sizeof(intptr_t);
      bytes -= sizeof(intptr_t);
      masm.loadPtr(Address(sp_, srcOffset), temp);
      masm.storePtr(temp, Address(sp_, destOffset));
    }
    if (bytes) {
      MOZ_ASSERT(bytes == sizeof(uint32_t));
      destOffset -= sizeof(uint32_t);
      srcOffset -= sizeof(uint32_t);
      masm.load32(Address(sp_, srcOffset), temp);
      masm.store32(temp, Address(sp_, destOffset));
    }
  }

  void shuffleStackResultsTowardFP(StackHeight srcHeight,
                                   StackHeight destHeight, uint32_t bytes,
                                   Register temp) {
    MOZ_ASSERT(srcHeight.isValid());
    MOZ_ASSERT(destHeight.isValid());
    uint32_t src = computeHeightWithStackResults(srcHeight, bytes);
    uint32_t dest = computeHeightWithStackResults(destHeight, bytes);
    MOZ_ASSERT(src <= currentStackHeight());
    MOZ_ASSERT(dest <= currentStackHeight());
    shuffleStackResultsTowardFP(src - bytes, dest - bytes, bytes, temp);
  }

  void shuffleStackResultsTowardSP(uint32_t srcHeight, uint32_t destHeight,
                                   uint32_t bytes, Register temp) {
    MOZ_ASSERT(destHeight > srcHeight);
    MOZ_ASSERT(bytes % sizeof(uint32_t) == 0);
    uint32_t destOffset = stackOffset(destHeight);
    uint32_t srcOffset = stackOffset(srcHeight);
    while (bytes >= sizeof(intptr_t)) {
      masm.loadPtr(Address(sp_, srcOffset), temp);
      masm.storePtr(temp, Address(sp_, destOffset));
      destOffset += sizeof(intptr_t);
      srcOffset += sizeof(intptr_t);
      bytes -= sizeof(intptr_t);
    }
    if (bytes) {
      MOZ_ASSERT(bytes == sizeof(uint32_t));
      masm.load32(Address(sp_, srcOffset), temp);
      masm.store32(temp, Address(sp_, destOffset));
    }
  }

  // Copy results from the top of the current stack frame to an area of memory,
  // and pop the stack accordingly.  `dest` is the address of the low byte of
  // that memory.
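  //
  // (Worked example, assuming a 64-bit target: copying 20 bytes does two
  // pointer-sized 8-byte copies followed by a single 4-byte copy.)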
  void popStackResultsToMemory(Register dest, uint32_t bytes, Register temp) {
    MOZ_ASSERT(bytes <= currentStackHeight());
    MOZ_ASSERT(bytes % sizeof(uint32_t) == 0);
    uint32_t srcOffset = stackOffset(currentStackHeight());
    uint32_t destOffset = 0;
    while (bytes >= sizeof(intptr_t)) {
      masm.loadPtr(Address(sp_, srcOffset), temp);
      masm.storePtr(temp, Address(dest, destOffset));
      destOffset += sizeof(intptr_t);
      srcOffset += sizeof(intptr_t);
      bytes -= sizeof(intptr_t);
    }
    if (bytes) {
      MOZ_ASSERT(bytes == sizeof(uint32_t));
      masm.load32(Address(sp_, srcOffset), temp);
      masm.store32(temp, Address(dest, destOffset));
    }
    popBytes(bytes);
  }

  void storeImmediateToStack(int32_t imm, uint32_t destHeight, Register temp) {
    masm.move32(Imm32(imm), temp);
    masm.store32(temp, Address(sp_, stackOffset(destHeight)));
  }

  void storeImmediateToStack(int64_t imm, uint32_t destHeight, Register temp) {
#ifdef JS_PUNBOX64
    masm.move64(Imm64(imm), Register64(temp));
    masm.store64(Register64(temp), Address(sp_, stackOffset(destHeight)));
#else
    union {
      int64_t i64;
      int32_t i32[2];
    } bits = {.i64 = imm};
    storeImmediateToStack(bits.i32[0], destHeight, temp);
    storeImmediateToStack(bits.i32[1], destHeight - sizeof(int32_t), temp);
#endif
  }
};

void BaseStackFrame::zeroLocals(BaseRegAlloc* ra) {
  MOZ_ASSERT(varLow_ != UINT32_MAX);

  if (varLow_ == varHigh_) {
    return;
  }

  static const uint32_t wordSize = sizeof(void*);

  // The adjustments to 'low' by the size of the item being stored compensate
  // for the fact that locals offsets are the offsets from Frame to the bytes
  // directly "above" the locals in the locals area.  See comment at Local.

  // On 64-bit systems we may have 32-bit alignment for the local area as it
  // may be preceded by parameters and prologue/debug data.

  uint32_t low = varLow_;
  if (low % wordSize) {
    masm.store32(Imm32(0), Address(sp_, localOffset(low + 4)));
    low += 4;
  }
  MOZ_ASSERT(low % wordSize == 0);

  const uint32_t high = AlignBytes(varHigh_, wordSize);

  // An UNROLL_LIMIT of 16 is chosen so that we only need an 8-bit signed
  // immediate to represent the offset in the store instructions in the loop
  // on x64.
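  //
  // (Sketch of the arithmetic, not normative: with an 8-byte word the loop
  // below uses store offsets 0, -8, ..., -(15 * 8) = -120, all of which fit
  // in the signed 8-bit range [-128, 127].)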

  const uint32_t UNROLL_LIMIT = 16;
  const uint32_t initWords = (high - low) / wordSize;
  const uint32_t tailWords = initWords % UNROLL_LIMIT;
  const uint32_t loopHigh = high - (tailWords * wordSize);

  // With only one word to initialize, just store an immediate zero.

  if (initWords == 1) {
    masm.storePtr(ImmWord(0), Address(sp_, localOffset(low + wordSize)));
    return;
  }

  // For other cases, it's best to have a zero in a register.
  //
  // One can do more here with SIMD registers (store 16 bytes at a time) or
  // with instructions like STRD on ARM (store 8 bytes at a time), but that's
  // for another day.

  RegI32 zero = ra->needI32();
  masm.mov(ImmWord(0), zero);

  // For the general case we want to have a loop body of UNROLL_LIMIT stores
  // and then a tail of less than UNROLL_LIMIT stores.  When initWords is less
  // than 2*UNROLL_LIMIT the loop trip count is at most 1 and there is no
  // benefit to having the pointer calculations and the compare-and-branch.
  // So we completely unroll when we have initWords < 2 * UNROLL_LIMIT.  (In
  // this case we'll end up using 32-bit offsets on x64 for up to half of the
  // stores, though.)
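  //
  // Worked example (numbers chosen purely for illustration): with
  // UNROLL_LIMIT = 16 and initWords = 40, tailWords = 40 % 16 = 8, so the
  // loop below runs twice (32 words) and the tail stores the remaining 8
  // words; with initWords = 20 (< 2 * UNROLL_LIMIT) we instead emit 20
  // unrolled stores.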

  // Fully-unrolled case.

  if (initWords < 2 * UNROLL_LIMIT) {
    for (uint32_t i = low; i < high; i += wordSize) {
      masm.storePtr(zero, Address(sp_, localOffset(i + wordSize)));
    }
    ra->freeI32(zero);
    return;
  }

  // Unrolled loop with a tail. Stores will use negative offsets. That's OK
  // for x86 and ARM, at least.

  // Compute pointer to the highest-addressed slot on the frame.
  RegI32 p = ra->needI32();
  masm.computeEffectiveAddress(Address(sp_, localOffset(low + wordSize)), p);

  // Compute pointer to the lowest-addressed slot on the frame that will be
  // initialized by the loop body.
  RegI32 lim = ra->needI32();
  masm.computeEffectiveAddress(Address(sp_, localOffset(loopHigh + wordSize)),
                               lim);

  // The loop body.  Eventually we'll have p == lim and exit the loop.
  Label again;
  masm.bind(&again);
  for (uint32_t i = 0; i < UNROLL_LIMIT; ++i) {
    masm.storePtr(zero, Address(p, -(wordSize * i)));
  }
  masm.subPtr(Imm32(UNROLL_LIMIT * wordSize), p);
  masm.branchPtr(Assembler::LessThan, lim, p, &again);

  // The tail.
  for (uint32_t i = 0; i < tailWords; ++i) {
    masm.storePtr(zero, Address(p, -(wordSize * i)));
  }

  ra->freeI32(p);
  ra->freeI32(lim);
  ra->freeI32(zero);
}

// Value stack: stack elements

struct Stk {
 private:
  Stk() : kind_(Unknown), i64val_(0) {}

 public:
  enum Kind {
    // The Mem opcodes are all clustered at the beginning to
    // allow for a quick test within sync().
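    // (Concretely, that quick test is `kind() <= MemLast`; see isMem() and
    // sync().)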
    MemI32,  // 32-bit integer stack value ("offs")
    MemI64,  // 64-bit integer stack value ("offs")
    MemF32,  // 32-bit floating stack value ("offs")
    MemF64,  // 64-bit floating stack value ("offs")
    MemRef,  // reftype (pointer wide) stack value ("offs")

    // The Local opcodes follow the Mem opcodes for a similar
    // quick test within hasLocal().
    LocalI32,  // Local int32 var ("slot")
    LocalI64,  // Local int64 var ("slot")
    LocalF32,  // Local float32 var ("slot")
    LocalF64,  // Local double var ("slot")
    LocalRef,  // Local reftype (pointer wide) var ("slot")

    RegisterI32,  // 32-bit integer register ("i32reg")
    RegisterI64,  // 64-bit integer register ("i64reg")
    RegisterF32,  // 32-bit floating register ("f32reg")
    RegisterF64,  // 64-bit floating register ("f64reg")
    RegisterRef,  // reftype (pointer wide) register ("refReg")

    ConstI32,  // 32-bit integer constant ("i32val")
    ConstI64,  // 64-bit integer constant ("i64val")
    ConstF32,  // 32-bit floating constant ("f32val")
    ConstF64,  // 64-bit floating constant ("f64val")
    ConstRef,  // reftype (pointer wide) constant ("refval")

    Unknown,
  };

  Kind kind_;

  static const Kind MemLast = MemRef;
  static const Kind LocalLast = LocalRef;

  union {
    RegI32 i32reg_;
    RegI64 i64reg_;
    RegPtr refReg_;
    RegF32 f32reg_;
    RegF64 f64reg_;
    int32_t i32val_;
    int64_t i64val_;
    intptr_t refval_;
    float f32val_;
    double f64val_;
    uint32_t slot_;
    uint32_t offs_;
  };

  explicit Stk(RegI32 r) : kind_(RegisterI32), i32reg_(r) {}
  explicit Stk(RegI64 r) : kind_(RegisterI64), i64reg_(r) {}
  explicit Stk(RegPtr r) : kind_(RegisterRef), refReg_(r) {}
  explicit Stk(RegF32 r) : kind_(RegisterF32), f32reg_(r) {}
  explicit Stk(RegF64 r) : kind_(RegisterF64), f64reg_(r) {}
  explicit Stk(int32_t v) : kind_(ConstI32), i32val_(v) {}
  explicit Stk(int64_t v) : kind_(ConstI64), i64val_(v) {}
  explicit Stk(float v) : kind_(ConstF32), f32val_(v) {}
  explicit Stk(double v) : kind_(ConstF64), f64val_(v) {}
  explicit Stk(Kind k, uint32_t v) : kind_(k), slot_(v) {
    MOZ_ASSERT(k > MemLast && k <= LocalLast);
  }
  static Stk StkRef(intptr_t v) {
    Stk s;
    s.kind_ = ConstRef;
    s.refval_ = v;
    return s;
  }
  static Stk StackResult(ValType type, uint32_t offs) {
    Kind k;
    switch (type.kind()) {
      case ValType::I32:
        k = Stk::MemI32;
        break;
      case ValType::I64:
        k = Stk::MemI64;
        break;
      case ValType::F32:
        k = Stk::MemF32;
        break;
      case ValType::F64:
        k = Stk::MemF64;
        break;
      case ValType::Ref:
        k = Stk::MemRef;
        break;
    }
    Stk s;
    s.setOffs(k, offs);
    return s;
  }

  void setOffs(Kind k, uint32_t v) {
    MOZ_ASSERT(k <= MemLast);
    kind_ = k;
    offs_ = v;
  }

  Kind kind() const { return kind_; }
  bool isMem() const { return kind_ <= MemLast; }

  RegI32 i32reg() const {
    MOZ_ASSERT(kind_ == RegisterI32);
    return i32reg_;
  }
  RegI64 i64reg() const {
    MOZ_ASSERT(kind_ == RegisterI64);
    return i64reg_;
  }
  RegPtr refReg() const {
    MOZ_ASSERT(kind_ == RegisterRef);
    return refReg_;
  }
  RegF32 f32reg() const {
    MOZ_ASSERT(kind_ == RegisterF32);
    return f32reg_;
  }
  RegF64 f64reg() const {
    MOZ_ASSERT(kind_ == RegisterF64);
    return f64reg_;
  }

  int32_t i32val() const {
    MOZ_ASSERT(kind_ == ConstI32);
    return i32val_;
  }
  int64_t i64val() const {
    MOZ_ASSERT(kind_ == ConstI64);
    return i64val_;
  }
  intptr_t refval() const {
    MOZ_ASSERT(kind_ == ConstRef);
    return refval_;
  }

  // For these two, use an out-param instead of simply returning, to
  // use the normal stack and not the x87 FP stack (which has an effect on
  // NaNs with the signaling bit set).

  void f32val(float* out) const {
    MOZ_ASSERT(kind_ == ConstF32);
    *out = f32val_;
  }
  void f64val(double* out) const {
    MOZ_ASSERT(kind_ == ConstF64);
    *out = f64val_;
  }

  uint32_t slot() const {
    MOZ_ASSERT(kind_ > MemLast && kind_ <= LocalLast);
    return slot_;
  }
  uint32_t offs() const {
    MOZ_ASSERT(isMem());
    return offs_;
  }
};

typedef Vector<Stk, 0, SystemAllocPolicy> StkVector;

// MachineStackTracker, used for stack-slot pointerness tracking.

class MachineStackTracker {
  // Simulates the machine's stack, with one bool per word.  Index zero in
  // this vector corresponds to the highest address in the machine stack.  The
  // last entry corresponds to what SP currently points at.  This all assumes
  // a grow-down stack.
  //
  // numPtrs_ contains the number of "true" values in vec_, and is therefore
  // redundant.  But it serves as a constant-time way to detect the common
  // case where vec_ holds no "true" values.
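  //
  // Small example (hypothetical state): after pushNonGCPointers(4), vec_ is
  // [false, false, false, false]; setGCPointer(1) then marks the second most
  // recently pushed word, i.e. vec_[length() - 2].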
  size_t numPtrs_;
  Vector<bool, 64, SystemAllocPolicy> vec_;

 public:
  MachineStackTracker() : numPtrs_(0) {}

  ~MachineStackTracker() {
#ifdef DEBUG
    size_t n = 0;
    for (bool b : vec_) {
      n += (b ? 1 : 0);
    }
    MOZ_ASSERT(n == numPtrs_);
#endif
  }

  // Clone this MachineStackTracker, writing the result at |dst|.
  MOZ_MUST_USE bool cloneTo(MachineStackTracker* dst) {
    MOZ_ASSERT(dst->vec_.empty());
    if (!dst->vec_.appendAll(vec_)) {
      return false;
    }
    dst->numPtrs_ = numPtrs_;
    return true;
  }

  // Notionally push |n| non-pointers on the stack.
  MOZ_MUST_USE bool pushNonGCPointers(size_t n) {
    return vec_.appendN(false, n);
  }

  // Mark the stack slot |offsetFromSP| up from the bottom as holding a
  // pointer.
  void setGCPointer(size_t offsetFromSP) {
    // offsetFromSP == 0 denotes the most recently pushed item, == 1 the
    // second most recently pushed item, etc.
    MOZ_ASSERT(offsetFromSP < vec_.length());

    size_t offsetFromTop = vec_.length() - 1 - offsetFromSP;
    numPtrs_ = numPtrs_ + 1 - (vec_[offsetFromTop] ? 1 : 0);
    vec_[offsetFromTop] = true;
  }

  // Query the pointerness of the slot |offsetFromSP| up from the bottom.
  bool isGCPointer(size_t offsetFromSP) {
    MOZ_ASSERT(offsetFromSP < vec_.length());

    size_t offsetFromTop = vec_.length() - 1 - offsetFromSP;
    return vec_[offsetFromTop];
  }

  // Return the number of words tracked by this MachineStackTracker.
  size_t length() { return vec_.length(); }

  // Return the number of pointer-typed words tracked by this
  // MachineStackTracker.
  size_t numPtrs() {
    MOZ_ASSERT(numPtrs_ <= length());
    return numPtrs_;
  }

  // Discard all contents, but (per mozilla::Vector::clear semantics) don't
  // free or reallocate any dynamic storage associated with |vec_|.
  void clear() {
    vec_.clear();
    numPtrs_ = 0;
  }
};

// StackMapGenerator, which carries all state needed to create stack maps.

enum class HasRefTypedDebugFrame { No, Yes };

struct StackMapGenerator {
 private:
  // --- These are constant for the life of the function's compilation ---

  // For generating stack maps, we'll need to know the offsets of registers
  // as saved by the trap exit stub.
  const MachineState& trapExitLayout_;
  const size_t trapExitLayoutNumWords_;

  // Completed stackmaps are added here
  StackMaps* stackMaps_;

  // So that we can get the current offset when creating stack maps
  const MacroAssembler& masm_;

 public:
  // --- These are constant once we've completed beginFunction() ---

  // The number of words of arguments passed to this function in memory.
  size_t numStackArgWords;

  MachineStackTracker machineStackTracker;  // tracks machine stack pointerness

  // This holds masm.framePushed at entry to the function's body.  It is a
  // Maybe because createStackMap needs to know whether or not we're still
  // in the prologue.  It makes a Nothing-to-Some transition just once per
  // function.
  Maybe<uint32_t> framePushedAtEntryToBody;

  // --- These can change at any point ---

  // This holds masm.framePushed as it would be for a function call
  // instruction, but excluding the stack area used to pass arguments in
  // memory.  That is, for an upcoming function call, this will hold
  //
  //   masm.framePushed() at the call instruction -
  //      StackArgAreaSizeUnaligned(argumentTypes)
  //
  // This value denotes the lowest-addressed stack word covered by the current
  // function's stackmap.  Words below this point form the highest-addressed
  // area of the callee's stackmap.  Note that all alignment padding above the
  // arguments-in-memory themselves belongs to the caller's stack map, which
  // is why this is defined in terms of StackArgAreaSizeUnaligned() rather than
  // StackArgAreaSizeAligned().
  //
  // When not inside a function call setup/teardown sequence, it is Nothing.
  // It can make Nothing-to/from-Some transitions arbitrarily as we progress
  // through the function body.
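  //
  // Numeric illustration (values are hypothetical): if masm.framePushed() is
  // 96 at a call and the unaligned outbound argument area is 24 bytes, this
  // holds 96 - 24 = 72, and the 24 bytes nearest SP are described by the
  // callee's stackmap rather than by this function's.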
  Maybe<uint32_t> framePushedExcludingOutboundCallArgs;

  // The number of memory-resident, ref-typed entries on the containing
  // BaseCompiler::stk_.
  size_t memRefsOnStk;

  // This is a copy of machineStackTracker that is used only within individual
  // calls to createStackMap. It is here only to avoid possible heap allocation
  // costs resulting from making it local to createStackMap().
  MachineStackTracker augmentedMst;

  StackMapGenerator(StackMaps* stackMaps, const MachineState& trapExitLayout,
                    const size_t trapExitLayoutNumWords,
                    const MacroAssembler& masm)
      : trapExitLayout_(trapExitLayout),
        trapExitLayoutNumWords_(trapExitLayoutNumWords),
        stackMaps_(stackMaps),
        masm_(masm),
        numStackArgWords(0),
        memRefsOnStk(0) {}

  // At the beginning of a function, we may have live roots in registers (as
  // arguments) at the point where we perform a stack overflow check.  This
  // method generates the "extra" stackmap entries to describe that, in the
  // case that the check fails and we wind up calling into the wasm exit
  // stub, as generated by GenerateTrapExit().
  //
  // The resulting map must correspond precisely with the stack layout
  // created for the integer registers as saved by (code generated by)
  // GenerateTrapExit().  To do that we use trapExitLayout_ and
  // trapExitLayoutNumWords_, which together comprise a description of the
  // layout and are created by GenerateTrapExitMachineState().
  MOZ_MUST_USE bool generateStackmapEntriesForTrapExit(
      const ArgTypeVector& args, ExitStubMapVector* extras) {
    return GenerateStackmapEntriesForTrapExit(args, trapExitLayout_,
                                              trapExitLayoutNumWords_, extras);
  }

  // Creates a stackmap associated with the instruction denoted by
  // |assemblerOffset|, incorporating pointers from the current operand
  // stack |stk|, incorporating possible extra pointers in |extras| at the
  // lower-addressed end, and possibly with the associated frame having a
  // ref-typed DebugFrame as indicated by |refDebugFrame|.
  MOZ_MUST_USE bool createStackMap(const char* who,
                                   const ExitStubMapVector& extras,
                                   uint32_t assemblerOffset,
                                   HasRefTypedDebugFrame refDebugFrame,
                                   const StkVector& stk) {
    size_t countedPointers = machineStackTracker.numPtrs() + memRefsOnStk;
#ifndef DEBUG
    // An important optimization.  If there are obviously no pointers, as
    // we expect in the majority of cases, exit quickly.
    if (countedPointers == 0 && refDebugFrame == HasRefTypedDebugFrame::No) {
      // We can skip creating the map if there are no |true| elements in
      // |extras|.
      bool extrasHasRef = false;
      for (bool b : extras) {
        if (b) {
          extrasHasRef = true;
          break;
        }
      }
      if (!extrasHasRef) {
        return true;
      }
    }
#else
    // In the debug case, create the stack map regardless, and cross-check
    // the pointer-counting below.  We expect the final map to have
    // |countedPointers| in total.  This doesn't include those in the
    // DebugFrame, but they do not appear in the map's bitmap.  Note that
    // |countedPointers| is debug-only from this point onwards.
    for (bool b : extras) {
      countedPointers += (b ? 1 : 0);
    }
#endif

    // Start with the frame-setup map, and add operand-stack information to
    // that.  augmentedMst holds live data only within individual calls to
    // createStackMap.
    augmentedMst.clear();
    if (!machineStackTracker.cloneTo(&augmentedMst)) {
      return false;
    }

    // At this point, augmentedMst only contains entries covering the
    // incoming argument area (if any) and the area allocated by this
    // function's prologue.  We now need to calculate how far the machine's
    // stack pointer is below where it was at the start of the body.  But we
    // must take care not to include any words pushed as arguments to an
    // upcoming function call, since those words "belong" to the stackmap of
    // the callee, not to the stackmap of this function.  Note however that
    // any alignment padding pushed prior to pushing the args *does* belong to
    // this function.
    //
    // That padding is taken into account at the point where
    // framePushedExcludingOutboundCallArgs is set, viz, in startCallArgs(),
    // and comprises two components:
    //
    // * call->frameAlignAdjustment
    // * the padding applied to the stack arg area itself.  That is:
    //   StackArgAreaSizeAligned(argTys) - StackArgAreaSizeUnaligned(argTys)
    Maybe<uint32_t> framePushedExcludingArgs;
    if (framePushedAtEntryToBody.isNothing()) {
      // Still in the prologue.  framePushedExcludingArgs remains Nothing.
      MOZ_ASSERT(framePushedExcludingOutboundCallArgs.isNothing());
    } else {
      // In the body.
      MOZ_ASSERT(masm_.framePushed() >= framePushedAtEntryToBody.value());
      if (framePushedExcludingOutboundCallArgs.isSome()) {
        // In the body, and we've potentially pushed some args onto the stack.
        // We must ignore them when sizing the stackmap.
        MOZ_ASSERT(masm_.framePushed() >=
                   framePushedExcludingOutboundCallArgs.value());
        MOZ_ASSERT(framePushedExcludingOutboundCallArgs.value() >=
                   framePushedAtEntryToBody.value());
        framePushedExcludingArgs =
            Some(framePushedExcludingOutboundCallArgs.value());
      } else {
        // In the body, but not with call args on the stack.  The stackmap
        // must be sized so as to extend all the way "down" to
        // masm_.framePushed().
        framePushedExcludingArgs = Some(masm_.framePushed());
      }
    }

    if (framePushedExcludingArgs.isSome()) {
      uint32_t bodyPushedBytes =
          framePushedExcludingArgs.value() - framePushedAtEntryToBody.value();
      MOZ_ASSERT(0 == bodyPushedBytes % sizeof(void*));
      if (!augmentedMst.pushNonGCPointers(bodyPushedBytes / sizeof(void*))) {
        return false;
      }
    }

    // Scan the operand stack, marking pointers in the just-added new
    // section.
    MOZ_ASSERT_IF(framePushedAtEntryToBody.isNothing(), stk.empty());
    MOZ_ASSERT_IF(framePushedExcludingArgs.isNothing(), stk.empty());

    for (const Stk& v : stk) {
#ifndef DEBUG
      // We don't track roots in registers, per rationale below, so if this
      // doesn't hold, something is seriously wrong, and we're likely to get a
      // GC-related crash.
      MOZ_RELEASE_ASSERT(v.kind() != Stk::RegisterRef);
      if (v.kind() != Stk::MemRef) {
        continue;
      }
#else
      // Take the opportunity to check everything we reasonably can about
      // operand stack elements.
      switch (v.kind()) {
        case Stk::MemI32:
        case Stk::MemI64:
        case Stk::MemF32:
        case Stk::MemF64:
        case Stk::ConstI32:
        case Stk::ConstI64:
        case Stk::ConstF32:
        case Stk::ConstF64:
          // All of these have uninteresting type.
          continue;
        case Stk::LocalI32:
        case Stk::LocalI64:
        case Stk::LocalF32:
        case Stk::LocalF64:
          // These also have uninteresting type.  Check that they live in the
          // section of stack set up by beginFunction().  The unguarded use of
          // |value()| here is safe due to the assertion above this loop.
          MOZ_ASSERT(v.offs() <= framePushedAtEntryToBody.value());
          continue;
        case Stk::RegisterI32:
        case Stk::RegisterI64:
        case Stk::RegisterF32:
        case Stk::RegisterF64:
          // These also have uninteresting type, but more to the point: all
          // registers holding live values should have been flushed to the
          // machine stack immediately prior to the instruction to which this
          // stackmap pertains.  So these can't happen.
          MOZ_CRASH("createStackMap: operand stack has Register-non-Ref");
        case Stk::MemRef:
          // This is the only case we care about.  We'll handle it after the
          // switch.
          break;
        case Stk::LocalRef:
          // We need the stackmap to mention this pointer, but it should
          // already be in the machineStackTracker section created by
          // beginFunction().
          MOZ_ASSERT(v.offs() <= framePushedAtEntryToBody.value());
          continue;
        case Stk::ConstRef:
          // This can currently only be a null pointer.
          MOZ_ASSERT(v.refval() == 0);
          continue;
        case Stk::RegisterRef:
          // This can't happen, per rationale above.
          MOZ_CRASH("createStackMap: operand stack contains RegisterRef");
        default:
          MOZ_CRASH("createStackMap: unknown operand stack element");
      }
#endif
      // v.offs() holds masm.framePushed() at the point immediately after it
      // was pushed on the stack.  Since it's still on the stack,
      // masm.framePushed() can't be less.
      MOZ_ASSERT(v.offs() <= framePushedExcludingArgs.value());
      uint32_t offsFromMapLowest = framePushedExcludingArgs.value() - v.offs();
      MOZ_ASSERT(0 == offsFromMapLowest % sizeof(void*));
      augmentedMst.setGCPointer(offsFromMapLowest / sizeof(void*));
    }

    // Create the final StackMap.  The initial map is zeroed out, so there's
    // no need to write zero bits in it.
    const uint32_t extraWords = extras.length();
    const uint32_t augmentedMstWords = augmentedMst.length();
    const uint32_t numMappedWords = extraWords + augmentedMstWords;
    StackMap* stackMap = StackMap::create(numMappedWords);
    if (!stackMap) {
      return false;
    }

    {
      // First the exit stub extra words, if any.
      uint32_t i = 0;
      for (bool b : extras) {
        if (b) {
          stackMap->setBit(i);
        }
        i++;
      }
    }
    // Followed by the "main" part of the map.
    for (uint32_t i = 0; i < augmentedMstWords; i++) {
      if (augmentedMst.isGCPointer(i)) {
        stackMap->setBit(extraWords + i);
      }
    }

    stackMap->setExitStubWords(extraWords);

    // Record in the map how far down from the highest address the Frame* is.
    // Take the opportunity to check that we haven't marked any part of the
    // Frame itself as a pointer.
    stackMap->setFrameOffsetFromTop(numStackArgWords +
                                    sizeof(Frame) / sizeof(void*));
#ifdef DEBUG
    for (uint32_t i = 0; i < sizeof(Frame) / sizeof(void*); i++) {
      MOZ_ASSERT(stackMap->getBit(stackMap->numMappedWords -
                                  stackMap->frameOffsetFromTop + i) == 0);
    }
#endif

    // Note the presence of a ref-typed DebugFrame, if any.
    if (refDebugFrame == HasRefTypedDebugFrame::Yes) {
      stackMap->setHasRefTypedDebugFrame();
    }

    // Add the completed map to the running collection thereof.
    if (!stackMaps_->add((uint8_t*)(uintptr_t)assemblerOffset, stackMap)) {
      stackMap->destroy();
      return false;
    }

#ifdef DEBUG
    {
      // Crosscheck the map pointer counting.
      uint32_t nw = stackMap->numMappedWords;
      uint32_t np = 0;
      for (uint32_t i = 0; i < nw; i++) {
        np += stackMap->getBit(i);
      }
      MOZ_ASSERT(size_t(np) == countedPointers);
    }
#endif

    return true;
  }
};

// The baseline compiler proper.

class BaseCompiler final : public BaseCompilerInterface {
  using Local = BaseStackFrame::Local;
  using LabelVector = Vector<NonAssertingLabel, 8, SystemAllocPolicy>;

  // Bit set used for simple bounds check elimination.  Capping this at 64
  // locals makes sense; even 32 locals would probably be OK in practice.
  //
  // For more information about BCE, see the block comment above
  // popMemoryAccess(), below.

  using BCESet = uint64_t;

  // Control node, representing labels and stack heights at join points.

  struct Control {
    NonAssertingLabel label;       // The "exit" label
    NonAssertingLabel otherLabel;  // Used for the "else" branch of if-then-else
    StackHeight stackHeight;       // From BaseStackFrame
    uint32_t stackSize;            // Value stack height
    BCESet bceSafeOnEntry;         // Bounds check info flowing into the item
    BCESet bceSafeOnExit;          // Bounds check info flowing out of the item
    bool deadOnArrival;            // deadCode_ was set on entry to the region
    bool deadThenBranch;           // deadCode_ was set on exit from "then"

    Control()
        : stackHeight(StackHeight::Invalid()),
          stackSize(UINT32_MAX),
          bceSafeOnEntry(0),
          bceSafeOnExit(~BCESet(0)),
          deadOnArrival(false),
          deadThenBranch(false) {}
  };

  class NothingVector {
    Nothing unused_;

   public:
    bool resize(size_t length) { return true; }
    Nothing& operator[](size_t) { return unused_; }
    Nothing& back() { return unused_; }
  };

  struct BaseCompilePolicy {
    // The baseline compiler tracks values on a stack of its own -- it
    // needs to scan that stack for spilling -- and thus has no need
    // for the values maintained by the iterator.
    using Value = Nothing;
    using ValueVector = NothingVector;

    // The baseline compiler uses the iterator's control stack, attaching
    // its own control information.
    using ControlItem = Control;
  };

  using BaseOpIter = OpIter<BaseCompilePolicy>;

  // The baseline compiler uses OOL code more sparingly than Baldr,
  // since our code is not high-performance and frills like code
  // density and branch-prediction friendliness are less important.

  class OutOfLineCode : public TempObject {
   private:
    NonAssertingLabel entry_;
    NonAssertingLabel rejoin_;
    StackHeight stackHeight_;

   public:
    OutOfLineCode() : stackHeight_(StackHeight::Invalid()) {}

    Label* entry() { return &entry_; }
    Label* rejoin() { return &rejoin_; }

    void setStackHeight(StackHeight stackHeight) {
      MOZ_ASSERT(!stackHeight_.isValid());
      stackHeight_ = stackHeight;
    }

    void bind(BaseStackFrame* fr, MacroAssembler* masm) {
      MOZ_ASSERT(stackHeight_.isValid());
      masm->bind(&entry_);
      fr->setStackHeight(stackHeight_);
    }

    // The generate() method must be careful about register use
    // because it will be invoked when there is a register
    // assignment in the BaseCompiler that does not correspond
    // to the available registers when the generated OOL code is
    // executed.  The register allocator *must not* be called.
    //
    // The best strategy is for the creator of the OOL object to
    // allocate all temps that the OOL code will need.
    //
    // Input, output, and temp registers are embedded in the OOL
    // object and are known to the code generator.
    //
    // Scratch registers are available to use in OOL code.
    //
    // All other registers must be explicitly saved and restored
    // by the OOL code before being used.

    virtual void generate(MacroAssembler* masm) = 0;
  };

  enum class LatentOp { None, Compare, Eqz };

  struct AccessCheck {
    AccessCheck()
        : omitBoundsCheck(false),
          omitAlignmentCheck(false),
          onlyPointerAlignment(false) {}

    // If `omitAlignmentCheck` is true then we need to check neither the
    // pointer nor the offset.  Otherwise, if `onlyPointerAlignment` is true
    // then we need to check only the pointer.  Otherwise, check the sum of
    // pointer and offset.
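    //
    // (For instance, when the constant offset is a multiple of the access
    // size, pointer + offset is aligned exactly when the pointer is, so
    // checking only the pointer suffices; that is the kind of case
    // `onlyPointerAlignment` is intended to cover.)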

    bool omitBoundsCheck;
    bool omitAlignmentCheck;
    bool onlyPointerAlignment;
  };

  const ModuleEnvironment& env_;
  BaseOpIter iter_;
  const FuncCompileInput& func_;
  size_t lastReadCallSite_;
  TempAllocator& alloc_;
  const ValTypeVector& locals_;  // Types of parameters and locals
  bool deadCode_;  // Flag indicating we should decode & discard the opcode
  BCESet
      bceSafe_;  // Locals that have been bounds checked and not updated since
  ValTypeVector SigD_;
  ValTypeVector SigF_;
  NonAssertingLabel returnLabel_;

  LatentOp latentOp_;   // Latent operation for branch (seen next)
  ValType latentType_;  // Operand type, if latentOp_ != LatentOp::None
  Assembler::Condition
      latentIntCmp_;  // Comparison operator, if latentOp_ == Compare, int types
  Assembler::DoubleCondition
      latentDoubleCmp_;  // Comparison operator, if latentOp_ == Compare, float
                         // types

  FuncOffsets offsets_;
  MacroAssembler& masm;  // No '_' suffix - too tedious...
  BaseRegAlloc ra;       // Ditto
  BaseStackFrame fr;

  StackMapGenerator stackMapGenerator_;

  BaseStackFrame::LocalVector localInfo_;
  Vector<OutOfLineCode*, 8, SystemAllocPolicy> outOfLine_;

  // On specific platforms we sometimes need to use specific registers.

  SpecificRegs specific_;

  // There are more members scattered throughout.

 public:
  BaseCompiler(const ModuleEnvironment& env, const FuncCompileInput& input,
               const ValTypeVector& locals, const MachineState& trapExitLayout,
               size_t trapExitLayoutNumWords, Decoder& decoder,
               StkVector& stkSource, TempAllocator* alloc, MacroAssembler* masm,
               StackMaps* stackMaps);
  ~BaseCompiler();

  MOZ_MUST_USE bool init();

  FuncOffsets finish();

  MOZ_MUST_USE bool emitFunction();
  void emitInitStackLocals();

  const FuncTypeWithId& funcType() const {
    return *env_.funcTypes[func_.index];
  }

  // Used by some of the ScratchRegister implementations.
  operator MacroAssembler&() const { return masm; }
  operator BaseRegAlloc&() { return ra; }

  bool usesSharedMemory() const { return env_.usesSharedMemory(); }

 private:
  ////////////////////////////////////////////////////////////
  //
  // Out of line code management.

  MOZ_MUST_USE OutOfLineCode* addOutOfLineCode(OutOfLineCode* ool) {
    if (!ool || !outOfLine_.append(ool)) {
      return nullptr;
    }
    ool->setStackHeight(fr.stackHeight());
    return ool;
  }

  MOZ_MUST_USE bool generateOutOfLineCode() {
    for (uint32_t i = 0; i < outOfLine_.length(); i++) {
      OutOfLineCode* ool = outOfLine_[i];
      ool->bind(&fr, &masm);
      ool->generate(&masm);
    }

    return !masm.oom();
  }

  // Utility.

  const Local& localFromSlot(uint32_t slot, MIRType type) {
    MOZ_ASSERT(localInfo_[slot].type == type);
    return localInfo_[slot];
  }

  ////////////////////////////////////////////////////////////
  //
  // High-level register management.

  bool isAvailableI32(RegI32 r) { return ra.isAvailableI32(r); }
  bool isAvailableI64(RegI64 r) { return ra.isAvailableI64(r); }
  bool isAvailableRef(RegPtr r) { return ra.isAvailablePtr(r); }
  bool isAvailableF32(RegF32 r) { return ra.isAvailableF32(r); }
  bool isAvailableF64(RegF64 r) { return ra.isAvailableF64(r); }

  MOZ_MUST_USE RegI32 needI32() { return ra.needI32(); }
  MOZ_MUST_USE RegI64 needI64() { return ra.needI64(); }
  MOZ_MUST_USE RegPtr needRef() { return ra.needPtr(); }
  MOZ_MUST_USE RegF32 needF32() { return ra.needF32(); }
  MOZ_MUST_USE RegF64 needF64() { return ra.needF64(); }

  void needI32(RegI32 specific) { ra.needI32(specific); }
  void needI64(RegI64 specific) { ra.needI64(specific); }
  void needRef(RegPtr specific) { ra.needPtr(specific); }
  void needF32(RegF32 specific) { ra.needF32(specific); }
  void needF64(RegF64 specific) { ra.needF64(specific); }

#if defined(JS_CODEGEN_ARM)
  MOZ_MUST_USE RegI64 needI64Pair() { return ra.needI64Pair(); }
#endif

  void freeI32(RegI32 r) { ra.freeI32(r); }
  void freeI64(RegI64 r) { ra.freeI64(r); }
  void freeRef(RegPtr r) { ra.freePtr(r); }
  void freeF32(RegF32 r) { ra.freeF32(r); }
  void freeF64(RegF64 r) { ra.freeF64(r); }

  void freeI64Except(RegI64 r, RegI32 except) {
#ifdef JS_PUNBOX64
    MOZ_ASSERT(r.reg == except);
#else
    MOZ_ASSERT(r.high == except || r.low == except);
    freeI64(r);
    needI32(except);
#endif
  }

  void maybeFreeI32(RegI32 r) {
    if (r.isValid()) {
      freeI32(r);
    }
  }

  void maybeFreeI64(RegI64 r) {
    if (r.isValid()) {
      freeI64(r);
    }
  }

  void maybeFreeF64(RegF64 r) {
    if (r.isValid()) {
      freeF64(r);
    }
  }

  void needI32NoSync(RegI32 r) {
    MOZ_ASSERT(isAvailableI32(r));
    needI32(r);
  }

  // TODO / OPTIMIZE: need2xI32() can be optimized along with needI32()
  // to avoid sync(). (Bug 1316802)

  void need2xI32(RegI32 r0, RegI32 r1) {
    needI32(r0);
    needI32(r1);
  }

  void need2xI64(RegI64 r0, RegI64 r1) {
    needI64(r0);
    needI64(r1);
  }

  RegI32 fromI64(RegI64 r) { return RegI32(lowPart(r)); }

#ifdef JS_PUNBOX64
  RegI64 fromI32(RegI32 r) { return RegI64(Register64(r)); }
#endif

  RegI64 widenI32(RegI32 r) {
    MOZ_ASSERT(!isAvailableI32(r));
#ifdef JS_PUNBOX64
    return fromI32(r);
#else
    RegI32 high = needI32();
    return RegI64(Register64(high, r));
#endif
  }

  RegI32 narrowI64(RegI64 r) {
#ifdef JS_PUNBOX64
    return RegI32(r.reg);
#else
    freeI32(RegI32(r.high));
    return RegI32(r.low);
#endif
  }

  RegI32 narrowPtr(RegPtr r) { return RegI32(r); }

  RegI32 lowPart(RegI64 r) {
#ifdef JS_PUNBOX64
    return RegI32(r.reg);
#else
    return RegI32(r.low);
#endif
  }

  RegI32 maybeHighPart(RegI64 r) {
#ifdef JS_PUNBOX64
    return RegI32::Invalid();
#else
    return RegI32(r.high);
#endif
  }

  void maybeClearHighPart(RegI64 r) {
#if !defined(JS_PUNBOX64)
    moveImm32(0, RegI32(r.high));
#endif
  }

  void moveI32(RegI32 src, RegI32 dest) {
    if (src != dest) {
      masm.move32(src, dest);
    }
  }

  void moveI64(RegI64 src, RegI64 dest) {
    if (src != dest) {
      masm.move64(src, dest);
    }
  }

  void moveRef(RegPtr src, RegPtr dest) {
    if (src != dest) {
      masm.movePtr(src, dest);
    }
  }

  void moveF64(RegF64 src, RegF64 dest) {
    if (src != dest) {
      masm.moveDouble(src, dest);
    }
  }

  void moveF32(RegF32 src, RegF32 dest) {
    if (src != dest) {
      masm.moveFloat32(src, dest);
    }
  }

  ////////////////////////////////////////////////////////////////////////////
  //
  // Block parameters and results.
  //
  // Blocks may have multiple parameters and multiple results.  Blocks can also
  // be the target of branches: the entry for loops, and the exit for
  // non-loops.
  //
  // Passing multiple values to a non-branch target (i.e., the entry of a
  // "block") falls out naturally: any items on the value stack can flow
  // directly from one block to another.
  //
  // However, for branch targets, we need to allocate well-known locations for
  // the branch values.  The approach taken in the baseline compiler is to
  // allocate registers to the top N values (currently N=1), and then stack
  // locations for the rest.
  //
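  // For example (an illustrative reading of the rule above, not normative):
  // a branch carrying three values would have the value on top of the stack
  // assigned a register, with the remaining two assigned stack locations.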

  enum class RegKind { All, OnlyGPRs };

  inline void needResultRegisters(ResultType type, RegKind which) {
    if (type.empty()) {
      return;
    }

    for (ABIResultIter iter(type); !iter.done(); iter.next()) {
      ABIResult result = iter.cur();
      // Register results are visited first; when we see a stack result we're
      // done.
      if (!result.inRegister()) {
        return;
      }
      switch (result.type().kind()) {
        case ValType::I32:
          needI32(RegI32(result.gpr()));
          break;
        case ValType::I64:
          needI64(RegI64(result.gpr64()));
          break;
        case ValType::F32:
          if (which == RegKind::All) {
            needF32(RegF32(result.fpr()));
          }
          break;
        case ValType::F64:
          if (which == RegKind::All) {
            needF64(RegF64(result.fpr()));
          }
          break;
        case ValType::Ref:
          needRef(RegPtr(result.gpr()));
          break;
      }
    }
  }

  inline void freeResultRegisters(ResultType type, RegKind which) {
    if (type.empty()) {
      return;
    }

    for (ABIResultIter iter(type); !iter.done(); iter.next()) {
      ABIResult result = iter.cur();
      // Register results are visited first; when we see a stack result we're
      // done.
      if (!result.inRegister()) {
        return;
      }
      switch (result.type().kind()) {
        case ValType::I32:
          freeI32(RegI32(result.gpr()));
          break;
        case ValType::I64:
          freeI64(RegI64(result.gpr64()));
          break;
        case ValType::F32:
          if (which == RegKind::All) {
            freeF32(RegF32(result.fpr()));
          }
          break;
        case ValType::F64:
          if (which == RegKind::All) {
            freeF64(RegF64(result.fpr()));
          }
          break;
        case ValType::Ref:
          freeRef(RegPtr(result.gpr()));
          break;
      }
    }
  }

  void needIntegerResultRegisters(ResultType type) {
    needResultRegisters(type, RegKind::OnlyGPRs);
  }
  void freeIntegerResultRegisters(ResultType type) {
    freeResultRegisters(type, RegKind::OnlyGPRs);
  }

  void needResultRegisters(ResultType type) {
    needResultRegisters(type, RegKind::All);
  }
  void freeResultRegisters(ResultType type) {
    freeResultRegisters(type, RegKind::All);
  }

  void assertResultRegistersAvailable(ResultType type) {
#ifdef DEBUG
    for (ABIResultIter iter(type); !iter.done(); iter.next()) {
      ABIResult result = iter.cur();
      if (!result.inRegister()) {
        return;
      }
      switch (result.type().kind()) {
        case ValType::I32:
          MOZ_ASSERT(isAvailableI32(RegI32(result.gpr())));
          break;
        case ValType::I64:
          MOZ_ASSERT(isAvailableI64(RegI64(result.gpr64())));
          break;
        case ValType::F32:
          MOZ_ASSERT(isAvailableF32(RegF32(result.fpr())));
          break;
        case ValType::F64:
          MOZ_ASSERT(isAvailableF64(RegF64(result.fpr())));
          break;
        case ValType::Ref:
          MOZ_ASSERT(isAvailableRef(RegPtr(result.gpr())));
          break;
      }
    }
#endif
  }

  void captureResultRegisters(ResultType type) {
    assertResultRegistersAvailable(type);
    needResultRegisters(type);
  }

  ////////////////////////////////////////////////////////////
  //
  // Value stack and spilling.
  //
  // The value stack facilitates some on-the-fly register allocation
  // and immediate-constant use.  It tracks constants, latent
  // references to locals, register contents, and values on the CPU
  // stack.
  //
  // The stack can be flushed to memory using sync().  This is handy
  // to avoid problems with control flow and messy register usage
  // patterns.

  // This is the value stack actually used during compilation.  It is a
  // StkVector rather than a StkVector& since constantly dereferencing a
  // StkVector& adds about 0.5% or more to the compiler's dynamic instruction
  // count.
  StkVector stk_;

  // BaselineCompileFunctions() "lends" us the StkVector to use in this
  // BaseCompiler object, and that is installed in |stk_| in our constructor.
  // This is so as to avoid having to malloc/free the vector's contents at
  // each creation/destruction of a BaseCompiler object.  It does however mean
  // that we need to hold on to a reference to BaselineCompileFunctions()'s
  // vector, so we can swap (give) its contents back when this BaseCompiler
  // object is destroyed.  This significantly reduces the heap turnover of the
  // baseline compiler.  See bug 1532592.
  StkVector& stkSource_;

#ifdef DEBUG
  size_t countMemRefsOnStk() {
    size_t nRefs = 0;
    for (Stk& v : stk_) {
      if (v.kind() == Stk::MemRef) {
        nRefs++;
      }
    }
    return nRefs;
  }
#endif

  template <typename T>
  void push(T item) {
    // None of the single-arg Stk constructors create a Stk::MemRef, so
    // there's no need to increment stackMapGenerator_.memRefsOnStk here.
    stk_.infallibleEmplaceBack(Stk(item));
  }

  void pushConstRef(intptr_t v) { stk_.infallibleEmplaceBack(Stk::StkRef(v)); }

  void loadConstI32(const Stk& src, RegI32 dest) {
    moveImm32(src.i32val(), dest);
  }

  void loadMemI32(const Stk& src, RegI32 dest) {
    fr.loadStackI32(src.offs(), dest);
  }

  void loadLocalI32(const Stk& src, RegI32 dest) {
    fr.loadLocalI32(localFromSlot(src.slot(), MIRType::Int32), dest);
  }

  void loadRegisterI32(const Stk& src, RegI32 dest) {
    moveI32(src.i32reg(), dest);
  }

  void loadConstI64(const Stk& src, RegI64 dest) {
    moveImm64(src.i64val(), dest);
  }

  void loadMemI64(const Stk& src, RegI64 dest) {
    fr.loadStackI64(src.offs(), dest);
  }

  void loadLocalI64(const Stk& src, RegI64 dest) {
    fr.loadLocalI64(localFromSlot(src.slot(), MIRType::Int64), dest);
  }

  void loadRegisterI64(const Stk& src, RegI64 dest) {
    moveI64(src.i64reg(), dest);
  }

  void loadConstRef(const Stk& src, RegPtr dest) {
    moveImmRef(src.refval(), dest);
  }

  void loadMemRef(const Stk& src, RegPtr dest) {
    fr.loadStackPtr(src.offs(), dest);
  }

  void loadLocalRef(const Stk& src, RegPtr dest) {
    fr.loadLocalPtr(localFromSlot(src.slot(), MIRType::RefOrNull), dest);
  }

  void loadRegisterRef(const Stk& src, RegPtr dest) {
    moveRef(src.refReg(), dest);
  }

  void loadConstF64(const Stk& src, RegF64 dest) {
    double d;
    src.f64val(&d);
    masm.loadConstantDouble(d, dest);
  }

  void loadMemF64(const Stk& src, RegF64 dest) {
    fr.loadStackF64(src.offs(), dest);
  }

  void loadLocalF64(const Stk& src, RegF64 dest) {
    fr.loadLocalF64(localFromSlot(src.slot(), MIRType::Double), dest);
  }

  void loadRegisterF64(const Stk& src, RegF64 dest) {
    moveF64(src.f64reg(), dest);
  }

  void loadConstF32(const Stk& src, RegF32 dest) {
    float f;
    src.f32val(&f);
    masm.loadConstantFloat32(f, dest);
  }

  void loadMemF32(const Stk& src, RegF32 dest) {
    fr.loadStackF32(src.offs(), dest);
  }

  void loadLocalF32(const Stk& src, RegF32 dest) {
    fr.loadLocalF32(localFromSlot(src.slot(), MIRType::Float32), dest);
  }

  void loadRegisterF32(const Stk& src, RegF32 dest) {
    moveF32(src.f32reg(), dest);
  }

  void loadI32(const Stk& src, RegI32 dest) {
    switch (src.kind()) {
      case Stk::ConstI32:
        loadConstI32(src, dest);
        break;
      case Stk::MemI32:
        loadMemI32(src, dest);
        break;
      case Stk::LocalI32:
        loadLocalI32(src, dest);
        break;
      case Stk::RegisterI32:
        loadRegisterI32(src, dest);
        break;
      default:
        MOZ_CRASH("Compiler bug: Expected I32 on stack");
    }
  }

  void loadI64(const Stk& src, RegI64 dest) {
    switch (src.kind()) {
      case Stk::ConstI64:
        loadConstI64(src, dest);
        break;
      case Stk::MemI64:
        loadMemI64(src, dest);
        break;
      case Stk::LocalI64:
        loadLocalI64(src, dest);
        break;
      case Stk::RegisterI64:
        loadRegisterI64(src, dest);
        break;
      default:
        MOZ_CRASH("Compiler bug: Expected I64 on stack");
    }
  }

#if !defined(JS_PUNBOX64)
  void loadI64Low(const Stk& src, RegI32 dest) {
    switch (src.kind()) {
      case Stk::ConstI64:
        moveImm32(int32_t(src.i64val()), dest);
        break;
      case Stk::MemI64:
        fr.loadStackI64Low(src.offs(), dest);
        break;
      case Stk::LocalI64:
        fr.loadLocalI64Low(localFromSlot(src.slot(), MIRType::Int64), dest);
        break;
      case Stk::RegisterI64:
        moveI32(RegI32(src.i64reg().low), dest);
        break;
      default:
        MOZ_CRASH("Compiler bug: Expected I64 on stack");
    }
  }

  void loadI64High(const Stk& src, RegI32 dest) {
    switch (src.kind()) {
      case Stk::ConstI64:
        moveImm32(int32_t(src.i64val() >> 32), dest);
        break;
      case Stk::MemI64:
        fr.loadStackI64High(src.offs(), dest);
        break;
      case Stk::LocalI64:
        fr.loadLocalI64High(localFromSlot(src.slot(), MIRType::Int64), dest);
        break;
      case Stk::RegisterI64:
        moveI32(RegI32(src.i64reg().high), dest);
        break;
      default:
        MOZ_CRASH("Compiler bug: Expected I64 on stack");
    }
  }
#endif

  void loadF64(const Stk& src, RegF64 dest) {
    switch (src.kind()) {
      case Stk::ConstF64:
        loadConstF64(src, dest);
        break;
      case Stk::MemF64:
        loadMemF64(src, dest);
        break;
      case Stk::LocalF64:
        loadLocalF64(src, dest);
        break;
      case Stk::RegisterF64:
        loadRegisterF64(src, dest);
        break;
      default:
        MOZ_CRASH("Compiler bug: expected F64 on stack");
    }
  }

  void loadF32(const Stk& src, RegF32 dest) {
    switch (src.kind()) {
      case Stk::ConstF32:
        loadConstF32(src, dest);
        break;
      case Stk::MemF32:
        loadMemF32(src, dest);
        break;
      case Stk::LocalF32:
        loadLocalF32(src, dest);
        break;
      case Stk::RegisterF32:
        loadRegisterF32(src, dest);
        break;
      default:
        MOZ_CRASH("Compiler bug: expected F32 on stack");
    }
  }

  void loadRef(const Stk& src, RegPtr dest) {
    switch (src.kind()) {
      case Stk::ConstRef:
        loadConstRef(src, dest);
        break;
      case Stk::MemRef:
        loadMemRef(src, dest);
        break;
      case Stk::LocalRef:
        loadLocalRef(src, dest);
        break;
      case Stk::RegisterRef:
        loadRegisterRef(src, dest);
        break;
      default:
        MOZ_CRASH("Compiler bug: expected ref on stack");
    }
  }

  // Flush all local and register value stack elements to memory.
  //
  // TODO / OPTIMIZE: As this is fairly expensive and causes worse
  // code to be emitted subsequently, it is useful to avoid calling
  // it.  (Bug 1316802)
  //
  // Some optimization has been done already.  Remaining
  // opportunities:
  //
  //  - It would be interesting to see if we can specialize it
  //    before calls with particularly simple signatures, or where
  //    we can do parallel assignment of register arguments, or
  //    similar.  See notes in emitCall().
  //
  //  - Operations that need specific registers: multiply, quotient,
  //    remainder, will tend to sync because the registers we need
  //    will tend to be allocated.  We may be able to avoid that by
  //    prioritizing registers differently (takeLast instead of
  //    takeFirst) but we may also be able to allocate an unused
  //    register on demand to free up one we need, thus avoiding the
  //    sync.  That type of fix would go into needI32().

  void sync() final {
    size_t start = 0;
    size_t lim = stk_.length();

    for (size_t i = lim; i > 0; i--) {
      // Memory opcodes are first in the enum, so a single check against
      // MemLast is fine.
      if (stk_[i - 1].kind() <= Stk::MemLast) {
        start = i;
        break;
      }
    }

    for (size_t i = start; i < lim; i++) {
      Stk& v = stk_[i];
      switch (v.kind()) {
        case Stk::LocalI32: {
          ScratchI32 scratch(*this);
          loadLocalI32(v, scratch);
          uint32_t offs = fr.pushPtr(scratch);
          v.setOffs(Stk::MemI32, offs);
          break;
        }
        case Stk::RegisterI32: {
          uint32_t offs = fr.pushPtr(v.i32reg());
          freeI32(v.i32reg());
          v.setOffs(Stk::MemI32, offs);
          break;
        }
        case Stk::LocalI64: {
          ScratchI32 scratch(*this);
#ifdef JS_PUNBOX64
          loadI64(v, fromI32(scratch));
          uint32_t offs = fr.pushPtr(scratch);
#else
          fr.loadLocalI64High(localFromSlot(v.slot(), MIRType::Int64), scratch);
          fr.pushPtr(scratch);
          fr.loadLocalI64Low(localFromSlot(v.slot(), MIRType::Int64), scratch);
          uint32_t offs = fr.pushPtr(scratch);
#endif
          v.setOffs(Stk::MemI64, offs);
          break;
        }
        case Stk::RegisterI64: {
#ifdef JS_PUNBOX64
          uint32_t offs = fr.pushPtr(v.i64reg().reg);
          freeI64(v.i64reg());
#else
          fr.pushPtr(v.i64reg().high);
          uint32_t offs = fr.pushPtr(v.i64reg().low);
          freeI64(v.i64reg());
#endif
          v.setOffs(Stk::MemI64, offs);
          break;
        }
        case Stk::LocalF64: {
          ScratchF64 scratch(*this);
          loadF64(v, scratch);
          uint32_t offs = fr.pushDouble(scratch);
          v.setOffs(Stk::MemF64, offs);
          break;
        }
        case Stk::RegisterF64: {
          uint32_t offs = fr.pushDouble(v.f64reg());
          freeF64(v.f64reg());
          v.setOffs(Stk::MemF64, offs);
          break;
        }
        case Stk::LocalF32: {
          ScratchF32 scratch(*this);
          loadF32(v, scratch);
          uint32_t offs = fr.pushFloat32(scratch);
          v.setOffs(Stk::MemF32, offs);
          break;
        }
        case Stk::RegisterF32: {
          uint32_t offs = fr.pushFloat32(v.f32reg());
          freeF32(v.f32reg());
          v.setOffs(Stk::MemF32, offs);
          break;
        }
        case Stk::LocalRef: {
          ScratchPtr scratch(*this);
          loadLocalRef(v, scratch);
          uint32_t offs = fr.pushPtr(scratch);
          v.setOffs(Stk::MemRef, offs);
          stackMapGenerator_.memRefsOnStk++;
          break;
        }
        case Stk::RegisterRef: {
          uint32_t offs = fr.pushPtr(v.refReg());
          freeRef(v.refReg());
          v.setOffs(Stk::MemRef, offs);
          stackMapGenerator_.memRefsOnStk++;
          break;
        }
        default: {
          break;
        }
      }
    }
  }
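
  // Spill a live pointer register to the machine stack and release it in the
  // register allocator; restoreTempPtr() re-acquires the register and pops
  // the saved value back into it.  The saves and restores must nest properly
  // with other pushes and pops.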

  void saveTempPtr(RegPtr r) final {
    MOZ_ASSERT(!ra.isAvailablePtr(r));
    fr.pushPtr(r);
    ra.freePtr(r);
    MOZ_ASSERT(ra.isAvailablePtr(r));
  }

  void restoreTempPtr(RegPtr r) final {
    MOZ_ASSERT(ra.isAvailablePtr(r));
    ra.needPtr(r);
    fr.popPtr(r);
    MOZ_ASSERT(!ra.isAvailablePtr(r));
  }

  // Various methods for creating a stack map.  Stack maps are indexed by the
  // lowest address of the instruction immediately *after* the instruction of
  // interest.  In practice that means either: the return point of a call, the
  // instruction immediately after a trap instruction (the "resume"
  // instruction), or the instruction immediately following a no-op (when
  // debugging is enabled).
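  //
  // Illustrative (simplified) use at a call site, via the overload taking an
  // assembler offset; the string argument is just a label for diagnostics:
  //
  //   CodeOffset raOffset = callDefinition(funcIndex, baselineCall);
  //   if (!createStackMap("call", raOffset)) {
  //     return false;
  //   }
  //
  // The offset identifies the return point of the call, which is the address
  // the resulting stack map is keyed on.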

  // Create a vanilla stack map.
  MOZ_MUST_USE bool createStackMap(const char* who) {
    const ExitStubMapVector noExtras;
    return stackMapGenerator_.createStackMap(
        who, noExtras, masm.currentOffset(), HasRefTypedDebugFrame::No, stk_);
  }

  // Create a stack map as vanilla, but for a custom assembler offset.
  MOZ_MUST_USE bool createStackMap(const char* who,
                                   CodeOffset assemblerOffset) {
    const ExitStubMapVector noExtras;
    return stackMapGenerator_.createStackMap(who, noExtras,
                                             assemblerOffset.offset(),
                                             HasRefTypedDebugFrame::No, stk_);
  }

  // Create a stack map as vanilla, and note the presence of a ref-typed
  // DebugFrame on the stack.
  MOZ_MUST_USE bool createStackMap(const char* who,
                                   HasRefTypedDebugFrame refDebugFrame) {
    const ExitStubMapVector noExtras;
    return stackMapGenerator_.createStackMap(
        who, noExtras, masm.currentOffset(), refDebugFrame, stk_);
  }

  // The most general stack map construction.
  MOZ_MUST_USE bool createStackMap(const char* who,
                                   const ExitStubMapVector& extras,
                                   uint32_t assemblerOffset,
                                   HasRefTypedDebugFrame refDebugFrame) {
    return stackMapGenerator_.createStackMap(who, extras, assemblerOffset,
                                             refDebugFrame, stk_);
  }

  // This is an optimization used to avoid calling sync() in
  // setLocal(): if the local has no unresolved uses on the value
  // stack, the sync can be skipped.
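  //
  // For example, a local.set of slot N while an earlier local.get of
  // slot N is still pending (unresolved) on the value stack must sync
  // first, so that the pending get observes the old value rather than
  // the new one.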

  bool hasLocal(uint32_t slot) {
    for (size_t i = stk_.length(); i > 0; i--) {
      // Memory opcodes are first in the enum, single check against MemLast is
      // fine.
      Stk::Kind kind = stk_[i - 1].kind();
      if (kind <= Stk::MemLast) {
        return false;
      }

      // Local opcodes follow memory opcodes in the enum, single check against
      // LocalLast is sufficient.
      if (kind <= Stk::LocalLast && stk_[i - 1].slot() == slot) {
        return true;
      }
    }
    return false;
  }

  void syncLocal(uint32_t slot) {
    if (hasLocal(slot)) {
      sync();  // TODO / OPTIMIZE: Improve this?  (Bug 1316817)
    }
  }

  // Push the register r onto the stack.

  void pushI32(RegI32 r) {
    MOZ_ASSERT(!isAvailableI32(r));
    push(Stk(r));
  }

  void pushI64(RegI64 r) {
    MOZ_ASSERT(!isAvailableI64(r));
    push(Stk(r));
  }

  void pushRef(RegPtr r) {
    MOZ_ASSERT(!isAvailableRef(r));
    push(Stk(r));
  }

  void pushF64(RegF64 r) {
    MOZ_ASSERT(!isAvailableF64(r));
    push(Stk(r));
  }

  void pushF32(RegF32 r) {
    MOZ_ASSERT(!isAvailableF32(r));
    push(Stk(r));
  }

  // Push the value onto the stack.

  void pushI32(int32_t v) { push(Stk(v)); }

  void pushI64(int64_t v) { push(Stk(v)); }

  void pushRef(intptr_t v) { pushConstRef(v); }

  void pushF64(double v) { push(Stk(v)); }

  void pushF32(float v) { push(Stk(v)); }

  // Push the local slot onto the stack.  The slot will not be read
  // here; it will be read when it is consumed, or when a side
  // effect to the slot forces its value to be saved.
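  //
  // (A local.get, for example, only records the slot here; the
  // actual load is deferred.)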

  void pushLocalI32(uint32_t slot) {
    stk_.infallibleEmplaceBack(Stk(Stk::LocalI32, slot));
  }

  void pushLocalI64(uint32_t slot) {
    stk_.infallibleEmplaceBack(Stk(Stk::LocalI64, slot));
  }

  void pushLocalRef(uint32_t slot) {
    stk_.infallibleEmplaceBack(Stk(Stk::LocalRef, slot));
  }

  void pushLocalF64(uint32_t slot) {
    stk_.infallibleEmplaceBack(Stk(Stk::LocalF64, slot));
  }

  void pushLocalF32(uint32_t slot) {
    stk_.infallibleEmplaceBack(Stk(Stk::LocalF32, slot));
  }

  // Call only from other popI32() variants.
  // v must be the stack top.  May pop the CPU stack.

  void popI32(const Stk& v, RegI32 dest) {
    MOZ_ASSERT(&v == &stk_.back());
    switch (v.kind()) {
      case Stk::ConstI32:
        loadConstI32(v, dest);
        break;
      case Stk::LocalI32:
        loadLocalI32(v, dest);
        break;
      case Stk::MemI32:
        fr.popPtr(dest);
        break;
      case Stk::RegisterI32:
        loadRegisterI32(v, dest);
        break;
      default:
        MOZ_CRASH("Compiler bug: expected int on stack");
    }
  }

  MOZ_MUST_USE RegI32 popI32() {
    Stk& v = stk_.back();
    RegI32 r;
    if (v.kind() == Stk::RegisterI32) {
      r = v.i32reg();
    } else {
      popI32(v, (r = needI32()));
    }
    stk_.popBack();
    return r;
  }

  RegI32 popI32(RegI32 specific) {
    Stk& v = stk_.back();

    if (!(v.kind() == Stk::RegisterI32 && v.i32reg() == specific)) {
      needI32(specific);
      popI32(v, specific);
      if (v.kind() == Stk::RegisterI32) {
        freeI32(v.i32reg());
      }
    }

    stk_.popBack();
    return specific;
  }

  // Call only from other popI64() variants.
  // v must be the stack top.  May pop the CPU stack.

  void popI64(const Stk& v, RegI64 dest) {
    MOZ_ASSERT(&v == &stk_.back());
    switch (v.kind()) {
      case Stk::ConstI64:
        loadConstI64(v, dest);
        break;
      case Stk::LocalI64:
        loadLocalI64(v, dest);
        break;
      case Stk::MemI64:
#ifdef JS_PUNBOX64
        fr.popPtr(dest.reg);
#else
        fr.popPtr(dest.low);
        fr.popPtr(dest.high);
#endif
        break;
      case Stk::RegisterI64:
        loadRegisterI64(v, dest);
        break;
      default:
        MOZ_CRASH("Compiler bug: expected long on stack");
    }
  }

  MOZ_MUST_USE RegI64 popI64() {
    Stk& v = stk_.back();
    RegI64 r;
    if (v.kind() == Stk::RegisterI64) {
      r = v.i64reg();
    } else {
      popI64(v, (r = needI64()));
    }
    stk_.popBack();
    return r;
  }

  // Note that on 32-bit systems the stack top may occupy only one half
  // of "specific".  That case could be optimized, but for simplicity,
  // if the register does not match exactly, we just force the stack
  // top to memory and then read it back into the requested register.

  RegI64 popI64(RegI64 specific) {
    Stk& v = stk_.back();

    if (!(v.kind() == Stk::RegisterI64 && v.i64reg() == specific)) {
      needI64(specific);
      popI64(v, specific);
      if (v.kind() == Stk::RegisterI64) {
        freeI64(v.i64reg());
      }
    }

    stk_.popBack();
    return specific;
  }

  // Call only from other popRef() variants.
  // v must be the stack top.  May pop the CPU stack.

  void popRef(const Stk& v, RegPtr dest) {
    MOZ_ASSERT(&v == &stk_.back());
    switch (v.kind()) {
      case Stk::ConstRef:
        loadConstRef(v, dest);
        break;
      case Stk::LocalRef:
        loadLocalRef(v, dest);
        break;
      case Stk::MemRef:
        fr.popPtr(dest);
        break;
      case Stk::RegisterRef:
        loadRegisterRef(v, dest);
        break;
      default:
        MOZ_CRASH("Compiler bug: expected ref on stack");
    }
  }

  RegPtr popRef(RegPtr specific) {
    Stk& v = stk_.back();

    if (!(v.kind() == Stk::RegisterRef && v.refReg() == specific)) {
      needRef(specific);
      popRef(v, specific);
      if (v.kind() == Stk::RegisterRef) {
        freeRef(v.refReg());
      }
    }

    stk_.popBack();
    if (v.kind() == Stk::MemRef) {
      stackMapGenerator_.memRefsOnStk--;
    }
    return specific;
  }

  MOZ_MUST_USE RegPtr popRef() {
    Stk& v = stk_.back();
    RegPtr r;
    if (v.kind() == Stk::RegisterRef) {
      r = v.refReg();
    } else {
      popRef(v, (r = needRef()));
    }
    stk_.popBack();
    if (v.kind() == Stk::MemRef) {
      stackMapGenerator_.memRefsOnStk--;
    }
    return r;
  }

  // Call only from other popF64() variants.
  // v must be the stack top.  May pop the CPU stack.

  void popF64(const Stk& v, RegF64 dest) {
    MOZ_ASSERT(&v == &stk_.back());
    switch (v.kind()) {
      case Stk::ConstF64:
        loadConstF64(v, dest);
        break;
      case Stk::LocalF64:
        loadLocalF64(v, dest);
        break;
      case Stk::MemF64:
        fr.popDouble(dest);
        break;
      case Stk::RegisterF64:
        loadRegisterF64(v, dest);
        break;
      default:
        MOZ_CRASH("Compiler bug: expected double on stack");
    }
  }

  MOZ_MUST_USE RegF64 popF64() {
    Stk& v = stk_.back();
    RegF64 r;
    if (v.kind() == Stk::RegisterF64) {
      r = v.f64reg();
    } else {
      popF64(v, (r = needF64()));
    }
    stk_.popBack();
    return r;
  }

  RegF64 popF64(RegF64 specific) {
    Stk& v = stk_.back();

    if (!(v.kind() == Stk::RegisterF64 && v.f64reg() == specific)) {
      needF64(specific);
      popF64(v, specific);
      if (v.kind() == Stk::RegisterF64) {
        freeF64(v.f64reg());
      }
    }

    stk_.popBack();
    return specific;
  }

  // Call only from other popF32() variants.
  // v must be the stack top.  May pop the CPU stack.

  void popF32(const Stk& v, RegF32 dest) {
    MOZ_ASSERT(&v == &stk_.back());
    switch (v.kind()) {
      case Stk::ConstF32:
        loadConstF32(v, dest);
        break;
      case Stk::LocalF32:
        loadLocalF32(v, dest);
        break;
      case Stk::MemF32:
        fr.popFloat32(dest);
        break;
      case Stk::RegisterF32:
        loadRegisterF32(v, dest);
        break;
      default:
        MOZ_CRASH("Compiler bug: expected float on stack");
    }
  }

  MOZ_MUST_USE RegF32 popF32() {
    Stk& v = stk_.back();
    RegF32 r;
    if (v.kind() == Stk::RegisterF32) {
      r = v.f32reg();
    } else {
      popF32(v, (r = needF32()));
    }
    stk_.popBack();
    return r;
  }

  RegF32 popF32(RegF32 specific) {
    Stk& v = stk_.back();

    if (!(v.kind() == Stk::RegisterF32 && v.f32reg() == specific)) {
      needF32(specific);
      popF32(v, specific);
      if (v.kind() == Stk::RegisterF32) {
        freeF32(v.f32reg());
      }
    }

    stk_.popBack();
    return specific;
  }

  MOZ_MUST_USE bool popConstI32(int32_t* c) {
    Stk& v = stk_.back();
    if (v.kind() != Stk::ConstI32) {
      return false;
    }
    *c = v.i32val();
    stk_.popBack();
    return true;
  }

  MOZ_MUST_USE bool popConstI64(int64_t* c) {
    Stk& v = stk_.back();
    if (v.kind() != Stk::ConstI64) {
      return false;
    }
    *c = v.i64val();
    stk_.popBack();
    return true;
  }

  MOZ_MUST_USE bool peekConstI32(int32_t* c) {
    Stk& v = stk_.back();
    if (v.kind() != Stk::ConstI32) {
      return false;
    }
    *c = v.i32val();
    return true;
  }

  MOZ_MUST_USE bool peekConstI64(int64_t* c) {
    Stk& v = stk_.back();
    if (v.kind() != Stk::ConstI64) {
      return false;
    }
    *c = v.i64val();
    return true;
  }

  MOZ_MUST_USE bool peek2xI32(int32_t* c0, int32_t* c1) {
    MOZ_ASSERT(stk_.length() >= 2);
    const Stk& v0 = *(stk_.end() - 1);
    const Stk& v1 = *(stk_.end() - 2);
    if (v0.kind() != Stk::ConstI32 || v1.kind() != Stk::ConstI32) {
      return false;
    }
    *c0 = v0.i32val();
    *c1 = v1.i32val();
    return true;
  }

  MOZ_MUST_USE bool popConstPositivePowerOfTwoI32(int32_t* c,
                                                  uint_fast8_t* power,
                                                  int32_t cutoff) {
    Stk& v = stk_.back();
    if (v.kind() != Stk::ConstI32) {
      return false;
    }
    *c = v.i32val();
    if (*c <= cutoff || !IsPowerOfTwo(static_cast<uint32_t>(*c))) {
      return false;
    }
    *power = FloorLog2(*c);
    stk_.popBack();
    return true;
  }

  MOZ_MUST_USE bool popConstPositivePowerOfTwoI64(int64_t* c,
                                                  uint_fast8_t* power,
                                                  int64_t cutoff) {
    Stk& v = stk_.back();
    if (v.kind() != Stk::ConstI64) {
      return false;
    }
    *c = v.i64val();
    if (*c <= cutoff || !IsPowerOfTwo(static_cast<uint64_t>(*c))) {
      return false;
    }
    *power = FloorLog2(*c);
    stk_.popBack();
    return true;
  }

  MOZ_MUST_USE bool peekLocalI32(uint32_t* local) {
    Stk& v = stk_.back();
    if (v.kind() != Stk::LocalI32) {
      return false;
    }
    *local = v.slot();
    return true;
  }

  // TODO / OPTIMIZE (Bug 1316818): At the moment we use the Wasm
  // inter-procedure ABI for block returns, which allocates ReturnReg as the
  // single block result register.  It is possible that other choices would
  // lead to better register allocation, since ReturnReg is often first in the
  // register set and is therefore heavily contended by the register allocator,
  // which uses takeFirst().
  //
  // Obvious options:
  //  - pick a register at the back of the register set
  //  - pick a random register per block (different blocks have
  //    different join regs)

  void popRegisterResults(ABIResultIter& iter) {
    // Pop register results.  Note that in the single-value case, popping to a
    // register may cause a sync(); for multi-value we sync'd already.
    for (; !iter.done(); iter.next()) {
      const ABIResult& result = iter.cur();
      if (!result.inRegister()) {
        // TODO / OPTIMIZE: We sync here to avoid solving the general parallel
        // move problem in popStackResults.  However we could avoid syncing the
        // values that are going to registers anyway, if they are already in
        // registers.
        sync();
        break;
      }
      switch (result.type().kind()) {
        case ValType::I32:
          popI32(RegI32(result.gpr()));
          break;
        case ValType::I64:
          popI64(RegI64(result.gpr64()));
          break;
        case ValType::F32:
          popF32(RegF32(result.fpr()));
          break;
        case ValType::F64:
          popF64(RegF64(result.fpr()));
          break;
        case ValType::Ref:
          popRef(RegPtr(result.gpr()));
          break;
      }
    }
  }

  void popStackResults(ABIResultIter& iter, StackHeight stackBase) {
    MOZ_ASSERT(!iter.done());

    // The iterator should be advanced beyond register results, and register
    // results should be popped already from the value stack.
    uint32_t alreadyPopped = iter.index();

    // At this point, only stack results remain.  Iterate through them to
    // measure how much stack space they will take up.
    for (; !iter.done(); iter.next()) {
      MOZ_ASSERT(iter.cur().onStack());
    }

    // Calculate the space needed to store stack results, in bytes.
    uint32_t stackResultBytes = iter.stackBytesConsumedSoFar();
    MOZ_ASSERT(stackResultBytes);

    // Compute the stack height including the stack results.  Note that it's
    // possible that this call expands the stack, for example if some of the
    // results are supplied by constants and so are not already on the machine
    // stack.
    uint32_t endHeight = fr.prepareStackResultArea(stackBase, stackResultBytes);

    // Find a free GPR to use when shuffling stack values.  If none is
    // available, push ReturnReg and restore it after we're done.
    bool saved = false;
    RegPtr temp = ra.needTempPtr(RegPtr(ReturnReg), &saved);

    // The sequence of Stk values is in the same order on the machine stack as
    // the result locations, but there is a complication: constant values are
    // not actually pushed on the machine stack.  (At this point registers and
    // locals have been spilled already.)  So, moving the Stk values into place
    // isn't simply a shuffle-down or shuffle-up operation.  There is a part of
    // the Stk sequence that shuffles toward the FP, a part that's already in
    // place, and a part that shuffles toward the SP.  After shuffling, we have
    // to materialize the constants.
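    //
    // Concretely, in the loops below a mem value moves toward the FP when its
    // current height is greater than its destination height, stays put when
    // the two are equal, and moves toward the SP when its destination height
    // is greater.  The FP-direction pass copies deepest values first and the
    // SP-direction pass copies shallowest values first, so no value is
    // overwritten before it has been copied.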

    // Shuffle mem values toward the frame pointer, copying deepest values
    // first.  Stop when we run out of results, get to a register result, or
    // find a Stk value that is closer to the FP than the result.
    for (iter.switchToPrev(); !iter.done(); iter.prev()) {
      const ABIResult& result = iter.cur();
      if (!result.onStack()) {
        break;
      }
      MOZ_ASSERT(result.stackOffset() < stackResultBytes);
      uint32_t destHeight = endHeight - result.stackOffset();
      uint32_t stkBase = stk_.length() - (iter.count() - alreadyPopped);
      Stk& v = stk_[stkBase + iter.index()];
      if (v.isMem()) {
        uint32_t srcHeight = v.offs();
        if (srcHeight <= destHeight) {
          break;
        }
        fr.shuffleStackResultsTowardFP(srcHeight, destHeight, result.size(),
                                       temp);
      }
    }

    // Reset iterator and skip register results.
    for (iter.reset(); !iter.done(); iter.next()) {
      if (iter.cur().onStack()) {
        break;
      }
    }

    // Revisit top stack values, shuffling mem values toward the stack pointer,
    // copying shallowest values first.
    for (; !iter.done(); iter.next()) {
      const ABIResult& result = iter.cur();
      MOZ_ASSERT(result.onStack());
      MOZ_ASSERT(result.stackOffset() < stackResultBytes);
      uint32_t destHeight = endHeight - result.stackOffset();
      Stk& v = stk_[stk_.length() - (iter.index() - alreadyPopped) - 1];
      if (v.isMem()) {
        uint32_t srcHeight = v.offs();
        if (srcHeight >= destHeight) {
          break;
        }
        fr.shuffleStackResultsTowardSP(srcHeight, destHeight, result.size(),
                                       temp);
      }
    }

    // Reset iterator and skip register results, which are already popped off
    // the value stack.
    for (iter.reset(); !iter.done(); iter.next()) {
      if (iter.cur().onStack()) {
        break;
      }
    }

    // Materialize constants and pop the remaining items from the value stack.
    for (; !iter.done(); iter.next()) {
      const ABIResult& result = iter.cur();
      uint32_t resultHeight = endHeight - result.stackOffset();
      Stk& v = stk_.back();
      switch (v.kind()) {
        case Stk::ConstI32:
        case Stk::ConstF32:
          // Rely on the fact that Stk stores its immediate values in a union,
          // and that the bits of an f32 will be in the i32.
          fr.storeImmediateToStack(v.i32val_, resultHeight, temp);
          break;
        case Stk::ConstI64:
        case Stk::ConstF64:
          // Likewise, rely on f64 bits being punned to i64.
          fr.storeImmediateToStack(v.i64val_, resultHeight, temp);
          break;
        case Stk::ConstRef:
          if (sizeof(intptr_t) == sizeof(int32_t)) {
            fr.storeImmediateToStack(int32_t(v.refval_), resultHeight, temp);
          } else {
            fr.storeImmediateToStack(int64_t(v.refval_), resultHeight, temp);
          }
          break;
        case Stk::MemRef:
          // Update bookkeeping as we pop the Stk entry.
          stackMapGenerator_.memRefsOnStk--;
          break;
        default:
          MOZ_ASSERT(v.isMem());
          break;
      }
      stk_.popBack();
    }

    ra.freeTempPtr(temp, saved);

    // This will pop the stack if needed.
    fr.finishStackResultArea(stackBase, stackResultBytes);
  }

  enum class ContinuationKind { Fallthrough, Jump };

  void popBlockResults(ResultType type, StackHeight stackBase,
                       ContinuationKind kind) {
    if (!type.empty()) {
      ABIResultIter iter(type);
      popRegisterResults(iter);
      if (!iter.done()) {
        popStackResults(iter, stackBase);
        // Because popStackResults might clobber the stack, it leaves the stack
        // pointer already in the right place for the continuation, whether the
        // continuation is a jump or fallthrough.
        return;
      }
    }
    // We get here if there are no stack results.  For a fallthrough, the stack
    // is already at the right height.  For a jump, we may need to pop the stack
    // pointer if the continuation's stack height is lower than the current
    // stack height.
    if (kind == ContinuationKind::Jump) {
      fr.popStackBeforeBranch(stackBase, type);
    }
  }

  Stk captureStackResult(const ABIResult& result, uint32_t stackResultBytes) {
    MOZ_ASSERT(result.onStack());
    uint32_t offs = fr.locateStackResult(result, controlItem().stackHeight,
                                         stackResultBytes);
    return Stk::StackResult(result.type(), offs);
  }

  void pushBlockResults(ResultType type) {
    if (type.empty()) {
      return;
    }

    // We need to push the results in reverse order, so first iterate through
    // all results to determine the locations of stack result types.
    ABIResultIter iter(type);
    while (!iter.done()) {
      iter.next();
    }
    uint32_t stackResultBytes = iter.stackBytesConsumedSoFar();

    for (iter.switchToPrev(); !iter.done(); iter.prev()) {
      const ABIResult& result = iter.cur();
      if (!result.onStack()) {
        break;
      }
      Stk v = captureStackResult(result, stackResultBytes);
      push(v);
      if (v.kind() == Stk::MemRef) {
        stackMapGenerator_.memRefsOnStk++;
      }
    }

    for (; !iter.done(); iter.prev()) {
      const ABIResult& result = iter.cur();
      MOZ_ASSERT(result.inRegister());
      switch (result.type().kind()) {
        case ValType::I32:
          pushI32(RegI32(result.gpr()));
          break;
        case ValType::I64:
          pushI64(RegI64(result.gpr64()));
          break;
        case ValType::F32:
          pushF32(RegF32(result.fpr()));
          break;
        case ValType::F64:
          pushF64(RegF64(result.fpr()));
          break;
        case ValType::Ref:
          pushRef(RegPtr(result.gpr()));
          break;
      }
    }
  }

  // A combination of popBlockResults + pushBlockResults, to shuffle the top
  // stack values into the expected block result locations for the given type.
  StackHeight topBlockResults(ResultType type) {
    if (type.empty()) {
      return fr.stackHeight();
    }
    StackHeight base = fr.stackResultsBase(stackConsumed(type.length()));
    popBlockResults(type, base, ContinuationKind::Fallthrough);
    pushBlockResults(type);
    return base;
  }

  // Conditional branches with fallthrough are preceded by a topBlockResults, so
  // we know that there are no stack results that need to be materialized.  In
  // that case, we can just shuffle the whole block down before popping the
  // stack.
  void shuffleStackResultsBeforeBranch(StackHeight srcHeight,
                                       StackHeight destHeight,
                                       ResultType type) {
    uint32_t stackResultBytes = 0;

    if (ABIResultIter::HasStackResults(type)) {
      MOZ_ASSERT(stk_.length() >= type.length());
      ABIResultIter iter(type);
      for (; !iter.done(); iter.next()) {
#ifdef DEBUG
        const ABIResult& result = iter.cur();
        const Stk& v = stk_[stk_.length() - iter.index() - 1];
        MOZ_ASSERT(v.isMem() == result.onStack());
#endif
      }
      stackResultBytes = iter.stackBytesConsumedSoFar();

      if (stackResultBytes) {
        // Find a free GPR to use when shuffling stack values.  If none is
        // available, push ReturnReg and restore it after we're done.
        bool saved = false;
        RegPtr temp = ra.needTempPtr(RegPtr(ReturnReg), &saved);
        fr.shuffleStackResultsTowardFP(srcHeight, destHeight, stackResultBytes,
                                       temp);
        ra.freeTempPtr(temp, saved);
      }
    }

    fr.popStackBeforeBranch(destHeight, stackResultBytes);
  }

  // Return the amount of execution stack consumed by the top numval
  // values on the value stack.

  size_t stackConsumed(size_t numval) {
    size_t size = 0;
    MOZ_ASSERT(numval <= stk_.length());
    for (uint32_t i = stk_.length() - 1; numval > 0; numval--, i--) {
      Stk& v = stk_[i];
      switch (v.kind()) {
        case Stk::MemRef:
          size += BaseStackFrame::StackSizeOfPtr;
          break;
        case Stk::MemI32:
          size += BaseStackFrame::StackSizeOfPtr;
          break;
        case Stk::MemI64:
          size += BaseStackFrame::StackSizeOfInt64;
          break;
        case Stk::MemF64:
          size += BaseStackFrame::StackSizeOfDouble;
          break;
        case Stk::MemF32:
          size += BaseStackFrame::StackSizeOfFloat;
          break;
        default:
          break;
      }
    }
    return size;
  }

  void popValueStackTo(uint32_t stackSize) {
    for (uint32_t i = stk_.length(); i > stackSize; i--) {
      Stk& v = stk_[i - 1];
      switch (v.kind()) {
        case Stk::RegisterI32:
          freeI32(v.i32reg());
          break;
        case Stk::RegisterI64:
          freeI64(v.i64reg());
          break;
        case Stk::RegisterF64:
          freeF64(v.f64reg());
          break;
        case Stk::RegisterF32:
          freeF32(v.f32reg());
          break;
        case Stk::RegisterRef:
          freeRef(v.refReg());
          break;
        case Stk::MemRef:
          stackMapGenerator_.memRefsOnStk--;
          break;
        default:
          break;
      }
    }
    stk_.shrinkTo(stackSize);
  }

  void popValueStackBy(uint32_t items) {
    popValueStackTo(stk_.length() - items);
  }
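
  // Drop the top value-stack entry, freeing any machine stack space it
  // occupies as well as its register, if any.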

  void dropValue() {
    if (peek(0).isMem()) {
      fr.popBytes(stackConsumed(1));
    }
    popValueStackBy(1);
  }

  // Peek at the stack, for calls.

  Stk& peek(uint32_t relativeDepth) {
    return stk_[stk_.length() - 1 - relativeDepth];
  }

#ifdef DEBUG
  // Check that we're not leaking registers by comparing the
  // state of the stack + available registers with the set of
  // all available registers.

  // Call this between opcodes.
  void performRegisterLeakCheck() {
    BaseRegAlloc::LeakCheck check(ra);
    for (size_t i = 0; i < stk_.length(); i++) {
      Stk& item = stk_[i];
      switch (item.kind_) {
        case Stk::RegisterI32:
          check.addKnownI32(item.i32reg());
          break;
        case Stk::RegisterI64:
          check.addKnownI64(item.i64reg());
          break;
        case Stk::RegisterF32:
          check.addKnownF32(item.f32reg());
          break;
        case Stk::RegisterF64:
          check.addKnownF64(item.f64reg());
          break;
        case Stk::RegisterRef:
          check.addKnownRef(item.refReg());
          break;
        default:
          break;
      }
    }
  }

  void assertStackInvariants() const {
    if (deadCode_) {
      // Nonlocal control flow can pass values in stack locations in a way that
      // isn't accounted for by the value stack.  In dead code, which occurs
      // after unconditional non-local control flow, there is no invariant to
      // assert.
      return;
    }
    size_t size = 0;
    for (const Stk& v : stk_) {
      switch (v.kind()) {
        case Stk::MemRef:
          size += BaseStackFrame::StackSizeOfPtr;
          break;
        case Stk::MemI32:
          size += BaseStackFrame::StackSizeOfPtr;
          break;
        case Stk::MemI64:
          size += BaseStackFrame::StackSizeOfInt64;
          break;
        case Stk::MemF64:
          size += BaseStackFrame::StackSizeOfDouble;
          break;
        case Stk::MemF32:
          size += BaseStackFrame::StackSizeOfFloat;
          break;
        default:
          MOZ_ASSERT(!v.isMem());
          break;
      }
    }
    MOZ_ASSERT(size == fr.dynamicHeight());
  }

#endif

  ////////////////////////////////////////////////////////////
  //
  // Control stack

  void initControl(Control& item, ResultType params) {
    // Make sure the constructor was run properly
    MOZ_ASSERT(!item.stackHeight.isValid() && item.stackSize == UINT32_MAX);

    uint32_t paramCount = deadCode_ ? 0 : params.length();
    uint32_t stackParamSize = stackConsumed(paramCount);
    item.stackHeight = fr.stackResultsBase(stackParamSize);
    item.stackSize = stk_.length() - paramCount;
    item.deadOnArrival = deadCode_;
    item.bceSafeOnEntry = bceSafe_;
  }

  Control& controlItem() { return iter_.controlItem(); }

  Control& controlItem(uint32_t relativeDepth) {
    return iter_.controlItem(relativeDepth);
  }

  Control& controlOutermost() { return iter_.controlOutermost(); }

  ////////////////////////////////////////////////////////////
  //
  // Labels

  void insertBreakablePoint(CallSiteDesc::Kind kind) {
    // The debug trap exit requires WasmTlsReg be loaded. However, since we
    // are emitting millions of these breakable points inline, we push this
    // loading of TLS into the FarJumpIsland created by linkCallSites.
    masm.nopPatchableToCall(CallSiteDesc(iter_.lastOpcodeOffset(), kind));
  }

  //////////////////////////////////////////////////////////////////////
  //
  // Function prologue and epilogue.

  MOZ_MUST_USE bool beginFunction() {
    JitSpew(JitSpew_Codegen, "# ========================================");
    JitSpew(JitSpew_Codegen, "# Emitting wasm baseline code");
    JitSpew(JitSpew_Codegen,
            "# beginFunction: start of function prologue for index %d",
            (int)func_.index);

    // Make a start on the stack map for this function.  Inspect the args so
    // as to determine which of them are both in-memory and pointer-typed, and
    // add entries to machineStackTracker as appropriate.

    ArgTypeVector args(funcType());
    size_t inboundStackArgBytes = StackArgAreaSizeUnaligned(args);
    MOZ_ASSERT(inboundStackArgBytes % sizeof(void*) == 0);
    stackMapGenerator_.numStackArgWords = inboundStackArgBytes / sizeof(void*);

    MOZ_ASSERT(stackMapGenerator_.machineStackTracker.length() == 0);
    if (!stackMapGenerator_.machineStackTracker.pushNonGCPointers(
            stackMapGenerator_.numStackArgWords)) {
      return false;
    }

    // Identify GC-managed pointers passed on the stack.
    for (ABIArgIter i(args); !i.done(); i++) {
      ABIArg argLoc = *i;
      if (argLoc.kind() == ABIArg::Stack &&
          args[i.index()] == MIRType::RefOrNull) {
        uint32_t offset = argLoc.offsetFromArgBase();
        MOZ_ASSERT(offset < inboundStackArgBytes);
        MOZ_ASSERT(offset % sizeof(void*) == 0);
        stackMapGenerator_.machineStackTracker.setGCPointer(offset /
                                                            sizeof(void*));
      }
    }

    GenerateFunctionPrologue(
        masm, env_.funcTypes[func_.index]->id,
        env_.mode() == CompileMode::Tier1 ? Some(func_.index) : Nothing(),
        &offsets_);

    // GenerateFunctionPrologue pushes exactly one wasm::Frame's worth of
    // stuff, and none of the values are GC pointers.  Hence:
    if (!stackMapGenerator_.machineStackTracker.pushNonGCPointers(
            sizeof(Frame) / sizeof(void*))) {
      return false;
    }

    // Initialize DebugFrame fields before the stack overflow trap so that
    // we have the invariant that all observable Frames in a debugEnabled
    // Module have valid DebugFrames.
    if (env_.debugEnabled()) {
#ifdef JS_CODEGEN_ARM64
      static_assert(DebugFrame::offsetOfFrame() % WasmStackAlignment == 0,
                    "aligned");
#endif
      masm.reserveStack(DebugFrame::offsetOfFrame());
      if (!stackMapGenerator_.machineStackTracker.pushNonGCPointers(
              DebugFrame::offsetOfFrame() / sizeof(void*))) {
        return false;
      }

      masm.store32(
          Imm32(func_.index),
          Address(masm.getStackPointer(), DebugFrame::offsetOfFuncIndex()));
      masm.storePtr(ImmWord(0), Address(masm.getStackPointer(),
                                        DebugFrame::offsetOfFlagsWord()));
      // Zero out pointer values for safety, since it's not easy to establish
      // whether they will always be defined before a GC.

      // DebugFrame::resultRef_ and ::resultAnyRef_
      masm.storePtr(ImmWord(0), Address(masm.getStackPointer(),
                                        DebugFrame::offsetOfResults()));

      // DebugFrame::cachedReturnJSValue_
      for (size_t i = 0; i < sizeof(js::Value) / sizeof(void*); i++) {
        masm.storePtr(ImmWord(0),
                      Address(masm.getStackPointer(),
                              DebugFrame::offsetOfCachedReturnJSValue() +
                                  i * sizeof(void*)));
      }
    }

    // Generate a stack-overflow check and its associated stack map.

    fr.checkStack(ABINonArgReg0, BytecodeOffset(func_.lineOrBytecode));

    ExitStubMapVector extras;
    if (!stackMapGenerator_.generateStackmapEntriesForTrapExit(args, &extras)) {
      return false;
    }
    if (!createStackMap("stack check", extras, masm.currentOffset(),
                        HasRefTypedDebugFrame::No)) {
      return false;
    }

    size_t reservedBytes = fr.fixedAllocSize() - masm.framePushed();
    MOZ_ASSERT(0 == (reservedBytes % sizeof(void*)));

    masm.reserveStack(reservedBytes);
    fr.onFixedStackAllocated();
    if (!stackMapGenerator_.machineStackTracker.pushNonGCPointers(
            reservedBytes / sizeof(void*))) {
      return false;
    }

    // Locals are stack allocated.  Mark ref-typed ones in the stackmap
    // accordingly.
    for (const Local& l : localInfo_) {
      if (l.type == MIRType::RefOrNull) {
        uint32_t offs = fr.localOffset(l);
        MOZ_ASSERT(0 == (offs % sizeof(void*)));
        stackMapGenerator_.machineStackTracker.setGCPointer(offs /
                                                            sizeof(void*));
      }
    }

    // Copy arguments from registers to stack.
    for (ABIArgIter i(args); !i.done(); i++) {
      if (!i->argInRegister()) {
        continue;
      }
      if (args.isSyntheticStackResultPointerArg(i.index())) {
        // The synthetic stack result area parameter was passed in a register.
        // Store it to the stack.
        fr.storeIncomingStackResultAreaPtr(RegPtr(i->gpr()));
        continue;
      }
      Local& l = localInfo_[args.naturalIndex(i.index())];
      switch (i.mirType()) {
        case MIRType::Int32:
          fr.storeLocalI32(RegI32(i->gpr()), l);
          break;
        case MIRType::Int64:
          fr.storeLocalI64(RegI64(i->gpr64()), l);
          break;
        case MIRType::RefOrNull: {
          DebugOnly<uint32_t> offs = fr.localOffset(l);
          MOZ_ASSERT(0 == (offs % sizeof(void*)));
          fr.storeLocalPtr(RegPtr(i->gpr()), l);
          // We should have just visited this local in the preceding loop.
          MOZ_ASSERT(stackMapGenerator_.machineStackTracker.isGCPointer(
              offs / sizeof(void*)));
          break;
        }
        case MIRType::Double:
          fr.storeLocalF64(RegF64(i->fpu()), l);
          break;
        case MIRType::Float32:
          fr.storeLocalF32(RegF32(i->fpu()), l);
          break;
        default:
          MOZ_CRASH("Function argument type");
      }
    }

    fr.zeroLocals(&ra);

    if (env_.debugEnabled()) {
      insertBreakablePoint(CallSiteDesc::EnterFrame);
      if (!createStackMap("debug: breakable point")) {
        return false;
      }
    }

    JitSpew(JitSpew_Codegen,
            "# beginFunction: enter body with masm.framePushed = %u",
            masm.framePushed());
    MOZ_ASSERT(stackMapGenerator_.framePushedAtEntryToBody.isNothing());
    stackMapGenerator_.framePushedAtEntryToBody.emplace(masm.framePushed());

    return true;
  }
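
  // Copy any stack results from this frame's stack area into the
  // caller-provided stack result area and pop them.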

  void popStackReturnValues(const ResultType& resultType) {
    uint32_t bytes = ABIResultIter::MeasureStackBytes(resultType);
    if (bytes == 0) {
      return;
    }
    Register target = ABINonArgReturnReg0;
    Register temp = ABINonArgReturnReg1;
    fr.loadIncomingStackResultAreaPtr(RegPtr(target));
    fr.popStackResultsToMemory(target, bytes, temp);
  }

  void saveRegisterReturnValues(const ResultType& resultType) {
    MOZ_ASSERT(env_.debugEnabled());
    size_t debugFrameOffset = masm.framePushed() - DebugFrame::offsetOfFrame();
    Address resultsAddress(masm.getStackPointer(),
                           debugFrameOffset + DebugFrame::offsetOfResults());

    for (ABIResultIter i(resultType); !i.done(); i.next()) {
      const ABIResult result = i.cur();
      if (!result.inRegister()) {
#ifdef DEBUG
        for (i.next(); !i.done(); i.next()) {
          MOZ_ASSERT(!i.cur().inRegister());
        }
#endif
        break;
      }
      MOZ_ASSERT(i.index() == 0,
                 "debug frame only has space for one stored register result");
      switch (result.type().kind()) {
        case ValType::I32:
          masm.store32(RegI32(result.gpr()), resultsAddress);
          break;
        case ValType::I64:
          masm.store64(RegI64(result.gpr64()), resultsAddress);
          break;
        case ValType::F64:
          masm.storeDouble(RegF64(result.fpr()), resultsAddress);
          break;
        case ValType::F32:
          masm.storeFloat32(RegF32(result.fpr()), resultsAddress);
          break;
        case ValType::Ref:
          masm.storePtr(RegPtr(result.gpr()), resultsAddress);
          break;
      }
    }
  }

  void restoreRegisterReturnValues(const ResultType& resultType) {
    MOZ_ASSERT(env_.debugEnabled());
    size_t debugFrameOffset = masm.framePushed() - DebugFrame::offsetOfFrame();
    Address resultsAddress(masm.getStackPointer(),
                           debugFrameOffset + DebugFrame::offsetOfResults());

    for (ABIResultIter i(resultType); !i.done(); i.next()) {
      const ABIResult result = i.cur();
      if (!result.inRegister()) {
#ifdef DEBUG
        for (i.next(); !i.done(); i.next()) {
          MOZ_ASSERT(!i.cur().inRegister());
        }
#endif
        break;
      }
      MOZ_ASSERT(i.index() == 0,
                 "debug frame only has space for one stored register result");
      switch (result.type().kind()) {
        case ValType::I32:
          masm.load32(resultsAddress, RegI32(result.gpr()));
          break;
        case ValType::I64:
          masm.load64(resultsAddress, RegI64(result.gpr64()));
          break;
        case ValType::F64:
          masm.loadDouble(resultsAddress, RegF64(result.fpr()));
          break;
        case ValType::F32:
          masm.loadFloat32(resultsAddress, RegF32(result.fpr()));
          break;
        case ValType::Ref:
          masm.loadPtr(resultsAddress, RegPtr(result.gpr()));
          break;
      }
    }
  }

  MOZ_MUST_USE bool endFunction() {
    JitSpew(JitSpew_Codegen, "# endFunction: start of function epilogue");

    // Always branch to returnLabel_.
    masm.breakpoint();

    // Patch the add in the prologue so that it checks against the correct
    // frame size. Flush the constant pool in case it needs to be patched.
    masm.flush();

    // Precondition for patching.
    if (masm.oom()) {
      return false;
    }

    fr.patchCheckStack();

    masm.bind(&returnLabel_);

    ResultType resultType(ResultType::Vector(funcType().results()));

    popStackReturnValues(resultType);

    if (env_.debugEnabled()) {
      // If a return type is a ref, we need to note that in the stack maps
      // generated here.  Note that this assumes that DebugFrame::result* and
      // DebugFrame::cachedReturnJSValue_ are either both ref-typed or they are
      // both not ref-typed.  It can't represent the situation where one is and
      // the other isn't.
      HasRefTypedDebugFrame refDebugFrame = HasRefTypedDebugFrame::No;
      for (ValType result : funcType().results()) {
        if (result.isReference()) {
          refDebugFrame = HasRefTypedDebugFrame::Yes;
          break;
        }
      }
      // Store and reload the return value from DebugFrame::return so that it
      // can be clobbered and/or modified by the debug trap.
      saveRegisterReturnValues(resultType);
      insertBreakablePoint(CallSiteDesc::Breakpoint);
      if (!createStackMap("debug: breakpoint", refDebugFrame)) {
        return false;
      }
      insertBreakablePoint(CallSiteDesc::LeaveFrame);
      if (!createStackMap("debug: leave frame", refDebugFrame)) {
        return false;
      }
      restoreRegisterReturnValues(resultType);
    }

    GenerateFunctionEpilogue(masm, fr.fixedAllocSize(), &offsets_);

#if defined(JS_ION_PERF)
    // FIXME - profiling code missing.  No bug for this.

    // Note the end of the inline code and start of the OOL code.
    // gen->perfSpewer().noteEndInlineCode(masm);
#endif

    JitSpew(JitSpew_Codegen, "# endFunction: end of function epilogue");
    JitSpew(JitSpew_Codegen, "# endFunction: start of OOL code");
    if (!generateOutOfLineCode()) {
      return false;
    }

    offsets_.end = masm.currentOffset();

    if (!fr.checkStackHeight()) {
      return false;
    }

    JitSpew(JitSpew_Codegen, "# endFunction: end of OOL code for index %d",
            (int)func_.index);
    return !masm.oom();
  }

  //////////////////////////////////////////////////////////////////////
  //
  // Calls.

  struct FunctionCall {
    explicit FunctionCall(uint32_t lineOrBytecode)
        : lineOrBytecode(lineOrBytecode),
          isInterModule(false),
          usesSystemAbi(false),
#ifdef JS_CODEGEN_ARM
          hardFP(true),
#endif
          frameAlignAdjustment(0),
          stackArgAreaSize(0) {
    }

    uint32_t lineOrBytecode;
    ABIArgGenerator abi;
    bool isInterModule;
    bool usesSystemAbi;
#ifdef JS_CODEGEN_ARM
    bool hardFP;
#endif
    size_t frameAlignAdjustment;
    size_t stackArgAreaSize;
  };

  void beginCall(FunctionCall& call, UseABI useABI, InterModule interModule) {
    MOZ_ASSERT_IF(useABI == UseABI::Builtin, interModule == InterModule::False);

    call.isInterModule = interModule == InterModule::True;
    call.usesSystemAbi = useABI == UseABI::System;

    if (call.usesSystemAbi) {
      // Call-outs need to use the appropriate system ABI.
#if defined(JS_CODEGEN_ARM)
      call.hardFP = UseHardFpABI();
      call.abi.setUseHardFp(call.hardFP);
#elif defined(JS_CODEGEN_MIPS32)
      call.abi.enforceO32ABI();
#endif
    } else {
#if defined(JS_CODEGEN_ARM)
      MOZ_ASSERT(call.hardFP,
                 "All private ABIs pass FP arguments in registers");
#endif
    }

    // Use masm.framePushed() because the value we want here does not depend
    // on the height of the frame's stack area, but rather on the actual size
    // of the allocated frame.
    call.frameAlignAdjustment = ComputeByteAlignment(
        masm.framePushed() + sizeof(Frame), JitStackAlignment);
  }

  void endCall(FunctionCall& call, size_t stackSpace) {
    size_t adjustment = call.stackArgAreaSize + call.frameAlignAdjustment;
    fr.freeArgAreaAndPopBytes(adjustment, stackSpace);

    MOZ_ASSERT(
        stackMapGenerator_.framePushedExcludingOutboundCallArgs.isSome());
    stackMapGenerator_.framePushedExcludingOutboundCallArgs.reset();

    if (call.isInterModule) {
      masm.loadWasmTlsRegFromFrame();
      masm.loadWasmPinnedRegsFromTls();
      masm.switchToWasmTlsRealm(ABINonArgReturnReg0, ABINonArgReturnReg1);
    } else if (call.usesSystemAbi) {
      // On x86 there are no pinned registers, so don't waste time
      // reloading the Tls.
#ifndef JS_CODEGEN_X86
      masm.loadWasmTlsRegFromFrame();
      masm.loadWasmPinnedRegsFromTls();
#endif
    }
  }

  void startCallArgs(size_t stackArgAreaSizeUnaligned, FunctionCall* call) {
    size_t stackArgAreaSizeAligned =
        AlignStackArgAreaSize(stackArgAreaSizeUnaligned);
    MOZ_ASSERT(stackArgAreaSizeUnaligned <= stackArgAreaSizeAligned);

    // Record the masm.framePushed() value at this point, before we push args
    // for the call, but including the alignment space placed above the args.
    // This defines the lower limit of the stackmap that will be created for
    // this call.
    MOZ_ASSERT(
        stackMapGenerator_.framePushedExcludingOutboundCallArgs.isNothing());
    stackMapGenerator_.framePushedExcludingOutboundCallArgs.emplace(
        // However much we've pushed so far
        masm.framePushed() +
        // Extra space we'll push to get the frame aligned
        call->frameAlignAdjustment +
        // Extra space we'll push to get the outbound arg area 16-aligned
        (stackArgAreaSizeAligned - stackArgAreaSizeUnaligned));

    call->stackArgAreaSize = stackArgAreaSizeAligned;

    size_t adjustment = call->stackArgAreaSize + call->frameAlignAdjustment;
    fr.allocArgArea(adjustment);
  }

  const ABIArg reservePointerArgument(FunctionCall* call) {
    return call->abi.next(MIRType::Pointer);
  }

  // TODO / OPTIMIZE (Bug 1316821): Note passArg is used only in one place.
  // (Or it was, until Luke wandered through, but that can be fixed again.)
  // I'm not saying we should manually inline it, but we could hoist the
  // dispatch into the caller and have type-specific implementations of
  // passArg: passArgI32(), etc.  Then those might be inlined, at least in PGO
  // builds.
  //
  // The bulk of the work here (60%) is in the next() call, though.
  //
  // Notably, since next() is so expensive, StackArgAreaSizeUnaligned()
  // becomes expensive too.
  //
  // Somehow there could be a trick here where the sequence of argument types
  // (read from the input stream) leads to a cached entry for
  // StackArgAreaSizeUnaligned() and for how to pass arguments...
  //
  // But at least we could reduce the cost of StackArgAreaSizeUnaligned() by
  // first reading the argument types into a (reusable) vector, then we have
  // the outgoing size at low cost, and then we can pass args based on the
  // info we read.

  void passArg(ValType type, const Stk& arg, FunctionCall* call) {
    switch (type.kind()) {
      case ValType::I32: {
        ABIArg argLoc = call->abi.next(MIRType::Int32);
        if (argLoc.kind() == ABIArg::Stack) {
          ScratchI32 scratch(*this);
          loadI32(arg, scratch);
          masm.store32(scratch, Address(masm.getStackPointer(),
                                        argLoc.offsetFromArgBase()));
        } else {
          loadI32(arg, RegI32(argLoc.gpr()));
        }
        break;
      }
      case ValType::I64: {
        ABIArg argLoc = call->abi.next(MIRType::Int64);
        if (argLoc.kind() == ABIArg::Stack) {
          ScratchI32 scratch(*this);
#ifdef JS_PUNBOX64
          loadI64(arg, fromI32(scratch));
          masm.storePtr(scratch, Address(masm.getStackPointer(),
                                         argLoc.offsetFromArgBase()));
#else
          loadI64Low(arg, scratch);
          masm.store32(scratch, LowWord(Address(masm.getStackPointer(),
                                                argLoc.offsetFromArgBase())));
          loadI64High(arg, scratch);
          masm.store32(scratch, HighWord(Address(masm.getStackPointer(),
                                                 argLoc.offsetFromArgBase())));
#endif
        } else {
          loadI64(arg, RegI64(argLoc.gpr64()));
        }
        break;
      }
      case ValType::F64: {
        ABIArg argLoc = call->abi.next(MIRType::Double);
        switch (argLoc.kind()) {
          case ABIArg::Stack: {
            ScratchF64 scratch(*this);
            loadF64(arg, scratch);
            masm.storeDouble(scratch, Address(masm.getStackPointer(),
                                              argLoc.offsetFromArgBase()));
            break;
          }
#if defined(JS_CODEGEN_REGISTER_PAIR)
          case ABIArg::GPR_PAIR: {
#  if defined(JS_CODEGEN_ARM)
            ScratchF64 scratch(*this);
            loadF64(arg, scratch);
            masm.ma_vxfer(scratch, argLoc.evenGpr(), argLoc.oddGpr());
            break;
#  elif defined(JS_CODEGEN_MIPS32)
            ScratchF64 scratch(*this);
            loadF64(arg, scratch);
            MOZ_ASSERT(MOZ_LITTLE_ENDIAN());
            masm.moveFromDoubleLo(scratch, argLoc.evenGpr());
            masm.moveFromDoubleHi(scratch, argLoc.oddGpr());
            break;
#  else
            MOZ_CRASH("BaseCompiler platform hook: passArg F64 pair");
#  endif
          }
#endif
          case ABIArg::FPU: {
            loadF64(arg, RegF64(argLoc.fpu()));
            break;
          }
          case ABIArg::GPR: {
            MOZ_CRASH("Unexpected parameter passing discipline");
          }
          case ABIArg::Uninitialized:
            MOZ_CRASH("Uninitialized ABIArg kind");
        }
        break;
      }
      case ValType::F32: {
        ABIArg argLoc = call->abi.next(MIRType::Float32);
        switch (argLoc.kind()) {
          case ABIArg::Stack: {
            ScratchF32 scratch(*this);
            loadF32(arg, scratch);
            masm.storeFloat32(scratch, Address(masm.getStackPointer(),
                                               argLoc.offsetFromArgBase()));
            break;
          }
          case ABIArg::GPR: {
            ScratchF32 scratch(*this);
            loadF32(arg, scratch);
            masm.moveFloat32ToGPR(scratch, argLoc.gpr());
            break;
          }
          case ABIArg::FPU: {
            loadF32(arg, RegF32(argLoc.fpu()));
            break;
          }
#if defined(JS_CODEGEN_REGISTER_PAIR)
          case ABIArg::GPR_PAIR: {
            MOZ_CRASH("Unexpected parameter passing discipline");
          }
#endif
          case ABIArg::Uninitialized:
            MOZ_CRASH("Uninitialized ABIArg kind");
        }
        break;
      }
      case ValType::Ref: {
        ABIArg argLoc = call->abi.next(MIRType::RefOrNull);
        if (argLoc.kind() == ABIArg::Stack) {
          ScratchPtr scratch(*this);
          loadRef(arg, scratch);
          masm.storePtr(scratch, Address(masm.getStackPointer(),
                                         argLoc.offsetFromArgBase()));
        } else {
          loadRef(arg, RegPtr(argLoc.gpr()));
        }
        break;
      }
    }
  }

  CodeOffset callDefinition(uint32_t funcIndex, const FunctionCall& call) {
    CallSiteDesc desc(call.lineOrBytecode, CallSiteDesc::Func);
    return masm.call(desc, funcIndex);
  }

  CodeOffset callSymbolic(SymbolicAddress callee, const FunctionCall& call) {
    CallSiteDesc desc(call.lineOrBytecode, CallSiteDesc::Symbolic);
    return masm.call(desc, callee);
  }

  // Precondition: sync()

  CodeOffset callIndirect(uint32_t funcTypeIndex, uint32_t tableIndex,
                          const Stk& indexVal, const FunctionCall& call) {
    const FuncTypeWithId& funcType = env_.types[funcTypeIndex].funcType();
    MOZ_ASSERT(funcType.id.kind() != FuncTypeIdDescKind::None);

    const TableDesc& table = env_.tables[tableIndex];

    loadI32(indexVal, RegI32(WasmTableCallIndexReg));

    CallSiteDesc desc(call.lineOrBytecode, CallSiteDesc::Dynamic);
    CalleeDesc callee = CalleeDesc::wasmTable(table, funcType.id);
    return masm.wasmCallIndirect(desc, callee, NeedsBoundsCheck(true));
  }

  // Precondition: sync()

  CodeOffset callImport(unsigned globalDataOffset, const FunctionCall& call) {
    CallSiteDesc desc(call.lineOrBytecode, CallSiteDesc::Dynamic);
    CalleeDesc callee = CalleeDesc::import(globalDataOffset);
    return masm.wasmCallImport(desc, callee);
  }

  CodeOffset builtinCall(SymbolicAddress builtin, const FunctionCall& call) {
    return callSymbolic(builtin, call);
  }

  CodeOffset builtinInstanceMethodCall(const SymbolicAddressSignature& builtin,
                                       const ABIArg& instanceArg,
                                       const FunctionCall& call) {
    // Builtin method calls assume the TLS register has been set.
    masm.loadWasmTlsRegFromFrame();

    CallSiteDesc desc(call.lineOrBytecode, CallSiteDesc::Symbolic);
    return masm.wasmCallBuiltinInstanceMethod(
        desc, instanceArg, builtin.identity, builtin.failureMode);
  }

  //////////////////////////////////////////////////////////////////////
  //
  // Sundry low-level code generators.

  // The compiler depends on moveImm32() clearing the high bits of a 64-bit
  // register on 64-bit systems, except on MIPS64, where the high bits are
  // sign-extended from the low bits.

  void moveImm32(int32_t v, RegI32 dest) { masm.move32(Imm32(v), dest); }

  void moveImm64(int64_t v, RegI64 dest) { masm.move64(Imm64(v), dest); }

  void moveImmRef(intptr_t v, RegPtr dest) { masm.movePtr(ImmWord(v), dest); }

  void moveImmF32(float f, RegF32 dest) { masm.loadConstantFloat32(f, dest); }

  void moveImmF64(double d, RegF64 dest) { masm.loadConstantDouble(d, dest); }

  MOZ_MUST_USE bool addInterruptCheck() {
    ScratchI32 tmp(*this);
    masm.loadWasmTlsRegFromFrame(tmp);
    masm.wasmInterruptCheck(tmp, bytecodeOffset());
    return createStackMap("addInterruptCheck");
  }

  void jumpTable(const LabelVector& labels, Label* theTable) {
    // Flush constant pools to ensure that the table is never interrupted by
    // constant pool entries.
    masm.flush();

#if defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64)
    // Prevent nop sequences from appearing in the jump table.
    AutoForbidNops afn(&masm);
#endif
    masm.bind(theTable);

    for (uint32_t i = 0; i < labels.length(); i++) {
      CodeLabel cl;
      masm.writeCodePointer(&cl);
      cl.target()->bind(labels[i].offset());
      masm.addCodeLabel(cl);
    }
  }

  void tableSwitch(Label* theTable, RegI32 switchValue, Label* dispatchCode) {
    masm.bind(dispatchCode);

#if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86)
    ScratchI32 scratch(*this);
    CodeLabel tableCl;

    masm.mov(&tableCl, scratch);

    tableCl.target()->bind(theTable->offset());
    masm.addCodeLabel(tableCl);

    masm.jmp(Operand(scratch, switchValue, ScalePointer));
#elif defined(JS_CODEGEN_ARM)
    // Flush constant pools: offset must reflect the distance from the MOV
    // to the start of the table; as the address of the MOV is given by the
    // label, nothing must come between the bind() and the ma_mov().
    AutoForbidPoolsAndNops afp(&masm,
                               /* number of instructions in scope = */ 5);

    ScratchI32 scratch(*this);

    // Compute the offset from the ma_mov instruction to the jump table.
    Label here;
    masm.bind(&here);
    uint32_t offset = here.offset() - theTable->offset();

    // Read PC+8
    masm.ma_mov(pc, scratch);

    // ARM scratch register is required by ma_sub.
    ScratchRegisterScope arm_scratch(*this);

    // Compute the absolute table base pointer into `scratch`, offset by 8
    // to account for the fact that ma_mov read PC+8.
    masm.ma_sub(Imm32(offset + 8), scratch, arm_scratch);

    // Jump indirect via table element.
    masm.ma_ldr(DTRAddr(scratch, DtrRegImmShift(switchValue, LSL, 2)), pc,
                Offset, Assembler::Always);
#elif defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)
    ScratchI32 scratch(*this);
    CodeLabel tableCl;

    masm.ma_li(scratch, &tableCl);

    tableCl.target()->bind(theTable->offset());
    masm.addCodeLabel(tableCl);

    masm.branchToComputedAddress(BaseIndex(scratch, switchValue, ScalePointer));
#elif defined(JS_CODEGEN_ARM64)
    AutoForbidPoolsAndNops afp(&masm,
                               /* number of instructions in scope = */ 4);

    ScratchI32 scratch(*this);

    ARMRegister s(scratch, 64);
    ARMRegister v(switchValue, 64);
    masm.Adr(s, theTable);
    masm.Add(s, s, Operand(v, vixl::LSL, 3));
    masm.Ldr(s, MemOperand(s, 0));
    masm.Br(s);
#else
    MOZ_CRASH("BaseCompiler platform hook: tableSwitch");
#endif
  }
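
  // Claim the fixed return register(s) after a call so that the register
  // allocator regards them as in use, and hand them to the caller.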

  RegI32 captureReturnedI32() {
    RegI32 r = RegI32(ReturnReg);
    MOZ_ASSERT(isAvailableI32(r));
    needI32(r);
    return r;
  }

  RegI64 captureReturnedI64() {
    RegI64 r = RegI64(ReturnReg64);
    MOZ_ASSERT(isAvailableI64(r));
    needI64(r);
    return r;
  }

  RegF32 captureReturnedF32(const FunctionCall& call) {
    RegF32 r = RegF32(ReturnFloat32Reg);
    MOZ_ASSERT(isAvailableF32(r));
    needF32(r);
#if defined(JS_CODEGEN_ARM)
    if (call.usesSystemAbi && !call.hardFP) {
      masm.ma_vxfer(ReturnReg, r);
    }
#endif
    return r;
  }

  RegF64 captureReturnedF64(const FunctionCall& call) {
    RegF64 r = RegF64(ReturnDoubleReg);
    MOZ_ASSERT(isAvailableF64(r));
    needF64(r);
#if defined(JS_CODEGEN_ARM)
    if (call.usesSystemAbi && !call.hardFP) {
      masm.ma_vxfer(ReturnReg64.low, ReturnReg64.high, r);
    }
#endif
    return r;
  }

  RegPtr captureReturnedRef() {
    RegPtr r = RegPtr(ReturnReg);
    MOZ_ASSERT(isAvailableRef(r));
    needRef(r);
    return r;
  }

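  // Trap with IntegerDivideByZero if the divisor is zero.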
  void checkDivideByZeroI32(RegI32 rhs) {
    Label nonZero;
    masm.branchTest32(Assembler::NonZero, rhs, rhs, &nonZero);
    trap(Trap::IntegerDivideByZero);
    masm.bind(&nonZero);
  }

  void checkDivideByZeroI64(RegI64 r) {
    Label nonZero;
    ScratchI32 scratch(*this);
    masm.branchTest64(Assembler::NonZero, r, r, scratch, &nonZero);
    trap(Trap::IntegerDivideByZero);
    masm.bind(&nonZero);
  }

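  // Handle the INT_MIN / -1 overflow case: either trap with IntegerOverflow,
  // or, when zeroOnOverflow is set (the remainder case), produce zero and
  // jump to 'done'.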
  void checkDivideSignedOverflowI32(RegI32 rhs, RegI32 srcDest, Label* done,
                                    bool zeroOnOverflow) {
    Label notMin;
    masm.branch32(Assembler::NotEqual, srcDest, Imm32(INT32_MIN), &notMin);
    if (zeroOnOverflow) {
      masm.branch32(Assembler::NotEqual, rhs, Imm32(-1), &notMin);
      moveImm32(0, srcDest);
      masm.jump(done);
    } else {
      masm.branch32(Assembler::NotEqual, rhs, Imm32(-1), &notMin);
      trap(Trap::IntegerOverflow);
    }
    masm.bind(&notMin);
  }

  void checkDivideSignedOverflowI64(RegI64 rhs, RegI64 srcDest, Label* done,
                                    bool zeroOnOverflow) {
    Label notmin;
    masm.branch64(Assembler::NotEqual, srcDest, Imm64(INT64_MIN), &notmin);
    masm.branch64(Assembler::NotEqual, rhs, Imm64(-1), &notmin);
    if (zeroOnOverflow) {
      masm.xor64(srcDest, srcDest);
      masm.jump(done);
    } else {
      trap(Trap::IntegerOverflow);
    }
    masm.bind(&notmin);
  }

#ifndef RABALDR_INT_DIV_I64_CALLOUT
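  // 64-bit division and remainder.  When the divisor is a known constant
  // (isConst with value c), the zero and overflow checks made unnecessary by
  // that constant are omitted.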
  void quotientI64(RegI64 rhs, RegI64 srcDest, RegI64 reserved,
                   IsUnsigned isUnsigned, bool isConst, int64_t c) {
    Label done;

    if (!isConst || c == 0) {
      checkDivideByZeroI64(rhs);
    }

    if (!isUnsigned && (!isConst || c == -1)) {
      checkDivideSignedOverflowI64(rhs, srcDest, &done, ZeroOnOverflow(false));
    }

#  if defined(JS_CODEGEN_X64)
    // The caller must set up the following situation.
    MOZ_ASSERT(srcDest.reg == rax);
    MOZ_ASSERT(reserved == specific_.rdx);
    if (isUnsigned) {
      masm.xorq(rdx, rdx);
      masm.udivq(rhs.reg);
    } else {
      masm.cqo();
      masm.idivq(rhs.reg);
    }
#  elif defined(JS_CODEGEN_MIPS64)
    if (isUnsigned) {
      masm.as_ddivu(srcDest.reg, rhs.reg);
    } else {
      masm.as_ddiv(srcDest.reg, rhs.reg);
    }
    masm.as_mflo(srcDest.reg);
#  elif defined(JS_CODEGEN_ARM64)
    ARMRegister sd(srcDest.reg, 64);
    ARMRegister r(rhs.reg, 64);
    if (isUnsigned) {
      masm.Udiv(sd, sd, r);
    } else {
      masm.Sdiv(sd, sd, r);
    }
#  else
    MOZ_CRASH("BaseCompiler platform hook: quotientI64");
#  endif
    masm.bind(&done);
  }

  void remainderI64(RegI64 rhs, RegI64 srcDest, RegI64 reserved,
                    IsUnsigned isUnsigned, bool isConst, int64_t c) {
    Label done;

    if (!isConst || c == 0) {
      checkDivideByZeroI64(rhs);
    }

    if (!isUnsigned && (!isConst || c == -1)) {
      checkDivideSignedOverflowI64(rhs, srcDest, &done, ZeroOnOverflow(true));
    }

#  if defined(JS_CODEGEN_X64)
    // The caller must set up the following situation.
    MOZ_ASSERT(srcDest.reg == rax);
    MOZ_ASSERT(reserved == specific_.rdx);

    if (isUnsigned) {
      masm.xorq(rdx, rdx);
      masm.udivq(rhs.reg);
    } else {
      masm.cqo();
      masm.idivq(rhs.reg);
    }
    masm.movq(rdx, rax);
#  elif defined(JS_CODEGEN_MIPS64)
    if (isUnsigned) {
      masm.as_ddivu(srcDest.reg, rhs.reg);
    } else {
      masm.as_ddiv(srcDest.reg, rhs.reg);
    }
    masm.as_mfhi(srcDest.reg);
#  elif defined(JS_CODEGEN_ARM64)
    MOZ_ASSERT(reserved.isInvalid());
    ARMRegister sd(srcDest.reg, 64);
    ARMRegister r(rhs.reg, 64);
    ScratchI32 temp(*this);
    ARMRegister t(temp, 64);
    if (isUnsigned) {
      masm.Udiv(t, sd, r);
    } else {
      masm.Sdiv(t, sd, r);
    }
    masm.Mul(t, t, r);
    masm.Sub(sd, sd, t);
#  else
    MOZ_CRASH("BaseCompiler platform hook: remainderI64");
#  endif
    masm.bind(&done);
  }
#endif  // RABALDR_INT_DIV_I64_CALLOUT

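  // Only x86 needs a temp register for 64-bit rotates.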
  RegI32 needRotate64Temp() {
#if defined(JS_CODEGEN_X86)
    return needI32();
#elif defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_ARM) ||    \
    defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_MIPS32) || \
    defined(JS_CODEGEN_MIPS64)
    return RegI32::Invalid();
#else
    MOZ_CRASH("BaseCompiler platform hook: needRotate64Temp");
#endif
  }

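  // Mask the shift count to the range 0..31 on ARM, where the shift
  // instructions do not implicitly reduce the count modulo 32.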
  void maskShiftCount32(RegI32 r) {
#if defined(JS_CODEGEN_ARM)
    masm.and32(Imm32(31), r);
#endif
  }

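  // A temp register is needed for popcount on ARM, ARM64, and MIPS, and on
  // x86/x64 only when the POPCNT instruction is unavailable.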
  RegI32 needPopcnt32Temp() {
#if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
    return AssemblerX86Shared::HasPOPCNT() ? RegI32::Invalid() : needI32();
#elif defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64) || \
    defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)
    return needI32();
#else
    MOZ_CRASH("BaseCompiler platform hook: needPopcnt32Temp");
#endif
  }

  RegI32 needPopcnt64Temp() {
#if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
    return AssemblerX86Shared::HasPOPCNT() ? RegI32::Invalid() : needI32();
#elif defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_ARM64) || \
    defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)
    return needI32();
#else
    MOZ_CRASH("BaseCompiler platform hook: needPopcnt64Temp");
#endif
  }

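  // Out-of-line code for checked F32/F64 -> I32 truncation, reached when the
  // inline truncation cannot produce a result; depending on the flags it
  // either traps or materializes the saturated value before rejoining.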
  class OutOfLineTruncateCheckF32OrF64ToI32 : public OutOfLineCode {
    AnyReg src;
    RegI32 dest;
    TruncFlags flags;
    BytecodeOffset off;

   public:
    OutOfLineTruncateCheckF32OrF64ToI32(AnyReg src, RegI32 dest,
                                        TruncFlags flags, BytecodeOffset off)
        : src(src), dest(dest), flags(flags), off(off) {}

    virtual void generate(MacroAssembler* masm) override {
      if (src.tag == AnyReg::F32) {
        masm->oolWasmTruncateCheckF32ToI32(src.f32(), dest, flags, off,
                                           rejoin());
      } else if (src.tag == AnyReg::F64) {
        masm->oolWasmTruncateCheckF64ToI32(src.f64(), dest, flags, off,
                                           rejoin());
      } else {
        MOZ_CRASH("unexpected type");
      }
    }
  };

  MOZ_MUST_USE bool truncateF32ToI32(RegF32 src, RegI32 dest,
                                     TruncFlags flags) {
    BytecodeOffset off = bytecodeOffset();
    OutOfLineCode* ool =
        addOutOfLineCode(new (alloc_) OutOfLineTruncateCheckF32OrF64ToI32(
            AnyReg(src), dest, flags, off));
    if (!ool) {
      return false;
    }
    bool isSaturating = flags & TRUNC_SATURATING;
    if (flags & TRUNC_UNSIGNED) {
      masm.wasmTruncateFloat32ToUInt32(src, dest, isSaturating, ool->entry());
    } else {
      masm.wasmTruncateFloat32ToInt32(src, dest, isSaturating, ool->entry());
    }
    masm.bind(ool->rejoin());
    return true;
  }

  MOZ_MUST_USE bool truncateF64ToI32(RegF64 src, RegI32 dest,
                                     TruncFlags flags) {
    BytecodeOffset off = bytecodeOffset();
    OutOfLineCode* ool =
        addOutOfLineCode(new (alloc_) OutOfLineTruncateCheckF32OrF64ToI32(
            AnyReg(src), dest, flags, off));
    if (!ool) {
      return false;
    }
    bool isSaturating = flags & TRUNC_SATURATING;
    if (flags & TRUNC_UNSIGNED) {
      masm.wasmTruncateDoubleToUInt32(src, dest, isSaturating, ool->entry());
    } else {
      masm.wasmTruncateDoubleToInt32(src, dest, isSaturating, ool->entry());
    }
    masm.bind(ool->rejoin());
    return true;
  }

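  // As above, but for F32/F64 -> I64 truncation.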
  class OutOfLineTruncateCheckF32OrF64ToI64 : public OutOfLineCode {
    AnyReg src;
    RegI64 dest;
    TruncFlags flags;
    BytecodeOffset off;

   public:
    OutOfLineTruncateCheckF32OrF64ToI64(AnyReg src, RegI64 dest,
                                        TruncFlags flags, BytecodeOffset off)
        : src(src), dest(dest), flags(flags), off(off) {}

    virtual void generate(MacroAssembler* masm) override {
      if (src.tag == AnyReg::F32) {
        masm->oolWasmTruncateCheckF32ToI64(src.f32(), dest, flags, off,
                                           rejoin());
      } else if (src.tag == AnyReg::F64) {
        masm->oolWasmTruncateCheckF64ToI64(src.f64(), dest, flags, off,
                                           rejoin());
      } else {
        MOZ_CRASH("unexpected type");
      }
    }
  };

#ifndef RABALDR_FLOAT_TO_I64_CALLOUT
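  // On x86/x64, unsigned float-to-int64 truncation needs an F64 temp.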
  MOZ_MUST_USE RegF64 needTempForFloatingToI64(TruncFlags flags) {
#  if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
    if (flags & TRUNC_UNSIGNED) {
      return needF64();
    }
#  endif
    return RegF64::Invalid();
  }

  MOZ_MUST_USE bool truncateF32ToI64(RegF32 src, RegI64 dest, TruncFlags flags,
                                     RegF64 temp) {
    OutOfLineCode* ool =
        addOutOfLineCode(new (alloc_) OutOfLineTruncateCheckF32OrF64ToI64(
            AnyReg(src), dest, flags, bytecodeOffset()));
    if (!ool) {
      return false;
    }
    bool isSaturating = flags & TRUNC_SATURATING;
    if (flags & TRUNC_UNSIGNED) {
      masm.wasmTruncateFloat32ToUInt64(src, dest, isSaturating, ool->entry(),
                                       ool->rejoin(), temp);
    } else {
      masm.wasmTruncateFloat32ToInt64(src, dest, isSaturating, ool->entry(),
                                      ool->rejoin(), temp);
    }
    return true;
  }

  MOZ_MUST_USE bool truncateF64ToI64(RegF64 src, RegI64 dest, TruncFlags flags,
                                     RegF64 temp) {
    OutOfLineCode* ool =
        addOutOfLineCode(new (alloc_) OutOfLineTruncateCheckF32OrF64ToI64(
            AnyReg(src), dest, flags, bytecodeOffset()));
    if (!ool) {
      return false;
    }
    bool isSaturating = flags & TRUNC_SATURATING;
    if (flags & TRUNC_UNSIGNED) {
      masm.wasmTruncateDoubleToUInt64(src, dest, isSaturating, ool->entry(),
                                      ool->rejoin(), temp);
    } else {
      masm.wasmTruncateDoubleToInt64(src, dest, isSaturating, ool->entry(),
                                     ool->rejoin(), temp);
    }
    return true;
  }
#endif  // RABALDR_FLOAT_TO_I64_CALLOUT

#ifndef RABALDR_I64_TO_FLOAT_CALLOUT
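  // An I32 temp may be needed for int64-to-float conversion: for unsigned
  // conversions to F64 when the MacroAssembler requires one, and for
  // conversions to F32 on x86/x64.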
  RegI32 needConvertI64ToFloatTemp(ValType to, bool isUnsigned) {
    bool needs = false;
    if (to == ValType::F64) {
      needs = isUnsigned && masm.convertUInt64ToDoubleNeedsTemp();
    } else {
#  if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
      needs = true;
#  endif
    }
    return needs ? needI32() : RegI32::Invalid();
  }

  void convertI64ToF32(RegI64 src, bool isUnsigned, RegF32 dest, RegI32 temp) {
    if (isUnsigned) {
      masm.convertUInt64ToFloat32(src, dest, temp);
    } else {
      masm.convertInt64ToFloat32(src, dest);
    }
  }

  void convertI64ToF64(RegI64 src, bool isUnsigned, RegF64 dest, RegI32 temp) {
    if (isUnsigned) {
      masm.convertUInt64ToDouble(src, dest, temp);
    } else {
      masm.convertInt64ToDouble(src, dest);
    }
  }
#endif  // RABALDR_I64_TO_FLOAT_CALLOUT

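  // Set dest to 1 if the 64-bit comparison 'lhs cond rhs' holds, else 0.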
  void cmp64Set(Assembler::Condition cond, RegI64 lhs, RegI64 rhs,
                RegI32 dest) {
#if defined(JS_PUNBOX64)
    masm.cmpPtrSet(cond, lhs.reg, rhs.reg, dest);
#elif defined(JS_CODEGEN_MIPS32)
    masm.cmp64Set(cond, lhs, rhs, dest);
#else
    // TODO / OPTIMIZE (Bug 1316822): This is pretty branchy; we should be
    // able to do better.
    Label done, condTrue;
    masm.branch64(cond, lhs, rhs, &condTrue);
    moveImm32(0, dest);
    masm.jump(&done);
    masm.bind(&condTrue);
    moveImm32(1, dest);
    masm.bind(&done);
#endif
  }

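  // Set dest to 1 if the 64-bit src is zero, else 0.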
  void eqz64(RegI64 src, RegI32 dest) {
#ifdef JS_PUNBOX64
    masm.cmpPtrSet(Assembler::Equal, src.reg, ImmWord(0), dest);
#else
    masm.or32(src.high, src.low);
    masm.cmp32Set(Assembler::Equal, src.low, Imm32(0), dest);
#endif
  }

  MOZ_MUST_USE bool supportsRoundInstruction(RoundingMode mode) {
#if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86)
    return Assembler::HasRoundInstruction(mode);
#else
    return false;
#endif
  }

  void roundF32(RoundingMode roundingMode, RegF32 f0) {
#if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86)
    masm.vroundss(Assembler::ToX86RoundingMode(roundingMode), f0, f0, f0);
#else
    MOZ_CRASH("NYI");
#endif
  }

  void roundF64(RoundingMode roundingMode, RegF64 f0) {
#if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_X86)
    masm.vroundsd(Assembler::ToX86RoundingMode(roundingMode), f0, f0, f0);
#else
    MOZ_CRASH("NYI");
#endif
  }

  //////////////////////////////////////////////////////////////////////
  //
  // Global variable access.

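  // Compute the address of a global variable relative to the Tls register,
  // loading the Tls into 'tmp' and indirecting through the stored pointer for
  // indirect globals.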
  Address addressOfGlobalVar(const GlobalDesc& global, RegI32 tmp) {
    uint32_t globalToTlsOffset =
        offsetof(TlsData, globalArea) + global.offset();
    masm.loadWasmTlsRegFromFrame(tmp);
    if (global.isIndirect()) {
      masm.loadPtr(Address(tmp, globalToTlsOffset), tmp);
      return Address(tmp, 0);
    }
    return Address(tmp, globalToTlsOffset);
  }

  //////////////////////////////////////////////////////////////////////
  //
  // Heap access.

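  // Bounds-check elimination (BCE) for accesses whose base is a local: if the
  // local is already known to hold a checked pointer (its bit is set in
  // bceSafe_) and the offset is below the guard limit, the bounds check can
  // be omitted.  The local is marked safe afterwards in either case.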
  void bceCheckLocal(MemoryAccessDesc* access, AccessCheck* check,
                     uint32_t local) {
    if (local >= sizeof(BCESet) * 8) {
      return;
    }

    uint32_t offsetGuardLimit = GetOffsetGuardLimit(env_.hugeMemoryEnabled());

    if ((bceSafe_ & (BCESet(1) << local)) &&
        access->offset() < offsetGuardLimit) {
      check->omitBoundsCheck = true;
    }

    // The local becomes safe even if the offset is beyond the guard limit.
    bceSafe_ |= (BCESet(1) << local);
  }

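  // A local that is assigned to no longer holds a checked pointer; clear its
  // bit in bceSafe_.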
  void bceLocalIsUpdated(uint32_t local) {
    if (local >= sizeof(BCESet) * 8) {
      return;
    }

    bceSafe_ &= ~(BCESet(1) << local);
  }

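  // Emit the offset folding, alignment check, and bounds check (each only as
  // required) ahead of a memory access, leaving the access-ready index in
  // 'ptr'.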
  void prepareMemoryAccess(MemoryAccessDesc* access, AccessCheck* check,
                           RegI32 tls, RegI32 ptr) {
    uint32_t offsetGuardLimit = GetOffsetGuardLimit(env_.hugeMemoryEnabled());

    // Fold the offset into the pointer if necessary for further computations.
    if (access->offset() >= offsetGuardLimit ||
        (access->isAtomic() && !check->omitAlignmentCheck &&
         !check->onlyPointerAlignment)) {
      Label ok;
      masm.branchAdd32(Assembler::CarryClear, Imm32(access->offset()), ptr,
                       &ok);
      masm.wasmTrap(Trap::OutOfBounds, bytecodeOffset());
      masm.bind(&ok);
      access->clearOffset();
      check->onlyPointerAlignment = true;
    }

    // Alignment check if required.

    if (access->isAtomic() && !check->omitAlignmentCheck) {
      MOZ_ASSERT(check->onlyPointerAlignment);
      // We only care about the low pointer bits here.
      Label ok;
      masm.branchTest32(Assembler::Zero, ptr, Imm32(access->byteSize() - 1),
                        &ok);
      masm.wasmTrap(Trap::UnalignedAccess, bytecodeOffset());
      masm.bind(&ok);
    }

    // Assert that no tls register was passed if we don't need it.

    if (env_.hugeMemoryEnabled()) {
      // We have HeapReg and no bounds checking, and need to load neither
      // memoryBase nor boundsCheckLimit from tls.
      MOZ_ASSERT_IF(check->omitBoundsCheck, tls.isInvalid());
    }
#ifdef JS_CODEGEN_ARM
    // We have HeapReg on ARM and don't need to load the memoryBase from tls.
    MOZ_ASSERT_IF(check->omitBoundsCheck, tls.isInvalid());
#endif

    // Bounds check if required.

    if (!env_.hugeMemoryEnabled() && !check->omitBoundsCheck) {
      Label ok;
      masm.wasmBoundsCheck(Assembler::Below, ptr,
                           Address(tls, offsetof(TlsData, boundsCheckLimit)),
                           &ok);
      masm.wasmTrap(Trap::OutOfBounds, bytecodeOffset());
      masm.bind(&ok);
    }
  }

#if defined(JS_CODEGEN_X64) || defined(JS_CODEGEN_ARM) ||      \
    defined(JS_CODEGEN_ARM64) || defined(JS_CODEGEN_MIPS32) || \
    defined(JS_CODEGEN_MIPS64)
  BaseIndex prepareAtomicMemoryAccess(MemoryAccessDesc* access,
                                      AccessCheck* check, RegI32 tls,
                                      RegI32 ptr) {
    MOZ_ASSERT(needTlsForAccess(*check) == tls.isValid());
    prepareMemoryAccess(access, check, tls, ptr);
    return BaseIndex(HeapReg, ptr, TimesOne, access->offset());
  }
#elif defined(JS_CODEGEN_X86)
  // Some consumers depend on the address not retaining tls, as tls may be the
  // scratch register.

  Address prepareAtomicMemoryAccess(MemoryAccessDesc* access,
                                    AccessCheck* check, RegI32 tls,
                                    RegI32 ptr) {
    MOZ_ASSERT(needTlsForAccess(*check) == tls.isValid());
    prepareMemoryAccess(access, check, tls, ptr);
    masm.addPtr(Address(tls, offsetof(TlsData, memoryBase)), ptr);
    return Address(ptr, access->offset());
  }
#else
  Address prepareAtomicMemoryAccess(MemoryAccessDesc* access,
                                    AccessCheck* check, RegI32 tls,
                                    RegI32 ptr) {
    MOZ_CRASH("BaseCompiler platform hook: prepareAtomicMemoryAccess");
  }
#endif

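  // Temps needed by load(): up to three on ARM for unaligned accesses
  // (depending on the access type) and one on MIPS.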
  void needLoadTemps(const MemoryAccessDesc& access, RegI32* temp1,
                     RegI32* temp2, RegI32* temp3) {
#if defined(JS_CODEGEN_ARM)
    if (IsUnaligned(access)) {
      switch (access.type()) {
        case Scalar::Float64:
          *temp3 = needI32();
          [[fallthrough]];
        case Scalar::Float32:
          *temp2 = needI32();
          [[fallthrough]];
        default:
          *temp1 = needI32();
          break;
      }
    }
#elif defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)
    *temp1 = needI32();
#endif
  }

  MOZ_MUST_USE bool needTlsForAccess(const AccessCheck& check) {
#if defined(JS_CODEGEN_X86)
    // x86 requires Tls to load the memory base.
    return true;
#else
    return !env_.hugeMemoryEnabled() && !check.omitBoundsCheck;
#endif
  }

  // ptr and dest may be the same iff dest is I32.
  // This may destroy ptr even if ptr and dest are not the same.
  MOZ_MUST_USE bool load(MemoryAccessDesc* access, AccessCheck* check,
                         RegI32 tls, RegI32 ptr, AnyReg dest, RegI32 temp1,
                         RegI32 temp2, RegI32 temp3) {
    prepareMemoryAccess(access, check, tls, ptr);

#if defined(JS_CODEGEN_X64)
    Operand srcAddr(HeapReg, ptr, TimesOne, access->offset());

    if (dest.tag == AnyReg::I64) {
      masm.wasmLoadI64(*access, srcAddr, dest.i64());
    } else {
      masm.wasmLoad(*access, srcAddr, dest.any());
    }
#elif defined(JS_CODEGEN_X86)
    masm.addPtr(Address(tls, offsetof(TlsData, memoryBase)), ptr);
    Operand srcAddr(ptr, access->offset());

    if (dest.tag == AnyReg::I64) {
      MOZ_ASSERT(dest.i64() == specific_.abiReturnRegI64);
      masm.wasmLoadI64(*access, srcAddr, dest.i64());
    } else {
      // For 8-bit loads this will generate movsbl or movzbl, so there's no
      // constraint on what the output register may be.
      masm.wasmLoad(*access, srcAddr, dest.any());
    }
#elif defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_MIPS32) || \
    defined(JS_CODEGEN_MIPS64)
    if (IsUnaligned(*access)) {
      switch (dest.tag) {
        case AnyReg::I64:
          masm.wasmUnalignedLoadI64(*access, HeapReg, ptr, ptr, dest.i64(),
                                    temp1);
          break;
        case AnyReg::F32:
          masm.wasmUnalignedLoadFP(*access, HeapReg, ptr, ptr, dest.f32(),
                                   temp1, temp2, RegI32::Invalid());
          break;
        case AnyReg::F64:
          masm.wasmUnalignedLoadFP(*access, HeapReg, ptr, ptr, dest.f64(),
                                   temp1, temp2, temp3);
          break;
        default:
          masm.wasmUnalignedLoad(*access, HeapReg, ptr, ptr, dest.i32(), temp1);
          break;
      }
    } else {
      if (dest.tag == AnyReg::I64) {
        masm.wasmLoadI64(*access, HeapReg, ptr, ptr, dest.i64());
      } else {
        masm.wasmLoad(*access, HeapReg, ptr, ptr, dest.any());
      }
    }
#elif defined(JS_CODEGEN_ARM64)
    if (dest.tag == AnyReg::I64) {
      masm.wasmLoadI64(*access, HeapReg, ptr, ptr, dest.i64());
    } else {
      masm.wasmLoad(*access, HeapReg, ptr, ptr, dest.any());
    }
#else
    MOZ_CRASH("BaseCompiler platform hook: load");
#endif

    return true;
  }

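  // A temp is needed by store() on ARM for unaligned non-I32 stores and
  // always on MIPS.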
  RegI32 needStoreTemp(const MemoryAccessDesc& access, ValType srcType) {
#if defined(JS_CODEGEN_ARM)
    if (IsUnaligned(access) && srcType != ValType::I32) {
      return needI32();
    }
#elif defined(JS_CODEGEN_MIPS32) || defined(JS_CODEGEN_MIPS64)
    return needI32();
#endif
    return RegI32::Invalid();
  }

  // ptr and src must not be the same register.
  // This may destroy ptr and src.
  MOZ_MUST_USE bool store(MemoryAccessDesc* access, AccessCheck* check,
                          RegI32 tls, RegI32 ptr, AnyReg src, RegI32 temp) {
    prepareMemoryAccess(access, check, tls, ptr);

    // Emit the store.
#if defined(JS_CODEGEN_X64)
    MOZ_ASSERT(temp.isInvalid());
    Operand dstAddr(HeapReg, ptr, TimesOne, access->offset());

    masm.wasmStore(*access, src.any(), dstAddr);
#elif defined(JS_CODEGEN_X86)
    MOZ_ASSERT(temp.isInvalid());
    masm.addPtr(Address(tls, offsetof(TlsData, memoryBase)), ptr);
    Operand dstAddr(ptr, access->offset());

    if (access->type() == Scalar::Int64) {
      masm.wasmStoreI64(*access, src.i64(), dstAddr);
    } else {
      AnyRegister value;
      ScratchI8 scratch(*this);
      if (src.tag == AnyReg::I64) {
        if (access->byteSize() == 1 && !ra.isSingleByteI32(src.i64().low)) {
          masm.mov(src.i64().low, scratch);
          value = AnyRegister(scratch);
        } else {
          value = AnyRegister(src.i64().low);
        }
      } else if (access->byteSize() == 1 && !ra.isSingleByteI32(src.i32())) {
        masm.mov(src.i32(), scratch);
        value = AnyRegister(scratch);
      } else {
        value = src.any();
      }

      masm.wasmStore(*access, value, dstAddr);
    }
#elif defined(JS_CODEGEN_ARM)
    if (IsUnaligned(*access)) {
      switch (src.tag) {
        case AnyReg::I64:
          masm.wasmUnalignedStoreI64(*access, src.i64(), HeapReg, ptr, ptr,
                                     temp);
          break;
        case AnyReg::F32:
          masm.wasmUnalignedStoreFP(*access, src.f32(), HeapReg, ptr, ptr,
                                    temp);
          break;
        case AnyReg::F64:
          masm.wasmUnalignedStoreFP(*access, src.f64(), HeapReg, ptr, ptr,
                                    temp);
          break;
        default:
          MOZ_ASSERT(temp.isInvalid());
          masm.wasmUnalignedStore(*access, src.i32(), HeapReg, ptr, ptr, temp);
          break;
      }
    } else {
      MOZ_ASSERT(temp.isInvalid());
      if (access->type() == Scalar::Int64) {
        masm.wasmStoreI64(*access, src.i64(), HeapReg, ptr, ptr);
      } else if (