js/src/wasm/WasmBCFrame.h
author Cristian Tuns <ctuns@mozilla.com>
Mon, 04 Jul 2022 17:41:45 -0400
changeset 622948 bff913e9d1b0db7ba7671ee7020be4c3cbcad9b7
parent 616653 c0c5e5b8909013fdf6cd4b5ee60e4a29b7f1e7c7
permissions -rw-r--r--
Merge autoland to mozilla-central. a=merge

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 *
 * Copyright 2016 Mozilla Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// This is an INTERNAL header for the Wasm baseline compiler: CPU stack frame,
// stack maps, and associated logic.

#ifndef wasm_wasm_baseline_frame_h
#define wasm_wasm_baseline_frame_h

#include "wasm/WasmBaselineCompile.h"  // For BaseLocalIter
#include "wasm/WasmBCDefs.h"
#include "wasm/WasmBCRegDefs.h"
#include "wasm/WasmBCStk.h"
#include "wasm/WasmConstants.h"  // For MaxFrameSize

// [SMDOC] Wasm baseline compiler's stack frame.
//
// For background, see "Wasm's ABIs" in WasmFrame.h; the following should never
// be in conflict with that.
//
// The stack frame has four parts ("below" means at lower addresses):
//
//  - the Frame element;
//  - the Local area, including the DebugFrame element and possibly a spilled
//    pointer to stack results, if any; allocated below the header with various
//    forms of alignment;
//  - the Dynamic area, comprising the temporary storage the compiler uses for
//    register spilling, allocated below the Local area;
//  - the Arguments area, comprising memory allocated for outgoing calls,
//    allocated below the Dynamic area.
//
//                +==============================+
//                |    Incoming stack arg        |
//                |    ...                       |
// -------------  +==============================+
//                |    Frame (fixed size)        |
// -------------  +==============================+ <-------------------- FP
//         ^      |    DebugFrame (optional)     |    ^  ^             ^^
//   localSize    |    Register arg local        |    |  |             ||
//         |      |    ...                       |    |  |     framePushed
//         |      |    Register stack result ptr?|    |  |             ||
//         |      |    Non-arg local             |    |  |             ||
//         |      |    ...                       |    |  |             ||
//         |      |    (padding)                 |    |  |             ||
//         |      |    Instance pointer          |    |  |             ||
//         |      +------------------------------+    |  |             ||
//         v      |    (padding)                 |    |  v             ||
// -------------  +==============================+ currentStackHeight  ||
//         ^      |    Dynamic (variable size)   |    |                ||
//  dynamicSize   |    ...                       |    |                ||
//         v      |    ...                       |    v                ||
// -------------  |    (free space, sometimes)   | ---------           v|
//                +==============================+ <----- SP not-during calls
//                |    Arguments (sometimes)     |                      |
//                |    ...                       |                      v
//                +==============================+ <----- SP during calls
//
// The Frame is addressed off the stack pointer.  masm.framePushed() is always
// correct, and masm.getStackPointer() + masm.framePushed() always addresses the
// Frame, with the DebugFrame optionally below it.
//
// The Local area (including the DebugFrame and, if needed, the spilled value of
// the stack results area pointer) is laid out by BaseLocalIter and is allocated
// and deallocated by standard prologue and epilogue functions that manipulate
// the stack pointer, but it is accessed via BaseStackFrame.
//
// The Dynamic area is maintained by and accessed via BaseStackFrame.  On some
// systems (such as ARM64), the Dynamic memory may be allocated in chunks
// because the SP needs a specific alignment, and in this case there will
// normally be some free space directly above the SP.  The stack height does not
// include the free space; it reflects the logically used space only.
//
// The Dynamic area is where space for stack results is allocated when calling
// functions that return results on the stack.  If a function has stack results,
// a pointer to the low address of the stack result area is passed as an
// additional argument, according to the usual ABI.  See
// ABIResultIter::HasStackResults.
//
// The Arguments area is allocated and deallocated via BaseStackFrame (see
// comments later) but is accessed directly off the stack pointer.

namespace js {
namespace wasm {

using namespace js::jit;

// Type-safe wrapper for a stack frame height, so that a height is not
// accidentally interchanged with some other uint32_t quantity.  The value
// UINT32_MAX is reserved to mean "invalid".

class StackHeight {
  friend class BaseStackFrameAllocator;

  uint32_t height;

 public:
  explicit StackHeight(uint32_t h) : height(h) {}

  // The sentinel height, for which isValid() returns false.
  static StackHeight Invalid() {
    StackHeight sentinel(UINT32_MAX);
    return sentinel;
  }

  bool isValid() const { return !(height == UINT32_MAX); }

  // Equality is only defined between valid heights; both operands are
  // asserted to be valid.
  bool operator==(StackHeight rhs) const {
    MOZ_ASSERT(isValid() && rhs.isValid());
    return rhs.height == height;
  }
  bool operator!=(StackHeight rhs) const { return !operator==(rhs); }
};

// Describes where multi-value results go on the machine stack: how many bytes
// they occupy, how many results there are, and the stack height recorded for
// the area.  A default-constructed value means "no stack results" and carries
// no height.

class StackResultsLoc {
  uint32_t bytes_;
  size_t count_;
  Maybe<uint32_t> height_;

 public:
  // No stack results: zero bytes, zero values, and no recorded height.
  StackResultsLoc() : bytes_(0), count_(0) {}

  // A non-empty stack results area.  All three quantities are asserted to be
  // nonzero.
  StackResultsLoc(uint32_t bytes, size_t count, uint32_t height)
      : bytes_(bytes), count_(count), height_(Some(height)) {
    MOZ_ASSERT(bytes != 0);
    MOZ_ASSERT(count != 0);
    MOZ_ASSERT(height != 0);
  }

  uint32_t bytes() const { return bytes_; }

  // count_ is stored as a size_t; the narrowing to the uint32_t return type is
  // spelled out rather than left implicit.
  uint32_t count() const { return static_cast<uint32_t>(count_); }

  // Reads the stored height; only meaningful for a value constructed with one.
  uint32_t height() const { return height_.value(); }

  // True iff the area occupies a nonzero number of bytes.
  bool hasStackResults() const { return bytes() != 0; }

  // The same information as hasStackResults(), as a StackResults enumerator.
  StackResults stackResults() const {
    return hasStackResults() ? StackResults::HasStackResults
                             : StackResults::NoStackResults;
  }
};

// Abstraction of the baseline compiler's stack frame (except for the Frame /
// DebugFrame parts).  See comments above for more.  Remember, "below" on the
// stack means at lower addresses.
//
// The abstraction is split into two parts: BaseStackFrameAllocator is
// responsible for allocating and deallocating space on the stack and for
// performing computations that are affected by how the allocation is performed;
// BaseStackFrame then provides a pleasant interface for stack frame management.

class BaseStackFrameAllocator {
  MacroAssembler& masm;

#ifdef RABALDR_CHUNKY_STACK
  // On platforms that require the stack pointer to be aligned on a boundary
  // greater than the typical stack item (eg, ARM64 requires 16-byte alignment
  // but items are 8 bytes), allocate stack memory in chunks, and use a
  // separate stack height variable to track the effective stack pointer
  // within the allocated area.  Effectively, there's a variable amount of
  // free space directly above the stack pointer.  See diagram above.

  // The following must be true in order for the stack height to be
  // predictable at control flow joins:
  //
  // - The Local area is always aligned according to WasmStackAlignment, ie,
  //   masm.framePushed() % WasmStackAlignment is zero after allocating
  //   locals.
  //
  // - ChunkSize is always a multiple of WasmStackAlignment.
  //
  // - Pushing and popping are always in units of ChunkSize (hence preserving
  //   alignment).
  //
  // - The free space on the stack (masm.framePushed() - currentStackHeight_)
  //   is a predictable (nonnegative) amount.

  // As an optimization, we pre-allocate some space on the stack, the size of
  // this allocation is InitialChunk and it must be a multiple of ChunkSize.
  // It is allocated as part of the function prologue and deallocated as part
  // of the epilogue, along with the locals.
  //
  // If ChunkSize is too large then we risk overflowing the stack on simple
  // recursions with few live values where stack overflow should not be a
  // risk; if it is too small we spend too much time adjusting the stack
  // pointer.
  //
  // Good values for ChunkSize are the subject of future empirical analysis;
  // eight words is just an educated guess.

  static constexpr uint32_t ChunkSize = 8 * sizeof(void*);
  static constexpr uint32_t InitialChunk = ChunkSize;

  // The current logical height of the frame is
  //   currentStackHeight_ = localSize_ + dynamicSize
  // where dynamicSize is not accounted for explicitly and localSize_ also
  // includes size for the DebugFrame.
  //
  // The allocated size of the frame, provided by masm.framePushed(), is usually
  // larger than currentStackHeight_, notably at the beginning of execution when
  // we've allocated InitialChunk extra space.

  uint32_t currentStackHeight_;
#endif

  // Size of the Local area in bytes (stable after BaseCompiler::init() has
  // called BaseStackFrame::setupLocals(), which in turn calls
  // BaseStackFrameAllocator::setLocalSize()), always rounded to the proper
  // stack alignment.  The Local area is then allocated in beginFunction(),
  // following the allocation of the Header.  See onFixedStackAllocated()
  // below.

  uint32_t localSize_;

 protected:
  ///////////////////////////////////////////////////////////////////////////
  //
  // Initialization

  // Bind the allocator to `masm`.  localSize_ starts out as UINT32_MAX, the
  // "not yet set" value that setLocalSize() and the accessors assert against;
  // on chunky-stack platforms the logical stack height starts at zero.
  explicit BaseStackFrameAllocator(MacroAssembler& masm)
      : masm(masm),
#ifdef RABALDR_CHUNKY_STACK
        currentStackHeight_(0),
#endif
        localSize_(UINT32_MAX) {
  }

 protected:
  //////////////////////////////////////////////////////////////////////
  //
  // The Local area - the static part of the frame.

  // Record the size of the Local area, once it is known.

  void setLocalSize(uint32_t localSize) {
    MOZ_ASSERT(localSize == AlignBytes(localSize, sizeof(void*)),
               "localSize_ should be aligned to at least a pointer");
    MOZ_ASSERT(localSize_ == UINT32_MAX);
    localSize_ = localSize;
  }

  // Called once the fixed part of the frame has become stable in
  // beginFunction() (see also BaseStackFrame::onFixedStackAllocated()).  The
  // Local area size must have been set.  On chunky-stack platforms the logical
  // stack height is initialized to the size of the Local area.

  void onFixedStackAllocated() {
    MOZ_ASSERT(localSize_ != UINT32_MAX);
#ifdef RABALDR_CHUNKY_STACK
    this->currentStackHeight_ = this->localSize_;
#endif
  }

 public:
  // Number of bytes allocated on the stack below the Header for locals and
  // other fixed values, including all necessary alignment.  With a chunky
  // stack (eg ARM64) this also covers the initial chunk of working stack
  // memory.  The Local area size must have been set.

  uint32_t fixedAllocSize() const {
    MOZ_ASSERT(localSize_ != UINT32_MAX);
    uint32_t size = localSize_;
#ifdef RABALDR_CHUNKY_STACK
    size += InitialChunk;
#endif
    return size;
  }

#ifdef RABALDR_CHUNKY_STACK
  // The allocated frame size is frequently larger than the logical stack
  // height; we round up to a chunk boundary, and special case the initial
  // chunk.
  uint32_t framePushedForHeight(uint32_t logicalHeight) {
    if (logicalHeight <= fixedAllocSize()) {
      return fixedAllocSize();
    }
    return fixedAllocSize() +
           AlignBytes(logicalHeight - fixedAllocSize(), ChunkSize);
  }
#endif

 protected:
  //////////////////////////////////////////////////////////////////////
  //
  // The Dynamic area - the dynamic part of the frame, for spilling and saving
  // intermediate values.

  // Translate the stack area location `offset` (asserted to be positive) into
  // an offset off of sp_, using the current masm.framePushed().

  int32_t stackOffset(int32_t offset) {
    MOZ_ASSERT(offset > 0);
    const uint32_t pushed = masm.framePushed();
    return pushed - offset;
  }

  // The stack height obtained by placing `stackResultBytes` bytes of stack
  // results on top of `stackBase`.  The byte count must be nonzero and the
  // current stack height must not be below the base.
  uint32_t computeHeightWithStackResults(StackHeight stackBase,
                                         uint32_t stackResultBytes) {
    MOZ_ASSERT(stackResultBytes);
    MOZ_ASSERT(currentStackHeight() >= stackBase.height);
    const uint32_t baseHeight = stackBase.height;
    return baseHeight + stackResultBytes;
  }

#ifdef RABALDR_CHUNKY_STACK
  // Grow the logical stack by `bytes`.  Stack memory is only reserved when the
  // free space above the logical height is insufficient, and then only in
  // multiples of ChunkSize.
  void pushChunkyBytes(uint32_t bytes) {
    checkChunkyInvariants();
    const uint32_t freeSpace = masm.framePushed() - currentStackHeight_;
    if (bytes > freeSpace) {
      const uint32_t shortfall = bytes - freeSpace;
      const uint32_t bytesToReserve = AlignBytes(shortfall, ChunkSize);
      MOZ_ASSERT(bytesToReserve + freeSpace >= bytes);
      masm.reserveStack(bytesToReserve);
    }
    currentStackHeight_ += bytes;
    checkChunkyInvariants();
  }

  // Shrink the logical stack by `bytes`, releasing stack memory when at least
  // a whole chunk has become free.
  void popChunkyBytes(uint32_t bytes) {
    checkChunkyInvariants();
    currentStackHeight_ -= bytes;
    // popChunkyBytes() is sometimes used to pop a larger area, as when values
    // consumed by a call are dropped, so several chunks may need to go at
    // once.  The initial chunk is never dropped, and the amount dropped is
    // always an integral number of chunks.
    const uint32_t allocated = masm.framePushed();
    if (allocated - currentStackHeight_ >= ChunkSize) {
      uint32_t amountToFree =
          allocated - framePushedForHeight(currentStackHeight_);
      MOZ_ASSERT(amountToFree % ChunkSize == 0);
      if (amountToFree != 0) {
        masm.freeStack(amountToFree);
      }
    }
    checkChunkyInvariants();
  }
#endif

  // The current logical stack height: the separately tracked height on
  // chunky-stack platforms, masm.framePushed() everywhere else.
  uint32_t currentStackHeight() const {
#ifdef RABALDR_CHUNKY_STACK
    const uint32_t height = currentStackHeight_;
#else
    const uint32_t height = masm.framePushed();
#endif
    return height;
  }

 private:
#ifdef RABALDR_CHUNKY_STACK
  // Assert the relationships the chunky-stack scheme relies on between
  // masm.framePushed(), the fixed allocation, and the logical stack height.
  void checkChunkyInvariants() {
    // The allocation is never smaller than the fixed part of the frame.
    MOZ_ASSERT(masm.framePushed() >= fixedAllocSize());
    // The logical height never exceeds what has been allocated.
    MOZ_ASSERT(masm.framePushed() >= currentStackHeight_);
    // Unless only the fixed allocation is present, less than one chunk is
    // free.
    MOZ_ASSERT(masm.framePushed() == fixedAllocSize() ||
               masm.framePushed() - currentStackHeight_ < ChunkSize);
    // Everything allocated beyond the Local area is a whole number of chunks.
    MOZ_ASSERT((masm.framePushed() - localSize_) % ChunkSize == 0);
  }
#endif

  // Type-safe variant: the size of the allocated frame that corresponds to
  // `stackHeight`.

  uint32_t framePushedForHeight(StackHeight stackHeight) {
    const uint32_t logicalHeight = stackHeight.height;
#ifdef RABALDR_CHUNKY_STACK
    // Chunked allocation needs the more complicated adjustment.
    return framePushedForHeight(logicalHeight);
#else
    // Without chunking, the allocated size is exactly the stack height.
    return logicalHeight;
#endif
  }

 public:
  // The current height of the stack area (not necessarily zero-based),
  // wrapped in the type-safe StackHeight.

  StackHeight stackHeight() const {
    const uint32_t rawHeight = currentStackHeight();
    return StackHeight(rawHeight);
  }

  // Restore the frame height to the previously recorded value `amount`,
  // updating masm's notion of framePushed to match.

  void setStackHeight(StackHeight amount) {
#ifdef RABALDR_CHUNKY_STACK
    currentStackHeight_ = amount.height;
    const uint32_t allocatedSize = framePushedForHeight(amount);
    masm.setFramePushed(allocatedSize);
    checkChunkyInvariants();
#else
    const uint32_t allocatedSize = amount.height;
    masm.setFramePushed(allocatedSize);
#endif
  }

  // Zero-based height of the dynamic part of the stack area (ie, the backing
  // store for the evaluation stack): the current stack height less the size
  // of the Local area.

  uint32_t dynamicHeight() const {
    const uint32_t totalHeight = currentStackHeight();
    return totalHeight - localSize_;
  }

  // Before branching to an outer control label, pop the execution stack to
  // the level that region expects: its stack height plus `stackResultBytes`.
  // masm.framePushed() is deliberately left alone here, as it is updated when
  // compilation leaves the block.
  //
  // Note this operates directly on the stack pointer register.

  void popStackBeforeBranch(StackHeight destStackHeight,
                            uint32_t stackResultBytes) {
    const StackHeight heightThere(destStackHeight.height + stackResultBytes);
    const uint32_t framePushedThere = framePushedForHeight(heightThere);
    const uint32_t framePushedHere = masm.framePushed();
    if (framePushedHere <= framePushedThere) {
      return;
    }
    masm.addToStackPtr(Imm32(framePushedHere - framePushedThere));
  }

  // As the byte-count overload, with the number of stack result bytes
  // measured from the result type `type`.
  void popStackBeforeBranch(StackHeight destStackHeight, ResultType type) {
    const uint32_t stackResultBytes = ABIResultIter::MeasureStackBytes(type);
    popStackBeforeBranch(destStackHeight, stackResultBytes);
  }

  // Given that |stackParamSize| bytes on the dynamic stack correspond to the
  // stack results, return the stack height that remains once those bytes are
  // popped.

  StackHeight stackResultsBase(uint32_t stackParamSize) {
    const uint32_t heightNow = currentStackHeight();
    return StackHeight(heightNow - stackParamSize);
  }

  // For most of WebAssembly, control falls through from one instruction to
  // the next, so the current stack height can simply be threaded through the
  // compiler.  The two exceptions are leaving a block via dead code and
  // entering the "else" arm of an "if".  In both cases the stack height is the
  // block entry height plus any stack values (results when exiting the block,
  // parameters when entering the else arm).

  void resetStackHeight(StackHeight destStackHeight, ResultType type) {
    const uint32_t stackValueBytes = ABIResultIter::MeasureStackBytes(type);
    const StackHeight restored(destStackHeight.height + stackValueBytes);
    setStackHeight(restored);
  }

  // Return the offset of the stack result `result` within a stack result area
  // of `stackResultBytes` bytes sitting on top of `stackBase`.  The result
  // must live on the stack and fit within the area.

  uint32_t locateStackResult(const ABIResult& result, StackHeight stackBase,
                             uint32_t stackResultBytes) {
    MOZ_ASSERT(result.onStack());
    MOZ_ASSERT(result.stackOffset() + result.size() <= stackResultBytes);
    const uint32_t areaEnd =
        computeHeightWithStackResults(stackBase, stackResultBytes);
    return areaEnd - result.stackOffset();
  }

 public:
  //////////////////////////////////////////////////////////////////////
  //
  // The Argument area - for outgoing calls.
  //
  // We abstract these operations as an optimization: we can merge the freeing
  // of the argument area and dropping values off the stack after a call.  But
  // they always amount to manipulating the real stack pointer by some amount.
  //
  // Note that we do not update currentStackHeight_ for this; the frame does
  // not know about outgoing arguments.  But we do update framePushed(), so we
  // can still index into the frame below the outgoing arguments area.

  // Allocate `argSize` bytes for outgoing arguments.  This always amounts to
  // a masm.reserveStack() call, skipped when there is nothing to reserve.

  void allocArgArea(size_t argSize) {
    if (argSize == 0) {
      return;
    }
    masm.reserveStack(argSize);
  }

  // Free the argument area set up by allocArgArea(); `argSize` must equal the
  // `argSize` that was passed to allocArgArea().  In addition, drop `dropSize`
  // bytes of values from the frame, corresponding to the values that were
  // consumed by the call.

  void freeArgAreaAndPopBytes(size_t argSize, size_t dropSize) {
#ifdef RABALDR_CHUNKY_STACK
    // Freeing the outgoing arguments and freeing the consumed values have
    // different semantics here, so the two steps are kept separate.
    if (argSize != 0) {
      masm.freeStack(argSize);
    }
    popChunkyBytes(dropSize);
#else
    const size_t totalToFree = argSize + dropSize;
    if (totalToFree != 0) {
      masm.freeStack(totalToFree);
    }
#endif
  }
};

class BaseStackFrame final : public BaseStackFrameAllocator {
  MacroAssembler& masm;

  // The largest observed value of masm.framePushed(), ie, the size of the
  // stack frame.  Read this for its true value only when code generation is
  // finished.
  uint32_t maxFramePushed_;

  // Patch point where we check for stack overflow.
  CodeOffset stackAddOffset_;

  // Low byte offset of pointer to stack results, if any.
  Maybe<int32_t> stackResultsPtrOffset_;

  // The offset of instance pointer.
  uint32_t instancePointerOffset_;

  // Low byte offset of local area for true locals (not parameters).
  uint32_t varLow_;

  // High byte offset + 1 of local area for true locals.
  uint32_t varHigh_;

  // The stack pointer, cached for brevity.
  RegisterOrSP sp_;

 public:
  // Construct a frame abstraction over `masm`.  The frame-size high-water
  // mark starts at zero; the instance pointer offset and the bounds of the
  // variable area start at UINT32_MAX until setupLocals() assigns them; the
  // stack pointer is read once from masm and cached in sp_.
  explicit BaseStackFrame(MacroAssembler& masm)
      : BaseStackFrameAllocator(masm),
        masm(masm),
        maxFramePushed_(0),
        stackAddOffset_(0),
        instancePointerOffset_(UINT32_MAX),
        varLow_(UINT32_MAX),
        varHigh_(UINT32_MAX),
        sp_(masm.getStackPointer()) {}

  ///////////////////////////////////////////////////////////////////////////
  //
  // Stack management and overflow checking

  // To be called once beginFunction has allocated space for the Header (the
  // Frame and DebugFrame) and the Local area.  Records the current frame size
  // as the initial high-water mark and then notifies the allocator base
  // class.

  void onFixedStackAllocated() {
    const uint32_t fixedFrameSize = masm.framePushed();
    maxFramePushed_ = fixedFrameSize;
    BaseStackFrameAllocator::onFixedStackAllocated();
  }

  // The final frame size is not known until code generation is done (there
  // may be arbitrary spill slots and outgoing param slots), so emit a
  // patchable instruction via sub32FromStackPtrWithPatch() and remember its
  // location; patchCheckStack() fills in the frame size later, from
  // endFunction().  The value left in `tmp` is compared against the
  // instance's stack limit, and a StackOverflow trap at `trapOffset` is
  // emitted for the failing case.
  //
  // Note the platform scratch register may be used by branchPtr(), so
  // generally tmp must be something else.

  void checkStack(Register tmp, BytecodeOffset trapOffset) {
    stackAddOffset_ = masm.sub32FromStackPtrWithPatch(tmp);
    const Address stackLimit(InstanceReg,
                             wasm::Instance::offsetOfStackLimit());
    Label ok;
    masm.branchPtr(Assembler::Below, stackLimit, tmp, &ok);
    masm.wasmTrap(Trap::StackOverflow, trapOffset);
    masm.bind(&ok);
  }

  void patchCheckStack() {
    masm.patchSub32FromStackPtr(stackAddOffset_,
                                Imm32(int32_t(maxFramePushed_)));
  }

  // Very large frames are implausible, probably an attack.  Returns true when
  // the largest observed frame size does not exceed MaxFrameSize.

  bool checkStackHeight() {
    const bool tooLarge = maxFramePushed_ > MaxFrameSize;
    return !tooLarge;
  }

  ///////////////////////////////////////////////////////////////////////////
  //
  // Local area

  struct Local {
    // Type of the value.
    const MIRType type;

    // Byte offset from Frame "into" the locals, ie positive for true locals
    // and negative for incoming args that read directly from the arg area.
    // It assumes the stack is growing down and that locals are on the stack
    // at lower addresses than Frame, and is the offset from Frame of the
    // lowest-addressed byte of the local.
    const int32_t offs;

    Local(MIRType type, int32_t offs) : type(type), offs(offs) {}

    bool isStackArgument() const { return offs < 0; }
  };

  // Profiling shows that the number of parameters and locals frequently
  // touches or exceeds 8.  So 16 seems like a reasonable starting point.
  using LocalVector = Vector<Local, 16, SystemAllocPolicy>;

  // Fill `localInfo` with one entry per item produced by BaseLocalIter over
  // `locals` and `args`: first the items whose index is below
  // args.lengthWithoutStackResults() (asserted to be arguments), then the
  // remaining items (asserted not to be arguments).  Along the way this
  // records the bounds of the non-argument locals, reserves a slot for the
  // instance pointer, sets the Local area size, and notes the offset of the
  // synthetic stack result pointer argument when there is one.  Returns false
  // if `localInfo` could not reserve space.
  [[nodiscard]] bool setupLocals(const ValTypeVector& locals,
                                 const ArgTypeVector& args, bool debugEnabled,
                                 LocalVector* localInfo) {
    if (!localInfo->reserve(locals.length())) {
      return false;
    }

    DebugOnly<uint32_t> index = 0;
    BaseLocalIter i(locals, args, debugEnabled);

    // The arguments come first.
    while (!i.done() && i.index() < args.lengthWithoutStackResults()) {
      MOZ_ASSERT(i.isArg());
      MOZ_ASSERT(i.index() == index);
      localInfo->infallibleEmplaceBack(i.mirType(), i.frameOffset());
      index++;
      i++;
    }
    varLow_ = i.frameSize();

    // Then the true (non-argument) locals.
    while (!i.done()) {
      MOZ_ASSERT(!i.isArg());
      MOZ_ASSERT(i.index() == index);
      localInfo->infallibleEmplaceBack(i.mirType(), i.frameOffset());
      index++;
      i++;
    }
    varHigh_ = i.frameSize();

    // One additional pointer-sized stack slot, placed after pointer-aligning
    // the end of the locals, holds the instance pointer.
    const uint32_t pointerAlignedVarHigh = AlignBytes(varHigh_, sizeof(void*));
    const uint32_t localSize = pointerAlignedVarHigh + sizeof(void*);
    instancePointerOffset_ = localSize;

    const uint32_t alignedLocalSize = AlignBytes(localSize, WasmStackAlignment);
    setLocalSize(alignedLocalSize);

    if (args.hasSyntheticStackResultPointerArg()) {
      stackResultsPtrOffset_ = Some(i.stackResultPointerOffset());
    }

    return true;
  }

  void zeroLocals(BaseRegAlloc* ra);

  // Address of `local`, displaced by `additionalOffset` bytes.  True locals
  // are addressed off the stack pointer, incoming stack arguments off the
  // frame pointer.
  Address addressOfLocal(const Local& local, uint32_t additionalOffset = 0) {
    if (!local.isStackArgument()) {
      return Address(sp_, localOffsetFromSp(local) + additionalOffset);
    }
    return Address(FramePointer,
                   stackArgumentOffsetFromFp(local) + additionalOffset);
  }

  void loadLocalI32(const Local& src, RegI32 dest) {
    masm.load32(addressOfLocal(src), dest);
  }

#ifndef JS_PUNBOX64
  void loadLocalI64Low(const Local& src, RegI32 dest) {
    masm.load32(addressOfLocal(src, INT64LOW_OFFSET), dest);
  }

  void loadLocalI64High(const Local& src, RegI32 dest) {
    masm.load32(addressOfLocal(src, INT64HIGH_OFFSET), dest);
  }
#endif

  void loadLocalI64(const Local& src, RegI64 dest) {
    masm.load64(addressOfLocal(src), dest);
  }

  void loadLocalRef(const Local& src, RegRef dest) {
    masm.loadPtr(addressOfLocal(src), dest);
  }

  void loadLocalF64(const Local& src, RegF64 dest) {
    masm.loadDouble(addressOfLocal(src), dest);
  }

  void loadLocalF32(const Local& src, RegF32 dest) {
    masm.loadFloat32(addressOfLocal(src), dest);
  }

#ifdef ENABLE_WASM_SIMD
  void loadLocalV128(const Local& src, RegV128 dest) {
    masm.loadUnalignedSimd128(addressOfLocal(src), dest);
  }
#endif

  // Emit a 32-bit store of `src` into local `dest`.
  void storeLocalI32(RegI32 src, const Local& dest) {
    const Address slot = addressOfLocal(dest);
    masm.store32(src, slot);
  }

  // Emit a 64-bit store of `src` into local `dest`.
  void storeLocalI64(RegI64 src, const Local& dest) {
    const Address slot = addressOfLocal(dest);
    masm.store64(src, slot);
  }

  // Emit a pointer-sized store of the reference `src` into local `dest`.
  void storeLocalRef(RegRef src, const Local& dest) {
    const Address slot = addressOfLocal(dest);
    masm.storePtr(src, slot);
  }

  // Emit a double store of `src` into local `dest`.
  void storeLocalF64(RegF64 src, const Local& dest) {
    const Address slot = addressOfLocal(dest);
    masm.storeDouble(src, slot);
  }

  // Emit a float32 store of `src` into local `dest`.
  void storeLocalF32(RegF32 src, const Local& dest) {
    const Address slot = addressOfLocal(dest);
    masm.storeFloat32(src, slot);
  }

#ifdef ENABLE_WASM_SIMD
  // Emit an unaligned 128-bit SIMD store of `src` into local `dest`.
  void storeLocalV128(RegV128 src, const Local& dest) {
    const Address slot = addressOfLocal(dest);
    masm.storeUnalignedSimd128(src, slot);
  }
#endif

  // Offset off of sp_ at which `local` lives.  Only valid for true locals,
  // not for incoming stack arguments.
  int32_t localOffsetFromSp(const Local& local) {
    MOZ_ASSERT(!local.isStackArgument());
    const int32_t offsetFromFrame = local.offs;
    return localOffset(offsetFromFrame);
  }

  // Offset off of the frame pointer for the incoming stack argument `local`:
  // the negation of its (negative) recorded offset.
  int32_t stackArgumentOffsetFromFp(const Local& local) {
    MOZ_ASSERT(local.isStackArgument());
    const int32_t recorded = local.offs;
    return -recorded;
  }

  // The incoming stack result area pointer is for stack results of the
  // function being compiled.  Load it into `reg`: a negative recorded offset
  // means it is read off the frame pointer, otherwise it is read from its
  // slot in the frame off the stack pointer.
  void loadIncomingStackResultAreaPtr(RegPtr reg) {
    const int32_t offset = stackResultsPtrOffset_.value();
    if (offset < 0) {
      masm.loadPtr(Address(FramePointer, -offset), reg);
      return;
    }
    masm.loadPtr(Address(sp_, stackOffset(offset)), reg);
  }

  // Spill the incoming stack result area pointer held in `reg` to its slot in
  // the frame.
  void storeIncomingStackResultAreaPtr(RegPtr reg) {
    // Getting here means the pointer to the stack results area arrived in a
    // register and is therefore spilled below the frame, so the recorded
    // offset is a positive height.
    MOZ_ASSERT(stackResultsPtrOffset_.value() > 0);
    const Address slot(sp_, stackOffset(stackResultsPtrOffset_.value()));
    masm.storePtr(reg, slot);
  }

  // Load the instance pointer from its slot in the frame into `dst`.
  void loadInstancePtr(Register dst) {
    const Address instanceSlot(sp_, stackOffset(instancePointerOffset_));
    masm.loadPtr(instanceSlot, dst);
  }

  // Store `instance` into the instance pointer slot in the frame.
  void storeInstancePtr(Register instance) {
    const Address instanceSlot(sp_, stackOffset(instancePointerOffset_));
    masm.storePtr(instance, instanceSlot);
  }

  // Offset off of sp_ of the instance pointer slot, given the current
  // masm.framePushed().
  int32_t getInstancePtrOffset() {
    const int32_t offsetFromSp = stackOffset(instancePointerOffset_);
    return offsetFromSp;
  }

  // An outgoing stack result area pointer is for stack results of callees of
  // the function being compiled.  Compute into `dest` the address that lies
  // framePushed() - results.height() bytes above the stack pointer.
  void computeOutgoingStackResultAreaPtr(const StackResultsLoc& results,
                                         RegPtr dest) {
    MOZ_ASSERT(results.height() <= masm.framePushed());
    const uint32_t offsetFromSP = masm.framePushed() - results.height();
    masm.moveStackPtrTo(dest);
    if (offsetFromSP == 0) {
      return;
    }
    masm.addPtr(Imm32(offsetFromSP), dest);
  }

 private:
  // Offset off of sp_ for a local whose offset from Frame is `offset`, given
  // the current masm.framePushed().
  int32_t localOffset(int32_t offset) {
    const uint32_t pushed = masm.framePushed();
    return pushed - offset;
  }

 public:
  ///////////////////////////////////////////////////////////////////////////
  //
  // Dynamic area

  static constexpr size_t StackSizeOfPtr = ABIResult::StackSizeOfPtr;
  static constexpr size_t StackSizeOfInt64 = ABIResult::StackSizeOfInt64;
  static constexpr size_t StackSizeOfFloat = ABIResult::StackSizeOfFloat;
  static constexpr size_t StackSizeOfDouble = ABIResult::StackSizeOfDouble;
#ifdef ENABLE_WASM_SIMD
  static constexpr size_t StackSizeOfV128 = ABIResult::StackSizeOfV128;
#endif

  // Push the register `r` onto the stack: the full 64-bit width on 64-bit
  // systems, and 32 bits otherwise.  Updates the frame-size high-water mark
  // and returns the stack height after the push.
  uint32_t pushGPR(Register r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    pushChunkyBytes(StackSizeOfPtr);
    const Address top(sp_, stackOffset(currentStackHeight()));
    masm.storePtr(r, top);
#else
    masm.Push(r);
#endif
    const uint32_t pushedNow = masm.framePushed();
    if (pushedNow > maxFramePushed_) {
      maxFramePushed_ = pushedNow;
    }
    MOZ_ASSERT(stackBefore + StackSizeOfPtr == currentStackHeight());
    return currentStackHeight();
  }

  // Push the float32 register `r` onto the stack, occupying StackSizeOfFloat
  // bytes.  Updates the frame-size high-water mark and returns the stack
  // height after the push.
  uint32_t pushFloat32(FloatRegister r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    pushChunkyBytes(StackSizeOfFloat);
    const Address top(sp_, stackOffset(currentStackHeight()));
    masm.storeFloat32(r, top);
#else
    masm.Push(r);
#endif
    const uint32_t pushedNow = masm.framePushed();
    if (pushedNow > maxFramePushed_) {
      maxFramePushed_ = pushedNow;
    }
    MOZ_ASSERT(stackBefore + StackSizeOfFloat == currentStackHeight());
    return currentStackHeight();
  }

#ifdef ENABLE_WASM_SIMD
  // Push the 128-bit SIMD register `r` onto the stack, occupying
  // StackSizeOfV128 bytes.  Space is made first and the value is then written
  // with an unaligned store.  Updates the frame-size high-water mark and
  // returns the stack height after the push.
  uint32_t pushV128(RegV128 r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#  ifdef RABALDR_CHUNKY_STACK
    pushChunkyBytes(StackSizeOfV128);
#  else
    masm.adjustStack(-static_cast<int>(StackSizeOfV128));
#  endif
    const Address top(sp_, stackOffset(currentStackHeight()));
    masm.storeUnalignedSimd128(r, top);
    const uint32_t pushedNow = masm.framePushed();
    if (pushedNow > maxFramePushed_) {
      maxFramePushed_ = pushedNow;
    }
    MOZ_ASSERT(stackBefore + StackSizeOfV128 == currentStackHeight());
    return currentStackHeight();
  }
#endif

  // Push the double register `r` onto the stack, occupying StackSizeOfDouble
  // bytes.  Updates the frame-size high-water mark and returns the stack
  // height after the push.
  uint32_t pushDouble(FloatRegister r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    pushChunkyBytes(StackSizeOfDouble);
    const Address top(sp_, stackOffset(currentStackHeight()));
    masm.storeDouble(r, top);
#else
    masm.Push(r);
#endif
    const uint32_t pushedNow = masm.framePushed();
    if (pushedNow > maxFramePushed_) {
      maxFramePushed_ = pushedNow;
    }
    MOZ_ASSERT(stackBefore + StackSizeOfDouble == currentStackHeight());
    return currentStackHeight();
  }

  // Pop the top of the stack into the register `r`: the full 64-bit width on
  // 64-bit systems, and 32 bits otherwise.
  void popGPR(Register r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    const Address top(sp_, stackOffset(currentStackHeight()));
    masm.loadPtr(top, r);
    popChunkyBytes(StackSizeOfPtr);
#else
    masm.Pop(r);
#endif
    MOZ_ASSERT(stackBefore - StackSizeOfPtr == currentStackHeight());
  }

  // Pop the top StackSizeOfFloat bytes of the stack into the float32 register
  // `r`.
  void popFloat32(FloatRegister r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    const Address top(sp_, stackOffset(currentStackHeight()));
    masm.loadFloat32(top, r);
    popChunkyBytes(StackSizeOfFloat);
#else
    masm.Pop(r);
#endif
    MOZ_ASSERT(stackBefore - StackSizeOfFloat == currentStackHeight());
  }

  // Pop the top StackSizeOfDouble bytes of the stack into the double register
  // `r`.
  void popDouble(FloatRegister r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    const Address top(sp_, stackOffset(currentStackHeight()));
    masm.loadDouble(top, r);
    popChunkyBytes(StackSizeOfDouble);
#else
    masm.Pop(r);
#endif
    MOZ_ASSERT(stackBefore - StackSizeOfDouble == currentStackHeight());
  }

#ifdef ENABLE_WASM_SIMD
  // Pop the top StackSizeOfV128 bytes of the stack into the SIMD register
  // `r`.  The value is read with an unaligned load before the space is
  // released.
  void popV128(RegV128 r) {
    DebugOnly<uint32_t> stackBefore = currentStackHeight();
    const Address top(sp_, stackOffset(currentStackHeight()));
    masm.loadUnalignedSimd128(top, r);
#  ifdef RABALDR_CHUNKY_STACK
    popChunkyBytes(StackSizeOfV128);
#  else
    masm.adjustStack(static_cast<int>(StackSizeOfV128));
#  endif
    MOZ_ASSERT(stackBefore - StackSizeOfV128 == currentStackHeight());
  }
#endif

  // Discards `bytes` bytes from the top of the stack.  Nothing is emitted
  // when `bytes` is zero.
  void popBytes(size_t bytes) {
    if (bytes == 0) {
      return;
    }
#ifdef RABALDR_CHUNKY_STACK
    popChunkyBytes(bytes);
#else
    masm.freeStack(bytes);
#endif
  }

  void loadStackI32(int32_t offset, RegI32 dest) {
    masm.load32(Address(sp_, stackOffset(offset)), dest);
  }

  void loadStackI64(int32_t offset, RegI64 dest) {
    masm.load64(Address(sp_, stackOffset(offset)), dest);
  }

#ifndef JS_PUNBOX64
  void loadStackI64Low(int32_t offset, RegI32 dest) {
    masm.load32(Address(sp_, stackOffset(offset - INT64LOW_OFFSET)), dest);
  }

  void loadStackI64High(int32_t offset, RegI32 dest) {
    masm.load32(Address(sp_, stackOffset(offset - INT64HIGH_OFFSET)), dest);
  }
#endif

  void loadStackRef(int32_t offset, RegRef dest) {
    masm.loadPtr(Address(sp_, stackOffset(offset)), dest);
  }

  void loadStackF64(int32_t offset, RegF64 dest) {
    masm.loadDouble(Address(sp_, stackOffset(offset)), dest);
  }

  void loadStackF32(int32_t offset, RegF32 dest) {
    masm.loadFloat32(Address(sp_, stackOffset(offset)), dest);
  }

#ifdef ENABLE_WASM_SIMD
  void loadStackV128(int32_t offset, RegV128 dest) {
    masm.loadUnalignedSimd128(Address(sp_, stackOffset(offset)), dest);
  }
#endif

  // Makes sure the stack is at least as tall as the height that
  // computeHeightWithStackResults() yields for `stackBase` and
  // `stackResultBytes`, growing it if it currently falls short.  Returns that
  // height.
  uint32_t prepareStackResultArea(StackHeight stackBase,
                                  uint32_t stackResultBytes) {
    const uint32_t targetHeight =
        computeHeightWithStackResults(stackBase, stackResultBytes);
    if (currentStackHeight() >= targetHeight) {
      // Already tall enough; nothing to reserve.
      return targetHeight;
    }

    const uint32_t shortfall = targetHeight - currentStackHeight();
#ifdef RABALDR_CHUNKY_STACK
    pushChunkyBytes(shortfall);
#else
    masm.reserveStack(shortfall);
#endif
    // Keep track of the largest framePushed value seen so far.
    const auto framePushedNow = masm.framePushed();
    if (maxFramePushed_ < framePushedNow) {
      maxFramePushed_ = framePushedNow;
    }
    return targetHeight;
  }

  // Pops the stack down to the height that computeHeightWithStackResults()
  // yields for `stackBase` and `stackResultBytes`.  The current height must
  // already be at least that tall.
  void finishStackResultArea(StackHeight stackBase, uint32_t stackResultBytes) {
    const uint32_t targetHeight =
        computeHeightWithStackResults(stackBase, stackResultBytes);
    MOZ_ASSERT(targetHeight <= currentStackHeight());
    const uint32_t surplus = currentStackHeight() - targetHeight;
    popBytes(surplus);
  }

  // Copies |bytes| bytes of stack results from the area at |srcHeight| to the
  // area at |destHeight|, which must be the smaller of the two heights.  Both
  // heights *include* |bytes|.  |temp| is used as scratch for each word moved.
  void shuffleStackResultsTowardFP(uint32_t srcHeight, uint32_t destHeight,
                                   uint32_t bytes, Register temp) {
    MOZ_ASSERT(destHeight < srcHeight);
    MOZ_ASSERT(bytes % sizeof(uint32_t) == 0);
    // Both cursors start just past the end of their areas and move downward,
    // one pointer-sized word at a time.
    uint32_t destCursor = stackOffset(destHeight) + bytes;
    uint32_t srcCursor = stackOffset(srcHeight) + bytes;
    uint32_t remaining = bytes;
    for (; remaining >= sizeof(intptr_t); remaining -= sizeof(intptr_t)) {
      destCursor -= sizeof(intptr_t);
      srcCursor -= sizeof(intptr_t);
      masm.loadPtr(Address(sp_, srcCursor), temp);
      masm.storePtr(temp, Address(sp_, destCursor));
    }
    if (remaining != 0) {
      // Whatever is left over can only be a single 32-bit word.
      MOZ_ASSERT(remaining == sizeof(uint32_t));
      destCursor -= sizeof(uint32_t);
      srcCursor -= sizeof(uint32_t);
      masm.load32(Address(sp_, srcCursor), temp);
      masm.store32(temp, Address(sp_, destCursor));
    }
  }

  // StackHeight flavour of the shuffle.  In contrast to the overload taking
  // raw heights, |srcHeight| and |destHeight| here do *not* include |bytes|;
  // the result area is added on via computeHeightWithStackResults() before
  // delegating to the raw-height overload.
  void shuffleStackResultsTowardFP(StackHeight srcHeight,
                                   StackHeight destHeight, uint32_t bytes,
                                   Register temp) {
    MOZ_ASSERT(srcHeight.isValid());
    MOZ_ASSERT(destHeight.isValid());
    const uint32_t srcWithResults =
        computeHeightWithStackResults(srcHeight, bytes);
    const uint32_t destWithResults =
        computeHeightWithStackResults(destHeight, bytes);
    // Both areas must lie within the stack as it currently stands.
    MOZ_ASSERT(srcWithResults <= currentStackHeight());
    MOZ_ASSERT(destWithResults <= currentStackHeight());
    shuffleStackResultsTowardFP(srcWithResults, destWithResults, bytes, temp);
  }

  // Copies |bytes| bytes of stack results from the area at |srcHeight| to the
  // area at |destHeight|, which must be the larger of the two heights.  Both
  // heights *include* |bytes|.  |temp| is used as scratch for each word moved.
  void shuffleStackResultsTowardSP(uint32_t srcHeight, uint32_t destHeight,
                                   uint32_t bytes, Register temp) {
    MOZ_ASSERT(destHeight > srcHeight);
    MOZ_ASSERT(bytes % sizeof(uint32_t) == 0);
    // Both cursors start at the beginning of their areas and move upward, one
    // pointer-sized word at a time.
    uint32_t destCursor = stackOffset(destHeight);
    uint32_t srcCursor = stackOffset(srcHeight);
    uint32_t remaining = bytes;
    for (; remaining >= sizeof(intptr_t); remaining -= sizeof(intptr_t)) {
      masm.loadPtr(Address(sp_, srcCursor), temp);
      masm.storePtr(temp, Address(sp_, destCursor));
      destCursor += sizeof(intptr_t);
      srcCursor += sizeof(intptr_t);
    }
    if (remaining != 0) {
      // Whatever is left over can only be a single 32-bit word.
      MOZ_ASSERT(remaining == sizeof(uint32_t));
      masm.load32(Address(sp_, srcCursor), temp);
      masm.store32(temp, Address(sp_, destCursor));
    }
  }

  // Moves the topmost `bytes` bytes of the current stack frame out to memory
  // and then pops them.  `dest` holds the address of the lowest byte of the
  // destination area; `temp` is used as scratch for each word moved.
  void popStackResultsToMemory(Register dest, uint32_t bytes, Register temp) {
    MOZ_ASSERT(bytes <= currentStackHeight());
    MOZ_ASSERT(bytes % sizeof(uint32_t) == 0);
    uint32_t srcCursor = stackOffset(currentStackHeight());
    uint32_t destCursor = 0;
    uint32_t remaining = bytes;
    // Copy upward from the stack top, one pointer-sized word at a time.
    for (; remaining >= sizeof(intptr_t); remaining -= sizeof(intptr_t)) {
      masm.loadPtr(Address(sp_, srcCursor), temp);
      masm.storePtr(temp, Address(dest, destCursor));
      destCursor += sizeof(intptr_t);
      srcCursor += sizeof(intptr_t);
    }
    if (remaining != 0) {
      // Whatever is left over can only be a single 32-bit word.
      MOZ_ASSERT(remaining == sizeof(uint32_t));
      masm.load32(Address(sp_, srcCursor), temp);
      masm.store32(temp, Address(dest, destCursor));
    }
    // Everything has been copied out; release the whole area in one go.
    popBytes(bytes);
  }

  // Allocates `argSize` bytes of argument area via the base-class allocator
  // and updates maxFramePushed_.  Does nothing when `argSize` is zero.
  void allocArgArea(size_t argSize) {
    if (argSize == 0) {
      return;
    }
    BaseStackFrameAllocator::allocArgArea(argSize);
    const auto framePushedNow = masm.framePushed();
    if (maxFramePushed_ < framePushedNow) {
      maxFramePushed_ = framePushedNow;
    }
  }

 private:
  void store32BitsToStack(int32_t imm, uint32_t destHeight, Register temp) {
    masm.move32(Imm32(imm), temp);
    masm.store32(temp, Address(sp_, stackOffset(destHeight)));
  }

  // Stores the 64-bit constant `imm` to the stack location that
  // stackOffset(destHeight) addresses relative to sp_, using `temp` as
  // scratch.
  //
  // On 64-bit (JS_PUNBOX64) targets this is a single 64-bit move and store.
  // Otherwise the value is written as two 32-bit stores: the first 32-bit
  // word of the in-memory representation of `imm` goes to `destHeight` and
  // the second to `destHeight - sizeof(int32_t)`.
  void store64BitsToStack(int64_t imm, uint32_t destHeight, Register temp) {
#ifdef JS_PUNBOX64
    masm.move64(Imm64(imm), Register64(temp));
    masm.store64(Register64(temp), Address(sp_, stackOffset(destHeight)));
#else
    // Split the value with memcpy rather than by reading the inactive member
    // of a union: the latter is undefined behavior in C++.  memcpy yields the
    // same two words, in the target's native byte order.
    int32_t halves[2];
    static_assert(sizeof(halves) == sizeof(imm));
    memcpy(halves, &imm, sizeof(halves));
    store32BitsToStack(halves[0], destHeight, temp);
    store32BitsToStack(halves[1], destHeight - sizeof(int32_t), temp);
#endif
  }

 public:
  // Stores the pointer-sized constant `imm` to the stack at `destHeight`,
  // using `temp` as scratch.  The store width follows StackSizeOfPtr: 8 bytes
  // under JS_PUNBOX64 and 4 bytes otherwise.
  void storeImmediatePtrToStack(intptr_t imm, uint32_t destHeight,
                                Register temp) {
#ifdef JS_PUNBOX64
    static_assert(StackSizeOfPtr == 8);
    const int64_t wideImm = imm;
    store64BitsToStack(wideImm, destHeight, temp);
#else
    static_assert(StackSizeOfPtr == 4);
    store32BitsToStack(static_cast<int32_t>(imm), destHeight, temp);
#endif
  }

  // Stores the 64-bit integer constant `imm` to the stack at `destHeight`,
  // using `temp` as scratch.  This simply forwards to store64BitsToStack().
  void storeImmediateI64ToStack(int64_t imm, uint32_t destHeight,
                                Register temp) {
    store64BitsToStack(imm, destHeight, temp);
  }

  // Stores the bit pattern of the float constant `imm` to the stack at
  // `destHeight`, using `temp` as scratch.  When StackSizeOfFloat is 4 only
  // those 32 bits are written; otherwise the bits are zero-extended and a
  // full 64-bit store is done.
  void storeImmediateF32ToStack(float imm, uint32_t destHeight, Register temp) {
    // Obtain the float's bits with memcpy rather than by reading the inactive
    // member of a union, which is undefined behavior in C++.
    int32_t bits;
    static_assert(sizeof(bits) == sizeof(imm));
    memcpy(&bits, &imm, sizeof(bits));
    // Do not store 4 bytes if StackSizeOfFloat == 8.  It's probably OK to do
    // so, but it costs little to store something predictable.
    if (StackSizeOfFloat == 4) {
      store32BitsToStack(bits, destHeight, temp);
    } else {
      // Going through uint32_t zero-extends the bits to 64 bits.
      store64BitsToStack(uint32_t(bits), destHeight, temp);
    }
  }

  // Stores the bit pattern of the double constant `imm` to the stack at
  // `destHeight`, using `temp` as scratch.
  void storeImmediateF64ToStack(double imm, uint32_t destHeight,
                                Register temp) {
    // Obtain the double's bits with memcpy rather than by reading the
    // inactive member of a union, which is undefined behavior in C++.
    int64_t bits;
    static_assert(sizeof(bits) == sizeof(imm));
    memcpy(&bits, &imm, sizeof(bits));
    store64BitsToStack(bits, destHeight, temp);
  }

#ifdef ENABLE_WASM_SIMD
  // Stores the 16 bytes of the v128 constant `imm` to the stack, using `temp`
  // as scratch.  The bytes are written as four 32-bit words: word i of
  // `imm.bytes` goes to height `destHeight - i * sizeof(int32_t)`.
  void storeImmediateV128ToStack(V128 imm, uint32_t destHeight, Register temp) {
    int32_t words[4] = {};
    static_assert(sizeof(words) == 16);
    memcpy(words, imm.bytes, 16);
    for (unsigned wordIndex = 0; wordIndex < 4; wordIndex++) {
      const int32_t word = words[wordIndex];
      store32BitsToStack(word, destHeight - wordIndex * sizeof(int32_t), temp);
    }
  }
#endif
};

//////////////////////////////////////////////////////////////////////////////
//
// MachineStackTracker, used for stack-slot pointerness tracking.

// Records, word by word, which slots of the machine stack hold GC pointers.
// The read-only queries (isGCPointer, length, numPtrs) are const-qualified so
// that they can also be used through a const reference or pointer.
class MachineStackTracker {
  // Simulates the machine's stack, with one bool per word.  Index zero in
  // this vector corresponds to the highest address in the machine stack.  The
  // last entry corresponds to what SP currently points at.  This all assumes
  // a grow-down stack.
  //
  // numPtrs_ contains the number of "true" values in vec_, and is therefore
  // redundant.  But it serves as a constant-time way to detect the common
  // case where vec_ holds no "true" values.
  size_t numPtrs_;
  Vector<bool, 64, SystemAllocPolicy> vec_;

 public:
  MachineStackTracker() : numPtrs_(0) {}

  // In DEBUG builds, checks on destruction that the cached count numPtrs_
  // still agrees with the number of "true" entries in vec_.
  ~MachineStackTracker() {
#ifdef DEBUG
    size_t n = 0;
    for (bool b : vec_) {
      n += (b ? 1 : 0);
    }
    MOZ_ASSERT(n == numPtrs_);
#endif
  }

  // Clone this MachineStackTracker, writing the result at |dst|.
  [[nodiscard]] bool cloneTo(MachineStackTracker* dst);

  // Notionally push |n| non-pointers on the stack.  Returns the result of the
  // underlying append, which the caller must check.
  [[nodiscard]] bool pushNonGCPointers(size_t n) {
    return vec_.appendN(false, n);
  }

  // Mark the stack slot |offsetFromSP| up from the bottom as holding a
  // pointer.
  void setGCPointer(size_t offsetFromSP) {
    // offsetFromSP == 0 denotes the most recently pushed item, == 1 the
    // second most recently pushed item, etc.
    MOZ_ASSERT(offsetFromSP < vec_.length());

    size_t offsetFromTop = vec_.length() - 1 - offsetFromSP;
    // Only count the slot if it was not already marked as a pointer.
    if (!vec_[offsetFromTop]) {
      vec_[offsetFromTop] = true;
      numPtrs_++;
    }
  }

  // Query the pointerness of the slot |offsetFromSP| up from the bottom.
  bool isGCPointer(size_t offsetFromSP) const {
    MOZ_ASSERT(offsetFromSP < vec_.length());

    size_t offsetFromTop = vec_.length() - 1 - offsetFromSP;
    return vec_[offsetFromTop];
  }

  // Return the number of words tracked by this MachineStackTracker.
  size_t length() const { return vec_.length(); }

  // Return the number of pointer-typed words tracked by this
  // MachineStackTracker.
  size_t numPtrs() const {
    MOZ_ASSERT(numPtrs_ <= length());
    return numPtrs_;
  }

  // Discard all contents, but (per mozilla::Vector::clear semantics) don't
  // free or reallocate any dynamic storage associated with |vec_|.
  void clear() {
    vec_.clear();
    numPtrs_ = 0;
  }
};

//////////////////////////////////////////////////////////////////////////////
//
// StackMapGenerator, which carries all state needed to create stackmaps.

// Passed to StackMapGenerator::createStackMap() to indicate whether the
// associated frame possibly has a DebugFrame that must be traced.
enum class HasDebugFrameWithLiveRefs { No, Maybe };

struct StackMapGenerator {
 private:
  // --- Fixed for the whole of the function's compilation ---

  // Describes where the trap exit stub saves each register; stackmap
  // generation needs those offsets.
  const RegisterOffsets& trapExitLayout_;
  const size_t trapExitLayoutNumWords_;

  // Destination for finished stackmaps.
  StackMaps* stackMaps_;

  // Gives access to the current assembler offset when a stackmap is created.
  const MacroAssembler& masm_;

 public:
  // --- Fixed once beginFunction() has completed ---

  // How many words of this function's arguments were passed in memory.
  size_t numStackArgWords = 0;

  // Tracks which words of the machine stack hold pointers.
  MachineStackTracker machineStackTracker;

  // The value of masm.framePushed at entry to the function's body.  It is a
  // Maybe so that createStackMap can tell whether we are still in the
  // prologue; it goes from Nothing to Some exactly once per function.
  Maybe<uint32_t> framePushedAtEntryToBody;

  // --- May change at any time ---

  // The value masm.framePushed would have at a function call instruction,
  // minus the stack area used to pass arguments in memory.  In other words,
  // for an upcoming function call this holds
  //
  //   masm.framePushed() at the call instruction -
  //      StackArgAreaSizeUnaligned(argumentTypes)
  //
  // It marks the lowest-addressed stack word covered by the current
  // function's stackmap; words below it form the highest-addressed area of
  // the callee's stackmap.  All alignment padding above the in-memory
  // arguments themselves belongs to the caller's stackmap, which is why the
  // definition uses StackArgAreaSizeUnaligned() rather than
  // StackArgAreaSizeAligned().
  //
  // Outside of a function call setup/teardown sequence the value is Nothing.
  // It may switch between Nothing and Some in either direction, any number
  // of times, as compilation moves through the function body.
  Maybe<uint32_t> framePushedExcludingOutboundCallArgs;

  // How many entries on the containing BaseCompiler::stk_ are both
  // memory-resident and ref-typed.
  size_t memRefsOnStk = 0;

  // A copy of machineStackTracker that is used only inside individual calls
  // to createStackMap.  It is kept as a member, rather than being local to
  // createStackMap(), purely to avoid possible heap allocation costs.
  MachineStackTracker augmentedMst;

  // The counters numStackArgWords and memRefsOnStk start at zero by way of
  // their default member initializers.
  StackMapGenerator(StackMaps* stackMaps, const RegisterOffsets& trapExitLayout,
                    const size_t trapExitLayoutNumWords,
                    const MacroAssembler& masm)
      : trapExitLayout_(trapExitLayout),
        trapExitLayoutNumWords_(trapExitLayoutNumWords),
        stackMaps_(stackMaps),
        masm_(masm) {}

  // When the stack overflow check is performed at the start of a function,
  // live roots may still be sitting in registers (as arguments).  This method
  // produces the "extra" stackmap entries that describe them, for the case
  // where the check fails and we end up calling into the wasm exit stub
  // generated by GenerateTrapExit().
  //
  // The map produced must match exactly the stack layout of the integer
  // registers as saved by (the code generated by) GenerateTrapExit().  That
  // layout is described by trapExitLayout_ together with
  // trapExitLayoutNumWords_, both of which are created by
  // GenerateTrapExitRegisterOffsets().
  [[nodiscard]] bool generateStackmapEntriesForTrapExit(
      const ArgTypeVector& args, ExitStubMapVector* extras);

  // Builds a stackmap for the instruction denoted by |assemblerOffset|.  The
  // map incorporates the pointers on the current operand stack |stk|, any
  // extra pointers in |extras| (placed at the lower addressed end), and,
  // where |debugFrameWithLiveRefs| says so, a DebugFrame in the associated
  // frame that must be traced.
  [[nodiscard]] bool createStackMap(
      const char* who, const ExitStubMapVector& extras,
      uint32_t assemblerOffset,
      HasDebugFrameWithLiveRefs debugFrameWithLiveRefs, const StkVector& stk);
};

}  // namespace wasm
}  // namespace js

#endif  // wasm_wasm_baseline_frame_h