Bug 1527259 - Fix wasm-baseline stackmap boundaries at function calls. r=lhansen.
author: Julian Seward <jseward@acm.org>
date: Thu, 14 Feb 2019 10:25:42 +0100
changeset 459316 6eba9a719f8e1328706f939c6ab10eb69ece4ce7
parent 459315 7f50b598aae62897c87c868da2972c7df96c78fb
child 459317 06a5e063b4099007d8cb2b9722dae0818d4ee6bd
push id: 35557
push user: dvarga@mozilla.com
push date: Fri, 15 Feb 2019 01:42:08 +0000
treeherder: mozilla-central@426ca85d2303
reviewers: lhansen
bugs: 1527259, 1517924
milestone: 67.0a1
Bug 1527259 - Fix wasm-baseline stackmap boundaries at function calls. r=lhansen.

The boundaries of Baseline-created stackmaps, specifically the lowest and
highest addressed stack words covered by each map, are not quite correct.
For Baseline-to-Baseline calls, this isn't a problem; the map boundaries in
this domain are internally self-consistent.  But when calling to or from
Ion-generated code, it becomes apparent that the Ion maps (bug 1517924)
disagree with Baseline maps in such a way that, depending on the direction
of the call, either there could be words on the stack covered by neither
map (caller nor callee), or there could be words covered by both maps.

In detail: Baseline so far assumes that the size of the outbound argument
area is stackArgAreaSize(callee arg types).  That, however, 16-aligns the
size and in the process can introduce up to 12 bytes of padding.  In
Baseline that doesn't matter; via Ion, however, those 12 bytes can (and
have been seen to) belong to the caller's spill area, so they shouldn't be
covered by the callee's map.

The fix is simple in essence: split stackArgAreaSize into pre- and
post-alignment parts and use the pre- rather than the post-alignment part,
as required.

This patch also takes the opportunity to:

* rename StackMapGenerator::framePushedBeforePushingCallArgs_ to
  framePushedExcludingOutboundCallArgs_, for clarity and consistency with
  ongoing Ion map work

* similarly, rename nStackArgBytes in BaseCompiler::beginFunction

* create new files WasmGC.{h,cpp}, as a home for GC-related code that is
  used by more than one of the compilers

* move stackArgAreaSize into WasmGC.h, and split it into pre/post-aligned
  variants

* fix a related error in a comment showing the general layout of stack
  frames.
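
As a standalone sketch of the arithmetic behind the bug (hypothetical
sizes, not from the patch; AlignBytes is a hand-rolled stand-in for the
helper the real code uses):

  #include <cassert>
  #include <cstddef>

  // Stand-in for the 16-byte rounding that stackArgAreaSize() used to
  // apply unconditionally.
  static size_t AlignBytes(size_t bytes, size_t alignment) {
    return (bytes + alignment - 1) & ~(alignment - 1);
  }

  int main() {
    // Suppose the callee's in-memory args need 20 bytes per the native ABI.
    size_t unaligned = 20;                       // StackArgAreaSizeUnaligned
    size_t aligned = AlignBytes(unaligned, 16);  // StackArgAreaSizeAligned
    assert(aligned == 32);

    // The bug: Baseline treated all 32 bytes as belonging to the callee's
    // stackmap.  Ion may use the top (aligned - unaligned) == 12 bytes as
    // caller spill, so those words must stay in the caller's map.
    assert(aligned - unaligned == 12);
    return 0;
  }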
js/src/wasm/WasmBaselineCompile.cpp
js/src/wasm/WasmGC.cpp
js/src/wasm/WasmGC.h
js/src/wasm/moz.build
--- a/js/src/wasm/WasmBaselineCompile.cpp
+++ b/js/src/wasm/WasmBaselineCompile.cpp
@@ -133,16 +133,17 @@
 #  include "jit/mips-shared/Assembler-mips-shared.h"
 #  include "jit/mips32/Assembler-mips32.h"
 #endif
 #if defined(JS_CODEGEN_MIPS64)
 #  include "jit/mips-shared/Assembler-mips-shared.h"
 #  include "jit/mips64/Assembler-mips64.h"
 #endif
 
+#include "wasm/WasmGC.h"
 #include "wasm/WasmGenerator.h"
 #include "wasm/WasmInstance.h"
 #include "wasm/WasmOpIter.h"
 #include "wasm/WasmSignalHandlers.h"
 #include "wasm/WasmStubs.h"
 #include "wasm/WasmValidate.h"
 
 #include "jit/MacroAssembler-inl.h"
@@ -957,18 +958,16 @@ using ScratchI8 = ScratchI32;
 //  - the Dynamic area, comprising the temporary storage the compiler uses for
 //    register spilling, allocated below the Local area;
 //  - the Arguments area, comprising memory allocated for outgoing calls,
 //    allocated below the Dynamic area.
 //
 //                 +============================+
 //                 |    Incoming arg            |
 //                 |    ...                     |
-//                 +----------------------------+
-//                 |    unspecified             |
 // --------------  +============================+
 //                 |    Frame (fixed size)      |
 // --------------  +============================+ <-------------------- FP
 //          ^      |    DebugFrame (optional)   |    ^                ^^
 //          |      +----------------------------+    |                ||
 //    localSize    |    Local (static size)     |    |                ||
 //          |      |    ...                     |    |        framePushed
 //          v      |    (padding)               |    |                ||
@@ -2101,28 +2100,34 @@ struct StackMapGenerator {
   // This holds masm.framePushed at entry to the function's body.  It is a
   // Maybe because createStackMap needs to know whether or not we're still
   // in the prologue.  It makes a Nothing-to-Some transition just once per
   // function.
   Maybe<uint32_t> framePushedAtEntryToBody_;
 
   // --- These can change at any point ---
 
-  // This holds masm.framePushed immediately before we move the stack
-  // pointer down so as to reserve space, in a function call, for arguments
-  // passed in memory.  To be more precise: this holds the value
-  // masm.framePushed would have had after moving the stack pointer over any
-  // alignment padding pushed before the arguments proper, but before the
-  // downward movement of the stack pointer that allocates space for the
-  // arguments proper.
+  // This holds masm.framePushed as it would be for a function call
+  // instruction, but excluding the stack area used to pass arguments in
+  // memory.  That is, for an upcoming function call, this will hold
+  //
+  //   masm.framePushed() at the call instruction -
+  //      StackArgAreaSizeUnaligned(argumentTypes)
+  //
+  // This value denotes the lowest-addressed stack word covered by the current
+  // function's stackmap.  Words below this point form the highest-addressed
+  // area of the callee's stackmap.  Note that all alignment padding above the
+  // arguments-in-memory themselves belongs to the caller's stack map, which
+  // is why this is defined in terms of StackArgAreaSizeUnaligned() rather than
+  // StackArgAreaSizeAligned().
   //
   // When not inside a function call setup/teardown sequence, it is Nothing.
   // It can make Nothing-to/from-Some transitions arbitrarily as we progress
   // through the function body.
-  Maybe<uint32_t> framePushedBeforePushingCallArgs_;
+  Maybe<uint32_t> framePushedExcludingOutboundCallArgs_;
 
   // The number of memory-resident, ref-typed entries on the containing
   // BaseCompiler::stk_.
   size_t memRefsOnStk_;
 
   // This is a copy of mst_ that is used only within individual calls to
   // createStackMap.  It is here only to avoid possible heap allocation costs
   // resulting from making it local to createStackMap().
@@ -2224,34 +2229,41 @@ struct StackMapGenerator {
     // At this point, augmentedMst only contains entries covering the
     // incoming argument area (if any) and for the area allocated by this
     // function's prologue.  We now need to calculate how far the machine's
     // stack pointer is below where it was at the start of the body.  But we
     // must take care not to include any words pushed as arguments to an
     // upcoming function call, since those words "belong" to the stackmap of
     // the callee, not to the stackmap of this function.  Note however that
     // any alignment padding pushed prior to pushing the args *does* belong to
-    // this function.  That padding is taken into account at the point where
-    // framePushedBeforePushingCallArgs_ is set.
+    // this function.
+    //
+    // That padding is taken into account at the point where
+    // framePushedExcludingOutboundCallArgs_ is set, viz., in startCallArgs(),
+    // and comprises two components:
+    //
+    // * call->frameAlignAdjustment
+    // * the padding applied to the stack arg area itself.  That is:
+    //   StackArgAreaSizeAligned(argTys) - StackArgAreaSizeUnaligned(argTys)
     Maybe<uint32_t> framePushedExcludingArgs;
     if (framePushedAtEntryToBody_.isNothing()) {
       // Still in the prologue.  framePushedExcludingArgs remains Nothing.
-      MOZ_ASSERT(framePushedBeforePushingCallArgs_.isNothing());
+      MOZ_ASSERT(framePushedExcludingOutboundCallArgs_.isNothing());
     } else {
       // In the body.
       MOZ_ASSERT(masm_.framePushed() >= framePushedAtEntryToBody_.value());
-      if (framePushedBeforePushingCallArgs_.isSome()) {
+      if (framePushedExcludingOutboundCallArgs_.isSome()) {
         // In the body, and we've potentially pushed some args onto the stack.
         // We must ignore them when sizing the stackmap.
         MOZ_ASSERT(masm_.framePushed() >=
-                   framePushedBeforePushingCallArgs_.value());
-        MOZ_ASSERT(framePushedBeforePushingCallArgs_.value() >=
+                   framePushedExcludingOutboundCallArgs_.value());
+        MOZ_ASSERT(framePushedExcludingOutboundCallArgs_.value() >=
                    framePushedAtEntryToBody_.value());
         framePushedExcludingArgs =
-            Some(framePushedBeforePushingCallArgs_.value());
+            Some(framePushedExcludingOutboundCallArgs_.value());
       } else {
         // In the body, but not with call args on the stack.  The stackmap
         // must be sized so as to extend all the way "down" to
         // masm_.framePushed().
         framePushedExcludingArgs = Some(masm_.framePushed());
       }
     }
 
@@ -4050,34 +4062,34 @@ class BaseCompiler final : public BaseCo
             (int)func_.index);
 
     // Make a start on the stack map for this function.  Inspect the args so
     // as to determine which of them are both in-memory and pointer-typed, and
     // add entries to mst_ as appropriate.
 
     const ValTypeVector& argTys = env_.funcTypes[func_.index]->args();
 
-    size_t nStackArgBytes = stackArgAreaSize(argTys);
-    MOZ_ASSERT(nStackArgBytes % sizeof(void*) == 0);
-    smgen_.numStackArgWords_ = nStackArgBytes / sizeof(void*);
+    size_t nInboundStackArgBytes = StackArgAreaSizeUnaligned(argTys);
+    MOZ_ASSERT(nInboundStackArgBytes % sizeof(void*) == 0);
+    smgen_.numStackArgWords_ = nInboundStackArgBytes / sizeof(void*);
 
     MOZ_ASSERT(smgen_.mst_.length() == 0);
     if (!smgen_.mst_.pushNonGCPointers(smgen_.numStackArgWords_)) {
       return false;
     }
 
     for (ABIArgIter<const ValTypeVector> i(argTys); !i.done(); i++) {
       ABIArg argLoc = *i;
       const ValType& ty = argTys[i.index()];
       MOZ_ASSERT(ToMIRType(ty) != MIRType::Pointer);
       if (argLoc.kind() != ABIArg::Stack || !ty.isReference()) {
         continue;
       }
       uint32_t offset = argLoc.offsetFromArgBase();
-      MOZ_ASSERT(offset < nStackArgBytes);
+      MOZ_ASSERT(offset < nInboundStackArgBytes);
       MOZ_ASSERT(offset % sizeof(void*) == 0);
       smgen_.mst_.setGCPointer(offset / sizeof(void*));
     }
 
     GenerateFunctionPrologue(
         masm, env_.funcTypes[func_.index]->id,
         env_.mode() == CompileMode::Tier1 ? Some(func_.index) : Nothing(),
         &offsets_);
@@ -4378,59 +4390,52 @@ class BaseCompiler final : public BaseCo
     call.frameAlignAdjustment = ComputeByteAlignment(
         masm.framePushed() + sizeof(Frame), JitStackAlignment);
   }
 
   void endCall(FunctionCall& call, size_t stackSpace) {
     size_t adjustment = call.stackArgAreaSize + call.frameAlignAdjustment;
     fr.freeArgAreaAndPopBytes(adjustment, stackSpace);
 
-    MOZ_ASSERT(smgen_.framePushedBeforePushingCallArgs_.isSome());
-    smgen_.framePushedBeforePushingCallArgs_.reset();
+    MOZ_ASSERT(smgen_.framePushedExcludingOutboundCallArgs_.isSome());
+    smgen_.framePushedExcludingOutboundCallArgs_.reset();
 
     if (call.isInterModule) {
       masm.loadWasmTlsRegFromFrame();
       masm.loadWasmPinnedRegsFromTls();
       masm.switchToWasmTlsRealm(ABINonArgReturnReg0, ABINonArgReturnReg1);
     } else if (call.usesSystemAbi) {
       // On x86 there are no pinned registers, so don't waste time
       // reloading the Tls.
 #ifndef JS_CODEGEN_X86
       masm.loadWasmTlsRegFromFrame();
       masm.loadWasmPinnedRegsFromTls();
 #endif
     }
   }
 
-  // TODO / OPTIMIZE (Bug 1316821): This is expensive; let's roll the iterator
-  // walking into the walking done for passArg.  See comments in passArg.
-
-  // Note, stackArgAreaSize() must process all the arguments to get the
-  // alignment right; the signature must therefore be the complete call
-  // signature.
-
-  template <class T>
-  size_t stackArgAreaSize(const T& args) {
-    ABIArgIter<const T> i(args);
-    while (!i.done()) {
-      i++;
-    }
-    return AlignBytes(i.stackBytesConsumedSoFar(), 16u);
-  }
-
-  void startCallArgs(size_t stackArgAreaSize, FunctionCall* call) {
+  void startCallArgs(size_t stackArgAreaSizeUnaligned, FunctionCall* call) {
+    size_t stackArgAreaSizeAligned
+        = AlignStackArgAreaSize(stackArgAreaSizeUnaligned);
+    MOZ_ASSERT(stackArgAreaSizeUnaligned <= stackArgAreaSizeAligned);
+
     // Record the masm.framePushed() value at this point, before we push args
     // for the call, but including the alignment space placed above the args.
     // This defines the lower limit of the stackmap that will be created for
     // this call.
-    MOZ_ASSERT(smgen_.framePushedBeforePushingCallArgs_.isNothing());
-    smgen_.framePushedBeforePushingCallArgs_.emplace(
-        masm.framePushed() + call->frameAlignAdjustment);
-
-    call->stackArgAreaSize = stackArgAreaSize;
+    MOZ_ASSERT(smgen_.framePushedExcludingOutboundCallArgs_.isNothing());
+    smgen_.framePushedExcludingOutboundCallArgs_.emplace(
+        // However much we've pushed so far
+        masm.framePushed() +
+        // Extra space we'll push to get the frame aligned
+        call->frameAlignAdjustment +
+        // Extra space we'll push to get the outbound arg area 16-aligned
+        (stackArgAreaSizeAligned - stackArgAreaSizeUnaligned));
+
+    call->stackArgAreaSize = stackArgAreaSizeAligned;
 
     size_t adjustment = call->stackArgAreaSize + call->frameAlignAdjustment;
     fr.allocArgArea(adjustment);
   }
 
   const ABIArg reservePointerArgument(FunctionCall* call) {
     return call->abi.next(MIRType::Pointer);
   }
@@ -4439,27 +4444,27 @@ class BaseCompiler final : public BaseCo
   // (Or it was, until Luke wandered through, but that can be fixed again.)
   // I'm not saying we should manually inline it, but we could hoist the
   // dispatch into the caller and have type-specific implementations of
   // passArg: passArgI32(), etc.  Then those might be inlined, at least in PGO
   // builds.
   //
   // The bulk of the work here (60%) is in the next() call, though.
   //
-  // Notably, since next() is so expensive, stackArgAreaSize() becomes
-  // expensive too.
-  //
-  // Somehow there could be a trick here where the sequence of
-  // argument types (read from the input stream) leads to a cached
-  // entry for stackArgAreaSize() and for how to pass arguments...
-  //
-  // But at least we could reduce the cost of stackArgAreaSize() by
-  // first reading the argument types into a (reusable) vector, then
-  // we have the outgoing size at low cost, and then we can pass
-  // args based on the info we read.
+  // Notably, since next() is so expensive, StackArgAreaSizeUnaligned()
+  // becomes expensive too.
+  //
+  // Somehow there could be a trick here where the sequence of argument types
+  // (read from the input stream) leads to a cached entry for
+  // StackArgAreaSizeUnaligned() and for how to pass arguments...
+  //
+  // But at least we could reduce the cost of StackArgAreaSizeUnaligned() by
+  // first reading the argument types into a (reusable) vector, then we have
+  // the outgoing size at low cost, and then we can pass args based on the
+  // info we read.
 
   void passArg(ValType type, const Stk& arg, FunctionCall* call) {
     switch (type.code()) {
       case ValType::I32: {
         ABIArg argLoc = call->abi.next(MIRType::Int32);
         if (argLoc.kind() == ABIArg::Stack) {
           ScratchI32 scratch(*this);
           loadI32(arg, scratch);
@@ -8596,17 +8601,17 @@ bool BaseCompiler::emitReturn() {
 
   return true;
 }
 
 bool BaseCompiler::emitCallArgs(const ValTypeVector& argTypes,
                                 FunctionCall* baselineCall) {
   MOZ_ASSERT(!deadCode_);
 
-  startCallArgs(stackArgAreaSize(argTypes), baselineCall);
+  startCallArgs(StackArgAreaSizeUnaligned(argTypes), baselineCall);
 
   uint32_t numArgs = argTypes.length();
   for (size_t i = 0; i < numArgs; ++i) {
     passArg(argTypes[i], peek(numArgs - 1 - i), baselineCall);
   }
 
   masm.loadWasmTlsRegFromFrame();
   return true;
@@ -9761,17 +9766,17 @@ bool BaseCompiler::emitInstanceCall(uint
   uint32_t numArgs = sig.length() - 1 /* instance */;
   size_t stackSpace = stackConsumed(numArgs);
 
   FunctionCall baselineCall(lineOrBytecode);
   beginCall(baselineCall, UseABI::System, InterModule::True);
 
   ABIArg instanceArg = reservePointerArgument(&baselineCall);
 
-  startCallArgs(stackArgAreaSize(sig), &baselineCall);
+  startCallArgs(StackArgAreaSizeUnaligned(sig), &baselineCall);
   for (uint32_t i = 1; i < sig.length(); i++) {
     ValType t;
     switch (sig[i]) {
       case MIRType::Int32:
         t = ValType::I32;
         break;
       case MIRType::Int64:
         t = ValType::I64;
@@ -10897,17 +10902,17 @@ bool BaseCompiler::emitBody() {
     }
 
     // Going below framePushedAtEntryToBody_ would imply that we've
     // popped off the machine stack, part of the frame created by
     // beginFunction().
     MOZ_ASSERT(masm.framePushed() >= smgen_.framePushedAtEntryToBody_.value());
 
     // At this point we're definitely not generating code for a function call.
-    MOZ_ASSERT(smgen_.framePushedBeforePushingCallArgs_.isNothing());
+    MOZ_ASSERT(smgen_.framePushedExcludingOutboundCallArgs_.isNothing());
 
     switch (op.b0) {
       case uint16_t(Op::End):
         if (!emitEnd()) {
           return false;
         }
 
         if (iter_.controlStackEmpty()) {
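
For concreteness, a worked example (hypothetical numbers, not from the
patch) of the boundary that startCallArgs() now records, checked against
the formula given in the StackMapGenerator comment above:

  #include <cassert>
  #include <cstddef>

  int main() {
    size_t framePushed = 40;          // masm.framePushed() at startCallArgs()
    size_t frameAlignAdjustment = 8;  // call->frameAlignAdjustment
    size_t unaligned = 20;            // StackArgAreaSizeUnaligned(argTypes)
    size_t aligned = 32;              // AlignStackArgAreaSize(unaligned)

    // The recorded boundary: everything the caller owns, including both
    // kinds of padding, but excluding the outbound args proper.
    size_t boundary =
        framePushed + frameAlignAdjustment + (aligned - unaligned);
    assert(boundary == 60);

    // After fr.allocArgArea(aligned + frameAlignAdjustment), framePushed
    // is 80 at the call instruction, and the comment's formula agrees:
    // framePushed at the call - StackArgAreaSizeUnaligned == 80 - 20 == 60.
    size_t framePushedAtCall = framePushed + aligned + frameAlignAdjustment;
    assert(framePushedAtCall - unaligned == boundary);
    return 0;
  }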
new file mode 100644
--- /dev/null
+++ b/js/src/wasm/WasmGC.cpp
@@ -0,0 +1,25 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ *
+ * Copyright 2019 Mozilla Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "wasm/WasmGC.h"
+
+namespace js {
+namespace wasm {
+
+}  // namespace wasm
+}  // namespace js
new file mode 100644
--- /dev/null
+++ b/js/src/wasm/WasmGC.h
@@ -0,0 +1,62 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ *
+ * Copyright 2019 Mozilla Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef wasm_gc_h
+#define wasm_gc_h
+
+#include "jit/MacroAssembler.h"
+
+namespace js {
+namespace wasm {
+
+using namespace js::jit;
+
+// StackArgAreaSizeUnaligned returns the size, in bytes, of the stack arg
+// area needed to pass |argTypes|, excluding any alignment padding beyond
+// the size of the area as a whole.  The size is as determined by the
+// platform's native ABI.
+//
+// StackArgAreaSizeAligned returns the same, but rounded up to the nearest
+// 16-byte boundary.
+//
+// Note, StackArgAreaSize{Unaligned,Aligned}() must process all the arguments
+// in order to take into account all necessary alignment constraints.  The
+// signature must include any receiver argument -- in other words, it must be
+// the complete native-ABI-level call signature.
+template <class T>
+static inline size_t StackArgAreaSizeUnaligned(const T& argTypes) {
+  ABIArgIter<const T> i(argTypes);
+  while (!i.done()) {
+    i++;
+  }
+  return i.stackBytesConsumedSoFar();
+}
+
+static inline size_t AlignStackArgAreaSize(size_t unalignedSize) {
+  return AlignBytes(unalignedSize, 16u);
+}
+
+template <class T>
+static inline size_t StackArgAreaSizeAligned(const T& argTypes) {
+  return AlignStackArgAreaSize(StackArgAreaSizeUnaligned(argTypes));
+}
+
+}  // namespace wasm
+}  // namespace js
+
+#endif  // wasm_gc_h
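
A few compile-time sanity checks (a sketch; AlignBytes is re-declared here
only so the fragment stands alone, and is assumed to round up to a multiple
of the power-of-two alignment, as the jit helper does):

  #include <cstddef>

  constexpr size_t AlignBytes(size_t bytes, size_t alignment) {
    return (bytes + alignment - 1) & ~(alignment - 1);
  }

  constexpr size_t AlignStackArgAreaSize(size_t unalignedSize) {
    return AlignBytes(unalignedSize, 16u);
  }

  static_assert(AlignStackArgAreaSize(0) == 0, "no args, no area");
  static_assert(AlignStackArgAreaSize(16) == 16, "already aligned");
  static_assert(AlignStackArgAreaSize(20) == 32, "12 bytes of padding added");
  static_assert(AlignStackArgAreaSize(32) == 32, "padding never exceeds 12");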
--- a/js/src/wasm/moz.build
+++ b/js/src/wasm/moz.build
@@ -24,16 +24,17 @@ if CONFIG['ENABLE_WASM_CRANELIFT']:
 UNIFIED_SOURCES += [
     'AsmJS.cpp',
     'WasmBaselineCompile.cpp',
     'WasmBuiltins.cpp',
     'WasmCode.cpp',
     'WasmCompile.cpp',
     'WasmDebug.cpp',
     'WasmFrameIter.cpp',
+    'WasmGC.cpp',
     'WasmGenerator.cpp',
     'WasmInstance.cpp',
     'WasmIonCompile.cpp',
     'WasmJS.cpp',
     'WasmModule.cpp',
     'WasmOpIter.cpp',
     'WasmProcess.cpp',
     'WasmRealm.cpp',