Bug 1637868 - Make Baseline free from frame's tls r=wingo,lth
authorDmitry Bezhetskov <dbezhetskov@igalia.com>
Fri, 15 May 2020 08:17:56 +0000
changeset 530242 a9970903e180d4e6392b6aa2e27875d35249327c
parent 530241 8c9fb10d84edc87a28eb4ea6fbec64e9c56cb2be
child 530243 34e4adeaf16bbb51729be4dbcb5e29ed16edd05c
push id37420
push usernerli@mozilla.com
push dateFri, 15 May 2020 21:52:36 +0000
treeherdermozilla-central@f340bbb582d1 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewerswingo, lth
bugs1637868
milestone78.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1637868 - Make Baseline free from frame's tls r=wingo,lth We are going to get rid of the TLS slot in the Frame because we don't need it all the time. Currently it consumes one stack slot in the Frame and, on arm64, forces the Frame to add one additional padding slot. In this patch we remove Baseline's dependency on Frame::tls. Note that we need to allocate a stack slot for the WasmTlsReg value because Baseline can clobber WasmTlsReg and we must be able to reload it when we need it, for example when we need access to the Instance*. A follow-up patch will do the same for the Ion compiler. Differential Revision: https://phabricator.services.mozilla.com/D66201
js/src/wasm/WasmBaselineCompile.cpp
--- a/js/src/wasm/WasmBaselineCompile.cpp
+++ b/js/src/wasm/WasmBaselineCompile.cpp
@@ -1095,16 +1095,18 @@ using ScratchI8 = ScratchI32;
 //                |    Frame (fixed size)        |
 // -------------  +==============================+ <-------------------- FP
 //         ^      |    DebugFrame (optional)     |    ^  ^             ^^
 //   localSize    |    Register arg local        |    |  |             ||
 //         |      |    ...                       |    |  |     framePushed
 //         |      |    Register stack result ptr?|    |  |             ||
 //         |      |    Non-arg local             |    |  |             ||
 //         |      |    ...                       |    |  |             ||
+//         |      |    (padding)                 |    |  |             ||
+//         |      |    Tls pointer               |    |  |             ||
 //         |      +------------------------------+    |  |             ||
 //         v      |    (padding)                 |    |  v             ||
 // -------------  +==============================+ currentStackHeight  ||
 //         ^      |    Dynamic (variable size)   |    |                ||
 //  dynamicSize   |    ...                       |    |                ||
 //         v      |    ...                       |    v                ||
 // -------------  |    (free space, sometimes)   | ---------           v|
 //                +==============================+ <----- SP not-during calls
@@ -1633,31 +1635,35 @@ class BaseStackFrame final : public Base
   uint32_t maxFramePushed_;
 
   // Patch point where we check for stack overflow.
   CodeOffset stackAddOffset_;
 
   // Low byte offset of pointer to stack results, if any.
   Maybe<int32_t> stackResultsPtrOffset_;
 
+  // Offset of the stack slot that holds the saved TLS pointer.
+  uint32_t tlsPointerOffset_;
+
   // Low byte offset of local area for true locals (not parameters).
   uint32_t varLow_;
 
   // High byte offset + 1 of local area for true locals.
   uint32_t varHigh_;
 
   // The stack pointer, cached for brevity.
   RegisterOrSP sp_;
 
  public:
   explicit BaseStackFrame(MacroAssembler& masm)
       : BaseStackFrameAllocator(masm),
         masm(masm),
         maxFramePushed_(0),
         stackAddOffset_(0),
+        tlsPointerOffset_(UINT32_MAX),
         varLow_(UINT32_MAX),
         varHigh_(UINT32_MAX),
         sp_(masm.getStackPointer()) {}
 
   ///////////////////////////////////////////////////////////////////////////
   //
   // Stack management and overflow checking
 
@@ -1750,17 +1756,22 @@ class BaseStackFrame final : public Base
     for (; !i.done(); i++) {
       MOZ_ASSERT(!i.isArg());
       MOZ_ASSERT(i.index() == index);
       localInfo->infallibleEmplaceBack(i.mirType(), i.frameOffset());
       index++;
     }
     varHigh_ = i.frameSize();
 
-    setLocalSize(AlignBytes(varHigh_, WasmStackAlignment));
+    // Reserve an additional stack slot for the TLS pointer.
+    const uint32_t pointerAlignedVarHigh = AlignBytes(varHigh_, sizeof(void*));
+    const uint32_t localSize = pointerAlignedVarHigh + sizeof(void*);
+    tlsPointerOffset_ = localSize;
+
+    setLocalSize(AlignBytes(localSize, WasmStackAlignment));
 
     if (args.hasSyntheticStackResultPointerArg()) {
       stackResultsPtrOffset_ = Some(i.stackResultPointerOffset());
     }
 
     return true;
   }
 
@@ -1861,16 +1872,24 @@ class BaseStackFrame final : public Base
     // If we get here, that means the pointer to the stack results area was
     // passed in as a register, and therefore it will be spilled below the
     // frame, so the offset is a positive height.
     MOZ_ASSERT(stackResultsPtrOffset_.value() > 0);
     masm.storePtr(reg,
                   Address(sp_, stackOffset(stackResultsPtrOffset_.value())));
   }
 
+  void loadTlsPtr(Register dst) {
+    masm.loadPtr(Address(sp_, stackOffset(tlsPointerOffset_)), dst);
+  }
+
+  void storeTlsPtr(Register tls) {
+    masm.storePtr(tls, Address(sp_, stackOffset(tlsPointerOffset_)));
+  }
+
   // An outgoing stack result area pointer is for stack results of callees of
   // the function being compiled.
   void computeOutgoingStackResultAreaPtr(const StackResultsLoc& results,
                                          RegPtr dest) {
     MOZ_ASSERT(results.height() <= masm.framePushed());
     uint32_t offsetFromSP = masm.framePushed() - results.height();
     masm.moveStackPtrTo(dest);
     if (offsetFromSP) {
@@ -5203,16 +5222,17 @@ class BaseCompiler final : public BaseCo
           break;
 #endif
         default:
           MOZ_CRASH("Function argument type");
       }
     }
 
     fr.zeroLocals(&ra);
+    fr.storeTlsPtr(WasmTlsReg);
 
     if (env_.debugEnabled()) {
       insertBreakablePoint(CallSiteDesc::EnterFrame);
       if (!createStackMap("debug: breakable point")) {
         return false;
       }
     }
 
@@ -5456,24 +5476,24 @@ class BaseCompiler final : public BaseCo
     size_t adjustment = call.stackArgAreaSize + call.frameAlignAdjustment;
     fr.freeArgAreaAndPopBytes(adjustment, stackSpace);
 
     MOZ_ASSERT(
         stackMapGenerator_.framePushedExcludingOutboundCallArgs.isSome());
     stackMapGenerator_.framePushedExcludingOutboundCallArgs.reset();
 
     if (call.isInterModule) {
-      masm.loadWasmTlsRegFromFrame();
+      fr.loadTlsPtr(WasmTlsReg);
       masm.loadWasmPinnedRegsFromTls();
       masm.switchToWasmTlsRealm(ABINonArgReturnReg0, ABINonArgReturnReg1);
     } else if (call.usesSystemAbi) {
       // On x86 there are no pinned registers, so don't waste time
       // reloading the Tls.
 #ifndef JS_CODEGEN_X86
-      masm.loadWasmTlsRegFromFrame();
+      fr.loadTlsPtr(WasmTlsReg);
       masm.loadWasmPinnedRegsFromTls();
 #endif
     }
   }
 
   void startCallArgs(size_t stackArgAreaSizeUnaligned, FunctionCall* call) {
     size_t stackArgAreaSizeAligned =
         AlignStackArgAreaSize(stackArgAreaSizeUnaligned);
@@ -5714,17 +5734,17 @@ class BaseCompiler final : public BaseCo
   CodeOffset builtinCall(SymbolicAddress builtin, const FunctionCall& call) {
     return callSymbolic(builtin, call);
   }
 
   CodeOffset builtinInstanceMethodCall(const SymbolicAddressSignature& builtin,
                                        const ABIArg& instanceArg,
                                        const FunctionCall& call) {
     // Builtin method calls assume the TLS register has been set.
-    masm.loadWasmTlsRegFromFrame();
+    fr.loadTlsPtr(WasmTlsReg);
 
     CallSiteDesc desc(call.lineOrBytecode, CallSiteDesc::Symbolic);
     return masm.wasmCallBuiltinInstanceMethod(
         desc, instanceArg, builtin.identity, builtin.failureMode);
   }
 
   //////////////////////////////////////////////////////////////////////
   //
@@ -5741,17 +5761,17 @@ class BaseCompiler final : public BaseCo
   void moveImmRef(intptr_t v, RegPtr dest) { masm.movePtr(ImmWord(v), dest); }
 
   void moveImmF32(float f, RegF32 dest) { masm.loadConstantFloat32(f, dest); }
 
   void moveImmF64(double d, RegF64 dest) { masm.loadConstantDouble(d, dest); }
 
   MOZ_MUST_USE bool addInterruptCheck() {
     ScratchI32 tmp(*this);
-    masm.loadWasmTlsRegFromFrame(tmp);
+    fr.loadTlsPtr(tmp);
     masm.wasmInterruptCheck(tmp, bytecodeOffset());
     return createStackMap("addInterruptCheck");
   }
 
   void jumpTable(const LabelVector& labels, Label* theTable) {
     // Flush constant pools to ensure that the table is never interrupted by
     // constant pool entries.
     masm.flush();
@@ -6291,17 +6311,17 @@ class BaseCompiler final : public BaseCo
 
   //////////////////////////////////////////////////////////////////////
   //
   // Global variable access.
 
   Address addressOfGlobalVar(const GlobalDesc& global, RegI32 tmp) {
     uint32_t globalToTlsOffset =
         offsetof(TlsData, globalArea) + global.offset();
-    masm.loadWasmTlsRegFromFrame(tmp);
+    fr.loadTlsPtr(tmp);
     if (global.isIndirect()) {
       masm.loadPtr(Address(tmp, globalToTlsOffset), tmp);
       return Address(tmp, 0);
     }
     return Address(tmp, globalToTlsOffset);
   }
 
   //////////////////////////////////////////////////////////////////////
@@ -7555,20 +7575,20 @@ class BaseCompiler final : public BaseCo
   //
   // The `valueAddr` is the address of the location that we are about to
   // update.  This function preserves that register.
 
   void emitPreBarrier(RegPtr valueAddr) {
     Label skipBarrier;
     ScratchPtr scratch(*this);
 
-    masm.loadWasmTlsRegFromFrame(scratch);
+    fr.loadTlsPtr(scratch);
     EmitWasmPreBarrierGuard(masm, scratch, scratch, valueAddr, &skipBarrier);
 
-    masm.loadWasmTlsRegFromFrame(scratch);
+    fr.loadTlsPtr(scratch);
 #ifdef JS_CODEGEN_ARM64
     // The prebarrier stub assumes the PseudoStackPointer is set up.  It is OK
     // to just move the sp to x28 here because x28 is not being used by the
     // baseline compiler and need not be saved or restored.
     MOZ_ASSERT(!GeneralRegisterSet::All().hasRegisterIndex(x28.asUnsized()));
     masm.Mov(x28, sp);
 #endif
     EmitWasmPreBarrierCall(masm, scratch, scratch, valueAddr);
@@ -9865,17 +9885,17 @@ bool BaseCompiler::emitCallArgs(const Va
         masm.storePtr(scratch, Address(masm.getStackPointer(),
                                        argLoc.offsetFromArgBase()));
       } else {
         fr.computeOutgoingStackResultAreaPtr(results, RegPtr(argLoc.gpr()));
       }
     }
   }
 
-  masm.loadWasmTlsRegFromFrame();
+  fr.loadTlsPtr(WasmTlsReg);
   return true;
 }
 
 void BaseCompiler::pushReturnValueOfCall(const FunctionCall& call,
                                          MIRType type) {
   switch (type) {
     case MIRType::Int32: {
       RegI32 rv = captureReturnedI32();
@@ -10741,37 +10761,37 @@ void BaseCompiler::pushHeapBase() {
   moveI64(RegI64(Register64(HeapReg)), heapBase);
   pushI64(heapBase);
 #elif defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_MIPS32)
   RegI32 heapBase = needI32();
   moveI32(RegI32(HeapReg), heapBase);
   pushI32(heapBase);
 #elif defined(JS_CODEGEN_X86)
   RegI32 heapBase = needI32();
-  masm.loadWasmTlsRegFromFrame(heapBase);
+  fr.loadTlsPtr(heapBase);
   masm.loadPtr(Address(heapBase, offsetof(TlsData, memoryBase)), heapBase);
   pushI32(heapBase);
 #else
   MOZ_CRASH("BaseCompiler platform hook: pushHeapBase");
 #endif
 }
 
 RegI32 BaseCompiler::maybeLoadTlsForAccess(const AccessCheck& check) {
   RegI32 tls;
   if (needTlsForAccess(check)) {
     tls = needI32();
-    masm.loadWasmTlsRegFromFrame(tls);
+    fr.loadTlsPtr(tls);
   }
   return tls;
 }
 
 RegI32 BaseCompiler::maybeLoadTlsForAccess(const AccessCheck& check,
                                            RegI32 specific) {
   if (needTlsForAccess(check)) {
-    masm.loadWasmTlsRegFromFrame(specific);
+    fr.loadTlsPtr(specific);
     return specific;
   }
   return RegI32::Invalid();
 }
 
 bool BaseCompiler::loadCommon(MemoryAccessDesc* access, AccessCheck check,
                               ValType type) {
   RegI32 tls, temp1, temp2, temp3;