Bug 1319388 - wasm baseline: Generate function prologue in-line by patching. r=luke
authorLars T Hansen <lhansen@mozilla.com>
Thu, 29 Dec 2016 07:28:03 +0100
changeset 327534 0de5f500516b0d576fd77f342ac2570b536e8115
parent 327533 e0e05d0bc34b19e5c034de40f1435cde9a803226
child 327535 1882ac4adbc561063b4c527769583d102bd93f42
push id35517
push userkwierso@gmail.com
push dateThu, 29 Dec 2016 20:22:54 +0000
treeherderautoland@3f2f8d77ad27 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersluke
bugs1319388
milestone53.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1319388 - wasm baseline: Generate function prologue in-line by patching. r=luke
js/src/wasm/WasmBaselineCompile.cpp
--- a/js/src/wasm/WasmBaselineCompile.cpp
+++ b/js/src/wasm/WasmBaselineCompile.cpp
@@ -498,19 +498,19 @@ class BaseCompiler
     bool                        deadCode_;       // Flag indicating we should decode & discard the opcode
     ValTypeVector               SigI64I64_;
     ValTypeVector               SigDD_;
     ValTypeVector               SigD_;
     ValTypeVector               SigF_;
     ValTypeVector               SigI_;
     ValTypeVector               Sig_;
     Label                       returnLabel_;
-    Label                       outOfLinePrologue_;
-    Label                       bodyLabel_;
+    Label                       stackOverflowLabel_;
     TrapOffset                  prologueTrapOffset_;
+    CodeOffset                  stackAddOffset_;
 
     LatentOp                    latentOp_;       // Latent operation for branch (seen next)
     ValType                     latentType_;     // Operand type, if latentOp_ is true
     Assembler::Condition        latentIntCmp_;   // Comparison operator, if latentOp_ == Compare, int types
     Assembler::DoubleCondition  latentDoubleCmp_;// Comparison operator, if latentOp_ == Compare, float types
 
     FuncCompileResults&         compileResults_;
     MacroAssembler&             masm;            // No '_' suffix - too tedious...
@@ -2047,26 +2047,28 @@ class BaseCompiler
 
         SigIdDesc sigId = env_.funcSigs[func_.index()]->id;
         GenerateFunctionPrologue(masm, localSize_, sigId, &compileResults_.offsets());
 
         MOZ_ASSERT(masm.framePushed() == uint32_t(localSize_));
 
         maxFramePushed_ = localSize_;
 
-        // We won't know until after we've generated code how big the
-        // frame will be (we may need arbitrary spill slots and
-        // outgoing param slots) so branch to code emitted after the
-        // function body that will perform the check.
+        // We won't know until after we've generated code how big the frame will
+        // be (we may need arbitrary spill slots and outgoing param slots) so
+        // emit a patchable add that is patched in endFunction().
         //
-        // Code there will also assume that the fixed-size stack frame
-        // has been allocated.
-
-        masm.jump(&outOfLinePrologue_);
-        masm.bind(&bodyLabel_);
+        // ScratchReg may be used by branchPtr(), so use ABINonArgReg0 for the
+        // effective address.
+
+        stackAddOffset_ = masm.add32ToPtrWithPatch(StackPointer, ABINonArgReg0);
+        masm.branchPtr(Assembler::AboveOrEqual,
+                       Address(WasmTlsReg, offsetof(TlsData, stackLimit)),
+                       ABINonArgReg0,
+                       &stackOverflowLabel_);
 
         // Copy arguments from registers to stack.
 
         const ValTypeVector& args = func_.sig().args();
 
         for (ABIArgIter<const ValTypeVector> i(args); !i.done(); i++) {
             Local& l = localInfo_[i.index()];
             switch (i.mirType()) {
@@ -2113,40 +2115,28 @@ class BaseCompiler
             ScratchI32 scratch(*this);
             masm.mov(ImmWord(0), scratch);
             for (int32_t i = varLow_ ; i < varHigh_ ; i += 4)
                 storeToFrameI32(scratch, i + 4);
         }
     }
 
     bool endFunction() {
-        // Always branch to outOfLinePrologue_ or returnLabel_.
+        // Always branch to stackOverflowLabel_ or returnLabel_.
         masm.breakpoint();
 
-        // Out-of-line prologue.  Assumes that the in-line prologue has
-        // been executed and that a frame of size = localSize_ + sizeof(Frame)
-        // has been allocated.
-
-        masm.bind(&outOfLinePrologue_);
-
+        // Patch the add in the prologue so that it checks against the correct
+        // frame size.
         MOZ_ASSERT(maxFramePushed_ >= localSize_);
-
-        // ABINonArgReg0 != ScratchReg, which can be used by branchPtr().
-
-        masm.movePtr(masm.getStackPointer(), ABINonArgReg0);
-        if (maxFramePushed_ - localSize_)
-            masm.subPtr(Imm32(maxFramePushed_ - localSize_), ABINonArgReg0);
-        masm.branchPtr(Assembler::Below,
-                       Address(WasmTlsReg, offsetof(TlsData, stackLimit)),
-                       ABINonArgReg0,
-                       &bodyLabel_);
+        masm.patchAdd32ToPtr(stackAddOffset_, Imm32(-int32_t(maxFramePushed_ - localSize_)));
 
         // Since we just overflowed the stack, to be on the safe side, pop the
         // stack so that, when the trap exit stub executes, it is a safe
         // distance away from the end of the native stack.
+        masm.bind(&stackOverflowLabel_);
         if (localSize_)
             masm.addToStackPtr(Imm32(localSize_));
         masm.jump(TrapDesc(prologueTrapOffset_, Trap::StackOverflow, /* framePushed = */ 0));
 
         masm.bind(&returnLabel_);
 
         // Restore the TLS register in case it was overwritten by the function.
         loadFromFramePtr(WasmTlsReg, frameOffsetFromSlot(tlsSlot_, MIRType::Pointer));
@@ -7551,16 +7541,17 @@ BaseCompiler::BaseCompiler(const ModuleE
       alloc_(compileResults.alloc()),
       locals_(locals),
       localSize_(0),
       varLow_(0),
       varHigh_(0),
       maxFramePushed_(0),
       deadCode_(false),
       prologueTrapOffset_(trapOffset()),
+      stackAddOffset_(0),
       latentOp_(LatentOp::None),
       latentType_(ValType::I32),
       latentIntCmp_(Assembler::Equal),
       latentDoubleCmp_(Assembler::DoubleEqual),
       compileResults_(compileResults),
       masm(compileResults_.masm()),
       availGPR_(GeneralRegisterSet::All()),
       availFPU_(FloatRegisterSet::All()),