Bug 1342353 - Wasm baseline, properly compute aligned frame size. r=bbouvier
author: Lars T Hansen <lhansen@mozilla.com>
date: Fri, 24 Feb 2017 10:47:55 +0100
changeset 373801 d798f41291ba515aef8849c3b8d16112e3c222ef
parent 373800 d5a8190c6f7b94fa125a5277046eb38d2423d5c0
child 373802 e0028f7c1c008dab9ef0aab6fa73195048313bf2
push id: 10863
push user: jlorenzo@mozilla.com
push date: Mon, 06 Mar 2017 23:02:23 +0000
treeherder: mozilla-aurora@0931190cd725
reviewers: bbouvier
bugs: 1342353
milestone: 54.0a1
Bug 1342353 - Wasm baseline, properly compute aligned frame size. r=bbouvier
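Sketching the arithmetic behind the fix, as read from the diff below: when the instance/TLS pointer spilled to the stack, the removed code reserved sizeof(void*) + AlignBytes(S, 16) bytes, where S counted the stack space of the explicit arguments only, so the reserved area was not a multiple of 16. The new code folds the pointer into the call signature (SigPI_, SigP_) and computes AlignBytes(S', 16) over the complete signature, so the 16-byte rounding is applied once to the whole argument area.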
js/src/wasm/WasmBaselineCompile.cpp
--- a/js/src/wasm/WasmBaselineCompile.cpp
+++ b/js/src/wasm/WasmBaselineCompile.cpp
@@ -480,18 +480,18 @@ class BaseCompiler
     int32_t                     varHigh_;        // High byte offset + 1 of local area for true locals
     int32_t                     maxFramePushed_; // Max value of masm.framePushed() observed
     bool                        deadCode_;       // Flag indicating we should decode & discard the opcode
     bool                        debugEnabled_;
     BCESet                      bceSafe_;        // Locals that have been bounds checked and not updated since
     ValTypeVector               SigI64I64_;
     ValTypeVector               SigD_;
     ValTypeVector               SigF_;
-    ValTypeVector               SigI_;
-    ValTypeVector               Sig_;
+    MIRTypeVector               SigPI_;
+    MIRTypeVector               SigP_;
     Label                       returnLabel_;
     Label                       stackOverflowLabel_;
     CodeOffset                  stackAddOffset_;
 
     LatentOp                    latentOp_;       // Latent operation for branch (seen next)
     ValType                     latentType_;     // Operand type, if latentOp_ is true
     Assembler::Condition        latentIntCmp_;   // Comparison operator, if latentOp_ == Compare, int types
     Assembler::DoubleCondition  latentDoubleCmp_;// Comparison operator, if latentOp_ == Compare, float types
@@ -2374,40 +2374,39 @@ class BaseCompiler
             loadFromFramePtr(WasmTlsReg, frameOffsetFromSlot(tlsSlot_, MIRType::Pointer));
             masm.loadWasmPinnedRegsFromTls();
         }
     }
 
     // TODO / OPTIMIZE (Bug 1316821): This is expensive; let's roll the iterator
     // walking into the walking done for passArg.  See comments in passArg.
 
-    size_t stackArgAreaSize(const ValTypeVector& args) {
-        ABIArgIter<const ValTypeVector> i(args);
+    // Note, stackArgAreaSize() must process all the arguments to get the
+    // alignment right; the signature must therefore be the complete call
+    // signature.
+
+    template<class T>
+    size_t stackArgAreaSize(const T& args) {
+        ABIArgIter<const T> i(args);
         while (!i.done())
             i++;
         return AlignBytes(i.stackBytesConsumedSoFar(), 16u);
     }
 
     void startCallArgs(FunctionCall& call, size_t stackArgAreaSize)
     {
-        // It's possible the TLS pointer may have implicitly used stack before.
-        MOZ_ASSERT(call.stackArgAreaSize == 0 || call.stackArgAreaSize == sizeof(void*));
-
-        call.stackArgAreaSize += stackArgAreaSize;
+        call.stackArgAreaSize = stackArgAreaSize;
 
         size_t adjustment = call.stackArgAreaSize + call.frameAlignAdjustment;
         if (adjustment)
             masm.reserveStack(adjustment);
     }
 
     const ABIArg reservePointerArgument(FunctionCall& call) {
-        ABIArg ret = call.abi.next(MIRType::Pointer);
-        if (ret.kind() == ABIArg::Stack)
-            call.stackArgAreaSize += sizeof(void*);
-        return ret;
+        return call.abi.next(MIRType::Pointer);
     }
 
     // TODO / OPTIMIZE (Bug 1316821): Note passArg is used only in one place.
     // (Or it was, until Luke wandered through, but that can be fixed again.)
     // I'm not saying we should manually inline it, but we could hoist the
     // dispatch into the caller and have type-specific implementations of
     // passArg: passArgI32(), etc.  Then those might be inlined, at least in PGO
     // builds.
@@ -6597,17 +6596,17 @@ BaseCompiler::emitGrowMemory()
     uint32_t numArgs = 1;
     size_t stackSpace = stackConsumed(numArgs);
 
     FunctionCall baselineCall(lineOrBytecode);
     beginCall(baselineCall, UseABI::System, InterModule::True);
 
     ABIArg instanceArg = reservePointerArgument(baselineCall);
 
-    startCallArgs(baselineCall, stackArgAreaSize(SigI_));
+    startCallArgs(baselineCall, stackArgAreaSize(SigPI_));
     passArg(baselineCall, ValType::I32, peek(0));
     builtinInstanceMethodCall(SymbolicAddress::GrowMemory, instanceArg, baselineCall);
     endCall(baselineCall, stackSpace);
 
     popValueStackBy(numArgs);
 
     pushReturned(baselineCall, ExprType::I32);
 
@@ -6627,17 +6626,17 @@ BaseCompiler::emitCurrentMemory()
 
     sync();
 
     FunctionCall baselineCall(lineOrBytecode);
     beginCall(baselineCall, UseABI::System, InterModule::False);
 
     ABIArg instanceArg = reservePointerArgument(baselineCall);
 
-    startCallArgs(baselineCall, stackArgAreaSize(Sig_));
+    startCallArgs(baselineCall, stackArgAreaSize(SigP_));
     builtinInstanceMethodCall(SymbolicAddress::CurrentMemory, instanceArg, baselineCall);
     endCall(baselineCall, 0);
 
     pushReturned(baselineCall, ExprType::I32);
 
     return true;
 }
 
@@ -7402,17 +7401,19 @@ BaseCompiler::BaseCompiler(const ModuleE
 
 bool
 BaseCompiler::init()
 {
     if (!SigD_.append(ValType::F64))
         return false;
     if (!SigF_.append(ValType::F32))
         return false;
-    if (!SigI_.append(ValType::I32))
+    if (!SigP_.append(MIRType::Pointer))
+        return false;
+    if (!SigPI_.append(MIRType::Pointer) || !SigPI_.append(MIRType::Int32))
         return false;
     if (!SigI64I64_.append(ValType::I64) || !SigI64I64_.append(ValType::I64))
         return false;
 
     const ValTypeVector& args = func_.sig().args();
 
     // localInfo_ contains an entry for every local in locals_, followed by
     // entries for special locals. Currently the only special local is the TLS
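
For illustration only (not part of the patch), here is a minimal standalone C++ sketch of the computation that the templated stackArgAreaSize() performs: walk every argument of the complete call signature, accumulate the stack bytes of arguments that do not fit in registers, and round the total up to 16. ArgType, SizeOf, kNumArgRegs, and StackArgAreaSize are hypothetical stand-ins for the real MIRType/ABIArgIter machinery; the toy "first N args in registers" ABI is an assumption for the example.

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    // Hypothetical stand-in for MIRType; only the cases used below.
    enum class ArgType { Int32, Int64, Pointer, Float64 };

    static size_t SizeOf(ArgType t) {
        switch (t) {
          case ArgType::Int32:   return 4;
          case ArgType::Int64:   return 8;
          case ArgType::Pointer: return sizeof(void*);
          case ArgType::Float64: return 8;
        }
        return 0;
    }

    // Round up to a power-of-two alignment, as the real AlignBytes does.
    static size_t AlignBytes(size_t bytes, size_t alignment) {
        return (bytes + alignment - 1) & ~(alignment - 1);
    }

    // Toy ABI: the first kNumArgRegs arguments travel in registers, the
    // rest spill to the stack.  The real ABIArgIter distinguishes GPR/FPR
    // classes per platform, but the shape of the computation is the same.
    static const size_t kNumArgRegs = 4;

    static size_t StackArgAreaSize(const std::vector<ArgType>& args) {
        size_t stackBytes = 0;
        for (size_t i = 0; i < args.size(); i++) {
            if (i >= kNumArgRegs)
                stackBytes += AlignBytes(SizeOf(args[i]), sizeof(void*));
        }
        // One rounding pass over the complete signature, mirroring
        // AlignBytes(i.stackBytesConsumedSoFar(), 16u) in the patch.
        return AlignBytes(stackBytes, 16u);
    }

    int main() {
        // Complete signatures mirroring SigPI_ (GrowMemory: instance
        // pointer + i32 delta) and SigP_ (CurrentMemory: instance pointer).
        std::vector<ArgType> sigPI = { ArgType::Pointer, ArgType::Int32 };
        std::vector<ArgType> sigP  = { ArgType::Pointer };
        printf("GrowMemory stack arg area:    %zu bytes\n", StackArgAreaSize(sigPI));
        printf("CurrentMemory stack arg area: %zu bytes\n", StackArgAreaSize(sigP));
        return 0;
    }

Because the pointer is part of the signature handed to the size computation, there is no need to bump the area by sizeof(void*) afterwards, which is why the patch can delete that adjustment from startCallArgs() and reservePointerArgument().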