Bug 1277011 - Wasm baseline: Cleanup around calls. r=bbouvier
author: Lars T Hansen <lhansen@mozilla.com>
date: Wed, 06 Jul 2016 12:22:58 +0200
changeset 349061 a398af8197f70e2fda529fcecd2c0ce2fa2499c2
parent 349060 2846438ad01dc71e903a71f093b0dcd8f1a93fe9
child 349062 6032373138d2e6b1c31cec4b6b0980b44c032cad
push id: 1230
push user: jlund@mozilla.com
push date: Mon, 31 Oct 2016 18:13:35 +0000
treeherder: mozilla-release@5e06e3766db2 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: bbouvier
bugs: 1277011
milestone: 50.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1277011 - Wasm baseline: Cleanup around calls. r=bbouvier
js/src/asmjs/WasmBaselineCompile.cpp
--- a/js/src/asmjs/WasmBaselineCompile.cpp
+++ b/js/src/asmjs/WasmBaselineCompile.cpp
@@ -19,29 +19,24 @@
 /* General status notes:
  *
  * "FIXME" indicates a known or suspected bug.
  * "TODO" indicates an opportunity for a general improvement, with an
  * additional tag to indicate the area of improvement.
  *
  * Unimplemented functionality:
  *
- *  - This is not actually a baseline compiler, as it performs no
- *    profiling and does not trigger ion compilation and function
- *    replacement (duh)
- *  - int64 load and store
+ *  - Tiered compilation (bug 1277562)
+ *  - int64 operations on 32-bit systems
  *  - SIMD
  *  - Atomics (very simple now, we have range checking)
  *  - current_memory, grow_memory
  *  - non-signaling interrupts
- *  - non-signaling bounds checks
  *  - profiler support (devtools)
- *  - Platform support:
- *      ARM-32
- *      ARM-64
+ *  - ARM-32 support (bug 1277011)
  *
  * There are lots of machine dependencies here but they are pretty
  * well isolated to a segment of the compiler.  Many dependencies
  * will eventually be factored into the MacroAssembler layer and shared
  * with other code generators.
  *
  *
  * High-value compiler performance improvements:
@@ -139,20 +134,21 @@ struct BaseCompilePolicy : ExprIterPolic
     //
     // TODO / REDUNDANT: It would be nice if we could make use of the
     // iterator's ControlItems and not require our own stack for that.
 };
 
 typedef ExprIter<BaseCompilePolicy> BaseExprIter;
 
 typedef bool IsUnsigned;
-typedef bool IsLoad;
 typedef bool ZeroOnOverflow;
 typedef bool IsKnownNotZero;
 typedef bool HandleNaNSpecially;
+typedef bool EscapesSandbox;
+typedef bool IsBuiltinCall;
 
 #ifdef JS_CODEGEN_ARM64
 // FIXME: This is not correct, indeed for ARM64 there is no reliable
 // StackPointer and we'll need to change the abstractions that use
 // SP-relative addressing.  There's a guard in emitFunction() below to
 // prevent this workaround from having any consequence.  This hack
 // exists only as a stopgap; there is no ARM64 JIT support yet.
 static const Register StackPointer = RealStackPointer;
@@ -1878,48 +1874,56 @@ class BaseCompiler
     //
     // Calls.
 
     struct FunctionCall
     {
         explicit FunctionCall(uint32_t lineOrBytecode)
           : lineOrBytecode_(lineOrBytecode),
             callSavesMachineState_(false),
+            builtinCall_(false),
             machineStateAreaSize_(0),
             frameAlignAdjustment_(0),
             stackArgAreaSize_(0),
             calleePopsArgs_(false)
         {}
 
         uint32_t lineOrBytecode_;
         ABIArgGenerator abi_;
         bool callSavesMachineState_;
+        bool builtinCall_;
         size_t machineStateAreaSize_;
         size_t frameAlignAdjustment_;
         size_t stackArgAreaSize_;
 
         // TODO / INVESTIGATE: calleePopsArgs_ is unused on x86, x64,
         // always false at present, certainly not tested.
 
         bool calleePopsArgs_;
     };
 
-    void beginCall(FunctionCall& call, bool escapesSandbox)
+    void beginCall(FunctionCall& call, bool escapesSandbox, bool builtinCall)
     {
         call.callSavesMachineState_ = escapesSandbox;
-        if (call.callSavesMachineState_) {
+        if (escapesSandbox) {
 #if defined(JS_CODEGEN_X64)
             call.machineStateAreaSize_ = 16; // Save HeapReg
 #elif defined(JS_CODEGEN_X86)
             // Nothing
 #else
             MOZ_CRASH("BaseCompiler platform hook: beginCall");
 #endif
         }
 
+        call.builtinCall_ = builtinCall;
+        if (builtinCall) {
+            // Call-outs need to use the appropriate system ABI.
+            // ARM will have something here.
+        }
+
         call.frameAlignAdjustment_ = ComputeByteAlignment(masm.framePushed() + sizeof(AsmJSFrame),
                                                           JitStackAlignment);
     }
 
     void endCall(FunctionCall& call)
     {
         if (call.machineStateAreaSize_ || call.frameAlignAdjustment_) {
             int size = call.calleePopsArgs_ ? 0 : call.stackArgAreaSize_;
@@ -1992,54 +1996,83 @@ class BaseCompiler
         switch (type) {
           case ValType::I32: {
             ABIArg argLoc = call.abi_.next(MIRType::Int32);
             if (argLoc.kind() == ABIArg::Stack) {
                 ScratchI32 scratch(*this);
                 loadI32(scratch, arg);
                 masm.store32(scratch, Address(StackPointer, argLoc.offsetFromArgBase()));
             } else {
-                loadI32(argLoc.reg().gpr(), arg);
+                loadI32(argLoc.gpr(), arg);
             }
             break;
           }
           case ValType::I64: {
 #ifdef JS_CODEGEN_X64
             ABIArg argLoc = call.abi_.next(MIRType::Int64);
             if (argLoc.kind() == ABIArg::Stack) {
                 ScratchI32 scratch(*this);
                 loadI64(Register64(scratch), arg);
                 masm.movq(scratch, Operand(StackPointer, argLoc.offsetFromArgBase()));
             } else {
-                loadI64(Register64(argLoc.reg().gpr()), arg);
+                loadI64(argLoc.gpr64(), arg);
             }
 #else
             MOZ_CRASH("BaseCompiler platform hook: passArg I64");
 #endif
             break;
           }
           case ValType::F64: {
             ABIArg argLoc = call.abi_.next(MIRType::Double);
-            if (argLoc.kind() == ABIArg::Stack) {
+            switch (argLoc.kind()) {
+              case ABIArg::Stack: {
                 ScratchF64 scratch(*this);
                 loadF64(scratch, arg);
                 masm.storeDouble(scratch, Address(StackPointer, argLoc.offsetFromArgBase()));
-            } else {
-                loadF64(argLoc.reg().fpu(), arg);
+                break;
+              }
+#if defined(JS_CODEGEN_REGISTER_PAIR)
+              case ABIArg::GPR_PAIR: {
+                MOZ_CRASH("BaseCompiler platform hook: passArg F64 pair");
+              }
+#endif
+              case ABIArg::FPU: {
+                loadF64(argLoc.fpu(), arg);
+                break;
+              }
+              case ABIArg::GPR: {
+                MOZ_CRASH("Unexpected parameter passing discipline");
+              }
             }
             break;
           }
           case ValType::F32: {
             ABIArg argLoc = call.abi_.next(MIRType::Float32);
-            if (argLoc.kind() == ABIArg::Stack) {
+            switch (argLoc.kind()) {
+              case ABIArg::Stack: {
                 ScratchF32 scratch(*this);
                 loadF32(scratch, arg);
                 masm.storeFloat32(scratch, Address(StackPointer, argLoc.offsetFromArgBase()));
-            } else {
-                loadF32(argLoc.reg().fpu(), arg);
+                break;
+              }
+              case ABIArg::GPR: {
+                ScratchF32 scratch(*this);
+                loadF32(scratch, arg);
+                masm.moveFloat32ToGPR(scratch, argLoc.gpr());
+                break;
+              }
+              case ABIArg::FPU: {
+                loadF32(argLoc.fpu(), arg);
+                break;
+              }
+#if defined(JS_CODEGEN_REGISTER_PAIR)
+              case ABIArg::GPR_PAIR: {
+                MOZ_CRASH("Unexpected parameter passing discipline");
+              }
+#endif
             }
             break;
           }
           default:
             MOZ_CRASH("Function argument type");
         }
     }
 
@@ -2155,52 +2188,48 @@ class BaseCompiler
     void captureReturnedI32(RegI32 dest) {
         moveI32(RegI32(ReturnReg), dest);
     }
 
     void captureReturnedI64(RegI64 dest) {
 #ifdef JS_PUNBOX64
         moveI64(RegI64(ReturnReg64), dest);
 #else
-        MOZ_CRASH("BaseCompiler platform hook: pushReturned");
+        MOZ_CRASH("BaseCompiler platform hook: captureReturnedI64");
 #endif
     }
 
-    void captureReturnedF32(RegF32 dest) {
-        moveF32(RegF32(ReturnFloat32Reg), dest);
-    }
-
-    void captureBuiltinReturnedF32(RegF32 dest) {
+    void captureReturnedF32(const FunctionCall& call, RegF32 dest) {
 #ifdef JS_CODEGEN_X86
-        masm.reserveStack(sizeof(float));
-        Operand op(esp, 0);
-        masm.fstp32(op);
-        masm.loadFloat32(op, dest.reg);
-        masm.freeStack(sizeof(float));
-#else
-        captureReturnedF32(dest);
+        if (call.builtinCall_) {
+            masm.reserveStack(sizeof(float));
+            Operand op(esp, 0);
+            masm.fstp32(op);
+            masm.loadFloat32(op, dest.reg);
+            masm.freeStack(sizeof(float));
+            return;
+        }
 #endif
-    }
-
-    void captureReturnedF64(RegF64 dest) {
+        moveF32(RegF32(ReturnFloat32Reg), dest);
+    }
+
+    void captureReturnedF64(const FunctionCall& call, RegF64 dest) {
+#ifdef JS_CODEGEN_X86
+        if (call.builtinCall_) {
+            masm.reserveStack(sizeof(double));
+            Operand op(esp, 0);
+            masm.fstp(op);
+            masm.loadDouble(op, dest.reg);
+            masm.freeStack(sizeof(double));
+            return;
+        }
+#endif
         moveF64(RegF64(ReturnDoubleReg), dest);
     }
 
-    void captureBuiltinReturnedF64(RegF64 dest) {
-#ifdef JS_CODEGEN_X86
-        masm.reserveStack(sizeof(double));
-        Operand op(esp, 0);
-        masm.fstp(op);
-        masm.loadDouble(op, dest.reg);
-        masm.freeStack(sizeof(double));
-#else
-        captureReturnedF64(dest);
-#endif
-    }
-
     void returnVoid() {
         popStackBeforeBranch(ctl_[0].framePushed);
         masm.jump(&returnLabel_);
     }
 
     void returnI32(RegI32 r) {
         moveI32(r, RegI32(ReturnReg));
         popStackBeforeBranch(ctl_[0].framePushed);
@@ -2443,32 +2472,40 @@ class BaseCompiler
     void ctzI64(RegI64 srcDest) {
 #if defined(JS_CODEGEN_X64)
         masm.ctz64(srcDest.reg, srcDest.reg);
 #else
         MOZ_CRASH("BaseCompiler platform hook: ctzI64");
 #endif
     }
 
-    bool popcntNeedsTemp() {
+    bool popcnt32NeedsTemp() const {
 #if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
         return !AssemblerX86Shared::HasPOPCNT();
 #else
-        return false;
+        MOZ_CRASH("BaseCompiler platform hook: popcnt32NeedsTemp");
 #endif
     }
 
     void popcntI32(RegI32 srcDest, RegI32 tmp) {
 #if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
         masm.popcnt32(srcDest.reg, srcDest.reg, tmp.reg);
 #else
         MOZ_CRASH("BaseCompiler platform hook: popcntI32");
 #endif
     }
 
+    bool popcnt64NeedsTemp() const {
+#if defined(JS_CODEGEN_X64)
+        return !AssemblerX86Shared::HasPOPCNT();
+#else
+        MOZ_CRASH("BaseCompiler platform hook: popcnt64NeedsTemp");
+#endif
+    }
+
     void popcntI64(RegI64 srcDest, RegI64 tmp) {
 #if defined(JS_CODEGEN_X64)
         masm.popcnt64(srcDest.reg, srcDest.reg, tmp.reg);
 #else
         MOZ_CRASH("BaseCompiler platform hook: popcntI64");
 #endif
     }
 
@@ -2706,17 +2743,17 @@ class BaseCompiler
     {
 #if defined(JS_CODEGEN_X64)
         CodeOffset label = masm.loadRipRelativeDouble(r.reg);
         masm.append(GlobalAccess(label, globalDataOffset));
 #elif defined(JS_CODEGEN_X86)
         CodeOffset label = masm.vmovsdWithPatch(PatchedAbsoluteAddress(), r.reg);
         masm.append(GlobalAccess(label, globalDataOffset));
 #else
-        MOZ_CRASH("BaseCompiler platform hook: loadGlobalVarF32");
+        MOZ_CRASH("BaseCompiler platform hook: loadGlobalVarF64");
 #endif
     }
 
     // CodeGeneratorX64::visitAsmJSStoreGlobalVar()
 
     void storeGlobalVarI32(unsigned globalDataOffset, RegI32 r)
     {
 #if defined(JS_CODEGEN_X64)
@@ -3277,18 +3314,17 @@ class BaseCompiler
     bool emitSelect();
 
     void endBlock();
     void endLoop();
     void endIfThen();
     void endIfThenElse();
 
     void doReturn(ExprType returnType);
-    void pushReturned(ExprType type);
-    void pushBuiltinReturned(ExprType type);
+    void pushReturned(const FunctionCall& call, ExprType type);
 
     void emitCompareI32(JSOp compareOp, MCompare::CompareType compareType);
     void emitCompareI64(JSOp compareOp, MCompare::CompareType compareType);
     void emitCompareF32(JSOp compareOp, MCompare::CompareType compareType);
     void emitCompareF64(JSOp compareOp, MCompare::CompareType compareType);
 
     void emitAddI32();
     void emitAddI64();
@@ -4129,31 +4165,31 @@ BaseCompiler::emitCtzI64()
     ctzI64(r0);
     pushI64(r0);
 }
 
 void
 BaseCompiler::emitPopcntI32()
 {
     RegI32 r0 = popI32();
-    if (popcntNeedsTemp()) {
+    if (popcnt32NeedsTemp()) {
         RegI32 tmp = needI32();
         popcntI32(r0, tmp);
         freeI32(tmp);
     } else {
         popcntI32(r0, invalidI32());
     }
     pushI32(r0);
 }
 
 void
 BaseCompiler::emitPopcntI64()
 {
     RegI64 r0 = popI64();
-    if (popcntNeedsTemp()) {
+    if (popcnt64NeedsTemp()) {
         RegI64 tmp = needI64();
         popcntI64(r0, tmp);
         freeI64(tmp);
     } else {
         popcntI64(r0, invalidI64());
     }
     pushI64(r0);
 }
@@ -5046,17 +5082,17 @@ BaseCompiler::skipCall(const ValTypeVect
         if (!iter_.readCallReturn(maybeReturnType))
             return false;
     }
 
     return true;
 }
 
 void
-BaseCompiler::pushReturned(ExprType type)
+BaseCompiler::pushReturned(const FunctionCall& call, ExprType type)
 {
     switch (type) {
       case ExprType::Void: {
         pushVoid();
         break;
       }
       case ExprType::I32: {
         RegI32 rv = needI32();
@@ -5067,52 +5103,31 @@ BaseCompiler::pushReturned(ExprType type
       case ExprType::I64: {
         RegI64 rv = needI64();
         captureReturnedI64(rv);
         pushI64(rv);
         break;
       }
       case ExprType::F32: {
         RegF32 rv = needF32();
-        captureReturnedF32(rv);
+        captureReturnedF32(call, rv);
         pushF32(rv);
         break;
       }
       case ExprType::F64: {
         RegF64 rv = needF64();
-        captureReturnedF64(rv);
+        captureReturnedF64(call, rv);
         pushF64(rv);
         break;
       }
       default:
         MOZ_CRASH("Function return type");
     }
 }
 
-void
-BaseCompiler::pushBuiltinReturned(ExprType type)
-{
-    switch (type) {
-      case ExprType::F32: {
-        RegF32 rv = needF32();
-        captureBuiltinReturnedF32(rv);
-        pushF32(rv);
-        break;
-      }
-      case ExprType::F64: {
-        RegF64 rv = needF64();
-        captureBuiltinReturnedF64(rv);
-        pushF64(rv);
-        break;
-      }
-      default:
-        MOZ_CRASH("Compiler bug: unexpected type");
-    }
-}
-
 // For now, always sync() at the beginning of the call to easily save
 // live values.
 //
 // TODO / OPTIMIZE: We may be able to avoid a full sync(), since all
 // we want is to save live registers that won't be saved by the callee
 // or that we need for outgoing args - we don't need to sync the
 // locals.  We can just push the necessary registers, it'll be like a
 // lightweight sync.
@@ -5138,17 +5153,17 @@ BaseCompiler::emitCall(uint32_t callOffs
         return skipCall(sig.args(), sig.ret());
 
     sync();
 
     uint32_t numArgs = sig.args().length();
     size_t stackSpace = stackConsumed(numArgs);
 
     FunctionCall baselineCall(lineOrBytecode);
-    beginCall(baselineCall, false);
+    beginCall(baselineCall, EscapesSandbox(false), IsBuiltinCall(false));
 
     if (!emitCallArgs(sig.args(), baselineCall))
         return false;
 
     if (!iter_.readCallReturn(sig.ret()))
         return false;
 
     callDirect(calleeIndex, baselineCall);
@@ -5156,17 +5171,17 @@ BaseCompiler::emitCall(uint32_t callOffs
     endCall(baselineCall);
 
     // TODO / OPTIMIZE: It would be better to merge this freeStack()
     // into the one in endCall, if we can.
 
     popValueStackBy(numArgs);
     masm.freeStack(stackSpace);
 
-    pushReturned(sig.ret());
+    pushReturned(baselineCall, sig.ret());
 
     return true;
 }
 
 bool
 BaseCompiler::emitCallIndirect(uint32_t callOffset)
 {
     uint32_t lineOrBytecode = readCallSiteLineOrBytecode(callOffset);
@@ -5188,17 +5203,17 @@ BaseCompiler::emitCallIndirect(uint32_t 
     sync();
 
     // Stack: ... index arg1 .. argn
 
     uint32_t numArgs = sig.args().length();
     size_t stackSpace = stackConsumed(numArgs+1);
 
     FunctionCall baselineCall(lineOrBytecode);
-    beginCall(baselineCall, false);
+    beginCall(baselineCall, EscapesSandbox(false), IsBuiltinCall(false));
 
     if (!emitCallArgs(sig.args(), baselineCall))
         return false;
 
     if (!iter_.readCallIndirectCallee(&callee_))
         return false;
 
     if (!iter_.readCallReturn(sig.ret()))
@@ -5215,17 +5230,17 @@ BaseCompiler::emitCallIndirect(uint32_t 
     endCall(baselineCall);
 
     // TODO / OPTIMIZE: It would be better to merge this freeStack()
     // into the one in endCall, if we can.
 
     popValueStackBy(numArgs+1);
     masm.freeStack(stackSpace);
 
-    pushReturned(sig.ret());
+    pushReturned(baselineCall, sig.ret());
 
     return true;
 }
 
 bool
 BaseCompiler::emitCallImport(uint32_t callOffset)
 {
     uint32_t lineOrBytecode = readCallSiteLineOrBytecode(callOffset);
@@ -5242,17 +5257,17 @@ BaseCompiler::emitCallImport(uint32_t ca
         return skipCall(sig.args(), sig.ret());
 
     sync();
 
     uint32_t numArgs = sig.args().length();
     size_t stackSpace = stackConsumed(numArgs);
 
     FunctionCall baselineCall(lineOrBytecode);
-    beginCall(baselineCall, true);
+    beginCall(baselineCall, EscapesSandbox(true), IsBuiltinCall(false));
 
     if (!emitCallArgs(sig.args(), baselineCall))
         return false;
 
     if (!iter_.readCallReturn(sig.ret()))
         return false;
 
     ffiCall(funcImport.globalDataOffset, baselineCall);
@@ -5260,17 +5275,17 @@ BaseCompiler::emitCallImport(uint32_t ca
     endCall(baselineCall);
 
     // TODO / OPTIMIZE: It would be better to merge this freeStack()
     // into the one in endCall, if we can.
 
     popValueStackBy(numArgs);
     masm.freeStack(stackSpace);
 
-    pushReturned(sig.ret());
+    pushReturned(baselineCall, sig.ret());
 
     return true;
 }
 
 bool
 BaseCompiler::emitUnaryMathBuiltinCall(uint32_t callOffset, SymbolicAddress callee,
                                        ValType operandType)
 {
@@ -5288,17 +5303,17 @@ BaseCompiler::emitUnaryMathBuiltinCall(u
     uint32_t lineOrBytecode = readCallSiteLineOrBytecode(callOffset);
 
     sync();
 
     uint32_t numArgs = 1;
     size_t stackSpace = stackConsumed(numArgs);
 
     FunctionCall baselineCall(lineOrBytecode);
-    beginCall(baselineCall, false);
+    beginCall(baselineCall, EscapesSandbox(false), IsBuiltinCall(true));
 
     ExprType retType;
     switch (operandType) {
       case ValType::F64:
         if (!emitCallArgs(SigD_, baselineCall))
             return false;
         retType = ExprType::F64;
         break;
@@ -5319,17 +5334,17 @@ BaseCompiler::emitUnaryMathBuiltinCall(u
     endCall(baselineCall);
 
     // TODO / OPTIMIZE: It would be better to merge this freeStack()
     // into the one in endCall, if we can.
 
     popValueStackBy(numArgs);
     masm.freeStack(stackSpace);
 
-    pushBuiltinReturned(retType);
+    pushReturned(baselineCall, retType);
 
     return true;
 }
 
 bool
 BaseCompiler::emitBinaryMathBuiltinCall(uint32_t callOffset, SymbolicAddress callee,
                                         ValType operandType)
 {
@@ -5346,17 +5361,17 @@ BaseCompiler::emitBinaryMathBuiltinCall(
     }
 
     sync();
 
     uint32_t numArgs = 2;
     size_t stackSpace = stackConsumed(numArgs);
 
     FunctionCall baselineCall(lineOrBytecode);
-    beginCall(baselineCall, false);
+    beginCall(baselineCall, EscapesSandbox(false), IsBuiltinCall(true));
 
     ExprType retType = ExprType::F64;
     if (!emitCallArgs(SigDD_, baselineCall))
         return false;
 
     if (!iter_.readCallReturn(retType))
         return false;
 
@@ -5365,17 +5380,17 @@ BaseCompiler::emitBinaryMathBuiltinCall(
     endCall(baselineCall);
 
     // TODO / OPTIMIZE: It would be better to merge this freeStack()
     // into the one in endCall, if we can.
 
     popValueStackBy(numArgs);
     masm.freeStack(stackSpace);
 
-    pushBuiltinReturned(retType);
+    pushReturned(baselineCall, retType);
 
     return true;
 }
 
 bool
 BaseCompiler::emitGetLocal()
 {
     uint32_t slot;