Bug 860736 - Align the local stack storage for ARM asm.js frames; r=mjrosenb
author Benjamin Bouvier <benj@benj.me>
Tue, 03 Jun 2014 17:24:28 +0200
changeset 205585 c8a1656249fcd195c1bb2781714df8395db39d88
parent 205584 7652c6ab1d362fa23ea2662e09e716c873364558
child 205586 066f499d0544e4f5564f4590ce52e6a5fd61253a
push id 3741
push user asasaki@mozilla.com
push date Mon, 21 Jul 2014 20:25:18 +0000
treeherder mozilla-beta@4d6f46f5af68
reviewers mjrosenb
bugs 860736
milestone 32.0a1
Bug 860736 - Align the local stack storage for ARM asm.js frames; r=mjrosenb
js/src/jit/AsmJS.cpp
js/src/jit/IonMacroAssembler.h
js/src/jit/arm/Assembler-arm.h
js/src/jit/arm/CodeGenerator-arm.cpp
js/src/jit/arm/MacroAssembler-arm.cpp
js/src/jit/arm/MacroAssembler-arm.h
js/src/jit/shared/CodeGenerator-shared.cpp
js/src/jit/x64/Assembler-x64.h
js/src/jit/x86/Assembler-x86.h
--- a/js/src/jit/AsmJS.cpp
+++ b/js/src/jit/AsmJS.cpp
@@ -5985,19 +5985,21 @@ CheckModuleReturn(ModuleCompiler &m)
 
 // All registers except the stack pointer.
 static const RegisterSet AllRegsExceptSP =
     RegisterSet(GeneralRegisterSet(Registers::AllMask &
                                    ~(uint32_t(1) << Registers::StackPointer)),
                 FloatRegisterSet(FloatRegisters::AllMask));
 #if defined(JS_CODEGEN_ARM)
 // The ARM system ABI also includes d15 in the non volatile float registers.
+// Also exclude lr (a.k.a. r14), since we preserve it manually.
 static const RegisterSet NonVolatileRegs =
-    RegisterSet(GeneralRegisterSet(Registers::NonVolatileMask),
-                    FloatRegisterSet(FloatRegisters::NonVolatileMask | (1 << FloatRegisters::d15)));
+    RegisterSet(GeneralRegisterSet(Registers::NonVolatileMask &
+                                   ~(uint32_t(1) << Registers::lr)),
+                FloatRegisterSet(FloatRegisters::NonVolatileMask | (1 << FloatRegisters::d15)));
 #else
 static const RegisterSet NonVolatileRegs =
     RegisterSet(GeneralRegisterSet(Registers::NonVolatileMask),
                 FloatRegisterSet(FloatRegisters::NonVolatileMask));
 #endif
 
 static void
 LoadAsmJSActivationIntoRegister(MacroAssembler &masm, Register reg)
@@ -6073,16 +6075,24 @@ GenerateEntry(ModuleCompiler &m, const A
 
     // In contrast to the system ABI, the Ion convention is that all registers
     // are clobbered by calls. Thus, we must save the caller's non-volatile
     // registers.
     //
     // NB: GenerateExits assumes that masm.framePushed() == 0 before
     // PushRegsInMask(NonVolatileRegs).
     masm.setFramePushed(0);
+
+#if defined(JS_CODEGEN_ARM)
+    // Push lr without incrementing masm.framePushed since this push is
+    // accounted for by AlignmentAtAsmJSPrologue. The masm.ret at the end will
+    // pop.
+    masm.push(lr);
+#endif // JS_CODEGEN_ARM
+
     masm.PushRegsInMask(NonVolatileRegs);
     JS_ASSERT(masm.framePushed() == FramePushedAfterSave);
 
     // Remember the stack pointer in the current AsmJSActivation. This will be
     // used by error exit paths to set the stack pointer back to what it was
     // right after the (C++) caller's non-volatile registers were saved so that
     // they can be restored.
     Register activation = ABIArgGenerator::NonArgReturnVolatileReg0;
@@ -6169,17 +6179,17 @@ GenerateEntry(ModuleCompiler &m, const A
     }
 
     // Restore clobbered non-volatile registers of the caller.
     masm.PopRegsInMask(NonVolatileRegs);
 
     JS_ASSERT(masm.framePushed() == 0);
 
     masm.move32(Imm32(true), ReturnReg);
-    masm.abiret();
+    masm.ret();
     return true;
 }
 
 static inline bool
 TryEnablingIon(JSContext *cx, AsmJSModule &module, HandleFunction fun, uint32_t exitIndex,
                int32_t argc, Value *argv)
 {
     if (!fun->hasScript())
@@ -6323,18 +6333,22 @@ FillArgumentArray(ModuleCompiler &m, con
 static void
 GenerateFFIInterpreterExit(ModuleCompiler &m, const ModuleCompiler::ExitDescriptor &exit,
                            unsigned exitIndex, Label *throwLabel)
 {
     MacroAssembler &masm = m.masm();
     masm.align(CodeAlignment);
     m.setInterpExitOffset(exitIndex);
     masm.setFramePushed(0);
+
 #if defined(JS_CODEGEN_ARM)
-    masm.Push(lr);
+    // Push lr without incrementing masm.framePushed since this push is
+    // accounted for by AlignmentAtAsmJSPrologue. The masm.ret at the end will
+    // pop.
+    masm.push(lr);
 #endif
 
     MIRType typeArray[] = { MIRType_Pointer,   // cx
                             MIRType_Pointer,   // exitDatum
                             MIRType_Int32,     // argc
                             MIRType_Pointer }; // argv
     MIRTypeVector invokeArgTypes(m.cx());
     invokeArgTypes.infallibleAppend(typeArray, ArrayLength(typeArray));
@@ -6494,24 +6508,26 @@ GenerateFFIIonExit(ModuleCompiler &m, co
     MacroAssembler &masm = m.masm();
     masm.align(CodeAlignment);
     m.setIonExitOffset(exitIndex);
     masm.setFramePushed(0);
 
 #if defined(JS_CODEGEN_X64)
     masm.Push(HeapReg);
 #elif defined(JS_CODEGEN_ARM)
-    // The lr register holds the return address and needs to be saved.  The GlobalReg
-    // (r10) and HeapReg (r11) also need to be restored before returning to asm.js code.
+    // Push lr without incrementing masm.framePushed since this push is
+    // accounted for by AlignmentAtAsmJSPrologue. The masm.ret at the end will
+    // pop.
+    masm.push(lr);
+
+    // The GlobalReg (r10) and HeapReg (r11) also need to be restored before
+    // returning to asm.js code.
     // The NANReg also needs to be restored, but is a constant and is reloaded before
     // returning to asm.js code.
-    masm.PushRegsInMask(RegisterSet(GeneralRegisterSet((1<<GlobalReg.code()) |
-                                                       (1<<HeapReg.code()) |
-                                                       (1<<lr.code())),
-                                    FloatRegisterSet(uint32_t(0))));
+    masm.PushRegsInMask(GeneralRegisterSet((1<<GlobalReg.code()) | (1<<HeapReg.code())));
 #endif
 
     // The stack frame is used for the call into Ion and also for calls into C for OOL
     // conversion of the result.  A frame large enough for both is allocated.
     //
     // Arguments to the Ion function are in the following order on the stack:
     // | return address | descriptor | callee | argc | this | arg1 | arg2 | ...
     unsigned argBytes = 3 * sizeof(size_t) + (1 + exit.sig().args().length()) * sizeof(Value);
@@ -6695,28 +6711,24 @@ GenerateFFIIonExit(ModuleCompiler &m, co
         break;
       case RetType::Float:
         MOZ_ASSUME_UNREACHABLE("Float shouldn't be returned from a FFI");
         break;
     }
 
     masm.bind(&done);
     masm.freeStack(stackDec);
+#if defined(JS_CODEGEN_X64)
+    masm.Pop(HeapReg);
+#endif
 #if defined(JS_CODEGEN_ARM)
     masm.ma_vimm(GenericNaN(), NANReg);
-    masm.PopRegsInMask(RegisterSet(GeneralRegisterSet((1<<GlobalReg.code()) |
-                                                      (1<<HeapReg.code()) |
-                                                      (1<<pc.code())),
-                                   FloatRegisterSet(uint32_t(0))));
-#else
-# if defined(JS_CODEGEN_X64)
-    masm.Pop(HeapReg);
-# endif
+    masm.PopRegsInMask(GeneralRegisterSet((1<<GlobalReg.code()) | (1<<HeapReg.code())));
+#endif
     masm.ret();
-#endif
     JS_ASSERT(masm.framePushed() == 0);
 
     // oolConvert
     if (oolConvert.used()) {
         masm.bind(&oolConvert);
         masm.setFramePushed(oolConvertFramePushed);
         GenerateOOLConvert(m, exit.sig().retType(), throwLabel);
         masm.setFramePushed(0);
@@ -6746,20 +6758,16 @@ GenerateFFIExit(ModuleCompiler &m, const
 // all the frames.
 static bool
 GenerateStackOverflowExit(ModuleCompiler &m, Label *throwLabel)
 {
     MacroAssembler &masm = m.masm();
     masm.align(CodeAlignment);
     masm.bind(&m.stackOverflowLabel());
 
-    // The overflow check always occurs before the initial function-specific
-    // stack-size adjustment. See CodeGenerator::generateAsmJSPrologue.
-    masm.setFramePushed(AlignmentMidPrologue - AlignmentAtPrologue);
-
     MIRTypeVector argTypes(m.cx());
     argTypes.infallibleAppend(MIRType_Pointer); // cx
 
     unsigned stackDec = StackDecrementForCall(masm, argTypes, MaybeRetAddr);
     masm.reserveStack(stackDec);
 
     Register activation = ABIArgGenerator::NonArgReturnVolatileReg0;
     LoadAsmJSActivationIntoRegister(masm, activation);
@@ -6919,17 +6927,17 @@ GenerateThrowExit(ModuleCompiler &m, Lab
 
     masm.setFramePushed(FramePushedAfterSave);
     masm.loadPtr(Address(activation, AsmJSActivation::offsetOfErrorRejoinSP()), StackPointer);
 
     masm.PopRegsInMask(NonVolatileRegs);
     JS_ASSERT(masm.framePushed() == 0);
 
     masm.mov(ImmWord(0), ReturnReg);
-    masm.abiret();
+    masm.ret();
 
     return !masm.oom();
 }
 
 static bool
 GenerateStubs(ModuleCompiler &m)
 {
     for (unsigned i = 0; i < m.module().numExportedFunctions(); i++) {
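
A standalone sketch of the mask arithmetic used in the NonVolatileRegs change above, with plain uint32_t values (the real Registers::NonVolatileMask is not reproduced; the only ARM-specific fact assumed is that lr is r14, i.e. register code 14):

#include <cstdint>
#include <cstdio>

int main() {
    const uint32_t lrCode = 14;                        // lr is r14 on ARM
    uint32_t nonVolatile = 0x00004ff0;                 // placeholder mask, not the real value
    uint32_t withoutLr = nonVolatile & ~(uint32_t(1) << lrCode);

    // PushRegsInMask saves only the registers whose bit is set, so clearing
    // bit 14 means lr is no longer saved/restored in bulk; the entry and exit
    // stubs push it explicitly and return through masm.ret() instead.
    printf("lr in bulk-saved set before: %u, after: %u\n",
           (nonVolatile >> lrCode) & 1u, (withoutLr >> lrCode) & 1u);
    return 0;
}
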
--- a/js/src/jit/IonMacroAssembler.h
+++ b/js/src/jit/IonMacroAssembler.h
@@ -246,17 +246,17 @@ class MacroAssembler : public MacroAssem
                 spsPc_ = pc;
                 spsInstrumentation_.construct(&cx->runtime()->spsProfiler, &spsPc_);
                 sps_ = spsInstrumentation_.addr();
                 sps_->setPushed(script);
             }
         }
     }
 
-    // asm.js compilation handles its own IonContet-pushing
+    // asm.js compilation handles its own IonContext-pushing
     struct AsmJSToken {};
     explicit MacroAssembler(AsmJSToken)
       : enoughMemory_(true),
         embedsNurseryPointers_(false),
         sps_(nullptr)
     {
 #ifdef JS_CODEGEN_ARM
         initWithAllocator();
--- a/js/src/jit/arm/Assembler-arm.h
+++ b/js/src/jit/arm/Assembler-arm.h
@@ -134,19 +134,17 @@ static MOZ_CONSTEXPR_VAR FloatRegister d
 // ldrd/strd (dual-register load/store) operate in a single cycle
 // when the address they are dealing with is 8 byte aligned.
 // Also, the ARM abi wants the stack to be 8 byte aligned at
 // function boundaries.  I'm trying to make sure this is always true.
 static const uint32_t StackAlignment = 8;
 static const uint32_t CodeAlignment = 8;
 static const bool StackKeptAligned = true;
 static const uint32_t NativeFrameSize = sizeof(void*);
-static const uint32_t AlignmentAtPrologue = 0;
-static const uint32_t AlignmentMidPrologue = 4;
-
+static const uint32_t AlignmentAtPrologue = sizeof(void*);
 
 static const Scale ScalePointer = TimesFour;
 
 class Instruction;
 class InstBranchImm;
 uint32_t RM(Register r);
 uint32_t RS(Register r);
 uint32_t RD(Register r);
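
A brief note on the remaining constant: the ARM ABI hands the function an 8-byte aligned sp at entry, and the asm.js prologue's single push of lr leaves sp 4 bytes (sizeof(void*) on ARM32) short of a boundary; AlignmentAtPrologue records exactly that skew. A tiny sketch of the invariant, with the ARM32 values written out as literals:

#include <cassert>
#include <cstdint>

int main() {
    const uint32_t StackAlignment = 8;        // ARM ABI: 8-byte aligned stack
    const uint32_t AlignmentAtPrologue = 4;   // sizeof(void*) on ARM32: one push of lr

    uint32_t spAtEntry = 0x1000;              // hypothetical, 8-byte aligned
    uint32_t spAfterPushLr = spAtEntry - AlignmentAtPrologue;

    // Any further allocation must fold AlignmentAtPrologue back into its
    // alignment computation to return sp to an 8-byte boundary.
    assert(spAfterPushLr % StackAlignment == AlignmentAtPrologue);
    return 0;
}
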
--- a/js/src/jit/arm/CodeGenerator-arm.cpp
+++ b/js/src/jit/arm/CodeGenerator-arm.cpp
@@ -48,17 +48,17 @@ CodeGeneratorARM::generatePrologue()
     return true;
 }
 
 bool
 CodeGeneratorARM::generateAsmJSPrologue(Label *stackOverflowLabel)
 {
     JS_ASSERT(gen->compilingAsmJS());
 
-    masm.Push(lr);
+    masm.push(lr);
 
     // The asm.js over-recursed handler wants to be able to assume that SP
     // points to the return address, so perform the check after pushing lr but
     // before pushing frameDepth.
     if (!omitOverRecursedCheck()) {
         masm.branchPtr(Assembler::AboveOrEqual,
                        AsmJSAbsoluteAddress(AsmJSImm_StackLimit),
                        StackPointer,
@@ -80,28 +80,22 @@ CodeGeneratorARM::generateEpilogue()
     if (!gen->compilingAsmJS() && gen->info().executionMode() == SequentialExecution) {
         if (!emitTracelogStopEvent(TraceLogger::IonMonkey))
             return false;
         if (!emitTracelogScriptStop())
             return false;
     }
 #endif
 
-    if (gen->compilingAsmJS()) {
-        // Pop the stack we allocated at the start of the function.
+    if (gen->compilingAsmJS())
         masm.freeStack(frameDepth_);
-        masm.Pop(pc);
-        JS_ASSERT(masm.framePushed() == 0);
-        //masm.as_bkpt();
-    } else {
-        // Pop the stack we allocated at the start of the function.
+    else
         masm.freeStack(frameSize());
-        JS_ASSERT(masm.framePushed() == 0);
-        masm.ma_pop(pc);
-    }
+    JS_ASSERT(masm.framePushed() == 0);
+    masm.pop(pc);
     masm.dumpPool();
     return true;
 }
 
 void
 CodeGeneratorARM::emitBranch(Assembler::Condition cond, MBasicBlock *mirTrue, MBasicBlock *mirFalse)
 {
     if (isNextBlock(mirFalse->lir())) {
@@ -1042,24 +1036,18 @@ CodeGeneratorARM::visitPowHalfD(LPowHalf
 
 MoveOperand
 CodeGeneratorARM::toMoveOperand(const LAllocation *a) const
 {
     if (a->isGeneralReg())
         return MoveOperand(ToRegister(a));
     if (a->isFloatReg())
         return MoveOperand(ToFloatRegister(a));
-    JS_ASSERT((ToStackOffset(a) & 3) == 0);
     int32_t offset = ToStackOffset(a);
-
-    // The way the stack slots work, we assume that everything from depth == 0 downwards is writable
-    // however, since our frame is included in this, ensure that the frame gets skipped
-    if (gen->compilingAsmJS())
-        offset -= AlignmentMidPrologue;
-
+    JS_ASSERT((offset & 3) == 0);
     return MoveOperand(StackPointer, offset);
 }
 
 class js::jit::OutOfLineTableSwitch : public OutOfLineCodeBase<CodeGeneratorARM>
 {
     MTableSwitch *mir_;
     Vector<CodeLabel, 8, IonAllocPolicy> codeLabels_;
 
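
The prologue and epilogue changes above rely on the MacroAssembler convention that the capitalized Push/Pop helpers keep framePushed_ in sync while the lowercase push/pop only emit the instruction. A rough sketch of that convention as assumed here (simplified; not the actual SpiderMonkey helpers):

// Sketch only: the names mirror the convention, not the real class hierarchy.
struct SketchMasm {
    uint32_t framePushed_ = 0;

    void push(int reg) { /* emit "push {reg}", no bookkeeping */ }
    void pop(int reg)  { /* emit "pop {reg}" */ }

    void Push(int reg) { push(reg); framePushed_ += 4; }   // 4 == sizeof(void*) on ARM32
    void Pop(int reg)  { pop(reg);  framePushed_ -= 4; }
};

// The asm.js prologue now uses push(lr): its 4 bytes are described by
// AlignmentAtPrologue instead, so framePushed() stays at 0 and the
// JS_ASSERT(masm.framePushed() == 0) in the epilogue still holds.
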
--- a/js/src/jit/arm/MacroAssembler-arm.cpp
+++ b/js/src/jit/arm/MacroAssembler-arm.cpp
@@ -3862,27 +3862,30 @@ void MacroAssemblerARMCompat::checkStack
 {
 #ifdef DEBUG
     ma_tst(sp, Imm32(StackAlignment - 1));
     breakpoint(NonZero);
 #endif
 }
 
 void
-MacroAssemblerARMCompat::callWithABIPre(uint32_t *stackAdjust)
+MacroAssemblerARMCompat::callWithABIPre(uint32_t *stackAdjust, bool callFromAsmJS)
 {
     JS_ASSERT(inCall_);
 
     *stackAdjust = ((usedIntSlots_ > NumIntArgRegs) ? usedIntSlots_ - NumIntArgRegs : 0) * sizeof(intptr_t);
 #if defined(JS_CODEGEN_ARM_HARDFP) || defined(JS_ARM_SIMULATOR)
     if (useHardFpABI())
         *stackAdjust += 2*((usedFloatSlots_ > NumFloatArgRegs) ? usedFloatSlots_ - NumFloatArgRegs : 0) * sizeof(intptr_t);
 #endif
+    uint32_t alignmentAtPrologue = (callFromAsmJS) ? AlignmentAtPrologue : 0;
+
     if (!dynamicAlignment_) {
-        *stackAdjust += ComputeByteAlignment(framePushed_ + *stackAdjust, StackAlignment);
+        *stackAdjust += ComputeByteAlignment(framePushed_ + *stackAdjust + alignmentAtPrologue,
+                                             StackAlignment);
     } else {
         // sizeof(intptr_t) accounts for the saved stack pointer pushed by setupUnalignedABICall
         *stackAdjust += ComputeByteAlignment(*stackAdjust + sizeof(intptr_t), StackAlignment);
     }
 
     reserveStack(*stackAdjust);
 
     // Position all arguments.
@@ -4016,17 +4019,17 @@ MacroAssemblerARMCompat::callWithABI(voi
     ma_call(ImmPtr(fun));
     callWithABIPost(stackAdjust, result);
 }
 
 void
 MacroAssemblerARMCompat::callWithABI(AsmJSImmPtr imm, MoveOp::Type result)
 {
     uint32_t stackAdjust;
-    callWithABIPre(&stackAdjust);
+    callWithABIPre(&stackAdjust, /* callFromAsmJS = */ true);
     call(imm);
     callWithABIPost(stackAdjust, result);
 }
 
 void
 MacroAssemblerARMCompat::callWithABI(const Address &fun, MoveOp::Type result)
 {
     // Load the callee in r12, no instruction between the ldr and call
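
callWithABIPre now folds AlignmentAtPrologue into the alignment computation when the caller is asm.js code, because the lr pushed by the asm.js prologue is not counted in framePushed_. A small worked example with hypothetical numbers (the padding helper is re-derived locally rather than using the real ComputeByteAlignment):

#include <cassert>
#include <cstdint>

// Local stand-in: bytes needed to round 'bytes' up to a multiple of 'alignment'.
static uint32_t padTo(uint32_t bytes, uint32_t alignment) {
    return (alignment - bytes % alignment) % alignment;
}

int main() {
    const uint32_t StackAlignment = 8;
    const uint32_t AlignmentAtPrologue = 4;   // the untracked push of lr

    // Hypothetical asm.js caller: 8 bytes already tracked, no stack arguments.
    uint32_t framePushed = 8, argBytes = 0;

    uint32_t stackAdjust = argBytes
        + padTo(framePushed + argBytes + AlignmentAtPrologue, StackAlignment);

    // 8 (tracked) + 4 (lr) + 4 (padding) == 16, so sp is 8-byte aligned at the call.
    assert(stackAdjust == 4);
    assert((framePushed + AlignmentAtPrologue + stackAdjust) % StackAlignment == 0);
    return 0;
}
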
--- a/js/src/jit/arm/MacroAssembler-arm.h
+++ b/js/src/jit/arm/MacroAssembler-arm.h
@@ -448,19 +448,19 @@ private:
 
         JS_ASSERT(offset == static_cast<int32_t>(set.size() * sizeof(double)) * sign);
         return offset;
     }
 };
 
 class MacroAssemblerARMCompat : public MacroAssemblerARM
 {
+    bool inCall_;
     // Number of bytes the stack is adjusted inside a call to C. Calls to C may
     // not be nested.
-    bool inCall_;
     uint32_t args_;
     // The actual number of arguments that were passed, used to assert that
     // the initial number of arguments declared was correct.
     uint32_t passedArgs_;
     uint32_t passedArgTypes_;
 
     // ARM treats arguments as a vector in registers/memory, that looks like:
     // { r0, r1, r2, r3, [sp], [sp,+4], [sp,+8] ... }
@@ -571,17 +571,19 @@ class MacroAssemblerARMCompat : public M
         else
             rs = L_LDR;
 
         ma_movPatchable(ImmPtr(c->raw()), ScratchRegister, Always, rs);
         ma_callIonHalfPush(ScratchRegister);
     }
 
     void appendCallSite(const CallSiteDesc &desc) {
-        enoughMemory_ &= append(CallSite(desc, currentOffset(), framePushed_));
+        // Add an extra sizeof(void*) to include the return address that was
+        // pushed by the call instruction (see CallSite::stackDepth).
+        enoughMemory_ &= append(CallSite(desc, currentOffset(), framePushed_ + sizeof(void*)));
     }
 
     void call(const CallSiteDesc &desc, const Register reg) {
         call(reg);
         appendCallSite(desc);
     }
     void call(const CallSiteDesc &desc, Label *label) {
         call(label);
@@ -1525,17 +1527,17 @@ class MacroAssemblerARMCompat : public M
   private:
     void passHardFpABIArg(const MoveOperand &from, MoveOp::Type type);
     void passSoftFpABIArg(const MoveOperand &from, MoveOp::Type type);
 
   protected:
     bool buildOOLFakeExitFrame(void *fakeReturnAddr);
 
   private:
-    void callWithABIPre(uint32_t *stackAdjust);
+    void callWithABIPre(uint32_t *stackAdjust, bool callFromAsmJS = false);
     void callWithABIPost(uint32_t stackAdjust, MoveOp::Type result);
 
   public:
     // Emits a call to a C/C++ function, resolving all argument moves.
     void callWithABI(void *fun, MoveOp::Type result = MoveOp::GENERAL);
     void callWithABI(AsmJSImmPtr imm, MoveOp::Type result = MoveOp::GENERAL);
     void callWithABI(const Address &fun, MoveOp::Type result = MoveOp::GENERAL);
 
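
appendCallSite now records framePushed_ plus one word, so the stack depth stored for a call site includes the return address pushed by the call itself. A minimal sketch of that bookkeeping (the struct here is illustrative; CallSite's real layout and consumers are not reproduced):

#include <cstdint>

struct CallSiteSketch {
    uint32_t returnAddressOffset;   // code offset recorded for the call
    uint32_t stackDepth;            // stack bytes at that point, return address included
};

static CallSiteSketch makeCallSite(uint32_t currentOffset, uint32_t framePushed) {
    // framePushed_ counts what the function reserved itself; the call adds one
    // more word for the return address, so the recorded depth includes it.
    return CallSiteSketch{ currentOffset, framePushed + 4 /* sizeof(void*) on ARM32 */ };
}
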
--- a/js/src/jit/shared/CodeGenerator-shared.cpp
+++ b/js/src/jit/shared/CodeGenerator-shared.cpp
@@ -69,17 +69,17 @@ CodeGeneratorShared::CodeGeneratorShared
         // relies on the a priori stack adjustment (in the prologue) on platforms
         // (like x64) which require the stack to be aligned.
 #if defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_MIPS)
         bool forceAlign = true;
 #else
         bool forceAlign = false;
 #endif
         if (gen->needsInitialStackAlignment() || forceAlign) {
-            unsigned alignmentAtCall = AlignmentMidPrologue + frameDepth_;
+            unsigned alignmentAtCall = AlignmentAtPrologue + frameDepth_;
             if (unsigned rem = alignmentAtCall % StackAlignment) {
                 frameInitialAdjustment_ = StackAlignment - rem;
                 frameDepth_ += frameInitialAdjustment_;
             }
         }
 
         // FrameSizeClass is only used for bailing, which cannot happen in
         // asm.js code.
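
With AlignmentMidPrologue gone, the shared code pads frameDepth_ so that the lr push plus the local allocation together keep the stack 8-byte aligned. A worked example of the computation above (the numbers are hypothetical; StackAlignment is 8 on ARM):

#include <cassert>
#include <cstdint>

int main() {
    const uint32_t StackAlignment = 8;
    const uint32_t AlignmentAtPrologue = 4;   // the prologue's push of lr
    uint32_t frameDepth = 16;                 // hypothetical local stack storage

    uint32_t alignmentAtCall = AlignmentAtPrologue + frameDepth;   // 20
    if (uint32_t rem = alignmentAtCall % StackAlignment) {         // 20 % 8 == 4
        uint32_t frameInitialAdjustment = StackAlignment - rem;    // 4
        frameDepth += frameInitialAdjustment;                      // 20
    }

    // push(lr) + reserve(frameDepth) == 4 + 20 == 24: an 8-byte aligned frame.
    assert((AlignmentAtPrologue + frameDepth) % StackAlignment == 0);
    return 0;
}
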
--- a/js/src/jit/x64/Assembler-x64.h
+++ b/js/src/jit/x64/Assembler-x64.h
@@ -179,17 +179,16 @@ static MOZ_CONSTEXPR_VAR Register PreBar
 
 // GCC stack is aligned on 16 bytes, but we don't maintain the invariant in
 // jitted code.
 static const uint32_t StackAlignment = 16;
 static const bool StackKeptAligned = false;
 static const uint32_t CodeAlignment = 8;
 static const uint32_t NativeFrameSize = sizeof(void*);
 static const uint32_t AlignmentAtPrologue = sizeof(void*);
-static const uint32_t AlignmentMidPrologue = AlignmentAtPrologue;
 
 static const Scale ScalePointer = TimesEight;
 
 } // namespace jit
 } // namespace js
 
 #include "jit/shared/Assembler-x86-shared.h"
 
--- a/js/src/jit/x86/Assembler-x86.h
+++ b/js/src/jit/x86/Assembler-x86.h
@@ -107,17 +107,16 @@ static MOZ_CONSTEXPR_VAR Register AsmJSI
 static const uint32_t StackAlignment = 16;
 #else
 static const uint32_t StackAlignment = 4;
 #endif
 static const bool StackKeptAligned = false;
 static const uint32_t CodeAlignment = 8;
 static const uint32_t NativeFrameSize = sizeof(void*);
 static const uint32_t AlignmentAtPrologue = sizeof(void*);
-static const uint32_t AlignmentMidPrologue = AlignmentAtPrologue;
 struct ImmTag : public Imm32
 {
     ImmTag(JSValueTag mask)
       : Imm32(int32_t(mask))
     { }
 };
 
 struct ImmType : public ImmTag