Bug 1019831: SIMD x86-x64: Align stack top on 16 bytes boundaries if SIMD instructions are present; r=sunfish
author Benjamin Bouvier <benj@benj.me>
Thu, 07 Aug 2014 17:57:44 +0200
changeset 219987 b8887016ec4f2baefa379d82731e7c87c5afa662
parent 219986 8333f43d2bfe013822e11fdbd781797d56e2b6ca
child 219988 c18202c2ccd0251281f5a3a9794e4044c8860b59
push id 3979
push user raliiev@mozilla.com
push date Mon, 13 Oct 2014 16:35:44 +0000
treeherder mozilla-beta@30f2cc610691 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers sunfish
bugs 1019831
milestone 34.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1019831: SIMD x86-x64: Align stack top on 16 bytes boundaries if SIMD instructions are present; r=sunfish
js/src/jit/Lowering.cpp
js/src/jit/MIRGenerator.h
js/src/jit/MIRGraph.cpp
js/src/jit/arm/Assembler-arm.h
js/src/jit/shared/CodeGenerator-shared.cpp
js/src/jit/shared/CodeGenerator-shared.h
js/src/jit/x64/Assembler-x64.h
js/src/jit/x86/Assembler-x86.h
--- a/js/src/jit/Lowering.cpp
+++ b/js/src/jit/Lowering.cpp
@@ -2223,17 +2223,17 @@ LIRGenerator::visitInterruptCheck(MInter
 
     LInterruptCheck *lir = new(alloc()) LInterruptCheck();
     return add(lir, ins) && assignSafepoint(lir, ins);
 }
 
 bool
 LIRGenerator::visitAsmJSInterruptCheck(MAsmJSInterruptCheck *ins)
 {
-    gen->setPerformsAsmJSCall();
+    gen->setPerformsCall();
 
     LAsmJSInterruptCheck *lir = new(alloc()) LAsmJSInterruptCheck(temp(),
                                                                   ins->interruptExit(),
                                                                   ins->funcDesc());
     return add(lir, ins);
 }
 
 bool
@@ -3551,17 +3551,17 @@ LIRGenerator::visitAsmJSPassStackArg(MAs
     }
 
     return add(new(alloc()) LAsmJSPassStackArg(useRegisterOrConstantAtStart(ins->arg())), ins);
 }
 
 bool
 LIRGenerator::visitAsmJSCall(MAsmJSCall *ins)
 {
-    gen->setPerformsAsmJSCall();
+    gen->setPerformsCall();
 
     LAllocation *args = gen->allocate<LAllocation>(ins->numOperands());
     if (!args)
         return false;
 
     for (unsigned i = 0; i < ins->numArgs(); i++)
         args[i] = useFixed(ins->getOperand(i), ins->registerForArg(i));
 
--- a/js/src/jit/MIRGenerator.h
+++ b/js/src/jit/MIRGenerator.h
@@ -122,28 +122,19 @@ class MIRGenerator
         maxAsmJSStackArgBytes_ = n;
     }
     void setPerformsCall() {
         performsCall_ = true;
     }
     bool performsCall() const {
         return performsCall_;
     }
-    void setNeedsInitialStackAlignment() {
-        needsInitialStackAlignment_ = true;
-    }
-    bool needsInitialStackAlignment() const {
-        JS_ASSERT(compilingAsmJS());
-        return needsInitialStackAlignment_;
-    }
-    void setPerformsAsmJSCall() {
-        JS_ASSERT(compilingAsmJS());
-        setPerformsCall();
-        setNeedsInitialStackAlignment();
-    }
+    // Traverses the graph to find if there's any SIMD instruction. Costly but
+    // the value is cached, so don't worry about calling it several times.
+    bool usesSimd();
     void noteMinAsmJSHeapLength(uint32_t len) {
         minAsmJSHeapLength_ = len;
     }
     uint32_t minAsmJSHeapLength() const {
         return minAsmJSHeapLength_;
     }
 
     bool modifiesFrameArguments() const {
@@ -162,17 +153,18 @@ class MIRGenerator
     MIRGraph *graph_;
     AbortReason abortReason_;
     bool error_;
     mozilla::Atomic<bool, mozilla::Relaxed> *pauseBuild_;
     mozilla::Atomic<bool, mozilla::Relaxed> cancelBuild_;
 
     uint32_t maxAsmJSStackArgBytes_;
     bool performsCall_;
-    bool needsInitialStackAlignment_;
+    bool usesSimd_;
+    bool usesSimdCached_;
     uint32_t minAsmJSHeapLength_;
 
     // Keep track of whether frame arguments are modified during execution.
     // RegAlloc needs to know this as spilling values back to their register
     // slots is not compatible with that.
     bool modifiesFrameArguments_;
 
 #if defined(JS_ION_PERF)
--- a/js/src/jit/MIRGraph.cpp
+++ b/js/src/jit/MIRGraph.cpp
@@ -25,23 +25,55 @@ MIRGenerator::MIRGenerator(CompileCompar
     alloc_(alloc),
     graph_(graph),
     abortReason_(AbortReason_NoAbort),
     error_(false),
     pauseBuild_(nullptr),
     cancelBuild_(false),
     maxAsmJSStackArgBytes_(0),
     performsCall_(false),
-    needsInitialStackAlignment_(false),
+    usesSimd_(false),
+    usesSimdCached_(false),
     minAsmJSHeapLength_(AsmJSAllocationGranularity),
     modifiesFrameArguments_(false),
     options(options)
 { }
 
 bool
+MIRGenerator::usesSimd()
+{
+    if (usesSimdCached_) // Graph already scanned: reuse the stored answer.
+        return usesSimd_;
+
+    usesSimdCached_ = true; // Both exits below store the result in usesSimd_.
+    for (ReversePostorderIterator block = graph_->rpoBegin(),
+                                  end   = graph_->rpoEnd();
+         block != end;
+         block++)
+    {
+        // Iterating with MInstructionIterator means phis are not inspected.
+        // That is fine: any reachable phi (or phi cycle) has at least one
+        // instruction as an input, and that instruction is visited by this scan.
+        for (MInstructionIterator inst = block->begin(); inst != block->end(); inst++) {
+            // Instructions that have SIMD inputs but not a SIMD type are fine
+            // to ignore, as their inputs are also reached at some point. By
+            // induction, at least one instruction with a SIMD type is reached
+            // at some point.
+            if (IsSimdType(inst->type())) {
+                JS_ASSERT(SupportsSimd);
+                usesSimd_ = true;
+                return true;
+            }
+        }
+    }
+    usesSimd_ = false;
+    return false;
+}
+
+bool
 MIRGenerator::abortFmt(const char *message, va_list ap)
 {
     IonSpewVA(IonSpew_Abort, message, ap);
     error_ = true;
     return false;
 }
 
 bool
--- a/js/src/jit/arm/Assembler-arm.h
+++ b/js/src/jit/arm/Assembler-arm.h
@@ -145,16 +145,17 @@ static const uint32_t StackAlignment = 8
 static const uint32_t CodeAlignment = 8;
 static const bool StackKeptAligned = true;
 
 // This boolean indicates whether we support SIMD instructions flavoured for
 // this architecture or not. Rather than a method in the LIRGenerator, it is
 // here such that it is accessible from the entire codebase. Once full support
 // for SIMD is reached on all tier-1 platforms, this constant can be deleted.
 static const bool SupportsSimd = false;
+static const uint32_t SimdStackAlignment = 8;
 
 static const Scale ScalePointer = TimesFour;
 
 class Instruction;
 class InstBranchImm;
 uint32_t RM(Register r);
 uint32_t RS(Register r);
 uint32_t RD(Register r);
--- a/js/src/jit/shared/CodeGenerator-shared.cpp
+++ b/js/src/jit/shared/CodeGenerator-shared.cpp
@@ -63,30 +63,66 @@ CodeGeneratorShared::CodeGeneratorShared
     // argument stack depth separately.
     if (gen->compilingAsmJS()) {
         JS_ASSERT(graph->argumentSlotCount() == 0);
         frameDepth_ += gen->maxAsmJSStackArgBytes();
 
         // An MAsmJSCall does not align the stack pointer at calls sites but instead
         // relies on the a priori stack adjustment (in the prologue) on platforms
         // (like x64) which require the stack to be aligned.
-        if (StackKeptAligned || gen->needsInitialStackAlignment()) {
+        if (StackKeptAligned || gen->performsCall() || gen->usesSimd()) {
             unsigned alignmentAtCall = sizeof(AsmJSFrame) + frameDepth_;
+            unsigned firstFixup = 0;
             if (unsigned rem = alignmentAtCall % StackAlignment)
-                frameDepth_ += StackAlignment - rem;
+                frameDepth_ += (firstFixup = StackAlignment - rem);
+
+            if (gen->usesSimd())
+                setupSimdAlignment(firstFixup);
         }
 
         // FrameSizeClass is only used for bailing, which cannot happen in
         // asm.js code.
         frameClass_ = FrameSizeClass::None();
     } else {
         frameClass_ = FrameSizeClass::FromDepth(frameDepth_);
     }
 }
 
+void
+CodeGeneratorShared::setupSimdAlignment(unsigned fixup)
+{
+    JS_STATIC_ASSERT(SimdStackAlignment % StackAlignment == 0);
+    //  |fixup| is the padding (possibly 0) the caller added to get:
+    //      (frameDepth_ + sizeof(AsmJSFrame)) % StackAlignment == 0
+    //  which means we can add as many SimdStackAlignment as needed.
+
+    //  The next constraint is to have all stack slots
+    //  aligned for SIMD. That's done by having the first stack slot
+    //  aligned. We need an offset such that:
+    //      (frameDepth_ - offset) % SimdStackAlignment == 0
+    frameInitialAdjustment_ = frameDepth_ % SimdStackAlignment;
+
+    //  We need to ensure that the first stack slot is actually
+    //  located in this frame and not beforehand, when taking this
+    //  offset into account, i.e.:
+    //      frameDepth_ - initial adjustment >= frameDepth_ - fixup
+    //  <=>                            fixup >= initial adjustment
+    //
+    //  For instance, on x86 with gcc, if the initial frameDepth
+    //  % 16 is 8, then the fixup is 0, although the initial
+    //  adjustment is 8. The first stack slot would be located at
+    //  frameDepth - 8 in this case, which is obviously before
+    //  frameDepth.
+    //
+    //  If that's not the case, we grow frameDepth_ by SimdStackAlignment,
+    //  which keeps the other constraints above satisfied.
+    if (frameInitialAdjustment_ > int32_t(fixup))
+        frameDepth_ += SimdStackAlignment;
+}
+
 bool
 CodeGeneratorShared::generateOutOfLineCode()
 {
     JSScript *topScript = sps_.getPushed();
     for (size_t i = 0; i < outOfLineCode_.length(); i++) {
         if (!gen->alloc().ensureBallast())
             return false;
 
--- a/js/src/jit/shared/CodeGenerator-shared.h
+++ b/js/src/jit/shared/CodeGenerator-shared.h
@@ -453,16 +453,18 @@ class CodeGeneratorShared : public LInst
 // This function is not used for MIPS. MIPS has branchToBlock.
 #ifndef JS_CODEGEN_MIPS
     void jumpToBlock(MBasicBlock *mir, Assembler::Condition cond);
 #endif
 
   private:
     void generateInvalidateEpilogue();
 
+    void setupSimdAlignment(unsigned fixup);
+
   public:
     CodeGeneratorShared(MIRGenerator *gen, LIRGraph *graph, MacroAssembler *masm);
 
   public:
     template <class ArgSeq, class StoreOutputTo>
     bool visitOutOfLineCallVM(OutOfLineCallVM<ArgSeq, StoreOutputTo> *ool);
 
     bool visitOutOfLineTruncateSlow(OutOfLineTruncateSlow *ool);
--- a/js/src/jit/x64/Assembler-x64.h
+++ b/js/src/jit/x64/Assembler-x64.h
@@ -189,16 +189,17 @@ static const uint32_t StackAlignment = 1
 static const bool StackKeptAligned = false;
 static const uint32_t CodeAlignment = 8;
 
 // This boolean indicates whether we support SIMD instructions flavoured for
 // this architecture or not. Rather than a method in the LIRGenerator, it is
 // here such that it is accessible from the entire codebase. Once full support
 // for SIMD is reached on all tier-1 platforms, this constant can be deleted.
 static const bool SupportsSimd = true;
+static const uint32_t SimdStackAlignment = 16;
 
 static const Scale ScalePointer = TimesEight;
 
 } // namespace jit
 } // namespace js
 
 #include "jit/shared/Assembler-x86-shared.h"
 
--- a/js/src/jit/x86/Assembler-x86.h
+++ b/js/src/jit/x86/Assembler-x86.h
@@ -117,16 +117,17 @@ static const uint32_t StackAlignment = 4
 static const bool StackKeptAligned = false;
 static const uint32_t CodeAlignment = 8;
 
 // This boolean indicates whether we support SIMD instructions flavoured for
 // this architecture or not. Rather than a method in the LIRGenerator, it is
 // here such that it is accessible from the entire codebase. Once full support
 // for SIMD is reached on all tier-1 platforms, this constant can be deleted.
 static const bool SupportsSimd = true;
+static const uint32_t SimdStackAlignment = 16;
 
 struct ImmTag : public Imm32
 {
     ImmTag(JSValueTag mask)
       : Imm32(int32_t(mask))
     { }
 };