Bug 1060789: SIMD x86-x64 backend: Add 'splat' backend support; r=sunfish
authorDouglas Crosher <dtc-moz@scieneer.com>
Mon, 01 Sep 2014 01:30:27 +1000
changeset 225787 a65fbd070a585bf58dcbdda15190ae477db15de9
parent 225786 6a78c4812f10cd1003d088b3a2d30b1840e02a2c
child 225788 e28ec487d050a1b3508d7944c96c08facdefe9a1
push id583
push userbhearsum@mozilla.com
push dateMon, 24 Nov 2014 19:04:58 +0000
treeherdermozilla-release@c107e74250f4 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewerssunfish
bugs1060789
milestone34.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1060789: SIMD x86-x64 backend: Add 'splat' backend support; r=sunfish
js/src/jit/LIR-Common.h
js/src/jit/LOpcodes.h
js/src/jit/MIR.cpp
js/src/jit/MIR.h
js/src/jit/MOpcodes.h
js/src/jit/ParallelSafetyAnalysis.cpp
js/src/jit/arm/CodeGenerator-arm.h
js/src/jit/arm/Lowering-arm.cpp
js/src/jit/arm/Lowering-arm.h
js/src/jit/mips/Lowering-mips.cpp
js/src/jit/mips/Lowering-mips.h
js/src/jit/none/Lowering-none.h
js/src/jit/shared/CodeGenerator-x86-shared.cpp
js/src/jit/shared/CodeGenerator-x86-shared.h
js/src/jit/shared/Lowering-x86-shared.cpp
js/src/jit/shared/Lowering-x86-shared.h
--- a/js/src/jit/LIR-Common.h
+++ b/js/src/jit/LIR-Common.h
@@ -142,16 +142,31 @@ class LSimdValueX4 : public LInstruction
         setOperand(3, w);
     }
 
     MSimdValueX4 *mir() const {
         return mir_->toSimdValueX4();
     }
 };
 
+// Constructs a SIMD value with 4 equal components (e.g. int32x4, float32x4).
+class LSimdSplatX4 : public LInstructionHelper<1, 1, 0>
+{
+  public:
+    LIR_HEADER(SimdSplatX4)
+    explicit LSimdSplatX4(const LAllocation &v)
+    {
+        setOperand(0, v);
+    }
+
+    MSimdSplatX4 *mir() const {
+        return mir_->toSimdSplatX4();
+    }
+};
+
 // Extracts an element from a given SIMD int32x4 lane.
 class LSimdExtractElementI : public LInstructionHelper<1, 1, 0>
 {
     SimdLane lane_;
 
   public:
     LIR_HEADER(SimdExtractElementI);
 
--- a/js/src/jit/LOpcodes.h
+++ b/js/src/jit/LOpcodes.h
@@ -12,16 +12,17 @@
     _(Nop)                          \
     _(OsiPoint)                     \
     _(MoveGroup)                    \
     _(Integer)                      \
     _(Pointer)                      \
     _(Double)                       \
     _(Float32)                      \
     _(SimdValueX4)                  \
+    _(SimdSplatX4)                  \
     _(Int32x4)                      \
     _(Float32x4)                    \
     _(SimdExtractElementI)          \
     _(SimdExtractElementF)          \
     _(SimdSignMaskX4)               \
     _(SimdBinaryCompIx4)            \
     _(SimdBinaryCompFx4)            \
     _(SimdBinaryArithIx4)           \
--- a/js/src/jit/MIR.cpp
+++ b/js/src/jit/MIR.cpp
@@ -667,16 +667,49 @@ MSimdValueX4::foldsTo(TempAllocator &all
         break;
       }
       default: MOZ_ASSUME_UNREACHABLE("unexpected type in MSimdValueX4::foldsTo");
     }
 
     return MSimdConstant::New(alloc, cst, type());
 }
 
+MDefinition*
+MSimdSplatX4::foldsTo(TempAllocator &alloc)
+{
+    DebugOnly<MIRType> scalarType = SimdTypeToScalarType(type());
+    MDefinition *op = getOperand(0);
+    if (!op->isConstant())
+        return this;
+    JS_ASSERT(op->type() == scalarType);
+
+    SimdConstant cst;
+    switch (type()) {
+      case MIRType_Int32x4: {
+        int32_t a[4];
+        int32_t v = getOperand(0)->toConstant()->value().toInt32();
+        for (size_t i = 0; i < 4; ++i)
+            a[i] = v;
+        cst = SimdConstant::CreateX4(a);
+        break;
+      }
+      case MIRType_Float32x4: {
+        float a[4];
+        float v = getOperand(0)->toConstant()->value().toNumber();
+        for (size_t i = 0; i < 4; ++i)
+            a[i] = v;
+        cst = SimdConstant::CreateX4(a);
+        break;
+      }
+      default: MOZ_ASSUME_UNREACHABLE("unexpected type in MSimdSplatX4::foldsTo");
+    }
+
+    return MSimdConstant::New(alloc, cst, type());
+}
+
 MCloneLiteral *
 MCloneLiteral::New(TempAllocator &alloc, MDefinition *obj)
 {
     return new(alloc) MCloneLiteral(obj);
 }
 
 void
 MControlInstruction::printOpcode(FILE *fp) const
--- a/js/src/jit/MIR.h
+++ b/js/src/jit/MIR.h
@@ -1279,16 +1279,50 @@ class MSimdValueX4 : public MQuaternaryI
 
     bool congruentTo(const MDefinition *ins) const {
         return congruentIfOperandsEqual(ins);
     }
 
     MDefinition *foldsTo(TempAllocator &alloc);
 };
 
+// Generic constructor of SIMD valuesX4.
+class MSimdSplatX4 : public MUnaryInstruction
+{
+  protected:
+    MSimdSplatX4(MIRType type, MDefinition *v)
+      : MUnaryInstruction(v)
+    {
+        JS_ASSERT(IsSimdType(type));
+        mozilla::DebugOnly<MIRType> scalarType = SimdTypeToScalarType(type);
+        JS_ASSERT(scalarType == v->type());
+
+        setMovable();
+        setResultType(type);
+    }
+
+  public:
+    INSTRUCTION_HEADER(SimdSplatX4)
+
+    static MSimdSplatX4 *New(TempAllocator &alloc, MIRType type, MDefinition *v)
+    {
+        return new(alloc) MSimdSplatX4(type, v);
+    }
+
+    AliasSet getAliasSet() const {
+        return AliasSet::None();
+    }
+
+    bool congruentTo(const MDefinition *ins) const {
+        return congruentIfOperandsEqual(ins);
+    }
+
+    MDefinition *foldsTo(TempAllocator &alloc);
+};
+
 // A constant SIMD value.
 class MSimdConstant : public MNullaryInstruction
 {
     SimdConstant value_;
 
   protected:
     MSimdConstant(const SimdConstant &v, MIRType type) : value_(v) {
         JS_ASSERT(IsSimdType(type));
--- a/js/src/jit/MOpcodes.h
+++ b/js/src/jit/MOpcodes.h
@@ -8,16 +8,17 @@
 #define jit_MOpcodes_h
 
 namespace js {
 namespace jit {
 
 #define MIR_OPCODE_LIST(_)                                                  \
     _(Constant)                                                             \
     _(SimdValueX4)                                                          \
+    _(SimdSplatX4)                                                          \
     _(SimdConstant)                                                         \
     _(SimdExtractElement)                                                   \
     _(SimdSignMask)                                                         \
     _(SimdBinaryComp)                                                       \
     _(SimdBinaryArith)                                                      \
     _(SimdBinaryBitwise)                                                    \
     _(CloneLiteral)                                                         \
     _(Parameter)                                                            \
--- a/js/src/jit/ParallelSafetyAnalysis.cpp
+++ b/js/src/jit/ParallelSafetyAnalysis.cpp
@@ -108,16 +108,17 @@ class ParallelSafetyVisitor : public MDe
 
     bool convertToBailout(MInstructionIterator &iter);
 
     // I am taking the policy of blacklisting everything that's not
     // obviously safe for now.  We can loosen as we need.
 
     SAFE_OP(Constant)
     SAFE_OP(SimdValueX4)
+    SAFE_OP(SimdSplatX4)
     SAFE_OP(SimdConstant)
     SAFE_OP(SimdExtractElement)
     SAFE_OP(SimdSignMask)
     SAFE_OP(SimdBinaryComp)
     SAFE_OP(SimdBinaryArith)
     SAFE_OP(SimdBinaryBitwise)
     UNSAFE_OP(CloneLiteral)
     SAFE_OP(Parameter)
--- a/js/src/jit/arm/CodeGenerator-arm.h
+++ b/js/src/jit/arm/CodeGenerator-arm.h
@@ -225,16 +225,17 @@ class CodeGeneratorARM : public CodeGene
     bool visitEffectiveAddress(LEffectiveAddress *ins);
     bool visitUDiv(LUDiv *ins);
     bool visitUMod(LUMod *ins);
     bool visitSoftUDivOrMod(LSoftUDivOrMod *ins);
 
   public:
     // Unimplemented SIMD instructions
     bool visitSimdValueX4(LSimdValueX4 *lir) { MOZ_CRASH("NYI"); }
+    bool visitSimdSplatX4(LSimdSplatX4 *lir) { MOZ_CRASH("NYI"); }
     bool visitInt32x4(LInt32x4 *ins) { MOZ_CRASH("NYI"); }
     bool visitFloat32x4(LFloat32x4 *ins) { MOZ_CRASH("NYI"); }
     bool visitSimdExtractElementI(LSimdExtractElementI *ins) { MOZ_CRASH("NYI"); }
     bool visitSimdExtractElementF(LSimdExtractElementF *ins) { MOZ_CRASH("NYI"); }
     bool visitSimdSignMaskX4(LSimdSignMaskX4 *ins) { MOZ_CRASH("NYI"); }
     bool visitSimdBinaryCompIx4(LSimdBinaryCompIx4 *lir) { MOZ_CRASH("NYI"); }
     bool visitSimdBinaryCompFx4(LSimdBinaryCompFx4 *lir) { MOZ_CRASH("NYI"); }
     bool visitSimdBinaryArithIx4(LSimdBinaryArithIx4 *lir) { MOZ_CRASH("NYI"); }
--- a/js/src/jit/arm/Lowering-arm.cpp
+++ b/js/src/jit/arm/Lowering-arm.cpp
@@ -544,9 +544,15 @@ LIRGeneratorARM::visitStoreTypedArrayEle
 }
 
 bool
 LIRGeneratorARM::visitForkJoinGetSlice(MForkJoinGetSlice *ins)
 {
     MOZ_CRASH("NYI");
 }
 
+bool
+LIRGeneratorARM::visitSimdSplatX4(MSimdSplatX4 *ins)
+{
+    MOZ_CRASH("NYI");
+}
+
 //__aeabi_uidiv
--- a/js/src/jit/arm/Lowering-arm.h
+++ b/js/src/jit/arm/Lowering-arm.h
@@ -86,16 +86,17 @@ class LIRGeneratorARM : public LIRGenera
     bool visitGuardObjectType(MGuardObjectType *ins);
     bool visitAsmJSUnsignedToDouble(MAsmJSUnsignedToDouble *ins);
     bool visitAsmJSUnsignedToFloat32(MAsmJSUnsignedToFloat32 *ins);
     bool visitAsmJSLoadHeap(MAsmJSLoadHeap *ins);
     bool visitAsmJSStoreHeap(MAsmJSStoreHeap *ins);
     bool visitAsmJSLoadFuncPtr(MAsmJSLoadFuncPtr *ins);
     bool visitStoreTypedArrayElementStatic(MStoreTypedArrayElementStatic *ins);
     bool visitForkJoinGetSlice(MForkJoinGetSlice *ins);
+    bool visitSimdSplatX4(MSimdSplatX4 *ins);
 };
 
 typedef LIRGeneratorARM LIRGeneratorSpecific;
 
 } // namespace jit
 } // namespace js
 
 #endif /* jit_arm_Lowering_arm_h */
--- a/js/src/jit/mips/Lowering-mips.cpp
+++ b/js/src/jit/mips/Lowering-mips.cpp
@@ -525,8 +525,14 @@ LIRGeneratorMIPS::visitStoreTypedArrayEl
     MOZ_CRASH("NYI");
 }
 
 bool
 LIRGeneratorMIPS::visitForkJoinGetSlice(MForkJoinGetSlice *ins)
 {
     MOZ_CRASH("NYI");
 }
+
+bool
+LIRGeneratorMIPS::visitSimdSplatX4(MSimdSplatX4 *ins)
+{
+    MOZ_CRASH("NYI");
+}
--- a/js/src/jit/mips/Lowering-mips.h
+++ b/js/src/jit/mips/Lowering-mips.h
@@ -86,16 +86,17 @@ class LIRGeneratorMIPS : public LIRGener
     bool visitGuardObjectType(MGuardObjectType *ins);
     bool visitAsmJSUnsignedToDouble(MAsmJSUnsignedToDouble *ins);
     bool visitAsmJSUnsignedToFloat32(MAsmJSUnsignedToFloat32 *ins);
     bool visitAsmJSLoadHeap(MAsmJSLoadHeap *ins);
     bool visitAsmJSStoreHeap(MAsmJSStoreHeap *ins);
     bool visitAsmJSLoadFuncPtr(MAsmJSLoadFuncPtr *ins);
     bool visitStoreTypedArrayElementStatic(MStoreTypedArrayElementStatic *ins);
     bool visitForkJoinGetSlice(MForkJoinGetSlice *ins);
+    bool visitSimdSplatX4(MSimdSplatX4 *ins);
 };
 
 typedef LIRGeneratorMIPS LIRGeneratorSpecific;
 
 } // namespace jit
 } // namespace js
 
 #endif /* jit_mips_Lowering_mips_h */
--- a/js/src/jit/none/Lowering-none.h
+++ b/js/src/jit/none/Lowering-none.h
@@ -68,16 +68,17 @@ class LIRGeneratorNone : public LIRGener
     bool visitAsmJSLoadHeap(MAsmJSLoadHeap *ins) { MOZ_CRASH(); }
     bool visitAsmJSStoreHeap(MAsmJSStoreHeap *ins) { MOZ_CRASH(); }
     bool visitAsmJSLoadFuncPtr(MAsmJSLoadFuncPtr *ins) { MOZ_CRASH(); }
     bool visitStoreTypedArrayElementStatic(MStoreTypedArrayElementStatic *ins) { MOZ_CRASH(); }
     bool visitForkJoinGetSlice(MForkJoinGetSlice *ins) { MOZ_CRASH(); }
 
     LTableSwitch *newLTableSwitch(LAllocation, LDefinition, MTableSwitch *) { MOZ_CRASH(); }
     LTableSwitchV *newLTableSwitchV(MTableSwitch *) { MOZ_CRASH(); }
+    bool visitSimdSplatX4(MSimdSplatX4 *ins) { MOZ_CRASH(); }
 };
 
 typedef LIRGeneratorNone LIRGeneratorSpecific;
 
 } // namespace jit
 } // namespace js
 
 #endif /* jit_none_Lowering_none_h */
--- a/js/src/jit/shared/CodeGenerator-x86-shared.cpp
+++ b/js/src/jit/shared/CodeGenerator-x86-shared.cpp
@@ -2149,16 +2149,45 @@ CodeGeneratorX86Shared::visitSimdValueX4
       default: MOZ_CRASH("Unknown SIMD kind");
     }
 
     masm.freeStack(Simd128DataSize);
     return true;
 }
 
 bool
+CodeGeneratorX86Shared::visitSimdSplatX4(LSimdSplatX4 *ins)
+{
+    FloatRegister output = ToFloatRegister(ins->output());
+
+    MSimdSplatX4 *mir = ins->mir();
+    MOZ_ASSERT(IsSimdType(mir->type()));
+    JS_STATIC_ASSERT(sizeof(float) == sizeof(int32_t));
+
+    switch (mir->type()) {
+      case MIRType_Int32x4: {
+        Register r = ToRegister(ins->getOperand(0));
+        masm.movd(r, output);
+        masm.pshufd(0, output, output);
+        break;
+      }
+      case MIRType_Float32x4: {
+        FloatRegister r = ToFloatRegister(ins->getOperand(0));
+        MOZ_ASSERT(r == output);
+        masm.shufps(0, r, output);
+        break;
+      }
+      default:
+        MOZ_CRASH("Unknown SIMD kind");
+    }
+
+    return true;
+}
+
+bool
 CodeGeneratorX86Shared::visitSimdExtractElementI(LSimdExtractElementI *ins)
 {
     FloatRegister input = ToFloatRegister(ins->input());
     Register output = ToRegister(ins->output());
 
     SimdLane lane = ins->lane();
     if (lane == LaneX) {
         // The value we want to extract is in the low double-word
--- a/js/src/jit/shared/CodeGenerator-x86-shared.h
+++ b/js/src/jit/shared/CodeGenerator-x86-shared.h
@@ -202,16 +202,17 @@ class CodeGeneratorX86Shared : public Co
     bool visitForkJoinGetSlice(LForkJoinGetSlice *ins);
 
     bool visitNegI(LNegI *lir);
     bool visitNegD(LNegD *lir);
     bool visitNegF(LNegF *lir);
 
     // SIMD operators
     bool visitSimdValueX4(LSimdValueX4 *lir);
+    bool visitSimdSplatX4(LSimdSplatX4 *lir);
     bool visitInt32x4(LInt32x4 *ins);
     bool visitFloat32x4(LFloat32x4 *ins);
     bool visitSimdExtractElementI(LSimdExtractElementI *lir);
     bool visitSimdExtractElementF(LSimdExtractElementF *lir);
     bool visitSimdSignMaskX4(LSimdSignMaskX4 *ins);
     bool visitSimdBinaryCompIx4(LSimdBinaryCompIx4 *lir);
     bool visitSimdBinaryCompFx4(LSimdBinaryCompFx4 *lir);
     bool visitSimdBinaryArithIx4(LSimdBinaryArithIx4 *lir);
--- a/js/src/jit/shared/Lowering-x86-shared.cpp
+++ b/js/src/jit/shared/Lowering-x86-shared.cpp
@@ -301,8 +301,24 @@ LIRGeneratorX86Shared::visitForkJoinGetS
     LForkJoinGetSlice *lir = new(alloc())
         LForkJoinGetSlice(useFixed(ins->forkJoinContext(), ForkJoinGetSliceReg_cx),
                           tempFixed(eax),
                           tempFixed(edx),
                           tempFixed(ForkJoinGetSliceReg_temp0),
                           tempFixed(ForkJoinGetSliceReg_temp1));
     return defineFixed(lir, ins, LAllocation(AnyRegister(ForkJoinGetSliceReg_output)));
 }
+
+bool
+LIRGeneratorX86Shared::visitSimdSplatX4(MSimdSplatX4 *ins)
+{
+    LAllocation x = useRegisterAtStart(ins->getOperand(0));
+    LSimdSplatX4 *lir = new(alloc()) LSimdSplatX4(x);
+
+    switch (ins->type()) {
+      case MIRType_Int32x4:
+        return define(lir, ins);
+      case MIRType_Float32x4:
+        return defineReuseInput(lir, ins, 0);
+      default:
+        MOZ_CRASH("Unknown SIMD kind");
+    }
+}
--- a/js/src/jit/shared/Lowering-x86-shared.h
+++ b/js/src/jit/shared/Lowering-x86-shared.h
@@ -43,14 +43,15 @@ class LIRGeneratorX86Shared : public LIR
     bool lowerUDiv(MDiv *div);
     bool lowerUMod(MMod *mod);
     bool lowerUrshD(MUrsh *mir);
     bool lowerConstantDouble(double d, MInstruction *ins);
     bool lowerConstantFloat32(float d, MInstruction *ins);
     bool lowerTruncateDToInt32(MTruncateToInt32 *ins);
     bool lowerTruncateFToInt32(MTruncateToInt32 *ins);
     bool visitForkJoinGetSlice(MForkJoinGetSlice *ins);
+    bool visitSimdSplatX4(MSimdSplatX4 *ins);
 };
 
 } // namespace jit
 } // namespace js
 
 #endif /* jit_shared_Lowering_x86_shared_h */