Bug 1051860: Optimize SimdValueX4 codegen for float32x4 with unpcklps; r=sunfish
author Benjamin Bouvier <benj@benj.me>
Thu, 11 Sep 2014 08:50:10 +0200
changeset 229331 179193fbcccdfe7ea0326038527956fd11543794
parent 229330 b78bb4028f3a3911259e561ff3c213b330e9cbf1
child 229332 6cc38353cfa4e1569dfe1fa62f476015becfe465
push id 611
push user raliiev@mozilla.com
push date Mon, 05 Jan 2015 23:23:16 +0000
treeherder mozilla-release@345cd3b9c445 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers sunfish
bugs 1051860
milestone 35.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1051860: Optimize SimdValueX4 codegen for float32x4 with unpcklps; r=sunfish
js/src/jit/LIR-Common.h
js/src/jit/LOpcodes.h
js/src/jit/Lowering.cpp
js/src/jit/Lowering.h
js/src/jit/arm/CodeGenerator-arm.h
js/src/jit/arm/Lowering-arm.cpp
js/src/jit/arm/Lowering-arm.h
js/src/jit/shared/CodeGenerator-x86-shared.cpp
js/src/jit/shared/CodeGenerator-x86-shared.h
js/src/jit/shared/LIR-x86-shared.h
js/src/jit/shared/Lowering-x86-shared.cpp
js/src/jit/shared/Lowering-x86-shared.h
js/src/jit/x64/LOpcodes-x64.h
js/src/jit/x86/LOpcodes-x86.h
--- a/js/src/jit/LIR-Common.h
+++ b/js/src/jit/LIR-Common.h
@@ -123,35 +123,16 @@ class LMoveGroup : public LInstructionHe
     size_t numMoves() const {
         return moves_.length();
     }
     const LMove &getMove(size_t i) const {
         return moves_[i];
     }
 };
 
-// Constructs a SIMD value with 4 components (e.g. int32x4, float32x4).
-class LSimdValueX4 : public LInstructionHelper<1, 4, 0>
-{
-  public:
-    LIR_HEADER(SimdValueX4)
-    LSimdValueX4(const LAllocation &x, const LAllocation &y,
-                 const LAllocation &z, const LAllocation &w)
-    {
-        setOperand(0, x);
-        setOperand(1, y);
-        setOperand(2, z);
-        setOperand(3, w);
-    }
-
-    MSimdValueX4 *mir() const {
-        return mir_->toSimdValueX4();
-    }
-};
-
 // Constructs a SIMD value with 4 equal components (e.g. int32x4, float32x4).
 class LSimdSplatX4 : public LInstructionHelper<1, 1, 0>
 {
   public:
     LIR_HEADER(SimdSplatX4)
     explicit LSimdSplatX4(const LAllocation &v)
     {
         setOperand(0, v);
--- a/js/src/jit/LOpcodes.h
+++ b/js/src/jit/LOpcodes.h
@@ -11,17 +11,16 @@
     _(Label)                        \
     _(Nop)                          \
     _(OsiPoint)                     \
     _(MoveGroup)                    \
     _(Integer)                      \
     _(Pointer)                      \
     _(Double)                       \
     _(Float32)                      \
-    _(SimdValueX4)                  \
     _(SimdSplatX4)                  \
     _(Int32x4)                      \
     _(Float32x4)                    \
     _(SimdExtractElementI)          \
     _(SimdExtractElementF)          \
     _(SimdSignMaskX4)               \
     _(SimdBinaryCompIx4)            \
     _(SimdBinaryCompFx4)            \
--- a/js/src/jit/Lowering.cpp
+++ b/js/src/jit/Lowering.cpp
@@ -3682,27 +3682,16 @@ LIRGenerator::visitRecompileCheck(MRecom
 {
     LRecompileCheck *lir = new(alloc()) LRecompileCheck(temp());
     if (!add(lir, ins))
         return false;
     return assignSafepoint(lir, ins);
 }
 
 bool
-LIRGenerator::visitSimdValueX4(MSimdValueX4 *ins)
-{
-    LAllocation x = useRegisterAtStart(ins->getOperand(0));
-    LAllocation y = useRegisterAtStart(ins->getOperand(1));
-    LAllocation z = useRegisterAtStart(ins->getOperand(2));
-    LAllocation w = useRegisterAtStart(ins->getOperand(3));
-
-    return define(new(alloc()) LSimdValueX4(x, y, z, w), ins);
-}
-
-bool
 LIRGenerator::visitSimdConstant(MSimdConstant *ins)
 {
     JS_ASSERT(IsSimdType(ins->type()));
 
     if (ins->type() == MIRType_Int32x4)
         return define(new(alloc()) LInt32x4(), ins);
     if (ins->type() == MIRType_Float32x4)
         return define(new(alloc()) LFloat32x4(), ins);
--- a/js/src/jit/Lowering.h
+++ b/js/src/jit/Lowering.h
@@ -268,17 +268,16 @@ class LIRGenerator : public LIRGenerator
     bool visitGetDOMProperty(MGetDOMProperty *ins);
     bool visitGetDOMMember(MGetDOMMember *ins);
     bool visitRecompileCheck(MRecompileCheck *ins);
     bool visitSimdExtractElement(MSimdExtractElement *ins);
     bool visitSimdSignMask(MSimdSignMask *ins);
     bool visitSimdBinaryComp(MSimdBinaryComp *ins);
     bool visitSimdBinaryArith(MSimdBinaryArith *ins);
     bool visitSimdBinaryBitwise(MSimdBinaryBitwise *ins);
-    bool visitSimdValueX4(MSimdValueX4 *ins);
     bool visitSimdConstant(MSimdConstant *ins);
     bool visitPhi(MPhi *ins);
     bool visitBeta(MBeta *ins);
     bool visitObjectState(MObjectState *ins);
     bool visitArrayState(MArrayState *ins);
 };
 
 } // namespace jit
--- a/js/src/jit/arm/CodeGenerator-arm.h
+++ b/js/src/jit/arm/CodeGenerator-arm.h
@@ -224,17 +224,16 @@ class CodeGeneratorARM : public CodeGene
 
     bool visitEffectiveAddress(LEffectiveAddress *ins);
     bool visitUDiv(LUDiv *ins);
     bool visitUMod(LUMod *ins);
     bool visitSoftUDivOrMod(LSoftUDivOrMod *ins);
 
   public:
     // Unimplemented SIMD instructions
-    bool visitSimdValueX4(LSimdValueX4 *lir) { MOZ_CRASH("NYI"); }
     bool visitSimdSplatX4(LSimdSplatX4 *lir) { MOZ_CRASH("NYI"); }
     bool visitInt32x4(LInt32x4 *ins) { MOZ_CRASH("NYI"); }
     bool visitFloat32x4(LFloat32x4 *ins) { MOZ_CRASH("NYI"); }
     bool visitSimdExtractElementI(LSimdExtractElementI *ins) { MOZ_CRASH("NYI"); }
     bool visitSimdExtractElementF(LSimdExtractElementF *ins) { MOZ_CRASH("NYI"); }
     bool visitSimdSignMaskX4(LSimdSignMaskX4 *ins) { MOZ_CRASH("NYI"); }
     bool visitSimdBinaryCompIx4(LSimdBinaryCompIx4 *lir) { MOZ_CRASH("NYI"); }
     bool visitSimdBinaryCompFx4(LSimdBinaryCompFx4 *lir) { MOZ_CRASH("NYI"); }
--- a/js/src/jit/arm/Lowering-arm.cpp
+++ b/js/src/jit/arm/Lowering-arm.cpp
@@ -556,9 +556,15 @@ LIRGeneratorARM::visitSimdTernaryBitwise
 }
 
 bool
 LIRGeneratorARM::visitSimdSplatX4(MSimdSplatX4 *ins)
 {
     MOZ_CRASH("NYI");
 }
 
+bool
+LIRGeneratorARM::visitSimdValueX4(MSimdValueX4 *ins)
+{
+    MOZ_CRASH("NYI"); // No ARM lowering for MSimdValueX4 yet; reaching this hits MOZ_CRASH.
+}
+
 //__aeabi_uidiv
--- a/js/src/jit/arm/Lowering-arm.h
+++ b/js/src/jit/arm/Lowering-arm.h
@@ -88,16 +88,17 @@ class LIRGeneratorARM : public LIRGenera
     bool visitAsmJSUnsignedToFloat32(MAsmJSUnsignedToFloat32 *ins);
     bool visitAsmJSLoadHeap(MAsmJSLoadHeap *ins);
     bool visitAsmJSStoreHeap(MAsmJSStoreHeap *ins);
     bool visitAsmJSLoadFuncPtr(MAsmJSLoadFuncPtr *ins);
     bool visitStoreTypedArrayElementStatic(MStoreTypedArrayElementStatic *ins);
     bool visitForkJoinGetSlice(MForkJoinGetSlice *ins);
     bool visitSimdTernaryBitwise(MSimdTernaryBitwise *ins);
     bool visitSimdSplatX4(MSimdSplatX4 *ins);
+    bool visitSimdValueX4(MSimdValueX4 *ins);
 };
 
 typedef LIRGeneratorARM LIRGeneratorSpecific;
 
 } // namespace jit
 } // namespace js
 
 #endif /* jit_arm_Lowering_arm_h */
--- a/js/src/jit/shared/CodeGenerator-x86-shared.cpp
+++ b/js/src/jit/shared/CodeGenerator-x86-shared.cpp
@@ -2114,51 +2114,53 @@ bool
 CodeGeneratorX86Shared::visitFloat32x4(LFloat32x4 *ins)
 {
     const LDefinition *out = ins->getDef(0);
     masm.loadConstantFloat32x4(ins->getValue(), ToFloatRegister(out));
     return true;
 }
 
 bool
-CodeGeneratorX86Shared::visitSimdValueX4(LSimdValueX4 *ins)
+CodeGeneratorX86Shared::visitSimdValueInt32x4(LSimdValueInt32x4 *ins)
 {
-    FloatRegister output = ToFloatRegister(ins->output());
-
     MSimdValueX4 *mir = ins->mir();
-    JS_ASSERT(IsSimdType(mir->type()));
-    JS_STATIC_ASSERT(sizeof(float) == sizeof(int32_t));
-
-    masm.reserveStack(Simd128DataSize);
+    MOZ_ASSERT(mir->type() == MIRType_Int32x4);
+
     // TODO see bug 1051860 for possible optimizations.
-    switch (mir->type()) {
-      case MIRType_Int32x4: {
-        for (size_t i = 0; i < 4; ++i) {
-            Register r = ToRegister(ins->getOperand(i));
-            masm.store32(r, Address(StackPointer, i * sizeof(int32_t)));
-        }
-        masm.loadAlignedInt32x4(Address(StackPointer, 0), output);
-        break;
-      }
-      case MIRType_Float32x4: {
-        for (size_t i = 0; i < 4; ++i) {
-            FloatRegister r = ToFloatRegister(ins->getOperand(i));
-            masm.storeFloat32(r, Address(StackPointer, i * sizeof(float)));
-        }
-        masm.loadAlignedFloat32x4(Address(StackPointer, 0), output);
-        break;
-      }
-      default: MOZ_CRASH("Unknown SIMD kind");
+    masm.reserveStack(Simd128DataSize);
+    for (size_t i = 0; i < 4; ++i) {
+        Register r = ToRegister(ins->getOperand(i));
+        masm.store32(r, Address(StackPointer, i * sizeof(int32_t)));
     }
-
+    masm.loadAlignedInt32x4(Address(StackPointer, 0), ToFloatRegister(ins->output()));
     masm.freeStack(Simd128DataSize);
     return true;
 }
 
 bool
+CodeGeneratorX86Shared::visitSimdValueFloat32x4(LSimdValueFloat32x4 *ins)
+{
+    MSimdValueX4 *mir = ins->mir();
+    MOZ_ASSERT(mir->type() == MIRType_Float32x4);
+    // Builds the vector in operand 0's register, which must also be the output register.
+    FloatRegister output = ToFloatRegister(ins->output());
+    FloatRegister r0 = ToFloatRegister(ins->getOperand(0));
+    MOZ_ASSERT(r0 == output); // defineReuseInput(0)
+    // r1 comes from temp 0 (copyY in LSimdValueFloat32x4), not from operand 1 directly.
+    FloatRegister r1 = ToFloatRegister(ins->getTemp(0));
+    FloatRegister r2 = ToFloatRegister(ins->getOperand(2));
+    FloatRegister r3 = ToFloatRegister(ins->getOperand(3));
+    // Assuming unpcklps(src, dest): r1 = (y, w, ..); r0 = (x, z, ..); r0 = (x, y, z, w).
+    masm.unpcklps(r3, r1);
+    masm.unpcklps(r2, r0);
+    masm.unpcklps(r1, r0);
+    return true;
+}
+
+bool
 CodeGeneratorX86Shared::visitSimdSplatX4(LSimdSplatX4 *ins)
 {
     FloatRegister output = ToFloatRegister(ins->output());
 
     MSimdSplatX4 *mir = ins->mir();
     MOZ_ASSERT(IsSimdType(mir->type()));
     JS_STATIC_ASSERT(sizeof(float) == sizeof(int32_t));
 
--- a/js/src/jit/shared/CodeGenerator-x86-shared.h
+++ b/js/src/jit/shared/CodeGenerator-x86-shared.h
@@ -201,17 +201,18 @@ class CodeGeneratorX86Shared : public Co
 
     bool visitForkJoinGetSlice(LForkJoinGetSlice *ins);
 
     bool visitNegI(LNegI *lir);
     bool visitNegD(LNegD *lir);
     bool visitNegF(LNegF *lir);
 
     // SIMD operators
-    bool visitSimdValueX4(LSimdValueX4 *lir);
+    bool visitSimdValueInt32x4(LSimdValueInt32x4 *lir);
+    bool visitSimdValueFloat32x4(LSimdValueFloat32x4 *lir);
     bool visitSimdSplatX4(LSimdSplatX4 *lir);
     bool visitInt32x4(LInt32x4 *ins);
     bool visitFloat32x4(LFloat32x4 *ins);
     bool visitSimdExtractElementI(LSimdExtractElementI *lir);
     bool visitSimdExtractElementF(LSimdExtractElementF *lir);
     bool visitSimdSignMaskX4(LSimdSignMaskX4 *ins);
     bool visitSimdBinaryCompIx4(LSimdBinaryCompIx4 *lir);
     bool visitSimdBinaryCompFx4(LSimdBinaryCompFx4 *lir);
--- a/js/src/jit/shared/LIR-x86-shared.h
+++ b/js/src/jit/shared/LIR-x86-shared.h
@@ -315,12 +315,53 @@ class LMulI : public LBinaryMath<0, 1>
     MMul *mir() const {
         return mir_->toMul();
     }
     const LAllocation *lhsCopy() {
         return this->getOperand(2);
     }
 };
 
+// LIR node that builds an int32x4 SIMD value from operands 0..3 (x, y, z, w).
+class LSimdValueInt32x4 : public LInstructionHelper<1, 4, 0>
+{
+  public:
+    LIR_HEADER(SimdValueInt32x4)
+    LSimdValueInt32x4(const LAllocation &x, const LAllocation &y,
+                      const LAllocation &z, const LAllocation &w)
+    {
+        // Operand index follows parameter order.
+        const LAllocation *lanes[] = { &x, &y, &z, &w };
+        for (size_t lane = 0; lane < 4; lane++)
+            setOperand(lane, *lanes[lane]);
+    }
+
+    MSimdValueX4 *mir() const {
+        return mir_->toSimdValueX4();
+    }
+};
+
+// LIR node that builds a float32x4 SIMD value (x86 family): operands 0..3 plus one temp.
+class LSimdValueFloat32x4 : public LInstructionHelper<1, 4, 1>
+{
+  public:
+    LIR_HEADER(SimdValueFloat32x4)
+    LSimdValueFloat32x4(const LAllocation &x, const LAllocation &y,
+                        const LAllocation &z, const LAllocation &w,
+                        const LDefinition &copyY)
+    {
+        // Operand index follows parameter order (x, y, z, w).
+        const LAllocation *lanes[] = { &x, &y, &z, &w };
+        for (size_t lane = 0; lane < 4; lane++)
+            setOperand(lane, *lanes[lane]);
+        // Temp 0 is the caller-supplied copyY definition.
+        setTemp(0, copyY);
+    }
+
+    MSimdValueX4 *mir() const {
+        return mir_->toSimdValueX4();
+    }
+};
+
 } // namespace jit
 } // namespace js
 
 #endif /* jit_shared_LIR_x86_shared_h */
--- a/js/src/jit/shared/Lowering-x86-shared.cpp
+++ b/js/src/jit/shared/Lowering-x86-shared.cpp
@@ -342,8 +342,26 @@ LIRGeneratorX86Shared::visitSimdSplatX4(
       case MIRType_Int32x4:
         return define(lir, ins);
       case MIRType_Float32x4:
         return defineReuseInput(lir, ins, 0);
       default:
         MOZ_CRASH("Unknown SIMD kind");
     }
 }
+
+// Lowers MSimdValueX4; only the float32x4 node takes (and so allocates) the temp copy of y.
+bool
+LIRGeneratorX86Shared::visitSimdValueX4(MSimdValueX4 *ins)
+{
+    LAllocation x = useRegisterAtStart(ins->getOperand(0));
+    LAllocation y = useRegisterAtStart(ins->getOperand(1));
+    LAllocation z = useRegisterAtStart(ins->getOperand(2));
+    LAllocation w = useRegisterAtStart(ins->getOperand(3));
+
+    if (ins->type() == MIRType_Float32x4) {
+        LDefinition copyY = tempCopy(ins->getOperand(1), 1);
+        return defineReuseInput(new (alloc()) LSimdValueFloat32x4(x, y, z, w, copyY), ins, 0);
+    }
+
+    MOZ_ASSERT(ins->type() == MIRType_Int32x4);
+    return define(new(alloc()) LSimdValueInt32x4(x, y, z, w), ins);
+}
--- a/js/src/jit/shared/Lowering-x86-shared.h
+++ b/js/src/jit/shared/Lowering-x86-shared.h
@@ -45,14 +45,15 @@ class LIRGeneratorX86Shared : public LIR
     bool lowerUrshD(MUrsh *mir);
     bool lowerConstantDouble(double d, MInstruction *ins);
     bool lowerConstantFloat32(float d, MInstruction *ins);
     bool lowerTruncateDToInt32(MTruncateToInt32 *ins);
     bool lowerTruncateFToInt32(MTruncateToInt32 *ins);
     bool visitForkJoinGetSlice(MForkJoinGetSlice *ins);
     bool visitSimdTernaryBitwise(MSimdTernaryBitwise *ins);
     bool visitSimdSplatX4(MSimdSplatX4 *ins);
+    bool visitSimdValueX4(MSimdValueX4 *ins);
 };
 
 } // namespace jit
 } // namespace js
 
 #endif /* jit_shared_Lowering_x86_shared_h */
--- a/js/src/jit/x64/LOpcodes-x64.h
+++ b/js/src/jit/x64/LOpcodes-x64.h
@@ -15,11 +15,13 @@
     _(DivPowTwoI)                   \
     _(DivOrModConstantI)            \
     _(ModI)                         \
     _(ModPowTwoI)                   \
     _(PowHalfD)                     \
     _(AsmJSUInt32ToDouble)          \
     _(AsmJSUInt32ToFloat32)         \
     _(AsmJSLoadFuncPtr)             \
+    _(SimdValueInt32x4)             \
+    _(SimdValueFloat32x4)           \
     _(UDivOrMod)
 
 #endif /* jit_x64_LOpcodes_x64_h */
--- a/js/src/jit/x86/LOpcodes-x86.h
+++ b/js/src/jit/x86/LOpcodes-x86.h
@@ -16,11 +16,13 @@
     _(DivPowTwoI)               \
     _(DivOrModConstantI)        \
     _(ModI)                     \
     _(ModPowTwoI)               \
     _(PowHalfD)                 \
     _(AsmJSUInt32ToDouble)      \
     _(AsmJSUInt32ToFloat32)     \
     _(AsmJSLoadFuncPtr)         \
+    _(SimdValueInt32x4)         \
+    _(SimdValueFloat32x4)       \
     _(UDivOrMod)
 
 #endif /* jit_x86_LOpcodes_x86_h */