Bug 1073064: SIMD x86-x64 backend: Implement MSimdBinaryLogical; r=sunfish
author: Benjamin Bouvier <benj@benj.me>
Wed, 01 Oct 2014 14:57:15 +0200
changeset 208195 e6e7586b3e02cdd59e978f306b8f1c2d1ef71c4c
parent 208194 89716e3f169df2469a1f82da92ce95163a8ff8bc
child 208196 4baa041973f8860d94d8f04e16b7c839403ea362
push id: 27580
push user: kwierso@gmail.com
push date: Wed, 01 Oct 2014 23:26:55 +0000
treeherder: autoland@af6c928893c0 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: sunfish
bugs: 1073064
milestone: 35.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1073064: SIMD x86-x64 backend: Implement MSimdBinaryLogical; r=sunfish
js/src/jit/LIR-Common.h
js/src/jit/LOpcodes.h
js/src/jit/Lowering.cpp
js/src/jit/Lowering.h
js/src/jit/MIR.h
js/src/jit/MOpcodes.h
js/src/jit/ParallelSafetyAnalysis.cpp
js/src/jit/shared/Assembler-x86-shared.h
js/src/jit/shared/BaseAssembler-x86-shared.h
js/src/jit/shared/CodeGenerator-x86-shared.cpp
js/src/jit/shared/CodeGenerator-x86-shared.h
js/src/jit/shared/MacroAssembler-x86-shared.h
--- a/js/src/jit/LIR-Common.h
+++ b/js/src/jit/LIR-Common.h
@@ -321,16 +321,38 @@ class LSimdBinaryBitwiseX4 : public LIns
     const LAllocation *rhs() {
         return getOperand(1);
     }
     MSimdBinaryBitwise::Operation operation() const {
         return mir_->toSimdBinaryBitwise()->operation();
     }
 };
 
+class LSimdShift : public LInstructionHelper<1, 2, 0>  // LIR node for MSimdShift.
+{
+  public:
+    LIR_HEADER(SimdShift)
+    LSimdShift(const LAllocation &vec, const LAllocation &val) {
+        setOperand(0, vec);  // Operand 0: the vector being shifted.
+        setOperand(1, val);  // Operand 1: the shift count.
+    }
+    const LAllocation *vector() {  // Returns operand 0.
+        return getOperand(0);
+    }
+    const LAllocation *value() {  // Returns operand 1.
+        return getOperand(1);
+    }
+    MSimdShift::Operation operation() const {  // Shift kind, read from the MIR node.
+        return mir_->toSimdShift()->operation();
+    }
+    MSimdShift *mir() const {  // The MIR node, downcast to MSimdShift.
+        return mir_->toSimdShift();
+    }
+};
+
 // SIMD selection of lanes from two int32x4 or float32x4 arguments based on a
 // int32x4 argument.
 class LSimdSelect : public LInstructionHelper<1, 3, 0>
 {
   public:
     LIR_HEADER(SimdSelect);
     const LAllocation *mask() {
         return getOperand(0);
--- a/js/src/jit/LOpcodes.h
+++ b/js/src/jit/LOpcodes.h
@@ -24,16 +24,17 @@
     _(SimdInsertElementI)           \
     _(SimdInsertElementF)           \
     _(SimdSignMaskX4)               \
     _(SimdBinaryCompIx4)            \
     _(SimdBinaryCompFx4)            \
     _(SimdBinaryArithIx4)           \
     _(SimdBinaryArithFx4)           \
     _(SimdBinaryBitwiseX4)          \
+    _(SimdShift)                    \
     _(SimdSelect)                   \
     _(Value)                        \
     _(CloneLiteral)                 \
     _(Parameter)                    \
     _(Callee)                       \
     _(IsConstructing)               \
     _(TableSwitch)                  \
     _(TableSwitchV)                 \
--- a/js/src/jit/Lowering.cpp
+++ b/js/src/jit/Lowering.cpp
@@ -3839,25 +3839,36 @@ LIRGenerator::visitSimdBinaryArith(MSimd
 }
 
 bool
 LIRGenerator::visitSimdBinaryBitwise(MSimdBinaryBitwise *ins)
 {
     MOZ_ASSERT(IsSimdType(ins->type()));
 
     if (ins->type() == MIRType_Int32x4 || ins->type() == MIRType_Float32x4) {
-        LSimdBinaryBitwiseX4 *add = new(alloc()) LSimdBinaryBitwiseX4;
-        return lowerForFPU(add, ins, ins->lhs(), ins->rhs());
+        LSimdBinaryBitwiseX4 *lir = new(alloc()) LSimdBinaryBitwiseX4;
+        return lowerForFPU(lir, ins, ins->lhs(), ins->rhs());
     }
 
     MOZ_CRASH("Unknown SIMD kind when doing bitwise operations");
     return false;
 }
 
 bool
+LIRGenerator::visitSimdShift(MSimdShift *ins)  // Lowers MSimdShift to an LSimdShift.
+{
+    MOZ_ASSERT(ins->type() == MIRType_Int32x4);
+
+    LUse vector = useRegisterAtStart(ins->lhs());
+    LAllocation value = useRegisterOrConstant(ins->rhs());  // Count: register or constant.
+    LSimdShift *lir = new(alloc()) LSimdShift(vector, value);
+    return defineReuseInput(lir, ins, 0);  // Output reuses operand 0 (the vector).
+}
+
+bool
 LIRGenerator::visitLexicalCheck(MLexicalCheck *ins)
 {
     MDefinition *input = ins->input();
     MOZ_ASSERT(input->type() == MIRType_Value);
     LLexicalCheck *lir = new(alloc()) LLexicalCheck();
     return redefine(ins, input) && useBox(lir, LLexicalCheck::Input, input) &&
            add(lir, ins) && assignSafepoint(lir, ins);
 }
--- a/js/src/jit/Lowering.h
+++ b/js/src/jit/Lowering.h
@@ -269,16 +269,17 @@ class LIRGenerator : public LIRGenerator
     bool visitGetDOMMember(MGetDOMMember *ins);
     bool visitRecompileCheck(MRecompileCheck *ins);
     bool visitSimdExtractElement(MSimdExtractElement *ins);
     bool visitSimdInsertElement(MSimdInsertElement *ins);
     bool visitSimdSignMask(MSimdSignMask *ins);
     bool visitSimdBinaryComp(MSimdBinaryComp *ins);
     bool visitSimdBinaryArith(MSimdBinaryArith *ins);
     bool visitSimdBinaryBitwise(MSimdBinaryBitwise *ins);
+    bool visitSimdShift(MSimdShift *ins);
     bool visitSimdConstant(MSimdConstant *ins);
     bool visitSimdConvert(MSimdConvert *ins);
     bool visitSimdReinterpretCast(MSimdReinterpretCast *ins);
     bool visitPhi(MPhi *ins);
     bool visitBeta(MBeta *ins);
     bool visitObjectState(MObjectState *ins);
     bool visitArrayState(MArrayState *ins);
     bool visitUnknownValue(MUnknownValue *ins);
--- a/js/src/jit/MIR.h
+++ b/js/src/jit/MIR.h
@@ -1716,16 +1716,59 @@ class MSimdBinaryBitwise : public MBinar
         if (!binaryCongruentTo(ins))
             return false;
         return operation_ == ins->toSimdBinaryBitwise()->operation();
     }
 
     ALLOW_CLONE(MSimdBinaryBitwise)
 };
 
+class MSimdShift : public MBinaryInstruction  // MIR node: Int32x4 vector shifted by an Int32 count.
+{
+  public:
+    enum Operation {
+        lsh,  // Left shift.
+        rsh,  // Right shift.
+        ursh  // Unsigned right shift.
+    };
+
+  private:
+    Operation operation_;
+
+    MSimdShift(MDefinition *left, MDefinition *right, Operation op)
+      : MBinaryInstruction(left, right), operation_(op)
+    {
+        MOZ_ASSERT(left->type() == MIRType_Int32x4 && right->type() == MIRType_Int32);
+        setResultType(MIRType_Int32x4);  // The result has the same type as the left operand.
+        setMovable();
+    }
+
+  public:
+    INSTRUCTION_HEADER(SimdShift);
+    static MSimdShift *NewAsmJS(TempAllocator &alloc, MDefinition *left,
+                                MDefinition *right, Operation op)
+    {
+        return new(alloc) MSimdShift(left, right, op);  // The only public way to create one.
+    }
+
+    AliasSet getAliasSet() const {
+        return AliasSet::None();
+    }
+
+    Operation operation() const { return operation_; }
+
+    bool congruentTo(const MDefinition *ins) const {  // Congruent only if the operations also match.
+        if (!binaryCongruentTo(ins))
+            return false;
+        return operation_ == ins->toSimdShift()->operation();
+    }
+
+    ALLOW_CLONE(MSimdShift)
+};
+
 class MSimdTernaryBitwise : public MTernaryInstruction
 {
   public:
     enum Operation {
         select
     };
 
   private:
--- a/js/src/jit/MOpcodes.h
+++ b/js/src/jit/MOpcodes.h
@@ -18,16 +18,17 @@ namespace jit {
     _(SimdConvert)                                                          \
     _(SimdReinterpretCast)                                                  \
     _(SimdExtractElement)                                                   \
     _(SimdInsertElement)                                                    \
     _(SimdSignMask)                                                         \
     _(SimdBinaryComp)                                                       \
     _(SimdBinaryArith)                                                      \
     _(SimdBinaryBitwise)                                                    \
+    _(SimdShift)                                                            \
     _(SimdTernaryBitwise)                                                   \
     _(CloneLiteral)                                                         \
     _(Parameter)                                                            \
     _(Callee)                                                               \
     _(IsConstructing)                                                       \
     _(TableSwitch)                                                          \
     _(Goto)                                                                 \
     _(Test)                                                                 \
--- a/js/src/jit/ParallelSafetyAnalysis.cpp
+++ b/js/src/jit/ParallelSafetyAnalysis.cpp
@@ -117,16 +117,17 @@ class ParallelSafetyVisitor : public MDe
     SAFE_OP(SimdConvert)
     SAFE_OP(SimdReinterpretCast)
     SAFE_OP(SimdExtractElement)
     SAFE_OP(SimdInsertElement)
     SAFE_OP(SimdSignMask)
     SAFE_OP(SimdBinaryComp)
     SAFE_OP(SimdBinaryArith)
     SAFE_OP(SimdBinaryBitwise)
+    SAFE_OP(SimdShift)
     SAFE_OP(SimdTernaryBitwise)
     UNSAFE_OP(CloneLiteral)
     SAFE_OP(Parameter)
     SAFE_OP(Callee)
     SAFE_OP(IsConstructing)
     SAFE_OP(TableSwitch)
     SAFE_OP(Goto)
     SAFE_OP(Test)
--- a/js/src/jit/shared/Assembler-x86-shared.h
+++ b/js/src/jit/shared/Assembler-x86-shared.h
@@ -1442,16 +1442,40 @@ class AssemblerX86Shared : public Assemb
     void psllq(Imm32 shift, FloatRegister dest) {
         JS_ASSERT(HasSSE2());
         masm.psllq_ir(shift.value, dest.code());
     }
     void psrlq(Imm32 shift, FloatRegister dest) {
         JS_ASSERT(HasSSE2());
         masm.psrlq_ir(shift.value, dest.code());
     }
+    void pslld(FloatRegister src, FloatRegister dest) {  // pslld with the count in register src.
+        MOZ_ASSERT(HasSSE2());
+        masm.pslld_rr(src.code(), dest.code());
+    }
+    void pslld(Imm32 count, FloatRegister dest) {  // pslld with an immediate count.
+        MOZ_ASSERT(HasSSE2());
+        masm.pslld_ir(count.value, dest.code());
+    }
+    void psrad(FloatRegister src, FloatRegister dest) {  // psrad with the count in register src.
+        MOZ_ASSERT(HasSSE2());
+        masm.psrad_rr(src.code(), dest.code());
+    }
+    void psrad(Imm32 count, FloatRegister dest) {  // psrad with an immediate count.
+        MOZ_ASSERT(HasSSE2());
+        masm.psrad_ir(count.value, dest.code());
+    }
+    void psrld(FloatRegister src, FloatRegister dest) {  // psrld with the count in register src.
+        MOZ_ASSERT(HasSSE2());
+        masm.psrld_rr(src.code(), dest.code());
+    }
+    void psrld(Imm32 count, FloatRegister dest) {  // psrld with an immediate count.
+        MOZ_ASSERT(HasSSE2());
+        masm.psrld_ir(count.value, dest.code());
+    }
 
     void cvtsi2sd(const Operand &src, FloatRegister dest) {
         JS_ASSERT(HasSSE2());
         switch (src.kind()) {
           case Operand::REG:
             masm.cvtsi2sd_rr(src.reg(), dest.code());
             break;
           case Operand::MEM_REG_DISP:
--- a/js/src/jit/shared/BaseAssembler-x86-shared.h
+++ b/js/src/jit/shared/BaseAssembler-x86-shared.h
@@ -322,16 +322,19 @@ private:
         OP2_ANDPD_VpdWpd    = 0x54,
         OP2_ORPD_VpdWpd     = 0x56,
         OP2_XORPD_VpdWpd    = 0x57,
         OP2_PCMPGTD_VdqWdq  = 0x66,
         OP2_MOVD_VdEd       = 0x6E,
         OP2_MOVDQ_VsdWsd    = 0x6F,
         OP2_MOVDQ_VdqWdq    = 0x6F,
         OP2_PSHUFD_VdqWdqIb = 0x70,
+        OP2_PSLLD_UdqIb     = 0x72,
+        OP2_PSRAD_UdqIb     = 0x72,
+        OP2_PSRLD_UdqIb     = 0x72,
         OP2_PSRLDQ_Vd       = 0x73,
         OP2_PCMPEQW         = 0x75,
         OP2_PCMPEQD_VdqWdq  = 0x76,
         OP2_MOVD_EdVd       = 0x7E,
         OP2_MOVDQ_WdqVdq    = 0x7F,
         OP2_JCC_rel32       = 0x80,
         OP_SETCC            = 0x90,
         OP2_IMUL_GvEv       = 0xAF,
@@ -340,17 +343,20 @@ private:
         OP2_MOVSX_GvEb      = 0xBE,
         OP2_MOVSX_GvEw      = 0xBF,
         OP2_MOVZX_GvEb      = 0xB6,
         OP2_MOVZX_GvEw      = 0xB7,
         OP2_XADD_EvGv       = 0xC1,
         OP2_CMPPS_VpsWps    = 0xC2,
         OP2_PEXTRW_GdUdIb   = 0xC5,
         OP2_SHUFPS_VpsWpsIb = 0xC6,
+        OP2_PSRLD_VdqWdq    = 0xD2,
+        OP2_PSRAD_VdqWdq    = 0xE2,
         OP2_PXORDQ_VdqWdq   = 0xEF,
+        OP2_PSLLD_VdqWdq    = 0xF2,
         OP2_PSUBD_VdqWdq    = 0xFA,
         OP2_PADDD_VdqWdq    = 0xFE
     } TwoByteOpcodeID;
 
     typedef enum {
         OP3_ROUNDSS_VsdWsd  = 0x0A,
         OP3_ROUNDSD_VsdWsd  = 0x0B,
         OP3_PTEST_VdVd      = 0x17,
@@ -2919,16 +2925,67 @@ public:
     {
         spew("psrlq      $%d, %s",
              shift, nameFPReg(dest));
         m_formatter.prefix(PRE_SSE_66);
         m_formatter.twoByteOp(OP2_PSRLDQ_Vd, (RegisterID)2, (RegisterID)dest);
         m_formatter.immediate8(shift);
     }
 
+    void pslld_rr(XMMRegisterID src, XMMRegisterID dst)  // Emits 66 0F F2 /r.
+    {
+        spew("pslld      %s, %s",
+             nameFPReg(src), nameFPReg(dst));
+        m_formatter.prefix(PRE_SSE_66);
+        m_formatter.twoByteOp(OP2_PSLLD_VdqWdq, (RegisterID)dst, (RegisterID)src);
+    }
+
+    void pslld_ir(int32_t count, XMMRegisterID dst)  // Emits 66 0F 72 /6 ib.
+    {
+        spew("pslld      $%d, %s",
+             count, nameFPReg(dst));
+        m_formatter.prefix(PRE_SSE_66);
+        m_formatter.twoByteOp(OP2_PSLLD_UdqIb, (RegisterID)6, (RegisterID)dst);  // 6 is the opcode extension, not a register.
+        m_formatter.immediate8(int8_t(count));  // Only the low 8 bits of count are encoded.
+    }
+
+    void psrad_rr(XMMRegisterID src, XMMRegisterID dst)  // Emits 66 0F E2 /r.
+    {
+        spew("psrad      %s, %s",
+             nameFPReg(src), nameFPReg(dst));
+        m_formatter.prefix(PRE_SSE_66);
+        m_formatter.twoByteOp(OP2_PSRAD_VdqWdq, (RegisterID)dst, (RegisterID)src);
+    }
+
+    void psrad_ir(int32_t count, XMMRegisterID dst)  // Emits 66 0F 72 /4 ib.
+    {
+        spew("psrad      $%d, %s",
+             count, nameFPReg(dst));
+        m_formatter.prefix(PRE_SSE_66);
+        m_formatter.twoByteOp(OP2_PSRAD_UdqIb, (RegisterID)4, (RegisterID)dst);  // 4 is the opcode extension, not a register.
+        m_formatter.immediate8(int8_t(count));  // Only the low 8 bits of count are encoded.
+    }
+
+    void psrld_rr(XMMRegisterID src, XMMRegisterID dst)  // Emits 66 0F D2 /r.
+    {
+        spew("psrld      %s, %s",
+             nameFPReg(src), nameFPReg(dst));
+        m_formatter.prefix(PRE_SSE_66);
+        m_formatter.twoByteOp(OP2_PSRLD_VdqWdq, (RegisterID)dst, (RegisterID)src);
+    }
+
+    void psrld_ir(int32_t count, XMMRegisterID dst)  // Emits 66 0F 72 /2 ib.
+    {
+        spew("psrld      $%d, %s",
+             count, nameFPReg(dst));
+        m_formatter.prefix(PRE_SSE_66);
+        m_formatter.twoByteOp(OP2_PSRLD_UdqIb, (RegisterID)2, (RegisterID)dst);  // 2 is the opcode extension, not a register.
+        m_formatter.immediate8(int8_t(count));  // Only the low 8 bits of count are encoded.
+    }
+
     void movmskpd_rr(XMMRegisterID src, RegisterID dst)
     {
         spew("movmskpd   %s, %s",
              nameFPReg(src), nameIReg(dst));
         m_formatter.prefix(PRE_SSE_66);
         m_formatter.twoByteOp(OP2_MOVMSKPD_EdVd, dst, (RegisterID)src);
     }
 
--- a/js/src/jit/shared/CodeGenerator-x86-shared.cpp
+++ b/js/src/jit/shared/CodeGenerator-x86-shared.cpp
@@ -2526,16 +2526,64 @@ CodeGeneratorX86Shared::visitSimdBinaryB
       case MSimdBinaryBitwise::xor_:
         masm.bitwiseXorX4(rhs, lhs);
         return true;
     }
     MOZ_CRASH("unexpected SIMD bitwise op");
 }
 
 bool
+CodeGeneratorX86Shared::visitSimdShift(LSimdShift *ins)
+{
+    // Emits a packed int32x4 shift of the input vector, in place, by a scalar count.
+    FloatRegister out = ToFloatRegister(ins->output());
+    MOZ_ASSERT(ToFloatRegister(ins->vector()) == out); // defineReuseInput(0);
+
+    // TODO: Counts greater than 31 get the hardware behavior: lsh and ursh
+    // zero all lanes and rsh extends the sign bit to all bits. Plain JS
+    // instead keeps only the five low bits of the count. Spec isn't clear
+    // about that topic so this might need to be fixed. See bug 1068028.
+    // Constant counts are clamped to 32 so that truncation to the 8-bit
+    // immediate cannot wrap and diverge from the register-count path.
+    const LAllocation *val = ins->value();
+    if (val->isConstant()) {
+        Imm32 count(uint32_t(ToInt32(val)) > 31 ? 32 : ToInt32(val));
+        switch (ins->operation()) {
+          case MSimdShift::lsh:
+            masm.packedLeftShiftByScalar(count, out);
+            return true;
+          case MSimdShift::rsh:
+            masm.packedRightShiftByScalar(count, out);
+            return true;
+          case MSimdShift::ursh:
+            masm.packedUnsignedRightShiftByScalar(count, out);
+            return true;
+        }
+        MOZ_CRASH("unexpected SIMD shift op");
+    }
+
+    MOZ_ASSERT(val->isRegister());
+    FloatRegister tmp = ScratchFloat32Reg;
+    masm.movd(ToRegister(val), tmp);
+
+    switch (ins->operation()) {
+      case MSimdShift::lsh:
+        masm.packedLeftShiftByScalar(tmp, out);
+        return true;
+      case MSimdShift::rsh:
+        masm.packedRightShiftByScalar(tmp, out);
+        return true;
+      case MSimdShift::ursh:
+        masm.packedUnsignedRightShiftByScalar(tmp, out);
+        return true;
+    }
+    MOZ_CRASH("unexpected SIMD shift op");
+}
+
+bool
 CodeGeneratorX86Shared::visitSimdSelect(LSimdSelect *ins)
 {
     FloatRegister mask = ToFloatRegister(ins->mask());
     FloatRegister onTrue = ToFloatRegister(ins->lhs());
     FloatRegister onFalse = ToFloatRegister(ins->rhs());
 
     MOZ_ASSERT(onTrue == ToFloatRegister(ins->output()));
     // The onFalse argument is not destroyed but due to limitations of the
--- a/js/src/jit/shared/CodeGenerator-x86-shared.h
+++ b/js/src/jit/shared/CodeGenerator-x86-shared.h
@@ -219,16 +219,17 @@ class CodeGeneratorX86Shared : public Co
     bool visitSimdInsertElementI(LSimdInsertElementI *lir);
     bool visitSimdInsertElementF(LSimdInsertElementF *lir);
     bool visitSimdSignMaskX4(LSimdSignMaskX4 *ins);
     bool visitSimdBinaryCompIx4(LSimdBinaryCompIx4 *lir);
     bool visitSimdBinaryCompFx4(LSimdBinaryCompFx4 *lir);
     bool visitSimdBinaryArithIx4(LSimdBinaryArithIx4 *lir);
     bool visitSimdBinaryArithFx4(LSimdBinaryArithFx4 *lir);
     bool visitSimdBinaryBitwiseX4(LSimdBinaryBitwiseX4 *lir);
+    bool visitSimdShift(LSimdShift *lir);
     bool visitSimdSelect(LSimdSelect *ins);
 
     // Out of line visitors.
     bool visitOutOfLineBailout(OutOfLineBailout *ool);
     bool visitOutOfLineUndoALUOperation(OutOfLineUndoALUOperation *ool);
     bool visitMulNegativeZeroCheck(MulNegativeZeroCheck *ool);
     bool visitModOverflowCheck(ModOverflowCheck *ool);
     bool visitReturnZero(ReturnZero *ool);
--- a/js/src/jit/shared/MacroAssembler-x86-shared.h
+++ b/js/src/jit/shared/MacroAssembler-x86-shared.h
@@ -520,16 +520,35 @@ class MacroAssemblerX86Shared : public A
     }
     void packedAddInt32(const Operand &src, FloatRegister dest) {
         paddd(src, dest);
     }
     void packedSubInt32(const Operand &src, FloatRegister dest) {
         psubd(src, dest);
     }
 
+    void packedLeftShiftByScalar(FloatRegister src, FloatRegister dest) {  // Count in register src.
+        pslld(src, dest);
+    }
+    void packedLeftShiftByScalar(Imm32 count, FloatRegister dest) {  // Immediate count.
+        pslld(count, dest);
+    }
+    void packedRightShiftByScalar(FloatRegister src, FloatRegister dest) {  // Count in register src.
+        psrad(src, dest);
+    }
+    void packedRightShiftByScalar(Imm32 count, FloatRegister dest) {  // Immediate count.
+        psrad(count, dest);
+    }
+    void packedUnsignedRightShiftByScalar(FloatRegister src, FloatRegister dest) {  // Count in register src.
+        psrld(src, dest);
+    }
+    void packedUnsignedRightShiftByScalar(Imm32 count, FloatRegister dest) {  // Immediate count.
+        psrld(count, dest);
+    }
+
     void loadAlignedFloat32x4(const Address &src, FloatRegister dest) {
         movaps(Operand(src), dest);
     }
     void loadAlignedFloat32x4(const Operand &src, FloatRegister dest) {
         movaps(src, dest);
     }
     void storeAlignedFloat32x4(FloatRegister src, const Address &dest) {
         movaps(src, Operand(dest));