Bug 1059529: SIMD x86-x64 backend: Implement bitwise operations; a=ijibaja,dougc,bbouvier; r=sunfish
authorIvan Jibaja <ivan@cs.utexas.edu>
Wed, 27 Aug 2014 18:07:03 -0700
changeset 202964 393c2341a26b5419d88cd17add68b9d4cf309423
parent 202963 5fa26de0a04fcf3f6e44de54f143fbb9cda38004
child 202965 6a78c4812f10cd1003d088b3a2d30b1840e02a2c
push id10297
push userryanvm@gmail.com
push dateTue, 02 Sep 2014 18:36:40 +0000
treeherderb2g-inbound@1d3b0ec6e32d [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersijibaja, dougc, bbouvier, sunfish
bugs1059529
milestone34.0a1
Bug 1059529: SIMD x86-x64 backend: Implement bitwise operations; a=ijibaja,dougc,bbouvier; r=sunfish
js/src/jit/LIR-Common.h
js/src/jit/LOpcodes.h
js/src/jit/Lowering.cpp
js/src/jit/Lowering.h
js/src/jit/MIR.h
js/src/jit/MOpcodes.h
js/src/jit/ParallelSafetyAnalysis.cpp
js/src/jit/arm/CodeGenerator-arm.h
js/src/jit/shared/Assembler-x86-shared.h
js/src/jit/shared/BaseAssembler-x86-shared.h
js/src/jit/shared/CodeGenerator-x86-shared.cpp
js/src/jit/shared/CodeGenerator-x86-shared.h
js/src/jit/shared/MacroAssembler-x86-shared.h
--- a/js/src/jit/LIR-Common.h
+++ b/js/src/jit/LIR-Common.h
@@ -261,16 +261,32 @@ class LSimdBinaryArithIx4 : public LSimd
 // Binary SIMD arithmetic operation between two Float32x4 operands
 class LSimdBinaryArithFx4 : public LSimdBinaryArith
 {
   public:
     LIR_HEADER(SimdBinaryArithFx4);
     LSimdBinaryArithFx4() : LSimdBinaryArith() {}
 };
 
+// Binary SIMD bitwise operation between two int32x4 or float32x4 operands
+class LSimdBinaryBitwiseX4 : public LInstructionHelper<1, 2, 0>
+{
+  public:
+    LIR_HEADER(SimdBinaryBitwiseX4);
+    const LAllocation *lhs() {
+        return getOperand(0);
+    }
+    const LAllocation *rhs() {
+        return getOperand(1);
+    }
+    MSimdBinaryBitwise::Operation operation() const {
+        return mir_->toSimdBinaryBitwise()->operation();
+    }
+};
+
 // Constant 32-bit integer.
 class LInteger : public LInstructionHelper<1, 0, 0>
 {
     int32_t i32_;
 
   public:
     LIR_HEADER(Integer)
 
--- a/js/src/jit/LOpcodes.h
+++ b/js/src/jit/LOpcodes.h
@@ -20,16 +20,17 @@
     _(Int32x4)                      \
     _(Float32x4)                    \
     _(SimdExtractElementI)          \
     _(SimdExtractElementF)          \
     _(SimdBinaryCompIx4)            \
     _(SimdBinaryCompFx4)            \
     _(SimdBinaryArithIx4)           \
     _(SimdBinaryArithFx4)           \
+    _(SimdBinaryBitwiseX4)          \
     _(Value)                        \
     _(CloneLiteral)                 \
     _(Parameter)                    \
     _(Callee)                       \
     _(TableSwitch)                  \
     _(TableSwitchV)                 \
     _(Goto)                         \
     _(NewArray)                     \
--- a/js/src/jit/Lowering.cpp
+++ b/js/src/jit/Lowering.cpp
@@ -3760,16 +3760,30 @@ LIRGenerator::visitSimdBinaryArith(MSimd
         LSimdBinaryArithFx4 *add = new(alloc()) LSimdBinaryArithFx4();
         return lowerForFPU(add, ins, ins->lhs(), ins->rhs());
     }
 
     MOZ_ASSUME_UNREACHABLE("Unknown SIMD kind when adding values");
     return false;
 }
 
+bool
+LIRGenerator::visitSimdBinaryBitwise(MSimdBinaryBitwise *ins)
+{
+    MOZ_ASSERT(IsSimdType(ins->type()));
+
+    if (ins->type() == MIRType_Int32x4 || ins->type() == MIRType_Float32x4) {
+        LSimdBinaryBitwiseX4 *add = new(alloc()) LSimdBinaryBitwiseX4;
+        return lowerForFPU(add, ins, ins->lhs(), ins->rhs());
+    }
+
+    MOZ_CRASH("Unknown SIMD kind when doing bitwise operations");
+    return false;
+}
+
 static void
 SpewResumePoint(MBasicBlock *block, MInstruction *ins, MResumePoint *resumePoint)
 {
     fprintf(IonSpewFile, "Current resume point %p details:\n", (void *)resumePoint);
     fprintf(IonSpewFile, "    frame count: %u\n", resumePoint->frameCount());
 
     if (ins) {
         fprintf(IonSpewFile, "    taken after: ");
--- a/js/src/jit/Lowering.h
+++ b/js/src/jit/Lowering.h
@@ -265,16 +265,17 @@ class LIRGenerator : public LIRGenerator
     bool visitAsmJSCall(MAsmJSCall *ins);
     bool visitSetDOMProperty(MSetDOMProperty *ins);
     bool visitGetDOMProperty(MGetDOMProperty *ins);
     bool visitGetDOMMember(MGetDOMMember *ins);
     bool visitRecompileCheck(MRecompileCheck *ins);
     bool visitSimdExtractElement(MSimdExtractElement *ins);
     bool visitSimdBinaryComp(MSimdBinaryComp *ins);
     bool visitSimdBinaryArith(MSimdBinaryArith *ins);
+    bool visitSimdBinaryBitwise(MSimdBinaryBitwise *ins);
     bool visitSimdValueX4(MSimdValueX4 *ins);
     bool visitSimdConstant(MSimdConstant *ins);
     bool visitPhi(MPhi *ins);
     bool visitBeta(MBeta *ins);
     bool visitObjectState(MObjectState *ins);
     bool visitArrayState(MArrayState *ins);
 };
 
--- a/js/src/jit/MIR.h
+++ b/js/src/jit/MIR.h
@@ -1475,16 +1475,60 @@ class MSimdBinaryArith : public MBinaryI
 
     bool congruentTo(const MDefinition *ins) const {
         if (!binaryCongruentTo(ins))
             return false;
         return operation_ == ins->toSimdBinaryArith()->operation();
     }
 };
 
+class MSimdBinaryBitwise : public MBinaryInstruction
+{
+  public:
+    enum Operation {
+        and_,
+        or_,
+        xor_
+    };
+
+  private:
+    Operation operation_;
+
+    MSimdBinaryBitwise(MDefinition *left, MDefinition *right, Operation op, MIRType type)
+      : MBinaryInstruction(left, right), operation_(op)
+    {
+        MOZ_ASSERT(IsSimdType(type));
+        MOZ_ASSERT(left->type() == right->type());
+        MOZ_ASSERT(left->type() == type);
+        setResultType(type);
+        setMovable();
+        setCommutative();
+    }
+
+  public:
+    INSTRUCTION_HEADER(SimdBinaryBitwise);
+    static MSimdBinaryBitwise *NewAsmJS(TempAllocator &alloc, MDefinition *left,
+                                        MDefinition *right, Operation op, MIRType t)
+    {
+        return new(alloc) MSimdBinaryBitwise(left, right, op, t);
+    }
+
+    AliasSet getAliasSet() const {
+        return AliasSet::None();
+    }
+
+    Operation operation() const { return operation_; }
+
+    bool congruentTo(const MDefinition *ins) const {
+        if (!binaryCongruentTo(ins))
+            return false;
+        return operation_ == ins->toSimdBinaryBitwise()->operation();
+    }
+};
+
 // Deep clone a constant JSObject.
 class MCloneLiteral
   : public MUnaryInstruction,
     public ObjectPolicy<0>
 {
   protected:
     explicit MCloneLiteral(MDefinition *obj)
       : MUnaryInstruction(obj)
--- a/js/src/jit/MOpcodes.h
+++ b/js/src/jit/MOpcodes.h
@@ -12,16 +12,17 @@ namespace jit {
 
 #define MIR_OPCODE_LIST(_)                                                  \
     _(Constant)                                                             \
     _(SimdValueX4)                                                          \
     _(SimdConstant)                                                         \
     _(SimdExtractElement)                                                   \
     _(SimdBinaryComp)                                                       \
     _(SimdBinaryArith)                                                      \
+    _(SimdBinaryBitwise)                                                    \
     _(CloneLiteral)                                                         \
     _(Parameter)                                                            \
     _(Callee)                                                               \
     _(TableSwitch)                                                          \
     _(Goto)                                                                 \
     _(Test)                                                                 \
     _(TypeObjectDispatch)                                                   \
     _(FunctionDispatch)                                                     \
--- a/js/src/jit/ParallelSafetyAnalysis.cpp
+++ b/js/src/jit/ParallelSafetyAnalysis.cpp
@@ -112,16 +112,17 @@ class ParallelSafetyVisitor : public MDe
     // obviously safe for now.  We can loosen as we need.
 
     SAFE_OP(Constant)
     SAFE_OP(SimdValueX4)
     SAFE_OP(SimdConstant)
     SAFE_OP(SimdExtractElement)
     SAFE_OP(SimdBinaryComp)
     SAFE_OP(SimdBinaryArith)
+    SAFE_OP(SimdBinaryBitwise)
     UNSAFE_OP(CloneLiteral)
     SAFE_OP(Parameter)
     SAFE_OP(Callee)
     SAFE_OP(TableSwitch)
     SAFE_OP(Goto)
     SAFE_OP(Test)
     SAFE_OP(Compare)
     SAFE_OP(Phi)
--- a/js/src/jit/arm/CodeGenerator-arm.h
+++ b/js/src/jit/arm/CodeGenerator-arm.h
@@ -233,16 +233,17 @@ class CodeGeneratorARM : public CodeGene
     bool visitInt32x4(LInt32x4 *ins) { MOZ_CRASH("NYI"); }
     bool visitFloat32x4(LFloat32x4 *ins) { MOZ_CRASH("NYI"); }
     bool visitSimdExtractElementI(LSimdExtractElementI *ins) { MOZ_CRASH("NYI"); }
     bool visitSimdExtractElementF(LSimdExtractElementF *ins) { MOZ_CRASH("NYI"); }
     bool visitSimdBinaryCompIx4(LSimdBinaryCompIx4 *lir) { MOZ_CRASH("NYI"); }
     bool visitSimdBinaryCompFx4(LSimdBinaryCompFx4 *lir) { MOZ_CRASH("NYI"); }
     bool visitSimdBinaryArithIx4(LSimdBinaryArithIx4 *lir) { MOZ_CRASH("NYI"); }
     bool visitSimdBinaryArithFx4(LSimdBinaryArithFx4 *lir) { MOZ_CRASH("NYI"); }
+    bool visitSimdBinaryBitwiseX4(LSimdBinaryBitwiseX4 *lir) { MOZ_CRASH("NYI"); }
 };
 
 typedef CodeGeneratorARM CodeGeneratorSpecific;
 
 // An out-of-line bailout thunk.
 class OutOfLineBailout : public OutOfLineCodeBase<CodeGeneratorARM>
 {
   protected: // Silence Clang warning.
--- a/js/src/jit/shared/Assembler-x86-shared.h
+++ b/js/src/jit/shared/Assembler-x86-shared.h
@@ -1660,16 +1660,64 @@ class AssemblerX86Shared : public Assemb
             break;
           case Operand::MEM_ADDRESS32:
             masm.divps_mr(src.address(), dest.code());
             break;
           default:
             MOZ_CRASH("unexpected operand kind");
         }
     }
+    void andps(const Operand &src, FloatRegister dest) {
+        MOZ_ASSERT(HasSSE2());
+        switch (src.kind()) {
+          case Operand::FPREG:
+            masm.andps_rr(src.fpu(), dest.code());
+            break;
+          case Operand::MEM_REG_DISP:
+            masm.andps_mr(src.disp(), src.base(), dest.code());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.andps_mr(src.address(), dest.code());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
+    }
+    void orps(const Operand &src, FloatRegister dest) {
+        MOZ_ASSERT(HasSSE2());
+        switch (src.kind()) {
+          case Operand::FPREG:
+            masm.orps_rr(src.fpu(), dest.code());
+            break;
+          case Operand::MEM_REG_DISP:
+            masm.orps_mr(src.disp(), src.base(), dest.code());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.orps_mr(src.address(), dest.code());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
+    }
+    void xorps(const Operand &src, FloatRegister dest) {
+        MOZ_ASSERT(HasSSE2());
+        switch (src.kind()) {
+          case Operand::FPREG:
+            masm.xorps_rr(src.fpu(), dest.code());
+            break;
+          case Operand::MEM_REG_DISP:
+            masm.xorps_mr(src.disp(), src.base(), dest.code());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.xorps_mr(src.address(), dest.code());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
+    }
     void pxor(FloatRegister src, FloatRegister dest) {
         JS_ASSERT(HasSSE2());
         masm.pxor_rr(src.code(), dest.code());
     }
     void pshufd(uint32_t mask, FloatRegister src, FloatRegister dest) {
         JS_ASSERT(HasSSE2());
         masm.pshufd_irr(mask, src.code(), dest.code());
     }
--- a/js/src/jit/shared/BaseAssembler-x86-shared.h
+++ b/js/src/jit/shared/BaseAssembler-x86-shared.h
@@ -290,16 +290,19 @@ private:
         OP2_UNPCKLPS_VsdWsd = 0x14,
         OP2_MOVAPD_VsdWsd   = 0x28,
         OP2_MOVAPS_VsdWsd   = 0x28,
         OP2_MOVAPS_WsdVsd   = 0x29,
         OP2_CVTSI2SD_VsdEd  = 0x2A,
         OP2_CVTTSD2SI_GdWsd = 0x2C,
         OP2_UCOMISD_VsdWsd  = 0x2E,
         OP2_MOVMSKPD_EdVd   = 0x50,
+        OP2_ANDPS_VpsWps    = 0x54,
+        OP2_ORPS_VpsWps     = 0x56,
+        OP2_XORPS_VpsWps    = 0x57,
         OP2_ADDSD_VsdWsd    = 0x58,
         OP2_ADDPS_VpsWps    = 0x58,
         OP2_MULSD_VsdWsd    = 0x59,
         OP2_MULPS_VpsWps    = 0x59,
         OP2_CVTSS2SD_VsdEd  = 0x5A,
         OP2_CVTSD2SS_VsdEd  = 0x5A,
         OP2_SUBSD_VsdWsd    = 0x5C,
         OP2_SUBPS_VpsWps    = 0x5C,
@@ -3419,23 +3422,16 @@ public:
     void xorpd_rr(XMMRegisterID src, XMMRegisterID dst)
     {
         spew("xorpd      %s, %s",
              nameFPReg(src), nameFPReg(dst));
         m_formatter.prefix(PRE_SSE_66);
         m_formatter.twoByteOp(OP2_XORPD_VpdWpd, (RegisterID)dst, (RegisterID)src);
     }
 
-    void xorps_rr(XMMRegisterID src, XMMRegisterID dst)
-    {
-        spew("xorps      %s, %s",
-             nameFPReg(src), nameFPReg(dst));
-        m_formatter.twoByteOp(OP2_XORPD_VpdWpd, (RegisterID)dst, (RegisterID)src);
-    }
-
     void orpd_rr(XMMRegisterID src, XMMRegisterID dst)
     {
         spew("orpd       %s, %s",
              nameFPReg(src), nameFPReg(dst));
         m_formatter.prefix(PRE_SSE_66);
         m_formatter.twoByteOp(OP2_ORPD_VpdWpd, (RegisterID)dst, (RegisterID)src);
     }
 
@@ -3446,17 +3442,73 @@ public:
         m_formatter.prefix(PRE_SSE_66);
         m_formatter.twoByteOp(OP2_ANDPD_VpdWpd, (RegisterID)dst, (RegisterID)src);
     }
 
     void andps_rr(XMMRegisterID src, XMMRegisterID dst)
     {
         spew("andps      %s, %s",
              nameFPReg(src), nameFPReg(dst));
-        m_formatter.twoByteOp(OP2_ANDPD_VpdWpd, (RegisterID)dst, (RegisterID)src);
+        m_formatter.twoByteOp(OP2_ANDPS_VpsWps, (RegisterID)dst, (RegisterID)src);
+    }
+
+    void andps_mr(int offset, RegisterID base, XMMRegisterID dst)
+    {
+        spew("andps      %s0x%x(%s), %s",
+             PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(dst));
+        m_formatter.twoByteOp(OP2_ANDPS_VpsWps, (RegisterID)dst, base, offset);
+    }
+
+    void andps_mr(const void* address, XMMRegisterID dst)
+    {
+        spew("andps      %p, %s",
+             address, nameFPReg(dst));
+        m_formatter.twoByteOp(OP2_ANDPS_VpsWps, (RegisterID)dst, address);
+    }
+
+    void orps_rr(XMMRegisterID src, XMMRegisterID dst)
+    {
+        spew("orps      %s, %s",
+             nameFPReg(src), nameFPReg(dst));
+        m_formatter.twoByteOp(OP2_ORPS_VpsWps, (RegisterID)dst, (RegisterID)src);
+    }
+
+    void orps_mr(int offset, RegisterID base, XMMRegisterID dst)
+    {
+        spew("orps      %s0x%x(%s), %s",
+             PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(dst));
+        m_formatter.twoByteOp(OP2_ORPS_VpsWps, (RegisterID)dst, base, offset);
+    }
+
+    void orps_mr(const void* address, XMMRegisterID dst)
+    {
+        spew("orps      %p, %s",
+             address, nameFPReg(dst));
+        m_formatter.twoByteOp(OP2_ORPS_VpsWps, (RegisterID)dst, address);
+    }
+
+    void xorps_rr(XMMRegisterID src, XMMRegisterID dst)
+    {
+        spew("xorps      %s, %s",
+             nameFPReg(src), nameFPReg(dst));
+        m_formatter.twoByteOp(OP2_XORPS_VpsWps, (RegisterID)dst, (RegisterID)src);
+    }
+
+    void xorps_mr(int offset, RegisterID base, XMMRegisterID dst)
+    {
+        spew("xorps      %s0x%x(%s), %s",
+             PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(dst));
+        m_formatter.twoByteOp(OP2_XORPS_VpsWps, (RegisterID)dst, base, offset);
+    }
+
+    void xorps_mr(const void* address, XMMRegisterID dst)
+    {
+        spew("xorps      %p, %s",
+             address, nameFPReg(dst));
+        m_formatter.twoByteOp(OP2_XORPS_VpsWps, (RegisterID)dst, address);
     }
 
     void sqrtsd_rr(XMMRegisterID src, XMMRegisterID dst)
     {
         spew("sqrtsd     %s, %s",
              nameFPReg(src), nameFPReg(dst));
         m_formatter.prefix(PRE_SSE_F2);
         m_formatter.twoByteOp(OP2_SQRTSD_VsdWsd, (RegisterID)dst, (RegisterID)src);
--- a/js/src/jit/shared/CodeGenerator-x86-shared.cpp
+++ b/js/src/jit/shared/CodeGenerator-x86-shared.cpp
@@ -2305,16 +2305,38 @@ CodeGeneratorX86Shared::visitSimdBinaryA
       case MSimdBinaryArith::Div:
         masm.packedDivFloat32(rhs, lhs);
         return true;
     }
     MOZ_CRASH("unexpected SIMD op");
 }
 
 bool
+CodeGeneratorX86Shared::visitSimdBinaryBitwiseX4(LSimdBinaryBitwiseX4 *ins)
+{
+    FloatRegister lhs = ToFloatRegister(ins->lhs());
+    Operand rhs = ToOperand(ins->rhs());
+    MOZ_ASSERT(ToFloatRegister(ins->output()) == lhs);
+
+    MSimdBinaryBitwise::Operation op = ins->operation();
+    switch (op) {
+      case MSimdBinaryBitwise::and_:
+        masm.bitwiseAndX4(rhs, lhs);
+        return true;
+      case MSimdBinaryBitwise::or_:
+        masm.bitwiseOrX4(rhs, lhs);
+        return true;
+      case MSimdBinaryBitwise::xor_:
+        masm.bitwiseXorX4(rhs, lhs);
+        return true;
+    }
+    MOZ_CRASH("unexpected SIMD bitwise op");
+}
+
+bool
 CodeGeneratorX86Shared::visitForkJoinGetSlice(LForkJoinGetSlice *ins)
 {
     MOZ_ASSERT(gen->info().executionMode() == ParallelExecution);
     MOZ_ASSERT(ToRegister(ins->forkJoinContext()) == ForkJoinGetSliceReg_cx);
     MOZ_ASSERT(ToRegister(ins->temp1()) == eax);
     MOZ_ASSERT(ToRegister(ins->temp2()) == edx);
     MOZ_ASSERT(ToRegister(ins->temp3()) == ForkJoinGetSliceReg_temp0);
     MOZ_ASSERT(ToRegister(ins->temp4()) == ForkJoinGetSliceReg_temp1);
--- a/js/src/jit/shared/CodeGenerator-x86-shared.h
+++ b/js/src/jit/shared/CodeGenerator-x86-shared.h
@@ -210,16 +210,17 @@ class CodeGeneratorX86Shared : public Co
     bool visitInt32x4(LInt32x4 *ins);
     bool visitFloat32x4(LFloat32x4 *ins);
     bool visitSimdExtractElementI(LSimdExtractElementI *lir);
     bool visitSimdExtractElementF(LSimdExtractElementF *lir);
     bool visitSimdBinaryCompIx4(LSimdBinaryCompIx4 *lir);
     bool visitSimdBinaryCompFx4(LSimdBinaryCompFx4 *lir);
     bool visitSimdBinaryArithIx4(LSimdBinaryArithIx4 *lir);
     bool visitSimdBinaryArithFx4(LSimdBinaryArithFx4 *lir);
+    bool visitSimdBinaryBitwiseX4(LSimdBinaryBitwiseX4 *lir);
 
     // Out of line visitors.
     bool visitOutOfLineBailout(OutOfLineBailout *ool);
     bool visitOutOfLineUndoALUOperation(OutOfLineUndoALUOperation *ool);
     bool visitMulNegativeZeroCheck(MulNegativeZeroCheck *ool);
     bool visitModOverflowCheck(ModOverflowCheck *ool);
     bool visitReturnZero(ReturnZero *ool);
     bool visitOutOfLineTableSwitch(OutOfLineTableSwitch *ool);
--- a/js/src/jit/shared/MacroAssembler-x86-shared.h
+++ b/js/src/jit/shared/MacroAssembler-x86-shared.h
@@ -462,16 +462,28 @@ class MacroAssemblerX86Shared : public A
     }
     void convertFloat32ToDouble(FloatRegister src, FloatRegister dest) {
         cvtss2sd(src, dest);
     }
     void convertDoubleToFloat32(FloatRegister src, FloatRegister dest) {
         cvtsd2ss(src, dest);
     }
 
+    void bitwiseAndX4(const Operand &src, FloatRegister dest) {
+        // TODO Using the "ps" variant for all types incurs a domain crossing
+        // penalty for integer types and double.
+        andps(src, dest);
+    }
+    void bitwiseOrX4(const Operand &src, FloatRegister dest) {
+        orps(src, dest);
+    }
+    void bitwiseXorX4(const Operand &src, FloatRegister dest) {
+        xorps(src, dest);
+    }
+
     void loadAlignedInt32x4(const Address &src, FloatRegister dest) {
         movdqa(Operand(src), dest);
     }
     void loadAlignedInt32x4(const Operand &src, FloatRegister dest) {
         movdqa(src, dest);
     }
     void storeAlignedInt32x4(FloatRegister src, const Address &dest) {
         movdqa(src, Operand(dest));