Bug 1060437 - SIMD backend: Implement the select operation; r=bbouvier
author Douglas Crosher <dtc-moz@scieneer.com>
Tue, 02 Sep 2014 17:49:17 +1000
changeset 203056 974abe2bd9499d0031b4500eaacab7f0f4b7c5e3
parent 203055 d1cbbceeb19e5255b3dbab37858d4f1b131fe8ae
child 203057 35742bdc126cb37525e1784ad2c785d4524e57ef
push id 10297
push user ryanvm@gmail.com
push date Tue, 02 Sep 2014 18:36:40 +0000
treeherder b2g-inbound@1d3b0ec6e32d [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers bbouvier
bugs 1060437
milestone 34.0a1
Bug 1060437 - SIMD backend: Implement the select operation; r=bbouvier
js/src/jit/LIR-Common.h
js/src/jit/LOpcodes.h
js/src/jit/MIR.h
js/src/jit/MOpcodes.h
js/src/jit/ParallelSafetyAnalysis.cpp
js/src/jit/arm/Lowering-arm.cpp
js/src/jit/arm/Lowering-arm.h
js/src/jit/mips/Lowering-mips.cpp
js/src/jit/mips/Lowering-mips.h
js/src/jit/none/Lowering-none.h
js/src/jit/shared/Assembler-x86-shared.h
js/src/jit/shared/BaseAssembler-x86-shared.h
js/src/jit/shared/CodeGenerator-x86-shared.cpp
js/src/jit/shared/CodeGenerator-x86-shared.h
js/src/jit/shared/Lowering-x86-shared.cpp
js/src/jit/shared/Lowering-x86-shared.h
js/src/jit/shared/MacroAssembler-x86-shared.h
--- a/js/src/jit/LIR-Common.h
+++ b/js/src/jit/LIR-Common.h
@@ -302,16 +302,36 @@ class LSimdBinaryBitwiseX4 : public LIns
     const LAllocation *rhs() {
         return getOperand(1);
     }
     MSimdBinaryBitwise::Operation operation() const {
         return mir_->toSimdBinaryBitwise()->operation();
     }
 };
 
+// SIMD selection of lanes from two int32x4 or float32x4 arguments based on an
+// int32x4 mask argument.
+class LSimdSelect : public LInstructionHelper<1, 3, 0>
+{
+  public:
+    LIR_HEADER(SimdSelect);
+    const LAllocation *mask() {  // operand 0: the selection mask
+        return getOperand(0);
+    }
+    const LAllocation *lhs() {  // operand 1
+        return getOperand(1);
+    }
+    const LAllocation *rhs() {  // operand 2
+        return getOperand(2);
+    }
+    MSimdTernaryBitwise::Operation operation() const {  // read from the MIR node
+        return mir_->toSimdTernaryBitwise()->operation();
+    }
+};
+
 // Constant 32-bit integer.
 class LInteger : public LInstructionHelper<1, 0, 0>
 {
     int32_t i32_;
 
   public:
     LIR_HEADER(Integer)
 
--- a/js/src/jit/LOpcodes.h
+++ b/js/src/jit/LOpcodes.h
@@ -23,16 +23,17 @@
     _(SimdExtractElementI)          \
     _(SimdExtractElementF)          \
     _(SimdSignMaskX4)               \
     _(SimdBinaryCompIx4)            \
     _(SimdBinaryCompFx4)            \
     _(SimdBinaryArithIx4)           \
     _(SimdBinaryArithFx4)           \
     _(SimdBinaryBitwiseX4)          \
+    _(SimdSelect)                   \
     _(Value)                        \
     _(CloneLiteral)                 \
     _(Parameter)                    \
     _(Callee)                       \
     _(TableSwitch)                  \
     _(TableSwitchV)                 \
     _(Goto)                         \
     _(NewArray)                     \
--- a/js/src/jit/MIR.h
+++ b/js/src/jit/MIR.h
@@ -1582,16 +1582,52 @@ class MSimdBinaryBitwise : public MBinar
 
     bool congruentTo(const MDefinition *ins) const {
         if (!binaryCongruentTo(ins))
             return false;
         return operation_ == ins->toSimdBinaryBitwise()->operation();
     }
 };
 
+class MSimdTernaryBitwise : public MTernaryInstruction  // SIMD op on (mask, lhs, rhs)
+{
+  public:
+    enum Operation {
+        select  // the only ternary operation defined so far
+    };
+
+  private:
+    Operation operation_;
+
+    MSimdTernaryBitwise(MDefinition *mask, MDefinition *lhs, MDefinition *rhs, Operation op, MIRType type)
+      : MTernaryInstruction(mask, lhs, rhs), operation_(op)
+    {
+        MOZ_ASSERT(IsSimdType(type));
+        MOZ_ASSERT(mask->type() == MIRType_Int32x4);  // the mask is always int32x4
+        MOZ_ASSERT(lhs->type() == rhs->type());       // both inputs share one SIMD type...
+        MOZ_ASSERT(lhs->type() == type);              // ...which is also the result type
+        setResultType(type);
+        setMovable();
+    }
+
+  public:
+    INSTRUCTION_HEADER(SimdTernaryBitwise);
+    static MSimdTernaryBitwise *NewAsmJS(TempAllocator &alloc, MDefinition *mask, MDefinition *lhs,
+                                         MDefinition *rhs, Operation op, MIRType t)
+    {
+        return new(alloc) MSimdTernaryBitwise(mask, lhs, rhs, op, t);
+    }
+
+    AliasSet getAliasSet() const {
+        return AliasSet::None();
+    }
+
+    Operation operation() const { return operation_; }  // which ternary op this node performs
+};
+
 // Deep clone a constant JSObject.
 class MCloneLiteral
   : public MUnaryInstruction,
     public ObjectPolicy<0>
 {
   protected:
     explicit MCloneLiteral(MDefinition *obj)
       : MUnaryInstruction(obj)
--- a/js/src/jit/MOpcodes.h
+++ b/js/src/jit/MOpcodes.h
@@ -15,16 +15,17 @@ namespace jit {
     _(SimdValueX4)                                                          \
     _(SimdSplatX4)                                                          \
     _(SimdConstant)                                                         \
     _(SimdExtractElement)                                                   \
     _(SimdSignMask)                                                         \
     _(SimdBinaryComp)                                                       \
     _(SimdBinaryArith)                                                      \
     _(SimdBinaryBitwise)                                                    \
+    _(SimdTernaryBitwise)                                                   \
     _(CloneLiteral)                                                         \
     _(Parameter)                                                            \
     _(Callee)                                                               \
     _(TableSwitch)                                                          \
     _(Goto)                                                                 \
     _(Test)                                                                 \
     _(TypeObjectDispatch)                                                   \
     _(FunctionDispatch)                                                     \
--- a/js/src/jit/ParallelSafetyAnalysis.cpp
+++ b/js/src/jit/ParallelSafetyAnalysis.cpp
@@ -115,16 +115,17 @@ class ParallelSafetyVisitor : public MDe
     SAFE_OP(SimdValueX4)
     SAFE_OP(SimdSplatX4)
     SAFE_OP(SimdConstant)
     SAFE_OP(SimdExtractElement)
     SAFE_OP(SimdSignMask)
     SAFE_OP(SimdBinaryComp)
     SAFE_OP(SimdBinaryArith)
     SAFE_OP(SimdBinaryBitwise)
+    SAFE_OP(SimdTernaryBitwise)
     UNSAFE_OP(CloneLiteral)
     SAFE_OP(Parameter)
     SAFE_OP(Callee)
     SAFE_OP(TableSwitch)
     SAFE_OP(Goto)
     SAFE_OP(Test)
     SAFE_OP(Compare)
     SAFE_OP(Phi)
--- a/js/src/jit/arm/Lowering-arm.cpp
+++ b/js/src/jit/arm/Lowering-arm.cpp
@@ -545,14 +545,20 @@ LIRGeneratorARM::visitStoreTypedArrayEle
 
 bool
 LIRGeneratorARM::visitForkJoinGetSlice(MForkJoinGetSlice *ins)
 {
     MOZ_CRASH("NYI");
 }
 
 bool
+LIRGeneratorARM::visitSimdTernaryBitwise(MSimdTernaryBitwise *ins)
+{
+    MOZ_CRASH("NYI"); // Lowering of SIMD ternary bitwise ops is not yet implemented on ARM.
+}
+
+bool
 LIRGeneratorARM::visitSimdSplatX4(MSimdSplatX4 *ins)
 {
     MOZ_CRASH("NYI");
 }
 
 //__aeabi_uidiv
--- a/js/src/jit/arm/Lowering-arm.h
+++ b/js/src/jit/arm/Lowering-arm.h
@@ -86,16 +86,17 @@ class LIRGeneratorARM : public LIRGenera
     bool visitGuardObjectType(MGuardObjectType *ins);
     bool visitAsmJSUnsignedToDouble(MAsmJSUnsignedToDouble *ins);
     bool visitAsmJSUnsignedToFloat32(MAsmJSUnsignedToFloat32 *ins);
     bool visitAsmJSLoadHeap(MAsmJSLoadHeap *ins);
     bool visitAsmJSStoreHeap(MAsmJSStoreHeap *ins);
     bool visitAsmJSLoadFuncPtr(MAsmJSLoadFuncPtr *ins);
     bool visitStoreTypedArrayElementStatic(MStoreTypedArrayElementStatic *ins);
     bool visitForkJoinGetSlice(MForkJoinGetSlice *ins);
+    bool visitSimdTernaryBitwise(MSimdTernaryBitwise *ins);
     bool visitSimdSplatX4(MSimdSplatX4 *ins);
 };
 
 typedef LIRGeneratorARM LIRGeneratorSpecific;
 
 } // namespace jit
 } // namespace js
 
--- a/js/src/jit/mips/Lowering-mips.cpp
+++ b/js/src/jit/mips/Lowering-mips.cpp
@@ -527,12 +527,18 @@ LIRGeneratorMIPS::visitStoreTypedArrayEl
 
 bool
 LIRGeneratorMIPS::visitForkJoinGetSlice(MForkJoinGetSlice *ins)
 {
     MOZ_CRASH("NYI");
 }
 
 bool
+LIRGeneratorMIPS::visitSimdTernaryBitwise(MSimdTernaryBitwise *ins)
+{
+    MOZ_CRASH("NYI"); // Lowering of SIMD ternary bitwise ops is not yet implemented on MIPS.
+}
+
+bool
 LIRGeneratorMIPS::visitSimdSplatX4(MSimdSplatX4 *ins)
 {
     MOZ_CRASH("NYI");
 }
--- a/js/src/jit/mips/Lowering-mips.h
+++ b/js/src/jit/mips/Lowering-mips.h
@@ -86,16 +86,17 @@ class LIRGeneratorMIPS : public LIRGener
     bool visitGuardObjectType(MGuardObjectType *ins);
     bool visitAsmJSUnsignedToDouble(MAsmJSUnsignedToDouble *ins);
     bool visitAsmJSUnsignedToFloat32(MAsmJSUnsignedToFloat32 *ins);
     bool visitAsmJSLoadHeap(MAsmJSLoadHeap *ins);
     bool visitAsmJSStoreHeap(MAsmJSStoreHeap *ins);
     bool visitAsmJSLoadFuncPtr(MAsmJSLoadFuncPtr *ins);
     bool visitStoreTypedArrayElementStatic(MStoreTypedArrayElementStatic *ins);
     bool visitForkJoinGetSlice(MForkJoinGetSlice *ins);
+    bool visitSimdTernaryBitwise(MSimdTernaryBitwise *ins);
     bool visitSimdSplatX4(MSimdSplatX4 *ins);
 };
 
 typedef LIRGeneratorMIPS LIRGeneratorSpecific;
 
 } // namespace jit
 } // namespace js
 
--- a/js/src/jit/none/Lowering-none.h
+++ b/js/src/jit/none/Lowering-none.h
@@ -68,16 +68,17 @@ class LIRGeneratorNone : public LIRGener
     bool visitAsmJSLoadHeap(MAsmJSLoadHeap *ins) { MOZ_CRASH(); }
     bool visitAsmJSStoreHeap(MAsmJSStoreHeap *ins) { MOZ_CRASH(); }
     bool visitAsmJSLoadFuncPtr(MAsmJSLoadFuncPtr *ins) { MOZ_CRASH(); }
     bool visitStoreTypedArrayElementStatic(MStoreTypedArrayElementStatic *ins) { MOZ_CRASH(); }
     bool visitForkJoinGetSlice(MForkJoinGetSlice *ins) { MOZ_CRASH(); }
 
     LTableSwitch *newLTableSwitch(LAllocation, LDefinition, MTableSwitch *) { MOZ_CRASH(); }
     LTableSwitchV *newLTableSwitchV(MTableSwitch *) { MOZ_CRASH(); }
+    bool visitSimdTernaryBitwise(MSimdTernaryBitwise *ins) { MOZ_CRASH(); }
     bool visitSimdSplatX4(MSimdSplatX4 *ins) { MOZ_CRASH(); }
 };
 
 typedef LIRGeneratorNone LIRGeneratorSpecific;
 
 } // namespace jit
 } // namespace js
 
--- a/js/src/jit/shared/Assembler-x86-shared.h
+++ b/js/src/jit/shared/Assembler-x86-shared.h
@@ -1676,16 +1676,32 @@ class AssemblerX86Shared : public Assemb
             break;
           case Operand::MEM_ADDRESS32:
             masm.andps_mr(src.address(), dest.code());
             break;
           default:
             MOZ_CRASH("unexpected operand kind");
         }
     }
+    void andnps(const Operand &src, FloatRegister dest) {  // ANDNPS: dest = ~dest & src
+        MOZ_ASSERT(HasSSE2());
+        switch (src.kind()) {
+          case Operand::FPREG:
+            masm.andnps_rr(src.fpu(), dest.code());
+            break;
+          case Operand::MEM_REG_DISP:
+            masm.andnps_mr(src.disp(), src.base(), dest.code());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.andnps_mr(src.address(), dest.code());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
+    }
     void orps(const Operand &src, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
         switch (src.kind()) {
           case Operand::FPREG:
             masm.orps_rr(src.fpu(), dest.code());
             break;
           case Operand::MEM_REG_DISP:
             masm.orps_mr(src.disp(), src.base(), dest.code());
--- a/js/src/jit/shared/BaseAssembler-x86-shared.h
+++ b/js/src/jit/shared/BaseAssembler-x86-shared.h
@@ -291,16 +291,17 @@ private:
         OP2_MOVAPD_VsdWsd   = 0x28,
         OP2_MOVAPS_VsdWsd   = 0x28,
         OP2_MOVAPS_WsdVsd   = 0x29,
         OP2_CVTSI2SD_VsdEd  = 0x2A,
         OP2_CVTTSD2SI_GdWsd = 0x2C,
         OP2_UCOMISD_VsdWsd  = 0x2E,
         OP2_MOVMSKPD_EdVd   = 0x50,
         OP2_ANDPS_VpsWps    = 0x54,
+        OP2_ANDNPS_VpsWps   = 0x55,
         OP2_ORPS_VpsWps     = 0x56,
         OP2_XORPS_VpsWps    = 0x57,
         OP2_ADDSD_VsdWsd    = 0x58,
         OP2_ADDPS_VpsWps    = 0x58,
         OP2_MULSD_VsdWsd    = 0x59,
         OP2_MULPS_VpsWps    = 0x59,
         OP2_CVTSS2SD_VsdEd  = 0x5A,
         OP2_CVTSD2SS_VsdEd  = 0x5A,
@@ -3459,16 +3460,37 @@ public:
 
     void andps_mr(const void* address, XMMRegisterID dst)
     {
         spew("andps      %p, %s",
              address, nameFPReg(dst));
         m_formatter.twoByteOp(OP2_ANDPS_VpsWps, (RegisterID)dst, address);
     }
 
+    void andnps_rr(XMMRegisterID src, XMMRegisterID dst)
+    {
+        spew("andnps     %s, %s",
+             nameFPReg(src), nameFPReg(dst));
+        m_formatter.twoByteOp(OP2_ANDNPS_VpsWps, (RegisterID)dst, (RegisterID)src);  // dst = ~dst & src
+    }
+
+    void andnps_mr(int offset, RegisterID base, XMMRegisterID dst)
+    {
+        spew("andnps     %s0x%x(%s), %s",
+             PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(dst));
+        m_formatter.twoByteOp(OP2_ANDNPS_VpsWps, (RegisterID)dst, base, offset);  // dst = ~dst & [base+offset]
+    }
+
+    void andnps_mr(const void* address, XMMRegisterID dst)
+    {
+        spew("andnps     %p, %s",
+             address, nameFPReg(dst));
+        m_formatter.twoByteOp(OP2_ANDNPS_VpsWps, (RegisterID)dst, address);  // ANDNPS (0x55), not ANDPS
+    }
+
     void orps_rr(XMMRegisterID src, XMMRegisterID dst)
     {
         spew("orps      %s, %s",
              nameFPReg(src), nameFPReg(dst));
         m_formatter.twoByteOp(OP2_ORPS_VpsWps, (RegisterID)dst, (RegisterID)src);
     }
 
     void orps_mr(int offset, RegisterID base, XMMRegisterID dst)
--- a/js/src/jit/shared/CodeGenerator-x86-shared.cpp
+++ b/js/src/jit/shared/CodeGenerator-x86-shared.cpp
@@ -2367,16 +2367,32 @@ CodeGeneratorX86Shared::visitSimdBinaryB
       case MSimdBinaryBitwise::xor_:
         masm.bitwiseXorX4(rhs, lhs);
         return true;
     }
     MOZ_CRASH("unexpected SIMD bitwise op");
 }
 
 bool
+CodeGeneratorX86Shared::visitSimdSelect(LSimdSelect *ins)
+{
+    FloatRegister mask = ToFloatRegister(ins->mask());    // overwritten below
+    FloatRegister onTrue = ToFloatRegister(ins->lhs());   // doubles as the output register
+    FloatRegister onFalse = ToFloatRegister(ins->rhs());  // only read
+
+    MOZ_ASSERT(onTrue == ToFloatRegister(ins->output()));
+    // The onFalse argument is not destroyed but due to limitations of the
+    // register allocator its life ends at the start of the operation.
+    masm.bitwiseAndX4(Operand(mask), onTrue);      // onTrue = onTrue & mask
+    masm.bitwiseAndNotX4(Operand(onFalse), mask);  // mask = ~mask & onFalse
+    masm.bitwiseOrX4(Operand(mask), onTrue);       // onTrue = onTrue | mask
+    return true;
+}
+
+bool
 CodeGeneratorX86Shared::visitForkJoinGetSlice(LForkJoinGetSlice *ins)
 {
     MOZ_ASSERT(gen->info().executionMode() == ParallelExecution);
     MOZ_ASSERT(ToRegister(ins->forkJoinContext()) == ForkJoinGetSliceReg_cx);
     MOZ_ASSERT(ToRegister(ins->temp1()) == eax);
     MOZ_ASSERT(ToRegister(ins->temp2()) == edx);
     MOZ_ASSERT(ToRegister(ins->temp3()) == ForkJoinGetSliceReg_temp0);
     MOZ_ASSERT(ToRegister(ins->temp4()) == ForkJoinGetSliceReg_temp1);
--- a/js/src/jit/shared/CodeGenerator-x86-shared.h
+++ b/js/src/jit/shared/CodeGenerator-x86-shared.h
@@ -213,16 +213,17 @@ class CodeGeneratorX86Shared : public Co
     bool visitSimdExtractElementI(LSimdExtractElementI *lir);
     bool visitSimdExtractElementF(LSimdExtractElementF *lir);
     bool visitSimdSignMaskX4(LSimdSignMaskX4 *ins);
     bool visitSimdBinaryCompIx4(LSimdBinaryCompIx4 *lir);
     bool visitSimdBinaryCompFx4(LSimdBinaryCompFx4 *lir);
     bool visitSimdBinaryArithIx4(LSimdBinaryArithIx4 *lir);
     bool visitSimdBinaryArithFx4(LSimdBinaryArithFx4 *lir);
     bool visitSimdBinaryBitwiseX4(LSimdBinaryBitwiseX4 *lir);
+    bool visitSimdSelect(LSimdSelect *ins);
 
     // Out of line visitors.
     bool visitOutOfLineBailout(OutOfLineBailout *ool);
     bool visitOutOfLineUndoALUOperation(OutOfLineUndoALUOperation *ool);
     bool visitMulNegativeZeroCheck(MulNegativeZeroCheck *ool);
     bool visitModOverflowCheck(ModOverflowCheck *ool);
     bool visitReturnZero(ReturnZero *ool);
     bool visitOutOfLineTableSwitch(OutOfLineTableSwitch *ool);
--- a/js/src/jit/shared/Lowering-x86-shared.cpp
+++ b/js/src/jit/shared/Lowering-x86-shared.cpp
@@ -303,16 +303,41 @@ LIRGeneratorX86Shared::visitForkJoinGetS
                           tempFixed(eax),
                           tempFixed(edx),
                           tempFixed(ForkJoinGetSliceReg_temp0),
                           tempFixed(ForkJoinGetSliceReg_temp1));
     return defineFixed(lir, ins, LAllocation(AnyRegister(ForkJoinGetSliceReg_output)));
 }
 
 bool
+LIRGeneratorX86Shared::visitSimdTernaryBitwise(MSimdTernaryBitwise *ins)
+{
+    MOZ_ASSERT(IsSimdType(ins->type()));
+
+    if (ins->type() == MIRType_Int32x4 || ins->type() == MIRType_Float32x4) {
+        LSimdSelect *lins = new(alloc()) LSimdSelect;
+
+        // Operand 0 (the mask) must be useRegisterAtStart() because codegen overwrites it.
+        lins->setOperand(0, useRegisterAtStart(ins->getOperand(0)));
+        // Operand 1 must be useRegisterAtStart() because the output reuses its register.
+        lins->setOperand(1, useRegisterAtStart(ins->getOperand(1)));
+        // Operand 2 could be useRegister(), but combining that with
+        // useRegisterAtStart() is broken; see bug 772830.
+        lins->setOperand(2, useRegisterAtStart(ins->getOperand(2)));
+        // The output is constrained to be in the same register as the second
+        // argument to avoid redundantly copying the result into place. The
+        // register allocator will move the result if necessary.
+        return defineReuseInput(lins, ins, 1);
+    }
+
+    MOZ_CRASH("Unknown SIMD kind when doing bitwise operations");
+    return false;
+}
+
+bool
 LIRGeneratorX86Shared::visitSimdSplatX4(MSimdSplatX4 *ins)
 {
     LAllocation x = useRegisterAtStart(ins->getOperand(0));
     LSimdSplatX4 *lir = new(alloc()) LSimdSplatX4(x);
 
     switch (ins->type()) {
       case MIRType_Int32x4:
         return define(lir, ins);
--- a/js/src/jit/shared/Lowering-x86-shared.h
+++ b/js/src/jit/shared/Lowering-x86-shared.h
@@ -43,15 +43,16 @@ class LIRGeneratorX86Shared : public LIR
     bool lowerUDiv(MDiv *div);
     bool lowerUMod(MMod *mod);
     bool lowerUrshD(MUrsh *mir);
     bool lowerConstantDouble(double d, MInstruction *ins);
     bool lowerConstantFloat32(float d, MInstruction *ins);
     bool lowerTruncateDToInt32(MTruncateToInt32 *ins);
     bool lowerTruncateFToInt32(MTruncateToInt32 *ins);
     bool visitForkJoinGetSlice(MForkJoinGetSlice *ins);
+    bool visitSimdTernaryBitwise(MSimdTernaryBitwise *ins);
     bool visitSimdSplatX4(MSimdSplatX4 *ins);
 };
 
 } // namespace jit
 } // namespace js
 
 #endif /* jit_shared_Lowering_x86_shared_h */
--- a/js/src/jit/shared/MacroAssembler-x86-shared.h
+++ b/js/src/jit/shared/MacroAssembler-x86-shared.h
@@ -467,16 +467,19 @@ class MacroAssemblerX86Shared : public A
         cvtsd2ss(src, dest);
     }
 
     void bitwiseAndX4(const Operand &src, FloatRegister dest) {
         // TODO Using the "ps" variant for all types incurs a domain crossing
         // penalty for integer types and double.
         andps(src, dest);
     }
+    void bitwiseAndNotX4(const Operand &src, FloatRegister dest) {
+        andnps(src, dest);  // dest = ~dest & src; uses the "ps" form like bitwiseAndX4
+    }
     void bitwiseOrX4(const Operand &src, FloatRegister dest) {
         orps(src, dest);
     }
     void bitwiseXorX4(const Operand &src, FloatRegister dest) {
         xorps(src, dest);
     }
 
     void loadAlignedInt32x4(const Address &src, FloatRegister dest) {