Bug 1053788: Implement backends for generating min/max for float32; r=sunfish
author: Benjamin Bouvier <benj@benj.me>
Fri, 19 Sep 2014 14:39:27 +0200
changeset 206196 5f142d472a2686baa78337418fce8f0ded0ab41f
parent 206195 8ac587fb9cdafba6e345b083ee17ec9a5ba44823
child 206197 bad63e9d0b19aaf3bb0c5f3a2dd84b885f55e316
push id: 49368
push user: benj@benj.me
push date: Fri, 19 Sep 2014 12:42:58 +0000
treeherder: mozilla-inbound@b1b54c2c8785 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: sunfish
bugs: 1053788
milestone: 35.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1053788: Implement backends for generating min/max for float32; r=sunfish
js/src/jit/LIR-Common.h
js/src/jit/LOpcodes.h
js/src/jit/Lowering.cpp
js/src/jit/arm/CodeGenerator-arm.cpp
js/src/jit/arm/CodeGenerator-arm.h
js/src/jit/shared/Assembler-x86-shared.h
js/src/jit/shared/BaseAssembler-x86-shared.h
js/src/jit/shared/CodeGenerator-x86-shared.cpp
js/src/jit/shared/CodeGenerator-x86-shared.h
--- a/js/src/jit/LIR-Common.h
+++ b/js/src/jit/LIR-Common.h
@@ -2694,16 +2694,24 @@ class LMinMaxI : public LMinMaxBase
 class LMinMaxD : public LMinMaxBase
 {
   public:
     LIR_HEADER(MinMaxD)
     LMinMaxD(const LAllocation &first, const LAllocation &second) : LMinMaxBase(first, second)
     {}
 };
 
+class LMinMaxF : public LMinMaxBase
+{
+  public:
+    LIR_HEADER(MinMaxF)
+    LMinMaxF(const LAllocation &first, const LAllocation &second) : LMinMaxBase(first, second)
+    {}
+};
+
 // Negative of an integer
 class LNegI : public LInstructionHelper<1, 1, 0>
 {
   public:
     LIR_HEADER(NegI);
     explicit LNegI(const LAllocation &num) {
         setOperand(0, num);
     }
--- a/js/src/jit/LOpcodes.h
+++ b/js/src/jit/LOpcodes.h
@@ -110,16 +110,17 @@
     _(CompareVM)                    \
     _(BitAndAndBranch)              \
     _(IsNullOrLikeUndefined)        \
     _(IsNullOrLikeUndefinedAndBranch)\
     _(EmulatesUndefined)            \
     _(EmulatesUndefinedAndBranch)   \
     _(MinMaxI)                      \
     _(MinMaxD)                      \
+    _(MinMaxF)                      \
     _(NegI)                         \
     _(NegD)                         \
     _(NegF)                         \
     _(AbsI)                         \
     _(AbsD)                         \
     _(AbsF)                         \
     _(ClzI)                         \
     _(SqrtD)                        \
--- a/js/src/jit/Lowering.cpp
+++ b/js/src/jit/Lowering.cpp
@@ -1283,16 +1283,22 @@ LIRGenerator::visitMinMax(MMinMax *ins)
 
     ReorderCommutative(&first, &second, ins);
 
     if (ins->specialization() == MIRType_Int32) {
         LMinMaxI *lir = new(alloc()) LMinMaxI(useRegisterAtStart(first), useRegisterOrConstant(second));
         return defineReuseInput(lir, ins, 0);
     }
 
+    if (ins->specialization() == MIRType_Float32) {
+        LMinMaxF *lir = new(alloc()) LMinMaxF(useRegisterAtStart(first), useRegister(second));
+        return defineReuseInput(lir, ins, 0);
+    }
+
+    MOZ_ASSERT(ins->specialization() == MIRType_Double);
     LMinMaxD *lir = new(alloc()) LMinMaxD(useRegisterAtStart(first), useRegister(second));
     return defineReuseInput(lir, ins, 0);
 }
 
 bool
 LIRGenerator::visitAbs(MAbs *ins)
 {
     MDefinition *num = ins->input();
--- a/js/src/jit/arm/CodeGenerator-arm.cpp
+++ b/js/src/jit/arm/CodeGenerator-arm.cpp
@@ -286,16 +286,65 @@ CodeGeneratorARM::visitMinMaxD(LMinMaxD 
     masm.bind(&returnSecond);
     masm.ma_vmov(second, output);
 
     masm.bind(&done);
     return true;
 }
 
 bool
+CodeGeneratorARM::visitMinMaxF(LMinMaxF *ins)
+{
+    FloatRegister first = ToFloatRegister(ins->first());
+    FloatRegister second = ToFloatRegister(ins->second());
+    FloatRegister output = ToFloatRegister(ins->output());
+
+    JS_ASSERT(first == output);
+
+    Assembler::Condition cond = ins->mir()->isMax()
+        ? Assembler::VFP_LessThanOrEqual
+        : Assembler::VFP_GreaterThanOrEqual;
+    Label nan, equal, returnSecond, done;
+
+    masm.compareFloat(first, second);
+    // First or second is NaN, result is NaN.
+    masm.ma_b(&nan, Assembler::VFP_Unordered);
+    // Make sure we handle -0 and 0 right.
+    masm.ma_b(&equal, Assembler::VFP_Equal);
+    masm.ma_b(&returnSecond, cond);
+    masm.ma_b(&done);
+
+    // Check for zero.
+    masm.bind(&equal);
+    masm.compareFloat(first, NoVFPRegister);
+    // First wasn't 0 or -0, so just return it.
+    masm.ma_b(&done, Assembler::VFP_NotEqualOrUnordered);
+    // So now both operands are either -0 or 0.
+    if (ins->mir()->isMax()) {
+        // -0 + -0 = -0 and -0 + 0 = 0.
+        masm.ma_vadd_f32(second, first, first);
+    } else {
+        masm.ma_vneg_f32(first, first);
+        masm.ma_vsub_f32(first, second, first);
+        masm.ma_vneg_f32(first, first);
+    }
+    masm.ma_b(&done);
+
+    masm.bind(&nan);
+    masm.loadConstantFloat32(GenericNaN(), output);
+    masm.ma_b(&done);
+
+    masm.bind(&returnSecond);
+    masm.ma_vmov_f32(second, output);
+
+    masm.bind(&done);
+    return true;
+}
+
+bool
 CodeGeneratorARM::visitAbsD(LAbsD *ins)
 {
     FloatRegister input = ToFloatRegister(ins->input());
     JS_ASSERT(input == ToFloatRegister(ins->output()));
     masm.ma_vabs(input, input);
     return true;
 }
 
--- a/js/src/jit/arm/CodeGenerator-arm.h
+++ b/js/src/jit/arm/CodeGenerator-arm.h
@@ -97,16 +97,17 @@ class CodeGeneratorARM : public CodeGene
         emitBranch(cond, ifTrue, ifFalse);
     }
 
     bool emitTableSwitchDispatch(MTableSwitch *mir, Register index, Register base);
 
   public:
     // Instruction visitors.
     virtual bool visitMinMaxD(LMinMaxD *ins);
+    virtual bool visitMinMaxF(LMinMaxF *ins);
     virtual bool visitAbsD(LAbsD *ins);
     virtual bool visitAbsF(LAbsF *ins);
     virtual bool visitSqrtD(LSqrtD *ins);
     virtual bool visitSqrtF(LSqrtF *ins);
     virtual bool visitAddI(LAddI *ins);
     virtual bool visitSubI(LSubI *ins);
     virtual bool visitBitNotI(LBitNotI *ins);
     virtual bool visitBitOpI(LBitOpI *ins);
--- a/js/src/jit/shared/Assembler-x86-shared.h
+++ b/js/src/jit/shared/Assembler-x86-shared.h
@@ -1926,16 +1926,20 @@ class AssemblerX86Shared : public Assemb
     void xorps(FloatRegister src, FloatRegister dest) {
         JS_ASSERT(HasSSE2());
         masm.xorps_rr(src.code(), dest.code());
     }
     void orpd(FloatRegister src, FloatRegister dest) {
         JS_ASSERT(HasSSE2());
         masm.orpd_rr(src.code(), dest.code());
     }
+    void orps(FloatRegister src, FloatRegister dest) {
+        JS_ASSERT(HasSSE2());
+        masm.orps_rr(src.code(), dest.code());
+    }
     void andpd(FloatRegister src, FloatRegister dest) {
         JS_ASSERT(HasSSE2());
         masm.andpd_rr(src.code(), dest.code());
     }
     void andps(FloatRegister src, FloatRegister dest) {
         JS_ASSERT(HasSSE2());
         masm.andps_rr(src.code(), dest.code());
     }
@@ -1971,16 +1975,20 @@ class AssemblerX86Shared : public Assemb
             break;
           case Operand::MEM_REG_DISP:
             masm.minsd_mr(src.disp(), src.base(), dest.code());
             break;
           default:
             MOZ_CRASH("unexpected operand kind");
         }
     }
+    void minss(FloatRegister src, FloatRegister dest) {
+        JS_ASSERT(HasSSE2());
+        masm.minss_rr(src.code(), dest.code());
+    }
     void maxsd(FloatRegister src, FloatRegister dest) {
         JS_ASSERT(HasSSE2());
         masm.maxsd_rr(src.code(), dest.code());
     }
     void maxsd(const Operand &src, FloatRegister dest) {
         JS_ASSERT(HasSSE2());
         switch (src.kind()) {
           case Operand::FPREG:
@@ -1988,16 +1996,20 @@ class AssemblerX86Shared : public Assemb
             break;
           case Operand::MEM_REG_DISP:
             masm.maxsd_mr(src.disp(), src.base(), dest.code());
             break;
           default:
             MOZ_CRASH("unexpected operand kind");
         }
     }
+    void maxss(FloatRegister src, FloatRegister dest) {
+        JS_ASSERT(HasSSE2());
+        masm.maxss_rr(src.code(), dest.code());
+    }
     void fisttp(const Operand &dest) {
         JS_ASSERT(HasSSE3());
         switch (dest.kind()) {
           case Operand::MEM_REG_DISP:
             masm.fisttp_m(dest.disp(), dest.base());
             break;
           default:
             MOZ_CRASH("unexpected operand kind");
--- a/js/src/jit/shared/BaseAssembler-x86-shared.h
+++ b/js/src/jit/shared/BaseAssembler-x86-shared.h
@@ -303,20 +303,22 @@ private:
         OP2_ADDPS_VpsWps    = 0x58,
         OP2_MULSD_VsdWsd    = 0x59,
         OP2_MULPS_VpsWps    = 0x59,
         OP2_CVTSS2SD_VsdEd  = 0x5A,
         OP2_CVTSD2SS_VsdEd  = 0x5A,
         OP2_SUBSD_VsdWsd    = 0x5C,
         OP2_SUBPS_VpsWps    = 0x5C,
         OP2_MINSD_VsdWsd    = 0x5D,
+        OP2_MINSS_VssWss    = 0x5D,
         OP2_MINPS_VpsWps    = 0x5D,
         OP2_DIVSD_VsdWsd    = 0x5E,
         OP2_DIVPS_VpsWps    = 0x5E,
         OP2_MAXSD_VsdWsd    = 0x5F,
+        OP2_MAXSS_VssWss    = 0x5F,
         OP2_MAXPS_VpsWps    = 0x5F,
         OP2_SQRTSD_VsdWsd   = 0x51,
         OP2_SQRTSS_VssWss   = 0x51,
         OP2_ANDPD_VpdWpd    = 0x54,
         OP2_ORPD_VpdWpd     = 0x56,
         OP2_XORPD_VpdWpd    = 0x57,
         OP2_PCMPGTD_VdqWdq  = 0x66,
         OP2_MOVD_VdEd       = 0x6E,
@@ -3634,32 +3636,48 @@ public:
     void minsd_mr(int offset, RegisterID base, XMMRegisterID dst)
     {
         spew("minsd      %s0x%x(%s), %s",
              PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(dst));
         m_formatter.prefix(PRE_SSE_F2);
         m_formatter.twoByteOp(OP2_MINSD_VsdWsd, (RegisterID)dst, base, offset);
     }
 
+    void minss_rr(XMMRegisterID src, XMMRegisterID dst)
+    {
+        spew("minss      %s, %s",
+             nameFPReg(src), nameFPReg(dst));
+        m_formatter.prefix(PRE_SSE_F3);
+        m_formatter.twoByteOp(OP2_MINSS_VssWss, (RegisterID)dst, (RegisterID)src);
+    }
+
     void maxsd_rr(XMMRegisterID src, XMMRegisterID dst)
     {
         spew("maxsd      %s, %s",
              nameFPReg(src), nameFPReg(dst));
         m_formatter.prefix(PRE_SSE_F2);
         m_formatter.twoByteOp(OP2_MAXSD_VsdWsd, (RegisterID)dst, (RegisterID)src);
     }
 
     void maxsd_mr(int offset, RegisterID base, XMMRegisterID dst)
     {
         spew("maxsd      %s0x%x(%s), %s",
              PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(dst));
         m_formatter.prefix(PRE_SSE_F2);
         m_formatter.twoByteOp(OP2_MAXSD_VsdWsd, (RegisterID)dst, base, offset);
     }
 
+    void maxss_rr(XMMRegisterID src, XMMRegisterID dst)
+    {
+        spew("maxss      %s, %s",
+             nameFPReg(src), nameFPReg(dst));
+        m_formatter.prefix(PRE_SSE_F3);
+        m_formatter.twoByteOp(OP2_MAXSS_VssWss, (RegisterID)dst, (RegisterID)src);
+    }
+
     // Misc instructions:
 
     void int3()
     {
         spew("int3");
         m_formatter.oneByteOp(OP_INT3);
     }
 
--- a/js/src/jit/shared/CodeGenerator-x86-shared.cpp
+++ b/js/src/jit/shared/CodeGenerator-x86-shared.cpp
@@ -532,16 +532,68 @@ CodeGeneratorX86Shared::visitMinMaxD(LMi
     else
         masm.minsd(second, first);
 
     masm.bind(&done);
     return true;
 }
 
 bool
+CodeGeneratorX86Shared::visitMinMaxF(LMinMaxF *ins)
+{
+    FloatRegister first = ToFloatRegister(ins->first());
+    FloatRegister second = ToFloatRegister(ins->second());
+#ifdef DEBUG
+    FloatRegister output = ToFloatRegister(ins->output());
+    JS_ASSERT(first == output);
+#endif
+
+    Label done, nan, minMaxInst;
+
+    // Do a ucomiss to catch equality and NaNs, which both require special
+    // handling. If the operands are ordered and inequal, we branch straight to
+    // the min/max instruction. If we wanted, we could also branch for less-than
+    // or greater-than here instead of using min/max, however these conditions
+    // will sometimes be hard on the branch predictor.
+    masm.ucomiss(first, second);
+    masm.j(Assembler::NotEqual, &minMaxInst);
+    if (!ins->mir()->range() || ins->mir()->range()->canBeNaN())
+        masm.j(Assembler::Parity, &nan);
+
+    // Ordered and equal. The operands are bit-identical unless they are zero
+    // and negative zero. These instructions merge the sign bits in that
+    // case, and are no-ops otherwise.
+    if (ins->mir()->isMax())
+        masm.andps(second, first);
+    else
+        masm.orps(second, first);
+    masm.jump(&done);
+
+    // x86's min/max are not symmetric; if either operand is a NaN, they return
+    // the read-only operand. We need to return a NaN if either operand is a
+    // NaN, so we explicitly check for a NaN in the read-write operand.
+    if (!ins->mir()->range() || ins->mir()->range()->canBeNaN()) {
+        masm.bind(&nan);
+        masm.ucomiss(first, first);
+        masm.j(Assembler::Parity, &done);
+    }
+
+    // When the values are inequal, or second is NaN, x86's min and max will
+    // return the value we need.
+    masm.bind(&minMaxInst);
+    if (ins->mir()->isMax())
+        masm.maxss(second, first);
+    else
+        masm.minss(second, first);
+
+    masm.bind(&done);
+    return true;
+}
+
+bool
 CodeGeneratorX86Shared::visitAbsD(LAbsD *ins)
 {
     FloatRegister input = ToFloatRegister(ins->input());
     JS_ASSERT(input == ToFloatRegister(ins->output()));
     // Load a value which is all ones except for the sign bit.
     masm.loadConstantDouble(SpecificNaN<double>(0, FloatingPoint<double>::kSignificandBits),
                             ScratchDoubleReg);
     masm.andpd(ScratchDoubleReg, input);
--- a/js/src/jit/shared/CodeGenerator-x86-shared.h
+++ b/js/src/jit/shared/CodeGenerator-x86-shared.h
@@ -146,16 +146,17 @@ class CodeGeneratorX86Shared : public Co
   public:
     CodeGeneratorX86Shared(MIRGenerator *gen, LIRGraph *graph, MacroAssembler *masm);
 
   public:
     // Instruction visitors.
     virtual bool visitDouble(LDouble *ins);
     virtual bool visitFloat32(LFloat32 *ins);
     virtual bool visitMinMaxD(LMinMaxD *ins);
+    virtual bool visitMinMaxF(LMinMaxF *ins);
     virtual bool visitAbsD(LAbsD *ins);
     virtual bool visitAbsF(LAbsF *ins);
     virtual bool visitClzI(LClzI *ins);
     virtual bool visitSqrtD(LSqrtD *ins);
     virtual bool visitSqrtF(LSqrtF *ins);
     virtual bool visitPowHalfD(LPowHalfD *ins);
     virtual bool visitAddI(LAddI *ins);
     virtual bool visitSubI(LSubI *ins);