Bug 995230 - Inline Math.clz32. r=jandem
authorTom Schuster <evilpies@gmail.com>
Sat, 23 Aug 2014 12:58:22 +0200
changeset 201215 c9ffa291cfe3597c53b00a37d485c9d730586a89
parent 201214 1ed271ffb59cabf5349ed5f08a5799aff60a34ab
child 201216 9339c5e928e255fcc717d59e115b866341ac2c2b
push id48111
push userevilpies@gmail.com
push dateSat, 23 Aug 2014 10:58:58 +0000
treeherdermozilla-inbound@a67d187584ed [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersjandem
bugs995230
milestone34.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 995230 - Inline Math.clz32. r=jandem
js/src/jit-test/tests/basic/testMathClz32.js
js/src/jit/IonBuilder.h
js/src/jit/LIR-Common.h
js/src/jit/LOpcodes.h
js/src/jit/Lowering.cpp
js/src/jit/Lowering.h
js/src/jit/MCallOptimize.cpp
js/src/jit/MIR.cpp
js/src/jit/MIR.h
js/src/jit/MOpcodes.h
js/src/jit/ParallelSafetyAnalysis.cpp
js/src/jit/RangeAnalysis.cpp
js/src/jit/shared/Assembler-x86-shared.h
js/src/jit/shared/BaseAssembler-x86-shared.h
js/src/jit/shared/CodeGenerator-x86-shared.cpp
js/src/jit/shared/CodeGenerator-x86-shared.h
new file mode 100644
--- /dev/null
+++ b/js/src/jit-test/tests/basic/testMathClz32.js
@@ -0,0 +1,28 @@
+function f() {
+    var x = 0;
+    for (var i = 1; i < 1e6; i++) {
+        if (i > 0)
+            x += Math.clz32(i);
+    }
+    return x;
+}
+
+function g() {
+    var x = 0;
+    for (var i = 1; i < 1e6; i++) {
+        x += Math.clz32(i);
+    }
+    return x;
+}
+
+function h() {
+    var x = 0;
+    for (var i = 0; i < 1e6; i++) {
+        x += Math.clz32(i);
+    }
+    return x;
+}
+
+assertEq(f(), 13048543);
+assertEq(g(), 13048543);
+assertEq(h(), 13048575);
\ No newline at end of file
--- a/js/src/jit/IonBuilder.h
+++ b/js/src/jit/IonBuilder.h
@@ -679,16 +679,17 @@ class IonBuilder : public MIRGenerator
     InliningStatus inlineArrayConcat(CallInfo &callInfo);
     InliningStatus inlineArrayJoin(CallInfo &callInfo);
     InliningStatus inlineArraySplice(CallInfo &callInfo);
 
     // Math natives.
     InliningStatus inlineMathAbs(CallInfo &callInfo);
     InliningStatus inlineMathFloor(CallInfo &callInfo);
     InliningStatus inlineMathCeil(CallInfo &callInfo);
+    InliningStatus inlineMathClz32(CallInfo &callInfo);
     InliningStatus inlineMathRound(CallInfo &callInfo);
     InliningStatus inlineMathSqrt(CallInfo &callInfo);
     InliningStatus inlineMathAtan2(CallInfo &callInfo);
     InliningStatus inlineMathHypot(CallInfo &callInfo);
     InliningStatus inlineMathMinMax(CallInfo &callInfo, bool max);
     InliningStatus inlineMathPow(CallInfo &callInfo);
     InliningStatus inlineMathRandom(CallInfo &callInfo);
     InliningStatus inlineMathImul(CallInfo &callInfo);
--- a/js/src/jit/LIR-Common.h
+++ b/js/src/jit/LIR-Common.h
@@ -2672,16 +2672,30 @@ class LAbsF : public LInstructionHelper<
 {
   public:
     LIR_HEADER(AbsF)
     explicit LAbsF(const LAllocation &num) {
         setOperand(0, num);
     }
 };
 
+// Count leading zeroes
+class LClzI : public LInstructionHelper<1, 1, 0>
+{
+  public:
+    LIR_HEADER(ClzI)
+    LClzI(const LAllocation &num) {
+        setOperand(0, num);
+    }
+
+    MClz *mir() const {
+        return mir_->toClz();
+    }
+};
+
 // Square root of a double.
 class LSqrtD : public LInstructionHelper<1, 1, 0>
 {
   public:
     LIR_HEADER(SqrtD)
     explicit LSqrtD(const LAllocation &num) {
         setOperand(0, num);
     }
--- a/js/src/jit/LOpcodes.h
+++ b/js/src/jit/LOpcodes.h
@@ -110,16 +110,17 @@
     _(MinMaxI)                      \
     _(MinMaxD)                      \
     _(NegI)                         \
     _(NegD)                         \
     _(NegF)                         \
     _(AbsI)                         \
     _(AbsD)                         \
     _(AbsF)                         \
+    _(ClzI)                         \
     _(SqrtD)                        \
     _(SqrtF)                        \
     _(Atan2D)                       \
     _(Hypot)                        \
     _(PowI)                         \
     _(PowD)                         \
     _(Random)                       \
     _(MathFunctionD)                \
--- a/js/src/jit/Lowering.cpp
+++ b/js/src/jit/Lowering.cpp
@@ -1292,16 +1292,25 @@ LIRGenerator::visitAbs(MAbs *ins)
         return defineReuseInput(lir, ins, 0);
     }
 
     LAbsD *lir = new(alloc()) LAbsD(useRegisterAtStart(num));
     return defineReuseInput(lir, ins, 0);
 }
 
 bool
+LIRGenerator::visitClz(MClz *ins)
+{
+    MDefinition *num = ins->num();
+
+    LClzI *lir = new(alloc()) LClzI(useRegisterAtStart(num));
+    return define(lir, ins);
+}
+
+bool
 LIRGenerator::visitSqrt(MSqrt *ins)
 {
     MDefinition *num = ins->input();
     JS_ASSERT(IsFloatingPointType(num->type()));
     if (num->type() == MIRType_Double) {
         LSqrtD *lir = new(alloc()) LSqrtD(useRegisterAtStart(num));
         return define(lir, ins);
     }
--- a/js/src/jit/Lowering.h
+++ b/js/src/jit/Lowering.h
@@ -120,16 +120,17 @@ class LIRGenerator : public LIRGenerator
     bool visitLsh(MLsh *ins);
     bool visitRsh(MRsh *ins);
     bool visitUrsh(MUrsh *ins);
     bool visitFloor(MFloor *ins);
     bool visitCeil(MCeil *ins);
     bool visitRound(MRound *ins);
     bool visitMinMax(MMinMax *ins);
     bool visitAbs(MAbs *ins);
+    bool visitClz(MClz *ins);
     bool visitSqrt(MSqrt *ins);
     bool visitAtan2(MAtan2 *ins);
     bool visitHypot(MHypot *ins);
     bool visitPow(MPow *ins);
     bool visitRandom(MRandom *ins);
     bool visitMathFunction(MMathFunction *ins);
     bool visitAdd(MAdd *ins);
     bool visitSub(MSub *ins);
--- a/js/src/jit/MCallOptimize.cpp
+++ b/js/src/jit/MCallOptimize.cpp
@@ -49,16 +49,18 @@ IonBuilder::inlineNativeCall(CallInfo &c
 
     // Math natives.
     if (native == js::math_abs)
         return inlineMathAbs(callInfo);
     if (native == js::math_floor)
         return inlineMathFloor(callInfo);
     if (native == js::math_ceil)
         return inlineMathCeil(callInfo);
+    if (native == js::math_clz32)
+        return inlineMathClz32(callInfo);
     if (native == js::math_round)
         return inlineMathRound(callInfo);
     if (native == js::math_sqrt)
         return inlineMathSqrt(callInfo);
     if (native == js::math_atan2)
         return inlineMathAtan2(callInfo);
     if (native == js::math_hypot)
         return inlineMathHypot(callInfo);
@@ -787,16 +789,41 @@ IonBuilder::inlineMathCeil(CallInfo &cal
         current->push(ins);
         return InliningStatus_Inlined;
     }
 
     return InliningStatus_NotInlined;
 }
 
 IonBuilder::InliningStatus
+IonBuilder::inlineMathClz32(CallInfo &callInfo)
+{
+    if (callInfo.constructing())
+        return InliningStatus_NotInlined;
+
+    if (callInfo.argc() != 1)
+        return InliningStatus_NotInlined;
+
+    MIRType returnType = getInlineReturnType();
+    if (returnType != MIRType_Int32)
+        return InliningStatus_NotInlined;
+
+    if (!IsNumberType(callInfo.getArg(0)->type()))
+        return InliningStatus_NotInlined;
+
+    callInfo.setImplicitlyUsedUnchecked();
+
+    MClz *ins = MClz::New(alloc(), callInfo.getArg(0));
+    current->add(ins);
+    current->push(ins);
+    return InliningStatus_Inlined;
+
+}
+
+IonBuilder::InliningStatus
 IonBuilder::inlineMathRound(CallInfo &callInfo)
 {
     if (callInfo.constructing())
         return InliningStatus_NotInlined;
 
     if (callInfo.argc() != 1)
         return InliningStatus_NotInlined;
 
--- a/js/src/jit/MIR.cpp
+++ b/js/src/jit/MIR.cpp
@@ -3420,16 +3420,29 @@ MSqrt::trySpecializeFloat32(TempAllocato
         return;
     }
 
     setResultType(MIRType_Float32);
     setPolicyType(MIRType_Float32);
 }
 
 MDefinition *
+MClz::foldsTo(TempAllocator &alloc)
+{
+    if (num()->isConstant()) {
+        int32_t n = num()->toConstant()->value().toInt32();
+        if (n == 0)
+            return MConstant::New(alloc, Int32Value(32));
+        return MConstant::New(alloc, Int32Value(mozilla::CountLeadingZeroes32(n)));
+    }
+
+    return this;
+}
+
+MDefinition *
 MBoundsCheck::foldsTo(TempAllocator &alloc)
 {
     if (index()->isConstant() && length()->isConstant()) {
        uint32_t len = length()->toConstant()->value().toInt32();
        uint32_t idx = index()->toConstant()->value().toInt32();
        if (idx + uint32_t(minimum()) < len && idx + uint32_t(maximum()) < len)
            return index();
     }
--- a/js/src/jit/MIR.h
+++ b/js/src/jit/MIR.h
@@ -4526,16 +4526,60 @@ class MAbs
     bool writeRecoverData(CompactBufferWriter &writer) const;
     bool canRecoverOnBailout() const {
         return true;
     }
 
     ALLOW_CLONE(MAbs)
 };
 
+class MClz
+    : public MUnaryInstruction
+    , public BitwisePolicy
+{
+    bool operandIsNeverZero_;
+
+    MClz(MDefinition *num)
+      : MUnaryInstruction(num),
+        operandIsNeverZero_(false)
+    {
+        JS_ASSERT(IsNumberType(num->type()));
+        specialization_ = MIRType_Int32;
+        setResultType(MIRType_Int32);
+        setMovable();
+    }
+
+  public:
+    INSTRUCTION_HEADER(Clz)
+    static MClz *New(TempAllocator &alloc, MDefinition *num) {
+        return new(alloc) MClz(num);
+    }
+    MDefinition *num() const {
+        return getOperand(0);
+    }
+    TypePolicy *typePolicy() {
+        return this;
+    }
+    bool congruentTo(const MDefinition *ins) const {
+        return congruentIfOperandsEqual(ins);
+    }
+
+    AliasSet getAliasSet() const {
+        return AliasSet::None();
+    }
+
+    bool operandIsNeverZero() const {
+        return operandIsNeverZero_;
+    }
+
+    MDefinition *foldsTo(TempAllocator &alloc);
+    void computeRange(TempAllocator &alloc);
+    void collectRangeInfoPreTrunc();
+};
+
 // Inline implementation of Math.sqrt().
 class MSqrt
   : public MUnaryInstruction,
     public FloatingPointPolicy<0>
 {
     MSqrt(MDefinition *num, MIRType type)
       : MUnaryInstruction(num)
     {
--- a/js/src/jit/MOpcodes.h
+++ b/js/src/jit/MOpcodes.h
@@ -58,16 +58,17 @@ namespace jit {
     _(BitAnd)                                                               \
     _(BitOr)                                                                \
     _(BitXor)                                                               \
     _(Lsh)                                                                  \
     _(Rsh)                                                                  \
     _(Ursh)                                                                 \
     _(MinMax)                                                               \
     _(Abs)                                                                  \
+    _(Clz)                                                                  \
     _(Sqrt)                                                                 \
     _(Atan2)                                                                \
     _(Hypot)                                                                \
     _(Pow)                                                                  \
     _(PowHalf)                                                              \
     _(Random)                                                               \
     _(MathFunction)                                                         \
     _(Add)                                                                  \
--- a/js/src/jit/ParallelSafetyAnalysis.cpp
+++ b/js/src/jit/ParallelSafetyAnalysis.cpp
@@ -156,16 +156,17 @@ class ParallelSafetyVisitor : public MDe
     SAFE_OP(BitAnd)
     SAFE_OP(BitOr)
     SAFE_OP(BitXor)
     SAFE_OP(Lsh)
     SAFE_OP(Rsh)
     SAFE_OP(Ursh)
     SPECIALIZED_OP(MinMax, PERMIT_NUMERIC)
     SAFE_OP(Abs)
+    SAFE_OP(Clz)
     SAFE_OP(Sqrt)
     UNSAFE_OP(Atan2)
     UNSAFE_OP(Hypot)
     CUSTOM_OP(MathFunction)
     SPECIALIZED_OP(Add, PERMIT_NUMERIC)
     SPECIALIZED_OP(Sub, PERMIT_NUMERIC)
     SPECIALIZED_OP(Mul, PERMIT_NUMERIC)
     SPECIALIZED_OP(Div, PERMIT_NUMERIC)
--- a/js/src/jit/RangeAnalysis.cpp
+++ b/js/src/jit/RangeAnalysis.cpp
@@ -1224,16 +1224,22 @@ MFloor::computeRange(TempAllocator &allo
 void
 MCeil::computeRange(TempAllocator &alloc)
 {
     Range other(getOperand(0));
     setRange(Range::ceil(alloc, &other));
 }
 
 void
+MClz::computeRange(TempAllocator &alloc)
+{
+    setRange(Range::NewUInt32Range(alloc, 0, 32));
+}
+
+void
 MMinMax::computeRange(TempAllocator &alloc)
 {
     if (specialization_ != MIRType_Int32 && specialization_ != MIRType_Double)
         return;
 
     Range left(getOperand(0));
     Range right(getOperand(1));
     setRange(isMax() ? Range::max(alloc, &left, &right) : Range::min(alloc, &left, &right));
@@ -2663,16 +2669,24 @@ void
 MLoadElementHole::collectRangeInfoPreTrunc()
 {
     Range indexRange(index());
     if (indexRange.isFiniteNonNegative())
         needsNegativeIntCheck_ = false;
 }
 
 void
+MClz::collectRangeInfoPreTrunc()
+{
+    Range inputRange(input());
+    if (!inputRange.canBeZero())
+        operandIsNeverZero_ = true;
+}
+
+void
 MDiv::collectRangeInfoPreTrunc()
 {
     Range lhsRange(lhs());
     Range rhsRange(rhs());
 
     // Test if Dividend is non-negative.
     if (lhsRange.isFiniteNonNegative())
         canBeNegativeDividend_ = false;
--- a/js/src/jit/shared/Assembler-x86-shared.h
+++ b/js/src/jit/shared/Assembler-x86-shared.h
@@ -1155,16 +1155,19 @@ class AssemblerX86Shared : public Assemb
             break;
           case Operand::MEM_REG_DISP:
             masm.andl_mr(src.disp(), src.base(), dest.code());
             break;
           default:
             MOZ_CRASH("unexpected operand kind");
         }
     }
+    void bsr(const Register &src, const Register &dest) {
+        masm.bsr_rr(src.code(), dest.code());
+    }
     void imull(Register multiplier) {
         masm.imull_r(multiplier.code());
     }
     void imull(Imm32 imm, Register dest) {
         masm.imull_i32r(dest.code(), imm.value, dest.code());
     }
     void imull(Register src, Register dest) {
         masm.imull_rr(src.code(), dest.code());
--- a/js/src/jit/shared/BaseAssembler-x86-shared.h
+++ b/js/src/jit/shared/BaseAssembler-x86-shared.h
@@ -319,16 +319,17 @@ private:
         OP2_PSRLDQ_Vd       = 0x73,
         OP2_PCMPEQW         = 0x75,
         OP2_MOVD_EdVd       = 0x7E,
         OP2_MOVDQ_WdqVdq    = 0x7F,
         OP2_JCC_rel32       = 0x80,
         OP_SETCC            = 0x90,
         OP2_IMUL_GvEv       = 0xAF,
         OP2_CMPXCHG_GvEw    = 0xB1,
+        OP2_BSR_GvEv        = 0xBD,
         OP2_MOVSX_GvEb      = 0xBE,
         OP2_MOVSX_GvEw      = 0xBF,
         OP2_MOVZX_GvEb      = 0xB6,
         OP2_MOVZX_GvEw      = 0xB7,
         OP2_XADD_EvGv       = 0xC1,
         OP2_PEXTRW_GdUdIb   = 0xC5,
         OP2_SHUFPS_VpsWpsIb = 0xC6,
         OP2_PXORDQ_VdqWdq   = 0xEF,
@@ -1296,16 +1297,22 @@ public:
             m_formatter.oneByteOp64(OP_GROUP2_Ev1, GROUP2_OP_SHR, dst);
         else {
             m_formatter.oneByteOp64(OP_GROUP2_EvIb, GROUP2_OP_SHR, dst);
             m_formatter.immediate8(imm);
         }
     }
 #endif
 
+    void bsr_rr(RegisterID src, RegisterID dst)
+    {
+        spew("bsr        %s, %s", nameIReg(4, src), nameIReg(4, dst));
+        m_formatter.twoByteOp(OP2_BSR_GvEv, dst, src);
+    }
+
     void imull_rr(RegisterID src, RegisterID dst)
     {
         spew("imull      %s, %s", nameIReg(4,src), nameIReg(4, dst));
         m_formatter.twoByteOp(OP2_IMUL_GvEv, dst, src);
     }
 
     void imull_r(RegisterID multiplier)
     {
--- a/js/src/jit/shared/CodeGenerator-x86-shared.cpp
+++ b/js/src/jit/shared/CodeGenerator-x86-shared.cpp
@@ -540,16 +540,38 @@ CodeGeneratorX86Shared::visitAbsF(LAbsF 
     // Same trick as visitAbsD above.
     masm.loadConstantFloat32(SpecificNaN<float>(0, FloatingPoint<float>::kSignificandBits),
                              ScratchFloat32Reg);
     masm.andps(ScratchFloat32Reg, input);
     return true;
 }
 
 bool
+CodeGeneratorX86Shared::visitClzI(LClzI *ins)
+{
+    Register input = ToRegister(ins->input());
+    Register output = ToRegister(ins->output());
+
+    // bsr is undefined on 0
+    Label done, nonzero;
+    if (!ins->mir()->operandIsNeverZero()) {
+        masm.testl(input, input);
+        masm.j(Assembler::NonZero, &nonzero);
+        masm.move32(Imm32(32), output);
+        masm.jump(&done);
+    }
+
+    masm.bind(&nonzero);
+    masm.bsr(input, output);
+    masm.xor32(Imm32(0x1F), output);
+    masm.bind(&done);
+    return true;
+}
+
+bool
 CodeGeneratorX86Shared::visitSqrtD(LSqrtD *ins)
 {
     FloatRegister input = ToFloatRegister(ins->input());
     FloatRegister output = ToFloatRegister(ins->output());
     masm.sqrtsd(input, output);
     return true;
 }
 
--- a/js/src/jit/shared/CodeGenerator-x86-shared.h
+++ b/js/src/jit/shared/CodeGenerator-x86-shared.h
@@ -148,16 +148,17 @@ class CodeGeneratorX86Shared : public Co
 
   public:
     // Instruction visitors.
     virtual bool visitDouble(LDouble *ins);
     virtual bool visitFloat32(LFloat32 *ins);
     virtual bool visitMinMaxD(LMinMaxD *ins);
     virtual bool visitAbsD(LAbsD *ins);
     virtual bool visitAbsF(LAbsF *ins);
+    virtual bool visitClzI(LClzI *ins);
     virtual bool visitSqrtD(LSqrtD *ins);
     virtual bool visitSqrtF(LSqrtF *ins);
     virtual bool visitPowHalfD(LPowHalfD *ins);
     virtual bool visitAddI(LAddI *ins);
     virtual bool visitSubI(LSubI *ins);
     virtual bool visitMulI(LMulI *ins);
     virtual bool visitDivI(LDivI *ins);
     virtual bool visitDivPowTwoI(LDivPowTwoI *ins);