Bug 1289054 - Part 16: Implement the 64bit variant of Clz and Ctz on arm, r=bbouvier
author Hannes Verschore <hv1989@gmail.com>
Fri, 29 Jul 2016 16:53:49 +0200
changeset 347365 973d29183f5f8159b48feebbb114db03a83def87
parent 347364 4c042c13a73a4c9ac17ad682eb86b2d8e21b38b7
child 347366 2f7ac340ea3c3fa7c3c1d1b37340f6d2ed56f4a5
push id 6389
push user raliiev@mozilla.com
push date Mon, 19 Sep 2016 13:38:22 +0000
treeherder mozilla-beta@01d67bfe6c81 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers bbouvier
bugs 1289054
milestone 50.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1289054 - Part 16: Implement the 64bit variant of Clz and Ctz on arm, r=bbouvier
js/src/jit/MacroAssembler.h
js/src/jit/arm/CodeGenerator-arm.cpp
js/src/jit/arm/CodeGenerator-arm.h
js/src/jit/arm/MacroAssembler-arm-inl.h
--- a/js/src/jit/MacroAssembler.h
+++ b/js/src/jit/MacroAssembler.h
@@ -904,18 +904,18 @@ class MacroAssembler : public MacroAssem
 
     // ===============================================================
     // Bit counting functions
 
     // knownNotZero may be true only if the src is known not to be zero.
     inline void clz32(Register src, Register dest, bool knownNotZero) PER_SHARED_ARCH;
     inline void ctz32(Register src, Register dest, bool knownNotZero) PER_SHARED_ARCH;
 
-    inline void clz64(Register64 src, Register dest) DEFINED_ON(x86, x64);
-    inline void ctz64(Register64 src, Register dest) DEFINED_ON(x86, x64);
+    inline void clz64(Register64 src, Register dest) DEFINED_ON(x86, x64, arm);
+    inline void ctz64(Register64 src, Register dest) DEFINED_ON(x86, x64, arm);
 
     // On x86_shared, temp may be Invalid only if the chip has the POPCNT instruction.
     // On ARM, temp may never be Invalid.
     inline void popcnt32(Register src, Register dest, Register temp) DEFINED_ON(arm, x86_shared);
 
     // temp may be invalid only if the chip has the POPCNT instruction.
     inline void popcnt64(Register64 src, Register64 dest, Register temp) DEFINED_ON(x86, x64, arm);
 
--- a/js/src/jit/arm/CodeGenerator-arm.cpp
+++ b/js/src/jit/arm/CodeGenerator-arm.cpp
@@ -977,30 +977,26 @@ CodeGeneratorARM::visitUrshD(LUrshD* ins
 }
 
 void
 CodeGeneratorARM::visitClzI(LClzI* ins)
 {
     Register input = ToRegister(ins->input());
     Register output = ToRegister(ins->output());
 
-    masm.ma_clz(input, output);
+    masm.clz32(input, output, /* knownNotZero = */ false);
 }
 
 void
 CodeGeneratorARM::visitCtzI(LCtzI* ins)
 {
     Register input = ToRegister(ins->input());
     Register output = ToRegister(ins->output());
-    ScratchRegisterScope scratch(masm);
-
-    masm.ma_rsb(input, Imm32(0), scratch, SetCC);
-    masm.ma_and(input, scratch, input);
-    masm.ma_clz(input, output);
-    masm.ma_rsb(input, Imm32(0x1f), output, LeaveCC, Assembler::NotEqual);
+
+    masm.ctz32(input, output, /* knownNotZero = */ false);
 }
 
 void
 CodeGeneratorARM::visitPopcntI(LPopcntI* ins)
 {
     Register input = ToRegister(ins->input());
     Register output = ToRegister(ins->output());
 
@@ -3415,8 +3411,28 @@ CodeGeneratorARM::visitPopcntI64(LPopcnt
 {
     Register64 input = ToRegister64(lir->getInt64Operand(0));
     Register64 output = ToOutRegister64(lir);
     Register temp = ToRegister(lir->getTemp(0));
 
     masm.popcnt64(input, output, temp);
 }
 
+void
+CodeGeneratorARM::visitClzI64(LClzI64* lir)
+{
+    Register64 input = ToRegister64(lir->getInt64Operand(0));
+    Register64 output = ToOutRegister64(lir);
+
+    masm.clz64(input, output.low);
+    masm.move32(Imm32(0), output.high);
+}
+
+void
+CodeGeneratorARM::visitCtzI64(LCtzI64* lir)
+{
+    Register64 input = ToRegister64(lir->getInt64Operand(0));
+    Register64 output = ToOutRegister64(lir);
+
+    masm.ctz64(input, output.low);
+    masm.move32(Imm32(0), output.high);
+}
+
--- a/js/src/jit/arm/CodeGenerator-arm.h
+++ b/js/src/jit/arm/CodeGenerator-arm.h
@@ -169,16 +169,18 @@ class CodeGeneratorARM : public CodeGene
     virtual void visitCompareI64AndBranch(LCompareI64AndBranch* lir);
     virtual void visitBitOpI64(LBitOpI64* lir);
     virtual void visitRotateI64(LRotateI64* lir);
     virtual void visitAsmJSPassStackArgI64(LAsmJSPassStackArgI64* lir);
     virtual void visitAsmSelectI64(LAsmSelectI64* lir);
     virtual void visitAsmReinterpretFromI64(LAsmReinterpretFromI64* lir);
     virtual void visitAsmReinterpretToI64(LAsmReinterpretToI64* lir);
     virtual void visitPopcntI64(LPopcntI64* ins);
+    virtual void visitClzI64(LClzI64* ins);
+    virtual void visitCtzI64(LCtzI64* ins);
 
     // Out of line visitors.
     void visitOutOfLineBailout(OutOfLineBailout* ool);
     void visitOutOfLineTableSwitch(OutOfLineTableSwitch* ool);
 
   protected:
     ValueOperand ToValue(LInstruction* ins, size_t pos);
     ValueOperand ToOutValue(LInstruction* ins);
--- a/js/src/jit/arm/MacroAssembler-arm-inl.h
+++ b/js/src/jit/arm/MacroAssembler-arm-inl.h
@@ -951,22 +951,51 @@ MacroAssembler::rotateRight64(Register s
 
 void
 MacroAssembler::clz32(Register src, Register dest, bool knownNotZero)
 {
     ma_clz(src, dest);
 }
 
 void
+MacroAssembler::clz64(Register64 src, Register dest)
+{
+    ScratchRegisterScope scratch(*this);
+    ma_clz(src.high, scratch);
+    ma_cmp(scratch, Imm32(32));
+    ma_mov(scratch, dest, LeaveCC, NotEqual);
+    ma_clz(src.low, dest, Equal);
+    ma_add(Imm32(32), dest, LeaveCC, Equal);
+}
+
+void
 MacroAssembler::ctz32(Register src, Register dest, bool knownNotZero)
 {
     ma_ctz(src, dest);
 }
 
 void
+MacroAssembler::ctz64(Register64 src, Register dest)
+{
+    Label done, high;
+
+    ma_cmp(src.low, Imm32(0));
+    ma_b(&high, Equal);
+
+    ctz32(src.low, dest, /* knownNotZero = */ true);
+    ma_b(&done);
+
+    bind(&high);
+    ctz32(src.high, dest, /* knownNotZero = */ false);
+    ma_add(Imm32(32), dest);
+
+    bind(&done);
+}
+
+void
 MacroAssembler::popcnt32(Register input,  Register output, Register tmp)
 {
     // Equivalent to GCC output of mozilla::CountPopulation32()
 
     if (input != output)
         ma_mov(input, output);
     as_mov(tmp, asr(output, 1));
     ma_and(Imm32(0x55555555), tmp);