Bug 1289054 - Part 6: Implement the 64bit variant of Mul on arm, r=nbp
author Hannes Verschore <hv1989@gmail.com>
Fri, 29 Jul 2016 16:53:48 +0200
changeset 349425 ea5752e51cafaef6995dea92e8574fb83d24e3a3
parent 349424 e11ffc9f839ccd254455d506e55cffe6bba7a281
child 349426 48ed814c72c6db5913e0eb5f5724035a652e57cc
push id 1230
push user jlund@mozilla.com
push date Mon, 31 Oct 2016 18:13:35 +0000
treeherder mozilla-release@5e06e3766db2 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers nbp
bugs 1289054
milestone 50.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1289054 - Part 6: Implement the 64bit variant of Mul on arm, r=nbp
js/src/jit/MacroAssembler.h
js/src/jit/arm/CodeGenerator-arm.cpp
js/src/jit/arm/CodeGenerator-arm.h
js/src/jit/arm/Lowering-arm.cpp
js/src/jit/arm/MacroAssembler-arm-inl.h
js/src/jit/arm/MacroAssembler-arm.cpp
js/src/jit/arm/MacroAssembler-arm.h
js/src/jit/arm/Simulator-arm.cpp
js/src/jit/x86/Lowering-x86.cpp
--- a/js/src/jit/MacroAssembler.h
+++ b/js/src/jit/MacroAssembler.h
@@ -785,19 +785,20 @@ class MacroAssembler : public MacroAssem
     inline void mul32(Register rhs, Register srcDest) PER_SHARED_ARCH;
 
     inline void mul32(Register src1, Register src2, Register dest, Label* onOver, Label* onZero) DEFINED_ON(arm64);
 
     inline void mul64(const Operand& src, const Register64& dest) DEFINED_ON(x64);
     inline void mul64(const Operand& src, const Register64& dest, const Register temp)
         DEFINED_ON(x64);
     inline void mul64(Imm64 imm, const Register64& dest) PER_ARCH;
-    inline void mul64(Imm64 imm, const Register64& dest, const Register temp) DEFINED_ON(x86, x64);
+    inline void mul64(Imm64 imm, const Register64& dest, const Register temp)
+        DEFINED_ON(x86, x64, arm);
     inline void mul64(const Register64& src, const Register64& dest, const Register temp)
-        DEFINED_ON(x86, x64);
+        DEFINED_ON(x86, x64, arm);
 
     inline void mulBy3(Register src, Register dest) PER_ARCH;
 
     inline void mulFloat32(FloatRegister src, FloatRegister dest) PER_SHARED_ARCH;
     inline void mulDouble(FloatRegister src, FloatRegister dest) PER_SHARED_ARCH;
 
     inline void mulDoublePtr(ImmPtr imm, Register temp, FloatRegister dest) DEFINED_ON(mips_shared, arm, arm64, x86, x64);
 
@@ -819,17 +820,17 @@ class MacroAssembler : public MacroAssem
     inline void divDouble(FloatRegister src, FloatRegister dest) PER_SHARED_ARCH;
 
     inline void inc32(RegisterOrInt32Constant* key);
     inline void inc64(AbsoluteAddress dest) PER_ARCH;
 
     inline void dec32(RegisterOrInt32Constant* key);
 
     inline void neg32(Register reg) PER_SHARED_ARCH;
-    inline void neg64(Register64 reg) DEFINED_ON(x86, x64);
+    inline void neg64(Register64 reg) DEFINED_ON(x86, x64, arm);
 
     inline void negateFloat(FloatRegister reg) PER_SHARED_ARCH;
 
     inline void negateDouble(FloatRegister reg) PER_SHARED_ARCH;
 
     inline void absFloat32(FloatRegister src, FloatRegister dest) PER_SHARED_ARCH;
     inline void absDouble(FloatRegister src, FloatRegister dest) PER_SHARED_ARCH;
 
--- a/js/src/jit/arm/CodeGenerator-arm.cpp
+++ b/js/src/jit/arm/CodeGenerator-arm.cpp
@@ -454,16 +454,57 @@ CodeGeneratorARM::visitMulI(LMulI* ins)
             bailoutIf(Assembler::Signed, ins->snapshot());
 
             masm.bind(&done);
         }
     }
 }
 
 void
+CodeGeneratorARM::visitMulI64(LMulI64* lir) // 64-bit multiply, computed in place in the lhs register pair.
+{
+    const LInt64Allocation lhs = lir->getInt64Operand(LMulI64::Lhs);
+    const LInt64Allocation rhs = lir->getInt64Operand(LMulI64::Rhs);
+
+    MOZ_ASSERT(ToRegister64(lhs) == ToOutRegister64(lir)); // Output must reuse the lhs registers.
+
+    if (IsConstant(rhs)) {
+        int64_t constant = ToInt64(rhs);
+        switch (constant) { // Cheap constants avoid the full multiply sequence.
+          case -1:
+            masm.neg64(ToRegister64(lhs)); // x * -1 == -x
+            return;
+          case 0:
+            masm.xor64(ToRegister64(lhs), ToRegister64(lhs)); // x * 0 == 0 (x ^ x)
+            return;
+          case 1:
+            // Nothing to emit: x * 1 == x and the result already lives in lhs.
+            return;
+          case 2:
+            masm.add64(ToRegister64(lhs), ToRegister64(lhs)); // x * 2 == x + x
+            return;
+          default:
+            if (constant > 0) {
+                // A positive power of two becomes a left shift by its log2.
+                int32_t shift = mozilla::FloorLog2(constant);
+                if (int64_t(1) << shift == constant) {
+                    masm.lshift64(Imm32(shift), ToRegister64(lhs));
+                    return;
+                }
+            }
+            Register temp = ToTempRegisterOrInvalid(lir->temp()); // General constant: mul64 needs a temp distinct from lhs.
+            masm.mul64(Imm64(constant), ToRegister64(lhs), temp);
+        }
+    } else {
+        Register temp = ToTempRegisterOrInvalid(lir->temp());
+        masm.mul64(ToOperandOrRegister64(rhs), ToRegister64(lhs), temp); // Non-constant rhs: full 64x64 multiply.
+    }
+}
+
+void
 CodeGeneratorARM::divICommon(MDiv* mir, Register lhs, Register rhs, Register output,
                              LSnapshot* snapshot, Label& done)
 {
     if (mir->canBeNegativeOverflow()) {
         // Handle INT32_MIN / -1;
         // The integer division will give INT32_MIN, but we want -(double)INT32_MIN.
 
         // Sets EQ if lhs == INT32_MIN.
--- a/js/src/jit/arm/CodeGenerator-arm.h
+++ b/js/src/jit/arm/CodeGenerator-arm.h
@@ -156,16 +156,17 @@ class CodeGeneratorARM : public CodeGene
     virtual void visitRoundF(LRoundF* lir);
     virtual void visitTruncateDToInt32(LTruncateDToInt32* ins);
     virtual void visitTruncateFToInt32(LTruncateFToInt32* ins);
 
     virtual void visitWrapInt64ToInt32(LWrapInt64ToInt32* lir);
     virtual void visitExtendInt32ToInt64(LExtendInt32ToInt64* lir);
     virtual void visitAddI64(LAddI64* lir);
     virtual void visitSubI64(LSubI64* lir);
+    virtual void visitMulI64(LMulI64* lir);
 
     // Out of line visitors.
     void visitOutOfLineBailout(OutOfLineBailout* ool);
     void visitOutOfLineTableSwitch(OutOfLineTableSwitch* ool);
 
   protected:
     ValueOperand ToValue(LInstruction* ins, size_t pos);
     ValueOperand ToOutValue(LInstruction* ins);
--- a/js/src/jit/arm/Lowering-arm.cpp
+++ b/js/src/jit/arm/Lowering-arm.cpp
@@ -200,17 +200,33 @@ LIRGeneratorARM::lowerForALUInt64(LInstr
     ins->setInt64Operand(INT64_PIECES,
                          lhs != rhs ? useInt64OrConstant(rhs) : useInt64OrConstantAtStart(rhs));
     defineInt64ReuseInput(ins, mir, 0);
 }
 
 void
 LIRGeneratorARM::lowerForMulInt64(LMulI64* ins, MMul* mir, MDefinition* lhs, MDefinition* rhs)
 {
-    MOZ_CRASH("NYI");
+    bool constantNeedTemp = true; // Cleared only when codegen takes a multiply-free path.
+    if (rhs->isConstant()) {
+        int64_t constant = rhs->toConstant()->toInt64();
+        int32_t shift = mozilla::FloorLog2(constant);
+        // Keep in sync with CodeGeneratorARM::visitMulI64: -1, 0, 1, 2 and positive powers of two.
+        if (constant >= -1 && constant <= 2)
+            constantNeedTemp = false;
+        if (constant > 0 && int64_t(1) << shift == constant) // constant > 0 excludes INT64_MIN (1 << 63), which codegen multiplies with a temp.
+            constantNeedTemp = false;
+    }
+    // lhs is reused as the output; rhs may be a register pair or a constant; the temp is only for the full multiply.
+    ins->setInt64Operand(0, useInt64RegisterAtStart(lhs));
+    ins->setInt64Operand(INT64_PIECES,
+                         lhs != rhs ? useInt64OrConstant(rhs) : useInt64OrConstantAtStart(rhs));
+    if (constantNeedTemp)
+        ins->setTemp(0, temp());
+    defineInt64ReuseInput(ins, mir, 0);
 }
 
 void
 LIRGeneratorARM::lowerForFPU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir, MDefinition* input)
 {
     ins->setOperand(0, useRegisterAtStart(input));
     define(ins, mir, LDefinition(LDefinition::TypeFrom(mir->type()), LDefinition::REGISTER));
 }
--- a/js/src/jit/arm/MacroAssembler-arm-inl.h
+++ b/js/src/jit/arm/MacroAssembler-arm-inl.h
@@ -380,16 +380,53 @@ MacroAssembler::mul64(Imm64 imm, const R
         MOZ_CRASH("Not supported imm");
     as_add(dest.high, dest.high, O2Reg(secondScratchReg_));
 
     // LOW(dest) = low;
     ma_mov(ScratchRegister, dest.low);
 }
 
 void
+MacroAssembler::mul64(Imm64 imm, const Register64& dest, const Register temp)
+{
+    // LOW32  = LOW(LOW(dest) * LOW(imm));                                  (1)
+    // HIGH32 = LOW(HIGH(dest) * LOW(imm)) [multiply imm into upper bits]   (2)
+    //        + LOW(LOW(dest) * HIGH(imm)) [multiply dest into upper bits]  (3)
+    //        + HIGH(LOW(dest) * LOW(imm)) [carry]                          (4)
+
+    MOZ_ASSERT(temp != dest.high && temp != dest.low); // temp carries a partial sum while dest is still live.
+
+    // Compute mul64 in place: dest = dest * imm (low 64 bits of the product).
+    ma_mul(dest.high, imm.low(), dest.high); // (2)
+    ma_mul(dest.low, imm.hi(), temp); // (3)
+    ma_add(dest.high, temp, temp); // temp = (2) + (3); kept in temp because umull overwrites dest.high next.
+    ma_umull(dest.low, imm.low(), dest.high, dest.low); // (4) + (1)
+    ma_add(temp, dest.high, dest.high); // HIGH32 = (2) + (3) + (4)
+}
+
+void
+MacroAssembler::mul64(const Register64& src, const Register64& dest, const Register temp)
+{
+    // LOW32  = LOW(LOW(dest) * LOW(src));                                  (1)
+    // HIGH32 = LOW(HIGH(dest) * LOW(src)) [multiply src into upper bits]   (2)
+    //        + LOW(LOW(dest) * HIGH(src)) [multiply dest into upper bits]  (3)
+    //        + HIGH(LOW(dest) * LOW(src)) [carry]                          (4)
+
+    MOZ_ASSERT(dest != src); // src is read again after dest.high has been overwritten.
+    MOZ_ASSERT(dest.low != src.high && dest.high != src.low); // The halves must not alias crosswise either.
+
+    // Compute mul64 in place: dest = dest * src (low 64 bits of the product).
+    ma_mul(dest.high, src.low, dest.high); // (2)
+    ma_mul(src.high, dest.low, temp); // (3)
+    ma_add(dest.high, temp, temp); // temp = (2) + (3); kept in temp because umull overwrites dest.high next.
+    ma_umull(dest.low, src.low, dest.high, dest.low); // (4) + (1)
+    ma_add(temp, dest.high, dest.high); // HIGH32 = (2) + (3) + (4)
+}
+
+void
 MacroAssembler::mulBy3(Register src, Register dest)
 {
     as_add(dest, src, lsl(src, 1));
 }
 
 void
 MacroAssembler::mulFloat32(FloatRegister src, FloatRegister dest)
 {
@@ -461,16 +498,23 @@ MacroAssembler::inc64(AbsoluteAddress de
 
 void
 MacroAssembler::neg32(Register reg)
 {
     ma_neg(reg, reg, SetCC);
 }
 
 void
+MacroAssembler::neg64(Register64 reg) // Negates the 64-bit value held in reg.low/reg.high in place.
+{
+    ma_rsb(Imm32(0), reg.low, SetCC); // low = 0 - low; SetCC records the borrow in the flags.
+    ma_rsc(Imm32(0), reg.high); // high = 0 - high, minus the borrow from the low word.
+}
+
+void
 MacroAssembler::negateDouble(FloatRegister reg)
 {
     ma_vneg(reg, reg);
 }
 
 void
 MacroAssembler::negateFloat(FloatRegister reg)
 {
--- a/js/src/jit/arm/MacroAssembler-arm.cpp
+++ b/js/src/jit/arm/MacroAssembler-arm.cpp
@@ -896,16 +896,30 @@ MacroAssemblerARM::ma_check_mul(Register
         as_cmp(scratch, asr(dest, 31));
         return NotEqual;
     }
 
     MOZ_CRASH("Condition NYI");
 }
 
 void
+MacroAssemblerARM::ma_umull(Register src1, Imm32 imm, Register destHigh, Register destLow) // destHigh:destLow = src1 * imm, unsigned.
+{
+    ScratchRegisterScope scratch(asMasm());
+    ma_mov(imm, scratch); // Materialize the immediate in the scratch register, then multiply register by register.
+    as_umull(destHigh, destLow, src1, scratch);
+}
+
+void
+MacroAssemblerARM::ma_umull(Register src1, Register src2, Register destHigh, Register destLow) // destHigh:destLow = src1 * src2, unsigned.
+{
+    as_umull(destHigh, destLow, src1, src2); // Thin wrapper: forwards to the assembler's umull with the destinations first.
+}
+
+void
 MacroAssemblerARM::ma_mod_mask(Register src, Register dest, Register hold, Register tmp,
                                int32_t shift)
 {
     // We wish to compute x % (1<<y) - 1 for a known constant, y.
     //
     // 1. Let b = (1<<y) and C = (1<<y)-1, then think of the 32 bit dividend as
     // a number in base b, namely c_0*1 + c_1*b + c_2*b^2 ... c_n*b^n
     //
--- a/js/src/jit/arm/MacroAssembler-arm.h
+++ b/js/src/jit/arm/MacroAssembler-arm.h
@@ -257,16 +257,19 @@ class MacroAssemblerARM : public Assembl
     void ma_tst(Register src1, Operand op, Condition c = Always);
 
     // Multiplies. For now, there are only two that we care about.
     void ma_mul(Register src1, Register src2, Register dest);
     void ma_mul(Register src1, Imm32 imm, Register dest);
     Condition ma_check_mul(Register src1, Register src2, Register dest, Condition cond);
     Condition ma_check_mul(Register src1, Imm32 imm, Register dest, Condition cond);
 
+    void ma_umull(Register src1, Register src2, Register destHigh, Register destLow);
+    void ma_umull(Register src1, Imm32 imm, Register destHigh, Register destLow);
+
     // Fast mod, uses scratch registers, and thus needs to be in the assembler
     // implicitly assumes that we can overwrite dest at the beginning of the
     // sequence.
     void ma_mod_mask(Register src, Register dest, Register hold, Register tmp,
                      int32_t shift);
 
     // Mod - depends on integer divide instructions being supported.
     void ma_smod(Register num, Register div, Register dest);
--- a/js/src/jit/arm/Simulator-arm.cpp
+++ b/js/src/jit/arm/Simulator-arm.cpp
@@ -3093,17 +3093,20 @@ Simulator::decodeType01(SimInstruction* 
             break;
           case OpSbc:
             alu_out = rn_val - shifter_operand - (getCarry() == 0 ? 1 : 0);
             set_register(rd, alu_out);
             if (instr->hasS())
                 MOZ_CRASH();
             break;
           case OpRsc:
-            MOZ_CRASH();
+            alu_out = shifter_operand - rn_val - (getCarry() == 0 ? 1 : 0);
+            set_register(rd, alu_out);
+            if (instr->hasS())
+                MOZ_CRASH();
             break;
           case OpTst:
             if (instr->hasS()) {
                 alu_out = rn_val & shifter_operand;
                 setNZFlags(alu_out);
                 setCFlag(shifter_carry_out);
             } else {
                 alu_out = instr->immedMovwMovtValue();
--- a/js/src/jit/x86/Lowering-x86.cpp
+++ b/js/src/jit/x86/Lowering-x86.cpp
@@ -210,20 +210,21 @@ LIRGeneratorX86::lowerForALUInt64(LInstr
 
 void
 LIRGeneratorX86::lowerForMulInt64(LMulI64* ins, MMul* mir, MDefinition* lhs, MDefinition* rhs)
 {
     bool constantNeedTemp = true;
     if (rhs->isConstant()) {
         int64_t constant = rhs->toConstant()->toInt64();
         int32_t shift = mozilla::FloorLog2(constant);
-        if (constant <= 0 || int64_t(1) << shift != constant) {
-            constantNeedTemp = constant != -1 && constant != 0 &&
-                               constant != 1 && constant != 2;
-        }
+        // See special cases in CodeGeneratorX86Shared::visitMulI64
+        if (constant >= -1 && constant <= 2)
+            constantNeedTemp = false;
+        if (int64_t(1) << shift == constant)
+            constantNeedTemp = false;
     }
 
     // MulI64 on x86 needs output to be in edx, eax;
     ins->setInt64Operand(0, useInt64Fixed(lhs, Register64(edx, eax), /*useAtStart = */ true));
     ins->setInt64Operand(INT64_PIECES,
             lhs != rhs ? useInt64OrConstant(rhs) : useInt64OrConstantAtStart(rhs));
     if (constantNeedTemp)
         ins->setTemp(0, temp());