Bug 1279248 - Part 8: Implement the 64bit variant of Sub on x86, r=lth
author Hannes Verschore <hv1989@gmail.com>
Fri, 29 Jul 2016 16:51:41 +0200
changeset 332363 5512359e559fa64dc8bfb0245766d9ebdd2540fc
parent 332362 e3e8bb6b8d812c9ec3f2003fe1cf0c7e2af8d3f2
child 332364 0c56943e6d0e50426d82f90261d852768363a08a
push id 9858
push user jlund@mozilla.com
push date Mon, 01 Aug 2016 14:37:10 +0000
treeherder mozilla-aurora@203106ef6cb6 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers lth
bugs 1279248
milestone 50.0a1
Bug 1279248 - Part 8: Implement the 64bit variant of Sub on x86, r=lth
js/src/jit/MacroAssembler.h
js/src/jit/shared/LIR-shared.h
js/src/jit/x64/CodeGenerator-x64.cpp
js/src/jit/x64/CodeGenerator-x64.h
js/src/jit/x64/MacroAssembler-x64-inl.h
js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp
js/src/jit/x86-shared/CodeGenerator-x86-shared.h
js/src/jit/x86-shared/Encoding-x86-shared.h
js/src/jit/x86/Assembler-x86.h
js/src/jit/x86/BaseAssembler-x86.h
js/src/jit/x86/MacroAssembler-x86-inl.h
--- a/js/src/jit/MacroAssembler.h
+++ b/js/src/jit/MacroAssembler.h
@@ -766,16 +766,20 @@ class MacroAssembler : public MacroAssem
     inline void sub32(Imm32 imm, Register dest) PER_SHARED_ARCH;
 
     inline void subPtr(Register src, Register dest) PER_ARCH;
     inline void subPtr(Register src, const Address& dest) DEFINED_ON(mips_shared, arm, arm64, x86, x64);
     inline void subPtr(Imm32 imm, Register dest) PER_ARCH;
     inline void subPtr(ImmWord imm, Register dest) DEFINED_ON(x64);
     inline void subPtr(const Address& addr, Register dest) DEFINED_ON(mips_shared, arm, arm64, x86, x64);
 
+    inline void sub64(Register64 src, Register64 dest) DEFINED_ON(x86, x64);
+    inline void sub64(Imm64 imm, Register64 dest) DEFINED_ON(x86, x64);
+    inline void sub64(const Operand& src, Register64 dest) DEFINED_ON(x64);
+
     inline void subFloat32(FloatRegister src, FloatRegister dest) PER_SHARED_ARCH;
 
     inline void subDouble(FloatRegister src, FloatRegister dest) PER_SHARED_ARCH;
 
     // On x86-shared, srcDest must be eax and edx will be clobbered.
     inline void mul32(Register rhs, Register srcDest) PER_SHARED_ARCH;
 
     inline void mul32(Register src1, Register src2, Register dest, Label* onOver, Label* onZero) DEFINED_ON(arm64);
--- a/js/src/jit/shared/LIR-shared.h
+++ b/js/src/jit/shared/LIR-shared.h
@@ -3803,16 +3803,19 @@ class LSubI : public LBinaryMath<0>
         return mir_->toSub();
     }
 };
 
 class LSubI64 : public LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 0>
 {
   public:
     LIR_HEADER(SubI64)
+
+    static const size_t Lhs = 0; // operand index handed to getInt64Operand() for the left-hand side
+    static const size_t Rhs = INT64_PIECES; // right-hand side starts INT64_PIECES operand slots later
 };
 
 class LMulI64 : public LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 0>
 {
   public:
     LIR_HEADER(MulI64)
 };
 
--- a/js/src/jit/x64/CodeGenerator-x64.cpp
+++ b/js/src/jit/x64/CodeGenerator-x64.cpp
@@ -282,30 +282,16 @@ CodeGeneratorX64::visitRotate64(LRotate6
         if (mir->isLeftRotate())
             masm.rolq_cl(input);
         else
             masm.rorq_cl(input);
     }
 }
 
 void
-CodeGeneratorX64::visitSubI64(LSubI64* lir)
-{
-    Register lhs = ToRegister(lir->getOperand(0));
-    const LAllocation* rhs = lir->getOperand(1);
-
-    MOZ_ASSERT(ToRegister(lir->getDef(0)) == lhs);
-
-    if (rhs->isConstant())
-        masm.subPtr(ImmWord(ToInt64(rhs)), lhs);
-    else
-        masm.subq(ToOperand(rhs), lhs);
-}
-
-void
 CodeGeneratorX64::visitMulI64(LMulI64* lir)
 {
     Register lhs = ToRegister(lir->getOperand(0));
     const LAllocation* rhs = lir->getOperand(1);
 
     MOZ_ASSERT(ToRegister(lir->getDef(0)) == lhs);
 
     if (rhs->isConstant()) {
--- a/js/src/jit/x64/CodeGenerator-x64.h
+++ b/js/src/jit/x64/CodeGenerator-x64.h
@@ -48,17 +48,16 @@ class CodeGeneratorX64 : public CodeGene
     void visitUnbox(LUnbox* unbox);
     void visitCompareB(LCompareB* lir);
     void visitCompareBAndBranch(LCompareBAndBranch* lir);
     void visitCompareBitwise(LCompareBitwise* lir);
     void visitCompareBitwiseAndBranch(LCompareBitwiseAndBranch* lir);
     void visitCompareI64(LCompareI64* lir);
     void visitCompareI64AndBranch(LCompareI64AndBranch* lir);
     void visitRotate64(LRotate64* lir);
-    void visitSubI64(LSubI64* lir);
     void visitMulI64(LMulI64* lir);
     void visitDivOrModI64(LDivOrModI64* lir);
     void visitUDivOrMod64(LUDivOrMod64* lir);
     void visitNotI64(LNotI64* lir);
     void visitClzI64(LClzI64* lir);
     void visitCtzI64(LCtzI64* lir);
     void visitPopcntI64(LPopcntI64* lir);
     void visitTruncateDToInt32(LTruncateDToInt32* ins);
--- a/js/src/jit/x64/MacroAssembler-x64-inl.h
+++ b/js/src/jit/x64/MacroAssembler-x64-inl.h
@@ -234,16 +234,34 @@ MacroAssembler::subPtr(ImmWord imm, Regi
 
 void
 MacroAssembler::subPtr(const Address& addr, Register dest)
 {
     subq(Operand(addr), dest);
 }
 
 void
+MacroAssembler::sub64(const Operand& src, Register64 dest)
+{
+    subq(src, dest.reg); // x64: the 64-bit value lives in dest.reg, so one subq computes dest -= src
+}
+
+void
+MacroAssembler::sub64(Register64 src, Register64 dest)
+{
+    subq(src.reg, dest.reg); // register-register form: dest.reg -= src.reg
+}
+
+void
+MacroAssembler::sub64(Imm64 imm, Register64 dest)
+{
+    subPtr(ImmWord(imm.value), dest.reg); // delegates to the pointer-width immediate subtract (presumably 64-bit on x64 - confirm)
+}
+
+void
 MacroAssembler::mul64(Imm64 imm, const Register64& dest)
 {
     movq(ImmWord(uintptr_t(imm.value)), ScratchReg);
     imulq(ScratchReg, dest.reg);
 }
 
 void
 MacroAssembler::mulBy3(Register src, Register dest)
--- a/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp
+++ b/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp
@@ -925,16 +925,32 @@ CodeGeneratorX86Shared::visitSubI(LSubI*
             masm.j(Assembler::Overflow, ool->entry());
         } else {
             bailoutIf(Assembler::Overflow, ins->snapshot());
         }
     }
 }
 
 void
+CodeGeneratorX86Shared::visitSubI64(LSubI64* lir)
+{
+    const LInt64Allocation lhs = lir->getInt64Operand(LSubI64::Lhs);
+    const LInt64Allocation rhs = lir->getInt64Operand(LSubI64::Rhs);
+
+    MOZ_ASSERT(ToOutRegister64(lir) == ToRegister64(lhs)); // sub64 writes into lhs, so the output must alias it
+
+    if (IsConstant(rhs)) { // constant rhs: use the immediate overload of sub64
+        masm.sub64(Imm64(ToInt64(rhs)), ToRegister64(lhs));
+        return;
+    }
+
+    masm.sub64(ToOperandOrRegister64(rhs), ToRegister64(lhs)); // NOTE(review): overload picked depends on what ToOperandOrRegister64 returns per platform - confirm
+}
+
+void
 CodeGeneratorX86Shared::visitOutOfLineUndoALUOperation(OutOfLineUndoALUOperation* ool)
 {
     LInstruction* ins = ool->ins();
     Register reg = ToRegister(ins->getDef(0));
 
     DebugOnly<LAllocation*> lhs = ins->getOperand(0);
     LAllocation* rhs = ins->getOperand(1);
 
--- a/js/src/jit/x86-shared/CodeGenerator-x86-shared.h
+++ b/js/src/jit/x86-shared/CodeGenerator-x86-shared.h
@@ -235,16 +235,17 @@ class CodeGeneratorX86Shared : public Co
     virtual void visitCtzI(LCtzI* ins);
     virtual void visitPopcntI(LPopcntI* ins);
     virtual void visitSqrtD(LSqrtD* ins);
     virtual void visitSqrtF(LSqrtF* ins);
     virtual void visitPowHalfD(LPowHalfD* ins);
     virtual void visitAddI(LAddI* ins);
     virtual void visitAddI64(LAddI64* ins);
     virtual void visitSubI(LSubI* ins);
+    virtual void visitSubI64(LSubI64* ins);
     virtual void visitMulI(LMulI* ins);
     virtual void visitDivI(LDivI* ins);
     virtual void visitDivPowTwoI(LDivPowTwoI* ins);
     virtual void visitDivOrModConstantI(LDivOrModConstantI* ins);
     virtual void visitModI(LModI* ins);
     virtual void visitModPowTwoI(LModPowTwoI* ins);
     virtual void visitBitNotI(LBitNotI* ins);
     virtual void visitBitOpI(LBitOpI* ins);
--- a/js/src/jit/x86-shared/Encoding-x86-shared.h
+++ b/js/src/jit/x86-shared/Encoding-x86-shared.h
@@ -59,16 +59,17 @@ enum OneByteOpcodeID {
     OP_ADD_EAXIv                    = 0x05,
     OP_OR_EbGb                      = 0x08,
     OP_OR_EvGv                      = 0x09,
     OP_OR_GvEv                      = 0x0B,
     OP_OR_EAXIv                     = 0x0D,
     OP_2BYTE_ESCAPE                 = 0x0F,
     OP_NOP_0F                       = 0x0F,
     OP_ADC_GvEv                     = 0x13,
+    OP_SBB_GvEv                     = 0x1B,
     OP_NOP_1F                       = 0x1F,
     OP_AND_EbGb                     = 0x20,
     OP_AND_EvGv                     = 0x21,
     OP_AND_GvEv                     = 0x23,
     OP_AND_EAXIv                    = 0x25,
     OP_SUB_EbGb                     = 0x28,
     OP_SUB_EvGv                     = 0x29,
     OP_SUB_GvEv                     = 0x2B,
@@ -342,16 +343,17 @@ inline TwoByteOpcodeID setccOpcode(Condi
 {
     return TwoByteOpcodeID(OP_SETCC + cond);
 }
 
 enum GroupOpcodeID {
     GROUP1_OP_ADD = 0,
     GROUP1_OP_OR  = 1,
     GROUP1_OP_ADC = 2,
+    GROUP1_OP_SBB = 3,
     GROUP1_OP_AND = 4,
     GROUP1_OP_SUB = 5,
     GROUP1_OP_XOR = 6,
     GROUP1_OP_CMP = 7,
 
     GROUP1A_OP_POP = 0,
 
     GROUP2_OP_ROL = 0,
--- a/js/src/jit/x86/Assembler-x86.h
+++ b/js/src/jit/x86/Assembler-x86.h
@@ -398,16 +398,23 @@ class Assembler : public AssemblerX86Sha
 
     void adcl(Imm32 imm, Register dest) {
         masm.adcl_ir(imm.value, dest.encoding());
     }
     void adcl(Register src, Register dest) {
         masm.adcl_rr(src.encoding(), dest.encoding());
     }
 
+    void sbbl(Imm32 imm, Register dest) {
+        masm.sbbl_ir(imm.value, dest.encoding()); // immediate form of 32-bit subtract-with-borrow
+    }
+    void sbbl(Register src, Register dest) {
+        masm.sbbl_rr(src.encoding(), dest.encoding()); // register form of 32-bit subtract-with-borrow
+    }
+
     void mull(Register multiplier) {
         masm.mull_r(multiplier.encoding());
     }
 
     void shldl(const Imm32 imm, Register src, Register dest) {
         masm.shldl_irr(imm.value, src.encoding(), dest.encoding());
     }
     void shrdl(const Imm32 imm, Register src, Register dest) {
--- a/js/src/jit/x86/BaseAssembler-x86.h
+++ b/js/src/jit/x86/BaseAssembler-x86.h
@@ -45,16 +45,34 @@ class BaseAssemblerX86 : public BaseAsse
     }
 
     void adcl_rr(RegisterID src, RegisterID dst)
     {
         spew("adcl       %s, %s", GPReg32Name(src), GPReg32Name(dst));
         m_formatter.oneByteOp(OP_ADC_GvEv, src, dst);
     }
 
+    void sbbl_ir(int32_t imm, RegisterID dst)
+    {
+        spew("sbbl       $%d, %s", imm, GPReg32Name(dst));
+        if (CAN_SIGN_EXTEND_8_32(imm)) { // short encoding: immediate is emitted as a sign-extended byte
+            m_formatter.oneByteOp(OP_GROUP1_EvIb, dst, GROUP1_OP_SBB);
+            m_formatter.immediate8s(imm);
+        } else { // otherwise the full 32-bit immediate is emitted
+            m_formatter.oneByteOp(OP_GROUP1_EvIz, dst, GROUP1_OP_SBB);
+            m_formatter.immediate32(imm);
+        }
+    }
+
+    void sbbl_rr(RegisterID src, RegisterID dst)
+    {
+        spew("sbbl       %s, %s", GPReg32Name(src), GPReg32Name(dst));
+        m_formatter.oneByteOp(OP_SBB_GvEv, src, dst); // same argument order as adcl_rr uses with OP_ADC_GvEv
+    }
+
     using BaseAssembler::andl_im;
     void andl_im(int32_t imm, const void* addr)
     {
         spew("andl       $0x%x, %p", imm, addr);
         if (CAN_SIGN_EXTEND_8_32(imm)) {
             m_formatter.oneByteOp(OP_GROUP1_EvIb, addr, GROUP1_OP_AND);
             m_formatter.immediate8s(imm);
         } else {
--- a/js/src/jit/x86/MacroAssembler-x86-inl.h
+++ b/js/src/jit/x86/MacroAssembler-x86-inl.h
@@ -210,16 +210,34 @@ MacroAssembler::subPtr(Imm32 imm, Regist
 }
 
 void
 MacroAssembler::subPtr(const Address& addr, Register dest)
 {
     subl(Operand(addr), dest);
 }
 
+void
+MacroAssembler::sub64(Register64 src, Register64 dest)
+{
+    subl(src.low, dest.low); // low halves first; sets the carry flag when a borrow is needed
+    sbbl(src.high, dest.high); // high halves, additionally subtracting that borrow
+}
+
+void
+MacroAssembler::sub64(Imm64 imm, Register64 dest)
+{
+    if (imm.low().value == 0) { // subtracting 0 from the low half leaves it unchanged and cannot borrow
+        subl(imm.hi(), dest.high); // so a plain subtract on the high half is enough
+        return;
+    }
+    subl(imm.low(), dest.low); // low half; sets the carry flag when a borrow is needed
+    sbbl(imm.hi(), dest.high); // high half, additionally subtracting that borrow
+}
+
 // Note: this function clobbers eax and edx.
 void
 MacroAssembler::mul64(Imm64 imm, const Register64& dest)
 {
     // LOW32  = LOW(LOW(dest) * LOW(imm));
     // HIGH32 = LOW(HIGH(dest) * LOW(imm)) [multiply imm into upper bits]
     //        + LOW(LOW(dest) * HIGH(imm)) [multiply dest into upper bits]
     //        + HIGH(LOW(dest) * LOW(imm)) [carry]