Bug 1279248 - Part 5: Implement the 64bit variant of Shift on x86, r=bbouvier
author Hannes Verschore <hv1989@gmail.com>
Fri, 29 Jul 2016 16:51:41 +0200
changeset 349396 27826b22e1406ba39e5bc6f11546f411b7ad7324
parent 349395 8bc3e8bbbf524ea9888df6c341db15c2df62e555
child 349397 02f604c9ad7330732c13792141aa24dc5f0c4d92
push id 1230
push user jlund@mozilla.com
push date Mon, 31 Oct 2016 18:13:35 +0000
treeherder mozilla-release@5e06e3766db2 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers bbouvier
bugs 1279248
milestone 50.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1279248 - Part 5: Implement the 64bit variant of Shift on x86, r=bbouvier
js/src/jit/MacroAssembler.h
js/src/jit/shared/LIR-shared.h
js/src/jit/x64/CodeGenerator-x64.cpp
js/src/jit/x64/CodeGenerator-x64.h
js/src/jit/x64/MacroAssembler-x64-inl.h
js/src/jit/x86-shared/Assembler-x86-shared.h
js/src/jit/x86-shared/BaseAssembler-x86-shared.h
js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp
js/src/jit/x86-shared/CodeGenerator-x86-shared.h
js/src/jit/x86-shared/Encoding-x86-shared.h
js/src/jit/x86/MacroAssembler-x86-inl.h
--- a/js/src/jit/MacroAssembler.h
+++ b/js/src/jit/MacroAssembler.h
@@ -831,35 +831,37 @@ class MacroAssembler : public MacroAssem
     // For shift-by-register there may be platform-specific
     // variations, for example, x86 will perform the shift mod 32 but
     // ARM will perform the shift mod 256.
     //
     // For shift-by-immediate the platform assembler may restrict the
     // immediate, for example, the ARM assembler requires the count
     // for 32-bit shifts to be in the range [0,31].
 
+    inline void lshift32(Imm32 shift, Register srcDest) PER_SHARED_ARCH;
+    inline void rshift32(Imm32 shift, Register srcDest) PER_SHARED_ARCH;
+    inline void rshift32Arithmetic(Imm32 shift, Register srcDest) PER_SHARED_ARCH;
+
     inline void lshiftPtr(Imm32 imm, Register dest) PER_ARCH;
+    inline void rshiftPtr(Imm32 imm, Register dest) PER_ARCH;
+    inline void rshiftPtr(Imm32 imm, Register src, Register dest) DEFINED_ON(arm64);
+    inline void rshiftPtrArithmetic(Imm32 imm, Register dest) PER_ARCH;
 
     inline void lshift64(Imm32 imm, Register64 dest) PER_ARCH;
-
-    inline void rshiftPtr(Imm32 imm, Register dest) PER_ARCH;
-    inline void rshiftPtr(Imm32 imm, Register src, Register dest) DEFINED_ON(arm64);
-
-    inline void rshiftPtrArithmetic(Imm32 imm, Register dest) PER_ARCH;
-
     inline void rshift64(Imm32 imm, Register64 dest) PER_ARCH;
+    inline void rshift64Arithmetic(Imm32 imm, Register64 dest) DEFINED_ON(x86, x64);
 
     // On x86_shared these have the constraint that shift must be in CL.
     inline void lshift32(Register shift, Register srcDest) PER_SHARED_ARCH;
     inline void rshift32(Register shift, Register srcDest) PER_SHARED_ARCH;
     inline void rshift32Arithmetic(Register shift, Register srcDest) PER_SHARED_ARCH;
 
-    inline void lshift32(Imm32 shift, Register srcDest) PER_SHARED_ARCH;
-    inline void rshift32(Imm32 shift, Register srcDest) PER_SHARED_ARCH;
-    inline void rshift32Arithmetic(Imm32 shift, Register srcDest) PER_SHARED_ARCH;
+    inline void lshift64(Register shift, Register64 srcDest) DEFINED_ON(x86, x64);
+    inline void rshift64(Register shift, Register64 srcDest) DEFINED_ON(x86, x64);
+    inline void rshift64Arithmetic(Register shift, Register64 srcDest) DEFINED_ON(x86, x64);
 
     // ===============================================================
     // Rotation functions
     inline void rotateLeft(Imm32 count, Register input, Register dest) PER_SHARED_ARCH;
     inline void rotateLeft(Register count, Register input, Register dest) PER_SHARED_ARCH;
     inline void rotateRight(Imm32 count, Register input, Register dest) PER_SHARED_ARCH;
     inline void rotateRight(Register count, Register input, Register dest) PER_SHARED_ARCH;
 
--- a/js/src/jit/shared/LIR-shared.h
+++ b/js/src/jit/shared/LIR-shared.h
@@ -3285,16 +3285,19 @@ class LShiftI64 : public LInstructionHel
 
   public:
     LIR_HEADER(ShiftI64)
 
     explicit LShiftI64(JSOp op)
       : op_(op)
     { }
 
+    static const size_t Lhs = 0;
+    static const size_t Rhs = INT64_PIECES;
+
     JSOp bitop() {
         return op_;
     }
 
     MInstruction* mir() {
         return mir_->toInstruction();
     }
 
--- a/js/src/jit/x64/CodeGenerator-x64.cpp
+++ b/js/src/jit/x64/CodeGenerator-x64.cpp
@@ -288,58 +288,16 @@ CodeGeneratorX64::visitBitOpI64(LBitOpI6
             masm.andq(ToOperand(rhs), lhs);
         break;
       default:
         MOZ_CRASH("unexpected binary opcode");
     }
 }
 
 void
-CodeGeneratorX64::visitShiftI64(LShiftI64* lir)
-{
-    Register lhs = ToRegister(lir->getOperand(0));
-    const LAllocation* rhs = lir->getOperand(1);
-
-    if (rhs->isConstant()) {
-        int32_t shift = int32_t(ToInt64(rhs) & 0x3F);
-        switch (lir->bitop()) {
-          case JSOP_LSH:
-            if (shift)
-                masm.shlq(Imm32(shift), lhs);
-            break;
-          case JSOP_RSH:
-            if (shift)
-                masm.sarq(Imm32(shift), lhs);
-            break;
-          case JSOP_URSH:
-            if (shift)
-                masm.shrq(Imm32(shift), lhs);
-            break;
-          default:
-            MOZ_CRASH("Unexpected shift op");
-        }
-    } else {
-        MOZ_ASSERT(ToRegister(rhs) == ecx);
-        switch (lir->bitop()) {
-          case JSOP_LSH:
-            masm.shlq_cl(lhs);
-            break;
-          case JSOP_RSH:
-            masm.sarq_cl(lhs);
-            break;
-          case JSOP_URSH:
-            masm.shrq_cl(lhs);
-            break;
-          default:
-            MOZ_CRASH("Unexpected shift op");
-        }
-    }
-}
-
-void
 CodeGeneratorX64::visitRotate64(LRotate64* lir)
 {
     MRotate* mir = lir->mir();
     Register input = ToRegister(lir->input());
     const LAllocation* count = lir->count();
 
     if (count->isConstant()) {
         int32_t c = int32_t(ToInt64(count) & 0x3F);
--- a/js/src/jit/x64/CodeGenerator-x64.h
+++ b/js/src/jit/x64/CodeGenerator-x64.h
@@ -48,17 +48,16 @@ class CodeGeneratorX64 : public CodeGene
     void visitUnbox(LUnbox* unbox);
     void visitCompareB(LCompareB* lir);
     void visitCompareBAndBranch(LCompareBAndBranch* lir);
     void visitCompareBitwise(LCompareBitwise* lir);
     void visitCompareBitwiseAndBranch(LCompareBitwiseAndBranch* lir);
     void visitCompareI64(LCompareI64* lir);
     void visitCompareI64AndBranch(LCompareI64AndBranch* lir);
     void visitBitOpI64(LBitOpI64* lir);
-    void visitShiftI64(LShiftI64* lir);
     void visitRotate64(LRotate64* lir);
     void visitAddI64(LAddI64* lir);
     void visitSubI64(LSubI64* lir);
     void visitMulI64(LMulI64* lir);
     void visitDivOrModI64(LDivOrModI64* lir);
     void visitUDivOrMod64(LUDivOrMod64* lir);
     void visitNotI64(LNotI64* lir);
     void visitClzI64(LClzI64* lir);
--- a/js/src/jit/x64/MacroAssembler-x64-inl.h
+++ b/js/src/jit/x64/MacroAssembler-x64-inl.h
@@ -247,35 +247,62 @@ void
 MacroAssembler::lshiftPtr(Imm32 imm, Register dest)
 {
     shlq(imm, dest);
 }
 
 void
 MacroAssembler::lshift64(Imm32 imm, Register64 dest)
 {
-    shlq(imm, dest.reg);
+    lshiftPtr(imm, dest.reg);
+}
+
+void
+MacroAssembler::lshift64(Register shift, Register64 srcDest)
+{
+    MOZ_ASSERT(shift == rcx);
+    shlq_cl(srcDest.reg);
 }
 
 void
 MacroAssembler::rshiftPtr(Imm32 imm, Register dest)
 {
     shrq(imm, dest);
 }
 
 void
+MacroAssembler::rshift64(Imm32 imm, Register64 dest)
+{
+    rshiftPtr(imm, dest.reg);
+}
+
+void
+MacroAssembler::rshift64(Register shift, Register64 srcDest)
+{
+    MOZ_ASSERT(shift == rcx);
+    shrq_cl(srcDest.reg);
+}
+
+void
 MacroAssembler::rshiftPtrArithmetic(Imm32 imm, Register dest)
 {
     sarq(imm, dest);
 }
 
 void
-MacroAssembler::rshift64(Imm32 imm, Register64 dest)
+MacroAssembler::rshift64Arithmetic(Imm32 imm, Register64 dest)
 {
-    shrq(imm, dest.reg);
+    rshiftPtrArithmetic(imm, dest.reg);
+}
+
+void
+MacroAssembler::rshift64Arithmetic(Register shift, Register64 srcDest)
+{
+    MOZ_ASSERT(shift == rcx);
+    sarq_cl(srcDest.reg);
 }
 
 // ===============================================================
 // Bit counting functions
 
 void
 MacroAssembler::clz64(Register64 src, Register64 dest)
 {
--- a/js/src/jit/x86-shared/Assembler-x86-shared.h
+++ b/js/src/jit/x86-shared/Assembler-x86-shared.h
@@ -1697,16 +1697,22 @@ class AssemblerX86Shared : public Assemb
         masm.shrl_CLr(dest.encoding());
     }
     void shll_cl(Register dest) {
         masm.shll_CLr(dest.encoding());
     }
     void sarl_cl(Register dest) {
         masm.sarl_CLr(dest.encoding());
     }
+    void shrdl_cl(Register src, Register dest) {
+        masm.shrdl_CLr(src.encoding(), dest.encoding());
+    }
+    void shldl_cl(Register src, Register dest) {
+        masm.shldl_CLr(src.encoding(), dest.encoding());
+    }
 
     void roll(const Imm32 imm, Register dest) {
         masm.roll_ir(imm.value, dest.encoding());
     }
     void roll_cl(Register dest) {
         masm.roll_CLr(dest.encoding());
     }
     void rorl(const Imm32 imm, Register dest) {
--- a/js/src/jit/x86-shared/BaseAssembler-x86-shared.h
+++ b/js/src/jit/x86-shared/BaseAssembler-x86-shared.h
@@ -1459,16 +1459,28 @@ public:
     }
 
     void shrl_CLr(RegisterID dst)
     {
         spew("shrl       %%cl, %s", GPReg32Name(dst));
         m_formatter.oneByteOp(OP_GROUP2_EvCL, dst, GROUP2_OP_SHR);
     }
 
+    void shrdl_CLr(RegisterID src, RegisterID dst)
+    {
+        spew("shrdl      %%cl, %s, %s", GPReg32Name(src), GPReg32Name(dst));
+        m_formatter.twoByteOp(OP2_SHRD_GvEv, dst, src);
+    }
+
+    void shldl_CLr(RegisterID src, RegisterID dst)
+    {
+        spew("shldl      %%cl, %s, %s", GPReg32Name(src), GPReg32Name(dst));
+        m_formatter.twoByteOp(OP2_SHLD_GvEv, dst, src);
+    }
+
     void shll_ir(int32_t imm, RegisterID dst)
     {
         MOZ_ASSERT(imm < 32);
         spew("shll       $%d, %s", imm, GPReg32Name(dst));
         if (imm == 1)
             m_formatter.oneByteOp(OP_GROUP2_Ev1, dst, GROUP2_OP_SHL);
         else {
             m_formatter.oneByteOp(OP_GROUP2_EvIb, dst, GROUP2_OP_SHL);
--- a/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp
+++ b/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp
@@ -1714,16 +1714,61 @@ CodeGeneratorX86Shared::visitShiftI(LShi
             break;
           default:
             MOZ_CRASH("Unexpected shift op");
         }
     }
 }
 
 void
+CodeGeneratorX86Shared::visitShiftI64(LShiftI64* lir)
+{
+    const LInt64Allocation lhs = lir->getInt64Operand(LShiftI64::Lhs);
+    LAllocation* rhs = lir->getOperand(LShiftI64::Rhs);
+
+    MOZ_ASSERT(ToOutRegister64(lir) == ToRegister64(lhs));
+
+    if (rhs->isConstant()) {
+        int32_t shift = int32_t(rhs->toConstant()->toInt64() & 0x3F);
+        switch (lir->bitop()) {
+          case JSOP_LSH:
+            if (shift)
+                masm.lshift64(Imm32(shift), ToRegister64(lhs));
+            break;
+          case JSOP_RSH:
+            if (shift)
+                masm.rshift64Arithmetic(Imm32(shift), ToRegister64(lhs));
+            break;
+          case JSOP_URSH:
+            if (shift)
+                masm.rshift64(Imm32(shift), ToRegister64(lhs));
+            break;
+          default:
+            MOZ_CRASH("Unexpected shift op");
+        }
+        return;
+    }
+
+    MOZ_ASSERT(ToRegister(rhs) == ecx);
+    switch (lir->bitop()) {
+      case JSOP_LSH:
+        masm.lshift64(ecx, ToRegister64(lhs));
+        break;
+      case JSOP_RSH:
+        masm.rshift64Arithmetic(ecx, ToRegister64(lhs));
+        break;
+      case JSOP_URSH:
+        masm.rshift64(ecx, ToRegister64(lhs));
+        break;
+      default:
+        MOZ_CRASH("Unexpected shift op");
+    }
+}
+
+void
 CodeGeneratorX86Shared::visitUrshD(LUrshD* ins)
 {
     Register lhs = ToRegister(ins->lhs());
     MOZ_ASSERT(ToRegister(ins->temp()) == lhs);
 
     const LAllocation* rhs = ins->rhs();
     FloatRegister out = ToFloatRegister(ins->output());
 
--- a/js/src/jit/x86-shared/CodeGenerator-x86-shared.h
+++ b/js/src/jit/x86-shared/CodeGenerator-x86-shared.h
@@ -243,16 +243,17 @@ class CodeGeneratorX86Shared : public Co
     virtual void visitDivI(LDivI* ins);
     virtual void visitDivPowTwoI(LDivPowTwoI* ins);
     virtual void visitDivOrModConstantI(LDivOrModConstantI* ins);
     virtual void visitModI(LModI* ins);
     virtual void visitModPowTwoI(LModPowTwoI* ins);
     virtual void visitBitNotI(LBitNotI* ins);
     virtual void visitBitOpI(LBitOpI* ins);
     virtual void visitShiftI(LShiftI* ins);
+    virtual void visitShiftI64(LShiftI64* ins);
     virtual void visitUrshD(LUrshD* ins);
     virtual void visitTestIAndBranch(LTestIAndBranch* test);
     virtual void visitTestDAndBranch(LTestDAndBranch* test);
     virtual void visitTestFAndBranch(LTestFAndBranch* test);
     virtual void visitCompare(LCompare* comp);
     virtual void visitCompareAndBranch(LCompareAndBranch* comp);
     virtual void visitCompareD(LCompareD* comp);
     virtual void visitCompareDAndBranch(LCompareDAndBranch* comp);
--- a/js/src/jit/x86-shared/Encoding-x86-shared.h
+++ b/js/src/jit/x86-shared/Encoding-x86-shared.h
@@ -233,17 +233,19 @@ enum TwoByteOpcodeID {
     OP2_PCMPEQD_VdqWdq  = 0x76,
     OP2_HADDPD          = 0x7C,
     OP2_MOVD_EdVd       = 0x7E,
     OP2_MOVQ_VdWd       = 0x7E,
     OP2_MOVDQ_WdqVdq    = 0x7F,
     OP2_JCC_rel32       = 0x80,
     OP_SETCC            = 0x90,
     OP2_SHLD            = 0xA4,
+    OP2_SHLD_GvEv       = 0xA5,
     OP2_SHRD            = 0xAC,
+    OP2_SHRD_GvEv       = 0xAD,
     OP_FENCE            = 0xAE,
     OP2_IMUL_GvEv       = 0xAF,
     OP2_CMPXCHG_GvEb    = 0xB0,
     OP2_CMPXCHG_GvEw    = 0xB1,
     OP2_POPCNT_GvEv     = 0xB8,
     OP2_BSF_GvEv        = 0xBC,
     OP2_BSR_GvEv        = 0xBD,
     OP2_MOVSX_GvEb      = 0xBE,
--- a/js/src/jit/x86/MacroAssembler-x86-inl.h
+++ b/js/src/jit/x86/MacroAssembler-x86-inl.h
@@ -256,37 +256,128 @@ void
 MacroAssembler::lshiftPtr(Imm32 imm, Register dest)
 {
     shll(imm, dest);
 }
 
 void
 MacroAssembler::lshift64(Imm32 imm, Register64 dest)
 {
-    shldl(imm, dest.low, dest.high);
-    shll(imm, dest.low);
+    if ((imm.value & INT32_MAX) < 32) {
+        shldl(imm, dest.low, dest.high);
+        shll(imm, dest.low);
+        return;
+    }
+
+    mov(dest.low, dest.high);
+    shll(Imm32(imm.value & 0x1f), dest.high);
+    xorl(dest.low, dest.low);
+}
+
+void
+MacroAssembler::lshift64(Register shift, Register64 srcDest)
+{
+    MOZ_ASSERT(shift == ecx);
+    MOZ_ASSERT(srcDest.low != ecx && srcDest.high != ecx);
+
+    Label done;
+
+    shldl_cl(srcDest.low, srcDest.high);
+    shll_cl(srcDest.low);
+
+    testl(Imm32(0x20), ecx);
+    j(Condition::Equal, &done);
+
+    // 32 - 63 bit shift
+    movl(srcDest.low, srcDest.high);
+    xorl(srcDest.low, srcDest.low);
+
+    bind(&done);
 }
 
 void
 MacroAssembler::rshiftPtr(Imm32 imm, Register dest)
 {
     shrl(imm, dest);
 }
 
 void
+MacroAssembler::rshift64(Imm32 imm, Register64 dest)
+{
+    if ((imm.value & INT32_MAX) < 32) {
+        shrdl(imm, dest.high, dest.low);
+        shrl(imm, dest.high);
+        return;
+    }
+
+    movl(dest.high, dest.low);
+    shrl(Imm32(imm.value & 0x1f), dest.low);
+    xorl(dest.high, dest.high);
+}
+
+void
+MacroAssembler::rshift64(Register shift, Register64 srcDest)
+{
+    MOZ_ASSERT(shift == ecx);
+    MOZ_ASSERT(srcDest.low != ecx && srcDest.high != ecx);
+
+    Label done;
+
+    shrdl_cl(srcDest.high, srcDest.low);
+    shrl_cl(srcDest.high);
+
+    testl(Imm32(0x20), ecx);
+    j(Condition::Equal, &done);
+
+    // 32 - 63 bit shift
+    movl(srcDest.high, srcDest.low);
+    xorl(srcDest.high, srcDest.high);
+
+    bind(&done);
+}
+
+void
 MacroAssembler::rshiftPtrArithmetic(Imm32 imm, Register dest)
 {
     sarl(imm, dest);
 }
 
 void
-MacroAssembler::rshift64(Imm32 imm, Register64 dest)
+MacroAssembler::rshift64Arithmetic(Imm32 imm, Register64 dest)
+{
+    if ((imm.value & INT32_MAX) < 32) {
+        shrdl(imm, dest.high, dest.low);
+        sarl(imm, dest.high);
+        return;
+    }
+
+    movl(dest.high, dest.low);
+    sarl(Imm32(imm.value & 0x1f), dest.low);
+    sarl(Imm32(0x1f), dest.high);
+}
+
+void
+MacroAssembler::rshift64Arithmetic(Register shift, Register64 srcDest)
 {
-    shrdl(imm, dest.high, dest.low);
-    shrl(imm, dest.high);
+    MOZ_ASSERT(shift == ecx);
+    MOZ_ASSERT(srcDest.low != ecx && srcDest.high != ecx);
+
+    Label done;
+
+    shrdl_cl(srcDest.high, srcDest.low);
+    sarl_cl(srcDest.high);
+
+    testl(Imm32(0x20), ecx);
+    j(Condition::Equal, &done);
+
+    // 32 - 63 bit shift
+    movl(srcDest.high, srcDest.low);
+    sarl(Imm32(0x1f), srcDest.high);
+
+    bind(&done);
 }
 
 // ===============================================================
 // Branch functions
 
 void
 MacroAssembler::branch32(Condition cond, const AbsoluteAddress& lhs, Register rhs, Label* label)
 {