Bug 1279248 - Part 10: Implement the 64bit variant of Rotate on x86, r=luke
author Hannes Verschore <hv1989@gmail.com>
Fri, 29 Jul 2016 16:51:41 +0200
changeset 347331 8c7e1e2e1a9df837743042ac1de6999bd0ee28af
parent 347330 0c56943e6d0e50426d82f90261d852768363a08a
child 347332 61f6a14488120dce6b9f9022060d816e23ad54b3
push id 6389
push user raliiev@mozilla.com
push date Mon, 19 Sep 2016 13:38:22 +0000
treeherder mozilla-beta@01d67bfe6c81 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers luke
bugs 1279248
milestone 50.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1279248 - Part 10: Implement the 64bit variant of Rotate on x86, r=luke
js/src/jit/Lowering.cpp
js/src/jit/MacroAssembler.h
js/src/jit/arm/Lowering-arm.cpp
js/src/jit/arm/Lowering-arm.h
js/src/jit/arm64/Lowering-arm64.cpp
js/src/jit/arm64/Lowering-arm64.h
js/src/jit/mips-shared/Lowering-mips-shared.cpp
js/src/jit/mips-shared/Lowering-mips-shared.h
js/src/jit/shared/LIR-shared.h
js/src/jit/shared/LOpcodes-shared.h
js/src/jit/x64/CodeGenerator-x64.cpp
js/src/jit/x64/CodeGenerator-x64.h
js/src/jit/x64/MacroAssembler-x64-inl.h
js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp
js/src/jit/x86-shared/CodeGenerator-x86-shared.h
js/src/jit/x86-shared/Lowering-x86-shared.cpp
js/src/jit/x86-shared/Lowering-x86-shared.h
js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h
js/src/jit/x86/MacroAssembler-x86-inl.h
--- a/js/src/jit/Lowering.cpp
+++ b/js/src/jit/Lowering.cpp
@@ -1299,17 +1299,17 @@ LIRGenerator::visitRotate(MRotate* ins)
 {
     MDefinition* input = ins->input();
     MDefinition* count = ins->count();
 
     if (ins->type() == MIRType::Int32) {
         auto* lir = new(alloc()) LRotate();
         lowerForShift(lir, ins, input, count);
     } else if (ins->type() == MIRType::Int64) {
-        auto* lir = new(alloc()) LRotate64();
+        auto* lir = new(alloc()) LRotateI64();
         lowerForShiftInt64(lir, ins, input, count);
     } else {
         MOZ_CRASH("unexpected type in visitRotate");
     }
 }
 
 void
 LIRGenerator::visitFloor(MFloor* ins)
--- a/js/src/jit/MacroAssembler.h
+++ b/js/src/jit/MacroAssembler.h
@@ -873,20 +873,36 @@ class MacroAssembler : public MacroAssem
     inline void rshift32Arithmetic(Register shift, Register srcDest) PER_SHARED_ARCH;
 
     inline void lshift64(Register shift, Register64 srcDest) DEFINED_ON(x86, x64);
     inline void rshift64(Register shift, Register64 srcDest) DEFINED_ON(x86, x64);
     inline void rshift64Arithmetic(Register shift, Register64 srcDest) DEFINED_ON(x86, x64);
 
     // ===============================================================
     // Rotation functions
+    // Note: - on x86 and x64 the count register must be in CL.
+    //       - on x64 the temp register should be InvalidReg.
+
     inline void rotateLeft(Imm32 count, Register input, Register dest) PER_SHARED_ARCH;
     inline void rotateLeft(Register count, Register input, Register dest) PER_SHARED_ARCH;
+    inline void rotateLeft64(Imm32 count, Register64 input, Register64 dest) DEFINED_ON(x64);
+    inline void rotateLeft64(Register count, Register64 input, Register64 dest) DEFINED_ON(x64);
+    inline void rotateLeft64(Imm32 count, Register64 input, Register64 dest, Register temp)
+        DEFINED_ON(x86, x64);
+    inline void rotateLeft64(Register count, Register64 input, Register64 dest, Register temp)
+        DEFINED_ON(x86, x64);
+
     inline void rotateRight(Imm32 count, Register input, Register dest) PER_SHARED_ARCH;
     inline void rotateRight(Register count, Register input, Register dest) PER_SHARED_ARCH;
+    inline void rotateRight64(Imm32 count, Register64 input, Register64 dest) DEFINED_ON(x64);
+    inline void rotateRight64(Register count, Register64 input, Register64 dest) DEFINED_ON(x64);
+    inline void rotateRight64(Imm32 count, Register64 input, Register64 dest, Register temp)
+        DEFINED_ON(x86, x64);
+    inline void rotateRight64(Register count, Register64 input, Register64 dest, Register temp)
+        DEFINED_ON(x86, x64);
 
     // ===============================================================
     // Bit counting functions
 
     // knownNotZero may be true only if the src is known not to be zero.
     inline void clz32(Register src, Register dest, bool knownNotZero) PER_SHARED_ARCH;
     inline void ctz32(Register src, Register dest, bool knownNotZero) PER_SHARED_ARCH;
 
--- a/js/src/jit/arm/Lowering-arm.cpp
+++ b/js/src/jit/arm/Lowering-arm.cpp
@@ -238,23 +238,31 @@ LIRGeneratorARM::lowerUntypedPhiInput(MP
 void
 LIRGeneratorARM::lowerForShift(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir, MDefinition* lhs, MDefinition* rhs)
 {
     ins->setOperand(0, useRegister(lhs));
     ins->setOperand(1, useRegisterOrConstant(rhs));
     define(ins, mir);
 }
 
+template<size_t Temps>
 void
-LIRGeneratorARM::lowerForShiftInt64(LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, 0>* ins,
+LIRGeneratorARM::lowerForShiftInt64(LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, Temps>* ins,
                                     MDefinition* mir, MDefinition* lhs, MDefinition* rhs)
 {
     MOZ_CRASH("NYI");
 }
 
+template void LIRGeneratorARM::lowerForShiftInt64(
+    LInstructionHelper<INT64_PIECES, INT64_PIECES+1, 0>* ins, MDefinition* mir,
+    MDefinition* lhs, MDefinition* rhs);
+template void LIRGeneratorARM::lowerForShiftInt64(
+    LInstructionHelper<INT64_PIECES, INT64_PIECES+1, 1>* ins, MDefinition* mir,
+    MDefinition* lhs, MDefinition* rhs);
+
 void
 LIRGeneratorARM::lowerDivI(MDiv* div)
 {
     if (div->isUnsigned()) {
         lowerUDiv(div);
         return;
     }
 
--- a/js/src/jit/arm/Lowering-arm.h
+++ b/js/src/jit/arm/Lowering-arm.h
@@ -48,17 +48,18 @@ class LIRGeneratorARM : public LIRGenera
     void lowerForALU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir,
                      MDefinition* input);
     void lowerForALU(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
                      MDefinition* lhs, MDefinition* rhs);
 
     void lowerForALUInt64(LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 0>* ins,
                           MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
     void lowerForMulInt64(LMulI64* ins, MMul* mir, MDefinition* lhs, MDefinition* rhs);
-    void lowerForShiftInt64(LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, 0>* ins,
+    template<size_t Temps>
+    void lowerForShiftInt64(LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, Temps>* ins,
                             MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
 
     void lowerForFPU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir,
                      MDefinition* src);
     template<size_t Temps>
     void lowerForFPU(LInstructionHelper<1, 2, Temps>* ins, MDefinition* mir,
                      MDefinition* lhs, MDefinition* rhs);
 
--- a/js/src/jit/arm64/Lowering-arm64.cpp
+++ b/js/src/jit/arm64/Lowering-arm64.cpp
@@ -95,23 +95,31 @@ LIRGeneratorARM64::lowerForALUInt64(LIns
 }
 
 void
 LIRGeneratorARM64::lowerForMulInt64(LMulI64* ins, MMul* mir, MDefinition* lhs, MDefinition* rhs)
 {
     MOZ_CRASH("NYI");
 }
 
+template<size_t Temps>
 void
-LIRGeneratorARM64::lowerForShiftInt64(LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, 0>* ins,
+LIRGeneratorARM64::lowerForShiftInt64(LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, Temps>* ins,
                                       MDefinition* mir, MDefinition* lhs, MDefinition* rhs)
 {
     MOZ_CRASH("NYI");
 }
 
+template void LIRGeneratorARM64::lowerForShiftInt64(
+    LInstructionHelper<INT64_PIECES, INT64_PIECES+1, 0>* ins, MDefinition* mir,
+    MDefinition* lhs, MDefinition* rhs);
+template void LIRGeneratorARM64::lowerForShiftInt64(
+    LInstructionHelper<INT64_PIECES, INT64_PIECES+1, 1>* ins, MDefinition* mir,
+    MDefinition* lhs, MDefinition* rhs);
+
 void
 LIRGeneratorARM64::lowerForBitAndAndBranch(LBitAndAndBranch* baab, MInstruction* mir,
                                          MDefinition* lhs, MDefinition* rhs)
 {
     MOZ_CRASH("lowerForBitAndAndBranch");
 }
 
 void
--- a/js/src/jit/arm64/Lowering-arm64.h
+++ b/js/src/jit/arm64/Lowering-arm64.h
@@ -48,17 +48,18 @@ class LIRGeneratorARM64 : public LIRGene
 
     void lowerForALU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir, MDefinition* input);
     void lowerForALU(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
                      MDefinition* lhs, MDefinition* rhs);
 
     void lowerForALUInt64(LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 0>* ins,
                           MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
     void lowerForMulInt64(LMulI64* ins, MMul* mir, MDefinition* lhs, MDefinition* rhs);
-    void lowerForShiftInt64(LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, 0>* ins,
+    template<size_t Temps>
+    void lowerForShiftInt64(LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, Temps>* ins,
                             MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
 
     void lowerForFPU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir, MDefinition* input);
 
     template <size_t Temps>
     void lowerForFPU(LInstructionHelper<1, 2, Temps>* ins, MDefinition* mir,
                      MDefinition* lhs, MDefinition* rhs);
 
--- a/js/src/jit/mips-shared/Lowering-mips-shared.cpp
+++ b/js/src/jit/mips-shared/Lowering-mips-shared.cpp
@@ -62,23 +62,31 @@ LIRGeneratorMIPSShared::lowerForALUInt64
 }
 
 void
 LIRGeneratorMIPSShared::lowerForMulInt64(LMulI64* ins, MMul* mir, MDefinition* lhs, MDefinition* rhs)
 {
     MOZ_CRASH("NYI");
 }
 
+template<size_t Temps>
 void
-LIRGeneratorMIPSShared::lowerForShiftInt64(LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, 0>* ins,
+LIRGeneratorMIPSShared::lowerForShiftInt64(LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, Temps>* ins,
                                            MDefinition* mir, MDefinition* lhs, MDefinition* rhs)
 {
     MOZ_CRASH("NYI");
 }
 
+template void LIRGeneratorMIPSShared::lowerForShiftInt64(
+    LInstructionHelper<INT64_PIECES, INT64_PIECES+1, 0>* ins, MDefinition* mir,
+    MDefinition* lhs, MDefinition* rhs);
+template void LIRGeneratorMIPSShared::lowerForShiftInt64(
+    LInstructionHelper<INT64_PIECES, INT64_PIECES+1, 1>* ins, MDefinition* mir,
+    MDefinition* lhs, MDefinition* rhs);
+
 void
 LIRGeneratorMIPSShared::lowerForFPU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir,
                                     MDefinition* input)
 {
     ins->setOperand(0, useRegister(input));
     define(ins, mir, LDefinition(LDefinition::TypeFrom(mir->type()), LDefinition::REGISTER));
 }
 
--- a/js/src/jit/mips-shared/Lowering-mips-shared.h
+++ b/js/src/jit/mips-shared/Lowering-mips-shared.h
@@ -38,17 +38,18 @@ class LIRGeneratorMIPSShared : public LI
     void lowerForALU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir,
                      MDefinition* input);
     void lowerForALU(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
                      MDefinition* lhs, MDefinition* rhs);
 
     void lowerForALUInt64(LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 0>* ins,
                           MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
     void lowerForMulInt64(LMulI64* ins, MMul* mir, MDefinition* lhs, MDefinition* rhs);
-    void lowerForShiftInt64(LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, 0>* ins,
+    template<size_t Temps>
+    void lowerForShiftInt64(LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, Temps>* ins,
                             MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
 
     void lowerForFPU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir,
                      MDefinition* src);
     template<size_t Temps>
     void lowerForFPU(LInstructionHelper<1, 2, Temps>* ins, MDefinition* mir,
                      MDefinition* lhs, MDefinition* rhs);
 
--- a/js/src/jit/shared/LIR-shared.h
+++ b/js/src/jit/shared/LIR-shared.h
@@ -1427,39 +1427,52 @@ class LAsmReinterpretToI64 : public LAsm
   public:
     LIR_HEADER(AsmReinterpretToI64);
     explicit LAsmReinterpretToI64(const LAllocation& input) {
         setOperand(0, input);
     }
 };
 
 namespace details {
-    template<size_t Defs, size_t Ops>
-    class RotateBase : public LInstructionHelper<Defs, Ops, 0>
+    template<size_t Defs, size_t Ops, size_t Temps>
+    class RotateBase : public LInstructionHelper<Defs, Ops, Temps>
     {
-        typedef LInstructionHelper<Defs, Ops, 0> Base;
+        typedef LInstructionHelper<Defs, Ops, Temps> Base;
       public:
         MRotate* mir() {
             return Base::mir_->toRotate();
         }
-        const LAllocation* input() { return Base::getOperand(0); }
-        const LAllocation* count() { return Base::getOperand(1); }
     };
 } // details
 
-class LRotate : public details::RotateBase<1, 2>
+class LRotate : public details::RotateBase<1, 2, 0>
 {
   public:
     LIR_HEADER(Rotate);
-};
-
-class LRotate64 : public details::RotateBase<INT64_PIECES, INT64_PIECES + 1>
-{
-  public:
-    LIR_HEADER(Rotate64);
+
+    const LAllocation* input() { return getOperand(0); }
+    LAllocation* count() { return getOperand(1); }
+};
+
+class LRotateI64 : public details::RotateBase<INT64_PIECES, INT64_PIECES + 1, 1>
+{
+  public:
+    LIR_HEADER(RotateI64);
+    // LIR node for a 64-bit rotate: one int64 input, one count operand, one temp slot.
+    LRotateI64()
+    {
+        setTemp(0, LDefinition::BogusTemp());  // Bogus by default; lowering may install a real temp.
+    }
+
+    static const size_t Input = 0;             // Operand index where the int64 input starts.
+    static const size_t Count = INT64_PIECES;  // The count follows the input's INT64_PIECES operands.
+
+    const LInt64Allocation input() { return getInt64Operand(Input); }
+    const LDefinition* temp() { return getTemp(0); }
+    LAllocation* count() { return getOperand(Count); }
+};
 
 class LInterruptCheck : public LInstructionHelper<0, 0, 0>
 {
     Label* oolEntry_;
 
     // Whether this is an implicit interrupt check. Implicit interrupt checks
     // use a patchable backedge and signal handlers instead of an explicit
--- a/js/src/jit/shared/LOpcodes-shared.h
+++ b/js/src/jit/shared/LOpcodes-shared.h
@@ -367,17 +367,17 @@
     _(CallInstanceOf)               \
     _(InterruptCheck)               \
     _(AsmJSInterruptCheck)          \
     _(AsmThrowUnreachable)          \
     _(AsmReinterpret)               \
     _(AsmReinterpretToI64)          \
     _(AsmReinterpretFromI64)        \
     _(Rotate)                       \
-    _(Rotate64)                     \
+    _(RotateI64)                    \
     _(GetDOMProperty)               \
     _(GetDOMMemberV)                \
     _(GetDOMMemberT)                \
     _(SetDOMProperty)               \
     _(CallDOMNative)                \
     _(IsCallable)                   \
     _(IsConstructor)                \
     _(IsObject)                     \
--- a/js/src/jit/x64/CodeGenerator-x64.cpp
+++ b/js/src/jit/x64/CodeGenerator-x64.cpp
@@ -258,40 +258,16 @@ CodeGeneratorX64::visitCompareI64AndBran
         masm.cmpPtr(lhsReg, Operand(rhsReg));
     }
 
     bool isSigned = mir->compareType() == MCompare::Compare_Int64;
     emitBranch(JSOpToCondition(lir->jsop(), isSigned), lir->ifTrue(), lir->ifFalse());
 }
 
 void
-CodeGeneratorX64::visitRotate64(LRotate64* lir)
-{
-    MRotate* mir = lir->mir();
-    Register input = ToRegister(lir->input());
-    const LAllocation* count = lir->count();
-
-    if (count->isConstant()) {
-        int32_t c = int32_t(ToInt64(count) & 0x3F);
-        if (!c)
-            return;
-        if (mir->isLeftRotate())
-            masm.rolq(Imm32(c), input);
-        else
-            masm.rorq(Imm32(c), input);
-    } else {
-        MOZ_ASSERT(ToRegister(count) == ecx);
-        if (mir->isLeftRotate())
-            masm.rolq_cl(input);
-        else
-            masm.rorq_cl(input);
-    }
-}
-
-void
 CodeGeneratorX64::visitDivOrModI64(LDivOrModI64* lir)
 {
     Register lhs = ToRegister(lir->lhs());
     Register rhs = ToRegister(lir->rhs());
     Register output = ToRegister(lir->output());
 
     MOZ_ASSERT_IF(lhs != rhs, rhs != rax);
     MOZ_ASSERT(rhs != rdx);
--- a/js/src/jit/x64/CodeGenerator-x64.h
+++ b/js/src/jit/x64/CodeGenerator-x64.h
@@ -47,17 +47,16 @@ class CodeGeneratorX64 : public CodeGene
     void visitBox(LBox* box);
     void visitUnbox(LUnbox* unbox);
     void visitCompareB(LCompareB* lir);
     void visitCompareBAndBranch(LCompareBAndBranch* lir);
     void visitCompareBitwise(LCompareBitwise* lir);
     void visitCompareBitwiseAndBranch(LCompareBitwiseAndBranch* lir);
     void visitCompareI64(LCompareI64* lir);
     void visitCompareI64AndBranch(LCompareI64AndBranch* lir);
-    void visitRotate64(LRotate64* lir);
     void visitDivOrModI64(LDivOrModI64* lir);
     void visitUDivOrMod64(LUDivOrMod64* lir);
     void visitNotI64(LNotI64* lir);
     void visitClzI64(LClzI64* lir);
     void visitCtzI64(LCtzI64* lir);
     void visitPopcntI64(LPopcntI64* lir);
     void visitTruncateDToInt32(LTruncateDToInt32* ins);
     void visitTruncateFToInt32(LTruncateFToInt32* ins);
--- a/js/src/jit/x64/MacroAssembler-x64-inl.h
+++ b/js/src/jit/x64/MacroAssembler-x64-inl.h
@@ -377,16 +377,79 @@ MacroAssembler::rshift64Arithmetic(Imm32
 void
 MacroAssembler::rshift64Arithmetic(Register shift, Register64 srcDest)
 {
     MOZ_ASSERT(shift == rcx);
     sarq_cl(srcDest.reg);
 }
 
 // ===============================================================
+// Rotation functions
+
+void
+MacroAssembler::rotateLeft64(Register count, Register64 src, Register64 dest)
+{
+    MOZ_ASSERT(src == dest, "defineReuseInput");
+    MOZ_ASSERT(count == ecx, "defineFixed(ecx)");
+    // In-place 64-bit rotate left of dest by the count register (asserted to be ecx).
+    rolq_cl(dest.reg);
+}
+
+void
+MacroAssembler::rotateLeft64(Register count, Register64 src, Register64 dest, Register temp)
+{
+    MOZ_ASSERT(temp == InvalidReg);  // No temp is used on x64; callers must pass InvalidReg.
+    rotateLeft64(count, src, dest);  // Forward to the three-argument overload.
+}
+
+void
+MacroAssembler::rotateRight64(Register count, Register64 src, Register64 dest)
+{
+    MOZ_ASSERT(src == dest, "defineReuseInput");
+    MOZ_ASSERT(count == ecx, "defineFixed(ecx)");
+    // In-place 64-bit rotate right of dest by the count register (asserted to be ecx).
+    rorq_cl(dest.reg);
+}
+
+void
+MacroAssembler::rotateRight64(Register count, Register64 src, Register64 dest, Register temp)
+{
+    MOZ_ASSERT(temp == InvalidReg);   // No temp is used on x64; callers must pass InvalidReg.
+    rotateRight64(count, src, dest);  // Forward to the three-argument overload.
+}
+
+void
+MacroAssembler::rotateLeft64(Imm32 count, Register64 src, Register64 dest)
+{
+    MOZ_ASSERT(src == dest, "defineReuseInput");
+    rolq(count, dest.reg);  // In-place 64-bit rotate left by an immediate count.
+}
+
+void
+MacroAssembler::rotateLeft64(Imm32 count, Register64 src, Register64 dest, Register temp)
+{
+    MOZ_ASSERT(temp == InvalidReg);  // No temp is used on x64; callers must pass InvalidReg.
+    rotateLeft64(count, src, dest);  // Forward to the three-argument overload.
+}
+
+void
+MacroAssembler::rotateRight64(Imm32 count, Register64 src, Register64 dest)
+{
+    MOZ_ASSERT(src == dest, "defineReuseInput");
+    rorq(count, dest.reg);  // In-place 64-bit rotate right by an immediate count.
+}
+
+void
+MacroAssembler::rotateRight64(Imm32 count, Register64 src, Register64 dest, Register temp)
+{
+    MOZ_ASSERT(temp == InvalidReg);   // No temp is used on x64; callers must pass InvalidReg.
+    rotateRight64(count, src, dest);  // Forward to the three-argument overload.
+}
+
+// ===============================================================
 // Bit counting functions
 
 void
 MacroAssembler::clz64(Register64 src, Register64 dest)
 {
     // On very recent chips (Haswell and newer) there is actually an
     // LZCNT instruction that does all of this.
 
--- a/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp
+++ b/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp
@@ -4794,10 +4794,38 @@ CodeGeneratorX86Shared::visitCopySignD(L
 
     double keepSignMask = BitwiseCast<double>(INT64_MIN);
     masm.loadConstantDouble(keepSignMask, scratch);
     masm.vandpd(rhs, scratch, scratch);
 
     masm.vorpd(scratch, out, out);
 }
 
+void
+CodeGeneratorX86Shared::visitRotateI64(LRotateI64* lir)
+{
+    MRotate* mir = lir->mir();
+    LAllocation* count = lir->count();
+    // Shared x86/x64 codegen for a 64-bit rotate; the masm overloads do the per-arch work.
+    Register64 input = ToRegister64(lir->input());
+    Register64 output = ToOutRegister64(lir);
+    Register temp = ToTempRegisterOrInvalid(lir->temp());  // presumably InvalidReg for a bogus temp -- confirm
+
+    MOZ_ASSERT(input == output);
+
+    if (count->isConstant()) {
+        int32_t c = int32_t(count->toConstant()->toInt64() & 0x3F);  // Reduce the constant to 0..63.
+        if (!c)
+            return;  // Rotating by zero: input already is the output, emit nothing.
+        if (mir->isLeftRotate())
+            masm.rotateLeft64(Imm32(c), input, output, temp);
+        else
+            masm.rotateRight64(Imm32(c), input, output, temp);
+    } else {
+        if (mir->isLeftRotate())
+            masm.rotateLeft64(ToRegister(count), input, output, temp);
+        else
+            masm.rotateRight64(ToRegister(count), input, output, temp);
+    }
+}
+
 } // namespace jit
 } // namespace js
--- a/js/src/jit/x86-shared/CodeGenerator-x86-shared.h
+++ b/js/src/jit/x86-shared/CodeGenerator-x86-shared.h
@@ -287,16 +287,17 @@ class CodeGeneratorX86Shared : public Co
     virtual void visitWasmBoundsCheck(LWasmBoundsCheck* ins);
     virtual void visitMemoryBarrier(LMemoryBarrier* ins);
     virtual void visitAtomicTypedArrayElementBinop(LAtomicTypedArrayElementBinop* lir);
     virtual void visitAtomicTypedArrayElementBinopForEffect(LAtomicTypedArrayElementBinopForEffect* lir);
     virtual void visitCompareExchangeTypedArrayElement(LCompareExchangeTypedArrayElement* lir);
     virtual void visitAtomicExchangeTypedArrayElement(LAtomicExchangeTypedArrayElement* lir);
     virtual void visitCopySignD(LCopySignD* lir);
     virtual void visitCopySignF(LCopySignF* lir);
+    virtual void visitRotateI64(LRotateI64* lir);
 
     void visitOutOfLineLoadTypedArrayOutOfBounds(OutOfLineLoadTypedArrayOutOfBounds* ool);
     void visitOffsetBoundsCheck(OffsetBoundsCheck* oolCheck);
 
     void visitNegI(LNegI* lir);
     void visitNegD(LNegD* lir);
     void visitNegF(LNegF* lir);
 
--- a/js/src/jit/x86-shared/Lowering-x86-shared.cpp
+++ b/js/src/jit/x86-shared/Lowering-x86-shared.cpp
@@ -75,21 +75,26 @@ LIRGeneratorX86Shared::lowerForShift(LIn
     if (rhs->isConstant())
         ins->setOperand(1, useOrConstantAtStart(rhs));
     else
         ins->setOperand(1, lhs != rhs ? useFixed(rhs, ecx) : useFixedAtStart(rhs, ecx));
 
     defineReuseInput(ins, mir, 0);
 }
 
+template<size_t Temps>
 void
-LIRGeneratorX86Shared::lowerForShiftInt64(LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, 0>* ins,
+LIRGeneratorX86Shared::lowerForShiftInt64(LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, Temps>* ins,
                                           MDefinition* mir, MDefinition* lhs, MDefinition* rhs)
 {
     ins->setInt64Operand(0, useInt64RegisterAtStart(lhs));
+#if defined(JS_NUNBOX32)
+    if (mir->isRotate())
+        ins->setTemp(0, temp());
+#endif
 
     // shift operator should be constant or in register ecx
     // x86 can't shift a non-ecx register
     if (rhs->isConstant()) {
         ins->setOperand(INT64_PIECES, useOrConstantAtStart(rhs));
     } else {
         // The operands are int64, but we only care about the lower 32 bits of
         // the RHS. On 32-bit, the code below will load that part in ecx and
@@ -99,16 +104,23 @@ LIRGeneratorX86Shared::lowerForShiftInt6
         LUse use(ecx, useAtStart);
         use.setVirtualRegister(rhs->virtualRegister());
         ins->setOperand(INT64_PIECES, use);
     }
 
     defineInt64ReuseInput(ins, mir, 0);
 }
 
+template void LIRGeneratorX86Shared::lowerForShiftInt64(
+    LInstructionHelper<INT64_PIECES, INT64_PIECES+1, 0>* ins, MDefinition* mir,
+    MDefinition* lhs, MDefinition* rhs);
+template void LIRGeneratorX86Shared::lowerForShiftInt64(
+    LInstructionHelper<INT64_PIECES, INT64_PIECES+1, 1>* ins, MDefinition* mir,
+    MDefinition* lhs, MDefinition* rhs);
+
 void
 LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir,
                                    MDefinition* input)
 {
     ins->setOperand(0, useRegisterAtStart(input));
     defineReuseInput(ins, mir, 0);
 }
 
--- a/js/src/jit/x86-shared/Lowering-x86-shared.h
+++ b/js/src/jit/x86-shared/Lowering-x86-shared.h
@@ -27,17 +27,18 @@ class LIRGeneratorX86Shared : public LIR
     void visitGuardObjectGroup(MGuardObjectGroup* ins);
     void visitPowHalf(MPowHalf* ins);
     void lowerForShift(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir, MDefinition* lhs,
                        MDefinition* rhs);
     void lowerForALU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir, MDefinition* input);
     void lowerForALU(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir, MDefinition* lhs,
                      MDefinition* rhs);
 
-    void lowerForShiftInt64(LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, 0>* ins,
+    template<size_t Temps>
+    void lowerForShiftInt64(LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, Temps>* ins,
                             MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
 
     template<size_t Temps>
     void lowerForFPU(LInstructionHelper<1, 2, Temps>* ins, MDefinition* mir, MDefinition* lhs,
                      MDefinition* rhs);
     void lowerForCompIx4(LSimdBinaryCompIx4* ins, MSimdBinaryComp* mir,
                          MDefinition* lhs, MDefinition* rhs);
     void lowerForCompFx4(LSimdBinaryCompFx4* ins, MSimdBinaryComp* mir,
--- a/js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h
+++ b/js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h
@@ -364,32 +364,34 @@ MacroAssembler::maxDouble(FloatRegister 
 }
 
 // ===============================================================
 // Rotation instructions
 void
 MacroAssembler::rotateLeft(Imm32 count, Register input, Register dest)
 {
     MOZ_ASSERT(input == dest, "defineReuseInput");
+    count.value &= 0x1f;
     if (count.value)
         roll(count, input);
 }
 
 void
 MacroAssembler::rotateLeft(Register count, Register input, Register dest)
 {
     MOZ_ASSERT(input == dest, "defineReuseInput");
     MOZ_ASSERT(count == ecx, "defineFixed(ecx)");
     roll_cl(input);
 }
 
 void
 MacroAssembler::rotateRight(Imm32 count, Register input, Register dest)
 {
     MOZ_ASSERT(input == dest, "defineReuseInput");
+    count.value &= 0x1f;
     if (count.value)
         rorl(count, input);
 }
 
 void
 MacroAssembler::rotateRight(Register count, Register input, Register dest)
 {
     MOZ_ASSERT(input == dest, "defineReuseInput");
--- a/js/src/jit/x86/MacroAssembler-x86-inl.h
+++ b/js/src/jit/x86/MacroAssembler-x86-inl.h
@@ -467,16 +467,89 @@ MacroAssembler::rshift64Arithmetic(Regis
     // 32 - 63 bit shift
     movl(srcDest.high, srcDest.low);
     sarl(Imm32(0x1f), srcDest.high);
 
     bind(&done);
 }
 
 // ===============================================================
+// Rotation functions
+
+void
+MacroAssembler::rotateLeft64(Register count, Register64 src, Register64 dest, Register temp)
+{
+    MOZ_ASSERT(src == dest, "defineReuseInput");
+    MOZ_ASSERT(count == ecx, "defineFixed(ecx)");
+    // 64-bit rotate left of the high:low pair in dest by a variable count, using temp as scratch.
+    Label done;
+
+    movl(dest.high, temp);          // Keep the original high word for the second shld.
+    shldl_cl(dest.low, dest.high);  // high = high << n | low >> (32 - n)
+    shldl_cl(temp, dest.low);       // low = low << n | old high >> (32 - n)
+    // NOTE(review): relies on shld using only the low five bits of cl -- see the x86 manual.
+    testl(Imm32(0x20), count);      // Bit 5 of the count set: the two halves must also be swapped.
+    j(Condition::Equal, &done);
+    xchgl(dest.high, dest.low);
+
+    bind(&done);
+}
+
+void
+MacroAssembler::rotateRight64(Register count, Register64 src, Register64 dest, Register temp)
+{
+    MOZ_ASSERT(src == dest, "defineReuseInput");
+    MOZ_ASSERT(count == ecx, "defineFixed(ecx)");
+    // 64-bit rotate right of the high:low pair in dest by a variable count, using temp as scratch.
+    Label done;
+
+    movl(dest.high, temp);          // Keep the original high word for the second shrd.
+    shrdl_cl(dest.low, dest.high);  // First shrd: updates dest.high, taking bits from dest.low.
+    shrdl_cl(temp, dest.low);       // Second shrd: updates dest.low, taking bits from the saved high.
+    // NOTE(review): relies on shrd using only the low five bits of cl -- see the x86 manual.
+    testl(Imm32(0x20), count);      // Bit 5 of the count set: the two halves must also be swapped.
+    j(Condition::Equal, &done);
+    xchgl(dest.high, dest.low);
+
+    bind(&done);
+}
+
+void
+MacroAssembler::rotateLeft64(Imm32 count, Register64 src, Register64 dest, Register temp)
+{
+    MOZ_ASSERT(src == dest, "defineReuseInput");
+    // Rotate the high:low pair left by a constant; only the low six bits of count matter.
+    int32_t amount = count.value & 0x3f;
+    if ((amount & 0x1f) != 0) {  // Mask, not modulo: counts 31 and 62 must still shift.
+        movl(dest.high, temp);   // Keep the original high word for the second shld.
+        shldl(Imm32(amount & 0x1f), dest.low, dest.high);
+        shldl(Imm32(amount & 0x1f), temp, dest.low);
+    }
+    // A count of 32..63 additionally swaps the two 32-bit halves.
+    if (!!(amount & 0x20))
+        xchgl(dest.high, dest.low);
+}
+
+void
+MacroAssembler::rotateRight64(Imm32 count, Register64 src, Register64 dest, Register temp)
+{
+    MOZ_ASSERT(src == dest, "defineReuseInput");
+    // Rotate the high:low pair right by a constant; only the low six bits of count matter.
+    int32_t amount = count.value & 0x3f;
+    if ((amount & 0x1f) != 0) {  // Skip the shrd pair when the sub-word shift is zero.
+        movl(dest.high, temp);   // Keep the original high word for the second shrd.
+        shrdl(Imm32(amount & 0x1f), dest.low, dest.high);
+        shrdl(Imm32(amount & 0x1f), temp, dest.low);
+    }
+    // A count of 32..63 additionally swaps the two 32-bit halves.
+    if (!!(amount & 0x20))
+        xchgl(dest.high, dest.low);
+}
+
+// ===============================================================
 // Branch functions
 
 void
 MacroAssembler::branch32(Condition cond, const AbsoluteAddress& lhs, Register rhs, Label* label)
 {
     cmp32(Operand(lhs), rhs);
     j(cond, label);
 }