Bug 1279248 - Part 9: Implement the 64bit variant of Mul on x86, r=jandem
authorHannes Verschore <hv1989@gmail.com>
Fri, 29 Jul 2016 16:51:41 +0200
changeset 349400 0c56943e6d0e50426d82f90261d852768363a08a
parent 349399 5512359e559fa64dc8bfb0245766d9ebdd2540fc
child 349401 8c7e1e2e1a9df837743042ac1de6999bd0ee28af
push id1230
push userjlund@mozilla.com
push dateMon, 31 Oct 2016 18:13:35 +0000
treeherdermozilla-release@5e06e3766db2 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersjandem
bugs1279248
milestone50.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1279248 - Part 9: Implement the 64bit variant of Mul on x86, r=jandem
js/src/jit/Lowering.cpp
js/src/jit/MacroAssembler.h
js/src/jit/arm/Lowering-arm.cpp
js/src/jit/arm/Lowering-arm.h
js/src/jit/arm64/Lowering-arm64.cpp
js/src/jit/arm64/Lowering-arm64.h
js/src/jit/mips-shared/Lowering-mips-shared.cpp
js/src/jit/mips-shared/Lowering-mips-shared.h
js/src/jit/none/Lowering-none.h
js/src/jit/shared/LIR-shared.h
js/src/jit/x64/CodeGenerator-x64.cpp
js/src/jit/x64/CodeGenerator-x64.h
js/src/jit/x64/Lowering-x64.cpp
js/src/jit/x64/Lowering-x64.h
js/src/jit/x64/MacroAssembler-x64-inl.h
js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp
js/src/jit/x86-shared/CodeGenerator-x86-shared.h
js/src/jit/x86-shared/Lowering-x86-shared.cpp
js/src/jit/x86-shared/Lowering-x86-shared.h
js/src/jit/x86/Lowering-x86.cpp
js/src/jit/x86/Lowering-x86.h
js/src/jit/x86/MacroAssembler-x86-inl.h
--- a/js/src/jit/Lowering.cpp
+++ b/js/src/jit/Lowering.cpp
@@ -1711,17 +1711,17 @@ LIRGenerator::visitMul(MMul* ins)
             lowerMulI(ins, lhs, rhs);
         return;
     }
 
     if (ins->specialization() == MIRType::Int64) {
         MOZ_ASSERT(lhs->type() == MIRType::Int64);
         ReorderCommutative(&lhs, &rhs, ins);
         LMulI64* lir = new(alloc()) LMulI64;
-        lowerForALUInt64(lir, ins, lhs, rhs);
+        lowerForMulInt64(lir, ins, lhs, rhs);
         return;
     }
 
     if (ins->specialization() == MIRType::Double) {
         MOZ_ASSERT(lhs->type() == MIRType::Double);
         ReorderCommutative(&lhs, &rhs, ins);
 
         // If our RHS is a constant -1.0, we can optimize to an LNegD.
--- a/js/src/jit/MacroAssembler.h
+++ b/js/src/jit/MacroAssembler.h
@@ -779,17 +779,23 @@ class MacroAssembler : public MacroAssem
 
     inline void subDouble(FloatRegister src, FloatRegister dest) PER_SHARED_ARCH;
 
     // On x86-shared, srcDest must be eax and edx will be clobbered.
     inline void mul32(Register rhs, Register srcDest) PER_SHARED_ARCH;
 
     inline void mul32(Register src1, Register src2, Register dest, Label* onOver, Label* onZero) DEFINED_ON(arm64);
 
+    inline void mul64(const Operand& src, const Register64& dest) DEFINED_ON(x64);
+    inline void mul64(const Operand& src, const Register64& dest, const Register temp)
+        DEFINED_ON(x64);
     inline void mul64(Imm64 imm, const Register64& dest) PER_ARCH;
+    inline void mul64(Imm64 imm, const Register64& dest, const Register temp) DEFINED_ON(x86, x64);
+    inline void mul64(const Register64& src, const Register64& dest, const Register temp)
+        DEFINED_ON(x86, x64);
 
     inline void mulBy3(Register src, Register dest) PER_ARCH;
 
     inline void mulFloat32(FloatRegister src, FloatRegister dest) PER_SHARED_ARCH;
     inline void mulDouble(FloatRegister src, FloatRegister dest) PER_SHARED_ARCH;
 
     inline void mulDoublePtr(ImmPtr imm, Register temp, FloatRegister dest) DEFINED_ON(mips_shared, arm, arm64, x86, x64);
 
@@ -811,16 +817,17 @@ class MacroAssembler : public MacroAssem
     inline void divDouble(FloatRegister src, FloatRegister dest) PER_SHARED_ARCH;
 
     inline void inc32(RegisterOrInt32Constant* key);
     inline void inc64(AbsoluteAddress dest) PER_ARCH;
 
     inline void dec32(RegisterOrInt32Constant* key);
 
     inline void neg32(Register reg) PER_SHARED_ARCH;
+    inline void neg64(Register64 reg) DEFINED_ON(x86, x64);
 
     inline void negateFloat(FloatRegister reg) PER_SHARED_ARCH;
 
     inline void negateDouble(FloatRegister reg) PER_SHARED_ARCH;
 
     inline void absFloat32(FloatRegister src, FloatRegister dest) PER_SHARED_ARCH;
     inline void absDouble(FloatRegister src, FloatRegister dest) PER_SHARED_ARCH;
 
--- a/js/src/jit/arm/Lowering-arm.cpp
+++ b/js/src/jit/arm/Lowering-arm.cpp
@@ -166,16 +166,22 @@ LIRGeneratorARM::lowerForALU(LInstructio
 void
 LIRGeneratorARM::lowerForALUInt64(LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 0>* ins,
                                   MDefinition* mir, MDefinition* lhs, MDefinition* rhs)
 {
     MOZ_CRASH("NYI");
 }
 
 void
+LIRGeneratorARM::lowerForMulInt64(LMulI64* ins, MMul* mir, MDefinition* lhs, MDefinition* rhs)
+{
+    MOZ_CRASH("NYI");
+}
+
+void
 LIRGeneratorARM::lowerForFPU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir, MDefinition* input)
 {
     ins->setOperand(0, useRegisterAtStart(input));
     define(ins, mir, LDefinition(LDefinition::TypeFrom(mir->type()), LDefinition::REGISTER));
 }
 
 template<size_t Temps>
 void
--- a/js/src/jit/arm/Lowering-arm.h
+++ b/js/src/jit/arm/Lowering-arm.h
@@ -45,18 +45,19 @@ class LIRGeneratorARM : public LIRGenera
                        MDefinition* rhs);
     void lowerUrshD(MUrsh* mir);
 
     void lowerForALU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir,
                      MDefinition* input);
     void lowerForALU(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
                      MDefinition* lhs, MDefinition* rhs);
 
-    void lowerForALUInt64(LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 0>* ins, MDefinition* mir,
-                          MDefinition* lhs, MDefinition* rhs);
+    void lowerForALUInt64(LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 0>* ins,
+                          MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
+    void lowerForMulInt64(LMulI64* ins, MMul* mir, MDefinition* lhs, MDefinition* rhs);
     void lowerForShiftInt64(LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, 0>* ins,
                             MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
 
     void lowerForFPU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir,
                      MDefinition* src);
     template<size_t Temps>
     void lowerForFPU(LInstructionHelper<1, 2, Temps>* ins, MDefinition* mir,
                      MDefinition* lhs, MDefinition* rhs);
--- a/js/src/jit/arm64/Lowering-arm64.cpp
+++ b/js/src/jit/arm64/Lowering-arm64.cpp
@@ -90,16 +90,22 @@ template void LIRGeneratorARM64::lowerFo
 void
 LIRGeneratorARM64::lowerForALUInt64(LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 0>* ins,
                                     MDefinition* mir, MDefinition* lhs, MDefinition* rhs)
 {
     MOZ_CRASH("NYI");
 }
 
 void
+LIRGeneratorARM64::lowerForMulInt64(LMulI64* ins, MMul* mir, MDefinition* lhs, MDefinition* rhs)
+{
+    MOZ_CRASH("NYI");
+}
+
+void
 LIRGeneratorARM64::lowerForShiftInt64(LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, 0>* ins,
                                       MDefinition* mir, MDefinition* lhs, MDefinition* rhs)
 {
     MOZ_CRASH("NYI");
 }
 
 void
 LIRGeneratorARM64::lowerForBitAndAndBranch(LBitAndAndBranch* baab, MInstruction* mir,
--- a/js/src/jit/arm64/Lowering-arm64.h
+++ b/js/src/jit/arm64/Lowering-arm64.h
@@ -45,18 +45,19 @@ class LIRGeneratorARM64 : public LIRGene
     void lowerForShift(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir, MDefinition* lhs,
                        MDefinition* rhs);
     void lowerUrshD(MUrsh* mir);
 
     void lowerForALU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir, MDefinition* input);
     void lowerForALU(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
                      MDefinition* lhs, MDefinition* rhs);
 
-    void lowerForALUInt64(LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 0>* ins, MDefinition* mir,
-                          MDefinition* lhs, MDefinition* rhs);
+    void lowerForALUInt64(LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 0>* ins,
+                          MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
+    void lowerForMulInt64(LMulI64* ins, MMul* mir, MDefinition* lhs, MDefinition* rhs);
     void lowerForShiftInt64(LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, 0>* ins,
                             MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
 
     void lowerForFPU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir, MDefinition* input);
 
     template <size_t Temps>
     void lowerForFPU(LInstructionHelper<1, 2, Temps>* ins, MDefinition* mir,
                      MDefinition* lhs, MDefinition* rhs);
--- a/js/src/jit/mips-shared/Lowering-mips-shared.cpp
+++ b/js/src/jit/mips-shared/Lowering-mips-shared.cpp
@@ -57,16 +57,22 @@ LIRGeneratorMIPSShared::lowerForALU(LIns
 void
 LIRGeneratorMIPSShared::lowerForALUInt64(LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 0>* ins,
                                          MDefinition* mir, MDefinition* lhs, MDefinition* rhs)
 {
     MOZ_CRASH("NYI");
 }
 
 void
+LIRGeneratorMIPSShared::lowerForMulInt64(LMulI64* ins, MMul* mir, MDefinition* lhs, MDefinition* rhs)
+{
+    MOZ_CRASH("NYI");
+}
+
+void
 LIRGeneratorMIPSShared::lowerForShiftInt64(LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, 0>* ins,
                                            MDefinition* mir, MDefinition* lhs, MDefinition* rhs)
 {
     MOZ_CRASH("NYI");
 }
 
 void
 LIRGeneratorMIPSShared::lowerForFPU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir,
--- a/js/src/jit/mips-shared/Lowering-mips-shared.h
+++ b/js/src/jit/mips-shared/Lowering-mips-shared.h
@@ -35,18 +35,19 @@ class LIRGeneratorMIPSShared : public LI
                        MDefinition* rhs);
     void lowerUrshD(MUrsh* mir);
 
     void lowerForALU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir,
                      MDefinition* input);
     void lowerForALU(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
                      MDefinition* lhs, MDefinition* rhs);
 
-    void lowerForALUInt64(LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 0>* ins, MDefinition* mir,
-                          MDefinition* lhs, MDefinition* rhs);
+    void lowerForALUInt64(LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 0>* ins,
+                          MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
+    void lowerForMulInt64(LMulI64* ins, MMul* mir, MDefinition* lhs, MDefinition* rhs);
     void lowerForShiftInt64(LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, 0>* ins,
                             MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
 
     void lowerForFPU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir,
                      MDefinition* src);
     template<size_t Temps>
     void lowerForFPU(LInstructionHelper<1, 2, Temps>* ins, MDefinition* mir,
                      MDefinition* lhs, MDefinition* rhs);
--- a/js/src/jit/none/Lowering-none.h
+++ b/js/src/jit/none/Lowering-none.h
@@ -37,16 +37,17 @@ class LIRGeneratorNone : public LIRGener
     }
     void lowerUrshD(MUrsh*) { MOZ_CRASH(); }
     template <typename T>
     void lowerForALU(T, MDefinition*, MDefinition*, MDefinition* v = nullptr) { MOZ_CRASH(); }
     template <typename T>
     void lowerForFPU(T, MDefinition*, MDefinition*, MDefinition* v = nullptr) { MOZ_CRASH(); }
     template <typename T>
     void lowerForALUInt64(T, MDefinition*, MDefinition*, MDefinition* v = nullptr) { MOZ_CRASH(); }
+    void lowerForMulInt64(LMulI64*, MMul*, MDefinition*, MDefinition* v = nullptr) { MOZ_CRASH(); }
     template <typename T>
     void lowerForShiftInt64(T, MDefinition*, MDefinition*, MDefinition* v = nullptr) { MOZ_CRASH(); }
     void lowerForCompIx4(LSimdBinaryCompIx4* ins, MSimdBinaryComp* mir,
                          MDefinition* lhs, MDefinition* rhs) {
         MOZ_CRASH();
     }
     void lowerForCompFx4(LSimdBinaryCompFx4* ins, MSimdBinaryComp* mir,
                          MDefinition* lhs, MDefinition* rhs) {
--- a/js/src/jit/shared/LIR-shared.h
+++ b/js/src/jit/shared/LIR-shared.h
@@ -3808,20 +3808,32 @@ class LSubI64 : public LInstructionHelpe
 {
   public:
     LIR_HEADER(SubI64)
 
     static const size_t Lhs = 0;
     static const size_t Rhs = INT64_PIECES;
 };
 
-class LMulI64 : public LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 0>
+class LMulI64 : public LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 1>
 {
   public:
     LIR_HEADER(MulI64)
+
+    explicit LMulI64()
+    {
+        setTemp(0, LDefinition());
+    }
+
+    const LDefinition* temp() {
+        return getTemp(0);
+    }
+
+    static const size_t Lhs = 0;
+    static const size_t Rhs = INT64_PIECES;
 };
 
 // Performs an add, sub, mul, or div on two double values.
 class LMathD : public LBinaryMath<0>
 {
     JSOp jsop_;
 
   public:
--- a/js/src/jit/x64/CodeGenerator-x64.cpp
+++ b/js/src/jit/x64/CodeGenerator-x64.cpp
@@ -282,55 +282,16 @@ CodeGeneratorX64::visitRotate64(LRotate6
         if (mir->isLeftRotate())
             masm.rolq_cl(input);
         else
             masm.rorq_cl(input);
     }
 }
 
 void
-CodeGeneratorX64::visitMulI64(LMulI64* lir)
-{
-    Register lhs = ToRegister(lir->getOperand(0));
-    const LAllocation* rhs = lir->getOperand(1);
-
-    MOZ_ASSERT(ToRegister(lir->getDef(0)) == lhs);
-
-    if (rhs->isConstant()) {
-        int64_t constant = ToInt64(rhs);
-        switch (constant) {
-          case -1:
-            masm.negq(lhs);
-            return;
-          case 0:
-            masm.xorl(lhs, lhs);
-            return;
-          case 1:
-            // nop
-            return;
-          case 2:
-            masm.addq(lhs, lhs);
-            return;
-          default:
-            if (constant > 0) {
-                // Use shift if constant is power of 2.
-                int32_t shift = mozilla::FloorLog2(constant);
-                if (int64_t(1) << shift == constant) {
-                    masm.shlq(Imm32(shift), lhs);
-                    return;
-                }
-            }
-            masm.mul64(Imm64(constant), Register64(lhs));
-        }
-    } else {
-        masm.imulq(ToOperand(rhs), lhs);
-    }
-}
-
-void
 CodeGeneratorX64::visitDivOrModI64(LDivOrModI64* lir)
 {
     Register lhs = ToRegister(lir->lhs());
     Register rhs = ToRegister(lir->rhs());
     Register output = ToRegister(lir->output());
 
     MOZ_ASSERT_IF(lhs != rhs, rhs != rax);
     MOZ_ASSERT(rhs != rdx);
--- a/js/src/jit/x64/CodeGenerator-x64.h
+++ b/js/src/jit/x64/CodeGenerator-x64.h
@@ -48,17 +48,16 @@ class CodeGeneratorX64 : public CodeGene
     void visitUnbox(LUnbox* unbox);
     void visitCompareB(LCompareB* lir);
     void visitCompareBAndBranch(LCompareBAndBranch* lir);
     void visitCompareBitwise(LCompareBitwise* lir);
     void visitCompareBitwiseAndBranch(LCompareBitwiseAndBranch* lir);
     void visitCompareI64(LCompareI64* lir);
     void visitCompareI64AndBranch(LCompareI64AndBranch* lir);
     void visitRotate64(LRotate64* lir);
-    void visitMulI64(LMulI64* lir);
     void visitDivOrModI64(LDivOrModI64* lir);
     void visitUDivOrMod64(LUDivOrMod64* lir);
     void visitNotI64(LNotI64* lir);
     void visitClzI64(LClzI64* lir);
     void visitCtzI64(LCtzI64* lir);
     void visitPopcntI64(LPopcntI64* lir);
     void visitTruncateDToInt32(LTruncateDToInt32* ins);
     void visitTruncateFToInt32(LTruncateFToInt32* ins);
--- a/js/src/jit/x64/Lowering-x64.cpp
+++ b/js/src/jit/x64/Lowering-x64.cpp
@@ -43,16 +43,36 @@ LIRGeneratorX64::tempByteOpRegister()
 
 LDefinition
 LIRGeneratorX64::tempToUnbox()
 {
     return temp();
 }
 
 void
+LIRGeneratorX64::lowerForALUInt64(LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 0>* ins,
+                                  MDefinition* mir, MDefinition* lhs, MDefinition* rhs)
+{
+    ins->setInt64Operand(0, useInt64RegisterAtStart(lhs));
+    ins->setInt64Operand(INT64_PIECES,
+                         lhs != rhs ? useInt64OrConstant(rhs) : useInt64OrConstantAtStart(rhs));
+    defineInt64ReuseInput(ins, mir, 0);
+}
+
+void
+LIRGeneratorX64::lowerForMulInt64(LMulI64* ins, MMul* mir, MDefinition* lhs, MDefinition* rhs)
+{
+    // X64 doesn't need a temp for 64bit multiplication.
+    ins->setInt64Operand(0, useInt64RegisterAtStart(lhs));
+    ins->setInt64Operand(INT64_PIECES,
+                         lhs != rhs ? useInt64OrConstant(rhs) : useInt64OrConstantAtStart(rhs));
+    defineInt64ReuseInput(ins, mir, 0);
+}
+
+void
 LIRGeneratorX64::visitBox(MBox* box)
 {
     MDefinition* opd = box->getOperand(0);
 
     // If the operand is a constant, emit near its uses.
     if (opd->isConstant() && box->canEmitAtUses()) {
         emitAtUses(box);
         return;
--- a/js/src/jit/x64/Lowering-x64.h
+++ b/js/src/jit/x64/Lowering-x64.h
@@ -20,16 +20,19 @@ class LIRGeneratorX64 : public LIRGenera
     { }
 
   protected:
     void lowerUntypedPhiInput(MPhi* phi, uint32_t inputPosition, LBlock* block, size_t lirIndex);
     void defineUntypedPhi(MPhi* phi, size_t lirIndex);
     void lowerInt64PhiInput(MPhi* phi, uint32_t inputPosition, LBlock* block, size_t lirIndex);
     void defineInt64Phi(MPhi* phi, size_t lirIndex);
 
+    void lowerForALUInt64(LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 0>* ins,
+                          MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
+    void lowerForMulInt64(LMulI64* ins, MMul* mir, MDefinition* lhs, MDefinition* rhs);
 
     // Returns a box allocation. reg2 is ignored on 64-bit platforms.
     LBoxAllocation useBoxFixed(MDefinition* mir, Register reg1, Register, bool useAtStart = false);
 
     // x86 has constraints on what registers can be formatted for 1-byte
     // stores and loads; on x64 all registers are okay.
     LAllocation useByteOpRegister(MDefinition* mir);
     LAllocation useByteOpRegisterOrNonDoubleConstant(MDefinition* mir);
--- a/js/src/jit/x64/MacroAssembler-x64-inl.h
+++ b/js/src/jit/x64/MacroAssembler-x64-inl.h
@@ -252,23 +252,50 @@ MacroAssembler::sub64(Register64 src, Re
 
 void
 MacroAssembler::sub64(Imm64 imm, Register64 dest)
 {
     subPtr(ImmWord(imm.value), dest.reg);
 }
 
 void
+MacroAssembler::mul64(Imm64 imm, const Register64& dest, const Register temp)
+{
+    MOZ_ASSERT(temp == InvalidReg);
+    mul64(imm, dest);
+}
+
+void
 MacroAssembler::mul64(Imm64 imm, const Register64& dest)
 {
     movq(ImmWord(uintptr_t(imm.value)), ScratchReg);
     imulq(ScratchReg, dest.reg);
 }
 
 void
+MacroAssembler::mul64(const Register64& src, const Register64& dest, const Register temp)
+{
+    MOZ_ASSERT(temp == InvalidReg);
+    mul64(Operand(src.reg), dest);
+}
+
+void
+MacroAssembler::mul64(const Operand& src, const Register64& dest)
+{
+    imulq(src, dest.reg);
+}
+
+void
+MacroAssembler::mul64(const Operand& src, const Register64& dest, const Register temp)
+{
+    MOZ_ASSERT(temp == InvalidReg);
+    mul64(src, dest);
+}
+
+void
 MacroAssembler::mulBy3(Register src, Register dest)
 {
     lea(Operand(src, src, TimesTwo), dest);
 }
 
 void
 MacroAssembler::mulDoublePtr(ImmPtr imm, Register temp, FloatRegister dest)
 {
@@ -283,16 +310,22 @@ MacroAssembler::inc64(AbsoluteAddress de
         addPtr(Imm32(1), dest);
     } else {
         ScratchRegisterScope scratch(*this);
         mov(ImmPtr(dest.addr), scratch);
         addPtr(Imm32(1), Address(scratch, 0));
     }
 }
 
+void
+MacroAssembler::neg64(Register64 reg)
+{
+    negq(reg.reg);
+}
+
 // ===============================================================
 // Shift functions
 
 void
 MacroAssembler::lshiftPtr(Imm32 imm, Register dest)
 {
     shlq(imm, dest);
 }
--- a/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp
+++ b/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp
@@ -1055,16 +1055,57 @@ CodeGeneratorX86Shared::visitMulI(LMulI*
 
             masm.test32(ToRegister(lhs), ToRegister(lhs));
             masm.j(Assembler::Zero, ool->entry());
             masm.bind(ool->rejoin());
         }
     }
 }
 
+void
+CodeGeneratorX86Shared::visitMulI64(LMulI64* lir)
+{
+    const LInt64Allocation lhs = lir->getInt64Operand(LMulI64::Lhs);
+    const LInt64Allocation rhs = lir->getInt64Operand(LMulI64::Rhs);
+
+    MOZ_ASSERT(ToRegister64(lhs) == ToOutRegister64(lir));
+
+    if (IsConstant(rhs)) {
+        int64_t constant = ToInt64(rhs);
+        switch (constant) {
+          case -1:
+            masm.neg64(ToRegister64(lhs));
+            return;
+          case 0:
+            masm.xor64(ToRegister64(lhs), ToRegister64(lhs));
+            return;
+          case 1:
+            // nop
+            return;
+          case 2:
+            masm.add64(ToRegister64(lhs), ToRegister64(lhs));
+            return;
+          default:
+            if (constant > 0) {
+                // Use shift if constant is power of 2.
+                int32_t shift = mozilla::FloorLog2(constant);
+                if (int64_t(1) << shift == constant) {
+                    masm.lshift64(Imm32(shift), ToRegister64(lhs));
+                    return;
+                }
+            }
+            Register temp = ToTempRegisterOrInvalid(lir->temp());
+            masm.mul64(Imm64(constant), ToRegister64(lhs), temp);
+        }
+    } else {
+        Register temp = ToTempRegisterOrInvalid(lir->temp());
+        masm.mul64(ToOperandOrRegister64(rhs), ToRegister64(lhs), temp);
+    }
+}
+
 class ReturnZero : public OutOfLineCodeBase<CodeGeneratorX86Shared>
 {
     Register reg_;
 
   public:
     explicit ReturnZero(Register reg)
       : reg_(reg)
     { }
--- a/js/src/jit/x86-shared/CodeGenerator-x86-shared.h
+++ b/js/src/jit/x86-shared/CodeGenerator-x86-shared.h
@@ -237,16 +237,17 @@ class CodeGeneratorX86Shared : public Co
     virtual void visitSqrtD(LSqrtD* ins);
     virtual void visitSqrtF(LSqrtF* ins);
     virtual void visitPowHalfD(LPowHalfD* ins);
     virtual void visitAddI(LAddI* ins);
     virtual void visitAddI64(LAddI64* ins);
     virtual void visitSubI(LSubI* ins);
     virtual void visitSubI64(LSubI64* ins);
     virtual void visitMulI(LMulI* ins);
+    virtual void visitMulI64(LMulI64* ins);
     virtual void visitDivI(LDivI* ins);
     virtual void visitDivPowTwoI(LDivPowTwoI* ins);
     virtual void visitDivOrModConstantI(LDivOrModConstantI* ins);
     virtual void visitModI(LModI* ins);
     virtual void visitModPowTwoI(LModPowTwoI* ins);
     virtual void visitBitNotI(LBitNotI* ins);
     virtual void visitBitOpI(LBitOpI* ins);
     virtual void visitBitOpI64(LBitOpI64* ins);
--- a/js/src/jit/x86-shared/Lowering-x86-shared.cpp
+++ b/js/src/jit/x86-shared/Lowering-x86-shared.cpp
@@ -116,26 +116,16 @@ void
 LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir,
                                    MDefinition* lhs, MDefinition* rhs)
 {
     ins->setOperand(0, useRegisterAtStart(lhs));
     ins->setOperand(1, lhs != rhs ? useOrConstant(rhs) : useOrConstantAtStart(rhs));
     defineReuseInput(ins, mir, 0);
 }
 
-void
-LIRGeneratorX86Shared::lowerForALUInt64(LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 0>* ins,
-                                        MDefinition* mir, MDefinition* lhs, MDefinition* rhs)
-{
-    ins->setInt64Operand(0, useInt64RegisterAtStart(lhs));
-    ins->setInt64Operand(INT64_PIECES,
-                         lhs != rhs ? useInt64OrConstant(rhs) : useInt64OrConstantAtStart(rhs));
-    defineInt64ReuseInput(ins, mir, 0);
-}
-
 template<size_t Temps>
 void
 LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, Temps>* ins, MDefinition* mir, MDefinition* lhs, MDefinition* rhs)
 {
     // Without AVX, we'll need to use the x86 encodings where one of the
     // inputs must be the same location as the output.
     if (!Assembler::HasAVX()) {
         ins->setOperand(0, useRegisterAtStart(lhs));
--- a/js/src/jit/x86-shared/Lowering-x86-shared.h
+++ b/js/src/jit/x86-shared/Lowering-x86-shared.h
@@ -27,18 +27,16 @@ class LIRGeneratorX86Shared : public LIR
     void visitGuardObjectGroup(MGuardObjectGroup* ins);
     void visitPowHalf(MPowHalf* ins);
     void lowerForShift(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir, MDefinition* lhs,
                        MDefinition* rhs);
     void lowerForALU(LInstructionHelper<1, 1, 0>* ins, MDefinition* mir, MDefinition* input);
     void lowerForALU(LInstructionHelper<1, 2, 0>* ins, MDefinition* mir, MDefinition* lhs,
                      MDefinition* rhs);
 
-    void lowerForALUInt64(LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 0>* ins, MDefinition* mir,
-                          MDefinition* lhs, MDefinition* rhs);
     void lowerForShiftInt64(LInstructionHelper<INT64_PIECES, INT64_PIECES + 1, 0>* ins,
                             MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
 
     template<size_t Temps>
     void lowerForFPU(LInstructionHelper<1, 2, Temps>* ins, MDefinition* mir, MDefinition* lhs,
                      MDefinition* rhs);
     void lowerForCompIx4(LSimdBinaryCompIx4* ins, MSimdBinaryComp* mir,
                          MDefinition* lhs, MDefinition* rhs);
--- a/js/src/jit/x86/Lowering-x86.cpp
+++ b/js/src/jit/x86/Lowering-x86.cpp
@@ -194,16 +194,49 @@ LIRGeneratorX86::lowerInt64PhiInput(MPhi
     MDefinition* operand = phi->getOperand(inputPosition);
     LPhi* low = block->getPhi(lirIndex + INT64LOW_INDEX);
     LPhi* high = block->getPhi(lirIndex + INT64HIGH_INDEX);
     low->setOperand(inputPosition, LUse(operand->virtualRegister() + INT64LOW_INDEX, LUse::ANY));
     high->setOperand(inputPosition, LUse(operand->virtualRegister() + INT64HIGH_INDEX, LUse::ANY));
 }
 
 void
+LIRGeneratorX86::lowerForALUInt64(LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 0>* ins,
+                                  MDefinition* mir, MDefinition* lhs, MDefinition* rhs)
+{
+    ins->setInt64Operand(0, useInt64RegisterAtStart(lhs));
+    ins->setInt64Operand(INT64_PIECES,
+                         lhs != rhs ? useInt64OrConstant(rhs) : useInt64OrConstantAtStart(rhs));
+    defineInt64ReuseInput(ins, mir, 0);
+}
+
+void
+LIRGeneratorX86::lowerForMulInt64(LMulI64* ins, MMul* mir, MDefinition* lhs, MDefinition* rhs)
+{
+    bool constantNeedTemp = true;
+    if (rhs->isConstant()) {
+        int64_t constant = rhs->toConstant()->toInt64();
+        int32_t shift = mozilla::FloorLog2(constant);
+        if (constant <= 0 || int64_t(1) << shift != constant) {
+            constantNeedTemp = constant != -1 && constant != 0 &&
+                               constant != 1 && constant != 2;
+        }
+    }
+
+    // MulI64 on x86 needs output to be in edx, eax;
+    ins->setInt64Operand(0, useInt64Fixed(lhs, Register64(edx, eax), /*useAtStart = */ true));
+    ins->setInt64Operand(INT64_PIECES,
+            lhs != rhs ? useInt64OrConstant(rhs) : useInt64OrConstantAtStart(rhs));
+    if (constantNeedTemp)
+        ins->setTemp(0, temp());
+    defineInt64Fixed(ins, mir, LInt64Allocation(LAllocation(AnyRegister(edx)),
+                                                LAllocation(AnyRegister(eax))));
+}
+
+void
 LIRGeneratorX86::visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement* ins)
 {
     lowerCompareExchangeTypedArrayElement(ins, /* useI386ByteRegisters = */ true);
 }
 
 void
 LIRGeneratorX86::visitAtomicExchangeTypedArrayElement(MAtomicExchangeTypedArrayElement* ins)
 {
--- a/js/src/jit/x86/Lowering-x86.h
+++ b/js/src/jit/x86/Lowering-x86.h
@@ -40,16 +40,20 @@ class LIRGeneratorX86 : public LIRGenera
     bool needTempForPostBarrier() { return true; }
 
     void lowerUntypedPhiInput(MPhi* phi, uint32_t inputPosition, LBlock* block, size_t lirIndex);
     void defineUntypedPhi(MPhi* phi, size_t lirIndex);
 
     void lowerInt64PhiInput(MPhi* phi, uint32_t inputPosition, LBlock* block, size_t lirIndex);
     void defineInt64Phi(MPhi* phi, size_t lirIndex);
 
+    void lowerForALUInt64(LInstructionHelper<INT64_PIECES, 2 * INT64_PIECES, 0>* ins,
+                          MDefinition* mir, MDefinition* lhs, MDefinition* rhs);
+    void lowerForMulInt64(LMulI64* ins, MMul* mir, MDefinition* lhs, MDefinition* rhs);
+
     void lowerDivI64(MDiv* div);
     void lowerModI64(MMod* mod);
 
   public:
     void visitBox(MBox* box);
     void visitUnbox(MUnbox* unbox);
     void visitReturn(MReturn* ret);
     void visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement* ins);
--- a/js/src/jit/x86/MacroAssembler-x86-inl.h
+++ b/js/src/jit/x86/MacroAssembler-x86-inl.h
@@ -264,16 +264,62 @@ MacroAssembler::mul64(Imm64 imm, const R
         MOZ_CRASH("Unsupported imm");
     addl(edx, dest.high);
 
     // LOW(dest) = eax;
     movl(eax, dest.low);
 }
 
 void
+MacroAssembler::mul64(Imm64 imm, const Register64& dest, const Register temp)
+{
+    // LOW32  = LOW(LOW(dest) * LOW(src));                                  (1)
+    // HIGH32 = LOW(HIGH(dest) * LOW(src)) [multiply src into upper bits]   (2)
+    //        + LOW(LOW(dest) * HIGH(src)) [multiply dest into upper bits]  (3)
+    //        + HIGH(LOW(dest) * LOW(src)) [carry]                          (4)
+
+    MOZ_ASSERT(dest == Register64(edx, eax));
+    MOZ_ASSERT(temp != edx && temp != eax);
+
+    movl(dest.low, temp);
+
+    // Compute mul64
+    imull(imm.low(), dest.high); // (2)
+    imull(imm.hi(), temp); // (3)
+    addl(dest.high, temp);
+    movl(imm.low(), dest.high);
+    mull(dest.high/*, dest.low*/); // (4) + (1) output in edx:eax (dest_hi:dest_lo)
+    addl(temp, dest.high);
+}
+
+void
+MacroAssembler::mul64(const Register64& src, const Register64& dest, const Register temp)
+{
+    // LOW32  = LOW(LOW(dest) * LOW(src));                                  (1)
+    // HIGH32 = LOW(HIGH(dest) * LOW(src)) [multiply src into upper bits]   (2)
+    //        + LOW(LOW(dest) * HIGH(src)) [multiply dest into upper bits]  (3)
+    //        + HIGH(LOW(dest) * LOW(src)) [carry]                          (4)
+
+    MOZ_ASSERT(dest == Register64(edx, eax));
+    MOZ_ASSERT(src != Register64(edx, eax) && src != Register64(eax, edx));
+
+    // Make sure the rhs.high isn't the dest.high register anymore.
+    // This saves us from doing other register moves.
+    movl(dest.low, temp);
+
+    // Compute mul64
+    imull(src.low, dest.high); // (2)
+    imull(src.high, temp); // (3)
+    addl(dest.high, temp);
+    movl(src.low, dest.high);
+    mull(dest.high/*, dest.low*/); // (4) + (1) output in edx:eax (dest_hi:dest_lo)
+    addl(temp, dest.high);
+}
+
+void
 MacroAssembler::mulBy3(Register src, Register dest)
 {
     lea(Operand(src, src, TimesTwo), dest);
 }
 
 void
 MacroAssembler::mulDoublePtr(ImmPtr imm, Register temp, FloatRegister dest)
 {
@@ -286,16 +332,24 @@ MacroAssembler::inc64(AbsoluteAddress de
 {
     addl(Imm32(1), Operand(dest));
     Label noOverflow;
     j(NonZero, &noOverflow);
     addl(Imm32(1), Operand(dest.offset(4)));
     bind(&noOverflow);
 }
 
+void
+MacroAssembler::neg64(Register64 reg)
+{
+    negl(reg.low);
+    adcl(Imm32(0), reg.high);
+    negl(reg.high);
+}
+
 // ===============================================================
 // Shift functions
 
 void
 MacroAssembler::lshiftPtr(Imm32 imm, Register dest)
 {
     shll(imm, dest);
 }