Bug 1279248 - Part 26: Implement the 64bit variant of WasmLoad and WasmStore on x86, r=bbouvier
author: Hannes Verschore <hv1989@gmail.com>
Fri, 29 Jul 2016 16:53:48 +0200
changeset 332381 4dabba8cf9261e11c487fb9aac71bc866f45250a
parent 332380 bb7803606205b1d23590fa05d78c384b833f614d
child 332382 c1e1449a0ad8448df7ae7bc2b2fcdfc40ac238e0
push id: 9858
push user: jlund@mozilla.com
push date: Mon, 01 Aug 2016 14:37:10 +0000
treeherder: mozilla-aurora@203106ef6cb6
reviewers: bbouvier
bugs: 1279248
milestone: 50.0a1
Bug 1279248 - Part 26: Implement the 64bit variant of WasmLoad and WasmStore on x86, r=bbouvier
js/src/jit/shared/LIR-shared.h
js/src/jit/shared/LOpcodes-shared.h
js/src/jit/x64/CodeGenerator-x64.cpp
js/src/jit/x64/CodeGenerator-x64.h
js/src/jit/x64/Lowering-x64.cpp
js/src/jit/x64/Lowering-x64.h
js/src/jit/x86-shared/Lowering-x86-shared.cpp
js/src/jit/x86-shared/Lowering-x86-shared.h
js/src/jit/x86/CodeGenerator-x86.cpp
js/src/jit/x86/CodeGenerator-x86.h
js/src/jit/x86/Lowering-x86.cpp
js/src/jit/x86/Lowering-x86.h
--- a/js/src/jit/shared/LIR-shared.h
+++ b/js/src/jit/shared/LIR-shared.h
@@ -7801,32 +7801,63 @@ class LWasmLoadI64 : public details::LWa
 
     LIR_HEADER(WasmLoadI64);
 };
 
 class LWasmStore : public LInstructionHelper<0, 2, 1>
 {
   public:
     LIR_HEADER(WasmStore);
+
+    static const size_t PtrIndex = 0;
+    static const size_t ValueIndex = 1;
+
     LWasmStore(const LAllocation& ptr, const LAllocation& value) {
-        setOperand(0, ptr);
-        setOperand(1, value);
+        setOperand(PtrIndex, ptr);
+        setOperand(ValueIndex, value);
         setTemp(0, LDefinition::BogusTemp());
     }
     MWasmStore* mir() const {
         return mir_->toWasmStore();
     }
     const LAllocation* ptr() {
-        return getOperand(0);
+        return getOperand(PtrIndex);
     }
     const LDefinition* ptrCopy() {
         return getTemp(0);
     }
     const LAllocation* value() {
-        return getOperand(1);
+        return getOperand(ValueIndex);
+    }
+};
+
+class LWasmStoreI64 : public LInstructionHelper<0, INT64_PIECES + 1, 1>
+{
+  public:
+    LIR_HEADER(WasmStoreI64);
+
+    static const size_t PtrIndex = 0;
+    static const size_t ValueIndex = 1;
+
+    LWasmStoreI64(const LAllocation& ptr, const LInt64Allocation& value) {
+        setOperand(PtrIndex, ptr);
+        setInt64Operand(ValueIndex, value);
+        setTemp(0, LDefinition::BogusTemp());
+    }
+    MWasmStore* mir() const {
+        return mir_->toWasmStore();
+    }
+    const LAllocation* ptr() {
+        return getOperand(PtrIndex);
+    }
+    const LDefinition* ptrCopy() {
+        return getTemp(0);
+    }
+    const LInt64Allocation value() {
+        return getInt64Operand(ValueIndex);
     }
 };
 
 class LAsmJSLoadHeap : public LInstructionHelper<1, 1, 0>
 {
   public:
     LIR_HEADER(AsmJSLoadHeap);
     explicit LAsmJSLoadHeap(const LAllocation& ptr) {
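
The new LWasmStoreI64 node mirrors LWasmStore but reserves INT64_PIECES operand slots for the value (two 32-bit halves on x86, a single register on x64) plus one slot for the pointer; the named PtrIndex/ValueIndex constants replace the bare 0/1 indices so both code generators can address operands by role. A minimal sketch of how a lowering builds the node, matching the x86 lowering later in this patch (baseAlloc is the pointer allocation computed in visitWasmStore):

    LInt64Allocation valueAlloc = useInt64RegisterAtStart(ins->value());
    // ptr goes in operand PtrIndex; the int64 value occupies the INT64_PIECES
    // slots starting at ValueIndex.
    auto* lir = new(alloc()) LWasmStoreI64(baseAlloc, valueAlloc);
    add(lir, ins);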
--- a/js/src/jit/shared/LOpcodes-shared.h
+++ b/js/src/jit/shared/LOpcodes-shared.h
@@ -383,16 +383,17 @@
     _(IsObject)                     \
     _(IsObjectAndBranch)            \
     _(HasClass)                     \
     _(AsmSelect)                    \
     _(AsmSelectI64)                 \
     _(WasmLoad)                     \
     _(WasmLoadI64)                  \
     _(WasmStore)                    \
+    _(WasmStoreI64)                 \
     _(WasmBoundsCheck)              \
     _(WasmLoadGlobalVar)            \
     _(WasmLoadGlobalVarI64)         \
     _(WasmStoreGlobalVar)           \
     _(WasmStoreGlobalVarI64)        \
     _(AsmJSLoadHeap)                \
     _(AsmJSStoreHeap)               \
     _(AsmJSLoadFuncPtr)             \
--- a/js/src/jit/x64/CodeGenerator-x64.cpp
+++ b/js/src/jit/x64/CodeGenerator-x64.cpp
@@ -499,115 +499,130 @@ CodeGeneratorX64::load(Scalar::Type type
         MOZ_CRASH("int64 loads must use load64");
       case Scalar::Uint8Clamped:
       case Scalar::MaxTypedArrayViewType:
         MOZ_CRASH("unexpected array type");
     }
 }
 
 void
-CodeGeneratorX64::loadI64(Scalar::Type type, const Operand& srcAddr, AnyRegister out)
+CodeGeneratorX64::loadI64(Scalar::Type type, const Operand& srcAddr, Register64 out)
 {
     switch (type) {
-      case Scalar::Int8:      masm.movsbq(srcAddr, out.gpr()); break;
-      case Scalar::Uint8:     masm.movzbq(srcAddr, out.gpr()); break;
-      case Scalar::Int16:     masm.movswq(srcAddr, out.gpr()); break;
-      case Scalar::Uint16:    masm.movzwq(srcAddr, out.gpr()); break;
-      case Scalar::Int32:     masm.movslq(srcAddr, out.gpr()); break;
+      case Scalar::Int8:      masm.movsbq(srcAddr, out.reg); break;
+      case Scalar::Uint8:     masm.movzbq(srcAddr, out.reg); break;
+      case Scalar::Int16:     masm.movswq(srcAddr, out.reg); break;
+      case Scalar::Uint16:    masm.movzwq(srcAddr, out.reg); break;
+      case Scalar::Int32:     masm.movslq(srcAddr, out.reg); break;
       // Int32 to int64 moves zero-extend by default.
-      case Scalar::Uint32:    masm.movl(srcAddr, out.gpr());   break;
-      case Scalar::Int64:     masm.movq(srcAddr, out.gpr());   break;
+      case Scalar::Uint32:    masm.movl(srcAddr, out.reg);   break;
+      case Scalar::Int64:     masm.movq(srcAddr, out.reg);   break;
       case Scalar::Float32:
       case Scalar::Float64:
       case Scalar::Float32x4:
       case Scalar::Int8x16:
       case Scalar::Int16x8:
       case Scalar::Int32x4:
         MOZ_CRASH("non-int64 loads should use load()");
       case Scalar::Uint8Clamped:
       case Scalar::MaxTypedArrayViewType:
         MOZ_CRASH("unexpected array type");
     }
 }
 
+template <typename T>
 void
-CodeGeneratorX64::visitWasmLoadBase(const MWasmLoad* mir, const LAllocation* ptr,
-                                    const LDefinition* output, bool isInt64)
+CodeGeneratorX64::emitWasmLoad(T* ins)
 {
+    const MWasmLoad* mir = ins->mir();
+    bool isInt64 = mir->type() == MIRType::Int64;
+
     Scalar::Type accessType = mir->accessType();
     MOZ_ASSERT(!Scalar::isSimdType(accessType), "SIMD NYI");
     MOZ_ASSERT(!mir->barrierBefore() && !mir->barrierAfter(), "atomics NYI");
 
     if (mir->offset() > INT32_MAX) {
         masm.jump(wasm::JumpTarget::OutOfBounds);
         return;
     }
 
+    const LAllocation* ptr = ins->ptr();
     Operand srcAddr = ptr->isBogus()
                       ? Operand(HeapReg, mir->offset())
                       : Operand(HeapReg, ToRegister(ptr), TimesOne, mir->offset());
 
-    AnyRegister out = ToAnyRegister(output);
-
     uint32_t before = masm.size();
-    if (!isInt64)
-        load(accessType, srcAddr, out);
+    if (isInt64)
+        loadI64(accessType, srcAddr, ToOutRegister64(ins));
     else
-        loadI64(accessType, srcAddr, out);
+        load(accessType, srcAddr, ToAnyRegister(ins->output()));
     uint32_t after = masm.size();
 
     verifyLoadDisassembly(before, after, isInt64, accessType, /* numElems */ 0, srcAddr,
-                          *output->output());
+                          *ins->output()->output());
 
     masm.append(WasmMemoryAccess(before));
 }
 
 void
 CodeGeneratorX64::visitWasmLoad(LWasmLoad* ins)
 {
-    visitWasmLoadBase(ins->mir(), ins->ptr(), ins->output(), /* isInt64 */ false);
+    emitWasmLoad(ins);
 }
 
 void
 CodeGeneratorX64::visitWasmLoadI64(LWasmLoadI64* ins)
 {
-    visitWasmLoadBase(ins->mir(), ins->ptr(), ins->output(), /* isInt64 */ true);
+    emitWasmLoad(ins);
 }
 
+template <typename T>
 void
-CodeGeneratorX64::visitWasmStore(LWasmStore* ins)
+CodeGeneratorX64::emitWasmStore(T* ins)
 {
     const MWasmStore* mir = ins->mir();
 
     Scalar::Type accessType = mir->accessType();
     MOZ_ASSERT(!Scalar::isSimdType(accessType), "SIMD NYI");
     MOZ_ASSERT(!mir->barrierBefore() && !mir->barrierAfter(), "atomics NYI");
 
     if (mir->offset() > INT32_MAX) {
         masm.jump(wasm::JumpTarget::OutOfBounds);
         return;
     }
 
-    const LAllocation* value = ins->value();
+    const LAllocation* value = ins->getOperand(ins->ValueIndex);
     const LAllocation* ptr = ins->ptr();
     Operand dstAddr = ptr->isBogus()
                       ? Operand(HeapReg, mir->offset())
                       : Operand(HeapReg, ToRegister(ptr), TimesOne, mir->offset());
 
     uint32_t before = masm.size();
     store(accessType, value, dstAddr);
     uint32_t after = masm.size();
 
     verifyStoreDisassembly(before, after, mir->value()->type() == MIRType::Int64,
                            accessType, /* numElems */ 0, dstAddr, *value);
 
     masm.append(WasmMemoryAccess(before));
 }
 
 void
+CodeGeneratorX64::visitWasmStore(LWasmStore* ins)
+{
+    emitWasmStore(ins);
+}
+
+void
+CodeGeneratorX64::visitWasmStoreI64(LWasmStoreI64* ins)
+{
+    emitWasmStore(ins);
+}
+
+void
 CodeGeneratorX64::emitSimdLoad(LAsmJSLoadHeap* ins)
 {
     const MAsmJSLoadHeap* mir = ins->mir();
     Scalar::Type type = mir->accessType();
     FloatRegister out = ToFloatRegister(ins->output());
     const LAllocation* ptr = ins->ptr();
     Operand srcAddr = ptr->isBogus()
                       ? Operand(HeapReg, mir->offset())
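
On x64 an int64 result fits in one register, so loadI64 now takes a Register64 instead of an AnyRegister. The split between movl for Uint32 and movslq for Int32 matters because writing a 32-bit register already zero-extends into the full 64-bit register, while signed narrow types need an explicit sign-extending move. A standalone illustration of the two widening behaviors (plain C++, not jit code):

    #include <cstdint>

    // movl-style: the upper 32 bits become zero.
    uint64_t widenUint32(uint32_t v) { return v; }

    // movslq-style: the upper 32 bits replicate the sign bit.
    int64_t widenInt32(int32_t v) { return static_cast<int64_t>(v); }

    // widenUint32(0xFFFFFFFFu) == 0x00000000FFFFFFFFull
    // widenInt32(-1) has the bit pattern 0xFFFFFFFFFFFFFFFF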
--- a/js/src/jit/x64/CodeGenerator-x64.h
+++ b/js/src/jit/x64/CodeGenerator-x64.h
@@ -23,27 +23,30 @@ class CodeGeneratorX64 : public CodeGene
     ValueOperand ToOutValue(LInstruction* ins);
     ValueOperand ToTempValue(LInstruction* ins, size_t pos);
 
     void storeUnboxedValue(const LAllocation* value, MIRType valueType,
                            Operand dest, MIRType slotType);
     void memoryBarrier(MemoryBarrierBits barrier);
 
     void load(Scalar::Type type, const Operand& srcAddr, AnyRegister out);
-    void loadI64(Scalar::Type type, const Operand& srcAddr, AnyRegister out);
-    void visitWasmLoadBase(const MWasmLoad* mir, const LAllocation* ptr, const LDefinition* output,
-                           bool isInt64);
+    void loadI64(Scalar::Type type, const Operand& srcAddr, Register64 out);
 
     void store(Scalar::Type type, const LAllocation* value, const Operand& dstAddr);
 
     void loadSimd(Scalar::Type type, unsigned numElems, const Operand& srcAddr, FloatRegister out);
     void storeSimd(Scalar::Type type, unsigned numElems, FloatRegister in, const Operand& dstAddr);
 
     void emitSimdLoad(LAsmJSLoadHeap* ins);
     void emitSimdStore(LAsmJSStoreHeap* ins);
+
+    template <typename T>
+    void emitWasmLoad(T* ins);
+    template <typename T>
+    void emitWasmStore(T* ins);
   public:
     CodeGeneratorX64(MIRGenerator* gen, LIRGraph* graph, MacroAssembler* masm);
 
   public:
     void visitValue(LValue* value);
     void visitBox(LBox* box);
     void visitUnbox(LUnbox* unbox);
     void visitCompareB(LCompareB* lir);
@@ -63,16 +66,17 @@ class CodeGeneratorX64 : public CodeGene
     void visitExtendInt32ToInt64(LExtendInt32ToInt64* lir);
     void visitWasmTruncateToInt64(LWasmTruncateToInt64* lir);
     void visitInt64ToFloatingPoint(LInt64ToFloatingPoint* lir);
     void visitLoadTypedArrayElementStatic(LLoadTypedArrayElementStatic* ins);
     void visitStoreTypedArrayElementStatic(LStoreTypedArrayElementStatic* ins);
     void visitWasmLoad(LWasmLoad* ins);
     void visitWasmLoadI64(LWasmLoadI64* ins);
     void visitWasmStore(LWasmStore* ins);
+    void visitWasmStoreI64(LWasmStoreI64* ins);
     void visitWasmLoadGlobalVar(LWasmLoadGlobalVar* ins);
     void visitWasmStoreGlobalVar(LWasmStoreGlobalVar* ins);
     void visitWasmLoadGlobalVarI64(LWasmLoadGlobalVarI64* ins);
     void visitWasmStoreGlobalVarI64(LWasmStoreGlobalVarI64* ins);
     void visitAsmSelectI64(LAsmSelectI64* ins);
     void visitAsmJSCall(LAsmJSCall* ins);
     void visitAsmJSCallI64(LAsmJSCallI64* ins);
     void visitAsmJSLoadHeap(LAsmJSLoadHeap* ins);
--- a/js/src/jit/x64/Lowering-x64.cpp
+++ b/js/src/jit/x64/Lowering-x64.cpp
@@ -453,16 +453,31 @@ LIRGeneratorX64::lowerUModI64(MMod* mod)
 {
     LUDivOrModI64* lir = new(alloc()) LUDivOrModI64(useRegister(mod->lhs()),
                                                     useRegister(mod->rhs()),
                                                     tempFixed(rax));
     defineInt64Fixed(lir, mod, LInt64Allocation(LAllocation(AnyRegister(rdx))));
 }
 
 void
+LIRGeneratorX64::visitWasmLoad(MWasmLoad* ins)
+{
+    if (ins->type() != MIRType::Int64) {
+        lowerWasmLoad(ins);
+        return;
+    }
+
+    MDefinition* base = ins->base();
+    MOZ_ASSERT(base->type() == MIRType::Int32);
+
+    auto* lir = new(alloc()) LWasmLoadI64(useRegisterOrZeroAtStart(base));
+    defineInt64(lir, ins);
+}
+
+void
 LIRGeneratorX64::visitWasmTruncateToInt64(MWasmTruncateToInt64* ins)
 {
     MDefinition* opd = ins->input();
     MOZ_ASSERT(opd->type() == MIRType::Double || opd->type() == MIRType::Float32);
 
     LDefinition maybeTemp = ins->isUnsigned() ? tempDouble() : LDefinition::BogusTemp();
     defineInt64(new(alloc()) LWasmTruncateToInt64(useRegister(opd), maybeTemp), ins);
 }
--- a/js/src/jit/x64/Lowering-x64.h
+++ b/js/src/jit/x64/Lowering-x64.h
@@ -43,16 +43,17 @@ class LIRGeneratorX64 : public LIRGenera
     bool needTempForPostBarrier() { return false; }
 
     void lowerDivI64(MDiv* div);
     void lowerModI64(MMod* mod);
     void lowerUDivI64(MDiv* div);
     void lowerUModI64(MMod* mod);
 
   public:
+    void visitWasmLoad(MWasmLoad* ins);
     void visitBox(MBox* box);
     void visitUnbox(MUnbox* unbox);
     void visitReturn(MReturn* ret);
     void visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement* ins);
     void visitAtomicExchangeTypedArrayElement(MAtomicExchangeTypedArrayElement* ins);
     void visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop* ins);
     void visitAsmJSUnsignedToDouble(MAsmJSUnsignedToDouble* ins);
     void visitAsmJSUnsignedToFloat32(MAsmJSUnsignedToFloat32* ins);
--- a/js/src/jit/x86-shared/Lowering-x86-shared.cpp
+++ b/js/src/jit/x86-shared/Lowering-x86-shared.cpp
@@ -331,27 +331,23 @@ LIRGeneratorX86Shared::visitWasmBoundsCh
         return;
 
     MDefinition* index = ins->input();
     auto* lir = new(alloc()) LWasmBoundsCheck(useRegisterAtStart(index));
     add(lir, ins);
 }
 
 void
-LIRGeneratorX86Shared::visitWasmLoad(MWasmLoad* ins)
+LIRGeneratorX86Shared::lowerWasmLoad(MWasmLoad* ins)
 {
+    MOZ_ASSERT(ins->type() != MIRType::Int64);
+
     MDefinition* base = ins->base();
     MOZ_ASSERT(base->type() == MIRType::Int32);
 
-    if (ins->type() == MIRType::Int64) {
-        auto* lir = new(alloc()) LWasmLoadI64(useRegisterOrZeroAtStart(base));
-        defineInt64(lir, ins);
-        return;
-    }
-
     auto* lir = new(alloc()) LWasmLoad(useRegisterOrZeroAtStart(base));
     define(lir, ins);
 }
 
 void
 LIRGeneratorX86Shared::lowerUDiv(MDiv* div)
 {
     if (div->rhs()->isConstant()) {
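
The shared visitWasmLoad becomes the helper lowerWasmLoad, which now asserts the result type is not Int64; each backend overrides visitWasmLoad itself and only falls back to the shared path for non-int64 loads. A sketch of the resulting contract for a hypothetical backend (the arch name is illustrative, not part of the patch):

    void LIRGeneratorMyArch::visitWasmLoad(MWasmLoad* ins)
    {
        if (ins->type() == MIRType::Int64) {
            // arch-specific: create an LWasmLoadI64 and pick register constraints
            return;
        }
        lowerWasmLoad(ins);  // shared path, asserts type != MIRType::Int64
    }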
--- a/js/src/jit/x86-shared/Lowering-x86-shared.h
+++ b/js/src/jit/x86-shared/Lowering-x86-shared.h
@@ -42,17 +42,17 @@ class LIRGeneratorX86Shared : public LIR
     void lowerForCompIx4(LSimdBinaryCompIx4* ins, MSimdBinaryComp* mir,
                          MDefinition* lhs, MDefinition* rhs);
     void lowerForCompFx4(LSimdBinaryCompFx4* ins, MSimdBinaryComp* mir,
                          MDefinition* lhs, MDefinition* rhs);
     void lowerForBitAndAndBranch(LBitAndAndBranch* baab, MInstruction* mir,
                                  MDefinition* lhs, MDefinition* rhs);
     void visitAsmJSNeg(MAsmJSNeg* ins);
     void visitWasmBoundsCheck(MWasmBoundsCheck* ins);
-    void visitWasmLoad(MWasmLoad* ins);
+    void lowerWasmLoad(MWasmLoad* ins);
     void visitAsmSelect(MAsmSelect* ins);
     void lowerMulI(MMul* mul, MDefinition* lhs, MDefinition* rhs);
     void lowerDivI(MDiv* div);
     void lowerModI(MMod* mod);
     void lowerUDiv(MDiv* div);
     void lowerUMod(MMod* mod);
     void lowerUrshD(MUrsh* mir);
     void lowerTruncateDToInt32(MTruncateToInt32* ins);
--- a/js/src/jit/x86/CodeGenerator-x86.cpp
+++ b/js/src/jit/x86/CodeGenerator-x86.cpp
@@ -271,16 +271,96 @@ CodeGeneratorX86::load(Scalar::Type acce
       case Scalar::Float64:      masm.vmovsdWithPatch(srcAddr, ToFloatRegister(out)); break;
       case Scalar::Float32x4:
       case Scalar::Int8x16:
       case Scalar::Int16x8:
       case Scalar::Int32x4:      MOZ_CRASH("SIMD load should be handled in their own function");
       case Scalar::Int64:        MOZ_CRASH("should be handled in a separate function (2 regs)");
       case Scalar::MaxTypedArrayViewType: MOZ_CRASH("unexpected type");
     }
+    masm.append(wasm::MemoryAccess(masm.size()));
+}
+
+void
+CodeGeneratorX86::loadI64(Scalar::Type type, const Operand& srcAddr, Register64 out)
+{
+    switch (type) {
+      case Scalar::Int8:
+        MOZ_ASSERT(out == Register64(edx, eax));
+        masm.movsblWithPatch(srcAddr, out.low);
+        masm.append(wasm::MemoryAccess(masm.size()));
+        masm.cdq();
+        break;
+      case Scalar::Uint8:
+        masm.movzblWithPatch(srcAddr, out.low);
+        masm.append(wasm::MemoryAccess(masm.size()));
+        masm.xorl(out.high, out.high);
+        break;
+      case Scalar::Int16:
+        MOZ_ASSERT(out == Register64(edx, eax));
+        masm.movswlWithPatch(srcAddr, out.low);
+        masm.append(wasm::MemoryAccess(masm.size()));
+        masm.cdq();
+        break;
+      case Scalar::Uint16:
+        masm.movzwlWithPatch(srcAddr, out.low);
+        masm.append(wasm::MemoryAccess(masm.size()));
+        masm.xorl(out.high, out.high);
+        break;
+      case Scalar::Int32:
+        MOZ_ASSERT(out == Register64(edx, eax));
+        masm.movlWithPatch(srcAddr, out.low);
+        masm.append(wasm::MemoryAccess(masm.size()));
+        masm.cdq();
+        break;
+      case Scalar::Uint32:
+        masm.movlWithPatch(srcAddr, out.low);
+        masm.append(wasm::MemoryAccess(masm.size()));
+        masm.xorl(out.high, out.high);
+        break;
+      case Scalar::Int64:
+        if (srcAddr.kind() == Operand::MEM_ADDRESS32) {
+            Operand low(PatchedAbsoluteAddress(uint32_t(srcAddr.address()) + INT64LOW_OFFSET));
+            Operand high(PatchedAbsoluteAddress(uint32_t(srcAddr.address()) + INT64HIGH_OFFSET));
+
+            masm.movlWithPatch(low, out.low);
+            masm.append(wasm::MemoryAccess(masm.size()));
+            masm.movlWithPatch(high, out.high);
+            masm.append(wasm::MemoryAccess(masm.size()));
+        } else {
+            MOZ_ASSERT(srcAddr.kind() == Operand::MEM_REG_DISP);
+            Address addr = srcAddr.toAddress();
+            Operand low(addr.base, addr.offset + INT64LOW_OFFSET);
+            Operand high(addr.base, addr.offset + INT64HIGH_OFFSET);
+
+            if (addr.base != out.low) {
+                masm.movlWithPatch(low, out.low);
+                masm.append(wasm::MemoryAccess(masm.size()));
+                masm.movlWithPatch(high, out.high);
+                masm.append(wasm::MemoryAccess(masm.size()));
+            } else {
+                MOZ_ASSERT(addr.base != out.high);
+                masm.movlWithPatch(high, out.high);
+                masm.append(wasm::MemoryAccess(masm.size()));
+                masm.movlWithPatch(low, out.low);
+                masm.append(wasm::MemoryAccess(masm.size()));
+            }
+        }
+        break;
+      case Scalar::Float32:
+      case Scalar::Float64:
+      case Scalar::Float32x4:
+      case Scalar::Int8x16:
+      case Scalar::Int16x8:
+      case Scalar::Int32x4:
+        MOZ_CRASH("non-int64 loads should use load()");
+      case Scalar::Uint8Clamped:
+      case Scalar::MaxTypedArrayViewType:
+        MOZ_CRASH("unexpected array type");
+    }
 }
 
 void
 CodeGeneratorX86::visitLoadTypedArrayElementStatic(LLoadTypedArrayElementStatic* ins)
 {
     const MLoadTypedArrayElementStatic* mir = ins->mir();
     Scalar::Type accessType = mir->accessType();
     MOZ_ASSERT_IF(accessType == Scalar::Float32, mir->type() == MIRType::Float32);
@@ -403,20 +483,22 @@ CodeGeneratorX86::loadSimd(Scalar::Type 
       case Scalar::Uint32:
       case Scalar::Int64:
       case Scalar::Float32:
       case Scalar::Float64:
       case Scalar::Uint8Clamped:
       case Scalar::MaxTypedArrayViewType:
         MOZ_CRASH("should only handle SIMD types");
     }
+    masm.append(wasm::MemoryAccess(masm.size()));
 }
 
+template <typename T>
 void
-CodeGeneratorX86::visitWasmLoad(LWasmLoad* ins)
+CodeGeneratorX86::emitWasmLoad(T* ins)
 {
     const MWasmLoad* mir = ins->mir();
 
     Scalar::Type accessType = mir->accessType();
     MOZ_ASSERT(!Scalar::isSimdType(accessType), "SIMD NYI");
     MOZ_ASSERT(!mir->barrierBefore() && !mir->barrierAfter(), "atomics NYI");
 
     if (mir->offset() > INT32_MAX) {
@@ -425,45 +507,72 @@ CodeGeneratorX86::visitWasmLoad(LWasmLoa
         return;
     }
 
     const LAllocation* ptr = ins->ptr();
     Operand srcAddr = ptr->isBogus()
                       ? Operand(PatchedAbsoluteAddress(mir->offset()))
                       : Operand(ToRegister(ptr), mir->offset());
 
-    load(accessType, srcAddr, ins->output());
-
-    masm.append(wasm::MemoryAccess(masm.size()));
+    if (mir->type() == MIRType::Int64)
+        loadI64(accessType, srcAddr, ToOutRegister64(ins));
+    else
+        load(accessType, srcAddr, ins->output());
 }
 
 void
-CodeGeneratorX86::visitWasmStore(LWasmStore* ins)
+CodeGeneratorX86::visitWasmLoad(LWasmLoad* ins)
+{
+    emitWasmLoad(ins);
+}
+
+void
+CodeGeneratorX86::visitWasmLoadI64(LWasmLoadI64* ins)
+{
+    emitWasmLoad(ins);
+}
+
+template <typename T>
+void
+CodeGeneratorX86::emitWasmStore(T* ins)
 {
     const MWasmStore* mir = ins->mir();
 
     Scalar::Type accessType = mir->accessType();
     MOZ_ASSERT(!Scalar::isSimdType(accessType), "SIMD NYI");
     MOZ_ASSERT(!mir->barrierBefore() && !mir->barrierAfter(), "atomics NYI");
 
     if (mir->offset() > INT32_MAX) {
         // This is unreachable because of the bounds check.
         masm.breakpoint();
         return;
     }
 
-    const LAllocation* value = ins->value();
     const LAllocation* ptr = ins->ptr();
     Operand dstAddr = ptr->isBogus()
                       ? Operand(PatchedAbsoluteAddress(mir->offset()))
                       : Operand(ToRegister(ptr), mir->offset());
 
-    store(accessType, value, dstAddr);
+    if (accessType == Scalar::Int64)
+        storeI64(accessType, ins->getInt64Operand(LWasmStoreI64::ValueIndex), dstAddr);
+    else
+        store(accessType, ins->getOperand(LWasmStore::ValueIndex), dstAddr);
+}
+
 
-    masm.append(wasm::MemoryAccess(masm.size()));
+void
+CodeGeneratorX86::visitWasmStore(LWasmStore* ins)
+{
+    emitWasmStore(ins);
+}
+
+void
+CodeGeneratorX86::visitWasmStoreI64(LWasmStoreI64* ins)
+{
+    emitWasmStore(ins);
 }
 
 void
 CodeGeneratorX86::emitSimdLoad(LAsmJSLoadHeap* ins)
 {
     const MAsmJSLoadHeap* mir = ins->mir();
     Scalar::Type type = mir->accessType();
     FloatRegister out = ToFloatRegister(ins->output());
@@ -480,29 +589,26 @@ CodeGeneratorX86::emitSimdLoad(LAsmJSLoa
 
         Operand srcAddrZ =
             ptr->isBogus()
             ? Operand(PatchedAbsoluteAddress(2 * sizeof(float) + mir->offset()))
             : Operand(ToRegister(ptr), 2 * sizeof(float) + mir->offset());
 
         // Load XY
         loadSimd(type, 2, srcAddr, out);
-        masm.append(wasm::MemoryAccess(masm.size()));
 
         // Load Z (W is zeroed)
         // This is still in bounds, as we've checked with a manual bounds check
         // or we had enough space for sure when removing the bounds check.
         loadSimd(type, 1, srcAddrZ, ScratchSimd128Reg);
-        masm.append(wasm::MemoryAccess(masm.size()));
 
         // Move ZW atop XY
         masm.vmovlhps(ScratchSimd128Reg, out, out);
     } else {
         loadSimd(type, numElems, srcAddr, out);
-        masm.append(wasm::MemoryAccess(masm.size()));
     }
 
     if (hasBoundsCheck)
         cleanupAfterAsmJSBoundsCheckBranch(mir, ToRegister(ptr));
 }
 
 void
 CodeGeneratorX86::visitAsmJSLoadHeap(LAsmJSLoadHeap* ins)
@@ -520,27 +626,24 @@ CodeGeneratorX86::visitAsmJSLoadHeap(LAs
                       : Operand(ToRegister(ptr), mir->offset());
 
     memoryBarrier(mir->barrierBefore());
 
     OutOfLineLoadTypedArrayOutOfBounds* ool;
     DebugOnly<bool> hasBoundsCheck = maybeEmitAsmJSLoadBoundsCheck(mir, ins, &ool);
 
     load(accessType, srcAddr, out);
-    uint32_t after = masm.size();
 
     if (ool) {
         MOZ_ASSERT(hasBoundsCheck);
         cleanupAfterAsmJSBoundsCheckBranch(mir, ToRegister(ptr));
         masm.bind(ool->rejoin());
     }
 
     memoryBarrier(mir->barrierAfter());
-
-    masm.append(wasm::MemoryAccess(after));
 }
 
 void
 CodeGeneratorX86::store(Scalar::Type accessType, const LAllocation* value, const Operand& dstAddr)
 {
     switch (accessType) {
       case Scalar::Int8:
       case Scalar::Uint8Clamped:
@@ -562,27 +665,63 @@ CodeGeneratorX86::store(Scalar::Type acc
         masm.vmovssWithPatch(ToFloatRegister(value), dstAddr);
         break;
 
       case Scalar::Float64:
         masm.vmovsdWithPatch(ToFloatRegister(value), dstAddr);
         break;
 
       case Scalar::Int64:
-        MOZ_CRASH("should be handled in a separate function (2 registers)");
+        MOZ_CRASH("Should be handled in storeI64.");
 
       case Scalar::Float32x4:
       case Scalar::Int8x16:
       case Scalar::Int16x8:
       case Scalar::Int32x4:
         MOZ_CRASH("SIMD stores should be handled in emitSimdStore");
 
       case Scalar::MaxTypedArrayViewType:
         MOZ_CRASH("unexpected type");
     }
+    masm.append(wasm::MemoryAccess(masm.size()));
+}
+
+void
+CodeGeneratorX86::storeI64(Scalar::Type accessType, const LInt64Allocation value,
+                           const Operand& dstAddr)
+{
+    Register64 input = ToRegister64(value);
+    MOZ_ASSERT(accessType == Scalar::Int64);
+    if (dstAddr.kind() == Operand::MEM_ADDRESS32) {
+        Operand low(PatchedAbsoluteAddress(uint32_t(dstAddr.address()) + INT64LOW_OFFSET));
+        Operand high(PatchedAbsoluteAddress(uint32_t(dstAddr.address()) + INT64HIGH_OFFSET));
+
+        masm.movlWithPatch(input.low, low);
+        masm.append(wasm::MemoryAccess(masm.size()));
+        masm.movlWithPatch(input.high, high);
+        masm.append(wasm::MemoryAccess(masm.size()));
+    } else {
+        MOZ_ASSERT(dstAddr.kind() == Operand::MEM_REG_DISP);
+        Address addr = dstAddr.toAddress();
+        Operand low(addr.base, addr.offset + INT64LOW_OFFSET);
+        Operand high(addr.base, addr.offset + INT64HIGH_OFFSET);
+
+        if (addr.base != input.low) {
+            masm.movlWithPatch(input.low, low);
+            masm.append(wasm::MemoryAccess(masm.size()));
+            masm.movlWithPatch(input.high, high);
+            masm.append(wasm::MemoryAccess(masm.size()));
+        } else {
+            MOZ_ASSERT(addr.base != input.high);
+            masm.movlWithPatch(input.high, high);
+            masm.append(wasm::MemoryAccess(masm.size()));
+            masm.movlWithPatch(input.low, low);
+            masm.append(wasm::MemoryAccess(masm.size()));
+        }
+    }
 }
 
 void
 CodeGeneratorX86::visitStoreTypedArrayElementStatic(LStoreTypedArrayElementStatic* ins)
 {
     MStoreTypedArrayElementStatic* mir = ins->mir();
     Scalar::Type accessType = mir->accessType();
     Register ptr = ToRegister(ins->ptr());
@@ -650,16 +789,17 @@ CodeGeneratorX86::storeSimd(Scalar::Type
       case Scalar::Uint32:
       case Scalar::Int64:
       case Scalar::Float32:
       case Scalar::Float64:
       case Scalar::Uint8Clamped:
       case Scalar::MaxTypedArrayViewType:
         MOZ_CRASH("should only handle SIMD types");
     }
+    masm.append(wasm::MemoryAccess(masm.size()));
 }
 
 void
 CodeGeneratorX86::emitSimdStore(LAsmJSStoreHeap* ins)
 {
     const MAsmJSStoreHeap* mir = ins->mir();
     Scalar::Type type = mir->accessType();
     FloatRegister in = ToFloatRegister(ins->value());
@@ -676,28 +816,25 @@ CodeGeneratorX86::emitSimdStore(LAsmJSSt
 
         Operand dstAddrZ =
             ptr->isBogus()
             ? Operand(PatchedAbsoluteAddress(2 * sizeof(float) + mir->offset()))
             : Operand(ToRegister(ptr), 2 * sizeof(float) + mir->offset());
 
         // Store XY
         storeSimd(type, 2, in, dstAddr);
-        masm.append(wasm::MemoryAccess(masm.size()));
 
         masm.vmovhlps(in, ScratchSimd128Reg, ScratchSimd128Reg);
 
         // Store Z (W is zeroed)
         // This is still in bounds, as we've checked with a manual bounds check
         // or we had enough space for sure when removing the bounds check.
         storeSimd(type, 1, ScratchSimd128Reg, dstAddrZ);
-        masm.append(wasm::MemoryAccess(masm.size()));
     } else {
         storeSimd(type, numElems, in, dstAddr);
-        masm.append(wasm::MemoryAccess(masm.size()));
     }
 
     if (hasBoundsCheck)
         cleanupAfterAsmJSBoundsCheckBranch(mir, ToRegister(ptr));
 }
 
 void
 CodeGeneratorX86::visitAsmJSStoreHeap(LAsmJSStoreHeap* ins)
@@ -717,27 +854,24 @@ CodeGeneratorX86::visitAsmJSStoreHeap(LA
                       : Operand(ToRegister(ptr), mir->offset());
 
     memoryBarrier(mir->barrierBefore());
 
     Label* rejoin;
     DebugOnly<bool> hasBoundsCheck = maybeEmitAsmJSStoreBoundsCheck(mir, ins, &rejoin);
 
     store(accessType, value, dstAddr);
-    uint32_t after = masm.size();
 
     if (rejoin) {
         MOZ_ASSERT(hasBoundsCheck);
         cleanupAfterAsmJSBoundsCheckBranch(mir, ToRegister(ptr));
         masm.bind(rejoin);
     }
 
     memoryBarrier(mir->barrierAfter());
-
-    masm.append(wasm::MemoryAccess(after));
 }
 
 // Perform bounds checking on the access if necessary; if it fails,
 // jump to out-of-line code that throws.  If the bounds check passes,
 // set up the heap address in addrTemp.
 
 void
 CodeGeneratorX86::asmJSAtomicComputeAddress(Register addrTemp, Register ptrReg,
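
Two things change on x86. First, every patchable instruction now appends its own wasm::MemoryAccess record inside load/store/loadSimd/storeSimd rather than a single append in the caller; an int64 access emits two patchable movl instructions (low and high word), and each needs its own record. Second, loadI64 and storeI64 split the access into 32-bit halves at INT64LOW_OFFSET/INT64HIGH_OFFSET and order the moves so the base register is not clobbered before the second access: when the base aliases out.low, the high word goes first. A standalone sketch of that ordering rule (plain C++, little-endian split as in the patch):

    #include <cstdint>
    #include <cstring>

    // "Registers" are plain variables; addr plays the role of the base register.
    void loadI64Split(const uint8_t* addr, uint32_t& outLow, uint32_t& outHigh,
                      bool baseAliasesOutLow)
    {
        if (!baseAliasesOutLow) {
            std::memcpy(&outLow,  addr + 0, 4);  // low word, offset 0
            std::memcpy(&outHigh, addr + 4, 4);  // high word, offset 4
        } else {
            // The low destination is the base register itself, so read the
            // high word first and overwrite the base last.
            std::memcpy(&outHigh, addr + 4, 4);
            std::memcpy(&outLow,  addr + 0, 4);
        }
    }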
--- a/js/src/jit/x86/CodeGenerator-x86.h
+++ b/js/src/jit/x86/CodeGenerator-x86.h
@@ -24,26 +24,33 @@ class CodeGeneratorX86 : public CodeGene
     }
 
   protected:
     ValueOperand ToValue(LInstruction* ins, size_t pos);
     ValueOperand ToOutValue(LInstruction* ins);
     ValueOperand ToTempValue(LInstruction* ins, size_t pos);
 
     void load(Scalar::Type vt, const Operand& srcAddr, const LDefinition* out);
+    void loadI64(Scalar::Type vt, const Operand& srcAddr, const Register64 out);
     void store(Scalar::Type vt, const LAllocation* value, const Operand& dstAddr);
+    void storeI64(Scalar::Type vt, const LInt64Allocation value, const Operand& dstAddr);
 
     void loadSimd(Scalar::Type type, unsigned numElems, const Operand& srcAddr, FloatRegister out);
     void emitSimdLoad(LAsmJSLoadHeap* ins);
 
     void storeSimd(Scalar::Type type, unsigned numElems, FloatRegister in, const Operand& dstAddr);
     void emitSimdStore(LAsmJSStoreHeap* ins);
 
     void memoryBarrier(MemoryBarrierBits barrier);
 
+    template <typename T>
+    void emitWasmLoad(T* ins);
+    template <typename T>
+    void emitWasmStore(T* ins);
+
   public:
     CodeGeneratorX86(MIRGenerator* gen, LIRGraph* graph, MacroAssembler* masm);
 
   public:
     void visitBox(LBox* box);
     void visitBoxFloatingPoint(LBoxFloatingPoint* box);
     void visitUnbox(LUnbox* unbox);
     void visitValue(LValue* value);
@@ -56,17 +63,19 @@ class CodeGeneratorX86 : public CodeGene
     void visitTruncateDToInt32(LTruncateDToInt32* ins);
     void visitTruncateFToInt32(LTruncateFToInt32* ins);
     void visitLoadTypedArrayElementStatic(LLoadTypedArrayElementStatic* ins);
     void visitStoreTypedArrayElementStatic(LStoreTypedArrayElementStatic* ins);
     void emitAsmJSCall(LAsmJSCallBase* ins);
     void visitAsmJSCall(LAsmJSCall* ins);
     void visitAsmJSCallI64(LAsmJSCallI64* ins);
     void visitWasmLoad(LWasmLoad* ins);
+    void visitWasmLoadI64(LWasmLoadI64* ins);
     void visitWasmStore(LWasmStore* ins);
+    void visitWasmStoreI64(LWasmStoreI64* ins);
     void visitWasmLoadGlobalVar(LWasmLoadGlobalVar* ins);
     void visitWasmStoreGlobalVar(LWasmStoreGlobalVar* ins);
     void visitAsmJSLoadHeap(LAsmJSLoadHeap* ins);
     void visitAsmJSStoreHeap(LAsmJSStoreHeap* ins);
     void visitAsmJSCompareExchangeHeap(LAsmJSCompareExchangeHeap* ins);
     void visitAsmJSAtomicExchangeHeap(LAsmJSAtomicExchangeHeap* ins);
     void visitAsmJSAtomicBinopHeap(LAsmJSAtomicBinopHeap* ins);
     void visitAsmJSAtomicBinopHeapForEffect(LAsmJSAtomicBinopHeapForEffect* ins);
--- a/js/src/jit/x86/Lowering-x86.cpp
+++ b/js/src/jit/x86/Lowering-x86.cpp
@@ -285,18 +285,22 @@ LIRGeneratorX86::visitWasmStore(MWasmSto
       case Scalar::Float32x4:
       case Scalar::Int8x16:
       case Scalar::Int16x8:
       case Scalar::Int32x4:
         // For now, don't allow constant values. The immediate operand affects
         // instruction layout which affects patching.
         valueAlloc = useRegisterAtStart(ins->value());
         break;
-      case Scalar::Int64:
-        MOZ_CRASH("NYI");
+      case Scalar::Int64: {
+        LInt64Allocation valueAlloc = useInt64RegisterAtStart(ins->value());
+        auto* lir = new(alloc()) LWasmStoreI64(baseAlloc, valueAlloc);
+        add(lir, ins);
+        return;
+      }
       case Scalar::Uint8Clamped:
       case Scalar::MaxTypedArrayViewType:
         MOZ_CRASH("unexpected array type");
     }
 
     auto* lir = new(alloc()) LWasmStore(baseAlloc, valueAlloc);
     add(lir, ins);
 }
@@ -555,16 +559,40 @@ void
 LIRGeneratorX86::lowerUModI64(MMod* mod)
 {
     LUDivOrModI64* lir = new(alloc()) LUDivOrModI64(useInt64RegisterAtStart(mod->lhs()),
                                                     useInt64RegisterAtStart(mod->rhs()));
     defineReturn(lir, mod);
 }
 
 void
+LIRGeneratorX86::visitWasmLoad(MWasmLoad* ins)
+{
+    if (ins->type() != MIRType::Int64) {
+        lowerWasmLoad(ins);
+        return;
+    }
+
+    MDefinition* base = ins->base();
+    MOZ_ASSERT(base->type() == MIRType::Int32);
+
+    auto* lir = new(alloc()) LWasmLoadI64(useRegisterOrZeroAtStart(base));
+
+    Scalar::Type accessType = ins->accessType();
+    if (accessType == Scalar::Int8 || accessType == Scalar::Int16 || accessType == Scalar::Int32) {
+        // We use cdq to sign-extend the result and cdq demands these registers.
+        defineInt64Fixed(lir, ins, LInt64Allocation(LAllocation(AnyRegister(edx)),
+                                                    LAllocation(AnyRegister(eax))));
+        return;
+    }
+
+    defineInt64(lir, ins);
+}
+
+void
 LIRGeneratorX86::visitSubstr(MSubstr* ins)
 {
     // Due to lack of registers on x86, we reuse the string register as
     // temporary. As a result we only need two temporary registers and take a
    bogus temporary as fifth argument.
     LSubstr* lir = new (alloc()) LSubstr(useRegister(ins->string()),
                                          useRegister(ins->begin()),
                                          useRegister(ins->length()),
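
For sign-extending int64 loads (Int8/Int16/Int32) the x86 code generator uses cdq, which sign-extends eax into edx, so the lowering must pin the result to the edx:eax pair with defineInt64Fixed; unsigned loads zero the high word with xorl and keep the unconstrained defineInt64. A standalone illustration of what cdq leaves in the high half (plain C++, not jit code):

    #include <cstdint>

    // cdq writes the sign extension of eax into edx: all zero or all one bits.
    uint32_t highWordAfterCdq(int32_t eax)
    {
        return eax < 0 ? 0xFFFFFFFFu : 0u;
    }
    // A sign-extending 16-bit load of 0xFFFF yields low = 0xFFFFFFFF and
    // high = 0xFFFFFFFF; the unsigned variant zeroes the high word instead.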
--- a/js/src/jit/x86/Lowering-x86.h
+++ b/js/src/jit/x86/Lowering-x86.h
@@ -50,16 +50,17 @@ class LIRGeneratorX86 : public LIRGenera
     void lowerForMulInt64(LMulI64* ins, MMul* mir, MDefinition* lhs, MDefinition* rhs);
 
     void lowerDivI64(MDiv* div);
     void lowerModI64(MMod* mod);
     void lowerUDivI64(MDiv* div);
     void lowerUModI64(MMod* mod);
 
   public:
+    void visitWasmLoad(MWasmLoad* ins);
     void visitBox(MBox* box);
     void visitUnbox(MUnbox* unbox);
     void visitReturn(MReturn* ret);
     void visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement* ins);
     void visitAtomicExchangeTypedArrayElement(MAtomicExchangeTypedArrayElement* ins);
     void visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop* ins);
     void visitAsmJSUnsignedToDouble(MAsmJSUnsignedToDouble* ins);
     void visitAsmJSUnsignedToFloat32(MAsmJSUnsignedToFloat32* ins);