Bug 1279248 - Part 26: Implement the 64bit variant of WasmLoad and WasmStore on x86, r=bbouvier
author: Hannes Verschore <hv1989@gmail.com>
date: Fri, 29 Jul 2016 16:53:48 +0200
changeset 307317 4dabba8cf9261e11c487fb9aac71bc866f45250a
parent 307316 bb7803606205b1d23590fa05d78c384b833f614d
child 307318 c1e1449a0ad8448df7ae7bc2b2fcdfc40ac238e0
push id: 30508
push user: cbook@mozilla.com
push date: Sat, 30 Jul 2016 14:21:21 +0000
reviewers: bbouvier
bugs: 1279248
milestone: 50.0a1
js/src/jit/shared/LIR-shared.h
js/src/jit/shared/LOpcodes-shared.h
js/src/jit/x64/CodeGenerator-x64.cpp
js/src/jit/x64/CodeGenerator-x64.h
js/src/jit/x64/Lowering-x64.cpp
js/src/jit/x64/Lowering-x64.h
js/src/jit/x86-shared/Lowering-x86-shared.cpp
js/src/jit/x86-shared/Lowering-x86-shared.h
js/src/jit/x86/CodeGenerator-x86.cpp
js/src/jit/x86/CodeGenerator-x86.h
js/src/jit/x86/Lowering-x86.cpp
js/src/jit/x86/Lowering-x86.h
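
The patch below adds an LWasmStoreI64 LIR node and templated emitWasmLoad/emitWasmStore codegen paths. On 32-bit x86 a 64-bit wasm memory access cannot be done with a single instruction, so the new loadI64/storeI64 helpers split it into two 32-bit accesses at INT64LOW_OFFSET and INT64HIGH_OFFSET, ordering the two moves so that a base register aliasing the low half of the output is not clobbered before the second access. A minimal standalone sketch of that word split (hypothetical helper names, not the SpiderMonkey masm API):

#include <cassert>
#include <cstdint>
#include <cstring>

struct Register64Sketch {
    uint32_t low;
    uint32_t high;
};

// Hypothetical stand-in for the two movlWithPatch loads the codegen emits:
// read the low word at +0 and the high word at +4 (little-endian x86).
Register64Sketch loadI64Split(const uint8_t* heap, uint32_t offset) {
    Register64Sketch out;
    std::memcpy(&out.low,  heap + offset + 0, sizeof(uint32_t)); // INT64LOW_OFFSET
    std::memcpy(&out.high, heap + offset + 4, sizeof(uint32_t)); // INT64HIGH_OFFSET
    return out;
}

int main() {
    uint8_t heap[16] = {};
    uint64_t value = 0x0123456789abcdefULL;
    std::memcpy(heap + 8, &value, sizeof(value)); // pretend a wasm heap cell at offset 8
    Register64Sketch r = loadI64Split(heap, 8);
    assert(r.low == 0x89abcdefu && r.high == 0x01234567u); // assumes a little-endian host
    return 0;
}
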
--- a/js/src/jit/shared/LIR-shared.h
+++ b/js/src/jit/shared/LIR-shared.h
@@ -7801,32 +7801,63 @@ class LWasmLoadI64 : public details::LWa
 
     LIR_HEADER(WasmLoadI64);
 };
 
 class LWasmStore : public LInstructionHelper<0, 2, 1>
 {
   public:
     LIR_HEADER(WasmStore);
+
+    static const size_t PtrIndex = 0;
+    static const size_t ValueIndex = 1;
+
     LWasmStore(const LAllocation& ptr, const LAllocation& value) {
-        setOperand(0, ptr);
-        setOperand(1, value);
+        setOperand(PtrIndex, ptr);
+        setOperand(ValueIndex, value);
         setTemp(0, LDefinition::BogusTemp());
     }
     MWasmStore* mir() const {
         return mir_->toWasmStore();
     }
     const LAllocation* ptr() {
-        return getOperand(0);
+        return getOperand(PtrIndex);
     }
     const LDefinition* ptrCopy() {
         return getTemp(0);
     }
     const LAllocation* value() {
-        return getOperand(1);
+        return getOperand(ValueIndex);
+    }
+};
+
+class LWasmStoreI64 : public LInstructionHelper<0, INT64_PIECES + 1, 1>
+{
+  public:
+    LIR_HEADER(WasmStoreI64);
+
+    static const size_t PtrIndex = 0;
+    static const size_t ValueIndex = 1;
+
+    LWasmStoreI64(const LAllocation& ptr, const LInt64Allocation& value) {
+        setOperand(PtrIndex, ptr);
+        setInt64Operand(ValueIndex, value);
+        setTemp(0, LDefinition::BogusTemp());
+    }
+    MWasmStore* mir() const {
+        return mir_->toWasmStore();
+    }
+    const LAllocation* ptr() {
+        return getOperand(PtrIndex);
+    }
+    const LDefinition* ptrCopy() {
+        return getTemp(0);
+    }
+    const LInt64Allocation value() {
+        return getInt64Operand(ValueIndex);
     }
 };
 
 class LAsmJSLoadHeap : public LInstructionHelper<1, 1, 0>
 {
   public:
     LIR_HEADER(AsmJSLoadHeap);
     explicit LAsmJSLoadHeap(const LAllocation& ptr) {
--- a/js/src/jit/shared/LOpcodes-shared.h
+++ b/js/src/jit/shared/LOpcodes-shared.h
@@ -383,16 +383,17 @@
     _(IsObject)                     \
     _(IsObjectAndBranch)            \
     _(HasClass)                     \
     _(AsmSelect)                    \
     _(AsmSelectI64)                 \
     _(WasmLoad)                     \
     _(WasmLoadI64)                  \
     _(WasmStore)                    \
+    _(WasmStoreI64)                 \
     _(WasmBoundsCheck)              \
     _(WasmLoadGlobalVar)            \
     _(WasmLoadGlobalVarI64)         \
     _(WasmStoreGlobalVar)           \
     _(WasmStoreGlobalVarI64)        \
     _(AsmJSLoadHeap)                \
     _(AsmJSStoreHeap)               \
     _(AsmJSLoadFuncPtr)             \
--- a/js/src/jit/x64/CodeGenerator-x64.cpp
+++ b/js/src/jit/x64/CodeGenerator-x64.cpp
@@ -499,115 +499,130 @@ CodeGeneratorX64::load(Scalar::Type type
         MOZ_CRASH("int64 loads must use load64");
       case Scalar::Uint8Clamped:
       case Scalar::MaxTypedArrayViewType:
         MOZ_CRASH("unexpected array type");
     }
 }
 
 void
-CodeGeneratorX64::loadI64(Scalar::Type type, const Operand& srcAddr, AnyRegister out)
+CodeGeneratorX64::loadI64(Scalar::Type type, const Operand& srcAddr, Register64 out)
 {
     switch (type) {
-      case Scalar::Int8:      masm.movsbq(srcAddr, out.gpr()); break;
-      case Scalar::Uint8:     masm.movzbq(srcAddr, out.gpr()); break;
-      case Scalar::Int16:     masm.movswq(srcAddr, out.gpr()); break;
-      case Scalar::Uint16:    masm.movzwq(srcAddr, out.gpr()); break;
-      case Scalar::Int32:     masm.movslq(srcAddr, out.gpr()); break;
+      case Scalar::Int8:      masm.movsbq(srcAddr, out.reg); break;
+      case Scalar::Uint8:     masm.movzbq(srcAddr, out.reg); break;
+      case Scalar::Int16:     masm.movswq(srcAddr, out.reg); break;
+      case Scalar::Uint16:    masm.movzwq(srcAddr, out.reg); break;
+      case Scalar::Int32:     masm.movslq(srcAddr, out.reg); break;
       // Int32 to int64 moves zero-extend by default.
-      case Scalar::Uint32:    masm.movl(srcAddr, out.gpr());   break;
-      case Scalar::Int64:     masm.movq(srcAddr, out.gpr());   break;
+      case Scalar::Uint32:    masm.movl(srcAddr, out.reg);   break;
+      case Scalar::Int64:     masm.movq(srcAddr, out.reg);   break;
       case Scalar::Float32:
       case Scalar::Float64:
       case Scalar::Float32x4:
       case Scalar::Int8x16:
       case Scalar::Int16x8:
       case Scalar::Int32x4:
         MOZ_CRASH("non-int64 loads should use load()");
       case Scalar::Uint8Clamped:
       case Scalar::MaxTypedArrayViewType:
         MOZ_CRASH("unexpected array type");
     }
 }
 
+template <typename T>
 void
-CodeGeneratorX64::visitWasmLoadBase(const MWasmLoad* mir, const LAllocation* ptr,
-                                    const LDefinition* output, bool isInt64)
+CodeGeneratorX64::emitWasmLoad(T* ins)
 {
+    const MWasmLoad* mir = ins->mir();
+    bool isInt64 = mir->type() == MIRType::Int64;
+
     Scalar::Type accessType = mir->accessType();
     MOZ_ASSERT(!Scalar::isSimdType(accessType), "SIMD NYI");
     MOZ_ASSERT(!mir->barrierBefore() && !mir->barrierAfter(), "atomics NYI");
 
     if (mir->offset() > INT32_MAX) {
         masm.jump(wasm::JumpTarget::OutOfBounds);
         return;
     }
 
+    const LAllocation* ptr = ins->ptr();
     Operand srcAddr = ptr->isBogus()
                       ? Operand(HeapReg, mir->offset())
                       : Operand(HeapReg, ToRegister(ptr), TimesOne, mir->offset());
 
-    AnyRegister out = ToAnyRegister(output);
-
     uint32_t before = masm.size();
-    if (!isInt64)
-        load(accessType, srcAddr, out);
+    if (isInt64)
+        loadI64(accessType, srcAddr, ToOutRegister64(ins));
     else
-        loadI64(accessType, srcAddr, out);
+        load(accessType, srcAddr, ToAnyRegister(ins->output()));
     uint32_t after = masm.size();
 
     verifyLoadDisassembly(before, after, isInt64, accessType, /* numElems */ 0, srcAddr,
-                          *output->output());
+                          *ins->output()->output());
 
     masm.append(WasmMemoryAccess(before));
 }
 
 void
 CodeGeneratorX64::visitWasmLoad(LWasmLoad* ins)
 {
-    visitWasmLoadBase(ins->mir(), ins->ptr(), ins->output(), /* isInt64 */ false);
+    emitWasmLoad(ins);
 }
 
 void
 CodeGeneratorX64::visitWasmLoadI64(LWasmLoadI64* ins)
 {
-    visitWasmLoadBase(ins->mir(), ins->ptr(), ins->output(), /* isInt64 */ true);
+    emitWasmLoad(ins);
 }
 
+template <typename T>
 void
-CodeGeneratorX64::visitWasmStore(LWasmStore* ins)
+CodeGeneratorX64::emitWasmStore(T* ins)
 {
     const MWasmStore* mir = ins->mir();
 
     Scalar::Type accessType = mir->accessType();
     MOZ_ASSERT(!Scalar::isSimdType(accessType), "SIMD NYI");
     MOZ_ASSERT(!mir->barrierBefore() && !mir->barrierAfter(), "atomics NYI");
 
     if (mir->offset() > INT32_MAX) {
         masm.jump(wasm::JumpTarget::OutOfBounds);
         return;
     }
 
-    const LAllocation* value = ins->value();
+    const LAllocation* value = ins->getOperand(ins->ValueIndex);
     const LAllocation* ptr = ins->ptr();
     Operand dstAddr = ptr->isBogus()
                       ? Operand(HeapReg, mir->offset())
                       : Operand(HeapReg, ToRegister(ptr), TimesOne, mir->offset());
 
     uint32_t before = masm.size();
     store(accessType, value, dstAddr);
     uint32_t after = masm.size();
 
     verifyStoreDisassembly(before, after, mir->value()->type() == MIRType::Int64,
                            accessType, /* numElems */ 0, dstAddr, *value);
 
     masm.append(WasmMemoryAccess(before));
 }
 
 void
+CodeGeneratorX64::visitWasmStore(LWasmStore* ins)
+{
+    emitWasmStore(ins);
+}
+
+void
+CodeGeneratorX64::visitWasmStoreI64(LWasmStoreI64* ins)
+{
+    emitWasmStore(ins);
+}
+
+void
 CodeGeneratorX64::emitSimdLoad(LAsmJSLoadHeap* ins)
 {
     const MAsmJSLoadHeap* mir = ins->mir();
     Scalar::Type type = mir->accessType();
     FloatRegister out = ToFloatRegister(ins->output());
     const LAllocation* ptr = ins->ptr();
     Operand srcAddr = ptr->isBogus()
                       ? Operand(HeapReg, mir->offset())
--- a/js/src/jit/x64/CodeGenerator-x64.h
+++ b/js/src/jit/x64/CodeGenerator-x64.h
@@ -23,27 +23,30 @@ class CodeGeneratorX64 : public CodeGene
     ValueOperand ToOutValue(LInstruction* ins);
     ValueOperand ToTempValue(LInstruction* ins, size_t pos);
 
     void storeUnboxedValue(const LAllocation* value, MIRType valueType,
                            Operand dest, MIRType slotType);
     void memoryBarrier(MemoryBarrierBits barrier);
 
     void load(Scalar::Type type, const Operand& srcAddr, AnyRegister out);
-    void loadI64(Scalar::Type type, const Operand& srcAddr, AnyRegister out);
-    void visitWasmLoadBase(const MWasmLoad* mir, const LAllocation* ptr, const LDefinition* output,
-                           bool isInt64);
+    void loadI64(Scalar::Type type, const Operand& srcAddr, Register64 out);
 
     void store(Scalar::Type type, const LAllocation* value, const Operand& dstAddr);
 
     void loadSimd(Scalar::Type type, unsigned numElems, const Operand& srcAddr, FloatRegister out);
     void storeSimd(Scalar::Type type, unsigned numElems, FloatRegister in, const Operand& dstAddr);
 
     void emitSimdLoad(LAsmJSLoadHeap* ins);
     void emitSimdStore(LAsmJSStoreHeap* ins);
+
+    template <typename T>
+    void emitWasmLoad(T* ins);
+    template <typename T>
+    void emitWasmStore(T* ins);
   public:
     CodeGeneratorX64(MIRGenerator* gen, LIRGraph* graph, MacroAssembler* masm);
 
   public:
     void visitValue(LValue* value);
     void visitBox(LBox* box);
     void visitUnbox(LUnbox* unbox);
     void visitCompareB(LCompareB* lir);
@@ -63,16 +66,17 @@ class CodeGeneratorX64 : public CodeGene
     void visitExtendInt32ToInt64(LExtendInt32ToInt64* lir);
     void visitWasmTruncateToInt64(LWasmTruncateToInt64* lir);
     void visitInt64ToFloatingPoint(LInt64ToFloatingPoint* lir);
     void visitLoadTypedArrayElementStatic(LLoadTypedArrayElementStatic* ins);
     void visitStoreTypedArrayElementStatic(LStoreTypedArrayElementStatic* ins);
     void visitWasmLoad(LWasmLoad* ins);
     void visitWasmLoadI64(LWasmLoadI64* ins);
     void visitWasmStore(LWasmStore* ins);
+    void visitWasmStoreI64(LWasmStoreI64* ins);
     void visitWasmLoadGlobalVar(LWasmLoadGlobalVar* ins);
     void visitWasmStoreGlobalVar(LWasmStoreGlobalVar* ins);
     void visitWasmLoadGlobalVarI64(LWasmLoadGlobalVarI64* ins);
     void visitWasmStoreGlobalVarI64(LWasmStoreGlobalVarI64* ins);
     void visitAsmSelectI64(LAsmSelectI64* ins);
     void visitAsmJSCall(LAsmJSCall* ins);
     void visitAsmJSCallI64(LAsmJSCallI64* ins);
     void visitAsmJSLoadHeap(LAsmJSLoadHeap* ins);
--- a/js/src/jit/x64/Lowering-x64.cpp
+++ b/js/src/jit/x64/Lowering-x64.cpp
@@ -453,16 +453,31 @@ LIRGeneratorX64::lowerUModI64(MMod* mod)
 {
     LUDivOrModI64* lir = new(alloc()) LUDivOrModI64(useRegister(mod->lhs()),
                                                     useRegister(mod->rhs()),
                                                     tempFixed(rax));
     defineInt64Fixed(lir, mod, LInt64Allocation(LAllocation(AnyRegister(rdx))));
 }
 
 void
+LIRGeneratorX64::visitWasmLoad(MWasmLoad* ins)
+{
+    if (ins->type() != MIRType::Int64) {
+        lowerWasmLoad(ins);
+        return;
+    }
+
+    MDefinition* base = ins->base();
+    MOZ_ASSERT(base->type() == MIRType::Int32);
+
+    auto* lir = new(alloc()) LWasmLoadI64(useRegisterOrZeroAtStart(base));
+    defineInt64(lir, ins);
+}
+
+void
 LIRGeneratorX64::visitWasmTruncateToInt64(MWasmTruncateToInt64* ins)
 {
     MDefinition* opd = ins->input();
     MOZ_ASSERT(opd->type() == MIRType::Double || opd->type() == MIRType::Float32);
 
     LDefinition maybeTemp = ins->isUnsigned() ? tempDouble() : LDefinition::BogusTemp();
     defineInt64(new(alloc()) LWasmTruncateToInt64(useRegister(opd), maybeTemp), ins);
 }
--- a/js/src/jit/x64/Lowering-x64.h
+++ b/js/src/jit/x64/Lowering-x64.h
@@ -43,16 +43,17 @@ class LIRGeneratorX64 : public LIRGenera
     bool needTempForPostBarrier() { return false; }
 
     void lowerDivI64(MDiv* div);
     void lowerModI64(MMod* mod);
     void lowerUDivI64(MDiv* div);
     void lowerUModI64(MMod* mod);
 
   public:
+    void visitWasmLoad(MWasmLoad* ins);
     void visitBox(MBox* box);
     void visitUnbox(MUnbox* unbox);
     void visitReturn(MReturn* ret);
     void visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement* ins);
     void visitAtomicExchangeTypedArrayElement(MAtomicExchangeTypedArrayElement* ins);
     void visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop* ins);
     void visitAsmJSUnsignedToDouble(MAsmJSUnsignedToDouble* ins);
     void visitAsmJSUnsignedToFloat32(MAsmJSUnsignedToFloat32* ins);
--- a/js/src/jit/x86-shared/Lowering-x86-shared.cpp
+++ b/js/src/jit/x86-shared/Lowering-x86-shared.cpp
@@ -331,27 +331,23 @@ LIRGeneratorX86Shared::visitWasmBoundsCh
         return;
 
     MDefinition* index = ins->input();
     auto* lir = new(alloc()) LWasmBoundsCheck(useRegisterAtStart(index));
     add(lir, ins);
 }
 
 void
-LIRGeneratorX86Shared::visitWasmLoad(MWasmLoad* ins)
+LIRGeneratorX86Shared::lowerWasmLoad(MWasmLoad* ins)
 {
+    MOZ_ASSERT(ins->type() != MIRType::Int64);
+
     MDefinition* base = ins->base();
     MOZ_ASSERT(base->type() == MIRType::Int32);
 
-    if (ins->type() == MIRType::Int64) {
-        auto* lir = new(alloc()) LWasmLoadI64(useRegisterOrZeroAtStart(base));
-        defineInt64(lir, ins);
-        return;
-    }
-
     auto* lir = new(alloc()) LWasmLoad(useRegisterOrZeroAtStart(base));
     define(lir, ins);
 }
 
 void
 LIRGeneratorX86Shared::lowerUDiv(MDiv* div)
 {
     if (div->rhs()->isConstant()) {
--- a/js/src/jit/x86-shared/Lowering-x86-shared.h
+++ b/js/src/jit/x86-shared/Lowering-x86-shared.h
@@ -42,17 +42,17 @@ class LIRGeneratorX86Shared : public LIR
     void lowerForCompIx4(LSimdBinaryCompIx4* ins, MSimdBinaryComp* mir,
                          MDefinition* lhs, MDefinition* rhs);
     void lowerForCompFx4(LSimdBinaryCompFx4* ins, MSimdBinaryComp* mir,
                          MDefinition* lhs, MDefinition* rhs);
     void lowerForBitAndAndBranch(LBitAndAndBranch* baab, MInstruction* mir,
                                  MDefinition* lhs, MDefinition* rhs);
     void visitAsmJSNeg(MAsmJSNeg* ins);
     void visitWasmBoundsCheck(MWasmBoundsCheck* ins);
-    void visitWasmLoad(MWasmLoad* ins);
+    void lowerWasmLoad(MWasmLoad* ins);
     void visitAsmSelect(MAsmSelect* ins);
     void lowerMulI(MMul* mul, MDefinition* lhs, MDefinition* rhs);
     void lowerDivI(MDiv* div);
     void lowerModI(MMod* mod);
     void lowerUDiv(MDiv* div);
     void lowerUMod(MMod* mod);
     void lowerUrshD(MUrsh* mir);
     void lowerTruncateDToInt32(MTruncateToInt32* ins);
--- a/js/src/jit/x86/CodeGenerator-x86.cpp
+++ b/js/src/jit/x86/CodeGenerator-x86.cpp
@@ -271,16 +271,96 @@ CodeGeneratorX86::load(Scalar::Type acce
       case Scalar::Float64:      masm.vmovsdWithPatch(srcAddr, ToFloatRegister(out)); break;
       case Scalar::Float32x4:
       case Scalar::Int8x16:
       case Scalar::Int16x8:
       case Scalar::Int32x4:      MOZ_CRASH("SIMD load should be handled in their own function");
       case Scalar::Int64:        MOZ_CRASH("should be handled in a separate function (2 regs)");
       case Scalar::MaxTypedArrayViewType: MOZ_CRASH("unexpected type");
     }
+    masm.append(wasm::MemoryAccess(masm.size()));
+}
+
+void
+CodeGeneratorX86::loadI64(Scalar::Type type, const Operand& srcAddr, Register64 out)
+{
+    switch (type) {
+      case Scalar::Int8:
+        MOZ_ASSERT(out == Register64(edx, eax));
+        masm.movsblWithPatch(srcAddr, out.low);
+        masm.append(wasm::MemoryAccess(masm.size()));
+        masm.cdq();
+        break;
+      case Scalar::Uint8:
+        masm.movzblWithPatch(srcAddr, out.low);
+        masm.append(wasm::MemoryAccess(masm.size()));
+        masm.xorl(out.high, out.high);
+        break;
+      case Scalar::Int16:
+        MOZ_ASSERT(out == Register64(edx, eax));
+        masm.movswlWithPatch(srcAddr, out.low);
+        masm.append(wasm::MemoryAccess(masm.size()));
+        masm.cdq();
+        break;
+      case Scalar::Uint16:
+        masm.movzwlWithPatch(srcAddr, out.low);
+        masm.append(wasm::MemoryAccess(masm.size()));
+        masm.xorl(out.high, out.high);
+        break;
+      case Scalar::Int32:
+        MOZ_ASSERT(out == Register64(edx, eax));
+        masm.movlWithPatch(srcAddr, out.low);
+        masm.append(wasm::MemoryAccess(masm.size()));
+        masm.cdq();
+        break;
+      case Scalar::Uint32:
+        masm.movlWithPatch(srcAddr, out.low);
+        masm.append(wasm::MemoryAccess(masm.size()));
+        masm.xorl(out.high, out.high);
+        break;
+      case Scalar::Int64:
+        if (srcAddr.kind() == Operand::MEM_ADDRESS32) {
+            Operand low(PatchedAbsoluteAddress(uint32_t(srcAddr.address()) + INT64LOW_OFFSET));
+            Operand high(PatchedAbsoluteAddress(uint32_t(srcAddr.address()) + INT64HIGH_OFFSET));
+
+            masm.movlWithPatch(low, out.low);
+            masm.append(wasm::MemoryAccess(masm.size()));
+            masm.movlWithPatch(high, out.high);
+            masm.append(wasm::MemoryAccess(masm.size()));
+        } else {
+            MOZ_ASSERT(srcAddr.kind() == Operand::MEM_REG_DISP);
+            Address addr = srcAddr.toAddress();
+            Operand low(addr.base, addr.offset + INT64LOW_OFFSET);
+            Operand high(addr.base, addr.offset + INT64HIGH_OFFSET);
+
+            if (addr.base != out.low) {
+                masm.movlWithPatch(low, out.low);
+                masm.append(wasm::MemoryAccess(masm.size()));
+                masm.movlWithPatch(high, out.high);
+                masm.append(wasm::MemoryAccess(masm.size()));
+            } else {
+                MOZ_ASSERT(addr.base != out.high);
+                masm.movlWithPatch(high, out.high);
+                masm.append(wasm::MemoryAccess(masm.size()));
+                masm.movlWithPatch(low, out.low);
+                masm.append(wasm::MemoryAccess(masm.size()));
+            }
+        }
+        break;
+      case Scalar::Float32:
+      case Scalar::Float64:
+      case Scalar::Float32x4:
+      case Scalar::Int8x16:
+      case Scalar::Int16x8:
+      case Scalar::Int32x4:
+        MOZ_CRASH("non-int64 loads should use load()");
+      case Scalar::Uint8Clamped:
+      case Scalar::MaxTypedArrayViewType:
+        MOZ_CRASH("unexpected array type");
+    }
 }
 
 void
 CodeGeneratorX86::visitLoadTypedArrayElementStatic(LLoadTypedArrayElementStatic* ins)
 {
     const MLoadTypedArrayElementStatic* mir = ins->mir();
     Scalar::Type accessType = mir->accessType();
     MOZ_ASSERT_IF(accessType == Scalar::Float32, mir->type() == MIRType::Float32);
@@ -403,20 +483,22 @@ CodeGeneratorX86::loadSimd(Scalar::Type 
       case Scalar::Uint32:
       case Scalar::Int64:
       case Scalar::Float32:
       case Scalar::Float64:
       case Scalar::Uint8Clamped:
       case Scalar::MaxTypedArrayViewType:
         MOZ_CRASH("should only handle SIMD types");
     }
+    masm.append(wasm::MemoryAccess(masm.size()));
 }
 
+template <typename T>
 void
-CodeGeneratorX86::visitWasmLoad(LWasmLoad* ins)
+CodeGeneratorX86::emitWasmLoad(T* ins)
 {
     const MWasmLoad* mir = ins->mir();
 
     Scalar::Type accessType = mir->accessType();
     MOZ_ASSERT(!Scalar::isSimdType(accessType), "SIMD NYI");
     MOZ_ASSERT(!mir->barrierBefore() && !mir->barrierAfter(), "atomics NYI");
 
     if (mir->offset() > INT32_MAX) {
@@ -425,45 +507,72 @@ CodeGeneratorX86::visitWasmLoad(LWasmLoa
         return;
     }
 
     const LAllocation* ptr = ins->ptr();
     Operand srcAddr = ptr->isBogus()
                       ? Operand(PatchedAbsoluteAddress(mir->offset()))
                       : Operand(ToRegister(ptr), mir->offset());
 
-    load(accessType, srcAddr, ins->output());
-
-    masm.append(wasm::MemoryAccess(masm.size()));
+    if (mir->type() == MIRType::Int64)
+        loadI64(accessType, srcAddr, ToOutRegister64(ins));
+    else
+        load(accessType, srcAddr, ins->output());
 }
 
 void
-CodeGeneratorX86::visitWasmStore(LWasmStore* ins)
+CodeGeneratorX86::visitWasmLoad(LWasmLoad* ins)
+{
+    emitWasmLoad(ins);
+}
+
+void
+CodeGeneratorX86::visitWasmLoadI64(LWasmLoadI64* ins)
+{
+    emitWasmLoad(ins);
+}
+
+template <typename T>
+void
+CodeGeneratorX86::emitWasmStore(T* ins)
 {
     const MWasmStore* mir = ins->mir();
 
     Scalar::Type accessType = mir->accessType();
     MOZ_ASSERT(!Scalar::isSimdType(accessType), "SIMD NYI");
     MOZ_ASSERT(!mir->barrierBefore() && !mir->barrierAfter(), "atomics NYI");
 
     if (mir->offset() > INT32_MAX) {
         // This is unreachable because of the bounds check.
         masm.breakpoint();
         return;
     }
 
-    const LAllocation* value = ins->value();
     const LAllocation* ptr = ins->ptr();
     Operand dstAddr = ptr->isBogus()
                       ? Operand(PatchedAbsoluteAddress(mir->offset()))
                       : Operand(ToRegister(ptr), mir->offset());
 
-    store(accessType, value, dstAddr);
+    if (accessType == Scalar::Int64)
+        storeI64(accessType, ins->getInt64Operand(LWasmStoreI64::ValueIndex), dstAddr);
+    else
+        store(accessType, ins->getOperand(LWasmStore::ValueIndex), dstAddr);
+}
+
 
-    masm.append(wasm::MemoryAccess(masm.size()));
+void
+CodeGeneratorX86::visitWasmStore(LWasmStore* ins)
+{
+    emitWasmStore(ins);
+}
+
+void
+CodeGeneratorX86::visitWasmStoreI64(LWasmStoreI64* ins)
+{
+    emitWasmStore(ins);
 }
 
 void
 CodeGeneratorX86::emitSimdLoad(LAsmJSLoadHeap* ins)
 {
     const MAsmJSLoadHeap* mir = ins->mir();
     Scalar::Type type = mir->accessType();
     FloatRegister out = ToFloatRegister(ins->output());
@@ -480,29 +589,26 @@ CodeGeneratorX86::emitSimdLoad(LAsmJSLoa
 
         Operand srcAddrZ =
             ptr->isBogus()
             ? Operand(PatchedAbsoluteAddress(2 * sizeof(float) + mir->offset()))
             : Operand(ToRegister(ptr), 2 * sizeof(float) + mir->offset());
 
         // Load XY
         loadSimd(type, 2, srcAddr, out);
-        masm.append(wasm::MemoryAccess(masm.size()));
 
         // Load Z (W is zeroed)
         // This is still in bounds, as we've checked with a manual bounds check
         // or we had enough space for sure when removing the bounds check.
         loadSimd(type, 1, srcAddrZ, ScratchSimd128Reg);
-        masm.append(wasm::MemoryAccess(masm.size()));
 
         // Move ZW atop XY
         masm.vmovlhps(ScratchSimd128Reg, out, out);
     } else {
         loadSimd(type, numElems, srcAddr, out);
-        masm.append(wasm::MemoryAccess(masm.size()));
     }
 
     if (hasBoundsCheck)
         cleanupAfterAsmJSBoundsCheckBranch(mir, ToRegister(ptr));
 }
 
 void
 CodeGeneratorX86::visitAsmJSLoadHeap(LAsmJSLoadHeap* ins)
@@ -520,27 +626,24 @@ CodeGeneratorX86::visitAsmJSLoadHeap(LAs
                       : Operand(ToRegister(ptr), mir->offset());
 
     memoryBarrier(mir->barrierBefore());
 
     OutOfLineLoadTypedArrayOutOfBounds* ool;
     DebugOnly<bool> hasBoundsCheck = maybeEmitAsmJSLoadBoundsCheck(mir, ins, &ool);
 
     load(accessType, srcAddr, out);
-    uint32_t after = masm.size();
 
     if (ool) {
         MOZ_ASSERT(hasBoundsCheck);
         cleanupAfterAsmJSBoundsCheckBranch(mir, ToRegister(ptr));
         masm.bind(ool->rejoin());
     }
 
     memoryBarrier(mir->barrierAfter());
-
-    masm.append(wasm::MemoryAccess(after));
 }
 
 void
 CodeGeneratorX86::store(Scalar::Type accessType, const LAllocation* value, const Operand& dstAddr)
 {
     switch (accessType) {
       case Scalar::Int8:
       case Scalar::Uint8Clamped:
@@ -562,27 +665,63 @@ CodeGeneratorX86::store(Scalar::Type acc
         masm.vmovssWithPatch(ToFloatRegister(value), dstAddr);
         break;
 
       case Scalar::Float64:
         masm.vmovsdWithPatch(ToFloatRegister(value), dstAddr);
         break;
 
       case Scalar::Int64:
-        MOZ_CRASH("should be handled in a separate function (2 registers)");
+        MOZ_CRASH("Should be handled in storeI64.");
 
       case Scalar::Float32x4:
       case Scalar::Int8x16:
       case Scalar::Int16x8:
       case Scalar::Int32x4:
         MOZ_CRASH("SIMD stores should be handled in emitSimdStore");
 
       case Scalar::MaxTypedArrayViewType:
         MOZ_CRASH("unexpected type");
     }
+    masm.append(wasm::MemoryAccess(masm.size()));
+}
+
+void
+CodeGeneratorX86::storeI64(Scalar::Type accessType, const LInt64Allocation value,
+                           const Operand& dstAddr)
+{
+    Register64 input = ToRegister64(value);
+    MOZ_ASSERT(accessType == Scalar::Int64);
+    if (dstAddr.kind() == Operand::MEM_ADDRESS32) {
+        Operand low(PatchedAbsoluteAddress(uint32_t(dstAddr.address()) + INT64LOW_OFFSET));
+        Operand high(PatchedAbsoluteAddress(uint32_t(dstAddr.address()) + INT64HIGH_OFFSET));
+
+        masm.movlWithPatch(input.low, low);
+        masm.append(wasm::MemoryAccess(masm.size()));
+        masm.movlWithPatch(input.high, high);
+        masm.append(wasm::MemoryAccess(masm.size()));
+    } else {
+        MOZ_ASSERT(dstAddr.kind() == Operand::MEM_REG_DISP);
+        Address addr = dstAddr.toAddress();
+        Operand low(addr.base, addr.offset + INT64LOW_OFFSET);
+        Operand high(addr.base, addr.offset + INT64HIGH_OFFSET);
+
+        if (addr.base != input.low) {
+            masm.movlWithPatch(input.low, low);
+            masm.append(wasm::MemoryAccess(masm.size()));
+            masm.movlWithPatch(input.high, high);
+            masm.append(wasm::MemoryAccess(masm.size()));
+        } else {
+            MOZ_ASSERT(addr.base != input.high);
+            masm.movlWithPatch(input.high, high);
+            masm.append(wasm::MemoryAccess(masm.size()));
+            masm.movlWithPatch(input.low, low);
+            masm.append(wasm::MemoryAccess(masm.size()));
+        }
+    }
 }
 
 void
 CodeGeneratorX86::visitStoreTypedArrayElementStatic(LStoreTypedArrayElementStatic* ins)
 {
     MStoreTypedArrayElementStatic* mir = ins->mir();
     Scalar::Type accessType = mir->accessType();
     Register ptr = ToRegister(ins->ptr());
@@ -650,16 +789,17 @@ CodeGeneratorX86::storeSimd(Scalar::Type
       case Scalar::Uint32:
       case Scalar::Int64:
       case Scalar::Float32:
       case Scalar::Float64:
       case Scalar::Uint8Clamped:
       case Scalar::MaxTypedArrayViewType:
         MOZ_CRASH("should only handle SIMD types");
     }
+    masm.append(wasm::MemoryAccess(masm.size()));
 }
 
 void
 CodeGeneratorX86::emitSimdStore(LAsmJSStoreHeap* ins)
 {
     const MAsmJSStoreHeap* mir = ins->mir();
     Scalar::Type type = mir->accessType();
     FloatRegister in = ToFloatRegister(ins->value());
@@ -676,28 +816,25 @@ CodeGeneratorX86::emitSimdStore(LAsmJSSt
 
         Operand dstAddrZ =
             ptr->isBogus()
             ? Operand(PatchedAbsoluteAddress(2 * sizeof(float) + mir->offset()))
             : Operand(ToRegister(ptr), 2 * sizeof(float) + mir->offset());
 
         // Store XY
         storeSimd(type, 2, in, dstAddr);
-        masm.append(wasm::MemoryAccess(masm.size()));
 
         masm.vmovhlps(in, ScratchSimd128Reg, ScratchSimd128Reg);
 
         // Store Z (W is zeroed)
         // This is still in bounds, as we've checked with a manual bounds check
         // or we had enough space for sure when removing the bounds check.
         storeSimd(type, 1, ScratchSimd128Reg, dstAddrZ);
-        masm.append(wasm::MemoryAccess(masm.size()));
     } else {
         storeSimd(type, numElems, in, dstAddr);
-        masm.append(wasm::MemoryAccess(masm.size()));
     }
 
     if (hasBoundsCheck)
         cleanupAfterAsmJSBoundsCheckBranch(mir, ToRegister(ptr));
 }
 
 void
 CodeGeneratorX86::visitAsmJSStoreHeap(LAsmJSStoreHeap* ins)
@@ -717,27 +854,24 @@ CodeGeneratorX86::visitAsmJSStoreHeap(LA
                       : Operand(ToRegister(ptr), mir->offset());
 
     memoryBarrier(mir->barrierBefore());
 
     Label* rejoin;
     DebugOnly<bool> hasBoundsCheck = maybeEmitAsmJSStoreBoundsCheck(mir, ins, &rejoin);
 
     store(accessType, value, dstAddr);
-    uint32_t after = masm.size();
 
     if (rejoin) {
         MOZ_ASSERT(hasBoundsCheck);
         cleanupAfterAsmJSBoundsCheckBranch(mir, ToRegister(ptr));
         masm.bind(rejoin);
     }
 
     memoryBarrier(mir->barrierAfter());
-
-    masm.append(wasm::MemoryAccess(after));
 }
 
 // Perform bounds checking on the access if necessary; if it fails,
 // jump to out-of-line code that throws.  If the bounds check passes,
 // set up the heap address in addrTemp.
 
 void
 CodeGeneratorX86::asmJSAtomicComputeAddress(Register addrTemp, Register ptrReg,
--- a/js/src/jit/x86/CodeGenerator-x86.h
+++ b/js/src/jit/x86/CodeGenerator-x86.h
@@ -24,26 +24,33 @@ class CodeGeneratorX86 : public CodeGene
     }
 
   protected:
     ValueOperand ToValue(LInstruction* ins, size_t pos);
     ValueOperand ToOutValue(LInstruction* ins);
     ValueOperand ToTempValue(LInstruction* ins, size_t pos);
 
     void load(Scalar::Type vt, const Operand& srcAddr, const LDefinition* out);
+    void loadI64(Scalar::Type vt, const Operand& srcAddr, const Register64 out);
     void store(Scalar::Type vt, const LAllocation* value, const Operand& dstAddr);
+    void storeI64(Scalar::Type vt, const LInt64Allocation value, const Operand& dstAddr);
 
     void loadSimd(Scalar::Type type, unsigned numElems, const Operand& srcAddr, FloatRegister out);
     void emitSimdLoad(LAsmJSLoadHeap* ins);
 
     void storeSimd(Scalar::Type type, unsigned numElems, FloatRegister in, const Operand& dstAddr);
     void emitSimdStore(LAsmJSStoreHeap* ins);
 
     void memoryBarrier(MemoryBarrierBits barrier);
 
+    template <typename T>
+    void emitWasmLoad(T* ins);
+    template <typename T>
+    void emitWasmStore(T* ins);
+
   public:
     CodeGeneratorX86(MIRGenerator* gen, LIRGraph* graph, MacroAssembler* masm);
 
   public:
     void visitBox(LBox* box);
     void visitBoxFloatingPoint(LBoxFloatingPoint* box);
     void visitUnbox(LUnbox* unbox);
     void visitValue(LValue* value);
@@ -56,17 +63,19 @@ class CodeGeneratorX86 : public CodeGene
     void visitTruncateDToInt32(LTruncateDToInt32* ins);
     void visitTruncateFToInt32(LTruncateFToInt32* ins);
     void visitLoadTypedArrayElementStatic(LLoadTypedArrayElementStatic* ins);
     void visitStoreTypedArrayElementStatic(LStoreTypedArrayElementStatic* ins);
     void emitAsmJSCall(LAsmJSCallBase* ins);
     void visitAsmJSCall(LAsmJSCall* ins);
     void visitAsmJSCallI64(LAsmJSCallI64* ins);
     void visitWasmLoad(LWasmLoad* ins);
+    void visitWasmLoadI64(LWasmLoadI64* ins);
     void visitWasmStore(LWasmStore* ins);
+    void visitWasmStoreI64(LWasmStoreI64* ins);
     void visitWasmLoadGlobalVar(LWasmLoadGlobalVar* ins);
     void visitWasmStoreGlobalVar(LWasmStoreGlobalVar* ins);
     void visitAsmJSLoadHeap(LAsmJSLoadHeap* ins);
     void visitAsmJSStoreHeap(LAsmJSStoreHeap* ins);
     void visitAsmJSCompareExchangeHeap(LAsmJSCompareExchangeHeap* ins);
     void visitAsmJSAtomicExchangeHeap(LAsmJSAtomicExchangeHeap* ins);
     void visitAsmJSAtomicBinopHeap(LAsmJSAtomicBinopHeap* ins);
     void visitAsmJSAtomicBinopHeapForEffect(LAsmJSAtomicBinopHeapForEffect* ins);
--- a/js/src/jit/x86/Lowering-x86.cpp
+++ b/js/src/jit/x86/Lowering-x86.cpp
@@ -285,18 +285,22 @@ LIRGeneratorX86::visitWasmStore(MWasmSto
       case Scalar::Float32x4:
       case Scalar::Int8x16:
       case Scalar::Int16x8:
       case Scalar::Int32x4:
         // For now, don't allow constant values. The immediate operand affects
         // instruction layout which affects patching.
         valueAlloc = useRegisterAtStart(ins->value());
         break;
-      case Scalar::Int64:
-        MOZ_CRASH("NYI");
+      case Scalar::Int64: {
+        LInt64Allocation valueAlloc = useInt64RegisterAtStart(ins->value());
+        auto* lir = new(alloc()) LWasmStoreI64(baseAlloc, valueAlloc);
+        add(lir, ins);
+        return;
+      }
       case Scalar::Uint8Clamped:
       case Scalar::MaxTypedArrayViewType:
         MOZ_CRASH("unexpected array type");
     }
 
     auto* lir = new(alloc()) LWasmStore(baseAlloc, valueAlloc);
     add(lir, ins);
 }
@@ -555,16 +559,40 @@ void
 LIRGeneratorX86::lowerUModI64(MMod* mod)
 {
     LUDivOrModI64* lir = new(alloc()) LUDivOrModI64(useInt64RegisterAtStart(mod->lhs()),
                                                     useInt64RegisterAtStart(mod->rhs()));
     defineReturn(lir, mod);
 }
 
 void
+LIRGeneratorX86::visitWasmLoad(MWasmLoad* ins)
+{
+    if (ins->type() != MIRType::Int64) {
+        lowerWasmLoad(ins);
+        return;
+    }
+
+    MDefinition* base = ins->base();
+    MOZ_ASSERT(base->type() == MIRType::Int32);
+
+    auto* lir = new(alloc()) LWasmLoadI64(useRegisterOrZeroAtStart(base));
+
+    Scalar::Type accessType = ins->accessType();
+    if (accessType == Scalar::Int8 || accessType == Scalar::Int16 || accessType == Scalar::Int32) {
+        // We use cdq to sign-extend the result and cdq demands these registers.
+        defineInt64Fixed(lir, ins, LInt64Allocation(LAllocation(AnyRegister(edx)),
+                                                    LAllocation(AnyRegister(eax))));
+        return;
+    }
+
+    defineInt64(lir, ins);
+}
+
+void
 LIRGeneratorX86::visitSubstr(MSubstr* ins)
 {
     // Due to lack of registers on x86, we reuse the string register as
     // temporary. As a result we only need two temporary registers and take a
    // bogus temporary as fifth argument.
     LSubstr* lir = new (alloc()) LSubstr(useRegister(ins->string()),
                                          useRegister(ins->begin()),
                                          useRegister(ins->length()),
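
For the sign-extending cases (Int8/Int16/Int32 loaded as int64), the x86 lowering above pins the result to the edx:eax pair because the codegen uses cdq, which fills edx with the sign bit of eax. A small standalone sketch of that effect (illustrative only, not SpiderMonkey code):

#include <cassert>
#include <cstdint>

// Hypothetical helper mimicking "load the low word into eax, then cdq".
void signExtendToPair(int32_t loadedLow, uint32_t* eax, uint32_t* edx) {
    *eax = static_cast<uint32_t>(loadedLow);
    *edx = loadedLow < 0 ? 0xffffffffu : 0u; // cdq: edx = sign bits of eax
}

int main() {
    uint32_t eax, edx;
    signExtendToPair(-5, &eax, &edx);
    assert(eax == 0xfffffffbu && edx == 0xffffffffu);
    signExtendToPair(7, &eax, &edx);
    assert(eax == 7u && edx == 0u);
    return 0;
}
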
--- a/js/src/jit/x86/Lowering-x86.h
+++ b/js/src/jit/x86/Lowering-x86.h
@@ -50,16 +50,17 @@ class LIRGeneratorX86 : public LIRGenera
     void lowerForMulInt64(LMulI64* ins, MMul* mir, MDefinition* lhs, MDefinition* rhs);
 
     void lowerDivI64(MDiv* div);
     void lowerModI64(MMod* mod);
     void lowerUDivI64(MDiv* div);
     void lowerUModI64(MMod* mod);
 
   public:
+    void visitWasmLoad(MWasmLoad* ins);
     void visitBox(MBox* box);
     void visitUnbox(MUnbox* unbox);
     void visitReturn(MReturn* ret);
     void visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement* ins);
     void visitAtomicExchangeTypedArrayElement(MAtomicExchangeTypedArrayElement* ins);
     void visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop* ins);
     void visitAsmJSUnsignedToDouble(MAsmJSUnsignedToDouble* ins);
     void visitAsmJSUnsignedToFloat32(MAsmJSUnsignedToFloat32* ins);