Bug 1312751 - Refactor wasm{Load,Store}{,I64} to MacroAssembler-arm. r=h4writer
author Lars T Hansen <lhansen@mozilla.com>
Mon, 16 Jan 2017 09:05:58 +0100
changeset 357579 06c91f4ef053f69435ee491cd32a2f26cda544ad
parent 357578 473e99dde77d8a26c675c2d3d7a4af6133cfea9f
child 357580 bc9b79d56651c8fca79aa0178b5576d5c96111fe
push id 10621
push user jlund@mozilla.com
push date Mon, 23 Jan 2017 16:02:43 +0000
treeherder mozilla-aurora@dca7b42e6c67
reviewers h4writer
bugs 1312751
milestone 53.0a1
Bug 1312751 - Refactor wasm{Load,Store}{,I64} to MacroAssembler-arm. r=h4writer
js/src/jit/MacroAssembler.h
js/src/jit/arm/CodeGenerator-arm.cpp
js/src/jit/arm/MacroAssembler-arm.cpp
js/src/jit/arm/MacroAssembler-arm.h
--- a/js/src/jit/MacroAssembler.h
+++ b/js/src/jit/MacroAssembler.h
@@ -1372,16 +1372,52 @@ class MacroAssembler : public MacroAssem
     // wasm::MemoryAccessVector (there can be multiple when i64 is involved).
     // On x64, only some asm.js accesses need a wasm::MemoryAccess so the caller
     // is responsible for doing this instead.
     void wasmLoad(const wasm::MemoryAccessDesc& access, Operand srcAddr, AnyRegister out) DEFINED_ON(x86, x64);
     void wasmLoadI64(const wasm::MemoryAccessDesc& access, Operand srcAddr, Register64 out) DEFINED_ON(x86, x64);
     void wasmStore(const wasm::MemoryAccessDesc& access, AnyRegister value, Operand dstAddr) DEFINED_ON(x86, x64);
     void wasmStoreI64(const wasm::MemoryAccessDesc& access, Register64 value, Operand dstAddr) DEFINED_ON(x86);
 
+    // For all the ARM wasmLoad and wasmStore functions below, `ptr` MUST equal
+    // `ptrScratch`, and that register will be updated under the conditions
+    // listed below (the conditions refer to it simply as `ptr`).
+
+    // `ptr` will be updated if access.offset() != 0 or access.type() == Scalar::Int64.
+    void wasmLoad(const wasm::MemoryAccessDesc& access, Register ptr, Register ptrScratch, AnyRegister output) DEFINED_ON(arm);
+    void wasmLoadI64(const wasm::MemoryAccessDesc& access, Register ptr, Register ptrScratch, Register64 output) DEFINED_ON(arm);
+    void wasmStore(const wasm::MemoryAccessDesc& access, AnyRegister value, Register ptr, Register ptrScratch) DEFINED_ON(arm);
+    void wasmStoreI64(const wasm::MemoryAccessDesc& access, Register64 value, Register ptr, Register ptrScratch) DEFINED_ON(arm);
+
+    // `ptr` will always be updated.
+    void wasmUnalignedLoad(const wasm::MemoryAccessDesc& access, Register ptr, Register ptrScratch,
+                           Register output, Register tmp) DEFINED_ON(arm);
+
+    // `ptr` will always be updated and `tmp1` is always needed.  `tmp2` is
+    // needed for Float32; `tmp2` and `tmp3` are needed for Float64.  Temps must
+    // be Invalid when they are not needed.
+    void wasmUnalignedLoadFP(const wasm::MemoryAccessDesc& access, Register ptr, Register ptrScratch,
+                             FloatRegister output, Register tmp1, Register tmp2, Register tmp3) DEFINED_ON(arm);
+
+    // `ptr` will always be updated.
+    void wasmUnalignedLoadI64(const wasm::MemoryAccessDesc& access, Register ptr, Register ptrScratch,
+                              Register64 output, Register tmp) DEFINED_ON(arm);
+
+    // `ptr` and `value` will always be updated.
+    void wasmUnalignedStore(const wasm::MemoryAccessDesc& access, Register value, Register ptr, Register ptrScratch)
+        DEFINED_ON(arm);
+
+    // `ptr` will always be updated.
+    void wasmUnalignedStoreFP(const wasm::MemoryAccessDesc& access, FloatRegister floatValue, Register ptr,
+                              Register ptrScratch, Register tmp) DEFINED_ON(arm);
+
+    // `ptr` will always be updated.
+    void wasmUnalignedStoreI64(const wasm::MemoryAccessDesc& access, Register64 value, Register ptr, Register ptrScratch,
+                               Register tmp) DEFINED_ON(arm);
+
     // wasm specific methods, used in both the wasm baseline compiler and ion.
     void wasmTruncateDoubleToUInt32(FloatRegister input, Register output, Label* oolEntry) DEFINED_ON(x86, x64, arm);
     void wasmTruncateDoubleToInt32(FloatRegister input, Register output, Label* oolEntry) DEFINED_ON(x86_shared, arm);
     void outOfLineWasmTruncateDoubleToInt32(FloatRegister input, bool isUnsigned, wasm::TrapOffset off, Label* rejoin) DEFINED_ON(x86_shared);
 
     void wasmTruncateFloat32ToUInt32(FloatRegister input, Register output, Label* oolEntry) DEFINED_ON(x86, x64, arm);
     void wasmTruncateFloat32ToInt32(FloatRegister input, Register output, Label* oolEntry) DEFINED_ON(x86_shared, arm);
     void outOfLineWasmTruncateFloat32ToInt32(FloatRegister input, bool isUnsigned, wasm::TrapOffset off, Label* rejoin) DEFINED_ON(x86_shared);
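
The ARM entry points added above intentionally take the pointer twice: the caller passes the same register as `ptr` and `ptrScratch`, and the callee is free to clobber it (to fold in access.offset() or to address the Int64 high word). A minimal caller-side sketch, assuming a MacroAssembler `masm`, a bounds-checked index already in `ptr`, and a prepared `access` descriptor (surrounding names are illustrative, not part of the patch):

    if (resultType == MIRType::Int64)
        masm.wasmLoadI64(access, ptr, ptr, out64);   // always updates ptr
    else
        masm.wasmLoad(access, ptr, ptr, anyOut);     // updates ptr only if access.offset() != 0

The CodeGenerator-arm.cpp changes below follow exactly this pattern.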
--- a/js/src/jit/arm/CodeGenerator-arm.cpp
+++ b/js/src/jit/arm/CodeGenerator-arm.cpp
@@ -2361,79 +2361,30 @@ CodeGeneratorARM::visitAsmJSLoadHeap(LAs
     }
 }
 
 template <typename T>
 void
 CodeGeneratorARM::emitWasmLoad(T* lir)
 {
     const MWasmLoad* mir = lir->mir();
-
-    uint32_t offset = mir->access().offset();
-    MOZ_ASSERT(offset < wasm::OffsetGuardLimit);
-
-    Register ptr = ToRegister(lir->ptr());
-    Scalar::Type type = mir->access().type();
-
-    // Maybe add the offset.
-    if (offset || type == Scalar::Int64) {
-        ScratchRegisterScope scratch(masm);
-        Register ptrPlusOffset = ToRegister(lir->ptrCopy());
-        if (offset)
-            masm.ma_add(Imm32(offset), ptrPlusOffset, scratch);
-        ptr = ptrPlusOffset;
+    MIRType resultType = mir->type();
+    Register ptr;
+
+    if (mir->access().offset() || mir->access().type() == Scalar::Int64) {
+        ptr = ToRegister(lir->ptrCopy());
     } else {
         MOZ_ASSERT(lir->ptrCopy()->isBogusTemp());
+        ptr = ToRegister(lir->ptr());
     }
 
-    bool isSigned = type == Scalar::Int8 || type == Scalar::Int16 || type == Scalar::Int32 ||
-                    type == Scalar::Int64;
-    unsigned byteSize = mir->access().byteSize();
-
-    masm.memoryBarrier(mir->access().barrierBefore());
-
-    BufferOffset load;
-    if (mir->type() == MIRType::Int64) {
-        Register64 output = ToOutRegister64(lir);
-        if (type == Scalar::Int64) {
-            MOZ_ASSERT(INT64LOW_OFFSET == 0);
-
-            load = masm.ma_dataTransferN(IsLoad, 32, /* signed = */ false, HeapReg, ptr, output.low);
-            masm.append(mir->access(), load.getOffset(), masm.framePushed());
-
-            masm.as_add(ptr, ptr, Imm8(INT64HIGH_OFFSET));
-
-            load = masm.ma_dataTransferN(IsLoad, 32, isSigned, HeapReg, ptr, output.high);
-            masm.append(mir->access(), load.getOffset(), masm.framePushed());
-        } else {
-            load = masm.ma_dataTransferN(IsLoad, byteSize * 8, isSigned, HeapReg, ptr, output.low);
-            masm.append(mir->access(), load.getOffset(), masm.framePushed());
-
-            if (isSigned)
-                masm.ma_asr(Imm32(31), output.low, output.high);
-            else
-                masm.ma_mov(Imm32(0), output.high);
-        }
-    } else {
-        AnyRegister output = ToAnyRegister(lir->output());
-        bool isFloat = output.isFloat();
-        if (isFloat) {
-            MOZ_ASSERT((byteSize == 4) == output.fpu().isSingle());
-            ScratchRegisterScope scratch(masm);
-            masm.ma_add(HeapReg, ptr, scratch);
-
-            load = masm.ma_vldr(Operand(Address(scratch, 0)).toVFPAddr(), output.fpu());
-            masm.append(mir->access(), load.getOffset(), masm.framePushed());
-        } else {
-            load = masm.ma_dataTransferN(IsLoad, byteSize * 8, isSigned, HeapReg, ptr, output.gpr());
-            masm.append(mir->access(), load.getOffset(), masm.framePushed());
-        }
-    }
-
-    masm.memoryBarrier(mir->access().barrierAfter());
+    if (resultType == MIRType::Int64)
+        masm.wasmLoadI64(mir->access(), ptr, ptr, ToOutRegister64(lir));
+    else
+        masm.wasmLoad(mir->access(), ptr, ptr, ToAnyRegister(lir->output()));
 }
 
 void
 CodeGeneratorARM::visitWasmLoad(LWasmLoad* lir)
 {
     emitWasmLoad(lir);
 }
 
@@ -2443,81 +2394,32 @@ CodeGeneratorARM::visitWasmLoadI64(LWasm
     emitWasmLoad(lir);
 }
 
 template<typename T>
 void
 CodeGeneratorARM::emitWasmUnalignedLoad(T* lir)
 {
     const MWasmLoad* mir = lir->mir();
-
-    uint32_t offset = mir->access().offset();
-    MOZ_ASSERT(offset < wasm::OffsetGuardLimit);
+    MIRType resultType = mir->type();
 
     Register ptr = ToRegister(lir->ptrCopy());
-    if (offset) {
-        ScratchRegisterScope scratch(masm);
-        masm.ma_add(Imm32(offset), ptr, scratch);
+    Register tmp1 = ToRegister(lir->getTemp(1));
+
+    if (resultType == MIRType::Int64) {
+        masm.wasmUnalignedLoadI64(mir->access(), ptr, ptr, ToOutRegister64(lir), tmp1);
+    } else if (IsFloatingPointType(resultType)) {
+        Register tmp2(ToRegister(lir->getTemp(2)));
+        Register tmp3(Register::Invalid());
+        if (mir->access().byteSize() == 8)
+            tmp3 = ToRegister(lir->getTemp(3));
+        masm.wasmUnalignedLoadFP(mir->access(), ptr, ptr, ToFloatRegister(lir->output()), tmp1, tmp2, tmp3);
+    } else {
+        masm.wasmUnalignedLoad(mir->access(), ptr, ptr, ToRegister(lir->output()), tmp1);
     }
-
-    // Add HeapReg to ptr, so we can use base+index addressing in the byte loads.
-    masm.ma_add(HeapReg, ptr);
-
-    unsigned byteSize = mir->access().byteSize();
-    Scalar::Type type = mir->access().type();
-    bool isSigned = type == Scalar::Int8 || type == Scalar::Int16 || type == Scalar::Int32 ||
-                    type == Scalar::Int64;
-
-    MIRType mirType = mir->type();
-
-    Register tmp = ToRegister(lir->getTemp(1));
-
-    Register low;
-    if (IsFloatingPointType(mirType))
-        low = ToRegister(lir->getTemp(2));
-    else if (mirType == MIRType::Int64)
-        low = ToOutRegister64(lir).low;
-    else
-        low = ToRegister(lir->output());
-
-    MOZ_ASSERT(low != tmp);
-    MOZ_ASSERT(low != ptr);
-
-    masm.memoryBarrier(mir->access().barrierBefore());
-
-    masm.emitUnalignedLoad(isSigned, Min(byteSize, 4u), ptr, tmp, low);
-
-    if (IsFloatingPointType(mirType)) {
-        FloatRegister output = ToFloatRegister(lir->output());
-        if (byteSize == 4) {
-            MOZ_ASSERT(output.isSingle());
-            masm.ma_vxfer(low, output);
-        } else {
-            MOZ_ASSERT(byteSize == 8);
-            MOZ_ASSERT(output.isDouble());
-            Register high = ToRegister(lir->getTemp(3));
-            masm.emitUnalignedLoad(/* signed */ false, 4, ptr, tmp, high, /* offset */ 4);
-            masm.ma_vxfer(low, high, output);
-        }
-    } else if (mirType == MIRType::Int64) {
-        Register64 output = ToOutRegister64(lir);
-        if (type == Scalar::Int64) {
-            MOZ_ASSERT(byteSize == 8);
-            masm.emitUnalignedLoad(isSigned, 4, ptr, tmp, output.high, /* offset */ 4);
-        } else {
-            MOZ_ASSERT(byteSize <= 4);
-            // Propagate sign.
-            if (isSigned)
-                masm.ma_asr(Imm32(31), output.low, output.high);
-            else
-                masm.ma_mov(Imm32(0), output.high);
-        }
-    }
-
-    masm.memoryBarrier(mir->access().barrierAfter());
 }
 
 void
 CodeGeneratorARM::visitWasmUnalignedLoad(LWasmUnalignedLoad* lir)
 {
     emitWasmUnalignedLoad(lir);
 }
 
@@ -2540,70 +2442,32 @@ CodeGeneratorARM::visitWasmAddOffset(LWa
     masm.ma_b(trap(mir, wasm::Trap::OutOfBounds), Assembler::CarrySet);
 }
 
 template <typename T>
 void
 CodeGeneratorARM::emitWasmStore(T* lir)
 {
     const MWasmStore* mir = lir->mir();
-
-    uint32_t offset = mir->access().offset();
-    MOZ_ASSERT(offset < wasm::OffsetGuardLimit);
-
-    Register ptr = ToRegister(lir->ptr());
-    unsigned byteSize = mir->access().byteSize();
-    Scalar::Type type = mir->access().type();
+    Scalar::Type accessType = mir->access().type();
+    Register ptr;
 
     // Maybe add the offset.
-    if (offset || type == Scalar::Int64) {
-        ScratchRegisterScope scratch(masm);
-        Register ptrPlusOffset = ToRegister(lir->ptrCopy());
-        if (offset)
-            masm.ma_add(Imm32(offset), ptrPlusOffset, scratch);
-        ptr = ptrPlusOffset;
+    if (mir->access().offset() || accessType == Scalar::Int64) {
+        ptr = ToRegister(lir->ptrCopy());
     } else {
         MOZ_ASSERT(lir->ptrCopy()->isBogusTemp());
+        ptr = ToRegister(lir->ptr());
     }
 
-    masm.memoryBarrier(mir->access().barrierBefore());
-
-    BufferOffset store;
-    if (type == Scalar::Int64) {
-        MOZ_ASSERT(INT64LOW_OFFSET == 0);
-
-        Register64 value = ToRegister64(lir->getInt64Operand(lir->ValueIndex));
-
-        store = masm.ma_dataTransferN(IsStore, 32 /* bits */, /* signed */ false, HeapReg, ptr, value.low);
-        masm.append(mir->access(), store.getOffset(), masm.framePushed());
-
-        masm.as_add(ptr, ptr, Imm8(INT64HIGH_OFFSET));
-
-        store = masm.ma_dataTransferN(IsStore, 32 /* bits */, /* signed */ true, HeapReg, ptr, value.high);
-        masm.append(mir->access(), store.getOffset(), masm.framePushed());
-    } else {
-        AnyRegister value = ToAnyRegister(lir->getOperand(lir->ValueIndex));
-        if (value.isFloat()) {
-            ScratchRegisterScope scratch(masm);
-            FloatRegister val = value.fpu();
-            MOZ_ASSERT((byteSize == 4) == val.isSingle());
-            masm.ma_add(HeapReg, ptr, scratch);
-
-            store = masm.ma_vstr(val, Operand(Address(scratch, 0)).toVFPAddr());
-            masm.append(mir->access(), store.getOffset(), masm.framePushed());
-        } else {
-            bool isSigned = type == Scalar::Uint32 || type == Scalar::Int32; // see AsmJSStoreHeap;
-            Register val = value.gpr();
-
-            store = masm.ma_dataTransferN(IsStore, 8 * byteSize /* bits */, isSigned, HeapReg, ptr, val);
-            masm.append(mir->access(), store.getOffset(), masm.framePushed());
-        }
-    }
-
-    masm.memoryBarrier(mir->access().barrierAfter());
+    if (accessType == Scalar::Int64)
+        masm.wasmStoreI64(mir->access(), ToRegister64(lir->getInt64Operand(lir->ValueIndex)),
+                          ptr, ptr);
+    else
+        masm.wasmStore(mir->access(), ToAnyRegister(lir->getOperand(lir->ValueIndex)), ptr, ptr);
 }
 
 void
 CodeGeneratorARM::visitWasmStore(LWasmStore* lir)
 {
     emitWasmStore(lir);
 }
 
@@ -2613,61 +2477,30 @@ CodeGeneratorARM::visitWasmStoreI64(LWas
     emitWasmStore(lir);
 }
 
 template<typename T>
 void
 CodeGeneratorARM::emitWasmUnalignedStore(T* lir)
 {
     const MWasmStore* mir = lir->mir();
-
-    uint32_t offset = mir->access().offset();
-    MOZ_ASSERT(offset < wasm::OffsetGuardLimit);
+    Scalar::Type accessType = mir->access().type();
 
     Register ptr = ToRegister(lir->ptrCopy());
-    if (offset) {
-        ScratchRegisterScope scratch(masm);
-        masm.ma_add(Imm32(offset), ptr, scratch);
-    }
-
-    // Add HeapReg to ptr, so we can use base+index addressing in the byte loads.
-    masm.ma_add(HeapReg, ptr);
-
-    MIRType mirType = mir->value()->type();
-
-    masm.memoryBarrier(mir->access().barrierAfter());
-
-    Register val = ToRegister(lir->valueHelper());
-    if (IsFloatingPointType(mirType)) {
-        masm.ma_vxfer(ToFloatRegister(lir->getOperand(LWasmUnalignedStore::ValueIndex)), val);
-    } else if (mirType == MIRType::Int64) {
-        Register64 input = ToRegister64(lir->getInt64Operand(LWasmUnalignedStoreI64::ValueIndex));
-        if (input.low != val)
-            masm.ma_mov(input.low, val);
+    Register valOrTmp = ToRegister(lir->valueHelper());
+    if (accessType == Scalar::Int64) {
+        masm.wasmUnalignedStoreI64(mir->access(),
+                                   ToRegister64(lir->getInt64Operand(LWasmUnalignedStoreI64::ValueIndex)),
+                                   ptr, ptr, valOrTmp);
+    } else if (accessType == Scalar::Float32 || accessType == Scalar::Float64) {
+        FloatRegister value = ToFloatRegister(lir->getOperand(LWasmUnalignedStore::ValueIndex));
+        masm.wasmUnalignedStoreFP(mir->access(), value, ptr, ptr, valOrTmp);
+    } else {
+        masm.wasmUnalignedStore(mir->access(), valOrTmp, ptr, ptr);
     }
-
-    unsigned byteSize = mir->access().byteSize();
-    masm.emitUnalignedStore(Min(byteSize, 4u), ptr, val);
-
-    if (byteSize > 4) {
-        // It's a double or an int64 load.
-        // Load the high 32 bits when counter == 4.
-        if (IsFloatingPointType(mirType)) {
-            FloatRegister fp = ToFloatRegister(lir->getOperand(LWasmUnalignedStore::ValueIndex));
-            MOZ_ASSERT(fp.isDouble());
-            ScratchRegisterScope scratch(masm);
-            masm.ma_vxfer(fp, scratch, val);
-        } else {
-            MOZ_ASSERT(mirType == MIRType::Int64);
-            masm.ma_mov(ToRegister64(lir->getInt64Operand(LWasmUnalignedStoreI64::ValueIndex)).high, val);
-        }
-        masm.emitUnalignedStore(4, ptr, val, /* offset */ 4);
-    }
-
-    masm.memoryBarrier(mir->access().barrierBefore());
 }
 
 void
 CodeGeneratorARM::visitWasmUnalignedStore(LWasmUnalignedStore* lir)
 {
     emitWasmUnalignedStore(lir);
 }
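
Taken together, the unaligned-load dispatch above relies on a fixed temp budget per access type; a hedged summary (temp names follow the LIR temps used above, this is not additional patch content):

    // wasmUnalignedLoad     (8/16/32-bit int): GPR output + tmp1
    // wasmUnalignedLoadI64:                    Register64 output + tmp1
    // wasmUnalignedLoadFP   (Float32):         FP output + tmp1 + tmp2 (low word)
    // wasmUnalignedLoadFP   (Float64):         FP output + tmp1 + tmp2 (low word) + tmp3 (high word)

Unused temps are passed as Register::Invalid(), matching the contract documented in MacroAssembler.h.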
 
--- a/js/src/jit/arm/MacroAssembler-arm.cpp
+++ b/js/src/jit/arm/MacroAssembler-arm.cpp
@@ -5431,16 +5431,90 @@ MacroAssembler::wasmTruncateFloat32ToUIn
 }
 
 void
 MacroAssembler::wasmTruncateFloat32ToInt32(FloatRegister input, Register output, Label* oolEntry)
 {
     wasmTruncateToInt32(input, output, MIRType::Float32, /* isUnsigned= */ false, oolEntry);
 }
 
+void
+MacroAssembler::wasmLoad(const wasm::MemoryAccessDesc& access, Register ptr, Register ptrScratch,
+                         AnyRegister output)
+{
+    wasmLoadImpl(access, ptr, ptrScratch, output, Register64::Invalid());
+}
+
+void
+MacroAssembler::wasmLoadI64(const wasm::MemoryAccessDesc& access, Register ptr, Register ptrScratch,
+                            Register64 output)
+{
+    wasmLoadImpl(access, ptr, ptrScratch, AnyRegister(), output);
+}
+
+void
+MacroAssembler::wasmStore(const wasm::MemoryAccessDesc& access, AnyRegister value, Register ptr,
+                          Register ptrScratch)
+{
+    wasmStoreImpl(access, value, Register64::Invalid(), ptr, ptrScratch);
+}
+
+void
+MacroAssembler::wasmStoreI64(const wasm::MemoryAccessDesc& access, Register64 value, Register ptr,
+                             Register ptrScratch)
+{
+    wasmStoreImpl(access, AnyRegister(), value, ptr, ptrScratch);
+}
+
+void
+MacroAssembler::wasmUnalignedLoad(const wasm::MemoryAccessDesc& access, Register ptr,
+                                  Register ptrScratch, Register output, Register tmp)
+{
+    wasmUnalignedLoadImpl(access, ptr, ptrScratch, AnyRegister(output), Register64::Invalid(), tmp,
+                          Register::Invalid(), Register::Invalid());
+}
+
+void
+MacroAssembler::wasmUnalignedLoadFP(const wasm::MemoryAccessDesc& access, Register ptr,
+                                    Register ptrScratch, FloatRegister outFP, Register tmp1,
+                                    Register tmp2, Register tmp3)
+{
+    wasmUnalignedLoadImpl(access, ptr, ptrScratch, AnyRegister(outFP), Register64::Invalid(),
+                          tmp1, tmp2, tmp3);
+}
+
+void
+MacroAssembler::wasmUnalignedLoadI64(const wasm::MemoryAccessDesc& access, Register ptr,
+                                     Register ptrScratch, Register64 out64, Register tmp)
+{
+    wasmUnalignedLoadImpl(access, ptr, ptrScratch, AnyRegister(), out64, tmp, Register::Invalid(),
+                          Register::Invalid());
+}
+
+void
+MacroAssembler::wasmUnalignedStore(const wasm::MemoryAccessDesc& access, Register value,
+                                   Register ptr, Register ptrScratch)
+{
+    wasmUnalignedStoreImpl(access, FloatRegister(), Register64::Invalid(), ptr, ptrScratch, value);
+}
+
+void
+MacroAssembler::wasmUnalignedStoreFP(const wasm::MemoryAccessDesc& access, FloatRegister floatVal,
+                                     Register ptr, Register ptrScratch, Register tmp)
+{
+    wasmUnalignedStoreImpl(access, floatVal, Register64::Invalid(), ptr, ptrScratch, tmp);
+}
+
+void
+MacroAssembler::wasmUnalignedStoreI64(const wasm::MemoryAccessDesc& access, Register64 val64,
+                                      Register ptr, Register ptrScratch, Register tmp)
+{
+    wasmUnalignedStoreImpl(access, FloatRegister(), val64, ptr, ptrScratch, tmp);
+}
+
 //}}} check_macroassembler_style
 
 void
 MacroAssemblerARM::wasmTruncateToInt32(FloatRegister input, Register output, MIRType fromType,
                                        bool isUnsigned, Label* oolEntry)
 {
     // vcvt* converts NaN into 0, so check for NaNs here.
     {
@@ -5576,16 +5650,258 @@ MacroAssemblerARM::outOfLineWasmTruncate
                                  asMasm().framePushed()));
 
     bind(&inputIsNaN);
     asMasm().jump(wasm::TrapDesc(trapOffset, wasm::Trap::InvalidConversionToInteger,
                                  asMasm().framePushed()));
 }
 
 void
+MacroAssemblerARM::wasmLoadImpl(const wasm::MemoryAccessDesc& access, Register ptr,
+                                Register ptrScratch, AnyRegister output, Register64 out64)
+{
+    MOZ_ASSERT(ptr == ptrScratch);
+
+    uint32_t offset = access.offset();
+    MOZ_ASSERT(offset < wasm::OffsetGuardLimit);
+
+    Scalar::Type type = access.type();
+
+    // Maybe add the offset.
+    if (offset || type == Scalar::Int64) {
+        ScratchRegisterScope scratch(asMasm());
+        if (offset)
+            ma_add(Imm32(offset), ptr, scratch);
+    }
+
+    bool isSigned = type == Scalar::Int8 || type == Scalar::Int16 || type == Scalar::Int32 ||
+                    type == Scalar::Int64;
+    unsigned byteSize = access.byteSize();
+
+    asMasm().memoryBarrier(access.barrierBefore());
+
+    uint32_t framePushed = asMasm().framePushed();
+    BufferOffset load;
+    if (out64 != Register64::Invalid()) {
+        if (type == Scalar::Int64) {
+            MOZ_ASSERT(INT64LOW_OFFSET == 0);
+
+            load = ma_dataTransferN(IsLoad, 32, /* signed = */ false, HeapReg, ptr, out64.low);
+            append(access, load.getOffset(), framePushed);
+
+            as_add(ptr, ptr, Imm8(INT64HIGH_OFFSET));
+
+            load = ma_dataTransferN(IsLoad, 32, isSigned, HeapReg, ptr, out64.high);
+            append(access, load.getOffset(), framePushed);
+        } else {
+            load = ma_dataTransferN(IsLoad, byteSize * 8, isSigned, HeapReg, ptr, out64.low);
+            append(access, load.getOffset(), framePushed);
+
+            if (isSigned)
+                ma_asr(Imm32(31), out64.low, out64.high);
+            else
+                ma_mov(Imm32(0), out64.high);
+        }
+    } else {
+        bool isFloat = output.isFloat();
+        if (isFloat) {
+            MOZ_ASSERT((byteSize == 4) == output.fpu().isSingle());
+            ScratchRegisterScope scratch(asMasm());
+            ma_add(HeapReg, ptr, scratch);
+
+            load = ma_vldr(Operand(Address(scratch, 0)).toVFPAddr(), output.fpu());
+            append(access, load.getOffset(), framePushed);
+        } else {
+            load = ma_dataTransferN(IsLoad, byteSize * 8, isSigned, HeapReg, ptr, output.gpr());
+            append(access, load.getOffset(), framePushed);
+        }
+    }
+
+    asMasm().memoryBarrier(access.barrierAfter());
+}
+
+void
+MacroAssemblerARM::wasmStoreImpl(const wasm::MemoryAccessDesc& access, AnyRegister value,
+                                 Register64 val64, Register ptr, Register ptrScratch)
+{
+    MOZ_ASSERT(ptr == ptrScratch);
+
+    uint32_t offset = access.offset();
+    MOZ_ASSERT(offset < wasm::OffsetGuardLimit);
+
+    unsigned byteSize = access.byteSize();
+    Scalar::Type type = access.type();
+
+    // Maybe add the offset.
+    if (offset || type == Scalar::Int64) {
+        ScratchRegisterScope scratch(asMasm());
+        if (offset)
+            ma_add(Imm32(offset), ptr, scratch);
+    }
+
+    asMasm().memoryBarrier(access.barrierBefore());
+
+    uint32_t framePushed = asMasm().framePushed();
+
+    BufferOffset store;
+    if (type == Scalar::Int64) {
+        MOZ_ASSERT(INT64LOW_OFFSET == 0);
+
+        store = ma_dataTransferN(IsStore, 32 /* bits */, /* signed */ false, HeapReg, ptr, val64.low);
+        append(access, store.getOffset(), framePushed);
+
+        as_add(ptr, ptr, Imm8(INT64HIGH_OFFSET));
+
+        store = ma_dataTransferN(IsStore, 32 /* bits */, /* signed */ true, HeapReg, ptr, val64.high);
+        append(access, store.getOffset(), framePushed);
+    } else {
+        if (value.isFloat()) {
+            ScratchRegisterScope scratch(asMasm());
+            FloatRegister val = value.fpu();
+            MOZ_ASSERT((byteSize == 4) == val.isSingle());
+            ma_add(HeapReg, ptr, scratch);
+
+            store = ma_vstr(val, Operand(Address(scratch, 0)).toVFPAddr());
+            append(access, store.getOffset(), framePushed);
+        } else {
+            bool isSigned = type == Scalar::Uint32 || type == Scalar::Int32; // see AsmJSStoreHeap;
+            Register val = value.gpr();
+
+            store = ma_dataTransferN(IsStore, 8 * byteSize /* bits */, isSigned, HeapReg, ptr, val);
+            append(access, store.getOffset(), framePushed);
+        }
+    }
+
+    asMasm().memoryBarrier(access.barrierAfter());
+}
+
+void
+MacroAssemblerARM::wasmUnalignedLoadImpl(const wasm::MemoryAccessDesc& access, Register ptr,
+                                         Register ptrScratch, AnyRegister outAny, Register64 out64,
+                                         Register tmp, Register tmp2, Register tmp3)
+{
+    MOZ_ASSERT(ptr == ptrScratch);
+    MOZ_ASSERT_IF(access.type() != Scalar::Float32 && access.type() != Scalar::Float64,
+                  tmp2 == Register::Invalid() && tmp3 == Register::Invalid());
+    MOZ_ASSERT_IF(access.type() == Scalar::Float32,
+                  tmp2 != Register::Invalid() && tmp3 == Register::Invalid());
+    MOZ_ASSERT_IF(access.type() == Scalar::Float64,
+                  tmp2 != Register::Invalid() && tmp3 != Register::Invalid());
+
+    uint32_t offset = access.offset();
+    MOZ_ASSERT(offset < wasm::OffsetGuardLimit);
+
+    if (offset) {
+        ScratchRegisterScope scratch(asMasm());
+        ma_add(Imm32(offset), ptr, scratch);
+    }
+
+    // Add HeapReg to ptr, so we can use base+index addressing in the byte loads.
+    ma_add(HeapReg, ptr);
+
+    unsigned byteSize = access.byteSize();
+    Scalar::Type type = access.type();
+    bool isSigned = type == Scalar::Int8 || type == Scalar::Int16 || type == Scalar::Int32 ||
+                    type == Scalar::Int64;
+
+    Register low;
+    if (out64 != Register64::Invalid())
+        low = out64.low;
+    else if (outAny.isFloat())
+        low = tmp2;
+    else
+        low = outAny.gpr();
+
+    MOZ_ASSERT(low != tmp);
+    MOZ_ASSERT(low != ptr);
+
+    asMasm().memoryBarrier(access.barrierBefore());
+
+    emitUnalignedLoad(isSigned, Min(byteSize, 4u), ptr, tmp, low);
+
+    if (out64 != Register64::Invalid()) {
+        if (type == Scalar::Int64) {
+            MOZ_ASSERT(byteSize == 8);
+            emitUnalignedLoad(isSigned, 4, ptr, tmp, out64.high, /* offset */ 4);
+        } else {
+            MOZ_ASSERT(byteSize <= 4);
+            // Propagate sign.
+            if (isSigned)
+                ma_asr(Imm32(31), out64.low, out64.high);
+            else
+                ma_mov(Imm32(0), out64.high);
+        }
+    } else if (outAny.isFloat()) {
+        FloatRegister output = outAny.fpu();
+        if (byteSize == 4) {
+            MOZ_ASSERT(output.isSingle());
+            ma_vxfer(low, output);
+        } else {
+            MOZ_ASSERT(byteSize == 8);
+            MOZ_ASSERT(output.isDouble());
+            Register high = tmp3;
+            emitUnalignedLoad(/* signed */ false, 4, ptr, tmp, high, /* offset */ 4);
+            ma_vxfer(low, high, output);
+        }
+    }
+
+    asMasm().memoryBarrier(access.barrierAfter());
+}
+
+void
+MacroAssemblerARM::wasmUnalignedStoreImpl(const wasm::MemoryAccessDesc& access, FloatRegister floatValue,
+                                          Register64 val64, Register ptr, Register ptrScratch, Register tmp)
+{
+    MOZ_ASSERT(ptr == ptrScratch);
+    // They can't both be valid, but they can both be invalid.
+    MOZ_ASSERT_IF(!floatValue.isInvalid(), val64 == Register64::Invalid());
+    MOZ_ASSERT_IF(val64 != Register64::Invalid(), floatValue.isInvalid());
+
+    uint32_t offset = access.offset();
+    MOZ_ASSERT(offset < wasm::OffsetGuardLimit);
+
+    unsigned byteSize = access.byteSize();
+
+    if (offset) {
+        ScratchRegisterScope scratch(asMasm());
+        ma_add(Imm32(offset), ptr, scratch);
+    }
+
+    // Add HeapReg to ptr, so we can use base+index addressing in the byte loads.
+    ma_add(HeapReg, ptr);
+
+    asMasm().memoryBarrier(access.barrierBefore());
+
+    if (val64 != Register64::Invalid()) {
+        if (val64.low != tmp)
+            ma_mov(val64.low, tmp);
+    } else if (!floatValue.isInvalid()) {
+        ma_vxfer(floatValue, tmp);
+    }
+    // Otherwise, tmp has the integer value to store.
+
+    emitUnalignedStore(Min(byteSize, 4u), ptr, tmp);
+
+    if (byteSize > 4) {
+        if (val64 != Register64::Invalid()) {
+            if (val64.high != tmp)
+                ma_mov(val64.high, tmp);
+        } else {
+            MOZ_ASSERT(!floatValue.isInvalid());
+            MOZ_ASSERT(floatValue.isDouble());
+            ScratchRegisterScope scratch(asMasm());
+            ma_vxfer(floatValue, scratch, tmp);
+        }
+        emitUnalignedStore(4, ptr, tmp, /* offset */ 4);
+    }
+
+    asMasm().memoryBarrier(access.barrierAfter());
+}
+
+void
 MacroAssemblerARM::emitUnalignedLoad(bool isSigned, unsigned byteSize, Register ptr, Register tmp,
                                      Register dest, unsigned offset)
 {
     // Preconditions.
     MOZ_ASSERT(ptr != tmp);
     MOZ_ASSERT(ptr != dest);
     MOZ_ASSERT(tmp != dest);
     MOZ_ASSERT(byteSize <= 4);
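
For orientation, the unaligned paths split every access into byte-by-byte transfers of at most four bytes. A hedged sketch of what wasmUnalignedLoadImpl above effectively emits for an unaligned Float64 load, using the patch's own helper names:

    emitUnalignedLoad(/* signed */ false, 4, ptr, tmp, tmp2);                   // low word
    emitUnalignedLoad(/* signed */ false, 4, ptr, tmp, tmp3, /* offset */ 4);   // high word
    ma_vxfer(tmp2, tmp3, output.fpu());                                         // assemble the double

The store side mirrors this with emitUnalignedStore and ma_vxfer in the opposite direction.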
--- a/js/src/jit/arm/MacroAssembler-arm.h
+++ b/js/src/jit/arm/MacroAssembler-arm.h
@@ -450,31 +450,51 @@ class MacroAssemblerARM : public Assembl
         }
         if (mode == DB) {
             return transferMultipleByRunsImpl
                 <FloatRegisterBackwardIterator>(set, ls, rm, mode, -1);
         }
         MOZ_CRASH("Invalid data transfer addressing mode");
     }
 
+    // `outAny` is valid if and only if `out64` == Register64::Invalid().
+    void wasmLoadImpl(const wasm::MemoryAccessDesc& access, Register ptr, Register ptrScratch,
+                      AnyRegister outAny, Register64 out64);
+
+    // `valAny` is valid if and only if `val64` == Register64::Invalid().
+    void wasmStoreImpl(const wasm::MemoryAccessDesc& access, AnyRegister valAny, Register64 val64,
+                       Register ptr, Register ptrScratch);
+
+  protected:
+    // `outAny` is valid if and only if `out64` == Register64::Invalid().
+    void wasmUnalignedLoadImpl(const wasm::MemoryAccessDesc& access, Register ptr, Register ptrScratch,
+                               AnyRegister outAny, Register64 out64, Register tmp1, Register tmp2,
+                               Register tmp3);
+
+    // The value to be stored is in `floatValue` (if not invalid), `val64` (if not invalid),
+    // or in `valOrTmp` (if `floatValue` and `val64` are both invalid).  Note `valOrTmp` must
+    // always be valid.
+    void wasmUnalignedStoreImpl(const wasm::MemoryAccessDesc& access, FloatRegister floatValue,
+                                Register64 val64, Register ptr, Register ptrScratch, Register valOrTmp);
+
+  private:
     // Loads `byteSize` bytes, byte by byte, by reading from ptr[offset],
     // applying the indicated signedness (defined by isSigned).
     // - all three registers must be different.
     // - tmp and dest will get clobbered, ptr will remain intact.
     // - byteSize can be up to 4 bytes and no more (GPR are 32 bits on ARM).
     void emitUnalignedLoad(bool isSigned, unsigned byteSize, Register ptr, Register tmp,
                            Register dest, unsigned offset = 0);
 
     // Ditto, for a store. Note stores don't care about signedness.
     // - the two registers must be different.
     // - val will get clobbered, ptr will remain intact.
     // - byteSize can be up to 4 bytes and no more (GPR are 32 bits on ARM).
     void emitUnalignedStore(unsigned byteSize, Register ptr, Register val, unsigned offset = 0);
 
-private:
     // Implementation for transferMultipleByRuns so we can use different
     // iterators for forward/backward traversals. The sign argument should be 1
     // if we traverse forwards, -1 if we traverse backwards.
     template<typename RegisterIterator> int32_t
     transferMultipleByRunsImpl(FloatRegisterSet set, LoadStore ls,
                                Register rm, DTMMode mode, int32_t sign)
     {
         MOZ_ASSERT(sign == 1 || sign == -1);
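
On the store side, wasmUnalignedStoreImpl accepts at most one of `floatValue` and `val64`; a hedged summary of the value plumbing (not additional patch content):

    // integer (<= 32 bits): floatValue and val64 invalid; valOrTmp holds the value and is clobbered
    // int64:                val64 valid; valOrTmp is a scratch that receives val64.low, then val64.high
    // float32/float64:      floatValue valid; valOrTmp receives the word(s) via ma_vxfer before each store

In every case `ptr` (== `ptrScratch`) is updated, as the MacroAssembler.h comments require.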