Bug 1377576 - Assembler/MacroAssembler support for wasm atomics. r=sunfish
author      Lars T Hansen <lhansen@mozilla.com>
date        Fri, 01 Sep 2017 16:12:06 +0200
changeset   701926:de907393db020ec66d981c400b855cdc30ecfc38
parent      701925:37c0194e2b9ae59ae0a0917fe18e34510e75f92a
child       701927:6d224bf532b536566913df59c6bbb84ad497d23c
push id     90308
push user   bmo:lhansen@mozilla.com
push date   Wed, 22 Nov 2017 12:45:04 +0000
reviewers   sunfish
bugs        1377576
milestone   59.0a1
js/src/jit/arm/MacroAssembler-arm.cpp
js/src/jit/arm/MacroAssembler-arm.h
js/src/jit/arm64/MacroAssembler-arm64.h
js/src/jit/none/MacroAssembler-none.h
js/src/jit/x64/Assembler-x64.h
js/src/jit/x64/BaseAssembler-x64.h
js/src/jit/x64/MacroAssembler-x64.cpp
js/src/jit/x64/MacroAssembler-x64.h
js/src/jit/x86-shared/Assembler-x86-shared.h
js/src/jit/x86-shared/BaseAssembler-x86-shared.h
js/src/jit/x86-shared/Encoding-x86-shared.h
js/src/jit/x86-shared/MacroAssembler-x86-shared.h
js/src/jit/x86/Assembler-x86.h
js/src/jit/x86/BaseAssembler-x86.h
js/src/jit/x86/MacroAssembler-x86.cpp
js/src/jit/x86/MacroAssembler-x86.h
--- a/js/src/jit/arm/MacroAssembler-arm.cpp
+++ b/js/src/jit/arm/MacroAssembler-arm.cpp
@@ -7,16 +7,17 @@
 #include "jit/arm/MacroAssembler-arm.h"
 
 #include "mozilla/Attributes.h"
 #include "mozilla/Casting.h"
 #include "mozilla/DebugOnly.h"
 #include "mozilla/MathAlgorithms.h"
 
 #include "jit/arm/Simulator-arm.h"
+#include "jit/AtomicOperations.h"
 #include "jit/Bailouts.h"
 #include "jit/BaselineFrame.h"
 #include "jit/JitFrames.h"
 #include "jit/MacroAssembler.h"
 #include "jit/MoveEmitter.h"
 
 #include "jit/MacroAssembler-inl.h"
 
@@ -4231,17 +4232,18 @@ MacroAssemblerARMCompat::compareExchange
 
 template<typename T>
 void
 MacroAssemblerARMCompat::compareExchangeARMv7(int nbytes, bool signExtend, const T& mem,
                                               Register oldval, Register newval, Register output)
 {
     Label again;
     Label done;
-    ma_dmb(BarrierST);
+
+    asMasm().memoryBarrier(MembarFull);
 
     SecondScratchRegisterScope scratch2(asMasm());
     Register ptr = computePointer(mem, scratch2);
 
     ScratchRegisterScope scratch(asMasm());
 
     bind(&again);
     switch (nbytes) {
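
Note on the barrier changes in this file: ma_dmb(BarrierST) emitted a store-only DMB before the LL/SC loop, while asMasm().memoryBarrier(MembarFull) emits a full barrier on both sides, which is what sequentially-consistent wasm atomics require. (The exact DMB option memoryBarrier selects is not shown in this patch.)
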
@@ -4282,17 +4284,18 @@ MacroAssemblerARMCompat::compareExchange
         break;
       case 4:
         as_strex(scratch, newval, ptr);
         break;
     }
     as_cmp(scratch, Imm8(1));
     as_b(&again, Equal);
     bind(&done);
-    ma_dmb();
+
+    asMasm().memoryBarrier(MembarFull);
 }
 
 template<typename T>
 void
 MacroAssemblerARMCompat::compareExchangeARMv6(int nbytes, bool signExtend, const T& mem,
                                               Register oldval, Register newval, Register output)
 {
     // Bug 1077318: Must use read-modify-write with LDREX / STREX.
@@ -4327,17 +4330,18 @@ MacroAssemblerARMCompat::atomicExchange(
 
 template<typename T>
 void
 MacroAssemblerARMCompat::atomicExchangeARMv7(int nbytes, bool signExtend, const T& mem,
                                              Register value, Register output)
 {
     Label again;
     Label done;
-    ma_dmb(BarrierST);
+
+    asMasm().memoryBarrier(MembarFull);
 
     SecondScratchRegisterScope scratch2(asMasm());
     Register ptr = computePointer(mem, scratch2);
 
     ScratchRegisterScope scratch(asMasm());
 
     bind(&again);
     switch (nbytes) {
@@ -4359,17 +4363,18 @@ MacroAssemblerARMCompat::atomicExchangeA
         as_strex(scratch, value, ptr);
         break;
       default:
         MOZ_CRASH();
     }
     as_cmp(scratch, Imm8(1));
     as_b(&again, Equal);
     bind(&done);
-    ma_dmb();
+
+    asMasm().memoryBarrier(MembarFull);
 }
 
 template<typename T>
 void
 MacroAssemblerARMCompat::atomicExchangeARMv6(int nbytes, bool signExtend, const T& mem,
                                              Register value, Register output)
 {
     // Bug 1077318: Must use read-modify-write with LDREX / STREX.
@@ -4435,23 +4440,24 @@ MacroAssemblerARMCompat::atomicFetchOp(i
 
 template<typename T>
 void
 MacroAssemblerARMCompat::atomicFetchOpARMv7(int nbytes, bool signExtend, AtomicOp op,
                                             const Register& value, const T& mem, Register flagTemp,
                                             Register output)
 {
     MOZ_ASSERT(flagTemp != InvalidReg);
+    MOZ_ASSERT(output != value);
 
     Label again;
 
     SecondScratchRegisterScope scratch2(asMasm());
     Register ptr = computePointer(mem, scratch2);
 
-    ma_dmb();
+    asMasm().memoryBarrier(MembarFull);
 
     ScratchRegisterScope scratch(asMasm());
 
     bind(&again);
     switch (nbytes) {
       case 1:
         as_ldrexb(output, ptr);
         if (signExtend)
@@ -4493,30 +4499,30 @@ MacroAssemblerARMCompat::atomicFetchOpAR
         as_strexh(flagTemp, scratch, ptr);
         break;
       case 4:
         as_strex(flagTemp, scratch, ptr);
         break;
     }
     as_cmp(flagTemp, Imm8(1));
     as_b(&again, Equal);
-    ma_dmb();
+
+    asMasm().memoryBarrier(MembarFull);
 }
 
 template<typename T>
 void
 MacroAssemblerARMCompat::atomicFetchOpARMv6(int nbytes, bool signExtend, AtomicOp op,
                                             const Register& value, const T& mem, Register flagTemp,
                                             Register output)
 {
     // Bug 1077318: Must use read-modify-write with LDREX / STREX.
     MOZ_ASSERT(nbytes == 1 || nbytes == 2);
     MOZ_CRASH("NYI");
 }
-
 template<typename T>
 void
 MacroAssemblerARMCompat::atomicEffectOp(int nbytes, AtomicOp op, const Register& value,
                                         const T& mem, Register flagTemp)
 {
     // Fork for non-word operations on ARMv6.
     //
     // Bug 1077321: We may further optimize for ARMv8 (AArch32) here.
@@ -4558,17 +4564,17 @@ MacroAssemblerARMCompat::atomicEffectOpA
 {
     MOZ_ASSERT(flagTemp != InvalidReg);
 
     Label again;
 
     SecondScratchRegisterScope scratch2(asMasm());
     Register ptr = computePointer(mem, scratch2);
 
-    ma_dmb();
+    asMasm().memoryBarrier(MembarFull);
 
     ScratchRegisterScope scratch(asMasm());
 
     bind(&again);
     switch (nbytes) {
       case 1:
         as_ldrexb(scratch, ptr);
         break;
@@ -4605,17 +4611,18 @@ MacroAssemblerARMCompat::atomicEffectOpA
         as_strexh(flagTemp, scratch, ptr);
         break;
       case 4:
         as_strex(flagTemp, scratch, ptr);
         break;
     }
     as_cmp(flagTemp, Imm8(1));
     as_b(&again, Equal);
-    ma_dmb();
+
+    asMasm().memoryBarrier(MembarFull);
 }
 
 template<typename T>
 void
 MacroAssemblerARMCompat::atomicEffectOpARMv6(int nbytes, AtomicOp op, const Register& value,
                                              const T& mem, Register flagTemp)
 {
     // Bug 1077318: Must use read-modify-write with LDREX / STREX.
@@ -4731,16 +4738,190 @@ MacroAssemblerARMCompat::atomicExchangeT
 
 template void
 MacroAssemblerARMCompat::atomicExchangeToTypedIntArray(Scalar::Type arrayType, const Address& mem,
                                                        Register value, Register temp, AnyRegister output);
 template void
 MacroAssemblerARMCompat::atomicExchangeToTypedIntArray(Scalar::Type arrayType, const BaseIndex& mem,
                                                        Register value, Register temp, AnyRegister output);
 
+template<typename T>
+void
+MacroAssemblerARMCompat::atomicLoad64(const T& mem, Register64 temp, Register64 output)
+{
+    MOZ_ASSERT(temp.low == InvalidReg && temp.high == InvalidReg);
+    MOZ_ASSERT((output.low.code() & 1) == 0);
+    MOZ_ASSERT(output.low.code() + 1 == output.high.code());
+
+    asMasm().memoryBarrier(MembarFull);
+
+    SecondScratchRegisterScope scratch2(asMasm());
+    Register ptr = computePointer(mem, scratch2);
+
+    as_ldrexd(output.low, output.high, ptr);
+    as_clrex();
+
+    asMasm().memoryBarrier(MembarFull);
+}
+
+template void
+MacroAssemblerARMCompat::atomicLoad64(const Address& mem, Register64 temp, Register64 output);
+template void
+MacroAssemblerARMCompat::atomicLoad64(const BaseIndex& mem, Register64 temp, Register64 output);
+
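
The (even,odd) pair requirement above comes from LDREXD, which requires Rt to be even and Rt2 == Rt+1. A minimal caller sketch; the register choice, the address, and the name ptrReg are illustrative only, not taken from this patch:

    // r4/r5 satisfy the constraint: low register even, high == low + 1.
    Register64 output(r5, r4);           // Register64(high, low) on 32-bit ARM
    // ptrReg: any GPR holding the base address (hypothetical name).
    masm.atomicLoad64(Address(ptrReg, 0), Register64::Invalid(), output);
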
+template<typename T>
+void
+MacroAssemblerARMCompat::atomicFetchOp64(AtomicOp op, Register64 value, const T& mem,
+                                         Register64 temp, Register64 output)
+{
+    MOZ_ASSERT(temp.low != InvalidReg && temp.high != InvalidReg);
+    MOZ_ASSERT(output != value);
+
+    MOZ_ASSERT((temp.low.code() & 1) == 0);
+    MOZ_ASSERT(temp.low.code() + 1 == temp.high.code());
+
+    // We could avoid this pair requirement, but then we would end up with
+    // two moves in the loop to preserve the loaded value in output.  The
+    // payoff would be less register spilling around this op, since the pair
+    // requirement tends to force more spilling.
+
+    MOZ_ASSERT((output.low.code() & 1) == 0);
+    MOZ_ASSERT(output.low.code() + 1 == output.high.code());
+
+    Label again;
+
+    SecondScratchRegisterScope scratch2(asMasm());
+    Register ptr = computePointer(mem, scratch2);
+
+    asMasm().memoryBarrier(MembarFull);
+
+    bind(&again);
+    as_ldrexd(output.low, output.high, ptr);
+    switch (op) {
+      case AtomicFetchAddOp:
+        as_add(temp.low, output.low, O2Reg(value.low), SetCC);
+        as_adc(temp.high, output.high, O2Reg(value.high));
+        break;
+      case AtomicFetchSubOp:
+        as_sub(temp.low, output.low, O2Reg(value.low), SetCC);
+        as_sbc(temp.high, output.high, O2Reg(value.high));
+        break;
+      case AtomicFetchAndOp:
+        as_and(temp.low, output.low, O2Reg(value.low));
+        as_and(temp.high, output.high, O2Reg(value.high));
+        break;
+      case AtomicFetchOrOp:
+        as_orr(temp.low, output.low, O2Reg(value.low));
+        as_orr(temp.high, output.high, O2Reg(value.high));
+        break;
+      case AtomicFetchXorOp:
+        as_eor(temp.low, output.low, O2Reg(value.low));
+        as_eor(temp.high, output.high, O2Reg(value.high));
+        break;
+    }
+
+    ScratchRegisterScope scratch(asMasm());
+
+    // Rd (scratch) must differ from the source pair and address operand of strexd.
+    as_strexd(scratch, temp.low, temp.high, ptr);
+    as_cmp(scratch, Imm8(1));
+    as_b(&again, Equal);
+
+    asMasm().memoryBarrier(MembarFull);
+}
+
+template void
+MacroAssemblerARMCompat::atomicFetchOp64(AtomicOp op, Register64 value, const Address& mem,
+                                         Register64 temp, Register64 output);
+template void
+MacroAssemblerARMCompat::atomicFetchOp64(AtomicOp op, Register64 value, const BaseIndex& mem,
+                                         Register64 temp, Register64 output);
+
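
The add/adc and sub/sbc pairs compose 64-bit arithmetic from 32-bit halves: SetCC on the low-half instruction produces the carry (or borrow) that the high-half instruction consumes. A sketch of what the add case computes, with names denoting the 32-bit values rather than registers:

    #include <cstdint>

    // 64-bit add from 32-bit halves, as the as_add/as_adc pair implements it.
    uint64_t add64Model(uint32_t outLow, uint32_t outHigh,
                        uint32_t valLow, uint32_t valHigh) {
        uint32_t lo = outLow + valLow;                    // as_add ..., SetCC
        uint32_t hi = outHigh + valHigh + (lo < outLow);  // as_adc adds the carry
        return ((uint64_t)hi << 32) | lo;
    }
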
+template<typename T>
+void
+MacroAssemblerARMCompat::atomicExchange64(const T& mem, Register64 value, Register64 output)
+{
+    MOZ_ASSERT(output != value);
+
+    MOZ_ASSERT((value.low.code() & 1) == 0);
+    MOZ_ASSERT(value.low.code() + 1 == value.high.code());
+
+    MOZ_ASSERT((output.low.code() & 1) == 0);
+    MOZ_ASSERT(output.low.code() + 1 == output.high.code());
+
+    Label again;
+
+    SecondScratchRegisterScope scratch2(asMasm());
+    Register ptr = computePointer(mem, scratch2);
+
+    asMasm().memoryBarrier(MembarFull);
+
+    bind(&again);
+    as_ldrexd(output.low, output.high, ptr);
+
+    ScratchRegisterScope scratch(asMasm());
+
+    as_strexd(scratch, value.low, value.high, ptr);
+    as_cmp(scratch, Imm8(1));
+    as_b(&again, Equal);
+
+    asMasm().memoryBarrier(MembarFull);
+}
+
+template void
+MacroAssemblerARMCompat::atomicExchange64(const Address& mem, Register64 value, Register64 output);
+template void
+MacroAssemblerARMCompat::atomicExchange64(const BaseIndex& mem, Register64 value, Register64 output);
+
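
A portable reference for what the LDREXD/STREXD retry loop above implements; std::atomic is a semantic stand-in here, not how the JIT does it:

    #include <atomic>
    #include <cstdint>

    uint64_t atomicExchange64Model(std::atomic<uint64_t>* mem, uint64_t value) {
        // One successful LDREXD/STREXD round trip: returns the old contents.
        return mem->exchange(value, std::memory_order_seq_cst);
    }
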
+template<typename T>
+void
+MacroAssemblerARMCompat::compareExchange64(const T& mem, Register64 expect,
+                                           Register64 replace, Register64 output)
+{
+    MOZ_ASSERT(expect != replace && replace != output && output != expect);
+
+    MOZ_ASSERT((replace.low.code() & 1) == 0);
+    MOZ_ASSERT(replace.low.code() + 1 == replace.high.code());
+
+    MOZ_ASSERT((output.low.code() & 1) == 0);
+    MOZ_ASSERT(output.low.code() + 1 == output.high.code());
+
+    Label again;
+    Label done;
+
+    SecondScratchRegisterScope scratch2(asMasm());
+    Register ptr = computePointer(mem, scratch2);
+
+    asMasm().memoryBarrier(MembarFull);
+
+    bind(&again);
+    as_ldrexd(output.low, output.high, ptr);
+
+    as_cmp(output.low, O2Reg(expect.low));
+    as_cmp(output.high, O2Reg(expect.high), Equal);
+    as_b(&done, NotEqual);
+
+    ScratchRegisterScope scratch(asMasm());
+
+    // Rd (scratch) must differ from the source pair and address operand of strexd.
+    as_strexd(scratch, replace.low, replace.high, ptr);
+    as_cmp(scratch, Imm8(1));
+    as_b(&again, Equal);
+    bind(&done);
+
+    asMasm().memoryBarrier(MembarFull);
+}
+
+template void
+MacroAssemblerARMCompat::compareExchange64(const Address& mem, Register64 expect,
+                                           Register64 replace, Register64 output);
+template void
+MacroAssemblerARMCompat::compareExchange64(const BaseIndex& mem, Register64 expect,
+                                           Register64 replace, Register64 output);
+
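
The back-to-back cmp instructions form a 64-bit equality test via ARM conditional execution: the second cmp is predicated on Equal, so when the low halves already differ it is skipped and the NotEqual state survives to the branch. In terms of the 32-bit values (a sketch, not registers):

    #include <cstdint>

    // What the cmp / cmp(Equal) pair decides; if false, as_b(&done, NotEqual)
    // skips the STREXD attempt.
    bool equal64(uint32_t outLo, uint32_t outHi, uint32_t expLo, uint32_t expHi) {
        return outLo == expLo && outHi == expHi;
    }
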
 void
 MacroAssemblerARMCompat::profilerEnterFrame(Register framePtr, Register scratch)
 {
     asMasm().loadJSContext(scratch);
     loadPtr(Address(scratch, offsetof(JSContext, profilingActivation_)), scratch);
     storePtr(framePtr, Address(scratch, JitActivation::offsetOfLastProfilingFrame()));
     storePtr(ImmPtr(nullptr), Address(scratch, JitActivation::offsetOfLastProfilingCallSite()));
 }
@@ -5518,30 +5699,32 @@ MacroAssembler::wasmLoad(const wasm::Mem
 {
     wasmLoadImpl(access, memoryBase, ptr, ptrScratch, output, Register64::Invalid());
 }
 
 void
 MacroAssembler::wasmLoadI64(const wasm::MemoryAccessDesc& access, Register memoryBase, Register ptr,
                             Register ptrScratch, Register64 output)
 {
+    MOZ_ASSERT_IF(access.isAtomic(), access.byteSize() <= 4);
     wasmLoadImpl(access, memoryBase, ptr, ptrScratch, AnyRegister(), output);
 }
 
 void
 MacroAssembler::wasmStore(const wasm::MemoryAccessDesc& access, AnyRegister value,
                           Register memoryBase, Register ptr, Register ptrScratch)
 {
     wasmStoreImpl(access, value, Register64::Invalid(), memoryBase, ptr, ptrScratch);
 }
 
 void
 MacroAssembler::wasmStoreI64(const wasm::MemoryAccessDesc& access, Register64 value,
                              Register memoryBase, Register ptr, Register ptrScratch)
 {
+    MOZ_ASSERT(!access.isAtomic());
     wasmStoreImpl(access, AnyRegister(), value, memoryBase, ptr, ptrScratch);
 }
 
 void
 MacroAssembler::wasmUnalignedLoad(const wasm::MemoryAccessDesc& access, Register memoryBase,
                                   Register ptr, Register ptrScratch, Register output, Register tmp)
 {
     wasmUnalignedLoadImpl(access, memoryBase, ptr, ptrScratch, AnyRegister(output),
--- a/js/src/jit/arm/MacroAssembler-arm.h
+++ b/js/src/jit/arm/MacroAssembler-arm.h
@@ -1162,16 +1162,20 @@ class MacroAssemblerARMCompat : public M
     template<typename T>
     void atomicEffectOp(int nbytes, AtomicOp op, const Imm32& value, const T& address,
                              Register flagTemp);
 
     template<typename T>
     void atomicEffectOp(int nbytes, AtomicOp op, const Register& value, const T& address,
                              Register flagTemp);
 
+    template<typename T>
+    void atomicFetchOp64(AtomicOp op, Register64 value, const T& mem, Register64 temp,
+                         Register64 output);
+
   public:
     // T in {Address,BaseIndex}
     // S in {Imm32,Register}
 
     template<typename T>
     void compareExchange8SignExtend(const T& mem, Register oldval, Register newval, Register output)
     {
         compareExchange(1, true, mem, oldval, newval, output);
@@ -1381,16 +1385,58 @@ class MacroAssemblerARMCompat : public M
     void atomicXor16(const S& value, const T& mem, Register flagTemp) {
         atomicEffectOp(2, AtomicFetchXorOp, value, mem, flagTemp);
     }
     template <typename T, typename S>
     void atomicXor32(const S& value, const T& mem, Register flagTemp) {
         atomicEffectOp(4, AtomicFetchXorOp, value, mem, flagTemp);
     }
 
+    // Temp should be invalid; output must be an (even,odd) pair.
+    template<typename T>
+    void atomicLoad64(const T& mem, Register64 temp, Register64 output);
+
+    // Registers must be distinct; temp and output must be (even,odd) pairs.
+    template <typename T>
+    void atomicFetchAdd64(Register64 value, const T& mem, Register64 temp, Register64 output) {
+        atomicFetchOp64(AtomicFetchAddOp, value, mem, temp, output);
+    }
+
+    // Registers must be distinct; temp and output must be (even,odd) pairs.
+    template <typename T>
+    void atomicFetchSub64(Register64 value, const T& mem, Register64 temp, Register64 output) {
+        atomicFetchOp64(AtomicFetchSubOp, value, mem, temp, output);
+    }
+
+    // Registers must be distinct; temp and output must be (even,odd) pairs.
+    template <typename T>
+    void atomicFetchAnd64(Register64 value, const T& mem, Register64 temp, Register64 output) {
+        atomicFetchOp64(AtomicFetchAndOp, value, mem, temp, output);
+    }
+
+    // Registers must be distinct; temp and output must be (even,odd) pairs.
+    template <typename T>
+    void atomicFetchOr64(Register64 value, const T& mem, Register64 temp, Register64 output) {
+        atomicFetchOp64(AtomicFetchOrOp, value, mem, temp, output);
+    }
+
+    // Registers must be distinct; temp and output must be (even,odd) pairs.
+    template <typename T>
+    void atomicFetchXor64(Register64 value, const T& mem, Register64 temp, Register64 output) {
+        atomicFetchOp64(AtomicFetchXorOp, value, mem, temp, output);
+    }
+
+    // Registers must be distinct; value and output must be (even,odd) pairs.
+    template <typename T>
+    void atomicExchange64(const T& mem, Register64 value, Register64 output);
+
+    // Registers must be distinct; replace and output must be (even,odd) pairs.
+    template <typename T>
+    void compareExchange64(const T& mem, Register64 expect, Register64 replace, Register64 output);
+
     template<typename T>
     void compareExchangeToTypedIntArray(Scalar::Type arrayType, const T& mem, Register oldval, Register newval,
                                         Register temp, AnyRegister output);
 
     template<typename T>
     void atomicExchangeToTypedIntArray(Scalar::Type arrayType, const T& mem, Register value,
                                        Register temp, AnyRegister output);
 
--- a/js/src/jit/arm64/MacroAssembler-arm64.h
+++ b/js/src/jit/arm64/MacroAssembler-arm64.h
@@ -1901,16 +1901,23 @@ class MacroAssemblerCompat : public vixl
         MOZ_CRASH("atomicEffectOp");
     }
 
     template <typename T>
     void atomicEffectOp(int nbytes, AtomicOp op, const Imm32& value, const T& mem) {
         MOZ_CRASH("atomicEffectOp");
     }
 
+    template <typename T>
+    void atomicFetchOp64(AtomicOp op, Register64 value, const T& mem, Register64 temp,
+                         Register64 output)
+    {
+        MOZ_CRASH("AtomicFetchOp64");
+    }
+
   public:
     // T in {Address,BaseIndex}
     // S in {Imm32,Register}
 
     template <typename T>
     void compareExchange8SignExtend(const T& mem, Register oldval, Register newval, Register output)
     {
         compareExchange(1, true, mem, oldval, newval, output);
@@ -2122,16 +2129,53 @@ class MacroAssemblerCompat : public vixl
     void atomicXor16(const S& value, const T& mem) {
         atomicEffectOp(2, AtomicFetchXorOp, value, mem);
     }
     template <typename T, typename S>
     void atomicXor32(const S& value, const T& mem) {
         atomicEffectOp(4, AtomicFetchXorOp, value, mem);
     }
 
+    template <typename T>
+    void atomicFetchAdd64(Register64 value, const T& mem, Register64 temp, Register64 output) {
+        atomicFetchOp64(AtomicFetchAddOp, value, mem, temp, output);
+    }
+
+    template <typename T>
+    void atomicFetchSub64(Register64 value, const T& mem, Register64 temp, Register64 output) {
+        atomicFetchOp64(AtomicFetchSubOp, value, mem, temp, output);
+    }
+
+    template <typename T>
+    void atomicFetchAnd64(Register64 value, const T& mem, Register64 temp, Register64 output) {
+        atomicFetchOp64(AtomicFetchAndOp, value, mem, temp, output);
+    }
+
+    template <typename T>
+    void atomicFetchOr64(Register64 value, const T& mem, Register64 temp, Register64 output) {
+        atomicFetchOp64(AtomicFetchOrOp, value, mem, temp, output);
+    }
+
+    template <typename T>
+    void atomicFetchXor64(Register64 value, const T& mem, Register64 temp, Register64 output) {
+        atomicFetchOp64(AtomicFetchXorOp, value, mem, temp, output);
+    }
+
+    template <typename T>
+    void atomicExchange64(const T& mem, Register64 src, Register64 output) {
+        MOZ_CRASH("atomicExchange64");
+    }
+
+    template <typename T>
+    void compareExchange64(const T& mem, Register64 expected, Register64 replacement,
+                           Register64 output)
+    {
+        MOZ_CRASH("compareExchange64");
+    }
+
     template<typename T>
     void compareExchangeToTypedIntArray(Scalar::Type arrayType, const T& mem, Register oldval, Register newval,
                                         Register temp, AnyRegister output);
 
     template<typename T>
     void atomicExchangeToTypedIntArray(Scalar::Type arrayType, const T& mem, Register value,
                                        Register temp, AnyRegister output);
 
--- a/js/src/jit/none/MacroAssembler-none.h
+++ b/js/src/jit/none/MacroAssembler-none.h
@@ -356,16 +356,24 @@ class MacroAssemblerNone : public Assemb
     template <typename T, typename S> void atomicFetchXor8ZeroExtend(const T& value, const S& mem, Register temp, Register output) { MOZ_CRASH(); }
     template <typename T, typename S> void atomicFetchXor16SignExtend(const T& value, const S& mem, Register temp, Register output) { MOZ_CRASH(); }
     template <typename T, typename S> void atomicFetchXor16ZeroExtend(const T& value, const S& mem, Register temp, Register output) { MOZ_CRASH(); }
     template <typename T, typename S> void atomicFetchXor32(const T& value, const S& mem, Register temp, Register output) { MOZ_CRASH(); }
     template <typename T, typename S> void atomicXor8(const T& value, const S& mem) { MOZ_CRASH(); }
     template <typename T, typename S> void atomicXor16(const T& value, const S& mem) { MOZ_CRASH(); }
     template <typename T, typename S> void atomicXor32(const T& value, const S& mem) { MOZ_CRASH(); }
 
+    template <typename T> void atomicFetchAdd64(Register64 value, const T& mem, Register64 temp, Register64 output) { MOZ_CRASH(); }
+    template <typename T> void atomicFetchSub64(Register64 value, const T& mem, Register64 temp, Register64 output) { MOZ_CRASH(); }
+    template <typename T> void atomicFetchAnd64(Register64 value, const T& mem, Register64 temp, Register64 output) { MOZ_CRASH(); }
+    template <typename T> void atomicFetchOr64(Register64 value, const T& mem, Register64 temp, Register64 output) { MOZ_CRASH(); }
+    template <typename T> void atomicFetchXor64(Register64 value, const T& mem, Register64 temp, Register64 output) { MOZ_CRASH(); }
+    template <typename T> void atomicExchange64(const T& mem, Register64 src, Register64 output) { MOZ_CRASH(); }
+    template <typename T> void compareExchange64(const T& mem, Register64 expect, Register64 replace, Register64 output) { MOZ_CRASH(); }
+
     Register splitTagForTest(ValueOperand) { MOZ_CRASH(); }
 
     void boxDouble(FloatRegister, ValueOperand, FloatRegister) { MOZ_CRASH(); }
     void boxNonDouble(JSValueType, Register, ValueOperand) { MOZ_CRASH(); }
     template <typename T> void unboxInt32(T, Register) { MOZ_CRASH(); }
     template <typename T> void unboxBoolean(T, Register) { MOZ_CRASH(); }
     template <typename T> void unboxString(T, Register) { MOZ_CRASH(); }
     template <typename T> void unboxSymbol(T, Register) { MOZ_CRASH(); }
--- a/js/src/jit/x64/Assembler-x64.h
+++ b/js/src/jit/x64/Assembler-x64.h
@@ -473,20 +473,86 @@ class Assembler : public AssemblerX86Sha
           case Operand::MEM_SCALE:
             masm.cmovzq_mr(src.disp(), src.base(), src.index(), src.scale(), dest.encoding());
             break;
           default:
             MOZ_CRASH("unexpected operand kind");
         }
     }
 
+    template<typename T>
+    void lock_addq(T src, const Operand& op) {
+        masm.prefix_lock();
+        addq(src, op);
+    }
+    template<typename T>
+    void lock_subq(T src, const Operand& op) {
+        masm.prefix_lock();
+        subq(src, op);
+    }
+    template<typename T>
+    void lock_andq(T src, const Operand& op) {
+        masm.prefix_lock();
+        andq(src, op);
+    }
+    template<typename T>
+    void lock_orq(T src, const Operand& op) {
+        masm.prefix_lock();
+        orq(src, op);
+    }
+    template<typename T>
+    void lock_xorq(T src, const Operand& op) {
+        masm.prefix_lock();
+        xorq(src, op);
+    }
+
+    void lock_cmpxchgq(Register src, const Operand& mem) {
+        masm.prefix_lock();
+        switch (mem.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.cmpxchgq(src.encoding(), mem.disp(), mem.base());
+            break;
+          case Operand::MEM_SCALE:
+            masm.cmpxchgq(src.encoding(), mem.disp(), mem.base(), mem.index(), mem.scale());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
+    }
+
     void xchgq(Register src, Register dest) {
         masm.xchgq_rr(src.encoding(), dest.encoding());
     }
 
+    void xchgq(Register src, const Operand& mem) {
+        switch (mem.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.xchgq_rm(src.encoding(), mem.disp(), mem.base());
+            break;
+          case Operand::MEM_SCALE:
+            masm.xchgq_rm(src.encoding(), mem.disp(), mem.base(), mem.index(), mem.scale());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
+    }
+
+    void lock_xaddq(Register srcdest, const Operand& mem) {
+        switch (mem.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.lock_xaddq_rm(srcdest.encoding(), mem.disp(), mem.base());
+            break;
+          case Operand::MEM_SCALE:
+            masm.lock_xaddq_rm(srcdest.encoding(), mem.disp(), mem.base(), mem.index(), mem.scale());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
+    }
+
     void movsbq(const Operand& src, Register dest) {
         switch (src.kind()) {
           case Operand::REG:
             masm.movsbq_rr(src.reg(), dest.encoding());
             break;
           case Operand::MEM_REG_DISP:
             masm.movsbq_mr(src.disp(), src.base(), dest.encoding());
             break;
@@ -564,16 +630,31 @@ class Assembler : public AssemblerX86Sha
             break;
           case Operand::MEM_ADDRESS32:
             masm.andq_mr(src.address(), dest.encoding());
             break;
           default:
             MOZ_CRASH("unexpected operand kind");
         }
     }
+    void andq(Register src, const Operand& dest) {
+        switch (dest.kind()) {
+          case Operand::REG:
+            masm.andq_rr(src.encoding(), dest.reg());
+            break;
+          case Operand::MEM_REG_DISP:
+            masm.andq_rm(src.encoding(), dest.disp(), dest.base());
+            break;
+          case Operand::MEM_SCALE:
+            masm.andq_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), dest.scale());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
+    }
 
     void addq(Imm32 imm, Register dest) {
         masm.addq_ir(imm.value, dest.encoding());
     }
     CodeOffset addqWithPatch(Imm32 imm, Register dest) {
         masm.addq_i32r(imm.value, dest.encoding());
         return CodeOffset(masm.currentOffset());
     }
@@ -605,16 +686,31 @@ class Assembler : public AssemblerX86Sha
             break;
           case Operand::MEM_ADDRESS32:
             masm.addq_mr(src.address(), dest.encoding());
             break;
           default:
             MOZ_CRASH("unexpected operand kind");
         }
     }
+    void addq(Register src, const Operand& dest) {
+        switch (dest.kind()) {
+          case Operand::REG:
+            masm.addq_rr(src.encoding(), dest.reg());
+            break;
+          case Operand::MEM_REG_DISP:
+            masm.addq_rm(src.encoding(), dest.disp(), dest.base());
+            break;
+          case Operand::MEM_SCALE:
+            masm.addq_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), dest.scale());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
+    }
 
     void subq(Imm32 imm, Register dest) {
         masm.subq_ir(imm.value, dest.encoding());
     }
     void subq(Register src, Register dest) {
         masm.subq_rr(src.encoding(), dest.encoding());
     }
     void subq(const Operand& src, Register dest) {
@@ -635,16 +731,19 @@ class Assembler : public AssemblerX86Sha
     void subq(Register src, const Operand& dest) {
         switch (dest.kind()) {
           case Operand::REG:
             masm.subq_rr(src.encoding(), dest.reg());
             break;
           case Operand::MEM_REG_DISP:
             masm.subq_rm(src.encoding(), dest.disp(), dest.base());
             break;
+          case Operand::MEM_SCALE:
+            masm.subq_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), dest.scale());
+            break;
           default:
             MOZ_CRASH("unexpected operand kind");
         }
     }
     void shlq(Imm32 imm, Register dest) {
         masm.shlq_ir(imm.value, dest.encoding());
     }
     void shrq(Imm32 imm, Register dest) {
@@ -690,16 +789,31 @@ class Assembler : public AssemblerX86Sha
             break;
           case Operand::MEM_ADDRESS32:
             masm.orq_mr(src.address(), dest.encoding());
             break;
           default:
             MOZ_CRASH("unexpected operand kind");
         }
     }
+    void orq(Register src, const Operand& dest) {
+        switch (dest.kind()) {
+          case Operand::REG:
+            masm.orq_rr(src.encoding(), dest.reg());
+            break;
+          case Operand::MEM_REG_DISP:
+            masm.orq_rm(src.encoding(), dest.disp(), dest.base());
+            break;
+          case Operand::MEM_SCALE:
+            masm.orq_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), dest.scale());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
+    }
     void xorq(Register src, Register dest) {
         masm.xorq_rr(src.encoding(), dest.encoding());
     }
     void xorq(Imm32 imm, Register dest) {
         masm.xorq_ir(imm.value, dest.encoding());
     }
     void xorq(const Operand& src, Register dest) {
         switch (src.kind()) {
@@ -711,16 +825,31 @@ class Assembler : public AssemblerX86Sha
             break;
           case Operand::MEM_ADDRESS32:
             masm.xorq_mr(src.address(), dest.encoding());
             break;
           default:
             MOZ_CRASH("unexpected operand kind");
         }
     }
+    void xorq(Register src, const Operand& dest) {
+        switch (dest.kind()) {
+          case Operand::REG:
+            masm.xorq_rr(src.encoding(), dest.reg());
+            break;
+          case Operand::MEM_REG_DISP:
+            masm.xorq_rm(src.encoding(), dest.disp(), dest.base());
+            break;
+          case Operand::MEM_SCALE:
+            masm.xorq_rm(src.encoding(), dest.disp(), dest.base(), dest.index(), dest.scale());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
+    }
 
     void bsrq(const Register& src, const Register& dest) {
         masm.bsrq_rr(src.encoding(), dest.encoding());
     }
     void bsfq(const Register& src, const Register& dest) {
         masm.bsfq_rr(src.encoding(), dest.encoding());
     }
     void popcntq(const Register& src, const Register& dest) {
@@ -796,16 +925,17 @@ class Assembler : public AssemblerX86Sha
     }
     void mov(CodeOffset* label, Register dest) {
         masm.movq_i64r(/* placeholder */ 0, dest.encoding());
         label->bind(masm.size());
     }
     void xchg(Register src, Register dest) {
         xchgq(src, dest);
     }
+
     void lea(const Operand& src, Register dest) {
         switch (src.kind()) {
           case Operand::MEM_REG_DISP:
             masm.leaq_mr(src.disp(), src.base(), dest.encoding());
             break;
           case Operand::MEM_SCALE:
             masm.leaq_mr(src.disp(), src.base(), src.index(), src.scale(), dest.encoding());
             break;
--- a/js/src/jit/x64/BaseAssembler-x64.h
+++ b/js/src/jit/x64/BaseAssembler-x64.h
@@ -33,16 +33,28 @@ class BaseAssemblerX64 : public BaseAsse
     }
 
     void addq_mr(const void* addr, RegisterID dst)
     {
         spew("addq       %p, %s", addr, GPReg64Name(dst));
         m_formatter.oneByteOp64(OP_ADD_GvEv, addr, dst);
     }
 
+    void addq_rm(RegisterID src, int32_t offset, RegisterID base)
+    {
+        spew("addq       %s, " MEM_ob, GPReg64Name(src), ADDR_ob(offset, base));
+        m_formatter.oneByteOp64(OP_ADD_EvGv, offset, base, src);
+    }
+
+    void addq_rm(RegisterID src, int32_t offset, RegisterID base, RegisterID index, int scale)
+    {
+        spew("addq       %s, " MEM_obs, GPReg64Name(src), ADDR_obs(offset, base, index, scale));
+        m_formatter.oneByteOp64(OP_ADD_EvGv, offset, base, index, scale, src);
+    }
+
     void addq_ir(int32_t imm, RegisterID dst)
     {
         spew("addq       $%d, %s", imm, GPReg64Name(dst));
         if (CAN_SIGN_EXTEND_8_32(imm)) {
             m_formatter.oneByteOp64(OP_GROUP1_EvIb, dst, GROUP1_OP_ADD);
             m_formatter.immediate8s(imm);
         } else {
             if (dst == rax)
@@ -107,40 +119,76 @@ class BaseAssemblerX64 : public BaseAsse
     }
 
     void andq_mr(const void* addr, RegisterID dst)
     {
         spew("andq       %p, %s", addr, GPReg64Name(dst));
         m_formatter.oneByteOp64(OP_AND_GvEv, addr, dst);
     }
 
+    void andq_rm(RegisterID src, int32_t offset, RegisterID base)
+    {
+        spew("andq       %s, " MEM_ob, GPReg64Name(src), ADDR_ob(offset, base));
+        m_formatter.oneByteOp64(OP_AND_EvGv, offset, base, src);
+    }
+
+    void andq_rm(RegisterID src, int32_t offset, RegisterID base, RegisterID index, int scale)
+    {
+        spew("andq       %s, " MEM_obs, GPReg64Name(src), ADDR_obs(offset, base, index, scale));
+        m_formatter.oneByteOp64(OP_AND_EvGv, offset, base, index, scale, src);
+    }
+
     void orq_mr(int32_t offset, RegisterID base, RegisterID dst)
     {
         spew("orq        " MEM_ob ", %s", ADDR_ob(offset, base), GPReg64Name(dst));
         m_formatter.oneByteOp64(OP_OR_GvEv, offset, base, dst);
     }
 
     void orq_mr(const void* addr, RegisterID dst)
     {
         spew("orq        %p, %s", addr, GPReg64Name(dst));
         m_formatter.oneByteOp64(OP_OR_GvEv, addr, dst);
     }
 
+    void orq_rm(RegisterID src, int32_t offset, RegisterID base)
+    {
+        spew("orq       %s, " MEM_ob, GPReg64Name(src), ADDR_ob(offset, base));
+        m_formatter.oneByteOp64(OP_OR_EvGv, offset, base, src);
+    }
+
+    void orq_rm(RegisterID src, int32_t offset, RegisterID base, RegisterID index, int scale)
+    {
+        spew("orq       %s, " MEM_obs, GPReg64Name(src), ADDR_obs(offset, base, index, scale));
+        m_formatter.oneByteOp64(OP_OR_EvGv, offset, base, index, scale, src);
+    }
+
     void xorq_mr(int32_t offset, RegisterID base, RegisterID dst)
     {
         spew("xorq       " MEM_ob ", %s", ADDR_ob(offset, base), GPReg64Name(dst));
         m_formatter.oneByteOp64(OP_XOR_GvEv, offset, base, dst);
     }
 
     void xorq_mr(const void* addr, RegisterID dst)
     {
         spew("xorq       %p, %s", addr, GPReg64Name(dst));
         m_formatter.oneByteOp64(OP_XOR_GvEv, addr, dst);
     }
 
+    void xorq_rm(RegisterID src, int32_t offset, RegisterID base)
+    {
+        spew("xorq       %s, " MEM_ob, GPReg64Name(src), ADDR_ob(offset, base));
+        m_formatter.oneByteOp64(OP_XOR_EvGv, offset, base, src);
+    }
+
+    void xorq_rm(RegisterID src, int32_t offset, RegisterID base, RegisterID index, int scale)
+    {
+        spew("xorq       %s, " MEM_obs, GPReg64Name(src), ADDR_obs(offset, base, index, scale));
+        m_formatter.oneByteOp64(OP_XOR_EvGv, offset, base, index, scale, src);
+    }
+
     void bsrq_rr(RegisterID src, RegisterID dst)
     {
         spew("bsrq       %s, %s", GPReg64Name(src), GPReg64Name(dst));
         m_formatter.twoByteOp64(OP2_BSR_GvEv, src, dst);
     }
 
     void bsfq_rr(RegisterID src, RegisterID dst)
     {
@@ -210,16 +258,22 @@ class BaseAssemblerX64 : public BaseAsse
     }
 
     void subq_rm(RegisterID src, int32_t offset, RegisterID base)
     {
         spew("subq       %s, " MEM_ob, GPReg64Name(src), ADDR_ob(offset, base));
         m_formatter.oneByteOp64(OP_SUB_EvGv, offset, base, src);
     }
 
+    void subq_rm(RegisterID src, int32_t offset, RegisterID base, RegisterID index, int scale)
+    {
+        spew("subq       %s, " MEM_obs, GPReg64Name(src), ADDR_obs(offset, base, index, scale));
+        m_formatter.oneByteOp64(OP_SUB_EvGv, offset, base, index, scale, src);
+    }
+
     void subq_mr(int32_t offset, RegisterID base, RegisterID dst)
     {
         spew("subq       " MEM_ob ", %s", ADDR_ob(offset, base), GPReg64Name(dst));
         m_formatter.oneByteOp64(OP_SUB_GvEv, offset, base, dst);
     }
 
     void subq_mr(const void* addr, RegisterID dst)
     {
@@ -510,16 +564,42 @@ class BaseAssemblerX64 : public BaseAsse
         m_formatter.twoByteOp64(OP2_CMOVZ_GvEv, offset, base, dst);
     }
     void cmovzq_mr(int32_t offset, RegisterID base, RegisterID index, int scale, RegisterID dst)
     {
         spew("cmovz     " MEM_obs ", %s", ADDR_obs(offset, base, index, scale), GPReg32Name(dst));
         m_formatter.twoByteOp64(OP2_CMOVZ_GvEv, offset, base, index, scale, dst);
     }
 
+    void cmpxchgq(RegisterID src, int32_t offset, RegisterID base)
+    {
+        spew("cmpxchgq   %s, " MEM_ob, GPReg64Name(src), ADDR_ob(offset, base));
+        m_formatter.twoByteOp64(OP2_CMPXCHG_GvEw, offset, base, src);
+    }
+
+    void cmpxchgq(RegisterID src, int32_t offset, RegisterID base, RegisterID index, int scale)
+    {
+        spew("cmpxchgq   %s, " MEM_obs, GPReg64Name(src), ADDR_obs(offset, base, index, scale));
+        m_formatter.twoByteOp64(OP2_CMPXCHG_GvEw, offset, base, index, scale, src);
+    }
+
+    void lock_xaddq_rm(RegisterID srcdest, int32_t offset, RegisterID base)
+    {
+        spew("lock xaddq %s, " MEM_ob, GPReg64Name(srcdest), ADDR_ob(offset, base));
+        m_formatter.oneByteOp(PRE_LOCK);
+        m_formatter.twoByteOp64(OP2_XADD_EvGv, offset, base, srcdest);
+    }
+
+    void lock_xaddq_rm(RegisterID srcdest, int32_t offset, RegisterID base, RegisterID index, int scale)
+    {
+        spew("lock xaddq %s, " MEM_obs, GPReg64Name(srcdest), ADDR_obs(offset, base, index, scale));
+        m_formatter.oneByteOp(PRE_LOCK);
+        m_formatter.twoByteOp64(OP2_XADD_EvGv, offset, base, index, scale, srcdest);
+    }
+
     void xchgq_rr(RegisterID src, RegisterID dst)
     {
         spew("xchgq      %s, %s", GPReg64Name(src), GPReg64Name(dst));
         m_formatter.oneByteOp64(OP_XCHG_GvEv, src, dst);
     }
     void xchgq_rm(RegisterID src, int32_t offset, RegisterID base)
     {
         spew("xchgq      %s, " MEM_ob, GPReg64Name(src), ADDR_ob(offset, base));
--- a/js/src/jit/x64/MacroAssembler-x64.cpp
+++ b/js/src/jit/x64/MacroAssembler-x64.cpp
@@ -785,17 +785,18 @@ MacroAssembler::wasmLoad(const wasm::Mem
     append(access, loadOffset, framePushed());
 
     memoryBarrier(access.barrierAfter());
 }
 
 void
 MacroAssembler::wasmLoadI64(const wasm::MemoryAccessDesc& access, Operand srcAddr, Register64 out)
 {
-    MOZ_ASSERT(!access.isAtomic());
+    memoryBarrier(access.barrierBefore());
+
     MOZ_ASSERT(!access.isSimd());
 
     size_t loadOffset = size();
     switch (access.type()) {
       case Scalar::Int8:
         movsbq(srcAddr, out.reg);
         break;
       case Scalar::Uint8:
@@ -824,16 +825,18 @@ MacroAssembler::wasmLoadI64(const wasm::
       case Scalar::Int16x8:
       case Scalar::Int32x4:
         MOZ_CRASH("non-int64 loads should use load()");
       case Scalar::Uint8Clamped:
       case Scalar::MaxTypedArrayViewType:
         MOZ_CRASH("unexpected array type");
     }
     append(access, loadOffset, framePushed());
+
+    memoryBarrier(access.barrierAfter());
 }
 
 void
 MacroAssembler::wasmStore(const wasm::MemoryAccessDesc& access, AnyRegister value, Operand dstAddr)
 {
     memoryBarrier(access.barrierBefore());
 
     size_t storeOffset = size();
--- a/js/src/jit/x64/MacroAssembler-x64.h
+++ b/js/src/jit/x64/MacroAssembler-x64.h
@@ -638,16 +638,98 @@ class MacroAssemblerX64 : public MacroAs
     }
     void store64(Register64 src, Address address) {
         storePtr(src.reg, address);
     }
     void store64(Imm64 imm, Address address) {
         storePtr(ImmWord(imm.value), address);
     }
 
+    template <typename T>
+    void atomicFetchAdd64(Register64 src, const T& mem, Register64 temp, Register64 output) {
+        MOZ_ASSERT(temp.reg == InvalidReg);
+        if (src != output)
+            movq(src.reg, output.reg);
+        lock_xaddq(output.reg, Operand(mem));
+    }
+    template <typename T>
+    void atomicAdd64(const T& mem, Register64 value) {
+        lock_addq(value.reg, Operand(mem));
+    }
+
+    template <typename T>
+    void atomicFetchSub64(Register64 src, const T& mem, Register64 temp, Register64 output) {
+        MOZ_ASSERT(temp.reg == InvalidReg);
+        if (src != output)
+            movq(src.reg, output.reg);
+        negq(output.reg);
+        lock_xaddq(output.reg, Operand(mem));
+    }
+    template <typename T>
+    void atomicSub64(const T& mem, Register64 value) {
+        lock_subq(value.reg, Operand(mem));
+    }
+
+    // Requires output == rax.
+#define ATOMIC_BITOP_BODY(OP)                     \
+        MOZ_ASSERT(output.reg == rax);            \
+        movq(Operand(mem), rax);                  \
+        Label again;                              \
+        bind(&again);                             \
+        movq(rax, temp.reg);                      \
+        OP(src.reg, temp.reg);                    \
+        lock_cmpxchgq(temp.reg, Operand(mem));    \
+        j(NonZero, &again);
+
+    template <typename S, typename T>
+    void atomicFetchAnd64(const S& src, const T& mem, Register64 temp, Register64 output) {
+        ATOMIC_BITOP_BODY(andq)
+    }
+    template <typename T>
+    void atomicAnd64(const T& mem, Register64 value) {
+        lock_andq(value.reg, Operand(mem));
+    }
+
+    template <typename S, typename T>
+    void atomicFetchOr64(const S& src, const T& mem, Register64 temp, Register64 output) {
+        ATOMIC_BITOP_BODY(orq)
+    }
+    template <typename T>
+    void atomicOr64(const T& mem, Register64 value) {
+        lock_orq(value.reg, Operand(mem));
+    }
+
+    template <typename S, typename T>
+    void atomicFetchXor64(const S& src, const T& mem, Register64 temp, Register64 output) {
+        ATOMIC_BITOP_BODY(xorq)
+    }
+    template <typename T>
+    void atomicXor64(const T& mem, Register64 value) {
+        lock_xorq(value.reg, Operand(mem));
+    }
+
+#undef ATOMIC_BITOP_BODY
+
+    template <typename T>
+    void atomicExchange64(const T& mem, Register64 src, Register64 output) {
+        if (src != output)
+            movq(src.reg, output.reg);
+        xchgq(output.reg, Operand(mem));
+    }
+
+    template <typename T>
+    void compareExchange64(const T& mem, Register64 expected, Register64 replacement,
+                           Register64 output)
+    {
+        MOZ_ASSERT(output.reg == rax);
+        if (expected != output)
+            movq(expected.reg, output.reg);
+        lock_cmpxchgq(replacement.reg, Operand(mem));
+    }
+
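
Reference semantics for the x64 paths above, sketched with std::atomic (a model under that assumption, not SpiderMonkey code): lock xaddq returns the old value while adding; and/or/xor have no old-value-returning instruction on x86, hence the lock cmpxchgq retry loop, with output pinned to rax because CMPXCHG implicitly compares against and reloads rax:

    #include <atomic>
    #include <cstdint>

    uint64_t fetchAdd64Model(std::atomic<uint64_t>* mem, uint64_t v) {
        return mem->fetch_add(v);                     // lock xaddq
    }

    uint64_t fetchAnd64Model(std::atomic<uint64_t>* mem, uint64_t v) {
        uint64_t old = mem->load();                   // movq (mem), %rax
        // lock cmpxchgq + jnz: on failure, compare_exchange_weak refreshes
        // 'old' from memory, just as CMPXCHG reloads rax.
        while (!mem->compare_exchange_weak(old, old & v)) {
        }
        return old;
    }

atomicFetchSub64 reuses the xadd path by negating the addend first, which is what the negq above is for.
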
     void splitTag(Register src, Register dest) {
         if (src != dest)
             movq(src, dest);
         shrq(Imm32(JSVAL_TAG_SHIFT), dest);
     }
     void splitTag(const ValueOperand& operand, Register dest) {
         splitTag(operand.valueReg(), dest);
     }
--- a/js/src/jit/x86-shared/Assembler-x86-shared.h
+++ b/js/src/jit/x86-shared/Assembler-x86-shared.h
@@ -1999,16 +1999,31 @@ class AssemblerX86Shared : public Assemb
             break;
           case Operand::MEM_SCALE:
             masm.cmpxchgl(src.encoding(), mem.disp(), mem.base(), mem.index(), mem.scale());
             break;
           default:
             MOZ_CRASH("unexpected operand kind");
         }
     }
+    void lock_cmpxchg8b(Register srcHi, Register srcLo, Register newHi, Register newLo, const Operand& mem) {
+        masm.prefix_lock();
+        switch (mem.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.cmpxchg8b(srcHi.encoding(), srcLo.encoding(), newHi.encoding(), newLo.encoding(),
+                           mem.disp(), mem.base());
+            break;
+          case Operand::MEM_SCALE:
+            masm.cmpxchg8b(srcHi.encoding(), srcLo.encoding(), newHi.encoding(), newLo.encoding(),
+                           mem.disp(), mem.base(), mem.index(), mem.scale());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
+    }
 
     void xchgb(Register src, const Operand& mem) {
         switch (mem.kind()) {
           case Operand::MEM_REG_DISP:
             masm.xchgb_rm(src.encoding(), mem.disp(), mem.base());
             break;
           case Operand::MEM_SCALE:
             masm.xchgb_rm(src.encoding(), mem.disp(), mem.base(), mem.index(), mem.scale());
--- a/js/src/jit/x86-shared/BaseAssembler-x86-shared.h
+++ b/js/src/jit/x86-shared/BaseAssembler-x86-shared.h
@@ -1654,16 +1654,17 @@ public:
     void prefix_lock()
     {
         spew("lock");
         m_formatter.oneByteOp(PRE_LOCK);
     }
 
     void prefix_16_for_32()
     {
+        spew("[16-bit operands next]");
         m_formatter.prefix(PRE_OPERAND_SIZE);
     }
 
     void incl_m32(int32_t offset, RegisterID base)
     {
         spew("incl       " MEM_ob, ADDR_ob(offset, base));
         m_formatter.oneByteOp(OP_GROUP5_Ev, offset, base, GROUP5_OP_INC);
     }
@@ -1707,16 +1708,33 @@ public:
         m_formatter.twoByteOp(OP2_CMPXCHG_GvEw, offset, base, src);
     }
     void cmpxchgl(RegisterID src, int32_t offset, RegisterID base, RegisterID index, int scale)
     {
         spew("cmpxchgl   %s, " MEM_obs, GPReg32Name(src), ADDR_obs(offset, base, index, scale));
         m_formatter.twoByteOp(OP2_CMPXCHG_GvEw, offset, base, index, scale, src);
     }
 
+    void cmpxchg8b(RegisterID srcHi, RegisterID srcLo, RegisterID newHi, RegisterID newLo,
+                   int32_t offset, RegisterID base)
+    {
+        MOZ_ASSERT(srcHi == edx.code() && srcLo == eax.code());
+        MOZ_ASSERT(newHi == ecx.code() && newLo == ebx.code());
+        spew("cmpxchg8b  %s, " MEM_ob, "edx:eax", ADDR_ob(offset, base));
+        m_formatter.twoByteOp(OP2_CMPXCHGNB, offset, base, 1);
+    }
+    void cmpxchg8b(RegisterID srcHi, RegisterID srcLo, RegisterID newHi, RegisterID newLo,
+                   int32_t offset, RegisterID base, RegisterID index, int scale)
+    {
+        MOZ_ASSERT(srcHi == edx.code() && srcLo == eax.code());
+        MOZ_ASSERT(newHi == ecx.code() && newLo == ebx.code());
+        spew("cmpxchg8b  %s, " MEM_obs, "edx:eax", ADDR_obs(offset, base, index, scale));
+        m_formatter.twoByteOp(OP2_CMPXCHGNB, offset, base, index, scale, 1);
+    }
+
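
CMPXCHG8B (0F C7 /1) hard-wires its registers, which is what the asserts pin down: it compares EDX:EAX against the m64 operand and, if equal, stores ECX:EBX there and sets ZF; otherwise it loads the operand into EDX:EAX and clears ZF. The literal 1 passed to twoByteOp is the /1 ModRM reg-field opcode extension. A reference model (illustrative C++ only):

    #include <atomic>
    #include <cstdint>

    // expected arrives in edx:eax, replacement in ecx:ebx; the old value
    // comes back in edx:eax whether or not the exchange happened.
    uint64_t cmpxchg8bModel(std::atomic<uint64_t>* mem, uint64_t expected,
                            uint64_t replacement) {
        mem->compare_exchange_strong(expected, replacement);
        return expected;   // updated to the observed value on failure
    }
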
 
     // Comparisons:
 
     void cmpl_rr(RegisterID rhs, RegisterID lhs)
     {
         spew("cmpl       %s, %s", GPReg32Name(rhs), GPReg32Name(lhs));
         m_formatter.oneByteOp(OP_CMP_GvEv, rhs, lhs);
     }
--- a/js/src/jit/x86-shared/Encoding-x86-shared.h
+++ b/js/src/jit/x86-shared/Encoding-x86-shared.h
@@ -257,16 +257,17 @@ enum TwoByteOpcodeID {
     OP2_MOVZX_GvEb      = 0xB6,
     OP2_MOVZX_GvEw      = 0xB7,
     OP2_XADD_EbGb       = 0xC0,
     OP2_XADD_EvGv       = 0xC1,
     OP2_CMPPS_VpsWps    = 0xC2,
     OP2_PINSRW          = 0xC4,
     OP2_PEXTRW_GdUdIb   = 0xC5,
     OP2_SHUFPS_VpsWpsIb = 0xC6,
+    OP2_CMPXCHGNB       = 0xC7, // CMPXCHG8B; CMPXCHG16B with REX
     OP2_PSRLW_VdqWdq    = 0xD1,
     OP2_PSRLD_VdqWdq    = 0xD2,
     OP2_PMULLW_VdqWdq   = 0xD5,
     OP2_MOVQ_WdVd       = 0xD6,
     OP2_PMOVMSKB_EdVd   = 0xD7,
     OP2_PSUBUSB_VdqWdq  = 0xD8,
     OP2_PSUBUSW_VdqWdq  = 0xD9,
     OP2_PANDDQ_VdqWdq   = 0xDB,
--- a/js/src/jit/x86-shared/MacroAssembler-x86-shared.h
+++ b/js/src/jit/x86-shared/MacroAssembler-x86-shared.h
@@ -182,16 +182,17 @@ class MacroAssemblerX86Shared : public A
     }
     void atomic_dec32(const Operand& addr) {
         lock_decl(addr);
     }
 
     template <typename T>
     void atomicFetchAdd8SignExtend(Register src, const T& mem, Register temp, Register output) {
         CHECK_BYTEREGS(src, output);
+        MOZ_ASSERT(temp == InvalidReg);
         if (src != output)
             movl(src, output);
         lock_xaddb(output, Operand(mem));
         movsbl(output, output);
     }
 
     template <typename T>
     void atomicFetchAdd8ZeroExtend(Register src, const T& mem, Register temp, Register output) {
@@ -356,37 +357,39 @@ class MacroAssemblerX86Shared : public A
     }
 
     template <typename T>
     void atomicFetchSub32(Imm32 src, const T& mem, Register temp, Register output) {
         movl(Imm32(-src.value), output);
         lock_xaddl(output, Operand(mem));
     }
 
-    // requires output == eax
+    // Requires output == eax.  Note that src can be an Imm, so we can't
+    // directly assert that it's different from output or temp.
 #define ATOMIC_BITOP_BODY(LOAD, OP, LOCK_CMPXCHG) \
+        MOZ_ASSERT(output != temp);               \
         MOZ_ASSERT(output == eax);                \
         LOAD(Operand(mem), eax);                  \
         Label again;                              \
         bind(&again);                             \
         movl(eax, temp);                          \
         OP(src, temp);                            \
         LOCK_CMPXCHG(temp, Operand(mem));         \
         j(NonZero, &again);
 
     template <typename S, typename T>
     void atomicFetchAnd8SignExtend(const S& src, const T& mem, Register temp, Register output) {
+        CHECK_BYTEREG(temp);
         ATOMIC_BITOP_BODY(movb, andl, lock_cmpxchgb)
-        CHECK_BYTEREG(temp);
         movsbl(eax, eax);
     }
     template <typename S, typename T>
     void atomicFetchAnd8ZeroExtend(const S& src, const T& mem, Register temp, Register output) {
+        CHECK_BYTEREG(temp);
         ATOMIC_BITOP_BODY(movb, andl, lock_cmpxchgb)
-        CHECK_BYTEREG(temp);
         movzbl(eax, eax);
     }
     template <typename S, typename T>
     void atomicFetchAnd16SignExtend(const S& src, const T& mem, Register temp, Register output) {
         ATOMIC_BITOP_BODY(movw, andl, lock_cmpxchgw)
         movswl(eax, eax);
     }
     template <typename S, typename T>
@@ -396,24 +399,24 @@ class MacroAssemblerX86Shared : public A
     }
     template <typename S, typename T>
     void atomicFetchAnd32(const S& src, const T& mem, Register temp, Register output) {
         ATOMIC_BITOP_BODY(movl, andl, lock_cmpxchgl)
     }
 
     template <typename S, typename T>
     void atomicFetchOr8SignExtend(const S& src, const T& mem, Register temp, Register output) {
+        CHECK_BYTEREG(temp);
         ATOMIC_BITOP_BODY(movb, orl, lock_cmpxchgb)
-        CHECK_BYTEREG(temp);
         movsbl(eax, eax);
     }
     template <typename S, typename T>
     void atomicFetchOr8ZeroExtend(const S& src, const T& mem, Register temp, Register output) {
+        CHECK_BYTEREG(temp);
         ATOMIC_BITOP_BODY(movb, orl, lock_cmpxchgb)
-        CHECK_BYTEREG(temp);
         movzbl(eax, eax);
     }
     template <typename S, typename T>
     void atomicFetchOr16SignExtend(const S& src, const T& mem, Register temp, Register output) {
         ATOMIC_BITOP_BODY(movw, orl, lock_cmpxchgw)
         movswl(eax, eax);
     }
     template <typename S, typename T>
@@ -423,24 +426,24 @@ class MacroAssemblerX86Shared : public A
     }
     template <typename S, typename T>
     void atomicFetchOr32(const S& src, const T& mem, Register temp, Register output) {
         ATOMIC_BITOP_BODY(movl, orl, lock_cmpxchgl)
     }
 
     template <typename S, typename T>
     void atomicFetchXor8SignExtend(const S& src, const T& mem, Register temp, Register output) {
+        CHECK_BYTEREG(temp);
         ATOMIC_BITOP_BODY(movb, xorl, lock_cmpxchgb)
-        CHECK_BYTEREG(temp);
         movsbl(eax, eax);
     }
     template <typename S, typename T>
     void atomicFetchXor8ZeroExtend(const S& src, const T& mem, Register temp, Register output) {
+        CHECK_BYTEREG(temp);
         ATOMIC_BITOP_BODY(movb, xorl, lock_cmpxchgb)
-        CHECK_BYTEREG(temp);
         movzbl(eax, eax);
     }
     template <typename S, typename T>
     void atomicFetchXor16SignExtend(const S& src, const T& mem, Register temp, Register output) {
         ATOMIC_BITOP_BODY(movw, xorl, lock_cmpxchgw)
         movswl(eax, eax);
     }
     template <typename S, typename T>
@@ -681,23 +684,25 @@ class MacroAssemblerX86Shared : public A
         CHECK_BYTEREG(newval);
         if (oldval != output)
             movl(oldval, output);
         lock_cmpxchgb(newval, Operand(mem));
         movsbl(output, output);
     }
     template <typename T>
     void atomicExchange8ZeroExtend(const T& mem, Register value, Register output) {
+        CHECK_BYTEREG(output);
         if (value != output)
             movl(value, output);
         xchgb(output, Operand(mem));
         movzbl(output, output);
     }
     template <typename T>
     void atomicExchange8SignExtend(const T& mem, Register value, Register output) {
+        CHECK_BYTEREG(output);
         if (value != output)
             movl(value, output);
         xchgb(output, Operand(mem));
         movsbl(output, output);
     }
     void load16ZeroExtend(const Operand& src, Register dest) {
         movzwl(src, dest);
     }
--- a/js/src/jit/x86/Assembler-x86.h
+++ b/js/src/jit/x86/Assembler-x86.h
@@ -416,23 +416,47 @@ class Assembler : public AssemblerX86Sha
     }
 
     void adcl(Imm32 imm, Register dest) {
         masm.adcl_ir(imm.value, dest.encoding());
     }
     void adcl(Register src, Register dest) {
         masm.adcl_rr(src.encoding(), dest.encoding());
     }
+    void adcl(Operand src, Register dest) {
+        switch (src.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.adcl_mr(src.disp(), src.base(), dest.encoding());
+            break;
+          case Operand::MEM_SCALE:
+            masm.adcl_mr(src.disp(), src.base(), src.index(), src.scale(), dest.encoding());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
+    }
 
     void sbbl(Imm32 imm, Register dest) {
         masm.sbbl_ir(imm.value, dest.encoding());
     }
     void sbbl(Register src, Register dest) {
         masm.sbbl_rr(src.encoding(), dest.encoding());
     }
+    void sbbl(Operand src, Register dest) {
+        switch (src.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.sbbl_mr(src.disp(), src.base(), dest.encoding());
+            break;
+          case Operand::MEM_SCALE:
+            masm.sbbl_mr(src.disp(), src.base(), src.index(), src.scale(), dest.encoding());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
+    }
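
These memory-source forms let 64-bit arithmetic on x86-32 be composed from 32-bit halves, the low half producing the carry or borrow the high half consumes. A sketch of the intended pairing, assuming the existing addl(Operand, Register) overload, with lowAddr/highAddr/out as hypothetical names:

    masm.addl(Operand(lowAddr), out.low);    // low half first: sets CF
    masm.adcl(Operand(highAddr), out.high);  // high half: adds the carry in
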
 
     void mull(Register multiplier) {
         masm.mull_r(multiplier.encoding());
     }
 
     void shldl(const Imm32 imm, Register src, Register dest) {
         masm.shldl_irr(imm.value, src.encoding(), dest.encoding());
     }
--- a/js/src/jit/x86/BaseAssembler-x86.h
+++ b/js/src/jit/x86/BaseAssembler-x86.h
@@ -45,16 +45,28 @@ class BaseAssemblerX86 : public BaseAsse
     }
 
     void adcl_rr(RegisterID src, RegisterID dst)
     {
         spew("adcl       %s, %s", GPReg32Name(src), GPReg32Name(dst));
         m_formatter.oneByteOp(OP_ADC_GvEv, src, dst);
     }
 
+    void adcl_mr(int32_t offset, RegisterID base, RegisterID dst)
+    {
+        spew("adcl       " MEM_ob ", %s", ADDR_ob(offset, base), GPReg32Name(dst));
+        m_formatter.oneByteOp(OP_ADC_GvEv, offset, base, dst);
+    }
+
+    void adcl_mr(int32_t offset, RegisterID base, RegisterID index, int scale, RegisterID dst)
+    {
+        spew("adcl       " MEM_obs ", %s", ADDR_obs(offset, base, index, scale), GPReg32Name(dst));
+        m_formatter.oneByteOp(OP_ADC_GvEv, offset, base, index, scale, dst);
+    }
+
     void sbbl_ir(int32_t imm, RegisterID dst)
     {
         spew("sbbl       $%d, %s", imm, GPReg32Name(dst));
         if (CAN_SIGN_EXTEND_8_32(imm)) {
             m_formatter.oneByteOp(OP_GROUP1_EvIb, dst, GROUP1_OP_SBB);
             m_formatter.immediate8s(imm);
         } else {
             m_formatter.oneByteOp(OP_GROUP1_EvIz, dst, GROUP1_OP_SBB);
@@ -63,16 +75,28 @@ class BaseAssemblerX86 : public BaseAsse
     }
 
     void sbbl_rr(RegisterID src, RegisterID dst)
     {
         spew("sbbl       %s, %s", GPReg32Name(src), GPReg32Name(dst));
         m_formatter.oneByteOp(OP_SBB_GvEv, src, dst);
     }
 
+    void sbbl_mr(int32_t offset, RegisterID base, RegisterID dst)
+    {
+        spew("sbbl       " MEM_ob ", %s", ADDR_ob(offset, base), GPReg32Name(dst));
+        m_formatter.oneByteOp(OP_SBB_GvEv, offset, base, dst);
+    }
+
+    void sbbl_mr(int32_t offset, RegisterID base, RegisterID index, int scale, RegisterID dst)
+    {
+        spew("sbbl       " MEM_obs ", %s", ADDR_obs(offset, base, index, scale), GPReg32Name(dst));
+        m_formatter.oneByteOp(OP_SBB_GvEv, offset, base, index, scale, dst);
+    }
+
     using BaseAssembler::andl_im;
     void andl_im(int32_t imm, const void* addr)
     {
         spew("andl       $0x%x, %p", imm, addr);
         if (CAN_SIGN_EXTEND_8_32(imm)) {
             m_formatter.oneByteOp(OP_GROUP1_EvIb, addr, GROUP1_OP_AND);
             m_formatter.immediate8s(imm);
         } else {
--- a/js/src/jit/x86/MacroAssembler-x86.cpp
+++ b/js/src/jit/x86/MacroAssembler-x86.cpp
@@ -717,20 +717,23 @@ MacroAssembler::wasmLoad(const wasm::Mem
     append(access, loadOffset, framePushed());
 
     memoryBarrier(access.barrierAfter());
 }
 
 void
 MacroAssembler::wasmLoadI64(const wasm::MemoryAccessDesc& access, Operand srcAddr, Register64 out)
 {
-    MOZ_ASSERT(!access.isAtomic());
+    // Atomic i64 loads must go through atomicLoad64, which uses
+    // lock cmpxchg8b; this path handles only atomic accesses of at
+    // most four bytes.
+    MOZ_ASSERT_IF(access.isAtomic(), access.byteSize() <= 4);
     MOZ_ASSERT(!access.isSimd());
     MOZ_ASSERT(srcAddr.kind() == Operand::MEM_REG_DISP || srcAddr.kind() == Operand::MEM_SCALE);
 
+    // A no-op for non-atomic accesses.
+    memoryBarrier(access.barrierBefore());
+
     size_t loadOffset = size();
     switch (access.type()) {
       case Scalar::Int8:
         MOZ_ASSERT(out == Register64(edx, eax));
         movsbl(srcAddr, out.low);
         append(access, loadOffset, framePushed());
 
         cdq();
@@ -790,16 +793,18 @@ MacroAssembler::wasmLoadI64(const wasm::
       case Scalar::Int8x16:
       case Scalar::Int16x8:
       case Scalar::Int32x4:
         MOZ_CRASH("non-int64 loads should use load()");
       case Scalar::Uint8Clamped:
       case Scalar::MaxTypedArrayViewType:
         MOZ_CRASH("unexpected array type");
     }
+
+    memoryBarrier(access.barrierAfter());
 }
 
 void
 MacroAssembler::wasmStore(const wasm::MemoryAccessDesc& access, AnyRegister value, Operand dstAddr)
 {
     MOZ_ASSERT(dstAddr.kind() == Operand::MEM_REG_DISP || dstAddr.kind() == Operand::MEM_SCALE);
 
     memoryBarrier(access.barrierBefore());
@@ -861,29 +866,119 @@ MacroAssembler::wasmStore(const wasm::Me
     append(access, storeOffset, framePushed());
 
     memoryBarrier(access.barrierAfter());
 }
 
 void
 MacroAssembler::wasmStoreI64(const wasm::MemoryAccessDesc& access, Register64 value, Operand dstAddr)
 {
+    // Atomic i64 stores must use lock_cmpxchg8b, since a pair of
+    // 32-bit stores is not atomic.
+    MOZ_ASSERT(!access.isAtomic());
     MOZ_ASSERT(!access.isSimd());
     MOZ_ASSERT(dstAddr.kind() == Operand::MEM_REG_DISP || dstAddr.kind() == Operand::MEM_SCALE);
 
     size_t storeOffset = size();
     movl(value.low, LowWord(dstAddr));
     append(access, storeOffset, framePushed());
 
     storeOffset = size();
     movl(value.high, HighWord(dstAddr));
     append(access, storeOffset, framePushed());
 }
 
+// We don't have enough registers for all the operands on x86, so the rhs
+// operand is passed in memory: cmpxchg8b pins eax, ebx, ecx, and edx, and
+// the address operand may need base and index registers of its own.
+
+#define ATOMIC_OP_BODY(OPERATE)                               \
+    MOZ_ASSERT(output.low == eax);                            \
+    MOZ_ASSERT(output.high == edx);                           \
+    MOZ_ASSERT(temp.low == ebx);                              \
+    MOZ_ASSERT(temp.high == ecx);                             \
+    /* Load the current value, then retry until we publish. */ \
+    load64(address, output);                                  \
+    Label again;                                              \
+    bind(&again);                                             \
+    /* Compute the updated value in ecx:ebx. */               \
+    asMasm().move64(output, temp);                            \
+    OPERATE(value, temp);                                     \
+    /* On failure cmpxchg8b reloads edx:eax from memory. */   \
+    lock_cmpxchg8b(edx, eax, ecx, ebx, Operand(address));     \
+    j(NonZero, &again);
+
+template <typename T, typename U>
+void
+MacroAssemblerX86::atomicFetchAdd64(const T& value, const U& address, Register64 temp,
+                                    Register64 output)
+{
+    ATOMIC_OP_BODY(add64)
+}
+
+template <typename T, typename U>
+void
+MacroAssemblerX86::atomicFetchSub64(const T& value, const U& address, Register64 temp,
+                                    Register64 output)
+{
+    ATOMIC_OP_BODY(sub64)
+}
+
+template <typename T, typename U>
+void
+MacroAssemblerX86::atomicFetchAnd64(const T& value, const U& address, Register64 temp,
+                                    Register64 output)
+{
+    ATOMIC_OP_BODY(and64)
+}
+
+template <typename T, typename U>
+void
+MacroAssemblerX86::atomicFetchOr64(const T& value, const U& address, Register64 temp,
+                                   Register64 output)
+{
+    ATOMIC_OP_BODY(or64)
+}
+
+template <typename T, typename U>
+void
+MacroAssemblerX86::atomicFetchXor64(const T& value, const U& address, Register64 temp,
+                                    Register64 output)
+{
+    ATOMIC_OP_BODY(xor64)
+}
+
+#undef ATOMIC_OP_BODY
+
+template void
+js::jit::MacroAssemblerX86::atomicFetchAdd64(const Address& value, const Address& address,
+                                             Register64 temp, Register64 output);
+template void
+js::jit::MacroAssemblerX86::atomicFetchAdd64(const Address& value, const BaseIndex& address,
+                                             Register64 temp, Register64 output);
+template void
+js::jit::MacroAssemblerX86::atomicFetchSub64(const Address& value, const Address& address,
+                                             Register64 temp, Register64 output);
+template void
+js::jit::MacroAssemblerX86::atomicFetchSub64(const Address& value, const BaseIndex& address,
+                                             Register64 temp, Register64 output);
+template void
+js::jit::MacroAssemblerX86::atomicFetchAnd64(const Address& value, const Address& address,
+                                             Register64 temp, Register64 output);
+template void
+js::jit::MacroAssemblerX86::atomicFetchAnd64(const Address& value, const BaseIndex& address,
+                                             Register64 temp, Register64 output);
+template void
+js::jit::MacroAssemblerX86::atomicFetchOr64(const Address& value, const Address& address,
+                                            Register64 temp, Register64 output);
+template void
+js::jit::MacroAssemblerX86::atomicFetchOr64(const Address& value, const BaseIndex& address,
+                                            Register64 temp, Register64 output);
+template void
+js::jit::MacroAssemblerX86::atomicFetchXor64(const Address& value, const Address& address,
+                                             Register64 temp, Register64 output);
+template void
+js::jit::MacroAssemblerX86::atomicFetchXor64(const Address& value, const BaseIndex& address,
+                                             Register64 temp, Register64 output);
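+
+// A minimal caller sketch (hypothetical register and offset names, for
+// illustration only), showing the register pinning the asserts require:
+//
+//   Register64 out(edx, eax);              // result must be in edx:eax
+//   Register64 tmp(ecx, ebx);              // temp must be ecx:ebx
+//   // The rhs was spilled to the stack; the second operand addresses
+//   // the i64 cell being updated.
+//   masm.atomicFetchAdd64(Address(esp, rhsOffset), Address(ptrReg, 0),
+//                         tmp, out);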
+
 void
 MacroAssembler::wasmTruncateDoubleToUInt32(FloatRegister input, Register output, Label* oolEntry)
 {
     Label done;
     vcvttsd2si(input, output);
     branch32(Assembler::Condition::NotSigned, output, Imm32(0), &done);
 
     loadConstantDouble(double(int32_t(0x80000000)), ScratchDoubleReg);
--- a/js/src/jit/x86/MacroAssembler-x86.h
+++ b/js/src/jit/x86/MacroAssembler-x86.h
@@ -88,16 +88,59 @@ class MacroAssemblerX86 : public MacroAs
           default:
             MOZ_CRASH("unexpected operand kind");
         }
     }
     Address ToType(Address base) {
         return ToType(Operand(base)).toAddress();
     }
 
+    template <typename T>
+    void add64(const T& address, Register64 dest) {
+        // adcl propagates the carry out of the low-word add.
+        addl(Operand(LowWord(address)), dest.low);
+        adcl(Operand(HighWord(address)), dest.high);
+    }
+    template <typename T>
+    void sub64(const T& address, Register64 dest) {
+        // sbbl propagates the borrow out of the low-word subtract.
+        subl(Operand(LowWord(address)), dest.low);
+        sbbl(Operand(HighWord(address)), dest.high);
+    }
+    // The bitwise operations act on each word independently.
+    template <typename T>
+    void and64(const T& address, Register64 dest) {
+        andl(Operand(LowWord(address)), dest.low);
+        andl(Operand(HighWord(address)), dest.high);
+    }
+    template <typename T>
+    void or64(const T& address, Register64 dest) {
+        orl(Operand(LowWord(address)), dest.low);
+        orl(Operand(HighWord(address)), dest.high);
+    }
+    template <typename T>
+    void xor64(const T& address, Register64 dest) {
+        xorl(Operand(LowWord(address)), dest.low);
+        xorl(Operand(HighWord(address)), dest.high);
+    }
+
+    // Here, `value` is the address of an Int64, rather than a register pair,
+    // because we don't have enough registers for all the operands.  It is
+    // allowed to be SP-relative.
+    template <typename T, typename U>
+    void atomicFetchAdd64(const T& value, const U& address, Register64 temp, Register64 output);
+
+    template <typename T, typename U>
+    void atomicFetchSub64(const T& value, const U& address, Register64 temp, Register64 output);
+
+    template <typename T, typename U>
+    void atomicFetchAnd64(const T& value, const U& address, Register64 temp, Register64 output);
+
+    template <typename T, typename U>
+    void atomicFetchOr64(const T& value, const U& address, Register64 temp, Register64 output);
+
+    template <typename T, typename U>
+    void atomicFetchXor64(const T& value, const U& address, Register64 temp, Register64 output);
+
     /////////////////////////////////////////////////////////////////
     // X86/X64-common interface.
     /////////////////////////////////////////////////////////////////
     void storeValue(ValueOperand val, Operand dest) {
         movl(val.payloadReg(), ToPayload(dest));
         movl(val.typeReg(), ToType(dest));
     }
     void storeValue(ValueOperand val, const Address& dest) {
@@ -627,16 +670,60 @@ class MacroAssemblerX86 : public MacroAs
         movl(src.low, Operand(LowWord(address)));
         movl(src.high, Operand(HighWord(address)));
     }
     void store64(Imm64 imm, Address address) {
         movl(imm.low(), Operand(LowWord(address)));
         movl(imm.hi(), Operand(HighWord(address)));
     }
 
+    template <typename T>
+    void atomicLoad64(const T& address, Register64 temp, Register64 output) {
+        MOZ_ASSERT(temp.low == ebx);
+        MOZ_ASSERT(temp.high == ecx);
+        MOZ_ASSERT(output.high == edx);
+        MOZ_ASSERT(output.low == eax);
+
+        // If edx:eax matches what's in memory, cmpxchg8b stores ecx:ebx, so
+        // the two pairs must hold the same value; either way memory is left
+        // unchanged and edx:eax ends up holding the loaded value.
+        movl(edx, ecx);
+        movl(eax, ebx);
+
+        lock_cmpxchg8b(edx, eax, ecx, ebx, Operand(address));
+    }
+
+    template <typename T>
+    void atomicExchange64(const T& address, Register64 value, Register64 output) {
+        MOZ_ASSERT(value.low == ebx);
+        MOZ_ASSERT(value.high == ecx);
+        MOZ_ASSERT(output.high == edx);
+        MOZ_ASSERT(output.low == eax);
+
+        // edx:eax holds garbage initially, and that is the best we can do
+        // unless we can guess with high probability what's in memory.  The
+        // first cmpxchg8b will likely fail, but it loads the actual memory
+        // value into edx:eax, so the retry will succeed under low contention.
+
+        Label again;
+        bind(&again);
+        lock_cmpxchg8b(edx, eax, ecx, ebx, Operand(address));
+        j(Assembler::Condition::NonZero, &again);
+    }
+
+    template <typename T>
+    void compareExchange64(const T& address, Register64 expected, Register64 replacement,
+                           Register64 output)
+    {
+        MOZ_ASSERT(expected == output);
+        MOZ_ASSERT(expected.high == edx);
+        MOZ_ASSERT(expected.low == eax);
+        MOZ_ASSERT(replacement.high == ecx);
+        MOZ_ASSERT(replacement.low == ebx);
+
+        // cmpxchg8b leaves the old memory value in edx:eax whether or not
+        // the exchange happened, which is exactly the output we need.
+        lock_cmpxchg8b(edx, eax, ecx, ebx, Operand(address));
+    }
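+
+    // All three 64-bit primitives above are built on lock cmpxchg8b, the
+    // only 8-byte atomic read-modify-write instruction available on x86.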
+
     void setStackArg(Register reg, uint32_t arg) {
         movl(reg, Operand(esp, arg * sizeof(intptr_t)));
     }
 
     void boxDouble(FloatRegister src, const ValueOperand& dest, FloatRegister temp) {
         if (Assembler::HasSSE41()) {
             vmovd(src, dest.payloadReg());
             vpextrd(1, src, dest.typeReg());