Bug 979594 - JIT implementations of the important Atomics. r=sstangl
author Lars T Hansen <lhansen@mozilla.com>
date Thu, 23 Oct 2014 14:23:27 +0200
changeset 211924 ab936277cf4ba207714f13d14ddbbfa9996c86fd
parent 211923 ad0fdfc44d48383ae9345ecac4c031f6008cc728
child 211925 983259897284c61f208733ac520ac3f9ba646f09
push id 11598
push user ryanvm@gmail.com
push date Thu, 23 Oct 2014 18:10:30 +0000
reviewers sstangl
bugs 979594
milestone 36.0a1
Bug 979594 - JIT implementations of the important Atomics. r=sstangl
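
For reference, a minimal sketch (not part of the patch) of the Atomics calls that this changeset teaches Ion to inline on shared integer typed arrays, using the SharedInt32Array API the tests below rely on (sub, and, or, and xor follow the same fetch-op path as add):

    var sab = new SharedArrayBuffer(4096);
    var i32 = new SharedInt32Array(sab);
    Atomics.store(i32, 0, 37);                    // inlined as a barriered store
    var v = Atomics.load(i32, 0);                 // inlined as a barriered load
    var old = Atomics.add(i32, 0, 5);             // fetch-and-add; returns the previous value
    Atomics.compareExchange(i32, 0, old + 5, 0);  // compare-and-swap
    Atomics.fence();                              // full memory barrier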
js/src/jit-test/tests/atomics/basic-tests.js
js/src/jit-test/tests/atomics/inline-add.js
js/src/jit-test/tests/atomics/inline-add2.js
js/src/jit-test/tests/atomics/inline-cmpxchg.js
js/src/jit-test/tests/atomics/inline-fence.js
js/src/jit/AtomicOp.h
js/src/jit/CodeGenerator.cpp
js/src/jit/CodeGenerator.h
js/src/jit/IonBuilder.h
js/src/jit/IonMacroAssembler.cpp
js/src/jit/IonMacroAssembler.h
js/src/jit/LIR-Common.h
js/src/jit/LIR.h
js/src/jit/LOpcodes.h
js/src/jit/LinearScan.cpp
js/src/jit/Lowering.cpp
js/src/jit/Lowering.h
js/src/jit/MCallOptimize.cpp
js/src/jit/MIR.h
js/src/jit/MOpcodes.h
js/src/jit/ParallelSafetyAnalysis.cpp
js/src/jit/TypePolicy.cpp
js/src/jit/none/Lowering-none.h
js/src/jit/none/MacroAssembler-none.h
js/src/jit/shared/Assembler-x86-shared.h
js/src/jit/shared/BaseAssembler-x86-shared.h
js/src/jit/shared/CodeGenerator-x86-shared.cpp
js/src/jit/shared/CodeGenerator-x86-shared.h
js/src/jit/shared/Lowering-x86-shared.cpp
js/src/jit/shared/Lowering-x86-shared.h
js/src/jit/shared/MacroAssembler-x86-shared.h
--- a/js/src/jit-test/tests/atomics/basic-tests.js
+++ b/js/src/jit-test/tests/atomics/basic-tests.js
@@ -307,16 +307,30 @@ function testInt16Extremes(a) {
 
     Atomics.and(a, 10, 65536);	// Preserve none
     assertEq(a[10], 0);
     assertEq(Atomics.load(a, 10), 0);
 
     assertEq(a[11], 0);
 }
 
+function testUint32(a) {
+    var k = 0;
+    for ( var i=0 ; i < 20 ; i++ ) {
+	a[i] = i+5;
+	k += a[i];
+    }
+
+    var sum = 0;
+    for ( var i=0 ; i < 20 ; i++ )
+	sum += Atomics.add(a, i, 1);
+
+    assertEq(sum, k);
+}
+
 function isLittleEndian() {
     var xxx = new ArrayBuffer(2);
     var xxa = new Int16Array(xxx);
     var xxb = new Int8Array(xxx);
     xxa[0] = 37;
     var is_little = xxb[0] == 37;
     return is_little;
 }
@@ -390,12 +404,13 @@ function runTests() {
     // Test out-of-range references
     testRangeCAS(v8);
     testRangeCAS(v32);
 
     // Test extreme values
     testInt8Extremes(new SharedInt8Array(sab));
     testUint8Extremes(new SharedUint8Array(sab));
     testInt16Extremes(new SharedInt16Array(sab));
+    testUint32(new SharedUint32Array(sab));
 }
 
 if (this.Atomics && this.SharedArrayBuffer && this.SharedInt32Array)
     runTests();
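
The new testUint32 case relies on Atomics.add returning the value held in the cell before the addition, which is why the collected sum equals k. A minimal illustration (not part of the patch):

    var a = new SharedUint32Array(new SharedArrayBuffer(4096));
    a[0] = 7;
    var prev = Atomics.add(a, 0, 1);   // prev == 7, a[0] is now 8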
new file mode 100644
--- /dev/null
+++ b/js/src/jit-test/tests/atomics/inline-add.js
@@ -0,0 +1,31 @@
+// |jit-test| slow;
+//
+// This is intended to be run manually with IONFLAGS=logs and
+// postprocessing by iongraph to verify manually (by inspecting the
+// MIR) that:
+//
+//  - the add operation is inlined as it should be
+//  - loads and stores are not moved across the add
+//
+// Be sure to run with --ion-eager --ion-offthread-compile=off.
+
+function add(ta) {
+    var x = ta[0];
+    Atomics.add(ta, 86, 6);
+    var y = ta[1];
+    var z = y + 1;
+    var w = x + z;
+    return w;
+}
+
+if (!this.SharedArrayBuffer || !this.Atomics || !this.SharedInt32Array)
+    quit(0);
+
+var sab = new SharedArrayBuffer(4096);
+var ia = new SharedInt32Array(sab);
+for ( var i=0, limit=ia.length ; i < limit ; i++ )
+    ia[i] = 37;
+var v = 0;
+for ( var i=0 ; i < 1000 ; i++ )
+    v += add(ia);
+//print(v);
new file mode 100644
--- /dev/null
+++ b/js/src/jit-test/tests/atomics/inline-add2.js
@@ -0,0 +1,31 @@
+// |jit-test| slow;
+//
+// Like inline-add, but with SharedUint32Array, which is a special
+// case because the value is representable only as a Number.
+// All this tests is that the Uint32 path is being triggered.
+//
+// This is intended to be run manually with IONFLAGS=logs and
+// postprocessing by iongraph to verify manually (by inspecting the
+// MIR) that:
+//
+//  - the add operation is inlined as it should be, with
+//    a return type 'Double'
+//  - loads and stores are not moved across the add
+//
+// Be sure to run with --ion-eager --ion-offthread-compile=off.
+
+function add(ta) {
+    return Atomics.add(ta, 86, 6);
+}
+
+if (!this.SharedArrayBuffer || !this.Atomics || !this.SharedUint32Array)
+    quit(0);
+
+var sab = new SharedArrayBuffer(4096);
+var ia = new SharedUint32Array(sab);
+for ( var i=0, limit=ia.length ; i < limit ; i++ )
+    ia[i] = 0xdeadbeef;		// Important: Not an int32-capable value
+var v = 0;
+for ( var i=0 ; i < 1000 ; i++ )
+    v += add(ia);
+//print(v);
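
The Uint32 path matters because the fetched value may not fit in an int32, so the inlined operation must produce a Double. A hedged illustration of the values involved (not part of the patch):

    var u32 = new SharedUint32Array(new SharedArrayBuffer(4096));
    u32[0] = 0xdeadbeef;                 // 3735928559, not representable as an int32
    var prev = Atomics.add(u32, 0, 6);   // previous value returned as a Number (double-typed in MIR)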
new file mode 100644
--- /dev/null
+++ b/js/src/jit-test/tests/atomics/inline-cmpxchg.js
@@ -0,0 +1,31 @@
+// |jit-test| slow;
+//
+// This is intended to be run manually with IONFLAGS=logs and
+// postprocessing by iongraph to verify manually (by inspecting the
+// MIR) that:
+//
+//  - the cmpxchg operation is inlined as it should be
+//  - loads and stores are not moved across the cmpxchg
+//
+// Be sure to run with --ion-eager --ion-offthread-compile=off.
+
+function cmpxchg(ta) {
+    var x = ta[0];
+    Atomics.compareExchange(ta, 86, 37, 42);
+    var y = ta[1];
+    var z = y + 1;
+    var w = x + z;
+    return w;
+}
+
+if (!this.SharedArrayBuffer || !this.Atomics || !this.SharedInt32Array)
+    quit(0);
+
+var sab = new SharedArrayBuffer(4096);
+var ia = new SharedInt32Array(sab);
+for ( var i=0, limit=ia.length ; i < limit ; i++ )
+    ia[i] = 37;
+var v = 0;
+for ( var i=0 ; i < 1000 ; i++ )
+    v += cmpxchg(ia);
+//print(v);
new file mode 100644
--- /dev/null
+++ b/js/src/jit-test/tests/atomics/inline-fence.js
@@ -0,0 +1,31 @@
+// |jit-test| slow;
+//
+// This is intended to be run manually with IONFLAGS=logs and
+// postprocessing by iongraph to verify manually (by inspecting the
+// MIR) that:
+//
+//  - the fence operation is inlined as it should be
+//  - loads and stores are not moved across the fence
+//
+// Be sure to run with --ion-eager --ion-offthread-compile=off.
+
+function fence(ta) {
+    var x = ta[0];
+    Atomics.fence();
+    var y = ta[1];
+    var z = y + 1;
+    var w = x + z;
+    return w;
+}
+
+if (!this.SharedArrayBuffer || !this.Atomics || !this.SharedInt32Array)
+    quit(0);
+
+var sab = new SharedArrayBuffer(4096);
+var ia = new SharedInt32Array(sab);
+for ( var i=0, limit=ia.length ; i < limit ; i++ )
+    ia[i] = 37;
+var v = 0;
+for ( var i=0 ; i < 1000 ; i++ )
+    v += fence(ia);
+//print(v);
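
Atomics.fence() is inlined as a full barrier (MembarFull in this patch); a sketch (not part of the patch) of the publication pattern such a fence supports:

    function publish(ta) {
        ta[0] = 42;        // payload store
        Atomics.fence();   // full barrier: the payload store cannot be moved past the flag store
        ta[1] = 1;         // flag store
    }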
new file mode 100644
--- /dev/null
+++ b/js/src/jit/AtomicOp.h
@@ -0,0 +1,54 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+ * vim: set ts=8 sts=4 et sw=4 tw=99:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_AtomicOp_h
+#define jit_AtomicOp_h
+
+namespace js {
+namespace jit {
+
+// Types of atomic operation, shared by MIR and LIR.
+
+enum AtomicOp {
+    AtomicFetchAddOp,
+    AtomicFetchSubOp,
+    AtomicFetchAndOp,
+    AtomicFetchOrOp,
+    AtomicFetchXorOp
+};
+
+// Memory barrier types, shared by MIR and LIR.
+//
+// MembarSynchronizing is here because some platforms can make the
+// distinction (DSB vs DMB on ARM, SYNC vs parameterized SYNC on MIPS)
+// but there's been no reason to use it yet.
+
+enum MemoryBarrierBits {
+    MembarLoadLoad = 1,
+    MembarLoadStore = 2,
+    MembarStoreStore = 4,
+    MembarStoreLoad = 8,
+
+    MembarSynchronizing = 16,
+
+    // For validity testing
+    MembarAllbits = 31,
+};
+
+// Standard barrier bits for a full barrier.
+static const int MembarFull = MembarLoadLoad|MembarLoadStore|MembarStoreLoad|MembarStoreStore;
+
+// Standard sets of barrier bits for atomic loads and stores.
+// See http://gee.cs.oswego.edu/dl/jmm/cookbook.html for more.
+static const int MembarBeforeLoad = 0;
+static const int MembarAfterLoad = MembarLoadLoad|MembarLoadStore;
+static const int MembarBeforeStore = MembarStoreStore;
+static const int MembarAfterStore = MembarStoreLoad;
+
+} // namespace jit
+} // namespace js
+
+#endif /* jit_AtomicOp_h */
--- a/js/src/jit/CodeGenerator.cpp
+++ b/js/src/jit/CodeGenerator.cpp
@@ -8959,16 +8959,78 @@ CodeGenerator::visitStoreTypedArrayEleme
     }
     if (guardLength)
         masm.bind(&skip);
 
     return true;
 }
 
 bool
+CodeGenerator::visitCompareExchangeTypedArrayElement(LCompareExchangeTypedArrayElement *lir)
+{
+    Register elements = ToRegister(lir->elements());
+    AnyRegister output = ToAnyRegister(lir->output());
+    Register temp = lir->temp()->isBogusTemp() ? InvalidReg : ToRegister(lir->temp());
+
+    MOZ_ASSERT(lir->oldval()->isRegister());
+    MOZ_ASSERT(lir->newval()->isRegister());
+
+    Register oldval = ToRegister(lir->oldval());
+    Register newval = ToRegister(lir->newval());
+
+    Scalar::Type arrayType = lir->mir()->arrayType();
+    int width = Scalar::byteSize(arrayType);
+
+    if (lir->index()->isConstant()) {
+        Address dest(elements, ToInt32(lir->index()) * width);
+        masm.compareExchangeToTypedIntArray(arrayType, dest, oldval, newval, temp, output);
+    } else {
+        BaseIndex dest(elements, ToRegister(lir->index()), ScaleFromElemWidth(width));
+        masm.compareExchangeToTypedIntArray(arrayType, dest, oldval, newval, temp, output);
+    }
+
+    return true;
+}
+
+template <typename T>
+static inline void
+AtomicBinopToTypedArray(MacroAssembler &masm, AtomicOp op,
+                        Scalar::Type arrayType, const LAllocation *value, const T &mem,
+                        Register temp1, Register temp2, AnyRegister output)
+{
+    if (value->isConstant())
+        masm.atomicBinopToTypedIntArray(op, arrayType, Imm32(ToInt32(value)), mem, temp1, temp2, output);
+    else
+        masm.atomicBinopToTypedIntArray(op, arrayType, ToRegister(value), mem, temp1, temp2, output);
+}
+
+bool
+CodeGenerator::visitAtomicTypedArrayElementBinop(LAtomicTypedArrayElementBinop *lir)
+{
+    AnyRegister output = ToAnyRegister(lir->output());
+    Register elements = ToRegister(lir->elements());
+    Register temp1 = lir->temp1()->isBogusTemp() ? InvalidReg : ToRegister(lir->temp1());
+    Register temp2 = lir->temp2()->isBogusTemp() ? InvalidReg : ToRegister(lir->temp2());
+    const LAllocation* value = lir->value();
+
+    Scalar::Type arrayType = lir->mir()->arrayType();
+    int width = Scalar::byteSize(arrayType);
+
+    if (lir->index()->isConstant()) {
+        Address mem(elements, ToInt32(lir->index()) * width);
+        AtomicBinopToTypedArray(masm, lir->mir()->operation(), arrayType, value, mem, temp1, temp2, output);
+    } else {
+        BaseIndex mem(elements, ToRegister(lir->index()), ScaleFromElemWidth(width));
+        AtomicBinopToTypedArray(masm, lir->mir()->operation(), arrayType, value, mem, temp1, temp2, output);
+    }
+
+    return true;
+}
+
+bool
 CodeGenerator::visitClampIToUint8(LClampIToUint8 *lir)
 {
     Register output = ToRegister(lir->output());
     MOZ_ASSERT(output == ToRegister(lir->input()));
     masm.clampIntToUint8(output);
     return true;
 }
 
--- a/js/src/jit/CodeGenerator.h
+++ b/js/src/jit/CodeGenerator.h
@@ -260,16 +260,18 @@ class CodeGenerator : public CodeGenerat
     bool visitArrayPushV(LArrayPushV *lir);
     bool visitArrayPushT(LArrayPushT *lir);
     bool visitArrayConcat(LArrayConcat *lir);
     bool visitArrayJoin(LArrayJoin *lir);
     bool visitLoadTypedArrayElement(LLoadTypedArrayElement *lir);
     bool visitLoadTypedArrayElementHole(LLoadTypedArrayElementHole *lir);
     bool visitStoreTypedArrayElement(LStoreTypedArrayElement *lir);
     bool visitStoreTypedArrayElementHole(LStoreTypedArrayElementHole *lir);
+    bool visitCompareExchangeTypedArrayElement(LCompareExchangeTypedArrayElement *lir);
+    bool visitAtomicTypedArrayElementBinop(LAtomicTypedArrayElementBinop *lir);
     bool visitClampIToUint8(LClampIToUint8 *lir);
     bool visitClampDToUint8(LClampDToUint8 *lir);
     bool visitClampVToUint8(LClampVToUint8 *lir);
     bool visitCallIteratorStart(LCallIteratorStart *lir);
     bool visitIteratorStart(LIteratorStart *lir);
     bool visitIteratorMore(LIteratorMore *lir);
     bool visitIsNoIterAndBranch(LIsNoIterAndBranch *lir);
     bool visitIteratorEnd(LIteratorEnd *lir);
--- a/js/src/jit/IonBuilder.h
+++ b/js/src/jit/IonBuilder.h
@@ -722,16 +722,23 @@ class IonBuilder
     InliningStatus inlineStrFromCharCode(CallInfo &callInfo);
     InliningStatus inlineStrCharAt(CallInfo &callInfo);
     InliningStatus inlineStrReplace(CallInfo &callInfo);
 
     // RegExp natives.
     InliningStatus inlineRegExpExec(CallInfo &callInfo);
     InliningStatus inlineRegExpTest(CallInfo &callInfo);
 
+    // Atomics natives.
+    InliningStatus inlineAtomicsCompareExchange(CallInfo &callInfo);
+    InliningStatus inlineAtomicsLoad(CallInfo &callInfo);
+    InliningStatus inlineAtomicsStore(CallInfo &callInfo);
+    InliningStatus inlineAtomicsFence(CallInfo &callInfo);
+    InliningStatus inlineAtomicsBinop(CallInfo &callInfo, JSFunction *target);
+
     // Array intrinsics.
     InliningStatus inlineUnsafePutElements(CallInfo &callInfo);
     bool inlineUnsafeSetDenseArrayElement(CallInfo &callInfo, uint32_t base);
     bool inlineUnsafeSetTypedArrayElement(CallInfo &callInfo, uint32_t base,
                                           ScalarTypeDescr::Type arrayType);
     bool inlineUnsafeSetTypedObjectArrayElement(CallInfo &callInfo, uint32_t base,
                                                 ScalarTypeDescr::Type arrayType);
     InliningStatus inlineNewDenseArray(CallInfo &callInfo);
@@ -786,16 +793,19 @@ class IonBuilder
 
     // Inlining helpers.
     bool inlineGenericFallback(JSFunction *target, CallInfo &callInfo, MBasicBlock *dispatchBlock,
                                bool clonedAtCallsite);
     bool inlineTypeObjectFallback(CallInfo &callInfo, MBasicBlock *dispatchBlock,
                                   MTypeObjectDispatch *dispatch, MGetPropertyCache *cache,
                                   MBasicBlock **fallbackTarget);
 
+    bool atomicsMeetsPreconditions(CallInfo &callInfo, Scalar::Type *arrayElementType);
+    void atomicsCheckBounds(CallInfo &callInfo, MInstruction **elements, MDefinition **index);
+
     bool testNeedsArgumentCheck(JSFunction *target, CallInfo &callInfo);
 
     MDefinition *makeCallsiteClone(JSFunction *target, MDefinition *fun);
     MCall *makeCallHelper(JSFunction *target, CallInfo &callInfo, bool cloneAtCallsite);
     bool makeCall(JSFunction *target, CallInfo &callInfo, bool cloneAtCallsite);
 
     MDefinition *patchInlinedReturn(CallInfo &callInfo, MBasicBlock *exit, MBasicBlock *bottom);
     MDefinition *patchInlinedReturns(CallInfo &callInfo, MIRGraphReturns &returns,
--- a/js/src/jit/IonMacroAssembler.cpp
+++ b/js/src/jit/IonMacroAssembler.cpp
@@ -6,16 +6,17 @@
 
 #include "jit/IonMacroAssembler.h"
 
 #include "jsinfer.h"
 #include "jsprf.h"
 
 #include "builtin/TypedObject.h"
 #include "gc/GCTrace.h"
+#include "jit/AtomicOp.h"
 #include "jit/Bailouts.h"
 #include "jit/BaselineFrame.h"
 #include "jit/BaselineIC.h"
 #include "jit/BaselineJIT.h"
 #include "jit/Lowering.h"
 #include "jit/MIR.h"
 #include "jit/ParallelFunctions.h"
 #include "vm/ForkJoin.h"
@@ -392,16 +393,221 @@ MacroAssembler::loadFromTypedArray(Scala
     }
 }
 
 template void MacroAssembler::loadFromTypedArray(Scalar::Type arrayType, const Address &src, const ValueOperand &dest,
                                                  bool allowDouble, Register temp, Label *fail);
 template void MacroAssembler::loadFromTypedArray(Scalar::Type arrayType, const BaseIndex &src, const ValueOperand &dest,
                                                  bool allowDouble, Register temp, Label *fail);
 
+template<typename T>
+void
+MacroAssembler::compareExchangeToTypedIntArray(Scalar::Type arrayType, const T &mem,
+                                               Register oldval, Register newval,
+                                               Register temp, AnyRegister output)
+{
+    switch (arrayType) {
+      case Scalar::Int8:
+        compareExchange8SignExtend(mem, oldval, newval, output.gpr());
+        break;
+      case Scalar::Uint8:
+        compareExchange8ZeroExtend(mem, oldval, newval, output.gpr());
+        break;
+      case Scalar::Uint8Clamped:
+        compareExchange8ZeroExtend(mem, oldval, newval, output.gpr());
+        break;
+      case Scalar::Int16:
+        compareExchange16SignExtend(mem, oldval, newval, output.gpr());
+        break;
+      case Scalar::Uint16:
+        compareExchange16ZeroExtend(mem, oldval, newval, output.gpr());
+        break;
+      case Scalar::Int32:
+        compareExchange32(mem, oldval, newval, output.gpr());
+        break;
+      case Scalar::Uint32:
+        // At the moment, the code in MCallOptimize.cpp requires the output
+        // type to be double for uint32 arrays.  See bug 1077305.
+        MOZ_ASSERT(output.isFloat());
+        compareExchange32(mem, oldval, newval, temp);
+        convertUInt32ToDouble(temp, output.fpu());
+        break;
+      default:
+        MOZ_CRASH("Invalid typed array type");
+    }
+}
+
+template void
+MacroAssembler::compareExchangeToTypedIntArray(Scalar::Type arrayType, const Address &mem,
+                                               Register oldval, Register newval, Register temp,
+                                               AnyRegister output);
+template void
+MacroAssembler::compareExchangeToTypedIntArray(Scalar::Type arrayType, const BaseIndex &mem,
+                                               Register oldval, Register newval, Register temp,
+                                               AnyRegister output);
+
+template<typename S, typename T>
+void
+MacroAssembler::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType, const S &value,
+                                           const T &mem, Register temp1, Register temp2, AnyRegister output)
+{
+    // Uint8Clamped is explicitly not supported here
+    switch (arrayType) {
+      case Scalar::Int8:
+        switch (op) {
+          case AtomicFetchAddOp:
+            atomicFetchAdd8SignExtend(value, mem, temp1, output.gpr());
+            break;
+          case AtomicFetchSubOp:
+            atomicFetchSub8SignExtend(value, mem, temp1, output.gpr());
+            break;
+          case AtomicFetchAndOp:
+            atomicFetchAnd8SignExtend(value, mem, temp1, output.gpr());
+            break;
+          case AtomicFetchOrOp:
+            atomicFetchOr8SignExtend(value, mem, temp1, output.gpr());
+            break;
+          case AtomicFetchXorOp:
+            atomicFetchXor8SignExtend(value, mem, temp1, output.gpr());
+            break;
+          default:
+            MOZ_CRASH("Invalid typed array atomic operation");
+        }
+        break;
+      case Scalar::Uint8:
+        switch (op) {
+          case AtomicFetchAddOp:
+            atomicFetchAdd8ZeroExtend(value, mem, temp1, output.gpr());
+            break;
+          case AtomicFetchSubOp:
+            atomicFetchSub8ZeroExtend(value, mem, temp1, output.gpr());
+            break;
+          case AtomicFetchAndOp:
+            atomicFetchAnd8ZeroExtend(value, mem, temp1, output.gpr());
+            break;
+          case AtomicFetchOrOp:
+            atomicFetchOr8ZeroExtend(value, mem, temp1, output.gpr());
+            break;
+          case AtomicFetchXorOp:
+            atomicFetchXor8ZeroExtend(value, mem, temp1, output.gpr());
+            break;
+          default:
+            MOZ_CRASH("Invalid typed array atomic operation");
+        }
+        break;
+      case Scalar::Int16:
+        switch (op) {
+          case AtomicFetchAddOp:
+            atomicFetchAdd16SignExtend(value, mem, temp1, output.gpr());
+            break;
+          case AtomicFetchSubOp:
+            atomicFetchSub16SignExtend(value, mem, temp1, output.gpr());
+            break;
+          case AtomicFetchAndOp:
+            atomicFetchAnd16SignExtend(value, mem, temp1, output.gpr());
+            break;
+          case AtomicFetchOrOp:
+            atomicFetchOr16SignExtend(value, mem, temp1, output.gpr());
+            break;
+          case AtomicFetchXorOp:
+            atomicFetchXor16SignExtend(value, mem, temp1, output.gpr());
+            break;
+          default:
+            MOZ_CRASH("Invalid typed array atomic operation");
+        }
+        break;
+      case Scalar::Uint16:
+        switch (op) {
+          case AtomicFetchAddOp:
+            atomicFetchAdd16ZeroExtend(value, mem, temp1, output.gpr());
+            break;
+          case AtomicFetchSubOp:
+            atomicFetchSub16ZeroExtend(value, mem, temp1, output.gpr());
+            break;
+          case AtomicFetchAndOp:
+            atomicFetchAnd16ZeroExtend(value, mem, temp1, output.gpr());
+            break;
+          case AtomicFetchOrOp:
+            atomicFetchOr16ZeroExtend(value, mem, temp1, output.gpr());
+            break;
+          case AtomicFetchXorOp:
+            atomicFetchXor16ZeroExtend(value, mem, temp1, output.gpr());
+            break;
+          default:
+            MOZ_CRASH("Invalid typed array atomic operation");
+        }
+        break;
+      case Scalar::Int32:
+        switch (op) {
+          case AtomicFetchAddOp:
+            atomicFetchAdd32(value, mem, temp1, output.gpr());
+            break;
+          case AtomicFetchSubOp:
+            atomicFetchSub32(value, mem, temp1, output.gpr());
+            break;
+          case AtomicFetchAndOp:
+            atomicFetchAnd32(value, mem, temp1, output.gpr());
+            break;
+          case AtomicFetchOrOp:
+            atomicFetchOr32(value, mem, temp1, output.gpr());
+            break;
+          case AtomicFetchXorOp:
+            atomicFetchXor32(value, mem, temp1, output.gpr());
+            break;
+          default:
+            MOZ_CRASH("Invalid typed array atomic operation");
+        }
+        break;
+      case Scalar::Uint32:
+        // At the moment, the code in MCallOptimize.cpp requires the output
+        // type to be double for uint32 arrays.  See bug 1077305.
+        MOZ_ASSERT(output.isFloat());
+        switch (op) {
+          case AtomicFetchAddOp:
+            atomicFetchAdd32(value, mem, InvalidReg, temp1);
+            break;
+          case AtomicFetchSubOp:
+            atomicFetchSub32(value, mem, InvalidReg, temp1);
+            break;
+          case AtomicFetchAndOp:
+            atomicFetchAnd32(value, mem, temp2, temp1);
+            break;
+          case AtomicFetchOrOp:
+            atomicFetchOr32(value, mem, temp2, temp1);
+            break;
+          case AtomicFetchXorOp:
+            atomicFetchXor32(value, mem, temp2, temp1);
+            break;
+          default:
+            MOZ_CRASH("Invalid typed array atomic operation");
+        }
+        convertUInt32ToDouble(temp1, output.fpu());
+        break;
+      default:
+        MOZ_CRASH("Invalid typed array type");
+    }
+}
+
+template void
+MacroAssembler::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType,
+                                           const Imm32 &value, const Address &mem,
+                                           Register temp1, Register temp2, AnyRegister output);
+template void
+MacroAssembler::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType,
+                                           const Imm32 &value, const BaseIndex &mem,
+                                           Register temp1, Register temp2, AnyRegister output);
+template void
+MacroAssembler::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType,
+                                           const Register &value, const Address &mem,
+                                           Register temp1, Register temp2, AnyRegister output);
+template void
+MacroAssembler::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType,
+                                           const Register &value, const BaseIndex &mem,
+                                           Register temp1, Register temp2, AnyRegister output);
+
 // Inlined version of gc::CheckAllocatorState that checks the bare essentials
 // and bails for anything that cannot be handled with our jit allocators.
 void
 MacroAssembler::checkAllocatorState(Label *fail)
 {
     // Don't execute the inline path if we are tracing allocations.
     if (js::gc::TraceEnabled())
         jump(fail);
--- a/js/src/jit/IonMacroAssembler.h
+++ b/js/src/jit/IonMacroAssembler.h
@@ -17,16 +17,17 @@
 # include "jit/arm/MacroAssembler-arm.h"
 #elif defined(JS_CODEGEN_MIPS)
 # include "jit/mips/MacroAssembler-mips.h"
 #elif defined(JS_CODEGEN_NONE)
 # include "jit/none/MacroAssembler-none.h"
 #else
 # error "Unknown architecture!"
 #endif
+#include "jit/AtomicOp.h"
 #include "jit/IonInstrumentation.h"
 #include "jit/JitCompartment.h"
 #include "jit/VMFunctions.h"
 #include "vm/ProxyObject.h"
 #include "vm/Shape.h"
 
 #ifdef IS_LITTLE_ENDIAN
 #define IMM32_16ADJ(X) X << 16
@@ -733,16 +734,24 @@ class MacroAssembler : public MacroAssem
           case Scalar::Uint32:
             store32(value, dest);
             break;
           default:
             MOZ_CRASH("Invalid typed array type");
         }
     }
 
+    template<typename T>
+    void compareExchangeToTypedIntArray(Scalar::Type arrayType, const T &mem, Register oldval, Register newval,
+                                        Register temp, AnyRegister output);
+
+    template<typename S, typename T>
+    void atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType, const S &value,
+                                    const T &mem, Register temp1, Register temp2, AnyRegister output);
+
     void storeToTypedFloatArray(Scalar::Type arrayType, FloatRegister value, const BaseIndex &dest);
     void storeToTypedFloatArray(Scalar::Type arrayType, FloatRegister value, const Address &dest);
 
     Register extractString(const Address &address, Register scratch) {
         return extractObject(address, scratch);
     }
     Register extractString(const ValueOperand &value, Register scratch) {
         return extractObject(value, scratch);
--- a/js/src/jit/LIR-Common.h
+++ b/js/src/jit/LIR-Common.h
@@ -4866,16 +4866,90 @@ class LStoreTypedArrayElementStatic : pu
     const LAllocation *ptr() {
         return getOperand(0);
     }
     const LAllocation *value() {
         return getOperand(1);
     }
 };
 
+class LCompareExchangeTypedArrayElement : public LInstructionHelper<1, 4, 1>
+{
+  public:
+    LIR_HEADER(CompareExchangeTypedArrayElement)
+
+    LCompareExchangeTypedArrayElement(const LAllocation &elements, const LAllocation &index,
+                                      const LAllocation &oldval, const LAllocation &newval,
+                                      const LDefinition &temp)
+    {
+        setOperand(0, elements);
+        setOperand(1, index);
+        setOperand(2, oldval);
+        setOperand(3, newval);
+        setTemp(0, temp);
+    }
+
+    const LAllocation *elements() {
+        return getOperand(0);
+    }
+    const LAllocation *index() {
+        return getOperand(1);
+    }
+    const LAllocation *oldval() {
+        return getOperand(2);
+    }
+    const LAllocation *newval() {
+        return getOperand(3);
+    }
+    const LDefinition *temp() {
+        return getTemp(0);
+    }
+
+    const MCompareExchangeTypedArrayElement *mir() const {
+        return mir_->toCompareExchangeTypedArrayElement();
+    }
+};
+
+class LAtomicTypedArrayElementBinop : public LInstructionHelper<1, 3, 2>
+{
+  public:
+    LIR_HEADER(AtomicTypedArrayElementBinop)
+
+    LAtomicTypedArrayElementBinop(const LAllocation &elements, const LAllocation &index,
+                                  const LAllocation &value, const LDefinition &temp1,
+                                  const LDefinition &temp2)
+    {
+        setOperand(0, elements);
+        setOperand(1, index);
+        setOperand(2, value);
+        setTemp(0, temp1);
+        setTemp(1, temp2);
+    }
+
+    const LAllocation *elements() {
+        return getOperand(0);
+    }
+    const LAllocation *index() {
+        return getOperand(1);
+    }
+    const LAllocation *value() {
+        return getOperand(2);
+    }
+    const LDefinition *temp1() {
+        return getTemp(0);
+    }
+    const LDefinition *temp2() {
+        return getTemp(1);
+    }
+
+    const MAtomicTypedArrayElementBinop *mir() const {
+        return mir_->toAtomicTypedArrayElementBinop();
+    }
+};
+
 class LEffectiveAddress : public LInstructionHelper<1, 2, 0>
 {
   public:
     LIR_HEADER(EffectiveAddress);
 
     LEffectiveAddress(const LAllocation &base, const LAllocation &index) {
         setOperand(0, base);
         setOperand(1, index);
@@ -6623,12 +6697,36 @@ class LThrowUninitializedLexical : publi
   public:
     LIR_HEADER(ThrowUninitializedLexical)
 
     MLexicalCheck *mir() {
         return mir_->toLexicalCheck();
     }
 };
 
+class LMemoryBarrier : public LInstructionHelper<0, 0, 0>
+{
+  private:
+    const int type_;
+
+  public:
+    LIR_HEADER(MemoryBarrier)
+
+    // The parameter 'type' is a bitwise 'or' of the barrier types needed,
+    // see AtomicOp.h.
+    explicit LMemoryBarrier(int type) : type_(type)
+    {
+        MOZ_ASSERT((type_ & ~MembarAllbits) == 0);
+    }
+
+    int type() const {
+        return type_;
+    }
+
+    const MMemoryBarrier *mir() const {
+        return mir_->toMemoryBarrier();
+    }
+};
+
 } // namespace jit
 } // namespace js
 
 #endif /* jit_LIR_Common_h */
--- a/js/src/jit/LIR.h
+++ b/js/src/jit/LIR.h
@@ -653,17 +653,18 @@ class LNode
     virtual void setDef(size_t index, const LDefinition &def) = 0;
 
     // Returns information about operands.
     virtual size_t numOperands() const = 0;
     virtual LAllocation *getOperand(size_t index) = 0;
     virtual void setOperand(size_t index, const LAllocation &a) = 0;
 
     // Returns information about temporary registers needed. Each temporary
-    // register is an LUse with a TEMPORARY policy, or a fixed register.
+    // register is an LDefinition with a fixed or virtual register and
+    // either GENERAL, FLOAT32, or DOUBLE type.
     virtual size_t numTemps() const = 0;
     virtual LDefinition *getTemp(size_t index) = 0;
     virtual void setTemp(size_t index, const LDefinition &a) = 0;
 
     // Returns the number of successors of this instruction, if it is a control
     // transfer instruction, or zero otherwise.
     virtual size_t numSuccessors() const = 0;
     virtual MBasicBlock *getSuccessor(size_t i) const = 0;
--- a/js/src/jit/LOpcodes.h
+++ b/js/src/jit/LOpcodes.h
@@ -229,16 +229,18 @@
     _(StoreElementHoleV)            \
     _(StoreElementHoleT)            \
     _(LoadTypedArrayElement)        \
     _(LoadTypedArrayElementHole)    \
     _(LoadTypedArrayElementStatic)  \
     _(StoreTypedArrayElement)       \
     _(StoreTypedArrayElementHole)   \
     _(StoreTypedArrayElementStatic) \
+    _(CompareExchangeTypedArrayElement) \
+    _(AtomicTypedArrayElementBinop) \
     _(EffectiveAddress)             \
     _(ClampIToUint8)                \
     _(ClampDToUint8)                \
     _(ClampVToUint8)                \
     _(LoadFixedSlotV)               \
     _(LoadFixedSlotT)               \
     _(StoreFixedSlotV)              \
     _(StoreFixedSlotT)              \
@@ -322,16 +324,17 @@
     _(AsmJSLoadFFIFunc)             \
     _(AsmJSParameter)               \
     _(AsmJSReturn)                  \
     _(AsmJSVoidReturn)              \
     _(AsmJSPassStackArg)            \
     _(AsmJSCall)                    \
     _(InterruptCheckPar)            \
     _(RecompileCheck)               \
+    _(MemoryBarrier)                \
     _(AssertRangeI)                 \
     _(AssertRangeD)                 \
     _(AssertRangeF)                 \
     _(AssertRangeV)                 \
     _(LexicalCheck)                 \
     _(ThrowUninitializedLexical)
 
 #if defined(JS_CODEGEN_X86)
--- a/js/src/jit/LinearScan.cpp
+++ b/js/src/jit/LinearScan.cpp
@@ -206,17 +206,17 @@ LinearScanAllocator::allocateRegisters()
 
 /*
  * This function iterates over control flow edges in the function and resolves
  * conflicts wherein two predecessors of a block have different allocations
  * for a virtual register than the block itself. It also turns phis into moves.
  *
  * The algorithm is based on the one published in "Linear Scan Register
  * Allocation on SSA Form" by C. Wimmer et al., for which the full citation
- * appears above.
+ * appears in LiveRangeAllocator.cpp.
  */
 bool
 LinearScanAllocator::resolveControlFlow()
 {
     for (size_t i = 0; i < graph.numBlocks(); i++) {
         if (mir->shouldCancel("LSRA Resolve Control Flow (main loop)"))
             return false;
 
--- a/js/src/jit/Lowering.cpp
+++ b/js/src/jit/Lowering.cpp
@@ -2854,20 +2854,32 @@ LIRGenerator::visitLoadTypedArrayElement
 
     MOZ_ASSERT(IsNumberType(ins->type()));
 
     // We need a temp register for Uint32Array with known double result.
     LDefinition tempDef = LDefinition::BogusTemp();
     if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type()))
         tempDef = temp();
 
+    if (ins->requiresMemoryBarrier()) {
+        LMemoryBarrier *fence = new(alloc()) LMemoryBarrier(MembarBeforeLoad);
+        if (!add(fence, ins))
+            return false;
+    }
     LLoadTypedArrayElement *lir = new(alloc()) LLoadTypedArrayElement(elements, index, tempDef);
     if (ins->fallible() && !assignSnapshot(lir, Bailout_Overflow))
         return false;
-    return define(lir, ins);
+    if (!define(lir, ins))
+        return false;
+    if (ins->requiresMemoryBarrier()) {
+        LMemoryBarrier *fence = new(alloc()) LMemoryBarrier(MembarAfterLoad);
+        if (!add(fence, ins))
+            return false;
+    }
+    return true;
 }
 
 bool
 LIRGenerator::visitClampToUint8(MClampToUint8 *ins)
 {
     MDefinition *in = ins->input();
 
     switch (in->type()) {
@@ -2941,17 +2953,34 @@ LIRGenerator::visitStoreTypedArrayElemen
     LAllocation index = useRegisterOrConstant(ins->index());
     LAllocation value;
 
     // For byte arrays, the value has to be in a byte register on x86.
     if (ins->isByteArray())
         value = useByteOpRegisterOrNonDoubleConstant(ins->value());
     else
         value = useRegisterOrNonDoubleConstant(ins->value());
-    return add(new(alloc()) LStoreTypedArrayElement(elements, index, value), ins);
+
+    // Optimization opportunity for atomics: on some platforms there
+    // is a store instruction that incorporates the necessary
+    // barriers, and we could use that instead of separate barrier and
+    // store instructions.  See bug #1077027.
+    if (ins->requiresMemoryBarrier()) {
+        LMemoryBarrier *fence = new(alloc()) LMemoryBarrier(MembarBeforeStore);
+        if (!add(fence, ins))
+            return false;
+    }
+    if (!add(new(alloc()) LStoreTypedArrayElement(elements, index, value), ins))
+        return false;
+    if (ins->requiresMemoryBarrier()) {
+        LMemoryBarrier *fence = new(alloc()) LMemoryBarrier(MembarAfterStore);
+        if (!add(fence, ins))
+            return false;
+    }
+    return true;
 }
 
 bool
 LIRGenerator::visitStoreTypedArrayElementHole(MStoreTypedArrayElementHole *ins)
 {
     MOZ_ASSERT(ins->elements()->type() == MIRType_Elements);
     MOZ_ASSERT(ins->index()->type() == MIRType_Int32);
     MOZ_ASSERT(ins->length()->type() == MIRType_Int32);
@@ -3709,16 +3738,23 @@ LIRGenerator::visitRecompileCheck(MRecom
 {
     LRecompileCheck *lir = new(alloc()) LRecompileCheck(temp());
     if (!add(lir, ins))
         return false;
     return assignSafepoint(lir, ins);
 }
 
 bool
+LIRGenerator::visitMemoryBarrier(MMemoryBarrier *ins)
+{
+    LMemoryBarrier *lir = new(alloc()) LMemoryBarrier(ins->type());
+    return add(lir, ins);
+}
+
+bool
 LIRGenerator::visitSimdConstant(MSimdConstant *ins)
 {
     MOZ_ASSERT(IsSimdType(ins->type()));
 
     if (ins->type() == MIRType_Int32x4)
         return define(new(alloc()) LInt32x4(), ins);
     if (ins->type() == MIRType_Float32x4)
         return define(new(alloc()) LFloat32x4(), ins);
--- a/js/src/jit/Lowering.h
+++ b/js/src/jit/Lowering.h
@@ -264,16 +264,17 @@ class LIRGenerator : public LIRGenerator
     bool visitAsmJSReturn(MAsmJSReturn *ins);
     bool visitAsmJSVoidReturn(MAsmJSVoidReturn *ins);
     bool visitAsmJSPassStackArg(MAsmJSPassStackArg *ins);
     bool visitAsmJSCall(MAsmJSCall *ins);
     bool visitSetDOMProperty(MSetDOMProperty *ins);
     bool visitGetDOMProperty(MGetDOMProperty *ins);
     bool visitGetDOMMember(MGetDOMMember *ins);
     bool visitRecompileCheck(MRecompileCheck *ins);
+    bool visitMemoryBarrier(MMemoryBarrier *ins);
     bool visitSimdExtractElement(MSimdExtractElement *ins);
     bool visitSimdInsertElement(MSimdInsertElement *ins);
     bool visitSimdSignMask(MSimdSignMask *ins);
     bool visitSimdSwizzle(MSimdSwizzle *ins);
     bool visitSimdShuffle(MSimdShuffle *ins);
     bool visitSimdUnaryArith(MSimdUnaryArith *ins);
     bool visitSimdBinaryComp(MSimdBinaryComp *ins);
     bool visitSimdBinaryArith(MSimdBinaryArith *ins);
--- a/js/src/jit/MCallOptimize.cpp
+++ b/js/src/jit/MCallOptimize.cpp
@@ -1,16 +1,17 @@
 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
  * vim: set ts=8 sts=4 et sw=4 tw=99:
  * This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 #include "jsmath.h"
 
+#include "builtin/AtomicsObject.h"
 #include "builtin/TestingFunctions.h"
 #include "builtin/TypedObject.h"
 #include "jit/BaselineInspector.h"
 #include "jit/IonBuilder.h"
 #include "jit/Lowering.h"
 #include "jit/MIR.h"
 #include "jit/MIRGraph.h"
 #include "vm/ArgumentsObject.h"
@@ -29,16 +30,34 @@ IonBuilder::InliningStatus
 IonBuilder::inlineNativeCall(CallInfo &callInfo, JSFunction *target)
 {
     MOZ_ASSERT(target->isNative());
     JSNative native = target->native();
 
     if (!optimizationInfo().inlineNative())
         return InliningStatus_NotInlined;
 
+    // Atomic natives.
+    if (native == atomics_compareExchange)
+        return inlineAtomicsCompareExchange(callInfo);
+    if (native == atomics_load)
+        return inlineAtomicsLoad(callInfo);
+    if (native == atomics_store)
+        return inlineAtomicsStore(callInfo);
+    if (native == atomics_fence)
+        return inlineAtomicsFence(callInfo);
+    if (native == atomics_add ||
+        native == atomics_sub ||
+        native == atomics_and ||
+        native == atomics_or ||
+        native == atomics_xor)
+    {
+        return inlineAtomicsBinop(callInfo, target);
+    }
+
     // Array natives.
     if (native == js_Array)
         return inlineArray(callInfo);
     if (native == js::array_pop)
         return inlineArrayPopShift(callInfo, MArrayPopShift::Pop);
     if (native == js::array_shift)
         return inlineArrayPopShift(callInfo, MArrayPopShift::Shift);
     if (native == js::array_push)
@@ -2231,16 +2250,235 @@ IonBuilder::inlineBoundFunction(CallInfo
 
     if (!makeCall(scriptedTarget, callInfo, false))
         return InliningStatus_Error;
 
     return InliningStatus_Inlined;
 }
 
 IonBuilder::InliningStatus
+IonBuilder::inlineAtomicsCompareExchange(CallInfo &callInfo)
+{
+    if (callInfo.argc() != 4 || callInfo.constructing())
+        return InliningStatus_NotInlined;
+
+    Scalar::Type arrayType;
+    if (!atomicsMeetsPreconditions(callInfo, &arrayType))
+        return InliningStatus_NotInlined;
+
+    MDefinition *oldval = callInfo.getArg(2);
+    if (!(oldval->type() == MIRType_Int32 || oldval->type() == MIRType_Double))
+        return InliningStatus_NotInlined;
+
+    MDefinition *newval = callInfo.getArg(3);
+    if (!(newval->type() == MIRType_Int32 || newval->type() == MIRType_Double))
+        return InliningStatus_NotInlined;
+
+    callInfo.setImplicitlyUsedUnchecked();
+
+    MInstruction *elements;
+    MDefinition *index;
+    atomicsCheckBounds(callInfo, &elements, &index);
+
+    MDefinition *oldvalToWrite = oldval;
+    if (oldval->type() == MIRType_Double) {
+        oldvalToWrite = MTruncateToInt32::New(alloc(), oldval);
+        current->add(oldvalToWrite->toInstruction());
+    }
+
+    MDefinition *newvalToWrite = newval;
+    if (newval->type() == MIRType_Double) {
+        newvalToWrite = MTruncateToInt32::New(alloc(), newval);
+        current->add(newvalToWrite->toInstruction());
+    }
+
+    MCompareExchangeTypedArrayElement *cas =
+        MCompareExchangeTypedArrayElement::New(alloc(), elements, index, arrayType,
+                                               oldvalToWrite, newvalToWrite);
+    cas->setResultType(getInlineReturnType());
+    current->add(cas);
+    current->push(cas);
+
+    return InliningStatus_Inlined;
+}
+
+IonBuilder::InliningStatus
+IonBuilder::inlineAtomicsLoad(CallInfo &callInfo)
+{
+    if (callInfo.argc() != 2 || callInfo.constructing())
+        return InliningStatus_NotInlined;
+
+    Scalar::Type arrayType;
+    if (!atomicsMeetsPreconditions(callInfo, &arrayType))
+        return InliningStatus_NotInlined;
+
+    callInfo.setImplicitlyUsedUnchecked();
+
+    MInstruction *elements;
+    MDefinition *index;
+    atomicsCheckBounds(callInfo, &elements, &index);
+
+    MLoadTypedArrayElement *load =
+        MLoadTypedArrayElement::New(alloc(), elements, index, arrayType,
+                                    DoesRequireMemoryBarrier);
+    load->setResultType(getInlineReturnType());
+    current->add(load);
+    current->push(load);
+
+    return InliningStatus_Inlined;
+}
+
+IonBuilder::InliningStatus
+IonBuilder::inlineAtomicsStore(CallInfo &callInfo)
+{
+    if (callInfo.argc() != 3 || callInfo.constructing())
+        return InliningStatus_NotInlined;
+
+    Scalar::Type arrayType;
+    if (!atomicsMeetsPreconditions(callInfo, &arrayType))
+        return InliningStatus_NotInlined;
+
+    MDefinition *value = callInfo.getArg(2);
+    if (!(value->type() == MIRType_Int32 || value->type() == MIRType_Double))
+        return InliningStatus_NotInlined;
+
+    callInfo.setImplicitlyUsedUnchecked();
+
+    MInstruction *elements;
+    MDefinition *index;
+    atomicsCheckBounds(callInfo, &elements, &index);
+
+    MDefinition *toWrite = value;
+    if (value->type() == MIRType_Double) {
+        toWrite = MTruncateToInt32::New(alloc(), value);
+        current->add(toWrite->toInstruction());
+    }
+    MStoreTypedArrayElement *store =
+        MStoreTypedArrayElement::New(alloc(), elements, index, toWrite, arrayType,
+                                     DoesRequireMemoryBarrier);
+    current->add(store);
+    current->push(value);
+
+    return InliningStatus_Inlined;
+}
+
+IonBuilder::InliningStatus
+IonBuilder::inlineAtomicsFence(CallInfo &callInfo)
+{
+    if (callInfo.argc() != 0 || callInfo.constructing())
+        return InliningStatus_NotInlined;
+
+    callInfo.setImplicitlyUsedUnchecked();
+
+    MMemoryBarrier *fence = MMemoryBarrier::New(alloc());
+    current->add(fence);
+    pushConstant(UndefinedValue());
+
+    return InliningStatus_Inlined;
+}
+
+IonBuilder::InliningStatus
+IonBuilder::inlineAtomicsBinop(CallInfo &callInfo, JSFunction *target)
+{
+    if (callInfo.argc() != 3 || callInfo.constructing())
+        return InliningStatus_NotInlined;
+
+    Scalar::Type arrayType;
+    if (!atomicsMeetsPreconditions(callInfo, &arrayType))
+        return InliningStatus_NotInlined;
+
+    MDefinition *value = callInfo.getArg(2);
+    if (!(value->type() == MIRType_Int32 || value->type() == MIRType_Double))
+        return InliningStatus_NotInlined;
+
+    callInfo.setImplicitlyUsedUnchecked();
+
+    MInstruction *elements;
+    MDefinition *index;
+    atomicsCheckBounds(callInfo, &elements, &index);
+
+    JSNative native = target->native();
+    AtomicOp k = AtomicFetchAddOp;
+    if (native == atomics_add)
+        k = AtomicFetchAddOp;
+    else if (native == atomics_sub)
+        k = AtomicFetchSubOp;
+    else if (native == atomics_and)
+        k = AtomicFetchAndOp;
+    else if (native == atomics_or)
+        k = AtomicFetchOrOp;
+    else if (native == atomics_xor)
+        k = AtomicFetchXorOp;
+    else
+        MOZ_CRASH("Bad atomic operation");
+
+    MDefinition *toWrite = value;
+    if (value->type() == MIRType_Double) {
+        toWrite = MTruncateToInt32::New(alloc(), value);
+        current->add(toWrite->toInstruction());
+    }
+    MAtomicTypedArrayElementBinop *binop =
+        MAtomicTypedArrayElementBinop::New(alloc(), k, elements, index, arrayType, toWrite);
+    binop->setResultType(getInlineReturnType());
+    current->add(binop);
+    current->push(binop);
+
+    return InliningStatus_Inlined;
+}
+
+bool
+IonBuilder::atomicsMeetsPreconditions(CallInfo &callInfo, Scalar::Type *arrayType)
+{
+    if (callInfo.getArg(0)->type() != MIRType_Object)
+        return false;
+
+    if (callInfo.getArg(1)->type() != MIRType_Int32)
+        return false;
+
+    // Ensure that the first argument is a valid SharedTypedArray.
+    //
+    // Then check both that the element type is something we can
+    // optimize and that the return type is suitable for that element
+    // type.
+
+    types::TemporaryTypeSet *arg0Types = callInfo.getArg(0)->resultTypeSet();
+    if (!arg0Types)
+        return false;
+
+    *arrayType = arg0Types->getSharedTypedArrayType();
+    switch (*arrayType) {
+      case Scalar::Int8:
+      case Scalar::Uint8:
+      case Scalar::Int16:
+      case Scalar::Uint16:
+      case Scalar::Int32:
+        return getInlineReturnType() == MIRType_Int32;
+      case Scalar::Uint32:
+        // Bug 1077305: it would be attractive to allow inlining even
+        // if the inline return type is Int32, which it will frequently
+        // be.
+        return getInlineReturnType() == MIRType_Double;
+      default:
+        // Excludes floating types and Uint8Clamped
+        return false;
+    }
+}
+
+void
+IonBuilder::atomicsCheckBounds(CallInfo &callInfo, MInstruction **elements, MDefinition **index)
+{
+    // Perform bounds checking and extract the elements vector.
+    MDefinition *obj = callInfo.getArg(0);
+    MInstruction *length = nullptr;
+    *index = callInfo.getArg(1);
+    *elements = nullptr;
+    addTypedArrayLengthAndData(obj, DoBoundsCheck, index, &length, elements);
+}
+
+IonBuilder::InliningStatus
 IonBuilder::inlineIsConstructing(CallInfo &callInfo)
 {
     MOZ_ASSERT(!callInfo.constructing());
     MOZ_ASSERT(callInfo.argc() == 0);
     MOZ_ASSERT(script()->functionNonDelazifying(),
                "isConstructing() should only be called in function scripts");
 
     if (getInlineReturnType() != MIRType_Boolean)
--- a/js/src/jit/MIR.h
+++ b/js/src/jit/MIR.h
@@ -10,16 +10,17 @@
  */
 
 #ifndef jit_MIR_h
 #define jit_MIR_h
 
 #include "mozilla/Array.h"
 #include "mozilla/DebugOnly.h"
 
+#include "jit/AtomicOp.h"
 #include "jit/FixedList.h"
 #include "jit/InlineList.h"
 #include "jit/IonAllocPolicy.h"
 #include "jit/IonMacroAssembler.h"
 #include "jit/MOpcodes.h"
 #include "jit/TypedObjectPrediction.h"
 #include "jit/TypePolicy.h"
 #include "vm/ArrayObject.h"
@@ -8032,59 +8033,85 @@ class MArrayJoin
         return true;
     }
     virtual AliasSet getAliasSet() const {
         return AliasSet::Load(AliasSet::Element | AliasSet::ObjectFields);
     }
     MDefinition *foldsTo(TempAllocator &alloc);
 };
 
+// See comments above MMemoryBarrier, below.
+
+enum MemoryBarrierRequirement
+{
+    DoesNotRequireMemoryBarrier,
+    DoesRequireMemoryBarrier
+};
+
+// Also see comments above MMemoryBarrier, below.
+
 class MLoadTypedArrayElement
   : public MBinaryInstruction
 {
     Scalar::Type arrayType_;
+    bool requiresBarrier_;
 
     MLoadTypedArrayElement(MDefinition *elements, MDefinition *index,
-                           Scalar::Type arrayType)
-      : MBinaryInstruction(elements, index), arrayType_(arrayType)
+                           Scalar::Type arrayType, MemoryBarrierRequirement requiresBarrier)
+      : MBinaryInstruction(elements, index),
+        arrayType_(arrayType),
+        requiresBarrier_(requiresBarrier == DoesRequireMemoryBarrier)
     {
         setResultType(MIRType_Value);
-        setMovable();
+        if (requiresBarrier_)
+            setGuard();         // Not removable or movable
+        else
+            setMovable();
         MOZ_ASSERT(elements->type() == MIRType_Elements);
         MOZ_ASSERT(index->type() == MIRType_Int32);
         MOZ_ASSERT(arrayType >= 0 && arrayType < Scalar::TypeMax);
     }
 
   public:
     INSTRUCTION_HEADER(LoadTypedArrayElement)
 
     static MLoadTypedArrayElement *New(TempAllocator &alloc, MDefinition *elements, MDefinition *index,
-                                       Scalar::Type arrayType)
-    {
-        return new(alloc) MLoadTypedArrayElement(elements, index, arrayType);
+                                       Scalar::Type arrayType,
+                                       MemoryBarrierRequirement requiresBarrier=DoesNotRequireMemoryBarrier)
+    {
+        return new(alloc) MLoadTypedArrayElement(elements, index, arrayType, requiresBarrier);
     }
 
     Scalar::Type arrayType() const {
         return arrayType_;
     }
     bool fallible() const {
         // Bailout if the result does not fit in an int32.
         return arrayType_ == Scalar::Uint32 && type() == MIRType_Int32;
     }
+    bool requiresMemoryBarrier() const {
+        return requiresBarrier_;
+    }
     MDefinition *elements() const {
         return getOperand(0);
     }
     MDefinition *index() const {
         return getOperand(1);
     }
     AliasSet getAliasSet() const {
+        // When a barrier is needed make the instruction effectful by
+        // giving it a "store" effect.
+        if (requiresBarrier_)
+            return AliasSet::Store(AliasSet::TypedArrayElement);
         return AliasSet::Load(AliasSet::TypedArrayElement);
     }
 
     bool congruentTo(const MDefinition *ins) const {
+        if (requiresBarrier_)
+            return false;
         if (!ins->isLoadTypedArrayElement())
             return false;
         const MLoadTypedArrayElement *other = ins->toLoadTypedArrayElement();
         if (arrayType_ != other->arrayType_)
             return false;
         return congruentIfOperandsEqual(other);
     }
 
@@ -8209,37 +8236,46 @@ class MLoadTypedArrayElementStatic
     bool canProduceFloat32() const { return viewType() == Scalar::Float32; }
 };
 
 class MStoreTypedArrayElement
   : public MTernaryInstruction,
     public StoreTypedArrayPolicy::Data
 {
     Scalar::Type arrayType_;
+    bool requiresBarrier_;
 
     // See note in MStoreElementCommon.
     bool racy_;
 
     MStoreTypedArrayElement(MDefinition *elements, MDefinition *index, MDefinition *value,
-                            Scalar::Type arrayType)
-      : MTernaryInstruction(elements, index, value), arrayType_(arrayType), racy_(false)
-    {
-        setMovable();
+                            Scalar::Type arrayType, MemoryBarrierRequirement requiresBarrier)
+      : MTernaryInstruction(elements, index, value),
+        arrayType_(arrayType),
+        requiresBarrier_(requiresBarrier == DoesRequireMemoryBarrier),
+        racy_(false)
+    {
+        if (requiresBarrier_)
+            setGuard();         // Not removable or movable
+        else
+            setMovable();
         MOZ_ASSERT(elements->type() == MIRType_Elements);
         MOZ_ASSERT(index->type() == MIRType_Int32);
         MOZ_ASSERT(arrayType >= 0 && arrayType < Scalar::TypeMax);
     }
 
   public:
     INSTRUCTION_HEADER(StoreTypedArrayElement)
 
     static MStoreTypedArrayElement *New(TempAllocator &alloc, MDefinition *elements, MDefinition *index,
-                                        MDefinition *value, Scalar::Type arrayType)
-    {
-        return new(alloc) MStoreTypedArrayElement(elements, index, value, arrayType);
+                                        MDefinition *value, Scalar::Type arrayType,
+                                        MemoryBarrierRequirement requiresBarrier = DoesNotRequireMemoryBarrier)
+    {
+        return new(alloc) MStoreTypedArrayElement(elements, index, value, arrayType,
+                                                  requiresBarrier);
     }
 
     Scalar::Type arrayType() const {
         return arrayType_;
     }
     bool isByteArray() const {
         return arrayType_ == Scalar::Int8 ||
                arrayType_ == Scalar::Uint8 ||
@@ -8256,16 +8292,19 @@ class MStoreTypedArrayElement
         return getOperand(1);
     }
     MDefinition *value() const {
         return getOperand(2);
     }
     AliasSet getAliasSet() const {
         return AliasSet::Store(AliasSet::TypedArrayElement);
     }
+    bool requiresMemoryBarrier() const {
+        return requiresBarrier_;
+    }
     bool racy() const {
         return racy_;
     }
     void setRacy() {
         racy_ = true;
     }
     TruncateKind operandTruncateKind(size_t index) const;
 
@@ -11447,16 +11486,169 @@ class MRecompileCheck : public MNullaryI
         return increaseWarmUpCounter_;
     }
 
     AliasSet getAliasSet() const {
         return AliasSet::None();
     }
 };
 
+// All barriered operations - MMemoryBarrier, MCompareExchangeTypedArrayElement,
+// and MAtomicTypedArrayElementBinop, as well as MLoadTypedArrayElement and
+// MStoreTypedArrayElement when they are marked as requiring a memory barrier - have
+// the following attributes:
+//
+// - Not movable
+// - Not removable
+// - Not congruent with any other instruction
+// - Effectful (they alias every TypedArray store)
+//
+// The intended effect of those constraints is to prevent all loads
+// and stores preceding the barriered operation from being moved to
+// after the barriered operation, and vice versa, and to prevent the
+// barriered operation from being removed or hoisted.
+
+class MMemoryBarrier
+  : public MNullaryInstruction
+{
+    // The type is a combination of the memory barrier types in AtomicOp.h.
+    const int type_;
+
+    explicit MMemoryBarrier(int type)
+      : type_(type)
+    {
+        MOZ_ASSERT((type_ & ~MembarAllbits) == 0);
+        setGuard();             // Not removable
+    }
+
+  public:
+    INSTRUCTION_HEADER(MemoryBarrier);
+
+    static MMemoryBarrier *New(TempAllocator &alloc, int type=MembarFull) {
+        return new(alloc) MMemoryBarrier(type);
+    }
+    int type() const {
+        return type_;
+    }
+
+    AliasSet getAliasSet() const {
+        return AliasSet::Store(AliasSet::TypedArrayElement);
+    }
+};
+
+class MCompareExchangeTypedArrayElement
+  : public MAryInstruction<4>,
+    public MixPolicy< MixPolicy<ObjectPolicy<0>, IntPolicy<1> >, MixPolicy<IntPolicy<2>, IntPolicy<3> > >
+{
+    Scalar::Type arrayType_;
+
+    explicit MCompareExchangeTypedArrayElement(MDefinition *elements, MDefinition *index,
+                                               Scalar::Type arrayType, MDefinition *oldval,
+                                               MDefinition *newval)
+      : arrayType_(arrayType)
+    {
+        initOperand(0, elements);
+        initOperand(1, index);
+        initOperand(2, oldval);
+        initOperand(3, newval);
+        setGuard();             // Not removable
+    }
+
+  public:
+    INSTRUCTION_HEADER(CompareExchangeTypedArrayElement);
+
+    static MCompareExchangeTypedArrayElement *New(TempAllocator &alloc, MDefinition *elements,
+                                                  MDefinition *index, Scalar::Type arrayType,
+                                                  MDefinition *oldval, MDefinition *newval)
+    {
+        return new(alloc) MCompareExchangeTypedArrayElement(elements, index, arrayType, oldval, newval);
+    }
+    bool isByteArray() const {
+        return (arrayType_ == Scalar::Int8 ||
+                arrayType_ == Scalar::Uint8 ||
+                arrayType_ == Scalar::Uint8Clamped);
+    }
+    MDefinition *elements() {
+        return getOperand(0);
+    }
+    MDefinition *index() {
+        return getOperand(1);
+    }
+    MDefinition *oldval() {
+        return getOperand(2);
+    }
+    int oldvalOperand() {
+        return 2;
+    }
+    MDefinition *newval() {
+        return getOperand(3);
+    }
+    Scalar::Type arrayType() const {
+        return arrayType_;
+    }
+    AliasSet getAliasSet() const {
+        return AliasSet::Store(AliasSet::TypedArrayElement);
+    }
+};
+
+class MAtomicTypedArrayElementBinop
+    : public MAryInstruction<3>,
+      public Mix3Policy< ObjectPolicy<0>, IntPolicy<1>, IntPolicy<2> >
+{
+  private:
+    AtomicOp op_;
+    Scalar::Type arrayType_;
+
+  protected:
+    explicit MAtomicTypedArrayElementBinop(AtomicOp op, MDefinition *elements, MDefinition *index,
+                                           Scalar::Type arrayType, MDefinition *value)
+      : op_(op),
+        arrayType_(arrayType)
+    {
+        initOperand(0, elements);
+        initOperand(1, index);
+        initOperand(2, value);
+        setGuard();             // Not removable
+    }
+
+  public:
+    INSTRUCTION_HEADER(AtomicTypedArrayElementBinop);
+
+    static MAtomicTypedArrayElementBinop *New(TempAllocator &alloc, AtomicOp op,
+                                              MDefinition *elements, MDefinition *index,
+                                              Scalar::Type arrayType, MDefinition *value)
+    {
+        return new(alloc) MAtomicTypedArrayElementBinop(op, elements, index, arrayType, value);
+    }
+
+    bool isByteArray() const {
+        return (arrayType_ == Scalar::Int8 ||
+                arrayType_ == Scalar::Uint8 ||
+                arrayType_ == Scalar::Uint8Clamped);
+    }
+    AtomicOp operation() const {
+        return op_;
+    }
+    Scalar::Type arrayType() const {
+        return arrayType_;
+    }
+    MDefinition *elements() {
+        return getOperand(0);
+    }
+    MDefinition *index() {
+        return getOperand(1);
+    }
+    MDefinition *value() {
+        return getOperand(2);
+    }
+    AliasSet getAliasSet() const {
+        return AliasSet::Store(AliasSet::TypedArrayElement);
+    }
+};
+
 class MAsmJSNeg : public MUnaryInstruction
 {
     MAsmJSNeg(MDefinition *op, MIRType type)
       : MUnaryInstruction(op)
     {
         setResultType(type);
         setMovable();
     }
--- a/js/src/jit/MOpcodes.h
+++ b/js/src/jit/MOpcodes.h
@@ -179,16 +179,18 @@ namespace jit {
     _(ArrayConcat)                                                          \
     _(ArrayJoin)                                                            \
     _(LoadTypedArrayElement)                                                \
     _(LoadTypedArrayElementHole)                                            \
     _(LoadTypedArrayElementStatic)                                          \
     _(StoreTypedArrayElement)                                               \
     _(StoreTypedArrayElementHole)                                           \
     _(StoreTypedArrayElementStatic)                                         \
+    _(CompareExchangeTypedArrayElement)                                     \
+    _(AtomicTypedArrayElementBinop)                                         \
     _(EffectiveAddress)                                                     \
     _(ClampToUint8)                                                         \
     _(LoadFixedSlot)                                                        \
     _(StoreFixedSlot)                                                       \
     _(CallGetProperty)                                                      \
     _(GetNameCache)                                                         \
     _(CallGetIntrinsicValue)                                                \
     _(CallsiteCloneCache)                                                   \
@@ -246,16 +248,17 @@ namespace jit {
     _(NewDerivedTypedObject)                                                \
     _(LambdaPar)                                                            \
     _(RestPar)                                                              \
     _(ForkJoinContext)                                                      \
     _(ForkJoinGetSlice)                                                     \
     _(GuardThreadExclusive)                                                 \
     _(InterruptCheckPar)                                                    \
     _(RecompileCheck)                                                       \
+    _(MemoryBarrier)                                                        \
     _(UnknownValue)                                                         \
     _(LexicalCheck)                                                         \
     _(ThrowUninitializedLexical)
 
 // Forward declarations of MIR types.
 #define FORWARD_DECLARE(op) class M##op;
  MIR_OPCODE_LIST(FORWARD_DECLARE)
 #undef FORWARD_DECLARE
--- a/js/src/jit/ParallelSafetyAnalysis.cpp
+++ b/js/src/jit/ParallelSafetyAnalysis.cpp
@@ -343,21 +343,24 @@ class ParallelSafetyVisitor : public MDe
     UNSAFE_OP(AsmJSLoadFuncPtr)
     UNSAFE_OP(AsmJSLoadFFIFunc)
     UNSAFE_OP(AsmJSReturn)
     UNSAFE_OP(AsmJSVoidReturn)
     UNSAFE_OP(AsmJSPassStackArg)
     UNSAFE_OP(AsmJSParameter)
     UNSAFE_OP(AsmJSCall)
     DROP_OP(RecompileCheck)
+    UNSAFE_OP(CompareExchangeTypedArrayElement)
+    UNSAFE_OP(AtomicTypedArrayElementBinop)
+    UNSAFE_OP(MemoryBarrier)
     UNSAFE_OP(UnknownValue)
     UNSAFE_OP(LexicalCheck)
     UNSAFE_OP(ThrowUninitializedLexical)
 
-    // It looks like this could easily be made safe:
+    // It looks like these could easily be made safe:
     UNSAFE_OP(ConvertElementsToDoubles)
     UNSAFE_OP(MaybeCopyElementsForWrite)
 };
 
 static void
 TransplantResumePoint(MInstruction *oldInstruction, MInstruction *replacementInstruction)
 {
     MOZ_ASSERT(!oldInstruction->isDiscarded());
--- a/js/src/jit/TypePolicy.cpp
+++ b/js/src/jit/TypePolicy.cpp
@@ -419,16 +419,17 @@ IntPolicy<Op>::staticAdjustInputs(TempAl
     def->replaceOperand(Op, replace);
 
     return replace->typePolicy()->adjustInputs(alloc, replace);
 }
 
 template bool IntPolicy<0>::staticAdjustInputs(TempAllocator &alloc, MInstruction *def);
 template bool IntPolicy<1>::staticAdjustInputs(TempAllocator &alloc, MInstruction *def);
 template bool IntPolicy<2>::staticAdjustInputs(TempAllocator &alloc, MInstruction *def);
+template bool IntPolicy<3>::staticAdjustInputs(TempAllocator &alloc, MInstruction *def);
 
 template <unsigned Op>
 bool
 ConvertToInt32Policy<Op>::staticAdjustInputs(TempAllocator &alloc, MInstruction *def)
 {
     MDefinition *in = def->getOperand(Op);
     if (in->type() == MIRType_Int32)
         return true;
--- a/js/src/jit/none/Lowering-none.h
+++ b/js/src/jit/none/Lowering-none.h
@@ -73,16 +73,18 @@ class LIRGeneratorNone : public LIRGener
     bool visitGuardObjectType(MGuardObjectType *ins) { MOZ_CRASH(); }
     bool visitAsmJSUnsignedToDouble(MAsmJSUnsignedToDouble *ins) { MOZ_CRASH(); }
     bool visitAsmJSUnsignedToFloat32(MAsmJSUnsignedToFloat32 *ins) { MOZ_CRASH(); }
     bool visitAsmJSLoadHeap(MAsmJSLoadHeap *ins) { MOZ_CRASH(); }
     bool visitAsmJSStoreHeap(MAsmJSStoreHeap *ins) { MOZ_CRASH(); }
     bool visitAsmJSLoadFuncPtr(MAsmJSLoadFuncPtr *ins) { MOZ_CRASH(); }
     bool visitStoreTypedArrayElementStatic(MStoreTypedArrayElementStatic *ins) { MOZ_CRASH(); }
     bool visitForkJoinGetSlice(MForkJoinGetSlice *ins) { MOZ_CRASH(); }
+    bool visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins) { MOZ_CRASH(); }
+    bool visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins) { MOZ_CRASH(); }
 
     LTableSwitch *newLTableSwitch(LAllocation, LDefinition, MTableSwitch *) { MOZ_CRASH(); }
     LTableSwitchV *newLTableSwitchV(MTableSwitch *) { MOZ_CRASH(); }
     bool visitSimdTernaryBitwise(MSimdTernaryBitwise *ins) { MOZ_CRASH(); }
     bool visitSimdSplatX4(MSimdSplatX4 *ins) { MOZ_CRASH(); }
     bool visitSimdValueX4(MSimdValueX4 *lir) { MOZ_CRASH(); }
 };
 
--- a/js/src/jit/none/MacroAssembler-none.h
+++ b/js/src/jit/none/MacroAssembler-none.h
@@ -291,16 +291,47 @@ class MacroAssemblerNone : public Assemb
     template <typename T, typename S> void storeUnalignedInt32x4(T, S) { MOZ_CRASH(); }
     template <typename T, typename S> void storeAlignedFloat32x4(T, S) { MOZ_CRASH(); }
     template <typename T, typename S> void storeUnalignedFloat32x4(T, S) { MOZ_CRASH(); }
     template <typename T, typename S> void store8(T, S) { MOZ_CRASH(); }
     template <typename T, typename S> void store16(T, S) { MOZ_CRASH(); }
 
     template <typename T> void computeEffectiveAddress(T, Register) { MOZ_CRASH(); }
 
+    template <typename T> void compareExchange8SignExtend(const T &mem, Register oldval, Register newval, Register output) { MOZ_CRASH(); }
+    template <typename T> void compareExchange8ZeroExtend(const T &mem, Register oldval, Register newval, Register output) { MOZ_CRASH(); }
+    template <typename T> void compareExchange16SignExtend(const T &mem, Register oldval, Register newval, Register output) { MOZ_CRASH(); }
+    template <typename T> void compareExchange16ZeroExtend(const T &mem, Register oldval, Register newval, Register output) { MOZ_CRASH(); }
+    template <typename T> void compareExchange32(const T &mem, Register oldval, Register newval, Register output) { MOZ_CRASH(); }
+    template <typename T, typename S> void atomicFetchAdd8SignExtend(const T &value, const S &mem, Register output) { MOZ_CRASH(); }
+    template <typename T, typename S> void atomicFetchAdd8ZeroExtend(const T &value, const S &mem, Register output) { MOZ_CRASH(); }
+    template <typename T, typename S> void atomicFetchAdd16SignExtend(const T &value, const S &mem, Register output) { MOZ_CRASH(); }
+    template <typename T, typename S> void atomicFetchAdd16ZeroExtend(const T &value, const S &mem, Register output) { MOZ_CRASH(); }
+    template <typename T, typename S> void atomicFetchAdd32(const T &value, const S &mem, Register output) { MOZ_CRASH(); }
+    template <typename T, typename S> void atomicFetchSub8SignExtend(const T &value, const S &mem, Register output) { MOZ_CRASH(); }
+    template <typename T, typename S> void atomicFetchSub8ZeroExtend(const T &value, const S &mem, Register output) { MOZ_CRASH(); }
+    template <typename T, typename S> void atomicFetchSub16SignExtend(const T &value, const S &mem, Register output) { MOZ_CRASH(); }
+    template <typename T, typename S> void atomicFetchSub16ZeroExtend(const T &value, const S &mem, Register output) { MOZ_CRASH(); }
+    template <typename T, typename S> void atomicFetchSub32(const T &value, const S &mem, Register output) { MOZ_CRASH(); }
+    template <typename T, typename S> void atomicFetchAnd8SignExtend(const T &value, const S &mem, Register temp, Register output) { MOZ_CRASH(); }
+    template <typename T, typename S> void atomicFetchAnd8ZeroExtend(const T &value, const S &mem, Register temp, Register output) { MOZ_CRASH(); }
+    template <typename T, typename S> void atomicFetchAnd16SignExtend(const T &value, const S &mem, Register temp, Register output) { MOZ_CRASH(); }
+    template <typename T, typename S> void atomicFetchAnd16ZeroExtend(const T &value, const S &mem, Register temp, Register output) { MOZ_CRASH(); }
+    template <typename T, typename S> void atomicFetchAnd32(const T &value, const S &mem, Register temp, Register output) { MOZ_CRASH(); }
+    template <typename T, typename S> void atomicFetchOr8SignExtend(const T &value, const S &mem, Register temp, Register output) { MOZ_CRASH(); }
+    template <typename T, typename S> void atomicFetchOr8ZeroExtend(const T &value, const S &mem, Register temp, Register output) { MOZ_CRASH(); }
+    template <typename T, typename S> void atomicFetchOr16SignExtend(const T &value, const S &mem, Register temp, Register output) { MOZ_CRASH(); }
+    template <typename T, typename S> void atomicFetchOr16ZeroExtend(const T &value, const S &mem, Register temp, Register output) { MOZ_CRASH(); }
+    template <typename T, typename S> void atomicFetchOr32(const T &value, const S &mem, Register temp, Register output) { MOZ_CRASH(); }
+    template <typename T, typename S> void atomicFetchXor8SignExtend(const T &value, const S &mem, Register temp, Register output) { MOZ_CRASH(); }
+    template <typename T, typename S> void atomicFetchXor8ZeroExtend(const T &value, const S &mem, Register temp, Register output) { MOZ_CRASH(); }
+    template <typename T, typename S> void atomicFetchXor16SignExtend(const T &value, const S &mem, Register temp, Register output) { MOZ_CRASH(); }
+    template <typename T, typename S> void atomicFetchXor16ZeroExtend(const T &value, const S &mem, Register temp, Register output) { MOZ_CRASH(); }
+    template <typename T, typename S> void atomicFetchXor32(const T &value, const S &mem, Register temp, Register output) { MOZ_CRASH(); }
+
     void clampIntToUint8(Register) { MOZ_CRASH(); }
 
     Register splitTagForTest(ValueOperand) { MOZ_CRASH(); }
 
     template <typename T> void branchTestUndefined(Condition, T, Label *) { MOZ_CRASH(); }
     template <typename T> void branchTestInt32(Condition, T, Label *) { MOZ_CRASH(); }
     template <typename T> void branchTestBoolean(Condition, T, Label *) { MOZ_CRASH(); }
     template <typename T> void branchTestDouble(Condition, T, Label *) { MOZ_CRASH(); }
--- a/js/src/jit/shared/Assembler-x86-shared.h
+++ b/js/src/jit/shared/Assembler-x86-shared.h
@@ -624,28 +624,46 @@ class AssemblerX86Shared : public Assemb
             break;
           case Operand::MEM_SCALE:
             masm.movzbl_mr(src.disp(), src.base(), src.index(), src.scale(), dest.code());
             break;
           default:
             MOZ_CRASH("unexpected operand kind");
         }
     }
+    void movsbl(Register src, Register dest) {
+        masm.movsbl_rr(src.code(), dest.code());
+    }
     void movsbl(const Operand &src, Register dest) {
         switch (src.kind()) {
           case Operand::MEM_REG_DISP:
             masm.movsbl_mr(src.disp(), src.base(), dest.code());
             break;
           case Operand::MEM_SCALE:
             masm.movsbl_mr(src.disp(), src.base(), src.index(), src.scale(), dest.code());
             break;
           default:
             MOZ_CRASH("unexpected operand kind");
         }
     }
+    void movb(const Operand &src, Register dest) {
+        switch (src.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.movb_mr(src.disp(), src.base(), dest.code());
+            break;
+          case Operand::MEM_SCALE:
+            masm.movb_mr(src.disp(), src.base(), src.index(), src.scale(), dest.code());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
+    }
+    void movb(Imm32 src, Register dest) {
+        masm.movb_i8r(src.value & 255, dest.code());
+    }
     void movb(Register src, const Operand &dest) {
         switch (dest.kind()) {
           case Operand::MEM_REG_DISP:
             masm.movb_rm(src.code(), dest.disp(), dest.base());
             break;
           case Operand::MEM_SCALE:
             masm.movb_rm(src.code(), dest.disp(), dest.base(), dest.index(), dest.scale());
             break;
@@ -678,16 +696,24 @@ class AssemblerX86Shared : public Assemb
             break;
           default:
             MOZ_CRASH("unexpected operand kind");
         }
     }
     void movzwl(Register src, Register dest) {
         masm.movzwl_rr(src.code(), dest.code());
     }
+    void movw(const Operand &src, Register dest) {
+        masm.prefix_16_for_32();
+        movl(src, dest);
+    }
+    void movw(Imm32 src, Register dest) {
+        masm.prefix_16_for_32();
+        movl(src, dest);
+    }
     void movw(Register src, const Operand &dest) {
         switch (dest.kind()) {
           case Operand::MEM_REG_DISP:
             masm.movw_rm(src.code(), dest.disp(), dest.base());
             break;
           case Operand::MEM_SCALE:
             masm.movw_rm(src.code(), dest.disp(), dest.base(), dest.index(), dest.scale());
             break;
@@ -702,16 +728,19 @@ class AssemblerX86Shared : public Assemb
             break;
           case Operand::MEM_SCALE:
             masm.movw_i16m(src.value, dest.disp(), dest.base(), dest.index(), dest.scale());
             break;
           default:
             MOZ_CRASH("unexpected operand kind");
         }
     }
+    void movswl(Register src, Register dest) {
+        masm.movswl_rr(src.code(), dest.code());
+    }
     void movswl(const Operand &src, Register dest) {
         switch (src.kind()) {
           case Operand::MEM_REG_DISP:
             masm.movswl_mr(src.disp(), src.base(), dest.code());
             break;
           case Operand::MEM_SCALE:
             masm.movswl_mr(src.disp(), src.base(), src.index(), src.scale(), dest.code());
             break;
@@ -916,19 +945,17 @@ class AssemblerX86Shared : public Assemb
             MOZ_CRASH("unexpected operand kind");
         }
     }
 
     void breakpoint() {
         masm.int3();
     }
 
-#ifdef DEBUG
     static bool HasSSE2() { return CPUInfo::IsSSE2Present(); }
-#endif
     static bool HasSSE3() { return CPUInfo::IsSSE3Present(); }
     static bool HasSSE41() { return CPUInfo::IsSSE41Present(); }
     static bool SupportsFloatingPoint() { return CPUInfo::IsSSE2Present(); }
     static bool SupportsSimd() { return CPUInfo::IsSSE2Present(); }
 
     // The below cmpl methods switch the lhs and rhs when it invokes the
     // macroassembler to conform with intel standard.  When calling this
     // function put the left operand on the left as you would expect.
@@ -1055,16 +1082,22 @@ class AssemblerX86Shared : public Assemb
             break;
           case Operand::MEM_ADDRESS32:
             masm.addl_im(imm.value, op.address());
             break;
           default:
             MOZ_CRASH("unexpected operand kind");
         }
     }
+    // Note: lock_addl() is used for a memory barrier on non-SSE2 systems.
+    // Do not optimize it away or replace it with XADDL or similar.
+    void lock_addl(Imm32 imm, const Operand &op) {
+        masm.prefix_lock();
+        addl(imm, op);
+    }
     void subl(Imm32 imm, Register dest) {
         masm.subl_ir(imm.value, dest.code());
     }
     void subl(Imm32 imm, const Operand &op) {
         switch (op.kind()) {
           case Operand::REG:
             masm.subl_ir(imm.value, op.reg());
             break;
@@ -1306,34 +1339,79 @@ class AssemblerX86Shared : public Assemb
             MOZ_CRASH("unexpected operand kind");
         }
     }
     void lock_decl(const Operand &op) {
         masm.prefix_lock();
         decl(op);
     }
 
-    void lock_cmpxchg32(Register src, const Operand &op) {
+    void lock_cmpxchg8(Register src, const Operand &mem) {
+        masm.prefix_lock();
+        switch (mem.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.cmpxchg8(src.code(), mem.disp(), mem.base());
+            break;
+          case Operand::MEM_SCALE:
+            masm.cmpxchg8(src.code(), mem.disp(), mem.base(), mem.index(), mem.scale());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
+    }
+    void lock_cmpxchg16(Register src, const Operand &mem) {
         masm.prefix_lock();
-        switch (op.kind()) {
+        switch (mem.kind()) {
           case Operand::MEM_REG_DISP:
-            masm.cmpxchg32(src.code(), op.disp(), op.base());
+            masm.cmpxchg16(src.code(), mem.disp(), mem.base());
+            break;
+          case Operand::MEM_SCALE:
+            masm.cmpxchg16(src.code(), mem.disp(), mem.base(), mem.index(), mem.scale());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
+    }
+    void lock_cmpxchg32(Register src, const Operand &mem) {
+        masm.prefix_lock();
+        switch (mem.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.cmpxchg32(src.code(), mem.disp(), mem.base());
+            break;
+          case Operand::MEM_SCALE:
+            masm.cmpxchg32(src.code(), mem.disp(), mem.base(), mem.index(), mem.scale());
             break;
           default:
             MOZ_CRASH("unexpected operand kind");
         }
     }
 
-    void xaddl(Register srcdest, const Operand &mem) {
+    void lock_xaddb(Register srcdest, const Operand &mem) {
         switch (mem.kind()) {
           case Operand::MEM_REG_DISP:
-            masm.xaddl_rm(srcdest.code(), mem.disp(), mem.base());
+            masm.lock_xaddb_rm(srcdest.code(), mem.disp(), mem.base());
             break;
           case Operand::MEM_SCALE:
-            masm.xaddl_rm(srcdest.code(), mem.disp(), mem.base(), mem.index(), mem.scale());
+            masm.lock_xaddb_rm(srcdest.code(), mem.disp(), mem.base(), mem.index(), mem.scale());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
+    }
+    void lock_xaddw(Register srcdest, const Operand &mem) {
+        masm.prefix_16_for_32();
+        lock_xaddl(srcdest, mem);
+    }
+    void lock_xaddl(Register srcdest, const Operand &mem) {
+        switch (mem.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.lock_xaddl_rm(srcdest.code(), mem.disp(), mem.base());
+            break;
+          case Operand::MEM_SCALE:
+            masm.lock_xaddl_rm(srcdest.code(), mem.disp(), mem.base(), mem.index(), mem.scale());
             break;
           default:
             MOZ_CRASH("unexpected operand kind");
         }
     }
 
     void push(const Imm32 imm) {
         masm.push_i32(imm.value);
--- a/js/src/jit/shared/BaseAssembler-x86-shared.h
+++ b/js/src/jit/shared/BaseAssembler-x86-shared.h
@@ -211,26 +211,31 @@ public:
         RoundToNearest = 0x0,
         RoundDown      = 0x1,
         RoundUp        = 0x2,
         RoundToZero    = 0x3
     } RoundingMode;
 
 private:
     typedef enum {
+        OP_ADD_EbGb                     = 0x00,
         OP_ADD_EvGv                     = 0x01,
         OP_ADD_GvEv                     = 0x03,
+        OP_OR_EbGb                      = 0x08,
         OP_OR_EvGv                      = 0x09,
         OP_OR_GvEv                      = 0x0B,
         OP_2BYTE_ESCAPE                 = 0x0F,
+        OP_AND_EbGb                     = 0x20,
         OP_AND_EvGv                     = 0x21,
         OP_AND_GvEv                     = 0x23,
+        OP_SUB_EbGb                     = 0x28,
         OP_SUB_EvGv                     = 0x29,
         OP_SUB_GvEv                     = 0x2B,
         PRE_PREDICT_BRANCH_NOT_TAKEN    = 0x2E,
+        OP_XOR_EbGb                     = 0x30,
         OP_XOR_EvGv                     = 0x31,
         OP_XOR_GvEv                     = 0x33,
         OP_CMP_EvGv                     = 0x39,
         OP_CMP_GvEv                     = 0x3B,
         OP_CMP_EAXIv                    = 0x3D,
 #ifdef JS_CODEGEN_X64
         PRE_REX                         = 0x40,
 #endif
@@ -250,16 +255,17 @@ private:
         OP_GROUP1_EbIb                  = 0x80,
         OP_GROUP1_EvIz                  = 0x81,
         OP_GROUP1_EvIb                  = 0x83,
         OP_TEST_EbGb                    = 0x84,
         OP_TEST_EvGv                    = 0x85,
         OP_XCHG_EvGv                    = 0x87,
         OP_MOV_EbGv                     = 0x88,
         OP_MOV_EvGv                     = 0x89,
+        OP_MOV_GvEb                     = 0x8A,
         OP_MOV_GvEv                     = 0x8B,
         OP_LEA                          = 0x8D,
         OP_GROUP1A_Ev                   = 0x8F,
         OP_NOP                          = 0x90,
         OP_PUSHFLAGS                    = 0x9C,
         OP_POPFLAGS                     = 0x9D,
         OP_CDQ                          = 0x99,
         OP_MOV_EAXOv                    = 0xA1,
@@ -344,23 +350,26 @@ private:
         OP2_PSRLD_UdqIb     = 0x72,
         OP2_PSRLDQ_Vd       = 0x73,
         OP2_PCMPEQW         = 0x75,
         OP2_PCMPEQD_VdqWdq  = 0x76,
         OP2_MOVD_EdVd       = 0x7E,
         OP2_MOVDQ_WdqVdq    = 0x7F,
         OP2_JCC_rel32       = 0x80,
         OP_SETCC            = 0x90,
+        OP_FENCE            = 0xAE,
         OP2_IMUL_GvEv       = 0xAF,
+        OP2_CMPXCHG_GvEb    = 0xB0,
         OP2_CMPXCHG_GvEw    = 0xB1,
         OP2_BSR_GvEv        = 0xBD,
         OP2_MOVSX_GvEb      = 0xBE,
         OP2_MOVSX_GvEw      = 0xBF,
         OP2_MOVZX_GvEb      = 0xB6,
         OP2_MOVZX_GvEw      = 0xB7,
+        OP2_XADD_EbGb       = 0xC0,
         OP2_XADD_EvGv       = 0xC1,
         OP2_CMPPS_VpsWps    = 0xC2,
         OP2_PEXTRW_GdUdIb   = 0xC5,
         OP2_SHUFPS_VpsWpsIb = 0xC6,
         OP2_PSRLD_VdqWdq    = 0xD2,
         OP2_PSRAD_VdqWdq    = 0xE2,
         OP2_PXORDQ_VdqWdq   = 0xEF,
         OP2_PSLLD_VdqWdq    = 0xF2,
@@ -678,25 +687,42 @@ public:
             m_formatter.oneByteOp(OP_GROUP1_EvIb, GROUP1_OP_ADD, addr);
             m_formatter.immediate8(imm);
         } else {
             m_formatter.oneByteOp(OP_GROUP1_EvIz, GROUP1_OP_ADD, addr);
             m_formatter.immediate32(imm);
         }
     }
 
-    void xaddl_rm(RegisterID srcdest, int offset, RegisterID base)
+    void lock_xaddb_rm(RegisterID srcdest, int offset, RegisterID base)
+    {
+        spew("lock xaddl %s, %s0x%x(%s)",
+            nameIReg(1, srcdest), PRETTY_PRINT_OFFSET(offset), nameIReg(base));
+        m_formatter.oneByteOp(PRE_LOCK);
+        m_formatter.twoByteOp(OP2_XADD_EbGb, srcdest, base, offset);
+    }
+
+    void lock_xaddb_rm(RegisterID srcdest, int offset, RegisterID base, RegisterID index, int scale)
+    {
+        spew("lock xaddl %s, %s0x%x(%s,%s,%d)",
+            nameIReg(1, srcdest), PRETTY_PRINT_OFFSET(offset),
+            nameIReg(base), nameIReg(index), 1<<scale);
+        m_formatter.oneByteOp(PRE_LOCK);
+        m_formatter.twoByteOp(OP2_XADD_EbGb, srcdest, base, index, scale, offset);
+    }
+
+    void lock_xaddl_rm(RegisterID srcdest, int offset, RegisterID base)
     {
         spew("lock xaddl %s, %s0x%x(%s)",
             nameIReg(4,srcdest), PRETTY_PRINT_OFFSET(offset), nameIReg(base));
         m_formatter.oneByteOp(PRE_LOCK);
         m_formatter.twoByteOp(OP2_XADD_EvGv, srcdest, base, offset);
     }
 
-    void xaddl_rm(RegisterID srcdest, int offset, RegisterID base, RegisterID index, int scale)
+    void lock_xaddl_rm(RegisterID srcdest, int offset, RegisterID base, RegisterID index, int scale)
     {
         spew("lock xaddl %s, %s0x%x(%s,%s,%d)",
             nameIReg(4, srcdest), PRETTY_PRINT_OFFSET(offset),
             nameIReg(base), nameIReg(index), 1<<scale);
         m_formatter.oneByteOp(PRE_LOCK);
         m_formatter.twoByteOp(OP2_XADD_EvGv, srcdest, base, index, scale, offset);
     }
 
@@ -1422,37 +1448,76 @@ public:
     }
 
     void prefix_lock()
     {
         spew("lock");
         m_formatter.oneByteOp(PRE_LOCK);
     }
 
+    void prefix_16_for_32()
+    {
+        m_formatter.prefix(PRE_OPERAND_SIZE);
+    }
+
     void incl_m32(int offset, RegisterID base)
     {
         spew("incl       %s0x%x(%s)", PRETTY_PRINT_OFFSET(offset), nameIReg(base));
         m_formatter.oneByteOp(OP_GROUP5_Ev, GROUP5_OP_INC, base, offset);
     }
 
     void decl_m32(int offset, RegisterID base)
     {
         spew("decl       %s0x%x(%s)", PRETTY_PRINT_OFFSET(offset), nameIReg(base));
         m_formatter.oneByteOp(OP_GROUP5_Ev, GROUP5_OP_DEC, base, offset);
     }
 
+    // Note that CMPXCHG performs comparison against REG = %al/%ax/%eax.
+    // If %REG == [%base+offset], then %src -> [%base+offset].
+    // Otherwise, [%base+offset] -> %REG.
+    // For the 8-bit operations src must also be an 8-bit register.
+
+    void cmpxchg8(RegisterID src, int offset, RegisterID base)
+    {
+        spew("cmpxchg8    %s, %s0x%x(%s)",
+             nameIReg(src), PRETTY_PRINT_OFFSET(offset), nameIReg(base));
+        m_formatter.twoByteOp(OP2_CMPXCHG_GvEb, src, base, offset);
+    }
+    void cmpxchg8(RegisterID src, int offset, RegisterID base, RegisterID index, int scale)
+    {
+        spew("cmpxchg8    %s, %s0x%x(%s,%s,%d)",
+             nameIReg(src), PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameIReg(index), 1<<scale);
+        m_formatter.twoByteOp(OP2_CMPXCHG_GvEb, src, base, index, scale, offset);
+    }
+    void cmpxchg16(RegisterID src, int offset, RegisterID base)
+    {
+        spew("cmpxchg16    %s, %s0x%x(%s)",
+             nameIReg(src), PRETTY_PRINT_OFFSET(offset), nameIReg(base));
+        m_formatter.prefix(PRE_OPERAND_SIZE);
+        m_formatter.twoByteOp(OP2_CMPXCHG_GvEw, src, base, offset);
+    }
+    void cmpxchg16(RegisterID src, int offset, RegisterID base, RegisterID index, int scale)
+    {
+        spew("cmpxchg16    %s, %s0x%x(%s,%s,%d)",
+             nameIReg(src), PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameIReg(index), 1<<scale);
+        m_formatter.prefix(PRE_OPERAND_SIZE);
+        m_formatter.twoByteOp(OP2_CMPXCHG_GvEw, src, base, index, scale, offset);
+    }
     void cmpxchg32(RegisterID src, int offset, RegisterID base)
     {
-        // Note that 32-bit CMPXCHG performs comparison against %eax.
-        // If %eax == [%base+offset], then %src -> [%base+offset].
-        // Otherwise, [%base+offset] -> %eax.
-        spew("cmpxchg    %s, %s0x%x(%s)",
+        spew("cmpxchg32    %s, %s0x%x(%s)",
              nameIReg(src), PRETTY_PRINT_OFFSET(offset), nameIReg(base));
         m_formatter.twoByteOp(OP2_CMPXCHG_GvEw, src, base, offset);
     }
+    void cmpxchg32(RegisterID src, int offset, RegisterID base, RegisterID index, int scale)
+    {
+        spew("cmpxchg32    %s, %s0x%x(%s,%s,%d)",
+             nameIReg(src), PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameIReg(index), 1<<scale);
+        m_formatter.twoByteOp(OP2_CMPXCHG_GvEw, src, base, index, scale, offset);
+    }
 
 
     // Comparisons:
 
     void cmpl_rr(RegisterID src, RegisterID dst)
     {
         spew("cmpl       %s, %s",
              nameIReg(4, src), nameIReg(4, dst));
@@ -1980,16 +2045,24 @@ public:
     void movl_i32r(int imm, RegisterID dst)
     {
         spew("movl       $0x%x, %s",
              imm, nameIReg(4, dst));
         m_formatter.oneByteOp(OP_MOV_EAXIv, dst);
         m_formatter.immediate32(imm);
     }
 
+    void movb_i8r(int imm, RegisterID reg)
+    {
+        spew("movb       $0x%x, %s",
+             imm, nameIReg(1, reg));
+        m_formatter.oneByteOp(OP_MOV_EbGv, reg);
+        m_formatter.immediate8(imm);
+    }
+
     void movb_i8m(int imm, int offset, RegisterID base)
     {
         spew("movb       $0x%x, %s0x%x(%s)",
              imm, PRETTY_PRINT_OFFSET(offset), nameIReg(base));
         m_formatter.oneByteOp(OP_GROUP11_EvIb, GROUP11_MOV, base, offset);
         m_formatter.immediate8(imm);
     }
 
@@ -2270,16 +2343,30 @@ public:
 
     void movb_rm(RegisterID src, const void* addr)
     {
         spew("movb       %s, %p",
              nameIReg(1, src), addr);
         m_formatter.oneByteOp8(OP_MOV_EbGv, src, addr);
     }
 
+    void movb_mr(int offset, RegisterID base, RegisterID dst)
+    {
+        spew("movb       %s0x%x(%s), %s",
+             PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameIReg(1, dst));
+        m_formatter.oneByteOp(OP_MOV_GvEb, dst, base, offset);
+    }
+
+    void movb_mr(int offset, RegisterID base, RegisterID index, int scale, RegisterID dst)
+    {
+        spew("movb       %d(%s,%s,%d), %s",
+             offset, nameIReg(base), nameIReg(index), 1<<scale, nameIReg(1, dst));
+        m_formatter.oneByteOp(OP_MOV_GvEb, dst, base, index, scale, offset);
+    }
+
     void movzbl_mr(int offset, RegisterID base, RegisterID dst)
     {
         spew("movzbl     %s0x%x(%s), %s",
              PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameIReg(4, dst));
         m_formatter.twoByteOp(OP2_MOVZX_GvEb, dst, base, offset);
     }
 
     void movzbl_mr_disp32(int offset, RegisterID base, RegisterID dst)
@@ -2298,16 +2385,23 @@ public:
 
     void movzbl_mr(const void* addr, RegisterID dst)
     {
         spew("movzbl     %p, %s",
              addr, nameIReg(dst));
         m_formatter.twoByteOp(OP2_MOVZX_GvEb, dst, addr);
     }
 
+    void movsbl_rr(RegisterID src, RegisterID dst)
+    {
+        spew("movsbl     %s, %s",
+             nameIReg(1,src), nameIReg(4,dst));
+        m_formatter.twoByteOp8_movx(OP2_MOVSX_GvEb, dst, src);
+    }
+
     void movsbl_mr(int offset, RegisterID base, RegisterID dst)
     {
         spew("movsbl     %s0x%x(%s), %s",
              PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameIReg(4, dst));
         m_formatter.twoByteOp(OP2_MOVSX_GvEb, dst, base, offset);
     }
 
     void movsbl_mr_disp32(int offset, RegisterID base, RegisterID dst)
@@ -2361,16 +2455,23 @@ public:
 
     void movzwl_mr(const void* addr, RegisterID dst)
     {
         spew("movzwl     %p, %s",
              addr, nameIReg(4, dst));
         m_formatter.twoByteOp(OP2_MOVZX_GvEw, dst, addr);
     }
 
+    void movswl_rr(RegisterID src, RegisterID dst)
+    {
+        spew("movswl     %s, %s",
+             nameIReg(2, src), nameIReg(4, dst));
+        m_formatter.twoByteOp(OP2_MOVSX_GvEw, dst, src);
+    }
+
     void movswl_mr(int offset, RegisterID base, RegisterID dst)
     {
         spew("movswl     %s0x%x(%s), %s",
              PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameIReg(4, dst));
         m_formatter.twoByteOp(OP2_MOVSX_GvEw, dst, base, offset);
     }
 
     void movswl_mr_disp32(int offset, RegisterID base, RegisterID dst)
@@ -3898,16 +3999,21 @@ public:
 
     void popa()
     {
         spew("popa");
         m_formatter.oneByteOp(OP_POPA);
     }
 #endif
 
+    void mfence() {
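+        // MFENCE is encoded as 0F AE /6; the constant 6 below selects the
+        // opcode extension in the ModRM reg field.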
+        spew("mfence");
+        m_formatter.twoByteOp(OP_FENCE, (int)6, (RegisterID)0);
+    }
+
     // Assembler admin methods:
 
     JmpDst label()
     {
         JmpDst r = JmpDst(m_formatter.size());
         spew("#label     ((%d))", r.m_offset);
         return r;
     }
--- a/js/src/jit/shared/CodeGenerator-x86-shared.cpp
+++ b/js/src/jit/shared/CodeGenerator-x86-shared.cpp
@@ -3076,10 +3076,18 @@ JitRuntime::generateForkJoinGetSliceStub
 
 #ifdef JS_ION_PERF
     writePerfSpewerJitCodeProfile(code, "ForkJoinGetSliceStub");
 #endif
 
     return code;
 }
 
+bool
+CodeGeneratorX86Shared::visitMemoryBarrier(LMemoryBarrier *ins)
+{
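+    // On x86 the hardware memory model (TSO) only allows a store to be
+    // reordered with a later load, so only the StoreLoad barrier needs an
+    // explicit fence; the other orderings are already guaranteed.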
+    if (ins->type() & MembarStoreLoad)
+        masm.storeLoadFence();
+    return true;
+}
+
 } // namespace jit
 } // namespace js
--- a/js/src/jit/shared/CodeGenerator-x86-shared.h
+++ b/js/src/jit/shared/CodeGenerator-x86-shared.h
@@ -192,16 +192,17 @@ class CodeGeneratorX86Shared : public Co
     virtual bool visitRound(LRound *lir);
     virtual bool visitRoundF(LRoundF *lir);
     virtual bool visitGuardShape(LGuardShape *guard);
     virtual bool visitGuardObjectType(LGuardObjectType *guard);
     virtual bool visitGuardClass(LGuardClass *guard);
     virtual bool visitEffectiveAddress(LEffectiveAddress *ins);
     virtual bool visitUDivOrMod(LUDivOrMod *ins);
     virtual bool visitAsmJSPassStackArg(LAsmJSPassStackArg *ins);
+    virtual bool visitMemoryBarrier(LMemoryBarrier *ins);
 
     bool visitOutOfLineLoadTypedArrayOutOfBounds(OutOfLineLoadTypedArrayOutOfBounds *ool);
 
     bool visitForkJoinGetSlice(LForkJoinGetSlice *ins);
 
     bool visitNegI(LNegI *lir);
     bool visitNegD(LNegD *lir);
     bool visitNegF(LNegF *lir);
--- a/js/src/jit/shared/Lowering-x86-shared.cpp
+++ b/js/src/jit/shared/Lowering-x86-shared.cpp
@@ -345,16 +345,155 @@ LIRGeneratorX86Shared::visitForkJoinGetS
                           tempFixed(eax),
                           tempFixed(edx),
                           tempFixed(ForkJoinGetSliceReg_temp0),
                           tempFixed(ForkJoinGetSliceReg_temp1));
     return defineFixed(lir, ins, LAllocation(AnyRegister(ForkJoinGetSliceReg_output)));
 }
 
 bool
+LIRGeneratorX86Shared::visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins)
+{
+    MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
+    MOZ_ASSERT(ins->arrayType() != Scalar::Float64);
+
+    MOZ_ASSERT(ins->elements()->type() == MIRType_Elements);
+    MOZ_ASSERT(ins->index()->type() == MIRType_Int32);
+
+    const LUse elements = useRegister(ins->elements());
+    const LAllocation index = useRegisterOrConstant(ins->index());
+
+    // Register allocation:
+    //
+    // If the target is an integer register then the target must be
+    // eax.
+    //
+    // If the target is a floating point register then we need a temp
+    // at the lower level; that temp must be eax.
+    //
+    // oldval must be in a register.
+    //
+    // newval will need to be in a register.  If the source is a byte
+    // array then newval must be in a register that is addressable as
+    // a byte register (ebx, ecx, or edx), since eax is taken for the
+    // output in this case.
+    //
+    // Bug #1077036 describes some optimization opportunities.
+
+    bool fixedOutput = false;
+    LDefinition tempDef = LDefinition::BogusTemp();
+    LAllocation newval;
+    if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type())) {
+        tempDef = tempFixed(eax);
+        newval = useRegister(ins->newval());
+    } else {
+        fixedOutput = true;
+        if (ins->isByteArray())
+            newval = useFixed(ins->newval(), ebx);
+        else
+            newval = useRegister(ins->newval());
+    }
+
+    // A register allocator limitation precludes 'useRegisterAtStart()' here.
+    const LAllocation oldval = useRegister(ins->oldval());
+
+    LCompareExchangeTypedArrayElement *lir =
+        new(alloc()) LCompareExchangeTypedArrayElement(elements, index, oldval, newval, tempDef);
+
+    return fixedOutput ? defineFixed(lir, ins, LAllocation(AnyRegister(eax))) : define(lir, ins);
+}
+
+bool
+LIRGeneratorX86Shared::visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins)
+{
+    MOZ_ASSERT(ins->arrayType() != Scalar::Uint8Clamped);
+    MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
+    MOZ_ASSERT(ins->arrayType() != Scalar::Float64);
+
+    MOZ_ASSERT(ins->elements()->type() == MIRType_Elements);
+    MOZ_ASSERT(ins->index()->type() == MIRType_Int32);
+
+    const LUse elements = useRegister(ins->elements());
+    const LAllocation index = useRegisterOrConstant(ins->index());
+
+    // Register allocation:
+    //
+    // For ADD and SUB we'll use XADD:
+    //
+    //    movl       src, output
+    //    lock xaddl output, mem
+    //
+    // For the 8-bit variants XADD needs a byte register for the
+    // output only.
+    //
+    // For AND/OR/XOR we need to use a CMPXCHG loop:
+    //
+    //    movl          *mem, eax
+    // L: mov           eax, temp
+    //    andl          src, temp
+    //    lock cmpxchg  temp, mem  ; reads eax also
+    //    jnz           L
+    //    ; result in eax
+    //
+    // Note the placement of L: cmpxchg will update eax with *mem if
+    // *mem does not have the expected value, so reloading it at the
+    // top of the loop is redundant.
+    //
+    // If the array is not a uint32 array then:
+    //  - eax should be the output (one result of the cmpxchg)
+    //  - there is a temp, which must be allocated to a byte register
+    //    if the array has 1-byte elements
+    //
+    // If the array is a uint32 array then:
+    //  - eax is the first temp
+    //  - we also need a second temp
+    //
+    // For simplicity we force the 'value' into a byte register if the
+    // array has 1-byte elements, though that could be worked around.
+    //
+    // For simplicity we also choose fixed byte registers even when
+    // any available byte register would have been OK.
+    //
+    // There are optimization opportunities:
+    //  - when the result is unused, Bug #1077014.
+    //  - better register allocation and instruction selection, Bug #1077036.
+
+    bool bitOp = !(ins->operation() == AtomicFetchAddOp || ins->operation() == AtomicFetchSubOp);
+    bool fixedOutput = true;
+    LDefinition tempDef1 = LDefinition::BogusTemp();
+    LDefinition tempDef2 = LDefinition::BogusTemp();
+    LAllocation value;
+
+    if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type())) {
+        value = useRegister(ins->value());
+        fixedOutput = false;
+        if (bitOp) {
+            tempDef1 = tempFixed(eax);
+            tempDef2 = temp();
+        } else {
+            tempDef1 = temp();
+        }
+    } else if (ins->isByteArray()) {
+        value = useFixed(ins->value(), ebx);
+        if (bitOp)
+            tempDef1 = tempFixed(ecx);
+    } else {
+        value = useRegister(ins->value());
+        if (bitOp)
+            tempDef1 = temp();
+    }
+
+    LAtomicTypedArrayElementBinop *lir =
+        new(alloc()) LAtomicTypedArrayElementBinop(elements, index, value, tempDef1, tempDef2);
+
+    return fixedOutput ? defineFixed(lir, ins, LAllocation(AnyRegister(eax))) : define(lir, ins);
+}
+
+bool
 LIRGeneratorX86Shared::visitSimdTernaryBitwise(MSimdTernaryBitwise *ins)
 {
     MOZ_ASSERT(IsSimdType(ins->type()));
 
     if (ins->type() == MIRType_Int32x4 || ins->type() == MIRType_Float32x4) {
         LSimdSelect *lins = new(alloc()) LSimdSelect;
 
         // This must be useRegisterAtStart() because it is destroyed.
--- a/js/src/jit/shared/Lowering-x86-shared.h
+++ b/js/src/jit/shared/Lowering-x86-shared.h
@@ -50,14 +50,16 @@ class LIRGeneratorX86Shared : public LIR
     bool lowerConstantDouble(double d, MInstruction *ins);
     bool lowerConstantFloat32(float d, MInstruction *ins);
     bool lowerTruncateDToInt32(MTruncateToInt32 *ins);
     bool lowerTruncateFToInt32(MTruncateToInt32 *ins);
     bool visitForkJoinGetSlice(MForkJoinGetSlice *ins);
     bool visitSimdTernaryBitwise(MSimdTernaryBitwise *ins);
     bool visitSimdSplatX4(MSimdSplatX4 *ins);
     bool visitSimdValueX4(MSimdValueX4 *ins);
+    bool visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins);
+    bool visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins);
 };
 
 } // namespace jit
 } // namespace js
 
 #endif /* jit_shared_Lowering_x86_shared_h */
--- a/js/src/jit/shared/MacroAssembler-x86-shared.h
+++ b/js/src/jit/shared/MacroAssembler-x86-shared.h
@@ -188,20 +188,303 @@ class MacroAssemblerX86Shared : public A
         notl(reg);
     }
     void atomic_inc32(const Operand &addr) {
         lock_incl(addr);
     }
     void atomic_dec32(const Operand &addr) {
         lock_decl(addr);
     }
-    void atomic_cmpxchg32(Register src, const Operand &addr, Register dest) {
+    void atomic_cmpxchg8(Register newval, const Operand &addr, Register oldval_and_result) {
+        // %eax must be explicitly provided for calling clarity.
+        MOZ_ASSERT(oldval_and_result.code() == X86Registers::eax);
+        lock_cmpxchg8(newval, addr);
+    }
+    void atomic_cmpxchg16(Register newval, const Operand &addr, Register oldval_and_result) {
+        // %eax must be explicitly provided for calling clarity.
+        MOZ_ASSERT(oldval_and_result.code() == X86Registers::eax);
+        lock_cmpxchg16(newval, addr);
+    }
+    void atomic_cmpxchg32(Register newval, const Operand &addr, Register oldval_and_result) {
         // %eax must be explicitly provided for calling clarity.
-        MOZ_ASSERT(dest.code() == X86Registers::eax);
-        lock_cmpxchg32(src, addr);
+        MOZ_ASSERT(oldval_and_result.code() == X86Registers::eax);
+        lock_cmpxchg32(newval, addr);
+    }
+
+    template <typename T>
+    void atomicFetchAdd8SignExtend(Register src, const T &mem, Register temp, Register output) {
+        MOZ_ASSERT(output == eax);
+        if (src != output)
+            movl(src, output);
+        lock_xaddb(output, Operand(mem));
+        movsbl(output, output);
+    }
+
+    template <typename T>
+    void atomicFetchAdd8ZeroExtend(Register src, const T &mem, Register temp, Register output) {
+        MOZ_ASSERT(output == eax);
+        MOZ_ASSERT(temp == InvalidReg);
+        if (src != output)
+            movl(src, output);
+        lock_xaddb(output, Operand(mem));
+        movzbl(output, output);
+    }
+
+    template <typename T>
+    void atomicFetchAdd8SignExtend(Imm32 src, const T &mem, Register temp, Register output) {
+        MOZ_ASSERT(output == eax);
+        MOZ_ASSERT(temp == InvalidReg);
+        movb(src, output);
+        lock_xaddb(output, Operand(mem));
+        movsbl(output, output);
+    }
+
+    template <typename T>
+    void atomicFetchAdd8ZeroExtend(Imm32 src, const T &mem, Register temp, Register output) {
+        MOZ_ASSERT(output == eax);
+        MOZ_ASSERT(temp == InvalidReg);
+        movb(src, output);
+        lock_xaddb(output, Operand(mem));
+        movzbl(output, output);
+    }
+
+    template <typename T>
+    void atomicFetchAdd16SignExtend(Register src, const T &mem, Register temp, Register output) {
+        MOZ_ASSERT(temp == InvalidReg);
+        if (src != output)
+            movl(src, output);
+        lock_xaddw(output, Operand(mem));
+        movswl(output, output);
+    }
+
+    template <typename T>
+    void atomicFetchAdd16ZeroExtend(Register src, const T &mem, Register temp, Register output) {
+        MOZ_ASSERT(temp == InvalidReg);
+        if (src != output)
+            movl(src, output);
+        lock_xaddw(output, Operand(mem));
+        movzwl(output, output);
+    }
+
+    template <typename T>
+    void atomicFetchAdd16SignExtend(Imm32 src, const T &mem, Register temp, Register output) {
+        MOZ_ASSERT(temp == InvalidReg);
+        movl(src, output);
+        lock_xaddw(output, Operand(mem));
+        movswl(output, output);
+    }
+
+    template <typename T>
+    void atomicFetchAdd16ZeroExtend(Imm32 src, const T &mem, Register temp, Register output) {
+        MOZ_ASSERT(temp == InvalidReg);
+        movl(src, output);
+        lock_xaddw(output, Operand(mem));
+        movzwl(output, output);
+    }
+
+    template <typename T>
+    void atomicFetchAdd32(Register src, const T &mem, Register temp, Register output) {
+        MOZ_ASSERT(temp == InvalidReg);
+        if (src != output)
+            movl(src, output);
+        lock_xaddl(output, Operand(mem));
+    }
+
+    template <typename T>
+    void atomicFetchAdd32(Imm32 src, const T &mem, Register temp, Register output) {
+        MOZ_ASSERT(temp == InvalidReg);
+        movl(src, output);
+        lock_xaddl(output, Operand(mem));
+    }
+
+    template <typename T>
+    void atomicFetchSub8SignExtend(Register src, const T &mem, Register temp, Register output) {
+        MOZ_ASSERT(output == eax);
+        MOZ_ASSERT(temp == InvalidReg);
+        if (src != output)
+            movl(src, output);
+        negl(output);
+        lock_xaddb(output, Operand(mem));
+        movsbl(output, output);
+    }
+
+    template <typename T>
+    void atomicFetchSub8ZeroExtend(Register src, const T &mem, Register temp, Register output) {
+        MOZ_ASSERT(output == eax);
+        MOZ_ASSERT(temp == InvalidReg);
+        if (src != output)
+            movl(src, output);
+        negl(output);
+        lock_xaddb(output, Operand(mem));
+        movzbl(output, output);
+    }
+
+    template <typename T>
+    void atomicFetchSub8SignExtend(Imm32 src, const T &mem, Register temp, Register output) {
+        MOZ_ASSERT(output == eax);
+        MOZ_ASSERT(temp == InvalidReg);
+        movb(Imm32(-src.value), output);
+        lock_xaddb(output, Operand(mem));
+        movsbl(output, output);
+    }
+
+    template <typename T>
+    void atomicFetchSub8ZeroExtend(Imm32 src, const T &mem, Register temp, Register output) {
+        MOZ_ASSERT(output == eax);
+        MOZ_ASSERT(temp == InvalidReg);
+        movb(Imm32(-src.value), output);
+        lock_xaddb(output, Operand(mem));
+        movzbl(output, output);
+    }
+
+    template <typename T>
+    void atomicFetchSub16SignExtend(Register src, const T &mem, Register temp, Register output) {
+        MOZ_ASSERT(temp == InvalidReg);
+        if (src != output)
+            movl(src, output);
+        negl(output);
+        lock_xaddw(output, Operand(mem));
+        movswl(output, output);
+    }
+
+    template <typename T>
+    void atomicFetchSub16ZeroExtend(Register src, const T &mem, Register temp, Register output) {
+        MOZ_ASSERT(temp == InvalidReg);
+        if (src != output)
+            movl(src, output);
+        negl(output);
+        lock_xaddw(output, Operand(mem));
+        movzwl(output, output);
+    }
+
+    template <typename T>
+    void atomicFetchSub16SignExtend(Imm32 src, const T &mem, Register temp, Register output) {
+        MOZ_ASSERT(temp == InvalidReg);
+        movl(Imm32(-src.value), output);
+        lock_xaddw(output, Operand(mem));
+        movswl(output, output);
+    }
+
+    template <typename T>
+    void atomicFetchSub16ZeroExtend(Imm32 src, const T &mem, Register temp, Register output) {
+        MOZ_ASSERT(temp == InvalidReg);
+        movl(Imm32(-src.value), output);
+        lock_xaddw(output, Operand(mem));
+        movzwl(output, output);
+    }
+
+    template <typename T>
+    void atomicFetchSub32(Register src, const T &mem, Register temp, Register output) {
+        MOZ_ASSERT(temp == InvalidReg);
+        if (src != output)
+            movl(src, output);
+        negl(output);
+        lock_xaddl(output, Operand(mem));
+    }
+
+    template <typename T>
+    void atomicFetchSub32(Imm32 src, const T &mem, Register temp, Register output) {
+        movl(Imm32(-src.value), output);
+        lock_xaddl(output, Operand(mem));
+    }
+
+    // The CMPXCHG loop below requires output == eax; temp must be a
+    // register other than eax.
+#define ATOMIC_BITOP_BODY(LOAD, OP, LOCK_CMPXCHG)        \
+        MOZ_ASSERT(output == eax); \
+        LOAD(Operand(mem), eax);  \
+        Label again;              \
+        bind(&again);             \
+        movl(eax, temp);          \
+        OP(src, temp);            \
+        LOCK_CMPXCHG(temp, Operand(mem)); \
+        j(NonZero, &again);
+
+    template <typename S, typename T>
+    void atomicFetchAnd8SignExtend(const S &src, const T &mem, Register temp, Register output) {
+        ATOMIC_BITOP_BODY(movb, andl, lock_cmpxchg8)
+        movsbl(eax, eax);
+    }
+    template <typename S, typename T>
+    void atomicFetchAnd8ZeroExtend(const S &src, const T &mem, Register temp, Register output) {
+        ATOMIC_BITOP_BODY(movb, andl, lock_cmpxchg8)
+        movzbl(eax, eax);
+    }
+    template <typename S, typename T>
+    void atomicFetchAnd16SignExtend(const S &src, const T &mem, Register temp, Register output) {
+        ATOMIC_BITOP_BODY(movw, andl, lock_cmpxchg16)
+        movswl(eax, eax);
+    }
+    template <typename S, typename T>
+    void atomicFetchAnd16ZeroExtend(const S &src, const T &mem, Register temp, Register output) {
+        ATOMIC_BITOP_BODY(movw, andl, lock_cmpxchg16)
+        movzwl(eax, eax);
+    }
+    template <typename S, typename T>
+    void atomicFetchAnd32(const S &src, const T &mem, Register temp, Register output) {
+        ATOMIC_BITOP_BODY(movl, andl, lock_cmpxchg32)
+    }
+
+    template <typename S, typename T>
+    void atomicFetchOr8SignExtend(const S &src, const T &mem, Register temp, Register output) {
+        ATOMIC_BITOP_BODY(movb, orl, lock_cmpxchg8)
+        movsbl(eax, eax);
+    }
+    template <typename S, typename T>
+    void atomicFetchOr8ZeroExtend(const S &src, const T &mem, Register temp, Register output) {
+        ATOMIC_BITOP_BODY(movb, orl, lock_cmpxchg8)
+        movzbl(eax, eax);
+    }
+    template <typename S, typename T>
+    void atomicFetchOr16SignExtend(const S &src, const T &mem, Register temp, Register output) {
+        ATOMIC_BITOP_BODY(movw, orl, lock_cmpxchg16)
+        movswl(eax, eax);
+    }
+    template <typename S, typename T>
+    void atomicFetchOr16ZeroExtend(const S &src, const T &mem, Register temp, Register output) {
+        ATOMIC_BITOP_BODY(movw, orl, lock_cmpxchg16)
+        movzwl(eax, eax);
+    }
+    template <typename S, typename T>
+    void atomicFetchOr32(const S &src, const T &mem, Register temp, Register output) {
+        ATOMIC_BITOP_BODY(movl, orl, lock_cmpxchg32)
+    }
+
+    template <typename S, typename T>
+    void atomicFetchXor8SignExtend(const S &src, const T &mem, Register temp, Register output) {
+        ATOMIC_BITOP_BODY(movb, xorl, lock_cmpxchg8)
+        movsbl(eax, eax);
+    }
+    template <typename S, typename T>
+    void atomicFetchXor8ZeroExtend(const S &src, const T &mem, Register temp, Register output) {
+        ATOMIC_BITOP_BODY(movb, xorl, lock_cmpxchg8)
+        movzbl(eax, eax);
+    }
+    template <typename S, typename T>
+    void atomicFetchXor16SignExtend(const S &src, const T &mem, Register temp, Register output) {
+        ATOMIC_BITOP_BODY(movw, xorl, lock_cmpxchg16)
+        movswl(eax, eax);
+    }
+    template <typename S, typename T>
+    void atomicFetchXor16ZeroExtend(const S &src, const T &mem, Register temp, Register output) {
+        ATOMIC_BITOP_BODY(movw, xorl, lock_cmpxchg16)
+        movzwl(eax, eax);
+    }
+    template <typename S, typename T>
+    void atomicFetchXor32(const S &src, const T &mem, Register temp, Register output) {
+        ATOMIC_BITOP_BODY(movl, xorl, lock_cmpxchg32)
+    }
+
+#undef ATOMIC_BITOP_BODY
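For readability, here is a mechanical expansion of the macro body for the 32-bit AND case, i.e. what atomicFetchAnd32 above amounts to (an illustration only, not additional code in the patch):

    // atomicFetchAnd32(src, mem, temp, output), with output == eax:
    MOZ_ASSERT(output == eax);
    movl(Operand(mem), eax);             // fetch the current value
    Label again;
    bind(&again);
    movl(eax, temp);                     // copy it into the scratch register
    andl(src, temp);                     // apply the operation to the copy
    lock_cmpxchg32(temp, Operand(mem));  // store iff *mem still equals eax;
                                         // on failure eax is reloaded from *mem
    j(NonZero, &again);                  // retry until the exchange succeeds
    // The old (fetched) value is left in eax.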
+
+    void storeLoadFence() {
+        // This implementation follows Linux.
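+        // Any LOCK-prefixed read-modify-write instruction acts as a full
+        // barrier; adding 0 to the word at the top of the stack is a cheap
+        // way to get one without clobbering anything.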
+        if (HasSSE2())
+            masm.mfence();
+        else
+            lock_addl(Imm32(0), Operand(Address(esp, 0)));
     }
 
     void branch16(Condition cond, Register lhs, Register rhs, Label *label) {
         cmpw(lhs, rhs);
         j(cond, label);
     }
     void branch32(Condition cond, const Operand &lhs, Register rhs, Label *label) {
         cmpl(lhs, rhs);
@@ -357,26 +640,60 @@ class MacroAssemblerX86Shared : public A
     }
     void load8SignExtend(const BaseIndex &src, Register dest) {
         movsbl(Operand(src), dest);
     }
     template <typename S, typename T>
     void store8(const S &src, const T &dest) {
         movb(src, Operand(dest));
     }
+    template <typename T>
+    void compareExchange8ZeroExtend(const T &mem, Register oldval, Register newval, Register output) {
+        MOZ_ASSERT(output == eax);
+        MOZ_ASSERT(newval == ebx || newval == ecx || newval == edx);
+        if (oldval != output)
+            movl(oldval, output);
+        lock_cmpxchg8(newval, Operand(mem));
+        movzbl(output, output);
+    }
+    template <typename T>
+    void compareExchange8SignExtend(const T &mem, Register oldval, Register newval, Register output) {
+        MOZ_ASSERT(output == eax);
+        MOZ_ASSERT(newval == ebx || newval == ecx || newval == edx);
+        if (oldval != output)
+            movl(oldval, output);
+        lock_cmpxchg8(newval, Operand(mem));
+        movsbl(output, output);
+    }
     void load16ZeroExtend(const Address &src, Register dest) {
         movzwl(Operand(src), dest);
     }
     void load16ZeroExtend(const BaseIndex &src, Register dest) {
         movzwl(Operand(src), dest);
     }
     template <typename S, typename T>
     void store16(const S &src, const T &dest) {
         movw(src, Operand(dest));
     }
+    template <typename T>
+    void compareExchange16ZeroExtend(const T &mem, Register oldval, Register newval, Register output) {
+        MOZ_ASSERT(output == eax);
+        if (oldval != output)
+            movl(oldval, output);
+        lock_cmpxchg16(newval, Operand(mem));
+        movzwl(output, output);
+    }
+    template <typename T>
+    void compareExchange16SignExtend(const T &mem, Register oldval, Register newval, Register output) {
+        MOZ_ASSERT(output == eax);
+        if (oldval != output)
+            movl(oldval, output);
+        lock_cmpxchg16(newval, Operand(mem));
+        movswl(output, output);
+    }
     void load16SignExtend(const Address &src, Register dest) {
         movswl(Operand(src), dest);
     }
     void load16SignExtend(const BaseIndex &src, Register dest) {
         movswl(Operand(src), dest);
     }
     void load32(const Address &address, Register dest) {
         movl(Operand(address), dest);
@@ -386,16 +703,23 @@ class MacroAssemblerX86Shared : public A
     }
     void load32(const Operand &src, Register dest) {
         movl(src, dest);
     }
     template <typename S, typename T>
     void store32(const S &src, const T &dest) {
         movl(src, Operand(dest));
     }
+    template <typename T>
+    void compareExchange32(const T &mem, Register oldval, Register newval, Register output) {
+        MOZ_ASSERT(output == eax);
+        if (oldval != output)
+            movl(oldval, output);
+        lock_cmpxchg32(newval, Operand(mem));
+    }
     template <typename S, typename T>
     void store32_NoSecondScratch(const S &src, const T &dest) {
         store32(src, dest);
     }
     void loadDouble(const Address &src, FloatRegister dest) {
         movsd(src, dest);
     }
     void loadDouble(const BaseIndex &src, FloatRegister dest) {