Bug 1202650 - add a flagTemp on ARM. r=nbp.
author: Lars T Hansen <lhansen@mozilla.com>
date: Mon, 14 Sep 2015 11:37:39 +0200
changeset 294901 61ab9f5612e481f32b6bb579b4784fe4c82b98e6
parent 294900 53096799445004f8b1c47cc797be5f38b5e1876b
child 294902 839bd50e595038bf5b445c88e256f520ee3ffe32
push id: 5245
push user: raliiev@mozilla.com
push date: Thu, 29 Oct 2015 11:30:51 +0000
reviewers: nbp
bugs: 1202650
milestone: 43.0a1
Bug 1202650 - add a flagTemp on ARM. r=nbp.
js/src/jit/arm/Assembler-arm.cpp
js/src/jit/arm/Assembler-arm.h
js/src/jit/arm/CodeGenerator-arm.cpp
js/src/jit/arm/CodeGenerator-arm.h
js/src/jit/arm/Lowering-arm.cpp
js/src/jit/arm/MacroAssembler-arm.cpp
js/src/jit/arm/MacroAssembler-arm.h
js/src/jit/shared/LIR-shared.h
--- a/js/src/jit/arm/Assembler-arm.cpp
+++ b/js/src/jit/arm/Assembler-arm.cpp
@@ -2270,28 +2270,31 @@ BufferOffset
 Assembler::as_ldrexb(Register rt, Register rn, Condition c)
 {
     return writeInst(0x01d00f9f | (int)c | RT(rt) | RN(rn));
 }
 
 BufferOffset
 Assembler::as_strex(Register rd, Register rt, Register rn, Condition c)
 {
+    MOZ_ASSERT(rd != rn && rd != rt); // True restriction on Cortex-A7 (RPi2)
     return writeInst(0x01800f90 | (int)c | RD(rd) | RN(rn) | rt.code());
 }
 
 BufferOffset
 Assembler::as_strexh(Register rd, Register rt, Register rn, Condition c)
 {
+    MOZ_ASSERT(rd != rn && rd != rt); // True restriction on Cortex-A7 (RPi2)
     return writeInst(0x01e00f90 | (int)c | RD(rd) | RN(rn) | rt.code());
 }
 
 BufferOffset
 Assembler::as_strexb(Register rd, Register rt, Register rn, Condition c)
 {
+    MOZ_ASSERT(rd != rn && rd != rt); // True restriction on Cortex-A7 (RPi2)
     return writeInst(0x01c00f90 | (int)c | RD(rd) | RN(rn) | rt.code());
 }
 
 // Memory barrier stuff:
 
 BufferOffset
 Assembler::as_dmb(BarrierOption option)
 {
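
Background for the new assertions: the ARM architecture makes STREX unpredictable when the status register Rd equals the base register Rn or the value register Rt, and the Cortex-A7 (as in the Raspberry Pi 2) enforces that restriction in practice. A minimal illustration of the constraint, with r0-r3 standing in for allocator-chosen registers; these instructions are illustrative, not emitted by this patch:

    // strex r2, r2, [r0]   ; bad:  Rd == Rt, architecturally unpredictable
    // strex r3, r2, [r0]   ; good: the status register differs from Rt and Rn
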
--- a/js/src/jit/arm/Assembler-arm.h
+++ b/js/src/jit/arm/Assembler-arm.h
@@ -1578,17 +1578,17 @@ class Assembler : public AssemblerShared
     // The word versions are available from ARMv6 forward and can be used to
     // implement the halfword and byte versions on older systems.
 
     // LDREX rt, [rn]
     BufferOffset as_ldrex(Register rt, Register rn, Condition c = Always);
     BufferOffset as_ldrexh(Register rt, Register rn, Condition c = Always);
     BufferOffset as_ldrexb(Register rt, Register rn, Condition c = Always);
 
-    // STREX rd, rt, [rn]
+    // STREX rd, rt, [rn].  Constraint: rd != rn, rd != rt.
     BufferOffset as_strex(Register rd, Register rt, Register rn, Condition c = Always);
     BufferOffset as_strexh(Register rd, Register rt, Register rn, Condition c = Always);
     BufferOffset as_strexb(Register rd, Register rt, Register rn, Condition c = Always);
 
     // Memory synchronization.
     // These are available from ARMv7 forward.
     BufferOffset as_dmb(BarrierOption option = BarrierSY);
     BufferOffset as_dsb(BarrierOption option = BarrierSY);
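
A hedged usage sketch of this interface; the register names, label, and increment are hypothetical, but the operand order and the Rd-distinctness constraint come from the declarations above:

    Label again;
    masm.bind(&again);
    masm.as_ldrex(r1, r0);               // r1 = [r0], opening an exclusive access
    masm.as_add(r1, r1, Imm8(1));        // compute the new value
    masm.as_strex(r3, r1, r0);           // r3 = 0 on success; note r3 != r1, r3 != r0
    masm.as_cmp(r3, Imm8(1));
    masm.as_b(&again, Assembler::Equal); // reservation lost, retry
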
--- a/js/src/jit/arm/CodeGenerator-arm.cpp
+++ b/js/src/jit/arm/CodeGenerator-arm.cpp
@@ -1689,328 +1689,342 @@ CodeGeneratorARM::visitStoreTypedArrayEl
 {
     MOZ_CRASH("NYI");
 }
 
 
 template<typename S, typename T>
 void
 CodeGeneratorARM::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType,
-                                             const S& value, const T& mem, Register temp1,
-                                             Register temp2, AnyRegister output)
+                                             const S& value, const T& mem, Register flagTemp,
+                                             Register outTemp, AnyRegister output)
 {
+    MOZ_ASSERT(flagTemp != InvalidReg);
+    MOZ_ASSERT_IF(arrayType == Scalar::Uint32, outTemp != InvalidReg);
+
     // Uint8Clamped is explicitly not supported here
     switch (arrayType) {
       case Scalar::Int8:
         switch (op) {
           case AtomicFetchAddOp:
-            masm.atomicFetchAdd8SignExtend(value, mem, temp1, output.gpr());
+            masm.atomicFetchAdd8SignExtend(value, mem, flagTemp, output.gpr());
             break;
           case AtomicFetchSubOp:
-            masm.atomicFetchSub8SignExtend(value, mem, temp1, output.gpr());
+            masm.atomicFetchSub8SignExtend(value, mem, flagTemp, output.gpr());
             break;
           case AtomicFetchAndOp:
-            masm.atomicFetchAnd8SignExtend(value, mem, temp1, output.gpr());
+            masm.atomicFetchAnd8SignExtend(value, mem, flagTemp, output.gpr());
             break;
           case AtomicFetchOrOp:
-            masm.atomicFetchOr8SignExtend(value, mem, temp1, output.gpr());
+            masm.atomicFetchOr8SignExtend(value, mem, flagTemp, output.gpr());
             break;
           case AtomicFetchXorOp:
-            masm.atomicFetchXor8SignExtend(value, mem, temp1, output.gpr());
+            masm.atomicFetchXor8SignExtend(value, mem, flagTemp, output.gpr());
             break;
           default:
             MOZ_CRASH("Invalid typed array atomic operation");
         }
         break;
       case Scalar::Uint8:
         switch (op) {
           case AtomicFetchAddOp:
-            masm.atomicFetchAdd8ZeroExtend(value, mem, temp1, output.gpr());
+            masm.atomicFetchAdd8ZeroExtend(value, mem, flagTemp, output.gpr());
             break;
           case AtomicFetchSubOp:
-            masm.atomicFetchSub8ZeroExtend(value, mem, temp1, output.gpr());
+            masm.atomicFetchSub8ZeroExtend(value, mem, flagTemp, output.gpr());
             break;
           case AtomicFetchAndOp:
-            masm.atomicFetchAnd8ZeroExtend(value, mem, temp1, output.gpr());
+            masm.atomicFetchAnd8ZeroExtend(value, mem, flagTemp, output.gpr());
             break;
           case AtomicFetchOrOp:
-            masm.atomicFetchOr8ZeroExtend(value, mem, temp1, output.gpr());
+            masm.atomicFetchOr8ZeroExtend(value, mem, flagTemp, output.gpr());
             break;
           case AtomicFetchXorOp:
-            masm.atomicFetchXor8ZeroExtend(value, mem, temp1, output.gpr());
+            masm.atomicFetchXor8ZeroExtend(value, mem, flagTemp, output.gpr());
             break;
           default:
             MOZ_CRASH("Invalid typed array atomic operation");
         }
         break;
       case Scalar::Int16:
         switch (op) {
           case AtomicFetchAddOp:
-            masm.atomicFetchAdd16SignExtend(value, mem, temp1, output.gpr());
+            masm.atomicFetchAdd16SignExtend(value, mem, flagTemp, output.gpr());
             break;
           case AtomicFetchSubOp:
-            masm.atomicFetchSub16SignExtend(value, mem, temp1, output.gpr());
+            masm.atomicFetchSub16SignExtend(value, mem, flagTemp, output.gpr());
             break;
           case AtomicFetchAndOp:
-            masm.atomicFetchAnd16SignExtend(value, mem, temp1, output.gpr());
+            masm.atomicFetchAnd16SignExtend(value, mem, flagTemp, output.gpr());
             break;
           case AtomicFetchOrOp:
-            masm.atomicFetchOr16SignExtend(value, mem, temp1, output.gpr());
+            masm.atomicFetchOr16SignExtend(value, mem, flagTemp, output.gpr());
             break;
           case AtomicFetchXorOp:
-            masm.atomicFetchXor16SignExtend(value, mem, temp1, output.gpr());
+            masm.atomicFetchXor16SignExtend(value, mem, flagTemp, output.gpr());
             break;
           default:
             MOZ_CRASH("Invalid typed array atomic operation");
         }
         break;
       case Scalar::Uint16:
         switch (op) {
           case AtomicFetchAddOp:
-            masm.atomicFetchAdd16ZeroExtend(value, mem, temp1, output.gpr());
+            masm.atomicFetchAdd16ZeroExtend(value, mem, flagTemp, output.gpr());
             break;
           case AtomicFetchSubOp:
-            masm.atomicFetchSub16ZeroExtend(value, mem, temp1, output.gpr());
+            masm.atomicFetchSub16ZeroExtend(value, mem, flagTemp, output.gpr());
             break;
           case AtomicFetchAndOp:
-            masm.atomicFetchAnd16ZeroExtend(value, mem, temp1, output.gpr());
+            masm.atomicFetchAnd16ZeroExtend(value, mem, flagTemp, output.gpr());
             break;
           case AtomicFetchOrOp:
-            masm.atomicFetchOr16ZeroExtend(value, mem, temp1, output.gpr());
+            masm.atomicFetchOr16ZeroExtend(value, mem, flagTemp, output.gpr());
             break;
           case AtomicFetchXorOp:
-            masm.atomicFetchXor16ZeroExtend(value, mem, temp1, output.gpr());
+            masm.atomicFetchXor16ZeroExtend(value, mem, flagTemp, output.gpr());
             break;
           default:
             MOZ_CRASH("Invalid typed array atomic operation");
         }
         break;
       case Scalar::Int32:
         switch (op) {
           case AtomicFetchAddOp:
-            masm.atomicFetchAdd32(value, mem, temp1, output.gpr());
+            masm.atomicFetchAdd32(value, mem, flagTemp, output.gpr());
             break;
           case AtomicFetchSubOp:
-            masm.atomicFetchSub32(value, mem, temp1, output.gpr());
+            masm.atomicFetchSub32(value, mem, flagTemp, output.gpr());
             break;
           case AtomicFetchAndOp:
-            masm.atomicFetchAnd32(value, mem, temp1, output.gpr());
+            masm.atomicFetchAnd32(value, mem, flagTemp, output.gpr());
             break;
           case AtomicFetchOrOp:
-            masm.atomicFetchOr32(value, mem, temp1, output.gpr());
+            masm.atomicFetchOr32(value, mem, flagTemp, output.gpr());
             break;
           case AtomicFetchXorOp:
-            masm.atomicFetchXor32(value, mem, temp1, output.gpr());
+            masm.atomicFetchXor32(value, mem, flagTemp, output.gpr());
             break;
           default:
             MOZ_CRASH("Invalid typed array atomic operation");
         }
         break;
       case Scalar::Uint32:
         // At the moment, the code in MCallOptimize.cpp requires the output
         // type to be double for uint32 arrays.  See bug 1077305.
         MOZ_ASSERT(output.isFloat());
         switch (op) {
           case AtomicFetchAddOp:
-            masm.atomicFetchAdd32(value, mem, InvalidReg, temp1);
+            masm.atomicFetchAdd32(value, mem, flagTemp, outTemp);
             break;
           case AtomicFetchSubOp:
-            masm.atomicFetchSub32(value, mem, InvalidReg, temp1);
+            masm.atomicFetchSub32(value, mem, flagTemp, outTemp);
             break;
           case AtomicFetchAndOp:
-            masm.atomicFetchAnd32(value, mem, temp2, temp1);
+            masm.atomicFetchAnd32(value, mem, flagTemp, outTemp);
             break;
           case AtomicFetchOrOp:
-            masm.atomicFetchOr32(value, mem, temp2, temp1);
+            masm.atomicFetchOr32(value, mem, flagTemp, outTemp);
             break;
           case AtomicFetchXorOp:
-            masm.atomicFetchXor32(value, mem, temp2, temp1);
+            masm.atomicFetchXor32(value, mem, flagTemp, outTemp);
             break;
           default:
             MOZ_CRASH("Invalid typed array atomic operation");
         }
-        masm.convertUInt32ToDouble(temp1, output.fpu());
+        masm.convertUInt32ToDouble(outTemp, output.fpu());
         break;
       default:
         MOZ_CRASH("Invalid typed array type");
     }
 }
 
 template void
 CodeGeneratorARM::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType,
                                              const Imm32& value, const Address& mem,
-                                             Register temp1, Register temp2, AnyRegister output);
+                                             Register flagTemp, Register outTemp,
+                                             AnyRegister output);
 template void
 CodeGeneratorARM::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType,
                                              const Imm32& value, const BaseIndex& mem,
-                                             Register temp1, Register temp2, AnyRegister output);
+                                             Register flagTemp, Register outTemp,
+                                             AnyRegister output);
 template void
 CodeGeneratorARM::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType,
                                              const Register& value, const Address& mem,
-                                             Register temp1, Register temp2, AnyRegister output);
+                                             Register flagTemp, Register outTemp,
+                                             AnyRegister output);
 template void
 CodeGeneratorARM::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType,
                                              const Register& value, const BaseIndex& mem,
-                                             Register temp1, Register temp2, AnyRegister output);
+                                             Register flagTemp, Register outTemp,
+                                             AnyRegister output);
 
 // Binary operation for effect, result discarded.
 template<typename S, typename T>
 void
 CodeGeneratorARM::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType, const S& value,
-                                             const T& mem)
+                                             const T& mem, Register flagTemp)
 {
+    MOZ_ASSERT(flagTemp != InvalidReg);
+
     // Uint8Clamped is explicitly not supported here
     switch (arrayType) {
       case Scalar::Int8:
       case Scalar::Uint8:
         switch (op) {
           case AtomicFetchAddOp:
-            masm.atomicAdd8(value, mem);
+            masm.atomicAdd8(value, mem, flagTemp);
             break;
           case AtomicFetchSubOp:
-            masm.atomicSub8(value, mem);
+            masm.atomicSub8(value, mem, flagTemp);
             break;
           case AtomicFetchAndOp:
-            masm.atomicAnd8(value, mem);
+            masm.atomicAnd8(value, mem, flagTemp);
             break;
           case AtomicFetchOrOp:
-            masm.atomicOr8(value, mem);
+            masm.atomicOr8(value, mem, flagTemp);
             break;
           case AtomicFetchXorOp:
-            masm.atomicXor8(value, mem);
+            masm.atomicXor8(value, mem, flagTemp);
             break;
           default:
             MOZ_CRASH("Invalid typed array atomic operation");
         }
         break;
       case Scalar::Int16:
       case Scalar::Uint16:
         switch (op) {
           case AtomicFetchAddOp:
-            masm.atomicAdd16(value, mem);
+            masm.atomicAdd16(value, mem, flagTemp);
             break;
           case AtomicFetchSubOp:
-            masm.atomicSub16(value, mem);
+            masm.atomicSub16(value, mem, flagTemp);
             break;
           case AtomicFetchAndOp:
-            masm.atomicAnd16(value, mem);
+            masm.atomicAnd16(value, mem, flagTemp);
             break;
           case AtomicFetchOrOp:
-            masm.atomicOr16(value, mem);
+            masm.atomicOr16(value, mem, flagTemp);
             break;
           case AtomicFetchXorOp:
-            masm.atomicXor16(value, mem);
+            masm.atomicXor16(value, mem, flagTemp);
             break;
           default:
             MOZ_CRASH("Invalid typed array atomic operation");
         }
         break;
       case Scalar::Int32:
       case Scalar::Uint32:
         switch (op) {
           case AtomicFetchAddOp:
-            masm.atomicAdd32(value, mem);
+            masm.atomicAdd32(value, mem, flagTemp);
             break;
           case AtomicFetchSubOp:
-            masm.atomicSub32(value, mem);
+            masm.atomicSub32(value, mem, flagTemp);
             break;
           case AtomicFetchAndOp:
-            masm.atomicAnd32(value, mem);
+            masm.atomicAnd32(value, mem, flagTemp);
             break;
           case AtomicFetchOrOp:
-            masm.atomicOr32(value, mem);
+            masm.atomicOr32(value, mem, flagTemp);
             break;
           case AtomicFetchXorOp:
-            masm.atomicXor32(value, mem);
+            masm.atomicXor32(value, mem, flagTemp);
             break;
           default:
             MOZ_CRASH("Invalid typed array atomic operation");
         }
         break;
       default:
         MOZ_CRASH("Invalid typed array type");
     }
 }
 
 template void
 CodeGeneratorARM::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType,
-                                             const Imm32& value, const Address& mem);
+                                             const Imm32& value, const Address& mem,
+                                             Register flagTemp);
 template void
 CodeGeneratorARM::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType,
-                                             const Imm32& value, const BaseIndex& mem);
+                                             const Imm32& value, const BaseIndex& mem,
+                                             Register flagTemp);
 template void
 CodeGeneratorARM::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType,
-                                             const Register& value, const Address& mem);
+                                             const Register& value, const Address& mem,
+                                             Register flagTemp);
 template void
 CodeGeneratorARM::atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType,
-                                             const Register& value, const BaseIndex& mem);
+                                             const Register& value, const BaseIndex& mem,
+                                             Register flagTemp);
 
 
 template <typename T>
 static inline void
 AtomicBinopToTypedArray(CodeGeneratorARM* cg, AtomicOp op,
                         Scalar::Type arrayType, const LAllocation* value, const T& mem,
-                        Register temp1, Register temp2, AnyRegister output)
+                        Register flagTemp, Register outTemp, AnyRegister output)
 {
     if (value->isConstant())
-        cg->atomicBinopToTypedIntArray(op, arrayType, Imm32(ToInt32(value)), mem, temp1, temp2, output);
+        cg->atomicBinopToTypedIntArray(op, arrayType, Imm32(ToInt32(value)), mem, flagTemp, outTemp, output);
     else
-        cg->atomicBinopToTypedIntArray(op, arrayType, ToRegister(value), mem, temp1, temp2, output);
+        cg->atomicBinopToTypedIntArray(op, arrayType, ToRegister(value), mem, flagTemp, outTemp, output);
 }
 
 void
 CodeGeneratorARM::visitAtomicTypedArrayElementBinop(LAtomicTypedArrayElementBinop* lir)
 {
     MOZ_ASSERT(lir->mir()->hasUses());
 
     AnyRegister output = ToAnyRegister(lir->output());
     Register elements = ToRegister(lir->elements());
-    Register temp1 = lir->temp1()->isBogusTemp() ? InvalidReg : ToRegister(lir->temp1());
-    Register temp2 = lir->temp2()->isBogusTemp() ? InvalidReg : ToRegister(lir->temp2());
+    Register flagTemp = ToRegister(lir->temp1());
+    Register outTemp = lir->temp2()->isBogusTemp() ? InvalidReg : ToRegister(lir->temp2());
     const LAllocation* value = lir->value();
 
     Scalar::Type arrayType = lir->mir()->arrayType();
     int width = Scalar::byteSize(arrayType);
 
     if (lir->index()->isConstant()) {
         Address mem(elements, ToInt32(lir->index()) * width);
-        AtomicBinopToTypedArray(this, lir->mir()->operation(), arrayType, value, mem, temp1, temp2, output);
+        AtomicBinopToTypedArray(this, lir->mir()->operation(), arrayType, value, mem, flagTemp, outTemp, output);
     } else {
         BaseIndex mem(elements, ToRegister(lir->index()), ScaleFromElemWidth(width));
-        AtomicBinopToTypedArray(this, lir->mir()->operation(), arrayType, value, mem, temp1, temp2, output);
+        AtomicBinopToTypedArray(this, lir->mir()->operation(), arrayType, value, mem, flagTemp, outTemp, output);
     }
 }
 
 template <typename T>
 static inline void
-AtomicBinopToTypedArray(CodeGeneratorARM* cg, AtomicOp op,
-                        Scalar::Type arrayType, const LAllocation* value, const T& mem)
+AtomicBinopToTypedArray(CodeGeneratorARM* cg, AtomicOp op, Scalar::Type arrayType,
+                        const LAllocation* value, const T& mem, Register flagTemp)
 {
     if (value->isConstant())
-        cg->atomicBinopToTypedIntArray(op, arrayType, Imm32(ToInt32(value)), mem);
+        cg->atomicBinopToTypedIntArray(op, arrayType, Imm32(ToInt32(value)), mem, flagTemp);
     else
-        cg->atomicBinopToTypedIntArray(op, arrayType, ToRegister(value), mem);
+        cg->atomicBinopToTypedIntArray(op, arrayType, ToRegister(value), mem, flagTemp);
 }
 
 void
 CodeGeneratorARM::visitAtomicTypedArrayElementBinopForEffect(LAtomicTypedArrayElementBinopForEffect* lir)
 {
     MOZ_ASSERT(!lir->mir()->hasUses());
 
     Register elements = ToRegister(lir->elements());
+    Register flagTemp = ToRegister(lir->flagTemp());
     const LAllocation* value = lir->value();
     Scalar::Type arrayType = lir->mir()->arrayType();
     int width = Scalar::byteSize(arrayType);
 
     if (lir->index()->isConstant()) {
         Address mem(elements, ToInt32(lir->index()) * width);
-        AtomicBinopToTypedArray(this, lir->mir()->operation(), arrayType, value, mem);
+        AtomicBinopToTypedArray(this, lir->mir()->operation(), arrayType, value, mem, flagTemp);
     } else {
         BaseIndex mem(elements, ToRegister(lir->index()), ScaleFromElemWidth(width));
-        AtomicBinopToTypedArray(this, lir->mir()->operation(), arrayType, value, mem);
+        AtomicBinopToTypedArray(this, lir->mir()->operation(), arrayType, value, mem, flagTemp);
     }
 }
 
 void
 CodeGeneratorARM::visitAsmJSCall(LAsmJSCall* ins)
 {
     MAsmJSCall* mir = ins->mir();
 
@@ -2308,68 +2322,68 @@ void
 CodeGeneratorARM::visitAsmJSAtomicBinopHeap(LAsmJSAtomicBinopHeap* ins)
 {
     MOZ_ASSERT(ins->mir()->hasUses());
     MOZ_ASSERT(ins->addrTemp()->isBogusTemp());
 
     MAsmJSAtomicBinopHeap* mir = ins->mir();
     Scalar::Type vt = mir->accessType();
     Register ptrReg = ToRegister(ins->ptr());
-    Register temp = ins->temp()->isBogusTemp() ? InvalidReg : ToRegister(ins->temp());
+    Register flagTemp = ToRegister(ins->flagTemp());
     const LAllocation* value = ins->value();
     AtomicOp op = mir->operation();
 
     BaseIndex srcAddr(HeapReg, ptrReg, TimesOne);
 
     uint32_t maybeCmpOffset = 0;
     if (mir->needsBoundsCheck()) {
         BufferOffset bo = masm.ma_BoundsCheck(ptrReg);
         maybeCmpOffset = bo.getOffset();
         masm.ma_b(gen->outOfBoundsLabel(), Assembler::AboveOrEqual);
     }
 
     if (value->isConstant())
         atomicBinopToTypedIntArray(op, vt == Scalar::Uint32 ? Scalar::Int32 : vt,
-                                   Imm32(ToInt32(value)), srcAddr, temp, InvalidReg,
+                                   Imm32(ToInt32(value)), srcAddr, flagTemp, InvalidReg,
                                    ToAnyRegister(ins->output()));
     else
         atomicBinopToTypedIntArray(op, vt == Scalar::Uint32 ? Scalar::Int32 : vt,
-                                   ToRegister(value), srcAddr, temp, InvalidReg,
+                                   ToRegister(value), srcAddr, flagTemp, InvalidReg,
                                    ToAnyRegister(ins->output()));
 
     if (mir->needsBoundsCheck())
         masm.append(AsmJSHeapAccess(maybeCmpOffset));
 }
 
 void
 CodeGeneratorARM::visitAsmJSAtomicBinopHeapForEffect(LAsmJSAtomicBinopHeapForEffect* ins)
 {
     MOZ_ASSERT(!ins->mir()->hasUses());
-    MOZ_ASSERT(ins->temp()->isBogusTemp());
     MOZ_ASSERT(ins->addrTemp()->isBogusTemp());
 
     MAsmJSAtomicBinopHeap* mir = ins->mir();
     Scalar::Type vt = mir->accessType();
     Register ptrReg = ToRegister(ins->ptr());
+    Register flagTemp = ToRegister(ins->flagTemp());
     const LAllocation* value = ins->value();
     AtomicOp op = mir->operation();
 
     BaseIndex srcAddr(HeapReg, ptrReg, TimesOne);
 
     uint32_t maybeCmpOffset = 0;
     if (mir->needsBoundsCheck()) {
         BufferOffset bo = masm.ma_BoundsCheck(ptrReg);
         maybeCmpOffset = bo.getOffset();
         masm.ma_b(gen->outOfBoundsLabel(), Assembler::AboveOrEqual);
     }
 
     if (value->isConstant())
-        atomicBinopToTypedIntArray(op, vt, Imm32(ToInt32(value)), srcAddr);
+        atomicBinopToTypedIntArray(op, vt, Imm32(ToInt32(value)), srcAddr, flagTemp);
     else
-        atomicBinopToTypedIntArray(op, vt, ToRegister(value), srcAddr);
+        atomicBinopToTypedIntArray(op, vt, ToRegister(value), srcAddr, flagTemp);
 
     if (mir->needsBoundsCheck())
         masm.append(AsmJSHeapAccess(maybeCmpOffset));
 }
 
 void
 CodeGeneratorARM::visitAsmJSAtomicBinopCallout(LAsmJSAtomicBinopCallout* ins)
 {
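
Worth spelling out the register contract that replaces the old overloaded temp1/temp2 in the Uint32 path above: flagTemp is always the strex status scratch, and outTemp holds the integer result only long enough to widen it. A sketch using the names from this patch:

    // flagTemp: receives the strex status flag; required for every operation.
    // outTemp:  integer landing spot for the old value, needed only because
    //           a Uint32 result can exceed INT32_MAX and must become a double.
    masm.atomicFetchAdd32(value, mem, flagTemp, outTemp);
    masm.convertUInt32ToDouble(outTemp, output.fpu());
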
--- a/js/src/jit/arm/CodeGenerator-arm.h
+++ b/js/src/jit/arm/CodeGenerator-arm.h
@@ -216,21 +216,23 @@ class CodeGeneratorARM : public CodeGene
 
     void generateInvalidateEpilogue();
 
     void visitRandom(LRandom* ins);
 
     // Generating a result.
     template<typename S, typename T>
     void atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType, const S& value,
-                                    const T& mem, Register temp1, Register temp2, AnyRegister output);
+                                    const T& mem, Register flagTemp, Register outTemp,
+                                    AnyRegister output);
 
     // Generating no result.
     template<typename S, typename T>
-    void atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType, const S& value, const T& mem);
+    void atomicBinopToTypedIntArray(AtomicOp op, Scalar::Type arrayType, const S& value,
+                                    const T& mem, Register flagTemp);
 
   protected:
     void visitEffectiveAddress(LEffectiveAddress* ins);
     void visitUDiv(LUDiv* ins);
     void visitUMod(LUMod* ins);
     void visitSoftUDivOrMod(LSoftUDivOrMod* ins);
 
   public:
--- a/js/src/jit/arm/Lowering-arm.cpp
+++ b/js/src/jit/arm/Lowering-arm.cpp
@@ -587,40 +587,39 @@ LIRGeneratorARM::visitAtomicTypedArrayEl
     MOZ_ASSERT(ins->index()->type() == MIRType_Int32);
 
     const LUse elements = useRegister(ins->elements());
     const LAllocation index = useRegisterOrConstant(ins->index());
     const LAllocation value = useRegister(ins->value());
 
     if (!ins->hasUses()) {
         LAtomicTypedArrayElementBinopForEffect* lir =
-            new(alloc()) LAtomicTypedArrayElementBinopForEffect(elements, index, value);
+            new(alloc()) LAtomicTypedArrayElementBinopForEffect(elements, index, value,
+                                                                /* flagTemp= */ temp());
         add(lir, ins);
         return;
     }
 
-    // For most operations we don't need any temps because there are
-    // enough scratch registers.  tempDef2 is never needed on ARM.
-    //
     // For a Uint32Array with a known double result we need a temp for
-    // the intermediate output, this is tempDef1.
+    // the intermediate output.
     //
     // Optimization opportunity (bug 1077317): We can do better by
     // allowing 'value' to remain as an imm32 if it is small enough to
     // fit in an instruction.
 
-    LDefinition tempDef1 = LDefinition::BogusTemp();
-    LDefinition tempDef2 = LDefinition::BogusTemp();
+    LDefinition flagTemp = temp();
+    LDefinition outTemp = LDefinition::BogusTemp();
 
     if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type()))
-        tempDef1 = temp();
+        outTemp = temp();
+
+    // On ARM, map flagTemp to temp1 and outTemp to temp2, at least for now.
 
     LAtomicTypedArrayElementBinop* lir =
-        new(alloc()) LAtomicTypedArrayElementBinop(elements, index, value, tempDef1, tempDef2);
-
+        new(alloc()) LAtomicTypedArrayElementBinop(elements, index, value, flagTemp, outTemp);
     define(lir, ins);
 }
 
 void
 LIRGeneratorARM::visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement* ins)
 {
     MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
     MOZ_ASSERT(ins->arrayType() != Scalar::Float64);
@@ -707,26 +706,27 @@ LIRGeneratorARM::visitAsmJSAtomicBinopHe
                                                   useRegisterAtStart(ins->value()));
         defineReturn(lir, ins);
         return;
     }
 
     if (!ins->hasUses()) {
         LAsmJSAtomicBinopHeapForEffect* lir =
             new(alloc()) LAsmJSAtomicBinopHeapForEffect(useRegister(ptr),
-                                                        useRegister(ins->value()));
+                                                        useRegister(ins->value()),
+                                                        /* flagTemp= */ temp());
         add(lir, ins);
         return;
     }
 
     LAsmJSAtomicBinopHeap* lir =
         new(alloc()) LAsmJSAtomicBinopHeap(useRegister(ptr),
                                            useRegister(ins->value()),
-                                           LDefinition::BogusTemp());
-
+                                           /* temp = */ LDefinition::BogusTemp(),
+                                           /* flagTemp= */ temp());
     define(lir, ins);
 }
 
 void
 LIRGeneratorARM::visitSubstr(MSubstr* ins)
 {
     LSubstr* lir = new (alloc()) LSubstr(useRegister(ins->string()),
                                          useRegister(ins->begin()),
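
The lowering and codegen sides now share an invariant: on ARM the flag temp is always a real register, never BogusTemp, since the code generator calls ToRegister() on it unconditionally. The pairing, excerpted from this patch:

    // Lowering-arm.cpp: always reserve the temp...
    LDefinition flagTemp = temp();
    // CodeGenerator-arm.cpp: ...so this unconditional ToRegister() is safe.
    Register flagTemp = ToRegister(lir->temp1());
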
--- a/js/src/jit/arm/MacroAssembler-arm.cpp
+++ b/js/src/jit/arm/MacroAssembler-arm.cpp
@@ -4491,67 +4491,68 @@ js::jit::MacroAssemblerARMCompat::atomic
 template void
 js::jit::MacroAssemblerARMCompat::atomicExchange(int nbytes, bool signExtend,
                                                  const BaseIndex& address, Register value,
                                                  Register output);
 
 template<typename T>
 void
 MacroAssemblerARMCompat::atomicFetchOp(int nbytes, bool signExtend, AtomicOp op, const Imm32& value,
-                                       const T& mem, Register temp, Register output)
+                                       const T& mem, Register flagTemp, Register output)
 {
     // The Imm32 case is not needed yet because lowering always forces
     // the value into a register at present (bug 1077317).
     //
     // This would be useful for immediates small enough to fit into
     // add/sub/and/or/xor.
     MOZ_CRASH("Feature NYI");
 }
 
 // General algorithm:
 //
 //     ...    ptr, <addr>         ; compute address of item
 //     dmb
 // L0  ldrex* output, [ptr]
 //     sxt*   output, output, 0   ; sign-extend if applicable
 //     OP     tmp, output, value  ; compute value to store
-//     strex* tmp, tmp, [ptr]
-//     cmp    tmp, 1
+//     strex* tmp2, tmp, [ptr]    ; tmp2 required by strex
+//     cmp    tmp2, 1
 //     beq    L0                  ; failed - location is dirty, retry
 //     dmb                        ; ordering barrier required
 //
 // Also see notes above at compareExchange re the barrier strategy.
 //
 // Observe that the value operated into the memory element need not be
 // sign-extended: no OP makes use of bits to the left of those implied by
 // the width of the element, so neither the output nor the bits stored
 // are affected by any garbage in the high bits.
 
 template<typename T>
 void
 MacroAssemblerARMCompat::atomicFetchOp(int nbytes, bool signExtend, AtomicOp op,
-                                       const Register& value, const T& mem, Register temp,
+                                       const Register& value, const T& mem, Register flagTemp,
                                        Register output)
 {
     // Fork for non-word operations on ARMv6.
     //
     // Bug 1077321: We may further optimize for ARMv8 (AArch32) here.
-    if (nbytes < 4 && !HasLDSTREXBHD()) {
-        atomicFetchOpARMv6(nbytes, signExtend, op, value, mem, temp, output);
-    } else {
-        MOZ_ASSERT(temp == InvalidReg);
-        atomicFetchOpARMv7(nbytes, signExtend, op, value, mem, output);
-    }
+    if (nbytes < 4 && !HasLDSTREXBHD())
+        atomicFetchOpARMv6(nbytes, signExtend, op, value, mem, flagTemp, output);
+    else
+        atomicFetchOpARMv7(nbytes, signExtend, op, value, mem, flagTemp, output);
 }
 
 template<typename T>
 void
 MacroAssemblerARMCompat::atomicFetchOpARMv7(int nbytes, bool signExtend, AtomicOp op,
-                                            const Register& value, const T& mem, Register output)
-{
+                                            const Register& value, const T& mem, Register flagTemp,
+                                            Register output)
+{
+    MOZ_ASSERT(flagTemp != InvalidReg);
+
     Label again;
 
     AutoRegisterScope scratch2(asMasm(), secondScratchReg_);
     Register ptr = computePointer(mem, scratch2);
 
     ma_dmb();
 
     ScratchRegisterScope scratch(asMasm());
@@ -4585,86 +4586,90 @@ MacroAssemblerARMCompat::atomicFetchOpAR
         break;
       case AtomicFetchOrOp:
         as_orr(scratch, output, O2Reg(value));
         break;
       case AtomicFetchXorOp:
         as_eor(scratch, output, O2Reg(value));
         break;
     }
+    // Rd must differ from the two other arguments to strex.
     switch (nbytes) {
       case 1:
-        as_strexb(scratch, scratch, ptr);
+        as_strexb(flagTemp, scratch, ptr);
         break;
       case 2:
-        as_strexh(scratch, scratch, ptr);
+        as_strexh(flagTemp, scratch, ptr);
         break;
       case 4:
-        as_strex(scratch, scratch, ptr);
+        as_strex(flagTemp, scratch, ptr);
         break;
     }
-    as_cmp(scratch, Imm8(1));
+    as_cmp(flagTemp, Imm8(1));
     as_b(&again, Equal);
     ma_dmb();
 }
 
 template<typename T>
 void
 MacroAssemblerARMCompat::atomicFetchOpARMv6(int nbytes, bool signExtend, AtomicOp op,
-                                            const Register& value, const T& mem, Register temp,
+                                            const Register& value, const T& mem, Register flagTemp,
                                             Register output)
 {
     // Bug 1077318: Must use read-modify-write with LDREX / STREX.
     MOZ_ASSERT(nbytes == 1 || nbytes == 2);
     MOZ_CRASH("NYI");
 }
 
 template<typename T>
 void
 MacroAssemblerARMCompat::atomicEffectOp(int nbytes, AtomicOp op, const Register& value,
-                                        const T& mem)
+                                        const T& mem, Register flagTemp)
 {
     // Fork for non-word operations on ARMv6.
     //
     // Bug 1077321: We may further optimize for ARMv8 (AArch32) here.
     if (nbytes < 4 && !HasLDSTREXBHD())
-        atomicEffectOpARMv6(nbytes, op, value, mem);
+        atomicEffectOpARMv6(nbytes, op, value, mem, flagTemp);
     else
-        atomicEffectOpARMv7(nbytes, op, value, mem);
+        atomicEffectOpARMv7(nbytes, op, value, mem, flagTemp);
 }
 
 template<typename T>
 void
 MacroAssemblerARMCompat::atomicEffectOp(int nbytes, AtomicOp op, const Imm32& value,
-                                        const T& mem)
+                                        const T& mem, Register flagTemp)
 {
     // The Imm32 case is not needed yet because lowering always forces
     // the value into a register at present (bug 1077317).
     //
     // This would be useful for immediates small enough to fit into
     // add/sub/and/or/xor.
     MOZ_CRASH("NYI");
 }
 
-// Uses both scratch registers, one for the address and one for the temp:
+// Uses both scratch registers, one for the address and one for a temp,
+// but needs two temps for strex:
 //
 //     ...    ptr, <addr>         ; compute address of item
 //     dmb
 // L0  ldrex* temp, [ptr]
 //     OP     temp, temp, value   ; compute value to store
-//     strex* temp, temp, [ptr]
-//     cmp    temp, 1
+//     strex* temp2, temp, [ptr]
+//     cmp    temp2, 1
 //     beq    L0                  ; failed - location is dirty, retry
 //     dmb                        ; ordering barrier required
 
 template<typename T>
 void
 MacroAssemblerARMCompat::atomicEffectOpARMv7(int nbytes, AtomicOp op, const Register& value,
-                                             const T& mem)
-{
+                                             const T& mem, Register flagTemp)
+{
+    MOZ_ASSERT(flagTemp != InvalidReg);
+
     Label again;
 
     AutoRegisterScope scratch2(asMasm(), secondScratchReg_);
     Register ptr = computePointer(mem, scratch2);
 
     ma_dmb();
 
     ScratchRegisterScope scratch(asMasm());
@@ -4693,71 +4698,72 @@ MacroAssemblerARMCompat::atomicEffectOpA
         break;
       case AtomicFetchOrOp:
         as_orr(scratch, scratch, O2Reg(value));
         break;
       case AtomicFetchXorOp:
         as_eor(scratch, scratch, O2Reg(value));
         break;
     }
+    // Rd must differ from the two other arguments to strex.
     switch (nbytes) {
       case 1:
-        as_strexb(scratch, scratch, ptr);
+        as_strexb(flagTemp, scratch, ptr);
         break;
       case 2:
-        as_strexh(scratch, scratch, ptr);
+        as_strexh(flagTemp, scratch, ptr);
         break;
       case 4:
-        as_strex(scratch, scratch, ptr);
+        as_strex(flagTemp, scratch, ptr);
         break;
     }
-    as_cmp(scratch, Imm8(1));
+    as_cmp(flagTemp, Imm8(1));
     as_b(&again, Equal);
     ma_dmb();
 }
 
 template<typename T>
 void
 MacroAssemblerARMCompat::atomicEffectOpARMv6(int nbytes, AtomicOp op, const Register& value,
-                                             const T& mem)
+                                             const T& mem, Register flagTemp)
 {
     // Bug 1077318: Must use read-modify-write with LDREX / STREX.
     MOZ_ASSERT(nbytes == 1 || nbytes == 2);
     MOZ_CRASH("NYI");
 }
 
 template void
 js::jit::MacroAssemblerARMCompat::atomicFetchOp(int nbytes, bool signExtend, AtomicOp op,
                                                 const Imm32& value, const Address& mem,
-                                                Register temp, Register output);
+                                                Register flagTemp, Register output);
 template void
 js::jit::MacroAssemblerARMCompat::atomicFetchOp(int nbytes, bool signExtend, AtomicOp op,
                                                 const Imm32& value, const BaseIndex& mem,
-                                                Register temp, Register output);
+                                                Register flagTemp, Register output);
 template void
 js::jit::MacroAssemblerARMCompat::atomicFetchOp(int nbytes, bool signExtend, AtomicOp op,
                                                 const Register& value, const Address& mem,
-                                                Register temp, Register output);
+                                                Register flagTemp, Register output);
 template void
 js::jit::MacroAssemblerARMCompat::atomicFetchOp(int nbytes, bool signExtend, AtomicOp op,
                                                 const Register& value, const BaseIndex& mem,
-                                                Register temp, Register output);
+                                                Register flagTemp, Register output);
 
 template void
 js::jit::MacroAssemblerARMCompat::atomicEffectOp(int nbytes, AtomicOp op, const Imm32& value,
-                                                 const Address& mem);
+                                                 const Address& mem, Register flagTemp);
 template void
 js::jit::MacroAssemblerARMCompat::atomicEffectOp(int nbytes, AtomicOp op, const Imm32& value,
-                                                 const BaseIndex& mem);
+                                                 const BaseIndex& mem, Register flagTemp);
 template void
 js::jit::MacroAssemblerARMCompat::atomicEffectOp(int nbytes, AtomicOp op, const Register& value,
-                                                 const Address& mem);
+                                                 const Address& mem, Register flagTemp);
 template void
 js::jit::MacroAssemblerARMCompat::atomicEffectOp(int nbytes, AtomicOp op, const Register& value,
-                                                 const BaseIndex& mem);
+                                                 const BaseIndex& mem, Register flagTemp);
 
 void
 MacroAssemblerARMCompat::profilerEnterFrame(Register framePtr, Register scratch)
 {
     AbsoluteAddress activation(GetJitContext()->runtime->addressOfProfilingActivation());
     loadPtr(activation, scratch);
     storePtr(framePtr, Address(scratch, JitActivation::offsetOfLastProfilingFrame()));
     storePtr(ImmPtr(nullptr), Address(scratch, JitActivation::offsetOfLastProfilingCallSite()));
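
As a reference point for the ldrex/strex loops documented above (not the code the JIT emits), the dmb / load-linked / op / store-conditional / retry / dmb sequence has the semantics of a sequentially consistent read-modify-write. A sketch for the 32-bit add case:

    #include <atomic>
    #include <cstdint>

    // Reference semantics only: returns the old value, like the
    // atomicFetchOp expansion with AtomicFetchAddOp and nbytes == 4.
    uint32_t referenceFetchAdd32(std::atomic<uint32_t>* addr, uint32_t value)
    {
        return addr->fetch_add(value, std::memory_order_seq_cst);
    }
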
--- a/js/src/jit/arm/MacroAssembler-arm.h
+++ b/js/src/jit/arm/MacroAssembler-arm.h
@@ -1344,41 +1344,45 @@ class MacroAssemblerARMCompat : public M
                              Register output);
 
     template<typename T>
     void atomicExchange(int nbytes, bool signExtend, const T& address, Register value,
                         Register output);
 
     template<typename T>
     void atomicFetchOpARMv6(int nbytes, bool signExtend, AtomicOp op, const Register& value,
-                            const T& mem, Register temp, Register output);
+                            const T& mem, Register flagTemp, Register output);
 
     template<typename T>
     void atomicFetchOpARMv7(int nbytes, bool signExtend, AtomicOp op, const Register& value,
-                            const T& mem, Register output);
+                            const T& mem, Register flagTemp, Register output);
 
     template<typename T>
     void atomicFetchOp(int nbytes, bool signExtend, AtomicOp op, const Imm32& value,
-                       const T& address, Register temp, Register output);
+                       const T& address, Register flagTemp, Register output);
 
     template<typename T>
     void atomicFetchOp(int nbytes, bool signExtend, AtomicOp op, const Register& value,
-                       const T& address, Register temp, Register output);
+                       const T& address, Register flagTemp, Register output);
 
     template<typename T>
-    void atomicEffectOpARMv6(int nbytes, AtomicOp op, const Register& value, const T& address);
+    void atomicEffectOpARMv6(int nbytes, AtomicOp op, const Register& value, const T& address,
+                             Register flagTemp);
 
     template<typename T>
-    void atomicEffectOpARMv7(int nbytes, AtomicOp op, const Register& value, const T& address);
+    void atomicEffectOpARMv7(int nbytes, AtomicOp op, const Register& value, const T& address,
+                             Register flagTemp);
 
     template<typename T>
-    void atomicEffectOp(int nbytes, AtomicOp op, const Imm32& value, const T& address);
+    void atomicEffectOp(int nbytes, AtomicOp op, const Imm32& value, const T& address,
+                             Register flagTemp);
 
     template<typename T>
-    void atomicEffectOp(int nbytes, AtomicOp op, const Register& value, const T& address);
+    void atomicEffectOp(int nbytes, AtomicOp op, const Register& value, const T& address,
+                             Register flagTemp);
 
   public:
     // T in {Address,BaseIndex}
     // S in {Imm32,Register}
 
     template<typename T>
     void compareExchange8SignExtend(const T& mem, Register oldval, Register newval, Register output)
     {
@@ -1445,26 +1449,26 @@ class MacroAssemblerARMCompat : public M
     void atomicFetchAdd16ZeroExtend(const S& value, const T& mem, Register temp, Register output) {
         atomicFetchOp(2, false, AtomicFetchAddOp, value, mem, temp, output);
     }
     template<typename T, typename S>
     void atomicFetchAdd32(const S& value, const T& mem, Register temp, Register output) {
         atomicFetchOp(4, false, AtomicFetchAddOp, value, mem, temp, output);
     }
     template <typename T, typename S>
-    void atomicAdd8(const S& value, const T& mem) {
-        atomicEffectOp(1, AtomicFetchAddOp, value, mem);
+    void atomicAdd8(const S& value, const T& mem, Register flagTemp) {
+        atomicEffectOp(1, AtomicFetchAddOp, value, mem, flagTemp);
     }
     template <typename T, typename S>
-    void atomicAdd16(const S& value, const T& mem) {
-        atomicEffectOp(2, AtomicFetchAddOp, value, mem);
+    void atomicAdd16(const S& value, const T& mem, Register flagTemp) {
+        atomicEffectOp(2, AtomicFetchAddOp, value, mem, flagTemp);
     }
     template <typename T, typename S>
-    void atomicAdd32(const S& value, const T& mem) {
-        atomicEffectOp(4, AtomicFetchAddOp, value, mem);
+    void atomicAdd32(const S& value, const T& mem, Register flagTemp) {
+        atomicEffectOp(4, AtomicFetchAddOp, value, mem, flagTemp);
     }
 
     template<typename T, typename S>
     void atomicFetchSub8SignExtend(const S& value, const T& mem, Register temp, Register output) {
         atomicFetchOp(1, true, AtomicFetchSubOp, value, mem, temp, output);
     }
     template<typename T, typename S>
     void atomicFetchSub8ZeroExtend(const S& value, const T& mem, Register temp, Register output) {
@@ -1478,26 +1482,26 @@ class MacroAssemblerARMCompat : public M
     void atomicFetchSub16ZeroExtend(const S& value, const T& mem, Register temp, Register output) {
         atomicFetchOp(2, false, AtomicFetchSubOp, value, mem, temp, output);
     }
     template<typename T, typename S>
     void atomicFetchSub32(const S& value, const T& mem, Register temp, Register output) {
         atomicFetchOp(4, false, AtomicFetchSubOp, value, mem, temp, output);
     }
     template <typename T, typename S>
-    void atomicSub8(const S& value, const T& mem) {
-        atomicEffectOp(1, AtomicFetchSubOp, value, mem);
+    void atomicSub8(const S& value, const T& mem, Register flagTemp) {
+        atomicEffectOp(1, AtomicFetchSubOp, value, mem, flagTemp);
     }
     template <typename T, typename S>
-    void atomicSub16(const S& value, const T& mem) {
-        atomicEffectOp(2, AtomicFetchSubOp, value, mem);
+    void atomicSub16(const S& value, const T& mem, Register flagTemp) {
+        atomicEffectOp(2, AtomicFetchSubOp, value, mem, flagTemp);
     }
     template <typename T, typename S>
-    void atomicSub32(const S& value, const T& mem) {
-        atomicEffectOp(4, AtomicFetchSubOp, value, mem);
+    void atomicSub32(const S& value, const T& mem, Register flagTemp) {
+        atomicEffectOp(4, AtomicFetchSubOp, value, mem, flagTemp);
     }
 
     template<typename T, typename S>
     void atomicFetchAnd8SignExtend(const S& value, const T& mem, Register temp, Register output) {
         atomicFetchOp(1, true, AtomicFetchAndOp, value, mem, temp, output);
     }
     template<typename T, typename S>
     void atomicFetchAnd8ZeroExtend(const S& value, const T& mem, Register temp, Register output) {
@@ -1511,26 +1515,26 @@ class MacroAssemblerARMCompat : public M
     void atomicFetchAnd16ZeroExtend(const S& value, const T& mem, Register temp, Register output) {
         atomicFetchOp(2, false, AtomicFetchAndOp, value, mem, temp, output);
     }
     template<typename T, typename S>
     void atomicFetchAnd32(const S& value, const T& mem, Register temp, Register output) {
         atomicFetchOp(4, false, AtomicFetchAndOp, value, mem, temp, output);
     }
     template <typename T, typename S>
-    void atomicAnd8(const S& value, const T& mem) {
-        atomicEffectOp(1, AtomicFetchAndOp, value, mem);
+    void atomicAnd8(const S& value, const T& mem, Register flagTemp) {
+        atomicEffectOp(1, AtomicFetchAndOp, value, mem, flagTemp);
     }
     template <typename T, typename S>
-    void atomicAnd16(const S& value, const T& mem) {
-        atomicEffectOp(2, AtomicFetchAndOp, value, mem);
+    void atomicAnd16(const S& value, const T& mem, Register flagTemp) {
+        atomicEffectOp(2, AtomicFetchAndOp, value, mem, flagTemp);
     }
     template <typename T, typename S>
-    void atomicAnd32(const S& value, const T& mem) {
-        atomicEffectOp(4, AtomicFetchAndOp, value, mem);
+    void atomicAnd32(const S& value, const T& mem, Register flagTemp) {
+        atomicEffectOp(4, AtomicFetchAndOp, value, mem, flagTemp);
     }
 
     template<typename T, typename S>
     void atomicFetchOr8SignExtend(const S& value, const T& mem, Register temp, Register output) {
         atomicFetchOp(1, true, AtomicFetchOrOp, value, mem, temp, output);
     }
     template<typename T, typename S>
     void atomicFetchOr8ZeroExtend(const S& value, const T& mem, Register temp, Register output) {
@@ -1544,26 +1548,26 @@ class MacroAssemblerARMCompat : public M
     void atomicFetchOr16ZeroExtend(const S& value, const T& mem, Register temp, Register output) {
         atomicFetchOp(2, false, AtomicFetchOrOp, value, mem, temp, output);
     }
     template<typename T, typename S>
     void atomicFetchOr32(const S& value, const T& mem, Register temp, Register output) {
         atomicFetchOp(4, false, AtomicFetchOrOp, value, mem, temp, output);
     }
     template <typename T, typename S>
-    void atomicOr8(const S& value, const T& mem) {
-        atomicEffectOp(1, AtomicFetchOrOp, value, mem);
+    void atomicOr8(const S& value, const T& mem, Register flagTemp) {
+        atomicEffectOp(1, AtomicFetchOrOp, value, mem, flagTemp);
     }
     template <typename T, typename S>
-    void atomicOr16(const S& value, const T& mem) {
-        atomicEffectOp(2, AtomicFetchOrOp, value, mem);
+    void atomicOr16(const S& value, const T& mem, Register flagTemp) {
+        atomicEffectOp(2, AtomicFetchOrOp, value, mem, flagTemp);
     }
     template <typename T, typename S>
-    void atomicOr32(const S& value, const T& mem) {
-        atomicEffectOp(4, AtomicFetchOrOp, value, mem);
+    void atomicOr32(const S& value, const T& mem, Register flagTemp) {
+        atomicEffectOp(4, AtomicFetchOrOp, value, mem, flagTemp);
     }
 
     template<typename T, typename S>
     void atomicFetchXor8SignExtend(const S& value, const T& mem, Register temp, Register output) {
         atomicFetchOp(1, true, AtomicFetchXorOp, value, mem, temp, output);
     }
     template<typename T, typename S>
     void atomicFetchXor8ZeroExtend(const S& value, const T& mem, Register temp, Register output) {
@@ -1577,26 +1581,26 @@ class MacroAssemblerARMCompat : public M
     void atomicFetchXor16ZeroExtend(const S& value, const T& mem, Register temp, Register output) {
         atomicFetchOp(2, false, AtomicFetchXorOp, value, mem, temp, output);
     }
     template<typename T, typename S>
     void atomicFetchXor32(const S& value, const T& mem, Register temp, Register output) {
         atomicFetchOp(4, false, AtomicFetchXorOp, value, mem, temp, output);
     }
     template <typename T, typename S>
-    void atomicXor8(const S& value, const T& mem) {
-        atomicEffectOp(1, AtomicFetchXorOp, value, mem);
+    void atomicXor8(const S& value, const T& mem, Register flagTemp) {
+        atomicEffectOp(1, AtomicFetchXorOp, value, mem, flagTemp);
     }
     template <typename T, typename S>
-    void atomicXor16(const S& value, const T& mem) {
-        atomicEffectOp(2, AtomicFetchXorOp, value, mem);
+    void atomicXor16(const S& value, const T& mem, Register flagTemp) {
+        atomicEffectOp(2, AtomicFetchXorOp, value, mem, flagTemp);
     }
     template <typename T, typename S>
-    void atomicXor32(const S& value, const T& mem) {
-        atomicEffectOp(4, AtomicFetchXorOp, value, mem);
+    void atomicXor32(const S& value, const T& mem, Register flagTemp) {
+        atomicEffectOp(4, AtomicFetchXorOp, value, mem, flagTemp);
     }
 
     void clampIntToUint8(Register reg) {
        // Look at (reg >> 8): if it is 0, then reg shouldn't be clamped; if it
        // is < 0, then we want to clamp to 0; otherwise, we wish to clamp to 255.
         ScratchRegisterScope scratch(asMasm());
         as_mov(scratch, asr(reg, 8), SetCC);
         ma_mov(Imm32(0xff), reg, LeaveCC, NotEqual);
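
Caller-side, every effect-only wrapper now threads the flag temp through explicitly. A hedged example; valueReg, baseReg, and flagTempReg are hypothetical allocator-chosen registers:

    // Atomically OR valueReg into the 32-bit word at [baseReg]; flagTempReg
    // is the extra GPR reserved for the strex status flag.
    masm.atomicOr32(valueReg, Address(baseReg, 0), flagTempReg);
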
--- a/js/src/jit/shared/LIR-shared.h
+++ b/js/src/jit/shared/LIR-shared.h
@@ -5232,39 +5232,46 @@ class LAtomicTypedArrayElementBinop : pu
     }
 
     const MAtomicTypedArrayElementBinop* mir() const {
         return mir_->toAtomicTypedArrayElementBinop();
     }
 };
 
 // Atomic binary operation where the result is discarded.
-class LAtomicTypedArrayElementBinopForEffect : public LInstructionHelper<0, 3, 0>
+class LAtomicTypedArrayElementBinopForEffect : public LInstructionHelper<0, 3, 1>
 {
   public:
     LIR_HEADER(AtomicTypedArrayElementBinopForEffect)
 
     LAtomicTypedArrayElementBinopForEffect(const LAllocation& elements, const LAllocation& index,
-                                           const LAllocation& value)
+                                           const LAllocation& value,
+                                           const LDefinition& flagTemp = LDefinition::BogusTemp())
     {
         setOperand(0, elements);
         setOperand(1, index);
         setOperand(2, value);
+        setTemp(0, flagTemp);
     }
 
     const LAllocation* elements() {
         return getOperand(0);
     }
     const LAllocation* index() {
         return getOperand(1);
     }
     const LAllocation* value() {
         return getOperand(2);
     }
 
+    // Temp that may be used on LL/SC platforms for the flag result of the store.
+    const LDefinition* flagTemp() {
+        return getTemp(0);
+    }
+
     const MAtomicTypedArrayElementBinop* mir() const {
         return mir_->toAtomicTypedArrayElementBinop();
     }
 };
 
 class LEffectiveAddress : public LInstructionHelper<1, 2, 0>
 {
   public:
@@ -6690,82 +6697,95 @@ class LAsmJSAtomicExchangeHeap : public 
         setTemp(0, addrTemp);
     }
 
     MAsmJSAtomicExchangeHeap* mir() const {
         return mir_->toAsmJSAtomicExchangeHeap();
     }
 };
 
-class LAsmJSAtomicBinopHeap : public LInstructionHelper<1, 2, 2>
+class LAsmJSAtomicBinopHeap : public LInstructionHelper<1, 2, 3>
 {
   public:
     LIR_HEADER(AsmJSAtomicBinopHeap);
 
     static const int32_t valueOp = 1;
 
     LAsmJSAtomicBinopHeap(const LAllocation& ptr, const LAllocation& value,
-                          const LDefinition& temp)
+                          const LDefinition& temp,
+                          const LDefinition& flagTemp = LDefinition::BogusTemp())
     {
         setOperand(0, ptr);
         setOperand(1, value);
         setTemp(0, temp);
         setTemp(1, LDefinition::BogusTemp());
+        setTemp(2, flagTemp);
     }
     const LAllocation* ptr() {
         return getOperand(0);
     }
     const LAllocation* value() {
         MOZ_ASSERT(valueOp == 1);
         return getOperand(1);
     }
     const LDefinition* temp() {
         return getTemp(0);
     }
+
+    // Temp that may be used on some platforms to hold a computed address.
     const LDefinition* addrTemp() {
         return getTemp(1);
     }
-
     void setAddrTemp(const LDefinition& addrTemp) {
         setTemp(1, addrTemp);
     }
 
+    // Temp that may be used on LL/SC platforms for the flag result of the store.
+    const LDefinition* flagTemp() {
+        return getTemp(2);
+    }
+
     MAsmJSAtomicBinopHeap* mir() const {
         return mir_->toAsmJSAtomicBinopHeap();
     }
 };
 
 // Atomic binary operation where the result is discarded.
-class LAsmJSAtomicBinopHeapForEffect : public LInstructionHelper<0, 2, 1>
+class LAsmJSAtomicBinopHeapForEffect : public LInstructionHelper<0, 2, 2>
 {
   public:
     LIR_HEADER(AsmJSAtomicBinopHeapForEffect);
-    LAsmJSAtomicBinopHeapForEffect(const LAllocation& ptr, const LAllocation& value)
+    LAsmJSAtomicBinopHeapForEffect(const LAllocation& ptr, const LAllocation& value,
+                                   const LDefinition& flagTemp = LDefinition::BogusTemp())
     {
         setOperand(0, ptr);
         setOperand(1, value);
         setTemp(0, LDefinition::BogusTemp());
+        setTemp(1, flagTemp);
     }
     const LAllocation* ptr() {
         return getOperand(0);
     }
     const LAllocation* value() {
         return getOperand(1);
     }
-    const LDefinition* temp() {
-        return getTemp(0);
-    }
+
+    // Temp that may be used on some platforms to hold a computed address.
     const LDefinition* addrTemp() {
         return getTemp(0);
     }
-
     void setAddrTemp(const LDefinition& addrTemp) {
         setTemp(0, addrTemp);
     }
 
+    // Temp that may be used on LL/SC platforms for the flag result of the store.
+    const LDefinition* flagTemp() {
+        return getTemp(1);
+    }
+
     MAsmJSAtomicBinopHeap* mir() const {
         return mir_->toAsmJSAtomicBinopHeap();
     }
 };
 
 class LAsmJSLoadGlobalVar : public LInstructionHelper<1, 0, 0>
 {
   public:
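
For orientation, LInstructionHelper's template arguments are <Defs, Operands, Temps>, so each temp-count bump in this file is one extra allocator slot:

    // LAtomicTypedArrayElementBinopForEffect : <0, 3, 0> -> <0, 3, 1>  (+flagTemp)
    // LAsmJSAtomicBinopHeap                  : <1, 2, 2> -> <1, 2, 3>  (+flagTemp)
    // LAsmJSAtomicBinopHeapForEffect         : <0, 2, 1> -> <0, 2, 2>  (+flagTemp)
    //
    // The constructor's setTemp(i, ...) and the accessor's getTemp(i) must use
    // the same slot index, as flagTemp()/addrTemp() do above.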