author | Lars T Hansen <lhansen@mozilla.com> |
date | Thu, 23 Oct 2014 14:23:27 +0200 |
changeset 211920 | 983259897284c61f208733ac520ac3f9ba646f09 |
parent 211919 | ab936277cf4ba207714f13d14ddbbfa9996c86fd |
child 211921 | 6b733d690a38570b1af99d4996da87ace77de4e4 |
push id | 27693 |
push user | ryanvm@gmail.com |
push date | Thu, 23 Oct 2014 18:06:22 +0000 |
treeherder | mozilla-central@d8de0d7e52e0 |
reviewers | sstangl, dtc-moz |
bugs | 979594 |
milestone | 36.0a1 |
first release with | nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
|
last release without | nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
|
--- a/js/src/jit/arm/CodeGenerator-arm.cpp
+++ b/js/src/jit/arm/CodeGenerator-arm.cpp
@@ -2242,8 +2242,29 @@ CodeGeneratorARM::visitForkJoinGetSlice(
     MOZ_CRASH("NYI");
 }
 
 JitCode *
 JitRuntime::generateForkJoinGetSliceStub(JSContext *cx)
 {
     MOZ_CRASH("NYI");
 }
+
+void
+CodeGeneratorARM::memoryBarrier(int barrier)
+{
+    // On ARMv6 the optional argument (BarrierST, etc) is ignored.
+    if (barrier == (MembarStoreStore|MembarSynchronizing))
+        masm.ma_dsb(masm.BarrierST);
+    else if (barrier & MembarSynchronizing)
+        masm.ma_dsb();
+    else if (barrier == MembarStoreStore)
+        masm.ma_dmb(masm.BarrierST);
+    else if (barrier)
+        masm.ma_dmb();
+}
+
+bool
+CodeGeneratorARM::visitMemoryBarrier(LMemoryBarrier *ins)
+{
+    memoryBarrier(ins->type());
+    return true;
+}
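
The cases above map the barrier-requirement bits onto ARM's DMB/DSB instructions. As a rough standalone analogue, the same selection logic can be written against C++11 fences. The flag values below are illustrative assumptions (the real MembarStoreStore/MembarSynchronizing constants live in jit/AtomicOp.h, which this patch includes), and the fence mapping is approximate: "dmb st" orders only stores, while a release fence also orders earlier loads.

    #include <atomic>

    // Assumed, illustration-only bit values; not the SpiderMonkey definitions.
    enum MembarBits {
        MembarStoreStore    = 1 << 0,   // store->store ordering
        MembarSynchronizing = 1 << 1    // full synchronization (DSB on ARM)
    };

    void memoryBarrier(int barrier)
    {
        if (barrier & MembarSynchronizing) {
            // DSB is stronger than any C++ fence; seq_cst is the closest
            // portable approximation.
            std::atomic_thread_fence(std::memory_order_seq_cst);
        } else if (barrier == MembarStoreStore) {
            // Store-store ordering; this is where the code above emits "dmb st".
            std::atomic_thread_fence(std::memory_order_release);
        } else if (barrier) {
            std::atomic_thread_fence(std::memory_order_seq_cst);  // full dmb
        }
    }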
--- a/js/src/jit/arm/CodeGenerator-arm.h
+++ b/js/src/jit/arm/CodeGenerator-arm.h
@@ -170,16 +170,18 @@ class CodeGeneratorARM : public CodeGene
     // Functions for LTestVAndBranch.
     Register splitTagForTest(const ValueOperand &value);
 
     bool divICommon(MDiv *mir, Register lhs, Register rhs, Register output, LSnapshot *snapshot,
                     Label &done);
     bool modICommon(MMod *mir, Register lhs, Register rhs, Register output, LSnapshot *snapshot,
                     Label &done);
 
+    void memoryBarrier(int barrier);
+
   public:
     CodeGeneratorARM(MIRGenerator *gen, LIRGraph *graph, MacroAssembler *masm);
 
   public:
     bool visitBox(LBox *box);
     bool visitBoxFloatingPoint(LBoxFloatingPoint *box);
     bool visitUnbox(LUnbox *unbox);
     bool visitValue(LValue *value);
@@ -201,16 +203,18 @@ class CodeGeneratorARM : public CodeGene
     bool visitAsmJSLoadGlobalVar(LAsmJSLoadGlobalVar *ins);
     bool visitAsmJSStoreGlobalVar(LAsmJSStoreGlobalVar *ins);
     bool visitAsmJSLoadFuncPtr(LAsmJSLoadFuncPtr *ins);
     bool visitAsmJSLoadFFIFunc(LAsmJSLoadFFIFunc *ins);
     bool visitAsmJSPassStackArg(LAsmJSPassStackArg *ins);
 
     bool visitForkJoinGetSlice(LForkJoinGetSlice *ins);
 
+    bool visitMemoryBarrier(LMemoryBarrier *ins);
+
     bool generateInvalidateEpilogue();
 
   protected:
     bool visitEffectiveAddress(LEffectiveAddress *ins);
     bool visitUDiv(LUDiv *ins);
     bool visitUMod(LUMod *ins);
     bool visitSoftUDivOrMod(LSoftUDivOrMod *ins);
--- a/js/src/jit/arm/Lowering-arm.cpp
+++ b/js/src/jit/arm/Lowering-arm.cpp
@@ -565,9 +565,74 @@ LIRGeneratorARM::visitSimdSplatX4(MSimdS
 }
 
 bool
 LIRGeneratorARM::visitSimdValueX4(MSimdValueX4 *ins)
 {
     MOZ_CRASH("NYI");
 }
 
-//__aeabi_uidiv
+bool
+LIRGeneratorARM::visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins)
+{
+    MOZ_ASSERT(ins->arrayType() != Scalar::Uint8Clamped);
+    MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
+    MOZ_ASSERT(ins->arrayType() != Scalar::Float64);
+
+    MOZ_ASSERT(ins->elements()->type() == MIRType_Elements);
+    MOZ_ASSERT(ins->index()->type() == MIRType_Int32);
+
+    const LUse elements = useRegister(ins->elements());
+    const LAllocation index = useRegisterOrConstant(ins->index());
+
+    // For most operations we don't need any temps because there are
+    // enough scratch registers.  tempDef2 is never needed on ARM.
+    //
+    // For a Uint32Array with a known double result we need a temp for
+    // the intermediate output, this is tempDef1.
+    //
+    // Optimization opportunity (bug 1077317): We can do better by
+    // allowing 'value' to remain as an imm32 if it is small enough to
+    // fit in an instruction.
+
+    LDefinition tempDef1 = LDefinition::BogusTemp();
+    LDefinition tempDef2 = LDefinition::BogusTemp();
+
+    const LAllocation value = useRegister(ins->value());
+    if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type()))
+        tempDef1 = temp();
+
+    LAtomicTypedArrayElementBinop *lir =
+        new(alloc()) LAtomicTypedArrayElementBinop(elements, index, value, tempDef1, tempDef2);
+
+    return define(lir, ins);
+}
+
+bool
+LIRGeneratorARM::visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins)
+{
+    MOZ_ASSERT(ins->arrayType() != Scalar::Float32);
+    MOZ_ASSERT(ins->arrayType() != Scalar::Float64);
+
+    MOZ_ASSERT(ins->elements()->type() == MIRType_Elements);
+    MOZ_ASSERT(ins->index()->type() == MIRType_Int32);
+
+    const LUse elements = useRegister(ins->elements());
+    const LAllocation index = useRegisterOrConstant(ins->index());
+
+    // If the target is a floating register then we need a temp at the
+    // CodeGenerator level for creating the result.
+    //
+    // Optimization opportunity (bug 1077317): We could do better by
+    // allowing oldval to remain an immediate, if it is small enough
+    // to fit in an instruction.
+
+    const LAllocation newval = useRegister(ins->newval());
+    const LAllocation oldval = useRegister(ins->oldval());
+    LDefinition tempDef = LDefinition::BogusTemp();
+    if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type()))
+        tempDef = temp();
+
+    LCompareExchangeTypedArrayElement *lir =
+        new(alloc()) LCompareExchangeTypedArrayElement(elements, index, oldval, newval, tempDef);
+
+    return define(lir, ins);
+}
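
Both visitors special-case Scalar::Uint32 with a floating-point result type: a fetched uint32 can exceed INT32_MAX, so the JIT must hand the result back to JS as a double, and producing that conversion is what the extra temp is for. A minimal plain-C++ illustration of the underlying issue (not SpiderMonkey code):

    #include <cstdint>
    #include <cstdio>

    int main()
    {
        // An atomic fetch on a Uint32Array may observe a value that does
        // not fit in an int32, so the result must be double-typed.
        uint32_t fetched = 0x90000000u;        // 2415919104 > INT32_MAX
        int32_t asInt32 = (int32_t)fetched;    // reinterpreted: negative, wrong
        double asDouble = (double)fetched;     // exact: doubles hold all uint32
        std::printf("%d vs %.0f\n", asInt32, asDouble);
        return 0;
    }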
--- a/js/src/jit/arm/Lowering-arm.h
+++ b/js/src/jit/arm/Lowering-arm.h
@@ -101,16 +101,18 @@ class LIRGeneratorARM : public LIRGenera
     bool visitAsmJSLoadHeap(MAsmJSLoadHeap *ins);
     bool visitAsmJSStoreHeap(MAsmJSStoreHeap *ins);
     bool visitAsmJSLoadFuncPtr(MAsmJSLoadFuncPtr *ins);
     bool visitStoreTypedArrayElementStatic(MStoreTypedArrayElementStatic *ins);
     bool visitForkJoinGetSlice(MForkJoinGetSlice *ins);
     bool visitSimdTernaryBitwise(MSimdTernaryBitwise *ins);
     bool visitSimdSplatX4(MSimdSplatX4 *ins);
     bool visitSimdValueX4(MSimdValueX4 *ins);
+    bool visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins);
+    bool visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins);
 };
 
 typedef LIRGeneratorARM LIRGeneratorSpecific;
 
 } // namespace jit
 } // namespace js
 
 #endif /* jit_arm_Lowering_arm_h */
--- a/js/src/jit/arm/MacroAssembler-arm.cpp
+++ b/js/src/jit/arm/MacroAssembler-arm.cpp
@@ -4685,9 +4685,285 @@ MacroAssemblerARMCompat::branchValueIsNu
     Label done;
 
     branchTestObject(Assembler::NotEqual, value, cond == Assembler::Equal ? &done : label);
     branchPtrInNurseryRange(cond, value.payloadReg(), temp, label);
 
     bind(&done);
 }
 
+namespace js {
+namespace jit {
+
+template<>
+Register
+MacroAssemblerARMCompat::computePointer<BaseIndex>(const BaseIndex &src, Register r)
+{
+    Register base = src.base;
+    Register index = src.index;
+    uint32_t scale = Imm32::ShiftOf(src.scale).value;
+    int32_t offset = src.offset;
+    as_add(r, base, lsl(index, scale));
+    if (offset != 0)
+        ma_add(r, Imm32(offset), r);
+    return r;
+}
+
+template<>
+Register
+MacroAssemblerARMCompat::computePointer<Address>(const Address &src, Register r)
+{
+    if (src.offset == 0)
+        return src.base;
+    ma_add(src.base, Imm32(src.offset), r);
+    return r;
+}
+
+} // namespace jit
+} // namespace js
+
+template<typename T>
+void
+MacroAssemblerARMCompat::compareExchange(int nbytes, bool signExtend, const T &mem,
+                                         Register oldval, Register newval, Register output)
+{
+    // If LDREXB/H and STREXB/H are not available we use the
+    // word-width operations with read-modify-write.  That does not
+    // abstract well, so fork.
+    //
+    // Bug 1077321: We may further optimize for ARMv8 here.
+    if (nbytes < 4 && !HasLDSTREXBHD())
+        compareExchangeARMv6(nbytes, signExtend, mem, oldval, newval, output);
+    else
+        compareExchangeARMv7(nbytes, signExtend, mem, oldval, newval, output);
+}
+
+// General algorithm:
+//
+//     ...    ptr, <addr>         ; compute address of item
+//     dmb
+// L0  ldrex* output, [ptr]
+//     sxt*   output, output, 0   ; sign-extend if applicable
+//     *xt*   tmp, oldval, 0      ; sign-extend or zero-extend if applicable
+//     cmp    output, tmp
+//     bne    L1                  ; failed - values are different
+//     strex* tmp, newval, [ptr]
+//     cmp    tmp, 1
+//     beq    L0                  ; failed - location is dirty, retry
+// L1  dmb
+//
+// Discussion here: http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html.
+// However note that that discussion uses 'isb' as the trailing fence.
+// I've not quite figured out why, and I've gone with dmb here which
+// is safe.  Also see the LLVM source, which uses 'dmb ish' generally.
+// (Apple's Swift CPU apparently handles ish in a non-default, faster
+// way.)
+
+template<typename T>
+void
+MacroAssemblerARMCompat::compareExchangeARMv7(int nbytes, bool signExtend, const T &mem,
+                                              Register oldval, Register newval, Register output)
+{
+    Label Lagain;
+    Label Ldone;
+    ma_dmb(BarrierST);
+    Register ptr = computePointer(mem, secondScratchReg_);
+    bind(&Lagain);
+    switch (nbytes) {
+      case 1:
+        as_ldrexb(output, ptr);
+        if (signExtend) {
+            as_sxtb(output, output, 0);
+            as_sxtb(ScratchRegister, oldval, 0);
+        } else {
+            as_uxtb(ScratchRegister, oldval, 0);
+        }
+        break;
+      case 2:
+        as_ldrexh(output, ptr);
+        if (signExtend) {
+            as_sxth(output, output, 0);
+            as_sxth(ScratchRegister, oldval, 0);
+        } else {
+            as_uxth(ScratchRegister, oldval, 0);
+        }
+        break;
+      case 4:
+        MOZ_ASSERT(!signExtend);
+        as_ldrex(output, ptr);
+        break;
+    }
+    if (nbytes < 4)
+        as_cmp(output, O2Reg(ScratchRegister));
+    else
+        as_cmp(output, O2Reg(oldval));
+    as_b(&Ldone, NotEqual);
+    switch (nbytes) {
+      case 1:
+        as_strexb(ScratchRegister, newval, ptr);
+        break;
+      case 2:
+        as_strexh(ScratchRegister, newval, ptr);
+        break;
+      case 4:
+        as_strex(ScratchRegister, newval, ptr);
+        break;
+    }
+    as_cmp(ScratchRegister, Imm8(1));
+    as_b(&Lagain, Equal);
+    bind(&Ldone);
+    ma_dmb();
+}
+
+template<typename T>
+void
+MacroAssemblerARMCompat::compareExchangeARMv6(int nbytes, bool signExtend, const T &mem,
+                                              Register oldval, Register newval, Register output)
+{
+    // Bug 1077318: Must use read-modify-write with LDREX / STREX.
+    MOZ_ASSERT(nbytes == 1 || nbytes == 2);
+    MOZ_CRASH("NYI");
+}
+
+template void
+js::jit::MacroAssemblerARMCompat::compareExchange(int nbytes, bool signExtend,
+                                                  const Address &address, Register oldval,
+                                                  Register newval, Register output);
+template void
+js::jit::MacroAssemblerARMCompat::compareExchange(int nbytes, bool signExtend,
+                                                  const BaseIndex &address, Register oldval,
+                                                  Register newval, Register output);
+
+template<typename T>
+void
+MacroAssemblerARMCompat::atomicFetchOp(int nbytes, bool signExtend, AtomicOp op, const Imm32 &value,
+                                       const T &mem, Register temp, Register output)
+{
+    // The Imm32 value case is not needed yet because lowering always
+    // forces the value into a register at present (bug 1077317).  But
+    // the method must be present for the platform-independent code to
+    // link.
+    MOZ_CRASH("Feature NYI");
+}
+
+// General algorithm:
+//
+//     ...    ptr, <addr>         ; compute address of item
+//     dmb
+// L0  ldrex* output, [ptr]
+//     sxt*   output, output, 0   ; sign-extend if applicable
+//     OP     tmp, output, value  ; compute value to store
+//     strex* tmp, tmp, [ptr]
+//     cmp    tmp, 1
+//     beq    L0                  ; failed - location is dirty, retry
+//     dmb                        ; ordering barrier required
+//
+// Also see notes above at compareExchange re the barrier strategy.
+//
+// Observe that the value being operated into the memory element need
+// not be sign-extended because no OP will make use of bits to the
+// left of the bits indicated by the width of the element, and neither
+// output nor the bits stored are affected by OP.
+
+template<typename T>
+void
+MacroAssemblerARMCompat::atomicFetchOp(int nbytes, bool signExtend, AtomicOp op,
+                                       const Register &value, const T &mem, Register temp,
+                                       Register output)
+{
+    // Fork for non-word operations on ARMv6.
+    //
+    // Bug 1077321: We may further optimize for ARMv8 here.
+    if (nbytes < 4 && !HasLDSTREXBHD())
+        atomicFetchOpARMv6(nbytes, signExtend, op, value, mem, temp, output);
+    else {
+        MOZ_ASSERT(temp == InvalidReg);
+        atomicFetchOpARMv7(nbytes, signExtend, op, value, mem, output);
+    }
+}
+
+template<typename T>
+void
+MacroAssemblerARMCompat::atomicFetchOpARMv7(int nbytes, bool signExtend, AtomicOp op,
+                                            const Register &value, const T &mem, Register output)
+{
+    Label Lagain;
+    Register ptr = computePointer(mem, secondScratchReg_);
+    ma_dmb();
+    bind(&Lagain);
+    switch (nbytes) {
+      case 1:
+        as_ldrexb(output, ptr);
+        if (signExtend)
+            as_sxtb(output, output, 0);
+        break;
+      case 2:
+        as_ldrexh(output, ptr);
+        if (signExtend)
+            as_sxth(output, output, 0);
+        break;
+      case 4:
+        MOZ_ASSERT(!signExtend);
+        as_ldrex(output, ptr);
+        break;
+    }
+    switch (op) {
+      case AtomicFetchAddOp:
+        as_add(ScratchRegister, output, O2Reg(value));
+        break;
+      case AtomicFetchSubOp:
+        as_sub(ScratchRegister, output, O2Reg(value));
+        break;
+      case AtomicFetchAndOp:
+        as_and(ScratchRegister, output, O2Reg(value));
+        break;
+      case AtomicFetchOrOp:
+        as_orr(ScratchRegister, output, O2Reg(value));
+        break;
+      case AtomicFetchXorOp:
+        as_eor(ScratchRegister, output, O2Reg(value));
+        break;
+    }
+    switch (nbytes) {
+      case 1:
+        as_strexb(ScratchRegister, ScratchRegister, ptr);
+        break;
+      case 2:
+        as_strexh(ScratchRegister, ScratchRegister, ptr);
+        break;
+      case 4:
+        as_strex(ScratchRegister, ScratchRegister, ptr);
+        break;
+    }
+    as_cmp(ScratchRegister, Imm8(1));
+    as_b(&Lagain, Equal);
+    ma_dmb();
+}
+
+template<typename T>
+void
+MacroAssemblerARMCompat::atomicFetchOpARMv6(int nbytes, bool signExtend, AtomicOp op,
+                                            const Register &value, const T &mem, Register temp,
+                                            Register output)
+{
+    // Bug 1077318: Must use read-modify-write with LDREX / STREX.
+    MOZ_ASSERT(nbytes == 1 || nbytes == 2);
+    MOZ_CRASH("NYI");
+}
+
+template void
+js::jit::MacroAssemblerARMCompat::atomicFetchOp(int nbytes, bool signExtend, AtomicOp op,
+                                                const Imm32 &value, const Address &mem,
+                                                Register temp, Register output);
+template void
+js::jit::MacroAssemblerARMCompat::atomicFetchOp(int nbytes, bool signExtend, AtomicOp op,
+                                                const Imm32 &value, const BaseIndex &mem,
+                                                Register temp, Register output);
+template void
+js::jit::MacroAssemblerARMCompat::atomicFetchOp(int nbytes, bool signExtend, AtomicOp op,
+                                                const Register &value, const Address &mem,
+                                                Register temp, Register output);
+template void
+js::jit::MacroAssemblerARMCompat::atomicFetchOp(int nbytes, bool signExtend, AtomicOp op,
+                                                const Register &value, const BaseIndex &mem,
+                                                Register temp, Register output);
+
 #endif
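
The ldrex/strex retry loops above are the classic load-linked/store-conditional pattern. As a behavioral model only (not the jitted code), the same semantics can be expressed with the GCC/Clang __atomic builtins; on ARMv7 a compiler lowers these to essentially the dmb + ldrex/strex loops sketched in the algorithm comments:

    #include <stdint.h>

    // Counterpart of the atomicFetchOp(4, ..., AtomicFetchAddOp, ...) path:
    // returns the value that was in memory before the add.
    uint32_t atomicFetchAdd32(uint32_t *addr, uint32_t value)
    {
        return __atomic_fetch_add(addr, value, __ATOMIC_SEQ_CST);
    }

    // Counterpart of compareExchange32: returns the value observed in
    // memory, which equals oldval exactly when the store took place.
    uint32_t compareExchange32(uint32_t *addr, uint32_t oldval, uint32_t newval)
    {
        // On failure the builtin writes the observed value back into oldval.
        __atomic_compare_exchange_n(addr, &oldval, newval, /* weak= */ false,
                                    __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
        return oldval;
    }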
--- a/js/src/jit/arm/MacroAssembler-arm.h
+++ b/js/src/jit/arm/MacroAssembler-arm.h
@@ -7,16 +7,17 @@
 #ifndef jit_arm_MacroAssembler_arm_h
 #define jit_arm_MacroAssembler_arm_h
 
 #include "mozilla/DebugOnly.h"
 
 #include "jsopcode.h"
 
 #include "jit/arm/Assembler-arm.h"
+#include "jit/AtomicOp.h"
 #include "jit/IonCaches.h"
 #include "jit/IonFrames.h"
 #include "jit/MoveResolver.h"
 
 using mozilla::DebugOnly;
 
 namespace js {
 namespace jit {
@@ -1415,16 +1416,182 @@ class MacroAssemblerARMCompat : public M
     }
     void storeFloat32(FloatRegister src, BaseIndex addr) {
         // Harder cases not handled yet.
         MOZ_ASSERT(addr.offset == 0);
         uint32_t scale = Imm32::ShiftOf(addr.scale).value;
         ma_vstr(VFPRegister(src).singleOverlay(), addr.base, addr.index, scale);
     }
 
+  private:
+    template<typename T>
+    Register computePointer(const T &src, Register r);
+
+    template<typename T>
+    void compareExchangeARMv6(int nbytes, bool signExtend, const T &mem, Register oldval,
+                              Register newval, Register output);
+
+    template<typename T>
+    void compareExchangeARMv7(int nbytes, bool signExtend, const T &mem, Register oldval,
+                              Register newval, Register output);
+
+    template<typename T>
+    void compareExchange(int nbytes, bool signExtend, const T &address, Register oldval,
+                         Register newval, Register output);
+
+    template<typename T>
+    void atomicFetchOpARMv6(int nbytes, bool signExtend, AtomicOp op, const Register &value,
+                            const T &mem, Register temp, Register output);
+
+    template<typename T>
+    void atomicFetchOpARMv7(int nbytes, bool signExtend, AtomicOp op, const Register &value,
+                            const T &mem, Register output);
+
+    template<typename T>
+    void atomicFetchOp(int nbytes, bool signExtend, AtomicOp op, const Imm32 &value,
+                       const T &address, Register temp, Register output);
+
+    template<typename T>
+    void atomicFetchOp(int nbytes, bool signExtend, AtomicOp op, const Register &value,
+                       const T &address, Register temp, Register output);
+
+  public:
+    // T in {Address,BaseIndex}
+    // S in {Imm32,Register}
+
+    template<typename T>
+    void compareExchange8SignExtend(const T &mem, Register oldval, Register newval, Register output)
+    {
+        compareExchange(1, true, mem, oldval, newval, output);
+    }
+    template<typename T>
+    void compareExchange8ZeroExtend(const T &mem, Register oldval, Register newval, Register output)
+    {
+        compareExchange(1, false, mem, oldval, newval, output);
+    }
+    template<typename T>
+    void compareExchange16SignExtend(const T &mem, Register oldval, Register newval, Register output)
+    {
+        compareExchange(2, true, mem, oldval, newval, output);
+    }
+    template<typename T>
+    void compareExchange16ZeroExtend(const T &mem, Register oldval, Register newval, Register output)
+    {
+        compareExchange(2, false, mem, oldval, newval, output);
+    }
+    template<typename T>
+    void compareExchange32(const T &mem, Register oldval, Register newval, Register output) {
+        compareExchange(4, false, mem, oldval, newval, output);
+    }
+
+    template<typename T, typename S>
+    void atomicFetchAdd8SignExtend(const S &value, const T &mem, Register temp, Register output) {
+        atomicFetchOp(1, true, AtomicFetchAddOp, value, mem, temp, output);
+    }
+    template<typename T, typename S>
+    void atomicFetchAdd8ZeroExtend(const S &value, const T &mem, Register temp, Register output) {
+        atomicFetchOp(1, false, AtomicFetchAddOp, value, mem, temp, output);
+    }
+    template<typename T, typename S>
+    void atomicFetchAdd16SignExtend(const S &value, const T &mem, Register temp, Register output) {
+        atomicFetchOp(2, true, AtomicFetchAddOp, value, mem, temp, output);
+    }
+    template<typename T, typename S>
+    void atomicFetchAdd16ZeroExtend(const S &value, const T &mem, Register temp, Register output) {
+        atomicFetchOp(2, false, AtomicFetchAddOp, value, mem, temp, output);
+    }
+    template<typename T, typename S>
+    void atomicFetchAdd32(const S &value, const T &mem, Register temp, Register output) {
+        atomicFetchOp(4, false, AtomicFetchAddOp, value, mem, temp, output);
+    }
+
+    template<typename T, typename S>
+    void atomicFetchSub8SignExtend(const S &value, const T &mem, Register temp, Register output) {
+        atomicFetchOp(1, true, AtomicFetchSubOp, value, mem, temp, output);
+    }
+    template<typename T, typename S>
+    void atomicFetchSub8ZeroExtend(const S &value, const T &mem, Register temp, Register output) {
+        atomicFetchOp(1, false, AtomicFetchSubOp, value, mem, temp, output);
+    }
+    template<typename T, typename S>
+    void atomicFetchSub16SignExtend(const S &value, const T &mem, Register temp, Register output) {
+        atomicFetchOp(2, true, AtomicFetchSubOp, value, mem, temp, output);
+    }
+    template<typename T, typename S>
+    void atomicFetchSub16ZeroExtend(const S &value, const T &mem, Register temp, Register output) {
+        atomicFetchOp(2, false, AtomicFetchSubOp, value, mem, temp, output);
+    }
+    template<typename T, typename S>
+    void atomicFetchSub32(const S &value, const T &mem, Register temp, Register output) {
+        atomicFetchOp(4, false, AtomicFetchSubOp, value, mem, temp, output);
+    }
+
+    template<typename T, typename S>
+    void atomicFetchAnd8SignExtend(const S &value, const T &mem, Register temp, Register output) {
+        atomicFetchOp(1, true, AtomicFetchAndOp, value, mem, temp, output);
+    }
+    template<typename T, typename S>
+    void atomicFetchAnd8ZeroExtend(const S &value, const T &mem, Register temp, Register output) {
+        atomicFetchOp(1, false, AtomicFetchAndOp, value, mem, temp, output);
+    }
+    template<typename T, typename S>
+    void atomicFetchAnd16SignExtend(const S &value, const T &mem, Register temp, Register output) {
+        atomicFetchOp(2, true, AtomicFetchAndOp, value, mem, temp, output);
+    }
+    template<typename T, typename S>
+    void atomicFetchAnd16ZeroExtend(const S &value, const T &mem, Register temp, Register output) {
+        atomicFetchOp(2, false, AtomicFetchAndOp, value, mem, temp, output);
+    }
+    template<typename T, typename S>
+    void atomicFetchAnd32(const S &value, const T &mem, Register temp, Register output) {
+        atomicFetchOp(4, false, AtomicFetchAndOp, value, mem, temp, output);
+    }
+
+    template<typename T, typename S>
+    void atomicFetchOr8SignExtend(const S &value, const T &mem, Register temp, Register output) {
+        atomicFetchOp(1, true, AtomicFetchOrOp, value, mem, temp, output);
+    }
+    template<typename T, typename S>
+    void atomicFetchOr8ZeroExtend(const S &value, const T &mem, Register temp, Register output) {
+        atomicFetchOp(1, false, AtomicFetchOrOp, value, mem, temp, output);
+    }
+    template<typename T, typename S>
+    void atomicFetchOr16SignExtend(const S &value, const T &mem, Register temp, Register output) {
+        atomicFetchOp(2, true, AtomicFetchOrOp, value, mem, temp, output);
+    }
+    template<typename T, typename S>
+    void atomicFetchOr16ZeroExtend(const S &value, const T &mem, Register temp, Register output) {
+        atomicFetchOp(2, false, AtomicFetchOrOp, value, mem, temp, output);
+    }
+    template<typename T, typename S>
+    void atomicFetchOr32(const S &value, const T &mem, Register temp, Register output) {
+        atomicFetchOp(4, false, AtomicFetchOrOp, value, mem, temp, output);
+    }
+
+    template<typename T, typename S>
+    void atomicFetchXor8SignExtend(const S &value, const T &mem, Register temp, Register output) {
+        atomicFetchOp(1, true, AtomicFetchXorOp, value, mem, temp, output);
+    }
+    template<typename T, typename S>
+    void atomicFetchXor8ZeroExtend(const S &value, const T &mem, Register temp, Register output) {
+        atomicFetchOp(1, false, AtomicFetchXorOp, value, mem, temp, output);
+    }
+    template<typename T, typename S>
+    void atomicFetchXor16SignExtend(const S &value, const T &mem, Register temp, Register output) {
+        atomicFetchOp(2, true, AtomicFetchXorOp, value, mem, temp, output);
+    }
+    template<typename T, typename S>
+    void atomicFetchXor16ZeroExtend(const S &value, const T &mem, Register temp, Register output) {
+        atomicFetchOp(2, false, AtomicFetchXorOp, value, mem, temp, output);
+    }
+    template<typename T, typename S>
+    void atomicFetchXor32(const S &value, const T &mem, Register temp, Register output) {
+        atomicFetchOp(4, false, AtomicFetchXorOp, value, mem, temp, output);
+    }
+
     void clampIntToUint8(Register reg) {
         // Look at (reg >> 8) if it is 0, then reg shouldn't be clamped if it is
         // <0, then we want to clamp to 0, otherwise, we wish to clamp to 255 as
         as_mov(ScratchRegister, asr(reg, 8), SetCond);
         ma_mov(Imm32(0xff), reg, NoSetCond, NotEqual);
         ma_mov(Imm32(0), reg, NoSetCond, Signed);
     }
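
A hypothetical sketch of how a code generator might drive the new public wrappers; the register names and addressing choices are illustrative only, not taken from this patch:

    // 'masm', 'elements', 'index', 'value', 'oldval', 'newval' and
    // 'output' stand for whatever lowering has allocated.

    // 8-bit signed fetch-add on an Int8Array element (T = BaseIndex,
    // S = Register); the ARMv7 path asserts the temp is InvalidReg:
    masm.atomicFetchAdd8SignExtend(value, BaseIndex(elements, index, TimesOne),
                                   InvalidReg, output);

    // 32-bit compare-exchange on an Int32Array element (T = Address);
    // output receives the value observed in memory:
    masm.compareExchange32(Address(elements, 0), oldval, newval, output);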