author | Benjamin Bouvier <benj@benj.me> |
Mon, 02 Mar 2015 12:11:19 +0100 | |
changeset 231853 | ddee53b10d77a87b6b553d1fe0a3de71b9448eb2 |
parent 231852 | 085b7d36e31d30904edf5742f420b8ab9eeba171 |
child 231854 | 3f54fe544025d1a49bbf51ca1f40761a2cbe3e5f |
push id | 28362 |
push user | ryanvm@gmail.com |
push date | Wed, 04 Mar 2015 21:35:51 +0000 |
treeherder | mozilla-central@56492f7244a9 [default view] [failures only] |
perfherder | [talos] [build metrics] [platform microbench] (compared to previous push) |
reviewers | bhackett |
bugs | 1135042 |
milestone | 39.0a1 |
first release with | nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
|
last release without | nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
|
--- a/js/src/builtin/SIMD.h +++ b/js/src/builtin/SIMD.h @@ -237,26 +237,26 @@ BITWISE_COMMONX4_SIMD_OP(_) \ WITH_COMMONX4_SIMD_OP(_) \ _(bitselect) \ _(select) \ _(splat) \ _(not) \ _(neg) \ _(swizzle) \ + _(load) \ + _(store) \ _(check) #define FOREACH_COMMONX4_SIMD_OP(_) \ ION_COMMONX4_SIMD_OP(_) \ COMP_COMMONX4_TO_INT32X4_SIMD_OP(_) \ _(shuffle) \ - _(load) \ _(loadX) \ _(loadXY) \ _(loadXYZ) \ - _(store) \ _(storeX) \ _(storeXY) \ _(storeXYZ) #define FORALL_SIMD_OP(_) \ FOREACH_INT32X4_SIMD_OP(_) \ FOREACH_FLOAT32X4_SIMD_OP(_) \ FOREACH_COMMONX4_SIMD_OP(_)
new file mode 100644 --- /dev/null +++ b/js/src/jit-test/tests/SIMD/load.js @@ -0,0 +1,48 @@ +load(libdir + 'simd.js'); + +setJitCompilerOption("ion.warmup.trigger", 40); + +function f() { + var f32 = new Float32Array(16); + for (var i = 0; i < 16; i++) + f32[i] = i + 1; + + var f64 = new Float64Array(f32.buffer); + var i32 = new Int32Array(f32.buffer); + var u32 = new Uint32Array(f32.buffer); + var i16 = new Int16Array(f32.buffer); + var u16 = new Uint16Array(f32.buffer); + var i8 = new Int8Array(f32.buffer); + var u8 = new Uint8Array(f32.buffer); + + var r; + for (var i = 0; i < 150; i++) { + assertEqX4(SIMD.float32x4.load(f64, 0), [1,2,3,4]); + assertEqX4(SIMD.float32x4.load(f32, 1), [2,3,4,5]); + assertEqX4(SIMD.float32x4.load(i32, 2), [3,4,5,6]); + assertEqX4(SIMD.float32x4.load(i16, 3 << 1), [4,5,6,7]); + assertEqX4(SIMD.float32x4.load(u16, 4 << 1), [5,6,7,8]); + assertEqX4(SIMD.float32x4.load(i8 , 5 << 2), [6,7,8,9]); + assertEqX4(SIMD.float32x4.load(u8 , 6 << 2), [7,8,9,10]); + + assertEqX4(SIMD.float32x4.load(f64, (16 >> 1) - (4 >> 1)), [13,14,15,16]); + assertEqX4(SIMD.float32x4.load(f32, 16 - 4), [13,14,15,16]); + assertEqX4(SIMD.float32x4.load(i32, 16 - 4), [13,14,15,16]); + assertEqX4(SIMD.float32x4.load(i16, (16 << 1) - (4 << 1)), [13,14,15,16]); + assertEqX4(SIMD.float32x4.load(u16, (16 << 1) - (4 << 1)), [13,14,15,16]); + assertEqX4(SIMD.float32x4.load(i8, (16 << 2) - (4 << 2)), [13,14,15,16]); + assertEqX4(SIMD.float32x4.load(u8, (16 << 2) - (4 << 2)), [13,14,15,16]); + + var caught = false; + try { + SIMD.float32x4.load(i8, (i < 149) ? 0 : (16 << 2) - (4 << 2) + 1); + } catch (e) { + caught = true; + } + assertEq(i < 149 || caught, true); + } + return r +} + +f(); +
--- a/js/src/jit/CodeGenerator.cpp +++ b/js/src/jit/CodeGenerator.cpp @@ -8649,27 +8649,28 @@ CodeGenerator::visitUnboxObjectOrNull(LU void CodeGenerator::visitLoadTypedArrayElement(LLoadTypedArrayElement *lir) { Register elements = ToRegister(lir->elements()); Register temp = lir->temp()->isBogusTemp() ? InvalidReg : ToRegister(lir->temp()); AnyRegister out = ToAnyRegister(lir->output()); Scalar::Type arrayType = lir->mir()->arrayType(); + Scalar::Type readType = lir->mir()->readType(); int width = Scalar::byteSize(arrayType); Label fail; if (lir->index()->isConstant()) { Address source(elements, ToInt32(lir->index()) * width + lir->mir()->offsetAdjustment()); - masm.loadFromTypedArray(arrayType, source, out, temp, &fail, + masm.loadFromTypedArray(readType, source, out, temp, &fail, lir->mir()->canonicalizeDoubles()); } else { BaseIndex source(elements, ToRegister(lir->index()), ScaleFromElemWidth(width), lir->mir()->offsetAdjustment()); - masm.loadFromTypedArray(arrayType, source, out, temp, &fail, + masm.loadFromTypedArray(readType, source, out, temp, &fail, lir->mir()->canonicalizeDoubles()); } if (fail.used()) bailoutFrom(&fail, lir->snapshot()); } void
--- a/js/src/jit/IonBuilder.h +++ b/js/src/jit/IonBuilder.h @@ -833,16 +833,17 @@ class IonBuilder SimdTypeDescr::Type type); InliningStatus inlineSimdSplat(CallInfo &callInfo, JSNative native, SimdTypeDescr::Type type); InliningStatus inlineSimdSwizzle(CallInfo &callInfo, JSNative native, SimdTypeDescr::Type type); InliningStatus inlineSimdCheck(CallInfo &callInfo, JSNative native, SimdTypeDescr::Type type); InliningStatus inlineSimdConvert(CallInfo &callInfo, JSNative native, bool isCast, SimdTypeDescr::Type from, SimdTypeDescr::Type to); InliningStatus inlineSimdSelect(CallInfo &callInfo, JSNative native, bool isElementWise, SimdTypeDescr::Type type); + InliningStatus inlineSimdLoad(CallInfo &callInfo, JSNative native, SimdTypeDescr::Type type); // Utility intrinsics. InliningStatus inlineIsCallable(CallInfo &callInfo); InliningStatus inlineIsObject(CallInfo &callInfo); InliningStatus inlineToObject(CallInfo &callInfo); InliningStatus inlineToInteger(CallInfo &callInfo); InliningStatus inlineToString(CallInfo &callInfo); InliningStatus inlineDump(CallInfo &callInfo);
--- a/js/src/jit/Lowering.cpp +++ b/js/src/jit/Lowering.cpp @@ -2883,17 +2883,18 @@ void LIRGenerator::visitLoadTypedArrayElement(MLoadTypedArrayElement *ins) { MOZ_ASSERT(IsValidElementsType(ins->elements(), ins->offsetAdjustment())); MOZ_ASSERT(ins->index()->type() == MIRType_Int32); const LUse elements = useRegister(ins->elements()); const LAllocation index = useRegisterOrConstant(ins->index()); - MOZ_ASSERT(IsNumberType(ins->type()) || ins->type() == MIRType_Boolean); + MOZ_ASSERT(IsNumberType(ins->type()) || IsSimdType(ins->type()) || + ins->type() == MIRType_Boolean); // We need a temp register for Uint32Array with known double result. LDefinition tempDef = LDefinition::BogusTemp(); if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type())) tempDef = temp(); if (ins->requiresMemoryBarrier()) { LMemoryBarrier *fence = new(alloc()) LMemoryBarrier(MembarBeforeLoad);
--- a/js/src/jit/MCallOptimize.cpp +++ b/js/src/jit/MCallOptimize.cpp @@ -349,16 +349,21 @@ IonBuilder::inlineNativeCall(CallInfo &c if (native == js::simd_float32x4_select) return inlineSimdSelect(callInfo, native, IsElementWise(true), SimdTypeDescr::TYPE_FLOAT32); if (native == js::simd_float32x4_bitselect) return inlineSimdSelect(callInfo, native, IsElementWise(false), SimdTypeDescr::TYPE_FLOAT32); if (native == js::simd_int32x4_swizzle) return inlineSimdSwizzle(callInfo, native, SimdTypeDescr::TYPE_INT32); + if (native == js::simd_int32x4_load) + return inlineSimdLoad(callInfo, native, SimdTypeDescr::TYPE_INT32); + if (native == js::simd_float32x4_load) + return inlineSimdLoad(callInfo, native, SimdTypeDescr::TYPE_FLOAT32); + return InliningStatus_NotInlined; } IonBuilder::InliningStatus IonBuilder::inlineNativeGetter(CallInfo &callInfo, JSFunction *target) { MOZ_ASSERT(target->isNative()); JSNative native = target->native(); @@ -3113,10 +3118,70 @@ IonBuilder::inlineSimdSwizzle(CallInfo & for (size_t i = 0; i < 4; i++) lanes[i] = callInfo.getArg(1 + i); MIRType mirType = SimdTypeDescrToMIRType(type); MSimdGeneralSwizzle *ins = MSimdGeneralSwizzle::New(alloc(), callInfo.getArg(0), lanes, mirType); return boxSimd(callInfo, ins, templateObj); } +static Scalar::Type +SimdTypeToScalarType(SimdTypeDescr::Type type) +{ + switch (type) { + case SimdTypeDescr::TYPE_FLOAT32: return Scalar::Float32x4; + case SimdTypeDescr::TYPE_INT32: return Scalar::Int32x4; + case SimdTypeDescr::TYPE_FLOAT64: break; + } + MOZ_CRASH("unexpected simd type"); +} + +IonBuilder::InliningStatus +IonBuilder::inlineSimdLoad(CallInfo &callInfo, JSNative native, SimdTypeDescr::Type type) +{ + InlineTypedObject *templateObj = nullptr; + if (!checkInlineSimd(callInfo, native, type, 2, &templateObj)) + return InliningStatus_NotInlined; + + MDefinition *array = callInfo.getArg(0); + MDefinition *index = callInfo.getArg(1); + + Scalar::Type arrayType; + if (!ElementAccessIsAnyTypedArray(constraints(), 
array, index, &arrayType)) + return InliningStatus_NotInlined; + + MInstruction *indexAsInt32 = MToInt32::New(alloc(), index); + current->add(indexAsInt32); + index = indexAsInt32; + + MDefinition *indexForBoundsCheck = index; + + // Artificially make sure the index is in bounds by adding the difference + // number of slots needed (e.g. reading from Float32Array we need to make + // sure to be in bounds for 4 slots, so add 3, etc.). + MOZ_ASSERT(Simd128DataSize % Scalar::byteSize(arrayType) == 0); + int32_t suppSlotsNeeded = Simd128DataSize / Scalar::byteSize(arrayType) - 1; + if (suppSlotsNeeded) { + MConstant *suppSlots = constant(Int32Value(suppSlotsNeeded)); + MAdd *addedIndex = MAdd::New(alloc(), index, suppSlots); + // Even if this addition overflows, we're fine because the code generated + // for the bounds check uses uint32 arithmetic + addedIndex->setInt32(); + current->add(addedIndex); + indexForBoundsCheck = addedIndex; + } + + MInstruction *length; + MInstruction *elements; + addTypedArrayLengthAndData(array, SkipBoundsCheck, &index, &length, &elements); + + MInstruction *check = MBoundsCheck::New(alloc(), indexForBoundsCheck, length); + current->add(check); + + MLoadTypedArrayElement *load = MLoadTypedArrayElement::New(alloc(), elements, index, arrayType); + load->setResultType(SimdTypeDescrToMIRType(type)); + load->setReadType(SimdTypeToScalarType(type)); + + return boxSimd(callInfo, load, templateObj); +} + } // namespace jit } // namespace js
--- a/js/src/jit/MIR.h +++ b/js/src/jit/MIR.h @@ -8828,25 +8828,27 @@ enum MemoryBarrierRequirement // Also see comments above MMemoryBarrier, below. class MLoadTypedArrayElement : public MBinaryInstruction, public SingleObjectPolicy::Data { Scalar::Type arrayType_; + Scalar::Type readType_; bool requiresBarrier_; int32_t offsetAdjustment_; bool canonicalizeDoubles_; MLoadTypedArrayElement(MDefinition *elements, MDefinition *index, Scalar::Type arrayType, MemoryBarrierRequirement requiresBarrier, int32_t offsetAdjustment, bool canonicalizeDoubles) : MBinaryInstruction(elements, index), arrayType_(arrayType), + readType_(arrayType), requiresBarrier_(requiresBarrier == DoesRequireMemoryBarrier), offsetAdjustment_(offsetAdjustment), canonicalizeDoubles_(canonicalizeDoubles) { setResultType(MIRType_Value); if (requiresBarrier_) setGuard(); // Not removable or movable else @@ -8865,16 +8867,23 @@ class MLoadTypedArrayElement int32_t offsetAdjustment = 0, bool canonicalizeDoubles = true) { return new(alloc) MLoadTypedArrayElement(elements, index, arrayType, requiresBarrier, offsetAdjustment, canonicalizeDoubles); } + void setReadType(Scalar::Type type) { + readType_ = type; + } + Scalar::Type readType() const { + return readType_; + } + Scalar::Type arrayType() const { return arrayType_; } bool fallible() const { // Bailout if the result does not fit in an int32. 
return arrayType_ == Scalar::Uint32 && type() == MIRType_Int32; } bool requiresMemoryBarrier() const { @@ -8903,16 +8912,18 @@ class MLoadTypedArrayElement bool congruentTo(const MDefinition *ins) const MOZ_OVERRIDE { if (requiresBarrier_) return false; if (!ins->isLoadTypedArrayElement()) return false; const MLoadTypedArrayElement *other = ins->toLoadTypedArrayElement(); if (arrayType_ != other->arrayType_) return false; + if (readType_ != other->readType_) + return false; if (offsetAdjustment() != other->offsetAdjustment()) return false; if (canonicalizeDoubles() != other->canonicalizeDoubles()) return false; return congruentIfOperandsEqual(other); } void printOpcode(FILE *fp) const MOZ_OVERRIDE;
--- a/js/src/jit/MacroAssembler.cpp +++ b/js/src/jit/MacroAssembler.cpp @@ -351,16 +351,22 @@ MacroAssembler::loadFromTypedArray(Scala loadFloat32(src, dest.fpu()); canonicalizeFloat(dest.fpu()); break; case Scalar::Float64: loadDouble(src, dest.fpu()); if (canonicalizeDoubles) canonicalizeDouble(dest.fpu()); break; + case Scalar::Int32x4: + loadUnalignedInt32x4(src, dest.fpu()); + break; + case Scalar::Float32x4: + loadUnalignedFloat32x4(src, dest.fpu()); + break; default: MOZ_CRASH("Invalid typed array type"); } } template void MacroAssembler::loadFromTypedArray(Scalar::Type arrayType, const Address &src, AnyRegister dest, Register temp, Label *fail, bool canonicalizeDoubles); template void MacroAssembler::loadFromTypedArray(Scalar::Type arrayType, const BaseIndex &src, AnyRegister dest,
--- a/js/src/jit/RangeAnalysis.cpp +++ b/js/src/jit/RangeAnalysis.cpp @@ -1643,17 +1643,18 @@ MToInt32::computeRange(TempAllocator &al void MLimitedTruncate::computeRange(TempAllocator &alloc) { Range *output = new(alloc) Range(input()); setRange(output); } -static Range *GetTypedArrayRange(TempAllocator &alloc, int type) +static Range * +GetTypedArrayRange(TempAllocator &alloc, Scalar::Type type) { switch (type) { case Scalar::Uint8Clamped: case Scalar::Uint8: return Range::NewUInt32Range(alloc, 0, UINT8_MAX); case Scalar::Uint16: return Range::NewUInt32Range(alloc, 0, UINT16_MAX); case Scalar::Uint32: @@ -1663,28 +1664,30 @@ static Range *GetTypedArrayRange(TempAll return Range::NewInt32Range(alloc, INT8_MIN, INT8_MAX); case Scalar::Int16: return Range::NewInt32Range(alloc, INT16_MIN, INT16_MAX); case Scalar::Int32: return Range::NewInt32Range(alloc, INT32_MIN, INT32_MAX); case Scalar::Float32: case Scalar::Float64: + case Scalar::Float32x4: + case Scalar::Int32x4: + case Scalar::MaxTypedArrayViewType: break; } - - return nullptr; + return nullptr; } void MLoadTypedArrayElement::computeRange(TempAllocator &alloc) { // We have an Int32 type and if this is a UInt32 load it may produce a value // outside of our range, but we have a bailout to handle those cases. - setRange(GetTypedArrayRange(alloc, arrayType())); + setRange(GetTypedArrayRange(alloc, readType())); } void MLoadTypedArrayElementStatic::computeRange(TempAllocator &alloc) { // We don't currently use MLoadTypedArrayElementStatic for uint32, so we // don't have to worry about it returning a value outside our type. MOZ_ASSERT(AnyTypedArrayType(someTypedArray_) != Scalar::Uint32);
--- a/js/src/jit/arm/MacroAssembler-arm.h +++ b/js/src/jit/arm/MacroAssembler-arm.h @@ -1393,21 +1393,23 @@ class MacroAssemblerARMCompat : public M void loadPtr(AbsoluteAddress address, Register dest); void loadPtr(AsmJSAbsoluteAddress address, Register dest); void loadPrivate(const Address &address, Register dest); void loadAlignedInt32x4(const Address &addr, FloatRegister dest) { MOZ_CRASH("NYI"); } void storeAlignedInt32x4(FloatRegister src, Address addr) { MOZ_CRASH("NYI"); } void loadUnalignedInt32x4(const Address &addr, FloatRegister dest) { MOZ_CRASH("NYI"); } + void loadUnalignedInt32x4(const BaseIndex &addr, FloatRegister dest) { MOZ_CRASH("NYI"); } void storeUnalignedInt32x4(FloatRegister src, Address addr) { MOZ_CRASH("NYI"); } void loadAlignedFloat32x4(const Address &addr, FloatRegister dest) { MOZ_CRASH("NYI"); } void storeAlignedFloat32x4(FloatRegister src, Address addr) { MOZ_CRASH("NYI"); } void loadUnalignedFloat32x4(const Address &addr, FloatRegister dest) { MOZ_CRASH("NYI"); } + void loadUnalignedFloat32x4(const BaseIndex &addr, FloatRegister dest) { MOZ_CRASH("NYI"); } void storeUnalignedFloat32x4(FloatRegister src, Address addr) { MOZ_CRASH("NYI"); } void loadDouble(const Address &addr, FloatRegister dest); void loadDouble(const BaseIndex &src, FloatRegister dest); // Load a float value into a register, then expand it to a double. void loadFloatAsDouble(const Address &addr, FloatRegister dest); void loadFloatAsDouble(const BaseIndex &src, FloatRegister dest);
--- a/js/src/jit/mips/MacroAssembler-mips.h +++ b/js/src/jit/mips/MacroAssembler-mips.h @@ -1254,21 +1254,23 @@ public: void loadPtr(AbsoluteAddress address, Register dest); void loadPtr(AsmJSAbsoluteAddress address, Register dest); void loadPrivate(const Address &address, Register dest); void loadAlignedInt32x4(const Address &addr, FloatRegister dest) { MOZ_CRASH("NYI"); } void storeAlignedInt32x4(FloatRegister src, Address addr) { MOZ_CRASH("NYI"); } void loadUnalignedInt32x4(const Address &addr, FloatRegister dest) { MOZ_CRASH("NYI"); } + void loadUnalignedInt32x4(const BaseIndex &addr, FloatRegister dest) { MOZ_CRASH("NYI"); } void storeUnalignedInt32x4(FloatRegister src, Address addr) { MOZ_CRASH("NYI"); } void loadAlignedFloat32x4(const Address &addr, FloatRegister dest) { MOZ_CRASH("NYI"); } void storeAlignedFloat32x4(FloatRegister src, Address addr) { MOZ_CRASH("NYI"); } void loadUnalignedFloat32x4(const Address &addr, FloatRegister dest) { MOZ_CRASH("NYI"); } + void loadUnalignedFloat32x4(const BaseIndex &addr, FloatRegister dest) { MOZ_CRASH("NYI"); } void storeUnalignedFloat32x4(FloatRegister src, Address addr) { MOZ_CRASH("NYI"); } void loadDouble(const Address &addr, FloatRegister dest); void loadDouble(const BaseIndex &src, FloatRegister dest); // Load a float value into a register, then expand it to a double. void loadFloatAsDouble(const Address &addr, FloatRegister dest); void loadFloatAsDouble(const BaseIndex &src, FloatRegister dest);
--- a/js/src/jit/shared/MacroAssembler-x86-shared.h +++ b/js/src/jit/shared/MacroAssembler-x86-shared.h @@ -925,16 +925,19 @@ class MacroAssemblerX86Shared : public A if (HasAVX() && src.kind() == Operand::FPREG) return FloatRegister::FromCode(src.fpu()); loadAlignedInt32x4(src, dest); return dest; } void loadUnalignedInt32x4(const Address &src, FloatRegister dest) { vmovdqu(Operand(src), dest); } + void loadUnalignedInt32x4(const BaseIndex &src, FloatRegister dest) { + vmovdqu(Operand(src), dest); + } void loadUnalignedInt32x4(const Operand &src, FloatRegister dest) { vmovdqu(src, dest); } void storeUnalignedInt32x4(FloatRegister src, const Address &dest) { vmovdqu(src, Operand(dest)); } void storeUnalignedInt32x4(FloatRegister src, const Operand &dest) { vmovdqu(src, dest); @@ -1006,16 +1009,19 @@ class MacroAssemblerX86Shared : public A if (HasAVX() && src.kind() == Operand::FPREG) return FloatRegister::FromCode(src.fpu()); loadAlignedFloat32x4(src, dest); return dest; } void loadUnalignedFloat32x4(const Address &src, FloatRegister dest) { vmovups(Operand(src), dest); } + void loadUnalignedFloat32x4(const BaseIndex &src, FloatRegister dest) { + vmovups(Operand(src), dest); + } void loadUnalignedFloat32x4(const Operand &src, FloatRegister dest) { vmovups(src, dest); } void storeUnalignedFloat32x4(FloatRegister src, const Address &dest) { vmovups(src, Operand(dest)); } void storeUnalignedFloat32x4(FloatRegister src, const Operand &dest) { vmovups(src, dest);