author | Benjamin Bouvier <benj@benj.me> |
Wed, 01 Oct 2014 14:57:15 +0200 | |
changeset 208195 | e6e7586b3e02cdd59e978f306b8f1c2d1ef71c4c |
parent 208194 | 89716e3f169df2469a1f82da92ce95163a8ff8bc |
child 208196 | 4baa041973f8860d94d8f04e16b7c839403ea362 |
push id | 27580 |
push user | kwierso@gmail.com |
push date | Wed, 01 Oct 2014 23:26:55 +0000 |
treeherder | autoland@af6c928893c0 [default view] [failures only] |
perfherder | [talos] [build metrics] [platform microbench] (compared to previous push) |
reviewers | sunfish |
bugs | 1073064 |
milestone | 35.0a1 |
first release with | nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
|
last release without | nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
|
--- a/js/src/jit/LIR-Common.h
+++ b/js/src/jit/LIR-Common.h
@@ -321,16 +321,38 @@ class LSimdBinaryBitwiseX4 : public LIns
     const LAllocation *rhs() {
         return getOperand(1);
     }
     MSimdBinaryBitwise::Operation operation() const {
         return mir_->toSimdBinaryBitwise()->operation();
     }
 };
 
+class LSimdShift : public LInstructionHelper<1, 2, 0>
+{
+  public:
+    LIR_HEADER(SimdShift)
+    LSimdShift(const LAllocation &vec, const LAllocation &val) {
+        setOperand(0, vec);
+        setOperand(1, val);
+    }
+    const LAllocation *vector() {
+        return getOperand(0);
+    }
+    const LAllocation *value() {
+        return getOperand(1);
+    }
+    MSimdShift::Operation operation() const {
+        return mir_->toSimdShift()->operation();
+    }
+    MSimdShift *mir() const {
+        return mir_->toSimdShift();
+    }
+};
+
 // SIMD selection of lanes from two int32x4 or float32x4 arguments based on a
 // int32x4 argument.
 class LSimdSelect : public LInstructionHelper<1, 3, 0>
 {
   public:
     LIR_HEADER(SimdSelect);
     const LAllocation *mask() {
         return getOperand(0);
--- a/js/src/jit/LOpcodes.h
+++ b/js/src/jit/LOpcodes.h
@@ -24,16 +24,17 @@
     _(SimdInsertElementI) \
     _(SimdInsertElementF) \
     _(SimdSignMaskX4) \
     _(SimdBinaryCompIx4) \
     _(SimdBinaryCompFx4) \
     _(SimdBinaryArithIx4) \
     _(SimdBinaryArithFx4) \
     _(SimdBinaryBitwiseX4) \
+    _(SimdShift) \
     _(SimdSelect) \
     _(Value) \
     _(CloneLiteral) \
     _(Parameter) \
     _(Callee) \
     _(IsConstructing) \
     _(TableSwitch) \
     _(TableSwitchV) \
--- a/js/src/jit/Lowering.cpp
+++ b/js/src/jit/Lowering.cpp
@@ -3839,25 +3839,36 @@ LIRGenerator::visitSimdBinaryArith(MSimd
 }
 
 bool
 LIRGenerator::visitSimdBinaryBitwise(MSimdBinaryBitwise *ins)
 {
     MOZ_ASSERT(IsSimdType(ins->type()));
 
     if (ins->type() == MIRType_Int32x4 || ins->type() == MIRType_Float32x4) {
-        LSimdBinaryBitwiseX4 *add = new(alloc()) LSimdBinaryBitwiseX4;
-        return lowerForFPU(add, ins, ins->lhs(), ins->rhs());
+        LSimdBinaryBitwiseX4 *lir = new(alloc()) LSimdBinaryBitwiseX4;
+        return lowerForFPU(lir, ins, ins->lhs(), ins->rhs());
     }
 
     MOZ_CRASH("Unknown SIMD kind when doing bitwise operations");
     return false;
 }
 
 bool
+LIRGenerator::visitSimdShift(MSimdShift *ins)
+{
+    MOZ_ASSERT(ins->type() == MIRType_Int32x4);
+
+    LUse vector = useRegisterAtStart(ins->lhs());
+    LAllocation value = useRegisterOrConstant(ins->rhs());
+    LSimdShift *lir = new(alloc()) LSimdShift(vector, value);
+    return defineReuseInput(lir, ins, 0);
+}
+
+bool
 LIRGenerator::visitLexicalCheck(MLexicalCheck *ins)
 {
     MDefinition *input = ins->input();
     MOZ_ASSERT(input->type() == MIRType_Value);
     LLexicalCheck *lir = new(alloc()) LLexicalCheck();
     return redefine(ins, input) && useBox(lir, LLexicalCheck::Input, input) &&
            add(lir, ins) && assignSafepoint(lir, ins);
 }
--- a/js/src/jit/Lowering.h
+++ b/js/src/jit/Lowering.h
@@ -269,16 +269,17 @@ class LIRGenerator : public LIRGenerator
     bool visitGetDOMMember(MGetDOMMember *ins);
     bool visitRecompileCheck(MRecompileCheck *ins);
     bool visitSimdExtractElement(MSimdExtractElement *ins);
     bool visitSimdInsertElement(MSimdInsertElement *ins);
     bool visitSimdSignMask(MSimdSignMask *ins);
     bool visitSimdBinaryComp(MSimdBinaryComp *ins);
     bool visitSimdBinaryArith(MSimdBinaryArith *ins);
     bool visitSimdBinaryBitwise(MSimdBinaryBitwise *ins);
+    bool visitSimdShift(MSimdShift *ins);
     bool visitSimdConstant(MSimdConstant *ins);
     bool visitSimdConvert(MSimdConvert *ins);
     bool visitSimdReinterpretCast(MSimdReinterpretCast *ins);
     bool visitPhi(MPhi *ins);
     bool visitBeta(MBeta *ins);
     bool visitObjectState(MObjectState *ins);
     bool visitArrayState(MArrayState *ins);
     bool visitUnknownValue(MUnknownValue *ins);
--- a/js/src/jit/MIR.h
+++ b/js/src/jit/MIR.h
@@ -1716,16 +1716,59 @@ class MSimdBinaryBitwise : public MBinar
         if (!binaryCongruentTo(ins))
             return false;
         return operation_ == ins->toSimdBinaryBitwise()->operation();
     }
 
     ALLOW_CLONE(MSimdBinaryBitwise)
 };
 
+class MSimdShift : public MBinaryInstruction
+{
+  public:
+    enum Operation {
+        lsh,
+        rsh,
+        ursh
+    };
+
+  private:
+    Operation operation_;
+
+    MSimdShift(MDefinition *left, MDefinition *right, Operation op)
+      : MBinaryInstruction(left, right), operation_(op)
+    {
+        MOZ_ASSERT(left->type() == MIRType_Int32x4 && right->type() == MIRType_Int32);
+        setResultType(MIRType_Int32x4);
+        setMovable();
+    }
+
+  public:
+    INSTRUCTION_HEADER(SimdShift);
+    static MSimdShift *NewAsmJS(TempAllocator &alloc, MDefinition *left,
+                                MDefinition *right, Operation op)
+    {
+        return new(alloc) MSimdShift(left, right, op);
+    }
+
+    AliasSet getAliasSet() const {
+        return AliasSet::None();
+    }
+
+    Operation operation() const { return operation_; }
+
+    bool congruentTo(const MDefinition *ins) const {
+        if (!binaryCongruentTo(ins))
+            return false;
+        return operation_ == ins->toSimdShift()->operation();
+    }
+
+    ALLOW_CLONE(MSimdShift)
+};
+
 class MSimdTernaryBitwise : public MTernaryInstruction
 {
   public:
     enum Operation {
         select
     };
 
   private:
--- a/js/src/jit/MOpcodes.h
+++ b/js/src/jit/MOpcodes.h
@@ -18,16 +18,17 @@ namespace jit {
     _(SimdConvert) \
     _(SimdReinterpretCast) \
     _(SimdExtractElement) \
     _(SimdInsertElement) \
     _(SimdSignMask) \
     _(SimdBinaryComp) \
     _(SimdBinaryArith) \
     _(SimdBinaryBitwise) \
+    _(SimdShift) \
     _(SimdTernaryBitwise) \
     _(CloneLiteral) \
     _(Parameter) \
     _(Callee) \
     _(IsConstructing) \
     _(TableSwitch) \
     _(Goto) \
     _(Test) \
--- a/js/src/jit/ParallelSafetyAnalysis.cpp
+++ b/js/src/jit/ParallelSafetyAnalysis.cpp
@@ -117,16 +117,17 @@ class ParallelSafetyVisitor : public MDe
     SAFE_OP(SimdConvert)
     SAFE_OP(SimdReinterpretCast)
     SAFE_OP(SimdExtractElement)
     SAFE_OP(SimdInsertElement)
     SAFE_OP(SimdSignMask)
     SAFE_OP(SimdBinaryComp)
     SAFE_OP(SimdBinaryArith)
     SAFE_OP(SimdBinaryBitwise)
+    SAFE_OP(SimdShift)
     SAFE_OP(SimdTernaryBitwise)
     UNSAFE_OP(CloneLiteral)
     SAFE_OP(Parameter)
     SAFE_OP(Callee)
     SAFE_OP(IsConstructing)
     SAFE_OP(TableSwitch)
     SAFE_OP(Goto)
     SAFE_OP(Test)
--- a/js/src/jit/shared/Assembler-x86-shared.h
+++ b/js/src/jit/shared/Assembler-x86-shared.h
@@ -1442,16 +1442,40 @@ class AssemblerX86Shared : public Assemb
     void psllq(Imm32 shift, FloatRegister dest) {
         JS_ASSERT(HasSSE2());
         masm.psllq_ir(shift.value, dest.code());
     }
     void psrlq(Imm32 shift, FloatRegister dest) {
         JS_ASSERT(HasSSE2());
         masm.psrlq_ir(shift.value, dest.code());
     }
+    void pslld(FloatRegister src, FloatRegister dest) {
+        MOZ_ASSERT(HasSSE2());
+        masm.pslld_rr(src.code(), dest.code());
+    }
+    void pslld(Imm32 count, FloatRegister dest) {
+        MOZ_ASSERT(HasSSE2());
+        masm.pslld_ir(count.value, dest.code());
+    }
+    void psrad(FloatRegister src, FloatRegister dest) {
+        MOZ_ASSERT(HasSSE2());
+        masm.psrad_rr(src.code(), dest.code());
+    }
+    void psrad(Imm32 count, FloatRegister dest) {
+        MOZ_ASSERT(HasSSE2());
+        masm.psrad_ir(count.value, dest.code());
+    }
+    void psrld(FloatRegister src, FloatRegister dest) {
+        MOZ_ASSERT(HasSSE2());
+        masm.psrld_rr(src.code(), dest.code());
+    }
+    void psrld(Imm32 count, FloatRegister dest) {
+        MOZ_ASSERT(HasSSE2());
+        masm.psrld_ir(count.value, dest.code());
+    }
 
     void cvtsi2sd(const Operand &src, FloatRegister dest) {
         JS_ASSERT(HasSSE2());
         switch (src.kind()) {
           case Operand::REG:
             masm.cvtsi2sd_rr(src.reg(), dest.code());
             break;
           case Operand::MEM_REG_DISP:
--- a/js/src/jit/shared/BaseAssembler-x86-shared.h
+++ b/js/src/jit/shared/BaseAssembler-x86-shared.h
@@ -322,16 +322,19 @@ private:
         OP2_ANDPD_VpdWpd = 0x54,
         OP2_ORPD_VpdWpd = 0x56,
         OP2_XORPD_VpdWpd = 0x57,
         OP2_PCMPGTD_VdqWdq = 0x66,
         OP2_MOVD_VdEd = 0x6E,
         OP2_MOVDQ_VsdWsd = 0x6F,
         OP2_MOVDQ_VdqWdq = 0x6F,
         OP2_PSHUFD_VdqWdqIb = 0x70,
+        OP2_PSLLD_UdqIb = 0x72,
+        OP2_PSRAD_UdqIb = 0x72,
+        OP2_PSRLD_UdqIb = 0x72,
         OP2_PSRLDQ_Vd = 0x73,
         OP2_PCMPEQW = 0x75,
         OP2_PCMPEQD_VdqWdq = 0x76,
         OP2_MOVD_EdVd = 0x7E,
         OP2_MOVDQ_WdqVdq = 0x7F,
         OP2_JCC_rel32 = 0x80,
         OP_SETCC = 0x90,
         OP2_IMUL_GvEv = 0xAF,
@@ -340,17 +343,20 @@ private:
         OP2_MOVSX_GvEb = 0xBE,
         OP2_MOVSX_GvEw = 0xBF,
         OP2_MOVZX_GvEb = 0xB6,
         OP2_MOVZX_GvEw = 0xB7,
         OP2_XADD_EvGv = 0xC1,
         OP2_CMPPS_VpsWps = 0xC2,
         OP2_PEXTRW_GdUdIb = 0xC5,
         OP2_SHUFPS_VpsWpsIb = 0xC6,
+        OP2_PSRLD_VdqWdq = 0xD2,
+        OP2_PSRAD_VdqWdq = 0xE2,
         OP2_PXORDQ_VdqWdq = 0xEF,
+        OP2_PSLLD_VdqWdq = 0xF2,
         OP2_PSUBD_VdqWdq = 0xFA,
         OP2_PADDD_VdqWdq = 0xFE
     } TwoByteOpcodeID;
 
     typedef enum {
         OP3_ROUNDSS_VsdWsd = 0x0A,
         OP3_ROUNDSD_VsdWsd = 0x0B,
         OP3_PTEST_VdVd = 0x17,
@@ -2919,16 +2925,67 @@ public:
     {
         spew("psrlq $%d, %s",
              shift, nameFPReg(dest));
         m_formatter.prefix(PRE_SSE_66);
         m_formatter.twoByteOp(OP2_PSRLDQ_Vd, (RegisterID)2, (RegisterID)dest);
         m_formatter.immediate8(shift);
     }
 
+    void pslld_rr(XMMRegisterID src, XMMRegisterID dst)
+    {
+        spew("pslld %s, %s",
+             nameFPReg(src), nameFPReg(dst));
+        m_formatter.prefix(PRE_SSE_66);
+        m_formatter.twoByteOp(OP2_PSLLD_VdqWdq, (RegisterID)dst, (RegisterID)src);
+    }
+
+    void pslld_ir(int32_t count, XMMRegisterID dst)
+    {
+        spew("pslld $%d, %s",
+             count, nameFPReg(dst));
+        m_formatter.prefix(PRE_SSE_66);
+        m_formatter.twoByteOp(OP2_PSLLD_UdqIb, (RegisterID)6, (RegisterID)dst);
+        m_formatter.immediate8(int8_t(count));
+    }
+
+    void psrad_rr(XMMRegisterID src, XMMRegisterID dst)
+    {
+        spew("psrad %s, %s",
+             nameFPReg(src), nameFPReg(dst));
+        m_formatter.prefix(PRE_SSE_66);
+        m_formatter.twoByteOp(OP2_PSRAD_VdqWdq, (RegisterID)dst, (RegisterID)src);
+    }
+
+    void psrad_ir(int32_t count, XMMRegisterID dst)
+    {
+        spew("psrad $%d, %s",
+             count, nameFPReg(dst));
+        m_formatter.prefix(PRE_SSE_66);
+        m_formatter.twoByteOp(OP2_PSRAD_UdqIb, (RegisterID)4, (RegisterID)dst);
+        m_formatter.immediate8(int8_t(count));
+    }
+
+    void psrld_rr(XMMRegisterID src, XMMRegisterID dst)
+    {
+        spew("psrld %s, %s",
+             nameFPReg(src), nameFPReg(dst));
+        m_formatter.prefix(PRE_SSE_66);
+        m_formatter.twoByteOp(OP2_PSRLD_VdqWdq, (RegisterID)dst, (RegisterID)src);
+    }
+
+    void psrld_ir(int32_t count, XMMRegisterID dst)
+    {
+        spew("psrld $%d, %s",
+             count, nameFPReg(dst));
+        m_formatter.prefix(PRE_SSE_66);
+        m_formatter.twoByteOp(OP2_PSRLD_UdqIb, (RegisterID)2, (RegisterID)dst);
+        m_formatter.immediate8(int8_t(count));
+    }
+
     void movmskpd_rr(XMMRegisterID src, RegisterID dst)
     {
         spew("movmskpd %s, %s",
              nameFPReg(src), nameIReg(dst));
         m_formatter.prefix(PRE_SSE_66);
         m_formatter.twoByteOp(OP2_MOVMSKPD_EdVd, dst, (RegisterID)src);
     }
 
--- a/js/src/jit/shared/CodeGenerator-x86-shared.cpp
+++ b/js/src/jit/shared/CodeGenerator-x86-shared.cpp
@@ -2526,16 +2526,64 @@ CodeGeneratorX86Shared::visitSimdBinaryB
       case MSimdBinaryBitwise::xor_:
         masm.bitwiseXorX4(rhs, lhs);
         return true;
     }
     MOZ_CRASH("unexpected SIMD bitwise op");
 }
 
 bool
+CodeGeneratorX86Shared::visitSimdShift(LSimdShift *ins)
+{
+    FloatRegister vec = ToFloatRegister(ins->vector());
+    FloatRegister out = ToFloatRegister(ins->output());
+    MOZ_ASSERT(vec == out); // defineReuseInput(0);
+
+    // TODO: If the shift count is greater than 31, this will just zero all
+    // lanes by default for lsh and ursh, and set the count to 32 for rsh
+    // (which will just extend the sign bit to all bits). Plain JS doesn't do
+    // this: instead it only keeps the five low bits of the mask. Spec isn't
+    // clear about that topic so this might need to be fixed. See also bug
+    // 1068028.
+    const LAllocation *val = ins->value();
+    if (val->isConstant()) {
+        Imm32 count(ToInt32(val));
+        switch (ins->operation()) {
+          case MSimdShift::lsh:
+            masm.packedLeftShiftByScalar(count, out);
+            return true;
+          case MSimdShift::rsh:
+            masm.packedRightShiftByScalar(count, out);
+            return true;
+          case MSimdShift::ursh:
+            masm.packedUnsignedRightShiftByScalar(count, out);
+            return true;
+        }
+        MOZ_CRASH("unexpected SIMD bitwise op");
+    }
+
+    MOZ_ASSERT(val->isRegister());
+    FloatRegister tmp = ScratchFloat32Reg;
+    masm.movd(ToRegister(val), tmp);
+
+    switch (ins->operation()) {
+      case MSimdShift::lsh:
+        masm.packedLeftShiftByScalar(tmp, out);
+        return true;
+      case MSimdShift::rsh:
+        masm.packedRightShiftByScalar(tmp, out);
+        return true;
+      case MSimdShift::ursh:
+        masm.packedUnsignedRightShiftByScalar(tmp, out);
+        return true;
+    }
+    MOZ_CRASH("unexpected SIMD bitwise op");
+}
+
+bool
 CodeGeneratorX86Shared::visitSimdSelect(LSimdSelect *ins)
 {
     FloatRegister mask = ToFloatRegister(ins->mask());
     FloatRegister onTrue = ToFloatRegister(ins->lhs());
     FloatRegister onFalse = ToFloatRegister(ins->rhs());
     MOZ_ASSERT(onTrue == ToFloatRegister(ins->output()));
 
     // The onFalse argument is not destroyed but due to limitations of the
--- a/js/src/jit/shared/CodeGenerator-x86-shared.h
+++ b/js/src/jit/shared/CodeGenerator-x86-shared.h
@@ -219,16 +219,17 @@ class CodeGeneratorX86Shared : public Co
     bool visitSimdInsertElementI(LSimdInsertElementI *lir);
     bool visitSimdInsertElementF(LSimdInsertElementF *lir);
     bool visitSimdSignMaskX4(LSimdSignMaskX4 *ins);
     bool visitSimdBinaryCompIx4(LSimdBinaryCompIx4 *lir);
     bool visitSimdBinaryCompFx4(LSimdBinaryCompFx4 *lir);
     bool visitSimdBinaryArithIx4(LSimdBinaryArithIx4 *lir);
     bool visitSimdBinaryArithFx4(LSimdBinaryArithFx4 *lir);
     bool visitSimdBinaryBitwiseX4(LSimdBinaryBitwiseX4 *lir);
+    bool visitSimdShift(LSimdShift *lir);
     bool visitSimdSelect(LSimdSelect *ins);
 
     // Out of line visitors.
     bool visitOutOfLineBailout(OutOfLineBailout *ool);
     bool visitOutOfLineUndoALUOperation(OutOfLineUndoALUOperation *ool);
     bool visitMulNegativeZeroCheck(MulNegativeZeroCheck *ool);
     bool visitModOverflowCheck(ModOverflowCheck *ool);
     bool visitReturnZero(ReturnZero *ool);
--- a/js/src/jit/shared/MacroAssembler-x86-shared.h
+++ b/js/src/jit/shared/MacroAssembler-x86-shared.h
@@ -520,16 +520,35 @@ class MacroAssemblerX86Shared : public A
     }
     void packedAddInt32(const Operand &src, FloatRegister dest) {
         paddd(src, dest);
     }
     void packedSubInt32(const Operand &src, FloatRegister dest) {
         psubd(src, dest);
     }
 
+    void packedLeftShiftByScalar(FloatRegister src, FloatRegister dest) {
+        pslld(src, dest);
+    }
+    void packedLeftShiftByScalar(Imm32 count, FloatRegister dest) {
+        pslld(count, dest);
+    }
+    void packedRightShiftByScalar(FloatRegister src, FloatRegister dest) {
+        psrad(src, dest);
+    }
+    void packedRightShiftByScalar(Imm32 count, FloatRegister dest) {
+        psrad(count, dest);
+    }
+    void packedUnsignedRightShiftByScalar(FloatRegister src, FloatRegister dest) {
+        psrld(src, dest);
+    }
+    void packedUnsignedRightShiftByScalar(Imm32 count, FloatRegister dest) {
+        psrld(count, dest);
+    }
+
     void loadAlignedFloat32x4(const Address &src, FloatRegister dest) {
         movaps(Operand(src), dest);
     }
     void loadAlignedFloat32x4(const Operand &src, FloatRegister dest) {
         movaps(src, dest);
     }
     void storeAlignedFloat32x4(FloatRegister src, const Address &dest) {
         movaps(src, Operand(dest));