author | Ivan Jibaja <ivan@cs.utexas.edu> |
Fri, 25 Jul 2014 02:38:24 -0700 | |
changeset 224512 | 5fa26de0a04fcf3f6e44de54f143fbb9cda38004 |
parent 224511 | 3fcf08e891faafb3dfd28253d1e47ba32dc611ae |
child 224513 | 393c2341a26b5419d88cd17add68b9d4cf309423 |
push id | 3979 |
push user | raliiev@mozilla.com |
push date | Mon, 13 Oct 2014 16:35:44 +0000 |
treeherder | mozilla-beta@30f2cc610691 [default view] [failures only] |
perfherder | [talos] [build metrics] [platform microbench] (compared to previous push) |
reviewers | ijibaja, dougc, bbouvier, sunfish |
bugs | 1025127 |
milestone | 34.0a1 |
first release with | nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
|
last release without | nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
|
--- a/js/src/jit/LIR-Common.h
+++ b/js/src/jit/LIR-Common.h
@@ -180,16 +180,72 @@ class LSimdExtractElementF : public LIns
     const LAllocation *getBase() {
         return getOperand(0);
     }
     SimdLane lane() const {
         return lane_;
     }
 };
 
+// Common base for the LIR nodes comparing two SIMD operands; operand 0 is the
+// left-hand side and operand 1 the right-hand side. The constructor is
+// protected, so only the typed subclasses are meant to be instantiated.
+class LSimdBinaryComp : public LInstructionHelper<1, 2, 0>
+{
+  protected:
+    LSimdBinaryComp() {}
+
+  public:
+    const LAllocation *lhs() {
+        return getOperand(0);
+    }
+    const LAllocation *rhs() {
+        return getOperand(1);
+    }
+    MSimdBinaryComp::Operation operation() const {
+        return mir_->toSimdBinaryComp()->operation();
+    }
+
+    // Returns the name of the comparison carried by the MIR node.
+    const char *extraName() const {
+        const MSimdBinaryComp::Operation op = operation();
+        switch (op) {
+          case MSimdBinaryComp::greaterThan:
+            return "greaterThan";
+          case MSimdBinaryComp::greaterThanOrEqual:
+            return "greaterThanOrEqual";
+          case MSimdBinaryComp::lessThan:
+            return "lessThan";
+          case MSimdBinaryComp::lessThanOrEqual:
+            return "lessThanOrEqual";
+          case MSimdBinaryComp::equal:
+            return "equal";
+          case MSimdBinaryComp::notEqual:
+            return "notEqual";
+        }
+        MOZ_CRASH("unexpected operation");
+    }
+};
+
+// LIR node for a comparison between two Int32x4 operands.
+class LSimdBinaryCompIx4 : public LSimdBinaryComp
+{
+  public:
+    LIR_HEADER(SimdBinaryCompIx4);
+    LSimdBinaryCompIx4() {}
+};
+
+// LIR node for a comparison between two Float32x4 operands.
+class LSimdBinaryCompFx4 : public LSimdBinaryComp
+{
+  public:
+    LIR_HEADER(SimdBinaryCompFx4);
+    LSimdBinaryCompFx4() {}
+};
+
 // Binary SIMD arithmetic operation between two SIMD operands
 class LSimdBinaryArith : public LInstructionHelper<1, 2, 0>
 {
   public:
     LSimdBinaryArith() {}
 
     const LAllocation *lhs() {
         return getOperand(0);
--- a/js/src/jit/LOpcodes.h +++ b/js/src/jit/LOpcodes.h @@ -16,16 +16,18 @@ _(Pointer) \ _(Double) \ _(Float32) \ _(SimdValueX4) \ _(Int32x4) \ _(Float32x4) \ _(SimdExtractElementI) \ _(SimdExtractElementF) \ + _(SimdBinaryCompIx4) \ + _(SimdBinaryCompFx4) \ _(SimdBinaryArithIx4) \ _(SimdBinaryArithFx4) \ _(Value) \ _(CloneLiteral) \ _(Parameter) \ _(Callee) \ _(TableSwitch) \ _(TableSwitchV) \
--- a/js/src/jit/Lowering.cpp
+++ b/js/src/jit/Lowering.cpp
@@ -3723,16 +3723,38 @@ LIRGenerator::visitSimdExtractElement(MS
         return define(new(alloc()) LSimdExtractElementF(use, ins->lane()), ins);
     }
 
     MOZ_ASSUME_UNREACHABLE("Unknown SIMD kind when extracting element");
     return false;
 }
 
+// Lowers a SIMD comparison to the LIR node matching the type of the compared
+// operands; the comparison result itself is asserted to be an Int32x4.
 bool
+LIRGenerator::visitSimdBinaryComp(MSimdBinaryComp *ins)
+{
+    MOZ_ASSERT(ins->type() == MIRType_Int32x4);
+
+    switch (ins->compareType()) {
+      case MSimdBinaryComp::CompareInt32x4: {
+        LSimdBinaryCompIx4 *lir = new(alloc()) LSimdBinaryCompIx4();
+        return lowerForFPU(lir, ins, ins->lhs(), ins->rhs());
+      }
+      case MSimdBinaryComp::CompareFloat32x4: {
+        LSimdBinaryCompFx4 *lir = new(alloc()) LSimdBinaryCompFx4();
+        return lowerForFPU(lir, ins, ins->lhs(), ins->rhs());
+      }
+    }
+
+    MOZ_CRASH("Unknown compare type when comparing values");
+    return false;
+}
+
+bool
 LIRGenerator::visitSimdBinaryArith(MSimdBinaryArith *ins)
 {
     JS_ASSERT(IsSimdType(ins->type()));
 
     if (ins->type() == MIRType_Int32x4) {
         LSimdBinaryArithIx4 *add = new(alloc()) LSimdBinaryArithIx4();
         return lowerForFPU(add, ins, ins->lhs(), ins->rhs());
     }
--- a/js/src/jit/Lowering.h +++ b/js/src/jit/Lowering.h @@ -263,16 +263,17 @@ class LIRGenerator : public LIRGenerator bool visitAsmJSVoidReturn(MAsmJSVoidReturn *ins); bool visitAsmJSPassStackArg(MAsmJSPassStackArg *ins); bool visitAsmJSCall(MAsmJSCall *ins); bool visitSetDOMProperty(MSetDOMProperty *ins); bool visitGetDOMProperty(MGetDOMProperty *ins); bool visitGetDOMMember(MGetDOMMember *ins); bool visitRecompileCheck(MRecompileCheck *ins); bool visitSimdExtractElement(MSimdExtractElement *ins); + bool visitSimdBinaryComp(MSimdBinaryComp *ins); bool visitSimdBinaryArith(MSimdBinaryArith *ins); bool visitSimdValueX4(MSimdValueX4 *ins); bool visitSimdConstant(MSimdConstant *ins); bool visitPhi(MPhi *ins); bool visitBeta(MBeta *ins); bool visitObjectState(MObjectState *ins); bool visitArrayState(MArrayState *ins); };
--- a/js/src/jit/MIR.h
+++ b/js/src/jit/MIR.h
@@ -1353,16 +1353,82 @@ class MSimdExtractElement : public MUnar
             return false;
         const MSimdExtractElement *other = ins->toSimdExtractElement();
         if (other->lane_ != lane_)
             return false;
         return congruentIfOperandsEqual(other);
     }
 };
 
+// Lane-wise comparison of two SIMD vectors of the same type. The result is an
+// Int32x4 vector in which a lane has all bits set to 1 when the comparison
+// holds for that lane, and is 0 otherwise.
+class MSimdBinaryComp : public MBinaryInstruction
+{
+  public:
+    // Comparison applied to each pair of lanes.
+    enum Operation {
+        greaterThan,
+        greaterThanOrEqual,
+        lessThan,
+        lessThanOrEqual,
+        equal,
+        notEqual
+    };
+
+    // Type of the operands being compared (the result type is always Int32x4).
+    enum CompareType {
+        CompareInt32x4,
+        CompareFloat32x4
+    };
+
+  private:
+    Operation operation_;
+    CompareType compareType_;
+
+    MSimdBinaryComp(MDefinition *left, MDefinition *right, Operation op)
+      : MBinaryInstruction(left, right),
+        operation_(op),
+        compareType_(left->type() == MIRType_Int32x4 ? CompareInt32x4 : CompareFloat32x4)
+    {
+        MOZ_ASSERT(IsSimdType(left->type()));
+        MOZ_ASSERT(left->type() == right->type());
+        MOZ_ASSERT(compareType_ == CompareInt32x4 || left->type() == MIRType_Float32x4);
+
+        setResultType(MIRType_Int32x4);
+        setMovable();
+
+        // equal / notEqual do not depend on operand order, so mark them commutative.
+        const bool symmetric = (op == equal || op == notEqual);
+        if (symmetric)
+            setCommutative();
+    }
+
+  public:
+    INSTRUCTION_HEADER(SimdBinaryComp);
+    static MSimdBinaryComp *NewAsmJS(TempAllocator &alloc, MDefinition *left, MDefinition *right,
+                                     Operation op)
+    {
+        return new(alloc) MSimdBinaryComp(left, right, op);
+    }
+
+    Operation operation() const { return operation_; }
+    CompareType compareType() const { return compareType_; }
+
+    AliasSet getAliasSet() const {
+        return AliasSet::None();
+    }
+
+    // Requires binaryCongruentTo() to hold and the comparison operation to be
+    // the same.
+    bool congruentTo(const MDefinition *ins) const {
+        return binaryCongruentTo(ins) && operation_ == ins->toSimdBinaryComp()->operation();
+    }
+};
+
 class MSimdBinaryArith : public MBinaryInstruction
 {
   public:
     enum Operation {
         Add,
         Sub,
         Mul,
         Div
--- a/js/src/jit/MOpcodes.h +++ b/js/src/jit/MOpcodes.h @@ -10,16 +10,17 @@ namespace js { namespace jit { #define MIR_OPCODE_LIST(_) \ _(Constant) \ _(SimdValueX4) \ _(SimdConstant) \ _(SimdExtractElement) \ + _(SimdBinaryComp) \ _(SimdBinaryArith) \ _(CloneLiteral) \ _(Parameter) \ _(Callee) \ _(TableSwitch) \ _(Goto) \ _(Test) \ _(TypeObjectDispatch) \
--- a/js/src/jit/ParallelSafetyAnalysis.cpp +++ b/js/src/jit/ParallelSafetyAnalysis.cpp @@ -110,16 +110,17 @@ class ParallelSafetyVisitor : public MDe // I am taking the policy of blacklisting everything that's not // obviously safe for now. We can loosen as we need. SAFE_OP(Constant) SAFE_OP(SimdValueX4) SAFE_OP(SimdConstant) SAFE_OP(SimdExtractElement) + SAFE_OP(SimdBinaryComp) SAFE_OP(SimdBinaryArith) UNSAFE_OP(CloneLiteral) SAFE_OP(Parameter) SAFE_OP(Callee) SAFE_OP(TableSwitch) SAFE_OP(Goto) SAFE_OP(Test) SAFE_OP(Compare)
--- a/js/src/jit/arm/CodeGenerator-arm.h +++ b/js/src/jit/arm/CodeGenerator-arm.h @@ -229,16 +229,18 @@ class CodeGeneratorARM : public CodeGene public: // Unimplemented SIMD instructions bool visitSimdValueX4(LSimdValueX4 *lir) { MOZ_CRASH("NYI"); } bool visitInt32x4(LInt32x4 *ins) { MOZ_CRASH("NYI"); } bool visitFloat32x4(LFloat32x4 *ins) { MOZ_CRASH("NYI"); } bool visitSimdExtractElementI(LSimdExtractElementI *ins) { MOZ_CRASH("NYI"); } bool visitSimdExtractElementF(LSimdExtractElementF *ins) { MOZ_CRASH("NYI"); } + bool visitSimdBinaryCompIx4(LSimdBinaryCompIx4 *lir) { MOZ_CRASH("NYI"); } + bool visitSimdBinaryCompFx4(LSimdBinaryCompFx4 *lir) { MOZ_CRASH("NYI"); } bool visitSimdBinaryArithIx4(LSimdBinaryArithIx4 *lir) { MOZ_CRASH("NYI"); } bool visitSimdBinaryArithFx4(LSimdBinaryArithFx4 *lir) { MOZ_CRASH("NYI"); } }; typedef CodeGeneratorARM CodeGeneratorSpecific; // An out-of-line bailout thunk. class OutOfLineBailout : public OutOfLineCodeBase<CodeGeneratorARM>
--- a/js/src/jit/shared/Assembler-x86-shared.h
+++ b/js/src/jit/shared/Assembler-x86-shared.h
@@ -1508,16 +1508,73 @@ class AssemblerX86Shared : public Assemb
     void ucomiss(FloatRegister lhs, FloatRegister rhs) {
         JS_ASSERT(HasSSE2());
         masm.ucomiss_rr(rhs.code(), lhs.code());
     }
     void pcmpeqw(FloatRegister lhs, FloatRegister rhs) {
         JS_ASSERT(HasSSE2());
         masm.pcmpeqw_rr(rhs.code(), lhs.code());
     }
+    // Emits pcmpeqd with dest as the register operand, picking the encoder
+    // that matches the kind of src (register, base + displacement or address).
+    void pcmpeqd(const Operand &src, FloatRegister dest) {
+        MOZ_ASSERT(HasSSE2());
+        switch (src.kind()) {
+          case Operand::FPREG:
+            masm.pcmpeqd_rr(src.fpu(), dest.code());
+            return;
+          case Operand::MEM_REG_DISP:
+            masm.pcmpeqd_mr(src.disp(), src.base(), dest.code());
+            return;
+          case Operand::MEM_ADDRESS32:
+            masm.pcmpeqd_mr(src.address(), dest.code());
+            return;
+          default:
+            break;
+        }
+        MOZ_CRASH("unexpected operand kind");
+    }
+    // Emits pcmpgtd with dest as the register operand, picking the encoder
+    // that matches the kind of src (register, base + displacement or address).
+    void pcmpgtd(const Operand &src, FloatRegister dest) {
+        MOZ_ASSERT(HasSSE2());
+        switch (src.kind()) {
+          case Operand::FPREG:
+            masm.pcmpgtd_rr(src.fpu(), dest.code());
+            return;
+          case Operand::MEM_REG_DISP:
+            masm.pcmpgtd_mr(src.disp(), src.base(), dest.code());
+            return;
+          case Operand::MEM_ADDRESS32:
+            masm.pcmpgtd_mr(src.address(), dest.code());
+            return;
+          default:
+            break;
+        }
+        MOZ_CRASH("unexpected operand kind");
+    }
+    // Emits cmpps with dest as the register operand; order is forwarded
+    // unchanged to the encoder matching the kind of src.
+    void cmpps(const Operand &src, FloatRegister dest, uint8_t order) {
+        MOZ_ASSERT(HasSSE2());
+        switch (src.kind()) {
+          case Operand::FPREG:
+            masm.cmpps_rr(src.fpu(), dest.code(), order);
+            return;
+          case Operand::MEM_REG_DISP:
+            masm.cmpps_mr(src.disp(), src.base(), dest.code(), order);
+            return;
+          case Operand::MEM_ADDRESS32:
+            masm.cmpps_mr(src.address(), dest.code(), order);
+            return;
+          default:
+            break;
+        }
+        MOZ_CRASH("unexpected operand kind");
+    }
     void movd(Register src, FloatRegister dest) {
         JS_ASSERT(HasSSE2());
         masm.movd_rr(src.code(), dest.code());
     }
     void movd(FloatRegister src, Register dest) {
         JS_ASSERT(HasSSE2());
         masm.movd_rr(src.code(), dest.code());
     }
--- a/js/src/jit/shared/BaseAssembler-x86-shared.h
+++ b/js/src/jit/shared/BaseAssembler-x86-shared.h
@@ -307,34 +307,37 @@ private:
         OP2_DIVSD_VsdWsd    = 0x5E,
         OP2_DIVPS_VpsWps    = 0x5E,
         OP2_MAXSD_VsdWsd    = 0x5F,
         OP2_SQRTSD_VsdWsd   = 0x51,
         OP2_SQRTSS_VssWss   = 0x51,
         OP2_ANDPD_VpdWpd    = 0x54,
         OP2_ORPD_VpdWpd     = 0x56,
         OP2_XORPD_VpdWpd    = 0x57,
+        OP2_PCMPGTD_VdqWdq  = 0x66, // pcmpgtd; emitted after the PRE_SSE_66 prefix
         OP2_MOVD_VdEd       = 0x6E,
         OP2_MOVDQ_VsdWsd    = 0x6F,
         OP2_MOVDQ_VdqWdq    = 0x6F,
         OP2_PSHUFD_VdqWdqIb = 0x70,
         OP2_PSRLDQ_Vd       = 0x73,
         OP2_PCMPEQW         = 0x75,
+        OP2_PCMPEQD_VdqWdq  = 0x76, // pcmpeqd; emitted after the PRE_SSE_66 prefix
         OP2_MOVD_EdVd       = 0x7E,
         OP2_MOVDQ_WdqVdq    = 0x7F,
         OP2_JCC_rel32       = 0x80,
         OP_SETCC            = 0x90,
         OP2_IMUL_GvEv       = 0xAF,
         OP2_CMPXCHG_GvEw    = 0xB1,
         OP2_BSR_GvEv        = 0xBD,
         OP2_MOVSX_GvEb      = 0xBE,
         OP2_MOVSX_GvEw      = 0xBF,
         OP2_MOVZX_GvEb      = 0xB6,
         OP2_MOVZX_GvEw      = 0xB7,
         OP2_XADD_EvGv       = 0xC1,
+        OP2_CMPPS_VpsWps    = 0xC2, // cmpps; no prefix, followed by an 8-bit immediate
         OP2_PEXTRW_GdUdIb   = 0xC5,
         OP2_SHUFPS_VpsWpsIb = 0xC6,
         OP2_PXORDQ_VdqWdq   = 0xEF,
         OP2_PSUBD_VdqWdq    = 0xFA,
         OP2_PADDD_VdqWdq    = 0xFE
     } TwoByteOpcodeID;
 
     typedef enum {
@@ -2528,16 +2531,97 @@ public:
     void pcmpeqw_rr(XMMRegisterID src, XMMRegisterID dst)
     {
         spew("pcmpeqw %s, %s",
              nameFPReg(src), nameFPReg(dst));
         m_formatter.prefix(PRE_SSE_66);
         m_formatter.twoByteOp(OP2_PCMPEQW, (RegisterID)dst, (RegisterID)src); /* right order ? */
     }
 
+    // pcmpeqd: the PRE_SSE_66 prefix followed by the two-byte opcode
+    // OP2_PCMPEQD_VdqWdq. One overload per source form: XMM register,
+    // base register + offset, absolute address. dst is always an XMM register.
+    void pcmpeqd_rr(XMMRegisterID src, XMMRegisterID dst)
+    {
+        spew("pcmpeqd %s, %s",
+             nameFPReg(src), nameFPReg(dst));
+        m_formatter.prefix(PRE_SSE_66);
+        m_formatter.twoByteOp(OP2_PCMPEQD_VdqWdq, (RegisterID)dst, (RegisterID)src);
+    }
+
+    void pcmpeqd_mr(int offset, RegisterID base, XMMRegisterID dst)
+    {
+        spew("pcmpeqd %s0x%x(%s), %s",
+             PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(dst));
+        m_formatter.prefix(PRE_SSE_66);
+        m_formatter.twoByteOp(OP2_PCMPEQD_VdqWdq, (RegisterID)dst, base, offset);
+    }
+
+    void pcmpeqd_mr(const void* address, XMMRegisterID dst)
+    {
+        spew("pcmpeqd %p, %s",
+             address, nameFPReg(dst));
+        m_formatter.prefix(PRE_SSE_66);
+        m_formatter.twoByteOp(OP2_PCMPEQD_VdqWdq, (RegisterID)dst, address);
+    }
+
+    // pcmpgtd: the PRE_SSE_66 prefix followed by the two-byte opcode
+    // OP2_PCMPGTD_VdqWdq. One overload per source form: XMM register,
+    // base register + offset, absolute address. dst is always an XMM register.
+    void pcmpgtd_rr(XMMRegisterID src, XMMRegisterID dst)
+    {
+        spew("pcmpgtd %s, %s",
+             nameFPReg(src), nameFPReg(dst));
+        m_formatter.prefix(PRE_SSE_66);
+        m_formatter.twoByteOp(OP2_PCMPGTD_VdqWdq, (RegisterID)dst, (RegisterID)src);
+    }
+
+    void pcmpgtd_mr(int offset, RegisterID base, XMMRegisterID dst)
+    {
+        spew("pcmpgtd %s0x%x(%s), %s",
+             PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(dst));
+        m_formatter.prefix(PRE_SSE_66);
+        m_formatter.twoByteOp(OP2_PCMPGTD_VdqWdq, (RegisterID)dst, base, offset);
+    }
+
+    void pcmpgtd_mr(const void* address, XMMRegisterID dst)
+    {
+        spew("pcmpgtd %p, %s",
+             address, nameFPReg(dst));
+        m_formatter.prefix(PRE_SSE_66);
+        m_formatter.twoByteOp(OP2_PCMPGTD_VdqWdq, (RegisterID)dst, address);
+    }
+
+    // cmpps: no prefix; the two-byte opcode OP2_CMPPS_VpsWps is followed by
+    // order, emitted as an 8-bit immediate. One overload per source form:
+    // XMM register, base register + offset, absolute address.
+    void cmpps_rr(XMMRegisterID src, XMMRegisterID dst, uint8_t order)
+    {
+        spew("cmpps %s, %s, %u",
+             nameFPReg(src), nameFPReg(dst), order);
+        m_formatter.twoByteOp(OP2_CMPPS_VpsWps, (RegisterID)dst, (RegisterID)src);
+        m_formatter.immediate8(order);
+    }
+
+    void cmpps_mr(int offset, RegisterID base, XMMRegisterID dst, uint8_t order)
+    {
+        spew("cmpps %s0x%x(%s), %s, %u",
+             PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(dst), order);
+        m_formatter.twoByteOp(OP2_CMPPS_VpsWps, (RegisterID)dst, base, offset);
+        m_formatter.immediate8(order);
+    }
+
+    void cmpps_mr(const void* address, XMMRegisterID dst, uint8_t order)
+    {
+        spew("cmpps %p, %s, %u",
+             address, nameFPReg(dst), order);
+        m_formatter.twoByteOp(OP2_CMPPS_VpsWps, (RegisterID)dst, address);
+        m_formatter.immediate8(order);
+    }
+
     void addsd_rr(XMMRegisterID src, XMMRegisterID dst)
     {
         spew("addsd %s, %s",
              nameFPReg(src), nameFPReg(dst));
         m_formatter.prefix(PRE_SSE_F2);
         m_formatter.twoByteOp(OP2_ADDSD_VsdWsd, (RegisterID)dst, (RegisterID)src);
     }
 
--- a/js/src/jit/shared/CodeGenerator-x86-shared.cpp
+++ b/js/src/jit/shared/CodeGenerator-x86-shared.cpp
@@ -2188,16 +2188,94 @@ CodeGeneratorX86Shared::visitSimdExtract
         uint32_t mask = MacroAssembler::ComputeShuffleMask(lane);
         masm.shuffleFloat32(mask, input, output);
     }
     masm.canonicalizeFloat(output);
     return true;
 }
 
+// Emits the lane-wise Int32x4 comparison selected by ins->operation(). The
+// result is produced in the lhs register, which is asserted to be the output.
 bool
+CodeGeneratorX86Shared::visitSimdBinaryCompIx4(LSimdBinaryCompIx4 *ins)
+{
+    FloatRegister lhs = ToFloatRegister(ins->lhs());
+    Operand rhs = ToOperand(ins->rhs());
+    MOZ_ASSERT(ToFloatRegister(ins->output()) == lhs);
+
+    MSimdBinaryComp::Operation op = ins->operation();
+    switch (op) {
+      case MSimdBinaryComp::greaterThan:
+        masm.packedGreaterThanInt32x4(rhs, lhs);
+        return true;
+      case MSimdBinaryComp::equal:
+        masm.packedEqualInt32x4(rhs, lhs);
+        return true;
+      case MSimdBinaryComp::lessThan:
+        // Evaluated as rhs > lhs in the scratch register, then copied to lhs.
+        // scr := rhs
+        if (rhs.kind() == Operand::FPREG)
+            masm.moveAlignedInt32x4(ToFloatRegister(ins->rhs()), ScratchSimdReg);
+        else
+            masm.loadAlignedInt32x4(rhs, ScratchSimdReg);
+
+        // scr := scr > lhs (i.e. lhs < rhs)
+        // Improve by doing custom lowering (rhs is tied to the output register)
+        masm.packedGreaterThanInt32x4(ToOperand(ins->lhs()), ScratchSimdReg);
+        // lhs := scr; read back the same register the result was computed in.
+        masm.moveAlignedInt32x4(ScratchSimdReg, lhs);
+        return true;
+      case MSimdBinaryComp::notEqual:
+      case MSimdBinaryComp::greaterThanOrEqual:
+      case MSimdBinaryComp::lessThanOrEqual:
+        // These operations are not part of the spec, so they are not implemented.
+        break;
+    }
+    MOZ_ASSUME_UNREACHABLE("unexpected SIMD op");
+}
+
+// Emits the lane-wise Float32x4 comparison selected by ins->operation() as a
+// cmpps on the lhs register (asserted to be the output); the last argument
+// of cmpps selects the comparison.
+bool
+CodeGeneratorX86Shared::visitSimdBinaryCompFx4(LSimdBinaryCompFx4 *ins)
+{
+    FloatRegister lhs = ToFloatRegister(ins->lhs());
+    Operand rhs = ToOperand(ins->rhs());
+    MOZ_ASSERT(ToFloatRegister(ins->output()) == lhs);
+
+    // NOTE(review): 0x5 and 0x6 are the "not less than" / "not less or equal"
+    // predicates in the Intel SDM, which also hold for unordered (NaN) lanes;
+    // confirm that this is the NaN behaviour wanted for greaterThanOrEqual and
+    // greaterThan.
+    MSimdBinaryComp::Operation op = ins->operation();
+    switch (op) {
+      case MSimdBinaryComp::equal:
+        masm.cmpps(rhs, lhs, 0x0); // EQ
+        return true;
+      case MSimdBinaryComp::lessThan:
+        masm.cmpps(rhs, lhs, 0x1); // LT
+        return true;
+      case MSimdBinaryComp::lessThanOrEqual:
+        masm.cmpps(rhs, lhs, 0x2); // LE
+        return true;
+      case MSimdBinaryComp::notEqual:
+        masm.cmpps(rhs, lhs, 0x4); // NEQ
+        return true;
+      case MSimdBinaryComp::greaterThanOrEqual:
+        masm.cmpps(rhs, lhs, 0x5); // NLT
+        return true;
+      case MSimdBinaryComp::greaterThan:
+        masm.cmpps(rhs, lhs, 0x6); // NLE
+        return true;
+    }
+    MOZ_ASSUME_UNREACHABLE("unexpected SIMD op");
+}
+
+bool
 CodeGeneratorX86Shared::visitSimdBinaryArithIx4(LSimdBinaryArithIx4 *ins)
 {
     FloatRegister lhs = ToFloatRegister(ins->lhs());
     Operand rhs = ToOperand(ins->rhs());
     JS_ASSERT(ToFloatRegister(ins->output()) == lhs);
 
     MSimdBinaryArith::Operation op = ins->operation();
     switch (op) {
--- a/js/src/jit/shared/CodeGenerator-x86-shared.h +++ b/js/src/jit/shared/CodeGenerator-x86-shared.h @@ -206,16 +206,18 @@ class CodeGeneratorX86Shared : public Co bool visitNegF(LNegF *lir); // SIMD operators bool visitSimdValueX4(LSimdValueX4 *lir); bool visitInt32x4(LInt32x4 *ins); bool visitFloat32x4(LFloat32x4 *ins); bool visitSimdExtractElementI(LSimdExtractElementI *lir); bool visitSimdExtractElementF(LSimdExtractElementF *lir); + bool visitSimdBinaryCompIx4(LSimdBinaryCompIx4 *lir); + bool visitSimdBinaryCompFx4(LSimdBinaryCompFx4 *lir); bool visitSimdBinaryArithIx4(LSimdBinaryArithIx4 *lir); bool visitSimdBinaryArithFx4(LSimdBinaryArithFx4 *lir); // Out of line visitors. bool visitOutOfLineBailout(OutOfLineBailout *ool); bool visitOutOfLineUndoALUOperation(OutOfLineUndoALUOperation *ool); bool visitMulNegativeZeroCheck(MulNegativeZeroCheck *ool); bool visitModOverflowCheck(ModOverflowCheck *ool);
--- a/js/src/jit/shared/MacroAssembler-x86-shared.h +++ b/js/src/jit/shared/MacroAssembler-x86-shared.h @@ -465,38 +465,50 @@ class MacroAssemblerX86Shared : public A } void convertDoubleToFloat32(FloatRegister src, FloatRegister dest) { cvtsd2ss(src, dest); } void loadAlignedInt32x4(const Address &src, FloatRegister dest) { movdqa(Operand(src), dest); } + void loadAlignedInt32x4(const Operand &src, FloatRegister dest) { + movdqa(src, dest); + } void storeAlignedInt32x4(FloatRegister src, const Address &dest) { movdqa(src, Operand(dest)); } void moveAlignedInt32x4(FloatRegister src, FloatRegister dest) { movdqa(src, dest); } void loadUnalignedInt32x4(const Address &src, FloatRegister dest) { movdqu(Operand(src), dest); } void storeUnalignedInt32x4(FloatRegister src, const Address &dest) { movdqu(src, Operand(dest)); } + void packedEqualInt32x4(const Operand &src, FloatRegister dest) { + pcmpeqd(src, dest); + } + void packedGreaterThanInt32x4(const Operand &src, FloatRegister dest) { + pcmpgtd(src, dest); + } void packedAddInt32(const Operand &src, FloatRegister dest) { paddd(src, dest); } void packedSubInt32(const Operand &src, FloatRegister dest) { psubd(src, dest); } void loadAlignedFloat32x4(const Address &src, FloatRegister dest) { movaps(Operand(src), dest); } + void loadAlignedFloat32x4(const Operand &src, FloatRegister dest) { + movaps(src, dest); + } void storeAlignedFloat32x4(FloatRegister src, const Address &dest) { movaps(src, Operand(dest)); } void moveAlignedFloat32x4(FloatRegister src, FloatRegister dest) { movaps(src, dest); } void loadUnalignedFloat32x4(const Address &src, FloatRegister dest) { movups(Operand(src), dest); @@ -511,17 +523,16 @@ class MacroAssemblerX86Shared : public A subps(src, dest); } void packedMulFloat32(const Operand &src, FloatRegister dest) { mulps(src, dest); } void packedDivFloat32(const Operand &src, FloatRegister dest) { divps(src, dest); } - static uint32_t ComputeShuffleMask(SimdLane x, SimdLane y = LaneX, SimdLane z = 
LaneX, SimdLane w = LaneX) { uint32_t r = (uint32_t(w) << 6) | (uint32_t(z) << 4) | (uint32_t(y) << 2) | uint32_t(x); JS_ASSERT(r < 256);