author | Dan Gohman <sunfish@mozilla.com> |
Thu, 02 Oct 2014 18:24:16 -0700 | |
changeset 208695 | 696baf50aabd5185d3b4e937f83a8fd8897ad3c2 |
parent 208694 | e4165ab15b4e70f99afd4e429f3f429eb0e61463 |
child 208696 | ca7dc8cb97bc4fe965d34ae2b5e590d864655921 |
push id | 1 |
push user | root |
push date | Mon, 20 Oct 2014 17:29:22 +0000 |
reviewers | bbouvier |
bugs | 1074509 |
milestone | 35.0a1 |
--- a/js/src/jit-test/tests/asm.js/testSIMD.js +++ b/js/src/jit-test/tests/asm.js/testSIMD.js @@ -510,64 +510,70 @@ CheckI4(WWI, 'var x = i4(1,2,3,4); x = w // True yields all bits set to 1 (i.e as an int32, 0xFFFFFFFF === -1), false // yields all bits set to 0 (i.e 0). const T = -1; const F = 0; assertAsmTypeFail('glob', USE_ASM + I32 + "var lt=i4.lessThanOrEqual; function f() {} return f"); assertAsmTypeFail('glob', USE_ASM + I32 + "var ge=i4.greaterThanOrEqual; function f() {} return f"); assertAsmTypeFail('glob', USE_ASM + I32 + "var ne=i4.notEqual; function f() {} return f"); -const LTI32 = 'var lt = i4.lessThan'; -const GTI32 = 'var gt = i4.greaterThan'; -const EQI32 = 'var eq = i4.equal'; +const LTI32 = 'var lt = i4.lessThan;'; +const GTI32 = 'var gt = i4.greaterThan;'; +const EQI32 = 'var eq = i4.equal;'; CheckI4(LTI32, 'var x=i4(1,2,3,4); var y=i4(-1,1,0,2); x=lt(x,y)', [F, F, F, F]); CheckI4(LTI32, 'var x=i4(-1,1,0,2); var y=i4(1,2,3,4); x=lt(x,y)', [T, T, T, T]); CheckI4(LTI32, 'var x=i4(1,0,3,4); var y=i4(1,1,7,0); x=lt(x,y)', [F, T, T, F]); CheckI4(EQI32, 'var x=i4(1,2,3,4); var y=i4(-1,1,0,2); x=eq(x,y)', [F, F, F, F]); CheckI4(EQI32, 'var x=i4(-1,1,0,2); var y=i4(1,2,3,4); x=eq(x,y)', [F, F, F, F]); CheckI4(EQI32, 'var x=i4(1,0,3,4); var y=i4(1,1,7,0); x=eq(x,y)', [T, F, F, F]); CheckI4(GTI32, 'var x=i4(1,2,3,4); var y=i4(-1,1,0,2); x=gt(x,y)', [T, T, T, T]); CheckI4(GTI32, 'var x=i4(-1,1,0,2); var y=i4(1,2,3,4); x=gt(x,y)', [F, F, F, F]); CheckI4(GTI32, 'var x=i4(1,0,3,4); var y=i4(1,1,7,0); x=gt(x,y)', [F, F, F, T]); -const LTF32 = 'var lt=f4.lessThan'; -const LEF32 = 'var le=f4.lessThanOrEqual'; -const GTF32 = 'var gt=f4.greaterThan'; -const GEF32 = 'var ge=f4.greaterThanOrEqual'; -const EQF32 = 'var eq=f4.equal'; -const NEF32 = 'var ne=f4.notEqual'; +const LTF32 = 'var lt=f4.lessThan;'; +const LEF32 = 'var le=f4.lessThanOrEqual;'; +const GTF32 = 'var gt=f4.greaterThan;'; +const GEF32 = 'var ge=f4.greaterThanOrEqual;'; +const EQF32 = 'var eq=f4.equal;'; +const NEF32 = 'var ne=f4.notEqual;'; assertAsmTypeFail('glob', USE_ASM + F32 + "var lt=f4.lessThan; function f() {var x=f4(1,2,3,4); var y=f4(5,6,7,8); x=lt(x,y);} return f"); CheckF4Comp(LTF32, 'var y=f4(1,2,3,4); var z=f4(-1,1,0,2); var x=i4(0,0,0,0); x=lt(y,z)', [F, F, F, F]); CheckF4Comp(LTF32, 'var y=f4(-1,1,0,2); var z=f4(1,2,3,4); var x=i4(0,0,0,0); x=lt(y,z)', [T, T, T, T]); CheckF4Comp(LTF32, 'var y=f4(1,0,3,4); var z=f4(1,1,7,0); var x=i4(0,0,0,0); x=lt(y,z)', [F, T, T, F]); +CheckF4Comp(LTF32 + 'const nan = glob.NaN; const fround=glob.Math.fround', 'var y=f4(0,0,0,0); var z=f4(0,0,0,0); var x=i4(0,0,0,0); y=f4(fround(0.0),fround(-0.0),fround(0.0),fround(nan)); z=f4(fround(-0.0),fround(0.0),fround(nan),fround(0.0)); x=lt(y,z);', [F, F, F, F]); CheckF4Comp(LEF32, 'var y=f4(1,2,3,4); var z=f4(-1,1,0,2); var x=i4(0,0,0,0); x=le(y,z)', [F, F, F, F]); CheckF4Comp(LEF32, 'var y=f4(-1,1,0,2); var z=f4(1,2,3,4); var x=i4(0,0,0,0); x=le(y,z)', [T, T, T, T]); CheckF4Comp(LEF32, 'var y=f4(1,0,3,4); var z=f4(1,1,7,0); var x=i4(0,0,0,0); x=le(y,z)', [T, T, T, F]); +CheckF4Comp(LEF32 + 'const nan = glob.NaN; const fround=glob.Math.fround', 'var y=f4(0,0,0,0); var z=f4(0,0,0,0); var x=i4(0,0,0,0); y=f4(fround(0.0),fround(-0.0),fround(0.0),fround(nan)); z=f4(fround(-0.0),fround(0.0),fround(nan),fround(0.0)); x=le(y,z);', [T, T, F, F]); CheckF4Comp(EQF32, 'var y=f4(1,2,3,4); var z=f4(-1,1,0,2); var x=i4(0,0,0,0); x=eq(y,z)', [F, F, F, F]); CheckF4Comp(EQF32, 'var y=f4(-1,1,0,2); var z=f4(1,2,3,4); var x=i4(0,0,0,0); x=eq(y,z)', [F, F, F, F]); CheckF4Comp(EQF32, 'var y=f4(1,0,3,4); var z=f4(1,1,7,0); var x=i4(0,0,0,0); x=eq(y,z)', [T, F, F, F]); +CheckF4Comp(EQF32 + 'const nan = glob.NaN; const fround=glob.Math.fround', 'var y=f4(0,0,0,0); var z=f4(0,0,0,0); var x=i4(0,0,0,0); y=f4(fround(0.0),fround(-0.0),fround(0.0),fround(nan)); z=f4(fround(-0.0),fround(0.0),fround(nan),fround(0.0)); x=eq(y,z);', [T, T, F, F]); CheckF4Comp(NEF32, 'var y=f4(1,2,3,4); var z=f4(-1,1,0,2); var x=i4(0,0,0,0); x=ne(y,z)', [T, T, T, T]); CheckF4Comp(NEF32, 'var y=f4(-1,1,0,2); var z=f4(1,2,3,4); var x=i4(0,0,0,0); x=ne(y,z)', [T, T, T, T]); CheckF4Comp(NEF32, 'var y=f4(1,0,3,4); var z=f4(1,1,7,0); var x=i4(0,0,0,0); x=ne(y,z)', [F, T, T, T]); +CheckF4Comp(NEF32 + 'const nan = glob.NaN; const fround=glob.Math.fround', 'var y=f4(0,0,0,0); var z=f4(0,0,0,0); var x=i4(0,0,0,0); y=f4(fround(0.0),fround(-0.0),fround(0.0),fround(nan)); z=f4(fround(-0.0),fround(0.0),fround(nan),fround(0.0)); x=ne(y,z);', [F, F, T, T]); CheckF4Comp(GTF32, 'var y=f4(1,2,3,4); var z=f4(-1,1,0,2); var x=i4(0,0,0,0); x=gt(y,z)', [T, T, T, T]); CheckF4Comp(GTF32, 'var y=f4(-1,1,0,2); var z=f4(1,2,3,4); var x=i4(0,0,0,0); x=gt(y,z)', [F, F, F, F]); CheckF4Comp(GTF32, 'var y=f4(1,0,3,4); var z=f4(1,1,7,0); var x=i4(0,0,0,0); x=gt(y,z)', [F, F, F, T]); +CheckF4Comp(GTF32 + 'const nan = glob.NaN; const fround=glob.Math.fround', 'var y=f4(0,0,0,0); var z=f4(0,0,0,0); var x=i4(0,0,0,0); y=f4(fround(0.0),fround(-0.0),fround(0.0),fround(nan)); z=f4(fround(-0.0),fround(0.0),fround(nan),fround(0.0)); x=gt(y,z);', [F, F, F, F]); CheckF4Comp(GEF32, 'var y=f4(1,2,3,4); var z=f4(-1,1,0,2); var x=i4(0,0,0,0); x=ge(y,z)', [T, T, T, T]); CheckF4Comp(GEF32, 'var y=f4(-1,1,0,2); var z=f4(1,2,3,4); var x=i4(0,0,0,0); x=ge(y,z)', [F, F, F, F]); CheckF4Comp(GEF32, 'var y=f4(1,0,3,4); var z=f4(1,1,7,0); var x=i4(0,0,0,0); x=ge(y,z)', [T, F, F, T]); +CheckF4Comp(GEF32 + 'const nan = glob.NaN; const fround=glob.Math.fround', 'var y=f4(0,0,0,0); var z=f4(0,0,0,0); var x=i4(0,0,0,0); y=f4(fround(0.0),fround(-0.0),fround(0.0),fround(nan)); z=f4(fround(-0.0),fround(0.0),fround(nan),fround(0.0)); x=ge(y,z);', [T, T, F, F]); // Conversions operators const CVTIF = 'var cvt=f4.fromInt32x4;'; const CVTFI = 'var cvt=i4.fromFloat32x4;'; assertAsmTypeFail('glob', USE_ASM + I32 + "var cvt=i4.fromInt32x4; return {}"); assertAsmTypeFail('glob', USE_ASM + F32 + "var cvt=f4.fromFloat32x4; return {}"); assertAsmTypeFail('glob', USE_ASM + I32 + F32 + CVTIF + "function f() {var x=i4(1,2,3,4); x=cvt(x);} return f");
--- a/js/src/jit/Lowering.cpp +++ b/js/src/jit/Lowering.cpp @@ -3803,22 +3803,22 @@ LIRGenerator::visitSimdSignMask(MSimdSig bool LIRGenerator::visitSimdBinaryComp(MSimdBinaryComp *ins) { MOZ_ASSERT(ins->type() == MIRType_Int32x4); if (ins->compareType() == MSimdBinaryComp::CompareInt32x4) { LSimdBinaryCompIx4 *add = new(alloc()) LSimdBinaryCompIx4(); - return lowerForFPU(add, ins, ins->lhs(), ins->rhs()); + return lowerForCompIx4(add, ins, ins->lhs(), ins->rhs()); } if (ins->compareType() == MSimdBinaryComp::CompareFloat32x4) { LSimdBinaryCompFx4 *add = new(alloc()) LSimdBinaryCompFx4(); - return lowerForFPU(add, ins, ins->lhs(), ins->rhs()); + return lowerForCompFx4(add, ins, ins->lhs(), ins->rhs()); } MOZ_CRASH("Unknown compare type when comparing values"); return false; } bool LIRGenerator::visitSimdBinaryArith(MSimdBinaryArith *ins)
--- a/js/src/jit/MIR.h +++ b/js/src/jit/MIR.h @@ -1606,16 +1606,31 @@ class MSimdBinaryComp : public MBinaryIn AliasSet getAliasSet() const { return AliasSet::None(); } Operation operation() const { return operation_; } CompareType compareType() const { return compareType_; } + // Swap the operands and reverse the comparison predicate. + void reverse() { + switch (operation()) { + case greaterThan: operation_ = lessThan; break; + case greaterThanOrEqual: operation_ = lessThanOrEqual; break; + case lessThan: operation_ = greaterThan; break; + case lessThanOrEqual: operation_ = greaterThanOrEqual; break; + case equal: + case notEqual: + break; + default: MOZ_CRASH("Unexpected compare operation"); + } + swapOperands(); + } + bool congruentTo(const MDefinition *ins) const { if (!binaryCongruentTo(ins)) return false; return operation_ == ins->toSimdBinaryComp()->operation(); } ALLOW_CLONE(MSimdBinaryComp) };
--- a/js/src/jit/arm/Lowering-arm.h +++ b/js/src/jit/arm/Lowering-arm.h @@ -53,16 +53,28 @@ class LIRGeneratorARM : public LIRGenera MDefinition *input); bool lowerForALU(LInstructionHelper<1, 2, 0> *ins, MDefinition *mir, MDefinition *lhs, MDefinition *rhs); bool lowerForFPU(LInstructionHelper<1, 1, 0> *ins, MDefinition *mir, MDefinition *src); bool lowerForFPU(LInstructionHelper<1, 2, 0> *ins, MDefinition *mir, MDefinition *lhs, MDefinition *rhs); + + bool lowerForCompIx4(LSimdBinaryCompIx4 *ins, MSimdBinaryComp *mir, + MDefinition *lhs, MDefinition *rhs) + { + return lowerForFPU(ins, mir, lhs, rhs); + } + bool lowerForCompFx4(LSimdBinaryCompFx4 *ins, MSimdBinaryComp *mir, + MDefinition *lhs, MDefinition *rhs) + { + return lowerForFPU(ins, mir, lhs, rhs); + } + bool lowerForBitAndAndBranch(LBitAndAndBranch *baab, MInstruction *mir, MDefinition *lhs, MDefinition *rhs); bool lowerConstantDouble(double d, MInstruction *ins); bool lowerConstantFloat32(float d, MInstruction *ins); bool lowerTruncateDToInt32(MTruncateToInt32 *ins); bool lowerTruncateFToInt32(MTruncateToInt32 *ins); bool lowerDivI(MDiv *div); bool lowerModI(MMod *mod);
--- a/js/src/jit/mips/Lowering-mips.h +++ b/js/src/jit/mips/Lowering-mips.h @@ -53,16 +53,28 @@ class LIRGeneratorMIPS : public LIRGener MDefinition *input); bool lowerForALU(LInstructionHelper<1, 2, 0> *ins, MDefinition *mir, MDefinition *lhs, MDefinition *rhs); bool lowerForFPU(LInstructionHelper<1, 1, 0> *ins, MDefinition *mir, MDefinition *src); bool lowerForFPU(LInstructionHelper<1, 2, 0> *ins, MDefinition *mir, MDefinition *lhs, MDefinition *rhs); + + bool lowerForCompIx4(LSimdBinaryCompIx4 *ins, MSimdBinaryComp *mir, + MDefinition *lhs, MDefinition *rhs) + { + return lowerForFPU(ins, mir, lhs, rhs); + } + bool lowerForCompFx4(LSimdBinaryCompFx4 *ins, MSimdBinaryComp *mir, + MDefinition *lhs, MDefinition *rhs) + { + return lowerForFPU(ins, mir, lhs, rhs); + } + bool lowerForBitAndAndBranch(LBitAndAndBranch *baab, MInstruction *mir, MDefinition *lhs, MDefinition *rhs); bool lowerConstantDouble(double d, MInstruction *ins); bool lowerConstantFloat32(float d, MInstruction *ins); bool lowerTruncateDToInt32(MTruncateToInt32 *ins); bool lowerTruncateFToInt32(MTruncateToInt32 *ins); bool lowerDivI(MDiv *div); bool lowerModI(MMod *mod);
--- a/js/src/jit/none/Lowering-none.h +++ b/js/src/jit/none/Lowering-none.h @@ -37,16 +37,24 @@ class LIRGeneratorNone : public LIRGener bool lowerForShift(LInstructionHelper<1, 2, 0> *, MDefinition *, MDefinition *, MDefinition *) { MOZ_CRASH(); } bool lowerUrshD(MUrsh *) { MOZ_CRASH(); } template <typename T> bool lowerForALU(T, MDefinition *, MDefinition *, MDefinition *v = nullptr) { MOZ_CRASH(); } template <typename T> bool lowerForFPU(T, MDefinition *, MDefinition *, MDefinition *v = nullptr) { MOZ_CRASH(); } + bool lowerForCompIx4(LSimdBinaryCompIx4 *ins, MSimdBinaryComp *mir, + MDefinition *lhs, MDefinition *rhs) { + MOZ_CRASH(); + } + bool lowerForCompFx4(LSimdBinaryCompFx4 *ins, MSimdBinaryComp *mir, + MDefinition *lhs, MDefinition *rhs) { + MOZ_CRASH(); + } bool lowerForBitAndAndBranch(LBitAndAndBranch *, MInstruction *, MDefinition *, MDefinition *) { MOZ_CRASH(); } bool lowerConstantDouble(double, MInstruction *) { MOZ_CRASH(); } bool lowerConstantFloat32(float, MInstruction *) { MOZ_CRASH(); } bool lowerTruncateDToInt32(MTruncateToInt32 *) { MOZ_CRASH(); }
--- a/js/src/jit/shared/CodeGenerator-x86-shared.cpp +++ b/js/src/jit/shared/CodeGenerator-x86-shared.cpp @@ -2408,17 +2408,17 @@ CodeGeneratorX86Shared::visitSimdBinaryC if (rhs.kind() == Operand::FPREG) masm.moveAlignedInt32x4(ToFloatRegister(ins->rhs()), ScratchSimdReg); else masm.loadAlignedInt32x4(rhs, ScratchSimdReg); // scr := scr > lhs (i.e. lhs < rhs) // Improve by doing custom lowering (rhs is tied to the output register) masm.packedGreaterThanInt32x4(ToOperand(ins->lhs()), ScratchSimdReg); - masm.moveAlignedInt32x4(ScratchFloat32Reg, lhs); + masm.moveAlignedInt32x4(ScratchSimdReg, lhs); return true; case MSimdBinaryComp::notEqual: case MSimdBinaryComp::greaterThanOrEqual: case MSimdBinaryComp::lessThanOrEqual: // These operations are not part of the spec. so are not implemented. break; } MOZ_CRASH("unexpected SIMD op"); @@ -2441,21 +2441,20 @@ CodeGeneratorX86Shared::visitSimdBinaryC return true; case MSimdBinaryComp::lessThanOrEqual: masm.cmpps(rhs, lhs, 0x2); return true; case MSimdBinaryComp::notEqual: masm.cmpps(rhs, lhs, 0x4); return true; case MSimdBinaryComp::greaterThanOrEqual: - masm.cmpps(rhs, lhs, 0x5); - return true; case MSimdBinaryComp::greaterThan: - masm.cmpps(rhs, lhs, 0x6); - return true; + // We reverse these before register allocation so that we don't have to + // copy into and out of temporaries after codegen. + MOZ_CRASH("lowering should have reversed this"); } MOZ_CRASH("unexpected SIMD op"); } bool CodeGeneratorX86Shared::visitSimdBinaryArithIx4(LSimdBinaryArithIx4 *ins) { FloatRegister lhs = ToFloatRegister(ins->lhs());
--- a/js/src/jit/shared/Lowering-x86-shared.cpp +++ b/js/src/jit/shared/Lowering-x86-shared.cpp @@ -12,16 +12,17 @@ #include "jit/shared/Lowering-shared-inl.h" using namespace js; using namespace js::jit; using mozilla::Abs; using mozilla::FloorLog2; +using mozilla::Swap; LTableSwitch * LIRGeneratorX86Shared::newLTableSwitch(const LAllocation &in, const LDefinition &inputCopy, MTableSwitch *tableswitch) { return new(alloc()) LTableSwitch(in, inputCopy, temp(), tableswitch); } @@ -103,16 +104,41 @@ bool LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, 0> *ins, MDefinition *mir, MDefinition *lhs, MDefinition *rhs) { ins->setOperand(0, useRegisterAtStart(lhs)); ins->setOperand(1, lhs != rhs ? use(rhs) : useAtStart(rhs)); return defineReuseInput(ins, mir, 0); } bool +LIRGeneratorX86Shared::lowerForCompIx4(LSimdBinaryCompIx4 *ins, MSimdBinaryComp *mir, MDefinition *lhs, MDefinition *rhs) +{ + return lowerForALU(ins, mir, lhs, rhs); +} + +bool +LIRGeneratorX86Shared::lowerForCompFx4(LSimdBinaryCompFx4 *ins, MSimdBinaryComp *mir, MDefinition *lhs, MDefinition *rhs) +{ + // Swap the operands around to fit the instructions that x86 actually has. + // We do this here, before register allocation, so that we don't need + // temporaries and copying afterwards. + switch (mir->operation()) { + case MSimdBinaryComp::greaterThan: + case MSimdBinaryComp::greaterThanOrEqual: + mir->reverse(); + Swap(lhs, rhs); + break; + default: + break; + } + + return lowerForFPU(ins, mir, lhs, rhs); +} + +bool LIRGeneratorX86Shared::lowerForBitAndAndBranch(LBitAndAndBranch *baab, MInstruction *mir, MDefinition *lhs, MDefinition *rhs) { baab->setOperand(0, useRegisterAtStart(lhs)); baab->setOperand(1, useRegisterOrConstantAtStart(rhs)); return add(baab, mir); }
--- a/js/src/jit/shared/Lowering-x86-shared.h +++ b/js/src/jit/shared/Lowering-x86-shared.h @@ -28,16 +28,20 @@ class LIRGeneratorX86Shared : public LIR bool visitPowHalf(MPowHalf *ins); bool lowerForShift(LInstructionHelper<1, 2, 0> *ins, MDefinition *mir, MDefinition *lhs, MDefinition *rhs); bool lowerForALU(LInstructionHelper<1, 1, 0> *ins, MDefinition *mir, MDefinition *input); bool lowerForALU(LInstructionHelper<1, 2, 0> *ins, MDefinition *mir, MDefinition *lhs, MDefinition *rhs); bool lowerForFPU(LInstructionHelper<1, 2, 0> *ins, MDefinition *mir, MDefinition *lhs, MDefinition *rhs); + bool lowerForCompIx4(LSimdBinaryCompIx4 *ins, MSimdBinaryComp *mir, + MDefinition *lhs, MDefinition *rhs); + bool lowerForCompFx4(LSimdBinaryCompFx4 *ins, MSimdBinaryComp *mir, + MDefinition *lhs, MDefinition *rhs); bool lowerForBitAndAndBranch(LBitAndAndBranch *baab, MInstruction *mir, MDefinition *lhs, MDefinition *rhs); bool visitConstant(MConstant *ins); bool visitAsmJSNeg(MAsmJSNeg *ins); bool lowerMulI(MMul *mul, MDefinition *lhs, MDefinition *rhs); bool lowerDivI(MDiv *div); bool lowerModI(MMod *mod); bool lowerUDiv(MDiv *div);