Bug 1094855: Update SimdBinaryArith::Min/Max to properly handle comparisons involving -0/0 and NaNs; r=sunfish
authorBenjamin Bouvier <benj@benj.me>
Fri, 21 Nov 2014 17:27:21 +0100
changeset 216929 cecc072d44bccea803829f3d4fdbfceb52e6c8c8
parent 216928 e21104ba92054fe2cd2ffd69d4fd7fb164a52cba
child 216930 8937c8785f74dd7ef92268aa45cfbbd330b2812c
push id27868
push userkwierso@gmail.com
push dateSat, 22 Nov 2014 00:36:06 +0000
treeherdermozilla-central@7ab92d922d19 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewerssunfish
bugs1094855
milestone36.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1094855: Update SimdBinaryArith::Min/Max to properly handle comparisons involving -0/0 and NaNs; r=sunfish
js/src/jit-test/tests/asm.js/testSIMD.js
js/src/jit/LIR-Common.h
js/src/jit/Lowering.cpp
js/src/jit/arm/Lowering-arm.cpp
js/src/jit/arm/Lowering-arm.h
js/src/jit/mips/Lowering-mips.cpp
js/src/jit/mips/Lowering-mips.h
js/src/jit/shared/Assembler-x86-shared.h
js/src/jit/shared/CodeGenerator-x86-shared.cpp
js/src/jit/shared/Lowering-x86-shared.cpp
js/src/jit/shared/Lowering-x86-shared.h
--- a/js/src/jit-test/tests/asm.js/testSIMD.js
+++ b/js/src/jit-test/tests/asm.js/testSIMD.js
@@ -508,23 +508,22 @@ CheckRecp([NaN, -Infinity, Infinity, 0])
 
 // Min/Max
 assertAsmTypeFail('glob', USE_ASM + I32 + "var f4m=i4.min; function f() {} return f");
 assertAsmTypeFail('glob', USE_ASM + I32 + "var f4d=i4.max; function f() {} return f");
 
 const F32MIN = 'var min = f4.min;'
 const F32MAX = 'var max = f4.max;'
 
-// TODO amend tests once float32x4.min/max is fully specified, for NaN and -0
-// vs 0. See also comment in js::jit::CodeGeneratorX86Shared::visitSimdBinaryArithFx4.
-// In these tests, we assume x86/x64, so min(+0, -0) === +0, as specified in
-// the Intel developer manual. See also bug 1068028.
 CheckF4(F32MIN, 'var x=f4(1,2,3,4); x=min(x,x)', [1,2,3,4]);
 CheckF4(F32MIN, 'var x=f4(13.37,2,3,4); var y=f4(4,3,5,2); x=min(x,y)', [4,2,3,2]);
-CheckF4(F32MIN + FROUND + 'var Infinity = glob.Infinity;', 'var x=f4(0,0,0,0); var y=f4(2310,3,5,0); x=f4(f32(+Infinity),f32(-Infinity),f32(3),f32(-0.)); x=min(x,y)', [2310,-Infinity,3,0]);
+CheckF4(F32MIN + FROUND + 'var Infinity = glob.Infinity;', 'var x=f4(0,0,0,0); var y=f4(2310,3,5,0); x=f4(f32(+Infinity),f32(-Infinity),f32(3),f32(-0.)); x=min(x,y)', [2310,-Infinity,3,-0]);
+
+CheckF4(F32MIN, 'var x=f4(0,0,-0,-0); var y=f4(0,-0,0,-0); x=min(x,y)', [0,-0,-0,-0]);
+CheckF4(F32MIN + FROUND + 'var NaN = glob.NaN;', 'var x=f4(0,0,0,0); var y=f4(0,0,0,0); var n=f32(0); n=f32(NaN); x=f4(n,0.,n,0.); y=f4(n,n,0.,0.); x=min(x,y)', [NaN, NaN, NaN, 0]);
 
 CheckF4(F32MAX, 'var x=f4(1,2,3,4); x=max(x,x)', [1,2,3,4]);
 CheckF4(F32MAX, 'var x=f4(13.37,2,3,4); var y=f4(4,3,5,2); x=max(x,y)', [13.37, 3, 5, 4]);
 CheckF4(F32MAX + FROUND + 'var Infinity = glob.Infinity;', 'var x=f4(0,0,0,0); var y=f4(2310,3,5,0); x=f4(f32(+Infinity),f32(-Infinity),f32(3),f32(-0.)); x=max(x,y)', [+Infinity,3,5,0]);
 
 // Test NaN
 var f32x4 = SIMD.float32x4(0, NaN, -0, NaN);
 var another = SIMD.float32x4(NaN, -1, -0, NaN);
@@ -533,16 +532,18 @@ assertEqX4(asmLink(asmCompile('glob', US
 CheckF4(F32D, 'var x=f4(1,2,3,4); x=f4d(x,x)', [1,1,1,1]);
 CheckF4(F32D, 'var x=f4(1,2,3,4); var y=f4(4,3,5,2); x=f4d(x,y)', [1/4,2/3,3/5,2]);
 CheckF4(F32D, 'var x=f4(13.37,1,1,4); var y=f4(4,0,-0.,2); x=f4d(x,y)', [Math.fround(13.37) / 4,+Infinity,-Infinity,2]);
 
 // Test NaN
 var f32x4 = SIMD.float32x4(0, 0, -0, NaN);
 var another = SIMD.float32x4(0, -0, 0, 0);
 assertEqX4(asmLink(asmCompile('glob', USE_ASM + F32 + F32D + "function f(x,y) {x=f4(x); y=f4(y); x=f4d(x,y); return f4(x);} return f"), this)(f32x4, another), [NaN, NaN, NaN, NaN]);
+CheckF4(F32MAX, 'var x=f4(0,0,-0,-0); var y=f4(0,-0,0,-0); x=max(x,y)', [0,0,0,-0]);
+CheckF4(F32MAX + FROUND + 'var NaN = glob.NaN;', 'var x=f4(0,0,0,0); var y=f4(0,0,0,0); var n=f32(0); n=f32(NaN); x=f4(n,0.,n,0.); y=f4(n,n,0.,0.); x=max(x,y)', [NaN, NaN, NaN, 0]);
 
 // With
 const WXF = 'var w = f4.withX;';
 const WYF = 'var w = f4.withY;';
 const WZF = 'var w = f4.withZ;';
 const WWF = 'var w = f4.withW;';
 
 assertAsmTypeFail('glob', USE_ASM + F32 + WXF + "function f() {var x = f4(1,2,3,4); x = w(x, 1);} return f");
--- a/js/src/jit/LIR-Common.h
+++ b/js/src/jit/LIR-Common.h
@@ -361,49 +361,55 @@ class LSimdBinaryCompIx4 : public LSimdB
 class LSimdBinaryCompFx4 : public LSimdBinaryComp
 {
   public:
     LIR_HEADER(SimdBinaryCompFx4);
     LSimdBinaryCompFx4() : LSimdBinaryComp() {}
 };
 
 // Binary SIMD arithmetic operation between two SIMD operands
-class LSimdBinaryArith : public LInstructionHelper<1, 2, 0>
+template<size_t Temps>
+class LSimdBinaryArith : public LInstructionHelper<1, 2, Temps>
 {
   public:
     LSimdBinaryArith() {}
 
     const LAllocation *lhs() {
-        return getOperand(0);
+        return this->getOperand(0);
     }
     const LAllocation *rhs() {
-        return getOperand(1);
-    }
+        return this->getOperand(1);
+    }
+
     MSimdBinaryArith::Operation operation() const {
-        return mir_->toSimdBinaryArith()->operation();
+        return this->mir_->toSimdBinaryArith()->operation();
     }
     const char *extraName() const {
         return MSimdBinaryArith::OperationName(operation());
     }
 };
 
 // Binary SIMD arithmetic operation between two Int32x4 operands
-class LSimdBinaryArithIx4 : public LSimdBinaryArith
+class LSimdBinaryArithIx4 : public LSimdBinaryArith<0>
 {
   public:
     LIR_HEADER(SimdBinaryArithIx4);
-    LSimdBinaryArithIx4() : LSimdBinaryArith() {}
+    LSimdBinaryArithIx4() : LSimdBinaryArith<0>() {}
 };
 
 // Binary SIMD arithmetic operation between two Float32x4 operands
-class LSimdBinaryArithFx4 : public LSimdBinaryArith
+class LSimdBinaryArithFx4 : public LSimdBinaryArith<1>
 {
   public:
     LIR_HEADER(SimdBinaryArithFx4);
-    LSimdBinaryArithFx4() : LSimdBinaryArith() {}
+    LSimdBinaryArithFx4() : LSimdBinaryArith<1>() {}
+
+    const LDefinition *temp() {
+        return getTemp(0);
+    }
 };
 
 // Unary SIMD arithmetic operation on a SIMD operand
 class LSimdUnaryArith : public LInstructionHelper<1, 1, 0>
 {
   public:
     explicit LSimdUnaryArith(const LAllocation &in) {
         setOperand(0, in);
--- a/js/src/jit/Lowering.cpp
+++ b/js/src/jit/Lowering.cpp
@@ -4067,30 +4067,31 @@ LIRGenerator::visitSimdBinaryComp(MSimdB
 
 bool
 LIRGenerator::visitSimdBinaryArith(MSimdBinaryArith *ins)
 {
     MOZ_ASSERT(IsSimdType(ins->type()));
 
     MDefinition *lhs = ins->lhs();
     MDefinition *rhs = ins->rhs();
+
     if (ins->isCommutative())
         ReorderCommutative(&lhs, &rhs, ins);
 
-    if (ins->type() == MIRType_Int32x4) {
-        LSimdBinaryArithIx4 *add = new(alloc()) LSimdBinaryArithIx4();
-        return lowerForFPU(add, ins, lhs, rhs);
-    }
-
-    if (ins->type() == MIRType_Float32x4) {
-        LSimdBinaryArithFx4 *add = new(alloc()) LSimdBinaryArithFx4();
-        return lowerForFPU(add, ins, lhs, rhs);
-    }
-
-    MOZ_CRASH("Unknown SIMD kind when adding values");
+    if (ins->type() == MIRType_Int32x4)
+        return lowerForFPU(new(alloc()) LSimdBinaryArithIx4(), ins, lhs, rhs);
+
+    MOZ_ASSERT(ins->type() == MIRType_Float32x4, "unknown simd type on binary arith operation");
+
+    LSimdBinaryArithFx4 *lir = new(alloc()) LSimdBinaryArithFx4();
+
+    bool needsTemp = ins->operation() == MSimdBinaryArith::Max;
+    lir->setTemp(0, needsTemp ? temp(LDefinition::FLOAT32X4) : LDefinition::BogusTemp());
+
+    return lowerForFPU(lir, ins, lhs, rhs);
 }
 
 bool
 LIRGenerator::visitSimdBinaryBitwise(MSimdBinaryBitwise *ins)
 {
     MOZ_ASSERT(IsSimdType(ins->type()));
 
     MDefinition *lhs = ins->lhs();
--- a/js/src/jit/arm/Lowering-arm.cpp
+++ b/js/src/jit/arm/Lowering-arm.cpp
@@ -194,25 +194,31 @@ bool
 LIRGeneratorARM::lowerForFPU(LInstructionHelper<1, 1, 0> *ins, MDefinition *mir, MDefinition *input)
 {
     ins->setOperand(0, useRegisterAtStart(input));
     return define(ins, mir,
                   LDefinition(LDefinition::TypeFrom(mir->type()), LDefinition::REGISTER));
 
 }
 
+template<size_t Temps>
 bool
-LIRGeneratorARM::lowerForFPU(LInstructionHelper<1, 2, 0> *ins, MDefinition *mir, MDefinition *lhs, MDefinition *rhs)
+LIRGeneratorARM::lowerForFPU(LInstructionHelper<1, 2, Temps> *ins, MDefinition *mir, MDefinition *lhs, MDefinition *rhs)
 {
     ins->setOperand(0, useRegisterAtStart(lhs));
     ins->setOperand(1, useRegisterAtStart(rhs));
     return define(ins, mir,
                   LDefinition(LDefinition::TypeFrom(mir->type()), LDefinition::REGISTER));
 }
 
+template bool LIRGeneratorARM::lowerForFPU(LInstructionHelper<1, 2, 0> *ins, MDefinition *mir,
+                                           MDefinition *lhs, MDefinition *rhs);
+template bool LIRGeneratorARM::lowerForFPU(LInstructionHelper<1, 2, 1> *ins, MDefinition *mir,
+                                           MDefinition *lhs, MDefinition *rhs);
+
 bool
 LIRGeneratorARM::lowerForBitAndAndBranch(LBitAndAndBranch *baab, MInstruction *mir,
                                          MDefinition *lhs, MDefinition *rhs)
 {
     baab->setOperand(0, useRegisterAtStart(lhs));
     baab->setOperand(1, useRegisterOrConstantAtStart(rhs));
     return add(baab, mir);
 }
--- a/js/src/jit/arm/Lowering-arm.h
+++ b/js/src/jit/arm/Lowering-arm.h
@@ -51,17 +51,18 @@ class LIRGeneratorARM : public LIRGenera
 
     bool lowerForALU(LInstructionHelper<1, 1, 0> *ins, MDefinition *mir,
                      MDefinition *input);
     bool lowerForALU(LInstructionHelper<1, 2, 0> *ins, MDefinition *mir,
                      MDefinition *lhs, MDefinition *rhs);
 
     bool lowerForFPU(LInstructionHelper<1, 1, 0> *ins, MDefinition *mir,
                      MDefinition *src);
-    bool lowerForFPU(LInstructionHelper<1, 2, 0> *ins, MDefinition *mir,
+    template<size_t Temps>
+    bool lowerForFPU(LInstructionHelper<1, 2, Temps> *ins, MDefinition *mir,
                      MDefinition *lhs, MDefinition *rhs);
 
     bool lowerForCompIx4(LSimdBinaryCompIx4 *ins, MSimdBinaryComp *mir,
                          MDefinition *lhs, MDefinition *rhs)
     {
         return lowerForFPU(ins, mir, lhs, rhs);
     }
     bool lowerForCompFx4(LSimdBinaryCompFx4 *ins, MSimdBinaryComp *mir,
--- a/js/src/jit/mips/Lowering-mips.cpp
+++ b/js/src/jit/mips/Lowering-mips.cpp
@@ -196,26 +196,32 @@ bool
 LIRGeneratorMIPS::lowerForFPU(LInstructionHelper<1, 1, 0> *ins, MDefinition *mir,
                               MDefinition *input)
 {
     ins->setOperand(0, useRegister(input));
     return define(ins, mir,
                   LDefinition(LDefinition::TypeFrom(mir->type()), LDefinition::REGISTER));
 }
 
+template<size_t Temps>
 bool
-LIRGeneratorMIPS::lowerForFPU(LInstructionHelper<1, 2, 0> *ins, MDefinition *mir,
+LIRGeneratorMIPS::lowerForFPU(LInstructionHelper<1, 2, Temps> *ins, MDefinition *mir,
                               MDefinition *lhs, MDefinition *rhs)
 {
     ins->setOperand(0, useRegister(lhs));
     ins->setOperand(1, useRegister(rhs));
     return define(ins, mir,
                   LDefinition(LDefinition::TypeFrom(mir->type()), LDefinition::REGISTER));
 }
 
+template bool LIRGeneratorMIPS::lowerForFPU(LInstructionHelper<1, 2, 0> *ins, MDefinition *mir,
+                                            MDefinition *lhs, MDefinition *rhs);
+template bool LIRGeneratorMIPS::lowerForFPU(LInstructionHelper<1, 2, 1> *ins, MDefinition *mir,
+                                            MDefinition *lhs, MDefinition *rhs);
+
 bool
 LIRGeneratorMIPS::lowerForBitAndAndBranch(LBitAndAndBranch *baab, MInstruction *mir,
                                           MDefinition *lhs, MDefinition *rhs)
 {
     baab->setOperand(0, useRegisterAtStart(lhs));
     baab->setOperand(1, useRegisterOrConstantAtStart(rhs));
     return add(baab, mir);
 }
--- a/js/src/jit/mips/Lowering-mips.h
+++ b/js/src/jit/mips/Lowering-mips.h
@@ -51,17 +51,18 @@ class LIRGeneratorMIPS : public LIRGener
 
     bool lowerForALU(LInstructionHelper<1, 1, 0> *ins, MDefinition *mir,
                      MDefinition *input);
     bool lowerForALU(LInstructionHelper<1, 2, 0> *ins, MDefinition *mir,
                      MDefinition *lhs, MDefinition *rhs);
 
     bool lowerForFPU(LInstructionHelper<1, 1, 0> *ins, MDefinition *mir,
                      MDefinition *src);
-    bool lowerForFPU(LInstructionHelper<1, 2, 0> *ins, MDefinition *mir,
+    template<size_t Temps>
+    bool lowerForFPU(LInstructionHelper<1, 2, Temps> *ins, MDefinition *mir,
                      MDefinition *lhs, MDefinition *rhs);
 
     bool lowerForCompIx4(LSimdBinaryCompIx4 *ins, MSimdBinaryComp *mir,
                          MDefinition *lhs, MDefinition *rhs)
     {
         return lowerForFPU(ins, mir, lhs, rhs);
     }
     bool lowerForCompFx4(LSimdBinaryCompFx4 *ins, MSimdBinaryComp *mir,
--- a/js/src/jit/shared/Assembler-x86-shared.h
+++ b/js/src/jit/shared/Assembler-x86-shared.h
@@ -470,16 +470,19 @@ class AssemblerX86Shared : public Assemb
         MOZ_ASSERT(HasSSE2());
         switch (src.kind()) {
           case Operand::MEM_REG_DISP:
             masm.movaps_mr(src.disp(), src.base(), dest.code());
             break;
           case Operand::MEM_SCALE:
             masm.movaps_mr(src.disp(), src.base(), src.index(), src.scale(), dest.code());
             break;
+          case Operand::FPREG:
+            masm.movaps_rr(src.fpu(), dest.code());
+            break;
           default:
             MOZ_CRASH("unexpected operand kind");
         }
     }
     void movaps(FloatRegister src, const Operand &dest) {
         MOZ_ASSERT(HasSSE2());
         switch (dest.kind()) {
           case Operand::MEM_REG_DISP:
--- a/js/src/jit/shared/CodeGenerator-x86-shared.cpp
+++ b/js/src/jit/shared/CodeGenerator-x86-shared.cpp
@@ -2789,29 +2789,37 @@ CodeGeneratorX86Shared::visitSimdBinaryA
         masm.packedSubFloat32(rhs, lhs);
         return true;
       case MSimdBinaryArith::Mul:
         masm.packedMulFloat32(rhs, lhs);
         return true;
       case MSimdBinaryArith::Div:
         masm.packedDivFloat32(rhs, lhs);
         return true;
-      case MSimdBinaryArith::Max:
-        // TODO: standardization of float32x4.min/max needs to define the
-        // semantics for particular values: if both input operands are -0 or 0,
-        // or one operand is NaN, the return value will be the "second operand"
-        // in the instruction.
-        // See also bug 1068028, which is about fixing semantics once the
-        // specification is stable.
+      case MSimdBinaryArith::Max: {
+        masm.movaps(lhs, ScratchSimdReg);
+        masm.cmpps(rhs, ScratchSimdReg, 0x3);
+
+        FloatRegister tmp = ToFloatRegister(ins->temp());
+        masm.movaps(rhs, tmp);
+        masm.maxps(Operand(lhs), tmp);
         masm.maxps(rhs, lhs);
+
+        masm.andps(tmp, lhs);
+        masm.orps(ScratchSimdReg, lhs); // or in the all-ones NaNs
         return true;
-      case MSimdBinaryArith::Min:
-        // See comment above.
+      }
+      case MSimdBinaryArith::Min: {
+        FloatRegister rhsCopy = ScratchSimdReg;
+        masm.movaps(rhs, rhsCopy);
+        masm.minps(Operand(lhs), rhsCopy);
         masm.minps(rhs, lhs);
+        masm.orps(rhsCopy, lhs); // NaN or'd with arbitrary bits is NaN
         return true;
+      }
     }
     MOZ_CRASH("unexpected SIMD op");
 }
 
 bool
 CodeGeneratorX86Shared::visitSimdUnaryArithIx4(LSimdUnaryArithIx4 *ins)
 {
     Operand in = ToOperand(ins->input());
--- a/js/src/jit/shared/Lowering-x86-shared.cpp
+++ b/js/src/jit/shared/Lowering-x86-shared.cpp
@@ -95,24 +95,30 @@ bool
 LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 2, 0> *ins, MDefinition *mir,
                                    MDefinition *lhs, MDefinition *rhs)
 {
     ins->setOperand(0, useRegisterAtStart(lhs));
     ins->setOperand(1, lhs != rhs ? useOrConstant(rhs) : useOrConstantAtStart(rhs));
     return defineReuseInput(ins, mir, 0);
 }
 
+template<size_t Temps>
 bool
-LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, 0> *ins, MDefinition *mir, MDefinition *lhs, MDefinition *rhs)
+LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, Temps> *ins, MDefinition *mir, MDefinition *lhs, MDefinition *rhs)
 {
     ins->setOperand(0, useRegisterAtStart(lhs));
     ins->setOperand(1, lhs != rhs ? use(rhs) : useAtStart(rhs));
     return defineReuseInput(ins, mir, 0);
 }
 
+template bool LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, 0> *ins, MDefinition *mir,
+                                                 MDefinition *lhs, MDefinition *rhs);
+template bool LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, 1> *ins, MDefinition *mir,
+                                                 MDefinition *lhs, MDefinition *rhs);
+
 bool
 LIRGeneratorX86Shared::lowerForCompIx4(LSimdBinaryCompIx4 *ins, MSimdBinaryComp *mir, MDefinition *lhs, MDefinition *rhs)
 {
     return lowerForALU(ins, mir, lhs, rhs);
 }
 
 bool
 LIRGeneratorX86Shared::lowerForCompFx4(LSimdBinaryCompFx4 *ins, MSimdBinaryComp *mir, MDefinition *lhs, MDefinition *rhs)
--- a/js/src/jit/shared/Lowering-x86-shared.h
+++ b/js/src/jit/shared/Lowering-x86-shared.h
@@ -26,17 +26,18 @@ class LIRGeneratorX86Shared : public LIR
     bool visitGuardShape(MGuardShape *ins);
     bool visitGuardObjectType(MGuardObjectType *ins);
     bool visitPowHalf(MPowHalf *ins);
     bool lowerForShift(LInstructionHelper<1, 2, 0> *ins, MDefinition *mir, MDefinition *lhs,
                        MDefinition *rhs);
     bool lowerForALU(LInstructionHelper<1, 1, 0> *ins, MDefinition *mir, MDefinition *input);
     bool lowerForALU(LInstructionHelper<1, 2, 0> *ins, MDefinition *mir, MDefinition *lhs,
                      MDefinition *rhs);
-    bool lowerForFPU(LInstructionHelper<1, 2, 0> *ins, MDefinition *mir, MDefinition *lhs,
+    template<size_t Temps>
+    bool lowerForFPU(LInstructionHelper<1, 2, Temps> *ins, MDefinition *mir, MDefinition *lhs,
                      MDefinition *rhs);
     bool lowerForCompIx4(LSimdBinaryCompIx4 *ins, MSimdBinaryComp *mir,
                          MDefinition *lhs, MDefinition *rhs);
     bool lowerForCompFx4(LSimdBinaryCompFx4 *ins, MSimdBinaryComp *mir,
                          MDefinition *lhs, MDefinition *rhs);
     bool lowerForBitAndAndBranch(LBitAndAndBranch *baab, MInstruction *mir,
                                  MDefinition *lhs, MDefinition *rhs);
     bool visitConstant(MConstant *ins);