Bug 995934 - IonMonkey: Remove branch out of hot code in negative zero test, r=bbouvier
authorHannes Verschore <hv1989@gmail.com>
Wed, 23 Apr 2014 17:17:29 +0200
changeset 198273 f4b188b044ccd89ade232c7b39330eb52a5bc79b
parent 198272 32616a509bed91f1311ab5c49a8c46d3b5ee4667
child 198274 621bb79845e3519cc24866e4c69ba6103ba4bf6d
push id3624
push userasasaki@mozilla.com
push dateMon, 09 Jun 2014 21:49:01 +0000
treeherdermozilla-beta@b1a5da15899a [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersbbouvier
bugs995934
milestone31.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 995934 - IonMonkey: Remove branch out of hot code in negative zero test, r=bbouvier
js/src/jit/shared/CodeGenerator-x86-shared.cpp
js/src/jit/shared/MacroAssembler-x86-shared.cpp
js/src/jit/shared/MacroAssembler-x86-shared.h
js/src/jit/x64/MacroAssembler-x64.cpp
js/src/jit/x64/MacroAssembler-x64.h
js/src/jit/x86/MacroAssembler-x86.cpp
js/src/jit/x86/MacroAssembler-x86.h
--- a/js/src/jit/shared/CodeGenerator-x86-shared.cpp
+++ b/js/src/jit/shared/CodeGenerator-x86-shared.cpp
@@ -1553,20 +1553,22 @@ CodeGeneratorX86Shared::visitMathF(LMath
 
 bool
 CodeGeneratorX86Shared::visitFloor(LFloor *lir)
 {
     FloatRegister input = ToFloatRegister(lir->input());
     FloatRegister scratch = ScratchFloatReg;
     Register output = ToRegister(lir->output());
 
+    Label bailout;
+
     if (AssemblerX86Shared::HasSSE41()) {
         // Bail on negative-zero.
-        Assembler::Condition bailCond = masm.testNegativeZero(input, output);
-        if (!bailoutIf(bailCond, lir->snapshot()))
+        masm.branchNegativeZero(input, output, &bailout);
+        if (!bailoutFrom(&bailout, lir->snapshot()))
             return false;
 
         // Round toward -Infinity.
         masm.roundsd(input, scratch, JSC::X86Assembler::RoundDown);
 
         masm.cvttsd2si(scratch, output);
         masm.cmp32(output, Imm32(INT_MIN));
         if (!bailoutIf(Assembler::Equal, lir->snapshot()))
@@ -1574,18 +1576,18 @@ CodeGeneratorX86Shared::visitFloor(LFloo
     } else {
         Label negative, end;
 
         // Branch to a slow path for negative inputs. Doesn't catch NaN or -0.
         masm.xorpd(scratch, scratch);
         masm.branchDouble(Assembler::DoubleLessThan, input, scratch, &negative);
 
         // Bail on negative-zero.
-        Assembler::Condition bailCond = masm.testNegativeZero(input, output);
-        if (!bailoutIf(bailCond, lir->snapshot()))
+        masm.branchNegativeZero(input, output, &bailout);
+        if (!bailoutFrom(&bailout, lir->snapshot()))
             return false;
 
         // Input is non-negative, so truncation correctly rounds.
         masm.cvttsd2si(input, output);
         masm.cmp32(output, Imm32(INT_MIN));
         if (!bailoutIf(Assembler::Equal, lir->snapshot()))
             return false;
 
@@ -1620,20 +1622,22 @@ CodeGeneratorX86Shared::visitFloor(LFloo
 
 bool
 CodeGeneratorX86Shared::visitFloorF(LFloorF *lir)
 {
     FloatRegister input = ToFloatRegister(lir->input());
     FloatRegister scratch = ScratchFloatReg;
     Register output = ToRegister(lir->output());
 
+    Label bailout;
+
     if (AssemblerX86Shared::HasSSE41()) {
         // Bail on negative-zero.
-        Assembler::Condition bailCond = masm.testNegativeZeroFloat32(input, output);
-        if (!bailoutIf(bailCond, lir->snapshot()))
+        masm.branchNegativeZeroFloat32(input, output, &bailout);
+        if (!bailoutFrom(&bailout, lir->snapshot()))
             return false;
 
         // Round toward -Infinity.
         masm.roundss(input, scratch, JSC::X86Assembler::RoundDown);
 
         masm.cvttss2si(scratch, output);
         masm.cmp32(output, Imm32(INT_MIN));
         if (!bailoutIf(Assembler::Equal, lir->snapshot()))
@@ -1641,18 +1645,18 @@ CodeGeneratorX86Shared::visitFloorF(LFlo
     } else {
         Label negative, end;
 
         // Branch to a slow path for negative inputs. Doesn't catch NaN or -0.
         masm.xorps(scratch, scratch);
         masm.branchFloat(Assembler::DoubleLessThan, input, scratch, &negative);
 
         // Bail on negative-zero.
-        Assembler::Condition bailCond = masm.testNegativeZeroFloat32(input, output);
-        if (!bailoutIf(bailCond, lir->snapshot()))
+        masm.branchNegativeZeroFloat32(input, output, &bailout);
+        if (!bailoutFrom(&bailout, lir->snapshot()))
             return false;
 
         // Input is non-negative, so truncation correctly rounds.
         masm.cvttss2si(input, output);
         masm.cmp32(output, Imm32(INT_MIN));
         if (!bailoutIf(Assembler::Equal, lir->snapshot()))
             return false;
 
@@ -1688,28 +1692,28 @@ CodeGeneratorX86Shared::visitFloorF(LFlo
 bool
 CodeGeneratorX86Shared::visitRound(LRound *lir)
 {
     FloatRegister input = ToFloatRegister(lir->input());
     FloatRegister temp = ToFloatRegister(lir->temp());
     FloatRegister scratch = ScratchFloatReg;
     Register output = ToRegister(lir->output());
 
-    Label negative, end;
+    Label negative, end, bailout;
 
     // Load 0.5 in the temp register.
     masm.loadConstantDouble(0.5, temp);
 
     // Branch to a slow path for negative inputs. Doesn't catch NaN or -0.
     masm.xorpd(scratch, scratch);
     masm.branchDouble(Assembler::DoubleLessThan, input, scratch, &negative);
 
     // Bail on negative-zero.
-    Assembler::Condition bailCond = masm.testNegativeZero(input, output);
-    if (!bailoutIf(bailCond, lir->snapshot()))
+    masm.branchNegativeZero(input, output, &bailout);
+    if (!bailoutFrom(&bailout, lir->snapshot()))
         return false;
 
     // Input is non-negative. Add 0.5 and truncate, rounding down. Note that we
     // have to add the input to the temp register (which contains 0.5) because
     // we're not allowed to modify the input register.
     masm.addsd(input, temp);
 
     masm.cvttsd2si(temp, output);
@@ -1776,28 +1780,28 @@ CodeGeneratorX86Shared::visitRound(LRoun
 bool
 CodeGeneratorX86Shared::visitRoundF(LRoundF *lir)
 {
     FloatRegister input = ToFloatRegister(lir->input());
     FloatRegister temp = ToFloatRegister(lir->temp());
     FloatRegister scratch = ScratchFloatReg;
     Register output = ToRegister(lir->output());
 
-    Label negative, end;
+    Label negative, end, bailout;
 
     // Load 0.5 in the temp register.
     masm.loadConstantFloat32(0.5f, temp);
 
     // Branch to a slow path for negative inputs. Doesn't catch NaN or -0.
     masm.xorps(scratch, scratch);
     masm.branchFloat(Assembler::DoubleLessThan, input, scratch, &negative);
 
     // Bail on negative-zero.
-    Assembler::Condition bailCond = masm.testNegativeZeroFloat32(input, output);
-    if (!bailoutIf(bailCond, lir->snapshot()))
+    masm.branchNegativeZeroFloat32(input, output, &bailout);
+    if (!bailoutFrom(&bailout, lir->snapshot()))
         return false;
 
     // Input is non-negative. Add 0.5 and truncate, rounding down. Note that we
     // have to add the input to the temp register (which contains 0.5) because
     // we're not allowed to modify the input register.
     masm.addss(input, temp);
 
     masm.cvttss2si(temp, output);
--- a/js/src/jit/shared/MacroAssembler-x86-shared.cpp
+++ b/js/src/jit/shared/MacroAssembler-x86-shared.cpp
@@ -149,8 +149,50 @@ MacroAssemblerX86Shared::callWithExitFra
 bool
 MacroAssemblerX86Shared::buildOOLFakeExitFrame(void *fakeReturnAddr)
 {
     uint32_t descriptor = MakeFrameDescriptor(framePushed(), JitFrame_IonJS);
     Push(Imm32(descriptor));
     Push(ImmPtr(fakeReturnAddr));
     return true;
 }
+
+void
+MacroAssemblerX86Shared::branchNegativeZero(const FloatRegister &reg,
+                                            const Register &scratch,
+                                            Label *label)
+{
+    // Determines whether the low double contained in the XMM register reg
+    // is equal to -0.0.
+
+#if defined(JS_CODEGEN_X86)
+    Label nonZero;
+
+    // Compare to zero. Lets through {0, -0}.
+    xorpd(ScratchFloatReg, ScratchFloatReg);
+
+    // If reg is non-zero, jump to nonZero.
+    branchDouble(DoubleNotEqual, reg, ScratchFloatReg, &nonZero);
+
+    // Input register is either zero or negative zero. Retrieve sign of input.
+    movmskpd(reg, scratch);
+
+    // If reg is 1 or 3, input is negative zero.
+    // If reg is 0 or 2, input is a normal zero.
+    branchTest32(NonZero, scratch, Imm32(1), label);
+
+    bind(&nonZero);
+#elif defined(JS_CODEGEN_X64)
+    movq(reg, scratch);
+    cmpq(scratch, Imm32(1));
+    j(Overflow, label);
+#endif
+}
+
+void
+MacroAssemblerX86Shared::branchNegativeZeroFloat32(const FloatRegister &reg,
+                                                   const Register &scratch,
+                                                   Label *label)
+{
+    movd(reg, scratch);
+    cmpl(scratch, Imm32(1));
+    j(Overflow, label);
+}
--- a/js/src/jit/shared/MacroAssembler-x86-shared.h
+++ b/js/src/jit/shared/MacroAssembler-x86-shared.h
@@ -85,16 +85,19 @@ class MacroAssemblerX86Shared : public A
             j(Parity, label);
             return;
         }
 
         JS_ASSERT(!(cond & DoubleConditionBitSpecial));
         j(ConditionFromDoubleCondition(cond), label);
     }
 
+    void branchNegativeZero(const FloatRegister &reg, const Register &scratch, Label *label);
+    void branchNegativeZeroFloat32(const FloatRegister &reg, const Register &scratch, Label *label);
+
     void move32(const Imm32 &imm, const Register &dest) {
         // Use the ImmWord version of mov to register, which has special
         // optimizations. Casting to uint32_t here ensures that the value
         // is zero-extended.
         mov(ImmWord(uint32_t(imm.value)), dest);
     }
     void move32(const Imm32 &imm, const Operand &dest) {
         movl(imm, dest);
@@ -514,73 +517,43 @@ class MacroAssemblerX86Shared : public A
     }
 
     // Checks whether a double is representable as a 32-bit integer. If so, the
     // integer is written to the output register. Otherwise, a bailout is taken to
     // the given snapshot. This function overwrites the scratch float register.
     void convertDoubleToInt32(FloatRegister src, Register dest, Label *fail,
                               bool negativeZeroCheck = true)
     {
+        // Check for -0.0
+        if (negativeZeroCheck)
+            branchNegativeZero(src, dest, fail);
+
         cvttsd2si(src, dest);
         cvtsi2sd(dest, ScratchFloatReg);
         ucomisd(src, ScratchFloatReg);
         j(Assembler::Parity, fail);
         j(Assembler::NotEqual, fail);
 
-        // Check for -0
-        if (negativeZeroCheck) {
-            Label notZero;
-            testl(dest, dest);
-            j(Assembler::NonZero, &notZero);
-
-            if (Assembler::HasSSE41()) {
-                ptest(src, src);
-                j(Assembler::NonZero, fail);
-            } else {
-                // bit 0 = sign of low double
-                // bit 1 = sign of high double
-                movmskpd(src, dest);
-                andl(Imm32(1), dest);
-                j(Assembler::NonZero, fail);
-            }
-
-            bind(&notZero);
-        }
     }
 
     // Checks whether a float32 is representable as a 32-bit integer. If so, the
     // integer is written to the output register. Otherwise, a bailout is taken to
     // the given snapshot. This function overwrites the scratch float register.
     void convertFloat32ToInt32(FloatRegister src, Register dest, Label *fail,
                                bool negativeZeroCheck = true)
     {
+        // Check for -0.0
+        if (negativeZeroCheck)
+            branchNegativeZeroFloat32(src, dest, fail);
+
         cvttss2si(src, dest);
         convertInt32ToFloat32(dest, ScratchFloatReg);
         ucomiss(src, ScratchFloatReg);
         j(Assembler::Parity, fail);
         j(Assembler::NotEqual, fail);
-
-        // Check for -0
-        if (negativeZeroCheck) {
-            Label notZero;
-            branchTest32(Assembler::NonZero, dest, dest, &notZero);
-
-            if (Assembler::HasSSE41()) {
-                ptest(src, src);
-                j(Assembler::NonZero, fail);
-            } else {
-                // bit 0 = sign of low float
-                // bits 1 to 3 = signs of higher floats
-                movmskps(src, dest);
-                andl(Imm32(1), dest);
-                j(Assembler::NonZero, fail);
-            }
-
-            bind(&notZero);
-        }
     }
 
     void clampIntToUint8(Register reg) {
         Label inRange;
         branchTest32(Assembler::Zero, reg, Imm32(0xffffff00), &inRange);
         {
             sarl(Imm32(31), reg);
             notl(reg);
--- a/js/src/jit/x64/MacroAssembler-x64.cpp
+++ b/js/src/jit/x64/MacroAssembler-x64.cpp
@@ -362,32 +362,16 @@ MacroAssemblerX64::handleFailureWithHand
     // If we are bailing out to baseline to handle an exception, jump to
     // the bailout tail stub.
     bind(&bailout);
     loadPtr(Address(esp, offsetof(ResumeFromException, bailoutInfo)), r9);
     mov(ImmWord(BAILOUT_RETURN_OK), rax);
     jmp(Operand(rsp, offsetof(ResumeFromException, target)));
 }
 
-Assembler::Condition
-MacroAssemblerX64::testNegativeZero(const FloatRegister &reg, const Register &scratch)
-{
-    movq(reg, scratch);
-    cmpq(scratch, Imm32(1));
-    return Overflow;
-}
-
-Assembler::Condition
-MacroAssemblerX64::testNegativeZeroFloat32(const FloatRegister &reg, const Register &scratch)
-{
-    movd(reg, scratch);
-    cmpl(scratch, Imm32(1));
-    return Overflow;
-}
-
 #ifdef JSGC_GENERATIONAL
 
 void
 MacroAssemblerX64::branchPtrInNurseryRange(Register ptr, Register temp, Label *label)
 {
     JS_ASSERT(ptr != temp);
     JS_ASSERT(ptr != ScratchReg);
 
--- a/js/src/jit/x64/MacroAssembler-x64.h
+++ b/js/src/jit/x64/MacroAssembler-x64.h
@@ -502,19 +502,16 @@ class MacroAssemblerX64 : public MacroAs
 
     template <typename T1, typename T2>
     void cmpPtrSet(Assembler::Condition cond, T1 lhs, T2 rhs, const Register &dest)
     {
         cmpPtr(lhs, rhs);
         emitSet(cond, dest);
     }
 
-    Condition testNegativeZero(const FloatRegister &reg, const Register &scratch);
-    Condition testNegativeZeroFloat32(const FloatRegister &reg, const Register &scratch);
-
     /////////////////////////////////////////////////////////////////
     // Common interface.
     /////////////////////////////////////////////////////////////////
     void reserveStack(uint32_t amount) {
         if (amount)
             subq(Imm32(amount), StackPointer);
         framePushed_ += amount;
     }
--- a/js/src/jit/x86/MacroAssembler-x86.cpp
+++ b/js/src/jit/x86/MacroAssembler-x86.cpp
@@ -382,57 +382,16 @@ MacroAssemblerX86::branchTestValue(Condi
         JS_ASSERT(cond == NotEqual);
         j(NotEqual, label);
 
         cmpl(value.typeReg(), Imm32(jv.s.tag));
         j(NotEqual, label);
     }
 }
 
-Assembler::Condition
-MacroAssemblerX86::testNegativeZero(const FloatRegister &reg, const Register &scratch)
-{
-    // Determines whether the single double contained in the XMM register reg
-    // is equal to double-precision -0.
-
-    Label nonZero;
-
-    // Compare to zero. Lets through {0, -0}.
-    xorpd(ScratchFloatReg, ScratchFloatReg);
-
-    // If reg is non-zero, jump to nonZero.
-    // Sets ZF=0 and PF=0.
-    branchDouble(DoubleNotEqual, reg, ScratchFloatReg, &nonZero);
-
-    // Input register is either zero or negative zero. Retrieve sign of input.
-    movmskpd(reg, scratch);
-
-    // If reg is 1 or 3, input is negative zero.
-    // If reg is 0 or 2, input is a normal zero.
-    // So the following test will set PF=1 for negative zero.
-    orl(Imm32(2), scratch);
-
-    bind(&nonZero);
-
-    // Here we need to be able to test if the input is a negative zero.
-    // - branchDouble joins here for non-zero values in which case it sets
-    //   ZF=0 and PF=0. In that case the test should fail.
-    // - orl sets PF=1 on negative zero and PF=0 otherwise
-    // => So testing PF=1 will return if input is negative zero or not.
-    return Parity;
-}
-
-Assembler::Condition
-MacroAssemblerX86::testNegativeZeroFloat32(const FloatRegister &reg, const Register &scratch)
-{
-    movd(reg, scratch);
-    cmpl(scratch, Imm32(1));
-    return Overflow;
-}
-
 #ifdef JSGC_GENERATIONAL
 
 void
 MacroAssemblerX86::branchPtrInNurseryRange(Register ptr, Register temp, Label *label)
 {
     JS_ASSERT(ptr != temp);
     JS_ASSERT(temp != InvalidReg);  // A temp register is required for x86.
 
--- a/js/src/jit/x86/MacroAssembler-x86.h
+++ b/js/src/jit/x86/MacroAssembler-x86.h
@@ -524,19 +524,16 @@ class MacroAssemblerX86 : public MacroAs
 
     template <typename T1, typename T2>
     void cmpPtrSet(Assembler::Condition cond, T1 lhs, T2 rhs, const Register &dest)
     {
         cmpPtr(lhs, rhs);
         emitSet(cond, dest);
     }
 
-    Condition testNegativeZero(const FloatRegister &reg, const Register &scratch);
-    Condition testNegativeZeroFloat32(const FloatRegister &reg, const Register &scratch);
-
     /////////////////////////////////////////////////////////////////
     // Common interface.
     /////////////////////////////////////////////////////////////////
     void reserveStack(uint32_t amount) {
         if (amount)
             subl(Imm32(amount), StackPointer);
         framePushed_ += amount;
     }