Bug 995934 - IonMonkey: Remove branch out of hot code in negative zero test, r=bbouvier
authorHannes Verschore <hv1989@gmail.com>
Wed, 23 Apr 2014 17:17:29 +0200
changeset 180165 f4b188b044ccd89ade232c7b39330eb52a5bc79b
parent 180164 32616a509bed91f1311ab5c49a8c46d3b5ee4667
child 180166 621bb79845e3519cc24866e4c69ba6103ba4bf6d
push id272
push userpvanderbeken@mozilla.com
push dateMon, 05 May 2014 16:31:18 +0000
reviewersbbouvier
bugs995934
milestone31.0a1
Bug 995934 - IonMonkey: Remove branch out of hot code in negative zero test, r=bbouvier
js/src/jit/shared/CodeGenerator-x86-shared.cpp
js/src/jit/shared/MacroAssembler-x86-shared.cpp
js/src/jit/shared/MacroAssembler-x86-shared.h
js/src/jit/x64/MacroAssembler-x64.cpp
js/src/jit/x64/MacroAssembler-x64.h
js/src/jit/x86/MacroAssembler-x86.cpp
js/src/jit/x86/MacroAssembler-x86.h
--- a/js/src/jit/shared/CodeGenerator-x86-shared.cpp
+++ b/js/src/jit/shared/CodeGenerator-x86-shared.cpp
@@ -1553,20 +1553,22 @@ CodeGeneratorX86Shared::visitMathF(LMath
 
 bool
 CodeGeneratorX86Shared::visitFloor(LFloor *lir)
 {
     FloatRegister input = ToFloatRegister(lir->input());
     FloatRegister scratch = ScratchFloatReg;
     Register output = ToRegister(lir->output());
 
+    Label bailout;
+
     if (AssemblerX86Shared::HasSSE41()) {
         // Bail on negative-zero.
-        Assembler::Condition bailCond = masm.testNegativeZero(input, output);
-        if (!bailoutIf(bailCond, lir->snapshot()))
+        masm.branchNegativeZero(input, output, &bailout);
+        if (!bailoutFrom(&bailout, lir->snapshot()))
             return false;
 
         // Round toward -Infinity.
         masm.roundsd(input, scratch, JSC::X86Assembler::RoundDown);
 
         masm.cvttsd2si(scratch, output);
         masm.cmp32(output, Imm32(INT_MIN));
         if (!bailoutIf(Assembler::Equal, lir->snapshot()))
@@ -1574,18 +1576,18 @@ CodeGeneratorX86Shared::visitFloor(LFloo
     } else {
         Label negative, end;
 
         // Branch to a slow path for negative inputs. Doesn't catch NaN or -0.
         masm.xorpd(scratch, scratch);
         masm.branchDouble(Assembler::DoubleLessThan, input, scratch, &negative);
 
         // Bail on negative-zero.
-        Assembler::Condition bailCond = masm.testNegativeZero(input, output);
-        if (!bailoutIf(bailCond, lir->snapshot()))
+        masm.branchNegativeZero(input, output, &bailout);
+        if (!bailoutFrom(&bailout, lir->snapshot()))
             return false;
 
         // Input is non-negative, so truncation correctly rounds.
         masm.cvttsd2si(input, output);
         masm.cmp32(output, Imm32(INT_MIN));
         if (!bailoutIf(Assembler::Equal, lir->snapshot()))
             return false;
 
@@ -1620,20 +1622,22 @@ CodeGeneratorX86Shared::visitFloor(LFloo
 
 bool
 CodeGeneratorX86Shared::visitFloorF(LFloorF *lir)
 {
     FloatRegister input = ToFloatRegister(lir->input());
     FloatRegister scratch = ScratchFloatReg;
     Register output = ToRegister(lir->output());
 
+    Label bailout;
+
     if (AssemblerX86Shared::HasSSE41()) {
         // Bail on negative-zero.
-        Assembler::Condition bailCond = masm.testNegativeZeroFloat32(input, output);
-        if (!bailoutIf(bailCond, lir->snapshot()))
+        masm.branchNegativeZeroFloat32(input, output, &bailout);
+        if (!bailoutFrom(&bailout, lir->snapshot()))
             return false;
 
         // Round toward -Infinity.
         masm.roundss(input, scratch, JSC::X86Assembler::RoundDown);
 
         masm.cvttss2si(scratch, output);
         masm.cmp32(output, Imm32(INT_MIN));
         if (!bailoutIf(Assembler::Equal, lir->snapshot()))
@@ -1641,18 +1645,18 @@ CodeGeneratorX86Shared::visitFloorF(LFlo
     } else {
         Label negative, end;
 
         // Branch to a slow path for negative inputs. Doesn't catch NaN or -0.
         masm.xorps(scratch, scratch);
         masm.branchFloat(Assembler::DoubleLessThan, input, scratch, &negative);
 
         // Bail on negative-zero.
-        Assembler::Condition bailCond = masm.testNegativeZeroFloat32(input, output);
-        if (!bailoutIf(bailCond, lir->snapshot()))
+        masm.branchNegativeZeroFloat32(input, output, &bailout);
+        if (!bailoutFrom(&bailout, lir->snapshot()))
             return false;
 
         // Input is non-negative, so truncation correctly rounds.
         masm.cvttss2si(input, output);
         masm.cmp32(output, Imm32(INT_MIN));
         if (!bailoutIf(Assembler::Equal, lir->snapshot()))
             return false;
 
@@ -1688,28 +1692,28 @@ CodeGeneratorX86Shared::visitFloorF(LFlo
 bool
 CodeGeneratorX86Shared::visitRound(LRound *lir)
 {
     FloatRegister input = ToFloatRegister(lir->input());
     FloatRegister temp = ToFloatRegister(lir->temp());
     FloatRegister scratch = ScratchFloatReg;
     Register output = ToRegister(lir->output());
 
-    Label negative, end;
+    Label negative, end, bailout;
 
     // Load 0.5 in the temp register.
     masm.loadConstantDouble(0.5, temp);
 
     // Branch to a slow path for negative inputs. Doesn't catch NaN or -0.
     masm.xorpd(scratch, scratch);
     masm.branchDouble(Assembler::DoubleLessThan, input, scratch, &negative);
 
     // Bail on negative-zero.
-    Assembler::Condition bailCond = masm.testNegativeZero(input, output);
-    if (!bailoutIf(bailCond, lir->snapshot()))
+    masm.branchNegativeZero(input, output, &bailout);
+    if (!bailoutFrom(&bailout, lir->snapshot()))
         return false;
 
     // Input is non-negative. Add 0.5 and truncate, rounding down. Note that we
     // have to add the input to the temp register (which contains 0.5) because
     // we're not allowed to modify the input register.
     masm.addsd(input, temp);
 
     masm.cvttsd2si(temp, output);
@@ -1776,28 +1780,28 @@ CodeGeneratorX86Shared::visitRound(LRoun
 bool
 CodeGeneratorX86Shared::visitRoundF(LRoundF *lir)
 {
     FloatRegister input = ToFloatRegister(lir->input());
     FloatRegister temp = ToFloatRegister(lir->temp());
     FloatRegister scratch = ScratchFloatReg;
     Register output = ToRegister(lir->output());
 
-    Label negative, end;
+    Label negative, end, bailout;
 
     // Load 0.5 in the temp register.
     masm.loadConstantFloat32(0.5f, temp);
 
     // Branch to a slow path for negative inputs. Doesn't catch NaN or -0.
     masm.xorps(scratch, scratch);
     masm.branchFloat(Assembler::DoubleLessThan, input, scratch, &negative);
 
     // Bail on negative-zero.
-    Assembler::Condition bailCond = masm.testNegativeZeroFloat32(input, output);
-    if (!bailoutIf(bailCond, lir->snapshot()))
+    masm.branchNegativeZeroFloat32(input, output, &bailout);
+    if (!bailoutFrom(&bailout, lir->snapshot()))
         return false;
 
     // Input is non-negative. Add 0.5 and truncate, rounding down. Note that we
     // have to add the input to the temp register (which contains 0.5) because
     // we're not allowed to modify the input register.
     masm.addss(input, temp);
 
     masm.cvttss2si(temp, output);
--- a/js/src/jit/shared/MacroAssembler-x86-shared.cpp
+++ b/js/src/jit/shared/MacroAssembler-x86-shared.cpp
@@ -149,8 +149,50 @@ MacroAssemblerX86Shared::callWithExitFra
 bool
 MacroAssemblerX86Shared::buildOOLFakeExitFrame(void *fakeReturnAddr)
 {
     uint32_t descriptor = MakeFrameDescriptor(framePushed(), JitFrame_IonJS);
     Push(Imm32(descriptor));
     Push(ImmPtr(fakeReturnAddr));
     return true;
 }
+
+void
+MacroAssemblerX86Shared::branchNegativeZero(const FloatRegister &reg,
+                                            const Register &scratch,
+                                            Label *label)
+{
+    // Determines whether the low double contained in the XMM register reg
+    // is equal to -0.0.
+
+#if defined(JS_CODEGEN_X86)
+    Label nonZero;
+
+    // Compare to zero. Lets through {0, -0}.
+    xorpd(ScratchFloatReg, ScratchFloatReg);
+
+    // If reg is non-zero, jump to nonZero.
+    branchDouble(DoubleNotEqual, reg, ScratchFloatReg, &nonZero);
+
+    // Input register is either zero or negative zero. Retrieve sign of input.
+    movmskpd(reg, scratch);
+
+    // If reg is 1 or 3, input is negative zero.
+    // If reg is 0 or 2, input is a normal zero.
+    branchTest32(NonZero, scratch, Imm32(1), label);
+
+    bind(&nonZero);
+#elif defined(JS_CODEGEN_X64)
+    movq(reg, scratch);
+    cmpq(scratch, Imm32(1));
+    j(Overflow, label);
+#endif
+}
+
+void
+MacroAssemblerX86Shared::branchNegativeZeroFloat32(const FloatRegister &reg,
+                                                   const Register &scratch,
+                                                   Label *label)
+{
+    movd(reg, scratch);
+    cmpl(scratch, Imm32(1));
+    j(Overflow, label);
+}
--- a/js/src/jit/shared/MacroAssembler-x86-shared.h
+++ b/js/src/jit/shared/MacroAssembler-x86-shared.h
@@ -85,16 +85,19 @@ class MacroAssemblerX86Shared : public A
             j(Parity, label);
             return;
         }
 
         JS_ASSERT(!(cond & DoubleConditionBitSpecial));
         j(ConditionFromDoubleCondition(cond), label);
     }
 
+    void branchNegativeZero(const FloatRegister &reg, const Register &scratch, Label *label);
+    void branchNegativeZeroFloat32(const FloatRegister &reg, const Register &scratch, Label *label);
+
     void move32(const Imm32 &imm, const Register &dest) {
         // Use the ImmWord version of mov to register, which has special
         // optimizations. Casting to uint32_t here ensures that the value
         // is zero-extended.
         mov(ImmWord(uint32_t(imm.value)), dest);
     }
     void move32(const Imm32 &imm, const Operand &dest) {
         movl(imm, dest);
@@ -514,73 +517,43 @@ class MacroAssemblerX86Shared : public A
     }
 
     // Checks whether a double is representable as a 32-bit integer. If so, the
     // integer is written to the output register. Otherwise, a bailout is taken to
     // the given snapshot. This function overwrites the scratch float register.
     void convertDoubleToInt32(FloatRegister src, Register dest, Label *fail,
                               bool negativeZeroCheck = true)
     {
+        // Check for -0.0
+        if (negativeZeroCheck)
+            branchNegativeZero(src, dest, fail);
+
         cvttsd2si(src, dest);
         cvtsi2sd(dest, ScratchFloatReg);
         ucomisd(src, ScratchFloatReg);
         j(Assembler::Parity, fail);
         j(Assembler::NotEqual, fail);
 
-        // Check for -0
-        if (negativeZeroCheck) {
-            Label notZero;
-            testl(dest, dest);
-            j(Assembler::NonZero, &notZero);
-
-            if (Assembler::HasSSE41()) {
-                ptest(src, src);
-                j(Assembler::NonZero, fail);
-            } else {
-                // bit 0 = sign of low double
-                // bit 1 = sign of high double
-                movmskpd(src, dest);
-                andl(Imm32(1), dest);
-                j(Assembler::NonZero, fail);
-            }
-
-            bind(&notZero);
-        }
     }
 
     // Checks whether a float32 is representable as a 32-bit integer. If so, the
     // integer is written to the output register. Otherwise, a bailout is taken to
     // the given snapshot. This function overwrites the scratch float register.
     void convertFloat32ToInt32(FloatRegister src, Register dest, Label *fail,
                                bool negativeZeroCheck = true)
     {
+        // Check for -0.0
+        if (negativeZeroCheck)
+            branchNegativeZeroFloat32(src, dest, fail);
+
         cvttss2si(src, dest);
         convertInt32ToFloat32(dest, ScratchFloatReg);
         ucomiss(src, ScratchFloatReg);
         j(Assembler::Parity, fail);
         j(Assembler::NotEqual, fail);
-
-        // Check for -0
-        if (negativeZeroCheck) {
-            Label notZero;
-            branchTest32(Assembler::NonZero, dest, dest, &notZero);
-
-            if (Assembler::HasSSE41()) {
-                ptest(src, src);
-                j(Assembler::NonZero, fail);
-            } else {
-                // bit 0 = sign of low float
-                // bits 1 to 3 = signs of higher floats
-                movmskps(src, dest);
-                andl(Imm32(1), dest);
-                j(Assembler::NonZero, fail);
-            }
-
-            bind(&notZero);
-        }
     }
 
     void clampIntToUint8(Register reg) {
         Label inRange;
         branchTest32(Assembler::Zero, reg, Imm32(0xffffff00), &inRange);
         {
             sarl(Imm32(31), reg);
             notl(reg);
--- a/js/src/jit/x64/MacroAssembler-x64.cpp
+++ b/js/src/jit/x64/MacroAssembler-x64.cpp
@@ -362,32 +362,16 @@ MacroAssemblerX64::handleFailureWithHand
     // If we are bailing out to baseline to handle an exception, jump to
     // the bailout tail stub.
     bind(&bailout);
     loadPtr(Address(esp, offsetof(ResumeFromException, bailoutInfo)), r9);
     mov(ImmWord(BAILOUT_RETURN_OK), rax);
     jmp(Operand(rsp, offsetof(ResumeFromException, target)));
 }
 
-Assembler::Condition
-MacroAssemblerX64::testNegativeZero(const FloatRegister &reg, const Register &scratch)
-{
-    movq(reg, scratch);
-    cmpq(scratch, Imm32(1));
-    return Overflow;
-}
-
-Assembler::Condition
-MacroAssemblerX64::testNegativeZeroFloat32(const FloatRegister &reg, const Register &scratch)
-{
-    movd(reg, scratch);
-    cmpl(scratch, Imm32(1));
-    return Overflow;
-}
-
 #ifdef JSGC_GENERATIONAL
 
 void
 MacroAssemblerX64::branchPtrInNurseryRange(Register ptr, Register temp, Label *label)
 {
     JS_ASSERT(ptr != temp);
     JS_ASSERT(ptr != ScratchReg);
 
--- a/js/src/jit/x64/MacroAssembler-x64.h
+++ b/js/src/jit/x64/MacroAssembler-x64.h
@@ -502,19 +502,16 @@ class MacroAssemblerX64 : public MacroAs
 
     template <typename T1, typename T2>
     void cmpPtrSet(Assembler::Condition cond, T1 lhs, T2 rhs, const Register &dest)
     {
         cmpPtr(lhs, rhs);
         emitSet(cond, dest);
     }
 
-    Condition testNegativeZero(const FloatRegister &reg, const Register &scratch);
-    Condition testNegativeZeroFloat32(const FloatRegister &reg, const Register &scratch);
-
     /////////////////////////////////////////////////////////////////
     // Common interface.
     /////////////////////////////////////////////////////////////////
     void reserveStack(uint32_t amount) {
         if (amount)
             subq(Imm32(amount), StackPointer);
         framePushed_ += amount;
     }
--- a/js/src/jit/x86/MacroAssembler-x86.cpp
+++ b/js/src/jit/x86/MacroAssembler-x86.cpp
@@ -382,57 +382,16 @@ MacroAssemblerX86::branchTestValue(Condi
         JS_ASSERT(cond == NotEqual);
         j(NotEqual, label);
 
         cmpl(value.typeReg(), Imm32(jv.s.tag));
         j(NotEqual, label);
     }
 }
 
-Assembler::Condition
-MacroAssemblerX86::testNegativeZero(const FloatRegister &reg, const Register &scratch)
-{
-    // Determines whether the single double contained in the XMM register reg
-    // is equal to double-precision -0.
-
-    Label nonZero;
-
-    // Compare to zero. Lets through {0, -0}.
-    xorpd(ScratchFloatReg, ScratchFloatReg);
-
-    // If reg is non-zero, jump to nonZero.
-    // Sets ZF=0 and PF=0.
-    branchDouble(DoubleNotEqual, reg, ScratchFloatReg, &nonZero);
-
-    // Input register is either zero or negative zero. Retrieve sign of input.
-    movmskpd(reg, scratch);
-
-    // If reg is 1 or 3, input is negative zero.
-    // If reg is 0 or 2, input is a normal zero.
-    // So the following test will set PF=1 for negative zero.
-    orl(Imm32(2), scratch);
-
-    bind(&nonZero);
-
-    // Here we need to be able to test if the input is a negative zero.
-    // - branchDouble joins here for non-zero values in which case it sets
-    //   ZF=0 and PF=0. In that case the test should fail.
-    // - orl sets PF=1 on negative zero and PF=0 otherwise
-    // => So testing PF=1 will return if input is negative zero or not.
-    return Parity;
-}
-
-Assembler::Condition
-MacroAssemblerX86::testNegativeZeroFloat32(const FloatRegister &reg, const Register &scratch)
-{
-    movd(reg, scratch);
-    cmpl(scratch, Imm32(1));
-    return Overflow;
-}
-
 #ifdef JSGC_GENERATIONAL
 
 void
 MacroAssemblerX86::branchPtrInNurseryRange(Register ptr, Register temp, Label *label)
 {
     JS_ASSERT(ptr != temp);
     JS_ASSERT(temp != InvalidReg);  // A temp register is required for x86.
 
--- a/js/src/jit/x86/MacroAssembler-x86.h
+++ b/js/src/jit/x86/MacroAssembler-x86.h
@@ -524,19 +524,16 @@ class MacroAssemblerX86 : public MacroAs
 
     template <typename T1, typename T2>
     void cmpPtrSet(Assembler::Condition cond, T1 lhs, T2 rhs, const Register &dest)
     {
         cmpPtr(lhs, rhs);
         emitSet(cond, dest);
     }
 
-    Condition testNegativeZero(const FloatRegister &reg, const Register &scratch);
-    Condition testNegativeZeroFloat32(const FloatRegister &reg, const Register &scratch);
-
     /////////////////////////////////////////////////////////////////
     // Common interface.
     /////////////////////////////////////////////////////////////////
     void reserveStack(uint32_t amount) {
         if (amount)
             subl(Imm32(amount), StackPointer);
         framePushed_ += amount;
     }