Bug 1073016 - Optimize LRound and LRoundF on x86/x64. r=nbp
authorMukilan Thiyagarajan <mukilanthiagarajan@gmail.com>
Thu, 02 Oct 2014 17:21:39 +0200
changeset 231691 5beba92a3f8e43ebe6d1abd8fdea72a217a665a4
parent 231690 f4eaa493bf0cb422fd8644cca3329f77df77d3d4
child 231692 6e6a4b63628f1fbd7e70aab331c5bf43def5eb6a
push id4187
push userbhearsum@mozilla.com
push dateFri, 28 Nov 2014 15:29:12 +0000
treeherdermozilla-beta@f23cc6a30c11 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersnbp
bugs1073016
milestone35.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1073016 - Optimize LRound and LRoundF on x86/x64. r=nbp
js/src/jit/shared/CodeGenerator-x86-shared.cpp
js/src/jit/shared/MacroAssembler-x86-shared.cpp
js/src/jit/shared/MacroAssembler-x86-shared.h
--- a/js/src/jit/shared/CodeGenerator-x86-shared.cpp
+++ b/js/src/jit/shared/CodeGenerator-x86-shared.cpp
@@ -1898,40 +1898,49 @@ CodeGeneratorX86Shared::visitCeilF(LCeil
 bool
 CodeGeneratorX86Shared::visitRound(LRound *lir)
 {
     FloatRegister input = ToFloatRegister(lir->input());
     FloatRegister temp = ToFloatRegister(lir->temp());
     FloatRegister scratch = ScratchDoubleReg;
     Register output = ToRegister(lir->output());
 
-    Label negative, end, bailout;
-
-    // Branch to a slow path for negative inputs. Doesn't catch NaN or -0.
+    Label negativeOrZero, negative, end, bailout;
+
+    // Branch to a slow path for non-positive inputs. Doesn't catch NaN.
     masm.xorpd(scratch, scratch);
-    masm.branchDouble(Assembler::DoubleLessThan, input, scratch, &negative);
-
-    // Bail on negative-zero.
-    masm.branchNegativeZero(input, output, &bailout);
-    if (!bailoutFrom(&bailout, lir->snapshot()))
-        return false;
-
-    // Input is non-negative. Add the biggest double less than 0.5 and
+    masm.branchDouble(Assembler::DoubleLessThanOrEqual, input, scratch, &negativeOrZero);
+
+    // Input is positive. Add the biggest double less than 0.5 and
     // truncate, rounding down (because if the input is the biggest double less
     // than 0.5, adding 0.5 would undesirably round up to 1). Note that we have
     // to add the input to the temp register because we're not allowed to
     // modify the input register.
     masm.loadConstantDouble(GetBiggestNumberLessThan(0.5), temp);
     masm.addsd(input, temp);
     if (!bailoutCvttsd2si(temp, output, lir->snapshot()))
         return false;
 
     masm.jump(&end);
 
-    // Input is negative, but isn't -0.
+    // Input is negative, +0 or -0.
+    masm.bind(&negativeOrZero);
+    // Branch on negative input.
+    masm.j(Assembler::NotEqual, &negative);
+
+    // Bail on negative-zero.
+    masm.branchNegativeZero(input, output, &bailout, /* maybeNonZero = */ false);
+    if (!bailoutFrom(&bailout, lir->snapshot()))
+        return false;
+
+    // Input is +0
+    masm.xor32(output, output);
+    masm.jump(&end);
+
+    // Input is negative.
     masm.bind(&negative);
     masm.loadConstantDouble(0.5, temp);
 
     if (AssemblerX86Shared::HasSSE41()) {
         // Add 0.5 and round toward -Infinity. The result is stored in the temp
         // register (currently contains 0.5).
         masm.addsd(input, temp);
         masm.roundsd(temp, scratch, X86Assembler::RoundDown);
@@ -1979,41 +1988,50 @@ CodeGeneratorX86Shared::visitRound(LRoun
 bool
 CodeGeneratorX86Shared::visitRoundF(LRoundF *lir)
 {
     FloatRegister input = ToFloatRegister(lir->input());
     FloatRegister temp = ToFloatRegister(lir->temp());
     FloatRegister scratch = ScratchFloat32Reg;
     Register output = ToRegister(lir->output());
 
-    Label negative, end, bailout;
-
-    // Branch to a slow path for negative inputs. Doesn't catch NaN or -0.
+    Label negativeOrZero, negative, end, bailout;
+
+    // Branch to a slow path for non-positive inputs. Doesn't catch NaN.
     masm.xorps(scratch, scratch);
-    masm.branchFloat(Assembler::DoubleLessThan, input, scratch, &negative);
-
-    // Bail on negative-zero.
-    masm.branchNegativeZeroFloat32(input, output, &bailout);
-    if (!bailoutFrom(&bailout, lir->snapshot()))
-        return false;
+    masm.branchFloat(Assembler::DoubleLessThanOrEqual, input, scratch, &negativeOrZero);
 
     // Input is non-negative. Add the biggest float less than 0.5 and truncate,
     // rounding down (because if the input is the biggest float less than 0.5,
     // adding 0.5 would undesirably round up to 1). Note that we have to add
     // the input to the temp register because we're not allowed to modify the
     // input register.
     masm.loadConstantFloat32(GetBiggestNumberLessThan(0.5f), temp);
     masm.addss(input, temp);
 
     if (!bailoutCvttss2si(temp, output, lir->snapshot()))
         return false;
 
     masm.jump(&end);
 
-    // Input is negative, but isn't -0.
+    // Input is negative, +0 or -0.
+    masm.bind(&negativeOrZero);
+    // Branch on negative input.
+    masm.j(Assembler::NotEqual, &negative);
+
+    // Bail on negative-zero.
+    masm.branchNegativeZeroFloat32(input, output, &bailout);
+    if (!bailoutFrom(&bailout, lir->snapshot()))
+        return false;
+
+    // Input is +0.
+    masm.xor32(output, output);
+    masm.jump(&end);
+
+    // Input is negative.
     masm.bind(&negative);
     masm.loadConstantFloat32(0.5f, temp);
 
     if (AssemblerX86Shared::HasSSE41()) {
         // Add 0.5 and round toward -Infinity. The result is stored in the temp
         // register (currently contains 0.5).
         masm.addss(input, temp);
         masm.roundss(temp, scratch, X86Assembler::RoundDown);
--- a/js/src/jit/shared/MacroAssembler-x86-shared.cpp
+++ b/js/src/jit/shared/MacroAssembler-x86-shared.cpp
@@ -192,30 +192,33 @@ MacroAssemblerX86Shared::buildOOLFakeExi
     Push(Imm32(descriptor));
     Push(ImmPtr(fakeReturnAddr));
     return true;
 }
 
 void
 MacroAssemblerX86Shared::branchNegativeZero(FloatRegister reg,
                                             Register scratch,
-                                            Label *label)
+                                            Label *label,
+                                            bool maybeNonZero)
 {
     // Determines whether the low double contained in the XMM register reg
     // is equal to -0.0.
 
 #if defined(JS_CODEGEN_X86)
     Label nonZero;
 
-    // Compare to zero. Lets through {0, -0}.
-    xorpd(ScratchDoubleReg, ScratchDoubleReg);
+    // if not already compared to zero
+    if (maybeNonZero) {
+        // Compare to zero. Lets through {0, -0}.
+        xorpd(ScratchDoubleReg, ScratchDoubleReg);
 
-    // If reg is non-zero, jump to nonZero.
-    branchDouble(DoubleNotEqual, reg, ScratchDoubleReg, &nonZero);
-
+        // If reg is non-zero, jump to nonZero.
+        branchDouble(DoubleNotEqual, reg, ScratchDoubleReg, &nonZero);
+    }
     // Input register is either zero or negative zero. Retrieve sign of input.
     movmskpd(reg, scratch);
 
     // If reg is 1 or 3, input is negative zero.
     // If reg is 0 or 2, input is a normal zero.
     branchTest32(NonZero, scratch, Imm32(1), label);
 
     bind(&nonZero);
--- a/js/src/jit/shared/MacroAssembler-x86-shared.h
+++ b/js/src/jit/shared/MacroAssembler-x86-shared.h
@@ -85,17 +85,17 @@ class MacroAssemblerX86Shared : public A
             j(Parity, label);
             return;
         }
 
         MOZ_ASSERT(!(cond & DoubleConditionBitSpecial));
         j(ConditionFromDoubleCondition(cond), label);
     }
 
-    void branchNegativeZero(FloatRegister reg, Register scratch, Label *label);
+    void branchNegativeZero(FloatRegister reg, Register scratch, Label *label, bool  maybeNonZero = true);
     void branchNegativeZeroFloat32(FloatRegister reg, Register scratch, Label *label);
 
     void move32(Imm32 imm, Register dest) {
         // Use the ImmWord version of mov to register, which has special
         // optimizations. Casting to uint32_t here ensures that the value
         // is zero-extended.
         mov(ImmWord(uint32_t(imm.value)), dest);
     }