Bug 976110 - Part 0: Optimize division and modulus by negative powers of two; r=sunfish
authorMauricio Collares Neto <mau@ric.io>
Sat, 19 Apr 2014 10:37:49 -0700
changeset 197853 cc10a90487bb8ed0948db260b7b6c3da796d0210
parent 197852 7e51adede3b6b78c19c755fe08a1b177745e979c
child 197854 c7925215ca327f482c3b439c1148b8c63cf410e6
push id3624
push userasasaki@mozilla.com
push dateMon, 09 Jun 2014 21:49:01 +0000
treeherdermozilla-beta@b1a5da15899a [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewerssunfish
bugs976110
milestone31.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 976110 - Part 0: Optimize division and modulus by negative powers of two; r=sunfish
js/src/jit-test/tests/basic/testDivModWithIntMin.js
js/src/jit/shared/CodeGenerator-x86-shared.cpp
js/src/jit/shared/LIR-x86-shared.h
js/src/jit/shared/Lowering-x86-shared.cpp
new file mode 100644
--- /dev/null
+++ b/js/src/jit-test/tests/basic/testDivModWithIntMin.js
@@ -0,0 +1,19 @@
+var intMin = -2147483648;
+
+assertEq(intMin % (-2147483648), -0);
+assertEq(intMin % (-3), -2);
+assertEq(intMin % (-1), -0);
+assertEq(intMin % 1, -0);
+assertEq(intMin % 3, -2);
+assertEq(intMin % 10, -8);
+assertEq(intMin % 2147483647, -1);
+
+assertEq((-2147483648) % (-2147483648), -0);
+for (var i = -10; i <= 10; ++i)
+    assertEq(i % (-2147483648), i);
+assertEq(2147483647 % (-2147483648), 2147483647);
+
+assertEq((-2147483648) / (-2147483648), 1);
+assertEq(0 / (-2147483648), -0);
+assertEq((-2147483648) / (-1), 2147483648);
+assertEq((-0) / (-2147483648), 0);
--- a/js/src/jit/shared/CodeGenerator-x86-shared.cpp
+++ b/js/src/jit/shared/CodeGenerator-x86-shared.cpp
@@ -864,49 +864,60 @@ CodeGeneratorX86Shared::visitMulNegative
     return true;
 }
 
 bool
 CodeGeneratorX86Shared::visitDivPowTwoI(LDivPowTwoI *ins)
 {
     Register lhs = ToRegister(ins->numerator());
     mozilla::DebugOnly<Register> output = ToRegister(ins->output());
+
     int32_t shift = ins->shift();
+    bool negativeDivisor = ins->negativeDivisor();
+    MDiv *mir = ins->mir();
 
     // We use defineReuseInput so these should always be the same, which is
     // convenient since all of our instructions here are two-address.
     JS_ASSERT(lhs == output);
 
+    if (!mir->isTruncated() && negativeDivisor) {
+        // 0 divided by a negative number must return a double.
+        masm.testl(lhs, lhs);
+        if (!bailoutIf(Assembler::Zero, ins->snapshot()))
+            return false;
+    }
+
     if (shift != 0) {
-        MDiv *mir = ins->mir();
         if (!mir->isTruncated()) {
             // If the remainder is != 0, bailout since this must be a double.
             masm.testl(lhs, Imm32(UINT32_MAX >> (32 - shift)));
             if (!bailoutIf(Assembler::NonZero, ins->snapshot()))
                 return false;
         }
 
-        if (!mir->canBeNegativeDividend()) {
-            // Numerator is unsigned, so needs no adjusting. Do the shift.
-            masm.sarl(Imm32(shift), lhs);
-            return true;
-        }
-
         // Adjust the value so that shifting produces a correctly rounded result
         // when the numerator is negative. See 10-1 "Signed Division by a Known
         // Power of 2" in Henry S. Warren, Jr.'s Hacker's Delight.
-        Register lhsCopy = ToRegister(ins->numeratorCopy());
-        JS_ASSERT(lhsCopy != lhs);
-        if (shift > 1)
-            masm.sarl(Imm32(31), lhs);
-        masm.shrl(Imm32(32 - shift), lhs);
-        masm.addl(lhsCopy, lhs);
+        if (mir->canBeNegativeDividend()) {
+            Register lhsCopy = ToRegister(ins->numeratorCopy());
+            JS_ASSERT(lhsCopy != lhs);
+            if (shift > 1)
+                masm.sarl(Imm32(31), lhs);
+            masm.shrl(Imm32(32 - shift), lhs);
+            masm.addl(lhsCopy, lhs);
+        }
 
-        // Do the shift.
         masm.sarl(Imm32(shift), lhs);
+        if (negativeDivisor)
+            masm.negl(lhs);
+    } else if (shift == 0 && negativeDivisor) {
+        // INT32_MIN / -1 overflows.
+        masm.negl(lhs);
+        if (!mir->isTruncated() && !bailoutIf(Assembler::Overflow, ins->snapshot()))
+            return false;
     }
 
     return true;
 }
 
 bool
 CodeGeneratorX86Shared::visitDivI(LDivI *ins)
 {
@@ -1007,29 +1018,37 @@ CodeGeneratorX86Shared::visitModPowTwoI(
     Label negative;
 
     if (ins->mir()->canBeNegativeDividend()) {
         // Switch based on sign of the lhs.
         // Positive numbers are just a bitmask
         masm.branchTest32(Assembler::Signed, lhs, lhs, &negative);
     }
 
-    masm.andl(Imm32((1 << shift) - 1), lhs);
+    masm.andl(Imm32((uint32_t(1) << shift) - 1), lhs);
 
     if (ins->mir()->canBeNegativeDividend()) {
         Label done;
         masm.jump(&done);
 
         // Negative numbers need a negate, bitmask, negate
         masm.bind(&negative);
-        // visitModI has an overflow check here to catch INT_MIN % -1, but
-        // here the rhs is a power of 2, and cannot be -1, so the check is not generated.
+
+        // Unlike in the visitModI case, we are not computing the mod by means of a
+        // division. Therefore, the divisor = -1 case isn't problematic (the andl
+        // always returns 0, which is what we expect).
+        //
+        // The negl instruction overflows if lhs == INT32_MIN, but this is also not
+        // a problem: shift is at most 31, and so the andl also always returns 0.
         masm.negl(lhs);
-        masm.andl(Imm32((1 << shift) - 1), lhs);
+        masm.andl(Imm32((uint32_t(1) << shift) - 1), lhs);
         masm.negl(lhs);
+
+        // Since a%b has the same sign as b, and a is negative in this branch,
+        // an answer of 0 means the correct result is actually -0. Bail out.
         if (!ins->mir()->isTruncated() && !bailoutIf(Assembler::Zero, ins->snapshot()))
             return false;
         masm.bind(&done);
     }
     return true;
 
 }
 
--- a/js/src/jit/shared/LIR-x86-shared.h
+++ b/js/src/jit/shared/LIR-x86-shared.h
@@ -42,36 +42,40 @@ class LDivI : public LBinaryMath<1>
         return mir_->toDiv();
     }
 };
 
 // Signed division by a power-of-two constant.
 class LDivPowTwoI : public LBinaryMath<0>
 {
     const int32_t shift_;
+    const bool negativeDivisor_;
 
   public:
     LIR_HEADER(DivPowTwoI)
 
-    LDivPowTwoI(const LAllocation &lhs, const LAllocation &lhsCopy, int32_t shift)
-      : shift_(shift)
+    LDivPowTwoI(const LAllocation &lhs, const LAllocation &lhsCopy, int32_t shift, bool negativeDivisor)
+      : shift_(shift), negativeDivisor_(negativeDivisor)
     {
         setOperand(0, lhs);
         setOperand(1, lhsCopy);
     }
 
     const LAllocation *numerator() {
         return getOperand(0);
     }
     const LAllocation *numeratorCopy() {
         return getOperand(1);
     }
     int32_t shift() const {
         return shift_;
     }
+    bool negativeDivisor() const {
+        return negativeDivisor_;
+    }
     MDiv *mir() const {
         return mir_->toDiv();
     }
 };
 
 class LModI : public LBinaryMath<1>
 {
   public:
--- a/js/src/jit/shared/Lowering-x86-shared.cpp
+++ b/js/src/jit/shared/Lowering-x86-shared.cpp
@@ -10,16 +10,17 @@
 
 #include "jit/MIR.h"
 
 #include "jit/shared/Lowering-shared-inl.h"
 
 using namespace js;
 using namespace js::jit;
 
+using mozilla::Abs;
 using mozilla::FloorLog2;
 
 LTableSwitch *
 LIRGeneratorX86Shared::newLTableSwitch(const LAllocation &in, const LDefinition &inputCopy,
                                        MTableSwitch *tableswitch)
 {
     return new(alloc()) LTableSwitch(in, inputCopy, temp(), tableswitch);
 }
@@ -132,32 +133,31 @@ LIRGeneratorX86Shared::lowerDivI(MDiv *d
     if (div->isUnsigned())
         return lowerUDiv(div);
 
     // Division instructions are slow. Division by constant denominators can be
     // rewritten to use other instructions.
     if (div->rhs()->isConstant()) {
         int32_t rhs = div->rhs()->toConstant()->value().toInt32();
 
-        // Check for division by a positive power of two, which is an easy and
-        // important case to optimize. Note that other optimizations are also
-        // possible; division by negative powers of two can be optimized in a
-        // similar manner as positive powers of two, and division by other
-        // constants can be optimized by a reciprocal multiplication technique.
-        int32_t shift = FloorLog2(rhs);
-        if (rhs > 0 && 1 << shift == rhs) {
+        // Check for division by a power of two, which is an easy and
+        // important case to optimize. Note that other optimizations
+        // are also possible: division by other constants can be
+        // optimized by a reciprocal multiplication technique.
+        int32_t shift = FloorLog2(Abs(rhs));
+        if (rhs != 0 && uint32_t(1) << shift == Abs(rhs)) {
             LAllocation lhs = useRegisterAtStart(div->lhs());
             LDivPowTwoI *lir;
             if (!div->canBeNegativeDividend()) {
                 // Numerator is unsigned, so does not need adjusting.
-                lir = new(alloc()) LDivPowTwoI(lhs, lhs, shift);
+                lir = new(alloc()) LDivPowTwoI(lhs, lhs, shift, rhs < 0);
             } else {
                 // Numerator is signed, and needs adjusting, and an extra
                 // lhs copy register is needed.
-                lir = new(alloc()) LDivPowTwoI(lhs, useRegister(div->lhs()), shift);
+                lir = new(alloc()) LDivPowTwoI(lhs, useRegister(div->lhs()), shift, rhs < 0);
             }
             if (div->fallible() && !assignSnapshot(lir, Bailout_BaselineInfo))
                 return false;
             return defineReuseInput(lir, div, 0);
         }
     }
 
     LDivI *lir = new(alloc()) LDivI(useRegister(div->lhs()), useRegister(div->rhs()),
@@ -170,18 +170,18 @@ LIRGeneratorX86Shared::lowerDivI(MDiv *d
 bool
 LIRGeneratorX86Shared::lowerModI(MMod *mod)
 {
     if (mod->isUnsigned())
         return lowerUMod(mod);
 
     if (mod->rhs()->isConstant()) {
         int32_t rhs = mod->rhs()->toConstant()->value().toInt32();
-        int32_t shift = FloorLog2(rhs);
-        if (rhs > 0 && 1 << shift == rhs) {
+        int32_t shift = FloorLog2(Abs(rhs));
+        if (rhs != 0 && uint32_t(1) << shift == Abs(rhs)) {
             LModPowTwoI *lir = new(alloc()) LModPowTwoI(useRegisterAtStart(mod->lhs()), shift);
             if (mod->fallible() && !assignSnapshot(lir, Bailout_BaselineInfo))
                 return false;
             return defineReuseInput(lir, mod, 0);
         }
     }
 
     LModI *lir = new(alloc()) LModI(useRegister(mod->lhs()),