Optimize uncopy algorithm (bug 591836 part 7, r=dmandelin).
☠☠ backed out by b77f32d2d5c9 ☠☠
author      David Anderson <danderson@mozilla.com>
date        Wed, 01 Sep 2010 14:05:16 -0700
changeset   74514 e8dc1103c1b76aec702e06c92358b7274bc237f0
parent      74513 b364b2ffc970bd4f77e0f8f5f11f4202217bd294
child       74515 91389f18d45b194728ad8212fe946c94a1b63fb3
child       74553 b77f32d2d5c9283dc61cc7d91c75b5eee0f3c428
push id     2
push user   bsmedberg@mozilla.com
push date   Fri, 19 Aug 2011 14:38:13 +0000
reviewers   dmandelin
bugs        591836
milestone   2.0b5pre
js/src/methodjit/Compiler.cpp
js/src/methodjit/FrameState.cpp
js/src/methodjit/FrameState.h
--- a/js/src/methodjit/Compiler.cpp
+++ b/js/src/methodjit/Compiler.cpp
@@ -1054,21 +1054,32 @@ mjit::Compiler::generateMethod()
           BEGIN_CASE(JSOP_GETLOCAL)
           {
             uint32 slot = GET_SLOTNO(PC);
             frame.pushLocal(slot);
           }
           END_CASE(JSOP_GETLOCAL)
 
           BEGIN_CASE(JSOP_SETLOCAL)
+          {
+            jsbytecode *next = &PC[JSOP_SETLOCAL_LENGTH];
+            bool pop = JSOp(*next) == JSOP_POP && !analysis[next].nincoming;
+            frame.storeLocal(GET_SLOTNO(PC), pop);
+            if (pop) {
+                frame.pop();
+                PC += JSOP_SETLOCAL_LENGTH + JSOP_POP_LENGTH;
+                break;
+            }
+          }
+          END_CASE(JSOP_SETLOCAL)
+
           BEGIN_CASE(JSOP_SETLOCALPOP)
-            frame.storeLocal(GET_SLOTNO(PC));
-            if (op == JSOP_SETLOCALPOP)
-                frame.pop();
-          END_CASE(JSOP_SETLOCAL)
+            frame.storeLocal(GET_SLOTNO(PC), true);
+            frame.pop();
+          END_CASE(JSOP_SETLOCALPOP)
 
           BEGIN_CASE(JSOP_UINT16)
             frame.push(Value(Int32Value((int32_t) GET_UINT16(PC))));
           END_CASE(JSOP_UINT16)
 
           BEGIN_CASE(JSOP_NEWINIT)
           {
             jsint i = GET_INT8(PC);
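
The new JSOP_SETLOCAL arm above fuses an immediately following JSOP_POP into the store whenever no other bytecode jumps to that POP (|nincoming == 0|), so the stored value never needs to persist on the simulated stack. Below is a minimal standalone sketch of just that peephole test; the opcode values and lengths are hypothetical stand-ins, not the real jsopcode.tbl entries, and |analysis| is assumed to be a per-bytecode-offset table the same length as the code.

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

// Hypothetical stand-ins for SpiderMonkey's opcode tables (assumptions;
// the real values come from jsopcode.tbl).
enum OpModel : uint8_t { OP_SETLOCAL = 1, OP_POP = 2, OP_OTHER = 3 };
static const std::size_t SETLOCAL_LENGTH = 3; // opcode byte + 16-bit slot

struct OpInfo {
    uint32_t nincoming; // count of branches targeting this bytecode offset
};

// True when a SETLOCAL at |offset| may swallow the POP that follows it:
// the next op must be a POP that no jump targets, otherwise fusing would
// skip a join point that another control-flow path still needs.
static bool
CanFuseSetLocalPop(const std::vector<uint8_t> &code,
                   const std::vector<OpInfo> &analysis, std::size_t offset)
{
    std::size_t next = offset + SETLOCAL_LENGTH;
    return next < code.size() &&
           code[next] == OP_POP &&
           analysis[next].nincoming == 0;
}
```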
--- a/js/src/methodjit/FrameState.cpp
+++ b/js/src/methodjit/FrameState.cpp
@@ -777,42 +777,18 @@ FrameState::pushCopyOf(uint32 index)
         /* Maintain tracker ordering guarantees for copies. */
         JS_ASSERT(backing->isCopied());
         if (fe->trackerIndex() < backing->trackerIndex())
             swapInTracker(fe, backing);
     }
 }
 
 FrameEntry *
-FrameState::uncopy(FrameEntry *original)
+FrameState::walkTrackerForUncopy(FrameEntry *original)
 {
-    JS_ASSERT(original->isCopied());
-
-    /*
-     * Copies have three critical invariants:
-     *  1) The backing store precedes all copies in the tracker.
-     *  2) The backing store precedes all copies in the FrameState.
-     *  3) The backing store of a copy cannot be popped from the stack
-     *     while the copy is still live.
-     *
-     * Maintaining this invariant iteratively is kind of hard, so we choose
-     * the "lowest" copy in the frame up-front.
-     *
-     * For example, if the stack is:
-     *    [A, B, C, D]
-     * And the tracker has:
-     *    [A, D, C, B]
-     *
-     * If B, C, and D are copies of A - we will walk the tracker to the end
-     * and select B, not D (see bug 583684).
-     *
-     * Note: |tracker.nentries <= (nslots + nargs)|. However, this walk is
-     * sub-optimal if |original->trackerIndex() > sp - original|. With large
-     * scripts this may be a problem worth investigating.
-     */
     uint32 firstCopy = InvalidIndex;
     FrameEntry *bestFe = NULL;
     uint32 ncopies = 0;
     for (uint32 i = original->trackerIndex() + 1; i < tracker.nentries; i++) {
         FrameEntry *fe = tracker[i];
         if (fe >= sp)
             continue;
         if (fe->isCopy() && fe->copyOf() == original) {
@@ -824,17 +800,16 @@ FrameState::uncopy(FrameEntry *original)
             }
             ncopies++;
         }
     }
 
     if (!ncopies) {
         JS_ASSERT(firstCopy == InvalidIndex);
         JS_ASSERT(!bestFe);
-        original->copied = false;
         return NULL;
     }
 
     JS_ASSERT(firstCopy != InvalidIndex);
     JS_ASSERT(bestFe);
     JS_ASSERT(bestFe > original);
 
     /* Mark all extra copies as copies of the new backing index. */
@@ -863,17 +838,87 @@ FrameState::uncopy(FrameEntry *original)
              */
             if (other->trackerIndex() < bestFe->trackerIndex())
                 swapInTracker(bestFe, other);
         }
     } else {
         bestFe->setNotCopied();
     }
 
-    FrameEntry *fe = bestFe;
+    return bestFe;
+}
+
+FrameEntry *
+FrameState::walkFrameForUncopy(FrameEntry *original)
+{
+    FrameEntry *bestFe = NULL;
+    uint32 ncopies = 0;
+
+    /* It's only necessary to visit as many FEs as are being tracked. */
+    uint32 maxvisits = tracker.nentries;
+
+    for (FrameEntry *fe = original + 1; fe < sp && maxvisits; fe++) {
+        if (!fe->isTracked())
+            continue;
+
+        maxvisits--;
+
+        if (fe->isCopy() && fe->copyOf() == original) {
+            if (!bestFe) {
+                bestFe = fe;
+                bestFe->setCopyOf(NULL);
+            } else {
+                fe->setCopyOf(bestFe);
+                if (fe->trackerIndex() < bestFe->trackerIndex())
+                    swapInTracker(bestFe, fe);
+            }
+            ncopies++;
+        }
+    }
+
+    return bestFe;
+}
+
+FrameEntry *
+FrameState::uncopy(FrameEntry *original)
+{
+    JS_ASSERT(original->isCopied());
+
+    /*
+     * Copies have three critical invariants:
+     *  1) The backing store precedes all copies in the tracker.
+     *  2) The backing store precedes all copies in the FrameState.
+     *  3) The backing store of a copy cannot be popped from the stack
+     *     while the copy is still live.
+     *
+     * Maintaining these invariants iteratively is kind of hard, so we choose
+     * the "lowest" copy in the frame up-front.
+     *
+     * For example, if the stack is:
+     *    [A, B, C, D]
+     * And the tracker has:
+     *    [A, D, C, B]
+     *
+     * If B, C, and D are copies of A - we will walk the tracker to the end
+     * and select B, not D (see bug 583684).
+     *
+     * Note: |tracker.nentries <= (nslots + nargs)|. However, this walk is
+     * sub-optimal if |tracker.nentries - original->trackerIndex() > sp - original|.
+     * With large scripts this may be a problem worth investigating. Note that
+     * the tracker is walked twice, so we multiply by 2 for pessimism.
+     */
+    FrameEntry *fe;
+    if ((tracker.nentries - original->trackerIndex()) * 2 > uint32(sp - original))
+        fe = walkFrameForUncopy(original);
+    else
+        fe = walkTrackerForUncopy(original);
+    if (!fe) {
+        original->setNotCopied();
+        return NULL;
+    }
 
     /*
      * Switch the new backing store to the old backing store. During
      * this process we also necessarily make sure the copy can be
      * synced.
      */
     if (!original->isTypeKnown()) {
         /*
@@ -897,55 +942,86 @@ FrameState::uncopy(FrameEntry *original)
         regstate[fe->data.reg()].reassociate(fe);
 
     return fe;
 }
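
The dispatch in uncopy() picks whichever walk touches fewer entries: the tracker suffix after |original| is scanned twice (once to elect the best copy, once to redirect the remaining copies), so its length is doubled before comparing against the frame slice between |original| and |sp|. A toy version of just that comparison, under the simplifying assumption that each entry visited costs one unit in either walk:

```cpp
#include <cstdint>

// Toy cost model for the uncopy dispatch (an assumption: one unit of work
// per entry visited in either walk).
static bool
PreferFrameWalk(uint32_t trackerEntries, uint32_t trackerIndex,
                uint32_t entriesToSp /* sp - original */)
{
    // The tracker suffix is traversed twice, hence the factor of 2; the
    // frame walk visits each FrameEntry between original and sp once.
    return (trackerEntries - trackerIndex) * 2 > entriesToSp;
}
```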
 
 void
 FrameState::storeLocal(uint32 n, bool popGuaranteed, bool typeChange)
 {
-    FrameEntry *localFe = getLocal(n);
-    bool cacheable = !eval && !escaping[n];
+    FrameEntry *local = getLocal(n);
+
+    storeTop(local, popGuaranteed, typeChange);
 
-    if (!popGuaranteed && !cacheable) {
-        JS_ASSERT_IF(locals[n].isTracked() && (!eval || n < script->nfixed),
-                     locals[n].type.inMemory() &&
-                     locals[n].data.inMemory());
-        Address local(JSFrameReg, sizeof(JSStackFrame) + n * sizeof(Value));
-        storeTo(peek(-1), local, false);
-        forgetAllRegs(getLocal(n));
-        localFe->resetSynced();
-        return;
+    if (eval || escaping[n]) {
+        /* Ensure that the local variable remains synced. */
+        if (local->isCopy()) {
+            FrameEntry *backing = local->copyOf();
+            if (!local->data.synced()) {
+                if (backing->data.inMemory())
+                    tempRegForData(backing);
+                syncData(backing, addressOf(local), masm);
+            }
+            if (!local->type.synced()) {
+                if (backing->type.inMemory())
+                    tempRegForType(backing);
+                syncType(backing, addressOf(local), masm);
+            }
+        } else if (local->isConstant()) {
+            if (!local->data.synced())
+                syncData(local, addressOf(local), masm);
+        } else {
+            if (!local->data.synced()) {
+                syncData(local, addressOf(local), masm);
+                local->data.sync();
+            }
+            if (!local->type.synced()) {
+                syncType(local, addressOf(local), masm);
+                local->type.sync();
+            }
+            forgetEntry(local);
+        }
+
+        local->resetSynced();
     }
+}
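
storeLocal now defers to storeTop and then re-syncs only the locals the optimizer cannot own outright: slots visible to eval or captured by a closure must always hold an up-to-date value in frame memory, since foreign code can read the frame behind the compiler's back. A reduced model of that decision follows, with illustrative names rather than the real FrameState API:

```cpp
// Reduced model of the post-store sync rule (illustrative names only).
struct LocalState {
    bool escapes;     // captured by a closure (escaping analysis)
    bool dataSynced;  // payload already written to the frame slot
    bool typeSynced;  // type tag already written to the frame slot
};

// A local that eval or a closure can observe must be flushed to memory
// immediately after the store; purely JIT-private locals may stay in
// registers until a normal sync point.
static bool
MustFlushAfterStore(bool scriptUsesEval, const LocalState &local)
{
    return (scriptUsesEval || local.escapes) &&
           (!local.dataSynced || !local.typeSynced);
}
```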
 
-    bool wasSynced = localFe->type.synced();
+void
+FrameState::forgetEntry(FrameEntry *fe)
+{
+    if (fe->isCopied()) {
+        uncopy(fe);
+        if (!fe->isCopied())
+            forgetAllRegs(fe);
+    } else {
+        forgetAllRegs(fe);
+    }
+}
+
+void
+FrameState::storeTop(FrameEntry *target, bool popGuaranteed, bool typeChange)
+{
+    bool wasSynced = target->type.synced();
 
     /* Detect something like (x = x) which is a no-op. */
     FrameEntry *top = peek(-1);
-    if (top->isCopy() && top->copyOf() == localFe) {
-        JS_ASSERT(localFe->isCopied());
+    if (top->isCopy() && top->copyOf() == target) {
+        JS_ASSERT(target->isCopied());
         return;
     }
 
     /* Completely invalidate the local variable. */
-    if (localFe->isCopied()) {
-        uncopy(localFe);
-        if (!localFe->isCopied())
-            forgetAllRegs(localFe);
-    } else {
-        forgetAllRegs(localFe);
-    }
-
-    localFe->resetUnsynced();
+    forgetEntry(target);
+    target->resetUnsynced();
 
     /* Constants are easy to propagate. */
     if (top->isConstant()) {
-        localFe->setCopyOf(NULL);
-        localFe->setNotCopied();
-        localFe->setConstant(Jsvalify(top->getValue()));
+        target->setCopyOf(NULL);
+        target->setNotCopied();
+        target->setConstant(Jsvalify(top->getValue()));
         return;
     }
 
     /*
      * When dealing with copies, there are three important invariants:
      *
      * 1) The backing store precedes all copies in the tracker.
      * 2) The backing store precedes all copies in the FrameState.
@@ -957,28 +1033,28 @@ FrameState::storeLocal(uint32 n, bool po
      * can be rewritten as a copy of the original backing slot. If the first
      * condition does not hold, force it to hold by swapping in-place.
      */
     FrameEntry *backing = top;
     if (top->isCopy()) {
         backing = top->copyOf();
         JS_ASSERT(backing->trackerIndex() < top->trackerIndex());
 
-        if (backing < localFe) {
+        if (backing < target) {
             /* local.idx < backing.idx means local cannot be a copy yet */
-            if (localFe->trackerIndex() < backing->trackerIndex())
-                swapInTracker(backing, localFe);
-            localFe->setNotCopied();
-            localFe->setCopyOf(backing);
+            if (target->trackerIndex() < backing->trackerIndex())
+                swapInTracker(backing, target);
+            target->setNotCopied();
+            target->setCopyOf(backing);
             if (backing->isTypeKnown())
-                localFe->setType(backing->getKnownType());
+                target->setType(backing->getKnownType());
             else
-                localFe->type.invalidate();
-            localFe->data.invalidate();
-            localFe->isNumber = backing->isNumber;
+                target->type.invalidate();
+            target->data.invalidate();
+            target->isNumber = backing->isNumber;
             return;
         }
 
         /*
          * If control flow lands here, then there was a bytecode sequence like
          *
          *  ENTERBLOCK 2
          *  GETLOCAL 1
@@ -995,87 +1071,85 @@ FrameState::storeLocal(uint32 n, bool po
          * but even so there's a quick workaround. We take all copies of the
          * backing fe, and redirect them to be copies of the destination.
          */
         for (uint32 i = backing->trackerIndex() + 1; i < tracker.nentries; i++) {
             FrameEntry *fe = tracker[i];
             if (fe >= sp)
                 continue;
             if (fe->isCopy() && fe->copyOf() == backing)
-                fe->setCopyOf(localFe);
+                fe->setCopyOf(target);
         }
     }
     backing->setNotCopied();
     
     /*
      * This is valid from the top->isCopy() path because we're guaranteed a
      * consistent ordering - all copies of |backing| are tracked after 
      * |backing|. Transitively, only one swap is needed.
      */
-    if (backing->trackerIndex() < localFe->trackerIndex())
-        swapInTracker(backing, localFe);
+    if (backing->trackerIndex() < target->trackerIndex())
+        swapInTracker(backing, target);
 
     /*
      * Move the backing store down - we spill registers here, but we could be
      * smarter and re-use the type reg.
      */
     RegisterID reg = tempRegForData(backing);
-    localFe->data.setRegister(reg);
-    regstate[reg].reassociate(localFe);
+    target->data.setRegister(reg);
+    regstate[reg].reassociate(target);
 
     if (typeChange) {
         if (backing->isTypeKnown()) {
-            localFe->setType(backing->getKnownType());
+            target->setType(backing->getKnownType());
         } else {
             RegisterID reg = tempRegForType(backing);
-            localFe->type.setRegister(reg);
-            regstate[reg].reassociate(localFe);
+            target->type.setRegister(reg);
+            regstate[reg].reassociate(target);
         }
     } else {
         if (!wasSynced)
-            masm.storeTypeTag(ImmType(backing->getKnownType()), addressOf(localFe));
-        localFe->type.setMemory();
+            masm.storeTypeTag(ImmType(backing->getKnownType()), addressOf(target));
+        target->type.setMemory();
     }
 
     if (!backing->isTypeKnown())
         backing->type.invalidate();
     backing->data.invalidate();
-    backing->setCopyOf(localFe);
-    backing->isNumber = localFe->isNumber;
-    localFe->setCopied();
+    backing->setCopyOf(target);
+    backing->isNumber = target->isNumber;
+
+    JS_ASSERT(top->copyOf() == target);
 
-    if (!cacheable) {
-        /* TODO: x64 optimization */
-        if (!localFe->type.synced())
-            syncType(localFe, addressOf(localFe), masm);
-        if (!localFe->data.synced())
-            syncData(localFe, addressOf(localFe), masm);
-        forgetAllRegs(localFe);
-        localFe->type.setMemory();
-        localFe->data.setMemory();
-    }
-
-    JS_ASSERT(top->copyOf() == localFe);
+    /*
+     * If this condition fails, top->copyOf()->isCopied() is temporarily
+     * left broken. This is okay since frame.pop() is guaranteed to follow.
+     *
+     * NB: If |top->isCopy()|, then there are two copies: the backing and the
+     * top. This is why popGuaranteed is not enough.
+     */
+    if (!popGuaranteed || top->isCopy())
+        target->setCopied();
 }
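
When the value being stored is itself a copy whose backing sits above |target| in the frame, storeTop promotes |target| to be the new backing and redirects every copy of the old backing at it; only then can the old backing be marked uncopied. A toy walkthrough of that pointer shuffle, eliding registers and sync state (ToyEntry is an invented model, not the real FrameEntry):

```cpp
#include <cassert>

// Toy FrameEntry: just the copy links (registers and sync state elided).
struct ToyEntry {
    ToyEntry *copyOf = nullptr; // non-null: this entry mirrors another
    bool copied = false;        // true: some later entry mirrors this one
};

int main()
{
    // Frame order: target (a local) precedes the stack slots.
    ToyEntry target, backing, top;
    backing.copied = true;  // |top| mirrors |backing|
    top.copyOf = &backing;

    // storeTop(&target) with backing > target: redirect all copies of
    // |backing| (here just |top|) to |target|, then make |backing| a copy
    // of |target| so the invariant "backing precedes copies" holds again.
    top.copyOf = &target;
    backing.copyOf = &target;
    backing.copied = false;
    target.copied = true;   // skipped only when a pop is guaranteed

    assert(top.copyOf == &target); // mirrors the JS_ASSERT in storeTop
    return 0;
}
```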
 
 void
 FrameState::shimmy(uint32 n)
 {
     JS_ASSERT(sp - n >= spBase);
     int32 depth = 0 - int32(n);
-    storeLocal(uint32(&sp[depth - 1] - locals), true);
+    storeTop(&sp[depth - 1], true);
     popn(n);
 }
 
 void
 FrameState::shift(int32 n)
 {
     JS_ASSERT(n < 0);
     JS_ASSERT(sp + n - 1 >= spBase);
-    storeLocal(uint32(&sp[n - 1] - locals), true);
+    storeTop(&sp[n - 1], true);
     pop();
 }
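
shimmy and shift now reuse storeTop for their slot juggling instead of recovering a local index first. Ignoring registers and copies, shimmy(n) writes the top value into the slot n entries below it and then pops n slots; a plain-array sketch of that stack effect:

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

// Stack effect of shimmy(n) on plain values (registers/copies elided):
// storeTop(&sp[-n - 1], true) followed by popn(n).
static void
ShimmyModel(std::vector<int> &stack, std::size_t n)
{
    assert(stack.size() >= n + 1);
    stack[stack.size() - n - 1] = stack.back();
    stack.resize(stack.size() - n);
}

int main()
{
    std::vector<int> s = {1, 2, 3, 4};
    ShimmyModel(s, 2);                     // 4 lands where 2 was
    assert((s == std::vector<int>{1, 4}));
    return 0;
}
```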
 
 static inline bool
 AllocHelper(RematInfo &info, MaybeRegisterID &maybe)
 {
     if (info.inRegister()) {
         maybe = info.reg();
--- a/js/src/methodjit/FrameState.h
+++ b/js/src/methodjit/FrameState.h
@@ -563,19 +563,20 @@ class FrameState
 
     /*
      * Fully stores a FrameEntry at an arbitrary address. popHint specifies
      * how hard the register allocator should try to keep the FE in registers.
      */
     void storeTo(FrameEntry *fe, Address address, bool popHint);
 
     /*
-     * Stores the top stack slot back to a local variable.
+     * Stores the top stack slot back to a local or an arbitrary frame slot.
      */
     void storeLocal(uint32 n, bool popGuaranteed = false, bool typeChange = true);
+    void storeTop(FrameEntry *target, bool popGuaranteed = false, bool typeChange = true);
 
     /*
      * Restores state from a slow path.
      */
     void merge(Assembler &masm, Changes changes) const;
 
     /*
      * Writes unsynced stores to an arbitrary buffer.
@@ -777,16 +778,24 @@ class FrameState
      * "Uncopies" the backing store of a FrameEntry that has been copied. The
      * original FrameEntry is not invalidated; this is the responsibility of
      * the caller. The caller can check isCopied() to see if the registers
      * were moved to a copy.
      *
      * Later addition: uncopy() returns the first copy found.
      */
     FrameEntry *uncopy(FrameEntry *original);
+    FrameEntry *walkTrackerForUncopy(FrameEntry *original);
+    FrameEntry *walkFrameForUncopy(FrameEntry *original);
+
+    /*
+     * All registers in the FE are forgotten. If it is copied, it is uncopied
+     * beforehand.
+     */
+    void forgetEntry(FrameEntry *fe);
 
     FrameEntry *entryFor(uint32 index) const {
         JS_ASSERT(entries[index].isTracked());
         return &entries[index];
     }
 
     RegisterID evictSomeReg() {
         return evictSomeReg(Registers::AvailRegs);