Bug 822116 - x86/x64 tuning for backtracking allocator, r=jandem.
backed out by 1df75697408f
author      Brian Hackett <bhackett1024@gmail.com>
date        Tue, 18 Dec 2012 21:26:09 -0700
changeset   125603 70f1b2db9f5f
parent      125602 d0797dfcab56
child       125604 ccf586b1e679
push id     2151
push user   lsblakk@mozilla.com
push date   Tue, 19 Feb 2013 18:06:57 +0000
reviewers   jandem
bugs        822116
milestone   20.0a1
js/src/ds/SplayTree.h
js/src/ion/BacktrackingAllocator.cpp
js/src/ion/BacktrackingAllocator.h
js/src/ion/LiveRangeAllocator.h
js/src/ion/Lowering.cpp
--- a/js/src/ds/SplayTree.h
+++ b/js/src/ds/SplayTree.h
@@ -131,16 +131,22 @@ class SplayTree
             swapChild->parent = swap->parent;
 
         root->item = swap->item;
         freeNode(swap);
 
         checkCoherency(root, NULL);
     }
 
+    template <class Op>
+    void forEach(Op op)
+    {
+        forEachInner(op, root);
+    }
+
   private:
 
     Node *lookup(const T &v)
     {
         JS_ASSERT(root);
         Node *node = root, *parent;
         do {
             parent = node;
@@ -229,16 +235,27 @@ class SplayTree
                 grandparent->left = node;
             else
                 grandparent->right = node;
         } else {
             root = node;
         }
     }
 
+    template <class Op>
+    void forEachInner(Op op, Node *node)
+    {
+        if (!node)
+            return;
+
+        forEachInner(op, node->left);
+        op(node->item);
+        forEachInner(op, node->right);
+    }
+
     Node *checkCoherency(Node *node, Node *minimum)
     {
 #ifdef DEBUG
         if (!node) {
             JS_ASSERT(!root);
             return NULL;
         }
         JS_ASSERT_IF(!node->parent, node == root);
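
The forEach/forEachInner addition above is a plain in-order walk: left subtree, then the node itself, then the right subtree, so the functor sees items in ascending order. As a minimal standalone sketch (a hypothetical binary-tree node, not the SplayTree class itself), the same pattern prints items in sorted order:

    #include <cstdio>
    #include <cstddef>

    struct Node {
        int item;
        Node *left;
        Node *right;
    };

    // In-order traversal: smaller items first, then this node, then larger items.
    template <class Op>
    void
    ForEachInOrder(Op op, Node *node)
    {
        if (!node)
            return;
        ForEachInOrder(op, node->left);
        op(node->item);
        ForEachInOrder(op, node->right);
    }

    struct PrintItem {
        void operator()(int item) { printf("%d\n", item); }
    };

    int
    main()
    {
        Node n1 = { 1, NULL, NULL };
        Node n3 = { 3, NULL, NULL };
        Node n2 = { 2, &n1, &n3 };
        ForEachInOrder(PrintItem(), &n2);  // prints 1, 2, 3
        return 0;
    }

This is the traversal order that PrintLiveIntervalRange relies on further down when dumping each register's allocation map.
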
--- a/js/src/ion/BacktrackingAllocator.cpp
+++ b/js/src/ion/BacktrackingAllocator.cpp
@@ -113,57 +113,73 @@ BacktrackingAllocator::go()
     IonSpew(IonSpew_RegAlloc, "Liveness analysis complete");
 
     if (IonSpewEnabled(IonSpew_RegAlloc))
         dumpLiveness();
 
     if (!init())
         return false;
 
-    if (!queuedIntervals.reserve(graph.numVirtualRegisters() * 3 / 2))
+    if (!allocationQueue.reserve(graph.numVirtualRegisters() * 3 / 2))
         return false;
 
     if (!groupAndQueueRegisters())
         return false;
 
     if (IonSpewEnabled(IonSpew_RegAlloc))
         dumpRegisterGroups();
 
     // Allocate, spill and split register intervals until finished.
-    while (!queuedIntervals.empty()) {
+    while (!allocationQueue.empty()) {
         if (mir->shouldCancel("Backtracking Allocation"))
             return false;
 
-        LiveInterval *interval = queuedIntervals.removeHighest().interval;
-        if (!processInterval(interval))
+        QueueItem item = allocationQueue.removeHighest();
+        if (item.interval ? !processInterval(item.interval) : !processGroup(item.group))
             return false;
     }
 
     if (IonSpewEnabled(IonSpew_RegAlloc))
         dumpAllocations();
 
     return resolveControlFlow() && reifyAllocations();
 }
 
 static bool
-LifetimesMightOverlap(BacktrackingVirtualRegister *reg0, BacktrackingVirtualRegister *reg1)
+LifetimesOverlap(BacktrackingVirtualRegister *reg0, BacktrackingVirtualRegister *reg1)
 {
-    // No fine grained testing, just see if there is a possibility of overlap.
-    CodePosition start0 = reg0->getFirstInterval()->start();
-    CodePosition start1 = reg1->getFirstInterval()->start();
-    CodePosition end0 = reg0->lastInterval()->end();
-    CodePosition end1 = reg1->lastInterval()->end();
-    return (end0 > start1) && (end1 > start0);
+    // Registers may have been eagerly split in two; see tryGroupReusedRegister.
+    // In such cases, only consider the first interval.
+    JS_ASSERT(reg0->numIntervals() <= 2 && reg1->numIntervals() <= 2);
+
+    LiveInterval *interval0 = reg0->getInterval(0), *interval1 = reg1->getInterval(0);
+
+    // Interval ranges are sorted in reverse order. The lifetimes overlap if
+    // any of their ranges overlap.
+    size_t index0 = 0, index1 = 0;
+    while (index0 < interval0->numRanges() && index1 < interval1->numRanges()) {
+        const LiveInterval::Range
+            *range0 = interval0->getRange(index0),
+            *range1 = interval1->getRange(index1);
+        if (range0->from >= range1->to)
+            index0++;
+        else if (range1->from >= range0->to)
+            index1++;
+        else
+            return true;
+    }
+
+    return false;
 }
 
 bool
 BacktrackingAllocator::canAddToGroup(VirtualRegisterGroup *group, BacktrackingVirtualRegister *reg)
 {
     for (size_t i = 0; i < group->registers.length(); i++) {
-        if (LifetimesMightOverlap(reg, &vregs[group->registers[i]]))
+        if (LifetimesOverlap(reg, &vregs[group->registers[i]]))
             return false;
     }
     return true;
 }
 
 bool
 BacktrackingAllocator::tryGroupRegisters(uint32_t vreg0, uint32_t vreg1)
 {
@@ -202,81 +218,191 @@ BacktrackingAllocator::tryGroupRegisters
         if (!canAddToGroup(group0, reg1))
             return true;
         if (!group0->registers.append(vreg1))
             return false;
         reg1->setGroup(group0);
         return true;
     }
 
-    if (LifetimesMightOverlap(reg0, reg1))
+    if (LifetimesOverlap(reg0, reg1))
         return true;
 
     VirtualRegisterGroup *group = new VirtualRegisterGroup();
     if (!group->registers.append(vreg0) || !group->registers.append(vreg1))
         return false;
 
     reg0->setGroup(group);
     reg1->setGroup(group);
     return true;
 }
 
 bool
+BacktrackingAllocator::tryGroupReusedRegister(uint32_t def, uint32_t use)
+{
+    BacktrackingVirtualRegister &reg = vregs[def], &usedReg = vregs[use];
+
+    // reg is a vreg which reuses its input usedReg for its output physical
+    // register. Try to group reg with usedReg if at all possible, as avoiding
+    // copies before reg's instruction is crucial for the quality of the
+    // generated code (MUST_REUSE_INPUT is used by all arithmetic instructions
+    // on x86/x64).
+
+    if (reg.intervalFor(inputOf(reg.ins()))) {
+        JS_ASSERT(reg.isTemp());
+        reg.setMustCopyInput();
+        return true;
+    }
+
+    if (!usedReg.intervalFor(outputOf(reg.ins()))) {
+        // The input is not live after the instruction, either in a safepoint
+        // for the instruction or in subsequent code. The input and output
+        // can thus be in the same group.
+        return tryGroupRegisters(use, def);
+    }
+
+    // The input is live afterwards, either in future instructions or in a
+    // safepoint for the reusing instruction. This is impossible to satisfy
+    // without copying the input.
+    //
+    // It may or may not be better to split the interval at the point of the
+    // definition, which may permit grouping. One case where it is definitely
+    // better to split is if the input never has any register uses after the
+    // instruction. Handle this splitting eagerly.
+
+    if (usedReg.numIntervals() != 1 ||
+        (usedReg.def()->isPreset() && !usedReg.def()->output()->isRegister())) {
+        reg.setMustCopyInput();
+        return true;
+    }
+    LiveInterval *interval = usedReg.getInterval(0);
+    LBlock *block = insData[reg.ins()].block();
+
+    // The input's lifetime must end within the same block as the definition;
+    // otherwise it could live on in phis elsewhere.
+    if (interval->end() > outputOf(block->lastId())) {
+        reg.setMustCopyInput();
+        return true;
+    }
+
+    for (UsePositionIterator iter = interval->usesBegin(); iter != interval->usesEnd(); iter++) {
+        if (iter->pos <= inputOf(reg.ins()))
+            continue;
+
+        LUse *use = iter->use;
+        if (FindReusingDefinition(insData[iter->pos].ins(), use)) {
+            reg.setMustCopyInput();
+            return true;
+        }
+        if (use->policy() != LUse::ANY && use->policy() != LUse::KEEPALIVE) {
+            reg.setMustCopyInput();
+            return true;
+        }
+    }
+
+    LiveInterval *preInterval = new LiveInterval(interval->vreg(), 0);
+    for (size_t i = 0; i < interval->numRanges(); i++) {
+        const LiveInterval::Range *range = interval->getRange(i);
+        JS_ASSERT(range->from <= inputOf(reg.ins()));
+
+        CodePosition to = (range->to <= outputOf(reg.ins())) ? range->to : outputOf(reg.ins());
+        if (!preInterval->addRange(range->from, to))
+            return false;
+    }
+
+    LiveInterval *postInterval = new LiveInterval(interval->vreg(), 0);
+    if (!postInterval->addRange(inputOf(reg.ins()), interval->end()))
+        return false;
+
+    LiveIntervalVector newIntervals;
+    if (!newIntervals.append(preInterval) || !newIntervals.append(postInterval))
+        return false;
+
+    if (!split(interval, newIntervals))
+        return false;
+
+    JS_ASSERT(usedReg.numIntervals() == 2);
+
+    usedReg.setCanonicalSpillExclude(inputOf(reg.ins()));
+
+    return tryGroupRegisters(use, def);
+}
+
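
tryGroupReusedRegister above splits the reused input's single interval in two around the reusing instruction: a pre interval that keeps the existing ranges clipped at the instruction's output, and a post interval covering everything from the instruction's input to the old end of the lifetime, which is then excluded from the canonical spill slot. A simplified standalone sketch of just the clipping step, using hypothetical range/position types rather than the allocator's own classes:

    #include <vector>
    #include <algorithm>
    #include <cstddef>

    struct Range {
        unsigned from, to;  // half-open [from, to) code positions
    };

    // Split the ranges of a reused input around an instruction occupying
    // [input, output): 'pre' keeps each range clipped at 'output', 'post' is
    // a single range from 'input' to the end of the original lifetime.
    static void
    SplitAroundReuse(const std::vector<Range> &ranges, unsigned input, unsigned output,
                     std::vector<Range> *pre, std::vector<Range> *post)
    {
        unsigned end = input;
        for (size_t i = 0; i < ranges.size(); i++) {
            const Range &r = ranges[i];
            Range clipped = { r.from, std::min(r.to, output) };
            pre->push_back(clipped);
            end = std::max(end, r.to);
        }
        Range tail = { input, end };
        post->push_back(tail);
    }
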
+bool
 BacktrackingAllocator::groupAndQueueRegisters()
 {
+    // Try to group registers with their reused inputs.
     for (size_t i = 0; i < graph.numVirtualRegisters(); i++) {
-        if (mir->shouldCancel("Backtracking Group Registers"))
-            return false;
-
         BacktrackingVirtualRegister &reg = vregs[i];
+        if (!reg.numIntervals())
+            continue;
 
-        // Place all intervals for this register on the allocation queue.
-        for (size_t j = 0; j < reg.numIntervals(); j++) {
-            LiveInterval *interval = reg.getInterval(j);
-            if (interval->numRanges() > 0) {
-                size_t priority = computePriority(interval);
-                if (!queuedIntervals.insert(QueuedInterval(interval, priority)))
+        if (reg.def()->policy() == LDefinition::MUST_REUSE_INPUT) {
+            LUse *use = reg.ins()->getOperand(reg.def()->getReusedInput())->toUse();
+            if (!tryGroupReusedRegister(i, use->virtualRegister()))
+                return false;
+        }
+    }
+
+    // Try to group phis with their inputs.
+    for (size_t i = 0; i < graph.numBlocks(); i++) {
+        LBlock *block = graph.getBlock(i);
+        for (size_t j = 0; j < block->numPhis(); j++) {
+            LPhi *phi = block->getPhi(j);
+            uint32_t output = phi->getDef(0)->virtualRegister();
+            for (size_t k = 0; k < phi->numOperands(); k++) {
+                uint32_t input = phi->getOperand(k)->toUse()->virtualRegister();
+                if (!tryGroupRegisters(input, output))
                     return false;
             }
         }
+    }
 
+    for (size_t i = 0; i < graph.numVirtualRegisters(); i++) {
+        if (mir->shouldCancel("Backtracking Enqueue Registers"))
+            return false;
+
+        BacktrackingVirtualRegister &reg = vregs[i];
+        JS_ASSERT(reg.numIntervals() <= 2);
+        JS_ASSERT(!reg.canonicalSpill());
+
+        if (!reg.numIntervals())
+            continue;
+
+        // Eagerly set the canonical spill slot for registers which are preset
+        // for that slot, and reuse it for other registers in the group.
         LDefinition *def = reg.def();
-        if (def && def->policy() == LDefinition::MUST_REUSE_INPUT) {
-            LUse *use = reg.ins()->getOperand(def->getReusedInput())->toUse();
-            VirtualRegister &usedReg = vregs[use->virtualRegister()];
-            if (usedReg.intervalFor(outputOf(reg.ins())) || reg.intervalFor(inputOf(reg.ins()))) {
-                // This definitions reuses an input that is live afterwards
-                // (either in future instructions or a safepoint for the
-                // definition). This is impossible to satisfy without first
-                // copying the input, and rather than encoding this by
-                // splitting intervals (which may require even more copying
-                // later) mark the register as needing this copy during
-                // reification and relax the MUST_REUSE_INPUT constraint.
-                IonSpew(IonSpew_RegAlloc, "Relaxing reuse-input constraint on v%u", i);
-                reg.setMustCopyInput();
-            } else {
-                // This definition reuses an input that is not live afterwards.
-                // The input and output can use the same allocation, and it is
-                // desirable to do this to avoid unnecessary copies.
-                if (!tryGroupRegisters(use->virtualRegister(), def->virtualRegister()))
+        if (def->policy() == LDefinition::PRESET && !def->output()->isRegister()) {
+            reg.setCanonicalSpill(*def->output());
+            if (reg.group() && reg.group()->spill.isUse())
+                reg.group()->spill = *def->output();
+        }
+
+        // Place all intervals for this register on the allocation queue.
+        // During initial queueing, use a single queue item for each register
+        // group so that grouped registers are allocated together, reducing the
+        // risk of unnecessary conflicts. This is in keeping with the idea that
+        // register groups are effectively single registers whose value changes
+        // during execution. If any intervals in the group are evicted later,
+        // they will be reallocated individually.
+        size_t start = 0;
+        if (VirtualRegisterGroup *group = reg.group()) {
+            if (i == group->canonicalReg()) {
+                size_t priority = computePriority(group);
+                if (!allocationQueue.insert(QueueItem(group, priority)))
                     return false;
             }
+            start++;
         }
-
-        // Try to group phis with their inputs.
-        for (size_t i = 0; i < graph.numBlocks(); i++) {
-            LBlock *block = graph.getBlock(i);
-            for (size_t j = 0; j < block->numPhis(); j++) {
-                LPhi *phi = block->getPhi(j);
-                uint32_t output = phi->getDef(0)->virtualRegister();
-                for (size_t k = 0; k < phi->numOperands(); k++) {
-                    uint32_t input = phi->getOperand(k)->toUse()->virtualRegister();
-                    if (!tryGroupRegisters(input, output))
-                        return false;
-                }
+        for (; start < reg.numIntervals(); start++) {
+            LiveInterval *interval = reg.getInterval(start);
+            if (interval->numRanges() > 0) {
+                size_t priority = computePriority(interval);
+                if (!allocationQueue.insert(QueueItem(interval, priority)))
+                    return false;
             }
         }
     }
 
     return true;
 }
 
 static const size_t MAX_ATTEMPTS = 2;
@@ -379,16 +505,58 @@ BacktrackingAllocator::processInterval(L
         // be constructed so that any minimal interval is allocatable.
         JS_ASSERT(!minimalInterval(interval));
 
         return chooseIntervalSplit(interval);
     }
 }
 
 bool
+BacktrackingAllocator::processGroup(VirtualRegisterGroup *group)
+{
+    if (IonSpewEnabled(IonSpew_RegAlloc)) {
+        IonSpew(IonSpew_RegAlloc, "Allocating group v%u [priority %lu] [weight %lu]",
+                group->registers[0], computePriority(group), computeSpillWeight(group));
+    }
+
+    LiveInterval *conflict;
+    for (size_t attempt = 0;; attempt++) {
+        // Search for any available register which the group can be allocated to.
+        conflict = NULL;
+        for (size_t i = 0; i < AnyRegister::Total; i++) {
+            bool success;
+            if (!tryAllocateGroupRegister(registers[i], group, &success, &conflict))
+                return false;
+            if (success) {
+                conflict = NULL;
+                break;
+            }
+        }
+
+        if (attempt < MAX_ATTEMPTS &&
+            conflict &&
+            computeSpillWeight(conflict) < computeSpillWeight(group))
+        {
+            if (!evictInterval(conflict))
+                return false;
+            continue;
+        }
+
+        for (size_t i = 0; i < group->registers.length(); i++) {
+            VirtualRegister &reg = vregs[group->registers[i]];
+            JS_ASSERT(reg.numIntervals() <= 2);
+            if (!processInterval(reg.getInterval(0)))
+                return false;
+        }
+
+        return true;
+    }
+}
+
+bool
 BacktrackingAllocator::setIntervalRequirement(LiveInterval *interval)
 {
     // Set any requirement or hint on interval according to its definition and
     // uses. Return false if there are conflicting requirements which will
     // require the interval to be split.
     interval->setHint(Requirement());
     interval->setRequirement(Requirement());
 
@@ -436,16 +604,63 @@ BacktrackingAllocator::setIntervalRequir
                 return false;
         }
     }
 
     return true;
 }
 
 bool
+BacktrackingAllocator::tryAllocateGroupRegister(PhysicalRegister &r, VirtualRegisterGroup *group,
+                                                bool *psuccess, LiveInterval **pconflicting)
+{
+    *psuccess = false;
+
+    if (!r.allocatable)
+        return true;
+
+    if (r.reg.isFloat() != vregs[group->registers[0]].isDouble())
+        return true;
+
+    bool allocatable = true;
+    LiveInterval *conflicting = NULL;
+
+    for (size_t i = 0; i < group->registers.length(); i++) {
+        VirtualRegister &reg = vregs[group->registers[i]];
+        JS_ASSERT(reg.numIntervals() <= 2);
+        LiveInterval *interval = reg.getInterval(0);
+
+        for (size_t j = 0; j < interval->numRanges(); j++) {
+            AllocatedRange range(interval, interval->getRange(j)), existing;
+            if (r.allocations.contains(range, &existing)) {
+                if (conflicting) {
+                    if (conflicting != existing.interval)
+                        return true;
+                } else {
+                    conflicting = existing.interval;
+                }
+                allocatable = false;
+            }
+        }
+    }
+
+    if (!allocatable) {
+        JS_ASSERT(conflicting);
+        if (!*pconflicting || computeSpillWeight(conflicting) < computeSpillWeight(*pconflicting))
+            *pconflicting = conflicting;
+        return true;
+    }
+
+    *psuccess = true;
+
+    group->allocation = LAllocation(r.reg);
+    return true;
+}
+
+bool
 BacktrackingAllocator::tryAllocateRegister(PhysicalRegister &r, LiveInterval *interval,
                                            bool *success, LiveInterval **pconflicting)
 {
     *success = false;
 
     if (!r.allocatable)
         return true;
 
@@ -518,27 +733,28 @@ BacktrackingAllocator::evictInterval(Liv
     for (size_t i = 0; i < interval->numRanges(); i++) {
         AllocatedRange range(interval, interval->getRange(i));
         physical.allocations.remove(range);
     }
 
     interval->setAllocation(LAllocation());
 
     size_t priority = computePriority(interval);
-    return queuedIntervals.insert(QueuedInterval(interval, priority));
+    return allocationQueue.insert(QueueItem(interval, priority));
 }
 
 bool
-BacktrackingAllocator::splitAndRequeueInterval(LiveInterval *interval,
-                                               const LiveIntervalVector &newIntervals)
+BacktrackingAllocator::split(LiveInterval *interval,
+                             const LiveIntervalVector &newIntervals)
 {
     JS_ASSERT(newIntervals.length() >= 2);
 
     if (IonSpewEnabled(IonSpew_RegAlloc)) {
-        IonSpew(IonSpew_RegAlloc, "splitting interval %s:", IntervalString(interval));
+        IonSpew(IonSpew_RegAlloc, "splitting interval v%u %s:",
+                interval->vreg(), IntervalString(interval));
         for (size_t i = 0; i < newIntervals.length(); i++)
             IonSpew(IonSpew_RegAlloc, "    %s", IntervalString(newIntervals[i]));
     }
 
     // Find the earliest interval in the new list.
     LiveInterval *first = newIntervals[0];
     for (size_t i = 1; i < newIntervals.length(); i++) {
         if (newIntervals[i]->start() < first->start())
@@ -549,86 +765,102 @@ BacktrackingAllocator::splitAndRequeueIn
     VirtualRegister *reg = &vregs[interval->vreg()];
     reg->replaceInterval(interval, first);
     for (size_t i = 0; i < newIntervals.length(); i++) {
         if (newIntervals[i] != first && !reg->addInterval(newIntervals[i]))
             return false;
     }
 
     // Redistribute uses from the old interval to the new intervals. Intervals
-    // are permitted to overlap. In such cases, assign the use to the interval
-    // with the latest start position.
+    // are permitted to overlap. In such cases, assign the use to a minimal
+    // interval containing it when one exists, and otherwise to the interval
+    // with the latest start position.
     for (UsePositionIterator iter(interval->usesBegin());
          iter != interval->usesEnd();
          iter++)
     {
         CodePosition pos = iter->pos;
-        LiveInterval *maxInterval = NULL;
+        LiveInterval *addInterval = NULL;
         for (size_t i = 0; i < newIntervals.length(); i++) {
-            if (newIntervals[i]->covers(pos)) {
-                if (!maxInterval || newIntervals[i]->start() > maxInterval->start())
-                    maxInterval = newIntervals[i];
+            LiveInterval *newInterval = newIntervals[i];
+            if (newInterval->covers(pos)) {
+                if (minimalUse(newInterval, insData[pos].ins())) {
+                    addInterval = newInterval;
+                    break;
+                }
+                if (!addInterval || newInterval->start() < addInterval->start())
+                    addInterval = newInterval;
             }
         }
-        maxInterval->addUse(new UsePosition(iter->use, iter->pos));
+        addInterval->addUse(new UsePosition(iter->use, iter->pos));
     }
 
+    return true;
+}
+
+bool
+BacktrackingAllocator::requeueIntervals(const LiveIntervalVector &newIntervals)
+{
     // Queue the new intervals for register assignment.
     for (size_t i = 0; i < newIntervals.length(); i++) {
         LiveInterval *newInterval = newIntervals[i];
         size_t priority = computePriority(newInterval);
-        if (!queuedIntervals.insert(QueuedInterval(newInterval, priority)))
+        if (!allocationQueue.insert(QueueItem(newInterval, priority)))
             return false;
     }
-
     return true;
 }
 
 void
 BacktrackingAllocator::spill(LiveInterval *interval)
 {
     IonSpew(IonSpew_RegAlloc, "Spilling interval");
 
     JS_ASSERT(interval->requirement()->kind() == Requirement::NONE);
 
     // We can't spill bogus intervals.
     JS_ASSERT(interval->hasVreg());
 
     BacktrackingVirtualRegister *reg = &vregs[interval->vreg()];
 
-    if (reg->canonicalSpill()) {
-        IonSpew(IonSpew_RegAlloc, "  Picked canonical spill location %u",
-                reg->canonicalSpill()->toStackSlot()->slot());
-        interval->setAllocation(*reg->canonicalSpill());
-        return;
-    }
+    bool useCanonical = !reg->hasCanonicalSpillExclude()
+        || interval->start() < reg->canonicalSpillExclude();
 
-    if (reg->group() && reg->group()->spill.isStackSlot()) {
-        IonSpew(IonSpew_RegAlloc, "  Reusing group spill location %u",
-                reg->group()->spill.toStackSlot()->slot());
-        interval->setAllocation(reg->group()->spill);
-        reg->setCanonicalSpill(reg->group()->spill);
-        return;
+    if (useCanonical) {
+        if (reg->canonicalSpill()) {
+            IonSpew(IonSpew_RegAlloc, "  Picked canonical spill location %s",
+                    reg->canonicalSpill()->toString());
+            interval->setAllocation(*reg->canonicalSpill());
+            return;
+        }
+
+        if (reg->group() && !reg->group()->spill.isUse()) {
+            IonSpew(IonSpew_RegAlloc, "  Reusing group spill location %s",
+                    reg->group()->spill.toString());
+            interval->setAllocation(reg->group()->spill);
+            reg->setCanonicalSpill(reg->group()->spill);
+            return;
+        }
     }
 
     uint32_t stackSlot;
-    if (reg->isDouble()) {
+    if (reg->isDouble())
         stackSlot = stackSlotAllocator.allocateDoubleSlot();
-    } else {
+    else
         stackSlot = stackSlotAllocator.allocateSlot();
-    }
     JS_ASSERT(stackSlot <= stackSlotAllocator.stackHeight());
 
-    IonSpew(IonSpew_RegAlloc, "  Allocating canonical spill location %u", stackSlot);
-    interval->setAllocation(LStackSlot(stackSlot, reg->isDouble()));
-    reg->setCanonicalSpill(*interval->getAllocation());
+    LStackSlot alloc(stackSlot, reg->isDouble());
+    interval->setAllocation(alloc);
+
+    IonSpew(IonSpew_RegAlloc, "  Allocating spill location %s", alloc.toString());
 
-    if (reg->group()) {
-        JS_ASSERT(!reg->group()->spill.isStackSlot());
-        reg->group()->spill = *interval->getAllocation();
+    if (useCanonical) {
+        reg->setCanonicalSpill(alloc);
+        if (reg->group())
+            reg->group()->spill = alloc;
     }
 }
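
The spill path above now reuses a register's canonical spill location only for intervals that start before its exclude position; the eagerly split tail created by tryGroupReusedRegister gets a fresh slot so it cannot clash with other members of a group sharing that location. A condensed, hypothetical model of just that decision (it omits the group-shared slot handling and uses plain integers for positions and slots):

    // Decide which stack slot an interval spills to. 'exclude' of 0 means the
    // register has no exclude position.
    struct SpillState {
        bool hasCanonicalSlot;
        unsigned canonicalSlot;
        unsigned exclude;
    };

    static unsigned
    PickSpillSlot(SpillState *state, unsigned intervalStart, unsigned *nextFreeSlot)
    {
        bool useCanonical = state->exclude == 0 || intervalStart < state->exclude;
        if (useCanonical && state->hasCanonicalSlot)
            return state->canonicalSlot;     // share the existing location

        unsigned slot = (*nextFreeSlot)++;   // allocate a fresh stack slot
        if (useCanonical) {
            state->hasCanonicalSlot = true;  // remember it for later spills
            state->canonicalSlot = slot;
        }
        return slot;
    }
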
 
 // Add moves to resolve conflicting assignments between a block and its
 // predecessors. XXX try to common this with LinearScanAllocator.
 bool
 BacktrackingAllocator::resolveControlFlow()
 {
@@ -727,34 +959,16 @@ BacktrackingAllocator::resolveControlFlo
                 }
             }
         }
     }
 
     return true;
 }
 
-static LDefinition *
-FindReusingDefinition(LInstruction *ins, LAllocation *alloc)
-{
-    for (size_t i = 0; i < ins->numDefs(); i++) {
-        LDefinition *def = ins->getDef(i);
-        if (def->policy() == LDefinition::MUST_REUSE_INPUT &&
-            ins->getOperand(def->getReusedInput()) == alloc)
-            return def;
-    }
-    for (size_t i = 0; i < ins->numTemps(); i++) {
-        LDefinition *def = ins->getTemp(i);
-        if (def->policy() == LDefinition::MUST_REUSE_INPUT &&
-            ins->getOperand(def->getReusedInput()) == alloc)
-            return def;
-    }
-    return NULL;
-}
-
 bool
 BacktrackingAllocator::isReusedInput(LUse *use, LInstruction *ins, bool considerCopy)
 {
     if (LDefinition *def = FindReusingDefinition(ins, use))
         return considerCopy || !vregs[def->virtualRegister()].mustCopyInput();
     return false;
 }
 
@@ -814,29 +1028,21 @@ BacktrackingAllocator::reifyAllocations(
     return true;
 }
 
 void
 BacktrackingAllocator::dumpRegisterGroups()
 {
     printf("Register groups:\n");
     for (size_t i = 0; i < graph.numVirtualRegisters(); i++) {
-        if (VirtualRegisterGroup *group = vregs[i].group()) {
-            bool minimum = true;
-            for (size_t j = 0; j < group->registers.length(); j++) {
-                if (group->registers[j] < i) {
-                    minimum = false;
-                    break;
-                }
-            }
-            if (minimum) {
-                for (size_t j = 0; j < group->registers.length(); j++)
-                    printf(" v%u", group->registers[j]);
-                printf("\n");
-            }
+        VirtualRegisterGroup *group = vregs[i].group();
+        if (group && i == group->canonicalReg()) {
+            for (size_t j = 0; j < group->registers.length(); j++)
+                printf(" v%u", group->registers[j]);
+            printf("\n");
         }
     }
 }
 
 void
 BacktrackingAllocator::dumpLiveness()
 {
 #ifdef DEBUG
@@ -900,16 +1106,30 @@ BacktrackingAllocator::dumpLiveness()
         }
         printf("\n");
     }
 
     printf("\n");
 #endif // DEBUG
 }
 
+#ifdef DEBUG
+struct BacktrackingAllocator::PrintLiveIntervalRange
+{
+    void operator()(const AllocatedRange &item)
+    {
+        if (item.range == item.interval->getRange(0)) {
+            printf("  v%u: %s\n",
+                   item.interval->hasVreg() ? item.interval->vreg() : 0,
+                   IntervalString(item.interval));
+        }
+    }
+};
+#endif
+
 void
 BacktrackingAllocator::dumpAllocations()
 {
 #ifdef DEBUG
     printf("Allocations:\n");
 
     for (size_t i = 0; i < graph.numVirtualRegisters(); i++) {
         printf("v%lu:", i);
@@ -919,16 +1139,23 @@ BacktrackingAllocator::dumpAllocations()
                 printf(" *");
             LiveInterval *interval = vreg.getInterval(j);
             printf("%s :: %s", IntervalString(interval), interval->getAllocation()->toString());
         }
         printf("\n");
     }
 
     printf("\n");
+
+    for (size_t i = 0; i < AnyRegister::Total; i++) {
+        printf("reg %s:\n", AnyRegister::FromCode(i).name());
+        registers[i].allocations.forEach(PrintLiveIntervalRange());
+    }
+
+    printf("\n");
 #endif // DEBUG
 }
 
 bool
 BacktrackingAllocator::addLiveInterval(LiveIntervalVector &intervals, uint32_t vreg,
                                        CodePosition from, CodePosition to)
 {
     LiveInterval *interval = new LiveInterval(vreg, 0);
@@ -950,16 +1177,27 @@ BacktrackingAllocator::computePriority(c
     for (size_t i = 0; i < interval->numRanges(); i++) {
         const LiveInterval::Range *range = interval->getRange(i);
         lifetimeTotal += range->to.pos() - range->from.pos();
     }
 
     return lifetimeTotal;
 }
 
+size_t
+BacktrackingAllocator::computePriority(const VirtualRegisterGroup *group)
+{
+    size_t priority = 0;
+    for (size_t j = 0; j < group->registers.length(); j++) {
+        uint32_t vreg = group->registers[j];
+        priority += computePriority(vregs[vreg].getInterval(0));
+    }
+    return priority;
+}
+
 CodePosition
 BacktrackingAllocator::minimalDefEnd(LInstruction *ins)
 {
     // Compute the shortest interval that captures vregs defined by ins.
     // Watch for instructions that are followed by an OSI point and/or Nop.
     // If moves are introduced between the instruction and the OSI point then
     // safepoint information for the instruction may be incorrect. This is
     // pretty disgusting and should be fixed somewhere else in the compiler.
@@ -986,16 +1224,21 @@ BacktrackingAllocator::minimalUse(const 
     // Whether interval is a minimal interval capturing a use at ins.
     return (interval->start() == inputOf(ins)) &&
         (interval->end() == outputOf(ins) || interval->end() == outputOf(ins).next());
 }
 
 bool
 BacktrackingAllocator::minimalInterval(const LiveInterval *interval, bool *pfixed)
 {
+    if (!interval->hasVreg()) {
+        *pfixed = true;
+        return true;
+    }
+
     if (interval->index() == 0) {
         VirtualRegister &reg = vregs[interval->vreg()];
         if (pfixed)
             *pfixed = reg.def()->policy() == LDefinition::PRESET && reg.def()->output()->isRegister();
         return minimalDef(interval, reg.ins());
     }
 
     for (UsePositionIterator iter = interval->usesBegin(); iter != interval->usesEnd(); iter++) {
@@ -1066,16 +1309,27 @@ BacktrackingAllocator::computeSpillWeigh
         usesTotal += 2000;
 
     // Compute spill weight as a use density, lowering the weight for long
     // lived intervals with relatively few uses.
     size_t lifetimeTotal = computePriority(interval);
     return lifetimeTotal ? usesTotal / lifetimeTotal : 0;
 }
 
+size_t
+BacktrackingAllocator::computeSpillWeight(const VirtualRegisterGroup *group)
+{
+    size_t maxWeight = 0;
+    for (size_t j = 0; j < group->registers.length(); j++) {
+        uint32_t vreg = group->registers[j];
+        maxWeight = Max(maxWeight, computeSpillWeight(vregs[vreg].getInterval(0)));
+    }
+    return maxWeight;
+}
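
The two group heuristics above aggregate differently: computePriority sums the members' lifetime lengths, so groups covering more code come off the allocation queue earlier, while computeSpillWeight takes the most expensive member, since evicting a group is only as cheap as its costliest interval. A small standalone sketch of the two aggregations over a hypothetical vector of per-member values:

    #include <vector>
    #include <algorithm>
    #include <cstddef>

    // Priority: total across members, so long-lived groups are handled first.
    static size_t
    GroupPriority(const std::vector<size_t> &memberPriorities)
    {
        size_t total = 0;
        for (size_t i = 0; i < memberPriorities.size(); i++)
            total += memberPriorities[i];
        return total;
    }

    // Spill weight: the worst member bounds how cheaply the group can be evicted.
    static size_t
    GroupSpillWeight(const std::vector<size_t> &memberWeights)
    {
        size_t maxWeight = 0;
        for (size_t i = 0; i < memberWeights.size(); i++)
            maxWeight = std::max(maxWeight, memberWeights[i]);
        return maxWeight;
    }
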
+
 bool
 BacktrackingAllocator::trySplitAcrossHotcode(LiveInterval *interval, bool *success)
 {
     // If this interval has portions that are hot and portions that are cold,
     // split it at the boundaries between hot and cold code.
 
     const LiveInterval::Range *hotRange = NULL;
 
@@ -1138,17 +1392,17 @@ BacktrackingAllocator::trySplitAcrossHot
     if (!newIntervals.append(hotInterval))
         return false;
     if (preInterval && !newIntervals.append(preInterval))
         return false;
     if (postInterval && !newIntervals.append(postInterval))
         return false;
 
     *success = true;
-    return splitAndRequeueInterval(interval, newIntervals);
+    return split(interval, newIntervals) && requeueIntervals(newIntervals);
 }
 
 bool
 BacktrackingAllocator::trySplitAfterLastRegisterUse(LiveInterval *interval, bool *success)
 {
     // If this interval's later uses do not require it to be in a register,
     // split it after the last use which does require a register.
 
@@ -1208,17 +1462,17 @@ BacktrackingAllocator::trySplitAfterLast
         }
     }
 
     LiveIntervalVector newIntervals;
     if (!newIntervals.append(preInterval) || !newIntervals.append(postInterval))
         return false;
 
     *success = true;
-    return splitAndRequeueInterval(interval, newIntervals);
+    return split(interval, newIntervals) && requeueIntervals(newIntervals);
 }
 
 bool
 BacktrackingAllocator::splitAtAllRegisterUses(LiveInterval *interval)
 {
     // Split this interval so that all its register uses become minimal
     // intervals and allow the vreg to be spilled throughout its range.
 
@@ -1283,20 +1537,27 @@ BacktrackingAllocator::splitAtAllRegiste
     for (size_t i = 0; i < registerUses.length(); i++) {
         // Watch for duplicate register use positions.
         if (i > 0 && registerUses[i].from == registerUses[i - 1].from)
             continue;
         if (!addLiveInterval(newIntervals, vreg, registerUses[i].from, registerUses[i].to))
             return false;
     }
 
-    if (!addLiveInterval(newIntervals, vreg, spillStart, interval->end()))
+    LiveInterval *spillInterval = new LiveInterval(vreg, 0);
+    for (size_t i = 0; i < interval->numRanges(); i++) {
+        const LiveInterval::Range *range = interval->getRange(i);
+        CodePosition from = range->from < spillStart ? spillStart : range->from;
+        if (!spillInterval->addRange(from, range->to))
+            return false;
+    }
+    if (!newIntervals.append(spillInterval))
         return false;
 
-    return splitAndRequeueInterval(interval, newIntervals);
+    return split(interval, newIntervals) && requeueIntervals(newIntervals);
 }
 
 bool
 BacktrackingAllocator::chooseIntervalSplit(LiveInterval *interval)
 {
     bool success = false;
 
     if (!trySplitAcrossHotcode(interval, &success))
--- a/js/src/ion/BacktrackingAllocator.h
+++ b/js/src/ion/BacktrackingAllocator.h
@@ -34,27 +34,38 @@ struct VirtualRegisterGroup : public Tem
     LAllocation allocation;
 
     // Spill location to be shared by registers in the group.
     LAllocation spill;
 
     VirtualRegisterGroup()
       : allocation(LUse(0, LUse::ANY)), spill(LUse(0, LUse::ANY))
     {}
+
+    uint32_t canonicalReg() {
+        uint32_t minimum = registers[0];
+        for (size_t i = 1; i < registers.length(); i++)
+            minimum = Min(minimum, registers[i]);
+        return minimum;
+    }
 };
 
 class BacktrackingVirtualRegister : public VirtualRegister
 {
     // If this register's definition is MUST_REUSE_INPUT, whether a copy must
     // be introduced before the definition that relaxes the policy.
     bool mustCopyInput_;
 
     // Spill location to use for this register.
     LAllocation canonicalSpill_;
 
+    // Code position at or above which the canonical spill cannot be used;
+    // intervals starting at or after it may overlap other registers in the
+    // same group.
+    CodePosition canonicalSpillExclude_;
+
     // If this register is associated with a group of other registers,
     // information about the group. This structure is shared between all
     // registers in the group.
     VirtualRegisterGroup *group_;
 
   public:
     void setMustCopyInput() {
         mustCopyInput_ = true;
@@ -62,50 +73,64 @@ class BacktrackingVirtualRegister : publ
     bool mustCopyInput() {
         return mustCopyInput_;
     }
 
     void setCanonicalSpill(LAllocation alloc) {
         canonicalSpill_ = alloc;
     }
     const LAllocation *canonicalSpill() const {
-        return canonicalSpill_.isStackSlot() ? &canonicalSpill_ : NULL;
+        return canonicalSpill_.isUse() ? NULL : &canonicalSpill_;
+    }
+
+    void setCanonicalSpillExclude(CodePosition pos) {
+        canonicalSpillExclude_ = pos;
     }
-    unsigned canonicalSpillSlot() const {
-        return canonicalSpill_.toStackSlot()->slot();
+    bool hasCanonicalSpillExclude() const {
+        return canonicalSpillExclude_.pos() != 0;
+    }
+    CodePosition canonicalSpillExclude() const {
+        JS_ASSERT(hasCanonicalSpillExclude());
+        return canonicalSpillExclude_;
     }
 
     void setGroup(VirtualRegisterGroup *group) {
         group_ = group;
     }
     VirtualRegisterGroup *group() {
         return group_;
     }
 };
 
 class BacktrackingAllocator : public LiveRangeAllocator<BacktrackingVirtualRegister>
 {
-    // Priority queue element: an interval and its immutable priority.
-    struct QueuedInterval
+    // Priority queue element: either a single interval or a group of
+    // intervals, together with the associated priority.
+    struct QueueItem
     {
         LiveInterval *interval;
+        VirtualRegisterGroup *group;
 
-        QueuedInterval(LiveInterval *interval, size_t priority)
-          : interval(interval), priority_(priority)
+        QueueItem(LiveInterval *interval, size_t priority)
+          : interval(interval), group(NULL), priority_(priority)
         {}
 
-        static size_t priority(const QueuedInterval &v) {
+        QueueItem(VirtualRegisterGroup *group, size_t priority)
+          : interval(NULL), group(group), priority_(priority)
+        {}
+
+        static size_t priority(const QueueItem &v) {
             return v.priority_;
         }
 
       private:
         size_t priority_;
     };
 
-    PriorityQueue<QueuedInterval, QueuedInterval, 0, SystemAllocPolicy> queuedIntervals;
+    PriorityQueue<QueueItem, QueueItem, 0, SystemAllocPolicy> allocationQueue;
 
     // A subrange over which a physical register is allocated.
     struct AllocatedRange {
         LiveInterval *interval;
         const LiveInterval::Range *range;
 
         AllocatedRange()
           : interval(NULL), range(NULL)
@@ -151,47 +176,56 @@ class BacktrackingAllocator : public Liv
 
   private:
 
     typedef Vector<LiveInterval *, 4, SystemAllocPolicy> LiveIntervalVector;
 
     bool init();
     bool canAddToGroup(VirtualRegisterGroup *group, BacktrackingVirtualRegister *reg);
     bool tryGroupRegisters(uint32_t vreg0, uint32_t vreg1);
+    bool tryGroupReusedRegister(uint32_t def, uint32_t use);
     bool groupAndQueueRegisters();
     bool processInterval(LiveInterval *interval);
+    bool processGroup(VirtualRegisterGroup *group);
     bool setIntervalRequirement(LiveInterval *interval);
     bool tryAllocateRegister(PhysicalRegister &r, LiveInterval *interval,
                              bool *success, LiveInterval **pconflicting);
+    bool tryAllocateGroupRegister(PhysicalRegister &r, VirtualRegisterGroup *group,
+                                  bool *psuccess, LiveInterval **pconflicting);
     bool evictInterval(LiveInterval *interval);
-    bool splitAndRequeueInterval(LiveInterval *interval,
-                                 const LiveIntervalVector &newIntervals);
+    bool split(LiveInterval *interval, const LiveIntervalVector &newIntervals);
+    bool requeueIntervals(const LiveIntervalVector &newIntervals);
     void spill(LiveInterval *interval);
 
     bool isReusedInput(LUse *use, LInstruction *ins, bool considerCopy = false);
     bool addLiveInterval(LiveIntervalVector &intervals, uint32_t vreg,
                          CodePosition from, CodePosition to);
 
     bool resolveControlFlow();
     bool reifyAllocations();
 
     void dumpRegisterGroups();
     void dumpLiveness();
     void dumpAllocations();
 
+    struct PrintLiveIntervalRange;
+
     CodePosition minimalDefEnd(LInstruction *ins);
     bool minimalDef(const LiveInterval *interval, LInstruction *ins);
     bool minimalUse(const LiveInterval *interval, LInstruction *ins);
     bool minimalInterval(const LiveInterval *interval, bool *pfixed = NULL);
 
     // Heuristic methods.
 
     size_t computePriority(const LiveInterval *interval);
     size_t computeSpillWeight(const LiveInterval *interval);
 
+    size_t computePriority(const VirtualRegisterGroup *group);
+    size_t computeSpillWeight(const VirtualRegisterGroup *group);
+
     bool chooseIntervalSplit(LiveInterval *interval);
     bool trySplitAcrossHotcode(LiveInterval *interval, bool *success);
     bool trySplitAfterLastRegisterUse(LiveInterval *interval, bool *success);
     bool splitAtAllRegisterUses(LiveInterval *interval);
 };
 
 } // namespace ion
 } // namespace js
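
The header changes above replace the interval-only queue with QueueItem entries that carry either a single interval or a whole group (exactly one pointer is non-NULL), with QueueItem also acting as the priority policy through its static priority() accessor. A rough equivalent using std::priority_queue and stand-in types, only to illustrate the highest-priority-first ordering that go() relies on:

    #include <queue>
    #include <vector>
    #include <cstddef>

    struct Interval;  // stand-ins for LiveInterval and VirtualRegisterGroup
    struct Group;

    struct QueueItem {
        Interval *interval;  // non-NULL for a single-interval entry
        Group *group;        // non-NULL for a whole-group entry
        size_t priority;
    };

    struct LowerPriority {
        bool operator()(const QueueItem &a, const QueueItem &b) const {
            return a.priority < b.priority;  // the largest priority is popped first
        }
    };

    typedef std::priority_queue<QueueItem, std::vector<QueueItem>, LowerPriority>
        AllocationQueue;
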
--- a/js/src/ion/LiveRangeAllocator.h
+++ b/js/src/ion/LiveRangeAllocator.h
@@ -150,16 +150,34 @@ DefinitionCompatibleWith(LInstruction *i
       default:
         JS_NOT_REACHED("Unknown definition policy");
     }
     return false;
 }
 
 #endif // DEBUG
 
+static inline LDefinition *
+FindReusingDefinition(LInstruction *ins, LAllocation *alloc)
+{
+    for (size_t i = 0; i < ins->numDefs(); i++) {
+        LDefinition *def = ins->getDef(i);
+        if (def->policy() == LDefinition::MUST_REUSE_INPUT &&
+            ins->getOperand(def->getReusedInput()) == alloc)
+            return def;
+    }
+    for (size_t i = 0; i < ins->numTemps(); i++) {
+        LDefinition *def = ins->getTemp(i);
+        if (def->policy() == LDefinition::MUST_REUSE_INPUT &&
+            ins->getOperand(def->getReusedInput()) == alloc)
+            return def;
+    }
+    return NULL;
+}
+
 /*
  * A live interval is a set of disjoint ranges of code positions where a
  * virtual register is live. Register allocation operates on these intervals,
  * splitting them as necessary and assigning allocations to them as it runs.
  */
 class LiveInterval
   : public InlineListNode<LiveInterval>,
     public TempObject
--- a/js/src/ion/Lowering.cpp
+++ b/js/src/ion/Lowering.cpp
@@ -461,17 +461,17 @@ LIRGenerator::visitTest(MTest *test)
                 temp1 = temp();
             } else {
                 temp0 = LDefinition::BogusTemp();
                 temp1 = LDefinition::BogusTemp();
             }
 
             LIsNullOrLikeUndefinedAndBranch *lir =
                 new LIsNullOrLikeUndefinedAndBranch(ifTrue, ifFalse, temp0, temp1);
-            if (!useBox(lir, LIsNullOrLikeUndefinedAndBranch::Value, left))
+            if (!useBoxAtStart(lir, LIsNullOrLikeUndefinedAndBranch::Value, left))
                 return false;
             return add(lir, comp);
         }
 
         if (comp->specialization() == MIRType_Boolean) {
             JS_ASSERT(left->type() == MIRType_Value);
             JS_ASSERT(right->type() == MIRType_Boolean);