Bug 1229813 - Improve Branch Pruning heuristics. r=jandem
authorNicolas B. Pierron <nicolas.b.pierron@mozilla.com>
Mon, 27 Jun 2016 16:52:47 +0000
changeset 302712 6e994818e8f60192ca953c7b48adbc368b64f5f4
parent 302711 ab8e9e4b893d13748c71c463296840dc356805b9
child 302713 5f93302810d063dae0f3f2c243008986027a90fe
push id78841
push usernpierron@mozilla.com
push dateMon, 27 Jun 2016 16:53:12 +0000
treeherdermozilla-inbound@6e994818e8f6 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersjandem
bugs1229813
milestone50.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1229813 - Improve Branch Pruning heuristics. r=jandem
js/src/jit-test/tests/ion/recover-object-bug1175233.js
js/src/jit/IonAnalysis.cpp
js/src/jit/JitOptions.cpp
js/src/jit/JitOptions.h
--- a/js/src/jit-test/tests/ion/recover-object-bug1175233.js
+++ b/js/src/jit-test/tests/ion/recover-object-bug1175233.js
@@ -2,27 +2,27 @@
 //
 // Unboxed object optimization might not trigger in all cases, thus we ensure
 // that Scalar Replacement optimization is working well independently of the
 // object representation.
 
 // Ion eager fails the test below because we have not yet created any
 // template object in baseline before running the content of the top-level
 // function.
-if (getJitCompilerOptions()["ion.warmup.trigger"] <= 90)
-    setJitCompilerOption("ion.warmup.trigger", 90);
+if (getJitCompilerOptions()["ion.warmup.trigger"] <= 130)
+    setJitCompilerOption("ion.warmup.trigger", 130);
 
 // This test checks that we are able to remove the getelem & setelem with scalar
 // replacement, so we should not force inline caches, as this would skip the
 // generation of getelem & setelem instructions.
 if (getJitCompilerOptions()["ion.forceinlineCaches"])
     setJitCompilerOption("ion.forceinlineCaches", 0);
 
 var uceFault = function (j) {
-    if (j >= 31)
+    if (j >= max)
         uceFault = function (j) { return true; };
     return false;
 }
 
 function f(j) {
     var i = Math.pow(2, j) | 0;
     var obj = {
       i: i,
@@ -36,13 +36,13 @@ function f(j) {
     if (uceFault(j) || uceFault(j)) {
         // MObjectState::recover should neither fail,
         // nor coerce its result to an int32.
         assertEq(obj.v, 2 * i);
     }
     return 2 * obj.i;
 }
 
-var min = -100;
-for (var j = min; j <= 31; ++j) {
+var max = 150;
+for (var j = 0; j <= max; ++j) {
     with({}){};
     f(j);
 }
--- a/js/src/jit/IonAnalysis.cpp
+++ b/js/src/jit/IonAnalysis.cpp
@@ -260,20 +260,23 @@ jit::PruneUnusedBranches(MIRGenerator* m
     MOZ_ASSERT(!mir->compilingAsmJS(), "AsmJS compilation have no code coverage support.");
 
     // We do a reverse-post-order traversal, marking basic blocks when the block
     // have to be converted into bailing blocks, and flagging block as
     // unreachable if all predecessors are flagged as bailing or unreachable.
     bool someUnreachable = false;
     for (ReversePostorderIterator block(graph.rpoBegin()); block != graph.rpoEnd(); block++) {
         JitSpew(JitSpew_Prune, "Investigate Block %d:", block->id());
+        JitSpewIndent indent(JitSpew_Prune);
 
         // Do not touch entry basic blocks.
-        if (*block == graph.osrBlock() || *block == graph.entryBlock())
+        if (*block == graph.osrBlock() || *block == graph.entryBlock()) {
+            JitSpew(JitSpew_Prune, "Block %d is an entry point.", block->id());
             continue;
+        }
 
         // Compute if all the predecessors of this block are either bailling out
         // or are already flagged as unreachable.
         bool isUnreachable = true;
         bool isLoopHeader = block->isLoopHeader();
         size_t numPred = block->numPredecessors();
         size_t i = 0;
         for (; i < numPred; i++) {
@@ -297,56 +300,133 @@ jit::PruneUnusedBranches(MIRGenerator* m
         // likely to not be visited after.
         bool shouldBailout =
             block->getHitState() == MBasicBlock::HitState::Count &&
             block->getHitCount() == 0;
 
         // Check if the predecessors got accessed a large number of times in
         // comparisons of the current block, in order to know if our attempt at
         // removing this block is not premature.
-        if (shouldBailout) {
+        if (!isUnreachable && shouldBailout) {
             size_t p = numPred;
             size_t predCount = 0;
+            size_t numSuccessorsOfPreds = 1;
             bool isLoopExit = false;
             while (p--) {
                 MBasicBlock* pred = block->getPredecessor(p);
                 if (pred->getHitState() == MBasicBlock::HitState::Count)
                     predCount += pred->getHitCount();
                 isLoopExit |= pred->isLoopHeader() && pred->backedge() != *block;
+                numSuccessorsOfPreds += pred->numSuccessors() - 1;
             }
 
-            // This assumes that instructions are numbered in sequence, which is
-            // the case after IonBuilder creation of basic blocks.
-            size_t numInst = block->rbegin()->id() - block->begin()->id();
-
-            // This sum is not homogeneous but gives good results on benchmarks
-            // like Kraken and Octane. The current block has not been executed
-            // yet, and ...
+            // Iterate over the approximated set of dominated blocks and count
+            // the number of instructions which are dominated.  Note that this
+            // approximation has issues with OSR blocks, but this should not be
+            // a big deal.
+            size_t numDominatedInst = 0;
+            size_t numEffectfulInst = 0;
+            int numInOutEdges = block->numPredecessors();
+            size_t branchSpan = 0;
+            ReversePostorderIterator it(block);
+            do {
+                // Iterate over dominated blocks, and visit exit blocks as well.
+                numInOutEdges -= it->numPredecessors();
+                if (numInOutEdges < 0)
+                    break;
+                numInOutEdges += it->numSuccessors();
+
+                // Collect information about the instructions within the block.
+                for (MDefinitionIterator def(*it); def; def++) {
+                    numDominatedInst++;
+                    if (def->isEffectful())
+                        numEffectfulInst++;
+                }
+
+                it++;
+                branchSpan++;
+            } while(numInOutEdges > 0 && it != graph.rpoEnd());
+
+            // The goal of branch pruning is to remove branches which are
+            // preventing other optimization, while keeping branches which would
+            // be costly if we were to bailout. The following heuristics are
+            // made to prevent bailouts in branches when we estimate that the
+            // confidence is not enough to compensate for the cost of a bailout.
+            //
+            //   1. Confidence for removal varies with the number of hit counts
+            //      of the predecessor. The reason being that the likelyhood of
+            //      taking this branch is decreasing with the number of hit
+            //      counts of the predecessor.
             //
-            //   1. If the number of times the predecessor got executed is
-            //      larger, then we are less likely to hit this block.
+            //   2. Confidence for removal varies with the number of dominated
+            //      instructions. The reason being that the complexity of the
+            //      branch increases with the number of instructions, thus
+            //      working against other optimizations.
+            //
+            //   3. Confidence for removal varies with the span of the
+            //      branch. The reason being that a branch that spans over a
+            //      large set of blocks is likely to remove optimization
+            //      opportunity as it prevents instructions from the other
+            //      branches to dominate the blocks which are after.
+            //
+            //   4. Confidence for removal varies with the number of effectful
+            //      instructions. The reason being that an effectful instruction
+            //      can remove optimization opportunities based on Scalar
+            //      Replacement, and based on Alias Analysis.
             //
-            //   2. If the block is large, then this is likely a corner case,
-            //      and thus we are less likely to hit this block.
-            if (predCount + numInst < 75)
+            // The following converts various units in some form of arbitrary
+            // score, such that we can compare it to a threshold.
+            size_t score = 0;
+            MOZ_ASSERT(numSuccessorsOfPreds >= 1);
+            score += predCount * JitOptions.branchPruningHitCountFactor / numSuccessorsOfPreds;
+            score += numDominatedInst * JitOptions.branchPruningInstFactor;
+            score += branchSpan * JitOptions.branchPruningBlockSpanFactor;
+            score += numEffectfulInst * JitOptions.branchPruningEffectfulInstFactor;
+            if (score < JitOptions.branchPruningThreshold)
+                shouldBailout = false;
+
+            // If the predecessors do not have enough hit counts, keep the
+            // branch, until we recompile this function later, with more
+            // information.
+            if (predCount / numSuccessorsOfPreds < 50)
+                shouldBailout = false;
+
+            // There is only a single successors to the predecessors, thus the
+            // decision should be taken as part of the previous block
+            // investigation, and this block should be unreachable.
+            if (numSuccessorsOfPreds == 1)
                 shouldBailout = false;
 
             // If this is the exit block of a loop, then keep this basic
             // block. This heuristic is useful as a bailout is often much more
             // costly than a simple exit sequence.
             if (isLoopExit)
                 shouldBailout = false;
+
+            // Interpreters are often implemented as a table switch within a for
+            // loop. What might happen is that the interpreter heats up in a
+            // subset of instructions, but might need other instructions for the
+            // rest of the evaluation.
+            if (numSuccessorsOfPreds > 8)
+                shouldBailout = false;
+
+            JitSpew(JitSpew_Prune, "info: block %d,"
+                    " predCount: %lu, domInst: %lu, span: %lu, effectful: %lu, "
+                    " isLoopExit: %s, numSuccessorsOfPred: %lu."
+                    " (score: %lu, shouldBailout: %s)",
+                    block->id(), predCount, numDominatedInst, branchSpan, numEffectfulInst,
+                    isLoopExit ? "true" : "false", numSuccessorsOfPreds,
+                    score, shouldBailout ? "true" : "false");
         }
 
         // Continue to the next basic block if the current basic block can
         // remain unchanged.
         if (!isUnreachable && !shouldBailout)
             continue;
 
-        JitSpewIndent indent(JitSpew_Prune);
         someUnreachable = true;
         if (isUnreachable) {
             JitSpew(JitSpew_Prune, "Mark block %d as unreachable.", block->id());
             block->setUnreachable();
             // If the block is unreachable, then there is no need to convert it
             // to a bailing block.
         } else if (shouldBailout) {
             JitSpew(JitSpew_Prune, "Mark block %d as bailing block.", block->id());
--- a/js/src/jit/JitOptions.cpp
+++ b/js/src/jit/JitOptions.cpp
@@ -167,16 +167,27 @@ DefaultJitOptions::DefaultJitOptions()
 
     // The bytecode length limit for small function.
     SET_DEFAULT(smallFunctionMaxBytecodeLength_, 130);
 
     // An artificial testing limit for the maximum supported offset of
     // pc-relative jump and call instructions.
     SET_DEFAULT(jumpThreshold, UINT32_MAX);
 
+    // Branch pruning heuristic is based on a scoring system, which is look at
+    // different metrics and provide a score. The score is computed as a
+    // projection where each factor defines the weight of each metric. Then this
+    // score is compared against a threshold to prevent a branch from being
+    // removed.
+    SET_DEFAULT(branchPruningHitCountFactor, 1);
+    SET_DEFAULT(branchPruningInstFactor, 10);
+    SET_DEFAULT(branchPruningBlockSpanFactor, 100);
+    SET_DEFAULT(branchPruningEffectfulInstFactor, 3500);
+    SET_DEFAULT(branchPruningThreshold, 4000);
+
     // Force how many invocation or loop iterations are needed before compiling
     // a function with the highest ionmonkey optimization level.
     // (i.e. OptimizationLevel_Normal)
     const char* forcedDefaultIonWarmUpThresholdEnv = "JIT_OPTION_forcedDefaultIonWarmUpThreshold";
     if (const char* env = getenv(forcedDefaultIonWarmUpThresholdEnv)) {
         Maybe<int> value = ParseInt(env);
         if (value.isSome())
             forcedDefaultIonWarmUpThreshold.emplace(value.ref());
--- a/js/src/jit/JitOptions.h
+++ b/js/src/jit/JitOptions.h
@@ -71,16 +71,21 @@ struct DefaultJitOptions
     bool wasmTestMode;
     uint32_t baselineWarmUpThreshold;
     uint32_t exceptionBailoutThreshold;
     uint32_t frequentBailoutThreshold;
     uint32_t maxStackArgs;
     uint32_t osrPcMismatchesBeforeRecompile;
     uint32_t smallFunctionMaxBytecodeLength_;
     uint32_t jumpThreshold;
+    uint32_t branchPruningHitCountFactor;
+    uint32_t branchPruningInstFactor;
+    uint32_t branchPruningBlockSpanFactor;
+    uint32_t branchPruningEffectfulInstFactor;
+    uint32_t branchPruningThreshold;
     mozilla::Maybe<uint32_t> forcedDefaultIonWarmUpThreshold;
     mozilla::Maybe<uint32_t> forcedDefaultIonSmallFunctionWarmUpThreshold;
     mozilla::Maybe<IonRegisterAllocator> forcedRegisterAllocator;
 
     // The options below affect the rest of the VM, and not just the JIT.
     bool disableUnboxedObjects;
 
     DefaultJitOptions();