Bug 1142669 part 4 - Fix some inlining issues and inline scripts with loops. r=h4writer
authorJan de Mooij <jdemooij@mozilla.com>
Thu, 19 Mar 2015 15:10:07 +0100
changeset 263583 f7299a88c59c702bfec43e98ced672af61352147
parent 263582 157929ef51b3b8e63bc3dae44e881297d7003616
child 263584 4d744eeea51ce7d68a72be9f143229c60976d5a7
push id4718
push userraliiev@mozilla.com
push dateMon, 11 May 2015 18:39:53 +0000
treeherdermozilla-beta@c20c4ef55f08 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersh4writer
bugs1142669
milestone39.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1142669 part 4 - Fix some inlining issues and inline scripts with loops. r=h4writer
js/public/TrackedOptimizationInfo.h
js/src/jit/BaselineJIT.cpp
js/src/jit/BaselineJIT.h
js/src/jit/IonBuilder.cpp
js/src/jit/JitOptions.cpp
--- a/js/public/TrackedOptimizationInfo.h
+++ b/js/public/TrackedOptimizationInfo.h
@@ -198,18 +198,16 @@ namespace JS {
     _(CantInlineNeedsArgsObj,                                           \
       "can't inline: needs arguments object")                           \
     _(CantInlineDebuggee,                                               \
       "can't inline: debuggee")                                         \
     _(CantInlineUnknownProps,                                           \
       "can't inline: type has unknown properties")                      \
     _(CantInlineExceededDepth,                                          \
       "can't inline: exceeded inlining depth")                          \
-    _(CantInlineBigLoop,                                                \
-      "can't inline: big function with a loop")                         \
     _(CantInlineExceededTotalBytecodeLength,                            \
       "can't inline: exceeded max total bytecode length")               \
     _(CantInlineBigCaller,                                              \
       "can't inline: big caller")                                       \
     _(CantInlineBigCallee,                                              \
       "can't inline: big callee")                                       \
     _(CantInlineNotHot,                                                 \
       "can't inline: not hot enough")                                   \
--- a/js/src/jit/BaselineJIT.cpp
+++ b/js/src/jit/BaselineJIT.cpp
@@ -60,17 +60,18 @@ BaselineScript::BaselineScript(uint32_t 
     traceLoggerScriptsEnabled_(false),
     traceLoggerEngineEnabled_(false),
 # endif
     traceLoggerEnterToggleOffset_(traceLoggerEnterToggleOffset),
     traceLoggerExitToggleOffset_(traceLoggerExitToggleOffset),
     traceLoggerScriptEvent_(),
 #endif
     postDebugPrologueOffset_(postDebugPrologueOffset),
-    flags_(0)
+    flags_(0),
+    maxInliningDepth_(UINT8_MAX)
 { }
 
 static const unsigned BASELINE_MAX_ARGS_LENGTH = 20000;
 
 static bool
 CheckFrame(InterpreterFrame *fp)
 {
     if (fp->isDebuggerEvalFrame()) {
--- a/js/src/jit/BaselineJIT.h
+++ b/js/src/jit/BaselineJIT.h
@@ -207,16 +207,23 @@ struct BaselineScript
     // List mapping indexes of bytecode type sets to the offset of the opcode
     // they correspond to, for use by TypeScript::BytecodeTypes.
     uint32_t bytecodeTypeMapOffset_;
 
     // For generator scripts, we store the native code address for each yield
     // instruction.
     uint32_t yieldEntriesOffset_;
 
+    // The max inlining depth where we can still inline all functions we inlined
+    // when we Ion-compiled this script. This starts as UINT8_MAX, since we have
+    // no data yet, and won't affect inlining heuristics in that case. The value
+    // is updated when we Ion-compile this script. See makeInliningDecision for
+    // more info.
+    uint8_t maxInliningDepth_;
+
   public:
     // Do not call directly, use BaselineScript::New. This is public for cx->new_.
     BaselineScript(uint32_t prologueOffset, uint32_t epilogueOffset,
                    uint32_t profilerEnterToggleOffset,
                    uint32_t profilerExitToggleOffset,
                    uint32_t traceLoggerEnterToggleOffset,
                    uint32_t traceLoggerExitToggleOffset,
                    uint32_t postDebugPrologueOffset);
@@ -419,16 +426,27 @@ struct BaselineScript
     }
 
     static void writeBarrierPre(Zone *zone, BaselineScript *script);
 
     uint32_t *bytecodeTypeMap() {
         MOZ_ASSERT(bytecodeTypeMapOffset_);
         return reinterpret_cast<uint32_t *>(reinterpret_cast<uint8_t *>(this) + bytecodeTypeMapOffset_);
     }
+
+    uint8_t maxInliningDepth() const {
+        return maxInliningDepth_;
+    }
+    void setMaxInliningDepth(uint32_t depth) {
+        MOZ_ASSERT(depth <= UINT8_MAX);
+        maxInliningDepth_ = depth;
+    }
+    void resetMaxInliningDepth() {
+        maxInliningDepth_ = UINT8_MAX;
+    }
 };
 static_assert(sizeof(BaselineScript) % sizeof(uintptr_t) == 0,
               "The data attached to the script must be aligned for fast JIT access.");
 
 inline bool
 IsBaselineEnabled(JSContext *cx)
 {
 #ifdef JS_CODEGEN_NONE
--- a/js/src/jit/IonBuilder.cpp
+++ b/js/src/jit/IonBuilder.cpp
@@ -771,16 +771,19 @@ IonBuilder::init()
 }
 
 bool
 IonBuilder::build()
 {
     if (!init())
         return false;
 
+    if (script()->hasBaselineScript())
+        script()->baselineScript()->resetMaxInliningDepth();
+
     if (!setCurrentAndSpecializePhis(newBlock(pc)))
         return false;
     if (!current)
         return false;
 
 #ifdef DEBUG
     if (info().isAnalysis()) {
         JitSpew(JitSpew_IonScripts, "Analyzing script %s:%" PRIuSIZE " (%p) %s",
@@ -4836,66 +4839,16 @@ IonBuilder::makeInliningDecision(JSObjec
     // Determine whether inlining is possible at callee site
     InliningDecision decision = canInlineTarget(target, callInfo);
     if (decision != InliningDecision_Inline)
         return decision;
 
     // Heuristics!
     JSScript *targetScript = target->nonLazyScript();
 
-    // Cap the inlining depth.
-    if (js_JitOptions.isSmallFunction(targetScript)) {
-        if (inliningDepth_ >= optimizationInfo().smallFunctionMaxInlineDepth()) {
-            trackOptimizationOutcome(TrackedOutcome::CantInlineExceededDepth);
-            return DontInline(targetScript, "Vetoed: exceeding allowed inline depth");
-        }
-    } else {
-        if (inliningDepth_ >= optimizationInfo().maxInlineDepth()) {
-            trackOptimizationOutcome(TrackedOutcome::CantInlineExceededDepth);
-            return DontInline(targetScript, "Vetoed: exceeding allowed inline depth");
-        }
-
-        if (targetScript->hasLoops()) {
-            // Currently, we are not inlining function which have loops because
-            // the cost inherent to inlining the function overcome the cost
-            // calling it. The reason is not yet clear to everybody, and we
-            // hope that we might be able to remove this restriction in the
-            // future.
-            //
-            // In the mean time, if we have opportunities to optimize the
-            // loop better, then we should try it. Such opportunity might be
-            // suggested by:
-            //
-            //  - Constant as argument. Inlining a function called with a
-            //    constant, might help GVN, as well as UCE, and potentially
-            //    improve bound check removal.
-            //
-            //  - Inner function as argument. Inlining a function called
-            //    with an inner function might help scalar replacement at
-            //    removing the scope chain, and thus using registers within
-            //    the loop instead of writting everything back to memory.
-            bool hasOpportunities = false;
-            for (size_t i = 0, e = callInfo.argv().length(); !hasOpportunities && i < e; i++) {
-                MDefinition *arg = callInfo.argv()[i];
-                hasOpportunities = arg->isLambda() || arg->isConstantValue();
-            }
-
-            if (!hasOpportunities) {
-                trackOptimizationOutcome(TrackedOutcome::CantInlineBigLoop);
-                return DontInline(targetScript, "Vetoed: big function that contains a loop");
-            }
-        }
-
-        // Caller must not be excessively large.
-        if (script()->length() >= optimizationInfo().inliningMaxCallerBytecodeLength()) {
-            trackOptimizationOutcome(TrackedOutcome::CantInlineBigCaller);
-            return DontInline(targetScript, "Vetoed: caller excessively large");
-        }
-    }
-
     // Callee must not be excessively large.
     // This heuristic also applies to the callsite as a whole.
     bool offThread = options.offThreadCompilationAvailable();
     if (targetScript->length() > optimizationInfo().inlineMaxBytecodePerCallSite(offThread)) {
         trackOptimizationOutcome(TrackedOutcome::CantInlineBigCallee);
         return DontInline(targetScript, "Vetoed: callee excessively large");
     }
 
@@ -4917,16 +4870,75 @@ IonBuilder::makeInliningDecision(JSObjec
     // Cap the total bytecode length we inline under a single script, to avoid
     // excessive inlining in pathological cases.
     size_t totalBytecodeLength = outerBuilder->inlinedBytecodeLength_ + targetScript->length();
     if (totalBytecodeLength > optimizationInfo().inlineMaxTotalBytecodeLength()) {
         trackOptimizationOutcome(TrackedOutcome::CantInlineExceededTotalBytecodeLength);
         return DontInline(targetScript, "Vetoed: exceeding max total bytecode length");
     }
 
+    // Cap the inlining depth.
+
+    uint32_t maxInlineDepth;
+    if (js_JitOptions.isSmallFunction(targetScript)) {
+        maxInlineDepth = optimizationInfo().smallFunctionMaxInlineDepth();
+    } else {
+        maxInlineDepth = optimizationInfo().maxInlineDepth();
+
+        // Caller must not be excessively large.
+        if (script()->length() >= optimizationInfo().inliningMaxCallerBytecodeLength()) {
+            trackOptimizationOutcome(TrackedOutcome::CantInlineBigCaller);
+            return DontInline(targetScript, "Vetoed: caller excessively large");
+        }
+    }
+
+    BaselineScript *outerBaseline = outermostBuilder()->script()->baselineScript();
+    if (inliningDepth_ >= maxInlineDepth) {
+        // We hit the depth limit and won't inline this function. Give the
+        // outermost script a max inlining depth of 0, so that it won't be
+        // inlined in other scripts. This heuristic is currently only used
+        // when we're inlining scripts with loops, see the comment below.
+        outerBaseline->setMaxInliningDepth(0);
+
+        trackOptimizationOutcome(TrackedOutcome::CantInlineExceededDepth);
+        return DontInline(targetScript, "Vetoed: exceeding allowed inline depth");
+    }
+
+    // Inlining functions with loops can be complicated. For instance, if we're
+    // close to the inlining depth limit and we inline the function f below, we
+    // can no longer inline the call to g:
+    //
+    //   function f() {
+    //      while (cond) {
+    //          g();
+    //      }
+    //   }
+    //
+    // If the loop has many iterations, it's more efficient to call f and inline
+    // g in f.
+    //
+    // To avoid this problem, we record a separate max inlining depth for each
+    // script, indicating at which depth we won't be able to inline all functions
+    // we inlined this time. This solves the issue above, because we will only
+    // inline f if it means we can also inline g.
+    if (targetScript->hasLoops() &&
+        inliningDepth_ >= targetScript->baselineScript()->maxInliningDepth())
+    {
+        trackOptimizationOutcome(TrackedOutcome::CantInlineExceededDepth);
+        return DontInline(targetScript, "Vetoed: exceeding allowed script inline depth");
+    }
+
+    // Update the max depth at which we can inline the outer script.
+    MOZ_ASSERT(maxInlineDepth > inliningDepth_);
+    uint32_t scriptInlineDepth = maxInlineDepth - inliningDepth_ - 1;
+    if (scriptInlineDepth < outerBaseline->maxInliningDepth())
+        outerBaseline->setMaxInliningDepth(scriptInlineDepth);
+
+    // End of heuristics, we will inline this function.
+
     // TI calls ObjectStateChange to trigger invalidation of the caller.
     TypeSet::ObjectKey *targetKey = TypeSet::ObjectKey::get(target);
     targetKey->watchStateChangeForInlinedCall(constraints());
 
     outerBuilder->inlinedBytecodeLength_ += targetScript->length();
 
     return InliningDecision_Inline;
 }
--- a/js/src/jit/JitOptions.cpp
+++ b/js/src/jit/JitOptions.cpp
@@ -155,20 +155,16 @@ JitOptions::JitOptions()
     // How many actual arguments are accepted on the C stack.
     SET_DEFAULT(maxStackArgs, 4096);
 
     // How many times we will try to enter a script via OSR before
     // invalidating the script.
     SET_DEFAULT(osrPcMismatchesBeforeRecompile, 6000);
 
     // The bytecode length limit for small function.
-    //
-    // The default for this was arrived at empirically via benchmarking.
-    // We may want to tune it further after other optimizations have gone
-    // in.
     SET_DEFAULT(smallFunctionMaxBytecodeLength_, 100);
 }
 
 bool
 JitOptions::isSmallFunction(JSScript *script) const
 {
     return script->length() <= smallFunctionMaxBytecodeLength_;
 }