Summary: Fix a performance problem with some Alchemy generated SWFs
authorAlexander MacDonald <alexmac@adobe.com>
Thu, 18 Oct 2012 09:30:37 -0700
changeset 7567 c22a2f1de742840a26bcd946267c308f5d8a7a7d
parent 7566 40afc58cab030f342bd054541720efcb9463a2fa
child 7568 165e49235c6b367faaf593dbad754a61f385dc45
push id4262
push userdschaffe@adobe.com
push dateWed, 30 Jan 2013 19:01:31 +0000
bugs1126621
Summary: Fix a performance problem with some Alchemy generated SWFs Severity: ignore Detailed Description: This is a slightly tweaked version of a fix that Edwin Smith came up with a year ago, in his words: During phase 1, the verifier iterates to a fixed point while checking the code, using a worklist algorithm. The worklist is not sorted, so it is possible for a block to be revisited far more often than necessary. If the ABC basic blocks are roughly in reverse postorder, then this problem is worst when the blocks in the worklist are in high-to-low abc order. It turns out the main cause of re-queues is int-typed variables changing from notNull=true -> false. Really, the notnull bit doesn't apply to integers, so we need to force them =true for numbers or just ignore the bool. this patch makes them true by default but doesn't force them. I think there still is noise with uint, but the requeues are way down This visit order assumes the ABC is approximately ideal (nearly reverse postorder) because I didn't want to add the complexity of really finding the CFG and really visiting in reverse postorder. However, if we did that, the asymptotic time would probably go down even more. Dev/QA impact: the usual QA testing notes: N/A API change:N/A Bugs fixed: N/A Doc impact: N/A Smokes passed: avmshell acceptance + build smokes + Alchemy smokes Reviewer: Stan Switzer CL@1126621
core/FrameState-inlines.h
core/FrameState.h
core/Verifier.cpp
--- a/core/FrameState-inlines.h
+++ b/core/FrameState-inlines.h
@@ -1,16 +1,29 @@
 /* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
 /* vi: set ts=4 sw=4 expandtab: (add to ~/.vimrc: set modeline modelines=5) */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 namespace avmplus
 {
+
+REALLY_INLINE bool FrameState::typeNotNull(Traits* t) {
+    switch (Traits::getBuiltinType(t)) {
+        case BUILTIN_int:
+        case BUILTIN_uint:
+        case BUILTIN_number:
+        case BUILTIN_boolean:
+            return true;
+        default:
+            return false;
+    }
+}
+
 REALLY_INLINE FrameValue& FrameState::value(int32_t i)
 {
     AvmAssert(i >= 0 && i < frameSize);
     return locals[i];
 }
 
 REALLY_INLINE const FrameValue& FrameState::value(int32_t i) const
 {
@@ -41,16 +54,21 @@ REALLY_INLINE FrameValue& FrameState::st
     return value(stackBase + stackDepth - 1);
 }
 
 REALLY_INLINE int32_t FrameState::sp() const
 {
     return stackBase + stackDepth - 1;
 }
 
+REALLY_INLINE void FrameState::setType(int32_t i, Traits* t)
+{
+    setType(i, t, typeNotNull(t));
+}
+
 REALLY_INLINE void FrameState::setType(int32_t i, Traits* t, bool notNull, bool isWith)
 {
     FrameValue& v = value(i);
     v.traits = t;
     v.notNull = notNull;
     v.isWith = isWith;
 #ifdef VMCFG_NANOJIT
     BuiltinType bt = Traits::getBuiltinType(t);
@@ -73,27 +91,37 @@ REALLY_INLINE FrameValue& FrameState::pe
     return value(stackBase + stackDepth - n);
 }
 
 REALLY_INLINE const FrameValue& FrameState::peek(int32_t n) const
 {
     return value(stackBase + stackDepth - n);
 }
 
+REALLY_INLINE void FrameState::pop_push(int32_t n, Traits* t)
+{
+    pop_push(n, t, typeNotNull(t));
+}
+
 REALLY_INLINE void FrameState::pop_push(int32_t n, Traits* type, bool notNull)
 {
     int32_t sp = stackDepth - n;
     setType(stackBase + sp, type, notNull);
     stackDepth = sp+1;
 }
 
 REALLY_INLINE void FrameState::push(FrameValue& value)
 {
     AvmAssert(stackBase + stackDepth + 1 <= frameSize);
     setType(stackBase + stackDepth++, value.traits, value.notNull);
 }
 
+REALLY_INLINE void FrameState::push(Traits* traits)
+{
+    push(traits, typeNotNull(traits));
+}
+
 REALLY_INLINE void FrameState::push(Traits* traits, bool notNull)
 {
     AvmAssert(stackBase + stackDepth+1 <= frameSize);
     setType(stackBase + stackDepth++, traits, notNull);
 }
 } // namespace avmplus
--- a/core/FrameState.h
+++ b/core/FrameState.h
@@ -64,19 +64,25 @@ namespace avmplus
         void init(const FrameState* other);
         FrameValue& value(int32_t i);
         const FrameValue& value(int32_t i) const;
         FrameValue& scopeValue(int32_t i);
         const FrameValue& scopeValue(int32_t i) const;
         FrameValue& stackValue(int32_t i);
         FrameValue& stackTop();
         int32_t sp() const;
-        void setType(int32_t i, Traits* t, bool notNull=false, bool isWith=false);
+        void setType(int32_t i, Traits* t);
+		void setType(int32_t i, Traits* t, bool notNull, bool isWith = false);
         void pop(int32_t n=1);
         FrameValue& peek(int32_t n=1);
         const FrameValue& peek(int32_t n) const;
-        void pop_push(int32_t n, Traits* type, bool notNull=false);
+        void pop_push(int32_t n, Traits* t);
+		void pop_push(int32_t n, Traits* t, bool notNull);
         void push(FrameValue& _value);
-        void push(Traits* traits, bool notNull=false);
+        void push(Traits* t);
+        void push(Traits* traits, bool notNull);
+
+    private:
+        bool typeNotNull(Traits* t);
     };
 }
 
 #endif /* __avmplus_FrameState__ */
--- a/core/Verifier.cpp
+++ b/core/Verifier.cpp
@@ -3276,16 +3276,30 @@ namespace avmplus
         }
         #endif
         toplevel->throwVerifyError(errorID, arg1, arg2, arg3);
 
         // This function throws, and should never return.
         AvmAssert(false);
     }
 
+    /**
+      * Insert f into list in abc_pc order.  Use a linear search for the
+      * right position.  Keeping the worklist sorted avoids pathologically
+      * revisiting the same block too many times as long as predecessors
+      * generally come before successors in ABC order.
+      */
+    void requeue(FrameState* f, FrameState** list) {
+      while (*list && (*list)->abc_pc < f->abc_pc)
+           list = &(*list)->wl_next;
+      f->wl_next = *list;
+      *list = f;
+      f->wl_pending = true;
+    }
+
     // Merge the current FrameState (this->state) with the target
     // FrameState (getFrameState(target)), and report verify errors.
     // Fixme: Bug 558876 - |current| must not be dereferenced, it could point
     // outside the valid range of bytecodes.  Its only for back-edge detection.
     void Verifier::checkTarget(const uint8_t* current, const uint8_t* target, bool isExceptionEdge)
     {
         if (emitPass) {
             AvmAssert(hasFrameState(target));
@@ -3372,21 +3386,18 @@ namespace avmplus
             targetChanged |= true;
         targetState->targetOfBackwardsBranch = targetOfBackwardsBranch;
 
         bool targetOfExceptionBranch = targetState->targetOfExceptionBranch || isExceptionEdge;
         if (targetOfExceptionBranch != targetState->targetOfExceptionBranch)
             targetChanged |= true;
         targetState->targetOfExceptionBranch = targetOfExceptionBranch;
 
-        if (targetChanged && !targetState->wl_pending) {
-            targetState->wl_pending = true;
-            targetState->wl_next = worklist;
-            worklist = targetState;
-        }
+        if (targetChanged && !targetState->wl_pending)
+            requeue(targetState, &worklist);
     }
 
     bool Verifier::mergeState(FrameState* targetState)
     {
 #ifdef AVMPLUS_VERBOSE
         if (verbose) {
             core->console << "------------------------------------\n";
             StringBuffer buf(core);