Added loop-tail driven context slot type demotion. Without all the buzzwords, what this essentially means is this: if the last value that the trace leaves in a slot (which is the value that flows along the backedge back to the loop header) is known to originate from an integer value via i2f, we flag the slot as type integer and recompile the trace. We do this where type stability is certain (e.g. the result of an and) or where it's very likely (the ++ operator). If the speculation fails, the same analysis flags the slot as blocked, which means it will always be double. The hope is that this analysis converges quickly (1-2 recompilations tops).
author Andreas Gal <gal@mozilla.com>
Sun, 06 Jul 2008 22:35:19 -0700
changeset 17479 d268132c1bf643d4f98d139a5444468bbd87e5ee
parent 17478 7f3e0b5b05dd1e749b772a6ca4357262b48b46eb
child 17480 a051c0844e1ba0b0127db7aeccebebd09222e781
push id 1452
push user shaver@mozilla.com
push date Fri, 22 Aug 2008 00:08:22 +0000
treeherder autoland@d13bb0868596 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
milestone 1.9.1a1pre
Added loop-tail driven context slot type demotion. Without all the buzzwords, what this essentially means is this: if the last value that the trace leaves in a slot (which is the value that flows along the backedge back to the loop header) is known to originate from an integer value via i2f, we flag the slot as type integer and recompile the trace. We do this where type stability is certain (e.g. the result of an and) or where it's very likely (the ++ operator). If the speculation fails, the same analysis flags the slot as blocked, which means it will always be double. The hope is that this analysis converges quickly (1-2 recompilations tops).
js/src/jstracer.cpp
js/src/jstracer.h
--- a/js/src/jstracer.cpp
+++ b/js/src/jstracer.cpp
@@ -173,16 +173,24 @@ static inline bool isNumber(jsval v)
 static inline jsdouble asNumber(jsval v)
 {
     JS_ASSERT(isNumber(v));
     if (JSVAL_IS_DOUBLE(v))
         return *JSVAL_TO_DOUBLE(v);
     return (jsdouble)JSVAL_TO_INT(v);
 }
 
+static inline bool isInt32(jsval v)
+{
+    if (!isNumber(v))
+        return false;
+    jsdouble d = asNumber(v);
+    return d == (jsint)d;
+}
+
 static LIns* demote(LirWriter *out, LInsp i)
 {
     if (i->isCall())
         return callArgN(i,0);
     if (i->isop(LIR_i2f) || i->isop(LIR_u2f))
         return i->oprnd1();
     AvmAssert(i->isconstq());
     double cf = i->constvalf();
@@ -309,37 +317,37 @@ public:
          printf("%c", "OID?S?B"[t]);
 #endif         
          return t;
     }
     
     /* Write out a type map for the current scopes and all outer scopes,
        up until the entry scope. */
     void
-    buildTypeMap(JSStackFrame* fp, JSFrameRegs& regs, uint8_t* m)
+    buildExitMap(JSStackFrame* fp, JSFrameRegs& regs, uint8* m)
     {
 #ifdef DEBUG
         printf("side exit type map: ");
 #endif        
         if (fp != recorder.getEntryFrame())
-            buildTypeMap(fp->down, *fp->down->regs, m);
+            buildExitMap(fp->down, *fp->down->regs, m);
         for (unsigned n = 0; n < fp->argc; ++n)
             *m++ = getStoreType(fp->argv[n]);
         for (unsigned n = 0; n < fp->nvars; ++n)
             *m++ = getStoreType(fp->vars[n]);
         for (jsval* sp = fp->spbase; sp < regs.sp; ++sp)
             *m++ = getStoreType(*sp);
 #ifdef DEBUG
         printf("\n");
 #endif        
     }
 
     virtual LInsp insGuard(LOpcode v, LIns *c, SideExit *x) {
         VMSideExitInfo* i = (VMSideExitInfo*)x->vmprivate;
-        buildTypeMap(recorder.getFp(), recorder.getRegs(), i->typeMap);
+        buildExitMap(recorder.getFp(), recorder.getRegs(), i->typeMap);
         return out->insGuard(v, c, x);
     }
 };
 
 TraceRecorder::TraceRecorder(JSContext* cx, Fragmento* fragmento, Fragment* _fragment)
 {
     this->cx = cx;
     this->fragment = _fragment;
@@ -359,31 +367,36 @@ TraceRecorder::TraceRecorder(JSContext* 
     lirbuf->names = new (&gc) LirNameMap(&gc, builtins, fragmento->labels);
     lir = verbose_filter = new (&gc) VerboseWriter(&gc, lir, lirbuf->names);
 #endif
     lir = cse_filter = new (&gc) CseFilter(lir, &gc);
     lir = expr_filter = new (&gc) ExprFilter(lir);
     lir = exit_filter = new (&gc) ExitFilter(lir, *this);
     lir = func_filter = new (&gc) FuncFilter(lir);
     lir->ins0(LIR_trace);
-    /* generate the entry map and stash it in the trace */
-    unsigned entryNativeFrameSlots = nativeFrameSlots(entryFrame, entryRegs);
-    LIns* data = lir_buf_writer->skip(sizeof(VMFragmentInfo) + 
-            entryNativeFrameSlots * sizeof(char));
-    fragmentInfo = (VMFragmentInfo*)data->payload();
-    fragmentInfo->entryNativeFrameSlots = entryNativeFrameSlots;
-    fragmentInfo->maxNativeFrameSlots = entryNativeFrameSlots;
-    uint8_t* m = fragmentInfo->typeMap;
-    for (unsigned n = 0; n < entryFrame->argc; ++n)
-        *m++ = getCoercedType(entryFrame->argv[n]);
-    for (unsigned n = 0; n < entryFrame->nvars; ++n)
-        *m++ = getCoercedType(entryFrame->vars[n]);
-    for (jsval* sp = entryFrame->spbase; sp < entryRegs.sp; ++sp)
-        *m++ = getCoercedType(*sp);
-    fragmentInfo->nativeStackBase = nativeFrameOffset(&cx->fp->spbase[0]);
+    if (fragment->vmprivate == NULL) {
+        /* generate the entry map and stash it in the trace */
+        unsigned entryNativeFrameSlots = nativeFrameSlots(entryFrame, entryRegs);
+        LIns* data = lir_buf_writer->skip(sizeof(VMFragmentInfo) + 
+                entryNativeFrameSlots * sizeof(char));
+        fragmentInfo = (VMFragmentInfo*)data->payload();
+        fragmentInfo->entryNativeFrameSlots = entryNativeFrameSlots;
+        fragmentInfo->maxNativeFrameSlots = entryNativeFrameSlots;
+        /* build the entry type map */
+        uint8* m = fragmentInfo->typeMap;
+        for (unsigned n = 0; n < entryFrame->argc; ++n)
+            *m++ = getCoercedType(entryFrame->argv[n]);
+        for (unsigned n = 0; n < entryFrame->nvars; ++n)
+            *m++ = getCoercedType(entryFrame->vars[n]);
+        for (jsval* sp = entryFrame->spbase; sp < entryRegs.sp; ++sp)
+            *m++ = getCoercedType(*sp);
+        fragmentInfo->nativeStackBase = nativeFrameOffset(&cx->fp->spbase[0]);
+    } else {
+        JS_ASSERT(0);
+    }
     fragment->vmprivate = fragmentInfo;
     fragment->param0 = lir->insImm8(LIR_param, Assembler::argRegs[0], 0);
     fragment->param1 = lir->insImm8(LIR_param, Assembler::argRegs[1], 0);
     fragment->sp = lir->insLoadi(fragment->param0, offsetof(InterpState, sp));
     cx_ins = lir->insLoadi(fragment->param0, offsetof(InterpState, cx));
 #ifdef DEBUG
     lirbuf->names->addName(fragment->param0, "state");
     lirbuf->names->addName(fragment->sp, "sp");
@@ -393,16 +406,18 @@ TraceRecorder::TraceRecorder(JSContext* 
     JSStackFrame* fp = cx->fp;
     unsigned n;
     for (n = 0; n < fp->argc; ++n)
         import(&fp->argv[n], "arg", n);
     for (n = 0; n < fp->nvars; ++n)
         import(&fp->vars[n], "var", n);
     for (n = 0; n < unsigned(fp->regs->sp - fp->spbase); ++n)
         import(&fp->spbase[n], "stack", n);
+    
+    recompileFlag = false;
 }
 
 TraceRecorder::~TraceRecorder()
 {
 #ifdef DEBUG
     delete lirbuf->names;
     delete verbose_filter;
 #endif
@@ -571,17 +586,17 @@ box_jsval(JSContext* cx, jsval* vp, int 
         break;
     }
     return true;
 }
 
 /* Attempt to unbox the given JS frame into a native frame, checking along the way that the 
    supplied typemap holds. */
 static bool
-unbox(JSStackFrame* fp, JSFrameRegs& regs, uint8_t* m, double* native)
+unbox(JSStackFrame* fp, JSFrameRegs& regs, uint8* m, double* native)
 {
     jsval* vp;
     for (vp = fp->argv; vp < fp->argv + fp->argc; ++vp)
         if (!unbox_jsval(*vp, (JSType)*m++, native++))
             return false;
     for (vp = fp->vars; vp < fp->vars + fp->nvars; ++vp)
         if (!unbox_jsval(*vp, (JSType)*m++, native++))
             return false;
@@ -589,17 +604,17 @@ unbox(JSStackFrame* fp, JSFrameRegs& reg
         if (!unbox_jsval(*vp, (JSType)*m++, native++))
             return false;
     return true;
 }
 
 /* Box the given native frame into a JS frame. This only fails due to a hard error 
    (out of memory for example). */
 static bool
-box(JSContext* cx, JSStackFrame* fp, JSFrameRegs& regs, uint8_t* m, double* native)
+box(JSContext* cx, JSStackFrame* fp, JSFrameRegs& regs, uint8* m, double* native)
 {
     jsval* vp;
     for (vp = fp->argv; vp < fp->argv + fp->argc; ++vp)
         if (!box_jsval(cx, vp, (JSType)*m++, native++))
             return false;
     for (vp = fp->vars; vp < fp->vars + fp->nvars; ++vp)
         if (!box_jsval(cx, vp, (JSType)*m++, native++))
             return false;
@@ -686,48 +701,89 @@ void
 TraceRecorder::guard(bool expected, LIns* cond)
 {
     lir->insGuard(expected ? LIR_xf : LIR_xt,
                   cond,
                   snapshot());
 }
 
 bool
-TraceRecorder::checkType(jsval& v, int type)
+TraceRecorder::checkType(jsval& v, uint8& t)
 {
-    /* we initially start all numbers out as JSVAL_DOUBLE so this can't be integer here */
-    JS_ASSERT(type != JSVAL_INT);
-    if (type == JSVAL_DOUBLE && isNumber(v)) {
-        /* lets see whether this is an integer value that we are propagating across the
-           loop */
+    if (isNumber(v)) {
+        /* Initially we start out all numbers as JSVAL_DOUBLE in the type map. If we still
+           see a number in v, its a valid trace but we might want to ask to demote the 
+           slot if we know or suspect that its integer. */
         LIns* i = get(&v);
-        if (i->isop(LIR_i2f)) {
-            // printf("yes!\n");
+        if (TYPEMAP_GET_TYPE(t) == JSVAL_DOUBLE) {
+            if (isInt32(v)) { /* value the interpreter calculated should be integer */
+                /* If the value associated with v via the tracker comes from a i2f operation,
+                   we can be sure it will always be an int. If we see INCVAR, we similarly
+                   speculate that the result will be int, even though this is not
+                   guaranteed and this might cause the entry map to mismatch and thus
+                   the trace never to be entered. */
+                if (i->isop(LIR_i2f) || 
+                        (i->isop(LIR_fadd) && i->oprnd2()->isconstq() && 
+                                fabs(i->oprnd2()->constvalf()) == 1.0)) {
+#ifdef DEBUG
+                    printf("demoting type of an entry slot #%d, triggering re-compilation\n",
+                            nativeFrameOffset(&v));
+#endif                    
+                    TYPEMAP_SET_FLAG(t, TYPEMAP_FLAG_DEMOTE);
+                    recompileFlag = true;
+                    return true; /* keep going */
+                }
+            }
+            return true; 
+        } 
+        /* Looks like we are compiling an integer slot. The recorder always casts to doubles
+           after each integer operation, or emits an operation that produces a double right
+           away. If we started with an integer, we must arrive here pointing at a i2f cast.
+           If not, than demoting the slot didn't work out. Flag the slot to be not
+           demoted again. */
+        JS_ASSERT(TYPEMAP_GET_TYPE(t) == JSVAL_INT);
+        if (!i->isop(LIR_i2f)) {
+#ifdef DEBUG            
+            printf("demoting type of a slot #%d failed, locking it and re-compiling\n",
+                    nativeFrameOffset(&v));
+#endif
+            TYPEMAP_SET_FLAG(t, TYPEMAP_FLAG_DONT_DEMOTE);
+            recompileFlag = true;
+            return true; /* keep going, recompileFlag will trigger error when we are done with
+                            all the slots */
+            
         }
+        JS_ASSERT(isInt32(v));
+        /* Looks like we got the final LIR_i2f as we expected. Overwrite the value in that
+           slot with the argument of i2f since we want the integer store to flow along
+           the loop edge, not the casted value. */
+        set(&v, i->oprnd1());
         return true;
     }
-    return JSVAL_TAG(v) == (jsuint)type;
+    /* for non-number types we expect a precise match of the type */
+    return JSVAL_TAG(v) == TYPEMAP_GET_TYPE(t);
 }
 
-/* Make sure that all loop-carrying values have a stable type along the loop edge. */
+/* Make sure that the current values in the given stack frame and all stack frames
+   up and including entryFrame are type-compatible with the entry map. */
 bool
-TraceRecorder::verifyTypeStability(JSStackFrame* fp, JSFrameRegs& regs, uint8_t* m)
+TraceRecorder::verifyTypeStability(JSStackFrame* fp, JSFrameRegs& regs, uint8* m)
 {
     if (fp != entryFrame)
         verifyTypeStability(fp->down, *fp->down->regs, m);
     for (unsigned n = 0; n < fp->argc; ++n, ++m)
         if (!checkType(fp->argv[n], *m))
             return false;
     for (unsigned n = 0; n < fp->nvars; ++n, ++m)
         if (!checkType(fp->vars[n], *m))
             return false;
     for (jsval* sp = fp->spbase; sp < regs.sp; ++sp, ++m)
         if (!checkType(*sp, *m))
             return false;
-    return true;
+    return !recompileFlag;
 }
 
 void
 TraceRecorder::closeLoop(Fragmento* fragmento)
 {
     if (!verifyTypeStability(entryFrame, entryRegs, fragmentInfo->typeMap)) {
 #ifdef DEBUG
         printf("Trace rejected: unstable loop variables.\n");
--- a/js/src/jstracer.h
+++ b/js/src/jstracer.h
@@ -87,26 +87,29 @@ public:
     void            set(const void* v, T ins);
     void            clear();
 };
 
 struct VMFragmentInfo {
     unsigned                entryNativeFrameSlots;
     unsigned                maxNativeFrameSlots;
     size_t                  nativeStackBase;
-    uint8_t                 typeMap[0];
+    uint8                   typeMap[0];
 };
 
 struct VMSideExitInfo {
-    uint8_t                 typeMap[0];
+    uint8                   typeMap[0];
 };
 
-#define TYPEMAP_GET_TYPE(x) ((x) & JSVAL_TAGMASK)
-#define TYPEMAP_HAS_INT_HINT(x) ((x) & 0x10)
-#define TYPEMAP_SET_INT_HINT(x) ((x) |= 0x10)
+#define TYPEMAP_GET_TYPE(x)         ((x) & JSVAL_TAGMASK)
+#define TYPEMAP_GET_FLAG(x, flag)   ((x) & flag)
+#define TYPEMAP_SET_FLAG(x, flag)   do { (x) |= flag; } while (0)
+
+#define TYPEMAP_FLAG_DEMOTE 0x10 /* try to record as int */
+#define TYPEMAP_FLAG_DONT_DEMOTE 0x20 /* do not try to record as int */
 
 class TraceRecorder {
     JSContext*              cx;
     Tracker<nanojit::LIns*> tracker;
     char*                   entryTypeMap;
     struct JSStackFrame*    entryFrame;
     struct JSFrameRegs      entryRegs;
     nanojit::Fragment*      fragment;
@@ -116,32 +119,33 @@ class TraceRecorder {
     nanojit::LirBufWriter*  lir_buf_writer;
     nanojit::LirWriter*     verbose_filter;
     nanojit::LirWriter*     cse_filter;
     nanojit::LirWriter*     expr_filter;
     nanojit::LirWriter*     exit_filter;
     nanojit::LirWriter*     func_filter;
     nanojit::LIns*          cx_ins;
     nanojit::SideExit       exit;
+    bool                    recompileFlag;
     
     JSStackFrame* findFrame(void* p) const;
     bool onFrame(void* p) const;
     unsigned nativeFrameSlots(JSStackFrame* fp, JSFrameRegs& regs) const;
-    size_t   nativeFrameOffset(void* p) const;
+    size_t nativeFrameOffset(void* p) const;
     void import(jsval*, char *prefix = NULL, int index = 0);
     void trackNativeFrameUse(unsigned slots);
     
     nanojit::SideExit* snapshot();
     unsigned getCallDepth() const;
     void guard(bool expected, nanojit::LIns* cond);
 
     void set(void* p, nanojit::LIns* l);
 
-    bool checkType(jsval& v, int type);
-    bool verifyTypeStability(JSStackFrame* fp, JSFrameRegs& regs, uint8_t* m);
+    bool checkType(jsval& v, uint8& type);
+    bool verifyTypeStability(JSStackFrame* fp, JSFrameRegs& regs, uint8* m);
     void closeLoop(nanojit::Fragmento* fragmento);
     
     jsval& argval(unsigned n) const;
     jsval& varval(unsigned n) const;
     jsval& stackval(int n) const;
     
     nanojit::LIns* arg(unsigned n);
     void arg(unsigned n, nanojit::LIns* i);
@@ -170,27 +174,16 @@ class TraceRecorder {
     void stobj_set_slot(nanojit::LIns* obj_ins, unsigned slot, 
             nanojit::LIns*& dslots_ins, nanojit::LIns* v_ins);
     nanojit::LIns* stobj_get_slot(nanojit::LIns* obj_ins, unsigned slot, 
             nanojit::LIns*& dslots_ins);
     bool native_set(nanojit::LIns* obj_ins, JSScopeProperty* sprop, 
             nanojit::LIns*& dslots_ins, nanojit::LIns* v_ins);
     bool native_get(nanojit::LIns* obj_ins, nanojit::LIns* pobj_ins, JSScopeProperty* sprop, 
             nanojit::LIns*& dslots_ins, nanojit::LIns*& v_ins);
-    bool box_into_jsval(jsval& v, nanojit::LIns* cx_ins, nanojit::LIns* in_ins, 
-            nanojit::LIns*& out_ins);
-    void guard_jsval_tag(nanojit::LIns* v_ins, jsuint tag);
-    nanojit::LIns* int32_to_jsval(nanojit::LIns* i_ins);
-    nanojit::LIns* double_to_jsval(nanojit::LIns* d_ins);
-    nanojit::LIns* boolean_to_jsval(nanojit::LIns* b_ins);
-    nanojit::LIns* object_to_jsval(nanojit::LIns* b_ins);
-    nanojit::LIns* jsval_to_int32(nanojit::LIns* v_ins);
-    nanojit::LIns* jsval_to_double(nanojit::LIns* v_ins);
-    nanojit::LIns* jsval_to_boolean(nanojit::LIns* v_ins);
-    nanojit::LIns* jsval_to_object(nanojit::LIns* v_ins);
 
     bool box_jsval(jsval v, nanojit::LIns*& v_ins);
     bool unbox_jsval(jsval v, nanojit::LIns*& v_ins);
     bool guardThatObjectIsDenseArray(JSObject* obj, 
             nanojit::LIns* obj_ins, nanojit::LIns*& dslots_ins);
     bool guardDenseArrayIndexWithinBounds(JSObject* obj, jsint idx, 
             nanojit::LIns* obj_ins, nanojit::LIns*& dslots_ins, nanojit::LIns* idx_ins);
 public: