Bug 475115: LIR_jtbl: jump tables to implement jsop_tableswitch, r=gal,r=edwsmith
author: David Mandelin <dmandelin@mozilla.com>
Wed, 11 Feb 2009 17:40:27 -0800
changeset 25099 99f3744acfef8e6e14f20aa9ecbdb452a1720f23
parent 25098 f2cef256bf25930c09069eafa4164134deaadaba
child 25100 975b36c50d33c8de608e798fcde43043db10bf68
push id: unknown
push user: unknown
push date: unknown
reviewers: gal, edwsmith
bugs: 475115
milestone: 1.9.2a1pre
js/src/jstracer.cpp
js/src/jstracer.h
js/src/nanojit/Assembler.cpp
js/src/nanojit/Assembler.h
js/src/nanojit/LIR.cpp
js/src/nanojit/LIR.h
js/src/nanojit/LIRopcode.tbl
js/src/nanojit/Native.h
js/src/nanojit/Nativei386.cpp
js/src/nanojit/Nativei386.h
--- a/js/src/jstracer.cpp
+++ b/js/src/jstracer.cpp
@@ -343,16 +343,28 @@ static inline bool isInt32(jsval v)
 {
     if (!isNumber(v))
         return false;
     jsdouble d = asNumber(v);
     jsint i;
     return JSDOUBLE_IS_INT(d, i);
 }
 
+static inline bool asInt32(jsval v, jsint& rv)
+{
+    if (!isNumber(v))
+        return false;
+    if (JSVAL_IS_INT(v)) {
+        rv = JSVAL_TO_INT(v);
+        return true;
+    }
+    jsdouble d = asNumber(v);
+    return JSDOUBLE_IS_INT(d, rv);
+}
+
 /* Return JSVAL_DOUBLE for all numbers (int and double) and the tag otherwise. */
 static inline uint8 getPromotedType(jsval v)
 {
     return JSVAL_IS_INT(v) ? JSVAL_DOUBLE : JSVAL_IS_NULL(v) ? JSVAL_TNULL : uint8(JSVAL_TAG(v));
 }
 
 /* Return JSVAL_INT for all whole numbers that fit into signed 32-bit and the tag otherwise. */
 static inline uint8 getCoercedType(jsval v)
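
The new asInt32 helper complements isInt32 just above it by also handing back the integer value. A minimal usage sketch, with a hypothetical caller that is not part of this patch:

    jsint i;
    if (asInt32(v, i)) {
        // v is a number representable as a signed 32-bit int; use i directly.
    } else {
        // v is not a number, or does not fit in int32.
    }
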
@@ -2493,30 +2505,37 @@ JS_REQUIRES_STACK void
 TraceRecorder::compile(JSTraceMonitor* tm)
 {
     Fragmento* fragmento = tm->fragmento;
     if (treeInfo->maxNativeStackSlots >= MAX_NATIVE_STACK_SLOTS) {
         debug_only_v(printf("Trace rejected: excessive stack use.\n"));
         js_BlacklistPC(tm, fragment, treeInfo->globalShape);
         return;
     }
-    ++treeInfo->branchCount;
+    if (anchor && anchor->exitType != CASE_EXIT)
+        ++treeInfo->branchCount;
     if (lirbuf->outOMem()) {
         fragmento->assm()->setError(nanojit::OutOMem);
         return;
     }
     ::compile(fragmento->assm(), fragment);
     if (fragmento->assm()->error() == nanojit::OutOMem)
         return;
     if (fragmento->assm()->error() != nanojit::None) {
         js_BlacklistPC(tm, fragment, treeInfo->globalShape);
         return;
     }
-    if (anchor)
-        fragmento->assm()->patch(anchor);
+    if (anchor) {
+#ifdef NANOJIT_IA32
+        if (anchor->exitType == CASE_EXIT)
+            fragmento->assm()->patch(anchor, anchor->switchInfo);
+        else
+#endif
+            fragmento->assm()->patch(anchor);
+    }
     JS_ASSERT(fragment->code());
     JS_ASSERT(!fragment->vmprivate);
     if (fragment == fragment->root)
         fragment->vmprivate = treeInfo;
     /* :TODO: windows support */
 #if defined DEBUG && !defined WIN32
     const char* filename = cx->fp->script->filename;
     char* label = (char*)malloc((filename ? strlen(filename) : 7) + 16);
@@ -4273,16 +4292,17 @@ monitor_loop:
 
     /* If we exit on a branch, or on a tree call guard, try to grow the inner tree (in case
        of a branch exit), or the tree nested around the tree we exited from (in case of the
        tree call guard). */
     switch (lr->exitType) {
       case UNSTABLE_LOOP_EXIT:
         return js_AttemptToStabilizeTree(cx, lr, NULL);
       case BRANCH_EXIT:
+      case CASE_EXIT:
         return js_AttemptToExtendTree(cx, lr, NULL, NULL);
       case LOOP_EXIT:
         if (innermostNestedGuard)
             return js_AttemptToExtendTree(cx, innermostNestedGuard, lr, NULL);
         return false;
       default:
         /* No, this was an unusual exit (i.e. out of memory/GC), so just resume interpretation. */
         return false;
@@ -4328,16 +4348,28 @@ TraceRecorder::monitorRecording(JSContex
 
         /* An explicit return from callDepth 0 should end the loop, not abort it. */
         if (*pc == JSOP_RETURN && tr->callDepth == 0) {
             AUDIT(returnLoopExits);
             tr->endLoop(&JS_TRACE_MONITOR(cx));
             js_DeleteRecorder(cx);
             return JSMRS_STOP; /* done recording */
         }
+#ifdef NANOJIT_IA32
+        /* Handle tableswitches specially--prepare a jump table if needed. */
+        if (*pc == JSOP_TABLESWITCH || *pc == JSOP_TABLESWITCHX) {
+            LIns* guardIns = tr->tableswitch();
+            if (guardIns) {
+                tr->fragment->lastIns = guardIns;
+                tr->compile(&JS_TRACE_MONITOR(cx));
+                js_DeleteRecorder(cx);
+                return JSMRS_STOP;
+            }
+        }
+#endif
     }
 
     /* If it's not a break or a return from a loop, continue recording and follow the trace. */
 
     /* We check for imacro-calling bytecodes inside the switch cases to resolve
        the "if" condition at the compile time. */
     bool flag;
     switch (op) {
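
The interception above ends recording at the switch itself: when tableswitch() returns a guard, that LIR_xtbl guard becomes the fragment's last instruction and the trace is compiled on the spot, analogous to how the JSOP_RETURN case just above stops recording. A rough sketch of the resulting run-time flow (descriptive only, not code from the patch):

    // compiled trace ... -> LIR_xtbl guard -> indirect jump through the table
    //   slot already patched    -> enter the compiled trace for that case
    //   slot still the epilogue -> CASE_EXIT side exit -> js_AttemptToExtendTree
    //                              records the case body, and the new fragment's
    //                              entry is patched into that slot for next time
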
@@ -4960,16 +4992,74 @@ TraceRecorder::ifop()
     if (!x->isCond()) {
         x = lir->ins_eq0(x);
         expected = !expected;
     }
     guard(expected, x, BRANCH_EXIT);
     return true;
 }
 
+#ifdef NANOJIT_IA32
+/* Record LIR for a tableswitch or tableswitchx op. We record LIR only
+ * the "first" time we hit the op. Later, when we start traces after
+ * exiting that trace, we just patch. */
+JS_REQUIRES_STACK LIns*
+TraceRecorder::tableswitch()
+{
+    jsval& v = stackval(-1);
+    LIns* v_ins = get(&v);
+    /* no need to guard if condition is constant */
+    if (v_ins->isconst() || v_ins->isconstq())
+        return NULL;
+
+    jsbytecode* pc = cx->fp->regs->pc;
+    /* Starting a new trace after exiting a trace via switch. */
+    if (anchor && (anchor->exitType == CASE_EXIT ||
+                   anchor->exitType == DEFAULT_EXIT) && fragment->ip == pc)
+        return NULL;
+
+    /* Decode jsop. */
+    jsint low, high;
+    if (*pc == JSOP_TABLESWITCH) {
+        pc += JUMP_OFFSET_LEN;
+        low = GET_JUMP_OFFSET(pc);
+        pc += JUMP_OFFSET_LEN;
+        high = GET_JUMP_OFFSET(pc);
+    } else {
+        pc += JUMPX_OFFSET_LEN;
+        low = GET_JUMPX_OFFSET(pc);
+        pc += JUMPX_OFFSET_LEN;
+        high = GET_JUMPX_OFFSET(pc);            
+    }
+
+    /* Really large tables won't fit in a page. This is a conservative
+     * check. If it matters in practice we need to go off-page. */
+    if ((high + 1 - low) * sizeof(intptr_t*) + 128 > (unsigned) LARGEST_UNDERRUN_PROT) {
+        // This throws away the return value of switchop but it seems
+        // ok because switchop always returns true.
+        (void) switchop();
+        return NULL;
+    }
+
+    /* Generate switch LIR. */
+    LIns* si_ins = lir_buf_writer->skip(sizeof(SwitchInfo));
+    SwitchInfo* si = (SwitchInfo*) si_ins->payload();
+    si->count = high + 1 - low;
+    si->table = 0;
+    si->index = (uint32) -1;
+    LIns* diff = lir->ins2(LIR_sub, f2i(v_ins), lir->insImm(low));
+    LIns* cmp = lir->ins2(LIR_ult, diff, lir->insImm(si->count));
+    lir->insGuard(LIR_xf, cmp, snapshot(DEFAULT_EXIT));
+    lir->insStore(diff, lir->insImmPtr(&si->index), lir->insImm(0));
+    LIns* exit = snapshot(CASE_EXIT);
+    ((GuardRecord*) exit->payload())->exit->switchInfo = si;
+    return lir->insGuard(LIR_xtbl, diff, exit);
+}
+#endif
+
 JS_REQUIRES_STACK bool
 TraceRecorder::switchop()
 {
     jsval& v = stackval(-1);
     LIns* v_ins = get(&v);
     /* no need to guard if condition is constant */
     if (v_ins->isconst() || v_ins->isconstq())
         return true;
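
Two details of tableswitch() above are worth spelling out. First, the on-page capacity check: with 4-byte table entries on IA32 and LARGEST_UNDERRUN_PROT raised to 3200 (see the Nativei386.h hunk below), the test admits at most 768 cases; larger switches fall back to switchop() and ordinary guards. A self-contained sketch of just that test, with an illustrative function name:

    // Mirrors (high + 1 - low) * sizeof(intptr_t*) + 128 > LARGEST_UNDERRUN_PROT,
    // assuming 4-byte pointers and the new limit of 3200.
    static bool tableFitsOnPage(unsigned caseCount)
    {
        const unsigned largestUnderrunProt = 3200;
        return caseCount * sizeof(void*) + 128 <= largestUnderrunProt;   // 768 * 4 + 128 == 3200
    }

Second, the recorded LIR: the switch value is integerized and rebased by subtracting low, an unsigned compare guards the range so out-of-range values leave through DEFAULT_EXIT, the rebased index is stored into si->index for later patching, and the trace ends with an LIR_xtbl guard whose side exit carries the SwitchInfo.
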
@@ -7828,17 +7918,21 @@ JS_REQUIRES_STACK bool
 TraceRecorder::record_JSOP_AND()
 {
     return ifop();
 }
 
 JS_REQUIRES_STACK bool
 TraceRecorder::record_JSOP_TABLESWITCH()
 {
+#ifdef NANOJIT_IA32
+    return true;
+#else
     return switchop();
+#endif
 }
 
 JS_REQUIRES_STACK bool
 TraceRecorder::record_JSOP_LOOKUPSWITCH()
 {
     return switchop();
 }
 
@@ -8469,17 +8563,21 @@ JS_REQUIRES_STACK bool
 TraceRecorder::record_JSOP_DEFAULTX()
 {
     return true;
 }
 
 JS_REQUIRES_STACK bool
 TraceRecorder::record_JSOP_TABLESWITCHX()
 {
+#ifdef NANOJIT_IA32
+    return true;
+#else
     return switchop();
+#endif
 }
 
 JS_REQUIRES_STACK bool
 TraceRecorder::record_JSOP_LOOKUPSWITCHX()
 {
     return switchop();
 }
 
--- a/js/src/jstracer.h
+++ b/js/src/jstracer.h
@@ -202,16 +202,18 @@ public:
     JS_REQUIRES_STACK void captureMissingGlobalTypes(JSContext* cx,
                                                      SlotList& slots,
                                                      unsigned stackSlots);
     bool matches(TypeMap& other) const;
 };
 
 enum ExitType {
     BRANCH_EXIT,
+    CASE_EXIT,          // Exit at a tableswitch via a numbered case
+    DEFAULT_EXIT,       // Exit at a tableswitch via default
     LOOP_EXIT,
     NESTED_EXIT,
     MISMATCH_EXIT,
     OOM_EXIT,
     OVERFLOW_EXIT,
     UNSTABLE_LOOP_EXIT,
     TIMEOUT_EXIT,
     DEEP_BAIL_EXIT,
@@ -435,16 +437,19 @@ class TraceRecorder : public avmplus::GC
     nanojit::LIns* f2i(nanojit::LIns* f);
     JS_REQUIRES_STACK nanojit::LIns* makeNumberInt32(nanojit::LIns* f);
     JS_REQUIRES_STACK nanojit::LIns* stringify(jsval& v);
 
     JS_REQUIRES_STACK bool call_imacro(jsbytecode* imacro);
 
     JS_REQUIRES_STACK bool ifop();
     JS_REQUIRES_STACK bool switchop();
+#ifdef NANOJIT_IA32
+    JS_REQUIRES_STACK nanojit::LIns* tableswitch();
+#endif
     JS_REQUIRES_STACK bool inc(jsval& v, jsint incr, bool pre = true);
     JS_REQUIRES_STACK bool inc(jsval& v, nanojit::LIns*& v_ins, jsint incr, bool pre = true);
     JS_REQUIRES_STACK bool incProp(jsint incr, bool pre = true);
     JS_REQUIRES_STACK bool incElem(jsint incr, bool pre = true);
     JS_REQUIRES_STACK bool incName(jsint incr, bool pre = true);
 
     JS_REQUIRES_STACK void strictEquality(bool equal, bool cmpCase);
     JS_REQUIRES_STACK bool equality(bool negate, bool tryBranchAfterCond);
--- a/js/src/nanojit/Assembler.cpp
+++ b/js/src/nanojit/Assembler.cpp
@@ -674,16 +674,27 @@ namespace nanojit
         GuardRecord *rec = exit->guards;
         AvmAssert(rec);
         while (rec) {
             patch(rec);
             rec = rec->next;
         }
     }
 
+#ifdef NANOJIT_IA32
+    void Assembler::patch(SideExit* exit, SwitchInfo* si)
+    {
+		for (GuardRecord* lr = exit->guards; lr; lr = lr->next) {
+			Fragment *frag = lr->exit->target;
+			NanoAssert(frag->fragEntry != 0);
+			si->table[si->index] = frag->fragEntry;
+		}
+    }
+#endif
+
     NIns* Assembler::asm_exit(LInsp guard)
     {
 		SideExit *exit = guard->record()->exit;
 		NIns* at = 0;
 		if (!_branchStateMap->get(exit))
 		{
 			at = asm_leave_trace(guard);
 		}
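
This overload is what TraceRecorder::compile calls for a CASE_EXIT anchor (see the jstracer.cpp hunk above). The net effect, in a hedged one-line restatement where newFragEntry is only an illustrative name:

    // The running trace stored the taken slot number into si->index right
    // before LIR_xtbl fired, so the freshly compiled case fragment can be
    // wired in by overwriting exactly that slot:
    //     si->table[si->index] = newFragEntry;
    // Later executions of the switch then jump straight to the new trace.
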
@@ -1021,20 +1032,21 @@ namespace nanojit
 #define countlir_xcc()
 #define countlir_x()
 #define countlir_loop()
 #define countlir_call()
 #endif
 
 	void Assembler::gen(LirFilter* reader,  NInsList& loopJumps)
 	{
-		// trace must end with LIR_x, LIR_loop, or LIR_ret
+		// trace must end with LIR_x, LIR_loop, LIR_ret, or LIR_xtbl
 		NanoAssert(reader->pos()->isop(LIR_x) ||
 		           reader->pos()->isop(LIR_loop) ||
-		           reader->pos()->isop(LIR_ret));
+		           reader->pos()->isop(LIR_ret) ||
+				   reader->pos()->isop(LIR_xtbl));
 		 
 		for (LInsp ins = reader->read(); ins != 0 && !error(); ins = reader->read())
 		{
 			LOpcode op = ins->opcode();			
 			switch(op)
 			{
 				default:
 					NanoAssertMsgf(false, "unsupported LIR instruction: %d (~0x40: %d)", op, op&~LIR64);
@@ -1336,16 +1348,27 @@ namespace nanojit
                         label->addr = _nIns;
                     }
 					verbose_only( if (_verbose) { outputAddr=true; asm_output("[%s]", _thisfrag->lirbuf->names->formatRef(ins)); } )
 					break;
 				}
 				case LIR_xbarrier: {
 					break;
 				}
+#ifdef NANOJIT_IA32
+				case LIR_xtbl: {
+                    NIns* exit = asm_exit(ins); // does intersectRegisterState()
+					asm_switch(ins, exit);
+					break;
+				}
+#else
+ 			    case LIR_xtbl:
+					NanoAssertMsg(0, "Not supported for this architecture");
+					break;
+#endif
                 case LIR_xt:
 				case LIR_xf:
 				{
                     countlir_xcc();
 					// we only support cmp with guard right now, also assume it is 'close' and only emit the branch
                     NIns* exit = asm_exit(ins); // does intersectRegisterState()
 					LIns* cond = ins->oprnd1();
 					asm_branch(op == LIR_xf, cond, exit, false);
@@ -1437,16 +1460,38 @@ namespace nanojit
 				return;
 
 			// check that all is well (don't check in exit paths since its more complicated)
 			debug_only( pageValidate(); )
 			debug_only( resourceConsistencyCheck();  )
 		}
 	}
 
+	/*
+	 * Write a jump table for the given SwitchInfo and store the table
+	 * address in the SwitchInfo. Every entry will initially point to
+	 * target.
+	 */
+	void Assembler::emitJumpTable(SwitchInfo* si, NIns* target)
+	{
+		underrunProtect(si->count * sizeof(NIns*) + 20);
+		// Align for platform. The branch should be optimized away and is
+		// required to select the compatible int type.
+		if (sizeof(NIns*) == 8) {
+			_nIns = (NIns*) (uint64(_nIns) & ~7);
+		} else if (sizeof(NIns*) == 4) {
+		    _nIns = (NIns*) (uint32(_nIns) & ~3);
+		}
+		for (uint32_t i = 0; i < si->count; ++i) {
+			_nIns = (NIns*) (((uint8*) _nIns) - sizeof(NIns*));
+			*(NIns**) _nIns = target;
+		}
+		si->table = (NIns**) _nIns;
+	}
+
     void Assembler::assignSavedRegs()
     {
         // restore saved regs
 		releaseRegisters();
         LirBuffer *b = _thisfrag->lirbuf;
         for (int i=0, n = NumSavedRegs; i < n; i++) {
             LIns *p = b->savedRegs[i];
             if (p)
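
Because the assembler emits backwards (_nIns decreases), emitJumpTable above aligns the cursor down to pointer size and writes the entries last-to-first, so _nIns ends up addressing entry 0. A self-contained sketch of the same layout discipline; the function and parameter names are illustrative, not from the patch:

    #include <cstdint>
    #include <cstring>

    // Fill count pointer-sized slots backwards from cursor, each holding
    // target (the epilogue in the patch); returns the table base address.
    static void** layoutJumpTable(uint8_t*& cursor, uint32_t count, void* target)
    {
        cursor = (uint8_t*) ((uintptr_t) cursor & ~(uintptr_t) (sizeof(void*) - 1));
        for (uint32_t i = 0; i < count; ++i) {
            cursor -= sizeof(void*);
            std::memcpy(cursor, &target, sizeof(void*));
        }
        return (void**) cursor;   // == &table[0]; patch() rewrites slots later
    }
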
--- a/js/src/nanojit/Assembler.h
+++ b/js/src/nanojit/Assembler.h
@@ -186,16 +186,19 @@ namespace nanojit
 
 			void		assemble(Fragment* frag, NInsList& loopJumps);
 			void		endAssembly(Fragment* frag, NInsList& loopJumps);
 			void		beginAssembly(Fragment *frag, RegAllocMap* map);
 			void		copyRegisters(RegAlloc* copyTo);
 			void		releaseRegisters();
             void        patch(GuardRecord *lr);
             void        patch(SideExit *exit);
+#ifdef NANOJIT_IA32
+			void        patch(SideExit* exit, SwitchInfo* si);
+#endif
 			AssmError   error()	{ return _err; }
 			void		setError(AssmError e) { _err = e; }
 			void		setCallTable(const CallInfo *functions);
 			void		pageReset();
 			int32_t		codeBytes();
 			Page*		handoverPages(bool exitPages=false);
 
 			debug_only ( void		pageValidate(); )
@@ -310,16 +313,18 @@ namespace nanojit
 			void		asm_i2f(LInsp ins);
 			void		asm_u2f(LInsp ins);
 			Register	asm_prep_fcall(Reservation *rR, LInsp ins);
 			void		asm_nongp_copy(Register r, Register s);
 			void		asm_call(LInsp);
             void        asm_arg(ArgSize, LInsp, Register);
 			Register	asm_binop_rhs_reg(LInsp ins);
 			NIns*		asm_branch(bool branchOnFalse, LInsp cond, NIns* targ, bool isfar);
+			void        asm_switch(LIns* ins, NIns* target);
+			void        emitJumpTable(SwitchInfo* si, NIns* target);
             void        assignSavedRegs();
             void        reserveSavedRegs();
             void        assignParamRegs();
             void        handleLoopCarriedExprs();
 			
 			// flag values for nMarkExecute
 			enum 
 			{
--- a/js/src/nanojit/LIR.cpp
+++ b/js/src/nanojit/LIR.cpp
@@ -1884,16 +1884,17 @@ namespace nanojit
 			case LIR_not: 
 				sprintf(s, "%s = %s %s", formatRef(i), lirNames[op], formatRef(i->oprnd1()));
 				break;
 
 			case LIR_x:
 			case LIR_xt:
 			case LIR_xf:
 			case LIR_xbarrier:
+			case LIR_xtbl:
 				formatGuard(i, s);
 				break;
 
 			case LIR_add:
 			case LIR_addp:
 			case LIR_sub: 
 		 	case LIR_mul: 
 			case LIR_fadd:
--- a/js/src/nanojit/LIR.h
+++ b/js/src/nanojit/LIR.h
@@ -134,18 +134,31 @@ namespace nanojit
             return _count_args(_ARGSIZE_MASK_ANY) + isIndirect();
         }
 		inline uint32_t FASTCALL count_iargs() const {
             return _count_args(_ARGSIZE_MASK_INT);
         }
 		// fargs = args - iargs
 	};
 
+	/*
+	 * Record for extra data used to compile switches as jump tables.
+	 */
+	struct SwitchInfo
+	{
+		NIns**      table;       // Jump table; a jump address is NIns*
+		uint32_t    count;       // Number of table entries
+		// Index value at last execution of the switch. The index value
+		// is the offset into the jump table. Thus it is computed as 
+		// (switch expression) - (lowest case value).
+		uint32_t    index;
+	};
+
     inline bool isGuard(LOpcode op) {
-        return op == LIR_x || op == LIR_xf || op == LIR_xt || op == LIR_loop || op == LIR_xbarrier;
+        return op == LIR_x || op == LIR_xf || op == LIR_xt || op == LIR_loop || op == LIR_xbarrier || op == LIR_xtbl;
     }
 
     inline bool isCall(LOpcode op) {
         op = LOpcode(op & ~LIR64);
         return op == LIR_call || op == LIR_calli;
     }
 
     inline bool isStore(LOpcode op) {
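
To make the index comment concrete, a hedged example of the values that end up in a SwitchInfo for a switch whose cases run from 5 to 12 (assuming the struct above is in scope; the variable is illustrative):

    SwitchInfo si;
    si.count = 12 + 1 - 5;       // 8 jump-table entries
    si.table = 0;                // filled in by Assembler::emitJumpTable
    si.index = (uint32_t) -1;    // the trace later stores (switch value - 5),
                                 // e.g. 4 for a value of 9, selecting table[4]
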
--- a/js/src/nanojit/LIRopcode.tbl
+++ b/js/src/nanojit/LIRopcode.tbl
@@ -172,18 +172,18 @@ OPDEF(ult,      60, 2) // 0x3C 0011 1100
 OPDEF(ugt,      61, 2) // 0x3D 0011 1101
 OPDEF(ule,      62, 2) // 0x3E 0011 1110
 OPDEF(uge,      63, 2) // 0x3F 0011 1111
 
 OPDEF64(2,          0, 2) // wraps a pair of refs
 OPDEF64(file,       1, 2)
 OPDEF64(line,       2, 2)
 OPDEF64(xbarrier,   3, 1) // memory barrier (dummy guard)
+OPDEF64(xtbl,       4, 1) // exit via indirect jump
 
-OPDEF64(unused4_64,   4, 2)
 OPDEF64(unused5_64,   5, 2)
 OPDEF64(unused6_64,   6, 2)
 OPDEF64(unused7_64,   7, 2)
 OPDEF64(unused8_64,   8, 2)
 OPDEF64(unused9_64,   9, 2)
 OPDEF64(unused10_64, 10, 2)
 
 OPDEF64(stq, LIR_st, 2) // quad store
--- a/js/src/nanojit/Native.h
+++ b/js/src/nanojit/Native.h
@@ -57,29 +57,31 @@
 #error "unknown nanojit architecture"
 #endif
 
 namespace nanojit {
 	const uint32_t NJ_PAGE_SIZE = 1 << NJ_LOG2_PAGE_SIZE;
 	
     class Fragment;
     struct SideExit;
+	struct SwitchInfo;
     
     struct GuardRecord 
     {
         void* jmp;
         GuardRecord* next;
         SideExit* exit;
     };
     
     struct SideExit
     {
         GuardRecord* guards;
         Fragment* from;
         Fragment* target;
+		SwitchInfo* switchInfo;
         
         void addGuard(GuardRecord* lr) 
         {
             lr->next = guards;
             guards = lr;
         }
     };
 }
--- a/js/src/nanojit/Nativei386.cpp
+++ b/js/src/nanojit/Nativei386.cpp
@@ -142,37 +142,44 @@ namespace nanojit
 
 	void Assembler::nFragExit(LInsp guard)
 	{
 		SideExit *exit = guard->record()->exit;
 		bool trees = _frago->core()->config.tree_opt;
         Fragment *frag = exit->target;
         GuardRecord *lr = 0;
 		bool destKnown = (frag && frag->fragEntry);
-		if (destKnown && !trees)
-		{
-			// already exists, emit jump now.  no patching required.
-			JMP(frag->fragEntry);
-            lr = 0;
-		}
-		else
-		{
-			// target doesn't exit yet.  emit jump to epilog, and set up to patch later.
+		// Generate jump to epilog and initialize lr.
+		// If the guard is LIR_xtbl, use a jump table with epilog in every entry
+		if (guard->isop(LIR_xtbl)) {
 			lr = guard->record();
-#if defined NANOJIT_AMD64
-            /* 8 bytes for address, 4 for imm32, 2 for jmp */
-            underrunProtect(14);
-            _nIns -= 8;
-            *(intptr_t *)_nIns = intptr_t(_epilogue);
-            lr->jmp = _nIns;
-            JMPm_nochk(0);
-#else
-            JMP_long(_epilogue);
-            lr->jmp = _nIns;
-#endif
+			Register r = EBX;
+			SwitchInfo* si = guard->record()->exit->switchInfo;
+			emitJumpTable(si, _epilogue);
+			JMP_indirect(r);
+			LEAmi4(r, si->table, r);
+		} else {
+			// If the guard already exists, use a simple jump.
+			if (destKnown && !trees) {
+				JMP(frag->fragEntry);
+				lr = 0;
+			} else {  // target doesn't exist. Use 0 jump offset and patch later
+				lr = guard->record();
+	#if defined NANOJIT_AMD64
+				/* 8 bytes for address, 4 for imm32, 2 for jmp */
+				underrunProtect(14);
+				_nIns -= 8;
+				*(intptr_t *)_nIns = intptr_t(_epilogue);
+				lr->jmp = _nIns;
+				JMPm_nochk(0);
+	#else
+				JMP_long(_epilogue);
+				lr->jmp = _nIns;
+	#endif
+			}
 		}
 		// first restore ESP from EBP, undoing SUBi(SP,amt) from genPrologue
         MR(SP,FP);
 
 		// return value is GuardRecord*
 	#if defined NANOJIT_IA32
         LDi(EAX, int(lr));
 	#elif defined NANOJIT_AMD64
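
Since code is emitted backwards, the LIR_xtbl branch of nFragExit above yields, in execution order, roughly this IA32 sequence for the table dispatch (a sketch; the table itself sits in memory right after the jump because emitJumpTable ran first):

    // lea  ebx, si->table(,ebx,4)   ; LEAmi4: scale the case index into a slot address
    // jmp  *(ebx)                   ; JMP_indirect: jump through the slot's pointer
    // ...                           ; the jump table, every slot initially pointing
    //                               ; at the epilogue, i.e. the CASE_EXIT side exit

Note that lr->jmp is left unset on this path: CASE_EXIT guards are patched through their table slot by patch(SideExit*, SwitchInfo*) rather than by rewriting a direct jump.
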
@@ -881,16 +888,23 @@ namespace nanojit
 			else //if (condop == LIR_uge)
 				JAE(targ, isfar);
 		}
 		at = _nIns;
 		asm_cmp(cond);
 		return at;
 	}
 
+	void Assembler::asm_switch(LIns* ins, NIns* exit)
+	{
+		LIns* diff = ins->oprnd1();
+		findSpecificRegFor(diff, EBX);
+		JMP(exit);
+   	}
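
asm_switch above establishes the contract with nFragExit: the zero-based case index (the LIR_sub operand of the xtbl guard) is pinned in EBX, which is where the LEAmi4/JMP_indirect pair expects it, and an unconditional jump transfers control into the exit stub where that table dispatch lives. A comment-form restatement:

    // EBX <- diff (switch value - low), via findSpecificRegFor
    // jmp exit                      ; falls into the LIR_xtbl stub emitted by
    //                               ; nFragExit, which performs the table lookup
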
+
 	void Assembler::asm_cmp(LIns *cond)
 	{
         LOpcode condop = cond->opcode();
         
         // LIR_ov and LIR_cs recycle the flags set by arithmetic ops
         if ((condop == LIR_ov) || (condop == LIR_cs))
             return;
         
--- a/js/src/nanojit/Nativei386.h
+++ b/js/src/nanojit/Nativei386.h
@@ -96,17 +96,17 @@ namespace nanojit
 	#define NJ_MAX_STACK_ENTRY 256
 	#define NJ_MAX_PARAMETERS 1
 
         // Preserve a 16-byte stack alignment, to support the use of
         // SSE instructions like MOVDQA (if not by Tamarin itself,
         // then by the C functions it calls).
 	const int NJ_ALIGN_STACK = 16;
 
-	const int32_t LARGEST_UNDERRUN_PROT = 32;  // largest value passed to underrunProtect
+	const int32_t LARGEST_UNDERRUN_PROT = 3200;  // largest value passed to underrunProtect
 	
 	typedef uint8_t NIns;
 
 	// These are used as register numbers in various parts of the code
 	typedef enum
 	{
 		// general purpose 32bit regs
 		EAX = 0, // return value, scratch
@@ -371,16 +371,19 @@ namespace nanojit
 #define ADDmi(d,b,i) do { count_alust(); ALUmi(0x05, d, b, i); asm_output("add %d(%s), %d", d, gpn(b), i); } while(0)
 
 #define TEST(d,s)	do { count_alu(); ALU(0x85,d,s);				asm_output("test %s,%s",gpn(d),gpn(s)); } while(0)
 #define CMP(l,r)	do { count_alu(); ALU(0x3b, (l),(r));			asm_output("cmp %s,%s",gpn(l),gpn(r)); } while(0)
 #define CMPi(r,i)	do { count_alu(); ALUi(0x3d,r,i);				asm_output("cmp %s,%d",gpn(r),i); } while(0)
 
 #define MR(d,s)		do { count_mov(); ALU(0x8b,d,s);				asm_output("mov %s,%s",gpn(d),gpn(s)); } while(0)
 #define LEA(r,d,b)	do { count_alu(); ALUm(0x8d, r,d,b);			asm_output("lea %s,%d(%s)",gpn(r),d,gpn(b)); } while(0)
+// lea %r, d(%i*4)
+// This addressing mode is not supported by the MODRMSIB macro.
+#define LEAmi4(r,d,i) do { count_alu(); IMM32(d); *(--_nIns) = (2<<6)|(i<<3)|5; *(--_nIns) = (0<<6)|(r<<3)|4; *(--_nIns) = 0x8d;                    asm_output("lea %s, %p(%s*4)", gpn(r), d, gpn(i)); } while(0)
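
A gloss on the hand-assembled bytes in LEAmi4, read in execution order (the macro emits them back to front); this is my reading of the encoding, not text from the patch:

    // 0x8d                     opcode: lea r32, m
    // modrm = 00 r 100         mod=00, reg=r, rm=100 -> a SIB byte follows
    // sib   = 10 i 101         scale=4, index=i, base=101 -> disp32 with no base
    // disp32 = d               absolute address of the jump table
    //
    // i.e. lea r, d(,i,4): base-less scaled-index addressing, which MODRMSIB
    // cannot express, hence the manual encoding.
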
 
 #define SETE(r)		do { count_alu(); ALU2(0x0f94,(r),(r));			asm_output("sete %s",gpn(r)); } while(0)
 #define SETNP(r)	do { count_alu(); ALU2(0x0f9B,(r),(r));			asm_output("setnp %s",gpn(r)); } while(0)
 #define SETL(r)		do { count_alu(); ALU2(0x0f9C,(r),(r));			asm_output("setl %s",gpn(r)); } while(0)
 #define SETLE(r)	do { count_alu(); ALU2(0x0f9E,(r),(r));			asm_output("setle %s",gpn(r)); } while(0)
 #define SETG(r)		do { count_alu(); ALU2(0x0f9F,(r),(r));			asm_output("setg %s",gpn(r)); } while(0)
 #define SETGE(r)	do { count_alu(); ALU2(0x0f9D,(r),(r));			asm_output("setge %s",gpn(r)); } while(0)
 #define SETB(r)     do { count_alu(); ALU2(0x0f92,(r),(r));          asm_output("setb %s",gpn(r)); } while(0)
@@ -568,16 +571,22 @@ namespace nanojit
 
 // this should only be used when you can guarantee there is enough room on the page
 #define JMP_long_nochk_offset(o) do {\
 		verbose_only( NIns* next = _nIns; (void)next; ) \
  		IMM32((o)); \
  		*(--_nIns) = JMP32; \
 		asm_output("jmp %p",(next+(o))); } while(0)
 
+#define JMP_indirect(r) do { \
+        underrunProtect(2);  \
+        MODRMm(4, 0, r);     \
+        *(--_nIns) = 0xff;   \
+        asm_output("jmp *(%s)", gpn(r)); } while (0)
+
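
JMP_indirect assembles the two-byte near indirect jump; a brief gloss, assuming MODRMm(4, 0, r) yields a no-displacement [r] operand as the underrunProtect(2) suggests:

    // 0xff                     opcode group 5
    // modrm = 00 100 r         reg field 4 selects jmp r/m32; operand is [r]
    //
    // i.e. jmp *(r): jump through the pointer stored at the slot address that
    // LEAmi4 just computed.
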
 #define JE(t, isfar)	   JCC(0x04, t, isfar, "je")
 #define JNE(t, isfar)	   JCC(0x05, t, isfar, "jne")
 #define JP(t, isfar)	   JCC(0x0A, t, isfar, "jp")
 #define JNP(t, isfar)	   JCC(0x0B, t, isfar, "jnp")
 
 #define JB(t, isfar)	   JCC(0x02, t, isfar, "jb")
 #define JNB(t, isfar)	   JCC(0x03, t, isfar, "jnb")
 #define JBE(t, isfar)	   JCC(0x06, t, isfar, "jbe")