Landed nanojit in TraceMonkey. This is untested and DEBUG must be off for now since we don't support AVM's String class.
author Andreas Gal <gal@uci.edu>
Thu, 19 Jun 2008 10:47:58 -0700
changeset 17275 378807e85abc0e91a11fb3c414b85d265c356ca8
parent 17274 af2c8cea077ab73bdb895a5f302001e9fd7262d9
child 17276 8c621db6a231a8c9d47a164f86785455afe3b977
push id 1452
push user shaver@mozilla.com
push date Fri, 22 Aug 2008 00:08:22 +0000
treeherder mozilla-central@d13bb0868596
milestone 1.9.1a1pre
js/src/Makefile.ref
js/src/nanojit/Assembler.cpp
js/src/nanojit/Fragmento.cpp
js/src/nanojit/LIR.cpp
js/src/nanojit/Nativei386.cpp
js/src/nanojit/RegAlloc.cpp
js/src/nanojit/avmplus.cpp
js/src/nanojit/avmplus.h
js/src/nanojit/vm_fops.h
--- a/js/src/Makefile.ref
+++ b/js/src/Makefile.ref
@@ -56,17 +56,17 @@ include config.mk
 
 DEFINES += -DJS_TRACER
 
 ifdef NARCISSUS
 DEFINES += -DNARCISSUS
 endif
 
 # Look in OBJDIR to find jsautocfg.h and jsautokw.h
-INCLUDES   += -I$(OBJDIR)
+INCLUDES   += -I. -Inanojit -I$(OBJDIR)
 
 ifdef JS_THREADSAFE
 DEFINES += -DJS_THREADSAFE
 INCLUDES += -I$(DIST)/include/nspr
 ifdef USE_MSVC
 OTHER_LIBS += $(DIST)/lib/libnspr$(NSPR_LIBSUFFIX).lib
 else
 OTHER_LIBS += -L$(DIST)/lib -lnspr$(NSPR_LIBSUFFIX)
@@ -76,16 +76,18 @@ endif
 ifdef JS_NO_THIN_LOCKS
 DEFINES += -DJS_USE_ONLY_NSPR_LOCKS
 endif
 
 ifdef JS_HAS_FILE_OBJECT
 DEFINES += -DJS_HAS_FILE_OBJECT
 endif
 
+DEFINES += -DFEATURE_NANOJIT -DAVMPLUS_IA32 -DTRACEMONKEY
+
 #
 # XCFLAGS may be set in the environment or on the gmake command line
 #
 #CFLAGS += -DDEBUG -DDEBUG_brendan -DJS_ARENAMETER -DJS_HASHMETER -DJS_DUMP_PROPTREE_STATS -DJS_DUMP_SCOPE_METERS -DJS_SCOPE_DEPTH_METER -DJS_BASIC_STATS
 CFLAGS          += $(OPTIMIZER) $(OS_CFLAGS) $(DEFINES) $(INCLUDES) $(XCFLAGS)
 INTERP_CFLAGS   += $(INTERP_OPTIMIZER) $(OS_CFLAGS) $(DEFINES) $(INCLUDES) $(XCFLAGS) $(INTERP_XCFLAGS)
 
 LDFLAGS		= $(XLDFLAGS)
@@ -175,16 +177,27 @@ JS_HFILES =		\
 	jspubtd.h	\
 	jsregexp.h	\
 	jsscan.h	\
 	jsscope.h	\
 	jsscript.h	\
 	jsstr.h		\
 	jsxdrapi.h	\
 	jsxml.h		\
+	nanojit/Assembler.h     \
+	nanojit/LIR.h		\
+	nanojit/NativeARM.h	\
+	nanojit/Nativei386.h	\
+	nanojit/avmplus.h	\
+	nanojit/vm_fops.h	\
+	nanojit/Fragmento.h	\
+	nanojit/Native.h	\
+	nanojit/NativeThumb.h	\
+	nanojit/RegAlloc.h	\
+	nanojit/nanojit.h	\
 	$(NULL)
 
 API_HFILES =		\
 	jsapi.h		\
 	jsdbgapi.h	\
 	$(NULL)
 
 OTHER_HFILES =		\
@@ -242,16 +255,22 @@ JS_CPPFILES =		\
 	jsscope.cpp	\
 	jsscript.cpp	\
 	jsstr.cpp	\
 	jstracer.cpp	\
 	jsutil.cpp	\
 	jsxdrapi.cpp	\
 	jsxml.cpp	\
 	prmjtime.cpp	\
+	nanojit/Assembler.cpp  \
+	nanojit/Fragmento.cpp  \
+	nanojit/LIR.cpp        \
+	nanojit/Nativei386.cpp \
+	nanojit/RegAlloc.cpp   \
+	nanojit/avmplus.cpp    \
 	$(NULL)
 
 ifdef JS_LIVECONNECT
 DIRS      += liveconnect
 endif
 
 ifdef JS_HAS_FILE_OBJECT
 JS_CPPFILES += jsfile.cpp
new file mode 100755
--- /dev/null
+++ b/js/src/nanojit/Assembler.cpp
@@ -0,0 +1,1951 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is [Open Source Virtual Machine].
+ *
+ * The Initial Developer of the Original Code is
+ * Adobe System Incorporated.
+ * Portions created by the Initial Developer are Copyright (C) 2004-2007
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ *   Adobe AS3 Team
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#include "nanojit.h"
+
+namespace nanojit
+{
+	#ifdef FEATURE_NANOJIT
+
+
+	#ifdef AVMPLUS_WIN32
+		#define AVMPLUS_ALIGN16(type) __declspec(align(16)) type
+	#else
+		#define AVMPLUS_ALIGN16(type) type __attribute__ ((aligned (16)))
+	#endif
+
+	class DeadCodeFilter: public LirFilter
+	{
+		Assembler *assm;
+	public:
+		DeadCodeFilter(LirFilter *in, Assembler *a) : LirFilter(in), assm(a) {}
+		LInsp read() {
+			for (;;) {
+				LInsp i = in->read();
+				if (!i || i->isGuard() 
+					|| i->isCall() && !assm->_functions[i->imm8()]._cse
+					|| !assm->ignoreInstruction(i))
+					return i;
+				if (i->isCall()) {
+					// skip args
+					while (in->pos()->isArg())
+						in->read();
+				}
+			}
+		}
+	};
+
+#ifdef NJ_VERBOSE
+	class VerboseBlockReader: public LirFilter
+	{
+		Assembler *assm;
+		LirNameMap *names;
+		avmplus::List<LInsp, avmplus::LIST_NonGCObjects> block;
+	public:
+		VerboseBlockReader(LirFilter *in, Assembler *a, LirNameMap *n) 
+			: LirFilter(in), assm(a), names(n), block(a->_gc) {}
+
+		void flush() {
+			assm->outputf("        %p:", assm->_nIns);
+			assm->output("");
+			for (int j=0,n=block.size(); j < n; j++)
+				assm->outputf("    %s", names->formatIns(block[j]));
+			assm->output("");
+			block.clear();
+		}
+
+		LInsp read() {
+			LInsp i = in->read();
+			if (!i) {
+				flush();
+				return i;
+			}
+			if (i->isGuard()) {
+				flush();
+				block.add(i);
+				if (i->oprnd1())
+					block.add(i->oprnd1());
+			}
+			else if (!i->isArg()) {
+				block.add(i);
+			}
+			return i;
+		}
+	};
+#endif
+	
+	/**
+	 * Need the following:
+	 *
+	 *	- merging paths ( build a graph? ), possibly use external rep to drive codegen
+	 */
+    Assembler::Assembler(Fragmento* frago)
+        : _frago(frago)
+        , _gc(frago->core()->gc)
+	{
+        AvmCore *core = frago->core();
+		nInit(core);
+		verbose_only( _verbose = !core->quiet_opt() && core->verbose() );
+		verbose_only( _outputCache = 0);
+		
+		verbose_only(Lir::initEngine();)
+		internalReset();
+		pageReset();
+	}
+
+    void Assembler::arReset()
+	{
+		_activation.highwatermark = 0;
+		_activation.lowwatermark = 0;
+		_activation.tos = 0;
+
+		for(uint32_t i=0; i<NJ_MAX_STACK_ENTRY; i++)
+			_activation.entry[i] = 0;
+		for(uint32_t i=0; i<NJ_MAX_PARAMETERS; i++)
+			_activation.parameter[i] = 0;
+	}
+
+ 	void Assembler::registerResetAll()
+	{
+		nRegisterResetAll(_allocator);
+
+		// keep a tally of the registers to check that our allocator works correctly
+		debug_only(_allocator.count = _allocator.countFree(); )
+		debug_only(_allocator.checkCount(); )
+		debug_only(_fpuStkDepth = 0; )
+	}
+
+	Register Assembler::registerAlloc(RegisterMask allow)
+	{
+		RegAlloc &regs = _allocator;
+//		RegisterMask prefer = livePastCall(_ins) ? saved : scratch;
+		RegisterMask prefer = SavedRegs & allow;
+		RegisterMask free = regs.free & allow;
+
+		RegisterMask set = prefer;
+		if (set == 0) set = allow;
+
+        if (free)
+        {
+    		// at least one is free
+		    set &= free;
+
+		    // ok we have at least 1 free register so let's try to pick 
+		    // the best one given the profile of the instruction 
+		    if (!set)
+		    {
+			    // desired register class is not free so pick first of any class
+			    set = free;
+		    }
+		    NanoAssert((set & allow) != 0);
+		    Register r = nRegisterAllocFromSet(set);
+		    regs.used |= rmask(r);
+		    return r;
+        }
+		counter_increment(steals);
+
+		// nothing free, steal one 
+		// LSRA says pick the one with the furthest use
+		LIns* vic = findVictim(regs,allow,prefer);
+	    Reservation* resv = getresv(vic);
+
+		// restore vic
+	    Register r = resv->reg;
+        regs.removeActive(r);
+        resv->reg = UnknownReg;
+
+		asm_restore(vic, resv, r);
+		return r;
+	}
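
The selection logic above is plain bitmask arithmetic: prefer registers that are both allowed and in the preferred (callee-saved) class, fall back to any allowed register, restrict to the free set, and only steal a victim when nothing is free. A minimal standalone sketch of that selection (hypothetical helper, not part of this patch; GCC's __builtin_ctz assumed for picking the lowest set bit):

    #include <cstdint>

    // Illustrative helper, not nanojit code: returns a register index chosen
    // from 'allow', preferring bits in 'prefer', restricted to 'free';
    // returns -1 when nothing is free and a victim must be stolen
    // (the spill path in registerAlloc above).
    inline int pickRegister(uint32_t free, uint32_t allow, uint32_t prefer)
    {
        uint32_t set = prefer & allow;      // preferred class, if any of it is allowed
        if (set == 0)
            set = allow;                    // no usable preference: any allowed register
        set &= free;                        // only consider registers that are free
        if (set == 0)
            set = free & allow;             // preferred class exhausted: any free one
        if (set == 0)
            return -1;                      // nothing free: caller spills a victim
        return __builtin_ctz(set);          // lowest set bit = chosen register
    }
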
+
+	void Assembler::reserveReset()
+	{
+		_resvTable[0].arIndex = 0;
+		int i;
+		for(i=1; i<NJ_MAX_STACK_ENTRY; i++)
+			_resvTable[i].arIndex = i-1;
+		_resvFree= i-1;
+	}
+
+	Reservation* Assembler::reserveAlloc(LInsp i)
+	{
+		uint32_t item = _resvFree;
+        Reservation *r = &_resvTable[item];
+		_resvFree = r->arIndex;
+		r->reg = UnknownReg;
+		r->arIndex = 0;
+		if (!item) 
+			setError(ResvFull); 
+
+        if (i->isconst() || i->isconstq())
+            r->cost = 0;
+        else if (i->isop(LIR_ld) && 
+                 i->oprnd1() == _thisfrag->param0 &&
+                 (i->oprnd2()->isconstval(offsetof(avmplus::InterpState,sp)) ||
+                  i->oprnd2()->isconstval(offsetof(avmplus::InterpState,rp))))
+            r->cost = 2;
+        else
+            r->cost = 1;
+
+        i->setresv(item);
+		return r;
+	}
+
+	void Assembler::reserveFree(LInsp i)
+	{
+        Reservation *rs = getresv(i);
+        NanoAssert(rs == &_resvTable[i->resv()]);
+		rs->arIndex = _resvFree;
+		_resvFree = i->resv();
+        i->setresv(0);
+	}
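
reserveReset, reserveAlloc and reserveFree manage the Reservation table as a free list threaded through the arIndex field of unused entries, with slot 0 acting as a sentinel that is never handed out. The same pattern in isolation (illustrative sketch only, fixed-size table assumed):

    #include <cstdint>

    // Illustrative free list threaded through an index field, as in the
    // Reservation table above; 'link' stands in for the reused arIndex.
    template <uint32_t N>
    struct IndexedFreeList {
        uint32_t link[N];
        uint32_t freeHead;

        void reset() {
            link[0] = 0;                   // slot 0: sentinel, never allocated
            for (uint32_t i = 1; i < N; i++)
                link[i] = i - 1;           // chain N-1 -> N-2 -> ... -> 1 -> 0
            freeHead = N - 1;
        }
        uint32_t alloc() {                 // returns 0 when the table is exhausted
            uint32_t item = freeHead;
            freeHead = link[item];
            return item;
        }
        void release(uint32_t item) {      // push the slot back on the list
            link[item] = freeHead;
            freeHead = item;
        }
    };
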
+
+	void Assembler::internalReset()
+	{
+		// readies for a brand spanking new code generation pass.
+		registerResetAll();
+		reserveReset();
+		arReset();
+	}
+
+	NIns* Assembler::pageAlloc(bool exitPage)
+	{
+		Page*& list = (exitPage) ? _nativeExitPages : _nativePages;
+		Page* page = _frago->pageAlloc();
+		if (page)
+		{
+			page->next = list;
+			list = page;
+			nMarkExecute(page);
+		}
+		else
+		{
+			// return prior page (to allow overwrites) and mark out of mem 
+			page = list;
+			setError(OutOMem);
+		}
+		return &page->code[sizeof(page->code)/sizeof(NIns)]; // just past the end
+	}
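
pageAlloc hands back a pointer just past the end of the page because code is generated backwards: _nIns starts at the top of a page and moves downwards as instructions are emitted (placeGuardRecord below, for example, carves its record out of the space immediately below _nIns). A toy model of a downward-growing emit buffer, with byte-sized "instructions" assumed for simplicity:

    #include <cstdint>
    #include <cstring>

    // Toy model, not nanojit code: a buffer whose emit cursor starts just past
    // the end and moves downwards, so the newest code always sits in front.
    struct DownwardBuffer {
        uint8_t  page[4096];
        uint8_t* cursor = page + sizeof(page);    // starts just past the end

        uint8_t* emit(const uint8_t* bytes, size_t n) {
            cursor -= n;
            std::memcpy(cursor, bytes, n);
            return cursor;                        // address of the newest instruction
        }
    };
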
+	
+	void Assembler::pageReset()
+	{
+		pagesFree(_nativePages);		
+		pagesFree(_nativeExitPages);
+		
+		_nIns = 0;
+		_nExitIns = 0;
+
+		nativePageReset();
+	}
+	
+	void Assembler::pagesFree(Page*& page)
+	{
+		while(page)
+		{
+			Page *next = page->next;  // pull next ptr prior to free
+			_frago->pageFree(page);
+			page = next;
+		}
+	}
+
+	Page* Assembler::handoverPages(bool exitPages)
+	{
+		Page*& list = (exitPages) ? _nativeExitPages : _nativePages;
+		NIns*& ins =  (exitPages) ? _nExitIns : _nIns;
+		Page* start = list;
+		list = 0;
+		ins = 0;
+		return start;
+	}
+	
+	#ifdef _DEBUG
+	bool Assembler::onPage(NIns* where, bool exitPages)
+	{
+		Page* page = (exitPages) ? _nativeExitPages : _nativePages;
+		bool on = false;
+		while(page)
+		{
+			if (samepage(where-1,page))
+				on = true;
+			page = page->next;
+		}
+		return on;
+	}
+	
+	void Assembler::pageValidate()
+	{
+		if (error()) return;
+		// _nIns and _nExitIns need to be on at least
+		// one of these pages
+		NanoAssertMsg( onPage(_nIns)&& onPage(_nExitIns,true), "Native instruction pointer overstep paging bounds; check overrideProtect for last instruction");
+	}
+	#endif
+
+	const CallInfo* Assembler::callInfoFor(int32_t fid)
+	{	
+		NanoAssert(fid < CI_Max);
+		return &_functions[fid];
+	}
+
+	#ifdef _DEBUG
+	
+	void Assembler::resourceConsistencyCheck()
+	{
+		if (error()) return;
+
+#ifdef NANOJIT_IA32
+        NanoAssert(_allocator.active[FST0] && _fpuStkDepth == -1 ||
+            !_allocator.active[FST0] && _fpuStkDepth == 0);
+#endif
+		
+		// for tracking resv usage
+		LIns* resv[NJ_MAX_STACK_ENTRY];
+		for(int i=0; i<NJ_MAX_STACK_ENTRY; i++)
+			resv[i]=0;
+			
+		// check AR entries
+		NanoAssert(_activation.highwatermark < NJ_MAX_STACK_ENTRY);
+		LIns* ins = 0;
+		RegAlloc* regs = &_allocator;
+		for(uint32_t i=_activation.lowwatermark; i<_activation.tos; i++)
+		{
+			ins = _activation.entry[i];
+			if ( !ins )
+				continue;
+			Reservation *r = getresv(ins);
+			int32_t idx = r - _resvTable;
+			resv[idx]=ins;
+			NanoAssertMsg(idx, "MUST have a resource for the instruction for it to have a stack location assigned to it");
+			NanoAssertMsg( r->arIndex==0 || r->arIndex==i || (ins->isQuad()&&r->arIndex==i-(stack_direction(1))), "Stack record index mismatch");
+			NanoAssertMsg( r->reg==UnknownReg || regs->isConsistent(r->reg,ins), "Register record mismatch");
+		}
+	
+		registerConsistencyCheck(resv);
+				
+		// check resv table
+		int32_t inuseCount = 0;
+		int32_t notInuseCount = 0;
+		for(uint32_t i=1; i<NJ_MAX_STACK_ENTRY; i++)
+		{
+			if (resv[i]==0)
+			{
+				notInuseCount++;
+			}
+			else
+			{
+				inuseCount++;
+			}
+		}
+
+		int32_t freeCount = 0;
+		uint32_t free = _resvFree;
+		while(free)
+		{
+			free = _resvTable[free].arIndex;
+			freeCount++;
+		}
+		NanoAssert( ( freeCount==notInuseCount && inuseCount+notInuseCount==(NJ_MAX_STACK_ENTRY-1) ) );
+	}
+
+	void Assembler::registerConsistencyCheck(LIns** resv)
+	{	
+		// check registers
+		RegAlloc *regs = &_allocator;
+		uint32_t managed = regs->managed;
+		Register r = FirstReg;
+		while(managed)
+		{
+			if (managed&1)
+			{
+				if (regs->isFree(r))
+				{
+					NanoAssert(regs->getActive(r)==0);
+				}
+				else
+				{
+					LIns* ins = regs->getActive(r);
+					// @todo we should be able to check across RegAlloc's somehow (to include savedGP...)
+					Reservation *v = getresv(ins);
+					NanoAssert(v);
+					int32_t idx = v - _resvTable;
+					NanoAssert(idx >= 0 && idx < NJ_MAX_STACK_ENTRY);
+					resv[idx]=ins;
+					NanoAssertMsg(idx, "MUST have a resource for the instruction for it to have a register assigned to it");
+					NanoAssertMsg( v->arIndex==0 || ins==_activation.entry[v->arIndex], "Stack record index mismatch");
+					NanoAssertMsg( regs->getActive(v->reg)==ins, "Register record mismatch");
+				}			
+			}
+			
+			// next register in bitfield
+			r = nextreg(r);
+			managed >>= 1;
+		}
+	}
+	#endif /* _DEBUG */
+
+	void Assembler::findRegFor2(RegisterMask allow, LIns* ia, Reservation* &resva, LIns* ib, Reservation* &resvb)
+	{
+		if (ia == ib) 
+		{
+			findRegFor(ia, allow);
+			resva = resvb = getresv(ia);
+		}
+		else
+		{
+			Register rb = UnknownReg;
+			resvb = getresv(ib);
+			if (resvb && (rb = resvb->reg) != UnknownReg)
+				allow &= ~rmask(rb);
+			Register ra = findRegFor(ia, allow);
+			resva = getresv(ia);
+			NanoAssert(error() || (resva != 0 && ra != UnknownReg));
+			if (rb == UnknownReg)
+			{
+				allow &= ~rmask(ra);
+				findRegFor(ib, allow);
+				resvb = getresv(ib);
+			}
+		}
+	}
+
+	Register Assembler::findSpecificRegFor(LIns* i, Register w)
+	{
+		return findRegFor(i, rmask(w));
+	}
+			
+	Register Assembler::findRegFor(LIns* i, RegisterMask allow)
+	{
+		Reservation* resv = getresv(i);
+		Register r;
+
+        if (resv && (r=resv->reg) != UnknownReg && (rmask(r) & allow)) {
+			return r;
+        }
+
+		RegisterMask prefer = hint(i, allow);
+		if (!resv) 	
+			resv = reserveAlloc(i);
+
+        if ((r=resv->reg) == UnknownReg)
+		{
+            if (resv->cost == 2 && (allow&SavedRegs))
+                prefer = allow&SavedRegs;
+			r = resv->reg = registerAlloc(prefer);
+			_allocator.addActive(r, i);
+			return r;
+		}
+		else 
+		{
+			// r not allowed
+			resv->reg = UnknownReg;
+			_allocator.retire(r);
+            if (resv->cost == 2 && (allow&SavedRegs))
+                prefer = allow&SavedRegs;
+			Register s = resv->reg = registerAlloc(prefer);
+			_allocator.addActive(s, i);
+            if (rmask(r) & GpRegs) {
+    			MR(r, s);
+            } 
+#ifdef NANOJIT_IA32
+            else if ((rmask(r) & XmmRegs) && (rmask(s) & XmmRegs)) {
+                MOVSD(r, s);
+            } else {
+                if (rmask(r) & XmmRegs) {
+                    // x87 -> xmm
+                    NanoAssert(false);
+                } else {
+                    // xmm -> x87
+                    NanoAssert(false);
+                }
+            }
+#endif
+			return s;
+		}
+	}
+
+	int Assembler::findMemFor(LIns *i)
+	{
+		Reservation* resv = getresv(i);
+		if (!resv)
+			resv = reserveAlloc(i);
+		if (!resv->arIndex)
+			resv->arIndex = arReserve(i);
+		return disp(resv);
+	}
+
+	Register Assembler::prepResultReg(LIns *i, RegisterMask allow)
+	{
+		Reservation* resv = getresv(i);
+		const bool pop = !resv || resv->reg == UnknownReg;
+		Register rr = findRegFor(i, allow);
+		freeRsrcOf(i, pop);
+		return rr;
+	}
+
+	void Assembler::freeRsrcOf(LIns *i, bool pop)
+	{
+		Reservation* resv = getresv(i);
+		int index = resv->arIndex;
+		Register rr = resv->reg;
+
+		if (rr != UnknownReg)
+		{
+			asm_spill(i, resv, pop);
+			_allocator.retire(rr);	// free any register associated with entry
+		}
+		arFree(index);			// free any stack space associated with entry
+		reserveFree(i);		// clear fields of entry and add it to free list
+	}
+
+	void Assembler::evict(Register r)
+	{
+		registerAlloc(rmask(r));
+		_allocator.addFree(r);
+	}
+
+	void Assembler::asm_cmp(LIns *cond)
+	{
+        LInsp lhs = cond->oprnd1();
+		LInsp rhs = cond->oprnd2();
+		NanoAssert(!lhs->isQuad() && !rhs->isQuad());
+		Reservation *rA, *rB;
+
+		// ready to issue the compare
+		if (rhs->isconst())
+		{
+			int c = rhs->constval();
+			Register r = findRegFor(lhs, GpRegs);
+			if (c == 0 && cond->isop(LIR_eq))
+				TEST(r,r);
+			else
+				CMPi(r, c);
+		}
+		else
+		{
+			findRegFor2(GpRegs, lhs, rA, rhs, rB);
+			Register ra = rA->reg;
+			Register rb = rB->reg;
+			CMP(ra, rb);
+		}
+	}
+
+#ifndef NJ_SOFTFLOAT
+	void Assembler::asm_fcmp(LIns *cond)
+	{
+		LOpcode condop = cond->opcode();
+		NanoAssert(condop == LIR_eq || condop == LIR_le || condop == LIR_lt || condop == LIR_gt || condop == LIR_ge);
+	    LIns* lhs = cond->oprnd1();
+	    LIns* rhs = cond->oprnd2();
+
+        int mask;
+	    if (condop == LIR_eq)
+		    mask = 0x44;
+	    else if (condop == LIR_le)
+		    mask = 0x41;
+	    else if (condop == LIR_lt)
+		    mask = 0x05;
+        else if (condop == LIR_ge) {
+            // swap, use le
+            LIns* t = lhs; lhs = rhs; rhs = t;
+            mask = 0x41;
+        } else { // if (condop == LIR_gt)
+            // swap, use lt
+            LIns* t = lhs; lhs = rhs; rhs = t;
+		    mask = 0x05;
+        }
+
+        if (sse2)
+        {
+            // UNORDERED:    ZF,PF,CF <- 111;
+            // GREATER_THAN: ZF,PF,CF <- 000;
+            // LESS_THAN:    ZF,PF,CF <- 001;
+            // EQUAL:        ZF,PF,CF <- 100;
+
+            if (condop == LIR_eq && lhs == rhs) {
+                // nan check
+                Register r = findRegFor(lhs, XmmRegs);
+                UCOMISD(r, r);
+            } else {
+                evict(EAX);
+                TEST_AH(mask);
+                LAHF();
+                Reservation *rA, *rB;
+                findRegFor2(XmmRegs, lhs, rA, rhs, rB);
+                UCOMISD(rA->reg, rB->reg);
+            }
+        }
+        else
+        {
+            evict(EAX);
+            TEST_AH(mask);
+		    FNSTSW_AX();
+
+		    NanoAssert(lhs->isQuad() && rhs->isQuad());
+		    Reservation *rA;
+		    if (lhs != rhs)
+		    {
+			    // compare two different numbers
+			    int d = findMemFor(rhs);
+			    rA = getresv(lhs);
+			    int pop = !rA || rA->reg == UnknownReg; 
+			    findSpecificRegFor(lhs, FST0);
+			    // lhs is in ST(0) and rhs is on stack
+			    FCOM(pop, d, FP);
+		    }
+		    else
+		    {
+			    // compare n to itself, this is a NaN test.
+			    rA = getresv(lhs);
+			    int pop = !rA || rA->reg == UnknownReg; 
+			    findSpecificRegFor(lhs, FST0);
+			    // value in ST(0)
+			    if (pop)
+				    FCOMPP();
+			    else
+				    FCOMP();
+			    FLDr(FST0); // DUP
+		    }
+        }
+	}
+#endif //NJ_SOFTFLOAT
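
The mask constants in asm_fcmp pick out bits of AH: after LAHF (SSE2 path) or FNSTSW AX (x87 path), CF/C0 sits in bit 0 (0x01), PF/C2 in bit 2 (0x04) and ZF/C3 in bit 6 (0x40), and the compare encodes its outcome in those three flags as the comment above lists. TEST AH,mask then leaves the parity flag clear exactly when an odd number of the masked bits are set, which is what the SETNP/JNP/JP users of this code key off. A small self-checking sketch of that decoding (assumptions as stated; not code from this patch):

    #include <cassert>
    #include <cstdint>

    // Flag bits as they appear in AH after LAHF / FNSTSW AX (assumed layout).
    enum { CF = 0x01, PF = 0x04, ZF = 0x40 };

    // Outcomes of UCOMISD/FCOM expressed in (ZF,PF,CF), per the comment above.
    enum { GREATER = 0, LESS = CF, EQUAL = ZF, UNORDERED = ZF | PF | CF };

    // After TEST AH,mask the condition is "taken" when parity is odd (PF clear),
    // i.e. when an odd number of the masked flag bits are set.
    static bool taken(uint8_t flags, uint8_t mask) {
        int bits = 0;
        for (uint8_t r = flags & mask; r; r >>= 1)
            bits += r & 1;
        return (bits & 1) != 0;
    }

    int main() {
        // 0x44 = ZF|PF: true only for EQUAL            (LIR_eq)
        assert(taken(EQUAL, 0x44) && !taken(LESS, 0x44) &&
               !taken(GREATER, 0x44) && !taken(UNORDERED, 0x44));
        // 0x41 = ZF|CF: true for LESS or EQUAL         (LIR_le)
        assert(taken(LESS, 0x41) && taken(EQUAL, 0x41) &&
               !taken(GREATER, 0x41) && !taken(UNORDERED, 0x41));
        // 0x05 = PF|CF: true only for LESS             (LIR_lt)
        assert(taken(LESS, 0x05) && !taken(EQUAL, 0x05) &&
               !taken(GREATER, 0x05) && !taken(UNORDERED, 0x05));
        return 0;
    }
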
+
+    void Assembler::patch(GuardRecord *lr)
+    {
+        Fragment *frag = lr->target;
+		NanoAssert(frag->fragEntry);
+		NIns* was = asm_adjustBranch(lr->jmp, frag->fragEntry);
+		if (!lr->origTarget) lr->origTarget = was;
+		verbose_only(verbose_outputf("patching jump at %X to target %X (was %X)\n",(int)lr->jmp,(int)frag->fragEntry,was);)
+    }
+
+    void Assembler::unpatch(GuardRecord *lr)
+    {
+		NIns* was = asm_adjustBranch(lr->jmp, lr->origTarget);
+		(void)was;
+		verbose_only(verbose_outputf("unpatching jump at %X to original target %X (was %X)\n",(int)lr->jmp,(int)lr->origTarget,(int)was);)
+    }
+
+    NIns* Assembler::asm_exit(SideExit *exit)
+    {
+		NIns* at = 0;
+		if (!_branchStateMap->get(exit))
+		{
+			at = asm_leave_trace(exit);
+		}
+		else
+		{
+			RegAlloc* captured = _branchStateMap->get(exit);
+			verbose_only(verbose_outputf("merged trunk with branch for SID %d",exit->sid);)			
+			mergeRegisterState(*captured);
+			verbose_only(verbose_outputf("merging trunk with branch for SID %d",exit->sid);)			
+			at = exit->target->fragEntry;
+			NanoAssert(at);
+		}
+		return at;
+	}
+	
+	NIns* Assembler::asm_leave_trace(SideExit* exit)
+	{
+        verbose_only(bool priorVerbose = _verbose; )
+		verbose_only( _verbose = verbose_enabled() && _frago->core()->config.verbose_exits; )
+        verbose_only( int32_t nativeSave = _stats.native );
+		verbose_only(verbose_outputf("--------------------------------------- end exit block SID %d",exit->sid);)
+
+		RegAlloc capture = _allocator;
+
+        // this point is unreachable.  so free all the registers.
+		// if an instruction has a stack entry we will leave it alone,
+		// otherwise we free it entirely.  mergeRegisterState will restore.
+		releaseRegisters();
+		
+		swapptrs();
+		_inExit = true;
+		
+		//verbose_only( verbose_outputf("         LIR_xend swapptrs, _nIns is now %08X(%08X), _nExitIns is now %08X(%08X)",_nIns, *_nIns,_nExitIns,*_nExitIns) );
+		debug_only( _sv_fpuStkDepth = _fpuStkDepth; _fpuStkDepth = 0; )
+
+		GuardRecord *lr = nFragExit(exit); (void)lr;
+		verbose_only( if (lr) lr->gid = ++gid; )
+
+		mergeRegisterState(capture);
+
+		// this can be useful for breaking whenever an exit is taken
+		//INT3();
+		//NOP();
+
+		// we are done producing the exit logic for the guard so demark where our exit block code begins
+		NIns* jmpTarget = _nIns;	 // target in exit path for our mainline conditional jump 
+
+		// swap back pointers, effectively storing the last location used in the exit path
+		swapptrs();
+		_inExit = false;
+		
+		//verbose_only( verbose_outputf("         LIR_xt/xf swapptrs, _nIns is now %08X(%08X), _nExitIns is now %08X(%08X)",_nIns, *_nIns,_nExitIns,*_nExitIns) );
+		verbose_only( verbose_outputf("        %p:",jmpTarget);)
+		verbose_only( verbose_outputf("--------------------------------------- exit block (LIR_xt|LIR_xf)") );
+
+#ifdef NANOJIT_IA32
+		NanoAssertMsgf(_fpuStkDepth == _sv_fpuStkDepth, ("LIR_xtf, _fpuStkDepth=%d, expect %d\n",_fpuStkDepth, _sv_fpuStkDepth));
+		debug_only( _fpuStkDepth = _sv_fpuStkDepth; _sv_fpuStkDepth = 9999; )
+#endif
+
+        verbose_only( _verbose = priorVerbose; )
+        verbose_only(_stats.exitnative += (_stats.native-nativeSave));
+
+        return jmpTarget;
+    }
+	
+	bool Assembler::ignoreInstruction(LInsp ins)
+	{
+        LOpcode op = ins->opcode();
+        if (ins->isStore() || op == LIR_loop || ins->isArg())
+            return false;
+	    return getresv(ins) == 0;
+	}
+
+	NIns* Assembler::beginAssembly(RegAllocMap* branchStateMap)
+	{
+		_activation.lowwatermark = 1;
+		_activation.tos = _activation.lowwatermark;
+		_activation.highwatermark = _activation.tos;
+		
+		counter_reset(native);
+		counter_reset(exitnative);
+		counter_reset(steals);
+		counter_reset(spills);
+		counter_reset(remats);
+
+		setError(None);
+
+		// native code gen buffer setup
+		nativePageSetup();
+		
+		// make sure we got at least one page of memory
+		if (error()) return 0;
+			
+        _epilogue = genEpilogue(SavedRegs);
+		_branchStateMap = branchStateMap;
+		
+		verbose_only( verbose_outputf("        %p:",_nIns) );
+		verbose_only( verbose_output("        epilogue:") );
+		return _epilogue;
+	}
+	
+	NIns* Assembler::assemble(Fragment* frag)
+	{
+		if (error()) return 0;	
+		AvmCore *core = _frago->core();
+		GC *gc = core->gc;
+        _thisfrag = frag;
+
+		// set up backwards pipeline: assembler -> StoreFilter -> LirReader
+		LirReader bufreader(frag->lastIns);
+		StoreFilter storefilter(&bufreader, gc, this, 
+			frag->param0, frag->sp, frag->rp);
+		DeadCodeFilter deadfilter(&storefilter, this);
+		LirFilter* rdr = &deadfilter;
+		verbose_only(
+			VerboseBlockReader vbr(rdr, this, frag->lirbuf->names);
+			if (verbose_enabled())
+				rdr = &vbr;
+		)
+
+		verbose_only(_thisfrag->compileNbr++; )
+		verbose_only(_frago->_stats.compiles++; )
+		_latestGuard = 0;
+		_inExit = false;		
+		NIns* loopJump =  gen(rdr);
+		frag->fragEntry = _nIns;
+		frag->outbound = core->config.tree_opt? _latestGuard : 0;
+		//fprintf(stderr, "assemble frag %X entry %X\n", (int)frag, (int)frag->fragEntry);
+		return loopJump;
+	}
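
The "backwards pipeline" comment describes the reader-filter chain: each filter wraps another reader and transforms the instruction stream as the assembler pulls from the outermost one, walking the LIR buffer from last instruction to first. A stripped-down sketch of that chaining pattern, with hypothetical types standing in for LirFilter and friends:

    // Illustrative sketch only; 'Reader' and 'DropPrefix' are hypothetical
    // stand-ins for LirFilter and the concrete filters above.
    struct Reader {
        virtual ~Reader() {}
        virtual const char* read() = 0;               // 0 means end of stream
    };

    struct Filter : Reader {                          // a filter is itself a reader
        Reader* in;
        explicit Filter(Reader* r) : in(r) {}
        const char* read() { return in->read(); }     // default: pass through
    };

    struct DropPrefix : Filter {                      // drops items it is not
        char prefix;                                  // interested in, the way
        DropPrefix(Reader* r, char p)                 // DeadCodeFilter skips
            : Filter(r), prefix(p) {}                 // unreserved instructions
        const char* read() {
            const char* i;
            while ((i = in->read()) != 0 && i[0] == prefix)
                /* skip */;
            return i;
        }
    };
    // The consumer then just loops:  for (i = rdr->read(); i; i = rdr->read()) ...
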
+
+	NIns* Assembler::endAssembly(Fragment* frag, NInsList& loopJumps)
+	{
+		while(!loopJumps.isEmpty())
+		{
+			NIns* loopJump = (NIns*)loopJumps.removeLast();
+			nPatchBranch(loopJump, _nIns);
+		}
+
+		NIns* patchEntry = genPrologue(SavedRegs);
+		verbose_only( verbose_outputf("        %p:",_nIns); )
+		verbose_only( verbose_output("        prologue"); )
+
+		// something bad happened?
+		if (!error())
+		{
+			// check for resource leaks 
+			debug_only( 
+				for(uint32_t i=_activation.lowwatermark;i<_activation.highwatermark; i++) {
+					NanoAssertMsgf(_activation.entry[i] == 0, ("frame entry %d wasn't freed\n",-4*i)); 
+				}
+			)
+
+            frag->fragEntry = patchEntry;
+			frag->setCode(_nIns);
+			//fprintf(stderr, "endAssembly frag %X entry %X\n", (int)frag, (int)frag->fragEntry);
+
+		}
+		
+		AvmAssertMsg(error() || _fpuStkDepth == 0, ("_fpuStkDepth %d\n",_fpuStkDepth));
+
+		internalReset();  // clear the reservation tables and regalloc
+		_branchStateMap = 0;
+		
+		#ifdef UNDER_CE
+		// If we've modified the code, we need to flush so we don't end up trying 
+		// to execute junk
+		FlushInstructionCache(GetCurrentProcess(), NULL, NULL);
+		#endif
+		
+		return patchEntry;
+	}
+	
+	void Assembler::copyRegisters(RegAlloc* copyTo)
+	{
+		*copyTo = _allocator;
+	}
+
+	void Assembler::releaseRegisters()
+	{
+		for (Register r = FirstReg; r <= LastReg; r = nextreg(r))
+		{
+			LIns *i = _allocator.getActive(r);
+			if (i)
+			{
+				// clear reg allocation, preserve stack allocation.
+				Reservation* resv = getresv(i);
+				NanoAssert(resv != 0);
+				_allocator.retire(r);
+				if (r == resv->reg)
+					resv->reg = UnknownReg;
+
+				if (!resv->arIndex && resv->reg == UnknownReg)
+				{
+					reserveFree(i);
+				}
+			}
+		}
+	}
+	
+	NIns* Assembler::gen(LirFilter* reader)
+	{
+		NIns *loopJump = 0;
+		const CallInfo* call = 0;		// current call being emitted; if any
+		uint32_t iargs = 0;
+		uint32_t fargs = 0;
+		int32_t stackUsed = 0;	// stack space used for call
+
+		// trace must start with LIR_x or LIR_loop
+		NanoAssert(reader->pos()->isop(LIR_x) || reader->pos()->isop(LIR_loop));
+		 
+		for (LInsp ins = reader->read(); ins != 0 && !error(); ins = reader->read())
+		{
+    		Reservation *rR = getresv(ins);
+			LOpcode op = ins->opcode();			
+			switch(op)
+			{
+				default:
+					NanoAssertMsg(false, "unsupported LIR instruction");
+					break;
+					
+				case LIR_short:
+				case LIR_int:
+				{
+					Register rr = prepResultReg(ins, GpRegs);
+					int32_t val;
+					if (op == LIR_int)
+						val = ins->imm32();
+					else
+						val = ins->imm16();
+					if (val == 0)
+						XOR(rr,rr);
+					else
+						LDi(rr, val);
+					break;
+				}
+				case LIR_quad:
+				{
+#ifdef NANOJIT_IA32
+					Register rr = rR->reg;
+                    if (rr != UnknownReg)
+                    {
+						// @todo -- add special-cases for 0 and 1
+						_allocator.retire(rr);
+						rR->reg = UnknownReg;
+						NanoAssert((rmask(rr) & FpRegs) != 0);
+
+						const double d = ins->constvalf();
+                        if (rmask(rr) & XmmRegs) {
+						    if (d == 0.0) {
+								XORPDr(rr, rr);
+						    } else if (d == 1.0) {
+								// 1.0 is extremely frequent and worth special-casing!
+								static const double k_ONE = 1.0;
+								LDSDm(rr, &k_ONE);
+							} else {
+								findMemFor(ins);
+								const int d = disp(rR);
+                                LDQ(rr, d, FP);
+							}
+                        } else {
+						    if (d == 0.0) {
+							    FLDZ();
+						    } else if (d == 1.0) {
+							    FLD1();
+						    } else {
+							    findMemFor(ins);
+							    int d = disp(rR);
+							    FLDQ(d,FP);
+						    }
+                        }
+                    }
+#endif
+					// @todo, if we used xor, ldsd, fldz, etc above, we don't need mem here
+                    int d = disp(rR);
+					freeRsrcOf(ins, false);
+					if (d)
+					{
+						const int32_t* p = (const int32_t*) (ins-2);
+						STi(FP,d+4,p[1]);
+						STi(FP,d,p[0]);
+					}
+					break;
+				}
+				case LIR_callh:
+				{
+					// return result of quad-call in register
+					prepResultReg(ins, rmask(retRegs[1]));
+                    // if hi half was used, we must use the call to ensure it happens
+                    findRegFor(ins->oprnd1(), rmask(retRegs[0]));
+					break;
+				}
+				case LIR_param:
+				{
+					Register w = Register(ins->imm8());
+                    NanoAssert(w != UnknownReg);
+					// incoming arg in register
+					prepResultReg(ins, rmask(w));
+					break;
+				}
+				case LIR_qlo:
+                {
+					LIns *q = ins->oprnd1();
+#ifdef NANOJIT_IA32
+                    if (sse2) {
+                        Reservation *resv = getresv(ins);
+   		                Register rr = resv->reg;
+                        if (rr == UnknownReg) {
+                            // store quad in spill loc
+                            int d = disp(resv);
+                            freeRsrcOf(ins, false);
+                            Register qr = findRegFor(q, XmmRegs);
+                            STD(d, FP, qr);
+                        } else {
+    		                freeRsrcOf(ins, false);
+                            Register qr = findRegFor(q, XmmRegs);
+                            MOVD(rr,qr);
+                        }
+                    }
+					else
+#endif
+					{
+    					Register rr = prepResultReg(ins, GpRegs);
+				        int d = findMemFor(q);
+				        LD(rr, d, FP);
+                    }
+					break;
+                }
+				case LIR_qhi:
+				{
+					Register rr = prepResultReg(ins, GpRegs);
+					LIns *q = ins->oprnd1();
+					int d = findMemFor(q);
+				    LD(rr, d+4, FP);
+					break;
+				}
+
+				case LIR_cmov:
+				{
+					LIns* condval = ins->oprnd1();
+					NanoAssert(condval->isCmp());
+
+					LIns* values = ins->oprnd2();
+
+					// note that 'LIR_eq' is just a placeholder to hold two values...
+					// can't use the 123 form because we need space for reservation
+					NanoAssert(values->opcode() == LIR_2);
+					LIns* iftrue = values->oprnd1();
+					LIns* iffalse = values->oprnd2();
+					NanoAssert(!iftrue->isQuad() && !iffalse->isQuad());
+					
+					const Register rr = prepResultReg(ins, GpRegs);
+
+					// this code assumes that neither LD nor MR nor MRcc set any of the condition flags.
+					// (This is true on Intel, is it true on all architectures?)
+					const Register iffalsereg = findRegFor(iffalse, GpRegs & ~rmask(rr));
+					switch (condval->opcode())
+					{
+						// note that these are all opposites...
+						case LIR_eq:	MRNE(rr, iffalsereg);	break;
+						case LIR_lt:	MRGE(rr, iffalsereg);	break;
+						case LIR_le:	MRG(rr, iffalsereg);	break;
+						case LIR_gt:	MRLE(rr, iffalsereg);	break;
+						case LIR_ge:	MRL(rr, iffalsereg);	break;
+						case LIR_ult:	MRAE(rr, iffalsereg);	break;
+						case LIR_ule:	MRA(rr, iffalsereg);	break;
+						case LIR_ugt:	MRBE(rr, iffalsereg);	break;
+						case LIR_uge:	MRB(rr, iffalsereg);	break;
+						debug_only( default: NanoAssert(0); break; )
+					}
+					/*const Register iftruereg =*/ findSpecificRegFor(iftrue, rr);
+					asm_cmp(condval);
+					break;
+				}
+
+				case LIR_ld:
+				case LIR_ldc:
+				case LIR_ldcb:
+				{
+					LIns* base = ins->oprnd1();
+					LIns* disp = ins->oprnd2();
+					Register rr = prepResultReg(ins, GpRegs);
+					Register ra = findRegFor(base, GpRegs);
+					int d = disp->constval();
+					if (op == LIR_ldcb)
+						LD8Z(rr, d, ra);
+					else
+						LD(rr, d, ra); 
+					break;
+				}
+
+				case LIR_ldq:
+				{
+					asm_load64(ins);
+					break;
+				}
+
+				case LIR_neg:
+				case LIR_not:
+				{
+					Register rr = prepResultReg(ins, GpRegs);
+
+					LIns* lhs = ins->oprnd1();
+					Reservation *rA = getresv(lhs);
+					// if this is last use of lhs in reg, we can re-use result reg
+					Register ra;
+					if (rA == 0 || (ra=rA->reg) == UnknownReg)
+						ra = findSpecificRegFor(lhs, rr);
+					// else, rA already has a register assigned.
+
+					if (op == LIR_not)
+						NOT(rr); 
+					else
+						NEG(rr); 
+
+					if ( rr != ra ) 
+						MR(rr,ra); 
+					break;
+				}
+				
+				case LIR_qjoin:
+				{
+                    asm_qjoin(ins);
+					break;
+				}
+
+				case LIR_add:
+				case LIR_sub:
+				case LIR_mul:
+				case LIR_and:
+				case LIR_or:
+				case LIR_xor:
+				case LIR_lsh:
+				case LIR_rsh:
+				case LIR_ush:
+				{
+                    LInsp lhs = ins->oprnd1();
+                    LInsp rhs = ins->oprnd2();
+
+					Register rb = UnknownReg;
+					RegisterMask allow = GpRegs;
+					if (lhs != rhs && (op == LIR_mul || !rhs->isconst()))
+					{
+#ifdef NANOJIT_IA32
+						if (op == LIR_lsh || op == LIR_rsh || op == LIR_ush)
+							rb = findSpecificRegFor(rhs, ECX);
+						else
+#endif
+							rb = findRegFor(rhs, allow);
+						allow &= ~rmask(rb);
+					}
+
+					Register rr = prepResultReg(ins, allow);
+					Reservation* rA = getresv(lhs);
+					Register ra;
+					// if this is last use of lhs in reg, we can re-use result reg
+					if (rA == 0 || (ra = rA->reg) == UnknownReg)
+						ra = findSpecificRegFor(lhs, rr);
+					// else, rA already has a register assigned.
+
+					if (!rhs->isconst() || op == LIR_mul)
+					{
+						if (lhs == rhs)
+							rb = ra;
+
+						if (op == LIR_add)
+							ADD(rr, rb);
+						else if (op == LIR_sub)
+							SUB(rr, rb);
+						else if (op == LIR_mul)
+							MUL(rr, rb);
+						else if (op == LIR_and)
+							AND(rr, rb);
+						else if (op == LIR_or)
+							OR(rr, rb);
+						else if (op == LIR_xor)
+							XOR(rr, rb);
+						else if (op == LIR_lsh)
+							SHL(rr, rb);
+						else if (op == LIR_rsh)
+							SAR(rr, rb);
+						else if (op == LIR_ush)
+							SHR(rr, rb);
+						else
+							NanoAssertMsg(0, "Unsupported");
+					}
+					else
+					{
+						int c = rhs->constval();
+						if (op == LIR_add) {
+#ifdef NANOJIT_IA32
+							if (ra != rr) {
+								LEA(rr, c, ra);
+								ra = rr; // suppress mov
+							} else
+#endif
+							{
+								ADDi(rr, c); 
+							}
+						} else if (op == LIR_sub) {
+#ifdef NANOJIT_IA32
+							if (ra != rr) {
+								LEA(rr, -c, ra);
+								ra = rr;
+							} else
+#endif
+							{
+								SUBi(rr, c); 
+							}
+						} else if (op == LIR_and)
+							ANDi(rr, c);
+						else if (op == LIR_or)
+							ORi(rr, c);
+						else if (op == LIR_xor)
+							XORi(rr, c);
+						else if (op == LIR_lsh)
+							SHLi(rr, c);
+						else if (op == LIR_rsh)
+							SARi(rr, c);
+						else if (op == LIR_ush)
+							SHRi(rr, c);
+						else
+							NanoAssertMsg(0, "Unsupported");
+					}
+
+					if ( rr != ra ) 
+						MR(rr,ra);
+					break;
+				}
+#ifndef NJ_SOFTFLOAT
+				case LIR_fneg:
+				{
+					if (sse2)
+					{
+                        LIns *lhs = ins->oprnd1();
+
+                        Register rr = prepResultReg(ins, XmmRegs);
+                        Reservation *rA = getresv(lhs);
+                        Register ra;
+
+					    // if this is last use of lhs in reg, we can re-use result reg
+					    if (rA == 0 || (ra = rA->reg) == UnknownReg)
+						    ra = findSpecificRegFor(lhs, rr);
+					    // else, rA already has a register assigned.
+
+						static const AVMPLUS_ALIGN16(uint32_t) negateMask[] = {0,0x80000000,0,0};
+						XORPD(rr, negateMask);
+
+                        if (rr != ra)
+                            MOVSD(rr, ra);
+					}
+					else
+					{
+						Register rr = prepResultReg(ins, FpRegs);
+
+						LIns* lhs = ins->oprnd1();
+
+						// lhs into reg, prefer same reg as result
+						Reservation* rA = getresv(lhs);
+						// if this is last use of lhs in reg, we can re-use result reg
+						if (rA == 0 || rA->reg == UnknownReg)
+							findSpecificRegFor(lhs, rr);
+						// else, rA already has a different reg assigned
+
+						NanoAssert(getresv(lhs)!=0 && getresv(lhs)->reg==FST0);
+						// assume that the lhs is in ST(0) and rhs is on stack
+						FCHS();
+
+						// if we had more than one fpu reg, this is where
+						// we would move ra into rr if rr != ra.
+					}
+					break;
+				}
+				case LIR_fadd:
+				case LIR_fsub:
+				case LIR_fmul:
+				case LIR_fdiv:
+				{
+                    if (sse2) 
+                    {
+                        LIns *lhs = ins->oprnd1();
+                        LIns *rhs = ins->oprnd2();
+
+                        RegisterMask allow = XmmRegs;
+                        Register rb = UnknownReg;
+                        if (lhs != rhs) {
+                            rb = findRegFor(rhs,allow);
+                            allow &= ~rmask(rb);
+                        }
+
+                        Register rr = prepResultReg(ins, allow);
+                        Reservation *rA = getresv(lhs);
+                        Register ra;
+
+					    // if this is last use of lhs in reg, we can re-use result reg
+					    if (rA == 0 || (ra = rA->reg) == UnknownReg)
+						    ra = findSpecificRegFor(lhs, rr);
+					    // else, rA already has a register assigned.
+
+                        if (lhs == rhs)
+                            rb = ra;
+
+                        if (op == LIR_fadd)
+                            ADDSD(rr, rb);
+                        else if (op == LIR_fsub)
+                            SUBSD(rr, rb);
+                        else if (op == LIR_fmul)
+                            MULSD(rr, rb);
+                        else //if (op == LIR_fdiv)
+                            DIVSD(rr, rb);
+
+                        if (rr != ra)
+                            MOVSD(rr, ra);
+                    }
+                    else
+                    {
+                        // we swap lhs/rhs on purpose here, works out better
+					    // if you only have one fpu reg.  use divr/subr.
+					    LIns* rhs = ins->oprnd1();
+					    LIns* lhs = ins->oprnd2();
+                        Register rr = prepResultReg(ins, rmask(FST0));
+
+					    // make sure rhs is in memory
+					    int db = findMemFor(rhs);
+
+					    // lhs into reg, prefer same reg as result
+					    Reservation* rA = getresv(lhs);
+					    // last use of lhs in reg, can reuse rr
+					    if (rA == 0 || rA->reg == UnknownReg)
+						    findSpecificRegFor(lhs, rr);
+					    // else, rA already has a different reg assigned
+
+					    NanoAssert(getresv(lhs)!=0 && getresv(lhs)->reg==FST0);
+					    // assume that the lhs is in ST(0) and rhs is on stack
+					    if (op == LIR_fadd)
+						    { FADD(db, FP); }
+					    else if (op == LIR_fsub)
+						    { FSUBR(db, FP); }
+					    else if (op == LIR_fmul)
+						    { FMUL(db, FP); }
+					    else if (op == LIR_fdiv)
+						    { FDIVR(db, FP); }
+                    }
+                    break;
+				}
+				case LIR_i2f:
+				{
+					// where our result goes
+					Register rr = prepResultReg(ins, FpRegs);
+                    if (rmask(rr) & XmmRegs) 
+					{
+                        // todo support int value in memory
+                        Register gr = findRegFor(ins->oprnd1(), GpRegs);
+						CVTSI2SD(rr, gr);
+                    } 
+					else 
+					{
+       					int d = findMemFor(ins->oprnd1());
+						FILD(d, FP);
+                    }
+					break;
+				}
+				case LIR_u2f:
+				{
+					// where our result goes
+					Register rr = prepResultReg(ins, FpRegs);
+					const int disp = -8;
+					const Register base = ESP;
+                    if (rmask(rr) & XmmRegs) 
+					{
+						// don't call findRegFor, we want a reg we can stomp on for a very short time,
+						// not a reg that will continue to be associated with the LIns
+						Register gr = registerAlloc(GpRegs);
+
+						// technique inspired by gcc disassembly 
+						// Edwin explains it:
+						//
+						// gr is 0..2^32-1
+						//
+						//	   sub gr,0x80000000
+						//
+						// now gr is -2^31..2^31-1, i.e. the range of int, but not the same value
+						// as before
+						//
+						//	   cvtsi2sd rr,gr
+						//
+						// rr is now a double with the int value range
+						//
+						//     addsd rr, 2147483648.0
+						//
+						// adding back double(0x80000000) makes the range 0..2^32-1.  
+						
+						static const double k_NEGONE = 2147483648.0;
+						ADDSDm(rr, &k_NEGONE);
+						CVTSI2SD(rr, gr);
+
+						Reservation* resv = getresv(ins->oprnd1());
+						Register xr;
+						if (resv && (xr = resv->reg) != UnknownReg && (rmask(xr) & GpRegs))
+						{
+#ifdef NANOJIT_IA32
+							LEA(gr, 0x80000000, xr);
+#else
+							SUBi(gr, 0x80000000);
+							MR(gr, xr);
+#endif
+						}
+						else
+						{
+							const int d = findMemFor(ins->oprnd1());
+							SUBi(gr, 0x80000000);
+							LD(gr, d, FP);
+						}
+						
+						// ok, we're done with it
+						_allocator.addFree(gr); 
+                    } 
+					else 
+					{
+						Register gr = findRegFor(ins->oprnd1(), GpRegs);
+						NanoAssert(rr == FST0);
+						FILDQ(disp, base);
+						STi(base, disp+4, 0);	// high 32 bits = 0
+						ST(base, disp, gr);		// low 32 bits = unsigned value
+                    }
+					break;
+				}
+#endif // NJ_SOFTFLOAT
+				case LIR_st:
+				case LIR_sti:
+				{
+					LIns* value = ins->oprnd1();
+					LIns* base = ins->oprnd2();
+					int dr = ins->immdisp();
+                    if (!value->isQuad()) 
+                        asm_store32(value, dr, base);
+					else
+						asm_store64(value, dr, base);
+                    break;
+				}
+				case LIR_xt:
+				case LIR_xf:
+				{
+                    NIns* exit = asm_exit(ins->exit());
+	
+					// we only support cmp with guard right now, also assume it is 'close' and only emit the branch
+					LIns* cond = ins->oprnd1();
+					LOpcode condop = cond->opcode();
+					NanoAssert(cond->isCmp());
+#ifndef NJ_SOFTFLOAT
+					bool fp = cond->oprnd1()->isQuad();
+
+                    if (fp)
+					{
+						if (op == LIR_xf)
+							JP(exit);
+						else
+							JNP(exit);
+						asm_fcmp(cond);
+                        break;
+					}
+#endif
+					// produce the branch
+					if (op == LIR_xf)
+					{
+						if (condop == LIR_eq)
+							JNE(exit);
+						else if (condop == LIR_lt)
+							JNL(exit);
+						else if (condop == LIR_le)
+							JNLE(exit);
+						else if (condop == LIR_gt)
+							JNG(exit);
+						else if (condop == LIR_ge)
+							JNGE(exit);
+						else if (condop == LIR_ult)
+							JNB(exit);
+						else if (condop == LIR_ule)
+							JNBE(exit);
+						else if (condop == LIR_ugt)
+							JNA(exit);
+						else //if (condop == LIR_uge)
+							JNAE(exit);
+					}
+					else // op == LIR_xt
+					{
+						if (condop == LIR_eq)
+							JE(exit);
+						else if (condop == LIR_lt)
+							JL(exit);
+						else if (condop == LIR_le)
+							JLE(exit);
+						else if (condop == LIR_gt)
+							JG(exit);
+						else if (condop == LIR_ge)
+							JGE(exit);
+						else if (condop == LIR_ult)
+							JB(exit);
+						else if (condop == LIR_ule)
+							JBE(exit);
+						else if (condop == LIR_ugt)
+							JA(exit);
+						else //if (condop == LIR_uge)
+							JAE(exit);
+					}
+					asm_cmp(cond);
+					break;
+				}
+				case LIR_x:
+				{
+		            verbose_only(verbose_output(""));
+					// generate the side exit branch on the main trace.
+                    NIns *exit = asm_exit(ins->exit());
+					JMP( exit ); 
+					break;
+				}
+				case LIR_loop:
+				{
+					JMP_long_placeholder(); // jump to SOT	
+					verbose_only( if (_verbose && _outputCache) { _outputCache->removeLast(); outputf("         jmp   SOT"); } );
+					
+					loopJump = _nIns;
+
+                    #ifdef NJ_VERBOSE
+                    // branching from this frag to ourself.
+                    if (_frago->core()->config.show_stats)
+                        LDi(argRegs[1], int((Fragment*)_thisfrag));
+                    #endif
+
+					// restore parameter 1, the only one we use
+                    LInsp param0 = _thisfrag->param0;
+                    Register a0 = Register(param0->imm8());
+					findSpecificRegFor(param0, a0); 
+					break;
+				}
+				case LIR_eq:
+				case LIR_le:
+				case LIR_lt:
+				case LIR_gt:
+				case LIR_ge:
+				case LIR_ult:
+				case LIR_ule:
+				case LIR_ugt:
+				case LIR_uge:
+				{
+					// only want certain regs 
+					uint32_t allow = AllowableFlagRegs;
+
+					Register r = prepResultReg(ins, allow);
+					// SETcc only sets low 8 bits, so extend 
+					MOVZX8(r,r);
+#ifndef NJ_SOFTFLOAT
+					if (ins->oprnd1()->isQuad())
+                    {
+   						SETNP(r);
+						asm_fcmp(ins);
+                        break;
+					}
+#endif
+					if (op == LIR_eq)
+						SETE(r);
+					else if (op == LIR_lt)
+						SETL(r);
+					else if (op == LIR_le)
+						SETLE(r);
+					else if (op == LIR_gt)
+						SETG(r);
+					else if (op == LIR_ge)
+						SETGE(r);
+					else if (op == LIR_ult)
+						SETB(r);
+					else if (op == LIR_ule)
+						SETBE(r);
+					else if (op == LIR_ugt)
+						SETA(r);
+					else // if (op == LIR_uge)
+						SETAE(r);
+					asm_cmp(ins);
+					break;
+				}
+				case LIR_ref:
+				{
+					// ref arg - use lea
+					LIns *p = ins->oprnd1();
+					if (ins->resv())
+					{
+						// arg in specific reg
+						Register r = imm2register(ins->resv());
+						int da = findMemFor(p);
+						LEA(r, da, FP);
+					}
+					else
+					{
+						NanoAssert(0); // not supported
+					}
+					++iargs;
+					nArgEmitted(call, 0, iargs, fargs);
+					break;
+				}
+				case LIR_arg:
+				{
+					LIns* p = ins->oprnd1();
+					if (ins->resv())
+					{
+						// arg goes in specific register
+						Register r = imm2register(ins->resv());
+						if (p->isconst())
+							LDi(r, p->constval());
+						else
+							findSpecificRegFor(p, r);
+					}
+					else
+					{
+						asm_pusharg(p);
+						stackUsed += 1;
+					}
+					++iargs;
+					nArgEmitted(call, stackUsed, iargs, fargs);
+					break;
+				}
+#ifdef NANOJIT_IA32
+				case LIR_farg:
+				{
+					LIns* p = ins->oprnd1();
+					Register r = findRegFor(p, FpRegs);
+                    if (rmask(r) & XmmRegs) {
+                        STQ(0, SP, r); 
+                    } else {
+    					FSTPQ(0, SP);
+                    }
+					PUSHr(ECX); // 2*pushr is smaller than sub
+					PUSHr(ECX);
+					stackUsed += 2;
+					++fargs;
+					nArgEmitted(call, stackUsed, iargs, fargs);
+					break;
+				}
+#endif
+
+#ifndef NJ_SOFTFLOAT
+				case LIR_fcall:
+#endif
+				case LIR_call:
+				{
+					const FunctionID fid = (FunctionID) ins->imm8();
+				// bogus assertion: zero is a legal value right now, with fmod() in that slot
+				//	NanoAssertMsg(fid!=0, "Function does not exist in the call table");
+					call = &_functions[ fid ];
+					iargs = 0;
+					fargs = 0;
+
+                    Register rr = UnknownReg;
+#ifndef NJ_SOFTFLOAT
+                    if (op == LIR_fcall)
+                    {
+                        if (rR) {
+                            if ((rr=rR->reg) != UnknownReg && (rmask(rr) & XmmRegs))
+                                evict(rr);
+                        }
+                        rr = FST0;
+						prepResultReg(ins, rmask(rr));
+                    }
+                    else
+#endif
+                    {
+                        rr = retRegs[0];
+						prepResultReg(ins, rmask(rr));
+                    }
+
+					// do this after we've handled the call result, so we don't
+					// force the call result to be spilled unnecessarily.
+					restoreCallerSaved();
+
+					nPostCallCleanup(call);
+			#ifdef NJ_VERBOSE
+					CALL(call->_address, call->_name);
+			#else
+					CALL(call->_address, "");
+			#endif
+
+					stackUsed = 0;
+					LirReader argReader(reader->pos());
+
+#ifdef NANOJIT_ARM
+					// pre-assign registers R0-R3 for arguments (if they fit)
+					int regsUsed = 0;
+					for (LInsp a = argReader.read(); a->isArg(); a = argReader.read())
+					{
+						if (a->isop(LIR_arg) || a->isop(LIR_ref))
+						{
+							a->setresv((int)R0 + 1 + regsUsed);
+							regsUsed++;
+						}
+						if (regsUsed>=4)
+							break;
+					}
+#endif
+#ifdef NANOJIT_IA32
+					debug_only( if (rr == FST0) fpu_push(); )
+					// make sure fpu stack is empty before call (restoreCallerSaved)
+					NanoAssert(_allocator.isFree(FST0));
+					// note: this code requires that LIR_ref arguments be one of the first two arguments
+					// pre-assign registers to the first 2 4B args
+					const uint32_t iargs = call->count_iargs();
+					const int max_regs = (iargs < 2) ? iargs : 2;
+					int n = 0;
+					for(LIns* a = argReader.read(); a->isArg() && n<max_regs; a = argReader.read())
+					{
+						if (a->isop(LIR_arg)||a->isop(LIR_ref))
+						{
+							a->setresv(argRegs[n++]); // tell LIR_arg what reg to use
+						}
+					}
+#endif
+				}
+			}
+
+			// check that all is well (don't check in exit paths since it's more complicated)
+			debug_only( pageValidate(); )
+			debug_only( resourceConsistencyCheck();  )
+		}
+		return loopJump;
+	}
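
The LIR_u2f case in gen() above leans on the identity its comment spells out: subtracting 0x80000000 with 32-bit wraparound maps an unsigned value into signed-int range, and adding 2^31 back as a double after the conversion recovers the original value exactly, since every 32-bit integer is representable in a double. A quick arithmetic check of that identity (plain C++, two's-complement narrowing assumed; no SSE involved):

    #include <cassert>
    #include <cstdint>

    // Hypothetical reference function mirroring the emitted sequence:
    // sub gr,0x80000000 (32-bit wraparound), cvtsi2sd, addsd 2^31.
    static double u2f_reference(uint32_t u) {
        int32_t biased = static_cast<int32_t>(u - 0x80000000u);
        return static_cast<double>(biased) + 2147483648.0;
    }

    int main() {
        assert(u2f_reference(0u)          == 0.0);
        assert(u2f_reference(1u)          == 1.0);
        assert(u2f_reference(0x7fffffffu) == 2147483647.0);
        assert(u2f_reference(0x80000000u) == 2147483648.0);
        assert(u2f_reference(0xffffffffu) == 4294967295.0);
        return 0;
    }
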
+
+	uint32_t Assembler::arFree(uint32_t idx)
+	{
+		if (idx > 0 && _activation.entry[idx] == _activation.entry[idx+stack_direction(1)])
+			_activation.entry[idx+stack_direction(1)] = 0;  // clear 2 slots for doubles 
+		_activation.entry[idx] = 0;
+		return 0;
+	}
+
+#ifdef NJ_VERBOSE
+	void Assembler::printActivationState()
+	{
+		bool verbose_activation = false;
+		if (!verbose_activation)
+			return;
+			
+#ifdef NANOJIT_ARM
+		verbose_only(
+			if (_verbose) {
+				char* s = &outline[0];
+				memset(s, ' ', 51);  s[51] = '\0';
+				s += strlen(s);
+				sprintf(s, " SP ");
+				s += strlen(s);
+				for(uint32_t i=_activation.lowwatermark; i<_activation.tos;i++) {
+					LInsp ins = _activation.entry[i];
+					if (ins && ins !=_activation.entry[i+1]) {
+						sprintf(s, "%d(%s) ", 4*i, _thisfrag->lirbuf->names->formatRef(ins));
+						s += strlen(s);
+					}
+				}
+				output(&outline[0]);
+			}
+		)
+#else
+		verbose_only(
+			char* s = &outline[0];
+			if (_verbose) {
+				memset(s, ' ', 51);  s[51] = '\0';
+				s += strlen(s);
+				sprintf(s, " ebp ");
+				s += strlen(s);
+
+				for(uint32_t i=_activation.lowwatermark; i<_activation.tos;i++) {
+					LInsp ins = _activation.entry[i];
+					if (ins /* && _activation.entry[i]!=_activation.entry[i+1]*/) {
+						sprintf(s, "%d(%s) ", -4*i,_thisfrag->lirbuf->names->formatRef(ins));
+						s += strlen(s);
+					}
+				}
+				output(&outline[0]);
+			}
+		)
+#endif
+	}
+#endif
+	
+	uint32_t Assembler::arReserve(LIns* l)
+	{
+		NanoAssert(!l->isop(LIR_tramp));
+
+		//verbose_only(printActivationState());
+		const bool quad = l->isQuad();
+		const int32_t n = _activation.tos;
+		int32_t start = _activation.lowwatermark;
+		int32_t i = 0;
+		NanoAssert(start>0);
+		if (n >= NJ_MAX_STACK_ENTRY-2)
+		{	
+			setError(StackFull);
+			return start;
+		}
+		else if (quad)
+		{
+			if ( (start&1)==1 ) start++;  // even 
+			for(i=start; i <= n; i+=2)
+			{
+				if ( (_activation.entry[i+stack_direction(1)] == 0) && (i==n || (_activation.entry[i] == 0)) )
+					break;   //  for fp we need 2 adjacent aligned slots
+			}
+		}
+		else
+		{
+			for(i=start; i < n; i++)
+			{
+				if (_activation.entry[i] == 0)
+					break;   // not being used
+			}
+		}
+
+		int32_t inc = ((i-n+1) < 0) ? 0 : (i-n+1);
+		if (quad && stack_direction(1)>0) inc++;
+		_activation.tos += inc;
+		_activation.highwatermark += inc;
+
+		// place the entry in the table and mark the instruction with it
+		_activation.entry[i] = l;
+		if (quad) _activation.entry[i+stack_direction(1)] = l;
+		return i;
+	}
+
+	void Assembler::restoreCallerSaved()
+	{
+		// generate code to restore callee saved registers 
+		// @todo speed this up
+		RegisterMask scratch = ~SavedRegs;
+		for (Register r = FirstReg; r <= LastReg; r = nextreg(r))
+		{
+			if ((rmask(r) & scratch) && _allocator.getActive(r))
+            {
+				evict(r);
+            }
+		}
+	}
+	
+	/**
+	 * Merge the current state of the registers with a previously stored version
+	 */
+	void Assembler::mergeRegisterState(RegAlloc& saved)
+	{
+		// evictions and pops first
+		RegisterMask skip = 0;
+		for (Register r=FirstReg; r <= LastReg; r = nextreg(r))
+		{
+			LIns * curins = _allocator.getActive(r);
+			LIns * savedins = saved.getActive(r);
+			if (curins == savedins)
+			{
+				verbose_only( if (curins) verbose_outputf("skip %s", regNames[r]); )
+				skip |= rmask(r);
+			}
+			else 
+			{
+				if (curins)
+					evict(r);
+				
+    			#ifdef NANOJIT_IA32
+				if (savedins && (rmask(r) & x87Regs))
+					FSTP(r);
+				#endif
+			}
+		}
+
+		// now reassign mainline registers
+		for (Register r=FirstReg; r <= LastReg; r = nextreg(r))
+		{
+			LIns *i = saved.getActive(r);
+			if (i && !(skip&rmask(r)))
+				findSpecificRegFor(i, r);
+		}
+		debug_only(saved.used = 0);  // marker that we are no longer in exit path
+	}
+	
+	/**																 
+	 * Guard records are laid out in the exit block buffer (_nExitIns),
+	 * interspersed with the code.   Preceding the record are the native
+	 * instructions associated with the record (i.e. the exit code).
+	 * 
+	 * The layout is as follows:
+	 * 
+	 * [ native code ] [ GuardRecord1 ]
+	 * ...
+	 * [ native code ] [ GuardRecordN ]
+	 * 
+	 * The guard record 'code' field should be used to locate 
+	 * the start of the native code associated with the
+	 * exit block. N.B the code may lie in a different page 
+	 * than the guard record  
+	 * 
+	 * The last guard record is used for the unconditional jump
+	 * at the end of the trace. 
+	 * 
+	 * NOTE:  It is also not guaranteed that the native code 
+	 *        is contained on a single page.
+	 */
+	GuardRecord* Assembler::placeGuardRecord(SideExit *exit)
+	{
+		// we align the guards to 4Byte boundary
+		NIns* ptr = (NIns*)alignTo(_nIns-sizeof(GuardRecord), 4);
+		underrunProtect( (int)_nIns-(int)ptr );  // either got us a new page or there is enough space for us
+		GuardRecord* rec = (GuardRecord*) alignTo(_nIns-sizeof(GuardRecord),4);
+		rec->outgoing = _latestGuard;
+		_latestGuard = rec;
+		_nIns = (NIns*)rec;
+		rec->next = 0;
+		rec->origTarget = 0;		
+		rec->target = exit->target;
+		rec->calldepth = exit->calldepth;
+		rec->from = _thisfrag;
+		rec->exit = exit;
+		if (exit->target) 
+			exit->target->addLink(rec);
+		verbose_only( rec->compileNbr = _thisfrag->compileNbr; )
+		return rec;
+	}
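
Because the buffer grows downwards, placeGuardRecord simply rounds _nIns minus the record size down to a 4-byte boundary, makes that the record's address, and leaves _nIns pointing at the record so the exit code emitted next lands in front of it. The address arithmetic is a plain align-down; a sketch under those assumptions (hypothetical helpers, not part of the patch):

    #include <cstdint>
    #include <cstddef>

    // Round an address down to a power-of-two boundary.
    static inline uintptr_t alignDown(uintptr_t p, uintptr_t align) {
        return p & ~(align - 1);
    }

    // Carve 'recordSize' bytes out of a buffer that grows downwards from
    // 'cursor'; returns the record's address and repositions the cursor onto
    // it, so further (backwards) emission happens below the record.
    static inline uintptr_t placeRecordBelow(uintptr_t& cursor, size_t recordSize) {
        uintptr_t rec = alignDown(cursor - recordSize, 4);
        cursor = rec;
        return rec;
    }
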
+
+	void Assembler::setCallTable(const CallInfo* functions)
+	{
+		_functions = functions;
+	}
+
+	#ifdef NJ_VERBOSE
+		char Assembler::outline[8192];
+
+		void Assembler::outputf(const char* format, ...)
+		{
+			va_list args;
+			va_start(args, format);
+			outline[0] = '\0';
+			vsprintf(outline, format, args);
+			output(outline);
+		}
+
+		void Assembler::output(const char* s)
+		{
+			if (_outputCache)
+			{
+				char* str = (char*)_gc->Alloc(strlen(s)+1);
+				strcpy(str, s);
+				_outputCache->add(str);
+			}
+			else
+			{
+				_frago->core()->console << s << "\n";
+			}
+		}
+
+		void Assembler::output_asm(const char* s)
+		{
+			if (!verbose_enabled())
+				return;
+			if (*s != '^')
+				output(s);
+		}
+
+		char* Assembler::outputAlign(char *s, int col) 
+		{
+			int len = strlen(s);
+			int add = ((col-len)>0) ? col-len : 1;
+			memset(&s[len], ' ', add);
+			s[col] = '\0';
+			return &s[col];
+		}
+	#endif // verbose
+
+	#endif /* FEATURE_NANOJIT */
+
+#if defined(FEATURE_NANOJIT) || defined(NJ_VERBOSE)
+	uint32_t CallInfo::_count_args(uint32_t mask) const
+	{
+		uint32_t argc = 0;
+		uint32_t argt = _argtypes;
+		for (int i = 0; i < 5; ++i)
+		{
+			argt >>= 2;
+			argc += (argt & mask) != 0;
+		}
+		return argc;
+	}
+#endif
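
_count_args treats _argtypes as a word of 2-bit fields, one per argument, walking up to five of them; the first shift skips the low field (assumed here to describe the return value) and each remaining field is counted when it matches the supplied mask. An illustrative encoding and count, with the field codes invented for the example rather than taken from this patch:

    #include <cstdint>

    // Hypothetical 2-bit type codes; the low two bits of the packed word are
    // assumed to describe the return value and are skipped by the first shift.
    enum ArgKind : uint32_t { ARG_NONE = 0, ARG_INT = 1, ARG_FLOAT = 2 };

    static uint32_t countArgsMatching(uint32_t argtypes, uint32_t mask) {
        uint32_t count = 0;
        for (int i = 0; i < 5; ++i) {       // at most 5 arguments
            argtypes >>= 2;                 // step to the next 2-bit field
            count += (argtypes & mask) != 0;
        }
        return count;
    }

    // Example: an (int, float, int) signature with an int return could be packed as
    //   ARG_INT | (ARG_INT << 2) | (ARG_FLOAT << 4) | (ARG_INT << 6)
    // and countArgsMatching(packed, ARG_INT) reports 2 integer arguments,
    // while countArgsMatching(packed, ARG_FLOAT) reports 1.
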
+
+}
new file mode 100644
--- /dev/null
+++ b/js/src/nanojit/Fragmento.cpp
@@ -0,0 +1,617 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is [Open Source Virtual Machine].
+ *
+ * The Initial Developer of the Original Code is
+ * Adobe System Incorporated.
+ * Portions created by the Initial Developer are Copyright (C) 2004-2007
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ *   Adobe AS3 Team
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#include "nanojit.h"
+
+namespace nanojit
+{	
+	#ifdef FEATURE_NANOJIT
+
+	using namespace avmplus;
+
+	/**
+	 * This is the main control center for creating and managing fragments.
+	 */
+	Fragmento::Fragmento(AvmCore* core) : _allocList(core->GetGC())
+	{
+		_core = core;
+		GC *gc = core->GetGC();
+		_frags = new (gc) FragmentMap(gc, 128);
+		_assm = new (gc) nanojit::Assembler(this);
+		verbose_only( enterCounts = new (gc) BlockHist(gc); )
+		verbose_only( mergeCounts = new (gc) BlockHist(gc); )
+	}
+
+	Fragmento::~Fragmento()
+	{
+		debug_only( clearFrags() );
+		NanoAssert(_stats.freePages == _stats.pages);
+
+        _frags->clear();		
+		while( _allocList.size() > 0 )
+		{
+			//fprintf(stderr,"dealloc %x\n", (intptr_t)_allocList.get(_allocList.size()-1));
+			_gcHeap->Free( _allocList.removeLast() );	
+		}
+	}
+
+	Page* Fragmento::pageAlloc()
+	{
+        NanoAssert(sizeof(Page) == NJ_PAGE_SIZE);
+		if (!_pageList)
+			pagesGrow(NJ_PAGES);	// try to get more mem
+		Page *page = _pageList;
+		if (page)
+		{
+			_pageList = page->next;
+			debug_only(_stats.freePages--;)
+		}
+		//fprintf(stderr, "Fragmento::pageAlloc %X,  %d free pages of %d\n", (int)page, _stats.freePages, _stats.pages);
+		debug_only( NanoAssert(pageCount()==_stats.freePages); )
+		return page;
+	}
+	
+	void Fragmento::pageFree(Page* page)
+	{ 
+		//fprintf(stderr, "Fragmento::pageFree %X,  %d free pages of %d\n", (int)page, _stats.freePages+1, _stats.pages);
+
+		// link in the page
+		page->next = _pageList;
+		_pageList = page;
+		debug_only(_stats.freePages++;)
+		debug_only( NanoAssert(pageCount()==_stats.freePages); )
+	}
+
+	void Fragmento::pagesGrow(int32_t count)
+	{
+		NanoAssert(!_pageList);
+		MMGC_MEM_TYPE("NanojitFragmentoMem"); 
+		Page* memory = 0;
+		if (NJ_UNLIMITED_GROWTH || _stats.pages < (uint32_t)NJ_PAGES)
+		{
+			// @todo nastiness that needs a fix'n
+			_gcHeap = _core->GetGC()->GetGCHeap();
+			NanoAssert(NJ_PAGE_SIZE<=_gcHeap->kNativePageSize);
+			
+			// convert NJ_PAGES to gc page count 
+			int32_t gcpages = (count*NJ_PAGE_SIZE) / _gcHeap->kNativePageSize;
+			MMGC_MEM_TYPE("NanojitMem"); 
+			memory = (Page*)_gcHeap->Alloc(gcpages);
+			NanoAssert((int*)memory == pageTop(memory));
+			//fprintf(stderr,"head alloc of %d at %x of %d pages using nj page size of %d\n", gcpages, (intptr_t)memory, (intptr_t)_gcHeap->kNativePageSize, NJ_PAGE_SIZE);
+			
+			// can't add memory if it's not addressable from all locations
+			for(uint32_t i=0; i<_allocList.size(); i++)
+			{
+				Page* a = _allocList.get(i);
+				int32_t delta = (a < memory) ? (intptr_t)memory+(NJ_PAGE_SIZE*(count+1))-(intptr_t)a : (intptr_t)a+(NJ_PAGE_SIZE*(count+1))-(intptr_t)memory;
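+				// 16777215 == 2^24-1: keep every allocation block within a 16MB span of the others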
+				if ( delta > 16777215 )
+				{
+					// can't use this memory
+					_gcHeap->Free(memory);
+					return;
+				}
+			}
+			_allocList.add(memory);
+
+			Page* page = memory;
+			_pageList = page;
+			_stats.pages += count;
+			debug_only(_stats.freePages += count;)
+			while(--count > 0)
+			{
+				Page *next = page + 1;
+				//fprintf(stderr,"Fragmento::pageGrow adding page %x ; %d\n", (intptr_t)page, count);
+				page->next = next;
+				page = next; 
+			}
+			page->next = 0;
+			debug_only( NanoAssert(pageCount()==_stats.freePages); )
+			//fprintf(stderr,"Fragmento::pageGrow adding page %x ; %d\n", (intptr_t)page, count);
+		}
+	}
+	
+	void Fragmento::clearFrags()
+	{
+		//fprintf(stderr, "Fragmento::clearFrags()\n");
+
+        while (!_frags->isEmpty()) {
+            Fragment *f = _frags->removeLast();
+            f->clear();
+        }
+		
+		// reclaim native pages @todo this is to be moved into tree code.
+		_assm->pageReset();
+
+		verbose_only( enterCounts->clear();)
+		verbose_only( mergeCounts->clear();)
+		verbose_only( _flushes++ );
+	}
+
+	Assembler* Fragmento::assm()
+	{
+		return _assm;
+	}
+
+	AvmCore* Fragmento::core()
+	{
+		return _core;
+	}
+
+    Fragment* Fragmento::getLoop(const avmplus::InterpState &is)
+	{
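+		// look up the loop-anchor fragment for this ip, creating and registering it the first time the loop is seen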
+		Fragment* f = _frags->get(is.ip);
+		if (!f) {
+			f = newFrag(is);
+			_frags->put(is.ip, f);
+            f->anchor = f;
+			f->kind = LoopTrace;
+			f->mergeCounts = new (_core->gc) BlockHist(_core->gc);
+            verbose_only( addLabel(f, "T", _frags->size()); )
+		}
+		return f;
+	}
+
+#ifdef NJ_VERBOSE
+	void Fragmento::addLabel(Fragment *f, const char *prefix, int id)
+	{
+		char fragname[20];
+		sprintf(fragname,"%s%d", prefix, id);
+		labels->add(f, sizeof(Fragment), 0, fragname);
+	}
+#endif
+
+	Fragment *Fragmento::getMerge(GuardRecord *lr, const avmplus::InterpState &is)
+    {
+		Fragment *anchor = lr->from->anchor;
+		for (Fragment *f = anchor->branches; f != 0; f = f->nextbranch) {
+			if (f->kind == MergeTrace && f->frid == is.ip && f->calldepth == lr->calldepth) {
+				// found existing shared branch on anchor
+				return f;
+			}
+		}
+
+		Fragment *f = newBranch(anchor, is);
+		f->kind = MergeTrace;
+		f->calldepth = lr->calldepth;
+		verbose_only(addLabel(f, "M", ++anchor->mergeid); )
+        return f;
+    }
+
+	Fragment *Fragmento::createBranch(GuardRecord *lr, const avmplus::InterpState &is)
+    {
+		Fragment *from = lr->from;
+        Fragment *f = newBranch(from, is);
+		f->kind = BranchTrace;
+		f->calldepth = lr->calldepth;
+		f->treeBranches = f->anchor->treeBranches;
+		f->anchor->treeBranches = f;
+		verbose_only( labels->add(f, sizeof(Fragment), 0, "-"); );
+        return f;
+    }
+
+#ifdef NJ_VERBOSE
+	uint32_t Fragmento::pageCount()
+	{
+		uint32_t n = 0;
+		for(Page* page=_pageList; page; page = page->next)
+			n++;
+		return n;
+	}
+
+	void Fragmento::dumpFragStats(Fragment *f, int level, int& size,
+		uint64_t &traceDur, uint64_t &interpDur)
+    {
+        avmplus::String *filep = f->file;
+        if (!filep)
+            filep = _core->k_str[avmplus::kstrconst_emptyString];
+        avmplus::StringNullTerminatedUTF8 file(_core->gc, filep);
+        const char *s = file.c_str();
+        const char *t = strrchr(s,'\\');
+        if (!t) t = strrchr(s,'/');
+        if (t) s = t+1;
+
+        char buf[500];
+		int namewidth = 35;
+        sprintf(buf, "%*c%s %.*s:%d", 1+level, ' ', labels->format(f), namewidth, s, f->line);
+
+        int called = f->hits();
+        if (called >= 0)
+            called += f->_called;
+        else
+            called = -(1<<f->blacklistLevel) - called - 1;
+
+        uint32_t main = f->_native - f->_exitNative;
+
+        char cause[200];
+        if (f->_token && strcmp(f->_token,"loop")==0)
+            sprintf(cause,"%s %d", f->_token, f->xjumpCount);
+		else if (f->_token) {
+			if (f->eot_target) {
+				sprintf(cause,"%s %s", f->_token, labels->format(f->eot_target));
+			} else {
+	            strcpy(cause, f->_token);
+			}
+		}
+        else
+            cause[0] = 0;
+
+		FOpcodep ip = f->frid;
+        _assm->outputf("%-*s %7d %6d %6d %6d %4d %9llu %9llu %-12s %s", namewidth, buf,
+            called, f->guardCount, main, f->_native, f->compileNbr, f->traceTicks/1000, f->interpTicks/1000,
+			cause, core()->interp.labels->format(ip));
+
+        size += main;
+		traceDur += f->traceTicks;
+		interpDur += f->interpTicks;
+
+		for (Fragment *x = f->branches; x != 0; x = x->nextbranch)
+			if (x->kind != MergeTrace)
+	            dumpFragStats(x,level+1,size,traceDur,interpDur);
+        for (Fragment *x = f->branches; x != 0; x = x->nextbranch)
+			if (x->kind == MergeTrace)
+	            dumpFragStats(x,level+1,size,traceDur,interpDur);
+
+        if (f->anchor == f && f->branches != 0) {
+            //_assm->outputf("tree size %d ticks %llu",size,dur);
+            _assm->output("");
+        }
+    }
+
+    class DurData { public:
+        DurData(): frag(0), traceDur(0), interpDur(0), size(0) {}
+        DurData(int): frag(0), traceDur(0), interpDur(0), size(0) {}
+        DurData(Fragment* f, uint64_t td, uint64_t id, int32_t s)
+			: frag(f), traceDur(td), interpDur(id), size(s) {}
+        Fragment* frag;
+        uint64_t traceDur;
+        uint64_t interpDur;
+		int32_t size;
+    };
+
+	void Fragmento::dumpRatio(const char *label, BlockHist *hist)
+	{
+		int total=0, unique=0;
+		for (int i = 0, n=hist->size(); i < n; i++) {
+			const void * id = hist->keyAt(i);
+			int c = hist->get(id);
+			if (c > 1) {
+				//_assm->outputf("%d %X", c, id);
+				unique += 1;
+			}
+			else if (c == 1) {
+				unique += 1;
+			}
+			total += c;
+		}
+		_assm->outputf("%s total %d unique %d ratio %.1f", label, total, unique, double(total)/unique);
+	}
+
+	void Fragmento::dumpStats()
+	{
+		bool vsave = _assm->_verbose;
+		_assm->_verbose = true;
+
+		_assm->output("");
+		dumpRatio("inline", enterCounts);
+		dumpRatio("merges", mergeCounts);
+		_assm->outputf("abc %d il %d (%.1fx) abc+il %d (%.1fx)",
+			_stats.abcsize, _stats.ilsize, (double)_stats.ilsize/_stats.abcsize,
+			_stats.abcsize + _stats.ilsize,
+			double(_stats.abcsize+_stats.ilsize)/_stats.abcsize);
+
+		int32_t count = _frags->size();
+		int32_t pages =  _stats.pages;
+		int32_t free = _stats.freePages;
+		if (!count)
+		{
+			_assm->outputf("No fragments in cache, %d flushes", _flushes);
+    		_assm->_verbose = vsave;
+            return;
+		}
+
+        _assm->outputf("\nFragment statistics for %d entries after %d cache flushes: %d pages (%dKB), %d used and %d free", 
+            count, _flushes, pages, pages<<NJ_LOG2_PAGE_SIZE>>10, pages-free,free);
+		_assm->outputf("h=loop header, x=exit trace, L=loop");
+		_assm->output("         location                     calls guards   main native  gen   T-trace  T-interp");
+
+		avmplus::SortedMap<uint64_t, DurData, avmplus::LIST_NonGCObjects> durs(_core->gc);
+		uint64_t totaldur=0;
+		uint64_t totaltrace=0;
+		int totalsize=0;
+        for (int32_t i=0; i<count; i++)
+        {
+            Fragment *f = _frags->at(i);
+            int size = 0;
+            uint64_t traceDur=0, interpDur=0;
+            dumpFragStats(f, 0, size, traceDur, interpDur);
+			uint64_t bothDur = traceDur + interpDur;
+			if (bothDur) {
+				totaltrace += traceDur;
+				totaldur += bothDur;
+				totalsize += size;
+				while (durs.containsKey(bothDur)) bothDur++;
+				DurData d(f, traceDur, interpDur, size);
+				durs.put(bothDur, d);
+			}
+        }
+		_assm->outputf("");
+		_assm->outputf("       trace         interp");
+		_assm->outputf("%9lld (%2d%%)  %9lld (%2d%%)",
+			totaltrace/1000, int(100.0*totaltrace/totaldur),
+			(totaldur-totaltrace)/1000, int(100.0*(totaldur-totaltrace)/totaldur));
+		_assm->outputf("");
+		_assm->outputf("trace      ticks            trace           interp           size");
+		for (int32_t i=durs.size()-1; i >= 0; i--) {
+			uint64_t bothDur = durs.keyAt(i);
+			DurData d = durs.get(bothDur);
+			int size = d.size;
+			_assm->outputf("%-4s %9lld (%2d%%)  %9lld (%2d%%)  %9lld (%2d%%)  %6d (%2d%%)", 
+				labels->format(d.frag),
+				bothDur/1000, int(100.0*bothDur/totaldur),
+				d.traceDur/1000, int(100.0*d.traceDur/totaldur),
+				d.interpDur/1000, int(100.0*d.interpDur/totaldur),
+				size, int(100.0*size/totalsize));
+		}
+
+		_assm->_verbose = vsave;
+
+	}
+
+	void Fragmento::countBlock(BlockHist *hist, FOpcodep ip)
+	{
+		int c = hist->count(ip);
+		if (_assm->_verbose)
+			_assm->outputf("++ %s %d", core()->interp.labels->format(ip), c);
+	}
+
+	void Fragmento::countIL(uint32_t il, uint32_t abc)
+	{
+		_stats.ilsize += il;
+		_stats.abcsize += abc;
+	}
+#endif // NJ_VERBOSE
+
+	//
+	// Fragment
+	//
+	Fragment::Fragment(FragID id) : frid(id)
+	{
+        // Fragment is a gc object which is zero'd by the GC, no need to clear fields
+    }
+
+	void Fragment::addLink(GuardRecord* lnk)
+	{
+		//fprintf(stderr,"addLink %x from %X target %X\n",(int)lnk,(int)lnk->from,(int)lnk->target);
+		lnk->next = _links;
+		_links = lnk;
+	}
+
+	void Fragment::removeLink(GuardRecord* lnk)
+	{
+		GuardRecord*  lr = _links;
+		GuardRecord** lrp = &_links;
+		while(lr)
+		{
+			if (lr == lnk)
+			{
+				*lrp = lr->next;
+				lnk->next = 0;
+				break;
+			}
+			lrp = &(lr->next);
+			lr = lr->next;
+		}
+	}
+	
+	void Fragment::link(Assembler* assm)
+	{
+		// patch all jumps into this fragment
+		GuardRecord* lr = _links;
+		while (lr)
+		{
+			GuardRecord* next = lr->next;
+			Fragment* from = lr->target;
+			if (from && from->fragEntry) assm->patch(lr);
+			lr = next;
+		}
+
+		// and then patch all jumps leading out
+		lr = outbound;
+		while(lr)
+		{
+			GuardRecord* next = lr->outgoing;
+			Fragment* targ = lr->target;
+			if (targ && targ->fragEntry) assm->patch(lr);
+			lr = next;
+		}
+	}
+
+	void Fragment::unlink(Assembler* assm)
+	{
+		// remove our guards from others' in-bound list, so they don't patch to us 
+		GuardRecord* lr = outbound;
+		while (lr)
+		{
+			GuardRecord* next = lr->outgoing;
+			Fragment* targ = lr->target;
+			if (targ) targ->removeLink(lr);
+			verbose_only( lr->gid = 0; )
+			lr = next;
+		}	
+
+		// then unpatch all jumps into this fragment
+		lr = _links;
+		while (lr)
+		{
+			GuardRecord* next = lr->next;
+			Fragment* from = lr->target;
+			if (from && from->fragEntry) assm->unpatch(lr);
+			verbose_only( lr->gid = 0; )
+			lr = next;
+		}
+	}
+
+	bool Fragment::hasOnlyTreeLinks()
+	{
+		// check that all incoming links are on the same tree
+		bool isIt = true;
+		GuardRecord *lr = _links;
+		while (lr)
+		{
+			GuardRecord *next = lr->next;
+			NanoAssert(lr->target == this);  // def'n of GuardRecord
+			if (lr->from->anchor != anchor)
+			{
+				isIt = false;
+				break;
+			}
+			lr = next;
+		}	
+		return isIt;		
+	}
+
+	void Fragment::removeIntraLinks()
+	{
+		// should only be called on root of tree
+		NanoAssert(this == anchor);
+		GuardRecord *lr = _links;
+		while (lr)
+		{
+			GuardRecord *next = lr->next;
+			NanoAssert(lr->target == this);  // def'n of GuardRecord
+			if (lr->from->anchor == anchor && lr->from->kind != MergeTrace)
+				removeLink(lr);
+			lr = next;
+		}	
+	}
+	
+	void Fragment::unlinkBranches(Assembler* /*assm*/)
+	{
+		// should only be called on root of tree
+		NanoAssert(this == anchor);
+		Fragment* frag = treeBranches;
+		while(frag)
+		{
+			NanoAssert(frag->kind == BranchTrace && frag->hasOnlyTreeLinks());
+			frag->_links = 0;
+			frag->fragEntry = 0;
+			frag = frag->treeBranches;
+		}
+	}
+
+	void Fragment::linkBranches(Assembler* assm)
+	{
+		// should only be called on root of tree
+		NanoAssert(this == anchor);
+		Fragment* frag = treeBranches;
+		while(frag)
+		{
+			if (frag->fragEntry) frag->link(assm);
+			frag = frag->treeBranches;
+		}
+	}
+	
+    void Fragment::blacklist()
+    {
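+        // reset the hit counter to -(2^blacklistLevel), doubling the penalty with each blacklisting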
+        blacklistLevel++;
+        _hits = -(1<<blacklistLevel);
+    }
+
+    Fragment *Fragmento::newFrag(const avmplus::InterpState &interp)
+    {
+        FragID frid = interp.ip;
+		GC *gc = _core->gc;
+        Fragment *f = new (gc) Fragment(frid);
+		f->blacklistLevel = 5;
+#ifdef AVMPLUS_VERBOSE
+        if (interp.f->filename) {
+            f->line = interp.f->linenum;
+            f->file = interp.f->filename;
+        }
+#endif
+        return f;
+    }
+
+	Fragment *Fragmento::newBranch(Fragment *from, const avmplus::InterpState &interp)
+	{
+		Fragment *f = newFrag(interp);
+		f->anchor = from->anchor;
+		f->mergeCounts = from->anchor->mergeCounts;
+        f->xjumpCount = from->xjumpCount;
+		/*// prepend
+		f->nextbranch = from->branches;
+		from->branches = f;*/
+		// append
+		if (!from->branches) {
+			from->branches = f;
+		} else {
+			Fragment *p = from->branches;
+			while (p->nextbranch != 0)
+				p = p->nextbranch;
+			p->nextbranch = f;
+		}
+		return f;
+	}
+
+    void Fragment::clear()
+    {
+        if (lirbuf) {
+            lirbuf->clear();
+            lirbuf = 0;
+        }
+		lastIns = 0;
+    }
+
+    void Fragment::removeExit(Fragment *target)
+    {
+        if (target && target == branches) {
+            branches = branches->nextbranch;
+            // @todo this doesn't seem right : target->clear();
+        } else {
+            for (Fragment *x = branches; x && x->nextbranch; x = x->nextbranch) {
+                if (target == x->nextbranch) {
+                    x->nextbranch = x->nextbranch->nextbranch;
+                    // @todo this doesn't seem right : target->clear();
+                    return;
+                }
+            }
+        }
+    }
+
+	#endif /* FEATURE_NANOJIT */
+}
new file mode 100755
--- /dev/null
+++ b/js/src/nanojit/LIR.cpp
@@ -0,0 +1,2058 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is [Open Source Virtual Machine].
+ *
+ * The Initial Developer of the Original Code is
+ * Adobe System Incorporated.
+ * Portions created by the Initial Developer are Copyright (C) 2004-2007
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ *   Adobe AS3 Team
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+
+#include "nanojit.h"
+#include <stdio.h>
+
+namespace nanojit
+{
+    using namespace avmplus;
+	#ifdef FEATURE_NANOJIT
+
+	// @todo -- a lookup table would be better here
+	uint32_t FASTCALL operandCount(LOpcode op)
+	{
+		switch(op)
+		{
+			case LIR_trace:
+			case LIR_skip:
+			case LIR_tramp:
+			case LIR_loop:
+			case LIR_x:
+			case LIR_short:
+			case LIR_int:
+			case LIR_quad:
+			case LIR_call:
+			case LIR_fcall:
+            case LIR_param:
+                return 0;
+
+            case LIR_callh:
+			case LIR_arg:
+			case LIR_ref:
+			case LIR_farg:
+			case LIR_not:
+			case LIR_xt:
+			case LIR_xf:
+			case LIR_qlo:
+			case LIR_qhi:
+			case LIR_neg:
+			case LIR_fneg:
+			case LIR_i2f:
+			case LIR_u2f:
+                return 1;
+				
+			default:
+                return 2;
+		}
+	}				
+
+	// LIR verbose specific
+	#ifdef NJ_VERBOSE
+
+	void Lir::initEngine()
+	{
+		debug_only( { LIns l; l.initOpcode(LIR_last); NanoAssert(l.opcode()>0); } );
+		NanoAssert( LIR_last < (1<<8)-1 );  // only 8 bits for the opcode
+		verbose_only( initVerboseStructures() );
+	}
+
+		const char* Lir::_lirNames[LIR_last];
+
+		void Lir::initVerboseStructures()
+		{
+			memset(_lirNames, 0, sizeof(_lirNames));
+
+			_lirNames[LIR_short] = "short";
+			_lirNames[LIR_int] =  "int";
+			_lirNames[LIR_quad]  =  "quad";
+			_lirNames[LIR_trace] =  "trace";
+			_lirNames[LIR_skip]  =  "skip";
+			_lirNames[LIR_tramp] =  "tramp";
+			_lirNames[LIR_loop] =	"loop";
+			_lirNames[LIR_x]	=	"x";
+			_lirNames[LIR_xt]	=	"xt";
+			_lirNames[LIR_xf]	=	"xf";
+			_lirNames[LIR_eq]   =   "eq";
+			_lirNames[LIR_lt]   =   "lt";
+			_lirNames[LIR_le]   =   "le";
+			_lirNames[LIR_gt]   =   "gt";
+			_lirNames[LIR_ge]   =   "ge";
+			_lirNames[LIR_ult]   =  "ult";
+			_lirNames[LIR_ule]   =  "ule";
+			_lirNames[LIR_ugt]   =  "ugt";
+			_lirNames[LIR_uge]   =  "uge";
+			_lirNames[LIR_neg] =    "neg";
+			_lirNames[LIR_add] =	"add";
+			_lirNames[LIR_sub] =	"sub";
+			_lirNames[LIR_mul] =	"mul";
+			_lirNames[LIR_and] =	"and";
+			_lirNames[LIR_or] =		"or";
+			_lirNames[LIR_xor] =	"xor";
+			_lirNames[LIR_not] =	"not";
+			_lirNames[LIR_lsh] =	"lsh";
+			_lirNames[LIR_rsh] =	"rsh";
+			_lirNames[LIR_ush] =	"ush";
+			_lirNames[LIR_fneg] =   "fneg";
+			_lirNames[LIR_fadd] =	"fadd";
+			_lirNames[LIR_fsub] =	"fsub";
+			_lirNames[LIR_fmul] =	"fmul";
+			_lirNames[LIR_fdiv] =	"fdiv";
+			_lirNames[LIR_i2f] =	"i2f";
+			_lirNames[LIR_u2f] =	"u2f";
+			_lirNames[LIR_ld] =		"ld";
+			_lirNames[LIR_ldc] =	"ldc";
+			_lirNames[LIR_ldcb] =	"ldcb";
+			_lirNames[LIR_cmov] =	"cmov";
+            _lirNames[LIR_2] =      "";
+			_lirNames[LIR_ldq] =	"ldq";
+			_lirNames[LIR_st] =		"st";
+			_lirNames[LIR_sti] =	"sti";
+			_lirNames[LIR_arg] =	"arg";
+			_lirNames[LIR_param] =	"param";
+			_lirNames[LIR_call] =	"call";
+			_lirNames[LIR_callh] =	"callh";
+			_lirNames[LIR_qjoin] =	"qjoin";
+			_lirNames[LIR_qlo] =	"qlo";
+			_lirNames[LIR_qhi] =	"qhi";
+			_lirNames[LIR_ref] =	"ref";
+			_lirNames[LIR_last]=	"???";
+			_lirNames[LIR_farg] =	"farg";
+			_lirNames[LIR_fcall] =	"fcall";
+		}
+	#endif /* NJ_VERBOSE */
+	
+	// implementation
+
+#ifdef NJ_PROFILE
+	// @todo fixup move to nanojit.h
+	#undef counter_value
+	#define counter_value(x)		x
+#endif /* NJ_PROFILE */
+
+	//static int32_t buffer_count = 0;
+	
+	// LCompressedBuffer
+	LirBuffer::LirBuffer(Fragmento* frago, const CallInfo* functions)
+		: _frago(frago), _functions(functions)
+	{
+		_start = 0;
+		clear();
+		_start = pageAlloc();
+		if (_start)
+		{
+			verbose_only(_start->seq = 0;)
+			_unused = &_start->lir[0];
+		}
+		//buffer_count++;
+		//fprintf(stderr, "LirBuffer %x start %x count %d\n", (int)this, (int)_start, buffer_count);
+	}
+
+	LirBuffer::~LirBuffer()
+	{
+		//buffer_count--;
+		//fprintf(stderr, "~LirBuffer %x count %d\n", (int)this, buffer_count);
+		clear();
+		_frago = 0;
+	}
+	
+	void LirBuffer::clear()
+	{
+		// free all the memory and clear the stats
+		debug_only( if (_start) validate();)
+		while( _start )
+		{
+			Page *next = _start->next;
+			_frago->pageFree( _start );
+			_start = next;
+			_stats.pages--;
+		}
+		NanoAssert(_stats.pages == 0);
+		_unused = 0;
+		_stats.lir = 0;
+		_noMem = 0;
+	}
+
+	#ifdef _DEBUG
+	void LirBuffer::validate() const
+	{
+		uint32_t count = 0;
+		Page *last = 0;
+		Page *page = _start;
+		while(page)
+		{
+			last = page;
+			page = page->next;
+			count++;
+		}
+		NanoAssert(count == _stats.pages);
+		NanoAssert(_noMem || _unused->page()->next == 0);
+		NanoAssert(_noMem || samepage(last,_unused));
+	}
+	#endif 
+	
+	Page* LirBuffer::pageAlloc()
+	{
+		Page* page = _frago->pageAlloc();
+		if (page)
+		{
+			page->next = 0;	// end of list marker for new page
+			_stats.pages++;
+		}
+		else
+		{
+			_noMem = 1;
+		}
+		return page;
+	}
+	
+	uint32_t LirBuffer::size()
+	{
+		debug_only( validate(); )
+		return _stats.lir;
+	}
+	
+	LInsp LirBuffer::next()
+	{
+		debug_only( validate(); )
+		return _unused;
+	}
+
+	bool LirBuffer::addPage()
+	{
+		LInsp last = _unused;
+		// we need to pull in a new page and stamp the old one with a link to it
+        Page *lastPage = last->page();
+		Page *page = pageAlloc();
+		if (page)
+		{
+			lastPage->next = page;  // forward link to next page 
+			_unused = &page->lir[0];
+            verbose_only(page->seq = lastPage->seq+1;)
+			//fprintf(stderr, "Fragmento::ensureRoom stamping %x with %x; start %x unused %x\n", (int)pageBottom(last), (int)page, (int)_start, (int)_unused);
+			debug_only( validate(); )
+			return true;
+		} 
+		else {
+			// mem failure, rewind pointer to top of page so that subsequent instruction works
+			verbose_only(if (_frago->assm()->_verbose) _frago->assm()->outputf("page alloc failed");)
+			_unused = &lastPage->lir[0];
+		}
+		return false;
+	}
+	
+	bool LirBufWriter::ensureRoom(uint32_t count)
+	{
+		LInsp last = _buf->next();
+		if (!samepage(last,last+count)
+			&& _buf->addPage()) 
+		{
+			// link LIR stream back to prior instruction (careful: insFar relies on _unused...)
+			LInsp next = _buf->next();
+			insFar(LIR_skip, last-1-next);
+		}
+		return !_buf->outOmem();
+	}
+
+	LInsp LirBuffer::commit(uint32_t count)
+	{
+		debug_only(validate();)
+		NanoAssertMsg( samepage(_unused, _unused+count), "You need to call ensureRoom first!" );
+		return _unused += count;
+	}
+	
+	uint32_t LIns::reference(LIns *r)
+	{
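+		// operands are stored as a small backward delta from the referring instruction; it must fit in 8 bits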
+		int delta = this-r-1;
+		NanoAssert(isU8(delta));
+		return delta;
+	}
+
+	LInsp LirBufWriter::ensureReferenceable(LInsp i, int32_t addedDistance)
+	{
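+		// if i is too far back to be referenced directly from the instruction about to be written, emit a LIR_tramp pointing at it and reference the trampoline instead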
+		if (!i) return 0;
+		NanoAssert(!i->isop(LIR_tramp));
+		LInsp next = _buf->next();
+		LInsp from = next + addedDistance;
+		if ( canReference(from,i) )
+			return i;
+
+		// need a trampoline to get to i
+		LInsp tramp = insFar(LIR_tramp, i-next);
+		NanoAssert( tramp+tramp->imm24() == i );
+		return tramp;
+	}
+	
+	LInsp LirBufWriter::insStore(LInsp o1, LInsp o2, LInsp o3)
+	{
+		LOpcode op = LIR_st;
+		NanoAssert(o1 && o2 && o3);
+		ensureRoom(4);
+		LInsp r1 = ensureReferenceable(o1,3);
+		LInsp r2 = ensureReferenceable(o2,2);
+		LInsp r3 = ensureReferenceable(o3,1);
+
+		LInsp l = _buf->next();
+		l->initOpcode(op);
+		l->setOprnd1(r1);
+		l->setOprnd2(r2);
+		l->setOprnd3(r3);
+
+		_buf->commit(1);
+		_buf->_stats.lir++;
+		return l;
+	}
+	
+	LInsp LirBufWriter::insStorei(LInsp o1, LInsp o2, int32_t d)
+	{
+		LOpcode op = LIR_sti;
+		NanoAssert(o1 && o2 && isS8(d));
+		ensureRoom(3);
+		LInsp r1 = ensureReferenceable(o1,2);
+		LInsp r2 = ensureReferenceable(o2,1);
+
+		LInsp l = _buf->next();
+		l->initOpcode(op);
+		l->setOprnd1(r1);
+		l->setOprnd2(r2);
+		l->setDisp(int8_t(d));
+
+		_buf->commit(1);
+		_buf->_stats.lir++;
+		return l;
+	}
+
+	LInsp LirBufWriter::ins0(LOpcode op)
+	{
+		if (!ensureRoom(1)) return 0;
+		LInsp l = _buf->next();
+		l->initOpcode(op);
+		_buf->commit(1);
+		_buf->_stats.lir++;
+		return l;
+	}
+	
+	LInsp LirBufWriter::ins1(LOpcode op, LInsp o1)
+	{
+		ensureRoom(2);
+		LInsp r1 = ensureReferenceable(o1,1);
+
+		LInsp l = _buf->next();
+		l->initOpcode(op);
+		if (r1)
+			l->setOprnd1(r1);
+
+		_buf->commit(1);
+		_buf->_stats.lir++;
+		return l;
+	}
+	
+	LInsp LirBufWriter::ins2(LOpcode op, LInsp o1, LInsp o2)
+	{
+		ensureRoom(3);
+		LInsp r1 = ensureReferenceable(o1,2);
+		LInsp r2 = ensureReferenceable(o2,1);
+
+		LInsp l = _buf->next();
+		l->initOpcode(op);
+		if (r1)
+			l->setOprnd1(r1);
+		if (r2)
+			l->setOprnd2(r2);
+
+		_buf->commit(1);
+		_buf->_stats.lir++;
+		return l;
+	}
+
+	LInsp LirBufWriter::insLoad(LOpcode op, LInsp base, LInsp d)
+	{
+		return ins2(op,base,d);
+	}
+
+	LInsp LirBufWriter::insGuard(LOpcode op, LInsp c, SideExit *x)
+	{
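+		// copy the SideExit into the instruction stream so the guard carries its own exit state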
+		LInsp data = skip(sizeof(SideExit));
+		*((SideExit*)data->payload()) = *x;
+		return ins2(op, c, data);
+	}
+
+	LInsp LirBufWriter::insImm8(LOpcode op, int32_t a, int32_t b)
+	{
+		ensureRoom(1);
+		LInsp l = _buf->next();
+		l->initOpcode(op);
+		l->setimm8(a,b);
+
+		_buf->commit(1);
+		_buf->_stats.lir++;
+		return l;
+	}
+	
+	LInsp LirBufWriter::insFar(LOpcode op, int32_t imm)
+	{
+		ensureRoom(1);
+
+		LInsp l = _buf->next();
+		l->initOpcode(op);
+		l->setimm24(imm);
+
+		_buf->commit(1);
+		return l;
+	}
+	
+	LInsp LirBufWriter::insImm(int32_t imm)
+	{
+		if (isS16(imm)) {
+			ensureRoom(1);
+			LInsp l = _buf->next();
+			l->initOpcode(LIR_short);
+			l->setimm16(imm);
+			_buf->commit(1);
+			_buf->_stats.lir++;
+			return l;
+		} else {
+			ensureRoom(2);
+			int32_t* l = (int32_t*)_buf->next();
+			*l = imm;
+			_buf->commit(1);
+			return ins0(LIR_int);
+		}
+	}
+	
+	LInsp LirBufWriter::insImmq(uint64_t imm)
+	{
+		ensureRoom(3);
+		int32_t* l = (int32_t*)_buf->next();
+		l[0] = int32_t(imm);
+		l[1] = int32_t(imm>>32);
+		_buf->commit(2);	
+		return ins0(LIR_quad);
+	}
+
+	LInsp LirBufWriter::skip(size_t size)
+	{
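+        // reserve enough instruction slots to hold 'size' bytes of payload, then emit a LIR_skip that links back over them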
+        const uint32_t n = (size+sizeof(LIns)-1)/sizeof(LIns);
+		ensureRoom(n+1);
+		LInsp i = _buf->next();
+		_buf->commit(n);
+		return insFar(LIR_skip, i-1-_buf->next());
+	}
+
+	LInsp LirReader::read()	
+	{
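+		// return the current instruction and step _i backwards over skips, tramps, LIR_2 pseudo-ops and immediate payload words to the previous real instruction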
+		LInsp cur = _i;
+		if (!cur)
+			return 0;
+		LIns* i = cur;
+		LOpcode iop = i->opcode();
+		do
+		{
+			switch (iop)
+			{					
+				default:
+					i--;
+					break;
+
+				case LIR_skip:
+					NanoAssert(i->imm24() != 0);
+					i += i->imm24();
+					break;
+		
+				case LIR_int:
+					NanoAssert(samepage(i, i-2));
+					i -= 2;
+					break;
+
+				case LIR_quad:
+					NanoAssert(samepage(i,i-3));
+					i -= 3;
+					break;
+
+				case LIR_trace:
+					_i = 0;  // start of trace
+					return cur;
+			}
+			iop = i->opcode();
+		}
+		while (is_trace_skip_tramp(iop)||iop==LIR_2);
+		_i = i;
+		return cur;
+	}
+
+	bool FASTCALL isCmp(LOpcode c) {
+		return c >= LIR_eq && c <= LIR_uge;
+	}
+
+	bool LIns::isCmp() const {
+		return nanojit::isCmp(u.code);
+	}
+
+	bool LIns::isCall() const
+	{
+		return (u.code&~LIR64) == LIR_call;
+	}
+
+	bool LIns::isGuard() const
+	{
+		return u.code==LIR_x || u.code==LIR_xf || u.code==LIR_xt;
+	}
+
+    bool LIns::isStore() const
+    {
+        return u.code == LIR_st || u.code == LIR_sti;
+    }
+
+    bool LIns::isLoad() const
+    {
+        return u.code == LIR_ldq || u.code == LIR_ld || u.code == LIR_ldc;
+    }
+
+	bool LIns::isconst() const
+	{
+		return (opcode()&~1) == LIR_short;
+	}
+
+	bool LIns::isconstval(int32_t val) const
+	{
+		return isconst() && constval()==val;
+	}
+
+	bool LIns::isconstq() const
+	{	
+		return isop(LIR_quad);
+	}
+
+	bool FASTCALL isCse(LOpcode op) {
+		op = LOpcode(op & ~LIR64);
+		return op >= LIR_cmov && op <= LIR_uge;
+	}
+
+    bool LIns::isCse(const CallInfo *functions) const
+    { 
+		return nanojit::isCse(u.code) || isCall() && functions[imm8()]._cse;
+    }
+
+	void LIns::setimm8(int32_t a, int32_t b)
+	{
+		NanoAssert(isS8(a) && isS8(b));
+		c.imm8a = int8_t(a);
+		c.imm8b = int8_t(b);
+	}
+
+	void LIns::setimm16(int32_t x)
+	{
+		NanoAssert(isS16(x));
+		i.imm16 = int16_t(x);
+	}
+
+	void LIns::setimm24(int32_t x)
+	{
+		t.imm24 = x;
+	}
+
+	void LIns::setresv(uint32_t resv)
+	{
+		NanoAssert(isU8(resv));
+		g.resv = resv;
+	}
+
+	void LIns::initOpcode(LOpcode op)
+	{
+		t.code = op;
+		t.imm24 = 0;
+	}
+
+	void LIns::setOprnd1(LInsp r)
+	{
+		u.oprnd_1 = reference(r);
+	}
+
+	void LIns::setOprnd2(LInsp r)
+	{
+		u.oprnd_2 = reference(r);
+	}
+
+	void LIns::setOprnd3(LInsp r)
+	{
+		u.oprnd_3 = reference(r);
+	}
+
+    void LIns::setDisp(int8_t d)
+    {
+        sti.disp = d;
+    }
+
+	LInsp	LIns::oprnd1() const	
+	{ 
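+		// decode the backward operand delta, following up to two trampolines to reach the actual instruction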
+		LInsp i = (LInsp) this - u.oprnd_1 - 1;
+		if (i->isop(LIR_tramp)) 
+		{
+			i += i->imm24();
+			if (i->isop(LIR_tramp))
+				i += i->imm24();
+		}
+		return i;
+	}
+	
+	LInsp	LIns::oprnd2() const
+	{ 
+		LInsp i = (LInsp) this - u.oprnd_2 - 1;
+		if (i->isop(LIR_tramp)) 
+		{
+			i += i->imm24();
+			if (i->isop(LIR_tramp))
+				i += i->imm24();
+		}
+		return i;
+	}
+
+	LInsp	LIns::oprnd3() const
+	{ 
+		LInsp i = (LInsp) this - u.oprnd_3 - 1;
+		if (i->isop(LIR_tramp)) 
+		{
+			i += i->imm24();
+			if (i->isop(LIR_tramp))
+				i += i->imm24();
+		}
+		return i;
+	}
+
+    void *LIns::payload() const
+    {
+        NanoAssert(opcode() == LIR_skip);
+        return (void*) (this+imm24()+1);
+    }
+
+    LIns* LirWriter::ins2i(LOpcode v, LIns* oprnd1, int32_t imm)
+    {
+        return ins2(v, oprnd1, insImm(imm));
+    }
+
+    bool insIsS16(LInsp i)
+    {
+        if (i->isconst()) {
+            int c = i->constval();
+            return isS16(c);
+        }
+        if (i->isop(LIR_cmov)) {
+            LInsp vals = i->oprnd2();
+            return insIsS16(vals->oprnd1()) && insIsS16(vals->oprnd2());
+        }
+        if (i->isCmp())
+            return true;
+        // many other possibilities too.
+        return false;
+    }
+
+	LIns* ExprFilter::ins1(LOpcode v, LIns* i)
+	{
+		if (v == LIR_qlo) {
+			if (i->isconstq())
+				return insImm(int32_t(i->constvalq()));
+			if (i->isop(LIR_qjoin))
+				return i->oprnd1();
+		}
+		else if (v == LIR_qhi) {
+			if (i->isconstq())
+				return insImm(int32_t(i->constvalq()>>32));
+			if (i->isop(LIR_qjoin))
+				return i->oprnd2();
+		}
+		else if (v == i->opcode() && (v == LIR_not || v == LIR_neg || v == LIR_fneg)) {
+			return i->oprnd1();
+		}
+
+		// todo
+		// -(a-b) = b-a
+
+		return out->ins1(v, i);
+	}
+
+	LIns* ExprFilter::ins2(LOpcode v, LIns* oprnd1, LIns* oprnd2)
+	{
+		NanoAssert(oprnd1 && oprnd2);
+		if (v == LIR_cmov) {
+			if (oprnd2->oprnd1() == oprnd2->oprnd2()) {
+				// c ? a : a => a
+				return oprnd2->oprnd1();
+			}
+		}
+		if (oprnd1 == oprnd2)
+		{
+			if (v == LIR_xor || v == LIR_sub ||
+				(!oprnd1->isQuad() && (v == LIR_ult || v == LIR_ugt || v == LIR_gt || v == LIR_lt)))
+				return insImm(0);
+			if (v == LIR_or || v == LIR_and)
+				return oprnd1;
+			if (!oprnd1->isQuad() && (v == LIR_le || v == LIR_ule || v == LIR_ge || v == LIR_uge)) {
+				// x <= x == 1; x >= x == 1
+				return insImm(1);
+			}
+		}
+		if (oprnd1->isconst() && oprnd2->isconst())
+		{
+			int c1 = oprnd1->constval();
+			int c2 = oprnd2->constval();
+			if (v == LIR_qjoin) {
+				uint64_t q = c1 | uint64_t(c2)<<32;
+				return insImmq(q);
+			}
+			if (v == LIR_eq)
+				return insImm(c1 == c2);
+			if (v == LIR_lt)
+				return insImm(c1 < c2);
+			if (v == LIR_gt)
+				return insImm(c1 > c2);
+			if (v == LIR_le)
+				return insImm(c1 <= c2);
+			if (v == LIR_ge)
+				return insImm(c1 >= c2);
+			if (v == LIR_ult)
+				return insImm(uint32_t(c1) < uint32_t(c2));
+			if (v == LIR_ugt)
+				return insImm(uint32_t(c1) > uint32_t(c2));
+			if (v == LIR_ule)
+				return insImm(uint32_t(c1) <= uint32_t(c2));
+			if (v == LIR_uge)
+				return insImm(uint32_t(c1) >= uint32_t(c2));
+			if (v == LIR_rsh)
+				return insImm(int32_t(c1) >> int32_t(c2));
+			if (v == LIR_lsh)
+				return insImm(int32_t(c1) << int32_t(c2));
+			if (v == LIR_ush)
+				return insImm(uint32_t(c1) >> int32_t(c2));
+		}
+		else if (oprnd1->isconstq() && oprnd2->isconstq())
+		{
+			double c1 = oprnd1->constvalf();
+			double c2 = oprnd2->constvalf();
+			if (v == LIR_eq)
+				return insImm(c1 == c2);
+			if (v == LIR_lt)
+				return insImm(c1 < c2);
+			if (v == LIR_gt)
+				return insImm(c1 > c2);
+			if (v == LIR_le)
+				return insImm(c1 <= c2);
+			if (v == LIR_ge)
+				return insImm(c1 >= c2);
+		}
+		else if (oprnd1->isconst() && !oprnd2->isconst())
+		{
+			if (v == LIR_add || v == LIR_mul ||
+				v == LIR_fadd || v == LIR_fmul ||
+				v == LIR_xor || v == LIR_or || v == LIR_and ||
+				v == LIR_eq) {
+				// move const to rhs
+				LIns* t = oprnd2;
+				oprnd2 = oprnd1;
+				oprnd1 = t;
+			}
+			else if (v >= LIR_lt && v <= LIR_uge && !oprnd2->isQuad()) {
+				// move const to rhs, swap the operator
+				LIns *t = oprnd2;
+				oprnd2 = oprnd1;
+				oprnd1 = t;
+				v = LOpcode(v^1);
+			}
+			else if (v == LIR_cmov) {
+				// const ? x : y => return x or y depending on const
+				return oprnd1->constval() ? oprnd2->oprnd1() : oprnd2->oprnd2();
+			}
+		}
+
+		if (oprnd2->isconst())
+		{
+			int c = oprnd2->constval();
+			if (v == LIR_add && oprnd1->isop(LIR_add) && oprnd1->oprnd2()->isconst()) {
+				// add(add(x,c1),c2) => add(x,c1+c2)
+				c += oprnd1->oprnd2()->constval();
+				oprnd2 = insImm(c);
+				oprnd1 = oprnd1->oprnd1();
+			}
+			else if (v == LIR_sub && oprnd1->isop(LIR_add) && oprnd1->oprnd2()->isconst()) {
+				// sub(add(x,c1),c2) => add(x,c1-c2)
+				c = oprnd1->oprnd2()->constval() - c;
+				oprnd2 = insImm(c);
+				oprnd1 = oprnd1->oprnd1();
+				v = LIR_add;
+			}
+			else if (v == LIR_rsh && c == 16 && oprnd1->isop(LIR_lsh) &&
+					 oprnd1->oprnd2()->isconstval(16)) {
+				if (insIsS16(oprnd1->oprnd1())) {
+					// rsh(lsh(x,16),16) == x, if x is S16
+					return oprnd1->oprnd1();
+				}
+			}
+			else if (v == LIR_ult) {
+				if (oprnd1->isop(LIR_cmov)) {
+					LInsp a = oprnd1->oprnd2()->oprnd1();
+					LInsp b = oprnd1->oprnd2()->oprnd2();
+					if (a->isconst() && b->isconst()) {
+						bool a_lt = uint32_t(a->constval()) < uint32_t(oprnd2->constval());
+						bool b_lt = uint32_t(b->constval()) < uint32_t(oprnd2->constval());
+						if (a_lt == b_lt)
+							return insImm(a_lt);
+					}
+				}
+			}
+
+			if (c == 0)
+			{
+				if (v == LIR_add || v == LIR_or || v == LIR_xor ||
+					v == LIR_sub || v == LIR_lsh || v == LIR_rsh || v == LIR_ush)
+					return oprnd1;
+				else if (v == LIR_and || v == LIR_mul)
+					return oprnd2;
+				else if (v == LIR_eq && oprnd1->isop(LIR_or) && 
+					oprnd1->oprnd2()->isconst() &&
+					oprnd1->oprnd2()->constval() != 0) {
+					// (x or c) != 0 if c != 0
+					return insImm(0);
+				}
+			}
+			else if (c == -1 || (c == 1 && oprnd1->isCmp())) {
+				if (v == LIR_or) {
+					// x | -1 = -1, cmp | 1 = 1
+					return oprnd2;
+				}
+				else if (v == LIR_and) {
+					// x & -1 = x, cmp & 1 = cmp
+					return oprnd1;
+				}
+			}
+		}
+
+		LInsp i;
+		if (v == LIR_qjoin && oprnd1->isop(LIR_qlo) && oprnd2->isop(LIR_qhi) 
+			&& (i = oprnd1->oprnd1()) == oprnd2->oprnd1()) {
+			// qjoin(qlo(x),qhi(x)) == x
+			return i;
+		}
+
+		return out->ins2(v, oprnd1, oprnd2);
+	}
+
+	LIns* ExprFilter::insGuard(LOpcode v, LInsp c, SideExit *x)
+	{
+		if (v != LIR_x) {
+			if (c->isconst()) {
+				if ((v == LIR_xt && !c->constval()) || (v == LIR_xf && c->constval())) {
+					return 0; // no guard needed
+				}
+				else {
+					// need a way to EOT now, since this is trace end.
+					return out->insGuard(LIR_x, 0, x);
+				}
+			}
+			else {
+				while (c->isop(LIR_eq) && c->oprnd1()->isCmp() && 
+					c->oprnd2()->isconstval(0)) {
+				    // xt(eq(cmp,0)) => xf(cmp)   or   xf(eq(cmp,0)) => xt(cmp)
+				    v = LOpcode(v^1);
+				    c = c->oprnd1();
+				}
+			}
+		}
+		return out->insGuard(v, c, x);
+	}
+
+    LIns* LirWriter::insLoadi(LIns *base, int disp) 
+    { 
+        return insLoad(LIR_ld,base,disp);
+    }
+
+	LIns* LirWriter::insLoad(LOpcode op, LIns *base, int disp)
+	{
+		return insLoad(op, base, insImm(disp));
+	}
+
+	LIns* LirWriter::ins_eq0(LIns* oprnd1)
+	{
+		return ins2i(LIR_eq, oprnd1, 0);
+	}
+
+	LIns* LirWriter::qjoin(LInsp lo, LInsp hi)
+	{
+		return ins2(LIR_qjoin, lo, hi);
+	}
+
+	LIns* LirWriter::ins_choose(LIns* cond, LIns* iftrue, LIns* iffalse, bool hasConditionalMove)
+	{
+		// if not a conditional, make it implicitly an ==0 test (then flip the results)
+		if (!cond->isCmp())
+		{
+			cond = ins_eq0(cond);
+			LInsp tmp = iftrue;
+			iftrue = iffalse;
+			iffalse = tmp;
+		}
+
+		if (hasConditionalMove)
+		{
+			return ins2(LIR_cmov, cond, ins2(LIR_2, iftrue, iffalse));
+		}
+
+		// @todo -- it might be better to use a short conditional branch rather than
+		// the bit-twiddling on systems that don't provide a conditional move instruction.
+		LInsp ncond = ins1(LIR_neg, cond); // cond ? -1 : 0
+		return ins2(LIR_or, 
+					ins2(LIR_and, iftrue, ncond), 
+					ins2(LIR_and, iffalse, ins1(LIR_not, ncond)));
+	}
+
+    LIns* LirBufWriter::insCall(int32_t fid, LInsp args[])
+	{
+		static const LOpcode k_argmap[] = { LIR_farg, LIR_arg, LIR_ref };
+		static const LOpcode k_callmap[] = { LIR_call, LIR_fcall, LIR_call, LIR_callh };
+
+		const CallInfo& ci = _functions[fid];
+		uint32_t argt = ci._argtypes;
+		int32_t argc = ci.count_args();
+		const uint32_t ret = argt & 3;
+		LOpcode op = k_callmap[ret];
+		//printf("   ret is type %d %s\n", ret, Lir::_lirNames[op]);
+
+#ifdef NJ_SOFTFLOAT
+		if (op == LIR_fcall)
+			op = LIR_callh;
+		LInsp args2[5*2]; // arm could require 2 args per double
+		int32_t j = 0;
+		uint32_t argt2 = argt&3; // copy of return type
+		for (int32_t i = 0; i < argc; i++) {
+			argt >>= 2;
+			uint32_t a = argt&3;
+			if (a == ARGSIZE_F) {
+				LInsp q = args[i];
+				args2[j++] = ins1(LIR_qhi, q);
+				argt2 |= ARGSIZE_LO << (j*2);
+				args2[j++] = ins1(LIR_qlo, q);
+				argt2 |= ARGSIZE_LO << (j*2);
+			} else {
+				args2[j++] = args[i];
+				argt2 |= a << (j*2);
+			}
+		}
+		args = args2;
+		argt = argt2;
+		argc = j;
+#endif
+
+		for (int32_t i = 0; i < argc; i++) {
+			argt >>= 2;
+			AvmAssert((argt&3)!=0);
+			ins1(k_argmap[(argt&3)-1], args[i]);
+		}
+
+		return insImm8(op==LIR_callh ? LIR_call : op, fid, argc);
+	}
+
+	/*
+#ifdef AVMPLUS_VERBOSE
+    void printTracker(const char* s, RegionTracker& trk, Assembler* assm)
+    {
+        assm->outputf("%s tracker width %d starting %X zeroth %X indexOf(starting) %d", s, trk.width, trk.starting, trk.zeroth, trk.indexOf(trk.starting));
+        assm->output_ins("   location ", trk.location);
+        for(int k=0;k<trk.length;k++)
+        {
+            if (trk.element[k])
+            {
+                assm->outputf(" [%d]", k+1);
+                assm->output_ins(" val ", trk.element[k]);
+            }
+        }
+    }
+#endif
+
+	LInsp adjustTracker(RegionTracker& trk, int32_t adj, LInsp i)
+	{
+		int32_t d = i->immdisp();
+		LInsp unaligned = 0;
+		if ( d&((1<<trk.width)-1) )
+		{
+			unaligned = i;
+		}
+		else
+		{
+			// put() if there is nothing at this slot
+			void* at = (void*)(d+adj);
+			if (!trk.get(at))
+			{
+				LInsp v = i->oprnd1();
+				trk.set(at, v);
+			}
+		}
+		return unaligned;
+	}
+
+    void trackersAtExit(SideExit* exit, RegionTracker& rtrk, RegionTracker& strk, Assembler *assm)
+    {
+        (void)assm;
+        int32_t s_adj=(int32_t)strk.starting, r_adj=(int32_t)rtrk.starting;
+		Fragment* frag = exit->from;
+		LInsp param0=frag->param0, sp=frag->sp, rp=frag->rp;
+        LirReader *r = frag->lirbuf->reader();
+        AvmCore *core = frag->lirbuf->_frago->core();
+		InsList live(core->gc);
+
+		rtrk.location->setresv(0);
+		strk.location->setresv(0);
+		
+        verbose_only(if (assm->_verbose) assm->output_ins("Reconstituting region trackers, starting from ", exit->ins);)
+
+		LInsp i = 0;
+		bool checkLive = true;
+#if 0 
+		// @todo needed for partial tree compile 
+		bool checkLive = true;
+		
+		// build a list of known live-valued instructions at the exit
+		verbose_only(if (assm->_verbose) assm->output("   compile-time live values at exit");)
+		LInsp i = r->setPos(exit->arAtExit);
+        while(i)
+        {
+			if (i->isop(LIR_2))
+			{
+				LInsp t = i->oprnd1();
+				if (live.indexOf(t)<0)
+				{
+					verbose_only(if (assm->_verbose) assm->output_ins("  ", t);)
+					live.add(t);
+				}
+			}
+			i = r->previous();
+		}
+#endif
+		
+        // traverse backward starting from the exit instruction
+		i = r->setPos(exit->ins);
+        while(i)
+        {
+            if (i->isStore())
+            {
+                LInsp base = i->oprnd2();
+                if (base == param0) 
+                {
+                    // update stop/rstop
+                    int32_t d = i->immdisp();
+                    if (d == offsetof(InterpState,sp)) 
+					{
+                        s_adj += i->oprnd1()->oprnd2()->constval();
+                    }
+                    else if (d == offsetof(InterpState,rp))
+					{
+                        r_adj += i->oprnd1()->oprnd2()->constval();
+					}
+                }
+                else if (base == sp) 
+                {
+					LInsp what = i->oprnd1();
+					bool imm = what->isconst() || what->isconstq();
+					if (!checkLive || (imm || live.indexOf(what)>=0))
+					{
+						verbose_only(if (assm->_verbose) assm->output_ins("  strk-adding ", i);)
+						adjustTracker(strk, s_adj, i);
+					}
+					else
+					{
+						verbose_only(if (assm->_verbose) assm->output_ins("  strk-ignoring ", i);)
+					}
+                }
+                else if (base == rp) 
+                {
+					LInsp what = i->oprnd1();
+					bool imm = what->isconst() || what->isconstq();
+					if (!checkLive || imm || live.indexOf(what))
+					{
+						verbose_only(if (assm->_verbose) assm->output_ins("  rtrk-adding ", i);)
+						adjustTracker(rtrk, r_adj, i);
+					}
+					else
+					{
+						verbose_only(if (assm->_verbose) assm->output_ins("  rtrk-ignoring ", i);)
+					}
+                }
+            }
+            i = r->previous();
+        }
+
+        verbose_only(if (assm->_verbose) { printTracker("rtrk", rtrk,assm); } )
+        verbose_only(if (assm->_verbose) { printTracker("strk", strk,assm); } )
+    }
+	*/
+
+    using namespace avmplus;
+
+	StoreFilter::StoreFilter(LirFilter *in, GC *gc, Assembler *assm, LInsp p0, LInsp sp, LInsp rp) 
+		: LirFilter(in), gc(gc), assm(assm), param0(p0), sp(sp), rp(rp), stop(0), rtop(0)
+	{}
+
+	LInsp StoreFilter::read() 
+	{
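+		// reading backwards, the first store seen to a given sp/rp slot is kept; earlier stores to the same slot, and stores above the current top, are dropped as dead. Guards reset the tracking and re-derive the tops from the exit state.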
+		for (;;) 
+		{
+			LInsp i = in->read();
+			if (!i)
+				return i;
+			bool remove = false;
+			if (i->isStore())
+			{
+				LInsp base = i->oprnd2();
+				if (base == param0) 
+				{
+					// update stop/rstop
+					int d = i->immdisp();
+					if (d == offsetof(InterpState,sp)) {
+						stop = i->oprnd1()->oprnd2()->constval() >> 2;
+						NanoAssert(!(stop&1));
+					}
+					else if (d == offsetof(InterpState,rp))
+						rtop = i->oprnd1()->oprnd2()->constval() >> 2;
+				}
+				else if (base == sp) 
+				{
+					LInsp v = i->oprnd1();
+					int d = i->immdisp() >> 2;
+					int top = stop+2;
+					if (d >= top) {
+						remove = true;
+					} else {
+						d = top - d;
+						if (v->isQuad()) {
+							// storing 8 bytes
+							if (stk.get(d) && stk.get(d-1)) {
+								remove = true;
+							} else {
+								stk.set(gc, d);
+								stk.set(gc, d-1);
+							}
+						}
+						else {
+							// storing 4 bytes
+							if (stk.get(d))
+								remove = true;
+							else
+								stk.set(gc, d);
+						}
+					}
+				}
+				else if (base == rp) 
+				{
+					int d = i->immdisp() >> 2;
+					if (d >= rtop) {
+						remove = true;
+					} else {
+						d = rtop - d;
+						if (rstk.get(d))
+							remove = true;
+						else
+							rstk.set(gc, d);
+					}
+				}
+			}
+			else if (i->isGuard())
+			{
+				rstk.reset();
+				stk.reset();
+				SideExit *exit = i->exit();
+				stop = exit->sp_adj >> 2;
+				rtop = exit->rp_adj >> 2;
+				NanoAssert(!(stop&1));
+			}
+			if (!remove)
+				return i;
+		}
+	}
+
+	//
+	// inlined/separated version of SuperFastHash
+	// This content is copyrighted by Paul Hsieh. For reference, see http://www.azillionmonkeys.com/qed/hash.html
+	//
+	inline uint32_t _hash8(uint32_t hash, const uint8_t data)
+	{
+		hash += data;
+		hash ^= hash << 10;
+		hash += hash >> 1;
+		return hash;
+	}
+
+	inline uint32_t _hash32(uint32_t hash, const uint32_t data)
+	{
+		const uint32_t dlo = data & 0xffff;
+		const uint32_t dhi = data >> 16;
+		hash += dlo;
+		const uint32_t tmp = (dhi << 11) ^ hash;
+		hash = (hash << 16) ^ tmp;
+		hash += hash >> 11;
+		return hash;
+	}
+	
+	inline uint32_t _hashptr(uint32_t hash, const void* data)
+	{
+#ifdef AVMPLUS_64BIT
+		hash = _hash32(hash, uint32_t(uintptr_t(data) >> 32));
+		hash = _hash32(hash, uint32_t(uintptr_t(data)));
+		return hash;
+#else
+		return _hash32(hash, uint32_t(data));
+#endif
+	}
+
+	inline uint32_t _hashfinish(uint32_t hash)
+	{
+		/* Force "avalanching" of final 127 bits */
+		hash ^= hash << 3;
+		hash += hash >> 5;
+		hash ^= hash << 4;
+		hash += hash >> 17;
+		hash ^= hash << 25;
+		hash += hash >> 6;
+		return hash;
+	}
+
+	LInsHashSet::LInsHashSet(GC* gc) : 
+			m_list(gc, kInitialCap), m_used(0), m_gc(gc)
+	{
+		m_list.set(kInitialCap-1, 0);
+	}
+
+	/*static*/ uint32_t FASTCALL LInsHashSet::hashcode(LInsp i)
+	{
+		const LOpcode op = i->opcode();
+		switch (op)
+		{
+			case LIR_short:
+				return hashimm(i->imm16());
+			case LIR_int:
+				return hashimm(i->imm32());
+			case LIR_quad:
+				return hashimmq(i->constvalq());
+			case LIR_call:
+			case LIR_fcall:
+			{
+				LInsp args[10];
+				int32_t argc = i->imm8b();
+				NanoAssert(argc < 10);
+				LirReader ri(i);
+				for (int32_t j=argc; j > 0; )
+					args[--j] = ri.previous()->oprnd1();
+				return hashcall(i->imm8(), argc, args);
+			} 
+			default:
+				if (operandCount(op) == 2)
+					return hash2(op, i->oprnd1(), i->oprnd2());
+				else
+					return hash1(op, i->oprnd1());
+		}
+	}
+
+	/*static*/ bool FASTCALL LInsHashSet::equals(LInsp a, LInsp b) 
+	{
+		if (a==b)
+			return true;
+		AvmAssert(a->opcode() == b->opcode());
+		const LOpcode op = a->opcode();
+		switch (op)
+		{
+			case LIR_short:
+			{
+				return a->imm16() == b->imm16();
+			} 
+			case LIR_int:
+			{
+				return a->imm32() == b->imm32();
+			} 
+			case LIR_quad:
+			{
+				return a->constvalq() == b->constvalq();
+			}
+			case LIR_call:
+			case LIR_fcall:
+			{
+				uint32_t argc;
+				if (a->imm8() != b->imm8()) return false;
+				if ((argc=a->imm8b()) != b->imm8b()) return false;
+				LirReader ra(a), rb(b);
+				while (argc-- > 0)
+					if (ra.previous()->oprnd1() != rb.previous()->oprnd1())
+						return false;
+				return true;
+			} 
+			default:
+			{
+				const uint32_t count = operandCount(op);
+				if ((count >= 1 && a->oprnd1() != b->oprnd1()) ||
+					(count >= 2 && a->oprnd2() != b->oprnd2()))
+					return false;
+				return true;
+			}
+		}
+	}
+
+	void FASTCALL LInsHashSet::grow()
+	{
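+		// double the table capacity and rehash all existing entries into the new list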
+		const uint32_t newcap = m_list.size() << 1;
+		InsList newlist(m_gc, newcap);
+		newlist.set(newcap-1, 0);
+		for (uint32_t i=0, n=m_list.size(); i < n; i++)
+		{
+			LInsp name = m_list.get(i);
+			if (!name) continue;
+			uint32_t j = find(name, hashcode(name), newlist, newcap);
+			newlist.set(j, name);
+		}
+		m_list.become(newlist);
+	}
+
+	uint32_t FASTCALL LInsHashSet::find(LInsp name, uint32_t hash, const InsList& list, uint32_t cap)
+	{
+		const uint32_t bitmask = (cap - 1) & ~0x1;
+
+		uint32_t n = 7 << 1;
+		hash &= bitmask;  
+		LInsp k;
+		while ((k = list.get(hash)) != NULL &&
+			(!LIns::sameop(k,name) || !equals(k, name)))
+		{
+			hash = (hash + (n += 2)) & bitmask;		// quadratic probe
+		}
+		return hash;
+	}
+
+	LInsp LInsHashSet::add(LInsp name, uint32_t k)
+	{
+		// this is relatively short-lived so let's try a more aggressive load factor
+		// in the interest of improving performance
+		if (((m_used+1)<<1) >= m_list.size()) // 0.50
+		{
+			grow();
+			k = find(name, hashcode(name), m_list, m_list.size());
+		}
+		NanoAssert(!m_list.get(k));
+		m_used++;
+		m_list.set(k, name);
+		return name;
+	}
+
+	uint32_t LInsHashSet::hashimm(int32_t a) {
+		return _hashfinish(_hash32(0,a));
+	}
+
+	uint32_t LInsHashSet::hashimmq(uint64_t a) {
+		uint32_t hash = _hash32(0, uint32_t(a >> 32));
+		return _hashfinish(_hash32(hash, uint32_t(a)));
+	}
+
+	uint32_t LInsHashSet::hash1(LOpcode op, LInsp a) {
+		uint32_t hash = _hash8(0,uint8_t(op));
+		return _hashfinish(_hashptr(hash, a));
+	}
+
+	uint32_t LInsHashSet::hash2(LOpcode op, LInsp a, LInsp b) {
+		uint32_t hash = _hash8(0,uint8_t(op));
+		hash = _hashptr(hash, a);
+		return _hashfinish(_hashptr(hash, b));
+	}
+
+	uint32_t LInsHashSet::hashcall(int32_t fid, uint32_t argc, LInsp args[]) {
+		uint32_t hash = _hash32(0,fid);
+		for (int32_t j=argc-1; j >= 0; j--)
+			hash = _hashptr(hash,args[j]);
+		return _hashfinish(hash);
+	}
+
+	LInsp LInsHashSet::find32(int32_t a, uint32_t &i)
+	{
+		uint32_t cap = m_list.size();
+		const InsList& list = m_list;
+		const uint32_t bitmask = (cap - 1) & ~0x1;
+		uint32_t hash = hashimm(a) & bitmask;
+		uint32_t n = 7 << 1;
+		LInsp k;
+		while ((k = list.get(hash)) != NULL && 
+			(!k->isconst() || k->constval() != a))
+		{
+			hash = (hash + (n += 2)) & bitmask;		// quadratic probe
+		}
+		i = hash;
+		return k;
+	}
+
+	LInsp LInsHashSet::find64(uint64_t a, uint32_t &i)
+	{
+		uint32_t cap = m_list.size();
+		const InsList& list = m_list;
+		const uint32_t bitmask = (cap - 1) & ~0x1;
+		uint32_t hash = hashimmq(a) & bitmask;  
+		uint32_t n = 7 << 1;
+		LInsp k;
+		while ((k = list.get(hash)) != NULL && 
+			(!k->isconstq() || k->constvalq() != a))
+		{
+			hash = (hash + (n += 2)) & bitmask;		// quadratic probe
+		}
+		i = hash;
+		return k;
+	}
+
+	LInsp LInsHashSet::find1(LOpcode op, LInsp a, uint32_t &i)
+	{
+		uint32_t cap = m_list.size();
+		const InsList& list = m_list;
+		const uint32_t bitmask = (cap - 1) & ~0x1;
+		uint32_t hash = hash1(op,a) & bitmask;  
+		uint32_t n = 7 << 1;
+		LInsp k;
+		while ((k = list.get(hash)) != NULL && 
+			(k->opcode() != op || k->oprnd1() != a))
+		{
+			hash = (hash + (n += 2)) & bitmask;		// quadratic probe
+		}
+		i = hash;
+		return k;
+	}
+
+	LInsp LInsHashSet::find2(LOpcode op, LInsp a, LInsp b, uint32_t &i)
+	{
+		uint32_t cap = m_list.size();
+		const InsList& list = m_list;
+		const uint32_t bitmask = (cap - 1) & ~0x1;
+		uint32_t hash = hash2(op,a,b) & bitmask;  
+		uint32_t n = 7 << 1;
+		LInsp k;
+		while ((k = list.get(hash)) != NULL && 
+			(k->opcode() != op || k->oprnd1() != a || k->oprnd2() != b))
+		{
+			hash = (hash + (n += 2)) & bitmask;		// quadratic probe
+		}
+		i = hash;
+		return k;
+	}
+
+	bool argsmatch(LInsp i, uint32_t argc, LInsp args[])
+	{
+		// we don't have callinfo here so we cannot use argiterator
+		LirReader r(i);
+		for (LInsp a = r.previous(); a->isArg(); a=r.previous())
+			if (a->oprnd1() != args[--argc])
+				return false;
+		return true;
+	}
+
+	LInsp LInsHashSet::findcall(int32_t fid, uint32_t argc, LInsp args[], uint32_t &i)
+	{
+		uint32_t cap = m_list.size();
+		const InsList& list = m_list;
+		const uint32_t bitmask = (cap - 1) & ~0x1;
+		uint32_t hash = hashcall(fid, argc, args) & bitmask;  
+		uint32_t n = 7 << 1;
+		LInsp k;
+		while ((k = list.get(hash)) != NULL &&
+			(!k->isCall() || k->imm8() != fid || !argsmatch(k, argc, args)))
+		{
+			hash = (hash + (n += 2)) & bitmask;		// quadratic probe
+		}
+		i = hash;
+		return k;
+	}
+
+    SideExit *LIns::exit()
+    {
+        NanoAssert(isGuard());
+        return (SideExit*)oprnd2()->payload();
+    }
+
+#ifdef NJ_VERBOSE
+    class RetiredEntry: public GCObject
+    {
+    public:
+        List<LInsp, LIST_NonGCObjects> live;
+        LInsp i;
+        RetiredEntry(GC *gc): live(gc) {}
+    };
+	class LiveTable 
+	{
+	public:
+		SortedMap<LInsp,LInsp,LIST_NonGCObjects> live;
+        List<RetiredEntry*, LIST_GCObjects> retired;
+		int maxlive;
+		LiveTable(GC *gc) : live(gc), retired(gc), maxlive(0) {}
+		void add(LInsp i, LInsp use) {
+            if (!i->isconst() && !i->isconstq() && !live.containsKey(i)) {
+                live.put(i,use);
+            }
+		}
+        void retire(LInsp i, GC *gc) {
+            RetiredEntry *e = new (gc) RetiredEntry(gc);
+            e->i = i;
+            for (int j=0, n=live.size(); j < n; j++) {
+                LInsp l = live.keyAt(j);
+                if (!l->isStore() && !l->isGuard() && !l->isArg() && !l->isop(LIR_loop))
+                    e->live.add(l);
+            }
+            int size=0;
+		    if ((size = e->live.size()) > maxlive)
+			    maxlive = size;
+
+            live.remove(i);
+            retired.add(e);
+		}
+		bool contains(LInsp i) {
+			return live.containsKey(i);
+		}
+	};
+
+    void live(GC *gc, Assembler *assm, Fragment *frag)
+	{
+		// traverse backwards to find live exprs and a few other stats.
+
+		LInsp sp = frag->sp;
+		LInsp rp = frag->rp;
+		LiveTable live(gc);
+		uint32_t exits = 0;
+		LirBuffer *lirbuf = frag->lirbuf;
+        LirReader br(lirbuf);
+		StoreFilter r(&br, gc, 0, frag->param0, sp, rp);
+        bool skipargs = false;
+        int total = 0;
+        live.add(frag->param0, r.pos());
+		for (LInsp i = r.read(); i != 0; i = r.read())
+		{
+            total++;
+
+            if (i->isArg()) {
+                if (!skipargs)
+                    live.add(i->oprnd1(),0);
+            } else {
+                skipargs = false;
+            }
+
+            // first handle side-effect instructions
+			if (i->isStore() || i->isGuard() || i->isop(LIR_loop) ||
+				i->isCall() && !assm->callInfoFor(i->imm8())->_cse)
+			{
+				live.add(i,0);
+                if (i->isGuard())
+                    exits++;
+			}
+
+			// now propagate liveness
+			if (live.contains(i))
+			{
+				live.retire(i,gc);
+				if (i->isStore()) {
+					live.add(i->oprnd2(),i); // base
+					live.add(i->oprnd1(),i); // val
+				}
+                else if (i->isop(LIR_cmov)) {
+                    live.add(i->oprnd1(),i);
+                    live.add(i->oprnd2()->oprnd1(),i);
+                    live.add(i->oprnd2()->oprnd2(),i);
+                }
+				else if (operandCount(i->opcode()) == 1) {
+				    live.add(i->oprnd1(),i);
+				}
+				else if (operandCount(i->opcode()) == 2) {
+					live.add(i->oprnd1(),i);
+					live.add(i->oprnd2(),i);
+				}
+			}
+			else
+			{
+                skipargs = i->isCall();
+			}
+		}
+ 
+		assm->outputf("live instruction count %ld, total %ld, max pressure %d",
+			live.retired.size(), total, live.maxlive);
+        assm->outputf("side exits %ld", exits);
+
+		// print live exprs, going forwards
+		LirNameMap *names = frag->lirbuf->names;
+		for (int j=live.retired.size()-1; j >= 0; j--) 
+        {
+            RetiredEntry *e = live.retired[j];
+            char livebuf[1000], *s=livebuf;
+            *s = 0;
+            for (int k=0,n=e->live.size(); k < n; k++) {
+				strcpy(s, names->formatRef(e->live[k]));
+				s += strlen(s);
+				*s++ = ' '; *s = 0;
+				NanoAssert(s < livebuf+sizeof(livebuf));
+            }
+			printf("%-60s %s\n", livebuf, names->formatIns(e->i));
+			if (e->i->isGuard())
+				printf("\n");
+		}
+	}
+
+	void LirNameMap::addName(LInsp i, Stringp name) {
+		Entry *e = new (labels->core->gc) Entry(name);
+		names.put(i, e);
+	}
+	void LirNameMap::addName(LInsp i, const char *name) {
+		addName(i, labels->core->newString(name));
+	}
+
+	void LirNameMap::copyName(LInsp i, const char *s, int suffix) {
+		char s2[200];
+		sprintf(s2,"%s%d", s,suffix);
+		addName(i, labels->core->newString(s2));
+	}
+
+	void LirNameMap::formatImm(int32_t c, char *buf) {
+		if (c >= 10000 || c <= -10000)
+			sprintf(buf,"#%s",labels->format((void*)c));
+        else
+            sprintf(buf,"%d", c);
+	}
+
+	const char* LirNameMap::formatRef(LIns *ref)
+	{
+		char buffer[200], *buf=buffer;
+		buf[0]=0;
+		GC *gc = labels->core->gc;
+		if (names.containsKey(ref)) {
+			StringNullTerminatedUTF8 cname(gc, names.get(ref)->name);
+			strcat(buf, cname.c_str());
+		}
+		else if (ref->isconstq()) {
+			formatImm(uint32_t(ref->constvalq()>>32), buf);
+			buf += strlen(buf);
+			*buf++ = ':';
+			formatImm(uint32_t(ref->constvalq()), buf);
+		}
+		else if (ref->isconst()) {
+			formatImm(ref->constval(), buf);
+		}
+		else {
+			if (ref->isCall()) {
+				copyName(ref, _functions[ref->imm8()]._name, funccounts.add(ref->imm8()));
+			} else {
+				copyName(ref, nameof(ref), lircounts.add(ref->opcode()));
+			}
+			StringNullTerminatedUTF8 cname(gc, names.get(ref)->name);
+			strcat(buf, cname.c_str());
+		}
+		return labels->dup(buffer);
+	}
+
+	const char* LirNameMap::formatIns(LIns* i)
+	{
+		char sbuf[200];
+		char *s = sbuf;
+		if (!i->isStore() && !i->isGuard() && !i->isop(LIR_trace) && !i->isop(LIR_loop)) {
+			sprintf(s, "%s = ", formatRef(i));
+			s += strlen(s);
+		}
+
+		switch(i->opcode())
+		{
+			case LIR_short:
+			case LIR_int:
+			{
+                sprintf(s, "%s", formatRef(i));
+				break;
+			}
+
+			case LIR_quad:
+			{
+				int32_t *p = (int32_t*) (i-2);
+				sprintf(s, "#%X:%X", p[1], p[0]);
+				break;
+			}
+
+			case LIR_loop:
+			case LIR_trace:
+				sprintf(s, "%s", nameof(i));
+				break;
+
+			case LIR_fcall:
+			case LIR_call: {
+				sprintf(s, "%s ( ", _functions[i->imm8()]._name);
+				LirReader r(i);
+				for (LInsp a = r.previous(); a->isArg(); a = r.previous()) {
+					s += strlen(s);
+					sprintf(s, "%s ",formatRef(a->oprnd1()));
+				}
+				s += strlen(s);
+				sprintf(s, ")");
+				break;
+			}
+
+			case LIR_param:
+                sprintf(s, "%s %s", nameof(i), gpn(i->imm8()));
+				break;
+
+			case LIR_x: {
+                SideExit *x = (SideExit*) i->oprnd2()->payload();
+				uint32_t ip = uint32_t(x->from->frid) + x->ip_adj;
+				sprintf(s, "%s -> %s sp%+d rp%+d f%+d", nameof(i), 
+					labels->format((void*)ip),
+					x->sp_adj, x->rp_adj, x->f_adj);
+                break;
+			}
+
+            case LIR_callh:
+			case LIR_neg:
+			case LIR_fneg:
+			case LIR_arg:
+			case LIR_farg:
+			case LIR_i2f:
+			case LIR_u2f:
+			case LIR_qlo:
+			case LIR_qhi:
+			case LIR_ref:
+				sprintf(s, "%s %s", nameof(i), formatRef(i->oprnd1()));
+				break;
+
+			case LIR_xt:
+			case LIR_xf: {
+                SideExit *x = (SideExit*) i->oprnd2()->payload();
+				uint32_t ip = int32_t(x->from->frid) + x->ip_adj;
+				sprintf(s, "%s %s -> %s sp%+d rp%+d f%+d", nameof(i),
+					formatRef(i->oprnd1()),
+					labels->format((void*)ip),
+					x->sp_adj, x->rp_adj, x->f_adj);
+				break;
+            }
+			case LIR_add:
+			case LIR_sub: 
+		 	case LIR_mul: 
+			case LIR_fadd:
+			case LIR_fsub: 
+		 	case LIR_fmul: 
+			case LIR_fdiv: 
+			case LIR_and: 
+			case LIR_or: 
+			case LIR_not: 
+			case LIR_xor: 
+			case LIR_lsh: 
+			case LIR_rsh:
+			case LIR_ush:
+			case LIR_eq:
+			case LIR_lt:
+			case LIR_le:
+			case LIR_gt:
+			case LIR_ge:
+			case LIR_ult:
+			case LIR_ule:
+			case LIR_ugt:
+			case LIR_uge:
+			case LIR_qjoin:
+				sprintf(s, "%s %s, %s", nameof(i), 
+					formatRef(i->oprnd1()), 
+					formatRef(i->oprnd2()));
+				break;
+
+			case LIR_cmov:
+                sprintf(s, "%s ? %s : %s", 
+					formatRef(i->oprnd1()), 
+					formatRef(i->oprnd2()->oprnd1()), 
+					formatRef(i->oprnd2()->oprnd2()));
+				break;
+
+			case LIR_ld: 
+			case LIR_ldc: 
+			case LIR_ldq: 
+			case LIR_ldcb: 
+				sprintf(s, "%s %s[%s]", nameof(i), 
+					formatRef(i->oprnd1()), 
+					formatRef(i->oprnd2()));
+				break;
+
+			case LIR_st: 
+            case LIR_sti:
+				sprintf(s, "%s[%d] = %s", 
+					formatRef(i->oprnd2()), 
+					i->immdisp(), 
+					formatRef(i->oprnd1()));
+				break;
+
+			default:
+				sprintf(s, "?");
+				break;
+		}
+		return labels->dup(sbuf);
+	}
+
+
+#endif
+
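+	// CseFilter implements common-subexpression elimination by hash-consing:
+	// before emitting an instruction it looks for an identical one already in
+	// `exprs` (an LInsHashSet) and returns the existing LIns instead.  Only
+	// operations flagged as CSE-able (isCse(), or _cse for calls) are pooled;
+	// for guards, a repeated condition means the guard is redundant and 0 is
+	// returned rather than a duplicate guard being emitted.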
+	CseFilter::CseFilter(LirWriter *out, GC *gc)
+		: LirWriter(out), exprs(gc) {}
+
+	LIns* CseFilter::insImm(int32_t imm)
+	{
+		uint32_t k;
+		LInsp found = exprs.find32(imm, k);
+		if (found)
+			return found;
+		return exprs.add(out->insImm(imm), k);
+	}
+
+	LIns* CseFilter::insImmq(uint64_t q)
+	{
+		uint32_t k;
+		LInsp found = exprs.find64(q, k);
+		if (found)
+			return found;
+		return exprs.add(out->insImmq(q), k);
+	}
+
+	LIns* CseFilter::ins1(LOpcode v, LInsp a)
+	{
+		if (isCse(v)) {
+			NanoAssert(operandCount(v)==1);
+			uint32_t k;
+			LInsp found = exprs.find1(v, a, k);
+			if (found)
+				return found;
+			return exprs.add(out->ins1(v,a), k);
+		}
+		return out->ins1(v,a);
+	}
+
+	LIns* CseFilter::ins2(LOpcode v, LInsp a, LInsp b)
+	{
+		if (isCse(v)) {
+			NanoAssert(operandCount(v)==2);
+			uint32_t k;
+			LInsp found = exprs.find2(v, a, b, k);
+			if (found)
+				return found;
+			return exprs.add(out->ins2(v,a,b), k);
+		}
+		return out->ins2(v,a,b);
+	}
+
+	LIns* CseFilter::insLoad(LOpcode v, LInsp base, LInsp disp)
+	{
+		if (isCse(v)) {
+			NanoAssert(operandCount(v)==2);
+			uint32_t k;
+			LInsp found = exprs.find2(v, base, disp, k);
+			if (found)
+				return found;
+			return exprs.add(out->insLoad(v,base,disp), k);
+		}
+		return out->insLoad(v,base,disp);
+	}
+
+	LInsp CseFilter::insGuard(LOpcode v, LInsp c, SideExit *x)
+	{
+		if (isCse(v)) {
+			// conditional guard
+			NanoAssert(operandCount(v)==1);
+			uint32_t k;
+			LInsp found = exprs.find1(v, c, k);
+			if (found)
+				return 0;
+			return exprs.add(out->insGuard(v,c,x), k);
+		}
+		return out->insGuard(v, c, x);
+	}
+
+	LInsp CseFilter::insCall(int32_t fid, LInsp args[])
+	{
+		const CallInfo *c = &_functions[fid];
+		if (c->_cse) {
+			uint32_t k;
+			LInsp found = exprs.findcall(fid, c->count_args(), args, k);
+			if (found)
+				return found;
+			return exprs.add(out->insCall(fid, args), k);
+		}
+		return out->insCall(fid, args);
+	}
+
+    LIns* FASTCALL callArgN(LIns* i, uint32_t n)
+	{
+		// @todo clean up; shouldn't have to create a reader                                               
+		LirReader rdr(i);
+		do
+			i = rdr.read();
+		while (n-- > 0);
+		return i;
+	}
+
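+    // Assemble triggerFrag into native code.  When tree_opt is on and the
+    // trigger is a branch trace, the whole tree is recompiled: the root's links
+    // are torn down, each tree branch is assembled (its register state saved in
+    // regMap, keyed by the spawning side exit), then the trunk itself, and the
+    // fragments are linked back up only if assembly succeeded.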
+    void compile(Assembler* assm, Fragment* triggerFrag)
+    {
+        AvmCore *core = triggerFrag->lirbuf->_frago->core();
+        GC *gc = core->gc;
+
+		verbose_only( StringList asmOutput(gc); )
+		verbose_only( assm->_outputCache = &asmOutput; )
+
+		verbose_only(if (assm->_verbose && core->config.verbose_live)
+			live(gc, assm, triggerFrag);)
+
+		bool treeCompile = core->config.tree_opt && (triggerFrag->kind == BranchTrace);
+		RegAllocMap regMap(gc);
+		NInsList loopJumps(gc);
+		assm->beginAssembly(&regMap);
+
+		//fprintf(stderr, "recompile trigger %X kind %d\n", (int)triggerFrag, triggerFrag->kind);
+		Fragment* root = triggerFrag;
+		if (treeCompile)
+		{
+			// recompile the entire tree
+			root = triggerFrag->anchor;
+			root->removeIntraLinks();
+			root->unlink(assm);			// unlink all incoming jumps, since the compile() can fail
+			root->unlinkBranches(assm); // no one jumps into a branch (except from within the tree) so safe to clear the links table
+			root->fragEntry = 0;
+			
+			// do the tree branches
+			Fragment* frag = root->treeBranches;
+			while(frag)
+			{
+				// compile until no more frags
+				if (frag->lastIns)
+				{
+					NIns* loopJump = assm->assemble(frag);
+					verbose_only(if (assm->_verbose) assm->outputf("compiling branch %X that exits from SID %d",frag->frid,frag->spawnedFrom->sid);)
+					if (loopJump) loopJumps.add((intptr_t)loopJump);
+					
+					NanoAssert(frag->kind == BranchTrace);
+					RegAlloc* regs = new (gc) RegAlloc();
+					assm->copyRegisters(regs);
+					assm->releaseRegisters();
+					SideExit* exit = frag->spawnedFrom;
+					regMap.put(exit, regs);
+				}
+				frag = frag->treeBranches;
+			}
+		}
+		
+		// now assemble the main trunk
+
+		NIns* loopJump = assm->assemble(root);
+		verbose_only(if (assm->_verbose) assm->output("compiling trunk");)
+		if (loopJump) loopJumps.add((intptr_t)loopJump);
+		assm->endAssembly(root, loopJumps);
+
+		// remove the map entries
+		while(!regMap.isEmpty())
+			gc->Free(regMap.removeLast());
+			
+		// reverse output so that assembly is displayed low-to-high
+		verbose_only( assm->_outputCache = 0; )
+		verbose_only(for(int i=asmOutput.size()-1; i>=0; --i) { assm->outputf("%s",asmOutput.get(i)); } );
+
+		if (assm->error())
+		{
+			root->fragEntry = 0;
+		}
+		else
+		{
+			root->link(assm);
+			if (treeCompile) root->linkBranches(assm);
+		}
+    }
+
+	#endif /* FEATURE_NANOJIT */
+
+#if defined(NJ_VERBOSE)
+    LabelMap::LabelMap(AvmCore *core, LabelMap* parent)
+        : parent(parent), names(core->gc), addrs(core->config.verbose_addrs), end(buf), core(core)
+	{}
+
+    void LabelMap::add(const void *p, size_t size, size_t align, const char *name)
+	{
+		if (!this) return;
+		add(p, size, align, core->newString(name));
+	}
+
+    void LabelMap::add(const void *p, size_t size, size_t align, Stringp name)
+    {
+		if (!this) return;
+		Entry *e = new (core->gc) Entry(name, size<<align, align);
+		names.put(p, e);
+    }
+
+    const char *LabelMap::format(const void *p)
+    {
+		char b[200];
+		int i = names.findNear(p);
+		if (i >= 0) {
+			const void *start = names.keyAt(i);
+			Entry *e = names.at(i);
+			const void *end = (const char*)start + e->size;
+			avmplus::StringNullTerminatedUTF8 cname(core->gc, e->name);
+			const char *name = cname.c_str();
+			if (p == start) {
+				if (addrs)
+					sprintf(b,"%p %s",p,name);
+				else
+					strcpy(b, name);
+				return dup(b);
+			}
+			else if (p > start && p < end) {
+				int d = (int(p)-int(start)) >> e->align;
+				if (addrs)
+					sprintf(b, "%p %s+%d", p, name, d);
+				else
+					sprintf(b,"%s+%d", name, d);
+				return dup(b);
+			}
+		}
+		if (parent)
+			return parent->format(p);
+
+		sprintf(b, "%p", p);
+		return dup(b);
+    }
+
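+	// dup() copies the formatted string into a small rotating buffer, wrapping to
+	// the start when it would overflow; returned pointers therefore stay valid
+	// only briefly, long enough for the caller to print them.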
+	const char *LabelMap::dup(const char *b)
+	{
+		int need = strlen(b)+1;
+		char *s = end;
+		end += need;
+		if (end > buf+sizeof(buf)) {
+			s = buf;
+			end = s+need;
+		}
+		strcpy(s, b);
+		return s;
+	}
+#endif // NJ_VERBOSE
+}
+	
new file mode 100644
--- /dev/null
+++ b/js/src/nanojit/Nativei386.cpp
@@ -0,0 +1,580 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is [Open Source Virtual Machine].
+ *
+ * The Initial Developer of the Original Code is
+ * Adobe System Incorporated.
+ * Portions created by the Initial Developer are Copyright (C) 2004-2007
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ *   Adobe AS3 Team
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#ifdef _MAC
+// for MakeDataExecutable
+#include <CoreServices/CoreServices.h>
+#endif
+
+#include "nanojit.h"
+
+namespace nanojit
+{
+	#ifdef FEATURE_NANOJIT
+
+	#ifdef NJ_VERBOSE
+		const char *regNames[] = {
+			"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi",
+			"xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7",
+			"f0",  "f1",  "f2",  "f3",  "f4",  "f5",  "f6",  "f7"
+		};
+	#endif
+
+    const Register Assembler::argRegs[] = { ECX, EDX };
+    const Register Assembler::retRegs[] = { EAX, EDX };
+
+	void Assembler::nInit(AvmCore* core)
+	{
+        sse2 = core->use_sse2();
+		// CMOVcc is actually available on most PPro+ chips (except for a few
+		// oddballs like Via C3) but for now tie to SSE2 detection
+		has_cmov = sse2;
+        OSDep::getDate();
+	}
+
+	NIns* Assembler::genPrologue(RegisterMask needSaving)
+	{
+		/**
+		 * Prologue
+		 */
+		uint32_t stackNeeded = 4 * _activation.highwatermark;
+		uint32_t savingCount = 0;
+
+		for(Register i=FirstReg; i <= LastReg; i = nextreg(i))
+			if (needSaving&rmask(i)) 
+				savingCount++;
+
+		// so for alignment purposes we've pushed the return addr, fp, and savingCount registers
+		uint32_t stackPushed = 4 * (3+savingCount);
+		uint32_t aligned = alignUp(stackNeeded + stackPushed, NJ_ALIGN_STACK);
+		uint32_t amt = aligned - stackPushed;
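+		// e.g. (assuming NJ_ALIGN_STACK is 16): 20 bytes of locals and 3 saved
+		// registers give stackPushed = 4*(3+3) = 24, aligned = 48, amt = 24.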
+
+		if (amt) 
+			SUBi(SP, amt);
+
+		verbose_only( verbose_outputf("        %p:",_nIns); )
+		verbose_only( verbose_output("        patch entry:"); )
+        NIns *patchEntry = _nIns;
+		MR(FP, SP);
+		PUSHr(FP); // push ebp twice to align the frame on 8 bytes
+		PUSHr(FP);
+
+		for(Register i=FirstReg; i <= LastReg; i = nextreg(i))
+			if (needSaving&rmask(i))
+				PUSHr(i);
+
+		#ifndef DARWIN
+		// dynamically align the stack
+		PUSHr(FP); // fake return addr.
+		ANDi(SP, -NJ_ALIGN_STACK);
+		MR(FP,SP);
+		PUSHr(FP);
+		#endif
+
+		return patchEntry;
+	}
+
+	GuardRecord * Assembler::nFragExit(SideExit *exit)
+	{
+		bool trees = _frago->core()->config.tree_opt;
+        Fragment *frag = exit->target;
+        GuardRecord *lr = 0;
+		bool destKnown = (frag && frag->fragEntry);
+		if (destKnown && !trees)
+		{
+			// already exists, emit jump now.  no patching required.
+			JMP(frag->fragEntry);
+            lr = 0;
+		}
+		else
+		{
+			// target doesn't exist yet.  emit jump to epilogue, and set up to patch later.
+			lr = placeGuardRecord(exit);
+            JMP_long(_epilogue);
+			lr->jmp = _nIns;
+#if 0			
+			// @todo optimization; is it worth it? It means we can remove the loop over outbound in Fragment.link()
+			// for trees we need the patch entry on the incoming fragment so we can unhook it later if needed
+			if (trees && destKnown)
+				patch(lr);
+#endif
+		}
+		// first restore ESP from EBP, undoing SUBi(SP,amt) from genPrologue
+        MR(SP,FP);
+
+
+        #ifdef NJ_VERBOSE
+        if (_frago->core()->config.show_stats) {
+			// load EDX (arg1) with Fragment *fromFrag, target fragment
+			// will make use of this when calling fragenter().
+            int fromfrag = int((Fragment*)_thisfrag);
+            LDi(argRegs[1], fromfrag);
+        }
+        #endif
+
+		// return value is GuardRecord*
+        LDi(EAX, int(lr));
+
+		// if/when we patch this exit to jump over to another fragment,
+		// that fragment will need its parameters set up just like ours.
+        LInsp param0 = _thisfrag->param0;
+		Register state = findSpecificRegFor(param0, Register(param0->imm8()));
+
+        // update InterpState
+        
+        if (exit->rp_adj)
+            ADDmi((int32_t)offsetof(avmplus::InterpState, rp), state, exit->rp_adj);
+
+        if (exit->sp_adj)
+            ADDmi((int32_t)offsetof(avmplus::InterpState, sp), state, exit->sp_adj);
+
+        if (exit->ip_adj)
+			ADDmi((int32_t)offsetof(avmplus::InterpState, ip), state, exit->ip_adj);
+
+        if (exit->f_adj)
+            ADDmi((int32_t)offsetof(avmplus::InterpState, f), state, exit->f_adj);
+
+        return lr;
+	}
+
+    NIns *Assembler::genEpilogue(RegisterMask restore)
+    {
+        RET();
+
+		#ifndef DARWIN
+		// undo dynamic alignment
+		POP(FP);
+		MR(SP,FP);
+		#endif
+
+		for (Register i=UnknownReg; i >= FirstReg; i = prevreg(i))
+			if (restore&rmask(i)) { POP(i); } 
+
+		POP(FP);
+		POP(FP);
+        return  _nIns;
+    }
+	
+	void Assembler::nArgEmitted(const CallInfo* call, uint32_t stackSlotCount, uint32_t iargs, uint32_t fargs)
+	{
+		// see if we have finished emitting all args.  If so then make sure the 
+		// new stack pointer is NJ_ALIGN_STACK aligned
+		const uint32_t istack = call->count_iargs();
+		const uint32_t fstack = call->count_args() - istack;
+		//printf("call %s iargs %d fargs %d istack %d fstack %d\n",call->_name,iargs,fargs,istack,fstack);
+		AvmAssert(iargs <= istack);
+		AvmAssert(fargs <= fstack);
+		if (iargs == istack && fargs == fstack)
+		{
+			const int32_t size = 4*stackSlotCount;
+			const int32_t extra = alignUp(size, NJ_ALIGN_STACK) - size; 
+			if (extra > 0)
+				SUBi(SP, extra);
+		}
+	}
+	
+	void Assembler::nPostCallCleanup(const CallInfo* call)
+	{
+		// must be signed, not unsigned
+		int32_t istack = call->count_iargs();
+		int32_t fstack = call->count_args() - istack;
+
+		istack -= 2;  // first 2 4B args are in registers
+		if (istack <= 0)
+		{
+			istack = 0;
+			if (fstack == 0)
+				return;  // only using ECX/EDX; nothing passed on the stack, so no cleanup needed
+		}
+
+		const int32_t size = 4*istack + 8*fstack; // actual stack space used
+		NanoAssert( size > 0 );
+		
+		const int32_t extra = alignUp(size, NJ_ALIGN_STACK) - (size); 
+
+		// stack re-alignment 
+		// only pop our adjustment amount since callee pops args in FASTCALL mode
+		if (extra > 0)
+			{ ADDi(SP, extra); }
+	}
+	
+	void Assembler::nMarkExecute(Page* page, int32_t count, bool enable)
+	{
+		#ifdef _MAC
+			MakeDataExecutable(page, count*NJ_PAGE_SIZE);
+		#else
+			(void)page;
+			(void)count;
+		#endif
+			(void)enable;
+	}
+			
+	Register Assembler::nRegisterAllocFromSet(int set)
+	{
+		Register r;
+		RegAlloc &regs = _allocator;
+	#ifdef WIN32
+		_asm
+		{
+			mov ecx, regs
+			bsf eax, set					// i = first bit set
+			btr RegAlloc::free[ecx], eax	// free &= ~rmask(i)
+			mov r, eax
+		}
+	#else
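+		// Same idea as the MSVC block above: bsf loads the index of the lowest
+		// set bit of `set` into eax (the chosen register), btr clears that bit
+		// in regs.free, and the result is copied out into r.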
+		asm(
+			"bsf	%1, %%eax\n\t"
+			"btr	%%eax, %2\n\t"
+			"movl	%%eax, %0\n\t"
+			: "=m"(r) : "m"(set), "m"(regs.free) : "%eax", "memory" );
+	#endif /* WIN32 */
+		return r;
+	}
+
+	void Assembler::nRegisterResetAll(RegAlloc& a)
+	{
+		// add scratch registers to our free list for the allocator
+		a.clear();
+		a.used = 0;
+		a.free = SavedRegs | ScratchRegs;
+        if (!sse2)
+            a.free &= ~XmmRegs;
+		debug_only( a.managed = a.free; )
+	}
+
+	void Assembler::nPatchBranch(NIns* branch, NIns* location)
+	{
+		uint32_t offset = location - branch;
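+		// A JMP rel32 is 5 bytes and a Jcc rel32 is 6 (extra 0x0F prefix), so the
+		// displacement field sits at branch+1 or branch+2 respectively and is
+		// measured from the end of the instruction, hence the -5 / -6.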
+		if (branch[0] == JMPc)
+			*(uint32_t*)&branch[1] = offset - 5;
+		else
+			*(uint32_t*)&branch[2] = offset - 6;
+	}
+
+	RegisterMask Assembler::hint(LIns* i, RegisterMask allow)
+	{
+		uint32_t op = i->opcode();
+		int prefer = allow;
+		if (op == LIR_call)
+			prefer &= rmask(EAX);
+		else if (op == LIR_param)
+			prefer &= rmask(Register(i->imm8()));
+        else if (op == LIR_callh || op == LIR_rsh && i->oprnd1()->opcode()==LIR_callh)
+            prefer &= rmask(EDX);
+		else if (i->isCmp())
+			prefer &= AllowableFlagRegs;
+        else if (i->isconst())
+            prefer &= ScratchRegs;
+		return (_allocator.free & prefer) ? prefer : allow;
+	}
+
+    void Assembler::asm_qjoin(LIns *ins)
+    {
+		int d = findMemFor(ins);
+		AvmAssert(d);
+		LIns* lo = ins->oprnd1();
+		LIns* hi = ins->oprnd2();
+
+        Reservation *resv = getresv(ins);
+        Register rr = resv->reg;
+
+        if (rr != UnknownReg && (rmask(rr) & FpRegs))
+            evict(rr);
+
+        if (hi->isconst())
+		{
+			STi(FP, d+4, hi->constval());
+		}
+		else
+		{
+			Register r = findRegFor(hi, GpRegs);
+			ST(FP, d+4, r);
+		}
+
+        if (lo->isconst())
+		{
+			STi(FP, d, lo->constval());
+		}
+		else
+		{
+			// okay if r gets recycled.
+			Register r = findRegFor(lo, GpRegs);
+			ST(FP, d, r);
+		}
+
+        freeRsrcOf(ins, false);	// if we had a reg in use, emit a ST to flush it to mem
+    }
+
+	void Assembler::asm_restore(LInsp i, Reservation *resv, Register r)
+	{
+        if (i->isconst())
+        {
+            if (!resv->arIndex) {
+                reserveFree(i);
+            }
+            LDi(r, i->constval());
+        }
+        else
+        {
+            int d = findMemFor(i);
+            if (rmask(r) & FpRegs)
+		    {
+                if (rmask(r) & XmmRegs) {
+                    LDQ(r, d, FP);
+                } else {
+			        FLDQ(d, FP); 
+                }
+            }
+            else
+		    {
+			    LD(r, d, FP);
+		    }
+			verbose_only(if (_verbose) {
+				outputf("        restore %s", _thisfrag->lirbuf->names->formatRef(i));
+			})
+        }
+	}
+
+    void Assembler::asm_store32(LIns *value, int dr, LIns *base)
+    {
+        if (value->isconst())
+        {
+			Register rb = findRegFor(base, GpRegs);
+            int c = value->constval();
+			STi(rb, dr, c);
+        }
+        else
+        {
+		    // make sure value is in a register
+		    Reservation *rA, *rB;
+		    findRegFor2(GpRegs, value, rA, base, rB);
+		    Register ra = rA->reg;
+		    Register rb = rB->reg;
+		    ST(rb, dr, ra);
+        }
+    }
+
+	void Assembler::asm_spill(LInsp i, Reservation *resv, bool pop)
+	{
+		(void)i;
+		int d = disp(resv);
+		Register rr = resv->reg;
+		if (d)
+		{
+			// save to spill location
+            if (rmask(rr) & FpRegs)
+			{
+                if (rmask(rr) & XmmRegs) {
+                    STQ(d, FP, rr);
+                } else {
+					FSTQ((pop?1:0), d, FP);
+                }
+			}
+			else
+			{
+				ST(FP, d, rr);
+			}
+			verbose_only(if (_verbose) {
+				outputf("        spill %s",_thisfrag->lirbuf->names->formatRef(i));
+			})
+		}
+		else if (pop && (rmask(rr) & x87Regs))
+		{
+			// pop the fpu result since it isn't used
+			FSTP(FST0);
+		}
+	}
+
+	void Assembler::asm_load64(LInsp ins)
+	{
+		LIns* base = ins->oprnd1();
+		int db = ins->oprnd2()->constval();
+		Reservation *resv = getresv(ins);
+		int dr = disp(resv);
+		Register rr = resv->reg;
+
+		if (rr != UnknownReg && rmask(rr) & XmmRegs)
+		{
+			freeRsrcOf(ins, false);
+			Register rb = findRegFor(base, GpRegs);
+			LDQ(rr, db, rb);
+		}
+		else
+		{
+			Register rb = findRegFor(base, GpRegs);
+			resv->reg = UnknownReg;
+
+			// don't use an fpu reg to simply load & store the value.
+			if (dr)
+				asm_mmq(FP, dr, rb, db);
+
+			freeRsrcOf(ins, false);
+
+			if (rr != UnknownReg)
+			{
+				NanoAssert(rmask(rr)&FpRegs);
+				_allocator.retire(rr);
+				FLDQ(db, rb);
+			}
+		}
+	}
+
+	void Assembler::asm_store64(LInsp value, int dr, LInsp base)
+	{
+		if (value->isconstq())
+		{
+			// if it's a constant 64-bit value, just store it now rather than
+			// generating a pointless store/load/store sequence
+			Register rb = findRegFor(base, GpRegs);
+			const int32_t* p = (const int32_t*) (value-2);
+			STi(rb, dr+4, p[1]);
+			STi(rb, dr, p[0]);
+            return;
+		}
+
+        if (value->isop(LIR_ldq) || value->isop(LIR_qjoin))
+		{
+			// value is 64bit struct or int64_t, or maybe a double.
+			// it may be live in an FPU reg.  Either way, don't
+			// put it in an FPU reg just to load & store it.
+
+			// a) if we know it's not a double, this is right.
+			// b) if we guarded that it's a double, this store could be on
+			// the side exit, copying a non-double.
+			// c) maybe it's a double just being stored.  oh well.
+
+			if (sse2) {
+                Register rv = findRegFor(value, XmmRegs);
+                Register rb = findRegFor(base, GpRegs);
+                STQ(dr, rb, rv);
+				return;
+            }
+
+			int da = findMemFor(value);
+		    Register rb = findRegFor(base, GpRegs);
+		    asm_mmq(rb, dr, FP, da);
+            return;
+		}
+
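+		// Otherwise the value has to be materialised in an FPU/XMM register.
+		// `pop` is set when the value had no register reservation, so the x87
+		// store below can pop it instead of leaving a dead entry on the FP stack.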
+		Reservation* rA = getresv(value);
+		int pop = !rA || rA->reg==UnknownReg;
+		Register rv = findRegFor(value, FpRegs);
+		Register rb = findRegFor(base, GpRegs);
+
+		if (rmask(rv) & XmmRegs) {
+            STQ(dr, rb, rv);
+		} else {
+			FSTQ(pop, dr, rb);
+		}
+	}
+
+    /**
+     * copy 64 bits: (rd+dd) <- (rs+ds)
+     */
+    void Assembler::asm_mmq(Register rd, int dd, Register rs, int ds)
+    {
+        // value is either a 64bit struct or maybe a double
+        // that isn't live in an FPU reg.  Either way, don't
+        // put it in an FPU reg just to load & store it.
+        if (sse2)
+        {
+            // use SSE to load+store 64bits
+            Register t = registerAlloc(XmmRegs);
+            _allocator.addFree(t);
+            STQ(dd, rd, t);
+            LDQ(t, ds, rs);
+        }
+        else
+        {
+            // get a scratch reg
+            Register t = registerAlloc(GpRegs & ~(rmask(rd)|rmask(rs)));
+            _allocator.addFree(t);
+            ST(rd, dd+4, t);
+            LD(t, ds+4, rs);
+            ST(rd, dd, t);
+            LD(t, ds, rs);
+        }
+    }
+
+	void Assembler::asm_pusharg(LInsp p)
+	{
+		// arg goes on stack
+		Reservation* rA = getresv(p);
+		if (rA == 0)
+		{
+			if (p->isconst())
+			{
+				// small const we push directly
+				PUSHi(p->constval());
+			}
+			else
+			{
+				Register ra = findRegFor(p, GpRegs);
+				PUSHr(ra);
+			}
+		}
+		else if (rA->reg == UnknownReg)
+		{
+			PUSHm(disp(rA), FP);
+		}
+		else
+		{
+			PUSHr(rA->reg);
+		}
+	}
+	
+	NIns* Assembler::asm_adjustBranch(NIns* at, NIns* target)
+	{
+		NIns* save = _nIns;
+		NIns* was = (NIns*)( (intptr_t)*(int32_t*)(at+1)+(intptr_t)(at+5) );
+		_nIns = at +5; // +5 is size of JMP
+		intptr_t tt = (intptr_t)target - (intptr_t)_nIns;
+		IMM32(tt);
+		*(--_nIns) = JMPc;
+		_nIns = save;
+		return was;
+	}
+	
+	void Assembler::nativePageReset()	{}
+
+	void Assembler::nativePageSetup()
+	{
+		if (!_nIns)		 _nIns	   = pageAlloc();
+		if (!_nExitIns)  _nExitIns = pageAlloc(true);
+	}
+	#endif /* FEATURE_NANOJIT */
+}
new file mode 100644
--- /dev/null
+++ b/js/src/nanojit/RegAlloc.cpp
@@ -0,0 +1,182 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is [Open Source Virtual Machine].
+ *
+ * The Initial Developer of the Original Code is
+ * Adobe System Incorporated.
+ * Portions created by the Initial Developer are Copyright (C) 2004-2007
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ *   Adobe AS3 Team
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#include "nanojit.h"
+
+namespace nanojit
+{	
+	#ifdef FEATURE_NANOJIT
+
+	/**
+	 * Generic register allocation routines.
+	 */
+	void RegAlloc::clear()
+	{
+		free = 0;
+		used = 0;
+		memset(active, 0, NJ_MAX_REGISTERS * sizeof(LIns*));
+	}
+
+	bool RegAlloc::isFree(Register r) 
+	{
+		NanoAssert(r != UnknownReg);
+		return (free & rmask(r)) != 0;
+	}
+		
+	void RegAlloc::addFree(Register r)
+	{
+		NanoAssert(!isFree(r));
+		free |= rmask(r);
+	}
+
+	void RegAlloc::removeFree(Register r)
+	{
+		NanoAssert(isFree(r));
+		free &= ~rmask(r);
+	}
+
+	void RegAlloc::addActive(Register r, LIns* v)
+	{
+		//addActiveCount++;
+		NanoAssert(v && r != UnknownReg && active[r] == NULL );
+		active[r] = v;
+	}
+
+	void RegAlloc::removeActive(Register r)
+	{
+		//registerReleaseCount++;
+		NanoAssert(r != UnknownReg);
+		NanoAssert(active[r] != NULL);
+
+		// remove the given register from the active list
+		active[r] = NULL;
+	}
+
+	LIns* RegAlloc::getActive(Register r)
+	{
+		NanoAssert(r != UnknownReg);
+		return active[r];
+	}
+
+	void RegAlloc::retire(Register r)
+	{
+		NanoAssert(r != UnknownReg);
+		NanoAssert(active[r] != NULL);
+		active[r] = NULL;
+		free |= rmask(r);
+	}
+
+	// scan table for instruction with longest span
+	LIns* Assembler::findVictim(RegAlloc &regs, RegisterMask allow, RegisterMask prefer)
+	{
+		NanoAssert(allow != 0 && (allow&prefer)==prefer);
+		LIns *i, *a=0, *p = 0;
+        int acost=10, pcost=10;
+		for (Register r=FirstReg; r <= LastReg; r = nextreg(r))
+		{
+            if ((allow & rmask(r)) && (i = regs.getActive(r)) != 0)
+            {
+                int cost = getresv(i)->cost;
+                if (!a || cost < acost || cost == acost && nbr(i) < nbr(a)) {
+                    a = i;
+                    acost = cost;
+                }
+                if (prefer & rmask(r)) {
+                    if (!p || cost < pcost || cost == pcost && nbr(i) < nbr(p)) {
+                        p = i;
+                        pcost = cost;
+                    }
+                }
+			}
+		}
+        return acost < pcost ? a : p;
+	}
+
+	#ifdef  NJ_VERBOSE
+	/* static */ void RegAlloc::formatRegisters(RegAlloc& regs, char* s, LirNameMap *names)
+	{
+		for(int i=0; i<NJ_MAX_REGISTERS; i++)
+		{
+			LIns* ins = regs.active[i];
+			Register r = (Register)i;
+			if (ins && regs.isFree(r))
+				{ NanoAssertMsg( 0, "Coding error; register is both free and active! " ); }
+			//if (!ins && !regs.isFree(r))
+			//	{ NanoAssertMsg( 0, "Coding error; register is not in the free list when it should be" ); }
+			if (!ins)
+				continue;				
+
+			s += strlen(s);
+			const char* rname = ins->isQuad() ? fpn(r) : gpn(r);
+			sprintf(s, " %s(%s)", rname, names->formatRef(ins));
+		}
+	}
+	#endif /* NJ_VERBOSE */
+
+	#ifdef _DEBUG
+
+	uint32_t RegAlloc::countFree()
+	{
+		int cnt = 0;
+		for(Register i=FirstReg; i <= LastReg; i = nextreg(i))
+			cnt += isFree(i) ? 1 : 0;
+		return cnt;
+	}
+
+	uint32_t RegAlloc::countActive()
+	{
+		int cnt = 0;
+		for(Register i=FirstReg; i <= LastReg; i = nextreg(i))
+			cnt += active[i] ? 1 : 0;
+		return cnt;
+	}
+
+	void RegAlloc::checkCount()
+	{
+		NanoAssert(count == (countActive() + countFree()));
+	}
+
+    bool RegAlloc::isConsistent(Register r, LIns* i)
+    {
+		NanoAssert(r != UnknownReg);
+        return (isFree(r)  && !getActive(r)     && !i) ||
+               (!isFree(r) &&  getActive(r)== i && i );
+    }
+
+	#endif /* _DEBUG */
+	#endif /* FEATURE_NANOJIT */
+}
new file mode 100644
--- /dev/null
+++ b/js/src/nanojit/avmplus.cpp
@@ -0,0 +1,41 @@
+/* ***** BEGIN LICENSE BLOCK ***** 
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1 
+ *
+ * The contents of this file are subject to the Mozilla Public License Version 1.1 (the 
+ * "License"); you may not use this file except in compliance with the License. You may obtain 
+ * a copy of the License at http://www.mozilla.org/MPL/ 
+ * 
+ * Software distributed under the License is distributed on an "AS IS" basis, WITHOUT 
+ * WARRANTY OF ANY KIND, either express or implied. See the License for the specific 
+ * language governing rights and limitations under the License. 
+ * 
+ * The Original Code is [Open Source Virtual Machine.] 
+ * 
+ * The Initial Developer of the Original Code is Adobe System Incorporated.  Portions created 
+ * by the Initial Developer are Copyright (C)[ 2004-2006 ] Adobe Systems Incorporated. All Rights 
+ * Reserved. 
+ * 
+ * Contributor(s): Adobe AS3 Team
+ *                 Andreas Gal <gal@uci.edu>
+ * 
+ * Alternatively, the contents of this file may be used under the terms of either the GNU 
+ * General Public License Version 2 or later (the "GPL"), or the GNU Lesser General Public 
+ * License Version 2.1 or later (the "LGPL"), in which case the provisions of the GPL or the 
+ * LGPL are applicable instead of those above. If you wish to allow use of your version of this 
+ * file only under the terms of either the GPL or the LGPL, and not to allow others to use your 
+ * version of this file under the terms of the MPL, indicate your decision by deleting provisions 
+ * above and replace them with the notice and other provisions required by the GPL or the 
+ * LGPL. If you do not delete the provisions above, a recipient may use your version of this file 
+ * under the terms of any one of the MPL, the GPL or the LGPL. 
+ * 
+ ***** END LICENSE BLOCK ***** */
+
+#include "avmplus.h"
+
+using namespace avmplus;
+
+AvmConfiguration AvmCore::config;
+static GC _gc;
+GC* AvmCore::gc = &_gc;
+GCHeap GC::heap;
+
--- a/js/src/nanojit/avmplus.h
+++ b/js/src/nanojit/avmplus.h
@@ -30,63 +30,151 @@
  * 
  ***** END LICENSE BLOCK ***** */
 
 #ifndef avm_h___
 #define avm_h___
 
 #include <assert.h>
 #include <string.h>
+#include <stdio.h>
 #include "jstypes.h"
 
 #ifdef _MSC_VER
 #define __msvc_only(x)  x
 #else
 #define __msvc_only(x)
 #endif
 
 #define FASTCALL
 
+//#ifdef DEBUG
+//#define _DEBUG
+//#define NJ_VERBOSE
+//#endif
+
 #define AvmAssert(x) assert(x)
+#define AvmAssertMsg(x, y) 
+#define AvmDebugLog(x) printf x
 
 typedef JSUint8 uint8_t;
 typedef JSUint16 uint16_t;
 typedef JSUint32 uint32_t;
 typedef JSUint64 uint64_t;
 
-class GC 
-{
-};
-
-class GCHeap
-{
-};
-
 class GCObject 
 {
 };
 
 class GCFinalizedObject
 {
 };
 
+class GCHeap
+{
+public:
+    uint32_t kNativePageSize;
+    
+    GCHeap() 
+    {
+        kNativePageSize = 4096; // @todo: what is this?
+    }
+    
+    inline void*
+    Alloc(uint32_t pages) 
+    {
+        void* p = malloc((pages + 1) * kNativePageSize);
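+        // Over-allocate by one page so the result can be rounded up to a 4 KB
+        // boundary (the ~0xfff mask assumes kNativePageSize == 4096 and 32-bit
+        // pointers).  The original malloc pointer is discarded, so Free() below
+        // has nothing it can safely hand back to free().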
+        p = (void*)(((int)(((char*)p) + kNativePageSize)) & (~0xfff));
+        return p;
+    }
+    
+    inline void
+    Free(void* p)
+    {
+        // @todo: don't know how to free
+    }
+    
+};
+
+class GC 
+{
+    static GCHeap heap;
+    
+public:
+    static inline void
+    Free(void* p)
+    {
+    }
+    
+    static inline GCHeap*
+    GetGCHeap()
+    {
+        return &heap;
+    }
+};
+
+inline void*
+operator new(size_t size, GC* gc)
+{
+    return (void*)new char[size];
+}
+
 #define DWB(x) x
 
+#define MMGC_MEM_TYPE(x)
+
+typedef int FunctionID;
+
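+// InterpState, AvmConfiguration, AvmCore and OSDep below (like GCHeap and GC
+// above) are minimal stand-ins for the real avmplus classes: just enough
+// surface for nanojit to compile inside SpiderMonkey.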
 namespace avmplus
 {
     class InterpState
     {
+    public:
+        void* f;
+        const uint16_t* ip;
+        void* rp;
+        void* sp;
     };
 
+    class AvmConfiguration 
+    {
+    public:
+        AvmConfiguration() {
+            memset(this, 0, sizeof(AvmConfiguration));
+        }
+        
+        uint32_t tree_opt:1;
+    };
+    
     class AvmCore 
     {
+    public:
+        static AvmConfiguration config;
+        static GC* gc;
+        
+        static inline bool 
+        use_sse2() 
+        {
+            return true;
+        }
+
+        static inline GC*
+        GetGC() 
+        {
+            return gc;
+        }
     };
     
     class OSDep
     {
+    public:
+        static inline void
+        getDate()
+        {
+        }
     };
     
     /**
      * The List<T> template implements a simple List, which can
      * be templated to support different types.
      * 
      * Elements can be added to the end, modified in the middle, 
      * but no holes are allowed.  That is for set(n, v) to work
@@ -122,16 +210,27 @@ namespace avmplus
         }
 
         inline void destroy()
         {
             if (data)
                 delete data;
         }
         
+        // 'this' steals the guts of 'that' and 'that' gets reset.
+        void FASTCALL become(List& that)
+        {
+            this->destroy();
+                
+            this->data = that.data;
+            this->len = that.len;
+            
+            that.data = 0;
+            that.len = 0;
+        }
         uint32_t FASTCALL add(T value)
         {
             if (len >= capacity) {
                 grow();
             }
             wb(len++, value);
             return len-1;
         }
--- a/js/src/nanojit/vm_fops.h
+++ b/js/src/nanojit/vm_fops.h
@@ -30,8 +30,10 @@
  * use your version of this file under the terms of the MPL, indicate your
  * decision by deleting the provisions above and replace them with the notice
  * and other provisions required by the GPL or the LGPL. If you do not delete
  * the provisions above, a recipient may use your version of this file under
  * the terms of any one of the MPL, the GPL or the LGPL.
  *
  * ***** END LICENSE BLOCK ***** */
 
+INTERP_FOPCODE_LIST_BEGIN
+INTERP_FOPCODE_LIST_END