Added Native*.h from TT.
author	Andreas Gal <gal@uci.edu>
date	Wed, 18 Jun 2008 20:57:17 -0700
changeset 17271 98b15b1d79bdc5b2be5404e3faa8bc0ff49073ee
parent 17270 77195f4d8de82dd03e9271ca4b8664f255461e80
child 17272 822efff560ea9b6c84a035030547edb0c4e7c56b
push id	1452
push user	shaver@mozilla.com
push date	Fri, 22 Aug 2008 00:08:22 +0000
milestone	1.9.1a1pre
Added Native*.h from TT.
js/src/nanojit/Native.h
js/src/nanojit/NativeARM.h
js/src/nanojit/NativeThumb.h
js/src/nanojit/Nativei386.h
js/src/nanojit/avmplus.h
js/src/nanojit/nanojit.h
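All four headers share the same code-generation convention: machine code is emitted backwards, one instruction at a time, by pre-decrementing the current instruction pointer _nIns, and every emission macro first calls underrunProtect(n) to guarantee that n more bytes fit on the current native code page. A minimal sketch of that idiom, mirroring the ARM ADD(_l,_r) macro below (the emitAdd wrapper is a hypothetical name, not something this patch defines):

    // Sketch only: emit "add rl, rm, rl" at the current, backward-growing position.
    void Assembler::emitAdd(Register rl, Register rm)
    {
        underrunProtect(4);   // make sure 4 more bytes fit on this page
        // same encoding as the ADD(_l,_r) macro in NativeARM.h
        *(--_nIns) = (NIns)( COND_AL | (1<<23) | (rm<<16) | (rl<<12) | (rl) );
        asm_output2("add %s,%s", gpn(rl), gpn(rm));
    }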
new file mode 100644
--- /dev/null
+++ b/js/src/nanojit/Native.h
@@ -0,0 +1,92 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is [Open Source Virtual Machine].
+ *
+ * The Initial Developer of the Original Code is
+ * Adobe System Incorporated.
+ * Portions created by the Initial Developer are Copyright (C) 2004-2007
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ *   Adobe AS3 Team
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+
+#ifndef __nanojit_Native__
+#define __nanojit_Native__
+
+
+#ifdef NANOJIT_IA32
+#include "Nativei386.h"
+#elif defined(NANOJIT_ARM)
+#ifdef THUMB
+#include "NativeThumb.h"
+#else
+#include "NativeArm.h"
+#endif
+#elif defined(NANOJIT_PPC)
+#include "NativePpc.h"
+#else
+#error "unknown nanojit architecture"
+#endif
+
+	#ifdef NJ_STACK_GROWTH_UP
+		#define stack_direction(n)   n
+	#else
+		#define stack_direction(n)  -n
+	#endif
+	
+	#define isSPorFP(r)		( (r)==SP || (r)==FP )
+
+	#ifdef NJ_VERBOSE
+		#define PRFX					counter_increment(native);\
+			if (verbose_enabled()) {\
+				outline[0]='\0';\
+				sprintf(outline, "                   ");\
+				sprintf(&outline[19]
+		#define PSFX					Assembler::outputAlign(outline, 45);\
+			RegAlloc::formatRegisters(_allocator, outline, _thisfrag->lirbuf->names);\
+			Assembler::output_asm(outline); }
+		//#define PRFX					fprintf(stdout
+		//#define PSFX					fprintf(stdout,"\n")
+		#define asm_output(s)			PRFX,s); PSFX
+		#define asm_output1(s,x)		PRFX,s,x); PSFX
+		#define asm_output2(s,x,y)		PRFX,s,x,y); PSFX
+		#define asm_output3(s,x,y,z)	PRFX,s,x,y,z); PSFX
+		#define gpn(r)					regNames[(r)] 
+		#define fpn(r)					regNames[(r)] 
+	#else
+		#define PRFX			
+		#define asm_output(s)
+		#define asm_output1(s,x)	
+		#define asm_output2(s,x,y)	
+		#define asm_output3(s,x,y,z)	
+		#define gpn(r)		
+	#endif /* NJ_VERBOSE */
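+	// For reference, with NJ_VERBOSE defined, asm_output2(s,x,y) expands
+	// (via PRFX/PSFX above) to roughly:
+	//     counter_increment(native);
+	//     if (verbose_enabled()) {
+	//         outline[0]='\0';
+	//         sprintf(outline, "                   ");
+	//         sprintf(&outline[19], s, x, y);
+	//         Assembler::outputAlign(outline, 45);
+	//         RegAlloc::formatRegisters(_allocator, outline, _thisfrag->lirbuf->names);
+	//         Assembler::output_asm(outline);
+	//     }
+	// With NJ_VERBOSE undefined, the asm_output* macros expand to nothing.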
+
+
+#endif // __nanojit_Native__
new file mode 100644
--- /dev/null
+++ b/js/src/nanojit/NativeARM.h
@@ -0,0 +1,783 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is [Open Source Virtual Machine].
+ *
+ * The Initial Developer of the Original Code is
+ * Adobe System Incorporated.
+ * Portions created by the Initial Developer are Copyright (C) 2004-2007
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ *   Adobe AS3 Team
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+
+#ifndef __nanojit_NativeArm__
+#define __nanojit_NativeArm__
+
+
+namespace nanojit
+{
+	const int NJ_LOG2_PAGE_SIZE	= 12;		// 4K
+	const int NJ_LOG2_CACHE_SIZE = 21;		// 2MB
+	const int NJ_LOG2_PAGES = NJ_LOG2_CACHE_SIZE - NJ_LOG2_PAGE_SIZE;
+	const int NJ_PAGES = 1 << NJ_LOG2_PAGES;
+	const int NJ_PAGE_SIZE = 1 << NJ_LOG2_PAGE_SIZE;
+	#define NJ_MAX_REGISTERS				11
+	#define NJ_MAX_STACK_ENTRY				256
+	#define NJ_MAX_PARAMETERS				16
+	#define NJ_ALIGN_STACK					8
+	#define NJ_STACK_OFFSET					8
+
+	#define NJ_SOFTFLOAT
+	#define NJ_STACK_GROWTH_UP
+
+	#define NJ_CONSTANT_POOLS
+	const int NJ_MAX_CPOOL_OFFSET = 4096;
+	const int NJ_CPOOL_SIZE = 16;
+
+	// WARNING: setting this allows the NJ to grow memory as needed without bounds
+	const bool NJ_UNLIMITED_GROWTH	= true;
+
+	typedef int NIns;
+
+	/* ARM registers */
+	typedef enum 
+	{
+		R0  = 0,
+		R1  = 1,
+		R2  = 2,
+		R3  = 3,
+		R4  = 4,
+		R5  = 5,
+		R6  = 6,
+		R7  = 7,
+		R8  = 8,
+		R9  = 9,
+		R10 = 10,
+		//FP  =11,
+		IP  = 12,
+		SP  = 13,
+		LR  = 14,
+		PC  = 15,
+
+		FP = 13,
+		
+		// Pseudo-register for floating point
+		F0  = 0,
+
+		// helpers
+		FRAME_PTR = 11,
+		ESP	= 13,
+		
+		FirstReg = 0,
+		LastReg = 10,
+		Scratch	= 12,
+		UnknownReg = 11
+	}
+	Register;
+
+	/* ARM condition codes */
+	typedef enum
+	{
+		EQ = 0x0, // Equal
+		NE = 0x1, // Not Equal
+		CS = 0x2, // Carry Set (or HS)
+		CC = 0x3, // Carry Clear (or LO)
+		MI = 0x4, // MInus
+		PL = 0x5, // PLus
+		VS = 0x6, // oVerflow Set
+		VC = 0x7, // oVerflow Clear
+		HI = 0x8, // HIgher
+		LS = 0x9, // Lower or Same
+		GE = 0xA, // Greater or Equal
+		LT = 0xB, // Less Than
+		GT = 0xC, // Greater Than
+		LE = 0xD, // Less or Equal
+		AL = 0xE, // ALways
+		NV = 0xF  // NeVer
+	}
+	ConditionCode;
+
+
+	typedef int RegisterMask;
+	typedef struct _FragInfo
+	{
+		RegisterMask	needRestoring;
+		NIns*			epilogue;
+	} 
+	FragInfo;
+
+	static const RegisterMask SavedRegs = 1<<R4 | 1<<R5 | 1<<R6 | 1<<R7 | 1<<R8 | 1<<R9 | 1<<R10;
+	static const RegisterMask FpRegs = 0x0000; // no FP registers (softfloat)
+	static const RegisterMask GpRegs = 0x07FF;
+	static const RegisterMask AllowableFlagRegs = 1<<R0 | 1<<R1 | 1<<R2 | 1<<R3 | 1<<R4 | 1<<R5 | 1<<R6 | 1<<R7 | 1<<R8 | 1<<R9 | 1<<R10;
+
+	#define firstreg()		R0
+	#define nextreg(r)		(Register)((int)r+1)
+	#define imm2register(c) (Register)(c-1)
+
+	verbose_only( extern const char* regNames[]; )
+
+	// abstract to platform specific calls
+	#define nExtractPlatformFlags(x)	0
+
+	#define DECLARE_PLATFORM_STATS() \
+		counter_define(x87Top);
+
+	#define DECLARE_PLATFORM_REGALLOC()
+
+
+	#define DECLARE_PLATFORM_ASSEMBLER()\
+		const static Register argRegs[4], retRegs[2];\
+		void LD32_nochk(Register r, int32_t imm);\
+		void CALL(intptr_t addr, const char* nm);\
+		void underrunProtect(int bytes);\
+		bool has_cmov;\
+		void nativePageReset();\
+		void nativePageSetup();\
+		int* _nSlot;\
+		int* _nExitSlot;
+
+	//printf("jmp_l_n count=%d, nins=%X, %X = %X\n", (_c), nins, _nIns, ((intptr_t)(nins+(_c))-(intptr_t)_nIns - 4) );
+
+	#define swapptrs()  { NIns* _tins = _nIns; _nIns=_nExitIns; _nExitIns=_tins; \
+								int* _nslot = _nSlot;\
+								_nSlot = _nExitSlot;\
+								_nExitSlot = _nslot;}
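+	// swapptrs() switches emission between the two backward-growing streams
+	// the assembler maintains: the main fragment code (_nIns/_nSlot) and the
+	// exit-stub code (_nExitIns/_nExitSlot). (The exit-stub role is inferred
+	// from the _nExit* names.)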
+
+
+#define IMM32(imm)	*(--_nIns) = (NIns)((imm));
+
+#define FUNCADDR(addr) ( ((int)addr) )	
+
+
+#define OP_IMM	(1<<25)
+
+#define COND_AL	(0xE<<28)
+
+typedef enum
+{
+	LSL_imm = 0, // LSL #c - Logical Shift Left
+	LSL_reg = 1, // LSL Rc - Logical Shift Left
+	LSR_imm = 2, // LSR #c - Logical Shift Right
+	LSR_reg = 3, // LSR Rc - Logical Shift Right
+	ASR_imm = 4, // ASR #c - Arithmetic Shift Right
+	ASR_reg = 5, // ASR Rc - Arithmetic Shift Right
+	ROR_imm = 6, // Rotate Right (c != 0)
+	RRX     = 6, // Rotate Right one bit with extend (c == 0)
+	ROR_reg = 7  // Rotate Right
+}
+ShiftOperator;
+
+#define LD32_size 4
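+// LD32_nochk(reg, imm), declared in DECLARE_PLATFORM_ASSEMBLER above (its
+// definition is not in this header), materializes a full 32-bit immediate;
+// with NJ_CONSTANT_POOLS the value presumably comes from a per-page literal
+// pool reachable within NJ_MAX_CPOOL_OFFSET (4096) bytes, i.e. the 12-bit
+// offset range of a PC-relative LDR. LD32_size is the code-size budget that
+// callers reserve for it via underrunProtect(n + LD32_size).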
+
+
+#define BEGIN_NATIVE_CODE(x) \
+	{ DWORD* _nIns = (uint8_t*)x
+
+#define END_NATIVE_CODE(x) \
+	(x) = (dictwordp*)_nIns; }
+
+// BX 
+#define BX(_r)	do {\
+	underrunProtect(4);\
+	*(--_nIns) = (NIns)( COND_AL | (0x12<<20) | (0xFFF<<8) | (1<<4) | (_r));\
+	asm_output("bx LR"); } while(0)
+
+// _l = _r OR _l
+#define OR(_l,_r)		do {\
+	underrunProtect(4);\
+	*(--_nIns) = (NIns)( COND_AL | (0xC<<21) | (_r<<16) | (_l<<12) | (_l) );\
+	asm_output2("or %s,%s",gpn(_l),gpn(_r)); } while(0)
+
+// _r = _r OR _imm
+#define ORi(_r,_imm)	do {\
+	NanoAssert(isU8((_imm)));\
+	underrunProtect(4); \
+	*(--_nIns) = (NIns)( COND_AL | OP_IMM | (0xC<<21) | (_r<<16) | (_r<<12) | ((_imm)&0xFF) );\
+	asm_output2("or %s,%d",gpn(_r), (_imm)); } while(0)
+
+// _l = _r AND _l
+#define AND(_l,_r) do {\
+	underrunProtect(4);\
+	*(--_nIns) = (NIns)( COND_AL | ((_r)<<16) | ((_l)<<12) | (_l));\
+	asm_output2("and %s,%s",gpn(_l),gpn(_r)); } while(0)
+
+// _r = _r AND _imm
+#define ANDi(_r,_imm) do {\
+	if (isU8((_imm))) {\
+		underrunProtect(4);\
+		*(--_nIns) = (NIns)( COND_AL | OP_IMM | ((_r)<<16) | ((_r)<<12) | ((_imm)&0xFF) );\
+		asm_output2("and %s,%d",gpn(_r),(_imm));}\
+	else if ((_imm)<0 && (_imm)>-256) {\
+		*(--_nIns) = (NIns)( COND_AL | ((_r)<<16) | ((_r)<<12) | (Scratch) );\
+		asm_output2("and %s,%s",gpn(_r),gpn(Scratch));\
+		*(--_nIns) = (NIns)( COND_AL | (0x3E<<20) | ((Scratch)<<12) | (((_imm)^0xFFFFFFFF)&0xFF) );\
+		asm_output2("mvn %s,%d",gpn(Scratch),(_imm));}\
+	else NanoAssert(0);\
+	} while (0)
+
+
+// _l = _l XOR _r
+#define XOR(_l,_r)	do {\
+	underrunProtect(4);\
+	*(--_nIns) = (NIns)( COND_AL | (1<<21) | ((_r)<<16) | ((_l)<<12) | (_l));\
+	asm_output2("eor %s,%s",gpn(_l),gpn(_r)); } while(0)
+
+// _r = _r XOR _imm
+#define XORi(_r,_imm)	do {	\
+	NanoAssert(isU8((_imm)));\
+	underrunProtect(4);		\
+	*(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<21) | ((_r)<<16) | ((_r)<<12) | ((_imm)&0xFF) );\
+	asm_output2("eor %s,%d",gpn(_r),(_imm)); } while(0)
+
+// _l = _l + _r
+#define ADD(_l,_r) do {\
+	underrunProtect(4);\
+	*(--_nIns) = (NIns)( COND_AL | (1<<23) | ((_r)<<16) | ((_l)<<12) | (_l));\
+	asm_output2("add %s,%s",gpn(_l),gpn(_r)); } while(0)
+
+// _r = _r + _imm
+#define ADDi(_r,_imm)	do {\
+	if ((_imm)>-256 && (_imm)<256) {\
+		underrunProtect(4);\
+		if	((_imm)>=0) *(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<23) | ((_r)<<16) | ((_r)<<12) | ((_imm)&0xFF) );\
+		else			*(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<22) | ((_r)<<16) | ((_r)<<12) | ((-(_imm))&0xFF) );}\
+	else {\
+		if ((_imm)>=0){\
+			if ((_imm)<=1020 && (((_imm)&3)==0) ){\
+				underrunProtect(4);\
+				*(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<23) | ((_r)<<16) | ((_r)<<12) | (15<<8)| ((_imm)>>2) );}\
+			else {\
+				underrunProtect(4+LD32_size);\
+				*(--_nIns) = (NIns)( COND_AL | (1<<23) | ((_r)<<16) | ((_r)<<12) | (Scratch));\
+				LD32_nochk(Scratch, _imm);}}\
+		else{\
+      if ((_imm)>=-510){\
+			  underrunProtect(8);\
+			  int rem = -(_imm) - 255;\
+			  *(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<22) | ((_r)<<16) | ((_r)<<12) | ((rem)&0xFF) );\
+        *(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<22) | ((_r)<<16) | ((_r)<<12) | (0xFF) );}\
+      else {\
+				underrunProtect(4+LD32_size);\
+				*(--_nIns) = (NIns)( COND_AL | (1<<22) | ((_r)<<16) | ((_r)<<12) | (Scratch));\
+        LD32_nochk(Scratch, -(_imm));}\
+    }\
+  }\
+	asm_output2("addi %s,%d",gpn(_r),(_imm));} while(0)
+
+// _l = _l - _r
+#define SUB(_l,_r)	do {\
+	underrunProtect(4); \
+	*(--_nIns) = (NIns)( COND_AL | (1<<22) | ((_l)<<16) | ((_l)<<12) | (_r));\
+	asm_output2("sub %s,%s",gpn(_l),gpn(_r)); } while(0)
+
+// _r = _r - _imm
+#define SUBi(_r,_imm)	do{\
+	if ((_imm)>-256 && (_imm)<256){\
+		underrunProtect(4);\
+		if ((_imm)>=0)	*(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<22) | ((_r)<<16) | ((_r)<<12) | ((_imm)&0xFF) );\
+		else			*(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<23) | ((_r)<<16) | ((_r)<<12) | ((-(_imm))&0xFF) );}\
+	else {\
+		if ((_imm)>=0){\
+			if ((_imm)<=510){\
+				underrunProtect(8);\
+				int rem = (_imm) - 255;\
+				NanoAssert(rem<256);\
+				*(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<22) | ((_r)<<16) | ((_r)<<12) | (rem&0xFF) );\
+				*(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<22) | ((_r)<<16) | ((_r)<<12) | (0xFF) );}\
+			else {\
+				underrunProtect(4+LD32_size);\
+				*(--_nIns) = (NIns)( COND_AL | (1<<22) | ((_r)<<16) | ((_r)<<12) | (Scratch));\
+				LD32_nochk(Scratch, _imm);}}\
+		else{\
+      if ((_imm)>=-510) {\
+			  underrunProtect(8);\
+			  int rem = -(_imm) - 255;\
+			  *(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<23) | ((_r)<<16) | ((_r)<<12) | ((rem)&0xFF) );\
+			  *(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<23) | ((_r)<<16) | ((_r)<<12) | (0xFF) );}\
+      else {\
+				underrunProtect(4+LD32_size);\
+				*(--_nIns) = (NIns)( COND_AL | (1<<23) | ((_r)<<16) | ((_r)<<12) | (Scratch));\
+				LD32_nochk(Scratch, -(_imm));}\
+    }\
+  }\
+	asm_output2("sub %s,%d",gpn(_r),(_imm));} while (0)
+
+// _l = _l * _r
+#define MUL(_l,_r)	do {\
+	underrunProtect(4);\
+	*(--_nIns) = (NIns)( COND_AL | (_l)<<16 | (_l)<<8 | 0x90 | (_r) );\
+	asm_output2("mul %s,%s",gpn(_l),gpn(_r)); } while(0)
+
+
+// RSBS
+// _r = -_r
+#define NEG(_r)	do {\
+	underrunProtect(4);	\
+	*(--_nIns) = (NIns)( COND_AL |  (0x27<<20) | ((_r)<<16) | ((_r)<<12) ); \
+	asm_output1("neg %s",gpn(_r)); } while(0)
+
+// MVNS
+// _r = !_r
+#define NOT(_r)	do {\
+	underrunProtect(4);	\
+	*(--_nIns) = (NIns)( COND_AL |  (0x1F<<20) | ((_r)<<12) |  (_r) ); \
+	asm_output1("mvn %s",gpn(_r)); } while(0)
+
+// MOVS _r, _r, LSR <_s>
+// _r = _r >> _s
+#define SHR(_r,_s) do {\
+	underrunProtect(4); \
+	*(--_nIns) = (NIns)( COND_AL | (0x1B<<20) | ((_r)<<12) | ((_s)<<8) | (LSR_reg<<4) | (_r) ); \
+	asm_output2("shr %s,%s",gpn(_r),gpn(_s)); } while(0)
+
+// MOVS _r, _r, LSR #_imm
+// _r = _r >> _imm
+#define SHRi(_r,_imm) do {\
+	underrunProtect(4); \
+	*(--_nIns) = (NIns)( COND_AL | (0x1B<<20) | ((_r)<<12) | ((_imm)<<7) | (LSR_imm<<4) | (_r) ); \
+	asm_output2("shr %s,%d",gpn(_r),_imm); } while(0)
+
+// MOVS _r, _r, ASR <_s>
+// _r = _r >> _s
+#define SAR(_r,_s) do {\
+	underrunProtect(4); \
+	*(--_nIns) = (NIns)( COND_AL | (0x1B<<20) | ((_r)<<12) | ((_s)<<8) | (ASR_reg<<4) | (_r) ); \
+	asm_output2("asr %s,%s",gpn(_r),gpn(_s)); } while(0)
+
+
+// MOVS _r, _r, ASR #_imm
+// _r = _r >> _imm
+#define SARi(_r,_imm) do {\
+	underrunProtect(4); \
+	*(--_nIns) = (NIns)( COND_AL | (0x1B<<20) | ((_r)<<12) | ((_imm)<<7) | (ASR_imm<<4) | (_r) ); \
+	asm_output2("asr %s,%d",gpn(_r),_imm); } while(0)
+
+// MOVS _r, _r, LSL <_s>
+// _r = _r << _s
+#define SHL(_r,_s) do {\
+	underrunProtect(4);\
+	*(--_nIns) = (NIns)( COND_AL | (0x1B<<20) | ((_r)<<12) | ((_s)<<8) | (LSL_reg<<4) | (_r) ); \
+	asm_output2("lsl %s,%s",gpn(_r),gpn(_s)); } while(0)
+
+// MOVS _r, _r, LSL #_imm
+// _r = _r << _imm
+#define SHLi(_r,_imm) do {\
+	underrunProtect(4); \
+	*(--_nIns) = (NIns)( COND_AL | (0x1B<<20) | ((_r)<<12) | ((_imm)<<7) | (LSL_imm<<4) | (_r) ); \
+	asm_output2("lsl %s,%d",gpn(_r),(_imm)); } while(0)
+					
+// TST
+#define TEST(_d,_s) do{\
+	underrunProtect(4); \
+	*(--_nIns) = (NIns)( COND_AL | (0x11<<20) | ((_d)<<16) | (_s) ); \
+	asm_output2("test %s,%s",gpn(_d),gpn(_s));} while(0)
+
+// CMP
+#define CMP(_l,_r)	do{\
+	underrunProtect(4); \
+	*(--_nIns) = (NIns)( COND_AL | (0x015<<20) | ((_l)<<16) | (_r) ); \
+	asm_output2("cmp %s,%s",gpn(_l),gpn(_r));} while(0)
+
+// CMP (or CMN)
+#define CMPi(_r,_imm)	do{\
+	if (_imm<0) {	\
+		if ((_imm)>-256) {\
+			underrunProtect(4);\
+			*(--_nIns) = (NIns)( COND_AL | (0x37<<20) | ((_r)<<16) | (-(_imm)) );}\
+		else {\
+			underrunProtect(4+LD32_size);\
+			*(--_nIns) = (NIns)( COND_AL | (0x17<<20) | ((_r)<<16) | (Scratch) ); \
+			LD32_nochk(Scratch, (_imm));}\
+	} else {\
+		if ((_imm)<256){\
+			underrunProtect(4);\
+			*(--_nIns) = (NIns)( COND_AL | (0x035<<20) | ((_r)<<16) | ((_imm)&0xFF) ); \
+		} else {\
+			underrunProtect(4+LD32_size);\
+			*(--_nIns) = (NIns)( COND_AL | (0x015<<20) | ((_r)<<16) | (Scratch) ); \
+			LD32_nochk(Scratch, (_imm));\
+		}\
+	}\
+	asm_output2("cmp %s,%X",gpn(_r),(_imm)); } while(0)
+
+// MOV
+#define MR(_d,_s)	do {\
+	underrunProtect(4);\
+	*(--_nIns) = (NIns)( COND_AL | (0xD<<21) | ((_d)<<12) | (_s) );\
+	asm_output2("mov %s,%s",gpn(_d),gpn(_s)); } while (0)
+
+
+#define MR_cond(_d,_s,_cond,_nm)	do {\
+	underrunProtect(4);\
+	*(--_nIns) = (NIns)( ((_cond)<<28) | (0xD<<21) | ((_d)<<12) | (_s) );\
+	asm_output2(_nm " %s,%s",gpn(_d),gpn(_s)); } while (0)
+
+#define MREQ(dr,sr)	MR_cond(dr, sr, EQ, "moveq")
+#define MRNE(dr,sr)	MR_cond(dr, sr, NE, "movne")
+#define MRL(dr,sr)	MR_cond(dr, sr, LT, "movlt")
+#define MRLE(dr,sr)	MR_cond(dr, sr, LE, "movle")
+#define MRG(dr,sr)	MR_cond(dr, sr, GT, "movgt")
+#define MRGE(dr,sr)	MR_cond(dr, sr, GE, "movge")
+#define MRB(dr,sr)	MR_cond(dr, sr, CC, "movcc")
+#define MRBE(dr,sr)	MR_cond(dr, sr, LS, "movls")
+#define MRA(dr,sr)	MR_cond(dr, sr, HI, "movhi")
+#define MRAE(dr,sr)	MR_cond(dr, sr, CS, "movcs")
+
+#define LD(_d,_off,_b) do{\
+	if ((_off)<0){\
+	  underrunProtect(4);\
+    NanoAssert((_off)>-4096);\
+		*(--_nIns) = (NIns)( COND_AL | (0x51<<20) | ((_b)<<16) | ((_d)<<12) | ((-(_off))&0xFFF) );\
+	} else {\
+    if (isS16(_off) || isU16(_off)) {\
+	    underrunProtect(4);\
+      NanoAssert((_off)<4096);\
+      *(--_nIns) = (NIns)( COND_AL | (0x59<<20) | ((_b)<<16) | ((_d)<<12) | ((_off)&0xFFF) );}\
+    else {\
+  	  underrunProtect(4+LD32_size);\
+      *(--_nIns) = (NIns)( COND_AL | (0x79<<20) | ((_b)<<16) | ((_d)<<12) | Scratch );\
+      LD32_nochk(Scratch, _off);}\
+	}  asm_output3("ld %s,%d(%s)",gpn((_d)),(_off),gpn((_b))); }while(0)
+
+
+#define LDi(_d,_imm) do {\
+	if (isS8((_imm)) || isU8((_imm))) {	\
+		underrunProtect(4);	\
+		if ((_imm)<0)	*(--_nIns) = (NIns)( COND_AL | (0x3E<<20) | ((_d)<<12) | (((_imm)^0xFFFFFFFF)&0xFF) );\
+		else			*(--_nIns) = (NIns)( COND_AL | (0x3B<<20) | ((_d)<<12) | ((_imm)&0xFF) );\
+	} else {\
+		underrunProtect(LD32_size);\
+		LD32_nochk(_d, (_imm));\
+	} asm_output2("ld %s,%d",gpn((_d)),(_imm)); } while(0)
+
+
+// load 8-bit, zero extend (aka LDRB)
+// note, only 5-bit offsets (!) are supported for this, but that's all we need at the moment
+// (LDRB actually allows a 12-bit offset in ARM mode, but constraining to 5-bit gives us an advantage for Thumb)
+// @todo, untested!
+#define LD8Z(_d,_off,_b) do{    \
+    NanoAssert((_off)>=0 && (_off)<=31);\
+    underrunProtect(4);\
+    *(--_nIns) = (NIns)( COND_AL | (0x5D<<20) | ((_b)<<16) | ((_d)<<12) |  ((_off)&0xfff)  );\
+    asm_output3("ldrb %s,%d(%s)", gpn(_d),(_off),gpn(_b));\
+    } while(0)
+
+#define ST(_b,_off,_r) do{\
+	underrunProtect(4);	\
+	if ((_off)<0)	*(--_nIns) = (NIns)( COND_AL | (0x50<<20) | ((_b)<<16) | ((_r)<<12) | ((-(_off))&0xFFF) );\
+	else			*(--_nIns) = (NIns)( COND_AL | (0x58<<20) | ((_b)<<16) | ((_r)<<12) | ((_off)&0xFFF) );\
+	asm_output3("str %s, %d(%s)",gpn(_r), (_off),gpn(_b)); } while(0)
+
+
+#define STi(_b,_off,_imm) do{\
+	NanoAssert((_off)>0);\
+	if (isS8((_imm)) || isU8((_imm))) {	\
+		underrunProtect(8);	\
+	  *(--_nIns) = (NIns)( COND_AL | (0x58<<20) | ((_b)<<16) | ((Scratch)<<12) | ((_off)&0xFFF) );\
+	  asm_output3("str %s, %d(%s)",gpn(Scratch), (_off),gpn(_b));			\
+		if ((_imm)<0)	*(--_nIns) = (NIns)( COND_AL | (0x3E<<20) | (Scratch<<12) | (((_imm)^0xFFFFFFFF)&0xFF) );\
+		else			*(--_nIns) = (NIns)( COND_AL | (0x3B<<20) | (Scratch<<12) | ((_imm)&0xFF) );\
+    asm_output2("ld %s,%d",gpn((Scratch)),(_imm));	}\
+  else {\
+		underrunProtect(4+LD32_size);\
+	  *(--_nIns) = (NIns)( COND_AL | (0x58<<20) | ((_b)<<16) | ((Scratch)<<12) | ((_off)&0xFFF) );\
+	  asm_output3("str %s, %d(%s)",gpn(Scratch), (_off),gpn(_b));			\
+    LD32_nochk(Scratch, (_imm));}\
+ } while(0)
+
+
+#define LEA(_r,_d,_b) do{						\
+	NanoAssert((_d)<=1020);						\
+	NanoAssert(((_d)&3)==0);						\
+	if (_b!=SP) NanoAssert(0);					\
+	if ((_d)<256) {								\
+		underrunProtect(4);							\
+		*(--_nIns) = (NIns)( COND_AL | (0x28<<20) | ((_b)<<16) | ((_r)<<12) | ((_d)&0xFF) );}\
+	else{										\
+		underrunProtect(8);							\
+		*(--_nIns) = (NIns)( COND_AL | (0x4<<21) | ((_b)<<16) | ((_r)<<12) | (2<<7)| (_r) );\
+		*(--_nIns) = (NIns)( COND_AL | (0x3B<<20) | ((_r)<<12) | (((_d)>>2)&0xFF) );}\
+	asm_output2("lea %s, %d(SP)", gpn(_r), _d);	\
+	} while(0)
+
+
+//#define RET()   underrunProtect(1); *(--_nIns) = 0xc3;	asm_output("ret")
+//#define NOP() 	underrunProtect(1); *(--_nIns) = 0x90;	asm_output("nop")
+//#define INT3()  underrunProtect(1); *(--_nIns) = 0xcc;  asm_output("int3")
+//#define RET() INT3()
+
+
+// this is pushing a reg
+#define PUSHr(_r)  do {\
+	underrunProtect(4);\
+	*(--_nIns) = (NIns)( COND_AL | (0x92<<20) | (SP<<16) | (1<<(_r)) );	\
+	asm_output1("push %s",gpn(_r)); } while (0)
+
+// STMDB
+#define PUSH_mask(_mask)  do {\
+	underrunProtect(4);			\
+	*(--_nIns) = (NIns)( COND_AL | (0x92<<20) | (SP<<16) | (_mask) );	\
+	asm_output1("push %x", (_mask));} while (0)
+
+// this form of PUSH takes a base + offset
+// we need to load into scratch reg, then push onto stack
+#define PUSHm(_off,_b)	do {\
+	NanoAssert( (int)(_off)>0 );\
+	underrunProtect(8);\
+	*(--_nIns) = (NIns)( COND_AL | (0x92<<20) | (SP<<16) | (1<<(Scratch)) );	\
+	*(--_nIns) = (NIns)( COND_AL | (0x59<<20) | ((_b)<<16) | ((Scratch)<<12) | ((_off)&0xFFF) );\
+	asm_output2("push %d(%s)",(_off),gpn(_b)); } while (0)
+
+#define POP(_r) do {\
+	underrunProtect(4);			\
+	*(--_nIns) = (NIns)( COND_AL | (0x8B<<20) | (SP<<16) | (1<<(_r)) );\
+	asm_output1("pop %s",gpn(_r));} while (0)
+
+#define POP_mask(_mask) do {\
+	underrunProtect(4);			\
+	*(--_nIns) = (NIns)( COND_AL | (0x8B<<20) | (SP<<16) | (_mask) );\
+	asm_output1("pop %x", (_mask));} while (0)
+
+// takes an offset (right?)
+#define JMP_long_nochk_offset(_off) do {\
+	*(--_nIns) = (NIns)( COND_AL | (0xA<<24) | (((_off)>>2) & 0xFFFFFF) );	\
+	asm_output1("jmp_l_n 0x%08x",(_off));} while (0)
+
+// take an address, not an offset
+#define JMP(t)	do {\
+	underrunProtect(4);\
+	intptr_t tt = (intptr_t)(t) - ((intptr_t)_nIns + 4);\
+	*(--_nIns) = (NIns)( COND_AL | (0xA<<24) | (((tt)>>2) & 0xFFFFFF) );	\
+	asm_output1("JMP 0x%08x\n",(t)); } while (0)
+
+#define JMP_nochk(t)	do {\
+	intptr_t tt = (intptr_t)(t) - ((intptr_t)_nIns + 4);\
+	*(--_nIns) = (NIns)( COND_AL | (0xA<<24) | (((tt)>>2) & 0xFFFFFF) );	\
+	asm_output1("JMP 0x%08x\n",(t)); } while (0)
+
+#define JMP_long_placeholder()	do {JMP_long(0xffffffff); } while(0)
+
+#define JMP_long(_t)	do {\
+	underrunProtect(4);\
+	*(--_nIns) = (NIns)( COND_AL | (0xA<<24) | (((_t)>>2) & 0xFFFFFF) );	\
+	asm_output1("JMP_long 0x%08x\n", (_t) ); } while (0)
+
+#define BL(_t)	do {\
+	underrunProtect(4);\
+	intptr_t _tt = (intptr_t)(_t) - ((intptr_t)_nIns + 4);\
+	*(--_nIns) = (NIns)( COND_AL | (0xB<<24) | (((_tt)>>2) & 0xFFFFFF) );	\
+	asm_output2("BL 0x%08x offset=%d",(intptr_t)(_nIns) + (_tt),(_tt)) } while (0)
+
+
+#define JMP_long_nochk(_t)	do {\
+	intptr_t tt = (intptr_t)(_t) - ((intptr_t)_nIns + 4);\
+	*(--_nIns) = (NIns)( COND_AL | (0xA<<24) | (((tt)>>2) & 0xFFFFFF) );	\
+	asm_output1("JMP_l_n 0x%08x\n", (_t)) } while (0)
+
+
+#define B_cond(_c,_t)\
+	underrunProtect(4);\
+	intptr_t tt = (intptr_t)(_t) - ((intptr_t)_nIns + 4);\
+	*(--_nIns) = (NIns)( ((_c)<<28) | (0xA<<24) | ((tt >>2)& 0xFFFFFF) );	\
+	asm_output2("b(cond) 0x%08x (%tX)",(_t), tt);
+
+
+//#define B(c,t) \
+//	intptr_t tt = (intptr_t)t - (intptr_t)_nIns;	\
+//	NanoAssert( ((int)tt < 256) && ((int)tt>-255) );						\
+//	underrunProtect(2);							\
+//	*(--_nIns) = (NIns)(0xD000 | (c<<8) | (tt>>1)&0xFF );
+
+
+#define JA(t)	do {B_cond(HI,t); asm_output1("ja 0x%08x",t); } while(0)
+#define JNA(t)	do {B_cond(LS,t); asm_output1("jna 0x%08x",t); } while(0)
+#define JB(t)	do {B_cond(CC,t); asm_output1("jb 0x%08x",t); } while(0)
+#define JNB(t)	do {B_cond(CS,t); asm_output1("jnb 0x%08x",t); } while(0)
+#define JE(t)	do {B_cond(EQ,t); asm_output1("je 0x%08x",t); } while(0)
+#define JNE(t)	do {B_cond(NE,t); asm_output1("jne 0x%08x",t); } while(0)						
+#define JBE(t)	do {B_cond(LS,t); asm_output1("jbe 0x%08x",t); } while(0)
+#define JNBE(t) do {B_cond(HI,t); asm_output1("jnbe 0x%08x",t); } while(0)
+#define JAE(t)	do {B_cond(CS,t); asm_output1("jae 0x%08x",t); } while(0)
+#define JNAE(t) do {B_cond(CC,t); asm_output1("jnae 0x%08x",t); } while(0)
+#define JL(t)	do {B_cond(LT,t); asm_output1("jl 0x%08x",t); } while(0)	
+#define JNL(t)	do {B_cond(GE,t); asm_output1("jnl 0x%08x",t); } while(0)
+#define JLE(t)	do {B_cond(LE,t); asm_output1("jle 0x%08x",t); } while(0)
+#define JNLE(t)	do {B_cond(GT,t); asm_output1("jnle 0x%08x",t); } while(0)
+#define JGE(t)	do {B_cond(GE,t); asm_output1("jge 0x%08x",t); } while(0)
+#define JNGE(t)	do {B_cond(LT,t); asm_output1("jnge 0x%08x",t); } while(0)
+#define JG(t)	do {B_cond(GT,t); asm_output1("jg 0x%08x",t); } while(0)	
+#define JNG(t)	do {B_cond(LE,t); asm_output1("jng 0x%08x",t); } while(0)
+
+// used for testing result of an FP compare
+// JP = comparison  false
+#define JP(t)	do {B_cond(EQ,NE,t); asm_output1("jp 0x%08x",t); } while(0)	
+
+// JNP = comparison true
+#define JNP(t)	do {B_cond(NE,EQ,t); asm_output1("jnp 0x%08x",t); } while(0)
+
+
+// floating point
+#define FNSTSW_AX()	do {NanoAssert(0);		asm_output("fnstsw_ax"); } while(0)
+#define FFREE(r)	do {NanoAssert(0);		asm_output1("ffree %s",gpn(b)); } while(0)
+#define FSTQ(p,d,b)	do {NanoAssert(0);		asm_output2("fstq %d(%s)",d,gpn(b)); } while(0)
+#define FSTPQ(d,b)  FSTQ(1,d,b)
+//#define FSTPQ(d,b)	do {NanoAssert(0);		asm_output2("fstpq %d(%s)",d,gpn(b)); } while(0)
+#define FCOM(p,d,b)	do {NanoAssert(0);		asm_output2("fcom %d(%s)",d,gpn(b)); } while(0)
+#define FCOMP(d,b)	do {NanoAssert(0);		asm_output2("fcomp %d(%s)",d,gpn(b)); } while(0)
+#define FLDQ(d,b)	do {NanoAssert(0);		asm_output2("fldq %d(%s)",d,gpn(b)); } while(0)
+#define FILDQ(d,b)	do {NanoAssert(0);		asm_output2("fildq %d(%s)",d,gpn(b)); } while(0)
+#define FILD(d,b)	do {NanoAssert(0);		asm_output2("fild %d(%s)",d,gpn(b)); } while(0)
+#define FADD(d,b)	do {NanoAssert(0);		asm_output2("faddq %d(%s)",d,gpn(b)); } while(0)
+#define FSUB(d,b)	do {NanoAssert(0);		asm_output2("fsubq %d(%s)",d,gpn(b)); } while(0)
+#define FSUBR(d,b)	do {NanoAssert(0);		asm_output2("fsubr %d(%s)",d,gpn(b)); } while(0)
+#define FMUL(d,b)	do {NanoAssert(0);		asm_output2("fmulq %d(%s)",d,gpn(b)); } while(0)
+#define FDIV(d,b)	do {NanoAssert(0);		asm_output2("fdivq %d(%s)",d,gpn(b)); } while(0)
+#define FDIVR(d,b)	do {NanoAssert(0);		asm_output2("fdivr %d(%s)",d,gpn(b)); } while(0)
+#define FSTP(r)		do {NanoAssert(0);		asm_output1("fst st(%d)",r); } while(0)
+#define FLD1()		do {NanoAssert(0);		asm_output("fld1"); } while(0)
+#define FLDZ()		do {NanoAssert(0);		asm_output("fldz"); } while(0)
+
+
+
+// MOV(EQ) _r, #1 
+// EOR(NE) _r, _r
+#define SET(_r,_cond,_opp)\
+	underrunProtect(8);								\
+	*(--_nIns) = (NIns)( (_opp<<28) | (1<<21) | ((_r)<<16) | ((_r)<<12) | (_r) );\
+	*(--_nIns) = (NIns)( (_cond<<28) | (0x3A<<20) | ((_r)<<12) | (1) );
+
+
+#define SETE(r)		do {SET(r,EQ,NE); asm_output1("sete %s",gpn(r)); } while(0)
+#define SETL(r)		do {SET(r,LT,GE); asm_output1("setl %s",gpn(r)); } while(0)
+#define SETLE(r)	do {SET(r,LE,GT); asm_output1("setle %s",gpn(r)); } while(0)
+#define SETG(r)		do {SET(r,GT,LE); asm_output1("setg %s",gpn(r)); } while(0)
+#define SETGE(r)	do {SET(r,GE,LT); asm_output1("setge %s",gpn(r)); } while(0)
+#define SETB(r)		do {SET(r,CC,CS); asm_output1("setb %s",gpn(r)); } while(0)
+#define SETBE(r)	do {SET(r,LS,HI); asm_output1("setbe %s",gpn(r)); } while(0)
+#define SETAE(r)	do {SET(r,CS,CC); asm_output1("setae %s",gpn(r)); } while(0)
+#define SETA(r)		do {SET(r,HI,LS); asm_output1("seta %s",gpn(r)); } while(0)
+
+// This zero-extends a reg that has been set using one of the SET macros,
+// but is a NOOP on ARM/Thumb
+#define MOVZX8(r,r2)
+
+// Load and sign extend a 16-bit value into a reg
+#define MOVSX(_d,_off,_b) do{\
+	if ((_off)>=0){\
+		if ((_off)<256){\
+			underrunProtect(4);\
+			*(--_nIns) = (NIns)( COND_AL | (0x1D<<20) | ((_b)<<16) | ((_d)<<12) |  ((((_off)>>4)&0xF)<<8) | (0xF<<4) | ((_off)&0xF)  );}\
+		else if ((_off)<=510) {\
+			underrunProtect(8);\
+			int rem = (_off) - 255;\
+			NanoAssert(rem<256);\
+			*(--_nIns) = (NIns)( COND_AL | (0x1D<<20) | ((_d)<<16) | ((_d)<<12) |  ((((rem)>>4)&0xF)<<8) | (0xF<<4) | ((rem)&0xF)  );\
+			*(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<23) | ((_b)<<16) | ((_d)<<12) | (0xFF) );}\
+		else {\
+			underrunProtect(16);\
+			int rem = (_off) & 3;\
+			*(--_nIns) = (NIns)( COND_AL | (0x19<<20) | ((_b)<<16) | ((_d)<<12) | (0xF<<4) | (_d) );\
+			asm_output3("ldrsh %s,[%s, #%d]",gpn(_d), gpn(_b), (_off));\
+			*(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<23) | ((_d)<<16) | ((_d)<<12) | rem );\
+			*(--_nIns) = (NIns)( COND_AL | (0x1A<<20) | ((_d)<<12) | (2<<7)| (_d) );\
+			*(--_nIns) = (NIns)( COND_AL | (0x3B<<20) | ((_d)<<12) | (((_off)>>2)&0xFF) );\
+			asm_output2("mov %s,%d",gpn(_d),(_off));}}\
+	else {\
+		if ((_off)>-256) {\
+			underrunProtect(4);\
+			*(--_nIns) = (NIns)( COND_AL | (0x15<<20) | ((_b)<<16) | ((_d)<<12) |  ((((-(_off))>>4)&0xF)<<8) | (0xF<<4) | ((-(_off))&0xF)  );\
+			asm_output3("ldrsh %s,[%s, #%d]",gpn(_d), gpn(_b), (_off));}\
+		else if ((_off)>=-510){\
+			underrunProtect(8);\
+			int rem = -(_off) - 255;\
+			NanoAssert(rem<256);\
+			*(--_nIns) = (NIns)( COND_AL | (0x15<<20) | ((_d)<<16) | ((_d)<<12) |  ((((rem)>>4)&0xF)<<8) | (0xF<<4) | ((rem)&0xF)  );\
+			*(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<22) | ((_b)<<16) | ((_d)<<12) | (0xFF) );}\
+		else NanoAssert(0);\
+	}\
+} while(0)
+
+#define STMIA(_b, _mask) do {\
+		  underrunProtect(2);\
+		  NanoAssert(((_mask)&rmask(_b))==0 && isU8(_mask));\
+      *(--_nIns) = (NIns)(COND_AL | (0x8A<<20) | ((_b)<<16) | (_mask)&0xFF);\
+      asm_output2("stmia %s!,{%x}", gpn(_b), _mask);} while (0)
+
+#define LDMIA(_b, _mask) do {\
+      underrunProtect(2);\
+ 		  NanoAssert(((_mask)&rmask(_b))==0 && isU8(_mask));\
+     *(--_nIns) = (NIns)(COND_AL | (0x8B<<20) | ((_b)<<16) | (_mask)&0xFF);\
+      asm_output2("ldmia %s!,{%x}", gpn(_b), (_mask));} while (0)
+
+/*
+#define MOVSX(_d,_off,_b) do{\
+	if ((_b)==SP){\
+		NanoAssert( (_off)>=0 );\
+		if ((_off)<256){\
+			underrunProtect(4);\
+			*(--_nIns) = (NIns)( COND_AL | (0x1D<<20) | ((_b)<<16) | ((_d)<<12) |  ((((_off)>>4)&0xF)<<8) | (0xF<<4) | ((_off)&0xF)  );}\
+		else if ((_off)<=510) {\
+			underrunProtect(8);\
+			int rem = (_off) - 255;\
+			NanoAssert(rem<256);\
+			*(--_nIns) = (NIns)( COND_AL | (0x1D<<20) | ((_d)<<16) | ((_d)<<12) |  ((((rem)>>4)&0xF)<<8) | (0xF<<4) | ((rem)&0xF)  );\
+			*(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<23) | ((_b)<<16) | ((_d)<<12) | (0xFF) );}\
+		else {\
+			underrunProtect(16);\
+			int rem = (_off) & 3;\
+			*(--_nIns) = (NIns)( COND_AL | (0x19<<20) | ((_b)<<16) | ((_d)<<12) | (0xF<<4) | (_d) );\
+			asm_output3("ldrsh %s,[%s, #%d]",gpn(_d), gpn(_b), (_off));\
+			*(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<23) | ((_d)<<16) | ((_d)<<12) | rem );\
+			*(--_nIns) = (NIns)( COND_AL | (0x1A<<20) | ((_d)<<12) | (2<<7)| (_d) );\
+			*(--_nIns) = (NIns)( COND_AL | (0x3B<<20) | ((_d)<<12) | (((_off)>>2)&0xFF) );\
+			asm_output2("mov %s,%d",gpn(_d),(_off));}}\
+	else {\
+		if ((_off)>=0){\
+			if ((_off)<256) {\
+				underrunProtect(4);							\
+				*(--_nIns) = (NIns)( COND_AL | (0x1D<<20) | ((_b)<<16) | ((_d)<<12) |  ((((_off)>>4)&0xF)<<8) | (0xF<<4) | ((_off)&0xF)  );\
+				asm_output3("ldrsh %s,[%s, #%d]",gpn(_d), gpn(_b), (_off));}\
+			else if ((_off)<=510) {\
+				underrunProtect(8);\
+				int rem = (_off) - 255;\
+				NanoAssert(rem<256);\
+				*(--_nIns) = (NIns)( COND_AL | (0x1D<<20) | ((_d)<<16) | ((_d)<<12) |  ((((rem)>>4)&0xF)<<8) | (0xF<<4) | ((rem)&0xF)  );\
+				*(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<23) | ((_b)<<16) | ((_d)<<12) | (0xFF) );}\
+			else NanoAssert(0);}\
+		else {\
+			if ((_off)>-256) {\
+				*(--_nIns) = (NIns)( COND_AL | (0x15<<20) | ((_b)<<16) | ((_d)<<12) |  ((((-(_off))>>4)&0xF)<<8) | (0xF<<4) | ((-(_off))&0xF)  );\
+				asm_output3("ldrsh %s,[%s, #%d]",gpn(_d), gpn(_b), (_off));}\
+			else {}}\
+	} while(0)
+*/
+
+}
+#endif // __nanojit_NativeArm__
new file mode 100755
--- /dev/null
+++ b/js/src/nanojit/NativeThumb.h
@@ -0,0 +1,526 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is [Open Source Virtual Machine].
+ *
+ * The Initial Developer of the Original Code is
+ * Adobe System Incorporated.
+ * Portions created by the Initial Developer are Copyright (C) 2004-2007
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ *   Adobe AS3 Team
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+
+#ifndef __nanojit_NativeThumb__
+#define __nanojit_NativeThumb__
+
+
+namespace nanojit
+{
+	const int NJ_LOG2_PAGE_SIZE	= 12;		// 4K
+	const int NJ_LOG2_CACHE_SIZE = 20;		// 1MB
+	const int NJ_LOG2_PAGES = NJ_LOG2_CACHE_SIZE - NJ_LOG2_PAGE_SIZE;
+	const int NJ_PAGES = 1 << NJ_LOG2_PAGES;
+	const int NJ_PAGE_SIZE = 1 << NJ_LOG2_PAGE_SIZE;
+	const int NJ_MAX_REGISTERS = 6; // R0-R5
+	const int NJ_MAX_STACK_ENTRY = 256;
+	const int NJ_MAX_PARAMETERS = 1;
+	const int NJ_ALIGN_STACK = 8;
+	const int NJ_STACK_OFFSET = 8;
+
+	// WARNING: setting this allows the NJ to grow memory as needed without bounds
+	const bool NJ_UNLIMITED_GROWTH	= true;
+
+	#define NJ_CONSTANT_POOLS
+    const int NJ_MAX_CPOOL_OFFSET = 1024;
+    const int NJ_CPOOL_SIZE = 16;
+
+	#define NJ_SOFTFLOAT
+	#define NJ_STACK_GROWTH_UP
+	#define NJ_THUMB_JIT
+
+	
+	typedef unsigned short NIns;
+
+	/* ARM registers */
+	typedef enum 
+	{
+		R0  = 0,
+		R1  = 1,
+		R2  = 2,
+		R3  = 3,
+		R4  = 4,
+		R5  = 5,
+		R6  = 6,
+		R7  = 7,
+		R8  = 8,
+		//R9  = 9,
+		//R10 = 10,
+		//R11  = 11,
+		IP  = 12,
+		SP  = 13,
+		LR  = 14,
+		PC  = 15,
+
+		FP = SP,
+		
+		// helpers
+		FRAME_PTR = R7,
+		
+		FirstReg = 0,
+		LastReg = 5,
+		Scratch	= 6,
+		UnknownReg = 6
+	}
+	Register;
+
+	typedef int RegisterMask;
+	typedef struct _FragInfo
+	{
+		RegisterMask	needRestoring;
+		NIns*			epilogue;
+	} 
+	FragInfo;
+
+	static const RegisterMask SavedRegs = 1<<R4 | 1<<R5 | 1<<R6 | 1<<R7;
+	static const RegisterMask FpRegs = 0x0000; // no FP registers (softfloat)
+	static const RegisterMask GpRegs = 0x003F;
+	static const RegisterMask AllowableFlagRegs = 1<<R0 | 1<<R1 | 1<<R2 | 1<<R3 | 1<<R4 | 1<<R5;
+
+	#define firstreg()		R0
+	#define nextreg(r)		(Register)((int)r+1)
+	#define imm2register(c) (Register)(c-1)
+ 
+	verbose_only( extern const char* regNames[]; )
+
+	// abstract to platform specific calls
+	#define nExtractPlatformFlags(x)	0
+
+	#define DECLARE_PLATFORM_STATS()
+
+	#define DECLARE_PLATFORM_REGALLOC()
+
+	#define DECLARE_PLATFORM_ASSEMBLER()\
+        const static Register argRegs[4], retRegs[2];\
+		bool has_cmov;\
+		void STi(Register b, int32_t d, int32_t v);\
+		void LDi(Register r, int32_t v);\
+		void BL(NIns* target);\
+		void PUSH_mask(RegisterMask);\
+		void POP_mask(RegisterMask);\
+		void POP(Register);\
+		void underrunProtect(int bytes);\
+		void B_cond(int c, NIns *target);\
+		void B(NIns *target);\
+		void MOVi(Register r, int32_t imm);\
+		void ST(Register base, int32_t offset, Register reg);\
+		void STR_m(Register base, int32_t offset, Register reg);\
+		void STR_index(Register base, Register off, Register reg);\
+		void STR_sp(int32_t offset, Register reg);\
+		void STMIA(Register base, RegisterMask regs);\
+		void LDMIA(Register base, RegisterMask regs);\
+		void ADDi(Register r, int32_t imm);\
+		void ADDi8(Register r, int32_t imm);\
+		void SUBi(Register r, int32_t imm);\
+		void SUBi8(Register r, int32_t imm);\
+		void JMP(NIns *target);\
+        void LD32_nochk(Register r, int32_t imm);\
+		void CALL(intptr_t addr, const char* nm);\
+		void nativePageReset();\
+		void nativePageSetup();\
+		int* _nPool;\
+		int* _nSlot;\
+		int* _nExitPool;\
+		int* _nExitSlot;
+
+
+	#define swapptrs()  { NIns* _tins = _nIns; _nIns=_nExitIns; _nExitIns=_tins; \
+								int* _npool = _nPool;\
+								int* _nslot = _nSlot;\
+								_nPool = _nExitPool; _nExitPool = _npool;\
+								_nSlot = _nExitSlot; _nExitSlot = _nslot;}
+
+#define BX(r)		do {\
+	underrunProtect(2); \
+	*(--_nIns) = (NIns)(0x4700 | ((r)<<3));\
+	asm_output1("bx %s",gpn(r)); } while(0)
+
+#define OR(l,r)		do {underrunProtect(2); *(--_nIns) = (NIns)(0x4300 | (r<<3) | l); asm_output2("or %s,%s",gpn(l),gpn(r)); } while(0)
+#define ORi(r,i)	do {										\
+	if (isU8(i)) {												\
+		underrunProtect(4); 									\
+		*(--_nIns) = (NIns)(0x4300 | (Scratch<<3) | (r));			\
+		*(--_nIns) = (NIns)(0x2000 | (Scratch<<8) | (i&0xFF));} \
+	else if (isS8(i) && int32_t(i)<0) {									\
+		underrunProtect(6);												\
+		*(--_nIns) = (NIns)(0x4030 | (Scratch<<3) | (r) ); 				\
+		*(--_nIns) = (NIns)(0x4240 | ((Scratch)<<3) | (Scratch));		\
+		*(--_nIns) = (NIns)(0x2000 | ((Scratch)<<8) | ((-(i))&0xFF) );}	\
+	else NanoAssert(0);													\
+	asm_output2("or %s,%d",gpn(r), i); } while(0)
+
+#define AND(l,r)	do {underrunProtect(2); *(--_nIns) = (NIns)(0x4000 | ((r)<<3) | (l)); asm_output2("and %s,%s",gpn(l),gpn(r)); } while(0)
+
+#define ANDi(_r,_i) do {													\
+	if (isU8(_i)) {														\
+		underrunProtect(4);												\
+		*(--_nIns) = (NIns)(0x4000 | (Scratch<<3) | (_r) ); 				\
+		*(--_nIns) = (NIns)(0x2000 | (Scratch<<8) | ((_i)&0xFF) );} 		\
+	else if (isS8(_i) && int32_t(_i)<0) {									\
+		underrunProtect(6);												\
+		*(--_nIns) = (NIns)(0x4000 | (Scratch<<3) | (_r) ); 				\
+		*(--_nIns) = (NIns)(0x4240 | ((Scratch)<<3) | (Scratch));		\
+		*(--_nIns) = (NIns)(0x2000 | ((Scratch)<<8) | ((-(_i))&0xFF) );}	\
+	else {											\
+		underrunProtect(2);										\
+		*(--_nIns) = (NIns)(0x4000 |  ((Scratch)<<3) | (_r));	\
+		LDi(Scratch, (_i));}											\
+	asm_output2("and %s,%d",gpn(_r),(_i)); } while (0)
+
+
+#define XOR(l,r)	do {underrunProtect(2); *(--_nIns) = (NIns)(0x4040 | ((r)<<3) | (l)); asm_output2("eor %s,%s",gpn(l),gpn(r)); } while(0)
+#define XORi(r,i)	do {	\
+	if (isU8(i)){	\
+		underrunProtect(4);		\
+		*(--_nIns) = (NIns)(0x4040 | (Scratch<<3) | (r)); \
+		*(--_nIns) = (NIns)(0x2000 | (Scratch<<8) | ((i)&0xFF));} \
+	else if (isS8(i) && int32_t(i)<0) {									\
+		underrunProtect(6);												\
+		*(--_nIns) = (NIns)(0x4040 | (Scratch<<3) | (r) ); 				\
+		*(--_nIns) = (NIns)(0x4240 | ((Scratch)<<3) | (Scratch));		\
+		*(--_nIns) = (NIns)(0x2000 | ((Scratch)<<8) | ((-(i))&0xFF) );}	\
+	else NanoAssert(0);													\
+	asm_output2("eor %s,%d",gpn(r),(i)); } while(0)
+
+#define ADD3(d,l,r) do {underrunProtect(2); *(--_nIns) = (NIns)(0x1800 | ((r)<<6) | ((l)<<3) | (d)); asm_output3("add %s,%s,%s",gpn(d),gpn(l),gpn(r)); } while(0)
+#define ADD(l,r)    ADD3(l,l,r)
+
+#define SUB(l,r)	do {underrunProtect(2); *(--_nIns) = (NIns)(0x1A00 | ((r)<<6) | ((l)<<3) | (l)); asm_output2("sub %s,%s",gpn(l),gpn(r)); } while(0)
+#define MUL(l,r)	do {underrunProtect(2); *(--_nIns) = (NIns)(0x4340 | ((r)<<3) | (l)); asm_output2("mul %s,%s",gpn(l),gpn(r)); } while(0)
+
+#define NEG(r)		do {\
+	underrunProtect(2);\
+	*(--_nIns) = (NIns)(0x4240 | ((r)<<3) | (r) );\
+	asm_output1("neg %s",gpn(r));\
+ } while(0)
+
+#define NOT(r)		do {underrunProtect(2);	*(--_nIns) = (NIns)(0x43C0 | ((r)<<3) | (r) ); asm_output1("mvn %s",gpn(r)); } while(0)
+
+#define SHR(r,s)	do {underrunProtect(2); *(--_nIns) = (NIns)(0x40C0 | ((s)<<3) | (r)); asm_output2("shr %s,%s",gpn(r),gpn(s)); } while(0)
+#define SHRi(r,i)	do {underrunProtect(2); *(--_nIns) = (NIns)(0x0800 | ((i)<<6) | ((r)<<3) | (r)); asm_output2("shr %s,%d",gpn(r),i); } while(0)
+
+#define SAR(r,s)	do {underrunProtect(2); *(--_nIns) = (NIns)(0x4100 | ((s)<<3) | (r)); asm_output2("asr %s,%s",gpn(r),gpn(s)); } while(0)
+#define SARi(r,i)	do {underrunProtect(2); *(--_nIns) = (NIns)(0x1000 | ((i)<<6) | ((r)<<3) | (r)); asm_output2("asr %s,%d",gpn(r),i); } while(0)
+
+#define SHL(r,s)	do {\
+	underrunProtect(2); \
+	*(--_nIns) = (NIns)(0x4080 | ((s)<<3) | (r)); \
+	asm_output2("lsl %s,%s",gpn(r),gpn(s));\
+ } while(0)
+
+#define SHLi(r,i)	do {\
+	underrunProtect(2);\
+	NanoAssert((i)>=0 && (i)<32);\
+	*(--_nIns) = (NIns)(0x0000 | ((i)<<6) | ((r)<<3) | (r)); \
+	asm_output2("lsl %s,%d",gpn(r),(i));\
+ } while(0)
+					
+
+					
+#define TEST(d,s)	do{underrunProtect(2); *(--_nIns) = (NIns)(0x4200 | ((d)<<3) | (s)); asm_output2("test %s,%s",gpn(d),gpn(s));} while(0)
+#define CMP(l,r)	do{underrunProtect(2); *(--_nIns) = (NIns)(0x4280 | ((r)<<3) | (l)); asm_output2("cmp %s,%s",gpn(l),gpn(r));} while(0)
+
+#define CMPi(_r,_i)	do{													\
+	if (_i<0) {															\
+		NanoAssert(isS16((_i)));											\
+		if ((_i)>-256)	{													\
+			underrunProtect(4);													\
+			*(--_nIns) = (NIns)(0x42C0 | ((Scratch)<<3) | (_r));					\
+			asm_output2("cmn %s,%s",gpn(_r),gpn(Scratch));						\
+			*(--_nIns) = (NIns)(0x2000 | (Scratch<<8) | ((-(_i))&0xFF) );		\
+			asm_output2("mov %s,%d",gpn(Scratch),(_i));}					\
+		else {																\
+			NanoAssert(!((_i)&3));											\
+			underrunProtect(10);											\
+			*(--_nIns) = (NIns)(0x42C0 | ((Scratch)<<3) | (_r));			\
+			asm_output2("cmn %s,%s",gpn(_r),gpn(Scratch));					\
+			*(--_nIns) = (NIns)(0x0000 | (2<<6) | (Scratch<<3) | (Scratch) );\
+			asm_output2("lsl %s,%d",gpn(Scratch),2);						\
+			*(--_nIns) = (NIns)(0x2000 | (Scratch<<8) | ((-(_i)/4)&0xFF) );	\
+			asm_output2("mov %s,%d",gpn(Scratch),(_i));}				\
+	} else {																\
+		if ((_i)>255) {														\
+			int pad=0;														\
+			underrunProtect(2*(7));										\
+			*(--_nIns) = (NIns)(0x4280 | ((Scratch)<<3) | (_r));			\
+			asm_output2("cmp %s,%X",gpn(_r),(_i));							\
+			if ( (((int)(_nIns-2))%4) != 0)	pad=1;							\
+			if (pad) {														\
+				*(--_nIns) = 0xBAAD;										\
+				asm_output("PAD 0xBAAD"); }									\
+			*(--_nIns) = (short)((_i) >> 16);								\
+			*(--_nIns) = (short)((_i) & 0xFFFF);							\
+			asm_output1("imm %d", (_i));									\
+			*(--_nIns) = 0xBAAD;											\
+			asm_output("PAD 0xBAAD");										\
+			if (pad) *(--_nIns) = (NIns)(0xE000 | (6>>1));					\
+			else *(--_nIns) = (NIns)(0xE000 | (4>>1));						\
+			asm_output1("b %X", (int)_nIns+(pad?6:4)+4);					\
+			*(--_nIns) = (NIns)(0x4800 | ((Scratch)<<8) | (1));}			\
+		else {																\
+			NanoAssert((_i)<256);											\
+			underrunProtect(2);												\
+			*(--_nIns) = (NIns)(0x2800 | ((_r)<<8) | ((_i)&0xFF));			\
+			asm_output2("cmp %s,%X",gpn(_r),(_i));}							\
+	} } while(0)
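+// Reading the (_i)>255 branch above in execution order (remember the macro
+// emits backwards): LDR loads Scratch from a PC-relative inline literal, an
+// unconditional B skips over that literal, the 32-bit immediate itself is
+// stored inline as two halfwords (low, then high), 0xBAAD halfwords pad the
+// literal to a 4-byte boundary, and finally CMP _r,Scratch sets the flags.
+// (Informal reading; see the macro itself for the exact encodings.)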
+
+#define MR(d,s)	do {											\
+	underrunProtect(2);											\
+	if (((d)<8) && ((s)<8))											\
+		*(--_nIns) = (NIns)(0x1C00 | ((s)<<3) | (d));				\
+	else {														\
+		if (d<8) *(--_nIns) = (NIns)(0x4600 | ((s)<<3) | ((d)&7));	\
+		else *(--_nIns) = (NIns)(0x4600 | (1<<7) | ((s)<<3) | ((d)&7));\
+	} asm_output2("mov %s,%s",gpn(d),gpn(s)); } while (0)
+
+// Thumb doesn't support conditional-move
+#define MREQ(d,s)	do { NanoAssert(0); } while (0)
+#define MRNE(d,s)	do { NanoAssert(0); } while (0)
+#define MRL(d,s)	do { NanoAssert(0); } while (0)
+#define MRLE(d,s)	do { NanoAssert(0); } while (0)
+#define MRG(d,s)	do { NanoAssert(0); } while (0)
+#define MRGE(d,s)	do { NanoAssert(0); } while (0)
+#define MRB(d,s)	do { NanoAssert(0); } while (0)
+#define MRBE(d,s)	do { NanoAssert(0); } while (0)
+#define MRA(d,s)	do { NanoAssert(0); } while (0)
+#define MRAE(d,s)	do { NanoAssert(0); } while (0)
+
+#define LD(reg,offset,base) do{												\
+	int off = (offset) >> 2;													\
+	if (base==PC){															\
+		underrunProtect(2);													\
+		NanoAssert(off>=0 && off<256);												\
+		*(--_nIns) = (NIns)(0x4800 | ((reg)<<8) | (off&0xFF));				\
+		asm_output3("ld %s,%d(%s)",gpn(reg),(offset),gpn(base));			\
+	} else if (base==SP) {													\
+		NanoAssert(off>=0);													\
+		if (off<256){														\
+			underrunProtect(2);												\
+			*(--_nIns) = (NIns)(0x9800 | ((reg)<<8) | (off&0xFF));}			\
+		else {																\
+			underrunProtect(4);												\
+			int rem = (offset) - 1020; NanoAssert(rem<125);					\
+			*(--_nIns) = (NIns)(0x6800 | (rem&0x1F)<<6 | (reg)<<3 | (reg));	\
+			*(--_nIns) = (NIns)(0xA800 | ((reg)<<8) | (0xFF));}				\
+		asm_output3("ld %s,%d(%s)",gpn(reg),(offset),gpn(base));			\
+	} else if ((offset)<0) {												\
+		underrunProtect(8);													\
+		*(--_nIns) = (NIns)(0x5800 | (Scratch<<6) | (base<<3) | (reg));		\
+		asm_output3("ld %s,%d(%s)",gpn(reg),(offset),gpn(Scratch));			\
+		*(--_nIns) = (NIns)(0x4240 | (Scratch<<3) | Scratch);				\
+		asm_output2("neg %s,%s",gpn(Scratch),gpn(Scratch));					\
+		if ((offset)<-255){												\
+			NanoAssert( (offset)>=-1020);											\
+			*(--_nIns) = (NIns)(0x0000 | (2<<6) | (Scratch<<3) | (Scratch) );	\
+			asm_output2("lsl %s,%d",gpn(Scratch),2);					\
+			*(--_nIns) = (NIns)(0x2000 | (Scratch<<8) | ((-(off))&0xFF) );	\
+			asm_output2("mov %s,%d",gpn(Scratch),(offset));}					\
+		else {																\
+			*(--_nIns) = (NIns)(0x2000 | (Scratch<<8) | ((-(offset))&0xFF) );	\
+			asm_output2("mov %s,%d",gpn(Scratch),(offset));}					\
+	} else {																		\
+		if ((off)<32) {																\
+			underrunProtect(2);														\
+			*(--_nIns) = (NIns)(0x6800 | ((off&0x1F)<<6) | ((base)<<3) | (reg));	\
+			asm_output3("ld %s,%d(%s)",gpn(reg),(offset),gpn(base));}				\
+		else {																		\
+			underrunProtect(2);														\
+			*(--_nIns) = (NIns)(0x5800 | (Scratch<<6) | (base<<3) | (reg));			\
+			asm_output3("ld %s,%d(%s)",gpn(reg),(offset),gpn(Scratch));				\
+			LDi(Scratch, (offset));}}											\
+	} while(0)
+
+// load 8-bit, zero extend  (aka LDRB)
+// note, only 5-bit offsets (!) are supported for this, but that's all we need at the moment,
+// and we get a nice advantage in Thumb mode...
+#define LD8Z(_r,_d,_b) do{    \
+    NanoAssert((_d)>=0 && (_d)<=31);\
+    underrunProtect(2);\
+    *(--_nIns) = (NIns)(0x7800 | (((_d)&31)<<6) | ((_b)<<3) | (_r) ); \
+    asm_output3("ldrb %s,%d(%s)", gpn(_r),(_d),gpn(_b));\
+    } while(0)
+
+
+#define LEA(_r,_d,_b) do{										\
+	NanoAssert((_d)>=0);										\
+	if (_b!=SP) NanoAssert(0);									\
+	if ((int)(_d)<=1020 && (_d)>=0) {							\
+		underrunProtect(2);										\
+		*(--_nIns) = (NIns)(0xA800 | ((_r)<<8) | ((_d)>>2));}	\
+	else {														\
+		underrunProtect(4);										\
+		int rem = (_d) - 1020; NanoAssert(rem<256);				\
+		*(--_nIns) = (NIns)(0x3000 | ((_r)<<8) | ((rem)&0xFF));	\
+		*(--_nIns) = (NIns)(0xA800 | ((_r)<<8) | (0xFF));}		\
+	asm_output2("lea %s, %d(SP)", gpn(_r), _d);					\
+	} while(0)
+
+
+//NanoAssert((t)<2048);
+#define JMP_long_nochk_offset(t) do {								\
+	*(--_nIns) = (NIns)(0xF800 | (((t)&0xFFF)>>1) );	\
+	*(--_nIns) = (NIns)(0xF000 | (((t)>>12)&0x7FF) );	\
+	asm_output1("BL offset=%d",int(t));} while (0)
+
+#define JMP_long_placeholder()	BL(_nIns)
+
+// conditional branch
+enum {
+		EQ=0,
+		NE,
+		CSHS,
+		CCLO,
+		MI,		
+		PL,
+		VS,
+		VC,
+		HI,
+		LS,
+		GE,
+		LT,
+		GT,
+		LE,
+		AL,
+		NV
+};
+
+
+#define JA(t)	B_cond(HI,t)
+#define JNA(t)	B_cond(LS,t)
+#define JB(t)	B_cond(CCLO,t)
+#define JNB(t)	B_cond(CSHS,t)
+#define JE(t)	B_cond(EQ,t)
+#define JNE(t)	B_cond(NE,t)
+#define JBE(t)	B_cond(LS,t)
+#define JNBE(t) B_cond(HI,t)
+#define JAE(t)	B_cond(CSHS,t)
+#define JNAE(t) B_cond(CCLO,t)
+#define JL(t)	B_cond(LT,t)
+#define JNL(t)	B_cond(GE,t)
+#define JG(t)	B_cond(GT,t)
+#define JNG(t)	B_cond(LE,t)
+#define JLE(t)	B_cond(LE,t)
+#define JNLE(t)	B_cond(GT,t)
+#define JGE(t)	B_cond(GE,t)
+#define JNGE(t)	B_cond(LT,t)
+
+// B(cond) +4	- if condition, skip to MOV
+// EOR R, R		- set register to 0	
+// B(AL) +2		- skip over next
+// MOV R, 1 	- set register to 1
+#define SET(r,cond)									\
+	underrunProtect(10);								\
+	*(--_nIns) = (NIns)(0x0000);					\
+	*(--_nIns) = (NIns)(0x2000 | (r<<8) | (1));		\
+	*(--_nIns) = (NIns)(0xE000 | 1 );				\
+	*(--_nIns) = (NIns)(0x4040 | (r<<3) | r);		\
+	*(--_nIns) = (NIns)(0xD000 | ((cond)<<8) | (1) );
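+// (Note: the 0x0000 halfword emitted first lands after the MOV r,#1, since
+// emission is backwards; it acts as a 16-bit nop padding the sequence to the
+// five halfwords reserved by underrunProtect(10).)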
+
+#define SETE(r)		do {SET(r,EQ); asm_output1("sete %s",gpn(r)); } while(0)
+#define SETL(r)		do {SET(r,LT); asm_output1("setl %s",gpn(r)); } while(0)
+#define SETLE(r)	do {SET(r,LE); asm_output1("setle %s",gpn(r)); } while(0)
+#define SETG(r)		do {SET(r,GT); asm_output1("setg %s",gpn(r)); } while(0)
+#define SETGE(r)	do {SET(r,GE); asm_output1("setge %s",gpn(r)); } while(0)
+#define SETB(r)		do {SET(r,CCLO); asm_output1("setb %s",gpn(r)); } while(0)
+#define SETBE(r)	do {SET(r,LS); asm_output1("setbe %s",gpn(r)); } while(0)
+#define SETAE(r)	do {SET(r,CSHS); asm_output1("setae %s",gpn(r)); } while(0) /* warning, untested */
+#define SETA(r)		do {SET(r,HI); asm_output1("seta %s",gpn(r)); } while(0) /* warning, untested */
+
+// This zero-extends a reg that has been set using one of the SET macros,
+// but is a NOOP on ARM/Thumb
+#define MOVZX8(r,r2)
+
+// If the offset is 0-255, no problem, just load it as an 8-bit imm;
+// if it is larger, we fold most of it into an ADD from SP first.
+// 
+// If offset is 8-bit, we
+// 		MOV 	Scratch, SP
+// 		MOV 	_r, offset
+// 		LDRSH	_r, offset, Scratch
+// 
+// else
+// 		ADD_sp	Scratch, offset/4
+// 		MOV		_r, offset%4
+// 		LDRSH	_r, Scratch, _r
+#define LD16S(_r,_d,_b) do{														\
+	if (_b==SP) {																\
+		NanoAssert((int)(_d)>=0);												\
+		if (isU8(_d)) {															\
+			underrunProtect(6);													\
+			*(--_nIns) = (NIns)(0x5E00 | ((_r)<<6) | (Scratch<<3) | (_r) );		\
+			*(--_nIns) = (NIns)(0x2000 | ((_r)<<8) | (_d)&0xFF );				\
+			*(--_nIns) = (NIns)(0x4600 | (SP<<3) | Scratch );}					\
+		else {																\
+			underrunProtect(6);													\
+			*(--_nIns) = (NIns)(0x5E00 | ((_r)<<6) | (Scratch<<3) | (_r) );		\
+			*(--_nIns) = (NIns)(0x2000 | ((_r)<<8) | ((_d)%4) );				\
+			*(--_nIns) = (NIns)(0xA800 | (Scratch<<8) | (alignTo((_d), 4))/4);}	\
+	} else {																	\
+		if ((_d)<0) {														\
+			if ((_d)<-255) {														\
+				NanoAssert((_d)>=-510);										\
+				underrunProtect(8);													\
+				int rem = -(_d) - 255;												\
+				*(--_nIns) = (NIns)(0x5E00 | (Scratch<<6) | ((_b)<<3) | (_r));			\
+				*(--_nIns) = (NIns)(0x4240 | (Scratch<<3) | Scratch);						\
+				*(--_nIns) = (NIns)(0x3000 | (Scratch<<8) | (rem&0xFF));				\
+				*(--_nIns) = (NIns)(0x2000 | (Scratch<<8) | 0xFF );}					\
+			else {																	\
+				underrunProtect(6);													\
+				*(--_nIns) = (NIns)(0x5E00 | ((Scratch)<<6) | ((_b)<<3) | (_r) );	\
+				*(--_nIns) = (NIns)(0x4240 | (Scratch<<3) | Scratch);				\
+				*(--_nIns) = (NIns)(0x2000 | (Scratch<<8) | ((-(_d))&0xFF) );}}		\
+		else if ((int)(_d)<256) {												\
+			underrunProtect(4);													\
+			*(--_nIns) = (NIns)(0x5E00 | (Scratch<<6) | ((_b)<<3) | (_r));		\
+			*(--_nIns) = (NIns)(0x2000 | (Scratch<<8) | ((_d)&0xFF) );}			\
+		else {																	\
+			NanoAssert((int)(_d)<=510);											\
+			underrunProtect(6);													\
+			int rem = (_d) - 255;												\
+			*(--_nIns) = (NIns)(0x5E00 | (Scratch<<6) | ((_b)<<3) | (_r));			\
+			*(--_nIns) = (NIns)(0x3000 | (Scratch<<8) | (rem&0xFF));				\
+			*(--_nIns) = (NIns)(0x2000 | (Scratch<<8) | 0xFF );}					\
+	} asm_output3("movsx %s, %d(%s)", gpn(_r), (_d), gpn(_b)); } while(0)
+
+	//*(--_nIns) = (NIns)(0x8800 | (((_d)>>1)<<6) | (Scratch<<3) | (_r));\
+	//*(--_nIns) = (NIns)(0x4600 | (SP<<3) | Scratch );}				\
+
+
+}
+#endif // __nanojit_NativeThumb__
new file mode 100644
--- /dev/null
+++ b/js/src/nanojit/Nativei386.h
@@ -0,0 +1,674 @@
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is [Open Source Virtual Machine].
+ *
+ * The Initial Developer of the Original Code is
+ * Adobe System Incorporated.
+ * Portions created by the Initial Developer are Copyright (C) 2004-2007
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ *   Adobe AS3 Team
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+
+#ifndef __nanojit_Nativei386__
+#define __nanojit_Nativei386__
+
+
+namespace nanojit
+{
+	const int NJ_LOG2_PAGE_SIZE	= 12;		// 4K
+	const int NJ_LOG2_CACHE_SIZE = 24;		// 16M
+	const int NJ_LOG2_PAGES = NJ_LOG2_CACHE_SIZE - NJ_LOG2_PAGE_SIZE;
+	const int NJ_PAGES = 1 << NJ_LOG2_PAGES;
+	const int NJ_PAGE_SIZE = 1 << NJ_LOG2_PAGE_SIZE;
+	const int NJ_MAX_REGISTERS = 24; // gpregs, x87 regs, xmm regs
+	const int NJ_STACK_OFFSET = 0;
+	
+	// WARNING: setting this allows the NJ to grow memory as needed without bounds
+	const bool NJ_UNLIMITED_GROWTH	= true;
+
+	#define NJ_MAX_STACK_ENTRY 256
+	#define NJ_MAX_PARAMETERS 1
+
+#ifdef DARWIN
+	const int NJ_ALIGN_STACK = 16;
+#else
+	const int NJ_ALIGN_STACK = 8;
+#endif
+	
+	typedef uint8_t NIns;
+
+	// These are used as register numbers in various parts of the code
+	typedef enum
+	{
+		// general purpose 32bit regs
+		EAX = 0, // return value, scratch
+		ECX = 1, // this/arg0, scratch
+		EDX = 2, // arg1, return-msw, scratch
+		EBX = 3,
+		ESP = 4, // stack pointer
+		EBP = 5, // frame pointer
+		ESI = 6,
+		EDI = 7,
+
+		SP = ESP, // alias SP to ESP for convenience
+		FP = EBP, // alias FP to EBP for convenience
+
+		// SSE regs come before X87 so we prefer them
+		XMM0 = 8,
+		XMM1 = 9,
+		XMM2 = 10,
+		XMM3 = 11,
+		XMM4 = 12,
+		XMM5 = 13,
+		XMM6 = 14,
+		XMM7 = 15,
+
+        // X87 regs
+		FST0 = 16,
+		FST1 = 17,
+		FST2 = 18,
+		FST3 = 19,
+		FST4 = 20,
+		FST5 = 21,
+		FST6 = 22,
+		FST7 = 23,
+
+		FirstReg = 0,
+		LastReg = 23,
+		UnknownReg = 24
+	} 
+	Register;
+
+	typedef int RegisterMask;
+
+	static const RegisterMask SavedRegs = 1<<EBX | 1<<EDI | 1<<ESI;
+	static const RegisterMask GpRegs = SavedRegs | 1<<EAX | 1<<ECX | 1<<EDX;
+    static const RegisterMask XmmRegs = 1<<XMM0|1<<XMM1|1<<XMM2|1<<XMM3|1<<XMM4|1<<XMM5|1<<XMM6|1<<XMM7;
+    static const RegisterMask x87Regs = 1<<FST0;
+	static const RegisterMask FpRegs = x87Regs | XmmRegs;
+	static const RegisterMask ScratchRegs = 1<<EAX | 1<<ECX | 1<<EDX | FpRegs;
+
+	static const RegisterMask AllowableFlagRegs = 1<<EAX |1<<ECX | 1<<EDX | 1<<EBX;
+
+	#define _rmask_(r)		(1<<(r))
+	#define _is_xmm_reg_(r)	((_rmask_(r)&XmmRegs)!=0)
+	#define _is_x87_reg_(r)	((_rmask_(r)&x87Regs)!=0)
+	#define _is_fp_reg_(r)	((_rmask_(r)&FpRegs)!=0)
+	#define _is_gp_reg_(r)	((_rmask_(r)&GpRegs)!=0)
+
+	#define nextreg(r)		Register(r+1)
+	#define prevreg(r)		Register(r-1)
+	#define imm2register(c) (Register)(c)
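+	// A register set is just a bitmask, so set algebra is plain bit twiddling.
+	// For example (a sketch; these names are not used elsewhere in this header):
+	//   RegisterMask callerSaved = GpRegs & ~SavedRegs;         // EAX|ECX|EDX
+	//   bool ecxIsCallerSaved    = (_rmask_(ECX) & callerSaved) != 0;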
+	
+	verbose_only( extern const char* regNames[]; )
+
+	#define DECLARE_PLATFORM_STATS()
+
+	#define DECLARE_PLATFORM_REGALLOC()
+
+	#define DECLARE_PLATFORM_ASSEMBLER()	\
+        const static Register argRegs[2], retRegs[2]; \
+		bool x87Dirty;						\
+        bool sse2;							\
+		bool has_cmov; \
+		bool pad[1];\
+		void nativePageReset();\
+		void nativePageSetup();
+		
+	#define swapptrs()  { NIns* _tins = _nIns; _nIns=_nExitIns; _nExitIns=_tins; }
+		
+	// make sure there is room to emit n more bytes before _nIns on the current page
+	#define underrunProtect(n)									\
+		{														\
+			intptr_t u = n + sizeof(PageHeader)/sizeof(NIns);	\
+			if ( !samepage(_nIns-u,_nIns-1) )					\
+			{													\
+				NIns *tt = _nIns; \
+				_nIns = pageAlloc(_inExit);						\
+				int d = tt-_nIns; \
+				JMP_long_nochk_offset(d);			\
+			}													\
+		}														\
+
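+// Code is generated backwards, from high addresses to low, so underrunProtect(n)
+// reserves room for n more bytes in front of _nIns before an instruction is
+// emitted.  A rough usage sketch (assuming an Assembler method where _nIns and
+// pageAlloc are in scope, as elsewhere in nanojit):
+//   underrunProtect(5);                  // about to emit a 5-byte instruction
+//   IMM32(imm); *(--_nIns) = 0xb8;       // e.g. "mov eax, imm32"
+// If the reservation would cross the start of the current page, a fresh page is
+// picked and a jmp back to the code already emitted on the old page is placed
+// there, so execution still falls through correctly.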
+#define IMM32(i)	\
+	_nIns -= 4;		\
+	*((int32_t*)_nIns) = (int32_t)(i)
+
+#define MODRMs(r,d,b,l,i) \
+		NanoAssert(unsigned(r)<8 && unsigned(b)<8 && unsigned(i)<8); \
+ 		if ((d) == 0 && (b) != EBP) { \
+			_nIns -= 2; \
+ 			_nIns[0] = (uint8_t)     ( 0<<6 |   (r)<<3 | 4); \
+ 			_nIns[1] = (uint8_t) ((l)<<6 | (i)<<3 | (b)); \
+ 		} else if (isS8(d)) { \
+ 			_nIns -= 3; \
+ 			_nIns[0] = (uint8_t)     ( 1<<6 |   (r)<<3 | 4 ); \
+ 			_nIns[1] = (uint8_t) ( (l)<<6 | (i)<<3 | (b) ); \
+			_nIns[2] = (uint8_t) (d); \
+ 		} else { \
+ 			IMM32(d); \
+ 			*(--_nIns) = (uint8_t) ( (l)<<6 | (i)<<3 | (b) ); \
+ 			*(--_nIns) = (uint8_t)    ( 2<<6 |   (r)<<3 | 4 ); \
+ 		}
+
+#define MODRMm(r,d,b) \
+		NanoAssert(unsigned(r)<8 && unsigned(b)<8); \
+ 		if ((b) == ESP) { \
+ 			MODRMs(r, d, b, 0, (Register)4); \
+ 		} \
+		else if ( (d) == 0 && (b) != EBP) { \
+ 			*(--_nIns) = (uint8_t) ( 0<<6 | (r)<<3 | (b) ); \
+ 		} else if (isS8(d)) { \
+ 			*(--_nIns) = (uint8_t) (d); \
+ 			*(--_nIns) = (uint8_t) ( 1<<6 | (r)<<3 | (b) ); \
+ 		} else { \
+ 			IMM32(d); \
+ 			*(--_nIns) = (uint8_t) ( 2<<6 | (r)<<3 | (b) ); \
+ 		} 
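+// Illustrative encodings (a sketch, following the usual x86 ModR/M rules):
+//   MODRMm(EAX, 8, EBP)   emits 45 08      (mod=01 disp8, reg=eax, r/m=ebp)
+//   MODRMm(EAX, 8, ESP)   emits 44 24 08   (SIB form, required when base=esp)
+// The caller prepends the opcode, so ALUm(0x8b, EAX, 8, EBP) below ends up as
+// 8b 45 08, i.e. "mov eax, 8(%ebp)".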
+
+#define MODRM(d,s) \
+		NanoAssert(((unsigned)(d))<8 && ((unsigned)(s))<8); \
+		*(--_nIns) = (uint8_t) ( 3<<6|(d)<<3|(s) )
+
+#define ALU0(o)				\
+		underrunProtect(1);\
+		*(--_nIns) = (uint8_t) (o)
+
+#define ALUm(c,r,d,b)		\
+		underrunProtect(8); \
+		MODRMm(r,d,b);		\
+		*(--_nIns) = uint8_t(c)
+
+#define ALUm16(c,r,d,b)		\
+		underrunProtect(9); \
+		MODRMm(r,d,b);		\
+		*(--_nIns) = uint8_t(c);\
+		*(--_nIns) = 0x66
+
+#define ALU2m(c,r,d,b)      \
+        underrunProtect(9); \
+        MODRMm(r,d,b);      \
+        *(--_nIns) = (uint8_t) (c);\
+        *(--_nIns) = (uint8_t) ((c)>>8)
+
+#define ALU(c,d,s)  \
+		underrunProtect(2);\
+		MODRM(d,s); \
+		*(--_nIns) = (uint8_t) (c)
+
+#define ALUi(c,r,i) \
+   		underrunProtect(6); \
+		NanoAssert(unsigned(r)<8);\
+ 		if (isS8(i)) { \
+			*(--_nIns) = uint8_t(i); \
+            MODRM((c>>3),(r)); \
+            *(--_nIns) = uint8_t(0x83); \
+ 		} else { \
+ 			IMM32(i); \
+ 			if ( (r) == EAX) { \
+ 				*(--_nIns) = (uint8_t) (c); \
+ 			} else { \
+                MODRM((c>>3),(r)); \
+                *(--_nIns) = uint8_t(0x81); \
+ 			} \
+ 		}
+
+#define ALUmi(c,d,b,i) \
+		underrunProtect(10); \
+		NanoAssert(((unsigned)b)<8); \
+ 		if (isS8(i)) { \
+			*(--_nIns) = uint8_t(i); \
+            MODRMm((c>>3),(d),(b)); \
+            *(--_nIns) = uint8_t(0x83); \
+ 		} else { \
+ 			IMM32(i); \
+            MODRMm((c>>3),(d),(b)); \
+            *(--_nIns) = uint8_t(0x81); \
+ 		}
+
+#define ALU2(c,d,s) \
+		underrunProtect(3); \
+		MODRM((d),(s));	\
+		_nIns -= 2; \
+		_nIns[0] = (uint8_t) ( ((c)>>8) ); \
+		_nIns[1] = (uint8_t) ( (c) )
+
+#define LAHF()		do { ALU0(0x9F);					asm_output("lahf"); } while(0)
+#define SAHF()		do { ALU0(0x9E);					asm_output("sahf"); } while(0)
+#define OR(l,r)		do { ALU(0x0b, (l),(r));			asm_output2("or %s,%s",gpn(l),gpn(r)); } while(0)
+#define AND(l,r)	do { ALU(0x23, (l),(r));			asm_output2("and %s,%s",gpn(l),gpn(r)); } while(0)
+#define XOR(l,r)	do { ALU(0x33, (l),(r));			asm_output2("xor %s,%s",gpn(l),gpn(r)); } while(0)
+#define ADD(l,r)	do { ALU(0x03, (l),(r));			asm_output2("add %s,%s",gpn(l),gpn(r)); } while(0)
+#define SUB(l,r)	do { ALU(0x2b, (l),(r));			asm_output2("sub %s,%s",gpn(l),gpn(r)); } while(0)
+#define MUL(l,r)	do { ALU2(0x0faf,(l),(r));		asm_output2("mul %s,%s",gpn(l),gpn(r)); } while(0)
+#define NOT(r)		do { ALU(0xf7, (Register)2,(r));	asm_output1("not %s",gpn(r)); } while(0)
+#define NEG(r)		do { ALU(0xf7, (Register)3,(r));	asm_output1("neg %s",gpn(r)); } while(0)
+#define SHR(r,s)	do { ALU(0xd3, (Register)5,(r));	asm_output2("shr %s,%s",gpn(r),gpn(s)); } while(0)
+#define SAR(r,s)	do { ALU(0xd3, (Register)7,(r));	asm_output2("sar %s,%s",gpn(r),gpn(s)); } while(0)
+#define SHL(r,s)	do { ALU(0xd3, (Register)4,(r));	asm_output2("shl %s,%s",gpn(r),gpn(s)); } while(0)
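+// For example (a sketch): ADD(EAX, ECX) goes through ALU(0x03, EAX, ECX) and
+// emits 03 c1, i.e. "add eax, ecx".  The immediate forms further down (ADDi,
+// SUBi, ...) go through ALUi, which picks the short 0x83 /digit encoding when
+// the immediate fits in a signed byte, the one-byte accumulator form when the
+// target is EAX, and the general 0x81 form otherwise.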
+
+#define SHIFT(c,r,i) \
+		underrunProtect(3);\
+		*--_nIns = (uint8_t)(i);\
+		MODRM((Register)c,r);\
+		*--_nIns = 0xc1;
+
+#define SHLi(r,i)	do { SHIFT(4,r,i);	asm_output2("shl %s,%d", gpn(r),i); } while(0)
+#define SHRi(r,i)	do { SHIFT(5,r,i);	asm_output2("shr %s,%d", gpn(r),i); } while(0)
+#define SARi(r,i)	do { SHIFT(7,r,i);	asm_output2("sar %s,%d", gpn(r),i); } while(0)
+
+#define MOVZX8(d,s) do { ALU2(0x0fb6,d,s); asm_output2("movzx %s,%s", gpn(d),gpn(s)); } while(0)
+
+#define SUBi(r,i)	do { ALUi(0x2d,r,i);				asm_output2("sub %s,%d",gpn(r),i); } while(0)
+#define ADDi(r,i)	do { ALUi(0x05,r,i);				asm_output2("add %s,%d",gpn(r),i); } while(0)
+#define ANDi(r,i)	do { ALUi(0x25,r,i);				asm_output2("and %s,%d",gpn(r),i); } while(0)
+#define ORi(r,i)	do { ALUi(0x0d,r,i);				asm_output2("or %s,%d",gpn(r),i); } while(0)
+#define XORi(r,i)	do { ALUi(0x35,r,i);				asm_output2("xor %s,%d",gpn(r),i); } while(0)
+
+#define ADDmi(d,b,i) do { ALUmi(0x05, d, b, i); asm_output3("add %d(%s), %d", d, gpn(b), i); } while(0)
+
+#define TEST(d,s)	do { ALU(0x85,d,s);				asm_output2("test %s,%s",gpn(d),gpn(s)); } while(0)
+#define CMP(l,r)	do { ALU(0x3b, (l),(r));			asm_output2("cmp %s,%s",gpn(l),gpn(r)); } while(0)
+#define CMPi(r,i)	do { ALUi(0x3d,r,i);				asm_output2("cmp %s,%d",gpn(r),i); } while(0)
+
+#define MR(d,s)		do { ALU(0x8b,d,s);				asm_output2("mov %s,%s",gpn(d),gpn(s)); } while(0)
+#define LEA(r,d,b)	do { ALUm(0x8d, r,d,b);			asm_output3("lea %s,%d(%s)",gpn(r),d,gpn(b)); } while(0)
+
+#define SETE(r)		do { ALU2(0x0f94,(r),(r));			asm_output1("sete %s",gpn(r)); } while(0)
+#define SETNP(r)	do { ALU2(0x0f9B,(r),(r));			asm_output1("setnp %s",gpn(r)); } while(0)
+#define SETL(r)		do { ALU2(0x0f9C,(r),(r));			asm_output1("setl %s",gpn(r)); } while(0)
+#define SETLE(r)	do { ALU2(0x0f9E,(r),(r));			asm_output1("setle %s",gpn(r)); } while(0)
+#define SETG(r)		do { ALU2(0x0f9F,(r),(r));			asm_output1("setg %s",gpn(r)); } while(0)
+#define SETGE(r)	do { ALU2(0x0f9D,(r),(r));			asm_output1("setge %s",gpn(r)); } while(0)
+#define SETB(r)     do { ALU2(0x0f92,(r),(r));          asm_output1("setb %s",gpn(r)); } while(0)
+#define SETBE(r)    do { ALU2(0x0f96,(r),(r));          asm_output1("setbe %s",gpn(r)); } while(0)
+#define SETA(r)     do { ALU2(0x0f97,(r),(r));          asm_output1("seta %s",gpn(r)); } while(0)
+#define SETAE(r)    do { ALU2(0x0f93,(r),(r));          asm_output1("setae %s",gpn(r)); } while(0)
+#define SETC(r)     do { ALU2(0x0f92,(r),(r));          asm_output1("setc %s",gpn(r)); } while(0)
+#define SETO(r)     do { ALU2(0x0f90,(r),(r));          asm_output1("seto %s",gpn(r)); } while(0)
+
+#define MREQ(dr,sr)	do { ALU2(0x0f44,dr,sr); asm_output2("cmove %s,%s", gpn(dr),gpn(sr)); } while(0)
+#define MRNE(dr,sr)	do { ALU2(0x0f45,dr,sr); asm_output2("cmovne %s,%s", gpn(dr),gpn(sr)); } while(0)
+#define MRL(dr,sr)	do { ALU2(0x0f4C,dr,sr); asm_output2("cmovl %s,%s", gpn(dr),gpn(sr)); } while(0)
+#define MRLE(dr,sr)	do { ALU2(0x0f4E,dr,sr); asm_output2("cmovle %s,%s", gpn(dr),gpn(sr)); } while(0)
+#define MRG(dr,sr)	do { ALU2(0x0f4F,dr,sr); asm_output2("cmovg %s,%s", gpn(dr),gpn(sr)); } while(0)
+#define MRGE(dr,sr)	do { ALU2(0x0f4D,dr,sr); asm_output2("cmovge %s,%s", gpn(dr),gpn(sr)); } while(0)
+#define MRB(dr,sr)	do { ALU2(0x0f42,dr,sr); asm_output2("cmovb %s,%s", gpn(dr),gpn(sr)); } while(0)
+#define MRBE(dr,sr)	do { ALU2(0x0f46,dr,sr); asm_output2("cmovbe %s,%s", gpn(dr),gpn(sr)); } while(0)
+#define MRA(dr,sr)	do { ALU2(0x0f47,dr,sr); asm_output2("cmova %s,%s", gpn(dr),gpn(sr)); } while(0)
+#define MRAE(dr,sr)	do { ALU2(0x0f43,dr,sr); asm_output2("cmovae %s,%s", gpn(dr),gpn(sr)); } while(0)
+
+// these aren't currently used but left in for reference
+//#define LDEQ(r,d,b) do { ALU2m(0x0f44,r,d,b); asm_output3("cmove %s,%d(%s)", gpn(r),d,gpn(b)); } while(0)
+//#define LDNEQ(r,d,b) do { ALU2m(0x0f45,r,d,b); asm_output3("cmovne %s,%d(%s)", gpn(r),d,gpn(b)); } while(0)
+
+#define LD(reg,disp,base)	do { 	\
+	ALUm(0x8b,reg,disp,base);	\
+	asm_output3("mov %s,%d(%s)",gpn(reg),disp,gpn(base)); } while(0)
+
+// load 16-bit, sign extend
+#define LD16S(r,d,b) do { ALU2m(0x0fbf,r,d,b); asm_output3("movsx %s,%d(%s)", gpn(r),d,gpn(b)); } while(0)
+
+// load 8-bit, zero extend
+// note: only 5-bit offsets (!) are supported for this, but that's all we need at the moment
+// (movzx actually allows larger offset modes, but 5-bit offsets give us an advantage in Thumb mode)
+#define LD8Z(r,d,b)	do { NanoAssert((d)>=0&&(d)<=31); ALU2m(0x0fb6,r,d,b); asm_output3("movzx %s,%d(%s)", gpn(r),d,gpn(b)); } while(0)
+
+#define LDi(r,i) do { \
+	underrunProtect(5);			\
+	IMM32(i);					\
+	NanoAssert(((unsigned)r)<8); \
+	*(--_nIns) = (uint8_t) (0xb8 | (r) );		\
+	asm_output2("mov %s,%d",gpn(r),i); } while(0)
+
+#define ST(base,disp,reg) do {  \
+	ALUm(0x89,reg,disp,base);	\
+	asm_output3("mov %d(%s),%s",disp,gpn(base),gpn(reg)); } while(0)
+
+#define STi(base,disp,imm)	do { \
+	underrunProtect(12);	\
+	IMM32(imm);				\
+	MODRMm(0, disp, base);	\
+	*(--_nIns) = 0xc7;		\
+	asm_output3("mov %d(%s),%d",disp,gpn(base),imm); } while(0)
+
+#define RET()   do { ALU0(0xc3); asm_output("ret"); } while(0)
+#define NOP() 	do { ALU0(0x90); asm_output("nop"); } while(0)
+#define INT3()  do { ALU0(0xcc); asm_output("int3"); } while(0)
+
+#define PUSHi(i) do { \
+	if (isS8(i)) { \
+		underrunProtect(2);			\
+		_nIns-=2; _nIns[0] = 0x6a; _nIns[1] = (uint8_t)(i); \
+		asm_output1("push %d",i); \
+	} else \
+		{ PUSHi32(i); } } while(0)
+
+#define PUSHi32(i)	do {	\
+	underrunProtect(5);	\
+	IMM32(i);			\
+	*(--_nIns) = 0x68;	\
+	asm_output1("push %d",i); } while(0)
+
+#define PUSHr(r) do {  \
+	underrunProtect(1);			\
+	NanoAssert(((unsigned)r)<8); \
+	*(--_nIns) = (uint8_t) ( 0x50 | (r) );	\
+	asm_output1("push %s",gpn(r)); } while(0)
+
+#define PUSHm(d,b) do { \
+	ALUm(0xff, 6, d, b);		\
+	asm_output2("push %d(%s)",d,gpn(b)); } while(0)
+
+#define POP(r) do { \
+	underrunProtect(1);			\
+	NanoAssert(((unsigned)r)<8); \
+	*(--_nIns) = (uint8_t) ( 0x58 | (r) ); \
+	asm_output1("pop %s",gpn(r)); } while(0)
+
+#define JCC(o,t,n) do { \
+	underrunProtect(6);	\
+	intptr_t tt = (intptr_t)t - (intptr_t)_nIns;	\
+	if (isS8(tt)) { \
+		verbose_only( NIns* next = _nIns; (void)next; ) \
+		_nIns -= 2; \
+		_nIns[0] = (uint8_t) ( 0x70 | (o) ); \
+		_nIns[1] = (uint8_t) (tt); \
+		asm_output2("%s %p",(n),(next+tt)); \
+	} else { \
+		verbose_only( NIns* next = _nIns; ) \
+		IMM32(tt); \
+		_nIns -= 2; \
+		_nIns[0] = 0x0f; \
+		_nIns[1] = (uint8_t) ( 0x80 | (o) ); \
+		asm_output2("%s %p",(n),(next+tt)); \
+	} } while(0)
+
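+// For example (a sketch): when the target lies within a signed-byte range of the
+// end of the branch, JE(t) emits the 2-byte short form 74 rel8; otherwise it
+// falls back to the 6-byte 0f 84 rel32 form.  In both cases the displacement is
+// measured from the end of the jcc, which is where _nIns points before emission.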
+#define JMP_long(t) do { \
+	underrunProtect(5);	\
+	intptr_t tt = (intptr_t)t - (intptr_t)_nIns;	\
+	JMP_long_nochk_offset(tt);	\
+	} while(0)
+
+#define JMP(t)		do { 	\
+   	underrunProtect(5);	\
+	intptr_t tt = (intptr_t)t - (intptr_t)_nIns;	\
+	if (isS8(tt)) { \
+		verbose_only( NIns* next = _nIns; (void)next; ) \
+		_nIns -= 2; \
+		_nIns[0] = 0xeb; \
+		_nIns[1] = (uint8_t) ( (tt)&0xff ); \
+		asm_output1("jmp %p",(next+tt)); \
+	} else { \
+		JMP_long_nochk_offset(tt);	\
+	} } while(0)
+
+#define JMPc 0xe9
+		
+#define JMP_long_placeholder()	do {\
+	underrunProtect(5);				\
+	JMP_long_nochk_offset(0xffffffff); } while(0)
+	
+// this should only be used when you can guarantee there is enough room on the page
+#define JMP_long_nochk_offset(o) do {\
+		verbose_only( NIns* next = _nIns; (void)next; ) \
+ 		IMM32((o)); \
+ 		*(--_nIns) = JMPc; \
+		asm_output1("jmp %p",(next+(o))); } while(0)
+
+#define JE(t)	JCC(0x04, t, "je")
+#define JNE(t)	JCC(0x05, t, "jne")
+#define JP(t)	JCC(0x0A, t, "jp")
+#define JNP(t)	JCC(0x0B, t, "jnp")
+
+#define JB(t)	JCC(0x02, t, "jb")
+#define JNB(t)	JCC(0x03, t, "jnb")
+#define JBE(t)	JCC(0x06, t, "jbe")
+#define JNBE(t) JCC(0x07, t, "jnbe")
+
+#define JA(t)	JCC(0x07, t, "ja")
+#define JNA(t)	JCC(0x06, t, "jna")
+#define JAE(t)	JCC(0x03, t, "jae")
+#define JNAE(t) JCC(0x02, t, "jnae")
+
+#define JL(t)	JCC(0x0C, t, "jl")
+#define JNL(t)	JCC(0x0D, t, "jnl")
+#define JLE(t)	JCC(0x0E, t, "jle")
+#define JNLE(t)	JCC(0x0F, t, "jnle")
+
+#define JG(t)	JCC(0x0F, t, "jg")
+#define JNG(t)	JCC(0x0E, t, "jng")
+#define JGE(t)	JCC(0x0D, t, "jge")
+#define JNGE(t)	JCC(0x0C, t, "jnge")
+
+#define JC(t)   JCC(0x02, t, "jc")
+#define JNC(t)  JCC(0x03, t, "jnc")
+#define JO(t)   JCC(0x00, t, "jo")
+#define JNO(t)  JCC(0x01, t, "jno")
+
+// sse instructions 
+#define SSE(c,d,s)  \
+		underrunProtect(9);	\
+		MODRM((d),(s));	\
+		_nIns -= 3; \
+ 		_nIns[0] = (uint8_t)(((c)>>16)&0xff); \
+		_nIns[1] = (uint8_t)(((c)>>8)&0xff); \
+		_nIns[2] = (uint8_t)((c)&0xff)
+
+#define SSEm(c,r,d,b)	\
+		underrunProtect(9);	\
+ 		MODRMm((r),(d),(b));	\
+		_nIns -= 3;		\
+ 		_nIns[0] = (uint8_t)(((c)>>16)&0xff); \
+		_nIns[1] = (uint8_t)(((c)>>8)&0xff); \
+		_nIns[2] = (uint8_t)((c)&0xff)
+
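+// For example (a sketch): LDSD(XMM1, 8, EBP) below expands to
+// SSEm(0xf20f10, 1, 8, EBP) and emits f2 0f 10 4d 08, i.e. "movsd xmm1, 8(%ebp)";
+// the three bytes of the constant are the mandatory prefix plus the two opcode bytes.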
+#define LDSD(r,d,b)do {     \
+    SSEm(0xf20f10, (r)&7, (d), (b)); \
+    asm_output3("movsd %s,%d(%s)",gpn(r),(d),gpn(b)); \
+    } while(0)
+
+#define LDSDm(r,addr)do {     \
+    underrunProtect(8); \
+	const double* daddr = addr; \
+    IMM32(int32_t(daddr));\
+    *(--_nIns) = uint8_t(((r)&7)<<3|5); \
+    *(--_nIns) = 0x10;\
+    *(--_nIns) = 0x0f;\
+    *(--_nIns) = 0xf2;\
+    asm_output3("movsd %s,%p // =%f",gpn(r),daddr,*daddr); \
+    } while(0)
+
+#define STSD(d,b,r)do {     \
+    SSEm(0xf20f11, (r)&7, (d), (b)); \
+    asm_output3("movsd %d(%s),%s",(d),gpn(b),gpn(r)); \
+    } while(0)
+
+#define LDQ(r,d,b)do {  \
+    SSEm(0xf30f7e, (r)&7, (d), (b)); \
+    asm_output3("movq %s,%d(%s)",gpn(r),d,gpn(b)); \
+    } while(0)
+
+#define STQ(d,b,r)do {  \
+    SSEm(0x660fd6, (r)&7, (d), (b)); \
+    asm_output3("movq %d(%s),%s",(d),gpn(b),gpn(r)); \
+    } while(0)
+
+#define CVTSI2SD(xr,gr) do{ \
+    SSE(0xf20f2a, (xr)&7, (gr)&7); \
+    asm_output2("cvtsi2sd %s,%s",gpn(xr),gpn(gr)); \
+    } while(0)
+
+#define CVTDQ2PD(dstr,srcr) do{ \
+    SSE(0xf30fe6, (dstr)&7, (srcr)&7); \
+    asm_output2("cvtdq2pd %s,%s",gpn(dstr),gpn(srcr)); \
+    } while(0)
+
+// move 32 bits between a gpreg and an xmm reg (zero-extending into the xmm reg); handles either direction
+#define MOVD(d,s) do{ \
+	if (_is_xmm_reg_(s)) { \
+		NanoAssert(_is_gp_reg_(d)); \
+		SSE(0x660f7e, (s)&7, (d)&7); \
+	} else { \
+		NanoAssert(_is_gp_reg_(s)); \
+		NanoAssert(_is_xmm_reg_(d)); \
+		SSE(0x660f6e, (d)&7, (s)&7); \
+	} \
+    asm_output2("movd %s,%s",gpn(d),gpn(s)); \
+    } while(0)
+
+#define MOVSD(rd,rs) do{ \
+    SSE(0xf20f10, (rd)&7, (rs)&7); \
+    asm_output2("movsd %s,%s",gpn(rd),gpn(rs)); \
+    } while(0)
+
+#define STD(d,b,xrs) do {\
+    SSEm(0x660f7e, (xrs)&7, d, b);\
+    asm_output3("movd %d(%s),%s", d, gpn(b), gpn(xrs));\
+    } while(0)
+
+#define ADDSD(rd,rs) do{ \
+    SSE(0xf20f58, (rd)&7, (rs)&7); \
+    asm_output2("addsd %s,%s",gpn(rd),gpn(rs)); \
+    } while(0)
+
+#define ADDSDm(r,addr)do {     \
+    underrunProtect(8); \
+	const double* daddr = addr; \
+    IMM32(int32_t(daddr));\
+    *(--_nIns) = uint8_t(((r)&7)<<3|5); \
+    *(--_nIns) = 0x58;\
+    *(--_nIns) = 0x0f;\
+    *(--_nIns) = 0xf2;\
+    asm_output3("addsd %s,%p // =%f",gpn(r),daddr,*daddr); \
+    } while(0)
+
+#define SUBSD(rd,rs) do{ \
+    SSE(0xf20f5c, (rd)&7, (rs)&7); \
+    asm_output2("subsd %s,%s",gpn(rd),gpn(rs)); \
+    } while(0)
+#define MULSD(rd,rs) do{ \
+    SSE(0xf20f59, (rd)&7, (rs)&7); \
+    asm_output2("mulsd %s,%s",gpn(rd),gpn(rs)); \
+    } while(0)
+#define DIVSD(rd,rs) do{ \
+    SSE(0xf20f5e, (rd)&7, (rs)&7); \
+    asm_output2("divsd %s,%s",gpn(rd),gpn(rs)); \
+    } while(0)
+#define UCOMISD(rl,rr) do{ \
+    SSE(0x660f2e, (rl)&7, (rr)&7); \
+    asm_output2("ucomisd %s,%s",gpn(rl),gpn(rr)); \
+    } while(0)
+
+#define CVTSI2SDm(xr,d,b) do{ \
+    SSEm(0xf20f2a, (xr)&7, (d), (b)); \
+    asm_output3("cvtsi2sd %s,%d(%s)",gpn(xr),(d),gpn(b)); \
+    } while(0)
+
+#define XORPD(r, maskaddr) do {\
+    underrunProtect(8); \
+    IMM32(maskaddr);\
+    *(--_nIns) = uint8_t(((r)&7)<<3|5); \
+    *(--_nIns) = 0x57;\
+    *(--_nIns) = 0x0f;\
+    *(--_nIns) = 0x66;\
+    asm_output2("xorpd %s,[0x%p]",gpn(r),(maskaddr));\
+    } while(0)
+
+#define XORPDr(rd,rs) do{ \
+    SSE(0x660f57, (rd)&7, (rs)&7); \
+    asm_output2("xorpd %s,%s",gpn(rd),gpn(rs)); \
+    } while(0)
+
+// floating point unit
+#define FPUc(o)								\
+		underrunProtect(2);					\
+		*(--_nIns) = ((uint8_t)(o)&0xff);		\
+		*(--_nIns) = (uint8_t)(((o)>>8)&0xff)
+
+#define FPU(o,r)							\
+		underrunProtect(2);					\
+		*(--_nIns) = uint8_t(((uint8_t)(o)&0xff) | r&7);\
+		*(--_nIns) = (uint8_t)(((o)>>8)&0xff)
+
+#define FPUm(o,d,b)							\
+		underrunProtect(7);					\
+		MODRMm((uint8_t)(o), d, b);			\
+		*(--_nIns) = (uint8_t)((o)>>8)
+
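+// For example (a sketch): FLDQ(8, EBP) below uses FPUm(0xdd00, 8, EBP) and emits
+// dd 45 08, i.e. "fldl 8(%ebp)"; the low byte of the constant doubles as the
+// ModR/M /digit, which is why FSTQ uses 0xdd02|(p) for fst/fstp.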
+#define TEST_AH(i) do { 							\
+		underrunProtect(3);					\
+		*(--_nIns) = ((uint8_t)(i));			\
+		*(--_nIns) = 0xc4;					\
+		*(--_nIns) = 0xf6;					\
+		asm_output1("test ah, %d",i); } while(0)
+
+#define TEST_AX(i) do { 							\
+		underrunProtect(5);					\
+		*(--_nIns) = (0);		\
+		*(--_nIns) = ((uint8_t)(i));			\
+		*(--_nIns) = ((uint8_t)((i)>>8));		\
+		*(--_nIns) = (0);		\
+		*(--_nIns) = 0xa9;					\
+		asm_output1("test ax, %d",i); } while(0)
+
+#define FNSTSW_AX()	do { FPUc(0xdfe0);				asm_output("fnstsw_ax"); } while(0)
+#define FCHS()		do { FPUc(0xd9e0);				asm_output("fchs"); } while(0)
+#define FLD1()		do { FPUc(0xd9e8);				asm_output("fld1"); fpu_push(); } while(0)
+#define FLDZ()		do { FPUc(0xd9ee);				asm_output("fldz"); fpu_push(); } while(0)
+#define FFREE(r)	do { FPU(0xddc0, r);			asm_output1("ffree %s",fpn(r)); } while(0)
+#define FSTQ(p,d,b)	do { FPUm(0xdd02|(p), d, b);	asm_output3("fst%sq %d(%s)",((p)?"p":""),d,gpn(b)); if (p) fpu_pop(); } while(0)
+#define FSTPQ(d,b)  FSTQ(1,d,b)
+#define FCOM(p,d,b)	do { FPUm(0xdc02|(p), d, b);	asm_output3("fcom%s %d(%s)",((p)?"p":""),d,gpn(b)); if (p) fpu_pop(); } while(0)
+#define FLDQ(d,b)	do { FPUm(0xdd00, d, b);		asm_output2("fldq %d(%s)",d,gpn(b)); fpu_push();} while(0)
+#define FILDQ(d,b)	do { FPUm(0xdf05, d, b);		asm_output2("fildq %d(%s)",d,gpn(b)); fpu_push(); } while(0)
+#define FILD(d,b)	do { FPUm(0xdb00, d, b);		asm_output2("fild %d(%s)",d,gpn(b)); fpu_push(); } while(0)
+#define FADD(d,b)	do { FPUm(0xdc00, d, b);		asm_output2("fadd %d(%s)",d,gpn(b)); } while(0)
+#define FSUB(d,b)	do { FPUm(0xdc04, d, b);		asm_output2("fsub %d(%s)",d,gpn(b)); } while(0)
+#define FSUBR(d,b)	do { FPUm(0xdc05, d, b);		asm_output2("fsubr %d(%s)",d,gpn(b)); } while(0)
+#define FMUL(d,b)	do { FPUm(0xdc01, d, b);		asm_output2("fmul %d(%s)",d,gpn(b)); } while(0)
+#define FDIV(d,b)	do { FPUm(0xdc06, d, b);		asm_output2("fdiv %d(%s)",d,gpn(b)); } while(0)
+#define FDIVR(d,b)	do { FPUm(0xdc07, d, b);		asm_output2("fdivr %d(%s)",d,gpn(b)); } while(0)
+#define FINCSTP()	do { FPUc(0xd9f7);				asm_output("fincstp"); } while(0)
+#define FSTP(r)		do { FPU(0xddd8, r&7);			asm_output1("fstp %s",fpn(r)); fpu_pop();} while(0)
+#define FCOMP()		do { FPUc(0xD8D9);				asm_output("fcomp"); fpu_pop();} while(0)
+#define FCOMPP()	do { FPUc(0xDED9);				asm_output("fcompp"); fpu_pop();fpu_pop();} while(0)
+#define FLDr(r)		do { FPU(0xd9c0,r);				asm_output1("fld %s",fpn(r)); fpu_push(); } while(0)
+#define EMMS()		do { FPUc(0x0f77);				asm_output("emms"); } while (0)
+
+#define CALL(a,nm)	do { \
+	  underrunProtect(5);					\
+	  int offset = (a) - ((int)_nIns); \
+	  IMM32( (uint32_t)offset );	\
+	  *(--_nIns) = 0xE8;		\
+	  asm_output1("call %s",(nm)); \
+	} while (0)
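+// For example (a sketch, foo being any call target): CALL((int)&foo, "foo")
+// emits e8 followed by the 32-bit displacement from the end of the call to foo;
+// since code is emitted backwards, _nIns already holds that return address when
+// the offset is computed.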
+}
+#endif // __nanojit_Nativei386__
--- a/js/src/nanojit/avmplus.h
+++ b/js/src/nanojit/avmplus.h
@@ -47,18 +47,16 @@
 
 #define AvmAssert(x) assert(x)
 
 typedef JSUint8 uint8_t;
 typedef JSUint16 uint16_t;
 typedef JSUint32 uint32_t;
 typedef JSUint64 uint64_t;
 
-#define NJ_PAGE_SIZE 4096
-
 class GC 
 {
 };
 
 namespace avmplus
 {
     class AvmCore 
     {
--- a/js/src/nanojit/nanojit.h
+++ b/js/src/nanojit/nanojit.h
@@ -152,15 +152,15 @@ namespace nanojit
 
 #define alignTo(x,s)		((((uint32_t)(x)))&~((s)-1))
 #define alignUp(x,s)		((((uint32_t)(x))+((s)-1))&~((s)-1))
 
 #define pageTop(x)			( (int*)alignTo(x,NJ_PAGE_SIZE) )
 #define pageBottom(x)		( (int*)(alignTo(x,NJ_PAGE_SIZE)+NJ_PAGE_SIZE)-1 )
 #define samepage(x,y)		(pageTop(x) == pageTop(y))
 
-/*#include "Native.h"
-#include "LIR.h"
+#include "Native.h"
+/*#include "LIR.h"
 #include "RegAlloc.h"
 #include "Fragmento.h"
 #include "Assembler.h"*/
 
 #endif // __nanojit_h_