Comment Bug 602389 - nanojit: make Register a non-numeric type on MIPS. r=nnethercote.
author: Chris Dearman <chris@mips.com>
Sun, 24 Jul 2011 19:55:58 -0700
changeset 77103 806abed2a8f77dbaa627688b60647f75c4665af1
parent 77102 cd22ad96188785a55e28ed172c70e1ce867f991a
child 77104 0412880dec39053fe7cffcc0222704a840db6522
push id: 78
push user: clegnitto@mozilla.com
push date: Fri, 16 Dec 2011 17:32:24 +0000
treeherder: mozilla-release@79d24e644fdd [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: nnethercote
bugs: 602389
milestone: 9.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Comment Bug 602389 - nanojit: make Register a non-numeric type on MIPS. r=nnethercote.
js/src/nanojit/NativeMIPS.cpp
js/src/nanojit/NativeMIPS.h
--- a/js/src/nanojit/NativeMIPS.cpp
+++ b/js/src/nanojit/NativeMIPS.cpp
@@ -114,21 +114,21 @@ namespace nanojit
         return isLittleEndian() ? 4 : 0;
     }
 
     static inline int lswoff(void) {
         return isLittleEndian() ? 0 : 4;
     }
 
     static inline Register mswregpair(Register r) {
-        return Register(r + (isLittleEndian() ? 1 : 0));
+        return isLittleEndian() ? r+1 : r;
     }
 
     static inline Register lswregpair(Register r) {
-        return Register(r + (isLittleEndian() ? 0 : 1));
+        return isLittleEndian() ? r : r+1;
     }
 
 // These variables affect the code generator
 // They can be defined as constants and the compiler will remove
 // the unused paths through dead code elimination
 // Alternatively they can be defined as variables which will allow
 // the exact code generated to be determined at runtime
 //
@@ -273,62 +273,62 @@ namespace nanojit
         MOVE(d, s);
     }
 
     // General load/store operation
     void Assembler::asm_ldst(int op, Register rt, int dr, Register rbase)
     {
 #if !PEDANTIC
         if (isS16(dr)) {
-            LDST(op, rt, dr, rbase);
+            LDSTGPR(op, rt, dr, rbase);
             return;
         }
 #endif
 
         // lui AT,hi(d)
         // addu AT,rbase
         // ldst rt,lo(d)(AT)
-        LDST(op, rt, lo(dr), AT);
+        LDSTGPR(op, rt, lo(dr), AT);
         ADDU(AT, AT, rbase);
         LUI(AT, hi(dr));
     }
 
     void Assembler::asm_ldst64(bool store, Register r, int dr, Register rbase)
     {
 #if !PEDANTIC
         if (isS16(dr) && isS16(dr+4)) {
             if (IsGpReg(r)) {
-                LDST(store ? OP_SW : OP_LW, r+1, dr+4, rbase);
-                LDST(store ? OP_SW : OP_LW, r,   dr, rbase);
+                LDSTGPR(store ? OP_SW : OP_LW, r+1, dr+4, rbase);
+                LDSTGPR(store ? OP_SW : OP_LW, r,   dr,   rbase);
             }
             else {
                 NanoAssert(cpu_has_fpu);
                 // NanoAssert((dr & 7) == 0);
                 if (cpu_has_lsdc1 && ((dr & 7) == 0)) {
                     // lsdc1 $fr,dr($rbase)
-                    LDST(store ? OP_SDC1 : OP_LDC1, r, dr, rbase);
+                    LDSTFPR(store ? OP_SDC1 : OP_LDC1, r, dr, rbase);
                 }
                 else {
                     // lswc1 $fr,  dr+LSWOFF($rbase)
                     // lswc1 $fr+1,dr+MSWOFF($rbase)
-                    LDST(store ? OP_SWC1 : OP_LWC1, r+1, dr+mswoff(), rbase);
-                    LDST(store ? OP_SWC1 : OP_LWC1, r,   dr+lswoff(), rbase);
+                    LDSTFPR(store ? OP_SWC1 : OP_LWC1, r+1, dr+mswoff(), rbase);
+                    LDSTFPR(store ? OP_SWC1 : OP_LWC1, r,   dr+lswoff(), rbase);
                 }
                 return;
             }
         }
 #endif
 
         if (IsGpReg(r)) {
             // lui   $at,%hi(d)
             // addu  $at,$rbase
             // ldsw  $r,  %lo(d)($at)
             // ldst  $r+1,%lo(d+4)($at)
-            LDST(store ? OP_SW : OP_LW, r+1, lo(dr+4), AT);
-            LDST(store ? OP_SW : OP_LW, r,   lo(dr), AT);
+            LDSTGPR(store ? OP_SW : OP_LW, r+1, lo(dr+4), AT);
+            LDSTGPR(store ? OP_SW : OP_LW, r,   lo(dr), AT);
             ADDU(AT, AT, rbase);
             LUI(AT, hi(dr));
         }
         else {
             NanoAssert(cpu_has_fpu);
             if (cpu_has_lsdxc1) {
                 // li     $at,dr
                 // lsdcx1 $r,$at($rbase)
@@ -337,27 +337,27 @@ namespace nanojit
                 else
                     LDXC1(r, AT, rbase);
                 asm_li(AT, dr);
             }
             else if (cpu_has_lsdc1) {
                 // lui    $at,%hi(dr)
                 // addu   $at,$rbase
                 // lsdc1  $r,%lo(dr)($at)
-                LDST(store ? OP_SDC1 : OP_LDC1, r, lo(dr), AT);
+                LDSTFPR(store ? OP_SDC1 : OP_LDC1, r, lo(dr), AT);
                 ADDU(AT, AT, rbase);
                 LUI(AT, hi(dr));
             }
             else {
                 // lui   $at,%hi(d)
                 // addu  $at,$rbase
                 // lswc1 $r,  %lo(d+LSWOFF)($at)
                 // lswc1 $r+1,%lo(d+MSWOFF)($at)
-                LDST(store ? OP_SWC1 : OP_LWC1, r+1, lo(dr+mswoff()), AT);
-                LDST(store ? OP_SWC1 : OP_LWC1, r,   lo(dr+lswoff()), AT);
+                LDSTFPR(store ? OP_SWC1 : OP_LWC1, r+1, lo(dr+mswoff()), AT);
+                LDSTFPR(store ? OP_SWC1 : OP_LWC1, r,   lo(dr+lswoff()), AT);
                 ADDU(AT, AT, rbase);
                 LUI(AT, hi(dr));
             }
         }
     }
 
     void Assembler::asm_store_imm64(LIns *value, int dr, Register rbase)
     {
@@ -476,36 +476,36 @@ namespace nanojit
         NanoAssert(cpu_has_fpu);
 #endif
 
         // O32 ABI requires that 64-bit arguments are aligned on even-numbered
         // registers, as A0:A1/FA0 or A2:A3/FA1. Use the stack offset to keep track
         // where we are
         if (stkd & 4) {
             if (stkd < 16) {
-                r = Register(r + 1);
-                fr = Register(fr + 1);
+                r = r + 1;
+                fr = fr + 1;
             }
             stkd += 4;
         }
 
         if (stkd < 16) {
             NanoAssert(fr == FA0 || fr == FA1 || fr == A2);
             if (fr == FA0 || fr == FA1)
                 findSpecificRegFor(arg, fr);
             else {
                 findSpecificRegFor(arg, FA1);
                 // Move it to the integer pair
                 Register fpupair = arg->getReg();
                 Register intpair = fr;
-                MFC1(mswregpair(intpair), Register(fpupair + 1));  // Odd fpu register contains sign,expt,manthi
-                MFC1(lswregpair(intpair), fpupair);                // Even fpu register contains mantlo
+                MFC1(mswregpair(intpair), fpupair+1); // Odd fpu register contains sign,expt,manthi
+                MFC1(lswregpair(intpair), fpupair);   // Even fpu register contains mantlo
             }
-            r = Register(r + 2);
-            fr = Register(fr + 2);
+            r = r + 2;
+            fr = fr + 2;
         }
         else
             asm_stkarg(arg, stkd);
 
         stkd += 8;
     }
 
     /* Required functions */
@@ -1692,18 +1692,18 @@ namespace nanojit
 
         if (ty == ARGTYPE_D) {
             // This task is fairly complex and so is delegated to asm_arg_64.
             asm_arg_64(arg, r, fr, stkd);
         } else {
             NanoAssert(ty == ARGTYPE_I || ty == ARGTYPE_UI);
             if (stkd < 16) {
                 asm_regarg(ty, arg, r);
-                fr = Register(fr + 1);
-                r = Register(r + 1);
+                fr = fr + 1;
+                r = r + 1;
             }
             else
                 asm_stkarg(arg, stkd);
             // The o32 ABI calling convention is that if the first arguments
             // is not a double, subsequent double values are passed in integer registers
             fr = r;
             stkd += 4;
         }
@@ -1770,36 +1770,36 @@ namespace nanojit
         if (stkd > max_out_args)
             max_out_args = stkd;
         TAG("asm_call(ins=%p{%s})", ins, lirNames[ins->opcode()]);
     }
 
     Register
     Assembler::nRegisterAllocFromSet(RegisterMask set)
     {
-        Register i;
         int n;
 
         // note, deliberate truncation of 64->32 bits
         if (set & 0xffffffff) {
             // gp reg
             n = ffs(int(set));
             NanoAssert(n != 0);
-            i = Register(n - 1);
+            n = n - 1;
         }
         else {
             // fp reg
             NanoAssert(cpu_has_fpu);
             n = ffs(int(set >> 32));
             NanoAssert(n != 0);
-            i = Register(32 + n - 1);
+            n = 32 + n - 1;
         }
-        _allocator.free &= ~rmask(i);
-        TAG("nRegisterAllocFromSet(set=%016llx) => %s", set, gpn(i));
-        return i;
+        Register r = { n };
+        _allocator.free &= ~rmask(r);
+        TAG("nRegisterAllocFromSet(set=%016llx) => %s", set, gpn(r));
+        return r;
     }
 
     void
     Assembler::nRegisterResetAll(RegAlloc& regs)
     {
         regs.clear();
         regs.free = GpRegs;
         if (cpu_has_fpu)
@@ -1831,19 +1831,19 @@ namespace nanojit
                     // Full 32-bit jump
                     // bxx tramp
                     //  lui $at,(target>>16)>0xffff
                     // ..
                     // tramp:
                     // ori $at,target & 0xffff
                     // jr $at
                     //  nop
-                    branch[1] = U_FORMAT(OP_LUI,0,AT,hi(uint32_t(target)));
-                    tramp[0] = U_FORMAT(OP_ADDIU,AT,AT,lo(uint32_t(target)));
-                    tramp[1] = R_FORMAT(OP_SPECIAL,AT,0,0,0,SPECIAL_JR);
+                    branch[1] = U_FORMAT(OP_LUI, 0, GPR(AT), hi(uint32_t(target)));
+                    tramp[0] = U_FORMAT(OP_ADDIU, GPR(AT), GPR(AT), lo(uint32_t(target)));
+                    tramp[1] = R_FORMAT(OP_SPECIAL, GPR(AT), 0, 0, 0, SPECIAL_JR);
                 }
             }
         }
         else if (op == OP_J) {
             NanoAssert (SEG(branch) == SEG(target));
             branch[0] = J_FORMAT(OP_J,JINDEX(target));
         }
         else
--- a/js/src/nanojit/NativeMIPS.h
+++ b/js/src/nanojit/NativeMIPS.h
@@ -35,16 +35,18 @@
  * the provisions above, a recipient may use your version of this file under
  * the terms of any one of the MPL, the GPL or the LGPL.
  *
  * ***** END LICENSE BLOCK ***** */
 
 #ifndef __nanojit_NativeMIPS__
 #define __nanojit_NativeMIPS__
 
+#include "NativeCommon.h"
+
 #include "../vprof/vprof.h"
 #ifdef PERFM
 #define DOPROF
 #endif
 #define count_instr()   _nvprof("mips", 1)
 #define count_mov()     do { _nvprof("mips-mov", 1); count_instr(); } while (0)
 #define count_jmp()     do { _nvprof("mips-jmp", 1); count_instr(); } while (0)
 #define count_prolog()  do { _nvprof("mips-prolog", 1); count_instr(); } while (0)
@@ -61,19 +63,19 @@ namespace nanojit
     static const uint32_t NJ_MAX_STACK_ENTRY = 4096;
 #else
 #define NJ_MAX_STACK_ENTRY 4096
 #endif
     static const int NJ_ALIGN_STACK = 8;
 
     typedef uint32_t NIns;                // REQ: Instruction count
     typedef uint64_t RegisterMask;        // REQ: Large enough to hold LastRegNum-FirstRegNum bits
-#define _rmask_(r)        (1LL<<(r))
+#define _rmask_(r)      (1LL<<(r))
+#define REGMASK(r)      _rmask_(REGNUM(r))
 
-    typedef uint32_t Register;            // REQ: Register identifiers
     // Register numbers for Native code generator
     static const Register
         ZERO = { 0 },
         AT = { 1 },
         V0 = { 2 },
         V1 = { 3 },
         A0 = { 4 },
         A1 = { 5 },
@@ -158,23 +160,20 @@ namespace nanojit
         FS1 = F22,
         FS2 = F24,
         FS3 = F26,
         FS4 = F28,
         FS5 = F30,
 
         deprecated_UnknownReg = { 127 };    // XXX: remove eventually, see bug 538924
 
-    static const uint32_t FirstRegNum = ZERO;
-    static const uint32_t LastRegNum = F31;
+    static const uint32_t FirstRegNum = 0; // R0
+    static const uint32_t LastRegNum = 63; // F31
 }
 
-#define NJ_USE_UINT32_REGISTER 1
-#include "NativeCommon.h"
-
 namespace nanojit {
     // REQ: register names
     verbose_only(extern const char* regNames[];)
 
     // REQ: Bytes of icache to flush after Assembler::patch
     const size_t LARGEST_BRANCH_PATCH = 2 * sizeof(NIns);
 
     // REQ: largest value passed to underrunProtect
@@ -185,57 +184,75 @@ namespace nanojit {
     static const int NumSavedRegs = 14;
 #else
     static const int NumSavedRegs = 8;
 #endif
 
     // REQ: Callee saved registers
     const RegisterMask SavedRegs =
 #ifdef FPCALLEESAVED
-                    _rmask_(FS0) | _rmask_(FS1) | _rmask_(FS2) |
-                    _rmask_(FS3) | _rmask_(FS4) | _rmask_(FS5) |
+        REGMASK(FS0) | REGMASK(FS1) | REGMASK(FS2) |
+        REGMASK(FS3) | REGMASK(FS4) | REGMASK(FS5) |
 #endif
-                    _rmask_(S0) | _rmask_(S1) | _rmask_(S2) | _rmask_(S3) |
-                    _rmask_(S4) | _rmask_(S5) | _rmask_(S6) | _rmask_(S7);
+        REGMASK(S0) | REGMASK(S1) | REGMASK(S2) | REGMASK(S3) |
+        REGMASK(S4) | REGMASK(S5) | REGMASK(S6) | REGMASK(S7);
 
     // REQ: General purpose registers
     static const RegisterMask GpRegs =
-                    _rmask_(V0) | _rmask_(V1) |
-                    _rmask_(A0) | _rmask_(A1) | _rmask_(A2) | _rmask_(A3) |
-                    _rmask_(S0) | _rmask_(S1) | _rmask_(S2) | _rmask_(S3) |
-                    _rmask_(S4) | _rmask_(S5) | _rmask_(S6) | _rmask_(S7) |
-                    _rmask_(T0) | _rmask_(T1) | _rmask_(T2) | _rmask_(T3) |
-                    _rmask_(T4) | _rmask_(T5) | _rmask_(T6) | _rmask_(T7) |
-                    _rmask_(T8) | _rmask_(T9);
+        REGMASK(V0) | REGMASK(V1) |
+        REGMASK(A0) | REGMASK(A1) | REGMASK(A2) | REGMASK(A3) |
+        REGMASK(S0) | REGMASK(S1) | REGMASK(S2) | REGMASK(S3) |
+        REGMASK(S4) | REGMASK(S5) | REGMASK(S6) | REGMASK(S7) |
+        REGMASK(T0) | REGMASK(T1) | REGMASK(T2) | REGMASK(T3) |
+        REGMASK(T4) | REGMASK(T5) | REGMASK(T6) | REGMASK(T7) |
+        REGMASK(T8) | REGMASK(T9);
 
     // REQ: Floating point registers
     static const RegisterMask FpRegs =
 #ifdef FPCALLEESAVED
-                    _rmask_(FS0) | _rmask_(FS1) | _rmask_(FS2) |
-                    _rmask_(FS3) | _rmask_(FS4) | _rmask_(FS5) |
+        REGMASK(FS0) | REGMASK(FS1) | REGMASK(FS2) |
+        REGMASK(FS3) | REGMASK(FS4) | REGMASK(FS5) |
 #endif
-                    _rmask_(FV0) | _rmask_(FV1) |
-                    _rmask_(FA0) | _rmask_(FA1) |
-                    _rmask_(FT0) | _rmask_(FT1) | _rmask_(FT2) |
-                    _rmask_(FT3) | _rmask_(FT4) | _rmask_(FT5);
+        REGMASK(FV0) | REGMASK(FV1) |
+        REGMASK(FA0) | REGMASK(FA1) |
+        REGMASK(FT0) | REGMASK(FT1) | REGMASK(FT2) |
+        REGMASK(FT3) | REGMASK(FT4) | REGMASK(FT5);
 
-    static const RegisterMask AllowableFlagRegs = GpRegs;        // REQ: Registers that can hold flag results FIXME
+    static inline bool IsGpReg(Register r)
+    {
+        return (REGMASK(r) & GpRegs) != 0;
+    }
 
     static inline bool IsFpReg(Register r)
     {
-        return (_rmask_(r) & FpRegs) != 0;
+        return (REGMASK(r) & FpRegs) != 0;
+    }
+
+    static inline bool IsGPR(Register r)
+    {
+        return (r >= ZERO && r <= RA);
     }
 
-    static inline bool IsGpReg(Register r)
+    static inline bool IsFPR(Register r)
     {
-        return (_rmask_(r) & GpRegs) != 0;
+        return (r >= F0 && r <= F31);
+
     }
 
-#define GPR(r) ((r)&31)
-#define FPR(r) ((r)&31)
+    static inline uint32_t GPR(Register r)
+    {
+        NanoAssertMsg(IsGPR(r), "Not GPR");
+        return REGNUM(r)&31;
+    }
+
+    static inline uint32_t FPR(Register r)
+    {
+        NanoAssertMsg(IsFPR(r), "Not FPR");
+        return REGNUM(r)&31;
+    }
 
 // REQ: Platform specific declarations to include in Stats structure
 #define DECLARE_PLATFORM_STATS()
 
 // REQ: Platform specific declarations to include in Assembler class
 #define DECLARE_PLATFORM_ASSEMBLER()                                    \
     const static Register argRegs[4];                                   \
     const static Register retRegs[2];                                   \
@@ -407,222 +424,245 @@ namespace nanojit {
 #define COND_NGE        0xd
 #define COND_LE         0xe
 #define COND_NGT        0xf
 
 // Helper definitions to encode different classes of MIPS instructions
 // Parameters are in instruction order
 
 #define R_FORMAT(op, rs, rt, rd, re, func)                              \
-    (((op)<<26)|(GPR(rs)<<21)|(GPR(rt)<<16)|(GPR(rd)<<11)|((re)<<6)|(func))
+    (((op)<<26)|((rs)<<21)|((rt)<<16)|((rd)<<11)|((re)<<6)|(func))
 
 #define I_FORMAT(op, rs, rt, simm)                              \
-    (((op)<<26)|(GPR(rs)<<21)|(GPR(rt)<<16)|((simm)&0xffff))
+    (((op)<<26)|((rs)<<21)|((rt)<<16)|((simm)&0xffff))
 
 #define J_FORMAT(op, index)                     \
     (((op)<<26)|(index))
 
 #define U_FORMAT(op, rs, rt, uimm)                              \
-    (((op)<<26)|(GPR(rs)<<21)|(GPR(rt)<<16)|((uimm)&0xffff))
+    (((op)<<26)|((rs)<<21)|((rt)<<16)|((uimm)&0xffff))
 
 #define F_FORMAT(op, ffmt, ft, fs, fd, func)                            \
-    (((op)<<26)|((ffmt)<<21)|(FPR(ft)<<16)|(FPR(fs)<<11)|(FPR(fd)<<6)|(func))
+    (((op)<<26)|((ffmt)<<21)|((ft)<<16)|((fs)<<11)|((fd)<<6)|(func))
+
+#define X_FORMAT(op, base, index, fs, fd, func)                            \
+    (((op)<<26)|((base)<<21)|((index)<<16)|((fs)<<11)|((fd)<<6)|(func))
 
 #define oname(op) Assembler::oname[op]
 #define cname(cond) Assembler::cname[cond]
 #define fname(ffmt) Assembler::fname[ffmt]
 #define fpn(fr) gpn(fr)
 
 #define BOFFSET(targ)    (uint32_t(targ - (_nIns+1)))
 
-#define LDST(op, rt, offset, base)                                      \
-    do { count_misc(); EMIT(I_FORMAT(op, base, rt, offset),             \
+#define LDSTGPR(op, rt, offset, base)                                      \
+    do { count_misc(); EMIT(I_FORMAT(op, GPR(base), GPR(rt), offset),   \
                             "%s %s, %d(%s)", oname[op], gpn(rt), offset, gpn(base)); } while (0)
 
-#define BX(op, rs, rt, targ)                                            \
-    do { count_br(); EMIT(I_FORMAT(op, rs, rt, BOFFSET(targ)),          \
-                          "%s %s, %s, %p", oname[op], gpn(rt), gpn(rs), targ); } while (0)
+#define LDSTFPR(op, ft, offset, base)                                      \
+    do { count_misc(); EMIT(I_FORMAT(op, GPR(base), FPR(ft), offset),   \
+                            "%s %s, %d(%s)", oname[op], fpn(ft), offset, gpn(base)); } while (0)
 
 // MIPS instructions
 // Parameters are in "assembler" order
 #define ADDIU(rt, rs, simm)                                             \
-    do { count_alu(); EMIT(I_FORMAT(OP_ADDIU, rs, rt, simm),            \
+    do { count_alu(); EMIT(I_FORMAT(OP_ADDIU, GPR(rs), GPR(rt), simm),  \
                            "addiu %s, %s, %d", gpn(rt), gpn(rs), simm); } while (0)
 
 #define trampADDIU(rt, rs, simm)                                        \
-    do { count_alu(); TRAMP(I_FORMAT(OP_ADDIU, rs, rt, simm),           \
+    do { count_alu(); TRAMP(I_FORMAT(OP_ADDIU, GPR(rs), GPR(rt), simm), \
                             "addiu %s, %s, %d", gpn(rt), gpn(rs), simm); } while (0)
 
 #define ADDU(rd, rs, rt)                                                \
-    do { count_alu(); EMIT(R_FORMAT(OP_SPECIAL, rs, rt, rd, 0, SPECIAL_ADDU), \
+    do { count_alu(); EMIT(R_FORMAT(OP_SPECIAL, GPR(rs), GPR(rt), GPR(rd), 0, SPECIAL_ADDU), \
                            "addu %s, %s, %s", gpn(rd), gpn(rs), gpn(rt)); } while (0)
 
 #define AND(rd, rs, rt)                                                 \
-    do { count_alu(); EMIT(R_FORMAT(OP_SPECIAL, rs, rt, rd, 0, SPECIAL_AND), \
+    do { count_alu(); EMIT(R_FORMAT(OP_SPECIAL, GPR(rs), GPR(rt), GPR(rd), 0, SPECIAL_AND), \
                            "and %s, %s, %s", gpn(rd), gpn(rs), gpn(rt)); } while (0)
 
 #define ANDI(rt, rs, uimm)                                              \
-    do { count_alu(); EMIT(U_FORMAT(OP_ANDI, rs, rt, uimm),             \
+    do { count_alu(); EMIT(U_FORMAT(OP_ANDI, GPR(rs), GPR(rt), uimm),   \
                            "andi %s, %s, 0x%x", gpn(rt), gpn(rs), ((uimm)&0xffff)); } while (0)
 
 #define BC1F(targ)                                                      \
     do { count_br(); EMIT(I_FORMAT(OP_COP1, COP1_BC, 0, BOFFSET(targ)), \
                           "bc1f %p", targ); } while (0)
 
 #define BC1T(targ)                                                      \
     do { count_br(); EMIT(I_FORMAT(OP_COP1, COP1_BC, 1, BOFFSET(targ)), \
                           "bc1t %p", targ); } while (0)
 
-#define B(targ)                 BX(OP_BEQ, ZERO, ZERO, targ)
-#define BEQ(rs, rt, targ)       BX(OP_BEQ, rs, rt, targ)
-#define BNE(rs, rt, targ)       BX(OP_BNE, rs, rt, targ)
-#define BLEZ(rs, targ)          BX(OP_BLEZ, rs, ZERO, targ)
-#define BGTZ(rs, targ)          BX(OP_BGTZ, rs, ZERO, targ)
-#define BGEZ(rs, targ)          BX(OP_REGIMM, rs, REGIMM_BGEZ, targ)
-#define BLTZ(rs, targ)          BX(OP_REGIMM, rs, REGIMM_BLTZ, targ)
+#define B(targ)                                                         \
+        do { count_br(); EMIT(I_FORMAT(OP_BEQ, GPR(ZERO), GPR(ZERO), BOFFSET(targ)), \
+                          "b %p", targ); } while (0)
+
+#define BEQ(rs, rt, targ)                                               \
+    do { count_br(); EMIT(I_FORMAT(OP_BEQ, GPR(rs), GPR(rt), BOFFSET(targ)), \
+                          "%s %s, %s, %p", oname[OP_BEQ], gpn(rs), gpn(rt), targ); } while (0)
+
+#define BNE(rs, rt, targ)                                               \
+    do { count_br(); EMIT(I_FORMAT(OP_BNE, GPR(rs), GPR(rt), BOFFSET(targ)), \
+                          "%s %s, %s, %p", oname[OP_BNE], gpn(rs), gpn(rt), targ); } while (0)
+
+#define BLEZ(rs, targ)                                                  \
+    do { count_br(); EMIT(I_FORMAT(OP_BLEZ, GPR(rs), 0, BOFFSET(targ)), \
+                          "%s %s, %p", oname[OP_BLEZ], gpn(rs), targ); } while (0)
+
+#define BGTZ(rs, targ)                                                  \
+    do { count_br(); EMIT(I_FORMAT(OP_BGTZ, GPR(rs), 0, BOFFSET(targ)), \
+                          "%s %s, %p", oname[OP_BGTZ], gpn(rs), targ); } while (0)
+
+#define BGEZ(rs, targ)                                                  \
+    do { count_br(); EMIT(I_FORMAT(OP_REGIMM, GPR(rs), REGIMM_BGEZ, BOFFSET(targ)), \
+                          "bgez %s, %p", gpn(rs), targ); } while (0)
+
+#define BLTZ(rs, targ)                                                  \
+    do { count_br(); EMIT(I_FORMAT(OP_REGIMM, GPR(rs), REGIMM_BLTZ, BOFFSET(targ)), \
+                          "bltz %s, %p", gpn(rs), targ); } while (0)
 
 #define JINDEX(dest) ((uint32_t(dest)>>2)&0x03ffffff)
 
 #define J(dest)                                             \
     do { count_jmp(); EMIT(J_FORMAT(OP_J, JINDEX(dest)),    \
                            "j %p", dest); } while (0)
 
 #define trampJ(dest)                                        \
     do { count_jmp(); TRAMP(J_FORMAT(OP_J, JINDEX(dest)),   \
                             "j %p", dest); } while (0)
 
 #define JAL(dest)                                           \
     do { count_jmp(); EMIT(J_FORMAT(OP_JAL, JINDEX(dest)),  \
                            "jal %p", dest); } while (0)
 
 #define JALR(rs)                                                        \
-    do { count_jmp(); EMIT(R_FORMAT(OP_SPECIAL, rs, 0, RA, 0, SPECIAL_JALR), \
+    do { count_jmp(); EMIT(R_FORMAT(OP_SPECIAL, GPR(rs), 0, GPR(RA), 0, SPECIAL_JALR), \
                            "jalr %s", gpn(rs)); } while (0)
 
 #define JR(rs)                                                            \
-    do { count_jmp(); EMIT(R_FORMAT(OP_SPECIAL, rs, 0, 0, 0, SPECIAL_JR), \
+    do { count_jmp(); EMIT(R_FORMAT(OP_SPECIAL, GPR(rs), 0, 0, 0, SPECIAL_JR), \
                            "jr %s", gpn(rs)); } while (0)
 #define trampJR(rs)                                                     \
-    do { count_jmp(); TRAMP(R_FORMAT(OP_SPECIAL, rs, 0, 0, 0, SPECIAL_JR), \
+    do { count_jmp(); TRAMP(R_FORMAT(OP_SPECIAL, GPR(rs), 0, 0, 0, SPECIAL_JR), \
                             "jr %s", gpn(rs)); } while (0)
 
 #define LB(rt, offset, base)                    \
-    LDST(OP_LB, rt, offset, base)
+    LDSTGPR(OP_LB, rt, offset, base)
 
 #define LH(rt, offset, base)                    \
-    LDST(OP_LH, rt, offset, base)
+    LDSTGPR(OP_LH, rt, offset, base)
 
 #define LUI(rt, uimm)                                                   \
-    do { count_alu(); EMIT(U_FORMAT(OP_LUI, 0, rt, uimm),               \
+    do { count_alu(); EMIT(U_FORMAT(OP_LUI, 0, GPR(rt), uimm),          \
                            "lui %s, 0x%x", gpn(rt), ((uimm)&0xffff)); } while (0)
 
 #define LW(rt, offset, base)                    \
-    LDST(OP_LW, rt, offset, base)
+    LDSTGPR(OP_LW, rt, offset, base)
 
 #define MFHI(rd)                                                        \
-    do { count_alu(); EMIT(R_FORMAT(OP_SPECIAL, 0, 0, rd, 0, SPECIAL_MFHI), \
+    do { count_alu(); EMIT(R_FORMAT(OP_SPECIAL, 0, 0, GPR(rd), 0, SPECIAL_MFHI), \
                            "mfhi %s", gpn(rd)); } while (0)
 
 #define MFLO(rd)                                                        \
-    do { count_alu(); EMIT(R_FORMAT(OP_SPECIAL, 0, 0, rd, 0, SPECIAL_MFLO), \
+    do { count_alu(); EMIT(R_FORMAT(OP_SPECIAL, 0, 0, GPR(rd), 0, SPECIAL_MFLO), \
                            "mflo %s", gpn(rd)); } while (0)
 
 #define MUL(rd, rs, rt)                                                 \
-    do { count_alu(); EMIT(R_FORMAT(OP_SPECIAL2, rs, rt, rd, 0, SPECIAL2_MUL), \
+    do { count_alu(); EMIT(R_FORMAT(OP_SPECIAL2, GPR(rs), GPR(rt), GPR(rd), 0, SPECIAL2_MUL), \
                            "mul %s, %s, %s", gpn(rd), gpn(rs), gpn(rt)); } while (0)
 
 #define MULT(rs, rt)                                                    \
-    do { count_alu(); EMIT(R_FORMAT(OP_SPECIAL, rs, rt, 0, 0, SPECIAL_MULT), \
+    do { count_alu(); EMIT(R_FORMAT(OP_SPECIAL, GPR(rs), GPR(rt), 0, 0, SPECIAL_MULT), \
                            "mult %s, %s", gpn(rs), gpn(rt)); } while (0)
 
 #define MOVE(rd, rs)                                                    \
-    do { count_alu(); EMIT(R_FORMAT(OP_SPECIAL, rs, ZERO, rd, 0, SPECIAL_ADDU), \
+    do { count_alu(); EMIT(R_FORMAT(OP_SPECIAL, GPR(rs), GPR(ZERO), GPR(rd), 0, SPECIAL_ADDU), \
                            "move %s, %s", gpn(rd), gpn(rs)); } while (0)
 
 #define MOVN(rd, rs, rt)                                                \
-    do { count_alu(); EMIT(R_FORMAT(OP_SPECIAL, rs, rt, rd, 0, SPECIAL_MOVN), \
+    do { count_alu(); EMIT(R_FORMAT(OP_SPECIAL, GPR(rs), GPR(rt), GPR(rd), 0, SPECIAL_MOVN), \
                            "movn %s, %s, %s", gpn(rd), gpn(rs), gpn(rt)); } while (0)
 
 #define NEGU(rd, rt)                                                    \
-    do { count_alu(); EMIT(R_FORMAT(OP_SPECIAL, ZERO, rt, rd, 0, SPECIAL_SUBU), \
+    do { count_alu(); EMIT(R_FORMAT(OP_SPECIAL, GPR(ZERO), GPR(rt), GPR(rd), 0, SPECIAL_SUBU), \
                            "negu %s, %s", gpn(rd), gpn(rt)); } while (0)
 
 #define NOP()                                                           \
-    do { count_misc(); EMIT(R_FORMAT(OP_SPECIAL, 0, 0, 0, 0, SPECIAL_SLL), \
+    do { count_misc(); EMIT(R_FORMAT(OP_SPECIAL, GPR(ZERO), GPR(ZERO), GPR(ZERO), 0, SPECIAL_SLL), \
                             "nop"); } while (0)
 
 #define trampNOP()                                                      \
-    do { count_misc(); TRAMP(R_FORMAT(OP_SPECIAL, 0, 0, 0, 0, SPECIAL_SLL), \
+    do { count_misc(); TRAMP(R_FORMAT(OP_SPECIAL, GPR(ZERO), GPR(ZERO), GPR(ZERO), 0, SPECIAL_SLL), \
                              "nop"); } while (0)
 
 #define NOR(rd, rs, rt)                                                 \
-    do { count_alu(); EMIT(R_FORMAT(OP_SPECIAL, rs, rt, rd, 0, SPECIAL_NOR), \
+    do { count_alu(); EMIT(R_FORMAT(OP_SPECIAL, GPR(rs), GPR(rt), GPR(rd), 0, SPECIAL_NOR), \
                            "nor %s, %s, %s", gpn(rd), gpn(rs), gpn(rt)); } while (0)
 
 #define NOT(rd, rs)                                                     \
-    do { count_alu(); EMIT(R_FORMAT(OP_SPECIAL, rs, ZERO, rd, 0, SPECIAL_NOR), \
+    do { count_alu(); EMIT(R_FORMAT(OP_SPECIAL, GPR(rs), GPR(ZERO), GPR(rd), 0, SPECIAL_NOR), \
                            "not %s, %s", gpn(rd), gpn(rs)); } while (0)
 
 #define OR(rd, rs, rt)                                                  \
-    do { count_alu(); EMIT(R_FORMAT(OP_SPECIAL, rs, rt, rd, 0, SPECIAL_OR), \
+    do { count_alu(); EMIT(R_FORMAT(OP_SPECIAL, GPR(rs), GPR(rt), GPR(rd), 0, SPECIAL_OR), \
                            "or %s, %s, %s", gpn(rd), gpn(rs), gpn(rt)); } while (0)
 
 #define ORI(rt, rs, uimm)                                               \
-    do { count_alu(); EMIT(U_FORMAT(OP_ORI, rs, rt, uimm),              \
+    do { count_alu(); EMIT(U_FORMAT(OP_ORI, GPR(rs), GPR(rt), uimm),    \
                            "ori %s, %s, 0x%x", gpn(rt), gpn(rs), ((uimm)&0xffff)); } while (0)
 
 #define SLTIU(rt, rs, simm)                                             \
-    do { count_alu(); EMIT(I_FORMAT(OP_SLTIU, rs, rt, simm),            \
+    do { count_alu(); EMIT(I_FORMAT(OP_SLTIU, GPR(rs), GPR(rt), simm),  \
                            "sltiu %s, %s, %d", gpn(rt), gpn(rs), simm); } while (0)
 
 #define SLT(rd, rs, rt)                                                 \
-    do { count_alu(); EMIT(R_FORMAT(OP_SPECIAL, rs, rt, rd, 0, SPECIAL_SLT), \
+    do { count_alu(); EMIT(R_FORMAT(OP_SPECIAL, GPR(rs), GPR(rt), GPR(rd), 0, SPECIAL_SLT), \
                            "slt %s, %s, %s", gpn(rd), gpn(rs), gpn(rt)); } while (0)
 
 #define SLTU(rd, rs, rt)                                                \
-    do { count_alu(); EMIT(R_FORMAT(OP_SPECIAL, rs, rt, rd, 0, SPECIAL_SLTU), \
+    do { count_alu(); EMIT(R_FORMAT(OP_SPECIAL, GPR(rs), GPR(rt), GPR(rd), 0, SPECIAL_SLTU), \
                            "sltu %s, %s, %s", gpn(rd), gpn(rs), gpn(rt)); } while (0)
 
 #define SLL(rd, rt, sa)                                                 \
-    do { count_alu(); EMIT(R_FORMAT(OP_SPECIAL, 0, rt, rd, sa, SPECIAL_SLL), \
+    do { count_alu(); EMIT(R_FORMAT(OP_SPECIAL, 0, GPR(rt), GPR(rd), sa, SPECIAL_SLL), \
                            "sll %s, %s, %d", gpn(rd), gpn(rt), sa); } while (0)
 
 #define SLLV(rd, rt, rs)                                                \
-    do { count_misc(); EMIT(R_FORMAT(OP_SPECIAL, rs, rt, rd, 0, SPECIAL_SLLV), \
+    do { count_misc(); EMIT(R_FORMAT(OP_SPECIAL, GPR(rs), GPR(rt), GPR(rd), 0, SPECIAL_SLLV), \
                             "sllv %s, %s, %s", gpn(rd), gpn(rt), gpn(rs)); } while (0)
 
 #define SRA(rd, rt, sa)                                                 \
-    do { count_alu(); EMIT(R_FORMAT(OP_SPECIAL, 0, rt, rd, sa, SPECIAL_SRA), \
+    do { count_alu(); EMIT(R_FORMAT(OP_SPECIAL, 0, GPR(rt), GPR(rd), sa, SPECIAL_SRA), \
                            "sra %s, %s, %d", gpn(rd), gpn(rt), sa); } while (0)
 
 #define SRAV(rd, rt, rs)                                                \
-    do { count_alu(); EMIT(R_FORMAT(OP_SPECIAL, rs, rt, rd, 0, SPECIAL_SRAV), \
+    do { count_alu(); EMIT(R_FORMAT(OP_SPECIAL, GPR(rs), GPR(rt), GPR(rd), 0, SPECIAL_SRAV), \
                            "srav %s, %s, %s", gpn(rd), gpn(rt), gpn(rs)); } while (0)
 
 #define SRL(rd, rt, sa)                                                 \
-    do { count_alu(); EMIT(R_FORMAT(OP_SPECIAL, 0, rt, rd, sa, SPECIAL_SRL), \
+    do { count_alu(); EMIT(R_FORMAT(OP_SPECIAL, 0, GPR(rt), GPR(rd), sa, SPECIAL_SRL), \
                            "srl %s, %s, %d", gpn(rd), gpn(rt), sa); } while (0)
 
 #define SRLV(rd, rt, rs)                                                \
-    do { count_alu(); EMIT(R_FORMAT(OP_SPECIAL, rs, rt, rd, 0, SPECIAL_SRLV), \
+    do { count_alu(); EMIT(R_FORMAT(OP_SPECIAL, GPR(rs), GPR(rt), GPR(rd), 0, SPECIAL_SRLV), \
                            "srlv %s, %s, %s", gpn(rd), gpn(rt), gpn(rs)); } while (0)
 
 #define SUBU(rd, rs, rt)                                                \
-    do { count_alu(); EMIT(R_FORMAT(OP_SPECIAL, rs, rt, rd, 0, SPECIAL_SUBU), \
+    do { count_alu(); EMIT(R_FORMAT(OP_SPECIAL, GPR(rs), GPR(rt), GPR(rd), 0, SPECIAL_SUBU), \
                            "subu %s, %s, %s", gpn(rd), gpn(rs), gpn(rt)); } while (0)
 
 #define SW(rt, offset, base)                    \
-    LDST(OP_SW, rt, offset, base)
+    LDSTGPR(OP_SW, rt, offset, base)
 
 #define XOR(rd, rs, rt)                                                 \
-    do { count_alu(); EMIT(R_FORMAT(OP_SPECIAL, rs, rt, rd, 0, SPECIAL_XOR), \
+    do { count_alu(); EMIT(R_FORMAT(OP_SPECIAL, GPR(rs), GPR(rt), GPR(rd), 0, SPECIAL_XOR), \
                            "xor %s, %s, %s", gpn(rd), gpn(rs), gpn(rt)); } while (0)
 
 #define XORI(rt, rs, uimm)                                              \
-    do { count_alu(); EMIT(U_FORMAT(OP_XORI, rs, rt, uimm),             \
+    do { count_alu(); EMIT(U_FORMAT(OP_XORI, GPR(rs), GPR(rt), uimm),   \
                            "xori %s, %s, 0x%x", gpn(rt), gpn(rs), ((uimm)&0xffff)); } while (0)
 
 
 /* FPU instructions */
 #ifdef NJ_SOFTFLOAT_SUPPORTED
 
 #if !defined(__mips_soft_float) || __mips_soft_float != 1
 #error NJ_SOFTFLOAT_SUPPORTED defined but not compiled with -msoft-float
@@ -652,57 +692,57 @@ namespace nanojit {
 
 #else
 
 #if defined(__mips_soft_float) && __mips_soft_float != 0
 #error compiled with -msoft-float but NJ_SOFTFLOAT_SUPPORTED not defined
 #endif
 
 #define FOP_FMT2(ffmt, fd, fs, func, name)                              \
-    do { count_fpu(); EMIT(F_FORMAT(OP_COP1, ffmt, 0, fs, fd, func),    \
+    do { count_fpu(); EMIT(F_FORMAT(OP_COP1, ffmt, FPR(F0), FPR(fs), FPR(fd), func), \
                            "%s.%s %s, %s", name, fname[ffmt], fpn(fd), fpn(fs)); } while (0)
 
 #define FOP_FMT3(ffmt, fd, fs, ft, func, name)                          \
-    do { count_fpu(); EMIT(F_FORMAT(OP_COP1, ffmt, ft, fs, fd, func),   \
+    do { count_fpu(); EMIT(F_FORMAT(OP_COP1, ffmt, FPR(ft), FPR(fs), FPR(fd), func), \
                            "%s.%s %s, %s, %s", name, fname[ffmt], fpn(fd), fpn(fs), fpn(ft)); } while (0)
 
 #define C_COND_FMT(cond, ffmt, fs, ft)                                  \
-    do { count_fpu(); EMIT(F_FORMAT(OP_COP1, ffmt, ft, fs, 0, 0x30|(cond)), \
+    do { count_fpu(); EMIT(F_FORMAT(OP_COP1, ffmt, FPR(ft), FPR(fs), FPR(F0), 0x30|(cond)), \
                            "c.%s.%s %s, %s", cname[cond], fname[ffmt], fpn(fs), fpn(ft)); } while (0)
 
 #define MFC1(rt, fs)                                                    \
-    do { count_fpu(); EMIT(F_FORMAT(OP_COP1, 0, rt, fs, 0, 0),          \
+    do { count_fpu(); EMIT(F_FORMAT(OP_COP1, 0, GPR(rt), FPR(fs), FPR(F0), 0), \
                            "mfc1 %s, %s", gpn(rt), fpn(fs)); } while (0)
 
 #define MTC1(rt, fs)                                                    \
-    do { count_fpu(); EMIT(F_FORMAT(OP_COP1, 4, rt, fs, 0, 0),          \
+    do { count_fpu(); EMIT(F_FORMAT(OP_COP1, 4, GPR(rt), FPR(fs), FPR(F0), 0), \
                            "mtc1 %s, %s", gpn(rt), fpn(fs)); } while (0)
 
 #define MOVF(rd, rs, cc)                                                \
-    do { count_fpu(); EMIT(R_FORMAT(OP_SPECIAL, rs, (cc)<<2, rd, 0, SPECIAL_MOVCI), \
+    do { count_fpu(); EMIT(R_FORMAT(OP_SPECIAL, GPR(rs), (cc)<<2, GPR(rd), 0, SPECIAL_MOVCI), \
                            "movf %s, %s, $fcc%d", gpn(rd), gpn(rs), cc); } while (0)
 
 #define CVT_D_W(fd, fs)                                                 \
-    do { count_fpu(); EMIT(F_FORMAT(OP_COP1, FMT_W, 0, fs, fd, COP1_CVTD), \
+    do { count_fpu(); EMIT(F_FORMAT(OP_COP1, FMT_W, FPR(F0), FPR(fs), FPR(fd), COP1_CVTD), \
                            "cvt.d.w %s, %s", fpn(fd), fpn(fs)); } while (0)
 
 #define TRUNC_W_D(fd, fs)                                               \
-    do { count_fpu(); EMIT(F_FORMAT(OP_COP1, FMT_D, 0, fs, fd, COP1_TRUNCW), \
+    do { count_fpu(); EMIT(F_FORMAT(OP_COP1, FMT_D, FPR(F0), FPR(fs), FPR(fd), COP1_TRUNCW), \
                            "trunc.w.d %s, %s", fpn(fd), fpn(fs)); } while (0)
 
 
-#define LWC1(ft, offset, base)  LDST(OP_LWC1, ft, offset, base)
-#define SWC1(ft, offset, base)  LDST(OP_SWC1, ft, offset, base)
-#define LDC1(ft, offset, base)  LDST(OP_LDC1, ft, offset, base)
-#define SDC1(ft, offset, base)  LDST(OP_SDC1, ft, offset, base)
+#define LWC1(ft, offset, base)  LDSTFPR(OP_LWC1, FPR(ft), offset, GPR(base))
+#define SWC1(ft, offset, base)  LDSTFPR(OP_SWC1, FPR(ft), offset, GPR(base))
+#define LDC1(ft, offset, base)  LDSTFPR(OP_LDC1, FPR(ft), offset, GPR(base))
+#define SDC1(ft, offset, base)  LDSTFPR(OP_SDC1, FPR(ft), offset, GPR(base))
 #define LDXC1(fd, index, base)                                          \
-    do { count_fpu(); EMIT(R_FORMAT(OP_COP1X, base, index, 0, fd, COP1X_LDXC1), \
+    do { count_fpu(); EMIT(X_FORMAT(OP_COP1X, GPR(base), GPR(index), FPR(F0), FPR(fd), COP1X_LDXC1), \
                            "ldxc1 %s, %s(%s)", fpn(fd), gpn(index), gpn(base)); } while (0)
 #define SDXC1(fs, index, base)                                          \
-    do { count_fpu(); EMIT(R_FORMAT(OP_COP1X, base, index, fs, 0, COP1X_SDXC1), \
+    do { count_fpu(); EMIT(X_FORMAT(OP_COP1X, GPR(base), GPR(index), FPR(fs), FPR(F0), COP1X_SDXC1), \
                            "sdxc1 %s, %s(%s)", fpn(fs), gpn(index), gpn(base)); } while (0)
 
 #define C_EQ_D(fs, ft)          C_COND_FMT(COND_EQ, FMT_D, fs, ft)
 #define C_LE_D(fs, ft)          C_COND_FMT(COND_LE, FMT_D, fs, ft)
 #define C_LT_D(fs, ft)          C_COND_FMT(COND_LT, FMT_D, fs, ft)
 #define ADD_D(fd, fs, ft)       FOP_FMT3(FMT_D, fd, fs, ft, COP1_ADD, "add")
 #define DIV_D(fd, fs, ft)       FOP_FMT3(FMT_D, fd, fs, ft, COP1_DIV, "div")
 #define MOV_D(fd, fs)           FOP_FMT2(FMT_D, fd, fs, COP1_MOV, "mov")