[arm] b=481761; ARM ALU step 1; r=graydon
author: Vladimir Vukicevic <vladimir@pobox.com>
Fri, 20 Mar 2009 15:53:14 -0700
changeset 26537 4d5c8ae3362f1611d57bd8d78a4ab8aef6613c70
parent 26536 ba9a838e2d45c68996bae455a85e0d2f2431c26d
child 26538 77f4c1affaa22182690760f258403b5ecdceee76
push id: 6115
push user: rsayre@mozilla.com
push date: Tue, 24 Mar 2009 17:50:03 +0000
treeherdermozilla-central@4a34c6235bb7 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: graydon
bugs: 481761
milestone: 1.9.2a1pre
[arm] b=481761; ARM ALU step 1; r=graydon
js/src/nanojit/Assembler.cpp
js/src/nanojit/NativeARM.cpp
js/src/nanojit/NativeARM.h
--- a/js/src/nanojit/Assembler.cpp
+++ b/js/src/nanojit/Assembler.cpp
@@ -597,17 +597,21 @@ namespace nanojit
 			// the already-allocated register isn't in the allowed mask;
 			// we need to grab a new one and then copy over the old
 			// contents to the new.
 			resv->reg = UnknownReg;
 			_allocator.retire(r);
 			Register s = resv->reg = registerAlloc(prefer);
 			_allocator.addActive(s, i);
             if ((rmask(r) & GpRegs) && (rmask(s) & GpRegs)) {
+#ifdef NANOJIT_ARM
+				MOV(r, s);
+#else
     			MR(r, s);
+#endif
             } 
             else {
 				asm_nongp_copy(r, s);
 			}
 			return s;
 		}
 	}
 
--- a/js/src/nanojit/NativeARM.cpp
+++ b/js/src/nanojit/NativeARM.cpp
@@ -101,17 +101,17 @@ Assembler::genPrologue()
     // Make room on stack for what we are doing
     if (amt)
         SUBi(SP, amt);
 
     verbose_only( verbose_outputf("         %p:",_nIns); )
     verbose_only( verbose_output("         patch entry"); )
     NIns *patchEntry = _nIns;
 
-    MR(FP, SP);
+    MOV(FP, SP);
     PUSH_mask(savingMask);
     return patchEntry;
 }
 
 void
 Assembler::nFragExit(LInsp guard)
 {
     SideExit* exit = guard->record()->exit;
@@ -129,17 +129,17 @@ Assembler::nFragExit(LInsp guard)
         // patching.
         JMP_far(_epilogue);
 
         // stick the jmp pointer to the start of the sequence
         lr->jmp = _nIns;
     }
 
     // pop the stack frame first
-    MR(SP, FP);
+    MOV(SP, FP);
 
 #ifdef NJ_VERBOSE
     if (_frago->core()->config.show_stats) {
         // load R1 with Fragment *fromFrag, target fragment
         // will make use of this when calling fragenter().
         int fromfrag = int((Fragment*)_thisfrag);
         LDi(argRegs[1], fromfrag);
     }
@@ -160,20 +160,20 @@ Assembler::genEpilogue()
     RegisterMask savingMask = rmask(FP) | rmask(LR);
 
     if (!_thisfrag->lirbuf->explicitSavedRegs)
         for (int i = 0; i < NumSavedRegs; ++i)
             savingMask |= rmask(savedRegs[i]);
 
     POP_mask(savingMask); // regs
 
-    MR(SP,FP);
+    MOV(SP,FP);
 
     // this is needed if we jump here from nFragExit
-    MR(R0,R2); // return LinkRecord*
+    MOV(R0,R2); // return LinkRecord*
 
     return _nIns;
 }
 
 /* ARM EABI (used by gcc/linux) calling conventions differ from Windows CE; use these
  * as the default.
  *
  * - double arg following an initial dword arg use r0 for the int arg
@@ -873,17 +873,17 @@ Assembler::BL(NIns* addr)
         asm_output("bl %p (32-bit)", addr);
     }
 }
 
 void
 Assembler::LD32_nochk(Register r, int32_t imm)
 {
     if (imm == 0) {
-        XOR(r, r);
+        EOR(r, r, r);
         return;
     }
 
     // We should always reach the const pool, since it's on the same page (<4096);
     // if we can't, someone didn't underrunProtect enough.
 
     *(++_nSlot) = (int)imm;
 
@@ -1102,24 +1102,26 @@ Assembler::asm_branch(bool branchOnFalse
 
         if (branchOnFalse) {
             switch (condop) {
                 case LIR_feq: cc = NE; break;
                 case LIR_flt: cc = PL; break;
                 case LIR_fgt: cc = LE; break;
                 case LIR_fle: cc = HI; break;
                 case LIR_fge: cc = LT; break;
+                default: NanoAssert(0); break;
             }
         } else {
             switch (condop) {
                 case LIR_feq: cc = EQ; break;
                 case LIR_flt: cc = MI; break;
                 case LIR_fgt: cc = GT; break;
                 case LIR_fle: cc = LS; break;
                 case LIR_fge: cc = GE; break;
+                default: NanoAssert(0); break;
             }
         }
 
         B_cond(cc, targ);
         asm_output("b(%d) 0x%08x", cc, (unsigned int) targ);
 
         NIns *at = _nIns;
         asm_fcmp(cond);
@@ -1222,31 +1224,32 @@ Assembler::asm_loop(LInsp ins, NInsList&
     // XXX asm_loop should be in Assembler.cpp!
 
     JMP_far(0);
     loopJumps.add(_nIns);
 
     // If the target we are looping to is in a different fragment, we have to restore
     // SP since we will target fragEntry and not loopEntry.
     if (ins->record()->exit->target != _thisfrag)
-        MR(SP,FP);
+        MOV(SP,FP);
 }
 
 void
 Assembler::asm_fcond(LInsp ins)
 {
     // only want certain regs
     Register r = prepResultReg(ins, AllowableFlagRegs);
 
     switch (ins->opcode()) {
         case LIR_feq: SET(r,EQ,NE); break;
         case LIR_flt: SET(r,MI,PL); break;
         case LIR_fgt: SET(r,GT,LE); break;
         case LIR_fle: SET(r,LS,HI); break;
         case LIR_fge: SET(r,GE,LT); break;
+        default: NanoAssert(0); break;
     }
 
     asm_fcmp(ins);
 }
 
 void
 Assembler::asm_cond(LInsp ins)
 {
@@ -1325,53 +1328,53 @@ Assembler::asm_arith(LInsp ins)
 
         if (op == LIR_add || op == LIR_addp)
             ADD(rr, rb);
         else if (op == LIR_sub)
             SUB(rr, rb);
         else if (op == LIR_mul)
             MUL(rr, rb);
         else if (op == LIR_and)
-            AND(rr, rb);
+            AND(rr, rr, rb);
         else if (op == LIR_or)
-            OR(rr, rb);
+            ORR(rr, rr, rb);
         else if (op == LIR_xor)
-            XOR(rr, rb);
+            EOR(rr, rr, rb);
         else if (op == LIR_lsh)
             SHL(rr, rb);
         else if (op == LIR_rsh)
             SAR(rr, rb);
         else if (op == LIR_ush)
             SHR(rr, rb);
         else
             NanoAssertMsg(0, "Unsupported");
     } else {
         int c = rhs->constval();
         if (op == LIR_add || op == LIR_addp)
             ADDi(rr, c);
         else if (op == LIR_sub)
                     SUBi(rr, c);
         else if (op == LIR_and)
-            ANDi(rr, c);
+            ANDi(rr, rr, c);
         else if (op == LIR_or)
-            ORi(rr, c);
+            ORRi(rr, rr, c);
         else if (op == LIR_xor)
-            XORi(rr, c);
+            EORi(rr, rr, c);
         else if (op == LIR_lsh)
             SHLi(rr, c);
         else if (op == LIR_rsh)
             SARi(rr, c);
         else if (op == LIR_ush)
             SHRi(rr, c);
         else
             NanoAssertMsg(0, "Unsupported");
     }
 
     if (rr != ra)
-        MR(rr,ra);
+        MOV(rr,ra);
 }
 
 void
 Assembler::asm_neg_not(LInsp ins)
 {
     LOpcode op = ins->opcode();
     Register rr = prepResultReg(ins, GpRegs);
 
@@ -1384,17 +1387,17 @@ Assembler::asm_neg_not(LInsp ins)
     // else, rA already has a register assigned.
 
     if (op == LIR_not)
         NOT(rr);
     else
         NEG(rr);
 
     if ( rr != ra )
-        MR(rr,ra);
+        MOV(rr,ra);
 }
 
 void
 Assembler::asm_ld(LInsp ins)
 {
     LOpcode op = ins->opcode();
     LIns* base = ins->oprnd1();
     LIns* disp = ins->oprnd2();
@@ -1441,27 +1444,27 @@ Assembler::asm_cmov(LInsp ins)
     const Register rr = prepResultReg(ins, GpRegs);
 
     // this code assumes that neither LD nor MR nor MRcc set any of the condition flags.
     // (This is true on Intel, is it true on all architectures?)
     const Register iffalsereg = findRegFor(iffalse, GpRegs & ~rmask(rr));
     if (op == LIR_cmov) {
         switch (condval->opcode()) {
             // note that these are all opposites...
-            case LIR_eq:    MRNE(rr, iffalsereg);   break;
-            case LIR_ov:    MRNO(rr, iffalsereg);   break;
-            case LIR_cs:    MRNC(rr, iffalsereg);   break;
-            case LIR_lt:    MRGE(rr, iffalsereg);   break;
-            case LIR_le:    MRG(rr, iffalsereg);    break;
-            case LIR_gt:    MRLE(rr, iffalsereg);   break;
-            case LIR_ge:    MRL(rr, iffalsereg);    break;
-            case LIR_ult:   MRAE(rr, iffalsereg);   break;
-            case LIR_ule:   MRA(rr, iffalsereg);    break;
-            case LIR_ugt:   MRBE(rr, iffalsereg);   break;
-            case LIR_uge:   MRB(rr, iffalsereg);    break;
+            case LIR_eq:    MOVNE(rr, iffalsereg);   break;
+            case LIR_ov:    MOVNO(rr, iffalsereg);   break;
+            case LIR_cs:    MOVNC(rr, iffalsereg);   break;
+            case LIR_lt:    MOVGE(rr, iffalsereg);   break;
+            case LIR_le:    MOVG(rr, iffalsereg);    break;
+            case LIR_gt:    MOVLE(rr, iffalsereg);   break;
+            case LIR_ge:    MOVL(rr, iffalsereg);    break;
+            case LIR_ult:   MOVAE(rr, iffalsereg);   break;
+            case LIR_ule:   MOVA(rr, iffalsereg);    break;
+            case LIR_ugt:   MOVBE(rr, iffalsereg);   break;
+            case LIR_uge:   MOVB(rr, iffalsereg);    break;
             default: debug_only( NanoAssert(0) );   break;
         }
     } else if (op == LIR_qcmov) {
         NanoAssert(0);
     }
     /*const Register iftruereg =*/ findSpecificRegFor(iftrue, rr);
     asm_cmp(condval);
 }
@@ -1528,28 +1531,28 @@ Assembler::asm_param(LInsp ins)
 }
 
 void
 Assembler::asm_short(LInsp ins)
 {
     Register rr = prepResultReg(ins, GpRegs);
     int32_t val = ins->imm16();
     if (val == 0)
-        XOR(rr,rr);
+        EOR(rr,rr,rr);
     else
         LDi(rr, val);
 }
 
 void
 Assembler::asm_int(LInsp ins)
 {
     Register rr = prepResultReg(ins, GpRegs);
     int32_t val = ins->imm32();
     if (val == 0)
-        XOR(rr,rr);
+        EOR(rr,rr,rr);
     else
         LDi(rr, val);
 }
 
 #if 0
 void
 Assembler::asm_quad(LInsp ins)
 {
@@ -1627,17 +1630,17 @@ Assembler::asm_arg(ArgSize sz, LInsp p, 
                         int d = findMemFor(p);
                         if (p->isop(LIR_alloc)) {
                             LEA(r, d, FP);
                         } else {
                             LD(r, d, FP);
                         }
                     } else {
                         // it must be in a saved reg
-                        MR(r, rA->reg);
+                        MOV(r, rA->reg);
                     }
                 } else {
                     // this is the last use, so fine to assign it
                     // to the scratch reg, it's dead after this point.
                     findSpecificRegFor(p, r);
                 }
             }
         } else {
--- a/js/src/nanojit/NativeARM.h
+++ b/js/src/nanojit/NativeARM.h
@@ -253,63 +253,118 @@ typedef enum {
     (x) = (dictwordp*)_nIns; }
 
 // BX 
 #define BX(_r)  do {                                                    \
         underrunProtect(4);                                             \
         *(--_nIns) = (NIns)( COND_AL | (0x12<<20) | (0xFFF<<8) | (1<<4) | (_r)); \
         asm_output("bx LR"); } while(0)
 
-// _l = _r OR _l
-#define OR(_l,_r)       do {                                            \
-        underrunProtect(4);                                             \
-        *(--_nIns) = (NIns)( COND_AL | (0xC<<21) | (_r<<16) | (_l<<12) | (_l) ); \
-        asm_output("or %s,%s",gpn(_l),gpn(_r)); } while(0)
+/*
+ * ALU operations
+ */
 
-// _r = _r OR _imm
-#define ORi(_r,_imm)    do {                                            \
-        NanoAssert(isU8((_imm)));                                       \
-        underrunProtect(4);                                             \
-        *(--_nIns) = (NIns)( COND_AL | OP_IMM | (0xC<<21) | (_r<<16) | (_r<<12) | ((_imm)&0xFF) ); \
-        asm_output("or %s,%d",gpn(_r), (_imm)); } while(0)
+enum {
+    ARM_and = 0,
+    ARM_eor = 1,
+    ARM_sub = 2,
+    ARM_rsb = 3,
+    ARM_add = 4,
+    ARM_adc = 5,
+    ARM_sbc = 6,
+    ARM_rsc = 7,
+    ARM_tst = 8,
+    ARM_teq = 9,
+    ARM_cmp = 10,
+    ARM_cmn = 11,
+    ARM_orr = 12,
+    ARM_mov = 13,
+    ARM_bic = 14,
+    ARM_mvn = 15
+};
 
-// _l = _r AND _l
-#define AND(_l,_r) do {                                                 \
-        underrunProtect(4);                                             \
-        *(--_nIns) = (NIns)( COND_AL | ((_r)<<16) | ((_l)<<12) | (_l)); \
-        asm_output("and %s,%s",gpn(_l),gpn(_r)); } while(0)
+// ALU operation with register and 8-bit immediate arguments
+//  S   - bit, 0 or 1, whether the CPSR register is updated
+//  rd  - destination register
+//  rl  - first (left) operand register
+//  imm - immediate (max 8 bits)
+#define ALUi(cond, op, S, rd, rl, imm) do {\
+        underrunProtect(4);\
+        NanoAssert(isU8(imm));\
+        *(--_nIns) = (NIns) ((cond)<<28 | OP_IMM | (ARM_##op)<<21 | (S)<<20 | (rl)<<16 | (rd)<<12 | (imm));\
+        if (ARM_##op == ARM_mov || ARM_##op == ARM_mvn)\
+            asm_output("%s%s%s %s, #0x%X", #op, condNames[cond], (S)?"s":"", gpn(rd), (imm));\
+        else if (ARM_##op >= ARM_tst && ARM_##op <= ARM_cmn) {\
+            NanoAssert(S==1);\
+            asm_output("%s%s %s, #0x%X", #op, condNames[cond], gpn(rl), (imm));\
+        } else\
+            asm_output("%s%s%s %s, %s, #0x%X", #op, condNames[cond], (S)?"s":"", gpn(rd), gpn(rl), (imm));\
+    } while (0)
 
-// _r = _r AND _imm
-#define ANDi(_r,_imm) do {                                              \
-        if (isU8((_imm))) {                                             \
-            underrunProtect(4);                                         \
-            *(--_nIns) = (NIns)( COND_AL | OP_IMM | ((_r)<<16) | ((_r)<<12) | ((_imm)&0xFF) ); \
-            asm_output("and %s,%d",gpn(_r),(_imm));}                   \
-        else if ((_imm)<0 && (_imm)>-256) {                             \
-            underrunProtect(8);                                         \
-            *(--_nIns) = (NIns)( COND_AL | ((_r)<<16) | ((_r)<<12) | (IP) ); \
-            asm_output("and %s,%s",gpn(_r),gpn(IP));              \
-            *(--_nIns) = (NIns)( COND_AL | (0x3E<<20) | ((IP)<<12) | (((_imm)^0xFFFFFFFF)&0xFF) ); \
-            asm_output("mvn %s,%d",gpn(IP),(_imm));}              \
-        else NanoAssert(0);                                             \
+// ALU operation with two register arguments
+//  S   - bit, 0 or 1, whether the CPSR register is updated
+//  rd  - destination register
+//  rl  - first (left) operand register
+//  rr  - second (right) operand register
+#define ALUr(cond, op, S, rd, rl, rr) do {\
+        underrunProtect(4);\
+        *(--_nIns) = (NIns) ((cond)<<28 |(ARM_##op)<<21 | (S)<<20 | (rl)<<16 | (rd)<<12 | (rr));\
+        if (ARM_##op == ARM_mov || ARM_##op == ARM_mvn)\
+            asm_output("%s%s%s %s, %s", #op, condNames[cond], (S)?"s":"", gpn(rd), gpn(rr));\
+        else if (ARM_##op >= ARM_tst && ARM_##op <= ARM_cmn) {\
+            NanoAssert(S==1);\
+            asm_output("%s%s  %s, %s", #op, condNames[cond], gpn(rl), gpn(rr));\
+        } else\
+            asm_output("%s%s%s %s, %s, %s", #op, condNames[cond], (S)?"s":"", gpn(rd), gpn(rl), gpn(rr));\
+    } while (0)
+
+// ALU operator with two register arguments, with rr operated on by a shift and shift immediate
+//  S   - bit, 0 or 1, whether the CPSR register is updated
+//  rd  - destination register
+//  rl  - first (left) operand register
+//  rr  - second (right) operand register
+//  sh  - a ShiftOperator
+//  imm - immediate argument to shift operator, 5 bits (0..31)
+#define ALUr_shi(cond, op, S, rd, rl, rr, sh, imm) do {\
+        underrunProtect(4);\
+        NanoAssert((imm)>=0 && (imm)<32);\
+        *(--_nIns) = (NIns) ((cond)<<28 |(ARM_##op)<<21 | (S)<<20 | (rl)<<16 | (rd)<<12 | (imm)<<7 | (sh)<<4 | (rr));\
+        if (ARM_##op == ARM_mov || ARM_##op == ARM_mvn)\
+            asm_output("%s%s%s %s, %s, %s #%d", #op, condNames[cond], (S)?"s":"", gpn(rd), gpn(rr), shiftNames[sh], (imm));\
+        else if (ARM_##op >= ARM_tst && ARM_##op <= ARM_cmn) {\
+            NanoAssert(S==1);\
+            asm_output("%s%s  %s, %s, %s #%d", #op, condNames[cond], gpn(rl), gpn(rr), shiftNames[sh], (imm));\
+        } else\
+            asm_output("%s%s%s %s, %s, %s, %s #%d", #op, condNames[cond], (S)?"s":"", gpn(rd), gpn(rl), gpn(rr), shiftNames[sh], (imm));\
     } while (0)
 
 
-// _l = _l XOR _r
-#define XOR(_l,_r)  do {                                                \
-        underrunProtect(4);                                             \
-        *(--_nIns) = (NIns)( COND_AL | (1<<21) | ((_r)<<16) | ((_l)<<12) | (_l)); \
-        asm_output("eor %s,%s",gpn(_l),gpn(_r)); } while(0)
+
+
+
+
+
+
+// _d = _l OR _r
+#define ORR(_d,_l,_r) ALUr(AL, orr, 0, _d, _l, _r)
+
+// _d = _l OR _imm
+#define ORRi(_d,_l,_imm) ALUi(AL, orr, 0, _d, _l, _imm)
 
-// _r = _r XOR _imm
-#define XORi(_r,_imm)   do {                                            \
-        NanoAssert(isU8((_imm)));                                       \
-        underrunProtect(4);                                             \
-        *(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<21) | ((_r)<<16) | ((_r)<<12) | ((_imm)&0xFF) ); \
-        asm_output("eor %s,%d",gpn(_r),(_imm)); } while(0)
+// _d = _l AND _r
+#define AND(_d,_l,_r) ALUr(AL, and, 0, _d, _l, _r)
+
+// _d = _l AND _imm
+#define ANDi(_d,_l,_imm) ALUi(AL, and, 0, _d, _l, _imm)
+
+// _d = _l ^ _r
+#define EOR(_d,_l,_r) ALUr(AL, eor, 0, _d, _l, _r)
+
+// _d = _l ^ _imm
+#define EORi(_d,_l,_imm) ALUi(AL, eor, 0, _d, _l, _imm)
 
 // _d = _n + _m
 #define arm_ADD(_d,_n,_m) do {                                          \
         underrunProtect(4);                                             \
         *(--_nIns) = (NIns)( COND_AL | OP_STAT | (1<<23) | ((_n)<<16) | ((_d)<<12) | (_m)); \
         asm_output("add %s,%s+%s",gpn(_d),gpn(_n),gpn(_m)); } while(0)
 
 // _l = _l + _r
@@ -464,39 +519,39 @@ typedef enum {
                 *(--_nIns) = (NIns)( COND_AL | (0x015<<20) | ((_r)<<16) | (IP) ); \
                 LD32_nochk(IP, (_imm));                            \
             }                                                           \
         }                                                               \
         asm_output("cmp %s,0x%x",gpn(_r),(_imm));                      \
     } while(0)
 
 // MOV
-#define MR(_d,_s)  do {                                                 \
+#define MOV(_d,_s)  do {                                                 \
         underrunProtect(4);                                             \
         *(--_nIns) = (NIns)( COND_AL | (0xD<<21) | ((_d)<<12) | (_s) ); \
         asm_output("mov %s,%s",gpn(_d),gpn(_s)); } while (0)
 
 
-#define MR_cond(_d,_s,_cond,_nm)  do {                                  \
+#define MOV_cond(_d,_s,_cond,_nm)  do {                                  \
         underrunProtect(4);                                             \
         *(--_nIns) = (NIns)( ((_cond)<<28) | (0xD<<21) | ((_d)<<12) | (_s) ); \
         asm_output(_nm " %s,%s",gpn(_d),gpn(_s)); } while (0)
 
-#define MREQ(dr,sr) MR_cond(dr, sr, EQ, "moveq")
-#define MRNE(dr,sr) MR_cond(dr, sr, NE, "movne")
-#define MRL(dr,sr)  MR_cond(dr, sr, LT, "movlt")
-#define MRLE(dr,sr) MR_cond(dr, sr, LE, "movle")
-#define MRG(dr,sr)  MR_cond(dr, sr, GT, "movgt")
-#define MRGE(dr,sr) MR_cond(dr, sr, GE, "movge")
-#define MRB(dr,sr)  MR_cond(dr, sr, CC, "movcc")
-#define MRBE(dr,sr) MR_cond(dr, sr, LS, "movls")
-#define MRA(dr,sr)  MR_cond(dr, sr, HI, "movcs")
-#define MRAE(dr,sr) MR_cond(dr, sr, CS, "movhi")
-#define MRNO(dr,sr) MR_cond(dr, sr, VC, "movvc") // overflow clear
-#define MRNC(dr,sr) MR_cond(dr, sr, CC, "movcc") // carry clear
+#define MOVEQ(dr,sr) MOV_cond(dr, sr, EQ, "moveq")
+#define MOVNE(dr,sr) MOV_cond(dr, sr, NE, "movne")
+#define MOVL(dr,sr)  MOV_cond(dr, sr, LT, "movlt")
+#define MOVLE(dr,sr) MOV_cond(dr, sr, LE, "movle")
+#define MOVG(dr,sr)  MOV_cond(dr, sr, GT, "movgt")
+#define MOVGE(dr,sr) MOV_cond(dr, sr, GE, "movge")
+#define MOVB(dr,sr)  MOV_cond(dr, sr, CC, "movcc")
+#define MOVBE(dr,sr) MOV_cond(dr, sr, LS, "movls")
+#define MOVA(dr,sr)  MOV_cond(dr, sr, HI, "movcs")
+#define MOVAE(dr,sr) MOV_cond(dr, sr, CS, "movhi")
+#define MOVNO(dr,sr) MOV_cond(dr, sr, VC, "movvc") // overflow clear
+#define MOVNC(dr,sr) MOV_cond(dr, sr, CC, "movcc") // carry clear
 
 #define LDR_chk(_d,_b,_off,_chk) do {                                   \
         if (IsFpReg(_d)) {                                              \
             FLDD_chk(_d,_b,_off,_chk);                                  \
         } else if ((_off) > -4096 && (_off) < 4096) {                   \
             if (_chk) underrunProtect(4);                               \
             *(--_nIns) = (NIns)( COND_AL | (((_off) < 0 ? 0x51 : 0x59)<<20) | ((_b)<<16) | ((_d)<<12) | (((_off) < 0 ? -(_off) : (_off))&0xFFF) ); \
         } else {                                                        \
@@ -512,17 +567,17 @@ typedef enum {
 #define LDR_nochk(_d,_b,_off)  LDR_chk(_d,_b,_off,0)
 
 // i386 compat, for Assembler.cpp
 #define LD(reg,offset,base)    LDR_chk(reg,base,offset,1)
 #define ST(base,offset,reg)    STR(reg,base,offset)
 
 #define LDi(_d,_imm) do {                                               \
         if ((_imm) == 0) {                                              \
-            XOR(_d,_d);                                                 \
+            EOR(_d,_d,_d);                                              \
         } else if (isS8((_imm)) || isU8((_imm))) {                      \
             underrunProtect(4);                                         \
             if ((_imm)<0)   *(--_nIns) = (NIns)( COND_AL | (0x3E<<20) | ((_d)<<12) | (((_imm)^0xFFFFFFFF)&0xFF) ); \
             else            *(--_nIns) = (NIns)( COND_AL | (0x3B<<20) | ((_d)<<12) | ((_imm)&0xFF) ); \
             asm_output("ld  %s,0x%x",gpn((_d)),(_imm));                \
         } else {                                                        \
             underrunProtect(LD32_size);                                 \
             LD32_nochk(_d, (_imm));                                     \
@@ -757,22 +812,16 @@ typedef enum {
 
 #define LDMIA(_b, _mask) do {                                           \
         underrunProtect(4);                                             \
         NanoAssert(((_mask)&rmask(_b))==0 && isU8(_mask));              \
         *(--_nIns) = (NIns)(COND_AL | (0x8B<<20) | ((_b)<<16) | (_mask)&0xFF); \
         asm_output("ldmia %s!,{0x%x}", gpn(_b), (_mask)); \
     } while (0)
 
-#define MRS(_d) do {                            \
-        underrunProtect(4);                     \
-        *(--_nIns) = (NIns)(COND_AL | (0x10<<20) | (0xF<<16) | ((_d)<<12)); \
-        asm_output("msr %s", gpn(_d));                                 \
-    } while (0)
-
 /*
  * VFP
  */
 
 #define FMDRR(_Dm,_Rd,_Rn) do {                                         \
         underrunProtect(4);                                             \
         NanoAssert(IsFpReg(_Dm) && IsGpReg(_Rd) && IsGpReg(_Rn));       \
         *(--_nIns) = (NIns)( COND_AL | (0xC4<<20) | ((_Rn)<<16) | ((_Rd)<<12) | (0xB1<<4) | (FpRegNum(_Dm)) ); \