Bug 605428 Implement NJ_EXPANDED_LOADSTORE for SPARC r=wmaddox
author Ginn Chen <ginn.chen@oracle.com>
Fri, 05 Nov 2010 16:25:14 +0800
changeset 57771 a2d93fed36714d5cd04db196f51a9290a693ae66
parent 57770 c94d644bd7566bd9f755360164aec2b6ddae99b4
child 57772 993c90c046ffd589c4794f4c689e41e0618efdad
push id 1
push user shaver@mozilla.com
push date Tue, 04 Jan 2011 17:58:04 +0000
reviewers wmaddox
bugs 605428
milestone 2.0b8pre
Bug 605428 Implement NJ_EXPANDED_LOADSTORE for SPARC r=wmaddox
js/src/nanojit/NativeSparc.cpp
js/src/nanojit/NativeSparc.h
--- a/js/src/nanojit/NativeSparc.cpp
+++ b/js/src/nanojit/NativeSparc.cpp
@@ -110,26 +110,26 @@ namespace nanojit
     inline void Assembler::ORN(Register rs1, Register rs2, Register rd) {
         IntegerOperation(rs1, rs2, rd, 0x6, "orn");
     }
     inline void Assembler::SMULCC(Register rs1, Register rs2, Register rd) {
         IntegerOperation(rs1, rs2, rd, 0x1b, "smulcc");
     }
     inline void Assembler::SUB(Register rs1, Register rs2, Register rd) {
         IntegerOperation(rs1, rs2, rd, 0x4, "sub");
-    };
+    }
     inline void Assembler::SUBCC(Register rs1, Register rs2, Register rd) {
         IntegerOperation(rs1, rs2, rd, 0x14, "subcc");
-    };
+    }
     inline void Assembler::SUBI(Register rs1, int32_t simm13, Register rd) {
         IntegerOperationI(rs1, simm13, rd, 0x4, "sub");
     }
     inline void Assembler::XOR(Register rs1, Register rs2, Register rd) {
         IntegerOperation(rs1, rs2, rd, 0x3, "xor");
-    };
+    }
 
     inline void Assembler::Bicc(int32_t a, int32_t dsp22, int32_t cond, const char *opcode) {
         Format_2_2(a, cond, 0x2, dsp22);
         asm_output("%s 0x%x", opcode, _nIns + dsp22 - 1);
     }
 
     inline void Assembler::BA  (int32_t a, int32_t dsp22) { Bicc(a, dsp22, 0x8, "ba");   }
     inline void Assembler::BE  (int32_t a, int32_t dsp22) { Bicc(a, dsp22, 0x1, "be");   }
@@ -203,16 +203,22 @@ namespace nanojit
         FloatOperation(G0, rs2, rd, 0x2, "fmovd");
     }
     inline void Assembler::FNEGD(Register rs2, Register rd) {
         FloatOperation(G0, rs2, rd, 0x6, "fnegd");
     }
     inline void Assembler::FITOD(Register rs2, Register rd) {
         FloatOperation(G0, rs2, rd, 0xc8, "fitod");
     }
+    inline void Assembler::FDTOS(Register rs2, Register rd) {
+        FloatOperation(G0, rs2, rd, 0xc6, "fdtos");
+    }
+    inline void Assembler::FSTOD(Register rs2, Register rd) {
+        FloatOperation(G0, rs2, rd, 0xc9, "fstod");
+    }
 
     inline void Assembler::JMPL(Register rs1, Register rs2, Register rd) {
         Format_3_1(2, rd, 0x38, rs1, 0, rs2);
         asm_output("jmpl [%s + %s]", gpn(rs1), gpn(rs2));
     }
 
     inline void Assembler::JMPLI(Register rs1, int32_t simm13, Register rd) {
         Format_3_1I(2, rd, 0x38, rs1, simm13);
@@ -233,16 +239,25 @@ namespace nanojit
 
     inline void Assembler::LDF(Register rs1, Register rs2, Register rd) {
         LoadOperation(rs1, rs2, rd, 0x20, "ldf");
     }
     inline void Assembler::LDFI(Register rs1, int32_t simm13, Register rd) {
         LoadOperationI(rs1, simm13, rd, 0x20, "ldf");
     }
 
+    inline void Assembler::LDF32(Register rs1, int32_t immI, Register rd) {
+        if (isIMM13(immI)) {
+            LDFI(rs1, immI, rd);
+        } else {
+            LDF(rs1, L0, rd);
+            SET32(immI, L0);
+        }
+    }
+
     inline void Assembler::LDDF32(Register rs1, int32_t immI, Register rd) {
         if (isIMM13(immI+4)) {
             LDFI(rs1, immI+4, rd + 1);
             LDFI(rs1, immI, rd);
         } else {
             LDF(rs1, L0, rd + 1);
             SET32(immI+4, L0);
             LDF(rs1, L0, rd);
@@ -261,32 +276,64 @@ namespace nanojit
         if (isIMM13(immI)) {
             LDUBI(rs1, immI, rd);
         } else {
             LDUB(rs1, L0, rd);
             SET32(immI, L0);
         }
     }
 
+    inline void Assembler::LDSB(Register rs1, Register rs2, Register rd) {
+        LoadOperation(rs1, rs2, rd,  0x9, "ldsb");
+    }
+    inline void Assembler::LDSBI(Register rs1, int32_t simm13, Register rd) {
+        LoadOperationI(rs1, simm13, rd, 0x9, "ldsb");
+    }
+
+    inline void Assembler::LDSB32(Register rs1, int32_t immI, Register rd) {
+        if (isIMM13(immI)) {
+            LDSBI(rs1, immI, rd);
+        } else {
+            LDSB(rs1, L0, rd);
+            SET32(immI, L0);
+        }
+    }
+
     inline void Assembler::LDUH(Register rs1, Register rs2, Register rd) {
         LoadOperation(rs1, rs2, rd,  0x2, "lduh");
     }
     inline void Assembler::LDUHI(Register rs1, int32_t simm13, Register rd) {
         LoadOperationI(rs1, simm13, rd, 0x2, "lduh");
     }
 
     inline void Assembler::LDUH32(Register rs1, int32_t immI, Register rd) {
         if (isIMM13(immI)) {
             LDUHI(rs1, immI, rd);
         } else {
             LDUH(rs1, L0, rd);
             SET32(immI, L0);
         }
     }
 
+    inline void Assembler::LDSH(Register rs1, Register rs2, Register rd) {
+        LoadOperation(rs1, rs2, rd,  0xa, "ldsh");
+    }
+    inline void Assembler::LDSHI(Register rs1, int32_t simm13, Register rd) {
+        LoadOperationI(rs1, simm13, rd, 0xa, "ldsh");
+    }
+
+    inline void Assembler::LDSH32(Register rs1, int32_t immI, Register rd) {
+        if (isIMM13(immI)) {
+            LDSHI(rs1, immI, rd);
+        } else {
+            LDSH(rs1, L0, rd);
+            SET32(immI, L0);
+        }
+    }
+
     inline void Assembler::LDSW(Register rs1, Register rs2, Register rd) {
         LoadOperation(rs1, rs2, rd,  0x8, "ldsw");
     }
     inline void Assembler::LDSWI(Register rs1, int32_t simm13, Register rd) {
         LoadOperationI(rs1, simm13, rd, 0x8, "ldsw");
     }
 
     inline void Assembler::LDSW32(Register rs1, int32_t immI, Register rd) {
@@ -470,16 +517,32 @@ namespace nanojit
         if (isIMM13(immI)) {
             STWI(rd, immI, rs1);
          } else {
             STW(rd, L0, rs1);
             SET32(immI, L0);
          }
     }
 
+    inline void Assembler::STH(Register rd, Register rs1, Register rs2) {
+        Store(rd, rs1, rs2, 0x6, "sth");
+    }
+    inline void Assembler::STHI(Register rd, int32_t simm13, Register rs1) {
+        StoreI(rd, simm13, rs1, 0x6, "sth");
+    }
+
+    inline void Assembler::STH32(Register rd, int32_t immI, Register rs1) {
+        if (isIMM13(immI)) {
+            STHI(rd, immI, rs1);
+         } else {
+            STH(rd, L0, rs1);
+            SET32(immI, L0);
+         }
+    }
+
     inline void Assembler::STB(Register rd, Register rs1, Register rs2) {
         Store(rd, rs1, rs2, 0x5, "stb");
     }
     inline void Assembler::STBI(Register rd, int32_t simm13, Register rs1) {
         StoreI(rd, simm13, rs1, 0x5, "stb");
     }
 
     inline void Assembler::STB32(Register rd, int32_t immI, Register rs1) {
@@ -748,21 +811,19 @@ namespace nanojit
         }
     }
 
     void Assembler::asm_store32(LOpcode op, LIns *value, int dr, LIns *base)
     {
         switch (op) {
             case LIR_sti:
             case LIR_sti2c:
+            case LIR_sti2s:
                 // handled by mainline code below for now
                 break;
-            case LIR_sti2s:
-                NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
-                return;
             default:
                 NanoAssertMsg(0, "asm_store32 should never receive this LIR opcode");
                 return;
         }
 
         underrunProtect(20);
         if (value->isImmI())
             {
@@ -770,16 +831,19 @@ namespace nanojit
                 int c = value->immI();
                 switch (op) {
                 case LIR_sti:
                     STW32(L2, dr, rb);
                     break;
                 case LIR_sti2c:
                     STB32(L2, dr, rb);
                     break;
+                case LIR_sti2s:
+                    STH32(L2, dr, rb);
+                    break;
                 }
                 SET32(c, L2);
             }
         else
             {
                 // make sure what is in a register
                 Register ra, rb;
                 if (base->isImmI()) {
@@ -792,16 +856,19 @@ namespace nanojit
                 }
                 switch (op) {
                 case LIR_sti:
                     STW32(ra, dr, rb);
                     break;
                 case LIR_sti2c:
                     STB32(ra, dr, rb);
                     break;
+                case LIR_sti2s:
+                    STH32(ra, dr, rb);
+                    break;
                 }
             }
     }
 
     void Assembler::asm_spill(Register rr, int d, bool quad)
     {
         underrunProtect(24);
         (void)quad;
@@ -812,75 +879,85 @@ namespace nanojit
             STW32(rr, d, FP);
         }
     }
 
     void Assembler::asm_load64(LIns* ins)
     {
         switch (ins->opcode()) {
             case LIR_ldd:
+            case LIR_ldf2d:
                 // handled by mainline code below for now
                 break;
-            case LIR_ldf2d:
-                NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
-                return;
             default:
                 NanoAssertMsg(0, "asm_load64 should never receive this LIR opcode");
                 return;
         }
 
-        underrunProtect(72);
+        underrunProtect(48);
         LIns* base = ins->oprnd1();
         int db = ins->disp();
-        Register rr = ins->deprecated_getReg();
+        Register rb = getBaseReg(base, db, GpRegs);
 
-        int dr = deprecated_disp(ins);
-        Register rb;
-        if (base->isop(LIR_allocp)) {
-            rb = FP;
-            db += findMemFor(base);
-        } else {
-            rb = findRegFor(base, GpRegs);
-        }
-        ins->clearReg();
+        if (ins->isInReg()) {
+            Register rr =  ins->getReg();
+            asm_maybe_spill(ins, false);
+            NanoAssert(rmask(rr) & FpRegs);
 
-        // don't use an fpu reg to simply load & store the value.
-        if (dr)
-            asm_mmq(FP, dr, rb, db);
-
-        deprecated_freeRsrcOf(ins);
+            if (ins->opcode() == LIR_ldd) {
+                LDDF32(rb, db, rr);
+            } else {
+                FSTOD(F28, rr);
+                LDF32(rb, db, F28);
+            }
+        } else {
+            NanoAssert(ins->isInAr());
+            int dr = arDisp(ins);
 
-        if (rr != deprecated_UnknownReg)
-            {
-                NanoAssert(rmask(rr)&FpRegs);
-                _allocator.retire(rr);
-                LDDF32(rb, db, rr);
+            if (ins->opcode() == LIR_ldd) {
+                // don't use an fpu reg to simply load & store the value.
+                asm_mmq(FP, dr, rb, db);
+            } else {
+                STDF32(F28, dr, FP);
+                FSTOD(F28, F28);
+                LDF32(rb, db, F28);
             }
+        }
+
+        freeResourcesOf(ins);
     }
 
     void Assembler::asm_store64(LOpcode op, LIns* value, int dr, LIns* base)
     {
         switch (op) {
             case LIR_std:
+            case LIR_std2f:
                 // handled by mainline code below for now
                 break;
-            case LIR_std2f:
-                NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
-                return;
             default:
                 NanoAssertMsg(0, "asm_store64 should never receive this LIR opcode");
                 return;
         }
 
         underrunProtect(48);
+        Register rb = getBaseReg(base, dr, GpRegs);
+        if (op == LIR_std2f) {
+            Register rv = ( !value->isInReg()
+                            ? findRegFor(value, FpRegs)
+                            : value->getReg() );
+            NanoAssert(rmask(rv) & FpRegs);
+            STF32(F28, dr, rb);
+            FDTOS(rv, F28);
+            return;
+        }
+
         if (value->isImmD())
             {
                 // if a constant 64-bit value just store it now rather than
                 // generating a pointless store/load/store sequence
-                Register rb = findRegFor(base, GpRegs);
                 STW32(L2, dr+4, rb);
                 SET32(value->immDlo(), L2);
                 STW32(L2, dr, rb);
                 SET32(value->immDhi(), L2);
                 return;
             }
 
         if (value->isop(LIR_ldd))
@@ -890,40 +967,25 @@ namespace nanojit
                 // put it in an FPU reg just to load & store it.
 
                 // a) if we know it's not a double, this is right.
                 // b) if we guarded that its a double, this store could be on
                 // the side exit, copying a non-double.
                 // c) maybe its a double just being stored.  oh well.
 
                 int da = findMemFor(value);
-                Register rb;
-                if (base->isop(LIR_allocp)) {
-                    rb = FP;
-                    dr += findMemFor(base);
-                } else {
-                    rb = findRegFor(base, GpRegs);
-                }
                 asm_mmq(rb, dr, FP, da);
                 return;
             }
 
-        Register rb;
-        if (base->isop(LIR_allocp)) {
-            rb = FP;
-            dr += findMemFor(base);
-        } else {
-            rb = findRegFor(base, GpRegs);
-        }
-
         // if value already in a reg, use that, otherwise
-        // try to get it into XMM regs before FPU regs.
+        // get it into FPU regs.
         Register rv = ( !value->isInReg()
                       ? findRegFor(value, FpRegs)
-                      : value->deprecated_getReg() );
+                      : value->getReg() );
 
         STDF32(rv, dr, rb);
     }
 
     /**
      * copy 64 bits: (rd+dd) <- (rs+ds)
      */
     void Assembler::asm_mmq(Register rd, int dd, Register rs, int ds)
@@ -1239,19 +1301,21 @@ namespace nanojit
                 break;
             case LIR_ldus2ui:
                 LDUH32(ra, d, rr);
                 break;
             case LIR_ldi:
                 LDSW32(ra, d, rr);
                 break;
             case LIR_ldc2i:
+                LDSB32(ra, d, rr);
+                break;
             case LIR_lds2i:
-                NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
-                return;
+                LDSH32(ra, d, rr);
+                break;
             default:
                 NanoAssertMsg(0, "asm_load32 should never receive this LIR opcode");
                 return;
         }
     }
 
     void Assembler::asm_cmov(LIns* ins)
     {
--- a/js/src/nanojit/NativeSparc.h
+++ b/js/src/nanojit/NativeSparc.h
@@ -72,17 +72,17 @@ namespace nanojit
     const int NJ_MAX_REGISTERS = 30; // L0 - L7, I0 - I5, F2 - F14
 
     const int LARGEST_UNDERRUN_PROT = 32;  // largest value passed to underrunProtect
 
 #define NJ_MAX_STACK_ENTRY              8192
 #define NJ_MAX_PARAMETERS               1
 
 #define NJ_JTBL_SUPPORTED               0
-#define NJ_EXPANDED_LOADSTORE_SUPPORTED 0
+#define NJ_EXPANDED_LOADSTORE_SUPPORTED 1
 #define NJ_F2I_SUPPORTED                1
 #define NJ_SOFTFLOAT_SUPPORTED          0
 #define NJ_DIVI_SUPPORTED               0
 
     const int NJ_ALIGN_STACK = 16;
 
     typedef uint32_t NIns;
 
@@ -291,29 +291,29 @@ namespace nanojit
     } \
     void Format_4_4(Register rd, int32_t op3, Register rs1, int32_t rcond, int32_t opf_low, Register rs2) { \
         Format_3A(2, rd, op3, _reg_(rs1) << 14 | (rcond & 0x7) << 10 | (opf_low & 0x1F) << 5 | _reg_(rs2)); \
     } \
     void Format_4_5(Register rd, int32_t op3, int32_t cond, int32_t opf_cc, int32_t opf_low, Register rs2) { \
         Format_3A(2, rd, op3, (cond & 0xF) << 14 | (opf_cc & 0x7) << 11 | (opf_low & 0x3F) << 5 | _reg_(rs2)); \
     } \
     void IntegerOperation(Register rs1, Register rs2, Register rd, int32_t op3, const char *opcode); \
-    void Assembler::IntegerOperationI(Register rs1, int32_t simm13, Register rd, int32_t op3, const char *opcode); \
+    void IntegerOperationI(Register rs1, int32_t simm13, Register rd, int32_t op3, const char *opcode); \
     void FloatOperation(Register rs1, Register rs2, Register rd, int32_t op3, const char *opcode); \
     void Bicc(int32_t a, int32_t dsp22, int32_t cond, const char *opcode); \
     void FBfcc(int32_t a, int32_t dsp22, int32_t cond, const char *opcode); \
     void LoadOperation(Register rs1, Register rs2, Register rd, int32_t op3, const char* opcode); \
     void LoadOperationI(Register rs1, int32_t simm13, Register rd, int32_t op3, const char* opcode); \
     void MOVcc(Register rs, int32_t cc2, int32_t cc1, int32_t cc0, Register rd, int32_t cond, const char *opcode); \
     void MOVccI(int32_t simm11, int32_t cc2, int32_t cc1, int32_t cc0, Register rd, int32_t cond, const char *opcode); \
     void FMOVDcc(Register rs, int32_t opt_cc, Register rd, int32_t cond, const char *opcode); \
     void ShiftOperation(Register rs1, Register rs2, Register rd, int32_t op3, const char* opcode); \
     void ShiftOperationI(Register rs1, int32_t shcnt32, Register rd, int32_t op3, const char* opcode); \
     void Store(Register rd, Register rs1, Register rs2, int32_t op3, const char* opcode); \
-    void Assembler::StoreI(Register rd, int32_t simm13, Register rs1, int32_t op3, const char* opcode); \
+    void StoreI(Register rd, int32_t simm13, Register rs1, int32_t op3, const char* opcode); \
     void ADD(Register rs1, Register rs2, Register rd); \
     void ADDCC(Register rs1, Register rs2, Register rd); \
     void AND(Register rs1, Register rs2, Register rd); \
     void ANDCC(Register rs1, Register rs2, Register rd); \
     void OR(Register rs1, Register rs2, Register rd); \
     void ORI(Register rs1, int32_t simm13, Register rd); \
     void ORN(Register rs1, Register rs2, Register rd); \
     void SMULCC(Register rs1, Register rs2, Register rd); \
@@ -350,27 +350,36 @@ namespace nanojit
     void FCMPD(Register rs1, Register rs2); \
     void FSUBD(Register rs1, Register rs2, Register rd); \
     void FMULD(Register rs1, Register rs2, Register rd); \
     void FDTOI(Register rs2, Register rd); \
     void FDIVD(Register rs1, Register rs2, Register rd); \
     void FMOVD(Register rs2, Register rd); \
     void FNEGD(Register rs2, Register rd); \
     void FITOD(Register rs2, Register rd); \
+    void FDTOS(Register rs2, Register rd); \
+    void FSTOD(Register rs2, Register rd); \
     void JMPL(Register rs1, Register rs2, Register rd); \
     void JMPLI(Register rs1, int32_t simm13, Register rd); \
     void LDF(Register rs1, Register rs2, Register rd); \
     void LDFI(Register rs1, int32_t simm13, Register rd); \
+    void LDF32(Register rs1, int32_t immI, Register rd); \
     void LDDF32(Register rs1, int32_t immI, Register rd); \
     void LDUB(Register rs1, Register rs2, Register rd); \
     void LDUBI(Register rs1, int32_t simm13, Register rd); \
     void LDUB32(Register rs1, int32_t immI, Register rd); \
+    void LDSB(Register rs1, Register rs2, Register rd); \
+    void LDSBI(Register rs1, int32_t simm13, Register rd); \
+    void LDSB32(Register rs1, int32_t immI, Register rd); \
     void LDUH(Register rs1, Register rs2, Register rd); \
     void LDUHI(Register rs1, int32_t simm13, Register rd); \
     void LDUH32(Register rs1, int32_t immI, Register rd); \
+    void LDSH(Register rs1, Register rs2, Register rd); \
+    void LDSHI(Register rs1, int32_t simm13, Register rd); \
+    void LDSH32(Register rs1, int32_t immI, Register rd); \
     void LDSW(Register rs1, Register rs2, Register rd); \
     void LDSWI(Register rs1, int32_t simm13, Register rd); \
     void LDSW32(Register rs1, int32_t immI, Register rd); \
     void MOVE(Register rs, Register rd); \
     void MOVNE(Register rs, Register rd); \
     void MOVL(Register rs, Register rd); \
     void MOVLE(Register rs, Register rd); \
     void MOVG(Register rs, Register rd); \
@@ -423,16 +432,19 @@ namespace nanojit
     void SRL(Register rs1, Register rs2, Register rd); \
     void STF(Register rd, Register rs1, Register rs2); \
     void STFI(Register rd, int32_t simm13, Register rs1); \
     void STF32(Register rd, int32_t immI, Register rs1); \
     void STDF32(Register rd, int32_t immI, Register rs1); \
     void STW(Register rd, Register rs1, Register rs2); \
     void STWI(Register rd, int32_t simm13, Register rs1); \
     void STW32(Register rd, int32_t immI, Register rs1); \
+    void STH(Register rd, Register rs1, Register rs2); \
+    void STHI(Register rd, int32_t simm13, Register rs1); \
+    void STH32(Register rd, int32_t immI, Register rs1); \
     void STB(Register rd, Register rs1, Register rs2); \
     void STBI(Register rd, int32_t simm13, Register rs1); \
     void STB32(Register rd, int32_t immI, Register rs1); \
     bool isIMM13(int32_t imm) { return (imm) <= 0xfff && (imm) >= -0x1000; } \
     bool isIMM19(int32_t imm) { return (imm) <= 0x3ffff && (imm) >= -0x40000; } \
     bool isIMM22(int32_t imm) { return (imm) <= 0x1fffff && (imm) >= -0x200000; } \
     void JMP_long_nocheck(int32_t t); \
     void JMP_long(int32_t t); \