Bug 538060 - nanojit: improve 64-bit loads and stores in the X64 back-end. r=gal,rreitmai,edwsmith.
author: Nicholas Nethercote <nnethercote@mozilla.com>
Mon, 11 Jan 2010 15:51:49 +1100
changeset 37672 56cdca9fe3d85d314361ff8830364e0a7430e791
parent 37671 2fd767b696f852c5671b1c72ba273c985dc42eba
child 37673 b6af8fb33d4a80700992d80522ba99f9518f46cc
push id: 11426
push user: rsayre@mozilla.com
push date: Sun, 31 Jan 2010 16:36:36 +0000
treeherder: autoland@3048d03980e7 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: gal, rreitmai, edwsmith
bugs: 538060
milestone: 1.9.3a1pre
Bug 538060 - nanojit: improve 64-bit loads and stores in the X64 back-end. r=gal,rreitmai,edwsmith.
js/src/lirasm/lirasm.cpp
js/src/nanojit/LIR.cpp
js/src/nanojit/LIR.h
js/src/nanojit/NativeX64.cpp
js/src/nanojit/NativeX64.h
js/src/nanojit/Nativei386.cpp
--- a/js/src/lirasm/lirasm.cpp
+++ b/js/src/lirasm/lirasm.cpp
@@ -654,19 +654,19 @@ FragmentAssembler::assemble_call(const s
         int ty;
 
         ci->_abi = _abi;
 
         ci->_argtypes = 0;
         size_t argc = mTokens.size();
         for (size_t i = 0; i < argc; ++i) {
             args[i] = ref(mTokens[mTokens.size() - (i+1)]);
-            if      (args[i]->isFloat()) ty = ARGSIZE_F;
-            else if (args[i]->isQuad())  ty = ARGSIZE_Q;
-            else                         ty = ARGSIZE_I;
+            if      (args[i]->isF64()) ty = ARGSIZE_F;
+            else if (args[i]->isI64()) ty = ARGSIZE_Q;
+            else                       ty = ARGSIZE_I;
             // Nb: i+1 because argMask() uses 1-based arg counting.
             ci->_argtypes |= argMask(ty, i+1, argc);
         }
 
         // Select return type from opcode.
         ty = 0;
         if      (mOpcode == LIR_icall) ty = ARGSIZE_LO;
         else if (mOpcode == LIR_fcall) ty = ARGSIZE_F;
--- a/js/src/nanojit/LIR.cpp
+++ b/js/src/nanojit/LIR.cpp
@@ -418,32 +418,16 @@ namespace nanojit
         // oprnd_2 must be in the same position in LIns{Op2,Op3,Sti}
         // because oprnd2() is used for both of them.
         NanoStaticAssert( (offsetof(LInsOp2, ins) - offsetof(LInsOp2, oprnd_2)) ==
                           (offsetof(LInsOp3, ins) - offsetof(LInsOp3, oprnd_2)) );
         NanoStaticAssert( (offsetof(LInsOp3, ins) - offsetof(LInsOp3, oprnd_2)) ==
                           (offsetof(LInsSti, ins) - offsetof(LInsSti, oprnd_2)) );
     }
 
-    bool LIns::isFloat() const {
-        switch (opcode()) {
-            default:
-                return false;
-            case LIR_fadd:
-            case LIR_fsub:
-            case LIR_fmul:
-            case LIR_fdiv:
-            case LIR_fneg:
-            case LIR_fcall:
-            case LIR_i2f:
-            case LIR_u2f:
-                return true;
-        }
-    }
-
     LIns* LirWriter::ins2i(LOpcode v, LIns* oprnd1, int32_t imm)
     {
         return ins2(v, oprnd1, insImm(imm));
     }
 
     bool insIsS16(LInsp i)
     {
         if (i->isconst()) {
--- a/js/src/nanojit/LIR.h
+++ b/js/src/nanojit/LIR.h
@@ -487,24 +487,19 @@ namespace nanojit
             return isCseOpcode(opcode()) || (isCall() && callInfo()->_cse);
         }
         bool isRet() const {
             return isRetOpcode(opcode());
         }
         bool isop(LOpcode o) const {
             return opcode() == o;
         }
-        bool isQuad() const {
-            LTy ty = retTypes[opcode()];
-            return ty == LTy_I64 || ty == LTy_F64;
-        }
         bool isCond() const {
             return (isop(LIR_ov)) || isCmp();
         }
-        bool isFloat() const;   // not inlined because it contains a switch
         bool isCmp() const {
             LOpcode op = opcode();
             return (op >= LIR_eq  && op <= LIR_uge) ||
                    (op >= LIR_qeq && op <= LIR_quge) ||
                    (op >= LIR_feq && op <= LIR_fge);
         }
         bool isCall() const {
             return isop(LIR_icall) || isop(LIR_fcall) || isop(LIR_qcall);
@@ -545,38 +540,53 @@ namespace nanojit
         bool isconstf() const {
             return isop(LIR_float);
         }
 
         bool isBranch() const {
             return isop(LIR_jt) || isop(LIR_jf) || isop(LIR_j) || isop(LIR_jtbl);
         }
 
-        bool isPtr() {
-#ifdef NANOJIT_64BIT
+        bool isVoid() const {
+            return retTypes[opcode()] == LTy_Void;
+        }
+        bool isI32() const {
+            return retTypes[opcode()] == LTy_I32;
+        }
+        bool isI64() const {
             return retTypes[opcode()] == LTy_I64;
+        }
+        bool isF64() const {
+            return retTypes[opcode()] == LTy_F64;
+        }
+        bool isQuad() const {
+            return isI64() || isF64();
+        }
+        bool isPtr() const {
+#ifdef NANOJIT_64BIT
+            return isI64();
 #else
-            return retTypes[opcode()] == LTy_I32;
+            return isI32();
 #endif
         }
 
         // Return true if removal of 'ins' from a LIR fragment could
         // possibly change the behaviour of that fragment, even if any
         // value computed by 'ins' is not used later in the fragment.
         // In other words, can 'ins' possibly alter control flow or memory?
         // Note, this assumes that loads will never fault and hence cannot
         // affect the control flow.
         bool isStmt() {
             NanoAssert(!isop(LIR_start) && !isop(LIR_skip));
             // All instructions with Void retType are statements.  And some
             // calls are statements too.
             if (isCall())
                 return !isCse();
             else
-                return retTypes[opcode()] == LTy_Void;
+                return isVoid();
         }
 
         inline void* constvalp() const
         {
         #ifdef NANOJIT_64BIT
             return (void*)imm64();
         #else
             return (void*)imm32();
--- a/js/src/nanojit/NativeX64.cpp
+++ b/js/src/nanojit/NativeX64.cpp
@@ -769,17 +769,17 @@ namespace nanojit
 
         NanoAssert(div->isop(LIR_div));
 
         LIns *lhs = div->oprnd1();
         LIns *rhs = div->oprnd2();
 
         prepResultReg(div, rmask(RAX));
 
-        Register rhsReg = findRegFor(rhs, (GpRegs ^ (rmask(RAX)|rmask(RDX))));
+        Register rhsReg = findRegFor(rhs, GpRegs & ~(rmask(RAX)|rmask(RDX)));
         Register lhsReg = lhs->isUnusedOrHasUnknownReg()
                           ? findSpecificRegForUnallocated(lhs, RAX)
                           : lhs->getReg();
         IDIV(rhsReg);
         SARI(RDX, 31);
         MR(RDX, RAX);
         if (RAX != lhsReg)
             MR(RAX, lhsReg);
@@ -1380,33 +1380,30 @@ namespace nanojit
             // keep already assigned register
             rr = ins->getReg();
             NanoAssert(allow & rmask(rr));
             freeRsrcOf(ins, false);
         }
     }
 
     void Assembler::asm_load64(LIns *ins) {
-
         Register rr, rb;
         int32_t dr;
         switch (ins->opcode()) {
             case LIR_ldq:
             case LIR_ldqc:
+                regalloc_load(ins, GpRegs, rr, dr, rb);
+                NanoAssert(IsGpReg(rr));
+                MOVQRM(rr, dr, rb);     // general 64bit load, 32bit const displacement
+                break;
             case LIR_ldf:
             case LIR_ldfc:
-                regalloc_load(ins, GpRegs, rr, dr, rb);
-                if (IsGpReg(rr)) {
-                    // general 64bit load, 32bit const displacement
-                    MOVQRM(rr, dr, rb);
-                } else {
-                    NanoAssert(IsFpReg(rr));
-                    // load 64bits into XMM.  don't know if double or int64, assume double.
-                    MOVSDRM(rr, dr, rb);
-                }
+                regalloc_load(ins, FpRegs, rr, dr, rb);
+                NanoAssert(IsFpReg(rr));
+                MOVSDRM(rr, dr, rb);    // load 64bits into XMM
                 break;
             case LIR_ld32f:
             case LIR_ldc32f:
                 regalloc_load(ins, FpRegs, rr, dr, rb);
                 NanoAssert(IsFpReg(rr));
                 CVTSS2SD(rr, rr);
                 MOVSSRM(rr, dr, rb);
                 break;
@@ -1449,84 +1446,48 @@ namespace nanojit
                 break;
         }
     }
 
     void Assembler::asm_store64(LOpcode op, LIns *value, int d, LIns *base) {
         NanoAssert(value->isQuad());
 
         Register b = getBaseReg(base, d, BaseRegs);
-        Register r;
-
-        // if we have to choose a register, use a GPR, but not the base reg
-        if (value->isUnusedOrHasUnknownReg()) {
-            RegisterMask allow;
-            // If op is LIR_st32f and we have no reg, prefer FPR over GPR: saves an instruction later,
-            // and the value is almost certainly going to operated on as FP later anyway.
-            // XXX: isFloat doesn't cover float/fmod!  see bug 520208.
-            if (op == LIR_st32f || value->isFloat() || value->isop(LIR_float) || value->isop(LIR_fmod)) {
-                allow = FpRegs;
-            } else {
-                allow = GpRegs;
-            }
-            r = findRegFor(value, allow & ~rmask(b));
-        } else {
-            r = value->getReg();
-        }
 
         switch (op) {
-            case LIR_stqi:
-            case LIR_stfi:
-            {
-                if (IsGpReg(r)) {
-                    // gpr store
-                    MOVQMR(r, d, b);
-                }
-                else {
-                    // xmm store
-                    MOVSDMR(r, d, b);
-                }
+            case LIR_stqi: {
+                Register r = findRegFor(value, GpRegs & ~rmask(b));
+                MOVQMR(r, d, b);    // gpr store
                 break;
             }
-            case LIR_st32f:
-            {
-                // need a scratch FPR reg
+            case LIR_stfi: {
+                Register r = findRegFor(value, FpRegs);
+                MOVSDMR(r, d, b);   // xmm store
+                break;
+            }
+            case LIR_st32f: {
+                Register r = findRegFor(value, FpRegs);
                 Register t = registerAllocTmp(FpRegs & ~rmask(r));
 
-                // store
-                MOVSSMR(t, d, b);
-
-                // cvt to single-precision
-                if (IsGpReg(r))
-                {
-                    CVTSD2SS(t, t);
-                    MOVQXR(t, r); // xmm <- gpr: use movq xmm, r/m64 (66 REX.W 0F 6E /r)
-                }
-                else
-                {
-                    NanoAssert(IsFpReg(r));
-                    CVTSD2SS(t, r);
-                }
-                XORPS(t); // break dependency chains
+                MOVSSMR(t, d, b);   // store
+                CVTSD2SS(t, r);     // cvt to single-precision
+                XORPS(t);           // break dependency chains
                 break;
             }
             default:
                 NanoAssertMsg(0, "asm_store64 should never receive this LIR opcode");
                 break;
         }
     }
 
     void Assembler::asm_store32(LOpcode op, LIns *value, int d, LIns *base) {
 
-        // quirk of x86-64: reg cannot appear to be ah/bh/ch/dh
-        // for single-byte stores with REX prefix
-        const RegisterMask SrcRegs =
-                        (op == LIR_stb) ?
-                        (GpRegs & ~(1<<RSP | 1<<RBP | 1<<RSI | 1<<RDI)) :
-                        GpRegs;
+        // Quirk of x86-64: reg cannot appear to be ah/bh/ch/dh for
+        // single-byte stores with REX prefix.
+        const RegisterMask SrcRegs = (op == LIR_stb) ? SingleByteStoreRegs : GpRegs;
 
         NanoAssert(!value->isQuad());
         Register b = getBaseReg(base, d, BaseRegs);
         Register r = findRegFor(value, SrcRegs & ~rmask(b));
 
         switch (op) {
             case LIR_stb:
                 MOVBMR(r, d, b);
--- a/js/src/nanojit/NativeX64.h
+++ b/js/src/nanojit/NativeX64.h
@@ -324,16 +324,20 @@ namespace nanojit
     static const RegisterMask SavedRegs = 1<<RBX | 1<<RSI | 1<<RDI | 1<<R12 | 1<<R13 | 1<<R14 | 1<<R15;
     static const int NumSavedRegs = 7; // rbx, rsi, rdi, r12-15
     static const int NumArgRegs = 4;
 #else
     static const RegisterMask SavedRegs = 1<<RBX | 1<<R12 | 1<<R13 | 1<<R14 | 1<<R15;
     static const int NumSavedRegs = 5; // rbx, r12-15
     static const int NumArgRegs = 6;
 #endif
+    // Warning:  when talking about single byte registers, RSP/RBP/RSI/RDI are
+    // actually synonyms for AH/CH/DH/BH.  So this value means "any
+    // single-byte GpReg except AH/CH/DH/BH".
+    static const int SingleByteStoreRegs = GpRegs & ~(1<<RSP | 1<<RBP | 1<<RSI | 1<<RDI);
 
     static inline bool IsFpReg(Register r) {
         return ((1<<r) & FpRegs) != 0;
     }
     static inline bool IsGpReg(Register r) {
         return ((1<<r) & GpRegs) != 0;
     }
 
--- a/js/src/nanojit/Nativei386.cpp
+++ b/js/src/nanojit/Nativei386.cpp
@@ -584,17 +584,16 @@ namespace nanojit
                     break;
                 default:
                     NanoAssertMsg(0, "asm_load64 should never receive this LIR opcode");
                     break;
             }
         }
         else
         {
-
             int dr = disp(ins);
             Register rb;
             if (base->isop(LIR_alloc)) {
                 rb = FP;
                 db += findMemFor(base);
             } else {
                 rb = findRegFor(base, GpRegs);
             }