Bug 538060 - nanojit: improve 64-bit loads and stores in the X64 back-end. r=gal,rreitmai,edwsmith.
--- a/js/src/lirasm/lirasm.cpp
+++ b/js/src/lirasm/lirasm.cpp
@@ -654,19 +654,19 @@ FragmentAssembler::assemble_call(const s
int ty;
ci->_abi = _abi;
ci->_argtypes = 0;
size_t argc = mTokens.size();
for (size_t i = 0; i < argc; ++i) {
args[i] = ref(mTokens[mTokens.size() - (i+1)]);
- if (args[i]->isFloat()) ty = ARGSIZE_F;
- else if (args[i]->isQuad()) ty = ARGSIZE_Q;
- else ty = ARGSIZE_I;
+ if (args[i]->isF64()) ty = ARGSIZE_F;
+ else if (args[i]->isI64()) ty = ARGSIZE_Q;
+ else ty = ARGSIZE_I;
// Nb: i+1 because argMask() uses 1-based arg counting.
ci->_argtypes |= argMask(ty, i+1, argc);
}
// Select return type from opcode.
ty = 0;
if (mOpcode == LIR_icall) ty = ARGSIZE_LO;
else if (mOpcode == LIR_fcall) ty = ARGSIZE_F;
--- a/js/src/nanojit/LIR.cpp
+++ b/js/src/nanojit/LIR.cpp
@@ -418,32 +418,16 @@ namespace nanojit
// oprnd_2 must be in the same position in LIns{Op2,Op3,Sti}
// because oprnd2() is used for both of them.
NanoStaticAssert( (offsetof(LInsOp2, ins) - offsetof(LInsOp2, oprnd_2)) ==
(offsetof(LInsOp3, ins) - offsetof(LInsOp3, oprnd_2)) );
NanoStaticAssert( (offsetof(LInsOp3, ins) - offsetof(LInsOp3, oprnd_2)) ==
(offsetof(LInsSti, ins) - offsetof(LInsSti, oprnd_2)) );
}
- bool LIns::isFloat() const {
- switch (opcode()) {
- default:
- return false;
- case LIR_fadd:
- case LIR_fsub:
- case LIR_fmul:
- case LIR_fdiv:
- case LIR_fneg:
- case LIR_fcall:
- case LIR_i2f:
- case LIR_u2f:
- return true;
- }
- }
-
LIns* LirWriter::ins2i(LOpcode v, LIns* oprnd1, int32_t imm)
{
return ins2(v, oprnd1, insImm(imm));
}
bool insIsS16(LInsp i)
{
if (i->isconst()) {
--- a/js/src/nanojit/LIR.h
+++ b/js/src/nanojit/LIR.h
@@ -487,24 +487,19 @@ namespace nanojit
return isCseOpcode(opcode()) || (isCall() && callInfo()->_cse);
}
bool isRet() const {
return isRetOpcode(opcode());
}
bool isop(LOpcode o) const {
return opcode() == o;
}
- bool isQuad() const {
- LTy ty = retTypes[opcode()];
- return ty == LTy_I64 || ty == LTy_F64;
- }
bool isCond() const {
return (isop(LIR_ov)) || isCmp();
}
- bool isFloat() const; // not inlined because it contains a switch
bool isCmp() const {
LOpcode op = opcode();
return (op >= LIR_eq && op <= LIR_uge) ||
(op >= LIR_qeq && op <= LIR_quge) ||
(op >= LIR_feq && op <= LIR_fge);
}
bool isCall() const {
return isop(LIR_icall) || isop(LIR_fcall) || isop(LIR_qcall);
@@ -545,38 +540,53 @@ namespace nanojit
bool isconstf() const {
return isop(LIR_float);
}
bool isBranch() const {
return isop(LIR_jt) || isop(LIR_jf) || isop(LIR_j) || isop(LIR_jtbl);
}
- bool isPtr() {
-#ifdef NANOJIT_64BIT
+ bool isVoid() const {
+ return retTypes[opcode()] == LTy_Void;
+ }
+ bool isI32() const {
+ return retTypes[opcode()] == LTy_I32;
+ }
+ bool isI64() const {
return retTypes[opcode()] == LTy_I64;
+ }
+ bool isF64() const {
+ return retTypes[opcode()] == LTy_F64;
+ }
+ bool isQuad() const {
+ return isI64() || isF64();
+ }
+ bool isPtr() const {
+#ifdef NANOJIT_64BIT
+ return isI64();
#else
- return retTypes[opcode()] == LTy_I32;
+ return isI32();
#endif
}
// Return true if removal of 'ins' from a LIR fragment could
// possibly change the behaviour of that fragment, even if any
// value computed by 'ins' is not used later in the fragment.
// In other words, can 'ins' possibly alter control flow or memory?
// Note, this assumes that loads will never fault and hence cannot
// affect the control flow.
bool isStmt() {
NanoAssert(!isop(LIR_start) && !isop(LIR_skip));
// All instructions with Void retType are statements. And some
// calls are statements too.
if (isCall())
return !isCse();
else
- return retTypes[opcode()] == LTy_Void;
+ return isVoid();
}
inline void* constvalp() const
{
#ifdef NANOJIT_64BIT
return (void*)imm64();
#else
return (void*)imm32();
--- a/js/src/nanojit/NativeX64.cpp
+++ b/js/src/nanojit/NativeX64.cpp
@@ -769,17 +769,17 @@ namespace nanojit
NanoAssert(div->isop(LIR_div));
LIns *lhs = div->oprnd1();
LIns *rhs = div->oprnd2();
prepResultReg(div, rmask(RAX));
- Register rhsReg = findRegFor(rhs, (GpRegs ^ (rmask(RAX)|rmask(RDX))));
+ Register rhsReg = findRegFor(rhs, GpRegs & ~(rmask(RAX)|rmask(RDX)));
Register lhsReg = lhs->isUnusedOrHasUnknownReg()
? findSpecificRegForUnallocated(lhs, RAX)
: lhs->getReg();
IDIV(rhsReg);
SARI(RDX, 31);
MR(RDX, RAX);
if (RAX != lhsReg)
MR(RAX, lhsReg);
@@ -1380,33 +1380,30 @@ namespace nanojit
// keep already assigned register
rr = ins->getReg();
NanoAssert(allow & rmask(rr));
freeRsrcOf(ins, false);
}
}
void Assembler::asm_load64(LIns *ins) {
-
Register rr, rb;
int32_t dr;
switch (ins->opcode()) {
case LIR_ldq:
case LIR_ldqc:
+ regalloc_load(ins, GpRegs, rr, dr, rb);
+ NanoAssert(IsGpReg(rr));
+ MOVQRM(rr, dr, rb); // general 64bit load, 32bit const displacement
+ break;
case LIR_ldf:
case LIR_ldfc:
- regalloc_load(ins, GpRegs, rr, dr, rb);
- if (IsGpReg(rr)) {
- // general 64bit load, 32bit const displacement
- MOVQRM(rr, dr, rb);
- } else {
- NanoAssert(IsFpReg(rr));
- // load 64bits into XMM. don't know if double or int64, assume double.
- MOVSDRM(rr, dr, rb);
- }
+ regalloc_load(ins, FpRegs, rr, dr, rb);
+ NanoAssert(IsFpReg(rr));
+ MOVSDRM(rr, dr, rb); // load 64bits into XMM
break;
case LIR_ld32f:
case LIR_ldc32f:
regalloc_load(ins, FpRegs, rr, dr, rb);
NanoAssert(IsFpReg(rr));
CVTSS2SD(rr, rr);
MOVSSRM(rr, dr, rb);
break;
@@ -1449,84 +1446,48 @@ namespace nanojit
break;
}
}
void Assembler::asm_store64(LOpcode op, LIns *value, int d, LIns *base) {
NanoAssert(value->isQuad());
Register b = getBaseReg(base, d, BaseRegs);
- Register r;
-
- // if we have to choose a register, use a GPR, but not the base reg
- if (value->isUnusedOrHasUnknownReg()) {
- RegisterMask allow;
- // If op is LIR_st32f and we have no reg, prefer FPR over GPR: saves an instruction later,
- // and the value is almost certainly going to operated on as FP later anyway.
- // XXX: isFloat doesn't cover float/fmod! see bug 520208.
- if (op == LIR_st32f || value->isFloat() || value->isop(LIR_float) || value->isop(LIR_fmod)) {
- allow = FpRegs;
- } else {
- allow = GpRegs;
- }
- r = findRegFor(value, allow & ~rmask(b));
- } else {
- r = value->getReg();
- }
switch (op) {
- case LIR_stqi:
- case LIR_stfi:
- {
- if (IsGpReg(r)) {
- // gpr store
- MOVQMR(r, d, b);
- }
- else {
- // xmm store
- MOVSDMR(r, d, b);
- }
+ case LIR_stqi: {
+ Register r = findRegFor(value, GpRegs & ~rmask(b));
+ MOVQMR(r, d, b); // gpr store
break;
}
- case LIR_st32f:
- {
- // need a scratch FPR reg
+ case LIR_stfi: {
+ Register r = findRegFor(value, FpRegs);
+ MOVSDMR(r, d, b); // xmm store
+ break;
+ }
+ case LIR_st32f: {
+ Register r = findRegFor(value, FpRegs);
Register t = registerAllocTmp(FpRegs & ~rmask(r));
- // store
- MOVSSMR(t, d, b);
-
- // cvt to single-precision
- if (IsGpReg(r))
- {
- CVTSD2SS(t, t);
- MOVQXR(t, r); // xmm <- gpr: use movq xmm, r/m64 (66 REX.W 0F 6E /r)
- }
- else
- {
- NanoAssert(IsFpReg(r));
- CVTSD2SS(t, r);
- }
- XORPS(t); // break dependency chains
+ MOVSSMR(t, d, b); // store
+ CVTSD2SS(t, r); // cvt to single-precision
+ XORPS(t); // break dependency chains
break;
}
default:
NanoAssertMsg(0, "asm_store64 should never receive this LIR opcode");
break;
}
}
void Assembler::asm_store32(LOpcode op, LIns *value, int d, LIns *base) {
- // quirk of x86-64: reg cannot appear to be ah/bh/ch/dh
- // for single-byte stores with REX prefix
- const RegisterMask SrcRegs =
- (op == LIR_stb) ?
- (GpRegs & ~(1<<RSP | 1<<RBP | 1<<RSI | 1<<RDI)) :
- GpRegs;
+ // Quirk of x86-64: reg cannot appear to be ah/bh/ch/dh for
+ // single-byte stores with REX prefix.
+ const RegisterMask SrcRegs = (op == LIR_stb) ? SingleByteStoreRegs : GpRegs;
NanoAssert(!value->isQuad());
Register b = getBaseReg(base, d, BaseRegs);
Register r = findRegFor(value, SrcRegs & ~rmask(b));
switch (op) {
case LIR_stb:
MOVBMR(r, d, b);
--- a/js/src/nanojit/NativeX64.h
+++ b/js/src/nanojit/NativeX64.h
@@ -324,16 +324,20 @@ namespace nanojit
static const RegisterMask SavedRegs = 1<<RBX | 1<<RSI | 1<<RDI | 1<<R12 | 1<<R13 | 1<<R14 | 1<<R15;
static const int NumSavedRegs = 7; // rbx, rsi, rdi, r12-15
static const int NumArgRegs = 4;
#else
static const RegisterMask SavedRegs = 1<<RBX | 1<<R12 | 1<<R13 | 1<<R14 | 1<<R15;
static const int NumSavedRegs = 5; // rbx, r12-15
static const int NumArgRegs = 6;
#endif
+ // Warning: when talking about single byte registers, RSP/RBP/RSI/RDI are
+ // actually synonyms for AH/CH/DH/BH. So this value means "any
+ // single-byte GpReg except AH/CH/DH/BH".
+    static const RegisterMask SingleByteStoreRegs = GpRegs & ~(1<<RSP | 1<<RBP | 1<<RSI | 1<<RDI);
static inline bool IsFpReg(Register r) {
return ((1<<r) & FpRegs) != 0;
}
static inline bool IsGpReg(Register r) {
return ((1<<r) & GpRegs) != 0;
}
--- a/js/src/nanojit/Nativei386.cpp
+++ b/js/src/nanojit/Nativei386.cpp
@@ -584,17 +584,16 @@ namespace nanojit
break;
default:
NanoAssertMsg(0, "asm_load64 should never receive this LIR opcode");
break;
}
}
else
{
-
int dr = disp(ins);
Register rb;
if (base->isop(LIR_alloc)) {
rb = FP;
db += findMemFor(base);
} else {
rb = findRegFor(base, GpRegs);
}