--- a/js/src/nanojit/LIR.cpp
+++ b/js/src/nanojit/LIR.cpp
@@ -1144,34 +1144,41 @@ namespace nanojit
LInsHashSet::LInsHashSet(Allocator& alloc, uint32_t kInitialCaps[]) : alloc(alloc)
{
for (LInsHashKind kind = LInsFirst; kind <= LInsLast; kind = nextKind(kind)) {
m_cap[kind] = kInitialCaps[kind];
m_list[kind] = new (alloc) LInsp[m_cap[kind]];
}
clear();
- m_find[LInsImm] = &LInsHashSet::findImm;
- m_find[LInsImmq] = PTR_SIZE(NULL, &LInsHashSet::findImmq);
- m_find[LInsImmf] = &LInsHashSet::findImmf;
- m_find[LIns1] = &LInsHashSet::find1;
- m_find[LIns2] = &LInsHashSet::find2;
- m_find[LIns3] = &LInsHashSet::find3;
- m_find[LInsLoad] = &LInsHashSet::findLoad;
- m_find[LInsCall] = &LInsHashSet::findCall;
+ m_find[LInsImm] = &LInsHashSet::findImm;
+ m_find[LInsImmq] = PTR_SIZE(NULL, &LInsHashSet::findImmq);
+ m_find[LInsImmf] = &LInsHashSet::findImmf;
+ m_find[LIns1] = &LInsHashSet::find1;
+ m_find[LIns2] = &LInsHashSet::find2;
+ m_find[LIns3] = &LInsHashSet::find3;
+ m_find[LInsCall] = &LInsHashSet::findCall;
+ m_find[LInsLoadReadOnly] = &LInsHashSet::findLoadReadOnly;
+ m_find[LInsLoadStack] = &LInsHashSet::findLoadStack;
+ m_find[LInsLoadRStack] = &LInsHashSet::findLoadRStack;
+ m_find[LInsLoadOther] = &LInsHashSet::findLoadOther;
+ m_find[LInsLoadMultiple] = &LInsHashSet::findLoadMultiple;
+ }
+
+ void LInsHashSet::clear(LInsHashKind kind) {
+ VMPI_memset(m_list[kind], 0, sizeof(LInsp)*m_cap[kind]);
+ m_used[kind] = 0;
}
void LInsHashSet::clear() {
for (LInsHashKind kind = LInsFirst; kind <= LInsLast; kind = nextKind(kind)) {
- VMPI_memset(m_list[kind], 0, sizeof(LInsp)*m_cap[kind]);
- m_used[kind] = 0;
+ clear(kind);
}
}
-
inline uint32_t LInsHashSet::hashImm(int32_t a) {
return _hashfinish(_hash32(0,a));
}
inline uint32_t LInsHashSet::hashImmq(uint64_t a) {
uint32_t hash = _hash32(0, uint32_t(a >> 32));
return _hashfinish(_hash32(hash, uint32_t(a)));
}
@@ -1189,20 +1196,25 @@ namespace nanojit
inline uint32_t LInsHashSet::hash3(LOpcode op, LInsp a, LInsp b, LInsp c) {
uint32_t hash = _hash8(0,uint8_t(op));
hash = _hashptr(hash, a);
hash = _hashptr(hash, b);
return _hashfinish(_hashptr(hash, c));
}
- inline uint32_t LInsHashSet::hashLoad(LOpcode op, LInsp a, int32_t d) {
+ NanoStaticAssert(sizeof(AccSet) == 1); // required for hashLoad to work properly
+
+ // Nb: each region's loads go in a different hash table, so the AccSet
+ // isn't needed to distinguish entries, but we fold it into the hash below.
+ inline uint32_t LInsHashSet::hashLoad(LOpcode op, LInsp a, int32_t d, AccSet accSet) {
uint32_t hash = _hash8(0,uint8_t(op));
hash = _hashptr(hash, a);
- return _hashfinish(_hash32(hash, d));
+ hash = _hash32(hash, d);
+ return _hashfinish(_hash8(hash, accSet));
}
inline uint32_t LInsHashSet::hashCall(const CallInfo *ci, uint32_t argc, LInsp args[]) {
uint32_t hash = _hashptr(0, ci);
for (int32_t j=argc-1; j >= 0; j--)
hash = _hashptr(hash,args[j]);
return _hashfinish(hash);
}
@@ -1214,234 +1226,264 @@ namespace nanojit
LInsp *oldlist = m_list[kind];
m_list[kind] = new (alloc) LInsp[m_cap[kind]];
VMPI_memset(m_list[kind], 0, m_cap[kind] * sizeof(LInsp));
find_t find = m_find[kind];
for (uint32_t i = 0; i < oldcap; i++) {
LInsp ins = oldlist[i];
if (!ins) continue;
uint32_t j = (this->*find)(ins);
+ NanoAssert(!m_list[kind][j]);
m_list[kind][j] = ins;
}
}
- LInsp LInsHashSet::add(LInsHashKind kind, LInsp ins, uint32_t k)
+ void LInsHashSet::add(LInsHashKind kind, LInsp ins, uint32_t k)
{
NanoAssert(!m_list[kind][k]);
m_used[kind]++;
m_list[kind][k] = ins;
if ((m_used[kind] * 4) >= (m_cap[kind] * 3)) { // load factor of 0.75
grow(kind);
}
- return ins;
}
LInsp LInsHashSet::findImm(int32_t a, uint32_t &k)
{
LInsHashKind kind = LInsImm;
const uint32_t bitmask = m_cap[kind] - 1;
- uint32_t hash = hashImm(a) & bitmask;
+ k = hashImm(a) & bitmask;
uint32_t n = 1;
- LInsp ins;
- while ((ins = m_list[kind][hash]) != NULL &&
- (ins->imm32() != a))
- {
+ while (true) {
+ LInsp ins = m_list[kind][k];
+ if (!ins)
+ return NULL;
NanoAssert(ins->isconst());
+ if (ins->imm32() == a)
+ return ins;
// Quadratic probe: h(k,i) = h(k) + 0.5i + 0.5i^2, which gives the
// sequence h(k), h(k)+1, h(k)+3, h(k)+6, h(k)+10, ... This is a
// good sequence for 2^n-sized tables as the values h(k,i) for i
// in [0,m − 1] are all distinct so termination is guaranteed.
// See http://portal.acm.org/citation.cfm?id=360737 and
// http://en.wikipedia.org/wiki/Quadratic_probing (fetched
// 06-Nov-2009) for more details.
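+ // For example (illustrative only): with an 8-entry table and h(k) == 5,
+ // the probe sequence visits slots 5, 6, 0, 3, 7, 4, 2, 1 -- each slot
+ // exactly once.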
- hash = (hash + n) & bitmask;
+ k = (k + n) & bitmask;
n += 1;
}
- k = hash;
- return ins;
}
uint32_t LInsHashSet::findImm(LInsp ins)
{
uint32_t k;
findImm(ins->imm32(), k);
return k;
}
#ifdef NANOJIT_64BIT
LInsp LInsHashSet::findImmq(uint64_t a, uint32_t &k)
{
LInsHashKind kind = LInsImmq;
const uint32_t bitmask = m_cap[kind] - 1;
- uint32_t hash = hashImmq(a) & bitmask;
+ k = hashImmq(a) & bitmask;
uint32_t n = 1;
- LInsp ins;
- while ((ins = m_list[kind][hash]) != NULL &&
- (ins->imm64() != a))
- {
+ while (true) {
+ LInsp ins = m_list[kind][k];
+ if (!ins)
+ return NULL;
NanoAssert(ins->isconstq());
- hash = (hash + n) & bitmask;
+ if (ins->imm64() == a)
+ return ins;
+ k = (k + n) & bitmask;
n += 1;
}
- k = hash;
- return ins;
}
uint32_t LInsHashSet::findImmq(LInsp ins)
{
uint32_t k;
findImmq(ins->imm64(), k);
return k;
}
#endif
LInsp LInsHashSet::findImmf(uint64_t a, uint32_t &k)
{
LInsHashKind kind = LInsImmf;
const uint32_t bitmask = m_cap[kind] - 1;
- uint32_t hash = hashImmq(a) & bitmask;
+ k = hashImmq(a) & bitmask;
uint32_t n = 1;
- LInsp ins;
- while ((ins = m_list[kind][hash]) != NULL &&
- (ins->imm64() != a))
- {
+ while (true) {
+ LInsp ins = m_list[kind][k];
+ if (!ins)
+ return NULL;
NanoAssert(ins->isconstf());
- hash = (hash + n) & bitmask;
+ if (ins->imm64() == a)
+ return ins;
+ k = (k + n) & bitmask;
n += 1;
}
- k = hash;
- return ins;
}
uint32_t LInsHashSet::findImmf(LInsp ins)
{
uint32_t k;
findImmf(ins->imm64(), k);
return k;
}
LInsp LInsHashSet::find1(LOpcode op, LInsp a, uint32_t &k)
{
LInsHashKind kind = LIns1;
const uint32_t bitmask = m_cap[kind] - 1;
- uint32_t hash = hash1(op,a) & bitmask;
+ k = hash1(op, a) & bitmask;
uint32_t n = 1;
- LInsp ins;
- while ((ins = m_list[kind][hash]) != NULL &&
- (ins->opcode() != op || ins->oprnd1() != a))
- {
- hash = (hash + n) & bitmask;
+ while (true) {
+ LInsp ins = m_list[kind][k];
+ if (!ins)
+ return NULL;
+ if (ins->isop(op) && ins->oprnd1() == a)
+ return ins;
+ k = (k + n) & bitmask;
n += 1;
}
- k = hash;
- return ins;
}
uint32_t LInsHashSet::find1(LInsp ins)
{
uint32_t k;
find1(ins->opcode(), ins->oprnd1(), k);
return k;
}
LInsp LInsHashSet::find2(LOpcode op, LInsp a, LInsp b, uint32_t &k)
{
LInsHashKind kind = LIns2;
const uint32_t bitmask = m_cap[kind] - 1;
- uint32_t hash = hash2(op,a,b) & bitmask;
+ k = hash2(op, a, b) & bitmask;
uint32_t n = 1;
- LInsp ins;
- while ((ins = m_list[kind][hash]) != NULL &&
- (ins->opcode() != op || ins->oprnd1() != a || ins->oprnd2() != b))
- {
- hash = (hash + n) & bitmask;
+ while (true) {
+ LInsp ins = m_list[kind][k];
+ if (!ins)
+ return NULL;
+ if (ins->isop(op) && ins->oprnd1() == a && ins->oprnd2() == b)
+ return ins;
+ k = (k + n) & bitmask;
n += 1;
}
- k = hash;
- return ins;
}
uint32_t LInsHashSet::find2(LInsp ins)
{
uint32_t k;
find2(ins->opcode(), ins->oprnd1(), ins->oprnd2(), k);
return k;
}
LInsp LInsHashSet::find3(LOpcode op, LInsp a, LInsp b, LInsp c, uint32_t &k)
{
LInsHashKind kind = LIns3;
const uint32_t bitmask = m_cap[kind] - 1;
- uint32_t hash = hash3(op,a,b,c) & bitmask;
+ k = hash3(op, a, b, c) & bitmask;
uint32_t n = 1;
- LInsp ins;
- while ((ins = m_list[kind][hash]) != NULL &&
- (ins->opcode() != op || ins->oprnd1() != a || ins->oprnd2() != b || ins->oprnd3() != c))
- {
- hash = (hash + n) & bitmask;
+ while (true) {
+ LInsp ins = m_list[kind][k];
+ if (!ins)
+ return NULL;
+ if (ins->isop(op) && ins->oprnd1() == a && ins->oprnd2() == b && ins->oprnd3() == c)
+ return ins;
+ k = (k + n) & bitmask;
n += 1;
}
- k = hash;
- return ins;
}
uint32_t LInsHashSet::find3(LInsp ins)
{
uint32_t k;
find3(ins->opcode(), ins->oprnd1(), ins->oprnd2(), ins->oprnd3(), k);
return k;
}
- LInsp LInsHashSet::findLoad(LOpcode op, LInsp a, int32_t d, uint32_t &k)
+ LInsp LInsHashSet::findLoad(LOpcode op, LInsp a, int32_t d, AccSet accSet, LInsHashKind kind,
+ uint32_t &k)
{
- LInsHashKind kind = LInsLoad;
+ (void)accSet;
const uint32_t bitmask = m_cap[kind] - 1;
- uint32_t hash = hashLoad(op,a,d) & bitmask;
+ k = hashLoad(op, a, d, accSet) & bitmask;
uint32_t n = 1;
- LInsp ins;
- while ((ins = m_list[kind][hash]) != NULL &&
- (ins->opcode() != op || ins->oprnd1() != a || ins->disp() != d))
- {
- hash = (hash + n) & bitmask;
+ while (true) {
+ LInsp ins = m_list[kind][k];
+ if (!ins)
+ return NULL;
+ NanoAssert(ins->accSet() == accSet);
+ if (ins->isop(op) && ins->oprnd1() == a && ins->disp() == d)
+ return ins;
+ k = (k + n) & bitmask;
n += 1;
}
- k = hash;
- return ins;
}
- uint32_t LInsHashSet::findLoad(LInsp ins)
+ uint32_t LInsHashSet::findLoadReadOnly(LInsp ins)
+ {
+ uint32_t k;
+ findLoad(ins->opcode(), ins->oprnd1(), ins->disp(), ins->accSet(), LInsLoadReadOnly, k);
+ return k;
+ }
+
+ uint32_t LInsHashSet::findLoadStack(LInsp ins)
{
uint32_t k;
- findLoad(ins->opcode(), ins->oprnd1(), ins->disp(), k);
+ findLoad(ins->opcode(), ins->oprnd1(), ins->disp(), ins->accSet(), LInsLoadStack, k);
+ return k;
+ }
+
+ uint32_t LInsHashSet::findLoadRStack(LInsp ins)
+ {
+ uint32_t k;
+ findLoad(ins->opcode(), ins->oprnd1(), ins->disp(), ins->accSet(), LInsLoadRStack, k);
+ return k;
+ }
+
+ uint32_t LInsHashSet::findLoadOther(LInsp ins)
+ {
+ uint32_t k;
+ findLoad(ins->opcode(), ins->oprnd1(), ins->disp(), ins->accSet(), LInsLoadOther, k);
+ return k;
+ }
+
+ uint32_t LInsHashSet::findLoadMultiple(LInsp ins)
+ {
+ uint32_t k;
+ findLoad(ins->opcode(), ins->oprnd1(), ins->disp(), ins->accSet(), LInsLoadMultiple, k);
return k;
}
bool argsmatch(LInsp ins, uint32_t argc, LInsp args[])
{
for (uint32_t j=0; j < argc; j++)
if (ins->arg(j) != args[j])
return false;
return true;
}
LInsp LInsHashSet::findCall(const CallInfo *ci, uint32_t argc, LInsp args[], uint32_t &k)
{
LInsHashKind kind = LInsCall;
const uint32_t bitmask = m_cap[kind] - 1;
- uint32_t hash = hashCall(ci, argc, args) & bitmask;
+ k = hashCall(ci, argc, args) & bitmask;
uint32_t n = 1;
- LInsp ins;
- while ((ins = m_list[kind][hash]) != NULL &&
- (!ins->isCall() || ins->callInfo() != ci || !argsmatch(ins, argc, args)))
- {
- hash = (hash + n) & bitmask;
+ while (true) {
+ LInsp ins = m_list[kind][k];
+ if (!ins)
+ return NULL;
+ if (ins->isCall() && ins->callInfo() == ci && argsmatch(ins, argc, args))
+ return ins;
+ k = (k + n) & bitmask;
n += 1;
}
- k = hash;
- return ins;
}
uint32_t LInsHashSet::findCall(LInsp ins)
{
LInsp args[MAXARGS];
uint32_t argc = ins->argc();
NanoAssert(argc < MAXARGS);
for (uint32_t j=0; j < argc; j++)
@@ -1723,17 +1765,16 @@ namespace nanojit
if (e->i->isGuard() || e->i->isBranch() || e->i->isRet()) {
logc->printf("\n");
newblock = true;
}
}
}
-
void LirNameMap::addNameWithSuffix(LInsp ins, const char *name, int suffix,
bool ignoreOneSuffix) {
// The lookup may succeed, ie. we may already have a name for this
// instruction. This can happen because of CSE. Eg. if we have this:
//
// ins = addName("foo", insImm(0))
//
// that assigns the name "foo1" to 'ins'. If we later do this:
@@ -1787,33 +1828,28 @@ namespace nanojit
const char* LirNameMap::lookupName(LInsp ins)
{
Entry* e = names.get(ins);
return e ? e->name : NULL;
}
- char* LInsPrinter::formatAccSet(RefBuf* buf, LInsp ins, bool isLoad) {
- AccSet accSet = ins->accSet();
+ char* LInsPrinter::formatAccSet(RefBuf* buf, AccSet accSet) {
int i = 0;
- if ((isLoad && accSet == ACC_LOAD_ANY) ||
- (!isLoad && accSet == ACC_STORE_ANY))
- {
- // boring, don't bother with a suffix
- } else {
- buf->buf[i++] = '.';
- if (accSet & ACC_READONLY) { buf->buf[i++] = 'r'; accSet &= ~ACC_READONLY; }
- if (accSet & ACC_STACK) { buf->buf[i++] = 's'; accSet &= ~ACC_STACK; }
- if (accSet & ACC_OTHER) { buf->buf[i++] = 'o'; accSet &= ~ACC_OTHER; }
- // This assertion will fail if we add a new accSet value but
- // forget to handle it here.
- NanoAssert(accSet == 0);
- }
+ // 'c' is short for "const", because 'r' is used for RSTACK.
+ if (accSet & ACC_READONLY) { buf->buf[i++] = 'c'; accSet &= ~ACC_READONLY; }
+ if (accSet & ACC_STACK) { buf->buf[i++] = 's'; accSet &= ~ACC_STACK; }
+ if (accSet & ACC_RSTACK) { buf->buf[i++] = 'r'; accSet &= ~ACC_RSTACK; }
+ if (accSet & ACC_OTHER) { buf->buf[i++] = 'o'; accSet &= ~ACC_OTHER; }
+ // This assertion will fail if we add a new accSet value but
+ // forget to handle it here.
+ NanoAssert(accSet == 0);
buf->buf[i] = 0;
+ NanoAssert(size_t(i) < buf->len);
return buf->buf;
}
void LInsPrinter::formatImm(RefBuf* buf, int32_t c) {
if (-10000 < c && c < 10000) {
VMPI_snprintf(buf->buf, buf->len, "%d", c);
} else {
#if !defined NANOJIT_64BIT
@@ -1914,21 +1950,22 @@ namespace nanojit
case LIR_icall:
case LIR_fcall:
CASE64(LIR_qcall:) {
const CallInfo* call = i->callInfo();
int32_t argc = i->argc();
int32_t m = int32_t(n); // Windows doesn't have 'ssize_t'
if (call->isIndirect())
- m -= VMPI_snprintf(s, m, "%s = %s [%s] ( ", formatRef(&b1, i), lirNames[op],
- formatRef(&b2, i->arg(--argc)));
+ m -= VMPI_snprintf(s, m, "%s = %s.%s [%s] ( ", formatRef(&b1, i), lirNames[op],
+ formatAccSet(&b2, call->_storeAccSet),
+ formatRef(&b3, i->arg(--argc)));
else
- m -= VMPI_snprintf(s, m, "%s = %s #%s ( ", formatRef(&b1, i), lirNames[op],
- call->_name);
+ m -= VMPI_snprintf(s, m, "%s = %s.%s #%s ( ", formatRef(&b1, i), lirNames[op],
+ formatAccSet(&b2, call->_storeAccSet), call->_name);
if (m < 0) break;
for (int32_t j = argc - 1; j >= 0; j--) {
s += VMPI_strlen(s);
m -= VMPI_snprintf(s, m, "%s ",formatRef(&b2, i->arg(j)));
if (m < 0) break;
}
s += VMPI_strlen(s);
m -= VMPI_snprintf(s, m, ")");
@@ -2069,175 +2106,217 @@ namespace nanojit
case LIR_ld:
CASE64(LIR_ldq:)
case LIR_ldf:
case LIR_ldzb:
case LIR_ldzs:
case LIR_ldsb:
case LIR_ldss:
case LIR_ld32f:
- VMPI_snprintf(s, n, "%s = %s%s %s[%d]", formatRef(&b1, i), lirNames[op],
- formatAccSet(&b2, i, /*isLoad*/true),
+ VMPI_snprintf(s, n, "%s = %s.%s %s[%d]", formatRef(&b1, i), lirNames[op],
+ formatAccSet(&b2, i->accSet()),
formatRef(&b3, i->oprnd1()),
i->disp());
break;
case LIR_sti:
CASE64(LIR_stqi:)
case LIR_stfi:
case LIR_stb:
case LIR_sts:
case LIR_st32f:
- VMPI_snprintf(s, n, "%s%s %s[%d] = %s", lirNames[op],
- formatAccSet(&b1, i, /*isLoad*/false),
+ VMPI_snprintf(s, n, "%s.%s %s[%d] = %s", lirNames[op],
+ formatAccSet(&b1, i->accSet()),
formatRef(&b2, i->oprnd2()),
i->disp(),
formatRef(&b3, i->oprnd1()));
break;
default:
NanoAssertMsgf(0, "Can't handle opcode %s\n", lirNames[op]);
break;
}
return buf->buf;
}
#endif
CseFilter::CseFilter(LirWriter *out, Allocator& alloc)
- : LirWriter(out)
+ : LirWriter(out), storesSinceLastLoad(ACC_NONE)
{
uint32_t kInitialCaps[LInsLast + 1];
- kInitialCaps[LInsImm] = 128;
- kInitialCaps[LInsImmq] = PTR_SIZE(0, 16);
- kInitialCaps[LInsImmf] = 16;
- kInitialCaps[LIns1] = 256;
- kInitialCaps[LIns2] = 512;
- kInitialCaps[LIns3] = 16;
- kInitialCaps[LInsLoad] = 16;
- kInitialCaps[LInsCall] = 64;
+ kInitialCaps[LInsImm] = 128;
+ kInitialCaps[LInsImmq] = PTR_SIZE(0, 16);
+ kInitialCaps[LInsImmf] = 16;
+ kInitialCaps[LIns1] = 256;
+ kInitialCaps[LIns2] = 512;
+ kInitialCaps[LIns3] = 16;
+ kInitialCaps[LInsCall] = 64;
+ kInitialCaps[LInsLoadReadOnly] = 16;
+ kInitialCaps[LInsLoadStack] = 16;
+ kInitialCaps[LInsLoadRStack] = 16;
+ kInitialCaps[LInsLoadOther] = 16;
+ kInitialCaps[LInsLoadMultiple] = 16;
exprs = new (alloc) LInsHashSet(alloc, kInitialCaps);
}
LIns* CseFilter::insImm(int32_t imm)
{
uint32_t k;
LInsp ins = exprs->findImm(imm, k);
- if (ins)
- return ins;
- ins = out->insImm(imm);
+ if (!ins) {
+ ins = out->insImm(imm);
+ exprs->add(LInsImm, ins, k);
+ }
// We assume that downstream stages do not modify the instruction, so
// that we can insert 'ins' into slot 'k'. Check this.
- NanoAssert(ins->opcode() == LIR_int && ins->imm32() == imm);
- return exprs->add(LInsImm, ins, k);
+ NanoAssert(ins->isop(LIR_int) && ins->imm32() == imm);
+ return ins;
}
#ifdef NANOJIT_64BIT
LIns* CseFilter::insImmq(uint64_t q)
{
uint32_t k;
LInsp ins = exprs->findImmq(q, k);
- if (ins)
- return ins;
- ins = out->insImmq(q);
- NanoAssert(ins->opcode() == LIR_quad && ins->imm64() == q);
- return exprs->add(LInsImmq, ins, k);
+ if (!ins) {
+ ins = out->insImmq(q);
+ exprs->add(LInsImmq, ins, k);
+ }
+ NanoAssert(ins->isop(LIR_quad) && ins->imm64() == q);
+ return ins;
}
#endif
LIns* CseFilter::insImmf(double d)
{
uint32_t k;
// We must pun 'd' as a uint64_t otherwise 0 and -0 will be treated as
// equal, which breaks things (see bug 527288).
union {
double d;
uint64_t u64;
} u;
u.d = d;
LInsp ins = exprs->findImmf(u.u64, k);
- if (ins)
- return ins;
- ins = out->insImmf(d);
- NanoAssert(ins->opcode() == LIR_float && ins->imm64() == u.u64);
- return exprs->add(LInsImmf, ins, k);
+ if (!ins) {
+ ins = out->insImmf(d);
+ exprs->add(LInsImmf, ins, k);
+ }
+ NanoAssert(ins->isop(LIR_float) && ins->imm64() == u.u64);
+ return ins;
}
- LIns* CseFilter::ins0(LOpcode v)
+ LIns* CseFilter::ins0(LOpcode op)
{
- if (v == LIR_label)
+ if (op == LIR_label)
exprs->clear();
- return out->ins0(v);
+ return out->ins0(op);
}
- LIns* CseFilter::ins1(LOpcode v, LInsp a)
+ LIns* CseFilter::ins1(LOpcode op, LInsp a)
{
- if (isCseOpcode(v)) {
+ LInsp ins;
+ if (isCseOpcode(op)) {
uint32_t k;
- LInsp ins = exprs->find1(v, a, k);
- if (ins)
- return ins;
- ins = out->ins1(v, a);
- NanoAssert(ins->opcode() == v && ins->oprnd1() == a);
- return exprs->add(LIns1, ins, k);
+ ins = exprs->find1(op, a, k);
+ if (!ins) {
+ ins = out->ins1(op, a);
+ exprs->add(LIns1, ins, k);
+ }
+ } else {
+ ins = out->ins1(op, a);
}
- return out->ins1(v,a);
+ NanoAssert(ins->isop(op) && ins->oprnd1() == a);
+ return ins;
}
- LIns* CseFilter::ins2(LOpcode v, LInsp a, LInsp b)
+ LIns* CseFilter::ins2(LOpcode op, LInsp a, LInsp b)
{
- if (isCseOpcode(v)) {
+ LInsp ins;
+ if (isCseOpcode(op)) {
uint32_t k;
- LInsp ins = exprs->find2(v, a, b, k);
- if (ins)
- return ins;
- ins = out->ins2(v, a, b);
- NanoAssert(ins->opcode() == v && ins->oprnd1() == a && ins->oprnd2() == b);
- return exprs->add(LIns2, ins, k);
+ ins = exprs->find2(op, a, b, k);
+ if (!ins) {
+ ins = out->ins2(op, a, b);
+ exprs->add(LIns2, ins, k);
+ }
+ } else {
+ ins = out->ins2(op, a, b);
}
- return out->ins2(v,a,b);
+ NanoAssert(ins->isop(op) && ins->oprnd1() == a && ins->oprnd2() == b);
+ return ins;
}
- LIns* CseFilter::ins3(LOpcode v, LInsp a, LInsp b, LInsp c)
+ LIns* CseFilter::ins3(LOpcode op, LInsp a, LInsp b, LInsp c)
{
- NanoAssert(isCseOpcode(v));
+ NanoAssert(isCseOpcode(op));
uint32_t k;
- LInsp ins = exprs->find3(v, a, b, c, k);
- if (ins)
- return ins;
- ins = out->ins3(v, a, b, c);
- NanoAssert(ins->opcode() == v && ins->oprnd1() == a && ins->oprnd2() == b &&
- ins->oprnd3() == c);
- return exprs->add(LIns3, ins, k);
+ LInsp ins = exprs->find3(op, a, b, c, k);
+ if (!ins) {
+ ins = out->ins3(op, a, b, c);
+ exprs->add(LIns3, ins, k);
+ }
+ NanoAssert(ins->isop(op) && ins->oprnd1() == a && ins->oprnd2() == b && ins->oprnd3() == c);
+ return ins;
}
- LIns* CseFilter::insLoad(LOpcode v, LInsp base, int32_t disp, AccSet accSet)
+ LIns* CseFilter::insLoad(LOpcode op, LInsp base, int32_t disp, AccSet loadAccSet)
{
+ LInsp ins;
if (isS16(disp)) {
- // XXX: This condition is overly strict. Bug 517910 will make it better.
- if (accSet == ACC_READONLY) {
- uint32_t k;
- LInsp ins = exprs->findLoad(v, base, disp, k);
- if (ins)
- return ins;
- ins = out->insLoad(v, base, disp, accSet);
- NanoAssert(ins->opcode() == v && ins->oprnd1() == base && ins->disp() == disp);
- return exprs->add(LInsLoad, ins, k);
+ // Clear all loads aliased by stores and calls since the last time
+ // we were in this function.
+ if (storesSinceLastLoad != ACC_NONE) {
+ NanoAssert(!(storesSinceLastLoad & ACC_READONLY)); // can't store to READONLY
+ if (storesSinceLastLoad & ACC_STACK) { exprs->clear(LInsLoadStack); }
+ if (storesSinceLastLoad & ACC_RSTACK) { exprs->clear(LInsLoadRStack); }
+ if (storesSinceLastLoad & ACC_OTHER) { exprs->clear(LInsLoadOther); }
+ // Loads marked with multiple access regions must be treated
+ // conservatively -- we always clear all of them.
+ exprs->clear(LInsLoadMultiple);
+ storesSinceLastLoad = ACC_NONE;
}
- return out->insLoad(v, base, disp, accSet);
+
+ LInsHashKind kind;
+ switch (loadAccSet) {
+ case ACC_READONLY: kind = LInsLoadReadOnly; break;
+ case ACC_STACK: kind = LInsLoadStack; break;
+ case ACC_RSTACK: kind = LInsLoadRStack; break;
+ case ACC_OTHER: kind = LInsLoadOther; break;
+ default: kind = LInsLoadMultiple; break;
+ }
+
+ uint32_t k;
+ ins = exprs->findLoad(op, base, disp, loadAccSet, kind, k);
+ if (!ins) {
+ ins = out->insLoad(op, base, disp, loadAccSet);
+ exprs->add(kind, ins, k);
+ }
+ NanoAssert(ins->isop(op) && ins->oprnd1() == base && ins->disp() == disp);
+
} else {
// If the displacement is more than 16 bits, put it in a separate
- // instruction. LirBufWriter also does this, we do it here as
- // well because CseFilter relies on LirBufWriter not changing
- // code.
- return insLoad(v, ins2(LIR_addp, base, insImmWord(disp)), 0, accSet);
+ // instruction. Nb: LirBufWriter also does this; we do it here too
+ // because CseFilter relies on LirBufWriter not changing code.
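+ // (Illustrative: the address becomes an LIR_addp of 'base' and the
+ // displacement, and the resulting load uses a displacement of 0.)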
+ ins = insLoad(op, ins2(LIR_addp, base, insImmWord(disp)), 0, loadAccSet);
}
+ return ins;
}
- LInsp CseFilter::insGuard(LOpcode v, LInsp c, GuardRecord *gr)
+ LIns* CseFilter::insStore(LOpcode op, LInsp value, LInsp base, int32_t disp, AccSet accSet)
+ {
+ storesSinceLastLoad |= accSet;
+ LIns* ins = out->insStore(op, value, base, disp, accSet);
+ NanoAssert(ins->isop(op) && ins->oprnd1() == value && ins->oprnd2() == base &&
+ ins->disp() == disp && ins->accSet() == accSet);
+ return ins;
+ }
+
+ LInsp CseFilter::insGuard(LOpcode op, LInsp c, GuardRecord *gr)
{
// LIR_xt and LIR_xf guards are CSEable. Note that we compare the
// opcode and condition when determining if two guards are equivalent
// -- in find1() and hash1() -- but we do *not* compare the
// GuardRecord. This works because:
// - If guard 1 is taken (exits) then guard 2 is never reached, so
// guard 2 can be removed.
// - If guard 1 is not taken then neither is guard 2, so guard 2 can
@@ -2246,114 +2325,68 @@ namespace nanojit
// The underlying assumptions that are required for this to be safe:
// - There's never a path from the side exit of guard 1 back to guard
// 2; for tree-shaped fragments this should be true.
// - GuardRecords do not contain information other than what is needed
// to execute a successful exit. That is currently true.
// - The CSE algorithm will always keep guard 1 and remove guard 2
// (not vice versa). The current algorithm does this.
//
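+ // (Illustrative: if the same 'xt' guard on the same condition is
+ // requested twice with no intervening label, the second request simply
+ // returns the first guard instruction.)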
- if (isCseOpcode(v)) {
+ LInsp ins;
+ if (isCseOpcode(op)) {
// conditional guard
uint32_t k;
- LInsp ins = exprs->find1(v, c, k);
- if (ins)
- return 0;
- ins = out->insGuard(v, c, gr);
- NanoAssert(ins->opcode() == v && ins->oprnd1() == c);
- return exprs->add(LIns1, ins, k);
+ ins = exprs->find1(op, c, k);
+ if (!ins) {
+ ins = out->insGuard(op, c, gr);
+ exprs->add(LIns1, ins, k);
+ }
+ } else {
+ ins = out->insGuard(op, c, gr);
}
- return out->insGuard(v, c, gr);
+ NanoAssert(ins->isop(op) && ins->oprnd1() == c);
+ return ins;
}
- LInsp CseFilter::insGuardXov(LOpcode v, LInsp a, LInsp b, GuardRecord *gr)
+ LInsp CseFilter::insGuardXov(LOpcode op, LInsp a, LInsp b, GuardRecord *gr)
{
// LIR_*xov are CSEable. See CseFilter::insGuard() for details.
- NanoAssert(isCseOpcode(v));
+ NanoAssert(isCseOpcode(op));
// conditional guard
uint32_t k;
- LInsp ins = exprs->find2(v, a, b, k);
- if (ins)
- return ins;
- ins = out->insGuardXov(v, a, b, gr);
- NanoAssert(ins->opcode() == v && ins->oprnd1() == a && ins->oprnd2() == b);
- return exprs->add(LIns2, ins, k);
+ LInsp ins = exprs->find2(op, a, b, k);
+ if (!ins) {
+ ins = out->insGuardXov(op, a, b, gr);
+ exprs->add(LIns2, ins, k);
+ }
+ NanoAssert(ins->isop(op) && ins->oprnd1() == a && ins->oprnd2() == b);
+ return ins;
}
LInsp CseFilter::insCall(const CallInfo *ci, LInsp args[])
{
+ LInsp ins;
+ uint32_t argc = ci->count_args();
if (ci->_isPure) {
NanoAssert(ci->_storeAccSet == ACC_NONE);
uint32_t k;
- uint32_t argc = ci->count_args();
- LInsp ins = exprs->findCall(ci, argc, args, k);
- if (ins)
- return ins;
+ ins = exprs->findCall(ci, argc, args, k);
+ if (!ins) {
+ ins = out->insCall(ci, args);
+ exprs->add(LInsCall, ins, k);
+ }
+ } else {
+ // We only need to worry about aliasing if !ci->_isPure.
+ storesSinceLastLoad |= ci->_storeAccSet;
ins = out->insCall(ci, args);
- NanoAssert(ins->isCall() && ins->callInfo() == ci && argsmatch(ins, argc, args));
- return exprs->add(LInsCall, ins, k);
}
- return out->insCall(ci, args);
+ NanoAssert(ins->isCall() && ins->callInfo() == ci && argsmatch(ins, argc, args));
+ return ins;
}
- LInsp LoadFilter::insLoad(LOpcode v, LInsp base, int32_t disp, AccSet accSet)
- {
- if (base != sp && base != rp)
- {
- switch (v)
- {
- case LIR_ld:
- CASE64(LIR_ldq:)
- case LIR_ldf:
- case LIR_ld32f:
- case LIR_ldsb:
- case LIR_ldss:
- case LIR_ldzb:
- case LIR_ldzs:
- {
- uint32_t k;
- LInsp ins = exprs->findLoad(v, base, disp, k);
- if (ins)
- return ins;
- ins = out->insLoad(v, base, disp, accSet);
- return exprs->add(LInsLoad, ins, k);
- }
- default:
- // fall thru
- break;
- }
- }
- return out->insLoad(v, base, disp, accSet);
- }
-
- void LoadFilter::clear(LInsp p)
- {
- if (p != sp && p != rp)
- exprs->clear();
- }
-
- LInsp LoadFilter::insStore(LOpcode op, LInsp v, LInsp b, int32_t d, AccSet accSet)
- {
- clear(b);
- return out->insStore(op, v, b, d, accSet);
- }
-
- LInsp LoadFilter::insCall(const CallInfo *ci, LInsp args[])
- {
- if (!ci->_isPure)
- exprs->clear();
- return out->insCall(ci, args);
- }
-
- LInsp LoadFilter::ins0(LOpcode op)
- {
- if (op == LIR_label)
- exprs->clear();
- return out->ins0(op);
- }
#if NJ_SOFTFLOAT_SUPPORTED
static double FASTCALL i2f(int32_t i) { return i; }
static double FASTCALL u2f(uint32_t u) { return u; }
static double FASTCALL fneg(double a) { return -a; }
static double FASTCALL fadd(double a, double b) { return a + b; }
static double FASTCALL fsub(double a, double b) { return a - b; }
static double FASTCALL fmul(double a, double b) { return a * b; }
@@ -2559,37 +2592,38 @@ namespace nanojit
// opposed to printing a message and continuing) is that at
// most one type error will be detected per run. But type
// errors should be rare, and assertion failures are certain
// to be caught by test suites whereas error messages may not
// be.
NanoAssertMsgf(0,
"LIR type error (%s): arg %d of '%s' is '%s' "
"which has type %s (expected %s)",
- _whereInPipeline, i+1, lirNames[op],
+ whereInPipeline, i+1, lirNames[op],
lirNames[args[i]->opcode()],
type2string(actual), type2string(formal));
}
}
}
void ValidateWriter::errorStructureShouldBe(LOpcode op, const char* argDesc, int argN,
LIns* arg, const char* shouldBeDesc)
{
NanoAssertMsgf(0,
"LIR structure error (%s): %s %d of '%s' is '%s' (expected %s)",
- _whereInPipeline, argDesc, argN,
+ whereInPipeline, argDesc, argN,
lirNames[op], lirNames[arg->opcode()], shouldBeDesc);
}
- void ValidateWriter::errorAccSetShould(const char* what, AccSet accSet, const char* shouldDesc)
+ void ValidateWriter::errorAccSet(const char* what, AccSet accSet, const char* shouldDesc)
{
+ RefBuf b;
NanoAssertMsgf(0,
- "LIR AccSet error (%s): '%s' AccSet is %d; it should %s",
- _whereInPipeline, what, accSet, shouldDesc);
+ "LIR AccSet error (%s): '%s' AccSet is '%s'; %s",
+ whereInPipeline, what, printer->formatAccSet(&b, accSet), shouldDesc);
}
void ValidateWriter::checkLInsIsACondOrConst(LOpcode op, int argN, LIns* ins)
{
// We could introduce a LTy_B32 type in the type system but that's a
// bit weird because its representation is identical to LTy_I32. It's
// easier to just do this check structurally. Also, optimization can
// cause the condition to become a LIR_int.
@@ -2604,27 +2638,70 @@ namespace nanojit
}
void ValidateWriter::checkLInsHasOpcode(LOpcode op, int argN, LIns* ins, LOpcode op2)
{
if (!ins->isop(op2))
errorStructureShouldBe(op, "argument", argN, ins, lirNames[op2]);
}
- ValidateWriter::ValidateWriter(LirWriter *out, const char* stageName)
- : LirWriter(out), _whereInPipeline(stageName)
+ void ValidateWriter::checkAccSet(LOpcode op, LInsp base, AccSet accSet, AccSet maxAccSet)
+ {
+ if (accSet == ACC_NONE)
+ errorAccSet(lirNames[op], accSet, "it should not equal ACC_NONE");
+
+ if (accSet & ~maxAccSet)
+ errorAccSet(lirNames[op], accSet,
+ "it should not contain bits that aren't in ACC_LOAD_ANY/ACC_STORE_ANY");
+
+ // Some sanity checking, which is based on the following assumptions:
+ // - STACK ones should use 'sp' or 'sp+k' as the base. (We could look
+ // for more complex patterns, but that feels dangerous. Better to
+ // keep it really simple.)
+ // - RSTACK ones should use 'rp' as the base.
+ // - READONLY/OTHER ones should not use 'sp'/'sp+k' or 'rp' as the base.
+ //
+ // Things that aren't checked:
+ // - There's no easy way to check if READONLY ones really are read-only.
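+ // Eg. a load whose base is 'sp' should be marked ACC_STACK; marking it
+ // ACC_OTHER instead will trigger errorAccSet() below.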
+
+ bool isStack = base == sp ||
+ (base->isop(LIR_piadd) && base->oprnd1() == sp && base->oprnd2()->isconstp());
+ bool isRStack = base == rp;
+
+ switch (accSet) {
+ case ACC_STACK:
+ if (!isStack)
+ errorAccSet(lirNames[op], accSet, "but it's not a stack access");
+ break;
+
+ case ACC_RSTACK:
+ if (!isRStack)
+ errorAccSet(lirNames[op], accSet, "but it's not an rstack access");
+ break;
+
+ case ACC_READONLY:
+ case ACC_OTHER:
+ if (isStack)
+ errorAccSet(lirNames[op], accSet, "but it's a stack access");
+ if (isRStack)
+ errorAccSet(lirNames[op], accSet, "but it's an rstack access");
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ ValidateWriter::ValidateWriter(LirWriter *out, LInsPrinter* printer, const char* where)
+ : LirWriter(out), printer(printer), whereInPipeline(where), sp(0), rp(0)
{}
LIns* ValidateWriter::insLoad(LOpcode op, LIns* base, int32_t d, AccSet accSet)
{
- if (accSet == ACC_NONE)
- errorAccSetShould(lirNames[op], accSet, "not equal ACC_NONE");
-
- if (accSet & ~ACC_LOAD_ANY)
- errorAccSetShould(lirNames[op], accSet, "not contain bits that aren't in ACC_LOAD_ANY");
+ checkAccSet(op, base, accSet, ACC_LOAD_ANY);
int nArgs = 1;
LTy formals[1] = { LTy_Ptr };
LIns* args[1] = { base };
switch (op) {
case LIR_ld:
case LIR_ldf:
@@ -2641,21 +2718,17 @@ namespace nanojit
typeCheckArgs(op, nArgs, formals, args);
return out->insLoad(op, base, d, accSet);
}
LIns* ValidateWriter::insStore(LOpcode op, LIns* value, LIns* base, int32_t d, AccSet accSet)
{
- if (accSet == ACC_NONE)
- errorAccSetShould(lirNames[op], accSet, "not equal ACC_NONE");
-
- if (accSet & ~ACC_STORE_ANY)
- errorAccSetShould(lirNames[op], accSet, "not contain bits that aren't in ACC_STORE_ANY");
+ checkAccSet(op, base, accSet, ACC_STORE_ANY);
int nArgs = 2;
LTy formals[2] = { LTy_Void, LTy_Ptr }; // LTy_Void is overwritten shortly
LIns* args[2] = { value, base };
switch (op) {
case LIR_stb:
case LIR_sts:
@@ -2675,17 +2748,17 @@ namespace nanojit
break;
default:
NanoAssert(0);
}
typeCheckArgs(op, nArgs, formals, args);
- return out->insStore(op, value, base, d);
+ return out->insStore(op, value, base, d, accSet);
}
LIns* ValidateWriter::ins0(LOpcode op)
{
switch (op) {
case LIR_start:
case LIR_regfence:
case LIR_label:
@@ -2918,21 +2991,21 @@ namespace nanojit
ArgType argTypes[MAXARGS];
uint32_t nArgs = ci->getArgTypes(argTypes);
LTy formals[MAXARGS];
LIns* args[MAXARGS]; // in left-to-right order, unlike args0[]
LOpcode op = getCallOpcode(ci);
if (ci->_isPure && ci->_storeAccSet != ACC_NONE)
- errorAccSetShould(ci->_name, ci->_storeAccSet, "equal ACC_NONE for pure functions");
+ errorAccSet(ci->_name, ci->_storeAccSet, "it should be ACC_NONE for pure functions");
if (ci->_storeAccSet & ~ACC_STORE_ANY)
- errorAccSetShould(lirNames[op], ci->_storeAccSet,
- "not contain bits that aren't in ACC_STORE_ANY");
+ errorAccSet(lirNames[op], ci->_storeAccSet,
+ "it should not contain bits that aren't in ACC_STORE_ANY");
// This loop iterates over the args from right-to-left (because arg()
// and getArgTypes() use right-to-left order), but puts the results
// into formals[] and args[] in left-to-right order so that arg
// numbers in error messages make sense to the user.
for (uint32_t i = 0; i < nArgs; i++) {
uint32_t i2 = nArgs - i - 1; // converts right-to-left to left-to-right
switch (argTypes[i]) {
--- a/js/src/nanojit/LIR.h
+++ b/js/src/nanojit/LIR.h
@@ -205,17 +205,20 @@ namespace nanojit
// then they do not alias.
//
// The access regions used are as follows:
//
// - READONLY: all memory that is read-only, ie. never stored to.
// A load from a READONLY region will never alias with any stores.
//
// - STACK: the stack. Stack loads/stores can usually be easily
- // identified because they use SP as the stack pointer.
+ // identified because they use SP as the base pointer.
+ //
+ // - RSTACK: the return stack. Return stack loads/stores can usually be
+ // easily identified because they use RP as the base pointer.
//
// - OTHER: all other regions of memory.
//
// It makes sense to add new access regions when doing so will help with
// one or more optimisations.
//
// One subtlety is that the meanings of the access region markings only
// apply to the LIR fragment that they are in. For example, if a memory
@@ -254,45 +257,54 @@ namespace nanojit
//
// In other words, a load/store can be marked with an access region set
// that is a superset of its actual access region set. Taking this to
// its logical conclusion, any load can be safely marked with LOAD_ANY and
// any store can be safely marked with STORE_ANY (and the latter is
// true for the store set of a function.)
//
// Such imprecision is safe but may reduce optimisation opportunities.
+ //
+ // Optimisations that use access region info
+ // -----------------------------------------
+ // Currently only CseFilter uses this, and only for determining whether
+ // loads can be CSE'd. Note that CseFilter treats loads that are marked
+ // with a single access region precisely, but all loads marked with
+ // multiple access regions get lumped together. So if you can't mark a
+ // load with a single access region, you might as well use ACC_LOAD_ANY.
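+ // For example, CseFilter only discards a load marked ACC_STACK when an
+ // intervening store (or call) may write to STACK, whereas a load marked
+ // with several regions, eg. ACC_STACK|ACC_OTHER, is discarded whenever
+ // any store at all intervenes.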
//-----------------------------------------------------------------------
// An access region set is represented as a bitset. Nb: this restricts us
// to at most eight alias regions for the moment.
typedef uint8_t AccSet;
// The access regions. Note that because of the bitset representation
// these constants are also valid (singleton) AccSet values. If you add
- // new ones please update ACC_ALL_WRITABLE and LirNameMap::formatAccSet().
+ // new ones please update ACC_ALL_STORABLE and formatAccSet() and
+ // CseFilter.
//
static const AccSet ACC_READONLY = 1 << 0; // 0000_0001b
static const AccSet ACC_STACK = 1 << 1; // 0000_0010b
- static const AccSet ACC_OTHER = 1 << 2; // 0000_0100b
+ static const AccSet ACC_RSTACK = 1 << 2; // 0000_0100b
+ static const AccSet ACC_OTHER = 1 << 3; // 0000_1000b
// Some common (non-singleton) access region sets. ACC_NONE does not make
// sense for loads or stores (which must access at least one region), it
// only makes sense for calls.
//
// A convention that's worth using: use ACC_LOAD_ANY/ACC_STORE_ANY for
// cases that you're unsure about or haven't considered carefully. Use
- // ACC_ALL/ACC_ALL_WRITABLE for cases that you have considered carefully.
+ // ACC_ALL/ACC_ALL_STORABLE for cases that you have considered carefully.
// That way it's easy to tell which ones have been considered and which
// haven't.
static const AccSet ACC_NONE = 0x0;
- static const AccSet ACC_ALL_WRITABLE = ACC_STACK | ACC_OTHER;
- static const AccSet ACC_ALL = ACC_READONLY | ACC_ALL_WRITABLE;
+ static const AccSet ACC_ALL_STORABLE = ACC_STACK | ACC_RSTACK | ACC_OTHER;
+ static const AccSet ACC_ALL = ACC_READONLY | ACC_ALL_STORABLE;
static const AccSet ACC_LOAD_ANY = ACC_ALL; // synonym
- static const AccSet ACC_STORE_ANY = ACC_ALL_WRITABLE; // synonym
-
+ static const AccSet ACC_STORE_ANY = ACC_ALL_STORABLE; // synonym
struct CallInfo
{
private:
public:
uintptr_t _address;
uint32_t _typesig:27; // 9 3-bit fields indicating arg type, by ARGTYPE above (including ret type): a1 a2 a3 a4 a5 ret
@@ -1483,27 +1495,16 @@ namespace nanojit
return ins1(LIR_u2q, uintIns);
#else
return uintIns;
#endif
}
// Chooses LIR_sti or LIR_stqi based on size of value.
LIns* insStorei(LIns* value, LIns* base, int32_t d, AccSet accSet);
-
- // Insert a load/store with the most pessimistic region access info, which is always safe.
- LIns* insLoad(LOpcode op, LIns* base, int32_t d) {
- return insLoad(op, base, d, ACC_LOAD_ANY);
- }
- LIns* insStore(LOpcode op, LIns* value, LIns* base, int32_t d) {
- return insStore(op, value, base, d, ACC_STORE_ANY);
- }
- LIns* insStorei(LIns* value, LIns* base, int32_t d) {
- return insStorei(value, base, d, ACC_STORE_ANY);
- }
};
#ifdef NJ_VERBOSE
extern const char* lirNames[];
// Maps address ranges to meaningful names.
class AddrNameMap
@@ -1593,29 +1594,29 @@ namespace nanojit
{
private:
Allocator& alloc;
void formatImm(RefBuf* buf, int32_t c);
void formatImmq(RefBuf* buf, uint64_t c);
void formatGuard(InsBuf* buf, LInsp ins);
void formatGuardXov(InsBuf* buf, LInsp ins);
- char* formatAccSet(RefBuf* buf, LInsp ins, bool isLoad);
public:
LInsPrinter(Allocator& alloc)
: alloc(alloc)
{
addrNameMap = new (alloc) AddrNameMap(alloc);
lirNameMap = new (alloc) LirNameMap(alloc);
}
char *formatAddr(RefBuf* buf, void* p);
char *formatRef(RefBuf* buf, LInsp ref);
char *formatIns(InsBuf* buf, LInsp ins);
+ char *formatAccSet(RefBuf* buf, AccSet accSet);
AddrNameMap* addrNameMap;
LirNameMap* lirNameMap;
};
class VerboseWriter : public LirWriter
{
@@ -1734,114 +1735,135 @@ namespace nanojit
LIns* insBranch(LOpcode, LIns *cond, LIns *target);
LIns* insLoad(LOpcode op, LInsp base, int32_t off, AccSet accSet);
};
enum LInsHashKind {
// We divide instruction kinds into groups for the use of LInsHashSet.
// LIns0 isn't present because we don't need to record any 0-ary
// instructions.
- LInsImm = 0,
- LInsImmq = 1, // only occurs on 64-bit platforms
- LInsImmf = 2,
- LIns1 = 3,
- LIns2 = 4,
- LIns3 = 5,
- LInsLoad = 6,
- LInsCall = 7,
+ LInsImm = 0,
+ LInsImmq = 1, // only occurs on 64-bit platforms
+ LInsImmf = 2,
+ LIns1 = 3,
+ LIns2 = 4,
+ LIns3 = 5,
+ LInsCall = 6,
+
+ // Loads are special. We group them by access region: one table for
+ // each region, and then a catch-all table for any loads marked with
+ // multiple regions. This arrangement makes the removal of
+ // invalidated loads fast -- eg. we can invalidate all STACK loads by
+ // just clearing the LInsLoadStack table. The disadvantage is that
+ // loads marked with multiple regions must be invalidated
+ // conservatively, eg. if any intervening stores occur. But loads
+ // marked with multiple regions should be rare.
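+ // Eg. a load marked ACC_STACK goes in the LInsLoadStack table, while a
+ // load marked ACC_STACK|ACC_OTHER goes in LInsLoadMultiple.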
+ LInsLoadReadOnly = 7,
+ LInsLoadStack = 8,
+ LInsLoadRStack = 9,
+ LInsLoadOther = 10,
+ LInsLoadMultiple = 11,
LInsFirst = 0,
- LInsLast = 7,
+ LInsLast = 11,
// need a value after "last" to outsmart compilers that will insist last+1 is impossible
- LInsInvalid = 8
+ LInsInvalid = 12
};
#define nextKind(kind) LInsHashKind(kind+1)
- // @todo, this could be replaced by a generic HashMap or HashSet, if we had one
class LInsHashSet
{
// Must be a power of 2.
// Don't start too small, or we'll waste time growing and rehashing.
// Don't start too large, or we'll waste memory.
static const uint32_t kInitialCap[LInsLast + 1];
// There is one list for each instruction kind. This lets us size the
// lists appropriately (some instructions are more common than others).
// It also lets us have kind-specific find/add/grow functions, which
// are faster than generic versions.
LInsp *m_list[LInsLast + 1];
uint32_t m_cap[LInsLast + 1];
uint32_t m_used[LInsLast + 1];
typedef uint32_t (LInsHashSet::*find_t)(LInsp);
find_t m_find[LInsLast + 1];
+
Allocator& alloc;
static uint32_t hashImm(int32_t);
- static uint32_t hashImmq(uint64_t); // not NANOJIT_64BIT only used by findImmf()
- static uint32_t hash1(LOpcode v, LInsp);
- static uint32_t hash2(LOpcode v, LInsp, LInsp);
- static uint32_t hash3(LOpcode v, LInsp, LInsp, LInsp);
- static uint32_t hashLoad(LOpcode v, LInsp, int32_t);
+ static uint32_t hashImmq(uint64_t); // not NANOJIT_64BIT-only -- used by findImmf()
+ static uint32_t hash1(LOpcode op, LInsp);
+ static uint32_t hash2(LOpcode op, LInsp, LInsp);
+ static uint32_t hash3(LOpcode op, LInsp, LInsp, LInsp);
+ static uint32_t hashLoad(LOpcode op, LInsp, int32_t, AccSet);
static uint32_t hashCall(const CallInfo *call, uint32_t argc, LInsp args[]);
// These private versions are used after an LIns has been created;
// they are used for rehashing after growing.
uint32_t findImm(LInsp ins);
#ifdef NANOJIT_64BIT
uint32_t findImmq(LInsp ins);
#endif
uint32_t findImmf(LInsp ins);
uint32_t find1(LInsp ins);
uint32_t find2(LInsp ins);
uint32_t find3(LInsp ins);
- uint32_t findLoad(LInsp ins);
uint32_t findCall(LInsp ins);
+ uint32_t findLoadReadOnly(LInsp ins);
+ uint32_t findLoadStack(LInsp ins);
+ uint32_t findLoadRStack(LInsp ins);
+ uint32_t findLoadOther(LInsp ins);
+ uint32_t findLoadMultiple(LInsp ins);
void grow(LInsHashKind kind);
public:
// kInitialCaps[i] holds the initial size for m_list[i].
LInsHashSet(Allocator&, uint32_t kInitialCaps[]);
// These public versions are used before an LIns has been created.
LInsp findImm(int32_t a, uint32_t &k);
#ifdef NANOJIT_64BIT
LInsp findImmq(uint64_t a, uint32_t &k);
#endif
LInsp findImmf(uint64_t d, uint32_t &k);
LInsp find1(LOpcode v, LInsp a, uint32_t &k);
LInsp find2(LOpcode v, LInsp a, LInsp b, uint32_t &k);
LInsp find3(LOpcode v, LInsp a, LInsp b, LInsp c, uint32_t &k);
- LInsp findLoad(LOpcode v, LInsp a, int32_t b, uint32_t &k);
+ LInsp findLoad(LOpcode v, LInsp a, int32_t d, AccSet accSet, LInsHashKind kind,
+ uint32_t &k);
LInsp findCall(const CallInfo *call, uint32_t argc, LInsp args[], uint32_t &k);
// 'k' is the index found by findXYZ().
- LInsp add(LInsHashKind kind, LInsp ins, uint32_t k);
+ void add(LInsHashKind kind, LInsp ins, uint32_t k);
- void clear();
+ void clear(); // clears all tables
+ void clear(LInsHashKind); // clears one table
};
class CseFilter: public LirWriter
{
private:
LInsHashSet* exprs;
+ AccSet storesSinceLastLoad; // regions stored to since the last load
public:
CseFilter(LirWriter *out, Allocator&);
LIns* insImm(int32_t imm);
#ifdef NANOJIT_64BIT
LIns* insImmq(uint64_t q);
#endif
LIns* insImmf(double d);
LIns* ins0(LOpcode v);
LIns* ins1(LOpcode v, LInsp);
LIns* ins2(LOpcode v, LInsp, LInsp);
LIns* ins3(LOpcode v, LInsp, LInsp, LInsp);
- LIns* insLoad(LOpcode op, LInsp cond, int32_t d, AccSet accSet);
+ LIns* insLoad(LOpcode op, LInsp base, int32_t d, AccSet accSet);
+ LIns* insStore(LOpcode op, LInsp value, LInsp base, int32_t d, AccSet accSet);
LIns* insCall(const CallInfo *call, LInsp args[]);
LIns* insGuard(LOpcode op, LInsp cond, GuardRecord *gr);
LIns* insGuardXov(LOpcode op, LInsp a, LInsp b, GuardRecord *gr);
};
class LirBuffer
{
public:
@@ -1970,47 +1992,16 @@ namespace nanojit
int top;
int getTop(LInsp br);
public:
StackFilter(LirFilter *in, Allocator& alloc, LInsp sp);
LInsp read();
};
- // eliminate redundant loads by watching for stores & mutator calls
- class LoadFilter: public LirWriter
- {
- public:
- LInsp sp, rp;
- LInsHashSet* exprs;
-
- void clear(LInsp p);
-
- public:
- LoadFilter(LirWriter *out, Allocator& alloc)
- : LirWriter(out), sp(NULL), rp(NULL)
- {
- uint32_t kInitialCaps[LInsLast + 1];
- kInitialCaps[LInsImm] = 1;
- kInitialCaps[LInsImmq] = 1;
- kInitialCaps[LInsImmf] = 1;
- kInitialCaps[LIns1] = 1;
- kInitialCaps[LIns2] = 1;
- kInitialCaps[LIns3] = 1;
- kInitialCaps[LInsLoad] = 64;
- kInitialCaps[LInsCall] = 1;
- exprs = new (alloc) LInsHashSet(alloc, kInitialCaps);
- }
-
- LInsp ins0(LOpcode);
- LInsp insLoad(LOpcode op, LInsp base, int32_t disp, AccSet accSet);
- LInsp insStore(LOpcode op, LInsp value, LInsp base, int32_t disp, AccSet accSet);
- LInsp insCall(const CallInfo *call, LInsp args[]);
- };
-
struct SoftFloatOps
{
const CallInfo* opmap[LIR_sentinel];
SoftFloatOps();
};
extern const SoftFloatOps softFloatOps;
@@ -2044,29 +2035,36 @@ namespace nanojit
// writer pipeline, exactly as it is generated by the compiler front-end.
//
// A general note about the errors produced by this class: for
// TraceMonkey, they won't include special names for instructions that
// have them unless TMFLAGS is specified.
class ValidateWriter : public LirWriter
{
private:
- const char* _whereInPipeline;
+ LInsPrinter* printer;
+ const char* whereInPipeline;
const char* type2string(LTy type);
void typeCheckArgs(LOpcode op, int nArgs, LTy formals[], LIns* args[]);
void errorStructureShouldBe(LOpcode op, const char* argDesc, int argN, LIns* arg,
const char* shouldBeDesc);
- void errorAccSetShould(const char* what, AccSet accSet, const char* shouldDesc);
+ void errorAccSet(const char* what, AccSet accSet, const char* shouldDesc);
void checkLInsHasOpcode(LOpcode op, int argN, LIns* ins, LOpcode op2);
void checkLInsIsACondOrConst(LOpcode op, int argN, LIns* ins);
void checkLInsIsNull(LOpcode op, int argN, LIns* ins);
+ void checkAccSet(LOpcode op, LInsp base, AccSet accSet, AccSet maxAccSet);
+
+ LInsp sp, rp;
public:
- ValidateWriter(LirWriter* out, const char* stageName);
+ ValidateWriter(LirWriter* out, LInsPrinter* printer, const char* where);
+ void setSp(LInsp ins) { sp = ins; }
+ void setRp(LInsp ins) { rp = ins; }
+
LIns* insLoad(LOpcode op, LIns* base, int32_t d, AccSet accSet);
LIns* insStore(LOpcode op, LIns* value, LIns* base, int32_t d, AccSet accSet);
LIns* ins0(LOpcode v);
LIns* ins1(LOpcode v, LIns* a);
LIns* ins2(LOpcode v, LIns* a, LIns* b);
LIns* ins3(LOpcode v, LIns* a, LIns* b, LIns* c);
LIns* insParam(int32_t arg, int32_t kind);
LIns* insImm(int32_t imm);