Merge tamarin-redux (nanojit2) into tracemonkey (457786, r=edwsmith,gal,danderson).
--- a/js/src/jsbuiltins.h
+++ b/js/src/jsbuiltins.h
@@ -140,17 +140,17 @@ struct JSTraceableNative {
#define _JS_RETSIZE2(ctype, size) size##_ARGSIZE
#define _JS_RETSIZE(tyname) _JS_EXPAND(_JS_RETSIZE2 _JS_TYPEINFO_##tyname)
#define _JS_ARGSIZE2(ctype, size) size##_RETSIZE
#define _JS_ARGSIZE(tyname) _JS_EXPAND(_JS_ARGSIZE2 _JS_TYPEINFO_##tyname)
#define _JS_DEFINE_CALLINFO(name, crtype, cargtypes, argtypes, cse, fold) \
crtype FASTCALL js_##name cargtypes; \
const nanojit::CallInfo ci_##name = \
- { (intptr_t) &js_##name, argtypes, cse, fold _JS_CI_NAME(name) };
+ { (intptr_t) &js_##name, argtypes, cse, fold, nanojit::ABI_FASTCALL _JS_CI_NAME(name) };
/*
* Declare a C function named js_<op> and a CallInfo struct named ci_<op> so
* the tracer can call it.
*/
#define JS_DEFINE_CALLINFO_1(rt, op, at0, cse, fold) \
_JS_DEFINE_CALLINFO(op, _JS_CTYPE(rt), (_JS_CTYPE(at0)), \
(_JS_ARGSIZE(at0) << 2) | _JS_RETSIZE(rt), cse, fold)
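The packed `argtypes` word keeps the return size in its low two bits and each argument size in the next two-bit slot, which is why `_JS_ARGSIZE(at0)` is shifted left by 2 before being or'ed with `_JS_RETSIZE(rt)`. A minimal sketch of what a one-argument definition now expands to; `js_example` is a placeholder rather than a real builtin, and `ARGSIZE_F` stands in for whatever size tokens the `_JS_TYPEINFO_*` macros actually produce. The only substantive change in this hunk is the trailing calling-convention field:

    // hypothetical expansion sketch, not generated output
    jsdouble FASTCALL js_example(jsdouble);
    const nanojit::CallInfo ci_example =
        { (intptr_t) &js_example,
          (ARGSIZE_F << 2) | ARGSIZE_F,   // arg0 size in bits 3:2, return size in bits 1:0
          1 /* cse */, 1 /* fold */,
          nanojit::ABI_FASTCALL };        // new field: explicit ABI for the call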
--- a/js/src/jsmath.cpp
+++ b/js/src/jsmath.cpp
@@ -339,26 +339,22 @@ math_max(JSContext *cx, uintN argc, jsva
for (i = 0; i < argc; i++) {
x = js_ValueToNumber(cx, &argv[i]);
if (JSVAL_IS_NULL(argv[i]))
return JS_FALSE;
if (JSDOUBLE_IS_NaN(x)) {
*vp = DOUBLE_TO_JSVAL(cx->runtime->jsNaN);
return JS_TRUE;
}
- if (x == 0 && x == z && fd_copysign(1.0, z) == -1)
- z = x;
- else
- /*
- * Note: it is essential that you write the ternary expression
- * here such that the false branch produces z not x, as the case
- * of x=-0, z=0, for which we wind up in this expression but
- * evaluate either > order as false, whether we do x>z *or* z>x.
- */
+ if (x == 0 && x == z) {
+ if (fd_copysign(1.0, z) == -1)
+ z = x;
+ } else {
z = (x > z) ? x : z;
+ }
}
return js_NewNumberInRootedValue(cx, z, vp);
}
static JSBool
math_min(JSContext *cx, uintN argc, jsval *vp)
{
jsdouble x, z = *cx->runtime->jsPositiveInfinity;
@@ -373,19 +369,20 @@ math_min(JSContext *cx, uintN argc, jsva
for (i = 0; i < argc; i++) {
x = js_ValueToNumber(cx, &argv[i]);
if (JSVAL_IS_NULL(argv[i]))
return JS_FALSE;
if (JSDOUBLE_IS_NaN(x)) {
*vp = DOUBLE_TO_JSVAL(cx->runtime->jsNaN);
return JS_TRUE;
}
- if (x == 0 && x == z && fd_copysign(1.0,x) == -1)
- z = x;
- else
+ if (x == 0 && x == z) {
+ if (fd_copysign(1.0, x) == -1)
+ z = x;
+ } else
z = (x < z) ? x : z;
}
return js_NewNumberInRootedValue(cx, z, vp);
}
static JSBool
math_pow(JSContext *cx, uintN argc, jsval *vp)
{
@@ -618,19 +615,23 @@ js_Math_log(jsdouble d)
}
jsdouble FASTCALL
js_Math_max(jsdouble d, jsdouble p)
{
if (JSDOUBLE_IS_NaN(d) || JSDOUBLE_IS_NaN(p))
return js_NaN;
- if (p == 0 && p == d && fd_copysign(1.0, d) == -1)
- return p;
- return (d > p) ? d : p;
+ if (p == 0 && p == d) {
+ if (fd_copysign(1.0, d) == -1)
+ return p;
+ else
+ return d;
+ }
+ return (p > d) ? p : d;
}
jsdouble FASTCALL
js_Math_pow(jsdouble d, jsdouble p)
{
if (!JSDOUBLE_IS_FINITE(p) && (d == 1.0 || d == -1.0))
return js_NaN;
if (p == 0)
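The restructured branches in math_max, math_min, and js_Math_max all encode the same IEEE-754 zero-sign rule: when both operands compare equal to zero, max must prefer +0 over -0 (and min the reverse), which an ordinary `>` or `<` comparison cannot distinguish. A standalone sketch of the max case, using `std::copysign` in place of `fd_copysign` and ignoring the NaN check that the callers perform first:

    #include <cmath>

    // returns the larger of x and z, treating +0 as larger than -0
    static double max_prefer_positive_zero(double x, double z)
    {
        if (x == 0 && x == z)                            // both operands are zeros (+0 == -0)
            return std::copysign(1.0, z) == -1 ? x : z;  // replace a negative-zero accumulator
        return (x > z) ? x : z;
    }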
--- a/js/src/jstracer.cpp
+++ b/js/src/jstracer.cpp
@@ -995,22 +995,24 @@ TraceRecorder::TraceRecorder(JSContext*
lir = verbose_filter = new (&gc) VerboseWriter(&gc, lir, lirbuf->names);
#endif
#ifdef NJ_SOFTFLOAT
lir = float_filter = new (&gc) SoftFloatFilter(lir);
#endif
lir = cse_filter = new (&gc) CseFilter(lir, &gc);
lir = expr_filter = new (&gc) ExprFilter(lir);
lir = func_filter = new (&gc) FuncFilter(lir, *this);
- lir->ins0(LIR_trace);
+ lir->ins0(LIR_start);
if (!nanojit::AvmCore::config.tree_opt || fragment->root == fragment) {
- lirbuf->state = addName(lir->insParam(0), "state");
- lirbuf->param1 = addName(lir->insParam(1), "param1");
- }
+ lirbuf->state = addName(lir->insParam(0, 0), "state");
+ lirbuf->param1 = addName(lir->insParam(1, 0), "param1");
+ }
+ loop_header_ins = addName(lir->ins0(LIR_label), "loop_header");
+
lirbuf->sp = addName(lir->insLoad(LIR_ldp, lirbuf->state, (int)offsetof(InterpState, sp)), "sp");
lirbuf->rp = addName(lir->insLoad(LIR_ldp, lirbuf->state, offsetof(InterpState, rp)), "rp");
cx_ins = addName(lir->insLoad(LIR_ldp, lirbuf->state, offsetof(InterpState, cx)), "cx");
gp_ins = addName(lir->insLoad(LIR_ldp, lirbuf->state, offsetof(InterpState, gp)), "gp");
eos_ins = addName(lir->insLoad(LIR_ldp, lirbuf->state, offsetof(InterpState, eos)), "eos");
eor_ins = addName(lir->insLoad(LIR_ldp, lirbuf->state, offsetof(InterpState, eor)), "eor");
/* read into registers all values on the stack and all globals we know so far */
@@ -1950,20 +1952,19 @@ TraceRecorder::closeLoop(Fragmento* frag
debug_only_v(printf("Trace rejected: unstable loop variables.\n");)
if (!trashTree)
fragment->blacklist();
return;
}
SideExit *exit = snapshot(LOOP_EXIT);
exit->target = fragment->root;
if (fragment == fragment->root) {
- fragment->lastIns = lir->insGuard(LIR_loop, lir->insImm(1), exit);
- } else {
- fragment->lastIns = lir->insGuard(LIR_x, lir->insImm(1), exit);
- }
+ fragment->lastIns = lir->insBranch(LIR_j, NULL, loop_header_ins);
+ }
+ fragment->lastIns = lir->insGuard(LIR_x, lir->insImm(1), exit);
compile(fragmento);
debug_only_v(printf("recording completed at %s:%u@%u via closeLoop\n", cx->fp->script->filename,
js_PCToLineNumber(cx, cx->fp->script, cx->fp->regs->pc),
cx->fp->regs->pc - cx->fp->script->code););
}
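With this change a root trace no longer ends in a `LIR_loop` guard: closeLoop emits an unconditional `LIR_j` back to the `LIR_label` recorded at trace entry (the new `loop_header_ins`), followed by an always-taken `LIR_x` guard as an unreachable fallback that becomes `fragment->lastIns`. A sketch of the writer calls involved, using only the builder state introduced elsewhere in this patch (`lir`, `loop_header_ins`, and the `exit` snapshot above):

    loop_header_ins = lir->ins0(LIR_label);          // emitted in the TraceRecorder ctor
    /* ... recorded loop body ... */
    lir->insBranch(LIR_j, NULL, loop_header_ins);    // back-edge replaces the old LIR_loop guard
    lir->insGuard(LIR_x, lir->insImm(1), exit);      // unreachable safety exit, becomes lastIns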
/* Emit an always-exit guard and compile the tree (used for break statements). */
@@ -2102,19 +2103,19 @@ TraceRecorder::fuseIf(jsbytecode* pc, bo
flipIf(pc, cond);
guard(cond, x, BRANCH_EXIT);
}
}
int
nanojit::StackFilter::getTop(LInsp guard)
{
- if (sp == frag->lirbuf->sp)
+ if (sp == lirbuf->sp)
return guard->exit()->sp_adj;
- JS_ASSERT(sp == frag->lirbuf->rp);
+ JS_ASSERT(sp == lirbuf->rp);
return guard->exit()->rp_adj;
}
#if defined NJ_VERBOSE
void
nanojit::LirNameMap::formatGuard(LIns *i, char *out)
{
uint32_t ip;
@@ -2382,17 +2383,17 @@ js_RecordTree(JSContext* cx, JSTraceMoni
}
AUDIT(recorderStarted);
/* Try to find an unused peer fragment, or allocate a new one. */
while (f->code() && f->peer)
f = f->peer;
if (f->code())
- f = JS_TRACE_MONITOR(cx).fragmento->newLoop(f->ip);
+ f = JS_TRACE_MONITOR(cx).fragmento->getAnchor(f->ip);
f->calldepth = 0;
f->root = f;
/* allocate space to store the LIR for this tree */
if (!f->lirbuf) {
f->lirbuf = new (&gc) LirBuffer(tm->fragmento, NULL);
#ifdef DEBUG
f->lirbuf->names = new (&gc) LirNameMap(&gc, NULL, tm->fragmento->labels);
@@ -2599,17 +2600,17 @@ js_ExecuteTree(JSContext* cx, Fragment**
if (ngslots &&
(OBJ_SHAPE(globalObj) != tm->globalShape ||
!BuildNativeGlobalFrame(cx, ngslots, gslots, tm->globalTypeMap->data(), global))) {
AUDIT(globalShapeMismatchAtEntry);
debug_only_v(printf("Global shape mismatch (%u vs. %u), flushing cache.\n",
OBJ_SHAPE(globalObj), tm->globalShape);)
const void* ip = f->ip;
js_FlushJITCache(cx);
- *treep = tm->fragmento->newLoop(ip);
+ *treep = tm->fragmento->getAnchor(ip);
return NULL;
}
if (!BuildNativeStackFrame(cx, 0/*callDepth*/, ti->stackTypeMap.data(), stack)) {
AUDIT(typeMapMismatchAtEntry);
debug_only_v(printf("type-map mismatch.\n");)
if (++ti->mismatchCount > MAX_MISMATCH) {
debug_only_v(printf("excessive mismatches, flushing tree.\n"));
@@ -2619,17 +2620,17 @@ js_ExecuteTree(JSContext* cx, Fragment**
return NULL;
}
/* replenish the reserve pool (this might trigger a GC) */
if (tm->recoveryDoublePoolPtr < tm->recoveryDoublePool + MAX_NATIVE_STACK_SLOTS) {
bool didGC;
const void* ip = f->ip;
if (!ReplenishReservePool(cx, tm, didGC) || didGC) {
- *treep = tm->fragmento->newLoop(ip);
+ *treep = tm->fragmento->getAnchor(ip);
return NULL;
}
}
ti->mismatchCount = 0;
double* entry_sp = &stack[ti->nativeStackBase/sizeof(double)];
FrameInfo* callstack = (FrameInfo*) alloca(MAX_CALL_STACK_ENTRIES * sizeof(FrameInfo));
@@ -2658,16 +2659,17 @@ js_ExecuteTree(JSContext* cx, Fragment**
* recording. Rather than over-generalize by using a counter instead of a
* flag, we simply sample and update tm->onTrace if necessary.
*/
bool onTrace = tm->onTrace;
if (!onTrace)
tm->onTrace = true;
GuardRecord* lr;
+ debug_only(fflush(NULL);)
#if defined(JS_NO_FASTCALL) && defined(NANOJIT_IA32)
SIMULATE_FASTCALL(lr, &state, NULL, u.func);
#else
lr = u.func(&state, NULL);
#endif
JS_ASSERT(lr->exit->exitType != LOOP_EXIT || !lr->calldepth);
@@ -2849,17 +2851,17 @@ js_MonitorLoopEdge(JSContext* cx, uintN&
jsbytecode* pc = cx->fp->regs->pc;
Fragment* f;
JSFragmentCacheEntry* cacheEntry = &tm->fcache[jsuword(pc) & JS_FRAGMENT_CACHE_MASK];
if (cacheEntry->pc == pc) {
f = cacheEntry->fragment;
} else {
f = tm->fragmento->getLoop(pc);
if (!f)
- f = tm->fragmento->newLoop(pc);
+ f = tm->fragmento->getAnchor(pc);
cacheEntry->pc = pc;
cacheEntry->fragment = f;
}
/* If there is a chance that js_ExecuteTree will actually succeed, invoke it (either the
first fragment must contain some code, or at least it must have a peer fragment). */
GuardRecord* lr = NULL;
GuardRecord* innermostNestedGuard = NULL;
--- a/js/src/jstracer.h
+++ b/js/src/jstracer.h
@@ -215,16 +215,17 @@ class TraceRecorder : public GCObject {
nanojit::LirBufWriter* lir_buf_writer;
nanojit::LirWriter* verbose_filter;
nanojit::LirWriter* cse_filter;
nanojit::LirWriter* expr_filter;
nanojit::LirWriter* func_filter;
#ifdef NJ_SOFTFLOAT
nanojit::LirWriter* float_filter;
#endif
+ nanojit::LIns* loop_header_ins;
nanojit::LIns* cx_ins;
nanojit::LIns* gp_ins;
nanojit::LIns* eos_ins;
nanojit::LIns* eor_ins;
nanojit::LIns* rval_ins;
nanojit::LIns* inner_sp_ins;
nanojit::SideExit exit;
bool deepAborted;
--- a/js/src/nanojit/Assembler.cpp
+++ b/js/src/nanojit/Assembler.cpp
@@ -33,93 +33,131 @@
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#include "nanojit.h"
+#ifdef FEATURE_NANOJIT
+
#ifdef AVMPLUS_PORTING_API
#include "portapi_nanojit.h"
#endif
-#if defined(AVMPLUS_LINUX) && defined(AVMPLUS_ARM)
+#if defined(AVMPLUS_UNIX) && defined(AVMPLUS_ARM)
#include <asm/unistd.h>
extern "C" void __clear_cache(char *BEG, char *END);
#endif
namespace nanojit
{
- #ifdef FEATURE_NANOJIT
class DeadCodeFilter: public LirFilter
{
- Assembler *assm;
- public:
- DeadCodeFilter(LirFilter *in, Assembler *a) : LirFilter(in), assm(a) {}
+ const CallInfo *functions;
+
+ bool ignoreInstruction(LInsp ins)
+ {
+ LOpcode op = ins->opcode();
+ if (ins->isStore() ||
+ op == LIR_loop ||
+ op == LIR_label ||
+ op == LIR_live ||
+ isRet(op)) {
+ return false;
+ }
+ return ins->resv() == 0;
+ }
+
+ public:
+ DeadCodeFilter(LirFilter *in, const CallInfo *f) : LirFilter(in), functions(f) {}
LInsp read() {
for (;;) {
LInsp i = in->read();
- if (!i || i->isGuard()
- || i->isCall() && !i->callInfo()->_cse
- || !assm->ignoreInstruction(i))
+ if (!i || i->isGuard() || i->isBranch()
+ || i->isCall() && !i->isCse(functions)
+ || !ignoreInstruction(i))
return i;
}
}
};
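The rewritten filter decides liveness from the instruction itself: anything with `resv() == 0` has no reservation and therefore no remaining use, except for effects that must always be kept (stores, `LIR_loop`, `LIR_label`, `LIR_live`, returns) and, in `read()`, guards, branches, and non-CSEable calls. A sketch of how it sits in the backwards pipeline, mirroring the setup in `assemble()` later in this patch (assumes the `frag` and `gc` locals from that function):

    LirReader reader(frag->lastIns);
    StackFilter storefilter1(&reader, gc, frag->lirbuf, frag->lirbuf->sp);
    StackFilter storefilter2(&storefilter1, gc, frag->lirbuf, frag->lirbuf->rp);
    DeadCodeFilter dead(&storefilter2, frag->lirbuf->_functions);
    for (LInsp ins = dead.read(); ins != 0; ins = dead.read())
        ;   // only live values and control flow reach the code generator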
#ifdef NJ_VERBOSE
class VerboseBlockReader: public LirFilter
{
Assembler *assm;
LirNameMap *names;
avmplus::List<LInsp, avmplus::LIST_NonGCObjects> block;
+ bool flushnext;
public:
VerboseBlockReader(LirFilter *in, Assembler *a, LirNameMap *n)
- : LirFilter(in), assm(a), names(n), block(a->_gc) {}
+ : LirFilter(in), assm(a), names(n), block(a->_gc), flushnext(false)
+ {}
void flush() {
- assm->outputf(" %p:", assm->_nIns);
- assm->output("");
- for (int j=0,n=block.size(); j < n; j++)
- assm->outputf(" %s", names->formatIns(block[j]));
- assm->output("");
- block.clear();
+ flushnext = false;
+ if (!block.isEmpty()) {
+ for (int j=0,n=block.size(); j < n; j++) {
+ LIns *i = block[j];
+ assm->outputf(" %s", names->formatIns(block[j]));
+ if (i->isop(LIR_label)) {
+ assm->outputf(" %p:", assm->_nIns);
+ assm->output("");
+ }
+ }
+ block.clear();
+ }
}
+ void flush_add(LInsp i) {
+ flush();
+ block.add(i);
+ }
+
LInsp read() {
LInsp i = in->read();
if (!i) {
flush();
return i;
}
- if (i->isGuard()) {
- flush();
- block.add(i);
+ if (i->isGuard()) {
+ flush_add(i);
if (i->oprnd1())
block.add(i->oprnd1());
- }
+ }
+ else if (isRet(i->opcode()) || i->isBranch()) {
+ flush_add(i);
+ }
else {
- block.add(i);
+ if (flushnext)
+ flush();
+ flush_add(i);
+ if (i->isop(LIR_label))
+ flushnext = true;
}
return i;
}
};
#endif
/**
* Need the following:
*
* - merging paths ( build a graph? ), possibly use external rep to drive codegen
*/
Assembler::Assembler(Fragmento* frago)
- : _frago(frago)
+ : hasLoop(0)
+ , _frago(frago)
, _gc(frago->core()->gc)
+ , _labels(_gc)
+ , _patches(_gc)
+ , pending_lives(_gc)
{
AvmCore *core = frago->core();
nInit(core);
verbose_only( _verbose = !core->quiet_opt() && core->verbose() );
verbose_only( _outputCache = 0);
internalReset();
pageReset();
@@ -128,18 +166,16 @@ namespace nanojit
void Assembler::arReset()
{
_activation.highwatermark = 0;
_activation.lowwatermark = 0;
_activation.tos = 0;
for(uint32_t i=0; i<NJ_MAX_STACK_ENTRY; i++)
_activation.entry[i] = 0;
- for(uint32_t i=0; i<NJ_MAX_PARAMETERS; i++)
- _activation.parameter[i] = 0;
}
void Assembler::registerResetAll()
{
nRegisterResetAll(_allocator);
// keep a tally of the registers to check that our allocator works correctly
debug_only(_allocator.count = _allocator.countFree(); )
@@ -173,17 +209,17 @@ namespace nanojit
Register r = nRegisterAllocFromSet(set);
regs.used |= rmask(r);
return r;
}
counter_increment(steals);
// nothing free, steal one
// LSRA says pick the one with the furthest use
- LIns* vic = findVictim(regs,allow,prefer);
+ LIns* vic = findVictim(regs,allow);
NanoAssert(vic != NULL);
Reservation* resv = getresv(vic);
// restore vic
Register r = resv->reg;
regs.removeActive(r);
resv->reg = UnknownReg;
@@ -191,68 +227,74 @@ namespace nanojit
asm_restore(vic, resv, r);
return r;
}
void Assembler::reserveReset()
{
_resvTable[0].arIndex = 0;
int i;
- for(i=1; i<NJ_MAX_STACK_ENTRY; i++)
+ for(i=1; i<NJ_MAX_STACK_ENTRY; i++) {
_resvTable[i].arIndex = i-1;
+ _resvTable[i].used = 0;
+ }
_resvFree= i-1;
}
+ /**
+ * these instructions don't have to be saved & reloaded to spill,
+ * they can just be recalculated w/out any inputs.
+ */
+ bool Assembler::canRemat(LIns *i) {
+ return i->isconst() || i->isconstq() || i->isop(LIR_alloc);
+ }
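Rematerializable values never need a spill slot or a save/restore pair: an integer or quad constant can simply be re-emitted as an immediate, and the "value" of a `LIR_alloc` is always the frame pointer plus a fixed displacement. `evictScratchRegs()` further down in this patch uses this predicate to drop such values from registers without generating spill code.

    // illustration, using a writer call that appears elsewhere in this patch:
    //   lir->insImm(42)  -> isconst(), so canRemat() is true; asm_restore() can
    //                       materialize it again with one immediate load, and an
    //                       alloc's address is recomputed as FP + disp.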
+
Reservation* Assembler::reserveAlloc(LInsp i)
{
uint32_t item = _resvFree;
Reservation *r = &_resvTable[item];
_resvFree = r->arIndex;
r->reg = UnknownReg;
r->arIndex = 0;
+ r->used = 1;
if (!item)
setError(ResvFull);
-
- if (i->isconst() || i->isconstq())
- r->cost = 0;
- else if (i == _thisfrag->lirbuf->sp || i == _thisfrag->lirbuf->rp)
- r->cost = 2;
- else
- r->cost = 1;
-
i->setresv(item);
return r;
}
void Assembler::reserveFree(LInsp i)
{
Reservation *rs = getresv(i);
NanoAssert(rs == &_resvTable[i->resv()]);
rs->arIndex = _resvFree;
+ rs->used = 0;
_resvFree = i->resv();
i->setresv(0);
}
void Assembler::internalReset()
{
// readies for a brand spanking new code generation pass.
registerResetAll();
reserveReset();
arReset();
+ pending_lives.clear();
}
NIns* Assembler::pageAlloc(bool exitPage)
{
Page*& list = (exitPage) ? _nativeExitPages : _nativePages;
Page* page = _frago->pageAlloc();
if (page)
{
page->next = list;
list = page;
nMarkExecute(page);
+ _stats.pages++;
}
else
{
// return prior page (to allow overwrites) and mark out of mem
page = list;
setError(OutOMem);
}
return &page->code[sizeof(page->code)/sizeof(NIns)]; // just past the end
@@ -260,40 +302,80 @@ namespace nanojit
void Assembler::pageReset()
{
pagesFree(_nativePages);
pagesFree(_nativeExitPages);
_nIns = 0;
_nExitIns = 0;
+ _stats.pages = 0;
nativePageReset();
}
void Assembler::pagesFree(Page*& page)
{
while(page)
{
Page *next = page->next; // pull next ptr prior to free
_frago->pageFree(page);
page = next;
}
}
+ #define bytesFromTop(x) ( (size_t)(x) - (size_t)pageTop(x) )
+ #define bytesToBottom(x) ( (size_t)pageBottom(x) - (size_t)(x) )
+ #define bytesBetween(x,y) ( (size_t)(x) - (size_t)(y) )
+
+ int32_t Assembler::codeBytes()
+ {
+ // start and end on same page?
+ size_t exit = 0;
+ int32_t pages = _stats.pages;
+ if (_nExitIns-1 == _stats.codeExitStart)
+ ;
+ else if (samepage(_nExitIns,_stats.codeExitStart))
+ exit = bytesBetween(_stats.codeExitStart, _nExitIns);
+ else
+ {
+ pages--;
+ exit = ((intptr_t)_stats.codeExitStart & (NJ_PAGE_SIZE-1)) ? bytesFromTop(_stats.codeExitStart)+1 : 0;
+ exit += bytesToBottom(_nExitIns)+1;
+ }
+
+ size_t main = 0;
+ if (_nIns-1 == _stats.codeStart)
+ ;
+ else if (samepage(_nIns,_stats.codeStart))
+ main = bytesBetween(_stats.codeStart, _nIns);
+ else
+ {
+ pages--;
+ main = ((intptr_t)_stats.codeStart & (NJ_PAGE_SIZE-1)) ? bytesFromTop(_stats.codeStart)+1 : 0;
+ main += bytesToBottom(_nIns)+1;
+ }
+ //fprintf(stderr,"size %d, exit is %d, main is %d, page count %d, sizeof %d\n", (int)((pages) * NJ_PAGE_SIZE + main + exit),(int)exit, (int)main, (int)_stats.pages, (int)sizeof(Page));
+ return (pages) * NJ_PAGE_SIZE + main + exit;
+ }
+
+ #undef bytesFromTop
+ #undef bytesToBottom
+ #undef bytesBetween
+
Page* Assembler::handoverPages(bool exitPages)
{
Page*& list = (exitPages) ? _nativeExitPages : _nativePages;
NIns*& ins = (exitPages) ? _nExitIns : _nIns;
Page* start = list;
list = 0;
ins = 0;
return start;
}
-
+
#ifdef _DEBUG
bool Assembler::onPage(NIns* where, bool exitPages)
{
Page* page = (exitPages) ? _nativeExitPages : _nativePages;
bool on = false;
while(page)
{
if (samepage(where-1,page))
@@ -318,66 +400,69 @@ namespace nanojit
{
if (error()) return;
#ifdef NANOJIT_IA32
NanoAssert(_allocator.active[FST0] && _fpuStkDepth == -1 ||
!_allocator.active[FST0] && _fpuStkDepth == 0);
#endif
- // for tracking resv usage
- LIns* resv[NJ_MAX_STACK_ENTRY];
- for(int i=0; i<NJ_MAX_STACK_ENTRY; i++)
- resv[i]=0;
-
+ AR &ar = _activation;
// check AR entries
- NanoAssert(_activation.highwatermark < NJ_MAX_STACK_ENTRY);
+ NanoAssert(ar.highwatermark < NJ_MAX_STACK_ENTRY);
LIns* ins = 0;
RegAlloc* regs = &_allocator;
- for(uint32_t i=_activation.lowwatermark; i<_activation.tos; i++)
+ for(uint32_t i = ar.lowwatermark; i < ar.tos; i++)
{
- ins = _activation.entry[i];
+ ins = ar.entry[i];
if ( !ins )
continue;
Reservation *r = getresv(ins);
+ NanoAssert(r != 0);
int32_t idx = r - _resvTable;
- resv[idx]=ins;
NanoAssertMsg(idx, "MUST have a resource for the instruction for it to have a stack location assigned to it");
- NanoAssertMsg( r->arIndex==0 || r->arIndex==i || (ins->isQuad()&&r->arIndex==i-(stack_direction(1))), "Stack record index mismatch");
+ if (r->arIndex) {
+ if (ins->isop(LIR_alloc)) {
+ int j=i+1;
+ for (int n = i + (ins->size()>>2); j < n; j++) {
+ NanoAssert(ar.entry[j]==ins);
+ }
+ NanoAssert(r->arIndex == (uint32_t)j-1);
+ i = j-1;
+ }
+ else if (ins->isQuad()) {
+ NanoAssert(ar.entry[i - stack_direction(1)]==ins);
+ i += 1; // skip high word
+ }
+ else {
+ NanoAssertMsg(r->arIndex == i, "Stack record index mismatch");
+ }
+ }
NanoAssertMsg( r->reg==UnknownReg || regs->isConsistent(r->reg,ins), "Register record mismatch");
}
- registerConsistencyCheck(resv);
+ registerConsistencyCheck();
// check resv table
int32_t inuseCount = 0;
int32_t notInuseCount = 0;
- for(uint32_t i=1; i<NJ_MAX_STACK_ENTRY; i++)
- {
- if (resv[i]==0)
- {
- notInuseCount++;
- }
- else
- {
- inuseCount++;
- }
- }
+ for(uint32_t i=1; i < sizeof(_resvTable)/sizeof(_resvTable[0]); i++) {
+ _resvTable[i].used ? inuseCount++ : notInuseCount++;
+ }
int32_t freeCount = 0;
uint32_t free = _resvFree;
- while(free)
- {
+ while(free) {
free = _resvTable[free].arIndex;
freeCount++;
}
NanoAssert( ( freeCount==notInuseCount && inuseCount+notInuseCount==(NJ_MAX_STACK_ENTRY-1) ) );
}
- void Assembler::registerConsistencyCheck(LIns** resv)
+ void Assembler::registerConsistencyCheck()
{
// check registers
RegAlloc *regs = &_allocator;
uint32_t managed = regs->managed;
Register r = FirstReg;
while(managed)
{
if (managed&1)
@@ -386,22 +471,20 @@ namespace nanojit
{
NanoAssert(regs->getActive(r)==0);
}
else
{
LIns* ins = regs->getActive(r);
// @todo we should be able to check across RegAlloc's somehow (to include savedGP...)
Reservation *v = getresv(ins);
- NanoAssert(v);
+ NanoAssert(v != 0);
int32_t idx = v - _resvTable;
NanoAssert(idx >= 0 && idx < NJ_MAX_STACK_ENTRY);
- resv[idx]=ins;
NanoAssertMsg(idx, "MUST have a resource for the instruction for it to have a register assigned to it");
- NanoAssertMsg( v->arIndex==0 || ins==_activation.entry[v->arIndex], "Stack record index mismatch");
NanoAssertMsg( regs->getActive(v->reg)==ins, "Register record mismatch");
}
}
// next register in bitfield
r = nextreg(r);
managed >>= 1;
}
@@ -413,96 +496,113 @@ namespace nanojit
if (ia == ib)
{
findRegFor(ia, allow);
resva = resvb = getresv(ia);
}
else
{
Register rb = UnknownReg;
- resvb = getresv(ib);
- if (resvb && (rb = resvb->reg) != UnknownReg)
- allow &= ~rmask(rb);
- Register ra = findRegFor(ia, allow);
- resva = getresv(ia);
- NanoAssert(error() || (resva != 0 && ra != UnknownReg));
- if (rb == UnknownReg)
- {
- allow &= ~rmask(ra);
- findRegFor(ib, allow);
- resvb = getresv(ib);
- }
+ resvb = getresv(ib);
+ if (resvb && (rb = resvb->reg) != UnknownReg) {
+ if (allow & rmask(rb)) {
+ // ib already assigned to an allowable reg, keep that one
+ allow &= ~rmask(rb);
+ } else {
+ // ib assigned to unusable reg, pick a different one below.
+ rb = UnknownReg;
+ }
+ }
+ Register ra = findRegFor(ia, allow);
+ resva = getresv(ia);
+ NanoAssert(error() || (resva != 0 && ra != UnknownReg));
+ if (rb == UnknownReg)
+ {
+ allow &= ~rmask(ra);
+ findRegFor(ib, allow);
+ resvb = getresv(ib);
+ }
}
}
Register Assembler::findSpecificRegFor(LIns* i, Register w)
{
return findRegFor(i, rmask(w));
}
Register Assembler::findRegFor(LIns* i, RegisterMask allow)
{
- Reservation* resv = getresv(i);
+ if (i->isop(LIR_alloc)) {
+ // never allocate a reg for this w/out stack space too
+ findMemFor(i);
+ }
+
+ Reservation* resv = getresv(i);
Register r;
// if we have an existing reservation and it has a non-unknown
// register allocated, and that register is in our allowed mask,
// return it.
if (resv && (r=resv->reg) != UnknownReg && (rmask(r) & allow)) {
+ _allocator.useActive(r);
return r;
}
// figure out what registers are preferred for this instruction
RegisterMask prefer = hint(i, allow);
// if we didn't have a reservation, allocate one now
- if (!resv)
+ if (!resv)
resv = reserveAlloc(i);
- // if the reservation doesn't have a register assigned to it...
- if ((r=resv->reg) == UnknownReg)
+ r = resv->reg;
+ if (r != UnknownReg &&
+ ((rmask(r)&XmmRegs) && !(allow&XmmRegs) ||
+ (rmask(r)&x87Regs) && !(allow&x87Regs)))
+ {
+ // x87 <-> xmm copy required
+ //_nvprof("fpu-evict",1);
+ evict(r);
+ r = UnknownReg;
+ }
+
+ if (r == UnknownReg)
{
- // .. if the cost is 2 and the allowed mask includes
- // the saved regs, then prefer just those.
- if (resv->cost == 2 && (allow&SavedRegs))
- prefer = allow&SavedRegs;
- // grab one.
r = resv->reg = registerAlloc(prefer);
_allocator.addActive(r, i);
return r;
}
- else
+ else
{
// the already-allocated register isn't in the allowed mask;
// we need to grab a new one and then copy over the old
// contents to the new.
resv->reg = UnknownReg;
_allocator.retire(r);
- if (resv->cost == 2 && (allow&SavedRegs))
- prefer = allow&SavedRegs;
Register s = resv->reg = registerAlloc(prefer);
_allocator.addActive(s, i);
if ((rmask(r) & GpRegs) && (rmask(s) & GpRegs)) {
MR(r, s);
}
- else
- {
+ else {
asm_nongp_copy(r, s);
}
return s;
}
}
int Assembler::findMemFor(LIns *i)
{
Reservation* resv = getresv(i);
if (!resv)
resv = reserveAlloc(i);
- if (!resv->arIndex)
+ if (!resv->arIndex) {
resv->arIndex = arReserve(i);
+ NanoAssert(resv->arIndex <= _activation.highwatermark);
+ }
return disp(resv);
}
Register Assembler::prepResultReg(LIns *i, RegisterMask allow)
{
Reservation* resv = getresv(i);
const bool pop = !resv || resv->reg == UnknownReg;
Register rr = findRegFor(i, allow);
@@ -513,20 +613,21 @@ namespace nanojit
void Assembler::freeRsrcOf(LIns *i, bool pop)
{
Reservation* resv = getresv(i);
int index = resv->arIndex;
Register rr = resv->reg;
if (rr != UnknownReg)
{
- asm_spill(i, resv, pop);
+ asm_spilli(i, resv, pop);
_allocator.retire(rr); // free any register associated with entry
}
- arFree(index); // free any stack stack space associated with entry
+ if (index)
+ arFree(index); // free any stack space associated with entry
reserveFree(i); // clear fields of entry and add it to free list
}
void Assembler::evict(Register r)
{
registerAlloc(rmask(r));
_allocator.addFree(r);
}
@@ -538,64 +639,68 @@ namespace nanojit
// LIR_ov and LIR_cs recycle the flags set by arithmetic ops
if ((condop == LIR_ov) || (condop == LIR_cs))
return;
LInsp lhs = cond->oprnd1();
LInsp rhs = cond->oprnd2();
Reservation *rA, *rB;
+ NanoAssert((!lhs->isQuad() && !rhs->isQuad()) || (lhs->isQuad() && rhs->isQuad()));
+
// Not supported yet.
#if !defined NANOJIT_64BIT
NanoAssert(!lhs->isQuad() && !rhs->isQuad());
#endif
// ready to issue the compare
if (rhs->isconst())
{
int c = rhs->constval();
- Register r = findRegFor(lhs, GpRegs);
if (c == 0 && cond->isop(LIR_eq)) {
- if (rhs->isQuad() || lhs->isQuad()) {
+ Register r = findRegFor(lhs, GpRegs);
+ if (rhs->isQuad()) {
#if defined NANOJIT_64BIT
TESTQ(r, r);
#endif
} else {
TEST(r,r);
}
-#if defined NANOJIT_64BIT
- } else if (rhs->isQuad() || lhs->isQuad()) {
- findRegFor2(GpRegs, lhs, rA, rhs, rB);
- Register ra = rA->reg;
- Register rb = rB->reg;
- CMPQ(ra,rb);
-#endif
- } else {
+ // No 64-bit immediates so fall-back to below
+ }
+ else if (!rhs->isQuad()) {
+ Register r;
+ if (lhs->isop(LIR_alloc)) {
+ r = FP;
+ c += findMemFor(lhs);
+ } else {
+ r = findRegFor(lhs, GpRegs);
+ }
CMPi(r, c);
- }
+ }
}
else
{
findRegFor2(GpRegs, lhs, rA, rhs, rB);
Register ra = rA->reg;
Register rb = rB->reg;
- if (rhs->isQuad() || lhs->isQuad()) {
+ if (rhs->isQuad()) {
#if defined NANOJIT_64BIT
CMPQ(ra, rb);
#endif
} else {
CMP(ra, rb);
}
}
}
void Assembler::patch(GuardRecord *lr)
{
Fragment *frag = lr->target;
- NanoAssert(frag->fragEntry);
+ NanoAssert(frag->fragEntry != 0);
NIns* was = asm_adjustBranch((NIns*)lr->jmp, frag->fragEntry);
if (!lr->origTarget) lr->origTarget = was;
verbose_only(verbose_outputf("patching jump at %p to target %p (was %p)\n",
lr->jmp, frag->fragEntry, was);)
}
void Assembler::unpatch(GuardRecord *lr)
{
@@ -611,58 +716,61 @@ namespace nanojit
NIns* at = 0;
if (!_branchStateMap->get(exit))
{
at = asm_leave_trace(guard);
}
else
{
RegAlloc* captured = _branchStateMap->get(exit);
- mergeRegisterState(*captured);
+ intersectRegisterState(*captured);
verbose_only(
verbose_outputf(" merging trunk with %s",
_frago->labels->format(exit->target));
verbose_outputf(" %p:",_nIns);
)
at = exit->target->fragEntry;
- NanoAssert(at);
+ NanoAssert(at != 0);
_branchStateMap->remove(exit);
}
return at;
}
NIns* Assembler::asm_leave_trace(LInsp guard)
{
verbose_only(bool priorVerbose = _verbose; )
verbose_only( _verbose = verbose_enabled() && _frago->core()->config.verbose_exits; )
verbose_only( int32_t nativeSave = _stats.native );
verbose_only(verbose_outputf("--------------------------------------- end exit block SID %d", guard->exit()->sid);)
RegAlloc capture = _allocator;
// this point is unreachable. so free all the registers.
// if an instruction has a stack entry we will leave it alone,
- // otherwise we free it entirely. mergeRegisterState will restore.
+ // otherwise we free it entirely. intersectRegisterState will restore.
releaseRegisters();
swapptrs();
_inExit = true;
//verbose_only( verbose_outputf(" LIR_xend swapptrs, _nIns is now %08X(%08X), _nExitIns is now %08X(%08X)",_nIns, *_nIns,_nExitIns,*_nExitIns) );
debug_only( _sv_fpuStkDepth = _fpuStkDepth; _fpuStkDepth = 0; )
nFragExit(guard);
+ // restore the callee-saved registers (aka saved params)
+ assignSavedParams();
+
// if/when we patch this exit to jump over to another fragment,
// that fragment will need its parameters set up just like ours.
LInsp stateins = _thisfrag->lirbuf->state;
- Register state = findSpecificRegFor(stateins, Register(stateins->imm8()));
+ Register state = findSpecificRegFor(stateins, argRegs[stateins->imm8()]);
asm_bailout(guard, state);
- mergeRegisterState(capture);
+ intersectRegisterState(capture);
// this can be useful for breaking whenever an exit is taken
//INT3();
//NOP();
// we are done producing the exit logic for the guard so demark where our exit block code begins
NIns* jmpTarget = _nIns; // target in exit path for our mainline conditional jump
@@ -680,30 +788,22 @@ namespace nanojit
#endif
verbose_only( _verbose = priorVerbose; )
verbose_only(_stats.exitnative += (_stats.native-nativeSave));
return jmpTarget;
}
- bool Assembler::ignoreInstruction(LInsp ins)
+ void Assembler::beginAssembly(Fragment *frag, RegAllocMap* branchStateMap)
{
- LOpcode op = ins->opcode();
- if (ins->isStore() || op == LIR_loop)
- return false;
- return getresv(ins) == 0;
- }
-
- void Assembler::beginAssembly(Fragment* frag, RegAllocMap* branchStateMap)
- {
+ _thisfrag = frag;
_activation.lowwatermark = 1;
_activation.tos = _activation.lowwatermark;
_activation.highwatermark = _activation.tos;
- _thisfrag = frag;
counter_reset(native);
counter_reset(exitnative);
counter_reset(steals);
counter_reset(spills);
counter_reset(remats);
setError(None);
@@ -714,101 +814,129 @@ namespace nanojit
#ifdef AVMPLUS_PORTING_API
_endJit1Addr = _nIns;
_endJit2Addr = _nExitIns;
#endif
// make sure we got memory at least one page
if (error()) return;
- _epilogue = genEpilogue(SavedRegs);
+#ifdef PERFM
+ _stats.pages = 0;
+ _stats.codeStart = _nIns-1;
+ _stats.codeExitStart = _nExitIns-1;
+ //fprintf(stderr,"pageReset %d start %x exit start %x\n", _stats.pages, (int)_stats.codeStart, (int)_stats.codeExitStart);
+#endif /* PERFM */
+
+ _epilogue = genEpilogue();
_branchStateMap = branchStateMap;
-
+ _labels.clear();
+ _patches.clear();
+
verbose_only( verbose_outputf(" %p:",_nIns) );
verbose_only( verbose_output(" epilogue:") );
}
void Assembler::assemble(Fragment* frag, NInsList& loopJumps)
{
if (error()) return;
AvmCore *core = _frago->core();
- GC *gc = core->gc;
_thisfrag = frag;
// set up backwards pipeline: assembler -> StackFilter -> LirReader
LirReader bufreader(frag->lastIns);
- StackFilter storefilter1(&bufreader, gc, frag, frag->lirbuf->sp);
- StackFilter storefilter2(&storefilter1, gc, frag, frag->lirbuf->rp);
- DeadCodeFilter deadfilter(&storefilter2, this);
+ GC *gc = core->gc;
+ StackFilter storefilter1(&bufreader, gc, frag->lirbuf, frag->lirbuf->sp);
+ StackFilter storefilter2(&storefilter1, gc, frag->lirbuf, frag->lirbuf->rp);
+ DeadCodeFilter deadfilter(&storefilter2, frag->lirbuf->_functions);
LirFilter* rdr = &deadfilter;
verbose_only(
VerboseBlockReader vbr(rdr, this, frag->lirbuf->names);
if (verbose_enabled())
rdr = &vbr;
)
verbose_only(_thisfrag->compileNbr++; )
verbose_only(_frago->_stats.compiles++; )
verbose_only(_frago->_stats.totalCompiles++; )
_latestGuard = 0;
- _inExit = false;
- gen(rdr, loopJumps);
+ _inExit = false;
+ gen(rdr, loopJumps);
frag->fragEntry = _nIns;
frag->outbound = core->config.tree_opt? _latestGuard : 0;
//fprintf(stderr, "assemble frag %X entry %X\n", (int)frag, (int)frag->fragEntry);
+
+ if (!error()) {
+ // patch all branches
+ while(!_patches.isEmpty())
+ {
+ NIns* where = _patches.lastKey();
+ LInsp targ = _patches.removeLast();
+ LabelState *label = _labels.get(targ);
+ NIns* ntarg = label->addr;
+ if (ntarg) {
+ nPatchBranch(where,ntarg);
+ }
+ else {
+ _err = UnknownBranch;
+ break;
+ }
+ }
+ }
}
void Assembler::endAssembly(Fragment* frag, NInsList& loopJumps)
{
while(!loopJumps.isEmpty())
{
NIns* loopJump = (NIns*)loopJumps.removeLast();
nPatchBranch(loopJump, _nIns);
}
NIns* patchEntry = 0;
if (!error())
{
- patchEntry = genPrologue(SavedRegs);
+ patchEntry = genPrologue();
verbose_only( verbose_outputf(" %p:",_nIns); )
verbose_only( verbose_output(" prologue"); )
}
// something bad happened?
if (!error())
{
// check for resource leaks
debug_only(
for(uint32_t i=_activation.lowwatermark;i<_activation.highwatermark; i++) {
NanoAssertMsgf(_activation.entry[i] == 0, "frame entry %d wasn't freed\n",-4*i);
}
)
frag->fragEntry = patchEntry;
NIns* code = _nIns;
-
+#ifdef PERFM
+ _nvprof("code", codeBytes()); // requires that all pages are released between begin/endAssembly(), otherwise we double count
+#endif
// let the fragment manage the pages if we're using trees and there are branches
- Page* manage = (_frago->core()->config.tree_opt) ? handoverPages() : 0;
+ Page* manage = (_frago->core()->config.tree_opt) ? handoverPages() : 0;
frag->setCode(code, manage); // root of tree should manage all pages
- NanoAssert(!_frago->core()->config.tree_opt || frag == frag->anchor || frag->kind == MergeTrace);
//fprintf(stderr, "endAssembly frag %X entry %X\n", (int)frag, (int)frag->fragEntry);
}
- AvmAssertMsg(error() || _fpuStkDepth == 0, ("_fpuStkDepth %d\n",_fpuStkDepth));
+ NanoAssertMsgf(error() || _fpuStkDepth == 0,"_fpuStkDepth %d\n",_fpuStkDepth);
internalReset(); // clear the reservation tables and regalloc
NanoAssert(_branchStateMap->isEmpty());
_branchStateMap = 0;
#ifdef AVMPLUS_ARM
// If we've modified the code, we need to flush so we don't end up trying
// to execute junk
# if defined(UNDER_CE)
FlushInstructionCache(GetCurrentProcess(), NULL, NULL);
-# elif defined(AVMPLUS_LINUX)
+# elif defined(AVMPLUS_UNIX)
for (int i = 0; i < 2; i++) {
Page *p = (i == 0) ? _nativePages : _nativeExitPages;
Page *first = p;
while (p) {
if (!p->next || p->next != p+1) {
__clear_cache((char*)first, (char*)(p+1));
first = p->next;
@@ -846,94 +974,212 @@ namespace nanojit
if (!resv->arIndex && resv->reg == UnknownReg)
{
reserveFree(i);
}
}
}
}
-
+
+#ifdef PERFM
+#define countlir_live() _nvprof("lir-live",1)
+#define countlir_ret() _nvprof("lir-ret",1)
+#define countlir_alloc() _nvprof("lir-alloc",1)
+#define countlir_var() _nvprof("lir-var",1)
+#define countlir_use() _nvprof("lir-use",1)
+#define countlir_def() _nvprof("lir-def",1)
+#define countlir_imm() _nvprof("lir-imm",1)
+#define countlir_param() _nvprof("lir-param",1)
+#define countlir_cmov() _nvprof("lir-cmov",1)
+#define countlir_ld() _nvprof("lir-ld",1)
+#define countlir_ldq() _nvprof("lir-ldq",1)
+#define countlir_alu() _nvprof("lir-alu",1)
+#define countlir_qjoin() _nvprof("lir-qjoin",1)
+#define countlir_qlo() _nvprof("lir-qlo",1)
+#define countlir_qhi() _nvprof("lir-qhi",1)
+#define countlir_fpu() _nvprof("lir-fpu",1)
+#define countlir_st() _nvprof("lir-st",1)
+#define countlir_stq() _nvprof("lir-stq",1)
+#define countlir_jmp() _nvprof("lir-jmp",1)
+#define countlir_jcc() _nvprof("lir-jcc",1)
+#define countlir_label() _nvprof("lir-label",1)
+#define countlir_xcc() _nvprof("lir-xcc",1)
+#define countlir_x() _nvprof("lir-x",1)
+#define countlir_loop() _nvprof("lir-loop",1)
+#define countlir_call() _nvprof("lir-call",1)
+#else
+#define countlir_live()
+#define countlir_ret()
+#define countlir_alloc()
+#define countlir_var()
+#define countlir_use()
+#define countlir_def()
+#define countlir_imm()
+#define countlir_param()
+#define countlir_cmov()
+#define countlir_ld()
+#define countlir_ldq()
+#define countlir_alu()
+#define countlir_qjoin()
+#define countlir_qlo()
+#define countlir_qhi()
+#define countlir_fpu()
+#define countlir_st()
+#define countlir_stq()
+#define countlir_jmp()
+#define countlir_jcc()
+#define countlir_label()
+#define countlir_xcc()
+#define countlir_x()
+#define countlir_loop()
+#define countlir_call()
+#endif
+
void Assembler::gen(LirFilter* reader, NInsList& loopJumps)
{
// trace must start with LIR_x or LIR_loop
NanoAssert(reader->pos()->isop(LIR_x) || reader->pos()->isop(LIR_loop));
for (LInsp ins = reader->read(); ins != 0 && !error(); ins = reader->read())
{
- Reservation *rR = getresv(ins);
LOpcode op = ins->opcode();
switch(op)
{
default:
NanoAssertMsgf(false, "unsupported LIR instruction: %d (~0x40: %d)\n", op, op&~LIR64);
break;
+ case LIR_live: {
+ countlir_live();
+ pending_lives.add(ins->oprnd1());
+ break;
+ }
+
+ case LIR_ret: {
+ countlir_ret();
+ if (_nIns != _epilogue) {
+ JMP(_epilogue);
+ }
+ assignSavedParams();
+ findSpecificRegFor(ins->oprnd1(), retRegs[0]);
+ break;
+ }
+
+ case LIR_fret: {
+ countlir_ret();
+ if (_nIns != _epilogue) {
+ JMP(_epilogue);
+ }
+ assignSavedParams();
+ findSpecificRegFor(ins->oprnd1(), FST0);
+ fpu_pop();
+ break;
+ }
+
+ // allocate some stack space. the value of this instruction
+ // is the address of the stack space.
+ case LIR_alloc: {
+ countlir_alloc();
+ Reservation *resv = getresv(ins);
+ NanoAssert(resv->arIndex != 0);
+ Register r = resv->reg;
+ if (r != UnknownReg) {
+ _allocator.retire(r);
+ resv->reg = UnknownReg;
+ asm_restore(ins, resv, r);
+ }
+ freeRsrcOf(ins, 0);
+ break;
+ }
case LIR_short:
case LIR_int:
{
+ countlir_imm();
Register rr = prepResultReg(ins, GpRegs);
int32_t val;
if (op == LIR_int)
val = ins->imm32();
else
val = ins->imm16();
if (val == 0)
XOR(rr,rr);
else
LDi(rr, val);
break;
}
case LIR_quad:
{
+ countlir_imm();
asm_quad(ins);
break;
}
#if !defined NANOJIT_64BIT
case LIR_callh:
{
// return result of quad-call in register
prepResultReg(ins, rmask(retRegs[1]));
// if hi half was used, we must use the call to ensure it happens
findRegFor(ins->oprnd1(), rmask(retRegs[0]));
break;
}
#endif
case LIR_param:
{
- Register w = Register(ins->imm8());
- NanoAssert(w != UnknownReg);
- // incoming arg in register
- prepResultReg(ins, rmask(w));
+ countlir_param();
+ uint32_t a = ins->imm8();
+ uint32_t kind = ins->imm8b();
+ if (kind == 0) {
+ // ordinary param
+ AbiKind abi = _thisfrag->lirbuf->abi;
+ uint32_t abi_regcount = abi == ABI_FASTCALL ? 2 : abi == ABI_THISCALL ? 1 : 0;
+ if (a < abi_regcount) {
+ // incoming arg in register
+ prepResultReg(ins, rmask(argRegs[a]));
+ } else {
+ // incoming arg is on stack, and EAX points nearby (see genPrologue)
+ //_nvprof("param-evict-eax",1);
+ Register r = prepResultReg(ins, GpRegs & ~rmask(EAX));
+ int d = (a - abi_regcount) * sizeof(intptr_t) + 8;
+ LD(r, d, FP);
+ }
+ }
+ else {
+ // saved param
+ prepResultReg(ins, rmask(savedRegs[a]));
+ }
break;
}
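For an ordinary parameter (`kind == 0`) the fragment's ABI decides how many arguments arrive in registers: two for `ABI_FASTCALL`, one for `ABI_THISCALL`, none otherwise. Register parameters are pinned to `argRegs[a]`; the rest are loaded from the caller's frame at `FP + 8 + sizeof(intptr_t) * (a - abi_regcount)`. A `kind == 1` parameter names a callee-saved register instead. A worked example for `ABI_FASTCALL` on 32-bit x86 (the concrete register names are the usual fastcall convention and are an assumption here, not taken from this patch):

    //   param(0, 0) -> pinned to argRegs[0]   (typically ECX)
    //   param(1, 0) -> pinned to argRegs[1]   (typically EDX)
    //   param(2, 0) -> LD(r, 8,  FP)          first stack-passed argument
    //   param(3, 0) -> LD(r, 12, FP)
    //   param(i, 1) -> pinned to savedRegs[i] callee-saved "saved param"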
case LIR_qlo:
{
+ countlir_qlo();
LIns *q = ins->oprnd1();
if (!asm_qlo(ins, q))
{
Register rr = prepResultReg(ins, GpRegs);
int d = findMemFor(q);
LD(rr, d, FP);
}
break;
}
case LIR_qhi:
{
+ countlir_qhi();
Register rr = prepResultReg(ins, GpRegs);
LIns *q = ins->oprnd1();
int d = findMemFor(q);
LD(rr, d+4, FP);
break;
}
case LIR_qcmov:
case LIR_cmov:
{
+ countlir_cmov();
LIns* condval = ins->oprnd1();
NanoAssert(condval->isCmp());
LIns* values = ins->oprnd2();
NanoAssert(values->opcode() == LIR_2);
LIns* iftrue = values->oprnd1();
LIns* iffalse = values->oprnd2();
@@ -988,37 +1234,47 @@ namespace nanojit
asm_cmp(condval);
break;
}
case LIR_ld:
case LIR_ldc:
case LIR_ldcb:
{
+ countlir_ld();
LIns* base = ins->oprnd1();
LIns* disp = ins->oprnd2();
Register rr = prepResultReg(ins, GpRegs);
- Register ra = findRegFor(base, GpRegs);
+ Register ra;
int d = disp->constval();
+ if (base->isop(LIR_alloc)) {
+ ra = FP;
+ d += findMemFor(base);
+ } else {
+ ra = findRegFor(base, GpRegs);
+ }
if (op == LIR_ldcb)
LD8Z(rr, d, ra);
else
LD(rr, d, ra);
break;
}
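When the base of a load is a `LIR_alloc`, the allocation already lives at a fixed frame offset, so no register needs to be spent on the base: its displacement is folded straight into an `FP`-relative load. The same folding appears in `asm_cmp` and in the `add`/`addp` alloc-plus-constant `LEA` path elsewhere in this patch. Illustrative effect (the offsets are made up):

    // with the alloc assigned to FP-16:
    //   ld(alloc, 8)  =>  LD(rr, -16 + 8, FP)   ; one instruction, no base register
    // instead of materializing the address first:
    //   LEA(rb, -16, FP); LD(rr, 8, rb)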
case LIR_ldq:
+ case LIR_ldqc:
{
+ countlir_ldq();
asm_load64(ins);
break;
}
case LIR_neg:
case LIR_not:
{
+ countlir_alu();
Register rr = prepResultReg(ins, GpRegs);
LIns* lhs = ins->oprnd1();
Reservation *rA = getresv(lhs);
// if this is last use of lhs in reg, we can re-use result reg
Register ra;
if (rA == 0 || (ra=rA->reg) == UnknownReg)
ra = findSpecificRegFor(lhs, rr);
@@ -1031,41 +1287,44 @@ namespace nanojit
if ( rr != ra )
MR(rr,ra);
break;
}
case LIR_qjoin:
{
+ countlir_qjoin();
asm_qjoin(ins);
break;
}
#if defined NANOJIT_64BIT
case LIR_qiadd:
case LIR_qiand:
case LIR_qilsh:
case LIR_qior:
{
asm_qbinop(ins);
break;
}
#endif
case LIR_add:
+ case LIR_addp:
case LIR_sub:
case LIR_mul:
case LIR_and:
case LIR_or:
case LIR_xor:
case LIR_lsh:
case LIR_rsh:
case LIR_ush:
{
+ countlir_alu();
LInsp lhs = ins->oprnd1();
LInsp rhs = ins->oprnd2();
Register rb = UnknownReg;
RegisterMask allow = GpRegs;
bool forceReg = (op == LIR_mul || !rhs->isconst());
#ifdef NANOJIT_ARM
@@ -1081,31 +1340,38 @@ namespace nanojit
if (lhs != rhs && forceReg)
{
if ((rb = asm_binop_rhs_reg(ins)) == UnknownReg) {
rb = findRegFor(rhs, allow);
}
allow &= ~rmask(rb);
}
+ else if ((op == LIR_add||op == LIR_addp) && lhs->isop(LIR_alloc) && rhs->isconst()) {
+ // add alloc+const, use lea
+ Register rr = prepResultReg(ins, allow);
+ int d = findMemFor(lhs) + rhs->constval();
+ LEA(rr, d, FP);
+ break;
+ }
Register rr = prepResultReg(ins, allow);
Reservation* rA = getresv(lhs);
Register ra;
// if this is last use of lhs in reg, we can re-use result reg
if (rA == 0 || (ra = rA->reg) == UnknownReg)
ra = findSpecificRegFor(lhs, rr);
// else, rA already has a register assigned.
if (forceReg)
{
if (lhs == rhs)
rb = ra;
- if (op == LIR_add)
+ if (op == LIR_add || op == LIR_addp)
ADD(rr, rb);
else if (op == LIR_sub)
SUB(rr, rb);
else if (op == LIR_mul)
MUL(rr, rb);
else if (op == LIR_and)
AND(rr, rb);
else if (op == LIR_or)
@@ -1119,19 +1385,20 @@ namespace nanojit
else if (op == LIR_ush)
SHR(rr, rb);
else
NanoAssertMsg(0, "Unsupported");
}
else
{
int c = rhs->constval();
- if (op == LIR_add) {
-#ifdef NANOJIT_IA32
+ if (op == LIR_add || op == LIR_addp) {
+#ifdef NANOJIT_IA32_TODO
if (ra != rr) {
+ // this doesn't set cc's, only use it when cc's not required.
LEA(rr, c, ra);
ra = rr; // suppress mov
} else
#endif
{
ADDi(rr, c);
}
} else if (op == LIR_sub) {
@@ -1162,181 +1429,219 @@ namespace nanojit
if ( rr != ra )
MR(rr,ra);
break;
}
#ifndef NJ_SOFTFLOAT
case LIR_fneg:
{
+ countlir_fpu();
asm_fneg(ins);
break;
}
case LIR_fadd:
case LIR_fsub:
case LIR_fmul:
case LIR_fdiv:
{
+ countlir_fpu();
asm_fop(ins);
break;
}
case LIR_i2f:
{
+ countlir_fpu();
asm_i2f(ins);
break;
}
case LIR_u2f:
{
+ countlir_fpu();
asm_u2f(ins);
break;
}
#endif // NJ_SOFTFLOAT
case LIR_st:
case LIR_sti:
{
+ countlir_st();
asm_store32(ins->oprnd1(), ins->immdisp(), ins->oprnd2());
break;
}
case LIR_stq:
case LIR_stqi:
{
+ countlir_stq();
LIns* value = ins->oprnd1();
LIns* base = ins->oprnd2();
int dr = ins->immdisp();
- if (value->isop(LIR_qjoin)) {
+ if (value->isop(LIR_qjoin))
+ {
// this is correct for little-endian only
asm_store32(value->oprnd1(), dr, base);
asm_store32(value->oprnd2(), dr+4, base);
}
- else {
+ else
+ {
asm_store64(value, dr, base);
}
break;
}
- case LIR_xt:
+
+ case LIR_j:
+ {
+ countlir_jmp();
+ LInsp to = ins->getTarget();
+ LabelState *label = _labels.get(to);
+ // the jump is always taken so whatever register state we
+ // have from downstream code, is irrelevant to code before
+ // have from downstream code is irrelevant to code before
+ // state from the jump target, if we have seen that label.
+ releaseRegisters();
+ if (label && label->addr) {
+ // forward jump - pick up register state from target.
+ unionRegisterState(label->regs);
+ JMP(label->addr);
+ }
+ else {
+ // backwards jump
+ hasLoop = true;
+ handleLoopCarriedExprs();
+ if (!label) {
+ // save empty register state at loop header
+ _labels.add(to, 0, _allocator);
+ }
+ else {
+ intersectRegisterState(label->regs);
+ }
+ JMP(0);
+ _patches.put(_nIns, to);
+ verbose_only(
+ verbose_outputf(" Loop %s -> %s",
+ lirNames[ins->opcode()],
+ _thisfrag->lirbuf->names->formatRef(to));
+ )
+ }
+ break;
+ }
+
+ case LIR_jt:
+ case LIR_jf:
+ {
+ countlir_jcc();
+ LInsp to = ins->getTarget();
+ LIns* cond = ins->oprnd1();
+ LabelState *label = _labels.get(to);
+ if (label && label->addr) {
+ // forward jump to known label. need to merge with label's register state.
+ unionRegisterState(label->regs);
+ asm_branch(op == LIR_jf, cond, label->addr);
+ }
+ else {
+ // back edge.
+ hasLoop = true;
+ handleLoopCarriedExprs();
+ if (!label) {
+ // evict all registers, most conservative approach.
+ evictRegs(~_allocator.free);
+ _labels.add(to, 0, _allocator);
+ }
+ else {
+ // evict all registers, most conservative approach.
+ intersectRegisterState(label->regs);
+ }
+ NIns *branch = asm_branch(op == LIR_jf, cond, 0);
+ _patches.put(branch,to);
+ verbose_only(
+ verbose_outputf("Loop %s -> %s",
+ lirNames[ins->opcode()],
+ _thisfrag->lirbuf->names->formatRef(to));
+ )
+ }
+ break;
+ }
+ case LIR_label:
+ {
+ countlir_label();
+ LabelState *label = _labels.get(ins);
+ if (!label) {
+ // label seen first, normal target of forward jump, save addr & allocator
+ _labels.add(ins, _nIns, _allocator);
+ }
+ else {
+ // we're at the top of a loop
+ hasLoop = true;
+ NanoAssert(label->addr == 0 && label->regs.isValid());
+ //evictRegs(~_allocator.free);
+ intersectRegisterState(label->regs);
+ //asm_align_code();
+ label->addr = _nIns;
+ verbose_only(
+ verbose_outputf("Loop %s", _thisfrag->lirbuf->names->formatRef(ins));
+ )
+ }
+ break;
+ }
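Because native code is generated backwards, a "forward" jump here is one whose `LIR_label` has already been assembled and therefore has a known native address and register state to union with; a jump whose label has not been seen yet is a loop back-edge, so the branch is emitted against a null target and remembered in `_patches` until the `LIR_label` case records its address, at which point `assemble()` runs `nPatchBranch()` over the table. A self-contained sketch of that bookkeeping with stand-in types (the real code uses the `_patches`/`_labels` members added in this patch):

    #include <vector>

    struct Label  { const void* nativeAddr = nullptr; };   // filled in by the LIR_label case
    struct Branch { const void* site; Label* target; };    // recorded by LIR_j / LIR_jt / LIR_jf

    // after code generation, rewrite every branch whose label got an address
    static void resolve(const std::vector<Branch>& patches,
                        void (*patchBranch)(const void* site, const void* target))
    {
        for (const Branch& b : patches)
            if (b.target->nativeAddr)
                patchBranch(b.site, b.target->nativeAddr);
    }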
+
+ case LIR_xt:
case LIR_xf:
{
- NIns* exit = asm_exit(ins);
-
+ countlir_xcc();
// we only support cmp with guard right now, also assume it is 'close' and only emit the branch
+ NIns* exit = asm_exit(ins); // does intersectRegisterState()
LIns* cond = ins->oprnd1();
- LOpcode condop = cond->opcode();
- NanoAssert(cond->isCond());
-#if !defined(NJ_SOFTFLOAT)
- if (condop >= LIR_feq && condop <= LIR_fge)
- {
-#if defined(NJ_ARM_VFP)
- if (op == LIR_xf)
- JNE(exit);
- else
- JE(exit);
-#else
- if (op == LIR_xf)
- JP(exit);
- else
- JNP(exit);
-#endif
- asm_fcmp(cond);
- break;
- }
-#endif
- // produce the branch
- if (op == LIR_xf)
- {
- if (condop == LIR_eq)
- JNE(exit);
- else if (condop == LIR_ov)
- JNO(exit);
- else if (condop == LIR_cs)
- JNC(exit);
- else if (condop == LIR_lt)
- JNL(exit);
- else if (condop == LIR_le)
- JNLE(exit);
- else if (condop == LIR_gt)
- JNG(exit);
- else if (condop == LIR_ge)
- JNGE(exit);
- else if (condop == LIR_ult)
- JNB(exit);
- else if (condop == LIR_ule)
- JNBE(exit);
- else if (condop == LIR_ugt)
- JNA(exit);
- else //if (condop == LIR_uge)
- JNAE(exit);
- }
- else // op == LIR_xt
- {
- if (condop == LIR_eq)
- JE(exit);
- else if (condop == LIR_ov)
- JO(exit);
- else if (condop == LIR_cs)
- JC(exit);
- else if (condop == LIR_lt)
- JL(exit);
- else if (condop == LIR_le)
- JLE(exit);
- else if (condop == LIR_gt)
- JG(exit);
- else if (condop == LIR_ge)
- JGE(exit);
- else if (condop == LIR_ult)
- JB(exit);
- else if (condop == LIR_ule)
- JBE(exit);
- else if (condop == LIR_ugt)
- JA(exit);
- else //if (condop == LIR_uge)
- JAE(exit);
- }
- asm_cmp(cond);
+ asm_branch(op == LIR_xf, cond, exit);
break;
}
case LIR_x:
{
+ countlir_x();
verbose_only(verbose_output(""));
// generate the side exit branch on the main trace.
NIns *exit = asm_exit(ins);
JMP( exit );
break;
}
case LIR_loop:
{
+ countlir_loop();
JMP_long_placeholder(); // jump to SOT
verbose_only( if (_verbose && _outputCache) { _outputCache->removeLast(); outputf(" jmp SOT"); } );
loopJumps.add(_nIns);
#ifdef NJ_VERBOSE
// branching from this frag to ourself.
if (_frago->core()->config.show_stats)
- #if defined NANOJIT_64BIT
+ #if defined NANOJIT_AMD64
LDQi(argRegs[1], intptr_t((Fragment*)_thisfrag));
#else
LDi(argRegs[1], int((Fragment*)_thisfrag));
#endif
#endif
+ assignSavedParams();
+
// restore first parameter, the only one we use
LInsp state = _thisfrag->lirbuf->state;
- Register a0 = Register(state->imm8());
- findSpecificRegFor(state, a0);
+ findSpecificRegFor(state, argRegs[state->imm8()]);
break;
}
#ifndef NJ_SOFTFLOAT
case LIR_feq:
case LIR_fle:
case LIR_flt:
case LIR_fgt:
case LIR_fge:
{
+ countlir_fpu();
// only want certain regs
Register r = prepResultReg(ins, AllowableFlagRegs);
+ asm_setcc(r, ins);
#ifdef NJ_ARM_VFP
SETE(r);
#else
// SETcc only sets low 8 bits, so extend
MOVZX8(r,r);
SETNP(r);
#endif
asm_fcmp(ins);
@@ -1350,16 +1655,17 @@ namespace nanojit
case LIR_lt:
case LIR_gt:
case LIR_ge:
case LIR_ult:
case LIR_ule:
case LIR_ugt:
case LIR_uge:
{
+ countlir_alu();
// only want certain regs
Register r = prepResultReg(ins, AllowableFlagRegs);
// SETcc only sets low 8 bits, so extend
MOVZX8(r,r);
if (op == LIR_eq)
SETE(r);
else if (op == LIR_ov)
SETO(r);
@@ -1382,98 +1688,162 @@ namespace nanojit
else // if (op == LIR_uge)
SETAE(r);
asm_cmp(ins);
break;
}
#ifndef NJ_SOFTFLOAT
case LIR_fcall:
+ case LIR_fcalli:
#endif
#if defined NANOJIT_64BIT
case LIR_callh:
#endif
case LIR_call:
+ case LIR_calli:
{
+ countlir_call();
Register rr = UnknownReg;
#ifndef NJ_SOFTFLOAT
- if (op == LIR_fcall)
+ if ((op&LIR64))
{
+ // fcall or fcalli
+ Reservation* rR = getresv(ins);
rr = asm_prep_fcall(rR, ins);
}
else
#endif
{
- (void)rR;
rr = retRegs[0];
prepResultReg(ins, rmask(rr));
}
// do this after we've handled the call result, so we dont
// force the call result to be spilled unnecessarily.
- restoreCallerSaved();
+
+ evictScratchRegs();
asm_call(ins);
}
}
// check that all is well (don't check in exit paths since its more complicated)
debug_only( pageValidate(); )
debug_only( resourceConsistencyCheck(); )
}
}
- void Assembler::asm_arg(ArgSize sz, LInsp p, Register r)
- {
- if (sz == ARGSIZE_Q)
- {
- // ref arg - use lea
- if (r != UnknownReg)
- {
- // arg in specific reg
- int da = findMemFor(p);
- LEA(r, da, FP);
- }
- else
- {
- NanoAssert(0); // not supported
- }
+ NIns* Assembler::asm_branch(bool branchOnFalse, LInsp cond, NIns* targ)
+ {
+ NIns* at = 0;
+ LOpcode condop = cond->opcode();
+ NanoAssert(cond->isCond());
+#ifndef NJ_SOFTFLOAT
+ if (condop >= LIR_feq && condop <= LIR_fge)
+ {
+ return asm_jmpcc(branchOnFalse, cond, targ);
}
- else if (sz == ARGSIZE_LO)
+#endif
+ // produce the branch
+ if (branchOnFalse)
+ {
+ if (condop == LIR_eq)
+ JNE(targ);
+ else if (condop == LIR_ov)
+ JNO(targ);
+ else if (condop == LIR_cs)
+ JNC(targ);
+ else if (condop == LIR_lt)
+ JNL(targ);
+ else if (condop == LIR_le)
+ JNLE(targ);
+ else if (condop == LIR_gt)
+ JNG(targ);
+ else if (condop == LIR_ge)
+ JNGE(targ);
+ else if (condop == LIR_ult)
+ JNB(targ);
+ else if (condop == LIR_ule)
+ JNBE(targ);
+ else if (condop == LIR_ugt)
+ JNA(targ);
+ else //if (condop == LIR_uge)
+ JNAE(targ);
+ }
+ else // !branchOnFalse
{
- if (r != UnknownReg)
- {
- // arg goes in specific register
- if (p->isconst())
- LDi(r, p->constval());
- else
- findSpecificRegFor(p, r);
- }
- else
- {
- asm_pusharg(p);
- }
+ if (condop == LIR_eq)
+ JE(targ);
+ else if (condop == LIR_ov)
+ JO(targ);
+ else if (condop == LIR_cs)
+ JC(targ);
+ else if (condop == LIR_lt)
+ JL(targ);
+ else if (condop == LIR_le)
+ JLE(targ);
+ else if (condop == LIR_gt)
+ JG(targ);
+ else if (condop == LIR_ge)
+ JGE(targ);
+ else if (condop == LIR_ult)
+ JB(targ);
+ else if (condop == LIR_ule)
+ JBE(targ);
+ else if (condop == LIR_ugt)
+ JA(targ);
+ else //if (condop == LIR_uge)
+ JAE(targ);
}
- else
- {
- asm_farg(p);
- }
+ at = _nIns;
+ asm_cmp(cond);
+ return at;
+ }
+
+ void Assembler::assignSavedParams()
+ {
+ // restore saved regs
+ releaseRegisters();
+ LirBuffer *b = _thisfrag->lirbuf;
+ for (int i=0, n = NumSavedRegs; i < n; i++) {
+ LIns *p = b->savedParams[i];
+ if (p)
+ findSpecificRegFor(p, savedRegs[p->imm8()]);
+ }
}
- uint32_t Assembler::arFree(uint32_t idx)
- {
- // nothing to free
- if (idx == 0)
- return 0;
+ void Assembler::reserveSavedParams()
+ {
+ LirBuffer *b = _thisfrag->lirbuf;
+ for (int i=0, n = NumSavedRegs; i < n; i++) {
+ LIns *p = b->savedParams[i];
+ if (p)
+ findMemFor(p);
+ }
+ }
- if (idx > 0 && _activation.entry[idx] == _activation.entry[idx+stack_direction(1)])
- _activation.entry[idx+stack_direction(1)] = 0; // clear 2 slots for doubles
+ void Assembler::handleLoopCarriedExprs()
+ {
+ // ensure that exprs spanning the loop are marked live at the end of the loop
+ reserveSavedParams();
+ for (int i=0, n=pending_lives.size(); i < n; i++) {
+ findMemFor(pending_lives[i]);
+ }
+ }
- _activation.entry[idx] = 0;
- return 0;
+ void Assembler::arFree(uint32_t idx)
+ {
+ AR &ar = _activation;
+ LIns *i = ar.entry[idx];
+ NanoAssert(i != 0);
+ do {
+ ar.entry[idx] = 0;
+ idx--;
+ } while (ar.entry[idx] == i);
}
#ifdef NJ_VERBOSE
void Assembler::printActivationState()
{
bool verbose_activation = false;
if (!verbose_activation)
return;
@@ -1513,104 +1883,245 @@ namespace nanojit
}
}
output(&outline[0]);
}
)
#endif
}
#endif
+
+ bool canfit(int32_t size, int32_t loc, AR &ar) {
+ for (int i=0; i < size; i++) {
+ if (ar.entry[loc+stack_direction(i)])
+ return false;
+ }
+ return true;
+ }
uint32_t Assembler::arReserve(LIns* l)
{
NanoAssert(!l->isTramp());
//verbose_only(printActivationState());
- const bool quad = l->isQuad();
- const int32_t n = _activation.tos;
- int32_t start = _activation.lowwatermark;
+ int32_t size = l->isop(LIR_alloc) ? (l->size()>>2) : l->isQuad() ? 2 : sizeof(intptr_t)>>2;
+ AR &ar = _activation;
+ const int32_t tos = ar.tos;
+ int32_t start = ar.lowwatermark;
int32_t i = 0;
NanoAssert(start>0);
- if (n >= NJ_MAX_STACK_ENTRY-2)
- {
- setError(StackFull);
- return start;
- }
- else if (quad)
- {
- if ( (start&1)==1 ) start++; // even
- for(i=start; i <= n; i+=2)
- {
- if ( (_activation.entry[i+stack_direction(1)] == 0) && (i==n || (_activation.entry[i] == 0)) )
- break; // for fp we need 2 adjacent aligned slots
+
+ if (size == 1) {
+ // easy most common case -- find a hole, or make the frame bigger
+ for (i=start; i < NJ_MAX_STACK_ENTRY; i++) {
+ if (ar.entry[i] == 0) {
+ // found a hole
+ ar.entry[i] = l;
+ break;
+ }
+ }
+ }
+ else if (size == 2) {
+ if ( (start&1)==1 ) start++; // even 8 boundary
+ for (i=start; i < NJ_MAX_STACK_ENTRY; i+=2) {
+ if ( (ar.entry[i+stack_direction(1)] == 0) && (i==tos || (ar.entry[i] == 0)) ) {
+ // found 2 adjacent aligned slots
+ NanoAssert(_activation.entry[i] == 0);
+ NanoAssert(_activation.entry[i+stack_direction(1)] == 0);
+ ar.entry[i] = l;
+ ar.entry[i+stack_direction(1)] = l;
+ break;
+ }
}
}
- else
- {
- for(i=start; i < n; i++)
- {
- if (_activation.entry[i] == 0)
- break; // not being used
- }
+ else {
+ // alloc larger block on 8byte boundary.
+ if (start < size) start = size;
+ if ((start&1)==1) start++;
+ for (i=start; i < NJ_MAX_STACK_ENTRY; i+=2) {
+ if (canfit(size, i, ar)) {
+ // place the entry in the table and mark the instruction with it
+ for (int32_t j=0; j < size; j++) {
+ NanoAssert(_activation.entry[i+stack_direction(j)] == 0);
+ _activation.entry[i+stack_direction(j)] = l;
+ }
+ break;
+ }
+ }
}
-
- int32_t inc = ((i-n+1) < 0) ? 0 : (i-n+1);
- if (quad && stack_direction(1)>0) inc++;
- _activation.tos += inc;
- _activation.highwatermark += inc;
-
- // place the entry in the table and mark the instruction with it
- _activation.entry[i] = l;
- if (quad) _activation.entry[i+stack_direction(1)] = l;
- return i;
+ if (i >= (int32_t)ar.tos) {
+ ar.tos = ar.highwatermark = i+1;
+ }
+ if (tos+size >= NJ_MAX_STACK_ENTRY) {
+ setError(StackFull);
+ }
+ return i;
}
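Slot sizing in the rewritten arReserve: a 32-bit value takes one 4-byte slot (first hole wins), a quad takes a pair of 8-byte-aligned slots, and a `LIR_alloc` takes `size() >> 2` slots starting on an 8-byte boundary; `tos` and `highwatermark` only grow when the chosen run extends past the current top. A standalone first-fit sketch of the policy, ignoring `stack_direction` and the error path for simplicity:

    // entry[i] == 0 means the 4-byte slot i is free, mirroring ar.entry[]
    static int reserve_slots(void* entry[], int max, int start, int size)
    {
        if (size > 1 && (start & 1)) start++;              // 8-byte align multi-slot items
        for (int i = start; i + size <= max; i += (size > 1 ? 2 : 1)) {
            bool fits = true;
            for (int j = 0; j < size; j++)
                if (entry[i + j]) { fits = false; break; }
            if (fits) return i;                            // caller marks entry[i..i+size-1]
        }
        return -1;                                         // no room: the real code sets StackFull
    }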
- void Assembler::restoreCallerSaved()
+ /**
+ * move regs around so the SavedRegs contains the highest priority regs.
+ */
+ void Assembler::evictScratchRegs()
+ {
+ // find the top GpRegs that are candidates to put in SavedRegs
+
+ // tosave is a binary heap stored in an array. the root is tosave[0],
+ // left child is at i+1, right child is at i+2.
+
+ Register tosave[LastReg-FirstReg+1];
+ int len=0;
+ RegAlloc *regs = &_allocator;
+ for (Register r = FirstReg; r <= LastReg; r = nextreg(r)) {
+ if (rmask(r) & GpRegs) {
+ LIns *i = regs->getActive(r);
+ if (i) {
+ if (canRemat(i)) {
+ evict(r);
+ }
+ else {
+ int32_t pri = regs->getPriority(r);
+ // add to heap by adding to end and bubbling up
+ int j = len++;
+ while (j > 0 && pri > regs->getPriority(tosave[j/2])) {
+ tosave[j] = tosave[j/2];
+ j /= 2;
+ }
+ NanoAssert(size_t(j) < sizeof(tosave)/sizeof(tosave[0]));
+ tosave[j] = r;
+ }
+ }
+ }
+ }
+
+ // now tosave has the live exprs in priority order.
+ // allocate each of the top priority exprs to a SavedReg
+
+ RegisterMask allow = SavedRegs;
+ while (allow && len > 0) {
+ // get the highest priority var
+ Register hi = tosave[0];
+ LIns *i = regs->getActive(hi);
+ Register r = findRegFor(i, allow);
+ allow &= ~rmask(r);
+
+ // remove from heap by replacing root with end element and bubbling down.
+ if (allow && --len > 0) {
+ Register last = tosave[len];
+ int j = 0;
+ while (j+1 < len) {
+ int child = j+1;
+ if (j+2 < len && regs->getPriority(tosave[j+2]) > regs->getPriority(tosave[j+1]))
+ child++;
+ if (regs->getPriority(last) > regs->getPriority(tosave[child]))
+ break;
+ tosave[j] = tosave[child];
+ j = child;
+ }
+ tosave[j] = last;
+ }
+ }
+
+ // now evict everything else.
+ evictRegs(~SavedRegs);
+ }
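The intent of evictScratchRegs is to spend the limited callee-saved registers on the highest-priority live values: rematerializable values are evicted outright, the remaining live GP values are ranked by the allocator's priority, the top few are moved into `SavedRegs`, and everything else is evicted. A general sketch of that selection policy (deliberately not the in-place heap used above; assumes a caller-supplied `priority` functor):

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // return the k highest-priority live values; these get the saved registers,
    // everything beyond them is evicted
    template <class T, class Pri>
    std::vector<T> topK(std::vector<T> live, std::size_t k, Pri priority)
    {
        k = std::min(k, live.size());
        std::partial_sort(live.begin(), live.begin() + k, live.end(),
                          [&](const T& a, const T& b) { return priority(a) > priority(b); });
        live.resize(k);
        return live;
    }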
+
+ void Assembler::evictRegs(RegisterMask regs)
{
// generate code to restore callee saved registers
// @todo speed this up
- RegisterMask scratch = ~SavedRegs;
- for (Register r = FirstReg; r <= LastReg; r = nextreg(r))
- {
- if ((rmask(r) & scratch) && _allocator.getActive(r))
- {
+ for (Register r = FirstReg; r <= LastReg; r = nextreg(r)) {
+ if ((rmask(r) & regs) && _allocator.getActive(r)) {
evict(r);
}
}
}
/**
* Merge the current state of the registers with a previously stored version
+ * current == saved skip
+ * current & saved evict current, keep saved
+ * current & !saved evict current (unionRegisterState would keep)
+ * !current & saved keep saved
*/
- void Assembler::mergeRegisterState(RegAlloc& saved)
+ void Assembler::intersectRegisterState(RegAlloc& saved)
{
// evictions and pops first
RegisterMask skip = 0;
for (Register r=FirstReg; r <= LastReg; r = nextreg(r))
{
LIns * curins = _allocator.getActive(r);
LIns * savedins = saved.getActive(r);
if (curins == savedins)
{
- verbose_only( if (curins)
- verbose_outputf(" skip %s", regNames[r]); )
+ verbose_only( if (curins) verbose_outputf(" skip %s", regNames[r]); )
skip |= rmask(r);
}
else
{
- if (curins)
+ if (curins) {
+ //_nvprof("intersect-evict",1);
evict(r);
+ }
#ifdef NANOJIT_IA32
if (savedins && (rmask(r) & x87Regs))
FSTP(r);
#endif
}
}
+ assignSaved(saved, skip);
+ }
+ /**
+ * Merge the current state of the registers with a previously stored version.
+ *
+ * current == saved skip
+ * current & saved evict current, keep saved
+ * current & !saved keep current (intersectRegisterState would evict)
+ * !current & saved keep saved
+ */
+ void Assembler::unionRegisterState(RegAlloc& saved)
+ {
+ // evictions and pops first
+ RegisterMask skip = 0;
+ for (Register r=FirstReg; r <= LastReg; r = nextreg(r))
+ {
+ LIns * curins = _allocator.getActive(r);
+ LIns * savedins = saved.getActive(r);
+ if (curins == savedins)
+ {
+ verbose_only( if (curins) verbose_outputf(" skip %s", regNames[r]); )
+ skip |= rmask(r);
+ }
+ else
+ {
+ if (curins && savedins) {
+ //_nvprof("union-evict",1);
+ evict(r);
+ }
+
+ #ifdef NANOJIT_IA32
+ if (rmask(r) & x87Regs) {
+ if (savedins) {
+ FSTP(r);
+ }
+ else {
+ // saved state did not have fpu reg allocated,
+ // so we must evict here to keep x87 stack balanced.
+ evict(r);
+ }
+ }
+ #endif
+ }
+ }
+ assignSaved(saved, skip);
+ }
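
A compact restatement of the two truth tables above, as an illustrative sketch (not nanojit code). The only difference between the two merges is the `current & !saved` row; the x87 stack-balancing special case in unionRegisterState is omitted here.

    enum MergeAction { Skip, EvictCurrent, KeepCurrent, KeepSaved, Nothing };

    // same      : current and saved hold the same instruction in this register
    // hasCurrent: current state has some instruction in this register
    // hasSaved  : saved state has some instruction in this register
    static MergeAction mergeRegister(bool same, bool hasCurrent, bool hasSaved, bool unionMerge)
    {
        if (same && hasCurrent)      return Skip;           // identical, nothing to do
        if (hasCurrent && hasSaved)  return EvictCurrent;   // keep the saved value
        if (hasCurrent && !hasSaved) return unionMerge ? KeepCurrent : EvictCurrent;
        if (!hasCurrent && hasSaved) return KeepSaved;
        return Nothing;
    }
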
+
+ void Assembler::assignSaved(RegAlloc &saved, RegisterMask skip)
+ {
// now reassign mainline registers
for (Register r=FirstReg; r <= LastReg; r = nextreg(r))
{
LIns *i = saved.getActive(r);
if (i && !(skip&rmask(r)))
findSpecificRegFor(i, r);
}
debug_only(saved.used = 0); // marker that we are no longer in exit path
@@ -1710,38 +2221,50 @@ namespace nanojit
#endif /* FEATURE_NANOJIT */
#if defined(FEATURE_NANOJIT) || defined(NJ_VERBOSE)
uint32_t CallInfo::_count_args(uint32_t mask) const
{
uint32_t argc = 0;
uint32_t argt = _argtypes;
- for (int i = 0; i < 5; ++i)
- {
+ for (uint32_t i = 0; i < MAXARGS; ++i) {
argt >>= 2;
argc += (argt & mask) != 0;
}
return argc;
}
uint32_t CallInfo::get_sizes(ArgSize* sizes) const
{
uint32_t argt = _argtypes;
uint32_t argc = 0;
- for (int32_t i = 0; i < 5; i++) {
+ for (uint32_t i = 0; i < MAXARGS; i++) {
argt >>= 2;
ArgSize a = ArgSize(argt&3);
#ifdef NJ_SOFTFLOAT
if (a == ARGSIZE_F) {
sizes[argc++] = ARGSIZE_LO;
sizes[argc++] = ARGSIZE_LO;
continue;
}
#endif
if (a != ARGSIZE_NONE) {
sizes[argc++] = a;
}
}
+ if (isIndirect()) {
+ // add one more arg for indirect call address
+ argc++;
+ }
return argc;
}
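
The packed `_argtypes` word walked by get_sizes above keeps the return type in its low two bits and each argument in the next 2-bit field up. A minimal standalone decoder under that layout, with illustrative names (MAXARGS assumed to be 8 here):

    #include <cstdint>
    #include <cstdio>

    enum ArgSizeKind { SIZE_NONE = 0, SIZE_F = 1, SIZE_LO = 2, SIZE_Q = 3 };

    // Illustrative only: low 2 bits are the return type, each argument occupies
    // the next 2-bit field above it, exactly as get_sizes walks the word.
    static void decodeArgTypes(uint32_t argtypes, int maxArgs /* e.g. 8 */)
    {
        std::printf("ret kind: %u\n", argtypes & 3);
        uint32_t argt = argtypes;
        for (int i = 0; i < maxArgs; i++) {
            argt >>= 2;                       // step past ret, then past each arg
            unsigned a = argt & 3;
            if (a != SIZE_NONE)
                std::printf("arg%d kind: %u\n", i + 1, a);
        }
    }
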
-#endif
+
+ void LabelStateMap::add(LIns *label, NIns *addr, RegAlloc &regs) {
+ LabelState *st = new (gc) LabelState(addr, regs);
+ labels.put(label, st);
+ }
+
+ LabelState* LabelStateMap::get(LIns *label) {
+ return labels.get(label);
+ }
}
+#endif // FEATURE_NANOJIT
--- a/js/src/nanojit/Assembler.h
+++ b/js/src/nanojit/Assembler.h
@@ -68,67 +68,45 @@ namespace nanojit
#define STACK_GRANULARITY sizeof(void *)
/**
* The Assembler is only concerned with transforming LIR to native instructions
*/
struct Reservation
{
uint32_t arIndex:16; /* index into stack frame. displ is -4*arIndex */
- Register reg:8; /* register UnkownReg implies not in register */
- int cost:8;
+ Register reg:15; /* register UnknownReg implies not in register */
+ uint32_t used:1;
};
struct AR
{
LIns* entry[ NJ_MAX_STACK_ENTRY ]; /* maps to 4B contiguous locations relative to the frame pointer */
uint32_t tos; /* current top of stack entry */
uint32_t highwatermark; /* max tos hit */
uint32_t lowwatermark; /* we pre-allocate entries from 0 upto this index-1; so dynamic entries are added above this index */
- LIns* parameter[ NJ_MAX_PARAMETERS ]; /* incoming parameters */
- };
-
- enum ArgSize {
- ARGSIZE_NONE = 0,
- ARGSIZE_F = 1,
- ARGSIZE_LO = 2,
- ARGSIZE_Q = 3,
- _ARGSIZE_MASK_INT = 2,
- _ARGSIZE_MASK_ANY = 3
- };
-
- struct CallInfo
- {
- intptr_t _address;
- uint16_t _argtypes; // 6 2-bit fields indicating arg type, by ARGSIZE above (including ret type): a1 a2 a3 a4 a5 ret
- uint8_t _cse; // true if no side effects
- uint8_t _fold; // true if no side effects
- verbose_only ( const char* _name; )
-
- uint32_t FASTCALL _count_args(uint32_t mask) const;
- uint32_t get_sizes(ArgSize*) const;
-
- inline uint32_t FASTCALL count_args() const { return _count_args(_ARGSIZE_MASK_ANY); }
- inline uint32_t FASTCALL count_iargs() const { return _count_args(_ARGSIZE_MASK_INT); }
- // fargs = args - iargs
};
#ifdef AVMPLUS_WIN32
#define AVMPLUS_ALIGN16(type) __declspec(align(16)) type
#else
#define AVMPLUS_ALIGN16(type) type __attribute__ ((aligned (16)))
#endif
struct Stats
{
counter_define(steals;)
counter_define(remats;)
counter_define(spills;)
counter_define(native;)
counter_define(exitnative;)
+
+ int32_t pages;
+ NIns* codeStart;
+ NIns* codeExitStart;
DECLARE_PLATFORM_STATS()
#ifdef __GNUC__
// inexplicably, gnuc gives padding/alignment warnings without this. pacify it.
bool pad[4];
#endif
};
@@ -141,20 +119,44 @@ namespace nanojit
,OutOMem
,StackFull
,ResvFull
,RegionFull
,MaxLength
,MaxExit
,MaxXJump
,UnknownPrim
+ ,UnknownBranch
};
typedef avmplus::List<NIns*, avmplus::LIST_NonGCObjects> NInsList;
+ typedef avmplus::SortedMap<LIns*,NIns*,avmplus::LIST_NonGCObjects> InsMap;
+ typedef avmplus::SortedMap<NIns*,LIns*,avmplus::LIST_NonGCObjects> NInsMap;
+ class LabelState MMGC_SUBCLASS_DECL
+ {
+ public:
+ RegAlloc regs;
+ NIns *addr;
+ LabelState(NIns *a, RegAlloc &r) : regs(r), addr(a)
+ {}
+ };
+
+ class LabelStateMap
+ {
+ GC *gc;
+ avmplus::SortedMap<LIns*, LabelState*, avmplus::LIST_GCObjects> labels;
+ public:
+ LabelStateMap(GC *gc) : gc(gc), labels(gc)
+ {}
+
+ void clear() { labels.clear(); }
+ void add(LIns *label, NIns *addr, RegAlloc &regs);
+ LabelState *get(LIns *);
+ };
/**
* Information about the activation record for the method is built up
* as we generate machine code. As part of the prologue, we issue
* a stack adjustment instruction and then later patch the adjustment
* value. Temporary values can be placed into the AR as method calls
* are issued. Also MIR_alloc instructions will consume space.
*/
class Assembler MMGC_SUBCLASS_DECL
@@ -185,66 +187,73 @@ namespace nanojit
void copyRegisters(RegAlloc* copyTo);
void releaseRegisters();
void patch(GuardRecord *lr);
void unpatch(GuardRecord *lr);
AssmError error() { return _err; }
void setError(AssmError e) { _err = e; }
void setCallTable(const CallInfo *functions);
void pageReset();
+ int32_t codeBytes();
Page* handoverPages(bool exitPages=false);
debug_only ( void pageValidate(); )
debug_only ( bool onPage(NIns* where, bool exitPages=false); )
// support calling out from a fragment ; used to debug the jit
debug_only( void resourceConsistencyCheck(); )
- debug_only( void registerConsistencyCheck(LIns** resv); )
+ debug_only( void registerConsistencyCheck(); )
Stats _stats;
+ int hasLoop;
private:
void gen(LirFilter* toCompile, NInsList& loopJumps);
- NIns* genPrologue(RegisterMask);
- NIns* genEpilogue(RegisterMask);
-
- bool ignoreInstruction(LInsp ins);
+ NIns* genPrologue();
+ NIns* genEpilogue();
GuardRecord* placeGuardRecord(LInsp guard);
void initGuardRecord(LInsp guard, GuardRecord*);
uint32_t arReserve(LIns* l);
- uint32_t arFree(uint32_t idx);
+ void arFree(uint32_t idx);
void arReset();
Register registerAlloc(RegisterMask allow);
void registerResetAll();
- void restoreCallerSaved();
- void mergeRegisterState(RegAlloc& saved);
- LInsp findVictim(RegAlloc& regs, RegisterMask allow, RegisterMask prefer);
+ void evictRegs(RegisterMask regs);
+ void evictScratchRegs();
+ void intersectRegisterState(RegAlloc& saved);
+ void unionRegisterState(RegAlloc& saved);
+ void assignSaved(RegAlloc &saved, RegisterMask skip);
+ LInsp findVictim(RegAlloc& regs, RegisterMask allow);
int findMemFor(LIns* i);
Register findRegFor(LIns* i, RegisterMask allow);
void findRegFor2(RegisterMask allow, LIns* ia, Reservation* &ra, LIns *ib, Reservation* &rb);
Register findSpecificRegFor(LIns* i, Register w);
Register prepResultReg(LIns *i, RegisterMask allow);
void freeRsrcOf(LIns *i, bool pop);
void evict(Register r);
RegisterMask hint(LIns*i, RegisterMask allow);
NIns* pageAlloc(bool exitPage=false);
void pagesFree(Page*& list);
void internalReset();
+ bool canRemat(LIns*);
Reservation* reserveAlloc(LInsp i);
void reserveFree(LInsp i);
void reserveReset();
- Reservation* getresv(LIns *x) { return x->resv() ? &_resvTable[x->resv()] : 0; }
+ Reservation* getresv(LIns *x) {
+ uint32_t resv_index = x->resv();
+ return resv_index ? &_resvTable[resv_index] : 0;
+ }
DWB(Fragmento*) _frago;
GC* _gc;
DWB(Fragment*) _thisfrag;
RegAllocMap* _branchStateMap;
GuardRecord* _latestGuard;
const CallInfo *_functions;
@@ -254,60 +263,72 @@ namespace nanojit
NIns* _epilogue;
Page* _nativePages; // list of NJ_PAGE_SIZE pages that have been alloc'd
Page* _nativeExitPages; // list of pages that have been allocated for exit code
AssmError _err; // 0 = means assemble() appears ok, otherwise it failed
AR _activation;
RegAlloc _allocator;
+ LabelStateMap _labels;
+ NInsMap _patches;
Reservation _resvTable[ NJ_MAX_STACK_ENTRY ]; // table where we house stack and register information
uint32_t _resvFree;
- bool _inExit,vpad2[3];
+ bool _inExit, vpad2[3];
+ avmplus::List<LIns*, avmplus::LIST_GCObjects> pending_lives;
void asm_cmp(LIns *cond);
#ifndef NJ_SOFTFLOAT
void asm_fcmp(LIns *cond);
+ void asm_setcc(Register res, LIns *cond);
+ NIns * asm_jmpcc(bool brOnFalse, LIns *cond, NIns *target);
#endif
void asm_mmq(Register rd, int dd, Register rs, int ds);
NIns* asm_exit(LInsp guard);
NIns* asm_leave_trace(LInsp guard);
void asm_qjoin(LIns *ins);
void asm_store32(LIns *val, int d, LIns *base);
void asm_store64(LIns *val, int d, LIns *base);
void asm_restore(LInsp, Reservation*, Register);
- void asm_spill(LInsp i, Reservation *resv, bool pop);
+ void asm_load(int d, Register r);
+ void asm_spilli(LInsp i, Reservation *resv, bool pop);
+ void asm_spill(Register rr, int d, bool pop=false, bool quad=false);
void asm_load64(LInsp i);
void asm_pusharg(LInsp p);
NIns* asm_adjustBranch(NIns* at, NIns* target);
void asm_quad(LInsp i);
bool asm_qlo(LInsp ins, LInsp q);
void asm_fneg(LInsp ins);
void asm_fop(LInsp ins);
void asm_i2f(LInsp ins);
void asm_u2f(LInsp ins);
Register asm_prep_fcall(Reservation *rR, LInsp ins);
void asm_nongp_copy(Register r, Register s);
void asm_bailout(LInsp guard, Register state);
void asm_call(LInsp);
void asm_arg(ArgSize, LInsp, Register);
Register asm_binop_rhs_reg(LInsp ins);
+ NIns* asm_branch(bool branchOnFalse, LInsp cond, NIns* targ);
+ void assignSavedParams();
+ void reserveSavedParams();
+ void handleLoopCarriedExprs();
// platform specific implementation (see NativeXXX.cpp file)
void nInit(uint32_t flags);
void nInit(AvmCore *);
Register nRegisterAllocFromSet(int32_t set);
void nRegisterResetAll(RegAlloc& a);
void nMarkExecute(Page* page, int32_t count=1, bool enable=true);
void nFrameRestore(RegisterMask rmask);
static void nPatchBranch(NIns* branch, NIns* location);
void nFragExit(LIns* guard);
// platform specific methods
public:
+ const static Register savedRegs[NumSavedRegs];
DECLARE_PLATFORM_ASSEMBLER()
private:
debug_only( int32_t _fpuStkDepth; )
debug_only( int32_t _sv_fpuStkDepth; )
// since we generate backwards the depth is negative
inline void fpu_push() {
--- a/js/src/nanojit/Fragmento.cpp
+++ b/js/src/nanojit/Fragmento.cpp
@@ -34,16 +34,17 @@
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#include "nanojit.h"
+#undef MEMORY_INFO
namespace nanojit
{
#ifdef FEATURE_NANOJIT
using namespace avmplus;
static uint32_t calcSaneCacheSize(uint32_t in)
@@ -53,26 +54,27 @@ namespace nanojit
return in;
}
/**
* This is the main control center for creating and managing fragments.
*/
Fragmento::Fragmento(AvmCore* core, uint32_t cacheSizeLog2)
: _allocList(core->GetGC()),
- _max_pages(1 << (calcSaneCacheSize(cacheSizeLog2) - NJ_LOG2_PAGE_SIZE))
+ _max_pages(1 << (calcSaneCacheSize(cacheSizeLog2) - NJ_LOG2_PAGE_SIZE)),
+ _pagesGrowth(1)
{
#ifdef MEMORY_INFO
_allocList.set_meminfo_name("Fragmento._allocList");
#endif
+ NanoAssert(_max_pages > _pagesGrowth); // shrink growth if needed
_core = core;
GC *gc = core->GetGC();
_frags = new (gc) FragmentMap(gc, 128);
_assm = new (gc) nanojit::Assembler(this);
- _pageGrowth = 1;
verbose_only( enterCounts = new (gc) BlockHist(gc); )
verbose_only( mergeCounts = new (gc) BlockHist(gc); )
}
Fragmento::~Fragmento()
{
AllocEntry *entry;
@@ -104,20 +106,20 @@ namespace nanojit
if (_stats.maxPageUse < pageUse)
_stats.maxPageUse = pageUse;
}
Page* Fragmento::pageAlloc()
{
NanoAssert(sizeof(Page) == NJ_PAGE_SIZE);
if (!_pageList) {
- pagesGrow(_pageGrowth); // try to get more mem
- if ((_pageGrowth << 1) < _max_pages)
- _pageGrowth <<= 1;
- }
+ pagesGrow(_pagesGrowth); // try to get more mem
+ if ((_pagesGrowth << 1) < _max_pages)
+ _pagesGrowth <<= 1;
+ }
Page *page = _pageList;
if (page)
{
_pageList = page->next;
trackFree(-1);
}
//fprintf(stderr, "Fragmento::pageAlloc %X, %d free pages of %d\n", (int)page, _stats.freePages, _stats.pages);
NanoAssert(pageCount()==_stats.freePages);
@@ -216,17 +218,17 @@ namespace nanojit
return _assm;
}
AvmCore* Fragmento::core()
{
return _core;
}
- Fragment* Fragmento::newLoop(const void* ip)
+ Fragment* Fragmento::getAnchor(const void* ip)
{
Fragment *f = newFrag(ip);
Fragment *p = _frags->get(ip);
if (p) {
f->first = p;
/* append at the end of the peer list */
Fragment* next;
while ((next = p->peer) != NULL)
@@ -475,17 +477,17 @@ namespace nanojit
_assm->_verbose = vsave;
}
void Fragmento::countBlock(BlockHist *hist, const void* ip)
{
int c = hist->count(ip);
if (_assm->_verbose)
- _assm->outputf("++ %s %d", core()->interp.labels->format(ip), c);
+ _assm->outputf("++ %s %d", labels->format(ip), c);
}
void Fragmento::countIL(uint32_t il, uint32_t abc)
{
_stats.ilsize += il;
_stats.abcsize += abc;
}
--- a/js/src/nanojit/Fragmento.h
+++ b/js/src/nanojit/Fragmento.h
@@ -49,17 +49,16 @@ extern void drawTraceTrees(Fragmento *fr
namespace nanojit
{
struct GuardRecord;
class Assembler;
struct PageHeader
{
struct Page *next;
- verbose_only (int seq;) // sequence # of page
};
struct Page: public PageHeader
{
union {
LIns lir[(NJ_PAGE_SIZE-sizeof(PageHeader))/sizeof(LIns)];
NIns code[(NJ_PAGE_SIZE-sizeof(PageHeader))/sizeof(NIns)];
};
};
@@ -96,18 +95,18 @@ namespace nanojit
~Fragmento();
void addMemory(void* firstPage, uint32_t pageCount); // gives memory to the Assembler
Assembler* assm();
AvmCore* core();
Page* pageAlloc();
void pageFree(Page* page);
- Fragment* newLoop(const void* ip);
Fragment* getLoop(const void* ip);
+ Fragment* getAnchor(const void* ip);
void clearFrags(); // clear all fragments from the cache
Fragment* getMerge(GuardRecord *lr, const void* ip);
Fragment* createBranch(GuardRecord *lr, const void* ip);
Fragment* newFrag(const void* ip);
Fragment* newBranch(Fragment *from, const void* ip);
verbose_only ( uint32_t pageCount(); )
verbose_only ( void dumpStats(); )
@@ -140,23 +139,23 @@ namespace nanojit
private:
void pagesGrow(int32_t count);
void trackFree(int32_t delta);
AvmCore* _core;
DWB(Assembler*) _assm;
DWB(FragmentMap*) _frags; /* map from ip -> Fragment ptr */
Page* _pageList;
- uint32_t _pageGrowth;
/* unmanaged mem */
AllocList _allocList;
GCHeap* _gcHeap;
const uint32_t _max_pages;
+ uint32_t _pagesGrowth;
};
enum TraceKind {
LoopTrace,
BranchTrace,
MergeTrace
};
@@ -231,23 +230,10 @@ namespace nanojit
void* vmprivate;
private:
NIns* _code; // ptr to start of code
GuardRecord* _links; // code which is linked (or pending to be) to this fragment
int32_t _hits;
Page* _pages; // native code pages
};
-
-#ifdef NJ_VERBOSE
- inline int nbr(LInsp x)
- {
- Page *p = x->page();
- return (p->seq * NJ_PAGE_SIZE + (intptr_t(x)-intptr_t(p))) / sizeof(LIns);
- }
-#else
- inline int nbr(LInsp x)
- {
- return (int)(intptr_t(x) & intptr_t(NJ_PAGE_SIZE-1));
- }
-#endif
}
#endif // __nanojit_Fragmento__
--- a/js/src/nanojit/LIR.cpp
+++ b/js/src/nanojit/LIR.cpp
@@ -33,57 +33,62 @@
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#include "nanojit.h"
#include <stdio.h>
+#include <ctype.h>
+
+#ifdef PERFM
+#include "../vprof/vprof.h"
+#endif /* PERFM */
namespace nanojit
{
using namespace avmplus;
#ifdef FEATURE_NANOJIT
const uint8_t operandCount[] = {
- /* 0 */ 2, 2, /*trace*/0, /*nearskip*/0, /*skip*/0, /*neartramp*/0, /*tramp*/0, 2, 2, 2,
- /* 10 */ /*param*/0, 2, 2, 2, 2, 2, 2, 2, /*call*/0, /*loop*/0,
- /* 20 */ /*x*/0, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ /* 0 */ /*start*/0, /*nearskip*/0, /*skip*/0, /*neartramp*/0, /*tramp*/0, 2, 2, 2, 2, /*addp*/2,
+ /* 10 */ /*param*/0, 2, 2, /*alloc*/0, 2, /*ret*/1, /*live*/1, /*calli*/0, /*call*/0, /*loop*/0,
+ /* 20 */ /*x*/0, 0, 1, 1, /*label*/0, 2, 2, 2, 2, 2,
/* 30 */ 2, 2, /*short*/0, /*int*/0, 2, 2, /*neg*/1, 2, 2, 2,
#if defined NANOJIT_64BIT
/* 40 */ /*callh*/0, 2, 2, 2, /*not*/1, 2, 2, 2, /*xt*/1, /*xf*/1,
#else
/* 40 */ /*callh*/1, 2, 2, 2, /*not*/1, 2, 2, 2, /*xt*/1, /*xf*/1,
#endif
/* 50 */ /*qlo*/1, /*qhi*/1, 2, /*ov*/1, /*cs*/1, 2, 2, 2, 2, 2,
- /* 60 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- /* 70 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- /* 80 */ 2, 2, /*fcall*/0, 2, 2, 2, 2, 2, 2, 2,
+ /* 60 */ 2, 2, 2, 2, 2, /*file*/1, /*line*/1, 2, 2, 2,
+ /* 70 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, /*fret*/1,
+ /* 80 */ 2, /*fcalli*/0, /*fcall*/0, 2, 2, 2, 2, 2, 2, 2,
/* 90 */ 2, 2, 2, 2, 2, 2, 2, /*quad*/0, 2, 2,
/* 100 */ /*fneg*/1, 2, 2, 2, 2, 2, /*i2f*/1, /*u2f*/1, 2, 2,
/* 110 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* 120 */ 2, 2, 2, 2, 2, 2, 2, 2,
};
// LIR verbose specific
#ifdef NJ_VERBOSE
const char* lirNames[] = {
- /* 0-9 */ "0","1","trace","nearskip","skip","neartramp","tramp","7","8","9",
- /* 10-19 */ "param","st","ld","13","sti","15","16","17","call","loop",
- /* 20-29 */ "x","21","22","23","24","25","feq","flt","fgt","fle",
+ /* 0-9 */ "start","nearskip","skip","neartramp","tramp","5","6","7","8","addp",
+ /* 10-19 */ "param","st","ld","alloc","sti","ret","live","calli","call","loop",
+ /* 20-29 */ "x","j","jt","jf","label","25","feq","flt","fgt","fle",
/* 30-39 */ "fge","cmov","short","int","ldc","","neg","add","sub","mul",
/* 40-49 */ "callh","and","or","xor","not","lsh","rsh","ush","xt","xf",
/* 50-59 */ "qlo","qhi","ldcb","ov","cs","eq","lt","gt","le","ge",
/* 60-63 */ "ult","ugt","ule","uge",
- /* 64-69 */ "LIR64","65","66","67","68","69",
- /* 70-79 */ "70","71","72","73","74","stq","ldq","77","stqi","79",
- /* 80-89 */ "80","81","fcall","83","84","85","86","87","qiand","qiadd",
- /* 90-99 */ "90","91","92","93","qcmov","95","96","quad","98","99",
+ /* 64-69 */ "LIR64","file","line","67","68","69",
+ /* 70-79 */ "70","71","72","73","74","stq","ldq","77","stqi","fret",
+ /* 80-89 */ "80","fcalli","fcall","83","84","85","86","87","88","89",
+ /* 90-99 */ "90","91","92","93","94","95","96","quad","ldqc","99",
/* 100-109 */ "fneg","fadd","fsub","fmul","fdiv","qjoin","i2f","u2f","qior","qilsh",
/* 110-119 */ "110","111","112","113","114","115","116","117","118","119",
/* 120-127 */ "120","121","122","123","124","125","126","127"
};
#endif /* NJ_VERBOSE */
// implementation
@@ -93,26 +98,22 @@ namespace nanojit
#undef counter_value
#define counter_value(x) x
#endif /* NJ_PROFILE */
//static int32_t buffer_count = 0;
// LCompressedBuffer
LirBuffer::LirBuffer(Fragmento* frago, const CallInfo* functions)
- : _frago(frago), _functions(functions)
+ : _frago(frago), _functions(functions), abi(ABI_FASTCALL), _start(0)
{
- _start = 0;
clear();
_start = pageAlloc();
if (_start)
- {
- verbose_only(_start->seq = 0;)
_unused = &_start->lir[0];
- }
//buffer_count++;
//fprintf(stderr, "LirBuffer %x start %x\n", (int)this, (int)_start);
}
LirBuffer::~LirBuffer()
{
//buffer_count--;
//fprintf(stderr, "~LirBuffer %x start %x\n", (int)this, (int)_start);
@@ -153,25 +154,26 @@ namespace nanojit
count++;
}
NanoAssert(count == _stats.pages);
NanoAssert(_noMem || _unused->page()->next == 0);
NanoAssert(_noMem || samepage(last,_unused));
}
#endif
-#ifdef NJ_VERBOSE
- int LirBuffer::insCount() {
+ int32_t LirBuffer::insCount()
+ {
+ // doesn't include embedded constants nor LIR_skip payload
return _stats.lir;
}
- int LirBuffer::byteCount() {
- return (_stats.pages-1) * (sizeof(Page)-sizeof(PageHeader)) +
- (_unused - &_unused->page()->lir[0]) * sizeof(LIns);
+ int32_t LirBuffer::byteCount()
+ {
+ return ((_stats.pages-1) * sizeof(Page)) +
+ ((int32_t)_unused - (int32_t)pageTop(_unused));
}
-#endif
Page* LirBuffer::pageAlloc()
{
Page* page = _frago->pageAlloc();
if (page)
{
page->next = 0; // end of list marker for new page
_stats.pages++;
@@ -194,17 +196,16 @@ namespace nanojit
LInsp last = _unused;
// we need to pull in a new page and stamp the old one with a link to it
Page *lastPage = last->page();
Page *page = pageAlloc();
if (page)
{
lastPage->next = page; // forward link to next page
_unused = &page->lir[0];
- verbose_only(page->seq = lastPage->seq+1;)
//fprintf(stderr, "Fragmento::ensureRoom stamping %x with %x; start %x unused %x\n", (int)pageBottom(last), (int)page, (int)_start, (int)_unused);
debug_only( validate(); )
return true;
}
else {
// mem failure, rewind pointer to top of page so that subsequent instruction works
verbose_only(if (_frago->assm()->_verbose) _frago->assm()->outputf("page alloc failed");)
_unused = &lastPage->lir[0];
@@ -213,18 +214,18 @@ namespace nanojit
}
bool LirBufWriter::ensureRoom(uint32_t count)
{
LInsp last = _buf->next();
if (!samepage(last,last+2*count)
&& _buf->addPage())
{
- // link LIR stream back to prior instruction (careful insFar relies on _unused...)
- insFar(LIR_skip, last-1);
+ // link LIR stream back to prior instruction (careful insLink relies on _unused...)
+ insLink(LIR_skip, last-1);
}
return !_buf->outOmem();
}
LInsp LirBuffer::commit(uint32_t count)
{
debug_only(validate();)
NanoAssertMsg( samepage(_unused, _unused+count), "You need to call ensureRoom first!" );
@@ -236,35 +237,36 @@ namespace nanojit
int delta = this-r-1;
NanoAssert(isU8(delta));
return delta;
}
LIns* LIns::deref(int32_t off) const
{
LInsp i = (LInsp) this-1 - off;
- while (i->isTramp())
+ while (i && i->isTramp()) {
i = i->ref();
+ }
return i;
}
LInsp LirBufWriter::ensureReferenceable(LInsp i, int32_t addedDistance)
{
- NanoAssert(!i->isTramp());
+ NanoAssert(i != 0 && !i->isTramp());
LInsp next = _buf->next();
LInsp from = next + 2*addedDistance;
if (canReference(from,i))
return i;
if (i == _buf->sp && spref && canReference(from, spref))
return spref;
if (i == _buf->rp && rpref && canReference(from, rpref))
return rpref;
// need a trampoline to get to i
- LInsp tramp = insFar(LIR_tramp, i);
+ LInsp tramp = insLink(LIR_tramp, i);
NanoAssert( tramp->ref() == i );
if (i == _buf->sp)
spref = tramp;
else if (i == _buf->rp)
rpref = tramp;
return tramp;
}
@@ -306,50 +308,54 @@ namespace nanojit
_buf->commit(1);
_buf->_stats.lir++;
return l;
}
LInsp LirBufWriter::ins0(LOpcode op)
{
ensureRoom(1);
- LInsp l = _buf->next();
+ LirBuffer *b = this->_buf;
+ LInsp l = b->next();
l->initOpcode(op);
- _buf->commit(1);
- _buf->_stats.lir++;
+ b->commit(1);
+ b->_stats.lir++;
+ if (op == LIR_start) {
+ // create params for saved regs -- processor specific
+ for (int i=0; i < NumSavedRegs; i++) {
+ insParam(i, 1);
+ }
+ }
return l;
}
LInsp LirBufWriter::ins1(LOpcode op, LInsp o1)
{
ensureRoom(2);
LInsp r1 = ensureReferenceable(o1,1);
LInsp l = _buf->next();
l->initOpcode(op);
- if (r1)
- l->setOprnd1(r1);
+ l->setOprnd1(r1);
_buf->commit(1);
_buf->_stats.lir++;
return l;
}
LInsp LirBufWriter::ins2(LOpcode op, LInsp o1, LInsp o2)
{
ensureRoom(3);
LInsp r1 = ensureReferenceable(o1,2);
- LInsp r2 = ensureReferenceable(o2,1);
+ LInsp r2 = o2==o1 ? r1 : ensureReferenceable(o2,1);
LInsp l = _buf->next();
l->initOpcode(op);
- if (r1)
- l->setOprnd1(r1);
- if (r2)
- l->setOprnd2(r2);
+ l->setOprnd1(r1);
+ l->setOprnd2(r2);
_buf->commit(1);
_buf->_stats.lir++;
return l;
}
LInsp LirBufWriter::insLoad(LOpcode op, LInsp base, LInsp d)
{
@@ -358,55 +364,89 @@ namespace nanojit
LInsp LirBufWriter::insGuard(LOpcode op, LInsp c, SideExit *x)
{
LInsp data = skip(SideExitSize(x));
*((SideExit*)data->payload()) = *x;
return ins2(op, c, data);
}
- LInsp LirBufWriter::insParam(int32_t arg)
+ LInsp LirBufWriter::insBranch(LOpcode op, LInsp condition, LInsp toLabel)
+ {
+ if (!toLabel)
+ toLabel = insFar(LIR_tramp,0); //empty tramp
+ if (!condition) {
+ // unconditional, just point to something
+ condition = toLabel;
+ }
+ return ins2(op,condition,toLabel);
+ }
+
+ LInsp LirBufWriter::insAlloc(int32_t size)
{
+ size = (size+3)>>2; // # of required 32bit words
+ NanoAssert(isU16(size));
ensureRoom(1);
LInsp l = _buf->next();
- l->initOpcode(LIR_param);
- l->c.imm8a = Assembler::argRegs[arg];
-
+ l->initOpcode(LIR_alloc);
+ l->i.imm16 = uint16_t(size);
_buf->commit(1);
_buf->_stats.lir++;
return l;
}
+
+ LInsp LirBufWriter::insParam(int32_t arg, int32_t kind)
+ {
+ ensureRoom(1);
+ LirBuffer *b = this->_buf;
+ LInsp l = b->next();
+ l->initOpcode(LIR_param);
+ NanoAssert(isU8(arg) && isU8(kind));
+ l->c.imm8a = arg;
+ l->c.imm8b = kind;
+ if (kind) {
+ NanoAssert(arg < NumSavedRegs);
+ b->savedParams[arg] = l;
+ }
+ b->commit(1);
+ b->_stats.lir++;
+ return l;
+ }
LInsp LirBufWriter::insFar(LOpcode op, LInsp target)
{
- NanoAssert(op == LIR_skip || op == LIR_tramp);
+ ensureRoom(2);
LInsp l = _buf->next();
- int d = target-l;
- if (isS24(d)) {
- ensureRoom(1);
+
+ // write the pointer and operation
+ l = _buf->next()+1;
+ *((LInsp*)(l-1)) = target;
+ l->initOpcode(op);
+ _buf->commit(2);
+ _buf->_stats.lir++;
+ return l;
+ }
+
+ LInsp LirBufWriter::insLink(LOpcode op, LInsp target)
+ {
+ NanoAssert(op == LIR_skip || op == LIR_tramp);
+ ensureRoom(2); // must be before _buf->next()
+ LInsp l = _buf->next();
+ if (can24bReach(l,target))
+ {
l->initOpcode(LOpcode(op-1)); // nearskip or neartramp
- l->t.imm24 = d;
+ l->t.imm24 = target-l;
_buf->commit(1);
- return l;
+ _buf->_stats.lir++;
}
- else {
- #if defined NANOJIT_64BIT
- const unsigned int extra = 1;
- #else
- const unsigned int extra = 0;
- #endif
-
- ensureRoom(2 + extra);
- // write the pointer and instruction
- l = _buf->next()+1+extra;
- *((LInsp*)(l-1-extra)) = target;
- l->initOpcode(op);
- _buf->commit(2+extra);
- return l;
- }
+ else
+ {
+ l = insFar(op,target);
+ }
+ return l;
}
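
insLink above picks between the one-word near form (a signed 24-bit displacement counted in LIns slots, not bytes) and the two-word far form that stores a full pointer in the preceding word. A sketch of that reachability test, with an illustrative helper name:

    #include <cstdint>

    // Does a displacement (counted in LIns slots) fit in a signed 24-bit
    // immediate?  If so the near form is usable, otherwise the far form
    // must store a full pointer.
    static bool fitsNearForm(intptr_t slots)
    {
        return slots >= -(intptr_t(1) << 23) && slots < (intptr_t(1) << 23);
    }
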
LInsp LirBufWriter::insImm(int32_t imm)
{
if (isS16(imm)) {
ensureRoom(1);
LInsp l = _buf->next();
l->initOpcode(LIR_short);
@@ -434,17 +474,17 @@ namespace nanojit
}
LInsp LirBufWriter::skip(size_t size)
{
const uint32_t n = (size+sizeof(LIns)-1)/sizeof(LIns);
ensureRoom(n+2);
LInsp last = _buf->next()-1;
_buf->commit(n);
- return insFar(LIR_skip, last);
+ return insLink(LIR_skip, last);
}
LInsp LirReader::read()
{
LInsp cur = _i;
if (!cur)
return 0;
LIns* i = cur;
@@ -457,16 +497,18 @@ namespace nanojit
i--;
break;
#if defined NANOJIT_64BIT
case LIR_callh:
#endif
case LIR_call:
case LIR_fcall:
+ case LIR_calli:
+ case LIR_fcalli:
i -= i->callInsWords();
break;
case LIR_skip:
case LIR_nearskip:
NanoAssert(i->ref() != i);
i = i->ref();
break;
@@ -486,17 +528,17 @@ namespace nanojit
i -= 2;
break;
case LIR_quad:
NanoAssert(samepage(i, i-3));
i -= 3;
break;
- case LIR_trace:
+ case LIR_start:
_i = 0; // start of trace
return cur;
}
iop = i->opcode();
}
while (is_trace_skip_tramp(iop)||iop==LIR_2);
_i = i;
return cur;
@@ -504,56 +546,46 @@ namespace nanojit
bool FASTCALL isCmp(LOpcode c) {
return c >= LIR_eq && c <= LIR_uge || c >= LIR_feq && c <= LIR_fge;
}
bool FASTCALL isCond(LOpcode c) {
return (c == LIR_ov) || (c == LIR_cs) || isCmp(c);
}
+
+ bool FASTCALL isFloat(LOpcode c) {
+ switch (c) {
+ default:
+ return false;
+ case LIR_fadd:
+ case LIR_fsub:
+ case LIR_fmul:
+ case LIR_fdiv:
+ case LIR_fneg:
+ case LIR_fcall:
+ case LIR_fcalli:
+ case LIR_i2f:
+ case LIR_u2f:
+ return true;
+ }
+ }
bool LIns::isCmp() const {
return nanojit::isCmp(u.code);
}
bool LIns::isCond() const {
return nanojit::isCond(u.code);
}
bool LIns::isQuad() const {
return ((u.code & LIR64) != 0 || u.code == LIR_callh);
}
- bool LIns::isCall() const
- {
- return ((u.code&~LIR64) == LIR_call
- || (u.code == LIR_callh));
- }
-
- bool LIns::isGuard() const
- {
- return u.code==LIR_x || u.code==LIR_xf || u.code==LIR_xt || u.code==LIR_loop;
- }
-
- bool LIns::isStore() const
- {
- int c = u.code & ~LIR64;
- return c == LIR_st || c == LIR_sti;
- }
-
- bool LIns::isLoad() const
- {
- return u.code == LIR_ldq || u.code == LIR_ld || u.code == LIR_ldc;
- }
-
- bool LIns::isconst() const
- {
- return (opcode()&~1) == LIR_short;
- }
-
bool LIns::isconstval(int32_t val) const
{
return isconst() && constval()==val;
}
bool LIns::isconstq() const
{
return isop(LIR_quad);
@@ -579,16 +611,22 @@ namespace nanojit
}
void LIns::setimm16(int32_t x)
{
NanoAssert(isS16(x));
i.imm16 = int16_t(x);
}
+ void LIns::setimm24(int32_t x)
+ {
+ NanoAssert(isS24(x));
+ t.imm24 = x;
+ }
+
void LIns::setresv(uint32_t resv)
{
NanoAssert(isU8(resv));
g.resv = resv;
}
void LIns::initOpcode(LOpcode op)
{
@@ -612,16 +650,38 @@ namespace nanojit
u.oprnd_3 = reference(r);
}
void LIns::setDisp(int8_t d)
{
sti.disp = d;
}
+ LIns **LIns::targetAddr() {
+ NanoAssert(isBranch());
+ LInsp i = (LInsp) this-1 - u.oprnd_2;
+ NanoAssert(i->isTramp());
+ LInsp ref;
+ while ((ref=i->ref()) != 0 && ref->isTramp())
+ i = ref;
+ NanoAssert(i->isop(LIR_tramp));
+ return (LIns**)(i-1);
+ }
+
+ void LIns::target(LInsp label) {
+ NanoAssert(label && label->isop(LIR_label));
+ *(targetAddr()) = label;
+ }
+
+ LInsp LIns::getTarget()
+ {
+ NanoAssert(isBranch());
+ return oprnd2();
+ }
+
LInsp LIns::oprnd1() const
{
return deref(u.oprnd_1);
}
LInsp LIns::oprnd2() const
{
return deref(u.oprnd_2);
@@ -668,19 +728,36 @@ namespace nanojit
return i->oprnd1();
}
else if (v == LIR_qhi) {
if (i->isconstq())
return insImm(int32_t(i->constvalq()>>32));
if (i->isop(LIR_qjoin))
return i->oprnd2();
}
+ else if (i->isconst()) {
+ int32_t c = i->constval();
+ if (v == LIR_neg)
+ return insImm(-c);
+ if (v == LIR_not)
+ return insImm(~c);
+ }
else if (v == i->opcode() && (v == LIR_not || v == LIR_neg || v == LIR_fneg)) {
+ // not(not(x)) = x; neg(neg(x)) = x; fneg(fneg(x)) = x;
return i->oprnd1();
}
+ /* [ed 8.27.08] this causes a big slowdown in gameoflife.as. why?
+ else if (i->isconst()) {
+ if (v == LIR_i2f) {
+ return insImmf(i->constval());
+ }
+ else if (v == LIR_u2f) {
+ return insImmf((uint32_t)i->constval());
+ }
+ }*/
// todo
// -(a-b) = b-a
return out->ins1(v, i);
}
LIns* ExprFilter::ins2(LOpcode v, LIns* oprnd1, LIns* oprnd2)
@@ -749,31 +826,31 @@ namespace nanojit
if (v == LIR_and)
return insImm(uint32_t(c1) & int32_t(c2));
if (v == LIR_xor)
return insImm(uint32_t(c1) ^ int32_t(c2));
}
else if (oprnd1->isconstq() && oprnd2->isconstq())
{
double c1 = oprnd1->constvalf();
- double c2 = oprnd1->constvalf();
+ double c2 = oprnd2->constvalf();
if (v == LIR_feq)
return insImm(c1 == c2);
if (v == LIR_flt)
return insImm(c1 < c2);
if (v == LIR_fgt)
return insImm(c1 > c2);
if (v == LIR_fle)
return insImm(c1 <= c2);
if (v == LIR_fge)
return insImm(c1 >= c2);
}
else if (oprnd1->isconst() && !oprnd2->isconst())
{
- if (v == LIR_add || v == LIR_mul ||
+ if (v == LIR_add || v == LIR_addp || v == LIR_mul ||
v == LIR_fadd || v == LIR_fmul ||
v == LIR_xor || v == LIR_or || v == LIR_and ||
v == LIR_eq) {
// move const to rhs
LIns* t = oprnd2;
oprnd2 = oprnd1;
oprnd1 = t;
}
@@ -819,17 +896,17 @@ namespace nanojit
if (a_lt == b_lt)
return insImm(a_lt);
}
}
}
if (c == 0)
{
- if (v == LIR_add || v == LIR_or || v == LIR_xor ||
+ if (v == LIR_add || v == LIR_addp || v == LIR_or || v == LIR_xor ||
v == LIR_sub || v == LIR_lsh || v == LIR_rsh || v == LIR_ush)
return oprnd1;
else if (v == LIR_and || v == LIR_mul)
return oprnd2;
else if (v == LIR_eq && oprnd1->isop(LIR_or) &&
oprnd1->oprnd2()->isconst() &&
oprnd1->oprnd2()->constval() != 0) {
// (x or c) != 0 if c != 0
@@ -880,31 +957,59 @@ namespace nanojit
v = LOpcode(v^1);
c = c->oprnd1();
}
}
}
return out->insGuard(v, c, x);
}
+ LIns* ExprFilter::insBranch(LOpcode v, LIns *c, LIns *t)
+ {
+ if (v == LIR_jt || v == LIR_jf) {
+ while (c->isop(LIR_eq) && c->oprnd1()->isCmp() && c->oprnd2()->isconstval(0)) {
+ // jt(eq(cmp,0)) => jf(cmp) or jf(eq(cmp,0)) => jt(cmp)
+ v = LOpcode(v ^ 1);
+ c = c->oprnd1();
+ }
+ }
+ return out->insBranch(v, c, t);
+ }
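
A standalone illustration of the ExprFilter::insBranch peephole above: each eq(cond, 0) wrapper negates the condition, so it can be peeled off while flipping jt and jf. Mock types only; the real filter also requires the inner operand to be a compare.

    #include <cstdio>

    struct Expr {                 // tiny stand-in for a LIR condition chain
        bool isEqZero;            // true if this node is eq(inner, 0)
        Expr* inner;              // the wrapped condition, if any
    };

    // Returns true for "jump if true", false for "jump if false".
    static bool simplify(bool jumpIfTrue, Expr*& cond)
    {
        while (cond->isEqZero && cond->inner) {
            jumpIfTrue = !jumpIfTrue;   // jt(eq(c,0)) == jf(c), and vice versa
            cond = cond->inner;
        }
        return jumpIfTrue;
    }

    int main()
    {
        Expr cmp{false, nullptr};
        Expr wrapped{true, &cmp};
        Expr* c = &wrapped;
        bool jt = simplify(true, c);
        std::printf("%s on the underlying compare\n", jt ? "jt" : "jf");
        return 0;
    }
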
+
LIns* LirWriter::insLoadi(LIns *base, int disp)
{
return insLoad(LIR_ld,base,disp);
}
LIns* LirWriter::insLoad(LOpcode op, LIns *base, int disp)
{
return insLoad(op, base, insImm(disp));
}
+ LIns* LirWriter::store(LInsp value, LInsp base, int32_t d)
+ {
+ return isS8(d) ? insStorei(value, base, d)
+ : insStore(value, base, insImm(d));
+ }
+
LIns* LirWriter::ins_eq0(LIns* oprnd1)
{
return ins2i(LIR_eq, oprnd1, 0);
}
+ LIns* LirWriter::insImmf(double f)
+ {
+ union {
+ double f;
+ uint64_t q;
+ } u;
+ u.f = f;
+ return insImmq(u.q);
+ }
+
LIns* LirWriter::qjoin(LInsp lo, LInsp hi)
{
return ins2(LIR_qjoin, lo, hi);
}
LIns* LirWriter::insImmPtr(const void *ptr)
{
return sizeof(ptr) == 8 ? insImmq((uintptr_t)ptr) : insImm((intptr_t)ptr);
@@ -932,44 +1037,46 @@ namespace nanojit
return ins2(LIR_or,
ins2(LIR_and, iftrue, ncond),
ins2(LIR_and, iffalse, ins1(LIR_not, ncond)));
}
LIns* LirBufWriter::insCall(const CallInfo *ci, LInsp args[])
{
static const LOpcode k_callmap[] = { LIR_call, LIR_fcall, LIR_call, LIR_callh };
+ static const LOpcode k_callimap[] = { LIR_calli, LIR_fcalli, LIR_calli, LIR_skip };
uint32_t argt = ci->_argtypes;
- LOpcode op = k_callmap[argt & 3];
+ LOpcode op = (ci->isIndirect() ? k_callimap : k_callmap)[argt & 3];
+ NanoAssert(op != LIR_skip); // LIR_skip here is just an error condition
- ArgSize sizes[10];
+ ArgSize sizes[2*MAXARGS];
uint32_t argc = ci->get_sizes(sizes);
#ifdef NJ_SOFTFLOAT
if (op == LIR_fcall)
op = LIR_callh;
- LInsp args2[5*2]; // arm could require 2 args per double
+ LInsp args2[MAXARGS*2]; // arm could require 2 args per double
int32_t j = 0;
- for (int32_t i = 0; i < 5; i++) {
+ for (int32_t i = 0; i < MAXARGS; i++) {
argt >>= 2;
ArgSize a = ArgSize(argt&3);
if (a == ARGSIZE_F) {
LInsp q = args[i];
args2[j++] = ins1(LIR_qhi, q);
args2[j++] = ins1(LIR_qlo, q);
} else if (a != ARGSIZE_NONE) {
args2[j++] = args[i];
}
}
args = args2;
NanoAssert(j == argc);
#endif
- NanoAssert(argc < 8);
+ NanoAssert(argc <= MAXARGS);
uint32_t words = argwords(argc);
ensureRoom(words+LIns::callInfoWords+1+argc); // ins size + possible tramps
for (uint32_t i=0; i < argc; i++)
args[i] = ensureReferenceable(args[i], argc-i);
uint8_t* offs = (uint8_t*)_buf->next();
LIns *l = _buf->next() + words;
*(const CallInfo **)l = ci;
l += LIns::callInfoWords;
@@ -984,18 +1091,18 @@ namespace nanojit
l->c.imm8b = argc;
_buf->commit(words+LIns::callInfoWords+1);
_buf->_stats.lir++;
return l;
}
using namespace avmplus;
- StackFilter::StackFilter(LirFilter *in, GC *gc, Fragment *frag, LInsp sp)
- : LirFilter(in), gc(gc), frag(frag), sp(sp), top(0)
+ StackFilter::StackFilter(LirFilter *in, GC *gc, LirBuffer *lirbuf, LInsp sp)
+ : LirFilter(in), gc(gc), lirbuf(lirbuf), sp(sp), top(0)
{}
LInsp StackFilter::read()
{
for (;;)
{
LInsp i = in->read();
if (!i)
@@ -1025,16 +1132,21 @@ namespace nanojit
if (stk.get(d))
continue;
else
stk.set(gc, d);
}
}
}
}
+ /*
+ * NB: If there is a backward branch other than the loop-restart branch, this is
+ * going to be wrong. Unfortunately there doesn't seem to be an easy way to detect
+ * such branches. Just do not create any.
+ */
else if (i->isGuard())
{
stk.reset();
top = getTop(i) >> 2;
}
return i;
}
}
@@ -1081,23 +1193,29 @@ namespace nanojit
hash ^= hash << 4;
hash += hash >> 17;
hash ^= hash << 25;
hash += hash >> 6;
return hash;
}
LInsHashSet::LInsHashSet(GC* gc) :
- m_list(gc, kInitialCap), m_used(0), m_gc(gc)
+ m_used(0), m_cap(kInitialCap), m_gc(gc)
{
#ifdef MEMORY_INFO
- m_list.set_meminfo_name("LInsHashSet.list");
+// m_list.set_meminfo_name("LInsHashSet.list");
#endif
- m_list.set(kInitialCap-1, 0);
+ LInsp *list = (LInsp*) gc->Alloc(sizeof(LInsp)*m_cap);
+ WB(gc, this, &m_list, list);
}
+
+ void LInsHashSet::clear() {
+ memset(m_list, 0, sizeof(LInsp)*m_cap);
+ m_used = 0;
+ }
/*static*/ uint32_t FASTCALL LInsHashSet::hashcode(LInsp i)
{
const LOpcode op = i->opcode();
switch (op)
{
case LIR_short:
return hashimm(i->imm16());
@@ -1168,68 +1286,68 @@ namespace nanojit
return false;
return true;
}
}
}
void FASTCALL LInsHashSet::grow()
{
- const uint32_t newcap = m_list.size() << 1;
- InsList newlist(m_gc, newcap);
+ const uint32_t newcap = m_cap << 1;
+ LInsp *newlist = (LInsp*) m_gc->Alloc(newcap * sizeof(LInsp));
+ LInsp *list = m_list;
#ifdef MEMORY_INFO
- newlist.set_meminfo_name("LInsHashSet.list");
+// newlist.set_meminfo_name("LInsHashSet.list");
#endif
- newlist.set(newcap-1, 0);
- for (uint32_t i=0, n=m_list.size(); i < n; i++)
- {
- LInsp name = m_list.get(i);
+ for (uint32_t i=0, n=m_cap; i < n; i++) {
+ LInsp name = list[i];
if (!name) continue;
uint32_t j = find(name, hashcode(name), newlist, newcap);
- newlist.set(j, name);
+ newlist[j] = name;
}
- m_list.become(newlist);
+ m_cap = newcap;
+ WB(m_gc, this, &m_list, newlist);
}
- uint32_t FASTCALL LInsHashSet::find(LInsp name, uint32_t hash, const InsList& list, uint32_t cap)
+ uint32_t FASTCALL LInsHashSet::find(LInsp name, uint32_t hash, const LInsp *list, uint32_t cap)
{
const uint32_t bitmask = (cap - 1) & ~0x1;
uint32_t n = 7 << 1;
hash &= bitmask;
LInsp k;
- while ((k = list.get(hash)) != NULL &&
+ while ((k = list[hash]) != NULL &&
(!LIns::sameop(k,name) || !equals(k, name)))
{
hash = (hash + (n += 2)) & bitmask; // quadratic probe
}
return hash;
}
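
The find32/find64/find1/find2/findcall routines above all share this open-addressing scheme: a power-of-two table probed with an increasing step until a match or an empty slot appears. A simplified standalone version of that loop (the real code additionally clears bit 0 of the mask):

    #include <cstdint>

    // Illustrative only: probe a power-of-two table until `key` or an empty
    // slot is found.  Assumes an empty slot always exists (the set above
    // grows once it reaches 50% occupancy).
    static uint32_t probe(void* const* table, uint32_t cap, void* key, uint32_t hash)
    {
        const uint32_t bitmask = cap - 1;
        uint32_t n = 7 << 1;
        hash &= bitmask;
        while (table[hash] != nullptr && table[hash] != key)
            hash = (hash + (n += 2)) & bitmask;   // quadratic probe
        return hash;                              // slot holding key, or first empty slot
    }
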
LInsp LInsHashSet::add(LInsp name, uint32_t k)
{
// this is relatively short-lived so let's try a more aggressive load factor
// in the interest of improving performance
- if (((m_used+1)<<1) >= m_list.size()) // 0.50
+ if (((m_used+1)<<1) >= m_cap) // 0.50
{
grow();
- k = find(name, hashcode(name), m_list, m_list.size());
+ k = find(name, hashcode(name), m_list, m_cap);
}
- NanoAssert(!m_list.get(k));
+ NanoAssert(!m_list[k]);
m_used++;
- m_list.set(k, name);
- return name;
+ return m_list[k] = name;
}
void LInsHashSet::replace(LInsp i)
{
- uint32_t k = find(i, hashcode(i), m_list, m_list.size());
- if (m_list.get(k)) {
+ LInsp *list = m_list;
+ uint32_t k = find(i, hashcode(i), list, m_cap);
+ if (list[k]) {
// already there, so replace it
- m_list.set(k, i);
+ list[k] = i;
} else {
add(i, k);
}
}
uint32_t LInsHashSet::hashimm(int32_t a) {
return _hashfinish(_hash32(0,a));
}
@@ -1254,74 +1372,74 @@ namespace nanojit
uint32_t hash = _hashptr(0, ci);
for (int32_t j=argc-1; j >= 0; j--)
hash = _hashptr(hash,args[j]);
return _hashfinish(hash);
}
LInsp LInsHashSet::find32(int32_t a, uint32_t &i)
{
- uint32_t cap = m_list.size();
- const InsList& list = m_list;
+ uint32_t cap = m_cap;
+ const LInsp *list = m_list;
const uint32_t bitmask = (cap - 1) & ~0x1;
uint32_t hash = hashimm(a) & bitmask;
uint32_t n = 7 << 1;
LInsp k;
- while ((k = list.get(hash)) != NULL &&
+ while ((k = list[hash]) != NULL &&
(!k->isconst() || k->constval() != a))
{
hash = (hash + (n += 2)) & bitmask; // quadratic probe
}
i = hash;
return k;
}
LInsp LInsHashSet::find64(uint64_t a, uint32_t &i)
{
- uint32_t cap = m_list.size();
- const InsList& list = m_list;
+ uint32_t cap = m_cap;
+ const LInsp *list = m_list;
const uint32_t bitmask = (cap - 1) & ~0x1;
uint32_t hash = hashimmq(a) & bitmask;
uint32_t n = 7 << 1;
LInsp k;
- while ((k = list.get(hash)) != NULL &&
+ while ((k = list[hash]) != NULL &&
(!k->isconstq() || k->constvalq() != a))
{
hash = (hash + (n += 2)) & bitmask; // quadratic probe
}
i = hash;
return k;
}
LInsp LInsHashSet::find1(LOpcode op, LInsp a, uint32_t &i)
{
- uint32_t cap = m_list.size();
- const InsList& list = m_list;
+ uint32_t cap = m_cap;
+ const LInsp *list = m_list;
const uint32_t bitmask = (cap - 1) & ~0x1;
uint32_t hash = hash1(op,a) & bitmask;
uint32_t n = 7 << 1;
LInsp k;
- while ((k = list.get(hash)) != NULL &&
+ while ((k = list[hash]) != NULL &&
(k->opcode() != op || k->oprnd1() != a))
{
hash = (hash + (n += 2)) & bitmask; // quadratic probe
}
i = hash;
return k;
}
LInsp LInsHashSet::find2(LOpcode op, LInsp a, LInsp b, uint32_t &i)
{
- uint32_t cap = m_list.size();
- const InsList& list = m_list;
+ uint32_t cap = m_cap;
+ const LInsp *list = m_list;
const uint32_t bitmask = (cap - 1) & ~0x1;
uint32_t hash = hash2(op,a,b) & bitmask;
uint32_t n = 7 << 1;
LInsp k;
- while ((k = list.get(hash)) != NULL &&
+ while ((k = list[hash]) != NULL &&
(k->opcode() != op || k->oprnd1() != a || k->oprnd2() != b))
{
hash = (hash + (n += 2)) & bitmask; // quadratic probe
}
i = hash;
return k;
}
@@ -1330,23 +1448,23 @@ namespace nanojit
for (uint32_t j=0; j < argc; j++)
if (i->arg(j) != args[j])
return false;
return true;
}
LInsp LInsHashSet::findcall(const CallInfo *ci, uint32_t argc, LInsp args[], uint32_t &i)
{
- uint32_t cap = m_list.size();
- const InsList& list = m_list;
+ uint32_t cap = m_cap;
+ const LInsp *list = m_list;
const uint32_t bitmask = (cap - 1) & ~0x1;
uint32_t hash = hashcall(ci, argc, args) & bitmask;
uint32_t n = 7 << 1;
LInsp k;
- while ((k = list.get(hash)) != NULL &&
+ while ((k = list[hash]) != NULL &&
(!k->isCall() || k->callInfo() != ci || !argsmatch(k, argc, args)))
{
hash = (hash + (n += 2)) & bitmask; // quadratic probe
}
i = hash;
return k;
}
@@ -1399,36 +1517,34 @@ namespace nanojit
live.remove(i);
retired.add(e);
}
bool contains(LInsp i) {
return live.containsKey(i);
}
};
- void live(GC *gc, Assembler *assm, Fragment *frag)
+ void live(GC *gc, LirBuffer *lirbuf)
{
// traverse backwards to find live exprs and a few other stats.
- LInsp sp = frag->lirbuf->sp;
- LInsp rp = frag->lirbuf->rp;
LiveTable live(gc);
uint32_t exits = 0;
- LirBuffer *lirbuf = frag->lirbuf;
LirReader br(lirbuf);
- StackFilter sf(&br, gc, frag, sp);
- StackFilter r(&sf, gc, frag, rp);
- int total = 0;
- live.add(frag->lirbuf->state, r.pos());
+ StackFilter sf(&br, gc, lirbuf, lirbuf->sp);
+ StackFilter r(&sf, gc, lirbuf, lirbuf->rp);
+ int total = 0;
+ if (lirbuf->state)
+ live.add(lirbuf->state, r.pos());
for (LInsp i = r.read(); i != 0; i = r.read())
{
total++;
// first handle side-effect instructions
- if (i->isStore() || i->isGuard() || i->isCall() && !i->callInfo()->_cse)
+ if (!i->isCse(lirbuf->_functions))
{
live.add(i,0);
if (i->isGuard())
exits++;
}
// now propagate liveness
if (live.contains(i))
@@ -1453,36 +1569,43 @@ namespace nanojit
}
else if (i->isCall()) {
for (int j=0, c=i->argc(); j < c; j++)
live.add(i->arg(j),i);
}
}
}
- assm->outputf("live instruction count %ld, total %ld, max pressure %d",
+ printf("live instruction count %d, total %u, max pressure %d\n",
live.retired.size(), total, live.maxlive);
- assm->outputf("side exits %ld", exits);
+ printf("side exits %u\n", exits);
// print live exprs, going forwards
- LirNameMap *names = frag->lirbuf->names;
+ LirNameMap *names = lirbuf->names;
+ bool newblock = true;
for (int j=live.retired.size()-1; j >= 0; j--)
{
RetiredEntry *e = live.retired[j];
- char livebuf[1000], *s=livebuf;
+ char livebuf[4000], *s=livebuf;
*s = 0;
+ if (!newblock && e->i->isop(LIR_label)) {
+ printf("\n");
+ }
+ newblock = false;
for (int k=0,n=e->live.size(); k < n; k++) {
strcpy(s, names->formatRef(e->live[k]));
s += strlen(s);
*s++ = ' '; *s = 0;
NanoAssert(s < livebuf+sizeof(livebuf));
}
printf("%-60s %s\n", livebuf, names->formatIns(e->i));
- if (e->i->isGuard())
+ if (e->i->isGuard() || e->i->isBranch() || isRet(e->i->opcode())) {
printf("\n");
+ newblock = true;
+ }
}
}
LabelMap::Entry::~Entry()
{
}
LirNameMap::Entry::~Entry()
@@ -1511,17 +1634,22 @@ namespace nanojit
Stringp new_name = labels->core->newString(name);
if (!addName(i, new_name)) {
labels->core->freeString(new_name);
}
}
void LirNameMap::copyName(LInsp i, const char *s, int suffix) {
char s2[200];
- sprintf(s2,"%s%d", s,suffix);
+ if (isdigit(s[strlen(s)-1])) {
+ // if s ends with a digit, add '_' to clarify the suffix
+ sprintf(s2,"%s_%d", s, suffix);
+ } else {
+ sprintf(s2,"%s%d", s, suffix);
+ }
addName(i, labels->core->newString(s2));
}
void LirNameMap::formatImm(int32_t c, char *buf) {
if (c >= 10000 || c <= -10000)
sprintf(buf,"#%s",labels->format((void*)c));
else
sprintf(buf,"%d", c);
@@ -1570,81 +1698,130 @@ namespace nanojit
}
return labels->dup(buffer);
}
const char* LirNameMap::formatIns(LIns* i)
{
char sbuf[200];
char *s = sbuf;
- if (!i->isStore() && !i->isGuard() && !i->isop(LIR_trace)) {
- sprintf(s, "%s = ", formatRef(i));
- s += strlen(s);
- }
-
LOpcode op = i->opcode();
switch(op)
{
case LIR_short:
case LIR_int:
{
sprintf(s, "%s", formatRef(i));
break;
}
+ case LIR_alloc: {
+ sprintf(s, "%s = %s %d", formatRef(i), lirNames[op], i->size());
+ break;
+ }
+
case LIR_quad:
{
int32_t *p = (int32_t*) (i-2);
- sprintf(s, "#%X:%X", p[1], p[0]);
+ sprintf(s, "#%X:%X /* %g */", p[1], p[0], i->constvalf());
break;
}
case LIR_loop:
- case LIR_trace:
+ case LIR_start:
sprintf(s, "%s", lirNames[op]);
break;
#if defined NANOJIT_64BIT
case LIR_callh:
#endif
case LIR_fcall:
case LIR_call: {
- sprintf(s, "%s ( ", i->callInfo()->_name);
+ sprintf(s, "%s = %s ( ", formatRef(i), i->callInfo()->_name);
for (int32_t j=i->argc()-1; j >= 0; j--) {
s += strlen(s);
sprintf(s, "%s ",formatRef(i->arg(j)));
}
s += strlen(s);
sprintf(s, ")");
break;
}
+ case LIR_fcalli:
+ case LIR_calli: {
+ int32_t argc = i->argc();
+ sprintf(s, "%s = [%s] ( ", formatRef(i), formatRef(i->arg(argc-1)));
+ s += strlen(s);
+ argc--;
+ for (int32_t j=argc-1; j >= 0; j--) {
+ s += strlen(s);
+ sprintf(s, "%s ",formatRef(i->arg(j)));
+ }
+ s += strlen(s);
+ sprintf(s, ")");
+ break;
+ }
- case LIR_param:
- sprintf(s, "%s %s", lirNames[op], gpn(i->imm8()));
+ case LIR_param: {
+ uint32_t arg = i->imm8();
+ if (!i->imm8b()) {
+ if (arg < sizeof(Assembler::argRegs)/sizeof(Assembler::argRegs[0])) {
+ sprintf(s, "%s = %s %d %s", formatRef(i), lirNames[op],
+ arg, gpn(Assembler::argRegs[arg]));
+ } else {
+ sprintf(s, "%s = %s %d", formatRef(i), lirNames[op], arg);
+ }
+ } else {
+ sprintf(s, "%s = %s %d %s", formatRef(i), lirNames[op],
+ arg, gpn(Assembler::savedRegs[arg]));
+ }
+ break;
+ }
+
+ case LIR_label:
+ sprintf(s, "%s:", formatRef(i));
break;
+ case LIR_jt:
+ case LIR_jf:
+ sprintf(s, "%s %s -> %s", lirNames[op], formatRef(i->oprnd1()),
+ i->oprnd2() ? formatRef(i->oprnd2()) : "unpatched");
+ break;
+
+ case LIR_j:
+ sprintf(s, "%s -> %s", lirNames[op],
+ i->oprnd2() ? formatRef(i->oprnd2()) : "unpatched");
+ break;
+
+ case LIR_live:
+ case LIR_ret:
+ case LIR_fret:
+ sprintf(s, "%s %s", lirNames[op], formatRef(i->oprnd1()));
+ break;
+
+ case LIR_callh:
case LIR_neg:
case LIR_fneg:
case LIR_i2f:
case LIR_u2f:
case LIR_qlo:
case LIR_qhi:
case LIR_ov:
case LIR_cs:
case LIR_not:
- sprintf(s, "%s %s", lirNames[op], formatRef(i->oprnd1()));
+ sprintf(s, "%s = %s %s", formatRef(i), lirNames[op], formatRef(i->oprnd1()));
break;
case LIR_x:
case LIR_xt:
case LIR_xf:
formatGuard(i, s);
break;
case LIR_add:
+ case LIR_addp:
case LIR_sub:
case LIR_mul:
case LIR_fadd:
case LIR_fsub:
case LIR_fmul:
case LIR_fdiv:
case LIR_and:
case LIR_or:
@@ -1665,49 +1842,50 @@ namespace nanojit
case LIR_flt:
case LIR_fle:
case LIR_fgt:
case LIR_fge:
case LIR_qiadd:
case LIR_qiand:
case LIR_qilsh:
case LIR_qior:
- sprintf(s, "%s %s, %s", lirNames[op],
+ sprintf(s, "%s = %s %s, %s", formatRef(i), lirNames[op],
formatRef(i->oprnd1()),
formatRef(i->oprnd2()));
break;
case LIR_qjoin:
sprintf(s, "%s (%s), %s", lirNames[op],
formatIns(i->oprnd1()),
formatRef(i->oprnd2()));
break;
case LIR_qcmov:
case LIR_cmov:
- sprintf(s, "%s ? %s : %s",
+ sprintf(s, "%s = %s %s ? %s : %s", formatRef(i), lirNames[op],
formatRef(i->oprnd1()),
formatRef(i->oprnd2()->oprnd1()),
formatRef(i->oprnd2()->oprnd2()));
break;
case LIR_ld:
case LIR_ldc:
case LIR_ldq:
+ case LIR_ldqc:
case LIR_ldcb:
- sprintf(s, "%s %s[%s]", lirNames[op],
+ sprintf(s, "%s = %s %s[%s]", formatRef(i), lirNames[op],
formatRef(i->oprnd1()),
formatRef(i->oprnd2()));
break;
case LIR_st:
case LIR_sti:
case LIR_stq:
case LIR_stqi:
- sprintf(s, "%s[%d] = %s",
+ sprintf(s, "%s %s[%d] = %s", lirNames[op],
formatRef(i->oprnd2()),
i->immdisp(),
formatRef(i->oprnd1()));
break;
default:
sprintf(s, "?");
break;
@@ -1828,23 +2006,23 @@ namespace nanojit
Fragmento *frago = triggerFrag->lirbuf->_frago;
AvmCore *core = frago->core();
GC *gc = core->gc;
verbose_only( StringList asmOutput(gc); )
verbose_only( assm->_outputCache = &asmOutput; )
verbose_only(if (assm->_verbose && core->config.verbose_live)
- live(gc, assm, triggerFrag);)
+ live(gc, triggerFrag->lirbuf);)
bool treeCompile = core->config.tree_opt && (triggerFrag->kind == BranchTrace);
RegAllocMap regMap(gc);
NInsList loopJumps(gc);
#ifdef MEMORY_INFO
- loopJumps.set_meminfo_name("LIR loopjumps");
+// loopJumps.set_meminfo_name("LIR loopjumps");
#endif
assm->beginAssembly(triggerFrag, &regMap);
//fprintf(stderr, "recompile trigger %X kind %d\n", (int)triggerFrag, triggerFrag->kind);
Fragment* root = triggerFrag;
if (treeCompile)
{
// recompile the entire tree
@@ -1879,37 +2057,76 @@ namespace nanojit
}
}
// now the main trunk
assm->assemble(root, loopJumps);
verbose_only(if (assm->_verbose)
assm->outputf("compiling trunk %s",
frago->labels->format(root));)
+ NanoAssert(!frago->core()->config.tree_opt || root == root->anchor || root->kind == MergeTrace);
assm->endAssembly(root, loopJumps);
// reverse output so that assembly is displayed low-to-high
verbose_only( assm->_outputCache = 0; )
verbose_only(for(int i=asmOutput.size()-1; i>=0; --i) { assm->outputf("%s",asmOutput.get(i)); } );
if (assm->error())
{
root->fragEntry = 0;
}
else
{
root->link(assm);
if (treeCompile) root->linkBranches(assm);
}
+ }
-#if defined(NJ_VERBOSE)
- for (size_t i = 0; i < asmOutput.size(); i++) {
- gc->Free(asmOutput.get(i));
+ LInsp LoadFilter::insLoad(LOpcode v, LInsp base, LInsp disp)
+ {
+ if (base != sp && base != rp && (v == LIR_ld || v == LIR_ldq)) {
+ uint32_t k;
+ LInsp found = exprs.find2(v, base, disp, k);
+ if (found)
+ return found;
+ return exprs.add(out->insLoad(v,base,disp), k);
}
-#endif
+ return out->insLoad(v, base, disp);
+ }
+
+ void LoadFilter::clear(LInsp p)
+ {
+ if (p != sp && p != rp)
+ exprs.clear();
+ }
+
+ LInsp LoadFilter::insStore(LInsp v, LInsp b, LInsp d)
+ {
+ clear(b);
+ return out->insStore(v, b, d);
+ }
+
+ LInsp LoadFilter::insStorei(LInsp v, LInsp b, int32_t d)
+ {
+ clear(b);
+ return out->insStorei(v, b, d);
+ }
+
+ LInsp LoadFilter::insCall(const CallInfo *call, LInsp args[])
+ {
+ if (!call->_cse)
+ exprs.clear();
+ return out->insCall(call, args);
+ }
+
+ LInsp LoadFilter::ins0(LOpcode op)
+ {
+ if (op == LIR_label)
+ exprs.clear();
+ return out->ins0(op);
}
#endif /* FEATURE_NANOJIT */
#if defined(NJ_VERBOSE)
LabelMap::LabelMap(AvmCore *core, LabelMap* parent)
: parent(parent), names(core->gc), addrs(core->config.verbose_addrs), end(buf), core(core)
{}
@@ -1952,17 +2169,17 @@ namespace nanojit
if (p == start) {
if (addrs)
sprintf(b,"%p %s",p,name);
else
strcpy(b, name);
return dup(b);
}
else if (p > start && p < end) {
- int d = (intptr_t(p)-intptr_t(start)) >> e->align;
+ int32_t d = int32_t(intptr_t(p)-intptr_t(start)) >> e->align;
if (addrs)
sprintf(b, "%p %s+%d", p, name, d);
else
sprintf(b,"%s+%d", name, d);
return dup(b);
}
else {
if (parent)
@@ -1976,17 +2193,17 @@ namespace nanojit
return parent->format(p);
sprintf(b, "%p", p);
return dup(b);
}
const char *LabelMap::dup(const char *b)
{
- int need = strlen(b)+1;
+ size_t need = strlen(b)+1;
char *s = end;
end += need;
if (end > buf+sizeof(buf)) {
s = buf;
end = s+need;
}
strcpy(s, b);
return s;
--- a/js/src/nanojit/LIR.h
+++ b/js/src/nanojit/LIR.h
@@ -58,33 +58,44 @@ namespace nanojit
#if defined(_MSC_VER) && _MSC_VER >= 1400
: unsigned
#endif
{
// flags; upper bits reserved
LIR64 = 0x40, // result is double or quad
// special operations (must be 0..N)
- LIR_trace = 2,
- LIR_nearskip = 3, // must be LIR_skip-1 and lsb=1
- LIR_skip = 4,
- LIR_neartramp = 5, // must be LIR_tramp-1 and lsb=1
- LIR_tramp = 6,
+ LIR_start = 0,
+ LIR_nearskip = 1, // must be LIR_skip-1 and lsb=1
+ LIR_skip = 2,
+ LIR_neartramp = 3, // must be LIR_tramp-1 and lsb=1
+ LIR_tramp = 4,
// non-pure operations
+ LIR_addp = 9,
LIR_param = 10,
LIR_st = 11, // 32-bit store
LIR_ld = 12, // 32-bit load
+ LIR_alloc = 13, // alloca some stack space
LIR_sti = 14,
- LIR_call = 18, // subrouting call returning a 32-bit value
+ LIR_ret = 15,
+ LIR_live = 16, // extend live range of reference
+ LIR_calli = 17, // indirect call
+ LIR_call = 18, // subroutine call returning a 32-bit value
// guards
LIR_loop = 19, // loop fragment
LIR_x = 20, // exit always
+ // branches
+ LIR_j = 21, // jump always
+ LIR_jt = 22, // jump true
+ LIR_jf = 23, // jump false
+ LIR_label = 24, // a jump target
+ LIR_ji = 25, // jump indirect
// operators
// LIR_feq though LIR_fge must only be used on float arguments. They
// return integers.
LIR_feq = 26, // floating-point equality [2 float inputs]
LIR_flt = 27, // floating-point less than: arg1 < arg2
LIR_fgt = 28, // floating-point greater than: arg1 > arg2
LIR_fle = 29, // arg1 <= arg2, both floating-point
@@ -132,64 +143,137 @@ namespace nanojit
LIR_le = 58, // 0x3A 0011 1010
LIR_ge = 59, // 0x3B 0011 1011
// and the unsigned integer versions
LIR_ult = 60, // 0x3C 0011 1100
LIR_ugt = 61, // 0x3D 0011 1101
LIR_ule = 62, // 0x3E 0011 1110
LIR_uge = 63, // 0x3F 0011 1111
+ // non-64bit ops, but we're out of code space below 64
+ LIR_file = 1 | LIR64,
+ LIR_line = 2 | LIR64,
+
/**
* 64bit operations
*/
LIR_stq = LIR_st | LIR64, // quad store
LIR_stqi = LIR_sti | LIR64,
+ LIR_fret = LIR_ret | LIR64,
LIR_quad = LIR_int | LIR64, // quad constant value
LIR_ldq = LIR_ld | LIR64, // quad load
+ LIR_ldqc = LIR_ldc | LIR64,
LIR_qiand = 24 | LIR64,
LIR_qiadd = 25 | LIR64,
LIR_qilsh = LIR_lsh | LIR64,
LIR_fcall = LIR_call | LIR64, // subroutine call returning quad
+ LIR_fcalli = LIR_calli | LIR64,
LIR_fneg = LIR_neg | LIR64, // floating-point numeric negation
LIR_fadd = LIR_add | LIR64, // floating-point addition
LIR_fsub = LIR_sub | LIR64, // floating-point subtraction
LIR_fmul = LIR_mul | LIR64, // floating-point multiplication
LIR_fdiv = 40 | LIR64, // floating-point division
LIR_qcmov = LIR_cmov | LIR64,
LIR_qjoin = 41 | LIR64,
LIR_i2f = 42 | LIR64, // convert an integer to a float
LIR_u2f = 43 | LIR64, // convert an unsigned integer to a float
LIR_qior = 44 | LIR64
};
#if defined NANOJIT_64BIT
#define LIR_ldp LIR_ldq
+ #define LIR_stp LIR_stq
#define LIR_piadd LIR_qiadd
#define LIR_piand LIR_qiand
#define LIR_pilsh LIR_qilsh
#define LIR_pcmov LIR_qcmov
#define LIR_pior LIR_qior
#else
#define LIR_ldp LIR_ld
+ #define LIR_stp LIR_st
#define LIR_piadd LIR_add
#define LIR_piand LIR_and
#define LIR_pilsh LIR_lsh
#define LIR_pcmov LIR_cmov
#define LIR_pior LIR_or
#endif
inline uint32_t argwords(uint32_t argc) {
return (argc+3)>>2;
}
struct SideExit;
struct Page;
- struct CallInfo;
+
+ enum AbiKind {
+ ABI_FASTCALL,
+ ABI_THISCALL,
+ ABI_STDCALL,
+ ABI_CDECL
+ };
+
+ enum ArgSize {
+ ARGSIZE_NONE = 0,
+ ARGSIZE_F = 1,
+ ARGSIZE_LO = 2,
+ ARGSIZE_Q = 3,
+ _ARGSIZE_MASK_INT = 2,
+ _ARGSIZE_MASK_ANY = 3
+ };
+
+ struct CallInfo
+ {
+ uintptr_t _address;
+ uint32_t _argtypes:18; // 9 2-bit fields indicating arg type, by ARGSIZE above (including ret type): a1..a8 ret
+ uint8_t _cse:1; // true if no side effects
+ uint8_t _fold:1; // true if no side effects
+ AbiKind _abi:3;
+ verbose_only ( const char* _name; )
+
+ uint32_t FASTCALL _count_args(uint32_t mask) const;
+ uint32_t get_sizes(ArgSize*) const;
+
+ inline bool isInterface() const {
+ return _address == 2 || _address == 3; /* hack! */
+ }
+ inline bool isIndirect() const {
+ return _address < 256;
+ }
+ inline uint32_t FASTCALL count_args() const {
+ return _count_args(_ARGSIZE_MASK_ANY) + isIndirect();
+ }
+ inline uint32_t FASTCALL count_iargs() const {
+ return _count_args(_ARGSIZE_MASK_INT);
+ }
+ // fargs = args - iargs
+ };
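
As the new isIndirect/count_args logic shows, an indirect call is flagged by a small pseudo-address (below 256) and passes the real target as one extra trailing argument. A sketch of that convention with an illustrative struct, not the real CallInfo:

    #include <cstdint>

    struct CallDesc {                 // illustrative stand-in for CallInfo
        uintptr_t address;            // real entry point, or a small tag for indirect calls
        uint32_t  declaredArgs;       // arguments described by the packed arg-type word
        bool isIndirect() const { return address < 256; }
        uint32_t totalArgs() const {  // indirect calls pass the target as an extra arg
            return declaredArgs + (isIndirect() ? 1 : 0);
        }
    };
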
+
+ inline bool isGuard(LOpcode op) {
+ return op==LIR_x || op==LIR_xf || op==LIR_xt || op==LIR_loop;
+ }
+
+ inline bool isCall(LOpcode op) {
+ op = LOpcode(op & ~LIR64);
+ return op == LIR_call || op == LIR_calli;
+ }
+
+ inline bool isStore(LOpcode op) {
+ op = LOpcode(op & ~LIR64);
+ return op == LIR_st || op == LIR_sti;
+ }
+
+ inline bool isConst(LOpcode op) {
+ return (op & ~1) == LIR_short;
+ }
+
+ inline bool isLoad(LOpcode op) {
+ return op == LIR_ldq || op == LIR_ld || op == LIR_ldc || op == LIR_ldqc;
+ }
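// A short sanity sketch (not from the patch) of how the predicates above treat
// the LIR64 bit and the new branch opcodes:
inline void opcode_predicate_example() {
    NanoAssert(isCall(LIR_fcall));                   // LIR64 is masked off first
    NanoAssert(isLoad(LIR_ldqc) && isLoad(LIR_ld));
    NanoAssert(isGuard(LIR_xt) && !isGuard(LIR_jt)); // jumps are not guards
}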
// Low-level Instruction 4B
// had to lay it out as a union with duplicate code fields since msvc couldn't figure out how to compact it otherwise.
class LIns
{
friend class LirBufWriter;
// 3-operand form (backwards reach only)
struct u_type
@@ -285,28 +369,38 @@ namespace nanojit
public:
LIns* FASTCALL oprnd1() const;
LIns* FASTCALL oprnd2() const;
LIns* FASTCALL oprnd3() const;
inline LOpcode opcode() const { return u.code; }
inline uint8_t imm8() const { return c.imm8a; }
+ inline uint8_t imm8b() const { return c.imm8b; }
inline int16_t imm16() const { return i.imm16; }
+ inline int32_t imm24() const { return t.imm24; }
inline LIns* ref() const {
#if defined NANOJIT_64BIT
return (t.code & 1) ? (LIns*)this+t.imm24 : *(LIns**)(this-2);
#else
return (t.code & 1) ? (LIns*)this+t.imm24 : *(LIns**)(this-1);
#endif
}
inline int32_t imm32() const { return *(int32_t*)(this-1); }
inline uint8_t resv() const { return g.resv; }
void* payload() const;
inline Page* page() { return (Page*) alignTo(this,NJ_PAGE_SIZE); }
+ inline int32_t size() const {
+ NanoAssert(isop(LIR_alloc));
+ return i.imm16<<2;
+ }
+ inline void setSize(int32_t bytes) {
+ NanoAssert(isop(LIR_alloc) && (bytes&3)==0 && isU16(bytes>>2));
+ i.imm16 = bytes>>2;
+ }
// index args in r-l order. arg(0) is rightmost arg
inline LIns* arg(uint32_t i) {
uint32_t c = argc();
NanoAssert(i < c);
uint8_t* offs = (uint8_t*) (this-callInfoWords-argwords(c));
return deref(offs[i]);
}
@@ -370,46 +464,52 @@ namespace nanojit
#endif
}
bool isCse(const CallInfo *functions) const;
bool isop(LOpcode o) const { return u.code == o; }
bool isQuad() const;
bool isCond() const;
bool isCmp() const;
- bool isCall() const;
- bool isStore() const;
- bool isLoad() const;
- bool isGuard() const;
+ bool isCall() const { return nanojit::isCall(u.code); }
+ bool isStore() const { return nanojit::isStore(u.code); }
+ bool isLoad() const { return nanojit::isLoad(u.code); }
+ bool isGuard() const { return nanojit::isGuard(u.code); }
// True if the instruction is a 32-bit or smaller constant integer.
- bool isconst() const;
+ bool isconst() const { return nanojit::isConst(u.code); }
// True if the instruction is a 32-bit or smaller constant integer and
// has the value val when treated as a 32-bit signed integer.
bool isconstval(int32_t val) const;
// True if the instruction is a constant quad value.
bool isconstq() const;
// True if the instruction is a constant pointer value.
bool isconstp() const;
bool isTramp() {
return isop(LIR_neartramp) || isop(LIR_tramp);
}
-
+ bool isBranch() const {
+ return isop(LIR_jt) || isop(LIR_jf) || isop(LIR_j);
+ }
// Set the imm16 member. Should only be used on instructions that use
// that. If you're not sure, you shouldn't be calling it.
void setimm16(int32_t i);
+ void setimm24(int32_t x);
// Set the resv member. Should only be used on instructions that use
// that. If you're not sure, you shouldn't be calling it.
void setresv(uint32_t resv);
// Set the opcode
void initOpcode(LOpcode);
// operand-setting methods
void setOprnd1(LIns*);
void setOprnd2(LIns*);
void setOprnd3(LIns*);
void setDisp(int8_t d);
+ void target(LIns* t);
+ LIns **targetAddr();
+ LIns* getTarget();
SideExit *exit();
inline uint32_t argc() const {
NanoAssert(isCall());
return c.imm8b;
}
inline size_t callInsWords() const {
@@ -419,29 +519,31 @@ namespace nanojit
return *(const CallInfo **) (this - callInfoWords);
}
};
typedef LIns* LInsp;
bool FASTCALL isCse(LOpcode v);
bool FASTCALL isCmp(LOpcode v);
bool FASTCALL isCond(LOpcode v);
+ inline bool isRet(LOpcode c) {
+ return (c & ~LIR64) == LIR_ret;
+ }
+ bool FASTCALL isFloat(LOpcode v);
LIns* FASTCALL callArgN(LInsp i, uint32_t n);
extern const uint8_t operandCount[];
class Fragmento; // @todo remove this ; needed for minbuild for some reason?!? Should not be compiling this code at all
class LirFilter;
- struct CallInfo;
// make it a GCObject so we can explicitly delete it early
class LirWriter : public GCObject
{
public:
LirWriter *out;
- public:
const CallInfo *_functions;
virtual ~LirWriter() {}
LirWriter(LirWriter* out)
: out(out), _functions(out?out->_functions : 0) {}
virtual LInsp ins0(LOpcode v) {
return out->ins0(v);
@@ -450,18 +552,23 @@ namespace nanojit
return out->ins1(v, a);
}
virtual LInsp ins2(LOpcode v, LIns* a, LIns* b) {
return out->ins2(v, a, b);
}
virtual LInsp insGuard(LOpcode v, LIns *c, SideExit *x) {
return out->insGuard(v, c, x);
}
- virtual LInsp insParam(int32_t i) {
- return out->insParam(i);
+ virtual LInsp insBranch(LOpcode v, LInsp condition, LInsp to) {
+ return out->insBranch(v, condition, to);
+ }
+ // arg: 0=first, 1=second, ...
+ // kind: 0=arg 1=saved-reg
+ virtual LInsp insParam(int32_t arg, int32_t kind) {
+ return out->insParam(arg, kind);
}
virtual LInsp insImm(int32_t imm) {
return out->insImm(imm);
}
virtual LInsp insImmq(uint64_t imm) {
return out->insImmq(imm);
}
virtual LInsp insLoad(LOpcode op, LIns* base, LIns* d) {
@@ -472,30 +579,35 @@ namespace nanojit
}
virtual LInsp insStorei(LIns* value, LIns* base, int32_t d) {
return isS8(d) ? out->insStorei(value, base, d)
: out->insStore(value, base, insImm(d));
}
virtual LInsp insCall(const CallInfo *call, LInsp args[]) {
return out->insCall(call, args);
}
+ virtual LInsp insAlloc(int32_t size) {
+ return out->insAlloc(size);
+ }
// convenience
LIns* insLoadi(LIns *base, int disp);
LIns* insLoad(LOpcode op, LIns *base, int disp);
+ LIns* store(LIns* value, LIns* base, int32_t d);
// Inserts a conditional choice: the result is iftrue when the condition
// is true and iffalse otherwise.
LIns* ins_choose(LIns* cond, LIns* iftrue, LIns* iffalse);
// Inserts an integer comparison to 0
LIns* ins_eq0(LIns* oprnd1);
// Inserts a binary operation where the second operand is an
// integer immediate.
LIns* ins2i(LOpcode op, LIns *oprnd1, int32_t);
LIns* qjoin(LInsp lo, LInsp hi);
LIns* insImmPtr(const void *ptr);
+ LIns* insImmf(double f);
};
#ifdef NJ_VERBOSE
extern const char* lirNames[];
/**
* map address ranges to meaningful names.
*/
@@ -511,18 +623,18 @@ namespace nanojit
DRCWB(avmplus::String*) name;
size_t size:29, align:3;
};
avmplus::SortedMap<const void*, Entry*, avmplus::LIST_GCObjects> names;
bool addrs, pad[3];
char buf[1000], *end;
void formatAddr(const void *p, char *buf);
public:
- AvmCore *core;
- LabelMap(AvmCore *, LabelMap* parent);
+ avmplus::AvmCore *core;
+ LabelMap(avmplus::AvmCore *, LabelMap* parent);
~LabelMap();
void add(const void *p, size_t size, size_t align, const char *name);
void add(const void *p, size_t size, size_t align, avmplus::String*);
const char *dup(const char *);
const char *format(const void *p);
void promoteAll(const void *newbase);
};
@@ -574,110 +686,128 @@ namespace nanojit
const char *formatIns(LInsp i);
void formatGuard(LInsp i, char *buf);
};
class VerboseWriter : public LirWriter
{
avmplus::List<LInsp, avmplus::LIST_NonGCObjects> code;
- LirNameMap *names;
+ DWB(LirNameMap*) names;
public:
VerboseWriter(GC *gc, LirWriter *out, LirNameMap* names)
: LirWriter(out), code(gc), names(names)
{}
LInsp add(LInsp i) {
- code.add(i);
+ if (i)
+ code.add(i);
return i;
}
+ LInsp add_flush(LInsp i) {
+ if ((i = add(i)) != 0)
+ flush();
+ return i;
+ }
+
void flush()
{
- for (int j=0, n=code.size(); j < n; j++)
- printf(" %s\n",names->formatIns(code[j]));
- code.clear();
- printf("\n");
+ int n = code.size();
+ if (n) {
+ for (int i=0; i < n; i++)
+ printf(" %s\n",names->formatIns(code[i]));
+ code.clear();
+ if (n > 1)
+ printf("\n");
+ }
}
LIns* insGuard(LOpcode op, LInsp cond, SideExit *x) {
- LInsp i = add(out->insGuard(op,cond,x));
- if (i)
- flush();
- return i;
+ return add_flush(out->insGuard(op,cond,x));
+ }
+
+ LIns* insBranch(LOpcode v, LInsp condition, LInsp to) {
+ return add_flush(out->insBranch(v, condition, to));
}
+
LIns* ins0(LOpcode v) {
- LInsp i = add(out->ins0(v));
- if (i)
- flush();
- return i;
+ if (v == LIR_label || v == LIR_start) {
+ flush();
+ }
+ return add(out->ins0(v));
}
LIns* ins1(LOpcode v, LInsp a) {
- return add(out->ins1(v, a));
+ return isRet(v) ? add_flush(out->ins1(v, a)) : add(out->ins1(v, a));
}
LIns* ins2(LOpcode v, LInsp a, LInsp b) {
return v == LIR_2 ? out->ins2(v,a,b) : add(out->ins2(v, a, b));
}
LIns* insCall(const CallInfo *call, LInsp args[]) {
- return add(out->insCall(call, args));
+ return add_flush(out->insCall(call, args));
}
- LIns* insParam(int32_t i) {
- return add(out->insParam(i));
+ LIns* insParam(int32_t i, int32_t kind) {
+ return add(out->insParam(i, kind));
}
LIns* insLoad(LOpcode v, LInsp base, LInsp disp) {
return add(out->insLoad(v, base, disp));
}
LIns* insStore(LInsp v, LInsp b, LInsp d) {
return add(out->insStore(v, b, d));
}
LIns* insStorei(LInsp v, LInsp b, int32_t d) {
return add(out->insStorei(v, b, d));
}
+ LIns* insAlloc(int32_t size) {
+ return add(out->insAlloc(size));
+ }
};
#endif
class ExprFilter: public LirWriter
{
public:
ExprFilter(LirWriter *out) : LirWriter(out) {}
LIns* ins1(LOpcode v, LIns* a);
LIns* ins2(LOpcode v, LIns* a, LIns* b);
- LIns* insGuard(LOpcode v, LIns *c, SideExit *x);
+ LIns* insGuard(LOpcode, LIns *cond, SideExit *);
+ LIns* insBranch(LOpcode, LIns *cond, LIns *target);
};
// @todo, this could be replaced by a generic HashMap or HashSet, if we had one
class LInsHashSet
{
// must be a power of 2.
// don't start too small, or we'll waste time growing and rehashing.
// don't start too large, will waste memory.
- static const uint32_t kInitialCap = 2048;
+ static const uint32_t kInitialCap = 64;
- InsList m_list;
- uint32_t m_used;
+ LInsp *m_list; // explicit WB's are used, no DWB needed.
+ uint32_t m_used, m_cap;
GC* m_gc;
static uint32_t FASTCALL hashcode(LInsp i);
- uint32_t FASTCALL find(LInsp name, uint32_t hash, const InsList& list, uint32_t cap);
+ uint32_t FASTCALL find(LInsp name, uint32_t hash, const LInsp *list, uint32_t cap);
static bool FASTCALL equals(LInsp a, LInsp b);
void FASTCALL grow();
public:
LInsHashSet(GC* gc);
LInsp find32(int32_t a, uint32_t &i);
LInsp find64(uint64_t a, uint32_t &i);
LInsp find1(LOpcode v, LInsp a, uint32_t &i);
LInsp find2(LOpcode v, LInsp a, LInsp b, uint32_t &i);
LInsp findcall(const CallInfo *call, uint32_t argc, LInsp args[], uint32_t &i);
LInsp add(LInsp i, uint32_t k);
void replace(LInsp i);
+ void clear();
static uint32_t FASTCALL hashimm(int32_t);
static uint32_t FASTCALL hashimmq(uint64_t);
static uint32_t FASTCALL hash1(LOpcode v, LInsp);
static uint32_t FASTCALL hash2(LOpcode v, LInsp, LInsp);
static uint32_t FASTCALL hashcall(const CallInfo *call, uint32_t argc, LInsp args[]);
};
@@ -690,50 +820,55 @@ namespace nanojit
LIns* insImmq(uint64_t q);
LIns* ins1(LOpcode v, LInsp);
LIns* ins2(LOpcode v, LInsp, LInsp);
LIns* insLoad(LOpcode v, LInsp b, LInsp d);
LIns* insCall(const CallInfo *call, LInsp args[]);
LIns* insGuard(LOpcode op, LInsp cond, SideExit *x);
};
- struct Page;
class LirBuffer : public GCFinalizedObject
{
public:
DWB(Fragmento*) _frago;
LirBuffer(Fragmento* frago, const CallInfo* functions);
virtual ~LirBuffer();
void clear();
LInsp next();
- LInsp commit(uint32_t count);
- bool addPage();
bool outOmem() { return _noMem != 0; }
- debug_only (void validate() const;)
+
+ debug_only (void validate() const;)
verbose_only(DWB(LirNameMap*) names;)
- verbose_only(int insCount();)
- verbose_only(int byteCount();)
+
+ int32_t insCount();
+ int32_t byteCount();
// stats
struct
{
uint32_t lir; // # instructions
uint32_t pages; // pages consumed
}
_stats;
const CallInfo* _functions;
+ AbiKind abi;
LInsp state,param1,sp,rp;
+ LInsp savedParams[NumSavedRegs];
- private:
+ protected:
+ friend class LirBufWriter;
+
+ LInsp commit(uint32_t count);
+ bool addPage();
Page* pageAlloc();
- Page* _start; // first page
- LInsp _unused; // next unused instruction slot
- int _noMem; // set if ran out of memory when writing to buffer
+ Page* _start; // first page
+ LInsp _unused; // next unused instruction slot
+ int _noMem; // set if ran out of memory when writing to buffer
};
class LirBufWriter : public LirWriter
{
DWB(LirBuffer*) _buf; // underlying buffer housing the instructions
LInsp spref, rpref;
public:
@@ -744,27 +879,34 @@ namespace nanojit
// LirWriter interface
LInsp insLoad(LOpcode op, LInsp base, LInsp off);
LInsp insStore(LInsp o1, LInsp o2, LInsp o3);
LInsp insStorei(LInsp o1, LInsp o2, int32_t imm);
LInsp ins0(LOpcode op);
LInsp ins1(LOpcode op, LInsp o1);
LInsp ins2(LOpcode op, LInsp o1, LInsp o2);
- LInsp insParam(int32_t i);
+ LInsp insParam(int32_t i, int32_t kind);
LInsp insImm(int32_t imm);
LInsp insImmq(uint64_t imm);
LInsp insCall(const CallInfo *call, LInsp args[]);
LInsp insGuard(LOpcode op, LInsp cond, SideExit *x);
+ LInsp insBranch(LOpcode v, LInsp condition, LInsp to);
+ LInsp insAlloc(int32_t size);
// buffer mgmt
LInsp skip(size_t);
+
+ protected:
LInsp insFar(LOpcode op, LInsp target);
+ LInsp insLink(LOpcode op, LInsp target);
LInsp ensureReferenceable(LInsp i, int32_t addedDistance);
bool ensureRoom(uint32_t count);
+ bool can8bReach(LInsp from, LInsp to) { return isU8(from-to-1); }
+ bool can24bReach(LInsp from, LInsp to){ return isS24(from-to); }
bool canReference(LInsp from, LInsp to) {
return isU8(from-to-1);
}
};
class LirFilter
{
public:
@@ -790,40 +932,61 @@ namespace nanojit
LirReader(LInsp i) : LirFilter(0), _i(i) { }
virtual ~LirReader() {}
// LirReader i/f
LInsp read(); // advance to the prior instruction
LInsp pos() {
return _i;
}
+ void setpos(LIns *i) {
+ _i = i;
+ }
};
class Assembler;
void compile(Assembler *assm, Fragment *frag);
verbose_only( void printTracker(const char* s, avmplus::RegionTracker& trk, Assembler* assm); )
- verbose_only(void live(GC *gc, Assembler *assm, Fragment *frag);)
+ verbose_only(void live(GC *gc, LirBuffer *lirbuf);)
class StackFilter: public LirFilter
{
GC *gc;
- Fragment *frag;
+ LirBuffer *lirbuf;
LInsp sp;
avmplus::BitSet stk;
int top;
- int getTop(LInsp guard);
+ int getTop(LInsp br);
public:
- StackFilter(LirFilter *in, GC *gc, Fragment *frag, LInsp sp);
+ StackFilter(LirFilter *in, GC *gc, LirBuffer *lirbuf, LInsp sp);
virtual ~StackFilter() {}
LInsp read();
};
class CseReader: public LirFilter
{
LInsHashSet *exprs;
const CallInfo *functions;
public:
CseReader(LirFilter *in, LInsHashSet *exprs, const CallInfo*);
LInsp read();
};
+
+ // eliminate redundant loads by watching for stores & mutator calls
+ class LoadFilter: public LirWriter
+ {
+ public:
+ LInsp sp, rp;
+ LInsHashSet exprs;
+ void clear(LInsp p);
+ public:
+ LoadFilter(LirWriter *out, GC *gc)
+ : LirWriter(out), exprs(gc) { }
+
+ LInsp ins0(LOpcode);
+ LInsp insLoad(LOpcode, LInsp base, LInsp disp);
+ LInsp insStore(LInsp v, LInsp b, LInsp d);
+ LInsp insStorei(LInsp v, LInsp b, int32_t d);
+ LInsp insCall(const CallInfo *call, LInsp args[]);
+ };
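// Illustrative sketch (not from the patch) of how these writer classes are
// intended to compose: each filter wraps a downstream LirWriter and forwards
// (possibly rewritten) instructions to it. The MMgc placement-new form used
// here is an assumption.
inline LirWriter* example_writer_pipeline(GC* gc, LirBufWriter* bufwriter) {
    LirWriter* w = bufwriter;           // bottom of the stack: writes into a LirBuffer
    w = new (gc) LoadFilter(w, gc);     // drops redundant loads (see class above)
    w = new (gc) ExprFilter(w);         // local expression simplification
    return w;                           // front ends emit through the top-most writer
}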
}
#endif // __nanojit_LIR__
--- a/js/src/nanojit/NativeAMD64.h
+++ b/js/src/nanojit/NativeAMD64.h
@@ -104,16 +104,17 @@ namespace nanojit
LastReg = 31,
UnknownReg = 32
}
Register;
typedef int RegisterMask;
/* RBX, R13-R15 */
+ static const int NumSavedRegs = 3;
static const RegisterMask SavedRegs = /*(1<<RBX) |*/ /*(1<<R12) |*/ (1<<R13) | (1<<R14) | (1<<R15);
/* RAX, RCX, RDX, RDI, RSI, R8-R11 */
static const RegisterMask TempRegs = (1<<RAX) | (1<<RCX) | (1<<RDX) | (1<<R8) | (1<<R9) | (1<<R10) | (1<<R11) | (1<<RDI) | (1<<RSI);
static const RegisterMask GpRegs = SavedRegs | TempRegs;
/* XMM0-XMM7 */
static const RegisterMask XmmRegs = (1<<XMM0) | (1<<XMM1) | (1<<XMM2) | (1<<XMM3) | (1<<XMM4) | (1<<XMM5) | (1<<XMM6) | (1<<XMM7) | (1<<XMM8) | (1<<XMM9) | (1<<XMM10) | (1<<XMM11) | (1<<XMM13) | (1<<XMM14) | (1<<XMM15);
static const RegisterMask FpRegs = XmmRegs;
static const RegisterMask ScratchRegs = TempRegs | XmmRegs;
--- a/js/src/nanojit/NativeARM.h
+++ b/js/src/nanojit/NativeARM.h
@@ -152,16 +152,17 @@ typedef struct _FragInfo {
NIns* epilogue;
} FragInfo;
#ifdef ARM_VFP
static const RegisterMask SavedFpRegs = 1<<D0 | 1<<D1 | 1<<D2 | 1<<D3 | 1<<D4 | 1<<D5 | 1<<D6 | 1<<D7;
#else
static const RegisterMask SavedFpRegs = 0;
#endif
+static const int NumSavedRegs = 7;
static const RegisterMask SavedRegs = 1<<R4 | 1<<R5 | 1<<R6 | 1<<R7 | 1<<R8 | 1<<R9 | 1<<R10 | SavedFpRegs;
static const RegisterMask FpRegs = 1<<D0 | 1<<D1 | 1<<D2 | 1<<D3 | 1<<D4 | 1<<D5 | 1<<D6; // no D7; S14-S15 are used for i2f/u2f.
static const RegisterMask GpRegs = 0x07FF;
static const RegisterMask AllowableFlagRegs = 1<<R0 | 1<<R1 | 1<<R2 | 1<<R3 | 1<<R4 | 1<<R5 | 1<<R6 | 1<<R7 | 1<<R8 | 1<<R9 | 1<<R10;
#define IsFpReg(_r) ((rmask(_r) & (FpRegs | (1<<D7))) != 0)
#define IsGpReg(_r) ((rmask(_r) & (GpRegs | (1<<Scratch))) != 0)
#define FpRegNum(_fpr) ((_fpr) - FirstFloatReg)
--- a/js/src/nanojit/NativeThumb.cpp
+++ b/js/src/nanojit/NativeThumb.cpp
@@ -56,16 +56,22 @@ namespace nanojit
#ifdef NJ_VERBOSE
const char* regNames[] = {"r0","r1","r2","r3","r4","r5","r6","r7","r8","r9","r10","r11","IP","SP","LR","PC"};
#endif
const Register Assembler::argRegs[] = { R0, R1, R2, R3 };
const Register Assembler::retRegs[] = { R0, R1 };
+#ifdef NJ_THUMB_JIT
+ const Register Assembler::savedRegs[] = { R4, R5, R6, R7 };
+#else
+ const Register Assembler::savedRegs[] = { R4, R5, R6, R7, R8, R9, R10 };
+#endif
+
void Assembler::nInit(AvmCore*)
{
// Thumb mode does not have conditional move, alas
has_cmov = false;
}
NIns* Assembler::genPrologue(RegisterMask needSaving)
{
@@ -264,17 +270,17 @@ namespace nanojit
uint32_t op = i->opcode();
int prefer = ~0;
if (op==LIR_call || op==LIR_fcall)
prefer = rmask(R0);
else if (op == LIR_callh)
prefer = rmask(R1);
else if (op == LIR_param)
- prefer = rmask(imm2register(i->imm8()));
+ prefer = rmask(imm2register(argRegs[i->imm8()]));
if (_allocator.free & allow & prefer)
allow &= prefer;
return allow;
}
void Assembler::asm_qjoin(LIns *ins)
{
--- a/js/src/nanojit/NativeThumb.h
+++ b/js/src/nanojit/NativeThumb.h
@@ -96,16 +96,17 @@ namespace nanojit
typedef int RegisterMask;
typedef struct _FragInfo
{
RegisterMask needRestoring;
NIns* epilogue;
}
FragInfo;
+ static const int NumSavedRegs = 4;
static const RegisterMask SavedRegs = 1<<R4 | 1<<R5 | 1<<R6 | 1<<R7;
static const RegisterMask FpRegs = 0x0000; // FST0-FST7
static const RegisterMask GpRegs = 0x003F;
static const RegisterMask AllowableFlagRegs = 1<<R0 | 1<<R1 | 1<<R2 | 1<<R3 | 1<<R4 | 1<<R5;
#define firstreg() R0
#define nextreg(r) (Register)((int)r+1)
#define imm2register(c) (Register)(c-1)
--- a/js/src/nanojit/Nativei386.cpp
+++ b/js/src/nanojit/Nativei386.cpp
@@ -66,49 +66,54 @@ namespace nanojit
"xmm8","xmm9","xmm10","xmm11","xmm12","xmm13","xmm14","xmm15"
#endif
};
#endif
#if defined NANOJIT_IA32
const Register Assembler::argRegs[] = { ECX, EDX };
const Register Assembler::retRegs[] = { EAX, EDX };
+ const Register Assembler::savedRegs[] = { EBX, ESI, EDI };
#elif defined NANOJIT_AMD64
#if defined WIN64
const Register Assembler::argRegs[] = { R8, R9, RCX, RDX };
#else
const Register Assembler::argRegs[] = { RDI, RSI, RDX, RCX, R8, R9 };
#endif
const Register Assembler::retRegs[] = { RAX, RDX };
+ const Register Assembler::savedRegs[] = { R13, R14, R15 };
#endif
+ const static uint8_t max_abi_regs[] = {
+ 2, /* ABI_FASTCALL */
+ 1, /* ABI_THISCALL */
+ 0, /* ABI_STDCALL */
+ 0 /* ABI_CDECL */
+ };
+
+
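// Hypothetical helper (a sketch, not part of the patch) showing how the table
// above combines with a CallInfo: the register-argument budget is the ABI's
// limit clamped to the call's actual integer argument count, exactly as
// asm_call() does below.
static inline uint32_t example_reg_args(const CallInfo* call) {
    uint32_t max_regs = max_abi_regs[call->_abi];
    uint32_t iargs = call->count_iargs();
    return (max_regs > iargs) ? iargs : max_regs;
}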
void Assembler::nInit(AvmCore* core)
{
OSDep::getDate();
#ifdef NANOJIT_AMD64
avmplus::AvmCore::cmov_available =
avmplus::AvmCore::sse2_available = true;
#endif
}
- NIns* Assembler::genPrologue(RegisterMask needSaving)
+ NIns* Assembler::genPrologue()
{
/**
* Prologue
*/
uint32_t stackNeeded = STACK_GRANULARITY * _activation.highwatermark;
- uint32_t savingCount = 0;
- for(Register i=FirstReg; i <= LastReg; i = nextreg(i))
- if (needSaving&rmask(i))
- savingCount++;
-
- // After forcing alignment, we've pushed the pre-alignment SP
- // and savingCount registers.
- uint32_t stackPushed = STACK_GRANULARITY * (1+savingCount);
+ uint32_t stackPushed =
+ STACK_GRANULARITY + // returnaddr
+ STACK_GRANULARITY; // ebp
uint32_t aligned = alignUp(stackNeeded + stackPushed, NJ_ALIGN_STACK);
uint32_t amt = aligned - stackPushed;
// Reserve stackNeeded bytes, padded
// to preserve NJ_ALIGN_STACK-byte alignment.
if (amt)
{
#if defined NANOJIT_IA32
@@ -117,47 +122,47 @@ namespace nanojit
SUBQi(SP, amt);
#endif
}
verbose_only( verbose_outputf(" %p:",_nIns); )
verbose_only( verbose_output(" patch entry:"); )
NIns *patchEntry = _nIns;
MR(FP, SP); // Establish our own FP.
-
- // Save pre-alignment SP value here, where the FP will point,
- // to preserve the illusion of a valid frame chain for
- // functions like MMgc::GetStackTrace. The 'return address'
- // of this 'frame' will be the last-saved register, but that's
- // fine, because the next-older frame will be legit.
- PUSHr(FP);
-
- for(Register i=FirstReg; i <= LastReg; i = nextreg(i))
- if (needSaving&rmask(i))
- PUSHr(i);
+ PUSHr(FP); // Save caller's FP.
- // We'd like to be able to use SSE instructions like MOVDQA on
- // stack slots; it requires 16B alignment. Darwin requires a
- // 16B stack alignment, and Linux GCC seems to intend to
- // establish and preserve the same, but we're told that GCC
- // has not always done this right. To avoid doubt, do it on
- // all platforms. The prologue runs only when we enter
- // fragments from the interpreter, so forcing 16B alignment
- // here is cheap.
-#if defined NANOJIT_IA32
- ANDi(SP, -NJ_ALIGN_STACK);
-#elif defined NANOJIT_AMD64
- ANDQi(SP, -NJ_ALIGN_STACK);
-#endif
- MR(FP,SP);
- PUSHr(FP); // Save caller's FP.
+ // align the entry point
+ asm_align_code();
return patchEntry;
}
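// Worked example for the sizing above (a sketch, assuming STACK_GRANULARITY == 4
// and NJ_ALIGN_STACK == 16): a highwatermark of 5 gives stackNeeded == 20;
// stackPushed == 8 covers the return address plus the saved EBP, so
// aligned == alignUp(28, 16) == 32 and amt == 24 bytes are reserved by the SUB.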
+ void Assembler::asm_align_code() {
+ static char nop[][9] = {
+ {0x90},
+ {0x66,0x90},
+ {0x0f,0x1f,0x00},
+ {0x0f,0x1f,0x40,0x00},
+ {0x0f,0x1f,0x44,0x00,0x00},
+ {0x66,0x0f,0x1f,0x44,0x00,0x00},
+ {0x0f,0x1f,0x80,0x00,0x00,0x00,0x00},
+ {0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00},
+ {0x66,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00},
+ };
+ unsigned n;
+ while((n = uintptr_t(_nIns) & 15) != 0) {
+ if (n > 9)
+ n = 9;
+ underrunProtect(n);
+ _nIns -= n;
+ memcpy(_nIns, nop[n-1], n);
+ asm_output1("nop%d", n);
+ }
+ }
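// Example of the padding loop above: if _nIns ends in 0xb, the first pass sees
// n == 11, clamps it to 9 and emits the 9-byte NOP form; the second pass then
// sees n == 2 and emits 0x66 0x90, leaving the entry point 16-byte aligned
// after at most two iterations.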
+
void Assembler::nFragExit(LInsp guard)
{
SideExit *exit = guard->exit();
bool trees = _frago->core()->config.tree_opt;
Fragment *frag = exit->target;
GuardRecord *lr = 0;
bool destKnown = (frag && frag->fragEntry);
if (destKnown && !trees)
@@ -186,17 +191,16 @@ namespace nanojit
// for trees we need the patch entry on the incoming fragment so we can unhook it later if needed
if (trees && destKnown)
patch(lr);
#endif
}
// first restore ESP from EBP, undoing SUBi(SP,amt) from genPrologue
MR(SP,FP);
-
#ifdef NJ_VERBOSE
if (_frago->core()->config.show_stats) {
// load EDX (arg1) with Fragment *fromFrag, target fragment
// will make use of this when calling fragenter().
#if defined NANOJIT_IA32
int fromfrag = int((Fragment*)_thisfrag);
LDi(argRegs[1], fromfrag);
#elif defined NANOJIT_AMD64
@@ -208,114 +212,125 @@ namespace nanojit
// return value is GuardRecord*
#if defined NANOJIT_IA32
LDi(EAX, int(lr));
#elif defined NANOJIT_AMD64
LDQi(RAX, intptr_t(lr));
#endif
}
- NIns *Assembler::genEpilogue(RegisterMask restore)
+ NIns *Assembler::genEpilogue()
{
RET();
POPr(FP); // Restore caller's FP.
- MR(SP,FP); // Undo forced alignment.
-
- // Restore saved registers.
- for (Register i=UnknownReg; i >= FirstReg; i = prevreg(i))
- if (restore&rmask(i)) { POPr(i); }
-
- POPr(FP); // Pop the pre-alignment SP.
+ MR(SP,FP); // pop the stack frame
return _nIns;
}
#if defined NANOJIT_IA32
void Assembler::asm_call(LInsp ins)
{
const CallInfo* call = ins->callInfo();
// must be signed, not unsigned
- const uint32_t iargs = call->count_iargs();
- int32_t fstack = call->count_args() - iargs;
+ uint32_t iargs = call->count_iargs();
+ int32_t fargs = call->count_args() - iargs - call->isIndirect();
- int32_t extra = 0;
+ bool imt = call->isInterface();
+ if (imt)
+ iargs --;
+
+ uint32_t max_regs = max_abi_regs[call->_abi];
+ if (max_regs > iargs)
+ max_regs = iargs;
-#if defined NJ_NO_FASTCALL
- int32_t istack = iargs;
+ int32_t istack = iargs-max_regs; // first max_regs 4B args are in registers
+ int32_t extra = 0;
+ const int32_t pushsize = 4*istack + 8*fargs; // actual stack space used
+
+#if _MSC_VER
+ // msc is slack, and MIR doesn't do anything extra, so let's use this
+ // call-site alignment to at least have code size parity with MIR.
+ uint32_t align = 4;//NJ_ALIGN_STACK;
#else
- int32_t istack = iargs-2; // first 2 4B args are in registers
- if (istack <= 0)
- {
- istack = 0;
- }
+ uint32_t align = NJ_ALIGN_STACK;
#endif
- const int32_t size = 4*istack + 8*fstack; // actual stack space used
- if (size) {
+ if (pushsize) {
// stack re-alignment
// only pop our adjustment amount since callee pops args in FASTCALL mode
- extra = alignUp(size, NJ_ALIGN_STACK) - (size);
-#ifndef NJ_NO_FASTCALL
- if (extra > 0)
- {
+ extra = alignUp(pushsize, align) - pushsize;
+ if (call->_abi == ABI_CDECL) {
+ // with CDECL only, caller pops args
+ ADDi(SP, extra+pushsize);
+ } else if (extra > 0) {
ADDi(SP, extra);
- }
-#endif
+ }
}
-#ifdef NJ_NO_FASTCALL
- // In C calling conventions, callee doesn't pop args.
- ADDi(SP, 4*iargs + 8*fstack + extra);
-#endif
-
- CALL(call);
-
-#ifdef NJ_NO_FASTCALL
- if (iargs >= 1) {
- PUSHr(ECX);
- if (iargs >= 2) {
- PUSHr(EDX);
- }
+ bool indirect = false;
+ if (ins->isop(LIR_call) || ins->isop(LIR_fcall)) {
+ verbose_only(if (_verbose)
+ outputf(" %p:", _nIns);
+ )
+ CALL(call);
}
-#endif
+ else {
+ // indirect call. x86 Calling conventions don't use EAX as an
+ // argument, and do use EAX as a return value. We need a register
+ // for the address to call, so we use EAX since it will always be
+ // available
+ NanoAssert(ins->isop(LIR_calli) || ins->isop(LIR_fcalli));
+ CALLr(call, EAX);
+ indirect = true;
+ }
// make sure fpu stack is empty before call (restoreCallerSaved)
NanoAssert(_allocator.isFree(FST0));
// note: this code requires that ref arguments (ARGSIZE_Q)
// be one of the first two arguments
- // pre-assign registers to the first 2 4B args
- const int max_regs = (iargs < 2) ? iargs : 2;
- int n = 0;
+ // pre-assign registers to the first N 4B args based on the calling convention
+ uint32_t n = 0;
- ArgSize sizes[10];
+ ArgSize sizes[2*MAXARGS];
uint32_t argc = call->get_sizes(sizes);
+ if (indirect) {
+ argc--;
+ asm_arg(ARGSIZE_LO, ins->arg(argc), EAX);
+ }
+
+ if (imt) {
+ // interface thunk calling convention: put iid in EDX
+ NanoAssert(call->_abi == ABI_CDECL);
+ argc--;
+ asm_arg(ARGSIZE_LO, ins->arg(argc), EDX);
+ }
for(uint32_t i=0; i < argc; i++)
{
uint32_t j = argc-i-1;
ArgSize sz = sizes[j];
Register r = UnknownReg;
- if (n < max_regs && sz != ARGSIZE_F)
- r = argRegs[n++]; // tell asm_arg what reg to use
+ if (n < max_regs && sz != ARGSIZE_F) {
+ r = argRegs[n++]; // tell asm_arg what reg to use
+ }
asm_arg(sz, ins->arg(j), r);
}
if (extra > 0)
- {
SUBi(SP, extra);
- }
}
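// Worked example for the bookkeeping above (a sketch, assuming ABI_FASTCALL,
// a non-MSC build and NJ_ALIGN_STACK == 16): three integer args plus one
// double give iargs == 3, fargs == 1, max_regs == 2, so istack == 1 and
// pushsize == 4*1 + 8*1 == 12; extra == alignUp(12, 16) - 12 == 4, and since
// FASTCALL callees pop their own arguments, only those 4 padding bytes are
// added back to SP after the call.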
#elif defined NANOJIT_AMD64
void Assembler::asm_call(LInsp ins)
{
Register fpu_reg = XMM0;
const CallInfo* call = ins->callInfo();
int n = 0;
-
+
CALL(call);
ArgSize sizes[10];
uint32_t argc = call->get_sizes(sizes);
for(uint32_t i=0; i < argc; i++)
{
uint32_t j = argc-i-1;
@@ -412,35 +427,36 @@ namespace nanojit
}
#endif
}
RegisterMask Assembler::hint(LIns* i, RegisterMask allow)
{
uint32_t op = i->opcode();
int prefer = allow;
- if (op == LIR_call)
-#if defined NANOJIT_IA32
- prefer &= rmask(EAX);
-#elif defined NANOJIT_AMD64
- prefer &= rmask(RAX);
-#endif
- else if (op == LIR_param)
- prefer &= rmask(Register(i->imm8()));
-#if defined NANOJIT_IA32
- else if (op == LIR_callh || op == LIR_rsh && i->oprnd1()->opcode()==LIR_callh)
- prefer &= rmask(EDX);
-#else
- else if (op == LIR_callh)
- prefer &= rmask(RAX);
-#endif
- else if (i->isCmp())
+ if (op == LIR_call || op == LIR_calli) {
+ prefer &= rmask(retRegs[0]);
+ }
+ else if (op == LIR_fcall || op == LIR_fcalli) {
+ prefer &= rmask(FST0);
+ }
+ else if (op == LIR_param) {
+ uint32_t max_regs = max_abi_regs[_thisfrag->lirbuf->abi];
+ if (i->imm8() < max_regs)
+ prefer &= rmask(Register(i->imm8()));
+ }
+ else if (op == LIR_callh || op == LIR_rsh && i->oprnd1()->opcode()==LIR_callh) {
+ prefer &= rmask(retRegs[1]);
+ }
+ else if (i->isCmp()) {
prefer &= AllowableFlagRegs;
- else if (i->isconst())
+ }
+ else if (i->isconst()) {
prefer &= ScratchRegs;
+ }
return (_allocator.free & prefer) ? prefer : allow;
}
void Assembler::asm_qjoin(LIns *ins)
{
int d = findMemFor(ins);
AvmAssert(d);
LIns* lo = ins->oprnd1();
@@ -471,125 +487,171 @@ namespace nanojit
// okay if r gets recycled.
Register r = findRegFor(lo, GpRegs);
ST(FP, d, r);
}
freeRsrcOf(ins, false); // if we had a reg in use, emit a ST to flush it to mem
}
+ void Assembler::asm_load(int d, Register r)
+ {
+ if (rmask(r) & FpRegs)
+ {
+#if defined NANOJIT_IA32
+ if (rmask(r) & XmmRegs) {
+#endif
+ SSE_LDQ(r, d, FP);
+#if defined NANOJIT_IA32
+ } else {
+ FLDQ(d, FP);
+ }
+#endif
+ }
+#if defined NANOJIT_AMD64
+ else if (i->opcode() == LIR_param)
+ {
+ LDQ(r, d, FP);
+ }
+#endif
+ else
+ {
+ LD(r, d, FP);
+ }
+ }
+
void Assembler::asm_restore(LInsp i, Reservation *resv, Register r)
{
- if (i->isconst())
- {
+ if (i->isop(LIR_alloc)) {
+ LEA(r, disp(resv), FP);
+ verbose_only(if (_verbose) {
+ outputf(" remat %s size %d", _thisfrag->lirbuf->names->formatRef(i), i->size());
+ })
+ }
+ else if (i->isconst()) {
if (!resv->arIndex) {
reserveFree(i);
}
LDi(r, i->constval());
}
- else
- {
+ else {
int d = findMemFor(i);
- if (rmask(r) & FpRegs)
- {
-#if defined NANOJIT_IA32
- if (rmask(r) & XmmRegs) {
-#endif
- SSE_LDQ(r, d, FP);
-#if defined NANOJIT_IA32
- } else {
- FLDQ(d, FP);
- }
-#endif
- }
- else
- {
-#if defined NANOJIT_AMD64
- LDQ(r, d, FP);
-#else
- LD(r, d, FP);
-#endif
- }
+ asm_load(d,r);
verbose_only(if (_verbose) {
outputf(" restore %s", _thisfrag->lirbuf->names->formatRef(i));
})
}
}
void Assembler::asm_store32(LIns *value, int dr, LIns *base)
{
if (value->isconst())
{
- Register rb = findRegFor(base, GpRegs);
+ Register rb;
+ if (base->isop(LIR_alloc)) {
+ rb = FP;
+ dr += findMemFor(base);
+ } else {
+ rb = findRegFor(base, GpRegs);
+ }
int c = value->constval();
STi(rb, dr, c);
}
else
{
// make sure what is in a register
Reservation *rA, *rB;
- findRegFor2(GpRegs, value, rA, base, rB);
- Register ra = rA->reg;
- Register rb = rB->reg;
+ Register ra, rb;
+ if (base->isop(LIR_alloc)) {
+ rb = FP;
+ dr += findMemFor(base);
+ ra = findRegFor(value, GpRegs);
+ } else if (base->isconst()) {
+ // absolute address
+ dr += base->constval();
+ ra = findRegFor(value, GpRegs);
+ rb = UnknownReg;
+ } else {
+ findRegFor2(GpRegs, value, rA, base, rB);
+ ra = rA->reg;
+ rb = rB->reg;
+ }
ST(rb, dr, ra);
}
}
- void Assembler::asm_spill(LInsp i, Reservation *resv, bool pop)
+ void Assembler::asm_spill(Register rr, int d, bool pop, bool quad)
{
- (void)i;
- int d = disp(resv);
- Register rr = resv->reg;
+ (void)quad;
if (d)
{
// save to spill location
if (rmask(rr) & FpRegs)
{
#if defined NANOJIT_IA32
if (rmask(rr) & XmmRegs) {
#endif
SSE_STQ(d, FP, rr);
#if defined NANOJIT_IA32
} else {
FSTQ((pop?1:0), d, FP);
}
#endif
}
+#if defined NANOJIT_AMD64
+ else if (quad)
+ {
+ STQ(FP, d, rr);
+ }
+#endif
else
{
-#if defined NANOJIT_AMD64
- STQ(FP, d, rr);
-#else
ST(FP, d, rr);
-#endif
}
- verbose_only(if (_verbose) {
- outputf(" spill %s",_thisfrag->lirbuf->names->formatRef(i));
- })
}
#if defined NANOJIT_IA32
else if (pop && (rmask(rr) & x87Regs))
{
// pop the fpu result since it isn't used
FSTP(FST0);
}
-#endif
+#endif
+ }
+
+ void Assembler::asm_spilli(LInsp i, Reservation *resv, bool pop)
+ {
+ int d = disp(resv);
+ Register rr = resv->reg;
+ bool quad = i->opcode() == LIR_param || i->isQuad();
+ asm_spill(rr, d, pop, quad);
+ if (d)
+ {
+ verbose_only(if (_verbose) {
+ outputf(" spill %s",_thisfrag->lirbuf->names->formatRef(i));
+ })
+ }
}
void Assembler::asm_load64(LInsp ins)
{
LIns* base = ins->oprnd1();
int db = ins->oprnd2()->constval();
Reservation *resv = getresv(ins);
Register rr = resv->reg;
if (rr != UnknownReg && rmask(rr) & XmmRegs)
{
freeRsrcOf(ins, false);
- Register rb = findRegFor(base, GpRegs);
+ Register rb;
+ if (base->isop(LIR_alloc)) {
+ rb = FP;
+ db += findMemFor(base);
+ } else {
+ rb = findRegFor(base, GpRegs);
+ }
SSE_LDQ(rr, db, rb);
}
#if defined NANOJIT_AMD64
else if (rr != UnknownReg && rmask(rr) & GpRegs)
{
freeRsrcOf(ins, false);
Register rb = findRegFor(base, GpRegs);
LDQ(rr, db, rb);
@@ -609,17 +671,23 @@ namespace nanojit
_allocator.addFree(rr);
freeRsrcOf(ins, false);
}
#elif defined NANOJIT_IA32
else
{
int dr = disp(resv);
- Register rb = findRegFor(base, GpRegs);
+ Register rb;
+ if (base->isop(LIR_alloc)) {
+ rb = FP;
+ db += findMemFor(base);
+ } else {
+ rb = findRegFor(base, GpRegs);
+ }
resv->reg = UnknownReg;
// don't use an fpu reg to simply load & store the value.
if (dr)
asm_mmq(FP, dr, rb, db);
freeRsrcOf(ins, false);
@@ -634,52 +702,84 @@ namespace nanojit
}
void Assembler::asm_store64(LInsp value, int dr, LInsp base)
{
if (value->isconstq())
{
// if a constant 64-bit value just store it now rather than
// generating a pointless store/load/store sequence
- Register rb = findRegFor(base, GpRegs);
+ Register rb;
+ if (base->isop(LIR_alloc)) {
+ rb = FP;
+ dr += findMemFor(base);
+ } else {
+ rb = findRegFor(base, GpRegs);
+ }
const int32_t* p = (const int32_t*) (value-2);
STi(rb, dr+4, p[1]);
STi(rb, dr, p[0]);
return;
}
#if defined NANOJIT_IA32
- if (value->isop(LIR_ldq) || value->isop(LIR_qjoin))
+ if (value->isop(LIR_ldq) || value->isop(LIR_ldqc) || value->isop(LIR_qjoin))
{
// value is 64bit struct or int64_t, or maybe a double.
// it may be live in an FPU reg. Either way, don't
// put it in an FPU reg just to load & store it.
// a) if we know it's not a double, this is right.
// b) if we guarded that its a double, this store could be on
// the side exit, copying a non-double.
// c) maybe its a double just being stored. oh well.
if (avmplus::AvmCore::use_sse2()) {
Register rv = findRegFor(value, XmmRegs);
- Register rb = findRegFor(base, GpRegs);
+ Register rb;
+ if (base->isop(LIR_alloc)) {
+ rb = FP;
+ dr += findMemFor(base);
+ } else {
+ rb = findRegFor(base, GpRegs);
+ }
SSE_STQ(dr, rb, rv);
return;
}
int da = findMemFor(value);
- Register rb = findRegFor(base, GpRegs);
+ Register rb;
+ if (base->isop(LIR_alloc)) {
+ rb = FP;
+ dr += findMemFor(base);
+ } else {
+ rb = findRegFor(base, GpRegs);
+ }
asm_mmq(rb, dr, FP, da);
return;
}
+ Register rb;
+ if (base->isop(LIR_alloc)) {
+ rb = FP;
+ dr += findMemFor(base);
+ } else {
+ rb = findRegFor(base, GpRegs);
+ }
+
+ // if value already in a reg, use that, otherwise
+ // try to get it into XMM regs before FPU regs.
Reservation* rA = getresv(value);
+ Register rv;
int pop = !rA || rA->reg==UnknownReg;
- Register rv = findRegFor(value, avmplus::AvmCore::use_sse2() ? XmmRegs : FpRegs);
- Register rb = findRegFor(base, GpRegs);
+ if (pop) {
+ rv = findRegFor(value, avmplus::AvmCore::use_sse2() ? XmmRegs : FpRegs);
+ } else {
+ rv = rA->reg;
+ }
if (rmask(rv) & XmmRegs) {
SSE_STQ(dr, rb, rv);
} else {
FSTQ(pop, dr, rb);
}
#elif defined NANOJIT_AMD64
/* If this is not a float operation, we can use GpRegs instead.
@@ -758,30 +858,33 @@ namespace nanojit
if (rr != UnknownReg)
{
// @todo -- add special-cases for 0 and 1
_allocator.retire(rr);
rR->reg = UnknownReg;
NanoAssert((rmask(rr) & FpRegs) != 0);
const double d = ins->constvalf();
+ const uint64_t q = ins->constvalq();
if (rmask(rr) & XmmRegs) {
- if (d == 0.0) {
+ if (q == 0) {
+ // test (int64)0 since -0.0 == 0.0
SSE_XORPDr(rr, rr);
} else if (d == 1.0) {
// 1.0 is extremely frequent and worth special-casing!
static const double k_ONE = 1.0;
LDSDm(rr, &k_ONE);
} else {
findMemFor(ins);
const int d = disp(rR);
SSE_LDQ(rr, d, FP);
}
} else {
- if (d == 0.0) {
+ if (q == 0) {
+ // test (int64)0 since -0.0 == 0.0
FLDZ();
} else if (d == 1.0) {
FLD1();
} else {
findMemFor(ins);
int d = disp(rR);
FLDQ(d,FP);
}
@@ -798,48 +901,48 @@ namespace nanojit
STi(FP,d,p[0]);
}
#elif defined NANOJIT_AMD64
Reservation *rR = getresv(ins);
int64_t val = *(int64_t *)(ins - 2);
if (rR->reg != UnknownReg)
{
- Register rr = rR->reg;
- freeRsrcOf(ins, false);
- if (rmask(rr) & GpRegs)
+ if (rmask(rR->reg) & GpRegs)
{
- LDQi(rr, val);
+ LDQi(rR->reg, val);
}
- else if (rmask(rr) & XmmRegs)
+ else if (rmask(rR->reg) & XmmRegs)
{
if (ins->constvalf() == 0.0)
{
- SSE_XORPDr(rr, rr);
+ SSE_XORPDr(rR->reg, rR->reg);
}
else
{
/* Get a short-lived register, not associated with instruction */
+ Register rd = rR->reg;
Register rs = registerAlloc(GpRegs);
-
- SSE_MOVD(rr, rs);
+
+ SSE_MOVD(rd, rs);
LDQi(rs, val);
_allocator.addFree(rs);
}
}
}
else
{
const int32_t* p = (const int32_t*) (ins-2);
int dr = disp(rR);
- freeRsrcOf(ins, false);
STi(FP, dr+4, p[1]);
STi(FP, dr, p[0]);
}
+
+ freeRsrcOf(ins, false);
#endif
}
bool Assembler::asm_qlo(LInsp ins, LInsp q)
{
#if defined NANOJIT_IA32
if (!avmplus::AvmCore::use_sse2())
{
@@ -920,54 +1023,109 @@ namespace nanojit
FCHS();
// if we had more than one fpu reg, this is where
// we would move ra into rr if rr != ra.
}
#endif
}
+ void Assembler::asm_arg(ArgSize sz, LInsp p, Register r)
+ {
+ if (sz == ARGSIZE_Q)
+ {
+ // ref arg - use lea
+ if (r != UnknownReg)
+ {
+ // arg in specific reg
+ int da = findMemFor(p);
+ LEA(r, da, FP);
+ }
+ else
+ {
+ NanoAssert(0); // not supported
+ }
+ }
+ else if (sz == ARGSIZE_LO)
+ {
+ if (r != UnknownReg) {
+ // arg goes in specific register
+ if (p->isconst()) {
+ LDi(r, p->constval());
+ } else {
+ Reservation* rA = getresv(p);
+ if (rA) {
+ if (rA->reg == UnknownReg) {
+ // load it into the arg reg
+ int d = findMemFor(p);
+ if (p->isop(LIR_alloc)) {
+ LEA(r, d, FP);
+ } else {
+ LD(r, d, FP);
+ }
+ } else {
+ // it must be in a saved reg
+ MR(r, rA->reg);
+ }
+ }
+ else {
+ // this is the last use, so fine to assign it
+ // to the scratch reg, it's dead after this point.
+ findSpecificRegFor(p, r);
+ }
+ }
+ }
+ else {
+ asm_pusharg(p);
+ }
+ }
+ else
+ {
+ NanoAssert(sz == ARGSIZE_F);
+ asm_farg(p);
+ }
+ }
+
void Assembler::asm_pusharg(LInsp p)
{
// arg goes on stack
Reservation* rA = getresv(p);
- if (rA == 0)
+ if (rA == 0 && p->isconst())
{
- if (p->isconst())
- {
- // small const we push directly
- PUSHi(p->constval());
- }
- else
- {
- Register ra = findRegFor(p, GpRegs);
- PUSHr(ra);
- }
+ // small const we push directly
+ PUSHi(p->constval());
+ }
+ else if (rA == 0 || p->isop(LIR_alloc))
+ {
+ Register ra = findRegFor(p, GpRegs);
+ PUSHr(ra);
}
else if (rA->reg == UnknownReg)
{
PUSHm(disp(rA), FP);
}
else
{
PUSHr(rA->reg);
}
}
void Assembler::asm_farg(LInsp p)
{
#if defined NANOJIT_IA32
+ NanoAssert(p->isQuad());
Register r = findRegFor(p, FpRegs);
if (rmask(r) & XmmRegs) {
SSE_STQ(0, SP, r);
} else {
FSTPQ(0, SP);
}
- PUSHr(ECX); // 2*pushr is smaller than sub
- PUSHr(ECX);
+ SUBi(ESP,8);
+ //PUSHr(ECX); // 2*pushr is smaller than sub
+ //PUSHr(ECX);
#endif
}
void Assembler::asm_fop(LInsp ins)
{
LOpcode op = ins->opcode();
#if defined NANOJIT_IA32
if (avmplus::AvmCore::use_sse2())
@@ -992,17 +1150,20 @@ namespace nanojit
ra = findSpecificRegFor(lhs, rr);
} else if ((rmask(ra) & XmmRegs) == 0) {
/* We need this case on AMD64, because it's possible that
* an earlier instruction has done a quadword load and reserved a
* GPR. If so, ask for a new register.
*/
ra = findRegFor(lhs, XmmRegs);
}
- // else, rA already has a register assigned.
+ else {
+ // rA already has a register assigned but maybe not from the allow set
+ ra = findRegFor(lhs, allow);
+ }
if (lhs == rhs)
rb = ra;
if (op == LIR_fadd)
SSE_ADDSD(rr, rb);
else if (op == LIR_fsub)
SSE_SUBSD(rr, rb);
@@ -1185,36 +1346,107 @@ namespace nanojit
NanoAssertMsg(false, "Should not move data from GPR to XMM");
} else {
// xmm -> x87
NanoAssertMsg(false, "Should not move data from GPR/XMM to x87 FPU");
}
}
}
+ NIns * Assembler::asm_jmpcc(bool branchOnFalse, LIns *cond, NIns *targ)
+ {
+ LOpcode c = cond->opcode();
+ if (avmplus::AvmCore::use_sse2() && c != LIR_feq) {
+ LIns *lhs = cond->oprnd1();
+ LIns *rhs = cond->oprnd2();
+ if (c == LIR_flt) {
+ LIns *t = lhs; lhs = rhs; rhs = t;
+ c = LIR_fgt;
+ }
+ else if (c == LIR_fle) {
+ LIns *t = lhs; lhs = rhs; rhs = t;
+ c = LIR_fge;
+ }
+
+ if (c == LIR_fgt) {
+ if (branchOnFalse) { JNA(targ); } else { JA(targ); }
+ }
+ else { // if (c == LIR_fge)
+ if (branchOnFalse) { JNAE(targ); } else { JAE(targ); }
+ }
+ NIns *at = _nIns;
+ Reservation *rA, *rB;
+ findRegFor2(XmmRegs, lhs, rA, rhs, rB);
+ SSE_UCOMISD(rA->reg, rB->reg);
+ return at;
+ }
+
+ if (branchOnFalse)
+ JP(targ);
+ else
+ JNP(targ);
+ NIns *at = _nIns;
+ asm_fcmp(cond);
+ return at;
+ }
+
+ void Assembler::asm_setcc(Register r, LIns *cond)
+ {
+ LOpcode c = cond->opcode();
+ if (avmplus::AvmCore::use_sse2() && c != LIR_feq) {
+ MOVZX8(r,r);
+ LIns *lhs = cond->oprnd1();
+ LIns *rhs = cond->oprnd2();
+ if (c == LIR_flt) {
+ LIns *t = lhs; lhs = rhs; rhs = t;
+ SETA(r);
+ }
+ else if (c == LIR_fle) {
+ LIns *t = lhs; lhs = rhs; rhs = t;
+ SETAE(r);
+ }
+ else if (c == LIR_fgt) {
+ SETA(r);
+ }
+ else { // if (c == LIR_fge)
+ SETAE(r);
+ }
+ Reservation *rA, *rB;
+ findRegFor2(XmmRegs, lhs, rA, rhs, rB);
+ SSE_UCOMISD(rA->reg, rB->reg);
+ return;
+ }
+ // SETcc only sets low 8 bits, so extend
+ MOVZX8(r,r);
+ SETNP(r);
+ asm_fcmp(cond);
+ }
+
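+ // Note on the two helpers above (a sketch of the reasoning): UCOMISD reports
+ // through ZF/PF/CF rather than the signed-compare flags, so the generated
+ // branches and SETcc forms use the unsigned variants (JA/JAE, SETA/SETAE).
+ // LIR_feq still routes through asm_fcmp() with a parity check (JP/JNP, SETNP)
+ // because an unordered (NaN) comparison also sets ZF and would otherwise look
+ // like equality.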
void Assembler::asm_fcmp(LIns *cond)
{
LOpcode condop = cond->opcode();
NanoAssert(condop >= LIR_feq && condop <= LIR_fge);
LIns* lhs = cond->oprnd1();
LIns* rhs = cond->oprnd2();
int mask;
if (condop == LIR_feq)
mask = 0x44;
else if (condop == LIR_fle)
mask = 0x41;
else if (condop == LIR_flt)
mask = 0x05;
else if (condop == LIR_fge) {
// swap, use le
+ condop = LIR_fle;
LIns* t = lhs; lhs = rhs; rhs = t;
mask = 0x41;
} else { // if (condop == LIR_fgt)
// swap, use lt
+ condop = LIR_flt;
LIns* t = lhs; lhs = rhs; rhs = t;
mask = 0x05;
}
#if defined NANOJIT_IA32
if (avmplus::AvmCore::use_sse2())
{
#endif
@@ -1222,17 +1454,18 @@ namespace nanojit
// GREATER_THAN: ZF,PF,CF <- 000;
// LESS_THAN: ZF,PF,CF <- 001;
// EQUAL: ZF,PF,CF <- 100;
if (condop == LIR_feq && lhs == rhs) {
// nan check
Register r = findRegFor(lhs, XmmRegs);
SSE_UCOMISD(r, r);
- } else {
+ }
+ else {
#if defined NANOJIT_IA32
evict(EAX);
TEST_AH(mask);
LAHF();
#elif defined NANOJIT_AMD64
evict(RAX);
TEST_AL(mask);
POPr(RAX);
@@ -1379,10 +1612,24 @@ namespace nanojit
}
#endif
void Assembler::nativePageSetup()
{
if (!_nIns) _nIns = pageAlloc();
if (!_nExitIns) _nExitIns = pageAlloc(true);
}
+
+ // enough room for n bytes
+ void Assembler::underrunProtect(int n)
+ {
+ NIns *eip = this->_nIns;
+ Page *p = (Page*)pageTop(eip-1);
+ NIns *top = (NIns*) &p->code[0];
+ if (eip - n < top) {
+ _nIns = pageAlloc(_inExit);
+ JMP(eip);
+ }
+ }
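// A note on the check above (a sketch of the control flow): code is emitted
// backwards, so running short means _nIns - n would fall below the first code
// byte of its Page; a fresh page is then allocated and a JMP back to the
// already-emitted code at eip is written first, so execution simply flows
// across the seam.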
+
+
#endif /* FEATURE_NANOJIT */
}
--- a/js/src/nanojit/Nativei386.h
+++ b/js/src/nanojit/Nativei386.h
@@ -96,16 +96,17 @@ namespace nanojit
FirstReg = 0,
LastReg = 23,
UnknownReg = 24
}
Register;
typedef int RegisterMask;
+ static const int NumSavedRegs = 3;
static const RegisterMask SavedRegs = 1<<EBX | 1<<EDI | 1<<ESI;
static const RegisterMask GpRegs = SavedRegs | 1<<EAX | 1<<ECX | 1<<EDX;
static const RegisterMask XmmRegs = 1<<XMM0|1<<XMM1|1<<XMM2|1<<XMM3|1<<XMM4|1<<XMM5|1<<XMM6|1<<XMM7;
static const RegisterMask x87Regs = 1<<FST0;
static const RegisterMask FpRegs = x87Regs | XmmRegs;
static const RegisterMask ScratchRegs = 1<<EAX | 1<<ECX | 1<<EDX | FpRegs;
static const RegisterMask AllowableFlagRegs = 1<<EAX |1<<ECX | 1<<EDX | 1<<EBX;
@@ -127,33 +128,22 @@ namespace nanojit
#define DECLARE_PLATFORM_REGALLOC()
#define DECLARE_PLATFORM_ASSEMBLER() \
const static Register argRegs[2], retRegs[2]; \
bool x87Dirty; \
bool pad[3];\
void nativePageReset();\
void nativePageSetup();\
- void asm_farg(LInsp);
+ void underrunProtect(int);\
+ void asm_farg(LInsp);\
+ void asm_align_code();
#define swapptrs() { NIns* _tins = _nIns; _nIns=_nExitIns; _nExitIns=_tins; }
- // enough room for n bytes
- #define underrunProtect(n) \
- { \
- intptr_t u = n + sizeof(PageHeader)/sizeof(NIns) + 5; \
- if ( !samepage(_nIns-u,_nIns-1) ) \
- { \
- NIns *tt = _nIns; \
- _nIns = pageAlloc(_inExit); \
- int d = tt-_nIns; \
- JMP_long_nochk_offset(d); \
- } \
- } \
-
#define IMM32(i) \
_nIns -= 4; \
*((int32_t*)_nIns) = (int32_t)(i)
#define MODRMs(r,d,b,l,i) \
NanoAssert(unsigned(r)<8 && unsigned(b)<8 && unsigned(i)<8); \
if ((d) == 0 && (b) != EBP) { \
_nIns -= 2; \
@@ -166,18 +156,21 @@ namespace nanojit
_nIns[2] = (uint8_t) (d); \
} else { \
IMM32(d); \
*(--_nIns) = (uint8_t) ( (l)<<6 | (i)<<3 | (b) ); \
*(--_nIns) = (uint8_t) ( 2<<6 | (r)<<3 | 4 ); \
}
#define MODRMm(r,d,b) \
- NanoAssert(unsigned(r)<8 && unsigned(b)<8); \
- if ((b) == ESP) { \
+ NanoAssert(unsigned(r)<8 && ((b)==UnknownReg || unsigned(b)<8)); \
+ if ((b) == UnknownReg) {\
+ IMM32(d);\
+ *(--_nIns) = (uint8_t) (0<<6 | (r)<<3 | 5);\
+ } else if ((b) == ESP) { \
MODRMs(r, d, b, 0, (Register)4); \
} \
else if ( (d) == 0 && (b) != EBP) { \
*(--_nIns) = (uint8_t) ( 0<<6 | (r)<<3 | (b) ); \
} else if (isS8(d)) { \
*(--_nIns) = (uint8_t) (d); \
*(--_nIns) = (uint8_t) ( 1<<6 | (r)<<3 | (b) ); \
} else { \
@@ -339,17 +332,17 @@ namespace nanojit
underrunProtect(5); \
IMM32(i); \
NanoAssert(((unsigned)r)<8); \
*(--_nIns) = (uint8_t) (0xb8 | (r) ); \
asm_output2("mov %s,%d",gpn(r),i); } while(0)
#define ST(base,disp,reg) do { \
ALUm(0x89,reg,disp,base); \
- asm_output3("mov %d(%s),%s",disp,gpn(base),gpn(reg)); } while(0)
+ asm_output3("mov %d(%s),%s",disp,base==UnknownReg?"0":gpn(base),gpn(reg)); } while(0)
#define STi(base,disp,imm) do { \
underrunProtect(12); \
IMM32(imm); \
MODRMm(0, disp, base); \
*(--_nIns) = 0xc7; \
asm_output3("mov %d(%s),%d",disp,gpn(base),imm); } while(0)
@@ -492,17 +485,17 @@ namespace nanojit
#define LDSDm(r,addr)do { \
underrunProtect(8); \
const double* daddr = addr; \
IMM32(int32_t(daddr));\
*(--_nIns) = uint8_t(((r)&7)<<3|5); \
*(--_nIns) = 0x10;\
*(--_nIns) = 0x0f;\
*(--_nIns) = 0xf2;\
- asm_output3("movsd %s,%p // =%f",gpn(r),daddr,*daddr); \
+ asm_output3("movsd %s,(#%p) // =%f",gpn(r),(void*)daddr,*daddr); \
} while(0)
#define STSD(d,b,r)do { \
SSEm(0xf20f11, (r)&7, (d), (b)); \
asm_output3("movsd %d(%s),%s",(d),gpn(b),gpn(r)); \
} while(0)
#define SSE_LDQ(r,d,b)do { \
@@ -534,71 +527,80 @@ namespace nanojit
NanoAssert(_is_gp_reg_(s)); \
NanoAssert(_is_xmm_reg_(d)); \
SSE(0x660f6e, (d)&7, (s)&7); \
} \
asm_output2("movd %s,%s",gpn(d),gpn(s)); \
} while(0)
#define SSE_MOVSD(rd,rs) do{ \
+ NanoAssert(_is_xmm_reg_(rd) && _is_xmm_reg_(rs));\
SSE(0xf20f10, (rd)&7, (rs)&7); \
asm_output2("movsd %s,%s",gpn(rd),gpn(rs)); \
} while(0)
#define SSE_MOVDm(d,b,xrs) do {\
+ NanoAssert(_is_xmm_reg_(xrs) && _is_gp_reg_(b));\
SSEm(0x660f7e, (xrs)&7, d, b);\
asm_output3("movd %d(%s),%s", d, gpn(b), gpn(xrs));\
} while(0)
#define SSE_ADDSD(rd,rs) do{ \
+ NanoAssert(_is_xmm_reg_(rd) && _is_xmm_reg_(rs));\
SSE(0xf20f58, (rd)&7, (rs)&7); \
asm_output2("addsd %s,%s",gpn(rd),gpn(rs)); \
} while(0)
#define SSE_ADDSDm(r,addr)do { \
underrunProtect(8); \
+ NanoAssert(_is_xmm_reg_(r));\
const double* daddr = addr; \
IMM32(int32_t(daddr));\
*(--_nIns) = uint8_t(((r)&7)<<3|5); \
*(--_nIns) = 0x58;\
*(--_nIns) = 0x0f;\
*(--_nIns) = 0xf2;\
- asm_output3("addsd %s,%p // =%f",gpn(r),daddr,*daddr); \
+ asm_output3("addsd %s,%p // =%f",gpn(r),(void*)daddr,*daddr); \
} while(0)
#define SSE_SUBSD(rd,rs) do{ \
+ NanoAssert(_is_xmm_reg_(rd) && _is_xmm_reg_(rs));\
SSE(0xf20f5c, (rd)&7, (rs)&7); \
asm_output2("subsd %s,%s",gpn(rd),gpn(rs)); \
} while(0)
#define SSE_MULSD(rd,rs) do{ \
+ NanoAssert(_is_xmm_reg_(rd) && _is_xmm_reg_(rs));\
SSE(0xf20f59, (rd)&7, (rs)&7); \
asm_output2("mulsd %s,%s",gpn(rd),gpn(rs)); \
} while(0)
#define SSE_DIVSD(rd,rs) do{ \
+ NanoAssert(_is_xmm_reg_(rd) && _is_xmm_reg_(rs));\
SSE(0xf20f5e, (rd)&7, (rs)&7); \
asm_output2("divsd %s,%s",gpn(rd),gpn(rs)); \
} while(0)
#define SSE_UCOMISD(rl,rr) do{ \
+ NanoAssert(_is_xmm_reg_(rl) && _is_xmm_reg_(rr));\
SSE(0x660f2e, (rl)&7, (rr)&7); \
asm_output2("ucomisd %s,%s",gpn(rl),gpn(rr)); \
} while(0)
#define CVTSI2SDm(xr,d,b) do{ \
+ NanoAssert(_is_xmm_reg_(xr) && _is_gp_reg_(b));\
SSEm(0xf20f2a, (xr)&7, (d), (b)); \
asm_output3("cvtsi2sd %s,%d(%s)",gpn(xr),(d),gpn(b)); \
} while(0)
#define SSE_XORPD(r, maskaddr) do {\
- underrunProtect(8); \
+ underrunProtect(8); \
IMM32(maskaddr);\
*(--_nIns) = uint8_t(((r)&7)<<3|5); \
*(--_nIns) = 0x57;\
*(--_nIns) = 0x0f;\
*(--_nIns) = 0x66;\
- asm_output2("xorpd %s,[0x%p]",gpn(r),(maskaddr));\
+ asm_output2("xorpd %s,[0x%p]",gpn(r),(void*)(maskaddr));\
} while(0)
#define SSE_XORPDr(rd,rs) do{ \
SSE(0x660f57, (rd)&7, (rs)&7); \
asm_output2("xorpd %s,%s",gpn(rd),gpn(rs)); \
} while(0)
// floating point unit
@@ -652,19 +654,29 @@ namespace nanojit
#define FDIVR(d,b) do { FPUm(0xdc07, d, b); asm_output2("fdivr %d(%s)",d,gpn(b)); } while(0)
#define FINCSTP() do { FPUc(0xd9f7); asm_output2("fincstp"); } while(0)
#define FSTP(r) do { FPU(0xddd8, r&7); asm_output1("fstp %s",fpn(r)); fpu_pop();} while(0)
#define FCOMP() do { FPUc(0xD8D9); asm_output("fcomp"); fpu_pop();} while(0)
#define FCOMPP() do { FPUc(0xDED9); asm_output("fcompp"); fpu_pop();fpu_pop();} while(0)
#define FLDr(r) do { FPU(0xd9c0,r); asm_output1("fld %s",fpn(r)); fpu_push(); } while(0)
#define EMMS() do { FPUc(0x0f77); asm_output("emms"); } while (0)
+// standard direct call
#define CALL(c) do { \
underrunProtect(5); \
int offset = (c->_address) - ((int)_nIns); \
IMM32( (uint32_t)offset ); \
*(--_nIns) = 0xE8; \
verbose_only(asm_output1("call %s",(c->_name));) \
debug_only(if ((c->_argtypes&3)==ARGSIZE_F) fpu_push();)\
} while (0)
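// Worked example for the CALL macro above (a sketch): because code is emitted
// backwards, _nIns still points just past where the 5-byte call will sit when
// `offset` is computed, which is exactly the address an E8 rel32 is relative
// to. A callee at 0x1000 emitted while _nIns == 0x2005 therefore stores
// offset 0x1000 - 0x2005 == -0x1005 in the four bytes after the E8.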
+// indirect call thru register
+#define CALLr(c,r) do { \
+ underrunProtect(2);\
+ ALU(0xff, 2, (r));\
+ verbose_only(asm_output1("call %s",gpn(r));) \
+ debug_only(if ((c->_argtypes&3)==ARGSIZE_F) fpu_push();)\
+} while (0)
+
+
}
#endif // __nanojit_Nativei386__
--- a/js/src/nanojit/RegAlloc.cpp
+++ b/js/src/nanojit/RegAlloc.cpp
@@ -67,69 +67,66 @@ namespace nanojit
void RegAlloc::removeFree(Register r)
{
NanoAssert(isFree(r));
free &= ~rmask(r);
}
void RegAlloc::addActive(Register r, LIns* v)
{
- //addActiveCount++;
+ // Count++;
NanoAssert(v && r != UnknownReg && active[r] == NULL );
active[r] = v;
+ useActive(r);
}
+ void RegAlloc::useActive(Register r)
+ {
+ NanoAssert(r != UnknownReg && active[r] != NULL);
+ usepri[r] = priority++;
+ }
+
void RegAlloc::removeActive(Register r)
{
//registerReleaseCount++;
NanoAssert(r != UnknownReg);
NanoAssert(active[r] != NULL);
// remove the given register from the active list
active[r] = NULL;
}
- LIns* RegAlloc::getActive(Register r)
- {
- NanoAssert(r != UnknownReg);
- return active[r];
- }
-
void RegAlloc::retire(Register r)
{
NanoAssert(r != UnknownReg);
NanoAssert(active[r] != NULL);
active[r] = NULL;
free |= rmask(r);
}
- // scan table for instruction with longest span
- LIns* Assembler::findVictim(RegAlloc ®s, RegisterMask allow, RegisterMask prefer)
+ // scan table for instruction with the lowest priority, meaning it is used
+ // furthest in the future.
+ LIns* Assembler::findVictim(RegAlloc ®s, RegisterMask allow)
{
- NanoAssert(allow != 0 && (allow&prefer)==prefer);
- LIns *i, *a=0, *p = 0;
- int acost=10, pcost=10;
+ NanoAssert(allow != 0);
+ LIns *i, *a=0;
+ int allow_pri = 0x7fffffff;
for (Register r=FirstReg; r <= LastReg; r = nextreg(r))
{
if ((allow & rmask(r)) && (i = regs.getActive(r)) != 0)
{
- int cost = getresv(i)->cost;
- if (!a || cost < acost || cost == acost && nbr(i) < nbr(a)) {
+ int pri = canRemat(i) ? 0 : regs.getPriority(r);
+ if (!a || pri < allow_pri) {
a = i;
- acost = cost;
- }
- if (prefer & rmask(r)) {
- if (!p || cost < pcost || cost == pcost && nbr(i) < nbr(p)) {
- p = i;
- pcost = cost;
- }
+ allow_pri = pri;
}
}
}
- return acost < pcost ? a : p;
+ NanoAssert(a != 0);
+ return a;
}
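// Small example of the policy above (a sketch): with EBX active at priority 7,
// ESI at priority 42, and ECX holding a value that canRemat() can rebuild for
// free (treated as priority 0), ECX is evicted first; without a rematerializable
// candidate, the lowest-priority register (EBX here) is chosen, since its value
// was touched least recently by the allocator.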
#ifdef NJ_VERBOSE
/* static */ void RegAlloc::formatRegisters(RegAlloc& regs, char* s, Fragment *frag)
{
if (!frag || !frag->lirbuf)
return;
LirNameMap *names = frag->lirbuf->names;
--- a/js/src/nanojit/RegAlloc.h
+++ b/js/src/nanojit/RegAlloc.h
@@ -46,37 +46,50 @@ namespace nanojit
inline RegisterMask rmask(Register r)
{
return 1 << r;
}
class RegAlloc MMGC_SUBCLASS_DECL
{
public:
- RegAlloc() {}
+ RegAlloc() : free(0), used(0), priority(0) {}
void clear();
bool isFree(Register r);
void addFree(Register r);
void removeFree(Register r);
void addActive(Register r, LIns* ins);
+ void useActive(Register r);
void removeActive(Register r);
- LIns* getActive(Register r);
void retire(Register r);
+ bool isValid() {
+ return (free|used) != 0;
+ }
+
+ int32_t getPriority(Register r) {
+ NanoAssert(r != UnknownReg && active[r]);
+ return usepri[r];
+ }
+
+ LIns* getActive(Register r) {
+ NanoAssert(r != UnknownReg);
+ return active[r];
+ }
debug_only( uint32_t countFree(); )
debug_only( uint32_t countActive(); )
debug_only( void checkCount(); )
debug_only( bool isConsistent(Register r, LIns* v); )
debug_only( uint32_t count; )
debug_only( RegisterMask managed; ) // bitfield of 0..NJ_MAX_REGISTERS denoting which are under our management
- // RegisterMask is a 32-bit value, so we can never have more than 32 active.
- // hardcode 32 here in case we have non-contiguous register numbers
- LIns* active[32]; // active[r] = OP that defines r
+ LIns* active[LastReg + 1]; // active[r] = OP that defines r
+ int32_t usepri[LastReg + 1]; // used priority. lower = more likely to spill.
RegisterMask free;
RegisterMask used;
+ int32_t priority;
verbose_only( static void formatRegisters(RegAlloc& regs, char* s, Fragment*); )
DECLARE_PLATFORM_REGALLOC()
};
}
#endif // __nanojit_RegAlloc__
--- a/js/src/nanojit/avmplus.h
+++ b/js/src/nanojit/avmplus.h
@@ -321,16 +321,18 @@ public:
GetGCHeap()
{
return &heap;
}
};
#define DWB(x) x
#define DRCWB(x) x
+#define WB(gc, container, addr, value) do { *(addr) = (value); } while(0)
+#define WBRC(gc, container, addr, value) do { *(addr) = (value); } while(0)
#define MMGC_MEM_TYPE(x)
typedef int FunctionID;
namespace avmplus
{
struct InterpState
--- a/js/src/nanojit/nanojit.h
+++ b/js/src/nanojit/nanojit.h
@@ -37,16 +37,18 @@
* ***** END LICENSE BLOCK ***** */
#ifndef __nanojit_h__
#define __nanojit_h__
#include <stddef.h>
#include "avmplus.h"
+#ifdef FEATURE_NANOJIT
+
#ifdef AVMPLUS_IA32
#define NANOJIT_IA32
#elif AVMPLUS_ARM
#define NANOJIT_ARM
#elif AVMPLUS_PPC
#define NANOJIT_PPC
#elif AVMPLUS_AMD64
#define NANOJIT_AMD64
@@ -68,16 +70,18 @@ namespace nanojit
class RegAlloc;
typedef avmplus::AvmCore AvmCore;
typedef avmplus::OSDep OSDep;
typedef avmplus::GCSortedMap<const void*,Fragment*,avmplus::LIST_GCObjects> FragmentMap;
typedef avmplus::SortedMap<SideExit*,RegAlloc*,avmplus::LIST_GCObjects> RegAllocMap;
typedef avmplus::List<LIns*,avmplus::LIST_NonGCObjects> InsList;
typedef avmplus::List<char*, avmplus::LIST_GCObjects> StringList;
+ const uint32_t MAXARGS = 8;
+
#if defined(_MSC_VER) && _MSC_VER < 1400
static void NanoAssertMsgf(bool a,const char *f,...) {}
static void NanoAssertMsg(bool a,const char *m) {}
static void NanoAssert(bool a) {}
#elif defined(_DEBUG)
#define __NanoAssertMsgf(a, file_, line_, f, ...) \
if (!(a)) { \
@@ -108,22 +112,22 @@ namespace nanojit
#define NJ_PROFILE 1
#endif
#ifdef NJ_VERBOSE
#include <stdio.h>
#define verbose_output if (verbose_enabled()) Assembler::output
#define verbose_outputf if (verbose_enabled()) Assembler::outputf
#define verbose_enabled() (_verbose)
- #define verbose_only(x) x
+ #define verbose_only(...) __VA_ARGS__
#else
#define verbose_output
#define verbose_outputf
#define verbose_enabled()
- #define verbose_only(x)
+ #define verbose_only(...)
#endif /*NJ_VERBOSE*/
#ifdef _DEBUG
#define debug_only(x) x
#else
#define debug_only(x)
#endif /* DEBUG */
@@ -167,9 +171,10 @@ namespace nanojit
#include "Native.h"
#include "LIR.h"
#include "RegAlloc.h"
#include "Fragmento.h"
#include "Assembler.h"
#include "TraceTreeDrawer.h"
+#endif // FEATURE_NANOJIT
#endif // __nanojit_h__
deleted file mode 100644
--- a/js/src/t/crypto-sha1.js
+++ /dev/null
@@ -1,224 +0,0 @@
-/*
- * A JavaScript implementation of the Secure Hash Algorithm, SHA-1, as defined
- * in FIPS PUB 180-1
- * Version 2.1a Copyright Paul Johnston 2000 - 2002.
- * Other contributors: Greg Holt, Andrew Kepert, Ydnar, Lostinet
- * Distributed under the BSD License
- * See http://pajhome.org.uk/crypt/md5 for details.
- */
-
-/*
- * Configurable variables. You may need to tweak these to be compatible with
- * the server-side, but the defaults work in most cases.
- */
-var hexcase = 0; /* hex output format. 0 - lowercase; 1 - uppercase */
-var b64pad = ""; /* base-64 pad character. "=" for strict RFC compliance */
-var chrsz = 8; /* bits per input character. 8 - ASCII; 16 - Unicode */
-
-/*
- * These are the functions you'll usually want to call
- * They take string arguments and return either hex or base-64 encoded strings
- */
-function hex_sha1(s){return binb2hex(core_sha1(str2binb(s),s.length * chrsz));}
-function b64_sha1(s){return binb2b64(core_sha1(str2binb(s),s.length * chrsz));}
-function str_sha1(s){return binb2str(core_sha1(str2binb(s),s.length * chrsz));}
-function hex_hmac_sha1(key, data){ return binb2hex(core_hmac_sha1(key, data));}
-function b64_hmac_sha1(key, data){ return binb2b64(core_hmac_sha1(key, data));}
-function str_hmac_sha1(key, data){ return binb2str(core_hmac_sha1(key, data));}
-
-/*
- * Perform a simple self-test to see if the VM is working
- */
-function sha1_vm_test()
-{
- return hex_sha1("abc") == "a9993e364706816aba3e25717850c26c9cd0d89d";
-}
-
-/*
- * Calculate the SHA-1 of an array of big-endian words, and a bit length
- */
-function core_sha1(x, len)
-{
- /* append padding */
- x[len >> 5] |= 0x80 << (24 - len % 32);
- x[((len + 64 >> 9) << 4) + 15] = len;
-
- var w = Array(80);
- var a = 1732584193;
- var b = -271733879;
- var c = -1732584194;
- var d = 271733878;
- var e = -1009589776;
-
- for(var i = 0; i < x.length; i += 16)
- {
- var olda = a;
- var oldb = b;
- var oldc = c;
- var oldd = d;
- var olde = e;
-
- for(var j = 0; j < 80; j++)
- {
- if(j < 16) w[j] = x[i + j];
- else w[j] = rol(w[j-3] ^ w[j-8] ^ w[j-14] ^ w[j-16], 1);
- var t = safe_add(safe_add(rol(a, 5), sha1_ft(j, b, c, d)),
- safe_add(safe_add(e, w[j]), sha1_kt(j)));
- e = d;
- d = c;
- c = rol(b, 30);
- b = a;
- a = t;
- }
-
- a = safe_add(a, olda);
- b = safe_add(b, oldb);
- c = safe_add(c, oldc);
- d = safe_add(d, oldd);
- e = safe_add(e, olde);
- }
- return Array(a, b, c, d, e);
-
-}
-
-/*
- * Perform the appropriate triplet combination function for the current
- * iteration
- */
-function sha1_ft(t, b, c, d)
-{
- if(t < 20) return (b & c) | ((~b) & d);
- if(t < 40) return b ^ c ^ d;
- if(t < 60) return (b & c) | (b & d) | (c & d);
- return b ^ c ^ d;
-}
-
-/*
- * Determine the appropriate additive constant for the current iteration
- */
-function sha1_kt(t)
-{
- return (t < 20) ? 1518500249 : (t < 40) ? 1859775393 :
- (t < 60) ? -1894007588 : -899497514;
-}
-
-/*
- * Calculate the HMAC-SHA1 of a key and some data
- */
-function core_hmac_sha1(key, data)
-{
- var bkey = str2binb(key);
- if(bkey.length > 16) bkey = core_sha1(bkey, key.length * chrsz);
-
- var ipad = Array(16), opad = Array(16);
- for(var i = 0; i < 16; i++)
- {
- ipad[i] = bkey[i] ^ 0x36363636;
- opad[i] = bkey[i] ^ 0x5C5C5C5C;
- }
-
- var hash = core_sha1(ipad.concat(str2binb(data)), 512 + data.length * chrsz);
- return core_sha1(opad.concat(hash), 512 + 160);
-}
-
-/*
- * Add integers, wrapping at 2^32. This uses 16-bit operations internally
- * to work around bugs in some JS interpreters.
- */
-function safe_add(x, y)
-{
- var lsw = (x & 0xFFFF) + (y & 0xFFFF);
- var msw = (x >> 16) + (y >> 16) + (lsw >> 16);
- return (msw << 16) | (lsw & 0xFFFF);
-}
-
-/*
- * Bitwise rotate a 32-bit number to the left.
- */
-function rol(num, cnt)
-{
- return (num << cnt) | (num >>> (32 - cnt));
-}
-
-/*
- * Convert an 8-bit or 16-bit string to an array of big-endian words
- * In 8-bit function, characters >255 have their hi-byte silently ignored.
- */
-function str2binb(str)
-{
- var bin = Array();
- var mask = (1 << chrsz) - 1;
- for(var i = 0; i < str.length * chrsz; i += chrsz)
- bin[i>>5] |= (str.charCodeAt(i / chrsz) & mask) << (32 - chrsz - i%32);
- return bin;
-}
-
-/*
- * Convert an array of big-endian words to a string
- */
-function binb2str(bin)
-{
- var str = "";
- var mask = (1 << chrsz) - 1;
- for(var i = 0; i < bin.length * 32; i += chrsz)
- str += String.fromCharCode((bin[i>>5] >>> (32 - chrsz - i%32)) & mask);
- return str;
-}
-
-/*
- * Convert an array of big-endian words to a hex string.
- */
-function binb2hex(binarray)
-{
- var hex_tab = hexcase ? "0123456789ABCDEF" : "0123456789abcdef";
- var str = "";
- for(var i = 0; i < binarray.length * 4; i++)
- {
- str += hex_tab.charAt((binarray[i>>2] >> ((3 - i%4)*8+4)) & 0xF) +
- hex_tab.charAt((binarray[i>>2] >> ((3 - i%4)*8 )) & 0xF);
- }
- return str;
-}
-
-/*
- * Convert an array of big-endian words to a base-64 string
- */
-function binb2b64(binarray)
-{
- var tab = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
- var str = "";
- for(var i = 0; i < binarray.length * 4; i += 3)
- {
- var triplet = (((binarray[i >> 2] >> 8 * (3 - i %4)) & 0xFF) << 16)
- | (((binarray[i+1 >> 2] >> 8 * (3 - (i+1)%4)) & 0xFF) << 8 )
- | ((binarray[i+2 >> 2] >> 8 * (3 - (i+2)%4)) & 0xFF);
- for(var j = 0; j < 4; j++)
- {
- if(i * 8 + j * 6 > binarray.length * 32) str += b64pad;
- else str += tab.charAt((triplet >> 6*(3-j)) & 0x3F);
- }
- }
- return str;
-}
-
-
-var plainText = "Two households, both alike in dignity,\n\
-In fair Verona, where we lay our scene,\n\
-From ancient grudge break to new mutiny,\n\
-Where civil blood makes civil hands unclean.\n\
-From forth the fatal loins of these two foes\n\
-A pair of star-cross'd lovers take their life;\n\
-Whole misadventured piteous overthrows\n\
-Do with their death bury their parents' strife.\n\
-The fearful passage of their death-mark'd love,\n\
-And the continuance of their parents' rage,\n\
-Which, but their children's end, nought could remove,\n\
-Is now the two hours' traffic of our stage;\n\
-The which if you with patient ears attend,\n\
-What here shall miss, our toil shall strive to mend.";
-
-for (var i = 0; i <4; i++) {
- plainText += plainText;
-}
-
-var sha1Output = hex_sha1(plainText);