Synced nanojit with TT tip.

Summary of the sync: the per-argument LIR instructions (LIR_arg, LIR_farg,
LIR_ref) are removed; calls now carry one-byte argument offsets packed into
words directly below the call instruction. skip/tramp split into near (24-bit
delta) and far (full-pointer) forms. The state/param1/sp/rp pins move from
Fragment to LirBuffer. ArgSize becomes an enum, CallInfo::get_sizes() decodes
the packed _argtypes field, and the per-backend nArgEmitted/nPostCallCleanup
hooks are folded into asm_call/asm_arg.
--- a/js/src/jstracer.cpp
+++ b/js/src/jstracer.cpp
@@ -403,23 +403,23 @@ public:
*m++ = getStoreType(*vp));
return out->insGuard(v, c, x);
}
/* Sink all type casts targeting the stack into the side exit by simply storing the original
(uncasted) value. Each guard generates the side exit map based on the types of the
last stores to every stack location, so it's safe to not perform them on-trace. */
virtual LInsp insStore(LIns* value, LIns* base, LIns* disp) {
- if (base == _fragment->sp && isPromoteInt(value))
+ if (base == _fragment->lirbuf->sp && isPromoteInt(value))
value = demote(out, value);
return out->insStore(value, base, disp);
}
virtual LInsp insStorei(LIns* value, LIns* base, int32_t d) {
- if (base == _fragment->sp && isPromoteInt(value))
+ if (base == _fragment->lirbuf->sp && isPromoteInt(value))
value = demote(out, value);
return out->insStorei(value, base, d);
}
};
TraceRecorder::TraceRecorder(JSContext* cx, Fragmento* fragmento, Fragment* _fragment)
{
this->cx = cx;
@@ -486,25 +486,25 @@ TraceRecorder::TraceRecorder(JSContext*
*m++ = getCoercedType(*vp)
);
} else {
/* recompiling the trace, we already have a fragment info structure */
fragmentInfo = (VMFragmentInfo*)fragment->vmprivate;
}
fragment->vmprivate = fragmentInfo;
- fragment->state = lir->insImm8(LIR_param, Assembler::argRegs[0], 0);
- fragment->param1 = lir->insImm8(LIR_param, Assembler::argRegs[1], 0);
- fragment->sp = lir->insLoadi(fragment->state, offsetof(InterpState, sp));
- fragment->rp = lir->insLoadi(fragment->state, offsetof(InterpState, rp));
- cx_ins = lir->insLoadi(fragment->state, offsetof(InterpState, cx));
+ fragment->lirbuf->state = lir->insParam(0);
+ fragment->lirbuf->param1 = lir->insParam(1);
+ fragment->lirbuf->sp = lir->insLoadi(fragment->lirbuf->state, offsetof(InterpState, sp));
+ fragment->lirbuf->rp = lir->insLoadi(fragment->lirbuf->state, offsetof(InterpState, rp));
+ cx_ins = lir->insLoadi(fragment->lirbuf->state, offsetof(InterpState, cx));
#ifdef DEBUG
- lirbuf->names->addName(fragment->state, "state");
- lirbuf->names->addName(fragment->sp, "sp");
- lirbuf->names->addName(fragment->rp, "rp");
+ lirbuf->names->addName(fragment->lirbuf->state, "state");
+ lirbuf->names->addName(fragment->lirbuf->sp, "sp");
+ lirbuf->names->addName(fragment->lirbuf->rp, "rp");
lirbuf->names->addName(cx_ins, "cx");
#endif
uint8* m = fragmentInfo->typeMap;
FORALL_SLOTS_IN_PENDING_FRAMES(cx, fragmentInfo->ngslots, fragmentInfo->gslots,
entryFrame, entryFrame,
import(vp, *m, vpname, vpnum);
m++
@@ -849,20 +849,20 @@ TraceRecorder::import(jsval* p, uint8& t
not me, so don't blame the messenger. */
ptrdiff_t offset = -fragmentInfo->nativeStackBase + nativeFrameOffset(p) + 8;
if (TYPEMAP_GET_TYPE(t) == JSVAL_INT) { /* demoted */
JS_ASSERT(isInt32(*p));
/* Ok, we have a valid demotion attempt pending, so insert an integer
read and promote it to double since all arithmetic operations expect
to see doubles on entry. The first op to use this slot will emit an
f2i cast which will cancel out the i2f we insert here. */
- ins = lir->ins1(LIR_i2f, lir->insLoadi(fragment->sp, offset));
+ ins = lir->ins1(LIR_i2f, lir->insLoadi(fragment->lirbuf->sp, offset));
} else {
JS_ASSERT(isNumber(*p) == (TYPEMAP_GET_TYPE(t) == JSVAL_DOUBLE));
- ins = lir->insLoad(t == JSVAL_DOUBLE ? LIR_ldq : LIR_ld, fragment->sp, offset);
+ ins = lir->insLoad(t == JSVAL_DOUBLE ? LIR_ldq : LIR_ld, fragment->lirbuf->sp, offset);
}
tracker.set(p, ins);
#ifdef DEBUG
char name[16];
JS_ASSERT(strlen(prefix) < 10);
JS_snprintf(name, sizeof name, "$%s%d", prefix, index);
lirbuf->names->addName(ins, name);
static const char* typestr[] = {
@@ -874,17 +874,17 @@ TraceRecorder::import(jsval* p, uint8& t
/* Update the tracker. If the value is part of any argv/vars/stack of any
currently active frame (onFrame), then issue a write-back store. */
void
TraceRecorder::set(jsval* p, LIns* i)
{
tracker.set(p, i);
if (onFrame(p))
- lir->insStorei(i, fragment->sp, -fragmentInfo->nativeStackBase + nativeFrameOffset(p) + 8);
+ lir->insStorei(i, fragment->lirbuf->sp, -fragmentInfo->nativeStackBase + nativeFrameOffset(p) + 8);
}
LIns*
TraceRecorder::get(jsval* p)
{
return tracker.get(p);
}
@@ -1027,19 +1027,19 @@ void
TraceRecorder::stop()
{
fragment->blacklist();
}
int
nanojit::StackFilter::getTop(LInsp guard)
{
- if (sp == frag->sp)
+ if (sp == frag->lirbuf->sp)
return guard->exit()->sp_adj + 8;
- JS_ASSERT(sp == frag->rp);
+ JS_ASSERT(sp == frag->lirbuf->rp);
return guard->exit()->rp_adj + 4;
}
#if defined NJ_VERBOSE
void
nanojit::LirNameMap::formatGuard(LIns *i, char *out)
{
uint32_t ip;
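
[Annotation — not part of the patch] The insStore/import pair above leans on a
lossless int32 <-> double round-trip: a double holding an exact int32 may be
stored demoted on the native stack and re-promoted with LIR_i2f on import, so
the typemap in the side exits stays truthful. A minimal self-contained sketch,
with isPromotableInt as a hypothetical stand-in for isPromoteInt/demote:

// sketch only; names here are stand-ins, not nanojit API
#include <cassert>
#include <cstdint>

static bool isPromotableInt(double d)
{
    int32_t i = int32_t(d);
    return double(i) == d;              // value is an exact int32
}

int main()
{
    double v = 42.0;
    assert(isPromotableInt(v));
    int32_t demoted = int32_t(v);       // what the demoted store writes
    double promoted = double(demoted);  // what the LIR_i2f on import rebuilds
    assert(promoted == v);              // round-trip loses nothing
}
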
--- a/js/src/nanojit/Assembler.cpp
+++ b/js/src/nanojit/Assembler.cpp
@@ -55,24 +55,19 @@ namespace nanojit
{
Assembler *assm;
public:
DeadCodeFilter(LirFilter *in, Assembler *a) : LirFilter(in), assm(a) {}
LInsp read() {
for (;;) {
LInsp i = in->read();
if (!i || i->isGuard()
- || i->isCall() && !assm->_functions[i->imm8()]._cse
+ || i->isCall() && !assm->_functions[i->fid()]._cse
|| !assm->ignoreInstruction(i))
return i;
- if (i->isCall()) {
- // skip args
- while (in->pos()->isArg())
- in->read();
- }
}
}
};
#ifdef NJ_VERBOSE
class VerboseBlockReader: public LirFilter
{
Assembler *assm;
@@ -98,17 +93,17 @@ namespace nanojit
return i;
}
if (i->isGuard()) {
flush();
block.add(i);
if (i->oprnd1())
block.add(i->oprnd1());
}
- else if (!i->isArg()) {
+ else {
block.add(i);
}
return i;
}
};
#endif
/**
@@ -210,17 +205,17 @@ namespace nanojit
_resvFree = r->arIndex;
r->reg = UnknownReg;
r->arIndex = 0;
if (!item)
setError(ResvFull);
if (i->isconst() || i->isconstq())
r->cost = 0;
- else if (i == _thisfrag->sp || i == _thisfrag->rp)
+ else if (i == _thisfrag->lirbuf->sp || i == _thisfrag->lirbuf->rp)
r->cost = 2;
else
r->cost = 1;
i->setresv(item);
return r;
}
@@ -309,17 +304,17 @@ namespace nanojit
{
if (error()) return;
// _nIns and _nExitIns need to be at least on
// one of these pages
NanoAssertMsg( onPage(_nIns) && onPage(_nExitIns,true), "Native instruction pointer oversteps paging bounds; check overrideProtect for last instruction");
}
#endif
- const CallInfo* Assembler::callInfoFor(int32_t fid)
+ const CallInfo* Assembler::callInfoFor(uint32_t fid)
{
NanoAssert(fid < CI_Max);
return &_functions[fid];
}
#ifdef _DEBUG
void Assembler::resourceConsistencyCheck()
@@ -620,17 +615,17 @@ namespace nanojit
//verbose_only( verbose_outputf(" LIR_xend swapptrs, _nIns is now %08X(%08X), _nExitIns is now %08X(%08X)",_nIns, *_nIns,_nExitIns,*_nExitIns) );
debug_only( _sv_fpuStkDepth = _fpuStkDepth; _fpuStkDepth = 0; )
nFragExit(guard);
// if/when we patch this exit to jump over to another fragment,
// that fragment will need its parameters set up just like ours.
- LInsp stateins = _thisfrag->state;
+ LInsp stateins = _thisfrag->lirbuf->state;
Register state = findSpecificRegFor(stateins, Register(stateins->imm8()));
asm_bailout(guard, state);
mergeRegisterState(capture);
// this can be useful for breaking whenever an exit is taken
//INT3();
//NOP();
@@ -655,17 +650,17 @@ namespace nanojit
verbose_only(_stats.exitnative += (_stats.native-nativeSave));
return jmpTarget;
}
bool Assembler::ignoreInstruction(LInsp ins)
{
LOpcode op = ins->opcode();
- if (ins->isStore() || op == LIR_loop || ins->isArg())
+ if (ins->isStore() || op == LIR_loop)
return false;
return getresv(ins) == 0;
}
void Assembler::beginAssembly(RegAllocMap* branchStateMap)
{
_activation.lowwatermark = 1;
_activation.tos = _activation.lowwatermark;
@@ -701,18 +696,18 @@ namespace nanojit
{
if (error()) return;
AvmCore *core = _frago->core();
GC *gc = core->gc;
_thisfrag = frag;
// set up backwards pipeline: assembler -> StackFilter -> LirReader
LirReader bufreader(frag->lastIns);
- StackFilter storefilter1(&bufreader, gc, frag, frag->sp);
- StackFilter storefilter2(&storefilter1, gc, frag, frag->rp);
+ StackFilter storefilter1(&bufreader, gc, frag, frag->lirbuf->sp);
+ StackFilter storefilter2(&storefilter1, gc, frag, frag->lirbuf->rp);
DeadCodeFilter deadfilter(&storefilter2, this);
LirFilter* rdr = &deadfilter;
verbose_only(
VerboseBlockReader vbr(rdr, this, frag->lirbuf->names);
if (verbose_enabled())
rdr = &vbr;
)
@@ -826,21 +821,16 @@ namespace nanojit
reserveFree(i);
}
}
}
}
void Assembler::gen(LirFilter* reader, NInsList& loopJumps)
{
- _call = NULL;
- _iargs = 0;
- _fargs = 0;
- _stackUsed = 0;
-
// trace must start with LIR_x or LIR_loop
NanoAssert(reader->pos()->isop(LIR_x) || reader->pos()->isop(LIR_loop));
for (LInsp ins = reader->read(); ins != 0 && !error(); ins = reader->read())
{
Reservation *rR = getresv(ins);
LOpcode op = ins->opcode();
switch(op)
@@ -1238,17 +1228,17 @@ namespace nanojit
#ifdef NJ_VERBOSE
// branching from this frag to ourself.
if (_frago->core()->config.show_stats)
LDi(argRegs[1], int((Fragment*)_thisfrag));
#endif
// restore first parameter, the only one we use
- LInsp state = _thisfrag->state;
+ LInsp state = _thisfrag->lirbuf->state;
Register a0 = Register(state->imm8());
findSpecificRegFor(state, a0);
break;
}
#ifndef NJ_SOFTFLOAT
case LIR_feq:
case LIR_fle:
case LIR_flt:
@@ -1300,76 +1290,22 @@ namespace nanojit
SETBE(r);
else if (op == LIR_ugt)
SETA(r);
else // if (op == LIR_uge)
SETAE(r);
asm_cmp(ins);
break;
}
- case LIR_ref:
- {
- // ref arg - use lea
- LIns *p = ins->oprnd1();
- if (ins->resv())
- {
- // arg in specific reg
- Register r = imm2register(ins->resv());
- int da = findMemFor(p);
- LEA(r, da, FP);
- }
- else
- {
- NanoAssert(0); // not supported
- }
- ++_iargs;
- nArgEmitted(_call, 0, _iargs, _fargs);
- break;
- }
- case LIR_arg:
- {
- LIns* p = ins->oprnd1();
- if (ins->resv())
- {
- // arg goes in specific register
- Register r = imm2register(ins->resv());
- if (p->isconst())
- LDi(r, p->constval());
- else
- findSpecificRegFor(p, r);
- }
- else
- {
- asm_pusharg(p);
- _stackUsed += 1;
- }
- ++_iargs;
- nArgEmitted(_call, _stackUsed, _iargs, _fargs);
- break;
- }
-#if defined NANOJIT_IA32 || defined NANOJIT_AMD64
- case LIR_farg:
- {
- asm_farg(ins);
- break;
- }
-#endif
#ifndef NJ_SOFTFLOAT
case LIR_fcall:
#endif
case LIR_call:
{
- const FunctionID fid = (FunctionID) ins->imm8();
- // bogus assertion: zero is a legal value right now, with fmod() in that slot
- // NanoAssertMsg(fid!=0, "Function does not exist in the call table");
- _call = &_functions[ fid ];
- _iargs = 0;
- _fargs = 0;
-
Register rr = UnknownReg;
#ifndef NJ_SOFTFLOAT
if (op == LIR_fcall)
{
rr = asm_prep_fcall(rR, ins);
}
else
#endif
@@ -1378,66 +1314,63 @@ namespace nanojit
rr = retRegs[0];
prepResultReg(ins, rmask(rr));
}
// do this after we've handled the call result, so we don't
// force the call result to be spilled unnecessarily.
restoreCallerSaved();
- nPostCallCleanup(_call);
- #ifdef NJ_VERBOSE
- CALL(_call->_address, _call->_name);
- #else
- CALL(_call->_address, "");
- #endif
-
- _stackUsed = 0;
- LirReader argReader(reader->pos());
-
-#ifdef NANOJIT_ARM
- // pre-assign registers R0-R3 for arguments (if they fit)
- int regsUsed = 0;
- for (LInsp a = argReader.read(); a->isArg(); a = argReader.read())
- {
- if (a->isop(LIR_arg) || a->isop(LIR_ref))
- {
- a->setresv((int)R0 + 1 + regsUsed);
- regsUsed++;
- }
- if (regsUsed>=4)
- break;
- }
-#endif
-#ifdef NANOJIT_IA32
- debug_only( if (rr == FST0) fpu_push(); )
- // make sure fpu stack is empty before call (restoreCallerSaved)
- NanoAssert(_allocator.isFree(FST0));
- // note: this code requires that LIR_ref arguments be one of the first two arguments
- // pre-assign registers to the first 2 4B args
- const uint32_t iargs = _call->count_iargs();
- const int max_regs = (iargs < 2) ? iargs : 2;
- int n = 0;
- for(LIns* a = argReader.read(); a->isArg() && n<max_regs; a = argReader.read())
- {
- if (a->isop(LIR_arg)||a->isop(LIR_ref))
- {
- a->setresv(argRegs[n++]); // tell LIR_arg what reg to use
- }
- }
-#endif
+ asm_call(ins);
}
}
// check that all is well (don't check in exit paths since it's more complicated)
debug_only( pageValidate(); )
debug_only( resourceConsistencyCheck(); )
}
}
+ void Assembler::asm_arg(ArgSize sz, LInsp p, Register r)
+ {
+ if (sz == ARGSIZE_Q)
+ {
+ // ref arg - use lea
+ if (r != UnknownReg)
+ {
+ // arg in specific reg
+ int da = findMemFor(p);
+ LEA(r, da, FP);
+ }
+ else
+ {
+ NanoAssert(0); // not supported
+ }
+ }
+ else if (sz == ARGSIZE_LO)
+ {
+ if (r != UnknownReg)
+ {
+ // arg goes in specific register
+ if (p->isconst())
+ LDi(r, p->constval());
+ else
+ findSpecificRegFor(p, r);
+ }
+ else
+ {
+ asm_pusharg(p);
+ }
+ }
+ else
+ {
+ asm_farg(p);
+ }
+ }
+
uint32_t Assembler::arFree(uint32_t idx)
{
if (idx > 0 && _activation.entry[idx] == _activation.entry[idx+stack_direction(1)])
_activation.entry[idx+stack_direction(1)] = 0; // clear 2 slots for doubles
_activation.entry[idx] = 0;
return 0;
}
@@ -1486,17 +1419,17 @@ namespace nanojit
}
)
#endif
}
#endif
uint32_t Assembler::arReserve(LIns* l)
{
- NanoAssert(!l->isop(LIR_tramp));
+ NanoAssert(!l->isTramp());
//verbose_only(printActivationState());
const bool quad = l->isQuad();
const int32_t n = _activation.tos;
int32_t start = _activation.lowwatermark;
int32_t i = 0;
NanoAssert(start>0);
if (n >= NJ_MAX_STACK_ENTRY-2)
@@ -1687,11 +1620,31 @@ namespace nanojit
uint32_t argt = _argtypes;
for (int i = 0; i < 5; ++i)
{
argt >>= 2;
argc += (argt & mask) != 0;
}
return argc;
}
+
+ uint32_t CallInfo::get_sizes(ArgSize* sizes) const
+ {
+ uint32_t argt = _argtypes;
+ uint32_t argc = 0;
+ for (int32_t i = 0; i < 5; i++) {
+ argt >>= 2;
+ ArgSize a = ArgSize(argt&3);
+#ifdef NJ_SOFTFLOAT
+ if (a == ARGSIZE_F) {
+ sizes[argc++] = ARGSIZE_LO;
+ sizes[argc++] = ARGSIZE_LO;
+ continue;
+ }
#endif
-
+ if (a != ARGSIZE_NONE) {
+ sizes[argc++] = a;
+ }
+ }
+ return argc;
+ }
+#endif
}
--- a/js/src/nanojit/Assembler.h
+++ b/js/src/nanojit/Assembler.h
@@ -79,32 +79,35 @@ namespace nanojit
{
LIns* entry[ NJ_MAX_STACK_ENTRY ]; /* maps to 4B contiguous locations relative to the frame pointer */
uint32_t tos; /* current top of stack entry */
uint32_t highwatermark; /* max tos hit */
uint32_t lowwatermark; /* we pre-allocate entries from 0 up to this index-1; so dynamic entries are added above this index */
LIns* parameter[ NJ_MAX_PARAMETERS ]; /* incoming parameters */
};
- const uint32_t ARGSIZE_NONE = 0;
- const uint32_t ARGSIZE_F = 1;
- const uint32_t ARGSIZE_LO = 2;
- const uint32_t ARGSIZE_Q = 3;
- const uint32_t _ARGSIZE_MASK_INT = 2;
- const uint32_t _ARGSIZE_MASK_ANY = 3;
+ enum ArgSize {
+ ARGSIZE_NONE = 0,
+ ARGSIZE_F = 1,
+ ARGSIZE_LO = 2,
+ ARGSIZE_Q = 3,
+ _ARGSIZE_MASK_INT = 2,
+ _ARGSIZE_MASK_ANY = 3
+ };
struct CallInfo
{
intptr_t _address;
uint16_t _argtypes; // 6 2-bit fields indicating arg type, by ARGSIZE above (including ret type): a1 a2 a3 a4 a5 ret
uint8_t _cse; // true if no side effects
uint8_t _fold; // true if no side effects
verbose_only ( const char* _name; )
uint32_t FASTCALL _count_args(uint32_t mask) const;
+ uint32_t get_sizes(ArgSize*) const;
inline uint32_t FASTCALL count_args() const { return _count_args(_ARGSIZE_MASK_ANY); }
inline uint32_t FASTCALL count_iargs() const { return _count_args(_ARGSIZE_MASK_INT); }
// fargs = args - iargs
};
#define FUNCTIONID(name) CI_avmplus_##name
@@ -209,20 +212,20 @@ namespace nanojit
debug_only ( bool onPage(NIns* where, bool exitPages=false); )
// support calling out from a fragment ; used to debug the jit
debug_only( void resourceConsistencyCheck(); )
debug_only( void registerConsistencyCheck(LIns** resv); )
Stats _stats;
- const CallInfo* callInfoFor(int32_t fid);
+ const CallInfo* callInfoFor(uint32_t fid);
const CallInfo* callInfoFor(LInsp call)
{
- return callInfoFor(call->imm8());
+ return callInfoFor(call->fid());
}
private:
void gen(LirFilter* toCompile, NInsList& loopJumps);
NIns* genPrologue(RegisterMask);
NIns* genEpilogue(RegisterMask);
@@ -260,21 +263,16 @@ namespace nanojit
Reservation* getresv(LIns *x) { return x->resv() ? &_resvTable[x->resv()] : 0; }
DWB(Fragmento*) _frago;
GC* _gc;
DWB(Fragment*) _thisfrag;
RegAllocMap* _branchStateMap;
GuardRecord* _latestGuard;
-
- const CallInfo *_call;
- uint32_t _iargs;
- uint32_t _fargs;
- int32_t _stackUsed;
const CallInfo *_functions;
NIns* _nIns; // current native instruction
NIns* _nExitIns; // current instruction in exit fragment page
NIns* _epilogue;
Page* _nativePages; // list of NJ_PAGE_SIZE pages that have been alloc'd
Page* _nativeExitPages; // list of pages that have been allocated for exit code
@@ -300,32 +298,31 @@ namespace nanojit
void asm_restore(LInsp, Reservation*, Register);
void asm_spill(LInsp i, Reservation *resv, bool pop);
void asm_load64(LInsp i);
void asm_pusharg(LInsp p);
NIns* asm_adjustBranch(NIns* at, NIns* target);
void asm_quad(LInsp i);
bool asm_qlo(LInsp ins, LInsp q);
void asm_fneg(LInsp ins);
- void asm_farg(LInsp ins);
void asm_fop(LInsp ins);
void asm_i2f(LInsp ins);
void asm_u2f(LInsp ins);
Register asm_prep_fcall(Reservation *rR, LInsp ins);
void asm_nongp_copy(Register r, Register s);
void asm_bailout(LInsp guard, Register state);
+ void asm_call(LInsp);
+ void asm_arg(ArgSize, LInsp, Register);
// platform specific implementation (see NativeXXX.cpp file)
void nInit(uint32_t flags);
void nInit(AvmCore *);
Register nRegisterAllocFromSet(int32_t set);
void nRegisterResetAll(RegAlloc& a);
void nMarkExecute(Page* page, int32_t count=1, bool enable=true);
- void nPostCallCleanup(const CallInfo* call);
- void nArgEmitted(const CallInfo* call, uint32_t stackSlotCount, uint32_t iargs, uint32_t fargs);
void nFrameRestore(RegisterMask rmask);
static void nPatchBranch(NIns* branch, NIns* location);
void nFragExit(LIns* guard);
// platform specific methods
public:
DECLARE_PLATFORM_ASSEMBLER()
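
[Annotation — not part of the patch] CallInfo::_argtypes packs six 2-bit
ArgSize fields with the return type in the low bits; get_sizes() (added in
Assembler.cpp above) shifts past the return type and collects the non-NONE
argument fields. A runnable sketch of that decode, assuming the first field
after the return type pairs with the first argument, as the insCall loop in
LIR.cpp does:

#include <cstdint>
#include <cstdio>

enum ArgSize { ARGSIZE_NONE = 0, ARGSIZE_F = 1, ARGSIZE_LO = 2, ARGSIZE_Q = 3 };

static uint32_t get_sizes(uint16_t argtypes, ArgSize* sizes)
{
    uint32_t argt = argtypes;
    uint32_t argc = 0;
    for (int i = 0; i < 5; i++) {
        argt >>= 2;                     // skip ret first, then one field per arg
        ArgSize a = ArgSize(argt & 3);
        if (a != ARGSIZE_NONE)
            sizes[argc++] = a;
    }
    return argc;
}

int main()
{
    // hypothetical signature int f(int, double): ret=LO, then LO, F
    uint16_t argtypes = ARGSIZE_LO | (ARGSIZE_LO << 2) | (ARGSIZE_F << 4);
    ArgSize sizes[5];
    uint32_t argc = get_sizes(argtypes, sizes);
    printf("%u args: %d %d\n", argc, sizes[0], sizes[1]);  // 2 args: 2 1
}
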
--- a/js/src/nanojit/Fragmento.cpp
+++ b/js/src/nanojit/Fragmento.cpp
@@ -134,33 +134,18 @@ namespace nanojit
int32_t gcpages = (count*NJ_PAGE_SIZE) / _gcHeap->kNativePageSize;
MMGC_MEM_TYPE("NanoJitMem");
memory = (Page*)_gcHeap->Alloc(gcpages);
#ifdef MEMORY_INFO
ChangeSizeExplicit("NanoJitMem", 1, _gcHeap->Size(memory));
#endif
NanoAssert((int*)memory == pageTop(memory));
//fprintf(stderr,"head alloc of %d at %x of %d pages using nj page size of %d\n", gcpages, (intptr_t)memory, (intptr_t)_gcHeap->kNativePageSize, NJ_PAGE_SIZE);
-
- // can't add memory if its not addressable from all locations
- for(uint32_t i=0; i<_allocList.size(); i++)
- {
- Page* a = _allocList.get(i);
- int32_t delta = (a < memory) ? (intptr_t)memory+(NJ_PAGE_SIZE*(count+1))-(intptr_t)a : (intptr_t)a+(NJ_PAGE_SIZE*(count+1))-(intptr_t)memory;
- if ( delta > 16777215 )
- {
- // can't use this memory
-#ifdef MEMORY_INFO
- ChangeSizeExplicit("NanoJitMem", -1, _gcHeap->Size(memory));
-#endif
- _gcHeap->Free(memory);
- return;
- }
- }
- _allocList.add(memory);
+
+ _allocList.add(memory);
Page* page = memory;
_pageList = page;
_stats.pages += count;
_stats.freePages += count;
trackFree(0);
while(--count > 0)
{
@@ -390,34 +375,37 @@ namespace nanojit
avmplus::SortedMap<uint64_t, DurData, avmplus::LIST_NonGCObjects> durs(_core->gc);
uint64_t totaldur=0;
fragstats totalstat = { 0,0,0,0,0 };
for (int32_t i=0; i<count; i++)
{
Fragment *f = _frags->at(i);
fragstats stat = { 0,0,0,0,0 };
dumpFragStats(f, 0, stat);
+ if (stat.lir) {
+ totalstat.lir += stat.lir;
+ totalstat.lirbytes += stat.lirbytes;
+ }
uint64_t bothDur = stat.traceDur + stat.interpDur;
if (bothDur) {
totalstat.interpDur += stat.interpDur;
totalstat.traceDur += stat.traceDur;
totalstat.size += stat.size;
- totalstat.lir += stat.lir;
- totalstat.lirbytes += stat.lirbytes;
totaldur += bothDur;
while (durs.containsKey(bothDur)) bothDur++;
DurData d(f, stat.traceDur, stat.interpDur, stat.size);
durs.put(bothDur, d);
}
}
uint64_t totaltrace = totalstat.traceDur;
int totalsize = totalstat.size;
_assm->outputf("");
- _assm->outputf("avg %.1f bytes/lir", double(totalstat.lirbytes)/totalstat.lir);
+ _assm->outputf("lirbytes %d / lir %d = %.1f bytes/lir", totalstat.lirbytes,
+ totalstat.lir, double(totalstat.lirbytes)/totalstat.lir);
_assm->outputf(" trace interp");
_assm->outputf("%9lld (%2d%%) %9lld (%2d%%)",
totaltrace/1000, int(100.0*totaltrace/totaldur),
(totaldur-totaltrace)/1000, int(100.0*(totaldur-totaltrace)/totaldur));
_assm->outputf("");
_assm->outputf("trace ticks trace interp size");
for (int32_t i=durs.size()-1; i >= 0; i--) {
uint64_t bothDur = durs.keyAt(i);
--- a/js/src/nanojit/Fragmento.h
+++ b/js/src/nanojit/Fragmento.h
@@ -210,17 +210,16 @@ namespace nanojit
GuardRecord* outbound;
TraceKind kind;
const void* ip;
uint32_t guardCount;
uint32_t xjumpCount;
int32_t blacklistLevel;
NIns* fragEntry;
- LInsp state,param1,sp,rp;
int32_t calldepth;
void* vmprivate;
private:
NIns* _code; // ptr to start of code
GuardRecord* _links; // code which is linked (or pending to be) to this fragment
int32_t _hits;
Page* _pages; // native code pages
--- a/js/src/nanojit/LIR.cpp
+++ b/js/src/nanojit/LIR.cpp
@@ -41,44 +41,44 @@
#include <stdio.h>
namespace nanojit
{
using namespace avmplus;
#ifdef FEATURE_NANOJIT
const uint8_t operandCount[] = {
- /* 0 */ 2, 2, /*trace*/0, /*skip*/0, /*tramp*/0, 2, 2, 2, 2, /*arg*/1,
- /* 10 */ /*param*/0, 2, 2, /*ref*/1, 2, 2, 2, 2, /*call*/0, /*loop*/0,
+ /* 0 */ 2, 2, /*trace*/0, /*nearskip*/0, /*skip*/0, /*neartramp*/0, /*tramp*/0, 2, 2, 2,
+ /* 10 */ /*param*/0, 2, 2, 2, 2, 2, 2, 2, /*call*/0, /*loop*/0,
/* 20 */ /*x*/0, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* 30 */ 2, 2, /*short*/0, /*int*/0, 2, 2, /*neg*/1, 2, 2, 2,
/* 40 */ /*callh*/1, 2, 2, 2, /*not*/1, 2, 2, 2, /*xt*/1, /*xf*/1,
/* 50 */ /*qlo*/1, /*qhi*/1, 2, /*ov*/1, /*cs*/1, 2, 2, 2, 2, 2,
/* 60 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- /* 70 */ 2, 2, 2, /*farg*/1, 2, 2, 2, 2, 2, 2,
+ /* 70 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* 80 */ 2, 2, /*fcall*/0, 2, 2, 2, 2, 2, 2, 2,
/* 90 */ 2, 2, 2, 2, 2, 2, 2, /*quad*/0, 2, 2,
/* 100 */ /*fneg*/1, 2, 2, 2, 2, 2, /*i2f*/1, /*u2f*/1, 2, 2,
/* 110 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* 120 */ 2, 2, 2, 2, 2, 2, 2, 2,
};
// LIR verbose specific
#ifdef NJ_VERBOSE
const char* lirNames[] = {
- /* 0-9 */ "0","1","trace","skip","tramp","5","6","7","8","arg",
- /* 10-19 */ "param","st","ld","ref","sti","15","16","17","call","loop",
+ /* 0-9 */ "0","1","trace","nearskip","skip","neartramp","tramp","7","8","9",
+ /* 10-19 */ "param","st","ld","13","sti","15","16","17","call","loop",
/* 20-29 */ "x","21","22","23","24","25","feq","flt","fgt","fle",
/* 30-39 */ "fge","cmov","short","int","ldc","","neg","add","sub","mul",
/* 40-49 */ "callh","and","or","xor","not","lsh","rsh","ush","xt","xf",
/* 50-59 */ "qlo","qhi","ldcb","ov","cs","eq","lt","gt","le","ge",
/* 60-63 */ "ult","ugt","ule","uge",
/* 64-69 */ "LIR64","65","66","67","68","69",
- /* 70-79 */ "70","71","72","farg","74","stq","ldq","77","stqi","79",
+ /* 70-79 */ "70","71","72","73","74","stq","ldq","77","stqi","79",
/* 80-89 */ "80","81","fcall","83","84","85","86","87","88","89",
/* 90-99 */ "90","91","92","93","94","95","96","quad","98","99",
/* 100-109 */ "fneg","fadd","fsub","fmul","fdiv","qjoin","i2f","u2f","108","109",
/* 110-119 */ "110","111","112","113","114","115","116","117","118","119",
/* 120-127 */ "120","121","122","123","124","125","126","127"
};
#endif /* NANOJIT_VERBOSE */
@@ -204,51 +204,67 @@ namespace nanojit
_unused = &lastPage->lir[0];
}
return false;
}
bool LirBufWriter::ensureRoom(uint32_t count)
{
LInsp last = _buf->next();
- if (!samepage(last,last+count)
+ if (!samepage(last,last+2*count)
&& _buf->addPage())
{
// link LIR stream back to prior instruction (careful insFar relies on _unused...)
- LInsp next = _buf->next();
- insFar(LIR_skip, last-1-next);
+ insFar(LIR_skip, last-1);
}
return !_buf->outOmem();
}
LInsp LirBuffer::commit(uint32_t count)
{
debug_only(validate();)
NanoAssertMsg( samepage(_unused, _unused+count), "You need to call ensureRoom first!" );
return _unused += count;
}
- uint32_t LIns::reference(LIns *r)
+ uint32_t LIns::reference(LIns *r) const
{
int delta = this-r-1;
NanoAssert(isU8(delta));
return delta;
}
+ LIns* LIns::deref(int32_t off) const
+ {
+ LInsp i = (LInsp) this-1 - off;
+ while (i->isTramp())
+ i = i->ref();
+ return i;
+ }
+
LInsp LirBufWriter::ensureReferenceable(LInsp i, int32_t addedDistance)
{
- NanoAssert(!i->isop(LIR_tramp));
+ NanoAssert(!i->isTramp());
LInsp next = _buf->next();
- LInsp from = next + addedDistance;
- if ( canReference(from,i) )
+ LInsp from = next + 2*addedDistance;
+ if (canReference(from,i))
return i;
+ if (i == _buf->sp && spref && canReference(from, spref))
+ return spref;
+ if (i == _buf->rp && rpref && canReference(from, rpref))
+ return rpref;
// need a trampoline to get to i
- LInsp tramp = insFar(LIR_tramp, i-next);
- NanoAssert( tramp+tramp->imm24() == i );
+ LInsp tramp = insFar(LIR_tramp, i);
+ NanoAssert( tramp->ref() == i );
+
+ if (i == _buf->sp)
+ spref = tramp;
+ else if (i == _buf->rp)
+ rpref = tramp;
return tramp;
}
LInsp LirBufWriter::insStore(LInsp val, LInsp base, LInsp off)
{
LOpcode op = val->isQuad() ? LIR_stq : LIR_st;
NanoAssert(val && base && off);
ensureRoom(4);
@@ -336,38 +352,51 @@ namespace nanojit
LInsp LirBufWriter::insGuard(LOpcode op, LInsp c, SideExit *x)
{
LInsp data = skip(SideExitSize(x));
*((SideExit*)data->payload()) = *x;
return ins2(op, c, data);
}
- LInsp LirBufWriter::insImm8(LOpcode op, int32_t a, int32_t b)
- {
+ LInsp LirBufWriter::insParam(int32_t arg)
+ {
ensureRoom(1);
LInsp l = _buf->next();
- l->initOpcode(op);
- l->setimm8(a,b);
+ l->initOpcode(LIR_param);
+ l->c.imm8a = Assembler::argRegs[arg];
_buf->commit(1);
_buf->_stats.lir++;
return l;
- }
+ }
- LInsp LirBufWriter::insFar(LOpcode op, int32_t imm)
- {
- ensureRoom(1);
+#define isS24(x) (((int32_t(x)<<8)>>8) == (x))
- LInsp l = _buf->next();
- l->initOpcode(op);
- l->setimm24(imm);
-
- _buf->commit(1);
- return l;
+ LInsp LirBufWriter::insFar(LOpcode op, LInsp target)
+ {
+ NanoAssert(op == LIR_skip || op == LIR_tramp);
+ LInsp l = _buf->next();
+ int d = target-l;
+ if (isS24(d)) {
+ ensureRoom(1);
+ l->initOpcode(LOpcode(op-1)); // nearskip or neartramp
+ l->t.imm24 = d;
+ _buf->commit(1);
+ return l;
+ }
+ else {
+ ensureRoom(2);
+ // write the pointer and instruction
+ l = _buf->next()+1;
+ *((LInsp*)(l-1)) = target;
+ l->initOpcode(op);
+ _buf->commit(2);
+ return l;
+ }
}
LInsp LirBufWriter::insImm(int32_t imm)
{
if (isS16(imm)) {
ensureRoom(1);
LInsp l = _buf->next();
l->initOpcode(LIR_short);
@@ -392,20 +421,20 @@ namespace nanojit
l[1] = int32_t(imm>>32);
_buf->commit(2);
return ins0(LIR_quad);
}
LInsp LirBufWriter::skip(size_t size)
{
const uint32_t n = (size+sizeof(LIns)-1)/sizeof(LIns);
- ensureRoom(n+1);
- LInsp i = _buf->next();
+ ensureRoom(n+2);
+ LInsp last = _buf->next()-1;
_buf->commit(n);
- return insFar(LIR_skip, i-1-_buf->next());
+ return insFar(LIR_skip, last);
}
LInsp LirReader::read()
{
LInsp cur = _i;
if (!cur)
return 0;
LIns* i = cur;
@@ -413,28 +442,35 @@ namespace nanojit
do
{
switch (iop)
{
default:
i--;
break;
+ case LIR_call:
+ case LIR_fcall:
+ i -= argwords(i->argc())+1;
+ break;
+
case LIR_skip:
- NanoAssert(i->imm24() != 0);
- i += i->imm24();
+ case LIR_nearskip:
+ NanoAssert(i->ref() != i);
+ i = i->ref();
break;
-
+
+ case LIR_tramp:
case LIR_int:
NanoAssert(samepage(i, i-2));
i -= 2;
break;
case LIR_quad:
- NanoAssert(samepage(i,i-3));
+ NanoAssert(samepage(i, i-3));
i -= 3;
break;
case LIR_trace:
_i = 0; // start of trace
return cur;
}
iop = i->opcode();
@@ -498,47 +534,36 @@ namespace nanojit
bool FASTCALL isCse(LOpcode op) {
op = LOpcode(op & ~LIR64);
return op >= LIR_feq && op <= LIR_uge;
}
bool LIns::isCse(const CallInfo *functions) const
{
- return nanojit::isCse(u.code) || isCall() && functions[imm8()]._cse;
+ return nanojit::isCse(u.code) || isCall() && functions[fid()]._cse;
}
- void LIns::setimm8(int32_t a, int32_t b)
- {
- NanoAssert(isS8(a) && isS8(b));
- c.imm8a = int8_t(a);
- c.imm8b = int8_t(b);
- }
-
void LIns::setimm16(int32_t x)
{
NanoAssert(isS16(x));
i.imm16 = int16_t(x);
}
- void LIns::setimm24(int32_t x)
- {
- t.imm24 = x;
- }
-
void LIns::setresv(uint32_t resv)
{
NanoAssert(isU8(resv));
g.resv = resv;
}
void LIns::initOpcode(LOpcode op)
{
- t.code = op;
- t.imm24 = 0;
+ i.code = op;
+ i.imm16 = 0;
+ i.resv = 0;
}
void LIns::setOprnd1(LInsp r)
{
u.oprnd_1 = reference(r);
}
void LIns::setOprnd2(LInsp r)
@@ -552,43 +577,34 @@ namespace nanojit
}
void LIns::setDisp(int8_t d)
{
sti.disp = d;
}
LInsp LIns::oprnd1() const
- {
- LInsp i = (LInsp) this - u.oprnd_1 - 1;
- while (i->isop(LIR_tramp))
- i += i->imm24();
- return i;
+ {
+ return deref(u.oprnd_1);
}
LInsp LIns::oprnd2() const
{
- LInsp i = (LInsp) this - u.oprnd_2 - 1;
- while (i->isop(LIR_tramp))
- i += i->imm24();
- return i;
+ return deref(u.oprnd_2);
}
LInsp LIns::oprnd3() const
{
- LInsp i = (LInsp) this - u.oprnd_3 - 1;
- while (i->isop(LIR_tramp))
- i += i->imm24();
- return i;
+ return deref(u.oprnd_3);
}
void *LIns::payload() const
{
- NanoAssert(opcode() == LIR_skip);
- return (void*) (this+imm24()+1);
+ NanoAssert(opcode()==LIR_skip || opcode()==LIR_nearskip);
+ return (void*) (ref()+1);
}
LIns* LirWriter::ins2i(LOpcode v, LIns* oprnd1, int32_t imm)
{
return ins2(v, oprnd1, insImm(imm));
}
bool insIsS16(LInsp i)
@@ -868,60 +884,62 @@ namespace nanojit
// @todo -- it might be better to use a short conditional branch rather than
// the bit-twiddling on systems that don't provide a conditional move instruction.
LInsp ncond = ins1(LIR_neg, cond); // cond ? -1 : 0
return ins2(LIR_or,
ins2(LIR_and, iftrue, ncond),
ins2(LIR_and, iffalse, ins1(LIR_not, ncond)));
}
- LIns* LirBufWriter::insCall(int32_t fid, LInsp args[])
+ LIns* LirBufWriter::insCall(uint32_t fid, LInsp args[])
{
- static const LOpcode k_argmap[] = { LIR_farg, LIR_arg, LIR_ref };
static const LOpcode k_callmap[] = { LIR_call, LIR_fcall, LIR_call, LIR_callh };
const CallInfo& ci = _functions[fid];
uint32_t argt = ci._argtypes;
- int32_t argc = ci.count_args();
- const uint32_t ret = argt & 3;
- LOpcode op = k_callmap[ret];
- //printf(" ret is type %d %s\n", ret, lirNames[op]);
+ LOpcode op = k_callmap[argt & 3];
+
+ ArgSize sizes[10];
+ uint32_t argc = ci.get_sizes(sizes);
#ifdef NJ_SOFTFLOAT
if (op == LIR_fcall)
op = LIR_callh;
LInsp args2[5*2]; // arm could require 2 args per double
int32_t j = 0;
- uint32_t argt2 = argt&3; // copy of return type
- for (int32_t i = 0; i < argc; i++) {
+ for (int32_t i = 0; i < 5; i++) {
argt >>= 2;
- uint32_t a = argt&3;
+ ArgSize a = ArgSize(argt&3);
if (a == ARGSIZE_F) {
LInsp q = args[i];
args2[j++] = ins1(LIR_qhi, q);
- argt2 |= ARGSIZE_LO << (j*2);
args2[j++] = ins1(LIR_qlo, q);
- argt2 |= ARGSIZE_LO << (j*2);
- } else {
+ } else if (a != ARGSIZE_NONE) {
args2[j++] = args[i];
- argt2 |= a << (j*2);
}
}
args = args2;
- argt = argt2;
- argc = j;
+ NanoAssert(j == argc);
#endif
- for (int32_t i = 0; i < argc; i++) {
- argt >>= 2;
- AvmAssert((argt&3)!=0);
- ins1(k_argmap[(argt&3)-1], args[i]);
- }
-
- return insImm8(op==LIR_callh ? LIR_call : op, fid, argc);
+ NanoAssert(argc < 8);
+ uint32_t words = argwords(argc);
+ ensureRoom(words+argc+1); // ins size + possible tramps
+ for (uint32_t i=0; i < argc; i++)
+ args[i] = ensureReferenceable(args[i], argc-i);
+ uint8_t* offs = (uint8_t*)_buf->next();
+ LIns *l = _buf->next() + words;
+ for (uint32_t i=0; i < argc; i++)
+ offs[i] = (uint8_t) l->reference(args[i]);
+ l->initOpcode(op==LIR_callh ? LIR_call : op);
+ l->c.imm8a = fid;
+ l->c.imm8b = argc;
+ _buf->commit(words+1);
+ _buf->_stats.lir++;
+ return l;
}
using namespace avmplus;
StackFilter::StackFilter(LirFilter *in, GC *gc, Fragment *frag, LInsp sp)
: LirFilter(in), gc(gc), frag(frag), sp(sp), top(0)
{}
@@ -1036,22 +1054,21 @@ namespace nanojit
case LIR_int:
return hashimm(i->imm32());
case LIR_quad:
return hashimmq(i->constvalq());
case LIR_call:
case LIR_fcall:
{
LInsp args[10];
- int32_t argc = i->imm8b();
+ int32_t argc = i->argc();
NanoAssert(argc < 10);
- LirReader ri(i);
- for (int32_t j=argc; j > 0; )
- args[--j] = ri.previous()->oprnd1();
- return hashcall(i->imm8(), argc, args);
+ for (int32_t j=0; j < argc; j++)
+ args[j] = i->arg(j);
+ return hashcall(i->fid(), argc, args);
}
default:
if (operandCount[op] == 2)
return hash2(op, i->oprnd1(), i->oprnd2());
else
return hash1(op, i->oprnd1());
}
}
@@ -1074,22 +1091,21 @@ namespace nanojit
}
case LIR_quad:
{
return a->constvalq() == b->constvalq();
}
case LIR_call:
case LIR_fcall:
{
- uint32_t argc;
- if (a->imm8() != b->imm8()) return false;
- if ((argc=a->imm8b()) != b->imm8b()) return false;
- LirReader ra(a), rb(b);
- while (argc-- > 0)
- if (ra.previous()->oprnd1() != rb.previous()->oprnd1())
+ if (a->fid() != b->fid()) return false;
+ uint32_t argc=a->argc();
+ NanoAssert(argc == b->argc());
+ for (uint32_t i=0; i < argc; i++)
+ if (a->arg(i) != b->arg(i))
return false;
return true;
}
default:
{
const uint32_t count = operandCount[op];
if ((count >= 1 && a->oprnd1() != b->oprnd1()) ||
(count >= 2 && a->oprnd2() != b->oprnd2()))
@@ -1173,17 +1189,17 @@ namespace nanojit
}
uint32_t LInsHashSet::hash2(LOpcode op, LInsp a, LInsp b) {
uint32_t hash = _hash8(0,uint8_t(op));
hash = _hashptr(hash, a);
return _hashfinish(_hashptr(hash, b));
}
- uint32_t LInsHashSet::hashcall(int32_t fid, uint32_t argc, LInsp args[]) {
+ uint32_t LInsHashSet::hashcall(uint32_t fid, uint32_t argc, LInsp args[]) {
uint32_t hash = _hash32(0,fid);
for (int32_t j=argc-1; j >= 0; j--)
hash = _hashptr(hash,args[j]);
return _hashfinish(hash);
}
LInsp LInsHashSet::find32(int32_t a, uint32_t &i)
{
@@ -1250,34 +1266,32 @@ namespace nanojit
hash = (hash + (n += 2)) & bitmask; // quadratic probe
}
i = hash;
return k;
}
bool argsmatch(LInsp i, uint32_t argc, LInsp args[])
{
- // we don't have callinfo here so we cannot use argiterator
- LirReader r(i);
- for (LInsp a = r.previous(); a->isArg(); a=r.previous())
- if (a->oprnd1() != args[--argc])
+ for (uint32_t j=0; j < argc; j++)
+ if (i->arg(j) != args[j])
return false;
return true;
}
- LInsp LInsHashSet::findcall(int32_t fid, uint32_t argc, LInsp args[], uint32_t &i)
+ LInsp LInsHashSet::findcall(uint32_t fid, uint32_t argc, LInsp args[], uint32_t &i)
{
uint32_t cap = m_list.size();
const InsList& list = m_list;
const uint32_t bitmask = (cap - 1) & ~0x1;
uint32_t hash = hashcall(fid, argc, args) & bitmask;
uint32_t n = 7 << 1;
LInsp k;
while ((k = list.get(hash)) != NULL &&
- (!k->isCall() || k->imm8() != fid || !argsmatch(k, argc, args)))
+ (!k->isCall() || k->fid() != fid || !argsmatch(k, argc, args)))
{
hash = (hash + (n += 2)) & bitmask; // quadratic probe
}
i = hash;
return k;
}
SideExit *LIns::exit()
@@ -1306,17 +1320,17 @@ namespace nanojit
live.put(i,use);
}
}
void retire(LInsp i, GC *gc) {
RetiredEntry *e = new (gc) RetiredEntry(gc);
e->i = i;
for (int j=0, n=live.size(); j < n; j++) {
LInsp l = live.keyAt(j);
- if (!l->isStore() && !l->isGuard() && !l->isArg())
+ if (!l->isStore() && !l->isGuard())
e->live.add(l);
}
int size=0;
if ((size = e->live.size()) > maxlive)
maxlive = size;
live.remove(i);
retired.add(e);
@@ -1325,41 +1339,33 @@ namespace nanojit
return live.containsKey(i);
}
};
void live(GC *gc, Assembler *assm, Fragment *frag)
{
// traverse backwards to find live exprs and a few other stats.
- LInsp sp = frag->sp;
- LInsp rp = frag->rp;
+ LInsp sp = frag->lirbuf->sp;
+ LInsp rp = frag->lirbuf->rp;
LiveTable live(gc);
uint32_t exits = 0;
LirBuffer *lirbuf = frag->lirbuf;
LirReader br(lirbuf);
StackFilter sf(&br, gc, frag, sp);
StackFilter r(&sf, gc, frag, rp);
- bool skipargs = false;
int total = 0;
- live.add(frag->state, r.pos());
+ live.add(frag->lirbuf->state, r.pos());
for (LInsp i = r.read(); i != 0; i = r.read())
{
total++;
- if (i->isArg()) {
- if (!skipargs)
- live.add(i->oprnd1(),0);
- } else {
- skipargs = false;
- }
-
// first handle side-effect instructions
if (i->isStore() || i->isGuard() ||
- i->isCall() && !assm->callInfoFor(i->imm8())->_cse)
+ i->isCall() && !assm->callInfoFor(i->fid())->_cse)
{
live.add(i,0);
if (i->isGuard())
exits++;
}
// now propagate liveness
if (live.contains(i))
@@ -1376,20 +1382,20 @@ namespace nanojit
}
else if (operandCount[i->opcode()] == 1) {
live.add(i->oprnd1(),i);
}
else if (operandCount[i->opcode()] == 2) {
live.add(i->oprnd1(),i);
live.add(i->oprnd2(),i);
}
- }
- else
- {
- skipargs = i->isCall();
+ else if (i->isCall()) {
+ for (int j=0, c=i->argc(); j < c; j++)
+ live.add(i->arg(j),i);
+ }
}
}
assm->outputf("live instruction count %ld, total %ld, max pressure %d",
live.retired.size(), total, live.maxlive);
assm->outputf("side exits %ld", exits);
// print live exprs, going forwards
@@ -1449,17 +1455,17 @@ namespace nanojit
*buf++ = ':';
formatImm(uint32_t(ref->constvalq()), buf);
}
else if (ref->isconst()) {
formatImm(ref->constval(), buf);
}
else {
if (ref->isCall()) {
- copyName(ref, _functions[ref->imm8()]._name, funccounts.add(ref->imm8()));
+ copyName(ref, _functions[ref->fid()]._name, funccounts.add(ref->fid()));
} else {
copyName(ref, lirNames[ref->opcode()], lircounts.add(ref->opcode()));
}
StringNullTerminatedUTF8 cname(gc, names.get(ref)->name);
strcat(buf, cname.c_str());
}
return labels->dup(buffer);
}
@@ -1492,43 +1498,40 @@ namespace nanojit
case LIR_loop:
case LIR_trace:
sprintf(s, "%s", lirNames[op]);
break;
case LIR_fcall:
case LIR_call: {
- sprintf(s, "%s ( ", _functions[i->imm8()]._name);
- LirReader r(i);
- for (LInsp a = r.previous(); a->isArg(); a = r.previous()) {
+ sprintf(s, "%s ( ", _functions[i->fid()]._name);
+ for (int32_t j=i->argc()-1; j >= 0; j--) {
s += strlen(s);
- sprintf(s, "%s ",formatRef(a->oprnd1()));
+ sprintf(s, "%s ",formatRef(i->arg(j)));
}
s += strlen(s);
sprintf(s, ")");
break;
}
case LIR_param:
sprintf(s, "%s %s", lirNames[op], gpn(i->imm8()));
break;
case LIR_callh:
case LIR_neg:
case LIR_fneg:
- case LIR_arg:
- case LIR_farg:
case LIR_i2f:
case LIR_u2f:
case LIR_qlo:
case LIR_qhi:
- case LIR_ref:
case LIR_ov:
case LIR_cs:
+ case LIR_not:
sprintf(s, "%s %s", lirNames[op], formatRef(i->oprnd1()));
break;
case LIR_x:
case LIR_xt:
case LIR_xf:
formatGuard(i, s);
break;
@@ -1537,17 +1540,16 @@ namespace nanojit
case LIR_sub:
case LIR_mul:
case LIR_fadd:
case LIR_fsub:
case LIR_fmul:
case LIR_fdiv:
case LIR_and:
case LIR_or:
- case LIR_not:
case LIR_xor:
case LIR_lsh:
case LIR_rsh:
case LIR_ush:
case LIR_eq:
case LIR_lt:
case LIR_le:
case LIR_gt:
@@ -1671,22 +1673,23 @@ namespace nanojit
LInsp found = exprs.find1(v, c, k);
if (found)
return 0;
return exprs.add(out->insGuard(v,c,x), k);
}
return out->insGuard(v, c, x);
}
- LInsp CseFilter::insCall(int32_t fid, LInsp args[])
+ LInsp CseFilter::insCall(uint32_t fid, LInsp args[])
{
const CallInfo *c = &_functions[fid];
if (c->_cse) {
uint32_t k;
- LInsp found = exprs.findcall(fid, c->count_args(), args, k);
+ uint32_t argc = c->count_args();
+ LInsp found = exprs.findcall(fid, argc, args, k);
if (found)
return found;
return exprs.add(out->insCall(fid, args), k);
}
return out->insCall(fid, args);
}
CseReader::CseReader(LirFilter *in, LInsHashSet *exprs, const CallInfo *functions)
@@ -1700,22 +1703,17 @@ namespace nanojit
if (i->isCse(functions))
exprs->replace(i);
}
return i;
}
LIns* FASTCALL callArgN(LIns* i, uint32_t n)
{
- // @todo clean up; shouldn't have to create a reader
- LirReader rdr(i);
- do
- i = rdr.read();
- while (n-- > 0);
- return i;
+ return i->arg(i->argc()-n-1);
}
void compile(Assembler* assm, Fragment* triggerFrag)
{
Fragmento *frago = triggerFrag->lirbuf->_frago;
AvmCore *core = frago->core();
GC *gc = core->gc;
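
[Annotation — not part of the patch] The new insCall() above stores argc
one-byte operand offsets in argwords(argc) 4-byte words directly below the
call instruction; LIns::arg(i) in LIR.h recomputes the same base to read them
back. A toy model of just that address arithmetic (real code uses 4-byte LIns
slots; the offset values here are stand-ins for reference() deltas):

#include <cassert>
#include <cstdint>

static uint32_t argwords(uint32_t argc) { return (argc + 3) >> 2; } // as in LIR.h

int main()
{
    assert(argwords(1) == 1 && argwords(4) == 1 && argwords(5) == 2);

    const uint32_t argc = 3;
    uint8_t buf[8] = {0};                      // one offset word + the call slot
    uint8_t* ins  = buf + 4 * argwords(argc);  // the LIR_call LIns sits here
    uint8_t* offs = buf;                       // offsets start argwords below it
    for (uint32_t i = 0; i < argc; i++)
        offs[i] = uint8_t(10 + i);

    uint8_t* base = ins - 4 * argwords(argc);  // what arg(i) recomputes
    assert(base[1] == 11);                     // arg(1)'s offset byte
}
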
--- a/js/src/nanojit/LIR.h
+++ b/js/src/nanojit/LIR.h
@@ -47,25 +47,25 @@ namespace nanojit
enum LOpcode __msvc_only(: unsigned)
{
// flags; upper bits reserved
LIR64 = 0x40, // result is double or quad
// special operations (must be 0..N)
LIR_trace = 2,
- LIR_skip = 3,
- LIR_tramp = 4,
+ LIR_nearskip = 3, // must be LIR_skip-1 and lsb=1
+ LIR_skip = 4,
+ LIR_neartramp = 5, // must be LIR_tramp-1 and lsb=1
+ LIR_tramp = 6,
// non-pure operations
- LIR_arg = 9,
LIR_param = 10,
LIR_st = 11,
LIR_ld = 12,
- LIR_ref = 13, // ref arg
LIR_sti = 14,
LIR_call = 18,
// guards
LIR_loop = 19, // loop fragment
LIR_x = 20, // exit always
// operators
@@ -115,122 +115,136 @@ namespace nanojit
/**
* 64bit operations
*/
LIR_stq = LIR_st | LIR64,
LIR_stqi = LIR_sti | LIR64,
LIR_quad = LIR_int | LIR64,
LIR_ldq = LIR_ld | LIR64,
- LIR_farg = LIR_arg | LIR64,
LIR_fcall = LIR_call | LIR64,
LIR_fneg = LIR_neg | LIR64,
LIR_fadd = LIR_add | LIR64,
LIR_fsub = LIR_sub | LIR64,
LIR_fmul = LIR_mul | LIR64,
LIR_fdiv = 40 | LIR64,
LIR_qjoin = 41 | LIR64,
LIR_i2f = 42 | LIR64,
LIR_u2f = 43 | LIR64
};
+ inline uint32_t argwords(uint32_t argc) {
+ return (argc+3)>>2;
+ }
+
struct SideExit;
struct Page;
struct CallInfo;
// Low-level Instruction 4B
// had to lay it out as a union with duplicate code fields since msvc couldn't figure out how to compact it otherwise.
class LIns
{
+ friend class LirBufWriter;
// 3-operand form (backwards reach only)
struct u_type
{
LOpcode code:8;
+ uint32_t oprnd_3:8; // only used for store, since this location gets clobbered during generation
uint32_t oprnd_1:8; // 256 ins window and since they only point backwards this is sufficient.
uint32_t oprnd_2:8;
- uint32_t oprnd_3:8; // only used for store, since this location gets clobbered during generation
};
struct sti_type
{
LOpcode code:8;
+ int32_t disp:8;
uint32_t oprnd_1:8; // 256 ins window and since they only point backwards this is sufficient.
uint32_t oprnd_2:8;
- int32_t disp:8;
};
// imm8 form
struct c_type
{
LOpcode code:8;
+ uint32_t resv:8; // clobbered during assembly
uint32_t imm8a:8;
uint32_t imm8b:8;
- uint32_t resv:8; // cobberred during assembly
};
+ // imm24 form for short tramp & skip
+ struct t_type
+ {
+ LOpcode code:8;
+ int32_t imm24:24;
+ };
+
// imm16 form
struct i_type
{
LOpcode code:8;
+ uint32_t resv:8; // clobbered during assembly
int32_t imm16:16;
- uint32_t resv:8; // cobberred during assembly
- };
-
- // tramp form (imm24)
- struct t_type
- {
- LOpcode code:8;
- int32_t imm24:24; // +/- 8MB
};
// overlay used during code generation (note that the resv byte is reserved for allocation)
struct g_type
{
LOpcode code:8;
+ uint32_t resv:8; // clobbered during assembly
uint32_t unused:16;
- uint32_t resv:8; // cobberred during assembly
};
/**
* Various forms of the instruction.
*
* In general the oprnd_x entries contain an unsigned value 0-255 that identifies a previous
* instruction, where 0 means the immediately previous instruction and 255 means the
* instruction two hundred and fifty-six prior to this one.
*
* For pointing to instructions further than this range LIR_tramp is used.
*/
union
{
u_type u;
c_type c;
i_type i;
- t_type t;
+ t_type t;
g_type g;
sti_type sti;
};
- uint32_t reference(LIns*);
+ uint32_t reference(LIns*) const;
+ LIns* deref(int32_t off) const;
public:
LIns* FASTCALL oprnd1() const;
LIns* FASTCALL oprnd2() const;
LIns* FASTCALL oprnd3() const;
inline LOpcode opcode() const { return u.code; }
inline uint8_t imm8() const { return c.imm8a; }
- inline uint8_t imm8b() const { return c.imm8b; }
inline int16_t imm16() const { return i.imm16; }
- inline int32_t imm24() const { return t.imm24; }
+ inline LIns* ref() const {
+ return (t.code & 1) ? (LIns*)this+t.imm24 : *(LIns**)(this-1);
+ }
inline int32_t imm32() const { return *(int32_t*)(this-1); }
inline uint8_t resv() const { return g.resv; }
void* payload() const;
inline Page* page() { return (Page*) alignTo(this,NJ_PAGE_SIZE); }
+ // index args in r-l order. arg(0) is rightmost arg
+ inline LIns* arg(uint32_t i) {
+ uint32_t c = argc();
+ NanoAssert(i < c);
+ uint8_t* offs = (uint8_t*) (this-argwords(c));
+ return deref(offs[i]);
+ }
+
inline int32_t immdisp()const
{
return (u.code&~LIR64) == LIR_sti ? sti.disp : oprnd3()->constval();
}
inline static bool sameop(LIns* a, LIns* b)
{
// hacky but more efficient than opcode() == opcode() due to bit masking of 7-bit field
@@ -250,58 +264,68 @@ namespace nanojit
inline uint64_t constvalq() const
{
NanoAssert(isconstq());
#ifdef AVMPLUS_UNALIGNED_ACCESS
return *(const uint64_t*)(this-2);
#else
uint64_t tmp;
- memcpy(&tmp, this-2, sizeof(tmp));
+ memcpy(&tmp, this-2, sizeof(tmp));
return tmp;
#endif
}
inline double constvalf() const
{
NanoAssert(isconstq());
#ifdef AVMPLUS_UNALIGNED_ACCESS
return *(const double*)(this-2);
#else
- double tmpf;
- memcpy(&tmpf, this-2, sizeof(tmpf));
- return tmpf;
+ union { uint64_t tmp; double tmpf; } u;
+ memcpy(&u.tmpf, this-2, sizeof(u.tmpf));
+ return u.tmpf;
#endif
}
bool isCse(const CallInfo *functions) const;
bool isop(LOpcode o) const { return u.code == o; }
bool isQuad() const { return (u.code & LIR64) != 0; }
- bool isArg() const { return (u.code & ~LIR64)==LIR_arg || u.code == LIR_ref; }
bool isCond() const;
bool isCmp() const;
bool isCall() const;
bool isStore() const;
bool isLoad() const;
bool isGuard() const;
bool isconst() const;
bool isconstval(int32_t val) const;
bool isconstq() const;
+ bool isTramp() {
+ return isop(LIR_neartramp) || isop(LIR_tramp);
+ }
- void setimm8(int32_t a, int32_t b);
void setimm16(int32_t i);
void setimm24(int32_t i);
void setresv(uint32_t resv);
void initOpcode(LOpcode);
void setOprnd1(LIns*);
void setOprnd2(LIns*);
void setOprnd3(LIns*);
void setDisp(int8_t d);
SideExit *exit();
+
+ inline uint32_t argc() {
+ NanoAssert(isCall());
+ return c.imm8b;
+ }
+ inline uint8_t fid() const {
+ NanoAssert(isCall());
+ return c.imm8a;
+ }
};
typedef LIns* LInsp;
bool FASTCALL isCse(LOpcode v);
bool FASTCALL isCmp(LOpcode v);
bool FASTCALL isCond(LOpcode v);
LIns* FASTCALL callArgN(LInsp i, uint32_t n);
extern const uint8_t operandCount[];
@@ -329,18 +353,18 @@ namespace nanojit
return out->ins1(v, a);
}
virtual LInsp ins2(LOpcode v, LIns* a, LIns* b) {
return out->ins2(v, a, b);
}
virtual LInsp insGuard(LOpcode v, LIns *c, SideExit *x) {
return out->insGuard(v, c, x);
}
- virtual LInsp insImm8(LOpcode v, int32_t a, int32_t b) {
- return out->insImm8(v, a, b);
+ virtual LInsp insParam(int32_t i) {
+ return out->insParam(i);
}
virtual LInsp insImm(int32_t imm) {
return out->insImm(imm);
}
virtual LInsp insImmq(uint64_t imm) {
return out->insImmq(imm);
}
virtual LInsp insLoad(LOpcode op, LIns* base, LIns* d) {
@@ -348,17 +372,17 @@ namespace nanojit
}
virtual LInsp insStore(LIns* value, LIns* base, LIns* disp) {
return out->insStore(value, base, disp);
}
virtual LInsp insStorei(LIns* value, LIns* base, int32_t d) {
return isS8(d) ? out->insStorei(value, base, d)
: out->insStore(value, base, insImm(d));
}
- virtual LInsp insCall(int32_t fid, LInsp args[]) {
+ virtual LInsp insCall(uint32_t fid, LInsp args[]) {
return out->insCall(fid, args);
}
// convenience
LIns* insLoadi(LIns *base, int disp);
LIns* insLoad(LOpcode op, LIns *base, int disp);
LIns* ins_choose(LIns* cond, LIns* iftrue, LIns* iffalse, bool);
LIns* ins_eq0(LIns* oprnd1);
@@ -479,21 +503,21 @@ namespace nanojit
}
LIns* ins1(LOpcode v, LInsp a) {
return add(out->ins1(v, a));
}
LIns* ins2(LOpcode v, LInsp a, LInsp b) {
return v == LIR_2 ? out->ins2(v,a,b) : add(out->ins2(v, a, b));
}
- LIns* insCall(int32_t fid, LInsp args[]) {
+ LIns* insCall(uint32_t fid, LInsp args[]) {
return add(out->insCall(fid, args));
}
- LIns* insImm8(LOpcode v, int32_t a, int32_t b) {
- return add(out->insImm8(v, a, b));
+ LIns* insParam(int32_t i) {
+ return add(out->insParam(i));
}
LIns* insLoad(LOpcode v, LInsp base, LInsp disp) {
return add(out->insLoad(v, base, disp));
}
LIns* insStore(LInsp v, LInsp b, LInsp d) {
return add(out->insStore(v, b, d));
}
LIns* insStorei(LInsp v, LInsp b, int32_t d) {
@@ -531,47 +555,46 @@ namespace nanojit
public:
LInsHashSet(GC* gc);
LInsp find32(int32_t a, uint32_t &i);
LInsp find64(uint64_t a, uint32_t &i);
LInsp find1(LOpcode v, LInsp a, uint32_t &i);
LInsp find2(LOpcode v, LInsp a, LInsp b, uint32_t &i);
- LInsp findcall(int32_t fid, uint32_t argc, LInsp args[], uint32_t &i);
+ LInsp findcall(uint32_t fid, uint32_t argc, LInsp args[], uint32_t &i);
LInsp add(LInsp i, uint32_t k);
void replace(LInsp i);
static uint32_t FASTCALL hashimm(int32_t);
static uint32_t FASTCALL hashimmq(uint64_t);
static uint32_t FASTCALL hash1(LOpcode v, LInsp);
static uint32_t FASTCALL hash2(LOpcode v, LInsp, LInsp);
- static uint32_t FASTCALL hashcall(int32_t fid, uint32_t argc, LInsp args[]);
+ static uint32_t FASTCALL hashcall(uint32_t fid, uint32_t argc, LInsp args[]);
};
class CseFilter: public LirWriter
{
public:
LInsHashSet exprs;
CseFilter(LirWriter *out, GC *gc);
LIns* insImm(int32_t imm);
LIns* insImmq(uint64_t q);
LIns* ins1(LOpcode v, LInsp);
LIns* ins2(LOpcode v, LInsp, LInsp);
LIns* insLoad(LOpcode v, LInsp b, LInsp d);
- LIns* insCall(int32_t fid, LInsp args[]);
+ LIns* insCall(uint32_t fid, LInsp args[]);
LIns* insGuard(LOpcode op, LInsp cond, SideExit *x);
};
struct Page;
class LirBuffer : public GCFinalizedObject
{
public:
DWB(Fragmento*) _frago;
- public:
LirBuffer(Fragmento* frago, const CallInfo* functions);
virtual ~LirBuffer();
void clear();
LInsp next();
LInsp commit(uint32_t count);
bool addPage();
bool outOmem() { return _noMem != 0; }
debug_only (void validate() const;)
@@ -583,51 +606,53 @@ namespace nanojit
struct
{
uint32_t lir; // # instructions
uint32_t pages; // pages consumed
}
_stats;
const CallInfo* _functions;
+ LInsp state,param1,sp,rp;
private:
Page* pageAlloc();
Page* _start; // first page
LInsp _unused; // next unused instruction slot
int _noMem; // set if ran out of memory when writing to buffer
};
class LirBufWriter : public LirWriter
{
DWB(LirBuffer*) _buf; // underlying buffer housing the instructions
+ LInsp spref, rpref;
public:
LirBufWriter(LirBuffer* buf)
- : LirWriter(0), _buf(buf) {
+ : LirWriter(0), _buf(buf), spref(0), rpref(0) { // spref/rpref are read by ensureReferenceable
_functions = buf->_functions;
}
// LirWriter interface
LInsp insLoad(LOpcode op, LInsp base, LInsp off);
LInsp insStore(LInsp o1, LInsp o2, LInsp o3);
LInsp insStorei(LInsp o1, LInsp o2, int32_t imm);
LInsp ins0(LOpcode op);
LInsp ins1(LOpcode op, LInsp o1);
LInsp ins2(LOpcode op, LInsp o1, LInsp o2);
- LInsp insImm8(LOpcode op, int32_t a, int32_t b);
+ LInsp insParam(int32_t i);
LInsp insImm(int32_t imm);
LInsp insImmq(uint64_t imm);
- LInsp insCall(int32_t fid, LInsp args[]);
+ LInsp insCall(uint32_t fid, LInsp args[]);
LInsp insGuard(LOpcode op, LInsp cond, SideExit *x);
// buffer mgmt
LInsp skip(size_t);
- LInsp insFar(LOpcode op, int32_t imm);
+ LInsp insFar(LOpcode op, LInsp target);
LInsp ensureReferenceable(LInsp i, int32_t addedDistance);
bool ensureRoom(uint32_t count);
bool canReference(LInsp from, LInsp to) {
return isU8(from-to-1);
}
};
class LirFilter
@@ -638,21 +663,16 @@ namespace nanojit
virtual ~LirFilter() {}
virtual LInsp read() {
return in->read();
}
virtual LInsp pos() {
return in->pos();
}
-
- LInsp previous() {
- read();
- return pos();
- }
};
// concrete
class LirReader : public LirFilter
{
LInsp _i; // current instruction that this decoder is operating on.
public:
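
[Annotation — not part of the patch] LIns::ref() above keys off the low bit of
the opcode: the near skip/tramp forms sit at opcode-1 of their far forms with
lsb=1, carrying a 24-bit delta, while the far forms store a full pointer in
the word before the instruction. A runnable check of those invariants and of
the isS24() cutoff insFar() uses:

#include <cassert>
#include <cstdint>

enum { LIR_nearskip = 3, LIR_skip = 4, LIR_neartramp = 5, LIR_tramp = 6 };

static bool isS24(int32_t x) { return ((x << 8) >> 8) == x; } // as in LIR.cpp

int main()
{
    assert(LIR_nearskip == LIR_skip - 1   && (LIR_nearskip  & 1));
    assert(LIR_neartramp == LIR_tramp - 1 && (LIR_neartramp & 1));
    assert(!(LIR_skip & 1) && !(LIR_tramp & 1));
    // insFar() picks the near (op-1) form exactly when the delta fits
    assert(isS24(0x7FFFFF) && !isS24(0x800000));
}
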
--- a/js/src/nanojit/Native.h
+++ b/js/src/nanojit/Native.h
@@ -50,17 +50,18 @@
#include "NativeARM.h"
#endif
#elif defined(NANOJIT_PPC)
#include "NativePpc.h"
#else
#error "unknown nanojit architecture"
#endif
-namespace nanojit {
+namespace nanojit
+{
const uint32_t NJ_PAGE_SIZE = 1 << NJ_LOG2_PAGE_SIZE;
}
#ifdef NJ_STACK_GROWTH_UP
#define stack_direction(n) n
#else
#define stack_direction(n) -n
#endif
--- a/js/src/nanojit/NativeARM.h
+++ b/js/src/nanojit/NativeARM.h
@@ -144,24 +144,27 @@ namespace nanojit
counter_define(x87Top);
#define DECLARE_PLATFORM_REGALLOC()
#define DECLARE_PLATFORM_ASSEMBLER()\
const static Register argRegs[4], retRegs[2];\
void LD32_nochk(Register r, int32_t imm);\
- void CALL(intptr_t addr, const char* nm);\
+ void CALL(const CallInfo*);\
void underrunProtect(int bytes);\
bool has_cmov;\
void nativePageReset();\
void nativePageSetup();\
int* _nSlot;\
int* _nExitSlot;
+
+ #define asm_farg(i) NanoAssert(false)
+
//printf("jmp_l_n count=%d, nins=%X, %X = %X\n", (_c), nins, _nIns, ((intptr_t)(nins+(_c))-(intptr_t)_nIns - 4) );
#define swapptrs() { NIns* _tins = _nIns; _nIns=_nExitIns; _nExitIns=_tins; \
int* _nslot = _nSlot;\
_nSlot = _nExitSlot;\
_nExitSlot = _nslot;}
--- a/js/src/nanojit/NativeThumb.cpp
+++ b/js/src/nanojit/NativeThumb.cpp
@@ -188,70 +188,31 @@ namespace nanojit
BX(LR); // return
MR(R0,R2); // return LinkRecord*
RegisterMask savingMask = restore | rmask(FRAME_PTR) | rmask(LR);
POP_mask(savingMask); // regs
return _nIns;
#endif
}
- void Assembler::nArgEmitted(const CallInfo* call, uint32_t stackSlotCount, uint32_t iargs, uint32_t fargs)
- {
-#if 1
- (void)call;
- (void)stackSlotCount;
- (void)iargs;
- (void)fargs;
-#else
- // see if we have finished emitting all args. If so then make sure the
- // new stack pointer is NJ_ALIGN_STACK aligned
- if (iargs == call->iargs && fargs == call->fargs)
- {
- int32_t istack = iargs;
- istack -= 4;
- if (istack<=0)
- return; // nothing on stack
-
- const int32_t size = 4*stackSlotCount;
- const int32_t extra = alignUp(size, NJ_ALIGN_STACK) - size;
- if (extra > 0)
- SUBi(SP, extra);
- }
-#endif
- }
-
- void Assembler::nPostCallCleanup(const CallInfo* call)
+ void Assembler::asm_call(LInsp ins)
{
-#if 1
- (void)call;
-#else
- int32_t istack = call->iargs;
- int32_t fstack = call->fargs;
-
- istack -= 4; // first 4 4B args are in registers
- if (istack <= 0)
+ const CallInfo* call = callInfoFor(ins->fid());
+ CALL(call);
+ ArgSize sizes[10];
+ uint32_t argc = call->get_sizes(sizes);
+ for(uint32_t i=0; i < argc; i++)
{
- return; // nothing on stack
-
- //istack = 0;
- //if (fstack == 0)
- //return; // only using ECX/EDX nothing passed on the stack so no cleanup needed
+ uint32_t j = argc - i - 1;
+ ArgSize sz = sizes[j];
+ NanoAssert(sz == ARGSIZE_LO || sz == ARGSIZE_Q);
+ // pre-assign registers R0-R3 for arguments (if they fit)
+ Register r = i < 4 ? argRegs[i] : UnknownReg;
+ asm_arg(sz, ins->arg(j), r);
}
-
- const int32_t size = 4*istack + 8*fstack; // actual stack space used
- NanoAssert( size > 0 );
-
- const int32_t extra = alignUp(size, NJ_ALIGN_STACK);
-
- // stack re-alignment
- // only pop our adjustment amount since callee pops args in FASTCALL mode
- if (extra > 0)
- { ADDi(SP, extra); }
-#endif
- return;
}
void Assembler::nMarkExecute(Page* page, int32_t count, bool enable)
{
#ifdef UNDER_CE
DWORD dwOld;
VirtualProtect(page, NJ_PAGE_SIZE, PAGE_EXECUTE_READWRITE, &dwOld);
#endif
@@ -853,24 +814,24 @@ namespace nanojit
SUBi8(r,255);
}
else {
SUB(r, Scratch);
LDi(Scratch, i);
}
}
- void Assembler::CALL(intptr_t addr, const char* nm)
+ void Assembler::CALL(const CallInfo *ci)
{
- (void)nm;
+ intptr_t addr = ci->_address;
if (isB22((NIns*)addr, _nIns)) {
int offset = int(addr)-int(_nIns-2+2);
*(--_nIns) = (NIns)(0xF800 | ((offset>>1)&0x7FF) );
*(--_nIns) = (NIns)(0xF000 | ((offset>>12)&0x7FF) );
- asm_output2("call %08X:%s",(addr),(nm));
+ asm_output2("call %08X:%s", addr, ci->_name);
}
else
{
underrunProtect(2*(10));
if ( (((int(_nIns))&0xFFFF)%4) != 0)
*(--_nIns) = (NIns)0;
@@ -879,17 +840,17 @@ namespace nanojit
*(--_nIns) = (NIns)(0x4600 | (1<<7) | (Scratch<<3) | (IP&7));
*(--_nIns) = (NIns)0;
*(--_nIns) = (short)((addr) >> 16);
*(--_nIns) = (short)((addr) & 0xFFFF);
*(--_nIns) = (NIns)(0x4700 | (IP<<3));
*(--_nIns) = (NIns)(0xE000 | (4>>1));
*(--_nIns) = (NIns)(0x4800 | (Scratch<<8) | (1));
- asm_output2("call %08X:%s",(addr),(nm));
+ asm_output2("call %08X:%s", addr, ci->_name);
}
}
#else // ARM_JIT
void Assembler::underrunProtect(int bytes)
{
intptr_t u = (bytes) + 4;
if ( (samepage(_nIns,_nSlot) && (((intptr_t)_nIns-u) <= intptr_t(_nSlot+1))) ||
@@ -903,35 +864,35 @@ namespace nanojit
}
bool isB24(NIns *target, NIns *cur)
{
int offset = int(target)-int(cur-2+2);
return (-(1<<24) <= offset && offset < (1<<24));
}
- void Assembler::CALL(intptr_t addr, const char* nm)
+ void Assembler::CALL(const CallInfo *ci)
{
- (void)nm;
- if (isB24((NIns*)addr,_nIns))
+ intptr_t addr = ci->_address;
+ if (isB24((NIns*)addr, _nIns))
{
// we can do this with a single BL call
underrunProtect(4);
BL(addr);
- asm_output2("call %08X:%s",(addr),(nm));
+ asm_output2("call %08X:%s", addr, ci->_name);
}
else
{
underrunProtect(16);
*(--_nIns) = (NIns)((addr));
*(--_nIns) = (NIns)( COND_AL | (0x9<<21) | (0xFFF<<8) | (1<<4) | (IP) );
*(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<23) | (PC<<16) | (LR<<12) | (4) );
*(--_nIns) = (NIns)( COND_AL | (0x59<<20) | (PC<<16) | (IP<<12) | (4));
- asm_output2("call %08X:%s",(addr),(nm));
+ asm_output2("call %08X:%s", addr, ci->_name);
}
}
#endif // NJ_THUMB_JIT
void Assembler::LD32_nochk(Register r, int32_t imm)
{
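
[Annotation — not part of the patch] In the Thumb/ARM asm_call above, arg(0)
is the rightmost argument, so j = argc-i-1 walks the call left-to-right while
i indexes argRegs: the leftmost four arguments land in R0-R3 and the rest go
to the stack. The index arithmetic, extracted as a runnable check:

#include <cassert>

int main()
{
    const int argc = 6;
    for (int i = 0; i < argc; i++) {
        int j = argc - i - 1;            // LIR arg index: arg(0) is rightmost
        int reg = (i < 4) ? i : -1;      // R0..R3, else UnknownReg (stack)
        if (i == 0) assert(j == 5 && reg == 0);   // leftmost arg -> R0
        if (i == 5) assert(j == 0 && reg == -1);  // rightmost arg spills
    }
}
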
--- a/js/src/nanojit/NativeThumb.h
+++ b/js/src/nanojit/NativeThumb.h
@@ -139,24 +139,25 @@ namespace nanojit
void STMIA(Register base, RegisterMask regs);\
void LDMIA(Register base, RegisterMask regs);\
void ADDi(Register r, int32_t imm);\
void ADDi8(Register r, int32_t imm);\
void SUBi(Register r, int32_t imm);\
void SUBi8(Register r, int32_t imm);\
void JMP(NIns *target);\
void LD32_nochk(Register r, int32_t imm);\
- void CALL(intptr_t addr, const char* nm);\
+ void CALL(const CallInfo*);\
void nativePageReset();\
void nativePageSetup();\
int* _nPool;\
int* _nSlot;\
int* _nExitPool;\
int* _nExitSlot;
+ #define asm_farg(i) NanoAssert(false)
#define swapptrs() { NIns* _tins = _nIns; _nIns=_nExitIns; _nExitIns=_tins; \
int* _npool = _nPool;\
int* _nslot = _nSlot;\
_nPool = _nExitPool; _nExitPool = _npool;\
_nSlot = _nExitSlot; _nExitSlot = _nslot;}
#define BX(r) do {\
--- a/js/src/nanojit/Nativei386.cpp
+++ b/js/src/nanojit/Nativei386.cpp
@@ -177,57 +177,65 @@ namespace nanojit
// Restore saved registers.
for (Register i=UnknownReg; i >= FirstReg; i = prevreg(i))
if (restore&rmask(i)) { POPr(i); }
POPr(FP); // Pop the pre-alignment SP.
return _nIns;
}
- void Assembler::nArgEmitted(const CallInfo* call, uint32_t stackSlotCount, uint32_t iargs, uint32_t fargs)
+ void Assembler::asm_call(LInsp ins)
{
- // see if we have finished emitting all args. If so then make sure the
- // new stack pointer is NJ_ALIGN_STACK aligned
- const uint32_t istack = call->count_iargs();
- const uint32_t fstack = call->count_args() - istack;
- //printf("call %s iargs %d fargs %d istack %d fstack %d\n",call->_name,iargs,fargs,istack,fstack);
- AvmAssert(iargs <= istack);
- AvmAssert(fargs <= fstack);
- if (iargs == istack && fargs == fstack)
- {
- const int32_t size = 4*stackSlotCount;
- const int32_t extra = alignUp(size, NJ_ALIGN_STACK) - size;
- if (extra > 0)
- SUBi(SP, extra);
- }
- }
-
- void Assembler::nPostCallCleanup(const CallInfo* call)
- {
+ uint32_t fid = ins->fid();
+ const CallInfo* call = callInfoFor(fid);
// must be signed, not unsigned
- int32_t istack = call->count_iargs();
- int32_t fstack = call->count_args() - istack;
+ const uint32_t iargs = call->count_iargs();
+ int32_t fstack = call->count_args() - iargs;
- istack -= 2; // first 2 4B args are in registers
+ int32_t extra = 0;
+ int32_t istack = iargs-2; // first 2 4B args are in registers
if (istack <= 0)
{
istack = 0;
- if (fstack == 0)
- return; // only using ECX/EDX nothing passed on the stack so no cleanup needed
}
const int32_t size = 4*istack + 8*fstack; // actual stack space used
- NanoAssert( size > 0 );
-
- const int32_t extra = alignUp(size, NJ_ALIGN_STACK) - (size);
+ if (size) {
+ // stack re-alignment
+ // only pop our adjustment amount since callee pops args in FASTCALL mode
+ extra = alignUp(size, NJ_ALIGN_STACK) - (size);
+ if (extra > 0)
+ ADDi(SP, extra);
+ }
+
+ CALL(call);
- // stack re-alignment
- // only pop our adjustment amount since callee pops args in FASTCALL mode
+ // make sure fpu stack is empty before call (restoreCallerSaved)
+ NanoAssert(_allocator.isFree(FST0));
+ // note: this code requires that ref arguments (ARGSIZE_Q)
+ // be one of the first two arguments
+ // pre-assign registers to the first 2 4B args
+ const int max_regs = (iargs < 2) ? iargs : 2;
+ int n = 0;
+
+ ArgSize sizes[10];
+ uint32_t argc = call->get_sizes(sizes);
+
+ for(uint32_t i=0; i < argc; i++)
+ {
+ uint32_t j = argc-i-1;
+ ArgSize sz = sizes[j];
+ Register r = UnknownReg;
+ if (n < max_regs && sz != ARGSIZE_F)
+ r = argRegs[n++]; // tell asm_arg what reg to use
+ asm_arg(sz, ins->arg(j), r);
+ }
+
if (extra > 0)
- { ADDi(SP, extra); }
+ SUBi(SP, extra);
}
void Assembler::nMarkExecute(Page* page, int32_t count, bool enable)
{
#ifdef _MAC
MakeDataExecutable(page, count*NJ_PAGE_SIZE);
#elif defined WIN32
DWORD dwIgnore;
@@ -664,30 +672,26 @@ namespace nanojit
PUSHm(disp(rA), FP);
}
else
{
PUSHr(rA->reg);
}
}
- void Assembler::asm_farg(LInsp ins)
+ void Assembler::asm_farg(LInsp p)
{
- LIns* p = ins->oprnd1();
Register r = findRegFor(p, FpRegs);
if (rmask(r) & XmmRegs) {
STQ(0, SP, r);
} else {
FSTPQ(0, SP);
}
PUSHr(ECX); // 2*pushr is smaller than sub
PUSHr(ECX);
- _stackUsed += 2;
- ++_fargs;
- nArgEmitted(_call, _stackUsed, _iargs, _fargs);
}
void Assembler::asm_fop(LInsp ins)
{
LOpcode op = ins->opcode();
if (sse2)
{
LIns *lhs = ins->oprnd1();
@@ -769,23 +773,22 @@ namespace nanojit
{
int d = findMemFor(ins->oprnd1());
FILD(d, FP);
}
}
Register Assembler::asm_prep_fcall(Reservation *rR, LInsp ins)
{
- Register rr;
if (rR) {
+ Register rr;
if ((rr=rR->reg) != UnknownReg && (rmask(rr) & XmmRegs))
evict(rr);
}
- prepResultReg(ins, rmask(FST0));
- return FST0;
+ return prepResultReg(ins, rmask(FST0));
}
void Assembler::asm_u2f(LInsp ins)
{
// where our result goes
Register rr = prepResultReg(ins, FpRegs);
const int disp = -8;
const Register base = ESP;
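
[Annotation — not part of the patch] The i386 asm_call above emits
ADDi(SP, extra) before CALL and SUBi(SP, extra) after it, which is the right
run-time order because the assembler writes native code backwards through a
descending pointer: the last instruction emitted is the first one executed.
A toy model of that idiom (byte values are arbitrary stand-ins):

#include <cassert>
#include <cstdint>

int main()
{
    uint8_t buf[4];
    uint8_t* _nIns = buf + 4;   // like the assembler, start past the end
    *(--_nIns) = 0xAD;          // emitted first  -> executes last  (ADD SP)
    *(--_nIns) = 0xCA;          // CALL
    *(--_nIns) = 0x5B;          // emitted last   -> executes first (SUB SP)
    assert(_nIns[0] == 0x5B && _nIns[1] == 0xCA && _nIns[2] == 0xAD);
}
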
--- a/js/src/nanojit/Nativei386.h
+++ b/js/src/nanojit/Nativei386.h
@@ -128,17 +128,18 @@ namespace nanojit
#define DECLARE_PLATFORM_ASSEMBLER() \
const static Register argRegs[2], retRegs[2]; \
bool x87Dirty; \
bool sse2; \
bool has_cmov; \
bool pad[1];\
void nativePageReset();\
- void nativePageSetup();
+ void nativePageSetup();\
+ void asm_farg(LInsp);
#define swapptrs() { NIns* _tins = _nIns; _nIns=_nExitIns; _nExitIns=_tins; }
// enough room for n bytes
#define underrunProtect(n) \
{ \
intptr_t u = n + sizeof(PageHeader)/sizeof(NIns); \
if ( !samepage(_nIns-u,_nIns-1) ) \
@@ -653,17 +654,19 @@ namespace nanojit
#define FDIVR(d,b) do { FPUm(0xdc07, d, b); asm_output2("fdivr %d(%s)",d,gpn(b)); } while(0)
#define FINCSTP() do { FPUc(0xd9f7); asm_output2("fincstp"); } while(0)
#define FSTP(r) do { FPU(0xddd8, r&7); asm_output1("fstp %s",fpn(r)); fpu_pop();} while(0)
#define FCOMP() do { FPUc(0xD8D9); asm_output("fcomp"); fpu_pop();} while(0)
#define FCOMPP() do { FPUc(0xDED9); asm_output("fcompp"); fpu_pop();fpu_pop();} while(0)
#define FLDr(r) do { FPU(0xd9c0,r); asm_output1("fld %s",fpn(r)); fpu_push(); } while(0)
#define EMMS() do { FPUc(0x0f77); asm_output("emms"); } while (0)
-#define CALL(a,nm) do { \
- underrunProtect(5); \
- int offset = (a) - ((int)_nIns); \
- IMM32( (uint32_t)offset ); \
- *(--_nIns) = 0xE8; \
- asm_output1("call %s",(nm)); \
- } while (0)
+#define CALL(c) do { \
+ underrunProtect(5); \
+ int offset = (c->_address) - ((int)_nIns); \
+ IMM32( (uint32_t)offset ); \
+ *(--_nIns) = 0xE8; \
+ verbose_only(asm_output1("call %s",(c->_name));) \
+ debug_only(if ((c->_argtypes&3)==ARGSIZE_F) fpu_push();)\
+} while (0)
+
}
#endif // __nanojit_Nativei386__