Bug 539379 - TM: Crash [@ ExecuteTrace] or [@ ExecuteTree]. r=edwsmith.
author Nicholas Nethercote <nnethercote@mozilla.com>
Fri, 15 Jan 2010 15:07:32 +1100
changeset 37705 80ff3ca19b1cfdec8fd86551cc615595465593fc
parent 37704 67967239b5569ac5d11cd21ccd56b3da3d7484ef
child 37706 b831915b57aff7b77e12651228f9d32951b82d64
push id 11426
push user rsayre@mozilla.com
push date Sun, 31 Jan 2010 16:36:36 +0000
treeherder mozilla-central@3048d03980e7 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers edwsmith
bugs 539379
milestone 1.9.3a1pre
Bug 539379 - TM: Crash [@ ExecuteTrace] or [@ ExecuteTree]. r=edwsmith.
js/src/nanojit/Assembler.cpp
js/src/nanojit/Assembler.h
js/src/nanojit/NativeARM.cpp
js/src/nanojit/NativePPC.cpp
js/src/nanojit/NativeSparc.cpp
js/src/nanojit/NativeX64.cpp
js/src/nanojit/Nativei386.cpp
js/src/nanojit/Nativei386.h
--- a/js/src/nanojit/Assembler.cpp
+++ b/js/src/nanojit/Assembler.cpp
@@ -366,44 +366,49 @@ namespace nanojit
                 // neither free nor active.
                 NanoAssert(!_allocator.isFree(r));
                 NanoAssert(!_allocator.getActive(r));
             }
         }
     }
     #endif /* _DEBUG */
 
-    void Assembler::findRegFor2(RegisterMask allow, LIns* ia, Register& ra, LIns* ib, Register& rb)
+    void Assembler::findRegFor2(RegisterMask allowa, LIns* ia, Register& ra,
+                                RegisterMask allowb, LIns* ib, Register& rb)
     {
+        // There should be some overlap between 'allowa' and 'allowb', else
+        // there's no point calling this function.
+        NanoAssert(allowa & allowb);
+
         if (ia == ib) {
-            ra = rb = findRegFor(ia, allow);
+            ra = rb = findRegFor(ia, allowa & allowb);  // use intersection(allowa, allowb)
         } else {
             // You might think we could just do this:
             //
-            //   ra = findRegFor(ia, allow);
-            //   rb = findRegFor(ib, allow & ~rmask(ra));
+            //   ra = findRegFor(ia, allowa);
+            //   rb = findRegFor(ib, allowb & ~rmask(ra));
             //
             // But if 'ib' was already in an allowed register, the first
             // findRegFor() call could evict it, whereupon the second
             // findRegFor() call would immediately restore it, which is
             // sub-optimal.  What we effectively do instead is this:
             //
-            //   ra = findRegFor(ia, allow & ~rmask(rb));
-            //   rb = findRegFor(ib, allow & ~rmask(ra));
+            //   ra = findRegFor(ia, allowa & ~rmask(rb));
+            //   rb = findRegFor(ib, allowb & ~rmask(ra));
             //
             // but we have to determine what 'rb' initially is to avoid the
             // mutual dependency between the assignments.
-            bool rbDone = !ib->isUnusedOrHasUnknownReg() && (rb = ib->getReg(), allow & rmask(rb));
+            bool rbDone = !ib->isUnusedOrHasUnknownReg() && (rb = ib->getReg(), allowb & rmask(rb));
             if (rbDone) {
-                allow &= ~rmask(rb);    // ib already in an allowable reg, keep that one
+                allowa &= ~rmask(rb);   // ib already in an allowable reg, keep that one
             }
-            ra = findRegFor(ia, allow);
+            ra = findRegFor(ia, allowa);
             if (!rbDone) {
-                allow &= ~rmask(ra);
-                rb = findRegFor(ib, allow);
+                allowb &= ~rmask(ra);
+                rb = findRegFor(ib, allowb);
             }
         }
     }
 
     Register Assembler::findSpecificRegFor(LIns* i, Register w)
     {
         return findRegFor(i, rmask(w));
     }
@@ -427,16 +432,37 @@ namespace nanojit
             return FP;
         }
     #else
         (void) d;
     #endif
         return findRegFor(i, allow);
     }
 
+    // Like findRegFor2(), but used for stores where the base value has the
+    // same type as the stored value, eg. in asm_store32() on 32-bit platforms
+    // and asm_store64() on 64-bit platforms.  Similar to getBaseReg(),
+    // findRegFor2() can be called instead, but this function can optimize the
+    // case where the base value is a LIR_alloc.
+    void Assembler::getBaseReg2(RegisterMask allowValue, LIns* value, Register& rv,
+                                RegisterMask allowBase, LIns* base, Register& rb, int &d)
+    {
+    #if !PEDANTIC
+        if (base->isop(LIR_alloc)) {
+            rb = FP;
+            d += findMemFor(base);
+            rv = findRegFor(value, allowValue);
+            return;
+        }
+    #else
+        (void) d;
+    #endif
+        findRegFor2(allowValue, value, rv, allowBase, base, rb);
+    }
+
     // Finds a register in 'allow' to hold the result of 'ins'.  Used when we
     // encounter a use of 'ins'.  The actions depend on the prior regstate of
     // 'ins':
     // - If the result of 'ins' is not in any register, we find an allowed
     //   one, evicting one if necessary.
     // - If the result of 'ins' is already in an allowed register, we use that.
     // - If the result of 'ins' is already in a not-allowed register, we find an
     //   allowed one and move it.
--- a/js/src/nanojit/Assembler.h
+++ b/js/src/nanojit/Assembler.h
@@ -322,23 +322,26 @@ namespace nanojit
             void        evictSomeActiveRegs(RegisterMask regs);
             void        evictScratchRegs();
             void        intersectRegisterState(RegAlloc& saved);
             void        unionRegisterState(RegAlloc& saved);
             void        assignSaved(RegAlloc &saved, RegisterMask skip);
             LInsp       findVictim(RegisterMask allow);
 
             Register    getBaseReg(LIns *i, int &d, RegisterMask allow);
+            void        getBaseReg2(RegisterMask allowValue, LIns* value, Register& rv,
+                                    RegisterMask allowBase, LIns* base, Register& rb, int &d);
 #if NJ_USES_QUAD_CONSTANTS
             const uint64_t*
                         findQuadConstant(uint64_t q);
 #endif
             int         findMemFor(LIns* i);
             Register    findRegFor(LIns* i, RegisterMask allow);
-            void        findRegFor2(RegisterMask allow, LIns* ia, Register &ra, LIns *ib, Register &rb);
+            void        findRegFor2(RegisterMask allowa, LIns* ia, Register &ra,
+                                    RegisterMask allowb, LIns *ib, Register &rb);
             Register    findSpecificRegFor(LIns* i, Register r);
             Register    findSpecificRegForUnallocated(LIns* i, Register r);
             Register    prepResultReg(LIns *i, RegisterMask allow);
             Register    prepareResultReg(LIns *i, RegisterMask allow);
             void        freeRsrcOf(LIns *i, bool pop);
             void        freeResourcesOf(LIns *ins);
             void        evictIfActive(Register r);
             void        evict(LIns* vic);
--- a/js/src/nanojit/NativeARM.cpp
+++ b/js/src/nanojit/NativeARM.cpp
@@ -1214,23 +1214,17 @@ Assembler::asm_store32(LOpcode op, LIns 
             NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
             return;
         default:
             NanoAssertMsg(0, "asm_store32 should never receive this LIR opcode");
             return;
     }
 
     Register ra, rb;
-    if (base->isop(LIR_alloc)) {
-        rb = FP;
-        dr += findMemFor(base);
-        ra = findRegFor(value, GpRegs);
-    } else {
-        findRegFor2(GpRegs, value, ra, base, rb);
-    }
+    getBaseReg2(GpRegs, value, ra, GpRegs, base, rb, dr);
 
     if (isU12(-dr) || isU12(dr)) {
         STR(ra, rb, dr);
     } else {
         STR(ra, IP, 0);
         asm_add_imm(IP, rb, dr);
     }
 }
@@ -2097,17 +2091,17 @@ Assembler::asm_fcmp(LInsp ins)
 {
     LInsp lhs = ins->oprnd1();
     LInsp rhs = ins->oprnd2();
     LOpcode op = ins->opcode();
 
     NanoAssert(op >= LIR_feq && op <= LIR_fge);
 
     Register ra, rb;
-    findRegFor2(FpRegs, lhs, ra, rhs, rb);
+    findRegFor2(FpRegs, lhs, ra, FpRegs, rhs, rb);
 
     int e_bit = (op != LIR_feq);
 
     // do the comparison and get results loaded in ARM status register
     FMSTAT();
     FCMPD(ra, rb, e_bit);
 }
 
@@ -2210,17 +2204,17 @@ Assembler::asm_cmp(LIns *cond)
         Register r = findRegFor(lhs, GpRegs);
         if (c == 0 && cond->isop(LIR_eq)) {
             TST(r, r);
         } else {
             asm_cmpi(r, c);
         }
     } else {
         Register ra, rb;
-        findRegFor2(GpRegs, lhs, ra, rhs, rb);
+        findRegFor2(GpRegs, lhs, ra, GpRegs, rhs, rb);
         CMP(ra, rb);
     }
 }
 
 void
 Assembler::asm_cmpi(Register r, int32_t imm)
 {
     if (imm < 0) {
--- a/js/src/nanojit/NativePPC.cpp
+++ b/js/src/nanojit/NativePPC.cpp
@@ -967,17 +967,17 @@ namespace nanojit
 
     void Assembler::asm_fop(LIns *ins) {
         LOpcode op = ins->opcode();
         LInsp lhs = ins->oprnd1();
         LInsp rhs = ins->oprnd2();
         RegisterMask allow = FpRegs;
         Register rr = prepResultReg(ins, allow);
         Register ra, rb;
-        findRegFor2(allow, lhs, ra, rhs, rb);
+        findRegFor2(allow, lhs, ra, allow, rhs, rb);
         switch (op) {
             case LIR_fadd: FADD(rr, ra, rb); break;
             case LIR_fsub: FSUB(rr, ra, rb); break;
             case LIR_fmul: FMUL(rr, ra, rb); break;
             case LIR_fdiv: FDIV(rr, ra, rb); break;
             default:
                 debug_only(outputf("%s",lirNames[op]);)
                 TODO(asm_fop);
--- a/js/src/nanojit/NativeSparc.cpp
+++ b/js/src/nanojit/NativeSparc.cpp
@@ -329,27 +329,23 @@ namespace nanojit
                 int c = value->imm32();
                 STW32(L2, dr, rb);
                 SET32(c, L2);
             }
         else
             {
                 // make sure what is in a register
                 Register ra, rb;
-                if (base->isop(LIR_alloc)) {
-                    rb = FP;
-                    dr += findMemFor(base);
-                    ra = findRegFor(value, GpRegs);
-                } else if (base->isconst()) {
+                if (base->isconst()) {
                     // absolute address
                     dr += base->imm32();
                     ra = findRegFor(value, GpRegs);
                     rb = G0;
                 } else {
-                    findRegFor2(GpRegs, value, ra, base, rb);
+                    getBaseReg2(GpRegs, value, ra, GpRegs, base, rb, dr);
                 }
                 STW32(ra, dr, rb);
             }
     }
 
     void Assembler::asm_spill(Register rr, int d, bool pop, bool quad)
     {
         underrunProtect(24);
@@ -596,17 +592,17 @@ namespace nanojit
                 else {
                     SUBCC(r, L2, G0);
                     SET32(c, L2);
                 }
             }
         else
             {
                 Register ra, rb;
-                findRegFor2(GpRegs, lhs, ra, rhs, rb);
+                findRegFor2(GpRegs, lhs, ra, GpRegs, rhs, rb);
                 SUBCC(ra, rb, G0);
             }
     }
 
     void Assembler::asm_fcond(LInsp ins)
     {
         // only want certain regs
         Register r = prepResultReg(ins, AllowableFlagRegs);
--- a/js/src/nanojit/NativeX64.cpp
+++ b/js/src/nanojit/NativeX64.cpp
@@ -1150,17 +1150,17 @@ namespace nanojit
         LIns *b = cond->oprnd2();
         if (isImm32(b)) {
             asm_cmp_imm(cond);
             return;
         }
         LIns *a = cond->oprnd1();
         Register ra, rb;
         if (a != b) {
-            findRegFor2(GpRegs, a, ra, b, rb);
+            findRegFor2(GpRegs, a, ra, GpRegs, b, rb);
         } else {
             // optimize-me: this will produce a const result!
             ra = rb = findRegFor(a, GpRegs);
         }
 
         LOpcode condop = cond->opcode();
         if (LIR_qeq <= condop && condop <= LIR_quge) {
             CMPQR(ra, rb);
@@ -1282,17 +1282,17 @@ namespace nanojit
             else
                 SETAE(r);
         }
         fcmp(a, b);
     }
 
     void Assembler::fcmp(LIns *a, LIns *b) {
         Register ra, rb;
-        findRegFor2(FpRegs, a, ra, b, rb);
+        findRegFor2(FpRegs, a, ra, FpRegs, b, rb);
         UCOMISD(ra, rb);
     }
 
     void Assembler::asm_restore(LIns *ins, Register r) {
         if (ins->isop(LIR_alloc)) {
             int d = disp(ins);
             LEAQRM(r, d, FP);
         }
@@ -1458,30 +1458,31 @@ namespace nanojit
                 NanoAssertMsg(0, "asm_load32 should never receive this LIR opcode");
                 break;
         }
     }
 
     void Assembler::asm_store64(LOpcode op, LIns *value, int d, LIns *base) {
         NanoAssert(value->isQuad());
 
-        Register b = getBaseReg(base, d, BaseRegs);
-
         switch (op) {
             case LIR_stqi: {
-                Register r = findRegFor(value, GpRegs & ~rmask(b));
+                Register r, b;
+                getBaseReg2(GpRegs, value, r, BaseRegs, base, b, d);
                 MOVQMR(r, d, b);    // gpr store
                 break;
             }
             case LIR_stfi: {
+                Register b = getBaseReg(base, d, BaseRegs);
                 Register r = findRegFor(value, FpRegs);
                 MOVSDMR(r, d, b);   // xmm store
                 break;
             }
             case LIR_st32f: {
+                Register b = getBaseReg(base, d, BaseRegs);
                 Register r = findRegFor(value, FpRegs);
                 Register t = registerAllocTmp(FpRegs & ~rmask(r));
 
                 MOVSSMR(t, d, b);   // store
                 CVTSD2SS(t, r);     // cvt to single-precision
                 XORPS(t);           // break dependency chains
                 break;
             }
--- a/js/src/nanojit/Nativei386.cpp
+++ b/js/src/nanojit/Nativei386.cpp
@@ -499,27 +499,23 @@ namespace nanojit
         else
         {
             // Quirk of x86-32: reg must be a/b/c/d for single-byte stores.
             const RegisterMask SrcRegs = (op == LIR_stb) ?
                             (1<<EAX | 1<<ECX | 1<<EDX | 1<<EBX) :
                             GpRegs;
 
             Register ra, rb;
-            if (base->isop(LIR_alloc)) {
-                rb = FP;
-                dr += findMemFor(base);
-                ra = findRegFor(value, SrcRegs);
-            } else if (base->isconst()) {
+            if (base->isconst()) {
                 // absolute address
                 rb = UnknownReg;
                 dr += base->imm32();
                 ra = findRegFor(value, SrcRegs);
             } else {
-                findRegFor2(SrcRegs, value, ra, base, rb);
+                getBaseReg2(SrcRegs, value, ra, GpRegs, base, rb, dr);
             }
             switch (op) {
                 case LIR_stb:
                     ST8(rb, dr, ra);
                     break;
                 case LIR_sts:
                     ST16(rb, dr, ra);
                     break;
@@ -849,17 +845,17 @@ namespace nanojit
             if (c == 0 && cond->isop(LIR_eq)) {
                 TEST(r, r);
             } else {
                 CMPi(r, c);
             }
 
         } else {
             Register ra, rb;
-            findRegFor2(GpRegs, lhs, ra, rhs, rb);
+            findRegFor2(GpRegs, lhs, ra, GpRegs, rhs, rb);
             CMP(ra, rb);
         }
     }
 
     void Assembler::asm_fcond(LInsp ins)
     {
         LOpcode opcode = ins->opcode();
         Register r = prepareResultReg(ins, AllowableFlagRegs);
@@ -1932,17 +1928,17 @@ namespace nanojit
                     // -------       ---   ---------       ---   -------
                     // UNORDERED     111   0100_0100       001   SETNP/JNP fails
                     // EQUAL         100   0100_0000       000   SETNP/JNP succeeds
                     // GREATER_THAN  000   0000_0000       011   SETNP/JNP fails
                     // LESS_THAN     001   0000_0000       011   SETNP/JNP fails
 
                     evictIfActive(EAX);
                     Register ra, rb;
-                    findRegFor2(XmmRegs, lhs, ra, rhs, rb);
+                    findRegFor2(XmmRegs, lhs, ra, XmmRegs, rhs, rb);
 
                     TEST_AH(mask);
                     LAHF();
                     SSE_UCOMISD(ra, rb);
                 }
             } else {
                 // LIR_fgt:
                 //   ucomisd       ZPC   outcome (SETA/JA succeeds if CZ==00)
@@ -1956,17 +1952,17 @@ namespace nanojit
                 //   ucomisd       ZPC   outcome (SETAE/JAE succeeds if C==0)
                 //   -------       ---   -------
                 //   UNORDERED     111   SETAE/JAE fails
                 //   EQUAL         100   SETAE/JAE succeeds
                 //   GREATER_THAN  000   SETAE/JAE succeeds
                 //   LESS_THAN     001   SETAE/JAE fails
 
                 Register ra, rb;
-                findRegFor2(XmmRegs, lhs, ra, rhs, rb);
+                findRegFor2(XmmRegs, lhs, ra, XmmRegs, rhs, rb);
                 SSE_UCOMISD(ra, rb);
             }
 
         } else {
             // First, we convert (a > b) into (b < a), and (a >= b) into (b <= a).
             // Note that this is the opposite of the sse2 conversion above.
             if (condop == LIR_fgt) {
                 condop = LIR_flt;
--- a/js/src/nanojit/Nativei386.h
+++ b/js/src/nanojit/Nativei386.h
@@ -928,38 +928,38 @@ namespace nanojit
 #define FLD1()      do { count_fpu(); FPUc(0xd9e8);             asm_output("fld1"); fpu_push(); } while(0)
 #define FLDZ()      do { count_fpu(); FPUc(0xd9ee);             asm_output("fldz"); fpu_push(); } while(0)
 #define FFREE(r)    do { count_fpu(); FPU(0xddc0, r);           asm_output("ffree %s",fpn(r)); } while(0)
 #define FST32(p,d,b) do { count_stq(); FPUm(0xd902|(p), d, b);   asm_output("fst%s32 %d(%s)",((p)?"p":""),d,gpn(b)); if (p) fpu_pop(); } while(0)
 #define FSTQ(p,d,b) do { count_stq(); FPUm(0xdd02|(p), d, b);   asm_output("fst%sq %d(%s)",((p)?"p":""),d,gpn(b)); if (p) fpu_pop(); } while(0)
 #define FSTPQ(d,b)  FSTQ(1,d,b)
 #define FCOM(p,d,b) do { count_fpuld(); FPUm(0xdc02|(p), d, b); asm_output("fcom%s %d(%s)",((p)?"p":""),d,gpn(b)); if (p) fpu_pop(); } while(0)
 #define FCOMdm(p,m) do { const double* const dm = m; \
-                         count_fpuld(); FPUdm(0xdc02|(p), dm);   asm_output("fcom%s (%p)",((p)?"p":""),dm); if (p) fpu_pop(); } while(0)
+                         count_fpuld(); FPUdm(0xdc02|(p), dm);   asm_output("fcom%s (%p)",((p)?"p":""),(void*)dm); if (p) fpu_pop(); } while(0)
 #define FLD32(d,b)  do { count_ldq(); FPUm(0xd900, d, b);       asm_output("fld32 %d(%s)",d,gpn(b)); fpu_push();} while(0)
 #define FLDQ(d,b)   do { count_ldq(); FPUm(0xdd00, d, b);       asm_output("fldq %d(%s)",d,gpn(b)); fpu_push();} while(0)
 #define FLDQdm(m)   do { const double* const dm = m; \
-                         count_ldq(); FPUdm(0xdd00, dm);        asm_output("fldq (%p)",dm); fpu_push();} while(0)
+                         count_ldq(); FPUdm(0xdd00, dm);        asm_output("fldq (%p)",(void*)dm); fpu_push();} while(0)
 #define FILDQ(d,b)  do { count_fpuld(); FPUm(0xdf05, d, b);     asm_output("fildq %d(%s)",d,gpn(b)); fpu_push(); } while(0)
 #define FILD(d,b)   do { count_fpuld(); FPUm(0xdb00, d, b);     asm_output("fild %d(%s)",d,gpn(b)); fpu_push(); } while(0)
 #define FIST(p,d,b) do { count_fpu(); FPUm(0xdb02|(p), d, b);   asm_output("fist%s %d(%s)",((p)?"p":""),d,gpn(b)); if(p) fpu_pop(); } while(0)
 #define FADD(d,b)   do { count_fpu(); FPUm(0xdc00, d, b);       asm_output("fadd %d(%s)",d,gpn(b)); } while(0)
 #define FADDdm(m)   do { const double* const dm = m; \
-                         count_ldq(); FPUdm(0xdc00, dm);        asm_output("fadd (%p)",dm); } while(0)
+                         count_ldq(); FPUdm(0xdc00, dm);        asm_output("fadd (%p)",(void*)dm); } while(0)
 #define FSUB(d,b)   do { count_fpu(); FPUm(0xdc04, d, b);       asm_output("fsub %d(%s)",d,gpn(b)); } while(0)
 #define FSUBR(d,b)  do { count_fpu(); FPUm(0xdc05, d, b);       asm_output("fsubr %d(%s)",d,gpn(b)); } while(0)
 #define FSUBRdm(m)  do { const double* const dm = m; \
-                         count_ldq(); FPUdm(0xdc05, dm);        asm_output("fsubr (%p)",dm); } while(0)
+                         count_ldq(); FPUdm(0xdc05, dm);        asm_output("fsubr (%p)",(void*)dm); } while(0)
 #define FMUL(d,b)   do { count_fpu(); FPUm(0xdc01, d, b);       asm_output("fmul %d(%s)",d,gpn(b)); } while(0)
 #define FMULdm(m)   do { const double* const dm = m; \
-                         count_ldq(); FPUdm(0xdc01, dm);        asm_output("fmul (%p)",dm); } while(0)
+                         count_ldq(); FPUdm(0xdc01, dm);        asm_output("fmul (%p)",(void*)dm); } while(0)
 #define FDIV(d,b)   do { count_fpu(); FPUm(0xdc06, d, b);       asm_output("fdiv %d(%s)",d,gpn(b)); } while(0)
 #define FDIVR(d,b)  do { count_fpu(); FPUm(0xdc07, d, b);       asm_output("fdivr %d(%s)",d,gpn(b)); } while(0)
 #define FDIVRdm(m)  do { const double* const dm = m; \
-                         count_ldq(); FPUdm(0xdc07, dm);        asm_output("fdivr (%p)",dm); } while(0)
+                         count_ldq(); FPUdm(0xdc07, dm);        asm_output("fdivr (%p)",(void*)dm); } while(0)
 #define FINCSTP()   do { count_fpu(); FPUc(0xd9f7);             asm_output("fincstp"); } while(0)
 #define FSTP(r)     do { count_fpu(); FPU(0xddd8, r&7);         asm_output("fstp %s",fpn(r)); fpu_pop();} while(0)
 #define FCOMP()     do { count_fpu(); FPUc(0xD8D9);             asm_output("fcomp"); fpu_pop();} while(0)
 #define FCOMPP()    do { count_fpu(); FPUc(0xDED9);             asm_output("fcompp"); fpu_pop();fpu_pop();} while(0)
 #define FLDr(r)     do { count_ldq(); FPU(0xd9c0,r);            asm_output("fld %s",fpn(r)); fpu_push(); } while(0)
 #define EMMS()      do { count_fpu(); FPUc(0x0f77);             asm_output("emms"); } while (0)
 
 // standard direct call