Bug 679454 - Correctly handle cross-segment jumps on MIPS [chris@mips.com] (r=wmaddox)
author William Maddox <wmaddox@adobe.com>
Wed, 17 Aug 2011 14:29:08 -0700
changeset 75794 3c8c7eb5e4ff975f2ae2a6fe9a9f14442ac10b25
parent 75793 0412880dec39053fe7cffcc0222704a840db6522
child 75795 78c8c065bc958554339382c18dbdd18d6fa8d1cd
push id 21056
push user mak77@bonardo.net
push date Wed, 24 Aug 2011 08:19:04 +0000
treeherder mozilla-central@5d9989c3bff6 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers wmaddox
bugs 679454
milestone 9.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 679454 - Correctly handle cross-segment jumps on MIPS [chris@mips.com] (r=wmaddox)
js/src/nanojit/NativeMIPS.cpp
--- a/js/src/nanojit/NativeMIPS.cpp
+++ b/js/src/nanojit/NativeMIPS.cpp
@@ -1407,30 +1407,30 @@ namespace nanojit
                     // NB trampoline code is emitted in the correct order
                     trampJ(targ);
                     trampNOP();                         // trampoline delay slot
 
                 }
                 else {
                     //  [linkedinstructions]
                     //  bxxx trampoline
-                    //   lui $at,%hi(targ)
+                    //   lui $ra,%hi(targ)
                     //  ...
                     // trampoline:
-                    //  addiu $at,%lo(targ)
-                    //  jr $at
+                    //  addiu $ra,%lo(targ)
+                    //  jr $ra
                     //   nop
 
                     underrunProtect(5 * 4);             // keep bxx and trampoline together
 
-                    LUI(AT,hi(uint32_t(targ)));         // delay slot
+                    LUI(RA,hi(uint32_t(targ)));         // delay slot
 
                     // NB trampoline code is emitted in the correct order
-                    trampADDIU(AT, AT, lo(uint32_t(targ)));
-                    trampJR(AT);
+                    trampADDIU(RA, RA, lo(uint32_t(targ)));
+                    trampJR(RA);
                     trampNOP();                         // trampoline delay slot
 
                 }
             }
             else {
                 // Worst case is bxxx,lui addiu;jr;nop as above
                 // Best case is branch to trampoline can be replaced
                 // with branch to target in which case the trampoline will be abandoned
@@ -1629,27 +1629,41 @@ namespace nanojit
         Register rb = (b==a) ? ra : findRegFor(b, allow & ~rmask(ra));
 
         return Branches(asm_bxx(branchOnFalse, condop, ra, rb, targ));
     }
 
     void Assembler::asm_j(NIns * const targ, bool bdelay)
     {
         if (targ == NULL) {
+            // target is unknown - asm_bxx will generate trampoline code
             NanoAssert(bdelay);
             (void) asm_bxx(false, LIR_eqi, ZERO, ZERO, targ);
         }
-        else {
-            NanoAssert(SEG(targ) == SEG(_nIns));
+        else if (SEG(targ) == SEG(_nIns)) {
+            // target is known and in same segment
             if (bdelay) {
                 underrunProtect(2*4);    // j + delay
                 NOP();
             }
             J(targ);
         }
+        else {
+            // target is known but in different segment
+            // generate register jump using $ra
+            // lui $ra,%hi(targ)
+            // ori $ra,%lo(targ) # will be omitted if (targ & 0xffff)==0
+            // jr $ra
+            //  [nop]
+            underrunProtect(4*4); // worst case to prevent underrunProtect from reinvoking asm_j
+            if (bdelay)
+                NOP();
+            JR(RA);
+            asm_li(RA, (uint32_t)targ);
+        }
         TAG("asm_j(targ=%p) bdelay=%d", targ);
     }
 
     void
     Assembler::asm_spill(Register rr, int d, bool quad)
     {
         USE(quad);
         NanoAssert(d);
@@ -1869,17 +1883,16 @@ namespace nanojit
             asm_j(frag->fragEntry, false);
         }
         else {
             // Target doesn't exist. Jump to an epilogue for now.
             // This can be patched later.
             if (!_epilogue)
                 _epilogue = genEpilogue();
             GuardRecord *lr = guard->record();
-            // FIXME: _epilogue may be in another segment
             // lui    $v0,%hi(lr)
             // j      _epilogue
             //  addiu $v0,%lo(lr)
             underrunProtect(2 * 4);     // j + branch delay
             ADDIU(V0, V0, lo(int32_t(lr)));
             asm_j(_epilogue, false);
             LUI(V0, hi(int32_t(lr)));
             lr->jmp = _nIns;
@@ -1961,21 +1974,21 @@ namespace nanojit
     }
 
 
     NIns*
     Assembler::genPrologue(void)
     {
         /*
          * Use a non standard fp because we don't know the final framesize until now
-         * addiu  $sp,-FRAMESIZE
+         * addiu   $sp,-FRAMESIZE
          * sw      $ra,RA_OFFSET($sp)
          * sw      $fp,FP_OFFSET($sp)
-         * move   $fp,$sp
-         * addu      $sp,-stackNeeded
+         * move    $fp,$sp
+         * addu    $sp,-stackNeeded
          */
 
         uint32_t stackNeeded = max_out_args + STACK_GRANULARITY * _activation.stackSlotsNeeded();
         uint32_t amt = alignUp(stackNeeded, NJ_ALIGN_STACK);
 
         if (amt) {
             if (isS16(-amt))
                 ADDIU(SP, SP, -amt);
@@ -1984,39 +1997,40 @@ namespace nanojit
                 asm_li(AT, -amt);
             }
         }
 
         NIns *patchEntry = _nIns; // FIXME: who uses this value and where should it point?
 
         MOVE(FP, SP);
         SW(FP, FP_OFFSET, SP);
-        SW(RA, RA_OFFSET, SP);        // No need to save for leaf functions
+        underrunProtect(2 * 4);         // code page switch could change $ra
+        SW(RA, RA_OFFSET, SP);
         ADDIU(SP, SP, -FRAMESIZE);
 
         TAG("genPrologue()");
 
         return patchEntry;
     }
 
     NIns*
     Assembler::genEpilogue(void)
     {
         /*
          * move    $sp,$fp
+         * lw      $fp,FP_OFFSET($sp)
          * lw      $ra,RA_OFFSET($sp)
-         * lw      $fp,FP_OFFSET($sp)
          * j       $ra
          * addiu   $sp,FRAMESIZE
          */
-        underrunProtect(2*4);   // j $ra; addiu $sp,FRAMESIZE
+        underrunProtect(3*4);   // lw $ra,RA_OFFSET($sp);j $ra; addiu $sp,FRAMESIZE
         ADDIU(SP, SP, FRAMESIZE);
         JR(RA);
+        LW(RA, RA_OFFSET, SP);
         LW(FP, FP_OFFSET, SP);
-        LW(RA, RA_OFFSET, SP);
         MOVE(SP, FP);
 
         TAG("genEpilogue()");
 
         return _nIns;
     }
 
     RegisterMask