Bug 560926 - Add support for arithmetic with branch on overflow (r=nnethercote+ edwsmith+ wmaddox+)
author Edwin Smith <edwsmith@adobe.com>
Wed, 26 May 2010 14:03:14 -0400
changeset 43268 0090cce519265f90fded8aa329ce169894440c53
parent 43267 04f69ec5483b4852b3fe9188068a17d67e08876e
child 43269 d43dc29cf8b4a97694d44974bae904e234e4777a
push id unknown
push user unknown
push date unknown
reviewers nnethercote
bugs 560926
milestone 1.9.3a5pre
Bug 560926 - Add support for arithmetic with branch on overflow (r=nnethercote+ edwsmith+ wmaddox+)

Patch to add 'j' (branch) variants of the exit-on-overflow instructions, plus a few dependents:
  32-bit: LIR_addjovi LIR_subjovi LIR_muljovi
  64-bit: LIR_subq LIR_addjovq LIR_subjovq
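For orientation, a minimal sketch (not part of the patch) of how a client might emit one of the new instructions through the LirWriter interface. It assumes `lir` is a LirWriter* and `a`, `b` are previously emitted 32-bit integer instructions (all hypothetical names); insBranchJov(), ins0(LIR_label) and setTarget() are the entry points this patch adds or extends, and the forward-branch/back-patch pattern mirrors what lirasm does below:

    // Emit 'a + b' and branch to an overflow handler if the 32-bit add
    // overflows.  The target label is not known yet, so pass NULL and
    // back-patch it once the label instruction has been emitted.
    LIns* sum = lir->insBranchJov(LIR_addjovi, a, b, NULL);
    // ... non-overflow path: 'sum' holds the 32-bit result ...
    LIns* ovf = lir->ins0(LIR_label);   // overflow handler starts here
    sum->setTarget(ovf);                // back-patch the forward branch
    // ... overflow path ...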
js/src/lirasm/lirasm.cpp
js/src/lirasm/tests/addjovi.in
js/src/lirasm/tests/addjovi.out
js/src/lirasm/tests/addjovi_ovf.in
js/src/lirasm/tests/addjovi_ovf.out
js/src/lirasm/tests/muljovi.in
js/src/lirasm/tests/muljovi.out
js/src/lirasm/tests/muljovi_ovf.in
js/src/lirasm/tests/muljovi_ovf.out
js/src/lirasm/tests/subjovi.in
js/src/lirasm/tests/subjovi.out
js/src/lirasm/tests/subjovi_ovf.in
js/src/lirasm/tests/subjovi_ovf.out
js/src/nanojit/Assembler.cpp
js/src/nanojit/Assembler.h
js/src/nanojit/LIR.cpp
js/src/nanojit/LIR.h
js/src/nanojit/LIRopcode.tbl
js/src/nanojit/NativeARM.cpp
js/src/nanojit/NativePPC.cpp
js/src/nanojit/NativeSparc.cpp
js/src/nanojit/NativeX64.cpp
js/src/nanojit/Nativei386.cpp
--- a/js/src/lirasm/lirasm.cpp
+++ b/js/src/lirasm/lirasm.cpp
@@ -324,19 +324,21 @@ private:
     void need(size_t);
     LIns *ref(const string &);
     LIns *assemble_jump(bool isCond);
     LIns *assemble_load();
     LIns *assemble_call(const string &);
     LIns *assemble_ret(ReturnType rt);
     LIns *assemble_guard(bool isCond);
     LIns *assemble_guard_xov();
+    LIns *assemble_jump_jov();
     void bad(const string &msg);
     void nyi(const string &opname);
     void extract_any_label(string &lab, char lab_delim);
+    void resolve_forward_jumps(string &lab, LIns *ins);
     void endFragment();
 };
 
 // Meaning: arg 'm' of 'n' has type 'ty'.
 static int argMask(int ty, int m, int n)
 {
     // Order examples, from MSB to LSB:  
     // - 3 args: 000 | 000 | 000 | 000 | 000 | arg1| arg2| arg3| ret
@@ -740,17 +742,16 @@ FragmentAssembler::createGuardRecord(Las
 {
     GuardRecord *rec = new (mParent.mAlloc) GuardRecord;
     memset(rec, 0, sizeof(GuardRecord));
     rec->exit = exit;
     exit->addGuard(rec);
     return rec;
 }
 
-
 LIns *
 FragmentAssembler::assemble_guard(bool isCond)
 {
     GuardRecord* guard = createGuardRecord(createSideExit());
 
     LIns *ins_cond;
     if (isCond) {
         need(1);
@@ -775,16 +776,39 @@ FragmentAssembler::assemble_guard_xov()
 
     need(2);
 
     mReturnTypeBits |= RT_GUARD;
 
     return mLir->insGuardXov(mOpcode, ref(mTokens[0]), ref(mTokens[1]), guard);
 }
 
+LIns *
+FragmentAssembler::assemble_jump_jov()
+{
+    need(3);
+
+    LIns *a = ref(mTokens[0]);
+    LIns *b = ref(mTokens[1]);
+    string name = mTokens[2];
+
+    if (mLabels.find(name) != mLabels.end()) {
+        LIns *target = ref(name);
+        return mLir->insBranchJov(mOpcode, a, b, target);
+    } else {
+        LIns *ins = mLir->insBranchJov(mOpcode, a, b, NULL);
+#ifdef __SUNPRO_CC
+        mFwdJumps.insert(make_pair<const string, LIns *>(name, ins));
+#else
+        mFwdJumps.insert(make_pair(name, ins));
+#endif
+        return ins;
+    }
+}
+
 void
 FragmentAssembler::endFragment()
 {
     if (mReturnTypeBits == 0) {
         cerr << "warning: no return type in fragment '"
              << mFragName << "'" << endl;
     }
     if (mReturnTypeBits != RT_INT32 && mReturnTypeBits != RT_FLOAT &&
@@ -854,16 +878,32 @@ FragmentAssembler::extract_any_label(str
         pop_front(mTokens);  // remove punctuation
 
         if (mLabels.find(lab) != mLabels.end())
             bad("duplicate label");
     }
 }
 
 void
+FragmentAssembler::resolve_forward_jumps(string &lab, LIns *ins)
+{
+    typedef multimap<string, LIns *> mulmap;
+#ifdef __SUNPRO_CC
+    typedef mulmap::iterator ci;
+#else
+    typedef mulmap::const_iterator ci;
+#endif
+    pair<ci, ci> range = mFwdJumps.equal_range(lab);
+    for (ci i = range.first; i != range.second; ++i) {
+        i->second->setTarget(ins);
+    }
+    mFwdJumps.erase(lab);
+}
+
+void
 FragmentAssembler::assembleFragment(LirTokenStream &in, bool implicitBegin, const LirToken *firstToken)
 {
     LirToken token;
     while (true) {
         if (firstToken) {
             token = *firstToken;
             firstToken = NULL;
         } else if (!in.get(token)) {
@@ -892,27 +932,17 @@ FragmentAssembler::assembleFragment(LirT
 
         string lab;
         LIns *ins = NULL;
         extract_any_label(lab, ':');
 
         /* Save label and do any back-patching of deferred forward-jumps. */
         if (!lab.empty()) {
             ins = mLir->ins0(LIR_label);
-            typedef multimap<string, LIns *> mulmap;
-#ifdef __SUNPRO_CC
-            typedef mulmap::iterator ci;
-#else
-            typedef mulmap::const_iterator ci;
-#endif
-            pair<ci, ci> range = mFwdJumps.equal_range(lab);
-            for (ci i = range.first; i != range.second; ++i) {
-                i->second->setTarget(ins);
-            }
-            mFwdJumps.erase(lab);
+            resolve_forward_jumps(lab, ins);
             lab.clear();
         }
         extract_any_label(lab, '=');
 
         assert(!mTokens.empty());
         op = pop_front(mTokens);
         if (mParent.mOpMap.find(op) == mParent.mOpMap.end())
             bad("unknown instruction '" + op + "'");
@@ -1096,32 +1126,46 @@ FragmentAssembler::assembleFragment(LirT
             break;
 
           case LIR_addxovi:
           case LIR_subxovi:
           case LIR_mulxovi:
             ins = assemble_guard_xov();
             break;
 
+          case LIR_addjovi:
+          case LIR_subjovi:
+          case LIR_muljovi:
+          CASE64(LIR_addjovq:)
+          CASE64(LIR_subjovq:)
+            ins = assemble_jump_jov();
+            break;
+
           case LIR_calli:
           CASESF(LIR_hcalli:)
           case LIR_calld:
           CASE64(LIR_callq:)
             ins = assemble_call(op);
             break;
 
           case LIR_reti:
             ins = assemble_ret(RT_INT32);
             break;
 
           case LIR_retd:
             ins = assemble_ret(RT_FLOAT);
             break;
 
           case LIR_label:
+            ins = mLir->ins0(LIR_label);
+            if (!lab.empty()) {
+                resolve_forward_jumps(lab, ins);
+            }
+            break;
+
           case LIR_file:
           case LIR_line:
           case LIR_xtbl:
           case LIR_jtbl:
           CASE64(LIR_retq:)
             nyi(op);
             break;
 
new file mode 100644
--- /dev/null
+++ b/js/src/lirasm/tests/addjovi.in
@@ -0,0 +1,14 @@
+	ptr = allocp 8
+
+	a = immi 2147483647
+	b = immi 0
+	c = addjovi a b ovf
+	sti c ptr 0
+
+	j done
+
+ovf:	i = immi 12345678
+	sti i ptr 0
+
+done:	res = ldi ptr 0
+	reti res
new file mode 100644
--- /dev/null
+++ b/js/src/lirasm/tests/addjovi.out
@@ -0,0 +1,1 @@
+Output is: 2147483647
new file mode 100644
--- /dev/null
+++ b/js/src/lirasm/tests/addjovi_ovf.in
@@ -0,0 +1,14 @@
+	ptr = allocp 8
+
+	a = immi 2147483647
+	b = immi 1
+	c = addjovi a b ovf
+	sti c ptr 0
+
+	j done
+
+ovf:	i = immi 12345678
+	sti i ptr 0
+
+done:	res = ldi ptr 0
+	reti res
new file mode 100644
--- /dev/null
+++ b/js/src/lirasm/tests/addjovi_ovf.out
@@ -0,0 +1,1 @@
+Output is: 12345678
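Note: 2147483647 + 0 stays within int32 range, so addjovi.in falls through and returns the sum, while 2147483647 + 1 exceeds INT32_MAX, so addjovi_ovf.in branches to ovf and returns 12345678.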
new file mode 100644
--- /dev/null
+++ b/js/src/lirasm/tests/muljovi.in
@@ -0,0 +1,15 @@
+	ptr = allocp 8
+
+	a = immi 65536
+	b = immi 32767
+	c = muljovi a b ovf
+	sti c ptr 0
+
+	j done
+
+ovf:	i = immi 12345678
+	sti i ptr 0
+
+done:	res = ldi ptr 0
+	reti res
+
new file mode 100644
--- /dev/null
+++ b/js/src/lirasm/tests/muljovi.out
@@ -0,0 +1,1 @@
+Output is: 2147418112
new file mode 100644
--- /dev/null
+++ b/js/src/lirasm/tests/muljovi_ovf.in
@@ -0,0 +1,14 @@
+	ptr = allocp 8
+
+	a = immi 65536
+	b = immi 32768
+	c = muljovi a b ovf
+	sti c ptr 0
+
+	j done
+
+ovf:	i = immi 12345678
+	sti i ptr 0
+
+done:	res = ldi ptr 0
+	reti res
new file mode 100644
--- /dev/null
+++ b/js/src/lirasm/tests/muljovi_ovf.out
@@ -0,0 +1,1 @@
+Output is: 12345678
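Note: 65536 * 32767 = 2147418112, just below 2^31 and therefore representable as an int32, whereas 65536 * 32768 = 2147483648 = 2^31 is not; so muljovi.in falls through with the product, and muljovi_ovf.in branches to ovf and returns 12345678.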
new file mode 100644
--- /dev/null
+++ b/js/src/lirasm/tests/subjovi.in
@@ -0,0 +1,15 @@
+	ptr = allocp 8
+
+	a = immi -2147483647
+	b = immi 1
+	c = subjovi a b ovf
+	sti c ptr 0
+
+	j done
+
+ovf:	i = immi 12345678
+	sti i ptr 0
+
+done:	res = ldi ptr 0
+	reti res
+
new file mode 100644
--- /dev/null
+++ b/js/src/lirasm/tests/subjovi.out
@@ -0,0 +1,1 @@
+Output is: -2147483648
new file mode 100644
--- /dev/null
+++ b/js/src/lirasm/tests/subjovi_ovf.in
@@ -0,0 +1,14 @@
+	ptr = allocp 8
+
+	a = immi -2147483647
+	b = immi 2
+	c = subjovi a b ovf
+	sti c ptr 0
+
+	j done
+
+ovf:	i = immi 12345678
+	sti i ptr 0
+
+done:	res = ldi ptr 0
+	reti res
new file mode 100644
--- /dev/null
+++ b/js/src/lirasm/tests/subjovi_ovf.out
@@ -0,0 +1,1 @@
+Output is: 12345678
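Note: -2147483647 - 1 = -2147483648 is exactly INT32_MIN and does not overflow, whereas -2147483647 - 2 falls below INT32_MIN; hence subjovi.in returns the difference and subjovi_ovf.in takes the branch and returns 12345678.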
--- a/js/src/nanojit/Assembler.cpp
+++ b/js/src/nanojit/Assembler.cpp
@@ -1211,16 +1211,18 @@ namespace nanojit
             if ((!branchOnFalse && !cond->immI()) || (branchOnFalse && cond->immI())) {
                 // jmp never taken, not needed
             } else {
                 asm_jmp(ins, pending_lives);    // jmp always taken
             }
             return;
         }
 
+        // Changes to the logic below will likely need to be propagated to Assembler::asm_jov().
+
         countlir_jcc();
         LInsp to = ins->getTarget();
         LabelState *label = _labels.get(to);
         if (label && label->addr) {
             // Forward jump to known label.  Need to merge with label's register state.
             unionRegisterState(label->regs);
             asm_branch(branchOnFalse, cond, label->addr);
         }
@@ -1236,16 +1238,47 @@ namespace nanojit
                 // Evict all registers, most conservative approach.
                 intersectRegisterState(label->regs);
             }
             NIns *branch = asm_branch(branchOnFalse, cond, 0);
             _patches.put(branch,to);
         }
     }
 
+    void Assembler::asm_jov(LInsp ins, InsList& pending_lives)
+    {
+        // The caller is responsible for countlir_* profiling, unlike
+        // asm_jcc above.  The reason for this is that asm_jov may not
+        // be called if the instruction is dead, and it is our convention
+        // to count such instructions anyway.
+        LOpcode op = ins->opcode();
+        LInsp to = ins->getTarget();
+        LabelState *label = _labels.get(to);
+        if (label && label->addr) {
+            // forward jump to known label.  need to merge with label's register state.
+            unionRegisterState(label->regs);
+            asm_branch_ov(op, label->addr);
+        }
+        else {
+            // back edge.
+            handleLoopCarriedExprs(pending_lives);
+            if (!label) {
+                // evict all registers, most conservative approach.
+                evictAllActiveRegs();
+                _labels.add(to, 0, _allocator);
+            }
+            else {
+                // evict all registers, most conservative approach.
+                intersectRegisterState(label->regs);
+            }
+            NIns *branch = asm_branch_ov(op, 0);
+            _patches.put(branch,to);
+        }
+    }
+
     void Assembler::asm_x(LInsp ins)
     {
         verbose_only( _thisfrag->nStaticExits++; )
         countlir_x();
         // Generate the side exit branch on the main trace.
         NIns *exit = asm_exit(ins);
         JMP(exit);
     }
@@ -1502,16 +1535,17 @@ namespace nanojit
                     ins->oprnd1()->setResultLive();
                     if (ins->isExtant()) {
                         asm_neg_not(ins);
                     }
                     break;
 
 #if defined NANOJIT_64BIT
                 case LIR_addq:
+                case LIR_subq:
                 case LIR_andq:
                 case LIR_lshq:
                 case LIR_rshuq:
                 case LIR_rshq:
                 case LIR_orq:
                 case LIR_xorq:
                     countlir_alu();
                     ins->oprnd1()->setResultLive();
@@ -1762,29 +1796,55 @@ namespace nanojit
                     break;
 
                 case LIR_x:
                     asm_x(ins);
                     break;
 
                 case LIR_addxovi:
                 case LIR_subxovi:
-                case LIR_mulxovi: {
+                case LIR_mulxovi:
                     verbose_only( _thisfrag->nStaticExits++; )
                     countlir_xcc();
                     countlir_alu();
                     ins->oprnd1()->setResultLive();
                     ins->oprnd2()->setResultLive();
                     if (ins->isExtant()) {
                         NIns* exit = asm_exit(ins); // does intersectRegisterState()
-                        asm_branch_xov(op, exit);
+                        asm_branch_ov(op, exit);
                         asm_arith(ins);
                     }
                     break;
-                }
+
+                case LIR_addjovi:
+                case LIR_subjovi:
+                case LIR_muljovi:
+                    countlir_jcc();
+                    countlir_alu();
+                    ins->oprnd1()->setResultLive();
+                    ins->oprnd2()->setResultLive();
+                    if (ins->isExtant()) {
+                        asm_jov(ins, pending_lives);
+                        asm_arith(ins);
+                    }
+                    break;
+
+#ifdef NANOJIT_64BIT
+                case LIR_addjovq:
+                case LIR_subjovq:
+                    countlir_jcc();
+                    countlir_alu();
+                    ins->oprnd1()->setResultLive();
+                    ins->oprnd2()->setResultLive();
+                    if (ins->isExtant()) {
+                        asm_jov(ins, pending_lives);
+                        asm_qbinop(ins);
+                    }
+                    break;
+#endif
 
                 case LIR_eqd:
                 case LIR_led:
                 case LIR_ltd:
                 case LIR_gtd:
                 case LIR_ged:
                     countlir_fpu();
                     ins->oprnd1()->setResultLive();
--- a/js/src/nanojit/Assembler.h
+++ b/js/src/nanojit/Assembler.h
@@ -407,16 +407,17 @@ namespace nanojit
 
             AR          _activation;
             RegAlloc    _allocator;
 
             verbose_only( void asm_inc_m32(uint32_t*); )
             void        asm_mmq(Register rd, int dd, Register rs, int ds);
             void        asm_jmp(LInsp ins, InsList& pending_lives);
             void        asm_jcc(LInsp ins, InsList& pending_lives);
+            void        asm_jov(LInsp ins, InsList& pending_lives);
             void        asm_x(LInsp ins);
             void        asm_xcc(LInsp ins);
             NIns*       asm_exit(LInsp guard);
             NIns*       asm_leave_trace(LInsp guard);
             void        asm_store32(LOpcode op, LIns *val, int d, LIns *base);
             void        asm_store64(LOpcode op, LIns *val, int d, LIns *base);
 
             // WARNING: the implementation of asm_restore() should emit fast code
@@ -453,17 +454,17 @@ namespace nanojit
 #ifdef NANOJIT_64BIT
             void        asm_q2i(LInsp ins);
             void        asm_promote(LIns *ins);
 #endif
             void        asm_nongp_copy(Register r, Register s);
             void        asm_call(LInsp);
             Register    asm_binop_rhs_reg(LInsp ins);
             NIns*       asm_branch(bool branchOnFalse, LInsp cond, NIns* targ);
-            void        asm_branch_xov(LOpcode op, NIns* targ);
+            NIns*       asm_branch_ov(LOpcode op, NIns* targ);
             void        asm_switch(LIns* ins, NIns* target);
             void        asm_jtbl(LIns* ins, NIns** table);
             void        emitJumpTable(SwitchInfo* si, NIns* target);
             void        assignSavedRegs();
             void        reserveSavedRegs();
             void        assignParamRegs();
             void        handleLoopCarriedExprs(InsList& pending_lives);
 
--- a/js/src/nanojit/LIR.cpp
+++ b/js/src/nanojit/LIR.cpp
@@ -322,16 +322,21 @@ namespace nanojit
 
     LInsp LirBufWriter::insBranch(LOpcode op, LInsp condition, LInsp toLabel)
     {
         NanoAssert((op == LIR_j && !condition) ||
                    ((op == LIR_jf || op == LIR_jt) && condition));
         return ins2(op, condition, toLabel);
     }
 
+    LInsp LirBufWriter::insBranchJov(LOpcode op, LInsp a, LInsp b, LInsp toLabel)
+    {
+        return ins3(op, a, b, toLabel);
+    }
+
     LIns* LirBufWriter::insJtbl(LIns* index, uint32_t size)
     {
         LInsJtbl* insJtbl = (LInsJtbl*) _buf->makeRoom(sizeof(LInsJtbl));
         LIns**    table   = new (_buf->_allocator) LIns*[size];
         LIns*     ins     = insJtbl->getLIns();
         VMPI_memset(table, 0, size * sizeof(LIns*));
         ins->initLInsJtbl(index, size, table);
         return ins;
@@ -890,67 +895,98 @@ namespace nanojit
                     v = invertCondGuardOpcode(v);
                     c = c->oprnd1();
                 }
             }
         }
         return out->insGuard(v, c, gr);
     }
 
-    LIns* ExprFilter::insGuardXov(LOpcode op, LInsp oprnd1, LInsp oprnd2, GuardRecord *gr)
+    // Simplify operator if possible.  Always return NULL if overflow is possible.
+
+    LIns* ExprFilter::simplifyOverflowArith(LOpcode op, LInsp *opnd1, LInsp *opnd2)
     {
+        LInsp oprnd1 = *opnd1;
+        LInsp oprnd2 = *opnd2;
+
         if (oprnd1->isImmI() && oprnd2->isImmI()) {
             int32_t c1 = oprnd1->immI();
             int32_t c2 = oprnd2->immI();
             double d = 0.0;
 
+            // The code below attempts to perform the operation while
+            // detecting overflow.  For multiplication, we may unnecessarily
+            // infer a possible overflow due to the insufficient integer
+            // range of the double type.
+
             switch (op) {
+            case LIR_addjovi:
             case LIR_addxovi:    d = double(c1) + double(c2);    break;
+            case LIR_subjovi:
             case LIR_subxovi:    d = double(c1) - double(c2);    break;
+            case LIR_muljovi:
             case LIR_mulxovi:    d = double(c1) * double(c2);    break;
-            default:            NanoAssert(0);                  break;
+            default:             NanoAssert(0);                  break;
             }
             int32_t r = int32_t(d);
             if (r == d)
                 return insImmI(r);
 
         } else if (oprnd1->isImmI() && !oprnd2->isImmI()) {
             switch (op) {
+            case LIR_addjovi:
             case LIR_addxovi:
+            case LIR_muljovi:
             case LIR_mulxovi: {
-                // move const to rhs
+                // swap operands, moving const to rhs
                 LIns* t = oprnd2;
                 oprnd2 = oprnd1;
                 oprnd1 = t;
+                // swap actual arguments in caller as well
+                *opnd1 = oprnd1;
+                *opnd2 = oprnd2;
                 break;
             }
+            case LIR_subjovi:
             case LIR_subxovi:
                 break;
             default:
                 NanoAssert(0);
             }
         }
 
         if (oprnd2->isImmI()) {
             int c = oprnd2->immI();
             if (c == 0) {
                 switch (op) {
+                case LIR_addjovi:
                 case LIR_addxovi:
+                case LIR_subjovi:
                 case LIR_subxovi:
                     return oprnd1;
+                case LIR_muljovi:
                 case LIR_mulxovi:
                     return oprnd2;
                 default:
                     ;
                 }
-            } else if (c == 1 && op == LIR_mulxovi) {
+            } else if (c == 1 && (op == LIR_muljovi || op == LIR_mulxovi)) {
                 return oprnd1;
             }
         }
 
+        return NULL;
+    }
+
+    LIns* ExprFilter::insGuardXov(LOpcode op, LInsp oprnd1, LInsp oprnd2, GuardRecord *gr)
+    {
+        LIns* simplified = simplifyOverflowArith(op, &oprnd1, &oprnd2);
+        if (simplified)
+            return simplified;
+
         return out->insGuardXov(op, oprnd1, oprnd2, gr);
     }
 
     LIns* ExprFilter::insBranch(LOpcode v, LIns *c, LIns *t)
     {
         if (v == LIR_jt || v == LIR_jf) {
             if (c->isImmI()) {
                 if ((v == LIR_jt && !c->immI()) || (v == LIR_jf && c->immI())) {
@@ -972,16 +1008,25 @@ namespace nanojit
                     v = invertCondJmpOpcode(v);
                     c = c->oprnd1();
                 }
             }
         }
         return out->insBranch(v, c, t);
     }
 
+    LIns* ExprFilter::insBranchJov(LOpcode op, LInsp oprnd1, LInsp oprnd2, LIns* target)
+    {
+        LIns* simplified = simplifyOverflowArith(op, &oprnd1, &oprnd2);
+        if (simplified)
+            return simplified;
+
+        return out->insBranchJov(op, oprnd1, oprnd2, target);
+    }
+
     LIns* ExprFilter::insLoad(LOpcode op, LIns* base, int32_t off, AccSet accSet) {
         if (base->isImmP() && !isS8(off)) {
             // if the effective address is constant, then transform:
             // ld const[bigconst] => ld (const+bigconst)[0]
             // note: we don't do this optimization for <8bit field offsets,
             // under the assumption that we're more likely to CSE-match the
             // constant base address if we dont const-fold small offsets.
             uintptr_t p = (uintptr_t)base->immP() + off;
@@ -1319,22 +1364,28 @@ namespace nanojit
                 CASE64(LIR_rshq:)
                 CASE64(LIR_rshuq:)
                 case LIR_addi:
                 case LIR_subi:
                 case LIR_muli:
                 case LIR_addxovi:
                 case LIR_subxovi:
                 case LIR_mulxovi:
+                case LIR_addjovi:
+                case LIR_subjovi:
+                case LIR_muljovi:
                 CASE86(LIR_divi:)
                 case LIR_addd:
                 case LIR_subd:
                 case LIR_muld:
                 case LIR_divd:
                 CASE64(LIR_addq:)
+                CASE64(LIR_subq:)
+                CASE64(LIR_addjovq:)
+                CASE64(LIR_subjovq:)
                 case LIR_andi:
                 case LIR_ori:
                 case LIR_xori:
                 CASE64(LIR_andq:)
                 CASE64(LIR_orq:)
                 CASE64(LIR_xorq:)
                 CASESF(LIR_ii2d:)
                 case LIR_file:
@@ -1705,30 +1756,41 @@ namespace nanojit
                 break;
 
             case LIR_addxovi:
             case LIR_subxovi:
             case LIR_mulxovi:
                 formatGuardXov(buf, i);
                 break;
 
+            case LIR_addjovi:
+            case LIR_subjovi:
+            case LIR_muljovi:
+            CASE64(LIR_addjovq:)
+            CASE64(LIR_subjovq:)
+                VMPI_snprintf(s, n, "%s = %s %s, %s ; ovf -> %s", formatRef(&b1, i), lirNames[op],
+                    formatRef(&b2, i->oprnd1()),
+                    formatRef(&b3, i->oprnd2()),
+                    i->oprnd3() ? formatRef(&b4, i->oprnd3()) : "unpatched");
+                break;
+
             case LIR_addi:       CASE64(LIR_addq:)
-            case LIR_subi:
+            case LIR_subi:       CASE64(LIR_subq:)
             case LIR_muli:
             CASE86(LIR_divi:)
             case LIR_addd:
             case LIR_subd:
             case LIR_muld:
             case LIR_divd:
             case LIR_andi:       CASE64(LIR_andq:)
             case LIR_ori:        CASE64(LIR_orq:)
             case LIR_xori:       CASE64(LIR_xorq:)
             case LIR_lshi:       CASE64(LIR_lshq:)
             case LIR_rshi:       CASE64(LIR_rshq:)
-            case LIR_rshui:       CASE64(LIR_rshuq:)
+            case LIR_rshui:      CASE64(LIR_rshuq:)
             case LIR_eqi:        CASE64(LIR_eqq:)
             case LIR_lti:        CASE64(LIR_ltq:)
             case LIR_lei:        CASE64(LIR_leq:)
             case LIR_gti:        CASE64(LIR_gtq:)
             case LIR_gei:        CASE64(LIR_geq:)
             case LIR_ltui:       CASE64(LIR_ltuq:)
             case LIR_leui:       CASE64(LIR_leuq:)
             case LIR_gtui:       CASE64(LIR_gtuq:)
@@ -2404,16 +2466,18 @@ namespace nanojit
         if (!ins) {
             ins = out->insGuardXov(op, a, b, gr);
             add(LIns2, ins, k);
         }
         NanoAssert(ins->isop(op) && ins->oprnd1() == a && ins->oprnd2() == b);
         return ins;
     }
 
+    // There is no CseFilter::insBranchJov(), as LIR_*jov* are not CSEable.
+
     LInsp CseFilter::insCall(const CallInfo *ci, LInsp args[])
     {
         LInsp ins;
         uint32_t argc = ci->count_args();
         if (ci->_isPure) {
             NanoAssert(ci->_storeAccSet == ACC_NONE);
             uint32_t k;
             ins = findCall(ci, argc, args, k);
@@ -2929,16 +2993,17 @@ namespace nanojit
             break;
 #endif
 
 #ifdef NANOJIT_64BIT
         case LIR_andq:
         case LIR_orq:
         case LIR_xorq:
         case LIR_addq:
+        case LIR_subq:
         case LIR_eqq:
         case LIR_ltq:
         case LIR_gtq:
         case LIR_leq:
         case LIR_geq:
         case LIR_ltuq:
         case LIR_gtuq:
         case LIR_leuq:
@@ -3152,16 +3217,49 @@ namespace nanojit
         // We check that target is a label in ValidateReader because it may
         // not have been set here.
 
         typeCheckArgs(op, nArgs, formals, args);
 
         return out->insBranch(op, cond, to);
     }
 
+    LIns* ValidateWriter::insBranchJov(LOpcode op, LIns* a, LIns* b, LIns* to)
+    {
+        int nArgs = 2;
+        LTy formals[2];
+        LIns* args[2] = { a, b };
+
+        switch (op) {
+        case LIR_addjovi:
+        case LIR_subjovi:
+        case LIR_muljovi:
+            formals[0] = LTy_I;
+            formals[1] = LTy_I;
+            break;
+
+#ifdef NANOJIT_64BIT
+        case LIR_addjovq:
+        case LIR_subjovq:
+            formals[0] = LTy_Q;
+            formals[1] = LTy_Q;
+            break;
+#endif
+        default:
+            NanoAssert(0);
+        }
+
+        // We check that target is a label in ValidateReader because it may
+        // not have been set here.
+
+        typeCheckArgs(op, nArgs, formals, args);
+
+        return out->insBranchJov(op, a, b, to);
+    }
+
     LIns* ValidateWriter::insAlloc(int32_t size)
     {
         return out->insAlloc(size);
     }
 
     LIns* ValidateWriter::insJtbl(LIns* index, uint32_t size)
     {
         int nArgs = 1;
@@ -3183,16 +3281,25 @@ namespace nanojit
     {
         LIns *ins = in->read();
         switch (ins->opcode()) {
         case LIR_jt:
         case LIR_jf:
         case LIR_j:
             NanoAssert(ins->getTarget() && ins->oprnd2()->isop(LIR_label));
             break;
+
+        case LIR_addjovi:
+        case LIR_subjovi:
+        case LIR_muljovi:
+        CASE64(LIR_addjovq:)
+        CASE64(LIR_subjovq:)
+            NanoAssert(ins->getTarget() && ins->oprnd3()->isop(LIR_label));
+            break;
+
         case LIR_jtbl: {
             uint32_t tableSize = ins->getTableSize();
             NanoAssert(tableSize > 0);
             for (uint32_t i = 0; i < tableSize; i++) {
                 LIns* target = ins->getTarget(i);
                 NanoAssert(target);
                 NanoAssert(target->isop(LIR_label));
             }
--- a/js/src/nanojit/LIR.h
+++ b/js/src/nanojit/LIR.h
@@ -57,49 +57,51 @@ namespace nanojit
 #ifdef NANOJIT_64BIT
 #  define PTR_SIZE(a,b)  b
 #else
 #  define PTR_SIZE(a,b)  a
 #endif
 
         // Pointer-sized synonyms.
 
-        LIR_paramp  = PTR_SIZE(LIR_parami, LIR_paramq),
+        LIR_paramp  = PTR_SIZE(LIR_parami,  LIR_paramq),
 
-        LIR_retp    = PTR_SIZE(LIR_reti,   LIR_retq),
+        LIR_retp    = PTR_SIZE(LIR_reti,    LIR_retq),
 
-        LIR_livep   = PTR_SIZE(LIR_livei,  LIR_liveq),
+        LIR_livep   = PTR_SIZE(LIR_livei,   LIR_liveq),
 
-        LIR_ldp     = PTR_SIZE(LIR_ldi,    LIR_ldq),
+        LIR_ldp     = PTR_SIZE(LIR_ldi,     LIR_ldq),
 
-        LIR_stp     = PTR_SIZE(LIR_sti,    LIR_stq),
+        LIR_stp     = PTR_SIZE(LIR_sti,     LIR_stq),
 
-        LIR_callp   = PTR_SIZE(LIR_calli,  LIR_callq),
+        LIR_callp   = PTR_SIZE(LIR_calli,   LIR_callq),
 
-        LIR_eqp     = PTR_SIZE(LIR_eqi,    LIR_eqq),
-        LIR_ltp     = PTR_SIZE(LIR_lti,    LIR_ltq),
-        LIR_gtp     = PTR_SIZE(LIR_gti,    LIR_gtq),
-        LIR_lep     = PTR_SIZE(LIR_lei,    LIR_leq),
-        LIR_gep     = PTR_SIZE(LIR_gei,    LIR_geq),
-        LIR_ltup    = PTR_SIZE(LIR_ltui,   LIR_ltuq),
-        LIR_gtup    = PTR_SIZE(LIR_gtui,   LIR_gtuq),
-        LIR_leup    = PTR_SIZE(LIR_leui,   LIR_leuq),
-        LIR_geup    = PTR_SIZE(LIR_geui,   LIR_geuq),
+        LIR_eqp     = PTR_SIZE(LIR_eqi,     LIR_eqq),
+        LIR_ltp     = PTR_SIZE(LIR_lti,     LIR_ltq),
+        LIR_gtp     = PTR_SIZE(LIR_gti,     LIR_gtq),
+        LIR_lep     = PTR_SIZE(LIR_lei,     LIR_leq),
+        LIR_gep     = PTR_SIZE(LIR_gei,     LIR_geq),
+        LIR_ltup    = PTR_SIZE(LIR_ltui,    LIR_ltuq),
+        LIR_gtup    = PTR_SIZE(LIR_gtui,    LIR_gtuq),
+        LIR_leup    = PTR_SIZE(LIR_leui,    LIR_leuq),
+        LIR_geup    = PTR_SIZE(LIR_geui,    LIR_geuq),
 
-        LIR_addp    = PTR_SIZE(LIR_addi,   LIR_addq),
-
-        LIR_andp    = PTR_SIZE(LIR_andi,   LIR_andq),
-        LIR_orp     = PTR_SIZE(LIR_ori,    LIR_orq),
-        LIR_xorp    = PTR_SIZE(LIR_xori,   LIR_xorq),
+        LIR_addp    = PTR_SIZE(LIR_addi,    LIR_addq),
+        LIR_subp    = PTR_SIZE(LIR_subi,    LIR_subq),
+        LIR_addjovp = PTR_SIZE(LIR_addjovi, LIR_addjovq),
 
-        LIR_lshp    = PTR_SIZE(LIR_lshi,   LIR_lshq),
-        LIR_rshp    = PTR_SIZE(LIR_rshi,   LIR_rshq),
-        LIR_rshup   = PTR_SIZE(LIR_rshui,  LIR_rshuq),
+        LIR_andp    = PTR_SIZE(LIR_andi,    LIR_andq),
+        LIR_orp     = PTR_SIZE(LIR_ori,     LIR_orq),
+        LIR_xorp    = PTR_SIZE(LIR_xori,    LIR_xorq),
 
-        LIR_cmovp   = PTR_SIZE(LIR_cmovi,  LIR_cmovq)
+        LIR_lshp    = PTR_SIZE(LIR_lshi,    LIR_lshq),
+        LIR_rshp    = PTR_SIZE(LIR_rshi,    LIR_rshq),
+        LIR_rshup   = PTR_SIZE(LIR_rshui,   LIR_rshuq),
+
+        LIR_cmovp   = PTR_SIZE(LIR_cmovi,   LIR_cmovq)
     };
 
     // 32-bit integer comparisons must be contiguous, as must 64-bit integer
     // comparisons and 64-bit float comparisons.
     NanoStaticAssert(LIR_eqi + 1 == LIR_lti  &&
                      LIR_eqi + 2 == LIR_gti  &&
                      LIR_eqi + 3 == LIR_lei  &&
                      LIR_eqi + 4 == LIR_gei  &&
@@ -864,16 +866,23 @@ namespace nanojit
         bool isLoad() const {
             return isLInsLd();
         }
         bool isGuard() const {
             return isop(LIR_x) || isop(LIR_xf) || isop(LIR_xt) ||
                    isop(LIR_xbarrier) || isop(LIR_xtbl) ||
                    isop(LIR_addxovi) || isop(LIR_subxovi) || isop(LIR_mulxovi);
         }
+        bool isJov() const {
+            return
+#ifdef NANOJIT_64BIT
+                isop(LIR_addjovq) || isop(LIR_subjovq) ||
+#endif
+                isop(LIR_addjovi) || isop(LIR_subjovi) || isop(LIR_muljovi);
+        }
         // True if the instruction is a 32-bit integer immediate.
         bool isImmI() const {
             return isop(LIR_immi);
         }
         // True if the instruction is a 32-bit integer immediate and
         // has the value 'val' when treated as a 32-bit signed integer.
         bool isImmI(int32_t val) const {
             return isImmI() && immI()==val;
@@ -906,17 +915,17 @@ namespace nanojit
                 isImmD();
         }
         // True if the instruction an any type of immediate.
         bool isImmAny() const {
             return isImmI() || isImmQorD();
         }
 
         bool isBranch() const {
-            return isop(LIR_jt) || isop(LIR_jf) || isop(LIR_j) || isop(LIR_jtbl);
+            return isop(LIR_jt) || isop(LIR_jf) || isop(LIR_j) || isop(LIR_jtbl) || isJov();
         }
 
         LTy retType() const {
             return retTypes[opcode()];
         }
         bool isV() const {
             return retType() == LTy_V;
         }
@@ -999,17 +1008,17 @@ namespace nanojit
         LIns*       oprnd_1;
 
         LIns        ins;
 
     public:
         LIns* getLIns() { return &ins; };
     };
 
-    // 3-operand form.  Used for conditional moves and xov guards.
+    // 3-operand form.  Used for conditional moves, jov branches, and xov guards.
     class LInsOp3
     {
     private:
         friend class LIns;
 
         LIns*       oprnd_3;
 
         LIns*       oprnd_2;
@@ -1263,23 +1272,29 @@ namespace nanojit
     }
     LIns* LIns::oprnd3() const {
         NanoAssert(isLInsOp3());
         return toLInsOp3()->oprnd_3;
     }
 
     LIns* LIns::getTarget() const {
         NanoAssert(isBranch() && !isop(LIR_jtbl));
-        return oprnd2();
+        if (isJov())
+            return oprnd3();
+        else
+            return oprnd2();
     }
 
     void LIns::setTarget(LIns* label) {
         NanoAssert(label && label->isop(LIR_label));
         NanoAssert(isBranch() && !isop(LIR_jtbl));
-        toLInsOp2()->oprnd_2 = label;
+        if (isJov())
+            toLInsOp3()->oprnd_3 = label;
+        else
+            toLInsOp2()->oprnd_2 = label;
     }
 
     LIns* LIns::getTarget(uint32_t index) const {
         NanoAssert(isop(LIR_jtbl));
         NanoAssert(index < toLInsJtbl()->size);
         return toLInsJtbl()->table[index];
     }
 
@@ -1425,19 +1440,22 @@ namespace nanojit
             return out->ins3(v, a, b, c);
         }
         virtual LInsp insGuard(LOpcode v, LIns *c, GuardRecord *gr) {
             return out->insGuard(v, c, gr);
         }
         virtual LInsp insGuardXov(LOpcode v, LIns *a, LIns* b, GuardRecord *gr) {
             return out->insGuardXov(v, a, b, gr);
         }
-        virtual LInsp insBranch(LOpcode v, LInsp condition, LInsp to) {
+        virtual LInsp insBranch(LOpcode v, LIns* condition, LIns* to) {
             return out->insBranch(v, condition, to);
         }
+        virtual LInsp insBranchJov(LOpcode v, LIns* a, LIns* b, LIns* to) {
+            return out->insBranchJov(v, a, b, to);
+        }
         // arg: 0=first, 1=second, ...
         // kind: 0=arg 1=saved-reg
         virtual LInsp insParam(int32_t arg, int32_t kind) {
             return out->insParam(arg, kind);
         }
         virtual LInsp insImmI(int32_t imm) {
             return out->insImmI(imm);
         }
@@ -1694,16 +1712,20 @@ namespace nanojit
         LIns* insGuardXov(LOpcode op, LInsp a, LInsp b, GuardRecord *gr) {
             return add_flush(out->insGuardXov(op,a,b,gr));
         }
 
         LIns* insBranch(LOpcode v, LInsp condition, LInsp to) {
             return add_flush(out->insBranch(v, condition, to));
         }
 
+        LIns* insBranchJov(LOpcode v, LInsp a, LInsp b, LInsp to) {
+            return add_flush(out->insBranchJov(v, a, b, to));
+        }
+
         LIns* insJtbl(LIns* index, uint32_t size) {
             return add_flush(out->insJtbl(index, size));
         }
 
         LIns* ins0(LOpcode v) {
             if (v == LIR_label || v == LIR_start) {
                 flush();
             }
@@ -1751,20 +1773,23 @@ namespace nanojit
 
     class ExprFilter: public LirWriter
     {
     public:
         ExprFilter(LirWriter *out) : LirWriter(out) {}
         LIns* ins1(LOpcode v, LIns* a);
         LIns* ins2(LOpcode v, LIns* a, LIns* b);
         LIns* ins3(LOpcode v, LIns* a, LIns* b, LIns* c);
-        LIns* insGuard(LOpcode, LIns *cond, GuardRecord *);
+        LIns* insGuard(LOpcode, LIns* cond, GuardRecord *);
         LIns* insGuardXov(LOpcode, LIns* a, LIns* b, GuardRecord *);
-        LIns* insBranch(LOpcode, LIns *cond, LIns *target);
+        LIns* insBranch(LOpcode, LIns* cond, LIns* target);
+        LIns* insBranchJov(LOpcode, LIns* a, LIns* b, LIns* target);
         LIns* insLoad(LOpcode op, LInsp base, int32_t off, AccSet accSet);
+    private:
+        LIns* simplifyOverflowArith(LOpcode op, LInsp *opnd1, LInsp *opnd2);
     };
 
     class CseFilter: public LirWriter
     {
         enum LInsHashKind {
             // We divide instruction kinds into groups.  LIns0 isn't present
             // because we don't need to record any 0-ary instructions.
             LInsImmI = 0,
@@ -1949,16 +1974,17 @@ namespace nanojit
 #ifdef NANOJIT_64BIT
             LInsp   insImmQ(uint64_t imm);
 #endif
             LInsp   insImmD(double d);
             LInsp   insCall(const CallInfo *call, LInsp args[]);
             LInsp   insGuard(LOpcode op, LInsp cond, GuardRecord *gr);
             LInsp   insGuardXov(LOpcode op, LInsp a, LInsp b, GuardRecord *gr);
             LInsp   insBranch(LOpcode v, LInsp condition, LInsp to);
+            LInsp   insBranchJov(LOpcode v, LInsp a, LInsp b, LInsp to);
             LInsp   insAlloc(int32_t size);
             LInsp   insJtbl(LIns* index, uint32_t size);
     };
 
     class LirFilter
     {
     public:
         LirFilter *in;
@@ -2096,16 +2122,17 @@ namespace nanojit
 #ifdef NANOJIT_64BIT
         LIns* insImmQ(uint64_t imm);
 #endif
         LIns* insImmD(double d);
         LIns* insCall(const CallInfo *call, LIns* args[]);
         LIns* insGuard(LOpcode v, LIns *c, GuardRecord *gr);
         LIns* insGuardXov(LOpcode v, LIns* a, LIns* b, GuardRecord* gr);
         LIns* insBranch(LOpcode v, LIns* condition, LIns* to);
+        LIns* insBranchJov(LOpcode v, LIns* a, LIns* b, LIns* to);
         LIns* insAlloc(int32_t size);
         LIns* insJtbl(LIns* index, uint32_t size);
     };
 
     // This just checks things that aren't possible to check in
     // ValidateWriter, eg. whether all branch targets are set and are labels.
     class ValidateReader: public LirFilter {
     public:
--- a/js/src/nanojit/LIRopcode.tbl
+++ b/js/src/nanojit/LIRopcode.tbl
@@ -265,81 +265,82 @@ OP___(andi,     86, Op2,  I,    1)  // b
 OP___(ori,      87, Op2,  I,    1)  // bitwise-OR int
 OP___(xori,     88, Op2,  I,    1)  // bitwise-XOR int
 
 OP___(lshi,     89, Op2,  I,    1)  // left shift int
 OP___(rshi,     90, Op2,  I,    1)  // right shift int (>>)
 OP___(rshui,    91, Op2,  I,    1)  // right shift unsigned int (>>>)
 
 OP_64(addq,     92, Op2,  Q,    1)  // add quad
+OP_64(subq,     93, Op2,  Q,    1)  // subtract quad
 
-OP_64(andq,     93, Op2,  Q,    1)  // bitwise-AND quad
-OP_64(orq,      94, Op2,  Q,    1)  // bitwise-OR quad
-OP_64(xorq,     95, Op2,  Q,    1)  // bitwise-XOR quad
+OP_64(andq,     94, Op2,  Q,    1)  // bitwise-AND quad
+OP_64(orq,      95, Op2,  Q,    1)  // bitwise-OR quad
+OP_64(xorq,     96, Op2,  Q,    1)  // bitwise-XOR quad
 
-OP_64(lshq,     96, Op2,  Q,    1)  // left shift quad;           2nd operand is an int
-OP_64(rshq,     97, Op2,  Q,    1)  // right shift quad;          2nd operand is an int
-OP_64(rshuq,    98, Op2,  Q,    1)  // right shift unsigned quad; 2nd operand is an int
+OP_64(lshq,     97, Op2,  Q,    1)  // left shift quad;           2nd operand is an int
+OP_64(rshq,     98, Op2,  Q,    1)  // right shift quad;          2nd operand is an int
+OP_64(rshuq,    99, Op2,  Q,    1)  // right shift unsigned quad; 2nd operand is an int
 
-OP___(negd,     99, Op1,  D,    1)  // negate double
-OP___(addd,    100, Op2,  D,    1)  // add double
-OP___(subd,    101, Op2,  D,    1)  // subtract double
-OP___(muld,    102, Op2,  D,    1)  // multiply double
-OP___(divd,    103, Op2,  D,    1)  // divide double
+OP___(negd,    100, Op1,  D,    1)  // negate double
+OP___(addd,    101, Op2,  D,    1)  // add double
+OP___(subd,    102, Op2,  D,    1)  // subtract double
+OP___(muld,    103, Op2,  D,    1)  // multiply double
+OP___(divd,    104, Op2,  D,    1)  // divide double
 // LIR_modd is just a place-holder opcode, ie. the back-ends cannot generate
 // code for it.  It's used in TraceMonkey briefly but is always demoted to a
 // LIR_modl or converted to a function call before Nanojit has to do anything
 // serious with it.
-OP___(modd,    104, Op2,  D,    1)  // modulo double
+OP___(modd,    105, Op2,  D,    1)  // modulo double
 
-OP___(cmovi,   105, Op3,  I,    1)  // conditional move int
-OP_64(cmovq,   106, Op3,  Q,    1)  // conditional move quad
+OP___(cmovi,   106, Op3,  I,    1)  // conditional move int
+OP_64(cmovq,   107, Op3,  Q,    1)  // conditional move quad
 
-OP_UN(107)
 OP_UN(108)
 
 //---------------------------------------------------------------------------
 // Conversions
 //---------------------------------------------------------------------------
 OP_64(i2q,     109, Op1,  Q,    1)  // sign-extend int to quad
 OP_64(ui2uq,   110, Op1,  Q,    1)  // zero-extend unsigned int to unsigned quad
 OP_64(q2i,     111, Op1,  I,    1)  // truncate quad to int (removes the high 32 bits)
 
 OP___(i2d,     112, Op1,  D,    1)  // convert int to double
 OP___(ui2d,    113, Op1,  D,    1)  // convert unsigned int to double
 OP___(d2i,     114, Op1,  I,    1)  // convert double to int (no exceptions raised, platform rounding rules)
 
-OP_UN(115)
-OP_UN(116)
-
 //---------------------------------------------------------------------------
 // Overflow arithmetic
 //---------------------------------------------------------------------------
-// These all exit if overflow occurred.  The results is valid on either path.
-OP___(addxovi, 117, Op3,  I,    1)  // add int and exit on overflow
-OP___(subxovi, 118, Op3,  I,    1)  // sub int and exit on overflow
-OP___(mulxovi, 119, Op3,  I,    1)  // multiply int and exit on overflow
+// These all exit if overflow occurred.  The result is valid on either path.
+OP___(addxovi, 115, Op3,  I,    1)  // add int and exit on overflow
+OP___(subxovi, 116, Op3,  I,    1)  // subtract int and exit on overflow
+OP___(mulxovi, 117, Op3,  I,    1)  // multiply int and exit on overflow
 
-OP_UN(120)
+// These all branch if overflow occurred.  The result is valid on either path.
+OP___(addjovi, 118, Op3,  I,    1)  // add int and branch on overflow
+OP___(subjovi, 119, Op3,  I,    1)  // subtract int and branch on overflow
+OP___(muljovi, 120, Op3,  I,    1)  // multiply int and branch on overflow
+
+OP_64(addjovq, 121, Op3,  Q,    1)  // add quad and branch on overflow
+OP_64(subjovq, 122, Op3,  Q,    1)  // subtract quad and branch on overflow
 
 //---------------------------------------------------------------------------
 // SoftFloat
 //---------------------------------------------------------------------------
-OP_SF(dlo2i,   121, Op1,  I,    1)  // get the low  32 bits of a double as an int
-OP_SF(dhi2i,   122, Op1,  I,    1)  // get the high 32 bits of a double as an int
-OP_SF(ii2d,    123, Op2,  D,    1)  // join two ints (1st arg is low bits, 2nd is high)
+OP_SF(dlo2i,   123, Op1,  I,    1)  // get the low  32 bits of a double as an int
+OP_SF(dhi2i,   124, Op1,  I,    1)  // get the high 32 bits of a double as an int
+OP_SF(ii2d,    125, Op2,  D,    1)  // join two ints (1st arg is low bits, 2nd is high)
 
 // LIR_hcalli is a hack that's only used on 32-bit platforms that use
 // SoftFloat.  Its operand is always a LIR_calli, but one that specifies a
 // function that returns a double.  It indicates that the double result is
 // returned via two 32-bit integer registers.  The result is always used as the
 // second operand of a LIR_ii2d.
-OP_SF(hcalli,  124, Op1,  I,    1)
+OP_SF(hcalli,  126, Op1,  I,    1)
 
-OP_UN(125)
-OP_UN(126)
 OP_UN(127)
 
 #undef OP_UN
 #undef OP_32
 #undef OP_64
 #undef OP_SF
 #undef OP_86
--- a/js/src/nanojit/NativeARM.cpp
+++ b/js/src/nanojit/NativeARM.cpp
@@ -2386,25 +2386,26 @@ Assembler::asm_branch(bool branchOnFalse
     if (_config.arm_vfp && fp_cond)
         asm_cmpd(cond);
     else
         asm_cmp(cond);
 
     return at;
 }
 
-void Assembler::asm_branch_xov(LOpcode op, NIns* target)
+NIns* Assembler::asm_branch_ov(LOpcode op, NIns* target)
 {
     // Because MUL can't set the V flag, we use SMULL and CMP to set the Z flag
     // to detect overflow on multiply. Thus, if we have a LIR_mulxovi, we must
     // be conditional on !Z, not V.
     ConditionCode cc = ( op == LIR_mulxovi ? NE : VS );
 
     // Emit a suitable branch instruction.
     B_cond(cc, target);
+    return _nIns;
 }
 
 void
 Assembler::asm_cmp(LIns *cond)
 {
     LInsp lhs = cond->oprnd1();
     LInsp rhs = cond->oprnd2();
 
--- a/js/src/nanojit/NativePPC.cpp
+++ b/js/src/nanojit/NativePPC.cpp
@@ -540,18 +540,19 @@ namespace nanojit
             asm_li64(R0, uint64_t(targ));
         } else {
             asm_li32(R0, uint32_t(uintptr_t(targ)));
         }
     #endif
         return _nIns;
     }
 
-    void Assembler::asm_branch_xov(LOpcode, NIns*) {
-        TODO(asm_branch_xov);
+    NIns* Assembler::asm_branch_ov(LOpcode, NIns*) {
+        TODO(asm_branch_ov);
+        return _nIns;
     }
 
     void Assembler::asm_cmp(LOpcode condop, LIns *a, LIns *b, ConditionRegister cr) {
         RegisterMask allow = isCmpDOpcode(condop) ? FpRegs : GpRegs;
         Register ra = findRegFor(a, allow);
 
     #if !PEDANTIC
         if (b->isImmI()) {
--- a/js/src/nanojit/NativeSparc.cpp
+++ b/js/src/nanojit/NativeSparc.cpp
@@ -530,30 +530,33 @@ namespace nanojit
                     BGU(0, tt);
                 else //if (condop == LIR_geui)
                     BCC(0, tt);
             }
         asm_cmp(cond);
         return at;
     }
 
-    void Assembler::asm_branch_xov(LOpcode, NIns* targ)
+    NIns* Assembler::asm_branch_ov(LOpcode, NIns* targ)
     {
+        NIns* at = 0;
         underrunProtect(32);
         intptr_t tt = ((intptr_t)targ - (intptr_t)_nIns + 8) >> 2;
         // !targ means that it needs patch.
         if( !(isIMM22((int32_t)tt)) || !targ ) {
             JMP_long_nocheck((intptr_t)targ);
+            at = _nIns;
             NOP();
             BA(0, 5);
             tt = 4;
         }
         NOP();
 
         BVS(0, tt);
+        return at;
     }
 
     void Assembler::asm_cmp(LIns *cond)
     {
         underrunProtect(12);
 
         LInsp lhs = cond->oprnd1();
         LInsp rhs = cond->oprnd2();
--- a/js/src/nanojit/NativeX64.cpp
+++ b/js/src/nanojit/NativeX64.cpp
@@ -695,52 +695,62 @@ namespace nanojit
     }
 
     // Binary op, integer regs, rhs is int32 constant.
     void Assembler::asm_arith_imm(LIns *ins) {
         LIns *b = ins->oprnd2();
         int32_t imm = getImm32(b);
         LOpcode op = ins->opcode();
         Register rr, ra;
-        if (op == LIR_muli || op == LIR_mulxovi) {
+        if (op == LIR_muli || op == LIR_muljovi || op == LIR_mulxovi) {
             // Special case: imul-by-imm has true 3-addr form.  So we don't
             // need the MR(rr, ra) after the IMULI.
             beginOp1Regs(ins, GpRegs, rr, ra);
             IMULI(rr, ra, imm);
             endOpRegs(ins, rr, ra);
             return;
         }
 
         beginOp1Regs(ins, GpRegs, rr, ra);
         if (isS8(imm)) {
             switch (ins->opcode()) {
             default: TODO(arith_imm8);
             case LIR_addi:
+            case LIR_addjovi:
             case LIR_addxovi:    ADDLR8(rr, imm);   break;   // XXX: bug 547125: could use LEA for LIR_addi
             case LIR_andi:       ANDLR8(rr, imm);   break;
             case LIR_ori:        ORLR8( rr, imm);   break;
             case LIR_subi:
+            case LIR_subjovi:
             case LIR_subxovi:    SUBLR8(rr, imm);   break;
             case LIR_xori:       XORLR8(rr, imm);   break;
-            case LIR_addq:       ADDQR8(rr, imm);   break;
+            case LIR_addq:
+            case LIR_addjovq:    ADDQR8(rr, imm);   break;
+            case LIR_subq:
+            case LIR_subjovq:    SUBQR8(rr, imm);   break;
             case LIR_andq:       ANDQR8(rr, imm);   break;
             case LIR_orq:        ORQR8( rr, imm);   break;
             case LIR_xorq:       XORQR8(rr, imm);   break;
             }
         } else {
             switch (ins->opcode()) {
             default: TODO(arith_imm);
             case LIR_addi:
+            case LIR_addjovi:
             case LIR_addxovi:    ADDLRI(rr, imm);   break;   // XXX: bug 547125: could use LEA for LIR_addi
             case LIR_andi:       ANDLRI(rr, imm);   break;
             case LIR_ori:        ORLRI( rr, imm);   break;
             case LIR_subi:
+            case LIR_subjovi:
             case LIR_subxovi:    SUBLRI(rr, imm);   break;
             case LIR_xori:       XORLRI(rr, imm);   break;
-            case LIR_addq:       ADDQRI(rr, imm);   break;
+            case LIR_addq:
+            case LIR_addjovq:    ADDQRI(rr, imm);   break;
+            case LIR_subq:
+            case LIR_subjovq:    SUBQRI(rr, imm);   break;
             case LIR_andq:       ANDQRI(rr, imm);   break;
             case LIR_orq:        ORQRI( rr, imm);   break;
             case LIR_xorq:       XORQRI(rr, imm);   break;
             }
         }
         if (rr != ra)
             MR(rr, ra);
 
@@ -829,27 +839,33 @@ namespace nanojit
             asm_arith_imm(ins);
             return;
         }
         beginOp2Regs(ins, GpRegs, rr, ra, rb);
         switch (ins->opcode()) {
         default:           TODO(asm_arith);
         case LIR_ori:      ORLRR(rr, rb);  break;
         case LIR_subi:
+        case LIR_subjovi:
         case LIR_subxovi:  SUBRR(rr, rb);  break;
         case LIR_addi:
+        case LIR_addjovi:
         case LIR_addxovi:  ADDRR(rr, rb);  break;  // XXX: bug 547125: could use LEA for LIR_addi
         case LIR_andi:     ANDRR(rr, rb);  break;
         case LIR_xori:     XORRR(rr, rb);  break;
         case LIR_muli:
+        case LIR_muljovi:
         case LIR_mulxovi:  IMUL(rr, rb);   break;
         case LIR_xorq:     XORQRR(rr, rb); break;
         case LIR_orq:      ORQRR(rr, rb);  break;
         case LIR_andq:     ANDQRR(rr, rb); break;
-        case LIR_addq:     ADDQRR(rr, rb); break;
+        case LIR_addq:
+        case LIR_addjovq:  ADDQRR(rr, rb); break;
+        case LIR_subq:
+        case LIR_subjovq:  SUBQRR(rr, rb); break;
         }
         if (rr != ra)
             MR(rr, ra);
 
         endOpRegs(ins, rr, ra);
     }
 
     // Binary op with fp registers.
@@ -1199,27 +1215,28 @@ namespace nanojit
                 }
             }
         }
         NIns *patch = _nIns;    // address of instruction to patch
         asm_cmp(cond);
         return patch;
     }
 
-    void Assembler::asm_branch_xov(LOpcode, NIns* target) {
+    NIns* Assembler::asm_branch_ov(LOpcode, NIns* target) {
         if (target && !isTargetWithinS32(target)) {
             setError(ConditionalBranchTooFar);
             NanoAssert(0);
         }
         // We must ensure there's room for the instr before calculating
         // the offset.  And the offset determines the opcode (8bit or 32bit).
         if (target && isTargetWithinS8(target))
             JO8(8, target);
         else
             JO( 8, target);
+        return _nIns;
     }
 
     // WARNING: this function cannot generate code that will affect the
     // condition codes prior to the generation of the test/cmp.  See
     // Nativei386.cpp:asm_cmp() for details.
     void Assembler::asm_cmp(LIns *cond) {
         LIns *b = cond->oprnd2();
         if (isImm32(b)) {
--- a/js/src/nanojit/Nativei386.cpp
+++ b/js/src/nanojit/Nativei386.cpp
@@ -1487,19 +1487,20 @@ namespace nanojit
             default:        NanoAssert(0);  break;
             }
         }
         NIns* at = _nIns;
         asm_cmp(cond);
         return at;
     }
 
-    void Assembler::asm_branch_xov(LOpcode, NIns* target)
+    NIns* Assembler::asm_branch_ov(LOpcode, NIns* target)
     {
         JO(target);
+        return _nIns;
     }
 
     void Assembler::asm_switch(LIns* ins, NIns* exit)
     {
         LIns* diff = ins->oprnd1();
         findSpecificRegFor(diff, EDX);
         JMP(exit);
     }
@@ -1708,16 +1709,17 @@ namespace nanojit
             // Nb: if the div feeds into a mod it will be handled by
             // asm_div_mod() rather than here.
             isConstRhs = false;
             rb = findRegFor(rhs, (GpRegs & ~(rmask(EAX)|rmask(EDX))));
             allow = rmask(EAX);
             evictIfActive(EDX);
             break;
         case LIR_muli:
+        case LIR_muljovi:
         case LIR_mulxovi:
             isConstRhs = false;
             if (lhs != rhs) {
                 rb = findRegFor(rhs, allow);
                 allow &= ~rmask(rb);
             }
             break;
         case LIR_lshi:
@@ -1745,20 +1747,23 @@ namespace nanojit
         Register ra = lhs->isInReg() ? lhs->getReg() : rr;
 
         if (!isConstRhs) {
             if (lhs == rhs)
                 rb = ra;
 
             switch (op) {
             case LIR_addi:
+            case LIR_addjovi:
             case LIR_addxovi:    ADD(rr, rb); break;     // XXX: bug 547125: could use LEA for LIR_addi
             case LIR_subi:
+            case LIR_subjovi:
             case LIR_subxovi:    SUB(rr, rb); break;
             case LIR_muli:
+            case LIR_muljovi:
             case LIR_mulxovi:    MUL(rr, rb); break;
             case LIR_andi:       AND(rr, rb); break;
             case LIR_ori:        OR( rr, rb); break;
             case LIR_xori:       XOR(rr, rb); break;
             case LIR_lshi:       SHL(rr, rb); break;
             case LIR_rshi:       SAR(rr, rb); break;
             case LIR_rshui:      SHR(rr, rb); break;
             case LIR_divi:
@@ -1771,18 +1776,20 @@ namespace nanojit
         } else {
             int c = rhs->immI();
             switch (op) {
             case LIR_addi:
                 // this doesn't set cc's, only use it when cc's not required.
                 LEA(rr, c, ra);
                 ra = rr; // suppress mov
                 break;
+            case LIR_addjovi:
             case LIR_addxovi:    ADDi(rr, c);    break;
             case LIR_subi:
+            case LIR_subjovi:
             case LIR_subxovi:    SUBi(rr, c);    break;
             case LIR_andi:       ANDi(rr, c);    break;
             case LIR_ori:        ORi( rr, c);    break;
             case LIR_xori:       XORi(rr, c);    break;
             case LIR_lshi:       SHLi(rr, c);    break;
             case LIR_rshi:       SARi(rr, c);    break;
             case LIR_rshui:      SHRi(rr, c);    break;
             default:            NanoAssert(0);  break;