Bug 1303178 - Make ARM ScratchRegister usage explicit. r=nbp
☠☠ backed out by 83a86d52b038 ☠☠
author: Sean Stangl <sstangl@mozilla.com>
Wed, 17 Aug 2016 15:23:41 -0700
changeset 316095 7d4875921ecf575a0ddc6acfee68dc093c10a4c6
parent 316094 964959527d7d25947fb2214e84ab46fc71d0cb1e
child 316096 87b7e5eac522bf5e87bcce12d5bcf50665d23442
push id: 30759
push user: philringnalda@gmail.com
push date: Sat, 01 Oct 2016 06:25:09 +0000
treeherder: mozilla-central@fcc62bbf09ee [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: nbp
bugs: 1303178
milestone: 52.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1303178 - Make ARM ScratchRegister usage explicit. r=nbp
js/src/asmjs/WasmStubs.cpp
js/src/jit/arm/Assembler-arm.cpp
js/src/jit/arm/Assembler-arm.h
js/src/jit/arm/CodeGenerator-arm.cpp
js/src/jit/arm/MacroAssembler-arm-inl.h
js/src/jit/arm/MacroAssembler-arm.cpp
js/src/jit/arm/MacroAssembler-arm.h
js/src/jit/arm/MoveEmitter-arm.cpp
js/src/jit/arm/SharedIC-arm.cpp
js/src/jit/arm/SharedICHelpers-arm.h
js/src/jit/arm/Trampoline-arm.cpp
--- a/js/src/asmjs/WasmStubs.cpp
+++ b/js/src/asmjs/WasmStubs.cpp
@@ -1042,17 +1042,17 @@ wasm::GenerateInterruptStub(MacroAssembl
                             FloatRegisterSet(uint32_t(0))));
 
     // Save both the APSR and FPSCR in non-volatile registers.
     masm.as_mrs(r4);
     masm.as_vmrs(r5);
     // Save the stack pointer in a non-volatile register.
     masm.mov(sp,r6);
     // Align the stack.
-    masm.ma_and(Imm32(~7), sp, sp);
+    masm.as_bic(sp, sp, Imm8(7));
 
     // Store resumePC into the return PC stack slot.
     masm.loadWasmActivationFromSymbolicAddress(IntArgReg0);
     masm.loadPtr(Address(IntArgReg0, WasmActivation::offsetOfResumePC()), IntArgReg1);
     masm.storePtr(IntArgReg1, Address(r6, 14 * sizeof(uint32_t*)));
 
     // Save all FP registers
     static_assert(!SupportsSimd, "high lanes of SIMD registers need to be saved too.");
--- a/js/src/jit/arm/Assembler-arm.cpp
+++ b/js/src/jit/arm/Assembler-arm.cpp
@@ -1112,17 +1112,17 @@ Imm8::EncodeTwoImms(uint32_t imm)
     imm2 = ((imm >> (32 - imm2shift)) | (imm << imm2shift)) & 0xff;
     MOZ_ASSERT((imm1shift & 0x1) == 0);
     MOZ_ASSERT((imm2shift & 0x1) == 0);
     return TwoImm8mData(datastore::Imm8mData(imm1, imm1shift >> 1),
                         datastore::Imm8mData(imm2, imm2shift >> 1));
 }
 
 ALUOp
-jit::ALUNeg(ALUOp op, Register dest, Imm32* imm, Register* negDest)
+jit::ALUNeg(ALUOp op, Register dest, Register scratch, Imm32* imm, Register* negDest)
 {
     // Find an alternate ALUOp to get the job done, and use a different imm.
     *negDest = dest;
     switch (op) {
       case OpMov:
         *imm = Imm32(~imm->value);
         return OpMvn;
       case OpMvn:
@@ -1144,17 +1144,17 @@ jit::ALUNeg(ALUOp op, Register dest, Imm
         *imm = Imm32(-imm->value);
         return OpCmn;
       case OpCmn:
         *imm = Imm32(-imm->value);
         return OpCmp;
       case OpTst:
         MOZ_ASSERT(dest == InvalidReg);
         *imm = Imm32(~imm->value);
-        *negDest = ScratchRegister;
+        *negDest = scratch;
         return OpBic;
         // orr has orn on thumb2 only.
       default:
         return OpInvalid;
     }
 }
 
 bool
@@ -3416,8 +3416,13 @@ Assembler::GetPoolMaxOffset()
         char* poolMaxOffsetStr = getenv("ASM_POOL_MAX_OFFSET");
         uint32_t poolMaxOffset;
         if (poolMaxOffsetStr && sscanf(poolMaxOffsetStr, "%u", &poolMaxOffset) == 1)
             AsmPoolMaxOffset = poolMaxOffset;
         isSet = true;
     }
     return AsmPoolMaxOffset;
 }
+
+SecondScratchRegisterScope::SecondScratchRegisterScope(MacroAssembler &masm)
+  : AutoRegisterScope(masm, masm.getSecondScratchReg())
+{
+}
--- a/js/src/jit/arm/Assembler-arm.h
+++ b/js/src/jit/arm/Assembler-arm.h
@@ -50,16 +50,21 @@ static constexpr Register ScratchRegiste
 // of code thinks it has exclusive ownership of the scratch register.
 struct ScratchRegisterScope : public AutoRegisterScope
 {
     explicit ScratchRegisterScope(MacroAssembler& masm)
       : AutoRegisterScope(masm, ScratchRegister)
     { }
 };
 
+struct SecondScratchRegisterScope : public AutoRegisterScope
+{
+    explicit SecondScratchRegisterScope(MacroAssembler& masm);
+};
+
 static constexpr Register OsrFrameReg = r3;
 static constexpr Register ArgumentsRectifierReg = r8;
 static constexpr Register CallTempReg0 = r5;
 static constexpr Register CallTempReg1 = r6;
 static constexpr Register CallTempReg2 = r7;
 static constexpr Register CallTempReg3 = r8;
 static constexpr Register CallTempReg4 = r0;
 static constexpr Register CallTempReg5 = r1;
@@ -404,17 +409,17 @@ enum VFPOp {
     OpvAbs  = 0xB << 20 | 0x3 << 6,
     OpvNeg  = 0xB << 20 | 0x1 << 6 | 0x1 << 16,
     OpvSqrt = 0xB << 20 | 0x3 << 6 | 0x1 << 16,
     OpvCmp  = 0xB << 20 | 0x1 << 6 | 0x4 << 16,
     OpvCmpz  = 0xB << 20 | 0x1 << 6 | 0x5 << 16
 };
 
 // Negate the operation, AND negate the immediate that we were passed in.
-ALUOp ALUNeg(ALUOp op, Register dest, Imm32* imm, Register* negDest);
+ALUOp ALUNeg(ALUOp op, Register dest, Register scratch, Imm32* imm, Register* negDest);
 bool can_dbl(ALUOp op);
 bool condsAreSafe(ALUOp op);
 
 // If there is a variant of op that has a dest (think cmp/sub) return that
 // variant of it.
 ALUOp getDestVariant(ALUOp op);
 
 static const ValueOperand JSReturnOperand = ValueOperand(JSReturnReg_Type, JSReturnReg_Data);
--- a/js/src/jit/arm/CodeGenerator-arm.cpp
+++ b/js/src/jit/arm/CodeGenerator-arm.cpp
@@ -66,17 +66,17 @@ OutOfLineBailout::accept(CodeGeneratorAR
 void
 CodeGeneratorARM::visitTestIAndBranch(LTestIAndBranch* test)
 {
     const LAllocation* opd = test->getOperand(0);
     MBasicBlock* ifTrue = test->ifTrue();
     MBasicBlock* ifFalse = test->ifFalse();
 
     // Test the operand
-    masm.ma_cmp(ToRegister(opd), Imm32(0));
+    masm.as_cmp(ToRegister(opd), Imm8(0));
 
     if (isNextBlock(ifFalse->lir())) {
         jumpToBlock(ifTrue, Assembler::NonZero);
     } else if (isNextBlock(ifTrue->lir())) {
         jumpToBlock(ifFalse, Assembler::Zero);
     } else {
         jumpToBlock(ifFalse, Assembler::Zero);
         jumpToBlock(ifTrue);
@@ -86,39 +86,47 @@ CodeGeneratorARM::visitTestIAndBranch(LT
 void
 CodeGeneratorARM::visitCompare(LCompare* comp)
 {
     Assembler::Condition cond = JSOpToCondition(comp->mir()->compareType(), comp->jsop());
     const LAllocation* left = comp->getOperand(0);
     const LAllocation* right = comp->getOperand(1);
     const LDefinition* def = comp->getDef(0);
 
-    if (right->isConstant())
-        masm.ma_cmp(ToRegister(left), Imm32(ToInt32(right)));
-    else if (right->isRegister())
+    ScratchRegisterScope scratch(masm);
+
+    if (right->isConstant()) {
+        masm.ma_cmp(ToRegister(left), Imm32(ToInt32(right)), scratch);
+    } else if (right->isRegister()) {
         masm.ma_cmp(ToRegister(left), ToRegister(right));
-    else
-        masm.ma_cmp(ToRegister(left), Operand(ToAddress(right)));
+    } else {
+        SecondScratchRegisterScope scratch2(masm);
+        masm.ma_cmp(ToRegister(left), Operand(ToAddress(right)), scratch, scratch2);
+    }
     masm.ma_mov(Imm32(0), ToRegister(def));
     masm.ma_mov(Imm32(1), ToRegister(def), cond);
 }
 
 void
 CodeGeneratorARM::visitCompareAndBranch(LCompareAndBranch* comp)
 {
     Assembler::Condition cond = JSOpToCondition(comp->cmpMir()->compareType(), comp->jsop());
     const LAllocation* left = comp->left();
     const LAllocation* right = comp->right();
 
-    if (right->isConstant())
-        masm.ma_cmp(ToRegister(left), Imm32(ToInt32(right)));
-    else if (right->isRegister())
+    ScratchRegisterScope scratch(masm);
+
+    if (right->isConstant()) {
+        masm.ma_cmp(ToRegister(left), Imm32(ToInt32(right)), scratch);
+    } else if (right->isRegister()) {
         masm.ma_cmp(ToRegister(left), ToRegister(right));
-    else
-        masm.ma_cmp(ToRegister(left), Operand(ToAddress(right)));
+    } else {
+        SecondScratchRegisterScope scratch2(masm);
+        masm.ma_cmp(ToRegister(left), Operand(ToAddress(right)), scratch, scratch2);
+    }
     emitBranch(cond, comp->ifTrue(), comp->ifFalse());
 }
 
 bool
 CodeGeneratorARM::generateOutOfLineCode()
 {
     if (!CodeGeneratorShared::generateOutOfLineCode())
         return false;
@@ -276,18 +284,20 @@ CodeGeneratorARM::visitSqrtF(LSqrtF* ins
 
 void
 CodeGeneratorARM::visitAddI(LAddI* ins)
 {
     const LAllocation* lhs = ins->getOperand(0);
     const LAllocation* rhs = ins->getOperand(1);
     const LDefinition* dest = ins->getDef(0);
 
+    ScratchRegisterScope scratch(masm);
+
     if (rhs->isConstant())
-        masm.ma_add(ToRegister(lhs), Imm32(ToInt32(rhs)), ToRegister(dest), SetCC);
+        masm.ma_add(ToRegister(lhs), Imm32(ToInt32(rhs)), ToRegister(dest), scratch, SetCC);
     else if (rhs->isRegister())
         masm.ma_add(ToRegister(lhs), ToRegister(rhs), ToRegister(dest), SetCC);
     else
         masm.ma_add(ToRegister(lhs), Operand(ToAddress(rhs)), ToRegister(dest), SetCC);
 
     if (ins->snapshot())
         bailoutIf(Assembler::Overflow, ins->snapshot());
 }
@@ -310,18 +320,20 @@ CodeGeneratorARM::visitAddI64(LAddI64* l
 
 void
 CodeGeneratorARM::visitSubI(LSubI* ins)
 {
     const LAllocation* lhs = ins->getOperand(0);
     const LAllocation* rhs = ins->getOperand(1);
     const LDefinition* dest = ins->getDef(0);
 
+    ScratchRegisterScope scratch(masm);
+
     if (rhs->isConstant())
-        masm.ma_sub(ToRegister(lhs), Imm32(ToInt32(rhs)), ToRegister(dest), SetCC);
+        masm.ma_sub(ToRegister(lhs), Imm32(ToInt32(rhs)), ToRegister(dest), scratch, SetCC);
     else if (rhs->isRegister())
         masm.ma_sub(ToRegister(lhs), ToRegister(rhs), ToRegister(dest), SetCC);
     else
         masm.ma_sub(ToRegister(lhs), Operand(ToAddress(rhs)), ToRegister(dest), SetCC);
 
     if (ins->snapshot())
         bailoutIf(Assembler::Overflow, ins->snapshot());
 }
@@ -353,23 +365,23 @@ CodeGeneratorARM::visitMulI(LMulI* ins)
 
     if (rhs->isConstant()) {
         // Bailout when this condition is met.
         Assembler::Condition c = Assembler::Overflow;
         // Bailout on -0.0
         int32_t constant = ToInt32(rhs);
         if (mul->canBeNegativeZero() && constant <= 0) {
             Assembler::Condition bailoutCond = (constant == 0) ? Assembler::LessThan : Assembler::Equal;
-            masm.ma_cmp(ToRegister(lhs), Imm32(0));
+            masm.as_cmp(ToRegister(lhs), Imm8(0));
             bailoutIf(bailoutCond, ins->snapshot());
         }
         // TODO: move these to ma_mul.
         switch (constant) {
           case -1:
-            masm.ma_rsb(ToRegister(lhs), Imm32(0), ToRegister(dest), SetCC);
+            masm.as_rsb(ToRegister(dest), ToRegister(lhs), Imm8(0), SetCC);
             break;
           case 0:
             masm.ma_mov(Imm32(0), ToRegister(dest));
             return; // Escape overflow check;
           case 1:
             // Nop
             masm.ma_mov(ToRegister(lhs), ToRegister(dest));
             return; // Escape overflow check;
@@ -417,41 +429,44 @@ CodeGeneratorARM::visitMulI(LMulI* ins)
                         masm.as_cmp(ToRegister(lhs), asr(ToRegister(dest), shift));
                         c = Assembler::NotEqual;
                         handled = true;
                     }
                 }
             }
 
             if (!handled) {
+                ScratchRegisterScope scratch(masm);
                 if (mul->canOverflow())
-                    c = masm.ma_check_mul(ToRegister(lhs), Imm32(ToInt32(rhs)), ToRegister(dest), c);
+                    c = masm.ma_check_mul(ToRegister(lhs), Imm32(ToInt32(rhs)), ToRegister(dest), scratch, c);
                 else
-                    masm.ma_mul(ToRegister(lhs), Imm32(ToInt32(rhs)), ToRegister(dest));
+                    masm.ma_mul(ToRegister(lhs), Imm32(ToInt32(rhs)), ToRegister(dest), scratch);
             }
           }
         }
         // Bailout on overflow.
         if (mul->canOverflow())
             bailoutIf(c, ins->snapshot());
     } else {
         Assembler::Condition c = Assembler::Overflow;
 
-        if (mul->canOverflow())
-            c = masm.ma_check_mul(ToRegister(lhs), ToRegister(rhs), ToRegister(dest), c);
-        else
+        if (mul->canOverflow()) {
+            ScratchRegisterScope scratch(masm);
+            c = masm.ma_check_mul(ToRegister(lhs), ToRegister(rhs), ToRegister(dest), scratch, c);
+        } else {
             masm.ma_mul(ToRegister(lhs), ToRegister(rhs), ToRegister(dest));
+        }
 
         // Bailout on overflow.
         if (mul->canOverflow())
             bailoutIf(c, ins->snapshot());
 
         if (mul->canBeNegativeZero()) {
             Label done;
-            masm.ma_cmp(ToRegister(dest), Imm32(0));
+            masm.as_cmp(ToRegister(dest), Imm8(0));
             masm.ma_b(&done, Assembler::NotEqual);
 
             // Result is -0 if lhs or rhs is negative.
             masm.ma_cmn(ToRegister(lhs), ToRegister(rhs));
             bailoutIf(Assembler::Signed, ins->snapshot());
 
             masm.bind(&done);
         }
@@ -498,24 +513,26 @@ CodeGeneratorARM::visitMulI64(LMulI64* l
         masm.mul64(ToOperandOrRegister64(rhs), ToRegister64(lhs), temp);
     }
 }
 
 void
 CodeGeneratorARM::divICommon(MDiv* mir, Register lhs, Register rhs, Register output,
                              LSnapshot* snapshot, Label& done)
 {
+    ScratchRegisterScope scratch(masm);
+
     if (mir->canBeNegativeOverflow()) {
         // Handle INT32_MIN / -1;
         // The integer division will give INT32_MIN, but we want -(double)INT32_MIN.
 
         // Sets EQ if lhs == INT32_MIN.
-        masm.ma_cmp(lhs, Imm32(INT32_MIN));
+        masm.ma_cmp(lhs, Imm32(INT32_MIN), scratch);
         // If EQ (LHS == INT32_MIN), sets EQ if rhs == -1.
-        masm.ma_cmp(rhs, Imm32(-1), Assembler::Equal);
+        masm.ma_cmp(rhs, Imm32(-1), scratch, Assembler::Equal);
         if (mir->canTruncateOverflow()) {
             if (mir->trapOnError()) {
                 masm.ma_b(wasm::JumpTarget::IntegerOverflow, Assembler::Equal);
             } else {
                 // (-INT32_MIN)|0 = INT32_MIN
                 Label skip;
                 masm.ma_b(&skip, Assembler::NotEqual);
                 masm.ma_mov(Imm32(INT32_MIN), output);
@@ -525,17 +542,17 @@ CodeGeneratorARM::divICommon(MDiv* mir, 
         } else {
             MOZ_ASSERT(mir->fallible());
             bailoutIf(Assembler::Equal, snapshot);
         }
     }
 
     // Handle divide by zero.
     if (mir->canBeDivideByZero()) {
-        masm.ma_cmp(rhs, Imm32(0));
+        masm.as_cmp(rhs, Imm8(0));
         if (mir->canTruncateInfinities()) {
             if (mir->trapOnError()) {
                 masm.ma_b(wasm::JumpTarget::IntegerDivideByZero, Assembler::Equal);
             } else {
                 // Infinity|0 == 0
                 Label skip;
                 masm.ma_b(&skip, Assembler::NotEqual);
                 masm.ma_mov(Imm32(0), output);
@@ -546,19 +563,19 @@ CodeGeneratorARM::divICommon(MDiv* mir, 
             MOZ_ASSERT(mir->fallible());
             bailoutIf(Assembler::Equal, snapshot);
         }
     }
 
     // Handle negative 0.
     if (!mir->canTruncateNegativeZero() && mir->canBeNegativeZero()) {
         Label nonzero;
-        masm.ma_cmp(lhs, Imm32(0));
+        masm.as_cmp(lhs, Imm8(0));
         masm.ma_b(&nonzero, Assembler::NotEqual);
-        masm.ma_cmp(rhs, Imm32(0));
+        masm.as_cmp(rhs, Imm8(0));
         MOZ_ASSERT(mir->fallible());
         bailoutIf(Assembler::LessThan, snapshot);
         masm.bind(&nonzero);
     }
 }
 
 void
 CodeGeneratorARM::visitDivI(LDivI* ins)
@@ -610,17 +627,17 @@ CodeGeneratorARM::visitSoftDivI(LSoftDiv
     if (gen->compilingAsmJS())
         masm.callWithABI(wasm::SymbolicAddress::aeabi_idivmod);
     else
         masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, __aeabi_idivmod));
 
     // idivmod returns the quotient in r0, and the remainder in r1.
     if (!mir->canTruncateRemainder()) {
         MOZ_ASSERT(mir->fallible());
-        masm.ma_cmp(r1, Imm32(0));
+        masm.as_cmp(r1, Imm8(0));
         bailoutIf(Assembler::NonZero, ins->snapshot());
     }
 
     masm.bind(&done);
 }
 
 void
 CodeGeneratorARM::visitDivPowTwoI(LDivPowTwoI* ins)
@@ -682,18 +699,18 @@ CodeGeneratorARM::modICommon(MMod* mir, 
     // There are three cases: (Y < 0), (Y == 0) and (Y > 0).
     // If (Y < 0), then we compare X with 0, and bail if X == 0.
     // If (Y == 0), then we simply want to bail. Since this does not set the
     // flags necessary for LT to trigger, we don't test X, and take the bailout
     // because the EQ flag is set.
     // If (Y > 0), we don't set EQ, and we don't trigger LT, so we don't take
     // the bailout.
     if (mir->canBeDivideByZero() || mir->canBeNegativeDividend()) {
-        masm.ma_cmp(rhs, Imm32(0));
-        masm.ma_cmp(lhs, Imm32(0), Assembler::LessThan);
+        masm.as_cmp(rhs, Imm8(0));
+        masm.as_cmp(lhs, Imm8(0), Assembler::LessThan);
         if (mir->isTruncated()) {
             if (mir->trapOnError()) {
                 masm.ma_b(wasm::JumpTarget::IntegerDivideByZero, Assembler::Equal);
             } else {
                 // NaN|0 == 0 and (0 % -X)|0 == 0
                 Label skip;
                 masm.ma_b(&skip, Assembler::NotEqual);
                 masm.ma_mov(Imm32(0), output);
@@ -717,28 +734,31 @@ CodeGeneratorARM::visitModI(LModI* ins)
     MMod* mir = ins->mir();
 
     // Save the lhs in case we end up with a 0 that should be a -0.0 because lhs < 0.
     masm.ma_mov(lhs, callTemp);
 
     Label done;
     modICommon(mir, lhs, rhs, output, ins->snapshot(), done);
 
-    masm.ma_smod(lhs, rhs, output);
+    {
+        ScratchRegisterScope scratch(masm);
+        masm.ma_smod(lhs, rhs, output, scratch);
+    }
 
     // If X%Y == 0 and X < 0, then we *actually* wanted to return -0.0.
     if (mir->canBeNegativeDividend()) {
         if (mir->isTruncated()) {
             // -0.0|0 == 0
         } else {
             MOZ_ASSERT(mir->fallible());
             // See if X < 0
-            masm.ma_cmp(output, Imm32(0));
+            masm.as_cmp(output, Imm8(0));
             masm.ma_b(&done, Assembler::NotEqual);
-            masm.ma_cmp(callTemp, Imm32(0));
+            masm.as_cmp(callTemp, Imm8(0));
             bailoutIf(Assembler::Signed, ins->snapshot());
         }
     }
 
     masm.bind(&done);
 }
 
 void
@@ -751,23 +771,27 @@ CodeGeneratorARM::visitSoftModI(LSoftMod
     Register callTemp = ToRegister(ins->callTemp());
     MMod* mir = ins->mir();
     Label done;
 
     // Save the lhs in case we end up with a 0 that should be a -0.0 because lhs < 0.
     MOZ_ASSERT(callTemp.code() > r3.code() && callTemp.code() < r12.code());
     masm.ma_mov(lhs, callTemp);
 
+
     // Prevent INT_MIN % -1;
     // The integer division will give INT_MIN, but we want -(double)INT_MIN.
     if (mir->canBeNegativeDividend()) {
-        // Sets EQ if lhs == INT_MIN
-        masm.ma_cmp(lhs, Imm32(INT_MIN));
-        // If EQ (LHS == INT_MIN), sets EQ if rhs == -1
-        masm.ma_cmp(rhs, Imm32(-1), Assembler::Equal);
+        {
+            ScratchRegisterScope scratch(masm);
+            // Sets EQ if lhs == INT_MIN
+            masm.ma_cmp(lhs, Imm32(INT_MIN), scratch);
+            // If EQ (LHS == INT_MIN), sets EQ if rhs == -1
+            masm.ma_cmp(rhs, Imm32(-1), scratch, Assembler::Equal);
+        }
         if (mir->isTruncated()) {
             // (INT_MIN % -1)|0 == 0
             Label skip;
             masm.ma_b(&skip, Assembler::NotEqual);
             masm.ma_mov(Imm32(0), output);
             masm.ma_b(&done);
             masm.bind(&skip);
         } else {
@@ -788,39 +812,43 @@ CodeGeneratorARM::visitSoftModI(LSoftMod
 
     // If X%Y == 0 and X < 0, then we *actually* wanted to return -0.0
     if (mir->canBeNegativeDividend()) {
         if (mir->isTruncated()) {
             // -0.0|0 == 0
         } else {
             MOZ_ASSERT(mir->fallible());
             // See if X < 0
-            masm.ma_cmp(r1, Imm32(0));
+            masm.as_cmp(r1, Imm8(0));
             masm.ma_b(&done, Assembler::NotEqual);
-            masm.ma_cmp(callTemp, Imm32(0));
+            masm.as_cmp(callTemp, Imm8(0));
             bailoutIf(Assembler::Signed, ins->snapshot());
         }
     }
     masm.bind(&done);
 }
 
 void
 CodeGeneratorARM::visitModPowTwoI(LModPowTwoI* ins)
 {
     Register in = ToRegister(ins->getOperand(0));
     Register out = ToRegister(ins->getDef(0));
     MMod* mir = ins->mir();
     Label fin;
     // bug 739870, jbramley has a different sequence that may help with speed
     // here.
+
     masm.ma_mov(in, out, SetCC);
     masm.ma_b(&fin, Assembler::Zero);
-    masm.ma_rsb(Imm32(0), out, LeaveCC, Assembler::Signed);
-    masm.ma_and(Imm32((1 << ins->shift()) - 1), out);
-    masm.ma_rsb(Imm32(0), out, SetCC, Assembler::Signed);
+    masm.as_rsb(out, out, Imm8(0), LeaveCC, Assembler::Signed);
+    {
+        ScratchRegisterScope scratch(masm);
+        masm.ma_and(Imm32((1 << ins->shift()) - 1), out, scratch);
+    }
+    masm.as_rsb(out, out, Imm8(0), SetCC, Assembler::Signed);
     if (mir->canBeNegativeDividend()) {
         if (!mir->isTruncated()) {
             MOZ_ASSERT(mir->fallible());
             bailoutIf(Assembler::Zero, ins->snapshot());
         } else {
             // -0|0 == 0
         }
     }
@@ -830,17 +858,22 @@ CodeGeneratorARM::visitModPowTwoI(LModPo
 void
 CodeGeneratorARM::visitModMaskI(LModMaskI* ins)
 {
     Register src = ToRegister(ins->getOperand(0));
     Register dest = ToRegister(ins->getDef(0));
     Register tmp1 = ToRegister(ins->getTemp(0));
     Register tmp2 = ToRegister(ins->getTemp(1));
     MMod* mir = ins->mir();
-    masm.ma_mod_mask(src, dest, tmp1, tmp2, ins->shift());
+
+    ScratchRegisterScope scratch(masm);
+    SecondScratchRegisterScope scratch2(masm);
+
+    masm.ma_mod_mask(src, dest, tmp1, tmp2, scratch, scratch2, ins->shift());
+
     if (mir->canBeNegativeDividend()) {
         if (!mir->isTruncated()) {
             MOZ_ASSERT(mir->fallible());
             bailoutIf(Assembler::Zero, ins->snapshot());
         } else {
             // -0|0 == 0
         }
     }
@@ -859,33 +892,36 @@ CodeGeneratorARM::visitBitNotI(LBitNotI*
 }
 
 void
 CodeGeneratorARM::visitBitOpI(LBitOpI* ins)
 {
     const LAllocation* lhs = ins->getOperand(0);
     const LAllocation* rhs = ins->getOperand(1);
     const LDefinition* dest = ins->getDef(0);
+
+    ScratchRegisterScope scratch(masm);
+
     // All of these bitops should be either imm32's, or integer registers.
     switch (ins->bitop()) {
       case JSOP_BITOR:
         if (rhs->isConstant())
-            masm.ma_orr(Imm32(ToInt32(rhs)), ToRegister(lhs), ToRegister(dest));
+            masm.ma_orr(Imm32(ToInt32(rhs)), ToRegister(lhs), ToRegister(dest), scratch);
         else
             masm.ma_orr(ToRegister(rhs), ToRegister(lhs), ToRegister(dest));
         break;
       case JSOP_BITXOR:
         if (rhs->isConstant())
-            masm.ma_eor(Imm32(ToInt32(rhs)), ToRegister(lhs), ToRegister(dest));
+            masm.ma_eor(Imm32(ToInt32(rhs)), ToRegister(lhs), ToRegister(dest), scratch);
         else
             masm.ma_eor(ToRegister(rhs), ToRegister(lhs), ToRegister(dest));
         break;
       case JSOP_BITAND:
         if (rhs->isConstant())
-            masm.ma_and(Imm32(ToInt32(rhs)), ToRegister(lhs), ToRegister(dest));
+            masm.ma_and(Imm32(ToInt32(rhs)), ToRegister(lhs), ToRegister(dest), scratch);
         else
             masm.ma_and(ToRegister(rhs), ToRegister(lhs), ToRegister(dest));
         break;
       default:
         MOZ_CRASH("unexpected binary opcode");
     }
 }
 
@@ -913,42 +949,42 @@ CodeGeneratorARM::visitShiftI(LShiftI* i
             break;
           case JSOP_URSH:
             if (shift) {
                 masm.ma_lsr(Imm32(shift), lhs, dest);
             } else {
                 // x >>> 0 can overflow.
                 masm.ma_mov(lhs, dest);
                 if (ins->mir()->toUrsh()->fallible()) {
-                    masm.ma_cmp(dest, Imm32(0));
+                    masm.as_cmp(dest, Imm8(0));
                     bailoutIf(Assembler::LessThan, ins->snapshot());
                 }
             }
             break;
           default:
             MOZ_CRASH("Unexpected shift op");
         }
     } else {
         // The shift amounts should be AND'ed into the 0-31 range since arm
         // shifts by the lower byte of the register (it will attempt to shift by
         // 250 if you ask it to).
-        masm.ma_and(Imm32(0x1F), ToRegister(rhs), dest);
+        masm.as_and(dest, ToRegister(rhs), Imm8(0x1F));
 
         switch (ins->bitop()) {
           case JSOP_LSH:
             masm.ma_lsl(dest, lhs, dest);
             break;
           case JSOP_RSH:
             masm.ma_asr(dest, lhs, dest);
             break;
           case JSOP_URSH:
             masm.ma_lsr(dest, lhs, dest);
             if (ins->mir()->toUrsh()->fallible()) {
                 // x >>> 0 can overflow.
-                masm.ma_cmp(dest, Imm32(0));
+                masm.as_cmp(dest, Imm8(0));
                 bailoutIf(Assembler::LessThan, ins->snapshot());
             }
             break;
           default:
             MOZ_CRASH("Unexpected shift op");
         }
     }
 }
@@ -964,17 +1000,17 @@ CodeGeneratorARM::visitUrshD(LUrshD* ins
 
     if (rhs->isConstant()) {
         int32_t shift = ToInt32(rhs) & 0x1F;
         if (shift)
             masm.ma_lsr(Imm32(shift), lhs, temp);
         else
             masm.ma_mov(lhs, temp);
     } else {
-        masm.ma_and(Imm32(0x1F), ToRegister(rhs), temp);
+        masm.as_and(temp, ToRegister(rhs), Imm8(0x1F));
         masm.ma_lsr(temp, lhs, temp);
     }
 
     masm.convertUInt32ToDouble(temp, out);
 }
 
 void
 CodeGeneratorARM::visitClzI(LClzI* ins)
@@ -1114,20 +1150,22 @@ CodeGeneratorARM::emitTableSwitchDispatc
     // current instruction *PLUS 8*. This means that ldr foo, [pc, +0] reads
     // $pc+8. In other words, there is an empty word after the branch into the
     // switch table before the table actually starts. Since the only other
     // unhandled case is the default case (both out of range high and out of
     // range low) I then insert a branch to default case into the extra slot,
     // which ensures we don't attempt to execute the address table.
     Label* defaultcase = skipTrivialBlocks(mir->getDefault())->lir()->label();
 
+    ScratchRegisterScope scratch(masm);
+
     int32_t cases = mir->numCases();
     // Lower value with low value.
-    masm.ma_sub(index, Imm32(mir->low()), index, SetCC);
-    masm.ma_rsb(index, Imm32(cases - 1), index, SetCC, Assembler::NotSigned);
+    masm.ma_sub(index, Imm32(mir->low()), index, scratch, SetCC);
+    masm.ma_rsb(index, Imm32(cases - 1), index, scratch, SetCC, Assembler::NotSigned);
     // Inhibit pools within the following sequence because we are indexing into
     // a pc relative table. The region will have one instruction for ma_ldr, one
     // for ma_b, and each table case takes one word.
     AutoForbidPools afp(&masm, 1 + 1 + cases);
     masm.ma_ldr(DTRAddr(pc, DtrRegImmShift(index, LSL, 2)), pc, Offset, Assembler::NotSigned);
     masm.ma_b(defaultcase);
 
     // To fill in the CodeLabels for the case entries, we need to first generate
@@ -1257,21 +1295,22 @@ CodeGeneratorARM::visitRoundF(LRoundF* l
     masm.roundf(input, output, &bail, tmp);
     bailoutFrom(&bail, lir->snapshot());
 }
 
 void
 CodeGeneratorARM::emitRoundDouble(FloatRegister src, Register dest, Label* fail)
 {
     ScratchDoubleScope scratch(masm);
+    ScratchRegisterScope scratchReg(masm);
 
     masm.ma_vcvt_F64_I32(src, scratch);
     masm.ma_vxfer(scratch, dest);
-    masm.ma_cmp(dest, Imm32(0x7fffffff));
-    masm.ma_cmp(dest, Imm32(0x80000000), Assembler::NotEqual);
+    masm.ma_cmp(dest, Imm32(0x7fffffff), scratchReg);
+    masm.ma_cmp(dest, Imm32(0x80000000), scratchReg, Assembler::NotEqual);
     masm.ma_b(fail, Assembler::Equal);
 }
 
 void
 CodeGeneratorARM::visitTruncateDToInt32(LTruncateDToInt32* ins)
 {
     emitTruncateDouble(ToFloatRegister(ins->input()), ToRegister(ins->output()), ins->mir());
 }
@@ -1375,18 +1414,20 @@ CodeGeneratorARM::visitBoxFloatingPoint(
 void
 CodeGeneratorARM::visitUnbox(LUnbox* unbox)
 {
     // Note that for unbox, the type and payload indexes are switched on the
     // inputs.
     MUnbox* mir = unbox->mir();
     Register type = ToRegister(unbox->type());
 
+    ScratchRegisterScope scratch(masm);
+
     if (mir->fallible()) {
-        masm.ma_cmp(type, Imm32(MIRTypeToTag(mir->type())));
+        masm.ma_cmp(type, Imm32(MIRTypeToTag(mir->type())), scratch);
         bailoutIf(Assembler::NotEqual, unbox->snapshot());
     }
 }
 
 void
 CodeGeneratorARM::visitDouble(LDouble* ins)
 {
     const LDefinition* out = ins->getDef(0);
@@ -1578,18 +1619,19 @@ CodeGeneratorARM::visitCompareBitwiseAnd
     jumpToBlock(notEqual, Assembler::NotEqual);
     masm.cmp32(lhs.payloadReg(), rhs.payloadReg());
     emitBranch(cond, lir->ifTrue(), lir->ifFalse());
 }
 
 void
 CodeGeneratorARM::visitBitAndAndBranch(LBitAndAndBranch* baab)
 {
+    ScratchRegisterScope scratch(masm);
     if (baab->right()->isConstant())
-        masm.ma_tst(ToRegister(baab->left()), Imm32(ToInt32(baab->right())));
+        masm.ma_tst(ToRegister(baab->left()), Imm32(ToInt32(baab->right())), scratch);
     else
         masm.ma_tst(ToRegister(baab->left()), ToRegister(baab->right()));
     emitBranch(Assembler::NonZero, baab->ifTrue(), baab->ifFalse());
 }
 
 void
 CodeGeneratorARM::visitAsmJSUInt32ToDouble(LAsmJSUInt32ToDouble* lir)
 {
@@ -1601,28 +1643,28 @@ CodeGeneratorARM::visitAsmJSUInt32ToFloa
 {
     masm.convertUInt32ToFloat32(ToRegister(lir->input()), ToFloatRegister(lir->output()));
 }
 
 void
 CodeGeneratorARM::visitNotI(LNotI* ins)
 {
     // It is hard to optimize !x, so just do it the basic way for now.
-    masm.ma_cmp(ToRegister(ins->input()), Imm32(0));
+    masm.as_cmp(ToRegister(ins->input()), Imm8(0));
     masm.emitSet(Assembler::Equal, ToRegister(ins->output()));
 }
 
 void
 CodeGeneratorARM::visitNotI64(LNotI64* lir)
 {
     Register64 input = ToRegister64(lir->getInt64Operand(0));
     Register output = ToRegister(lir->output());
 
     masm.ma_orr(input.low, input.high, output);
-    masm.ma_cmp(output, Imm32(0));
+    masm.as_cmp(output, Imm8(0));
     masm.emitSet(Assembler::Equal, output);
 }
 
 void
 CodeGeneratorARM::visitNotD(LNotD* ins)
 {
     // Since this operation is not, we want to set a bit if the double is
     // falsey, which means 0.0, -0.0 or NaN. When comparing with 0, an input of
@@ -1635,17 +1677,17 @@ CodeGeneratorARM::visitNotD(LNotD* ins)
     // TODO There are three variations here to compare performance-wise.
     bool nocond = true;
     if (nocond) {
         // Load the value into the dest register.
         masm.as_vmrs(dest);
         masm.ma_lsr(Imm32(28), dest, dest);
         // 28 + 2 = 30
         masm.ma_alu(dest, lsr(dest, 2), dest, OpOrr);
-        masm.ma_and(Imm32(1), dest);
+        masm.as_and(dest, dest, Imm8(1));
     } else {
         masm.as_vmrs(pc);
         masm.ma_mov(Imm32(0), dest);
         masm.ma_mov(Imm32(1), dest, Assembler::Equal);
         masm.ma_mov(Imm32(1), dest, Assembler::Overflow);
     }
 }
 
@@ -1663,60 +1705,64 @@ CodeGeneratorARM::visitNotF(LNotF* ins)
     // TODO There are three variations here to compare performance-wise.
     bool nocond = true;
     if (nocond) {
         // Load the value into the dest register.
         masm.as_vmrs(dest);
         masm.ma_lsr(Imm32(28), dest, dest);
         // 28 + 2 = 30
         masm.ma_alu(dest, lsr(dest, 2), dest, OpOrr);
-        masm.ma_and(Imm32(1), dest);
+        masm.as_and(dest, dest, Imm8(1));
     } else {
         masm.as_vmrs(pc);
         masm.ma_mov(Imm32(0), dest);
         masm.ma_mov(Imm32(1), dest, Assembler::Equal);
         masm.ma_mov(Imm32(1), dest, Assembler::Overflow);
     }
 }
 
 void
 CodeGeneratorARM::visitGuardShape(LGuardShape* guard)
 {
     Register obj = ToRegister(guard->input());
     Register tmp = ToRegister(guard->tempInt());
 
+    ScratchRegisterScope scratch(masm);
     masm.ma_ldr(DTRAddr(obj, DtrOffImm(ShapedObject::offsetOfShape())), tmp);
-    masm.ma_cmp(tmp, ImmGCPtr(guard->mir()->shape()));
+    masm.ma_cmp(tmp, ImmGCPtr(guard->mir()->shape()), scratch);
 
     bailoutIf(Assembler::NotEqual, guard->snapshot());
 }
 
 void
 CodeGeneratorARM::visitGuardObjectGroup(LGuardObjectGroup* guard)
 {
     Register obj = ToRegister(guard->input());
     Register tmp = ToRegister(guard->tempInt());
     MOZ_ASSERT(obj != tmp);
 
+    ScratchRegisterScope scratch(masm);
     masm.ma_ldr(DTRAddr(obj, DtrOffImm(JSObject::offsetOfGroup())), tmp);
-    masm.ma_cmp(tmp, ImmGCPtr(guard->mir()->group()));
+    masm.ma_cmp(tmp, ImmGCPtr(guard->mir()->group()), scratch);
 
     Assembler::Condition cond =
         guard->mir()->bailOnEquality() ? Assembler::Equal : Assembler::NotEqual;
     bailoutIf(cond, guard->snapshot());
 }
 
 void
 CodeGeneratorARM::visitGuardClass(LGuardClass* guard)
 {
     Register obj = ToRegister(guard->input());
     Register tmp = ToRegister(guard->tempInt());
 
+    ScratchRegisterScope scratch(masm);
+
     masm.loadObjClass(obj, tmp);
-    masm.ma_cmp(tmp, Imm32((uint32_t)guard->mir()->getClass()));
+    masm.ma_cmp(tmp, Imm32((uint32_t)guard->mir()->getClass()), scratch);
     bailoutIf(Assembler::NotEqual, guard->snapshot());
 }
 
 void
 CodeGeneratorARM::generateInvalidateEpilogue()
 {
     // Ensure that there is enough space in the buffer for the OsiPoint patching
     // to occur. Otherwise, we could overwrite the invalidation epilogue.
@@ -2125,17 +2171,17 @@ CodeGeneratorARM::visitAtomicTypedArrayE
 }
 
 void
 CodeGeneratorARM::visitAsmSelect(LAsmSelect* ins)
 {
     MIRType mirType = ins->mir()->type();
 
     Register cond = ToRegister(ins->condExpr());
-    masm.ma_cmp(cond, Imm32(0));
+    masm.as_cmp(cond, Imm8(0));
 
     if (mirType == MIRType::Int32) {
         Register falseExpr = ToRegister(ins->falseExpr());
         Register out = ToRegister(ins->output());
         MOZ_ASSERT(ToRegister(ins->trueExpr()) == out, "true expr input is reused for output");
         masm.ma_mov(falseExpr, out, LeaveCC, Assembler::Zero);
         return;
     }
@@ -2258,57 +2304,60 @@ CodeGeneratorARM::visitAsmJSLoadHeap(LAs
       default: MOZ_CRASH("unexpected array type");
     }
 
     if (ptr->isConstant()) {
         MOZ_ASSERT(!mir->needsBoundsCheck());
         int32_t ptrImm = ptr->toConstant()->toInt32();
         MOZ_ASSERT(ptrImm >= 0);
         if (isFloat) {
+            ScratchRegisterScope scratch(masm);
             VFPRegister vd(ToFloatRegister(ins->output()));
             if (size == 32)
-                masm.ma_vldr(Address(HeapReg, ptrImm), vd.singleOverlay(), Assembler::Always);
+                masm.ma_vldr(Address(HeapReg, ptrImm), vd.singleOverlay(), scratch, Assembler::Always);
             else
-                masm.ma_vldr(Address(HeapReg, ptrImm), vd, Assembler::Always);
+                masm.ma_vldr(Address(HeapReg, ptrImm), vd, scratch, Assembler::Always);
         }  else {
+            ScratchRegisterScope scratch(masm);
             masm.ma_dataTransferN(IsLoad, size, isSigned, HeapReg, Imm32(ptrImm),
-                                  ToRegister(ins->output()), Offset, Assembler::Always);
+                                  ToRegister(ins->output()), scratch, Offset, Assembler::Always);
         }
     } else {
+        ScratchRegisterScope scratch(masm);
         Register ptrReg = ToRegister(ptr);
         if (isFloat) {
             FloatRegister output = ToFloatRegister(ins->output());
             if (size == 32)
                 output = output.singleOverlay();
 
             Assembler::Condition cond = Assembler::Always;
             if (mir->needsBoundsCheck()) {
                 BufferOffset cmp = masm.as_cmp(ptrReg, Imm8(0));
                 masm.append(wasm::BoundsCheck(cmp.getOffset()));
 
                 size_t nanOffset = size == 32 ? wasm::NaN32GlobalDataOffset : wasm::NaN64GlobalDataOffset;
-                masm.ma_vldr(Address(GlobalReg, nanOffset - AsmJSGlobalRegBias), output,
+                masm.ma_vldr(Address(GlobalReg, nanOffset - AsmJSGlobalRegBias), output, scratch,
                              Assembler::AboveOrEqual);
                 cond = Assembler::Below;
             }
 
-            masm.ma_vldr(output, HeapReg, ptrReg, 0, cond);
+            masm.ma_vldr(output, HeapReg, ptrReg, scratch, 0, cond);
         } else {
             Register output = ToRegister(ins->output());
 
             Assembler::Condition cond = Assembler::Always;
             if (mir->needsBoundsCheck()) {
                 uint32_t cmpOffset = masm.as_cmp(ptrReg, Imm8(0)).getOffset();
                 masm.append(wasm::BoundsCheck(cmpOffset));
 
                 masm.ma_mov(Imm32(0), output, Assembler::AboveOrEqual);
                 cond = Assembler::Below;
             }
 
-            masm.ma_dataTransferN(IsLoad, size, isSigned, HeapReg, ptrReg, output, Offset, cond);
+            masm.ma_dataTransferN(IsLoad, size, isSigned, HeapReg, ptrReg, output, scratch, Offset, cond);
         }
     }
 }
 
 template <typename T>
 void
 CodeGeneratorARM::emitWasmLoad(T* lir)
 {
@@ -2317,52 +2366,53 @@ CodeGeneratorARM::emitWasmLoad(T* lir)
     uint32_t offset = mir->offset();
     MOZ_ASSERT(offset < wasm::OffsetGuardLimit);
 
     Register ptr = ToRegister(lir->ptr());
     Scalar::Type type = mir->accessType();
 
     // Maybe add the offset.
     if (offset || type == Scalar::Int64) {
+        ScratchRegisterScope scratch(masm);
         Register ptrPlusOffset = ToRegister(lir->ptrCopy());
         if (offset)
-            masm.ma_add(Imm32(offset), ptrPlusOffset);
+            masm.ma_add(Imm32(offset), ptrPlusOffset, scratch);
         ptr = ptrPlusOffset;
     } else {
         MOZ_ASSERT(lir->ptrCopy()->isBogusTemp());
     }
 
     bool isSigned = type == Scalar::Int8 || type == Scalar::Int16 || type == Scalar::Int32 ||
                     type == Scalar::Int64;
     unsigned byteSize = mir->byteSize();
 
     memoryBarrier(mir->barrierBefore());
 
     if (mir->type() == MIRType::Int64) {
         Register64 output = ToOutRegister64(lir);
         if (type == Scalar::Int64) {
             MOZ_ASSERT(INT64LOW_OFFSET == 0);
             masm.ma_dataTransferN(IsLoad, 32, /* signed = */ false, HeapReg, ptr, output.low);
-            masm.ma_add(Imm32(INT64HIGH_OFFSET), ptr);
+            masm.as_add(ptr, ptr, Imm8(INT64HIGH_OFFSET));
             masm.ma_dataTransferN(IsLoad, 32, isSigned, HeapReg, ptr, output.high);
         } else {
             masm.ma_dataTransferN(IsLoad, byteSize * 8, isSigned, HeapReg, ptr, output.low);
             if (isSigned)
                 masm.ma_asr(Imm32(31), output.low, output.high);
             else
                 masm.ma_mov(Imm32(0), output.high);
         }
     } else {
         AnyRegister output = ToAnyRegister(lir->output());
         bool isFloat = output.isFloat();
         if (isFloat) {
             MOZ_ASSERT((byteSize == 4) == output.fpu().isSingle());
             ScratchRegisterScope scratch(masm);
             masm.ma_add(HeapReg, ptr, scratch);
-            masm.ma_vldr(Address(scratch, 0), output.fpu());
+            masm.ma_vldr(Operand(Address(scratch, 0)).toVFPAddr(), output.fpu());
         } else {
             masm.ma_dataTransferN(IsLoad, byteSize * 8, isSigned, HeapReg, ptr, output.gpr());
         }
     }
 
     memoryBarrier(mir->barrierAfter());
 }
 
@@ -2383,18 +2433,20 @@ void
 CodeGeneratorARM::emitWasmUnalignedLoad(T* lir)
 {
     const MWasmLoad* mir = lir->mir();
 
     uint32_t offset = mir->offset();
     MOZ_ASSERT(offset < wasm::OffsetGuardLimit);
 
     Register ptr = ToRegister(lir->ptrCopy());
-    if (offset)
-        masm.ma_add(Imm32(offset), ptr);
+    if (offset) {
+        ScratchRegisterScope scratch(masm);
+        masm.ma_add(Imm32(offset), ptr, scratch);
+    }
 
     // Add HeapReg to ptr, so we can use base+index addressing in the byte loads.
     masm.ma_add(HeapReg, ptr);
 
     unsigned byteSize = mir->byteSize();
     Scalar::Type type = mir->accessType();
     bool isSigned = type == Scalar::Int8 || type == Scalar::Int16 || type == Scalar::Int32 ||
                     type == Scalar::Int64;
@@ -2462,17 +2514,18 @@ CodeGeneratorARM::visitWasmUnalignedLoad
 
 void
 CodeGeneratorARM::visitWasmAddOffset(LWasmAddOffset* lir)
 {
     MWasmAddOffset* mir = lir->mir();
     Register base = ToRegister(lir->base());
     Register out = ToRegister(lir->output());
 
-    masm.ma_add(base, Imm32(mir->offset()), out, SetCC);
+    ScratchRegisterScope scratch(masm);
+    masm.ma_add(base, Imm32(mir->offset()), out, scratch, SetCC);
     masm.ma_b(wasm::JumpTarget::OutOfBounds, Assembler::CarrySet);
 }
 
 template <typename T>
 void
 CodeGeneratorARM::emitWasmStore(T* lir)
 {
     const MWasmStore* mir = lir->mir();
@@ -2481,41 +2534,42 @@ CodeGeneratorARM::emitWasmStore(T* lir)
     MOZ_ASSERT(offset < wasm::OffsetGuardLimit);
 
     Register ptr = ToRegister(lir->ptr());
     unsigned byteSize = mir->byteSize();
     Scalar::Type type = mir->accessType();
 
     // Maybe add the offset.
     if (offset || type == Scalar::Int64) {
+        ScratchRegisterScope scratch(masm);
         Register ptrPlusOffset = ToRegister(lir->ptrCopy());
         if (offset)
-            masm.ma_add(Imm32(offset), ptrPlusOffset);
+            masm.ma_add(Imm32(offset), ptrPlusOffset, scratch);
         ptr = ptrPlusOffset;
     } else {
         MOZ_ASSERT(lir->ptrCopy()->isBogusTemp());
     }
 
     memoryBarrier(mir->barrierBefore());
 
     if (type == Scalar::Int64) {
         MOZ_ASSERT(INT64LOW_OFFSET == 0);
 
         Register64 value = ToRegister64(lir->getInt64Operand(lir->ValueIndex));
         masm.ma_dataTransferN(IsStore, 32 /* bits */, /* signed */ false, HeapReg, ptr, value.low);
-        masm.ma_add(Imm32(INT64HIGH_OFFSET), ptr);
+        masm.as_add(ptr, ptr, Imm8(INT64HIGH_OFFSET));
         masm.ma_dataTransferN(IsStore, 32 /* bits */, /* signed */ true, HeapReg, ptr, value.high);
     } else {
         AnyRegister value = ToAnyRegister(lir->getOperand(lir->ValueIndex));
         if (value.isFloat()) {
+            ScratchRegisterScope scratch(masm);
             FloatRegister val = value.fpu();
             MOZ_ASSERT((byteSize == 4) == val.isSingle());
-            ScratchRegisterScope scratch(masm);
             masm.ma_add(HeapReg, ptr, scratch);
-            masm.ma_vstr(val, Address(scratch, 0));
+            masm.ma_vstr(val, Operand(Address(scratch, 0)).toVFPAddr());
         } else {
             bool isSigned = type == Scalar::Uint32 || type == Scalar::Int32; // see AsmJSStoreHeap;
             Register val = value.gpr();
             masm.ma_dataTransferN(IsStore, 8 * byteSize /* bits */, isSigned, HeapReg, ptr, val);
         }
     }
 
     memoryBarrier(mir->barrierAfter());
@@ -2538,18 +2592,20 @@ void
 CodeGeneratorARM::emitWasmUnalignedStore(T* lir)
 {
     const MWasmStore* mir = lir->mir();
 
     uint32_t offset = mir->offset();
     MOZ_ASSERT(offset < wasm::OffsetGuardLimit);
 
     Register ptr = ToRegister(lir->ptrCopy());
-    if (offset)
-        masm.ma_add(Imm32(offset), ptr);
+    if (offset) {
+        ScratchRegisterScope scratch(masm);
+        masm.ma_add(Imm32(offset), ptr, scratch);
+    }
 
     // Add HeapReg to ptr, so we can use base+index addressing in the byte loads.
     masm.ma_add(HeapReg, ptr);
 
     MIRType mirType = mir->value()->type();
 
     memoryBarrier(mir->barrierAfter());
 
@@ -2625,39 +2681,42 @@ CodeGeneratorARM::visitAsmJSStoreHeap(LA
         if (isFloat) {
             VFPRegister vd(ToFloatRegister(ins->value()));
             Address addr(HeapReg, ptrImm);
             if (size == 32)
                 masm.storeFloat32(vd, addr);
             else
                 masm.storeDouble(vd, addr);
         } else {
+            ScratchRegisterScope scratch(masm);
             masm.ma_dataTransferN(IsStore, size, isSigned, HeapReg, Imm32(ptrImm),
-                                  ToRegister(ins->value()), Offset, Assembler::Always);
+                                  ToRegister(ins->value()), scratch, Offset, Assembler::Always);
         }
     } else {
         Register ptrReg = ToRegister(ptr);
 
         Assembler::Condition cond = Assembler::Always;
         if (mir->needsBoundsCheck()) {
             BufferOffset cmp = masm.as_cmp(ptrReg, Imm8(0));
             masm.append(wasm::BoundsCheck(cmp.getOffset()));
 
             cond = Assembler::Below;
         }
 
         if (isFloat) {
+            ScratchRegisterScope scratch(masm);
             FloatRegister value = ToFloatRegister(ins->value());
             if (size == 32)
                 value = value.singleOverlay();
 
-            masm.ma_vstr(value, HeapReg, ptrReg, 0, 0, Assembler::Below);
+            masm.ma_vstr(value, HeapReg, ptrReg, scratch, 0, Assembler::Below);
         } else {
+            ScratchRegisterScope scratch(masm);
             Register value = ToRegister(ins->value());
-            masm.ma_dataTransferN(IsStore, size, isSigned, HeapReg, ptrReg, value, Offset, cond);
+            masm.ma_dataTransferN(IsStore, size, isSigned, HeapReg, ptrReg, value, scratch, Offset, cond);
         }
     }
 }
 
 void
 CodeGeneratorARM::visitAsmJSCompareExchangeHeap(LAsmJSCompareExchangeHeap* ins)
 {
     MAsmJSCompareExchangeHeap* mir = ins->mir();
@@ -2836,23 +2895,27 @@ CodeGeneratorARM::visitAsmJSAtomicBinopC
     }
 }
 
 void
 CodeGeneratorARM::visitAsmJSPassStackArg(LAsmJSPassStackArg* ins)
 {
     const MAsmJSPassStackArg* mir = ins->mir();
     Address dst(StackPointer, mir->spOffset());
+    ScratchRegisterScope scratch(masm);
+    SecondScratchRegisterScope scratch2(masm);
+
     if (ins->arg()->isConstant()) {
-        masm.ma_storeImm(Imm32(ToInt32(ins->arg())), dst);
+        masm.ma_mov(Imm32(ToInt32(ins->arg())), scratch);
+        masm.ma_str(scratch, dst, scratch2);
     } else {
         if (ins->arg()->isGeneralReg())
-            masm.ma_str(ToRegister(ins->arg()), dst);
+            masm.ma_str(ToRegister(ins->arg()), dst, scratch);
         else
-            masm.ma_vstr(ToFloatRegister(ins->arg()), dst);
+            masm.ma_vstr(ToFloatRegister(ins->arg()), dst, scratch);
     }
 }
 
 void
 CodeGeneratorARM::visitUDiv(LUDiv* ins)
 {
     Register lhs = ToRegister(ins->lhs());
     Register rhs = ToRegister(ins->rhs());
@@ -2861,17 +2924,17 @@ CodeGeneratorARM::visitUDiv(LUDiv* ins)
     Label done;
     generateUDivModZeroCheck(rhs, output, &done, ins->snapshot(), ins->mir());
 
     masm.ma_udiv(lhs, rhs, output);
 
     // Check for large unsigned result - represent as double.
     if (!ins->mir()->isTruncated()) {
         MOZ_ASSERT(ins->mir()->fallible());
-        masm.ma_cmp(output, Imm32(0));
+        masm.as_cmp(output, Imm8(0));
         bailoutIf(Assembler::LessThan, ins->snapshot());
     }
 
     // Check for non-zero remainder if not truncating to int.
     if (!ins->mir()->canTruncateRemainder()) {
         MOZ_ASSERT(ins->mir()->fallible());
         {
             ScratchRegisterScope scratch(masm);
@@ -2890,38 +2953,41 @@ CodeGeneratorARM::visitUMod(LUMod* ins)
 {
     Register lhs = ToRegister(ins->lhs());
     Register rhs = ToRegister(ins->rhs());
     Register output = ToRegister(ins->output());
 
     Label done;
     generateUDivModZeroCheck(rhs, output, &done, ins->snapshot(), ins->mir());
 
-    masm.ma_umod(lhs, rhs, output);
+    {
+        ScratchRegisterScope scratch(masm);
+        masm.ma_umod(lhs, rhs, output, scratch);
+    }
 
     // Check for large unsigned result - represent as double.
     if (!ins->mir()->isTruncated()) {
         MOZ_ASSERT(ins->mir()->fallible());
-        masm.ma_cmp(output, Imm32(0));
+        masm.as_cmp(output, Imm8(0));
         bailoutIf(Assembler::LessThan, ins->snapshot());
     }
 
     if (done.used())
         masm.bind(&done);
 }
 
 template<class T>
 void
 CodeGeneratorARM::generateUDivModZeroCheck(Register rhs, Register output, Label* done,
                                            LSnapshot* snapshot, T* mir)
 {
     if (!mir)
         return;
     if (mir->canBeDivideByZero()) {
-        masm.ma_cmp(rhs, Imm32(0));
+        masm.as_cmp(rhs, Imm8(0));
         if (mir->isTruncated()) {
             if (mir->trapOnError()) {
                 masm.ma_b(wasm::JumpTarget::IntegerDivideByZero, Assembler::Equal);
             } else {
                 Label skip;
                 masm.ma_b(&skip, Assembler::NotEqual);
                 // Infinity|0 == 0
                 masm.ma_mov(Imm32(0), output);
@@ -2962,98 +3028,108 @@ CodeGeneratorARM::visitSoftUDivOrMod(LSo
     if (gen->compilingAsmJS())
         masm.callWithABI(wasm::SymbolicAddress::aeabi_uidivmod);
     else
         masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, __aeabi_uidivmod));
 
     // uidivmod returns the quotient in r0, and the remainder in r1.
     if (div && !div->canTruncateRemainder()) {
         MOZ_ASSERT(div->fallible());
-        masm.ma_cmp(r1, Imm32(0));
+        masm.as_cmp(r1, Imm8(0));
         bailoutIf(Assembler::NonZero, ins->snapshot());
     }
 
     // Bailout for big unsigned results
     if ((div && !div->isTruncated()) || (mod && !mod->isTruncated())) {
         DebugOnly<bool> isFallible = (div && div->fallible()) || (mod && mod->fallible());
         MOZ_ASSERT(isFallible);
-        masm.ma_cmp(output, Imm32(0));
+        masm.as_cmp(output, Imm8(0));
         bailoutIf(Assembler::LessThan, ins->snapshot());
     }
 
     masm.bind(&done);
 }
 
 void
 CodeGeneratorARM::visitEffectiveAddress(LEffectiveAddress* ins)
 {
     const MEffectiveAddress* mir = ins->mir();
     Register base = ToRegister(ins->base());
     Register index = ToRegister(ins->index());
     Register output = ToRegister(ins->output());
+
+    ScratchRegisterScope scratch(masm);
+
     masm.as_add(output, base, lsl(index, mir->scale()));
-    masm.ma_add(Imm32(mir->displacement()), output);
+    masm.ma_add(Imm32(mir->displacement()), output, scratch);
 }
 
 void
 CodeGeneratorARM::visitWasmLoadGlobalVar(LWasmLoadGlobalVar* ins)
 {
     const MWasmLoadGlobalVar* mir = ins->mir();
     unsigned addr = mir->globalDataOffset() - AsmJSGlobalRegBias;
+
+    ScratchRegisterScope scratch(masm);
+
     if (mir->type() == MIRType::Int32) {
-        masm.ma_dtr(IsLoad, GlobalReg, Imm32(addr), ToRegister(ins->output()));
+        masm.ma_dtr(IsLoad, GlobalReg, Imm32(addr), ToRegister(ins->output()), scratch);
     } else if (mir->type() == MIRType::Float32) {
         VFPRegister vd(ToFloatRegister(ins->output()));
-        masm.ma_vldr(Address(GlobalReg, addr), vd.singleOverlay());
+        masm.ma_vldr(Address(GlobalReg, addr), vd.singleOverlay(), scratch);
     } else {
         MOZ_ASSERT(mir->type() == MIRType::Double);
-        masm.ma_vldr(Address(GlobalReg, addr), ToFloatRegister(ins->output()));
+        masm.ma_vldr(Address(GlobalReg, addr), ToFloatRegister(ins->output()), scratch);
     }
 }
 
 void
 CodeGeneratorARM::visitWasmLoadGlobalVarI64(LWasmLoadGlobalVarI64* ins)
 {
     const MWasmLoadGlobalVar* mir = ins->mir();
     unsigned addr = mir->globalDataOffset() - AsmJSGlobalRegBias;
     MOZ_ASSERT(mir->type() == MIRType::Int64);
     Register64 output = ToOutRegister64(ins);
 
-    masm.ma_dtr(IsLoad, GlobalReg, Imm32(addr + INT64LOW_OFFSET), output.low);
-    masm.ma_dtr(IsLoad, GlobalReg, Imm32(addr + INT64HIGH_OFFSET), output.high);
+    ScratchRegisterScope scratch(masm);
+    masm.ma_dtr(IsLoad, GlobalReg, Imm32(addr + INT64LOW_OFFSET), output.low, scratch);
+    masm.ma_dtr(IsLoad, GlobalReg, Imm32(addr + INT64HIGH_OFFSET), output.high, scratch);
 }
 
 void
 CodeGeneratorARM::visitWasmStoreGlobalVar(LWasmStoreGlobalVar* ins)
 {
     const MWasmStoreGlobalVar* mir = ins->mir();
     MIRType type = mir->value()->type();
 
+    ScratchRegisterScope scratch(masm);
+
     unsigned addr = mir->globalDataOffset() - AsmJSGlobalRegBias;
     if (type == MIRType::Int32) {
-        masm.ma_dtr(IsStore, GlobalReg, Imm32(addr), ToRegister(ins->value()));
+        masm.ma_dtr(IsStore, GlobalReg, Imm32(addr), ToRegister(ins->value()), scratch);
     } else if (type == MIRType::Float32) {
         VFPRegister vd(ToFloatRegister(ins->value()));
-        masm.ma_vstr(vd.singleOverlay(), Address(GlobalReg, addr));
+        masm.ma_vstr(vd.singleOverlay(), Address(GlobalReg, addr), scratch);
     } else {
         MOZ_ASSERT(type == MIRType::Double);
-        masm.ma_vstr(ToFloatRegister(ins->value()), Address(GlobalReg, addr));
+        masm.ma_vstr(ToFloatRegister(ins->value()), Address(GlobalReg, addr), scratch);
     }
 }
 
 void
 CodeGeneratorARM::visitWasmStoreGlobalVarI64(LWasmStoreGlobalVarI64* ins)
 {
     const MWasmStoreGlobalVar* mir = ins->mir();
     unsigned addr = mir->globalDataOffset() - AsmJSGlobalRegBias;
     MOZ_ASSERT (mir->value()->type() == MIRType::Int64);
     Register64 input = ToRegister64(ins->value());
 
-    masm.ma_dtr(IsStore, GlobalReg, Imm32(addr + INT64LOW_OFFSET), input.low);
-    masm.ma_dtr(IsStore, GlobalReg, Imm32(addr + INT64HIGH_OFFSET), input.high);
+    ScratchRegisterScope scratch(masm);
+    masm.ma_dtr(IsStore, GlobalReg, Imm32(addr + INT64LOW_OFFSET), input.low, scratch);
+    masm.ma_dtr(IsStore, GlobalReg, Imm32(addr + INT64HIGH_OFFSET), input.high, scratch);
 }
 
 void
 CodeGeneratorARM::visitNegI(LNegI* ins)
 {
     Register input = ToRegister(ins->input());
     masm.ma_neg(input, ToRegister(ins->output()));
 }
@@ -3123,52 +3199,53 @@ CodeGeneratorARM::visitWasmTruncateToInt
             masm.compareFloat(input, input);
         else
             MOZ_CRASH("unexpected type in visitWasmTruncateToInt32");
 
         masm.ma_b(ool->entry(), Assembler::VFP_Unordered);
     }
 
     ScratchDoubleScope scratchScope(masm);
+    ScratchRegisterScope scratchReg(masm);
     FloatRegister scratch = scratchScope.uintOverlay();
 
     // ARM conversion instructions clamp the value to ensure it fits within the
     // target's type bounds, so every time we see those, we need to check the
     // input.
     if (mir->isUnsigned()) {
         if (fromType == MIRType::Double)
             masm.ma_vcvt_F64_U32(input, scratch);
         else if (fromType == MIRType::Float32)
             masm.ma_vcvt_F32_U32(input, scratch);
         else
             MOZ_CRASH("unexpected type in visitWasmTruncateToInt32");
 
         masm.ma_vxfer(scratch, output);
 
         // int32_t(UINT32_MAX) == -1.
-        masm.ma_cmp(output, Imm32(-1));
-        masm.ma_cmp(output, Imm32(0), Assembler::NotEqual);
+        masm.ma_cmp(output, Imm32(-1), scratchReg);
+        masm.as_cmp(output, Imm8(0), Assembler::NotEqual);
         masm.ma_b(ool->entry(), Assembler::Equal);
 
         masm.bind(ool->rejoin());
         return;
     }
 
     scratch = scratchScope.sintOverlay();
 
     if (fromType == MIRType::Double)
         masm.ma_vcvt_F64_I32(input, scratch);
     else if (fromType == MIRType::Float32)
         masm.ma_vcvt_F32_I32(input, scratch);
     else
         MOZ_CRASH("unexpected type in visitWasmTruncateToInt32");
 
     masm.ma_vxfer(scratch, output);
-    masm.ma_cmp(output, Imm32(INT32_MAX));
-    masm.ma_cmp(output, Imm32(INT32_MIN), Assembler::NotEqual);
+    masm.ma_cmp(output, Imm32(INT32_MAX), scratchReg);
+    masm.ma_cmp(output, Imm32(INT32_MIN), scratchReg, Assembler::NotEqual);
     masm.ma_b(ool->entry(), Assembler::Equal);
 
     masm.bind(ool->rejoin());
 }
 
 void
 CodeGeneratorARM::visitWasmTruncateToInt64(LWasmTruncateToInt64* lir)
 {
@@ -3194,18 +3271,19 @@ CodeGeneratorARM::visitWasmTruncateToInt
     masm.passABIArg(inputDouble, MoveOp::DOUBLE);
     if (lir->mir()->isUnsigned())
         masm.callWithABI(wasm::SymbolicAddress::TruncateDoubleToUint64);
     else
         masm.callWithABI(wasm::SymbolicAddress::TruncateDoubleToInt64);
 
     masm.Pop(input);
 
-    masm.ma_cmp(output.high, Imm32(0x80000000));
-    masm.ma_cmp(output.low, Imm32(0x00000000), Assembler::Equal);
+    ScratchRegisterScope scratch(masm);
+    masm.ma_cmp(output.high, Imm32(0x80000000), scratch);
+    masm.as_cmp(output.low, Imm8(0x00000000), Assembler::Equal);
     masm.ma_b(ool->entry(), Assembler::Equal);
 
     masm.bind(ool->rejoin());
 
     MOZ_ASSERT(ReturnReg64 == output);
 }
 
 void
@@ -3331,21 +3409,23 @@ CodeGeneratorARM::visitCopySignF(LCopySi
     FloatRegister output = ToFloatRegister(ins->getDef(0));
 
     Register lhsi = ToRegister(ins->getTemp(0));
     Register rhsi = ToRegister(ins->getTemp(1));
 
     masm.ma_vxfer(lhs, lhsi);
     masm.ma_vxfer(rhs, rhsi);
 
+    ScratchRegisterScope scratch(masm);
+
     // Clear lhs's sign.
-    masm.ma_and(Imm32(INT32_MAX), lhsi, lhsi);
+    masm.ma_and(Imm32(INT32_MAX), lhsi, lhsi, scratch);
 
     // Keep rhs's sign.
-    masm.ma_and(Imm32(INT32_MIN), rhsi, rhsi);
+    masm.ma_and(Imm32(INT32_MIN), rhsi, rhsi, scratch);
 
     // Combine.
     masm.ma_orr(lhsi, rhsi, rhsi);
 
     masm.ma_vxfer(rhsi, output);
 }
 
 void
@@ -3357,21 +3437,23 @@ CodeGeneratorARM::visitCopySignD(LCopySi
 
     Register lhsi = ToRegister(ins->getTemp(0));
     Register rhsi = ToRegister(ins->getTemp(1));
 
     // Manipulate high words of double inputs.
     masm.as_vxfer(lhsi, InvalidReg, lhs, Assembler::FloatToCore, Assembler::Always, 1);
     masm.as_vxfer(rhsi, InvalidReg, rhs, Assembler::FloatToCore, Assembler::Always, 1);
 
+    ScratchRegisterScope scratch(masm);
+
     // Clear lhs's sign.
-    masm.ma_and(Imm32(INT32_MAX), lhsi, lhsi);
+    masm.ma_and(Imm32(INT32_MAX), lhsi, lhsi, scratch);
 
     // Keep rhs's sign.
-    masm.ma_and(Imm32(INT32_MIN), rhsi, rhsi);
+    masm.ma_and(Imm32(INT32_MIN), rhsi, rhsi, scratch);
 
     // Combine.
     masm.ma_orr(lhsi, rhsi, rhsi);
 
     // Reconstruct the output.
     masm.as_vxfer(lhsi, InvalidReg, lhs, Assembler::FloatToCore, Assembler::Always, 0);
     masm.ma_vxfer(lhsi, rhsi, output);
 }
@@ -3675,23 +3757,24 @@ void
 CodeGeneratorARM::visitAsmSelectI64(LAsmSelectI64* lir)
 {
     Register cond = ToRegister(lir->condExpr());
     const LInt64Allocation falseExpr = lir->falseExpr();
 
     Register64 out = ToOutRegister64(lir);
     MOZ_ASSERT(ToRegister64(lir->trueExpr()) == out, "true expr is reused for input");
 
-    masm.ma_cmp(cond, Imm32(0));
+    masm.as_cmp(cond, Imm8(0));
     if (falseExpr.low().isRegister()) {
         masm.ma_mov(ToRegister(falseExpr.low()), out.low, LeaveCC, Assembler::Equal);
         masm.ma_mov(ToRegister(falseExpr.high()), out.high, LeaveCC, Assembler::Equal);
     } else {
-        masm.ma_ldr(ToAddress(falseExpr.low()), out.low, Offset, Assembler::Equal);
-        masm.ma_ldr(ToAddress(falseExpr.high()), out.high, Offset, Assembler::Equal);
+        ScratchRegisterScope scratch(masm);
+        masm.ma_ldr(ToAddress(falseExpr.low()), out.low, scratch, Offset, Assembler::Equal);
+        masm.ma_ldr(ToAddress(falseExpr.high()), out.high, scratch, Offset, Assembler::Equal);
     }
 }
 
 void
 CodeGeneratorARM::visitAsmReinterpretFromI64(LAsmReinterpretFromI64* lir)
 {
     MOZ_ASSERT(lir->mir()->type() == MIRType::Double);
     MOZ_ASSERT(lir->mir()->input()->type() == MIRType::Int64);
@@ -3742,13 +3825,13 @@ CodeGeneratorARM::visitCtzI64(LCtzI64* l
     masm.move32(Imm32(0), output.high);
 }
 
 void
 CodeGeneratorARM::visitTestI64AndBranch(LTestI64AndBranch* lir)
 {
     Register64 input = ToRegister64(lir->getInt64Operand(0));
 
-    masm.ma_cmp(input.high, Imm32(0));
+    masm.as_cmp(input.high, Imm8(0));
     jumpToBlock(lir->ifTrue(), Assembler::NonZero);
-    masm.ma_cmp(input.low, Imm32(0));
+    masm.as_cmp(input.low, Imm8(0));
     emitBranch(Assembler::NonZero, lir->ifTrue(), lir->ifFalse());
 }
--- a/js/src/jit/arm/MacroAssembler-arm-inl.h
+++ b/js/src/jit/arm/MacroAssembler-arm-inl.h
@@ -65,46 +65,52 @@ void
 MacroAssembler::and32(Register src, Register dest)
 {
     ma_and(src, dest, SetCC);
 }
 
 void
 MacroAssembler::and32(Imm32 imm, Register dest)
 {
-    ma_and(imm, dest, SetCC);
+    ScratchRegisterScope scratch(*this);
+    ma_and(imm, dest, scratch, SetCC);
 }
 
 void
 MacroAssembler::and32(Imm32 imm, const Address& dest)
 {
     ScratchRegisterScope scratch(*this);
-    load32(dest, scratch);
-    ma_and(imm, scratch);
-    store32(scratch, dest);
+    SecondScratchRegisterScope scratch2(*this);
+
+    ma_ldr(dest, scratch, scratch2);
+    ma_and(imm, scratch, scratch2);
+    ma_str(scratch, dest, scratch2);
 }
 
 void
 MacroAssembler::and32(const Address& src, Register dest)
 {
     ScratchRegisterScope scratch(*this);
-    load32(src, scratch);
+    SecondScratchRegisterScope scratch2(*this);
+
+    ma_ldr(src, scratch, scratch2);
     ma_and(scratch, dest, SetCC);
 }
 
 void
 MacroAssembler::andPtr(Register src, Register dest)
 {
     ma_and(src, dest);
 }
 
 void
 MacroAssembler::andPtr(Imm32 imm, Register dest)
 {
-    ma_and(imm, dest);
+    ScratchRegisterScope scratch(*this);
+    ma_and(imm, dest, scratch);
 }
 
 void
 MacroAssembler::and64(Imm64 imm, Register64 dest)
 {
     if (imm.low().value != int32_t(0xFFFFFFFF))
         and32(imm.low(), dest.low);
     if (imm.hi().value != int32_t(0xFFFFFFFF))
@@ -133,38 +139,42 @@ void
 MacroAssembler::or32(Register src, Register dest)
 {
     ma_orr(src, dest);
 }
 
 void
 MacroAssembler::or32(Imm32 imm, Register dest)
 {
-    ma_orr(imm, dest);
+    ScratchRegisterScope scratch(*this);
+    ma_orr(imm, dest, scratch);
 }
 
 void
 MacroAssembler::or32(Imm32 imm, const Address& dest)
 {
     ScratchRegisterScope scratch(*this);
-    load32(dest, scratch);
-    ma_orr(imm, scratch);
-    store32(scratch, dest);
+    SecondScratchRegisterScope scratch2(*this);
+
+    ma_ldr(dest, scratch, scratch2);
+    ma_orr(imm, scratch, scratch2);
+    ma_str(scratch, dest, scratch2);
 }
 
 void
 MacroAssembler::orPtr(Register src, Register dest)
 {
     ma_orr(src, dest);
 }
 
 void
 MacroAssembler::orPtr(Imm32 imm, Register dest)
 {
-    ma_orr(imm, dest);
+    ScratchRegisterScope scratch(*this);
+    ma_orr(imm, dest, scratch);
 }
 
 void
 MacroAssembler::and64(Register64 src, Register64 dest)
 {
     and32(src.low, dest.low);
     and32(src.high, dest.high);
 }
@@ -187,109 +197,121 @@ void
 MacroAssembler::xor32(Register src, Register dest)
 {
     ma_eor(src, dest, SetCC);
 }
 
 void
 MacroAssembler::xor32(Imm32 imm, Register dest)
 {
-    ma_eor(imm, dest, SetCC);
+    ScratchRegisterScope scratch(*this);
+    ma_eor(imm, dest, scratch, SetCC);
 }
 
 void
 MacroAssembler::xorPtr(Register src, Register dest)
 {
     ma_eor(src, dest);
 }
 
 void
 MacroAssembler::xorPtr(Imm32 imm, Register dest)
 {
-    ma_eor(imm, dest);
+    ScratchRegisterScope scratch(*this);
+    ma_eor(imm, dest, scratch);
 }
 
 // ===============================================================
 // Arithmetic functions
 
 void
 MacroAssembler::add32(Register src, Register dest)
 {
     ma_add(src, dest, SetCC);
 }
 
 void
 MacroAssembler::add32(Imm32 imm, Register dest)
 {
-    ma_add(imm, dest, SetCC);
+    ScratchRegisterScope scratch(*this);
+    ma_add(imm, dest, scratch, SetCC);
 }
 
 void
 MacroAssembler::add32(Imm32 imm, const Address& dest)
 {
     ScratchRegisterScope scratch(*this);
-    load32(dest, scratch);
-    ma_add(imm, scratch, SetCC);
-    store32(scratch, dest);
+    SecondScratchRegisterScope scratch2(*this);
+
+    ma_ldr(dest, scratch, scratch2);
+    ma_add(imm, scratch, scratch2, SetCC);
+    ma_str(scratch, dest, scratch2);
 }
 
 void
 MacroAssembler::addPtr(Register src, Register dest)
 {
     ma_add(src, dest);
 }
 
 void
 MacroAssembler::addPtr(Imm32 imm, Register dest)
 {
-    ma_add(imm, dest);
+    ScratchRegisterScope scratch(*this);
+    ma_add(imm, dest, scratch);
 }
 
 void
 MacroAssembler::addPtr(ImmWord imm, Register dest)
 {
     addPtr(Imm32(imm.value), dest);
 }
 
 void
 MacroAssembler::addPtr(Imm32 imm, const Address& dest)
 {
     ScratchRegisterScope scratch(*this);
-    loadPtr(dest, scratch);
-    addPtr(imm, scratch);
-    storePtr(scratch, dest);
+    SecondScratchRegisterScope scratch2(*this);
+
+    ma_ldr(dest, scratch, scratch2);
+    ma_add(imm, scratch, scratch2);
+    ma_str(scratch, dest, scratch2);
 }
 
 void
 MacroAssembler::addPtr(const Address& src, Register dest)
 {
     ScratchRegisterScope scratch(*this);
-    load32(src, scratch);
+    SecondScratchRegisterScope scratch2(*this);
+
+    ma_ldr(src, scratch, scratch2);
     ma_add(scratch, dest, SetCC);
 }
 
 void
 MacroAssembler::add64(Register64 src, Register64 dest)
 {
     ma_add(src.low, dest.low, SetCC);
     ma_adc(src.high, dest.high);
 }
 
 void
 MacroAssembler::add64(Imm32 imm, Register64 dest)
 {
-    ma_add(imm, dest.low, SetCC);
-    ma_adc(Imm32(0), dest.high, LeaveCC);
+    ScratchRegisterScope scratch(*this);
+    ma_add(imm, dest.low, scratch, SetCC);
+    as_adc(dest.high, dest.high, Imm8(0), LeaveCC);
 }
 
 void
 MacroAssembler::add64(Imm64 imm, Register64 dest)
 {
-    ma_add(imm.low(), dest.low, SetCC);
-    ma_adc(imm.hi(), dest.high, LeaveCC);
+    ScratchRegisterScope scratch(*this);
+    ma_add(imm.low(), dest.low, scratch, SetCC);
+    ma_adc(imm.hi(), dest.high, scratch, LeaveCC);
 }
 
 void
 MacroAssembler::addDouble(FloatRegister src, FloatRegister dest)
 {
     ma_vadd(dest, src, dest);
 }
 
@@ -303,68 +325,77 @@ void
 MacroAssembler::sub32(Register src, Register dest)
 {
     ma_sub(src, dest, SetCC);
 }
 
 void
 MacroAssembler::sub32(Imm32 imm, Register dest)
 {
-    ma_sub(imm, dest, SetCC);
+    ScratchRegisterScope scratch(*this);
+    ma_sub(imm, dest, scratch, SetCC);
 }
 
 void
 MacroAssembler::sub32(const Address& src, Register dest)
 {
     ScratchRegisterScope scratch(*this);
-    load32(src, scratch);
+    SecondScratchRegisterScope scratch2(*this);
+
+    ma_ldr(src, scratch, scratch2);
     ma_sub(scratch, dest, SetCC);
 }
 
 void
 MacroAssembler::subPtr(Register src, Register dest)
 {
     ma_sub(src, dest);
 }
 
 void
 MacroAssembler::subPtr(Register src, const Address& dest)
 {
     ScratchRegisterScope scratch(*this);
-    loadPtr(dest, scratch);
+    SecondScratchRegisterScope scratch2(*this);
+
+    ma_ldr(dest, scratch, scratch2);
     ma_sub(src, scratch);
-    storePtr(scratch, dest);
+    ma_str(scratch, dest, scratch2);
 }
 
 void
 MacroAssembler::subPtr(Imm32 imm, Register dest)
 {
-    ma_sub(imm, dest);
+    ScratchRegisterScope scratch(*this);
+    ma_sub(imm, dest, scratch);
 }
 
 void
 MacroAssembler::subPtr(const Address& addr, Register dest)
 {
     ScratchRegisterScope scratch(*this);
-    loadPtr(addr, scratch);
+    SecondScratchRegisterScope scratch2(*this);
+
+    ma_ldr(addr, scratch, scratch2);
     ma_sub(scratch, dest);
 }
 
 void
 MacroAssembler::sub64(Register64 src, Register64 dest)
 {
     ma_sub(src.low, dest.low, SetCC);
     ma_sbc(src.high, dest.high, LeaveCC);
 }
 
 void
 MacroAssembler::sub64(Imm64 imm, Register64 dest)
 {
-    ma_sub(imm.low(), dest.low, SetCC);
-    ma_sbc(imm.hi(), dest.high, LeaveCC);
+    ScratchRegisterScope scratch(*this);
+    ma_sub(imm.low(), dest.low, scratch, SetCC);
+    ma_sbc(imm.hi(), dest.high, scratch, LeaveCC);
 }
 
 void
 MacroAssembler::subDouble(FloatRegister src, FloatRegister dest)
 {
     ma_vsub(dest, src, dest);
 }
 
@@ -383,52 +414,56 @@ MacroAssembler::mul32(Register rhs, Regi
 void
 MacroAssembler::mul64(Imm64 imm, const Register64& dest)
 {
     // LOW32  = LOW(LOW(dest) * LOW(imm));
     // HIGH32 = LOW(HIGH(dest) * LOW(imm)) [multiply imm into upper bits]
     //        + LOW(LOW(dest) * HIGH(imm)) [multiply dest into upper bits]
     //        + HIGH(LOW(dest) * LOW(imm)) [carry]
 
+    ScratchRegisterScope scratch(*this);
+    SecondScratchRegisterScope scratch2(*this);
+
     // HIGH(dest) = LOW(HIGH(dest) * LOW(imm));
-    ma_mov(Imm32(imm.value & 0xFFFFFFFFL), ScratchRegister);
-    as_mul(dest.high, dest.high, ScratchRegister);
+    ma_mov(Imm32(imm.value & 0xFFFFFFFFL), scratch);
+    as_mul(dest.high, dest.high, scratch);
 
     // high:low = LOW(dest) * LOW(imm);
-    as_umull(secondScratchReg_, ScratchRegister, dest.low, ScratchRegister);
+    as_umull(scratch2, scratch, dest.low, scratch);
 
     // HIGH(dest) += high;
-    as_add(dest.high, dest.high, O2Reg(secondScratchReg_));
+    as_add(dest.high, dest.high, O2Reg(scratch2));
 
     // HIGH(dest) += LOW(LOW(dest) * HIGH(imm));
     if (((imm.value >> 32) & 0xFFFFFFFFL) == 5)
-        as_add(secondScratchReg_, dest.low, lsl(dest.low, 2));
+        as_add(scratch2, dest.low, lsl(dest.low, 2));
     else
         MOZ_CRASH("Not supported imm");
-    as_add(dest.high, dest.high, O2Reg(secondScratchReg_));
+    as_add(dest.high, dest.high, O2Reg(scratch2));
 
     // LOW(dest) = low;
-    ma_mov(ScratchRegister, dest.low);
+    ma_mov(scratch, dest.low);
 }
 
 void
 MacroAssembler::mul64(Imm64 imm, const Register64& dest, const Register temp)
 {
     // LOW32  = LOW(LOW(dest) * LOW(src));                                  (1)
     // HIGH32 = LOW(HIGH(dest) * LOW(src)) [multiply src into upper bits]   (2)
     //        + LOW(LOW(dest) * HIGH(src)) [multiply dest into upper bits]  (3)
     //        + HIGH(LOW(dest) * LOW(src)) [carry]                          (4)
 
     MOZ_ASSERT(temp != dest.high && temp != dest.low);
 
     // Compute mul64
-    ma_mul(dest.high, imm.low(), dest.high); // (2)
-    ma_mul(dest.low, imm.hi(), temp); // (3)
+    ScratchRegisterScope scratch(*this);
+    ma_mul(dest.high, imm.low(), dest.high, scratch); // (2)
+    ma_mul(dest.low, imm.hi(), temp, scratch); // (3)
     ma_add(dest.high, temp, temp);
-    ma_umull(dest.low, imm.low(), dest.high, dest.low); // (4) + (1)
+    ma_umull(dest.low, imm.low(), dest.high, dest.low, scratch); // (4) + (1)
     ma_add(temp, dest.high, dest.high);
 }
 
 void
 MacroAssembler::mul64(const Register64& src, const Register64& dest, const Register temp)
 {
     // LOW32  = LOW(LOW(dest) * LOW(src));                                  (1)
     // HIGH32 = LOW(HIGH(dest) * LOW(src)) [multiply src into upper bits]   (2)
@@ -462,39 +497,44 @@ void
 MacroAssembler::mulDouble(FloatRegister src, FloatRegister dest)
 {
     ma_vmul(dest, src, dest);
 }
 
 void
 MacroAssembler::mulDoublePtr(ImmPtr imm, Register temp, FloatRegister dest)
 {
-    movePtr(imm, ScratchRegister);
-    loadDouble(Address(ScratchRegister, 0), ScratchDoubleReg);
-    mulDouble(ScratchDoubleReg, dest);
+    ScratchRegisterScope scratch(*this);
+    ScratchDoubleScope scratchDouble(*this);
+
+    movePtr(imm, scratch);
+    ma_vldr(Operand(Address(scratch, 0)).toVFPAddr(), scratchDouble);
+    mulDouble(scratchDouble, dest);
 }
 
 void
 MacroAssembler::quotient32(Register rhs, Register srcDest, bool isUnsigned)
 {
     MOZ_ASSERT(HasIDIV());
     if (isUnsigned)
         ma_udiv(srcDest, rhs, srcDest);
     else
         ma_sdiv(srcDest, rhs, srcDest);
 }
 
 void
 MacroAssembler::remainder32(Register rhs, Register srcDest, bool isUnsigned)
 {
     MOZ_ASSERT(HasIDIV());
+
+    ScratchRegisterScope scratch(*this);
     if (isUnsigned)
-        ma_umod(srcDest, rhs, srcDest);
+        ma_umod(srcDest, rhs, srcDest, scratch);
     else
-        ma_smod(srcDest, rhs, srcDest);
+        ma_smod(srcDest, rhs, srcDest, scratch);
 }
 
 void
 MacroAssembler::divFloat32(FloatRegister src, FloatRegister dest)
 {
     ma_vdiv_f32(dest, src, dest);
 }
 
@@ -509,34 +549,34 @@ MacroAssembler::inc64(AbsoluteAddress de
 {
     ScratchRegisterScope scratch(*this);
 
     ma_strd(r0, r1, EDtrAddr(sp, EDtrOffImm(-8)), PreIndex);
 
     ma_mov(Imm32((int32_t)dest.addr), scratch);
     ma_ldrd(EDtrAddr(scratch, EDtrOffImm(0)), r0, r1);
 
-    ma_add(Imm32(1), r0, SetCC);
-    ma_adc(Imm32(0), r1, LeaveCC);
+    as_add(r0, r0, Imm8(1), SetCC);
+    as_adc(r1, r1, Imm8(0), LeaveCC);
 
     ma_strd(r0, r1, EDtrAddr(scratch, EDtrOffImm(0)));
     ma_ldrd(EDtrAddr(sp, EDtrOffImm(8)), r0, r1, PostIndex);
 }
 
 void
 MacroAssembler::neg32(Register reg)
 {
     ma_neg(reg, reg, SetCC);
 }
 
 void
 MacroAssembler::neg64(Register64 reg)
 {
-    ma_rsb(Imm32(0), reg.low, SetCC);
-    ma_rsc(Imm32(0), reg.high);
+    as_rsb(reg.low, reg.low, Imm8(0), SetCC);
+    as_rsc(reg.high, reg.high, Imm8(0));
 }
 
 void
 MacroAssembler::negateDouble(FloatRegister reg)
 {
     ma_vneg(reg, reg);
 }
 
@@ -626,23 +666,23 @@ MacroAssembler::lshift64(Imm32 imm, Regi
 
 void
 MacroAssembler::lshift64(Register unmaskedShift, Register64 dest)
 {
     // dest.high = dest.high << shift | dest.low << shift - 32 | dest.low >> 32 - shift
     // Note: one of the two dest.low shift will always yield zero due to negative shift.
 
     ScratchRegisterScope shift(*this);
-    ma_and(Imm32(0x3f), unmaskedShift, shift);
+    as_and(shift, unmaskedShift, Imm8(0x3f));
     as_mov(dest.high, lsl(dest.high, shift));
-    ma_sub(shift, Imm32(32), shift);
+    as_sub(shift, shift, Imm8(32));
     as_orr(dest.high, dest.high, lsl(dest.low, shift));
     ma_neg(shift, shift);
     as_orr(dest.high, dest.high, lsr(dest.low, shift));
-    ma_and(Imm32(0x3f), unmaskedShift, shift);
+    as_and(shift, unmaskedShift, Imm8(0x3f));
     as_mov(dest.low, lsl(dest.low, shift));
 }
 
 void
 MacroAssembler::lshift32(Register src, Register dest)
 {
     ma_lsl(src, dest, dest);
 }
@@ -705,29 +745,29 @@ MacroAssembler::rshift64Arithmetic(Regis
     Label proceed;
 
     // dest.low = dest.low >>> shift | dest.high <<< 32 - shift
     // if (shift - 32 >= 0)
     //   dest.low |= dest.high >>> shift - 32
     // Note: Negative shifts yield a zero as result, except for the signed
     //       right shift. Therefore we need to test for it and only do it if
     //       it isn't negative.
+    ScratchRegisterScope shift(*this);
 
-    ScratchRegisterScope shift(*this);
-    ma_and(Imm32(0x3f), unmaskedShift, shift);
+    as_and(shift, unmaskedShift, Imm8(0x3f));
     as_mov(dest.low, lsr(dest.low, shift));
-    ma_rsb(shift, Imm32(32), shift);
+    as_rsb(shift, shift, Imm8(32));
     as_orr(dest.low, dest.low, lsl(dest.high, shift));
     ma_neg(shift, shift, SetCC);
     ma_b(&proceed, Signed);
 
     as_orr(dest.low, dest.low, asr(dest.high, shift));
 
     bind(&proceed);
-    ma_and(Imm32(0x3f), unmaskedShift, shift);
+    as_and(shift, unmaskedShift, Imm8(0x3f));
     as_mov(dest.high, asr(dest.high, shift));
 }
 
 void
 MacroAssembler::rshift32Arithmetic(Register src, Register dest)
 {
     ma_asr(src, dest, dest);
 }
@@ -759,23 +799,23 @@ MacroAssembler::rshift64(Imm32 imm, Regi
 
 void
 MacroAssembler::rshift64(Register unmaskedShift, Register64 dest)
 {
     // dest.low = dest.low >> shift | dest.high >> shift - 32 | dest.high << 32 - shift
     // Note: one of the two dest.high shifts will always yield zero due to negative shift.
 
     ScratchRegisterScope shift(*this);
-    ma_and(Imm32(0x3f), unmaskedShift, shift);
+    as_and(shift, unmaskedShift, Imm8(0x3f));
     as_mov(dest.low, lsr(dest.low, shift));
-    ma_sub(shift, Imm32(32), shift);
+    as_sub(shift, shift, Imm8(32));
     as_orr(dest.low, dest.low, lsr(dest.high, shift));
     ma_neg(shift, shift);
     as_orr(dest.low, dest.low, lsl(dest.high, shift));
-    ma_and(Imm32(0x3f), unmaskedShift, shift);
+    as_and(shift, unmaskedShift, Imm8(0x3f));
     as_mov(dest.high, lsr(dest.high, shift));
 }
 
 // ===============================================================
 // Rotate functions
 void
 MacroAssembler::rotateLeft(Imm32 count, Register input, Register dest)
 {
@@ -783,17 +823,18 @@ MacroAssembler::rotateLeft(Imm32 count, 
         ma_rol(count, input, dest);
     else
         ma_mov(input, dest);
 }
 
 void
 MacroAssembler::rotateLeft(Register count, Register input, Register dest)
 {
-    ma_rol(count, input, dest);
+    ScratchRegisterScope scratch(*this);
+    ma_rol(count, input, dest, scratch);
 }
 
 void
 MacroAssembler::rotateLeft64(Imm32 count, Register64 input, Register64 dest, Register temp)
 {
     MOZ_ASSERT(temp == InvalidReg);
     MOZ_ASSERT(input.low != dest.high && input.high != dest.low);
 
@@ -828,45 +869,44 @@ MacroAssembler::rotateLeft64(Register sh
     MOZ_ASSERT(temp != src.low && temp != src.high);
     MOZ_ASSERT(shift != src.low && shift != src.high);
     MOZ_ASSERT(temp != InvalidReg);
 
     ScratchRegisterScope shift_value(*this);
     Label high, done;
 
     ma_mov(src.high, temp);
-    ma_and(Imm32(0x3f), shift, shift_value);
-
-    ma_cmp(shift_value, Imm32(32));
+    as_and(shift_value, shift, Imm8(0x3f));
+    as_cmp(shift_value, Imm8(32));
     ma_b(&high, GreaterThanOrEqual);
 
     // high = high << shift | low >> 32 - shift
     // low = low << shift | high >> 32 - shift
     as_mov(dest.high, lsl(src.high, shift_value));
-    ma_rsb(Imm32(32), shift_value);
+    as_rsb(shift_value, shift_value, Imm8(32));
     as_orr(dest.high, dest.high, lsr(src.low, shift_value));
 
-    ma_rsb(Imm32(32), shift_value);
+    as_rsb(shift_value, shift_value, Imm8(32));
     as_mov(dest.low, lsl(src.low, shift_value));
-    ma_rsb(Imm32(32), shift_value);
+    as_rsb(shift_value, shift_value, Imm8(32));
     as_orr(dest.low, dest.low, lsr(temp, shift_value));
 
     ma_b(&done);
 
     // A 32 - 64 shift is a 0 - 32 shift in the other direction.
     bind(&high);
-    ma_rsb(Imm32(64), shift_value);
+    as_rsb(shift_value, shift_value, Imm8(64));
 
     as_mov(dest.high, lsr(src.high, shift_value));
-    ma_rsb(Imm32(32), shift_value);
+    as_rsb(shift_value, shift_value, Imm8(32));
     as_orr(dest.high, dest.high, lsl(src.low, shift_value));
 
-    ma_rsb(Imm32(32), shift_value);
+    as_rsb(shift_value, shift_value, Imm8(32));
     as_mov(dest.low, lsr(src.low, shift_value));
-    ma_rsb(Imm32(32), shift_value);
+    as_rsb(shift_value, shift_value, Imm8(32));
     as_orr(dest.low, dest.low, lsl(temp, shift_value));
 
     bind(&done);
 }
 
 void
 MacroAssembler::rotateRight(Imm32 count, Register input, Register dest)
 {
@@ -919,45 +959,44 @@ MacroAssembler::rotateRight64(Register s
     MOZ_ASSERT(temp != src.low && temp != src.high);
     MOZ_ASSERT(shift != src.low && shift != src.high);
     MOZ_ASSERT(temp != InvalidReg);
 
     ScratchRegisterScope shift_value(*this);
     Label high, done;
 
     ma_mov(src.high, temp);
-    ma_and(Imm32(0x3f), shift, shift_value);
-
-    ma_cmp(shift_value, Imm32(32));
+    as_and(shift_value, shift, Imm8(0x3f));
+    as_cmp(shift_value, Imm8(32));
     ma_b(&high, GreaterThanOrEqual);
 
     // high = high >> shift | low << 32 - shift
     // low = low >> shift | high << 32 - shift
     as_mov(dest.high, lsr(src.high, shift_value));
-    ma_rsb(Imm32(32), shift_value);
+    as_rsb(shift_value, shift_value, Imm8(32));
     as_orr(dest.high, dest.high, lsl(src.low, shift_value));
 
-    ma_rsb(Imm32(32), shift_value);
+    as_rsb(shift_value, shift_value, Imm8(32));
     as_mov(dest.low, lsr(src.low, shift_value));
-    ma_rsb(Imm32(32), shift_value);
+    as_rsb(shift_value, shift_value, Imm8(32));
     as_orr(dest.low, dest.low, lsl(temp, shift_value));
 
     ma_b(&done);
 
     // A 32 - 64 shift is a 0 - 32 shift in the other direction.
     bind(&high);
-    ma_rsb(Imm32(64), shift_value);
+    as_rsb(shift_value, shift_value, Imm8(64));
 
     as_mov(dest.high, lsl(src.high, shift_value));
-    ma_rsb(Imm32(32), shift_value);
+    as_rsb(shift_value, shift_value, Imm8(32));
     as_orr(dest.high, dest.high, lsr(src.low, shift_value));
 
-    ma_rsb(Imm32(32), shift_value);
+    as_rsb(shift_value, shift_value, Imm8(32));
     as_mov(dest.low, lsl(src.low, shift_value));
-    ma_rsb(Imm32(32), shift_value);
+    as_rsb(shift_value, shift_value, Imm8(32));
     as_orr(dest.low, dest.low, lsr(temp, shift_value));
 
     bind(&done);
 }
 
 // ===============================================================
 // Bit counting functions
 
@@ -966,63 +1005,67 @@ MacroAssembler::clz32(Register src, Regi
 {
     ma_clz(src, dest);
 }
 
 void
 MacroAssembler::clz64(Register64 src, Register dest)
 {
     ScratchRegisterScope scratch(*this);
+
     ma_clz(src.high, scratch);
-    ma_cmp(scratch, Imm32(32));
+    as_cmp(scratch, Imm8(32));
     ma_mov(scratch, dest, LeaveCC, NotEqual);
     ma_clz(src.low, dest, Equal);
-    ma_add(Imm32(32), dest, LeaveCC, Equal);
+    as_add(dest, dest, Imm8(32), LeaveCC, Equal);
 }
 
 void
 MacroAssembler::ctz32(Register src, Register dest, bool knownNotZero)
 {
-    ma_ctz(src, dest);
+    ScratchRegisterScope scratch(*this);
+    ma_ctz(src, dest, scratch);
 }
 
 void
 MacroAssembler::ctz64(Register64 src, Register dest)
 {
     Label done, high;
-
-    ma_cmp(src.low, Imm32(0));
+    as_cmp(src.low, Imm8(0));
     ma_b(&high, Equal);
 
     ctz32(src.low, dest, /* knownNotZero = */ true);
     ma_b(&done);
 
     bind(&high);
     ctz32(src.high, dest, /* knownNotZero = */ false);
-    ma_add(Imm32(32), dest);
+    as_add(dest, dest, Imm8(32));
 
     bind(&done);
 }
 
 void
 MacroAssembler::popcnt32(Register input,  Register output, Register tmp)
 {
     // Equivalent to GCC output of mozilla::CountPopulation32()
 
+    ScratchRegisterScope scratch(*this);
+
     if (input != output)
         ma_mov(input, output);
     as_mov(tmp, asr(output, 1));
-    ma_and(Imm32(0x55555555), tmp);
+    ma_and(Imm32(0x55555555), tmp, scratch);
     ma_sub(output, tmp, output);
     as_mov(tmp, asr(output, 2));
-    ma_and(Imm32(0x33333333), output);
-    ma_and(Imm32(0x33333333), tmp);
+    ma_mov(Imm32(0x33333333), scratch);
+    ma_and(scratch, output);
+    ma_and(scratch, tmp);
     ma_add(output, tmp, output);
     as_add(output, output, lsr(output, 4));
-    ma_and(Imm32(0xF0F0F0F), output);
+    ma_and(Imm32(0xF0F0F0F), output, scratch);
     as_add(output, output, lsl(output, 8));
     as_add(output, output, lsl(output, 16));
     as_mov(output, asr(output, 24));
 }
 
 void
 MacroAssembler::popcnt64(Register64 src, Register64 dest, Register tmp)
 {
@@ -1053,70 +1096,103 @@ MacroAssembler::branch32(Condition cond,
     ma_cmp(lhs, rhs);
     ma_b(label, cond);
 }
 
 template <class L>
 void
 MacroAssembler::branch32(Condition cond, Register lhs, Imm32 rhs, L label)
 {
-    ma_cmp(lhs, rhs);
+    ScratchRegisterScope scratch(*this);
+
+    ma_cmp(lhs, rhs, scratch);
     ma_b(label, cond);
 }
 
 void
 MacroAssembler::branch32(Condition cond, const Address& lhs, Register rhs, Label* label)
 {
     ScratchRegisterScope scratch(*this);
-    load32(lhs, scratch);
-    branch32(cond, scratch, rhs, label);
+    SecondScratchRegisterScope scratch2(*this);
+
+    ma_ldr(lhs, scratch, scratch2);
+    ma_cmp(scratch, rhs);
+    ma_b(label, cond);
 }
 
 void
 MacroAssembler::branch32(Condition cond, const Address& lhs, Imm32 rhs, Label* label)
 {
-    // branch32 will use ScratchRegister.
-    AutoRegisterScope scratch(*this, secondScratchReg_);
-    load32(lhs, scratch);
-    branch32(cond, scratch, rhs, label);
+    ScratchRegisterScope scratch(*this);
+    SecondScratchRegisterScope scratch2(*this);
+
+    ma_ldr(lhs, scratch, scratch2);
+    ma_cmp(scratch, rhs, scratch2);
+    ma_b(label, cond);
 }
 
 void
 MacroAssembler::branch32(Condition cond, const AbsoluteAddress& lhs, Register rhs, Label* label)
 {
-    AutoRegisterScope scratch2(*this, secondScratchReg_);
-    loadPtr(lhs, scratch2); // ma_cmp will use the scratch register.
-    ma_cmp(scratch2, rhs);
+    ScratchRegisterScope scratch(*this);
+
+    // Load into scratch.
+    movePtr(ImmWord(uintptr_t(lhs.addr)), scratch);
+    ma_ldr(DTRAddr(scratch, DtrOffImm(0)), scratch);
+
+    ma_cmp(scratch, rhs);
     ma_b(label, cond);
 }
 
 void
 MacroAssembler::branch32(Condition cond, const AbsoluteAddress& lhs, Imm32 rhs, Label* label)
 {
-    AutoRegisterScope scratch2(*this, secondScratchReg_);
-    loadPtr(lhs, scratch2); // ma_cmp will use the scratch register.
-    ma_cmp(scratch2, rhs);
+    ScratchRegisterScope scratch(*this);
+    SecondScratchRegisterScope scratch2(*this);
+
+    // Load into scratch.
+    movePtr(ImmWord(uintptr_t(lhs.addr)), scratch);
+    ma_ldr(DTRAddr(scratch, DtrOffImm(0)), scratch);
+
+    ma_cmp(scratch, rhs, scratch2);
     ma_b(label, cond);
 }
 
 void
 MacroAssembler::branch32(Condition cond, const BaseIndex& lhs, Imm32 rhs, Label* label)
 {
-    // branch32 will use ScratchRegister.
-    AutoRegisterScope scratch2(*this, secondScratchReg_);
-    load32(lhs, scratch2);
+    SecondScratchRegisterScope scratch2(*this);
+    {
+        ScratchRegisterScope scratch(*this);
+
+        Register base = lhs.base;
+        uint32_t scale = Imm32::ShiftOf(lhs.scale).value;
+
+        // Load lhs into scratch2.
+        if (lhs.offset != 0) {
+            ma_add(base, Imm32(lhs.offset), scratch, scratch2);
+            ma_ldr(DTRAddr(scratch, DtrRegImmShift(lhs.index, LSL, scale)), scratch2);
+        } else {
+            ma_ldr(DTRAddr(base, DtrRegImmShift(lhs.index, LSL, scale)), scratch2);
+        }
+    }
     branch32(cond, scratch2, rhs, label);
 }
 
 void
 MacroAssembler::branch32(Condition cond, wasm::SymbolicAddress lhs, Imm32 rhs, Label* label)
 {
     ScratchRegisterScope scratch(*this);
-    loadPtr(lhs, scratch);
-    branch32(cond, scratch, rhs, label);
+    SecondScratchRegisterScope scratch2(*this);
+
+    movePtr(lhs, scratch);
+    ma_ldr(DTRAddr(scratch, DtrOffImm(0)), scratch);
+
+    ma_cmp(scratch, rhs, scratch2);
+    ma_b(label, cond);
 }
 
 void
 MacroAssembler::branch64(Condition cond, const Address& lhs, Imm64 val, Label* label)
 {
     MOZ_ASSERT(cond == Assembler::NotEqual,
                "other condition codes not supported");
 
@@ -1292,68 +1368,91 @@ void
 MacroAssembler::branchPtr(Condition cond, const Address& lhs, ImmPtr rhs, Label* label)
 {
     branchPtr(cond, lhs, ImmWord(uintptr_t(rhs.value)), label);
 }
 
 void
 MacroAssembler::branchPtr(Condition cond, const Address& lhs, ImmGCPtr rhs, Label* label)
 {
-    AutoRegisterScope scratch2(*this, secondScratchReg_);
+    SecondScratchRegisterScope scratch2(*this);
     loadPtr(lhs, scratch2);
     branchPtr(cond, scratch2, rhs, label);
 }
 
 void
 MacroAssembler::branchPtr(Condition cond, const Address& lhs, ImmWord rhs, Label* label)
 {
-    AutoRegisterScope scratch2(*this, secondScratchReg_);
+    SecondScratchRegisterScope scratch2(*this);
     loadPtr(lhs, scratch2);
     branchPtr(cond, scratch2, rhs, label);
 }
 
 void
 MacroAssembler::branchPtr(Condition cond, const AbsoluteAddress& lhs, Register rhs, Label* label)
 {
-    ScratchRegisterScope scratch(*this);
-    loadPtr(lhs, scratch);
-    branchPtr(cond, scratch, rhs, label);
+    SecondScratchRegisterScope scratch2(*this);
+    loadPtr(lhs, scratch2);
+    branchPtr(cond, scratch2, rhs, label);
 }
 
 void
 MacroAssembler::branchPtr(Condition cond, const AbsoluteAddress& lhs, ImmWord rhs, Label* label)
 {
-    ScratchRegisterScope scratch(*this);
-    loadPtr(lhs, scratch);
-    branchPtr(cond, scratch, rhs, label);
+    SecondScratchRegisterScope scratch2(*this);
+    loadPtr(lhs, scratch2);
+    branchPtr(cond, scratch2, rhs, label);
 }
 
 void
 MacroAssembler::branchPtr(Condition cond, wasm::SymbolicAddress lhs, Register rhs, Label* label)
 {
-    ScratchRegisterScope scratch(*this);
-    loadPtr(lhs, scratch);
-    branchPtr(cond, scratch, rhs, label);
+    SecondScratchRegisterScope scratch2(*this);
+    loadPtr(lhs, scratch2);
+    branchPtr(cond, scratch2, rhs, label);
 }
 
 template <typename T>
-CodeOffsetJump
+inline CodeOffsetJump
 MacroAssembler::branchPtrWithPatch(Condition cond, Register lhs, T rhs, RepatchLabel* label)
 {
     ma_cmp(lhs, rhs);
     return jumpWithPatch(label, cond);
 }
 
+template <>
+inline CodeOffsetJump
+MacroAssembler::branchPtrWithPatch(Condition cond, Register lhs, ImmGCPtr rhs, RepatchLabel* label)
+{
+    ScratchRegisterScope scratch(*this);
+    ma_cmp(lhs, rhs, scratch);
+    return jumpWithPatch(label, cond);
+}
+
 template <typename T>
-CodeOffsetJump
+inline CodeOffsetJump
 MacroAssembler::branchPtrWithPatch(Condition cond, Address lhs, T rhs, RepatchLabel* label)
 {
-    AutoRegisterScope scratch2(*this, secondScratchReg_);
-    ma_ldr(lhs, scratch2);
-    ma_cmp(scratch2, rhs);
+    ScratchRegisterScope scratch(*this);
+    SecondScratchRegisterScope scratch2(*this);
+
+    ma_ldr(lhs, scratch, scratch2);
+    ma_cmp(scratch, rhs);
+    return jumpWithPatch(label, cond);
+}
+
+template <>
+inline CodeOffsetJump
+MacroAssembler::branchPtrWithPatch(Condition cond, Address lhs, ImmGCPtr rhs, RepatchLabel* label)
+{
+    ScratchRegisterScope scratch(*this);
+    SecondScratchRegisterScope scratch2(*this);
+
+    ma_ldr(lhs, scratch, scratch2);
+    ma_cmp(scratch, rhs, scratch2);
     return jumpWithPatch(label, cond);
 }
 
 void
 MacroAssembler::branchPrivatePtr(Condition cond, const Address& lhs, Register rhs, Label* label)
 {
     branchPtr(cond, lhs, rhs, label);
 }
@@ -1386,21 +1485,23 @@ void
 MacroAssembler::branchTruncateFloat32MaybeModUint32(FloatRegister src, Register dest, Label* fail)
 {
     branchTruncateFloat32ToInt32(src, dest, fail);
 }
 
 void
 MacroAssembler::branchTruncateFloat32ToInt32(FloatRegister src, Register dest, Label* fail)
 {
-    ScratchFloat32Scope scratch(*this);
-    ma_vcvt_F32_I32(src, scratch.sintOverlay());
-    ma_vxfer(scratch, dest);
-    ma_cmp(dest, Imm32(0x7fffffff));
-    ma_cmp(dest, Imm32(0x80000000), Assembler::NotEqual);
+    ScratchFloat32Scope scratchFloat32(*this);
+    ScratchRegisterScope scratch(*this);
+
+    ma_vcvt_F32_I32(src, scratchFloat32.sintOverlay());
+    ma_vxfer(scratchFloat32, dest);
+    ma_cmp(dest, Imm32(0x7fffffff), scratch);
+    ma_cmp(dest, Imm32(0x80000000), scratch, Assembler::NotEqual);
     ma_b(fail, Assembler::Equal);
 }
 
 void
 MacroAssembler::branchDouble(DoubleCondition cond, FloatRegister lhs, FloatRegister rhs,
                              Label* label)
 {
     compareDouble(lhs, rhs);
@@ -1435,86 +1536,87 @@ MacroAssembler::branchTruncateDoubleMayb
 // was clamped to INT_MIN/INT_MAX, and we can test it. NOTE: if the value
 // really was supposed to be INT_MAX / INT_MIN then it will be wrong.
 //
 // 2. Convert the floating point value to an integer, if it did not fit, then it
 // set one or two bits in the fpcsr. Check those.
 void
 MacroAssembler::branchTruncateDoubleToInt32(FloatRegister src, Register dest, Label* fail)
 {
-    ScratchDoubleScope scratch(*this);
-    FloatRegister scratchSIntReg = scratch.sintOverlay();
+    ScratchDoubleScope scratchDouble(*this);
+    FloatRegister scratchSIntReg = scratchDouble.sintOverlay();
+    ScratchRegisterScope scratch(*this);
 
     ma_vcvt_F64_I32(src, scratchSIntReg);
     ma_vxfer(scratchSIntReg, dest);
-    ma_cmp(dest, Imm32(0x7fffffff));
-    ma_cmp(dest, Imm32(0x80000000), Assembler::NotEqual);
+    ma_cmp(dest, Imm32(0x7fffffff), scratch);
+    ma_cmp(dest, Imm32(0x80000000), scratch, Assembler::NotEqual);
     ma_b(fail, Assembler::Equal);
 }
 
 template <typename T, typename L>
 void
 MacroAssembler::branchAdd32(Condition cond, T src, Register dest, L label)
 {
     add32(src, dest);
     as_b(label, cond);
 }
 
 template <typename T>
 void
 MacroAssembler::branchSub32(Condition cond, T src, Register dest, Label* label)
 {
-    ma_sub(src, dest, SetCC);
+    sub32(src, dest);
     j(cond, label);
 }
 
 void
 MacroAssembler::decBranchPtr(Condition cond, Register lhs, Imm32 rhs, Label* label)
 {
-    ma_sub(rhs, lhs, SetCC);
+    ScratchRegisterScope scratch(*this);
+    ma_sub(rhs, lhs, scratch, SetCC);
     as_b(label, cond);
 }
 
 template <class L>
 void
 MacroAssembler::branchTest32(Condition cond, Register lhs, Register rhs, L label)
 {
     MOZ_ASSERT(cond == Zero || cond == NonZero || cond == Signed || cond == NotSigned);
     // x86 likes test foo, foo rather than cmp foo, #0.
     // Convert the former into the latter.
     if (lhs == rhs && (cond == Zero || cond == NonZero))
-        ma_cmp(lhs, Imm32(0));
+        as_cmp(lhs, Imm8(0));
     else
         ma_tst(lhs, rhs);
     ma_b(label, cond);
 }
 
 template <class L>
 void
 MacroAssembler::branchTest32(Condition cond, Register lhs, Imm32 rhs, L label)
 {
     MOZ_ASSERT(cond == Zero || cond == NonZero || cond == Signed || cond == NotSigned);
-    ma_tst(lhs, rhs);
+    ScratchRegisterScope scratch(*this);
+    ma_tst(lhs, rhs, scratch);
     ma_b(label, cond);
 }
 
 void
 MacroAssembler::branchTest32(Condition cond, const Address& lhs, Imm32 rhs, Label* label)
 {
-    // branchTest32 will use ScratchRegister.
-    AutoRegisterScope scratch2(*this, secondScratchReg_);
+    SecondScratchRegisterScope scratch2(*this);
     load32(lhs, scratch2);
     branchTest32(cond, scratch2, rhs, label);
 }
 
 void
 MacroAssembler::branchTest32(Condition cond, const AbsoluteAddress& lhs, Imm32 rhs, Label* label)
 {
-    // branchTest32 will use ScratchRegister.
-    AutoRegisterScope scratch2(*this, secondScratchReg_);
+    SecondScratchRegisterScope scratch2(*this);
     load32(lhs, scratch2);
     branchTest32(cond, scratch2, rhs, label);
 }
 
 template <class L>
 void
 MacroAssembler::branchTestPtr(Condition cond, Register lhs, Register rhs, L label)
 {
@@ -1533,21 +1635,23 @@ MacroAssembler::branchTestPtr(Condition 
     branchTest32(cond, lhs, rhs, label);
 }
 
 template <class L>
 void
 MacroAssembler::branchTest64(Condition cond, Register64 lhs, Register64 rhs, Register temp,
                              L label)
 {
+    ScratchRegisterScope scratch(*this);
+
     if (cond == Assembler::Zero) {
         MOZ_ASSERT(lhs.low == rhs.low);
         MOZ_ASSERT(lhs.high == rhs.high);
-        ma_orr(lhs.low, lhs.high, ScratchRegister);
-        branchTestPtr(cond, ScratchRegister, ScratchRegister, label);
+        ma_orr(lhs.low, lhs.high, scratch);
+        branchTestPtr(cond, scratch, scratch, label);
     } else {
         MOZ_CRASH("Unsupported condition");
     }
 }
 
 void
 MacroAssembler::branchTestUndefined(Condition cond, Register tag, Label* label)
 {
@@ -1920,35 +2024,41 @@ MacroAssembler::branchTestMagic(Conditio
     branch32(cond, ToPayload(valaddr), Imm32(why), label);
 }
 
 // ========================================================================
 // Memory access primitives.
 void
 MacroAssembler::storeUncanonicalizedDouble(FloatRegister src, const Address& addr)
 {
-    ma_vstr(src, addr);
+    ScratchRegisterScope scratch(*this);
+    ma_vstr(src, addr, scratch);
 }
 void
 MacroAssembler::storeUncanonicalizedDouble(FloatRegister src, const BaseIndex& addr)
 {
+    ScratchRegisterScope scratch(*this);
+    SecondScratchRegisterScope scratch2(*this);
     uint32_t scale = Imm32::ShiftOf(addr.scale).value;
-    ma_vstr(src, addr.base, addr.index, scale, addr.offset);
+    ma_vstr(src, addr.base, addr.index, scratch, scratch2, scale, addr.offset);
 }
 
 void
 MacroAssembler::storeUncanonicalizedFloat32(FloatRegister src, const Address& addr)
 {
-    ma_vstr(src.asSingle(), addr);
+    ScratchRegisterScope scratch(*this);
+    ma_vstr(src.asSingle(), addr, scratch);
 }
 void
 MacroAssembler::storeUncanonicalizedFloat32(FloatRegister src, const BaseIndex& addr)
 {
+    ScratchRegisterScope scratch(*this);
+    SecondScratchRegisterScope scratch2(*this);
     uint32_t scale = Imm32::ShiftOf(addr.scale).value;
-    ma_vstr(src.asSingle(), addr.base, addr.index, scale, addr.offset);
+    ma_vstr(src.asSingle(), addr.base, addr.index, scratch, scratch2, scale, addr.offset);
 }
 
 void
 MacroAssembler::storeFloat32x3(FloatRegister src, const Address& dest)
 {
     MOZ_CRASH("NYI");
 }
 void
--- a/js/src/jit/arm/MacroAssembler-arm.cpp
+++ b/js/src/jit/arm/MacroAssembler-arm.cpp
@@ -40,48 +40,49 @@ isValueDTRDCandidate(ValueOperand& val)
     return true;
 }
 
 void
 MacroAssemblerARM::convertBoolToInt32(Register source, Register dest)
 {
     // Note that C++ bool is only 1 byte, so zero extend it to clear the
     // higher-order bits.
-    ma_and(Imm32(0xff), source, dest);
+    as_and(dest, source, Imm8(0xff));
 }
 
 void
 MacroAssemblerARM::convertInt32ToDouble(Register src, FloatRegister dest_)
 {
     // Direct conversions aren't possible.
     VFPRegister dest = VFPRegister(dest_);
     as_vxfer(src, InvalidReg, dest.sintOverlay(), CoreToFloat);
     as_vcvt(dest, dest.sintOverlay());
 }
 
 void
 MacroAssemblerARM::convertInt32ToDouble(const Address& src, FloatRegister dest)
 {
     ScratchDoubleScope scratch(asMasm());
-    ma_vldr(src, scratch);
+    SecondScratchRegisterScope scratch2(asMasm());
+    ma_vldr(src, scratch, scratch2);
     as_vcvt(dest, VFPRegister(scratch).sintOverlay());
 }
 
 void
 MacroAssemblerARM::convertInt32ToDouble(const BaseIndex& src, FloatRegister dest)
 {
     Register base = src.base;
     uint32_t scale = Imm32::ShiftOf(src.scale).value;
 
     ScratchRegisterScope scratch(asMasm());
+    SecondScratchRegisterScope scratch2(asMasm());
 
     if (src.offset != 0) {
-        ma_mov(base, scratch);
+        ma_add(base, Imm32(src.offset), scratch, scratch2);
         base = scratch;
-        ma_add(Imm32(src.offset), base);
     }
     ma_ldr(DTRAddr(base, DtrRegImmShift(src.index, LSL, scale)), scratch);
     convertInt32ToDouble(scratch, dest);
 }
 
 void
 MacroAssemblerARM::convertUInt32ToDouble(Register src, FloatRegister dest_)
 {
@@ -98,22 +99,27 @@ MacroAssemblerARMCompat::convertUInt64To
 {
     return false;
 }
 
 void
 MacroAssemblerARMCompat::convertUInt64ToDouble(Register64 src, FloatRegister dest, Register temp)
 {
     MOZ_ASSERT(temp == Register::Invalid());
+    ScratchDoubleScope scratchDouble(asMasm());
+
     convertUInt32ToDouble(src.high, dest);
-    movePtr(ImmPtr(&TO_DOUBLE_HIGH_SCALE), ScratchRegister);
-    loadDouble(Address(ScratchRegister, 0), ScratchDoubleReg);
-    asMasm().mulDouble(ScratchDoubleReg, dest);
-    convertUInt32ToDouble(src.low, ScratchDoubleReg);
-    asMasm().addDouble(ScratchDoubleReg, dest);
+    {
+        ScratchRegisterScope scratch(asMasm());
+        movePtr(ImmPtr(&TO_DOUBLE_HIGH_SCALE), scratch);
+        ma_vldr(Operand(Address(scratch, 0)).toVFPAddr(), scratchDouble);
+    }
+    asMasm().mulDouble(scratchDouble, dest);
+    convertUInt32ToDouble(src.low, scratchDouble);
+    asMasm().addDouble(scratchDouble, dest);
 }
 
 void
 MacroAssemblerARM::convertUInt32ToFloat32(Register src, FloatRegister dest_)
 {
     // Direct conversions aren't possible.
     VFPRegister dest = VFPRegister(dest_);
     as_vxfer(src, InvalidReg, dest.uintOverlay(), CoreToFloat);
@@ -132,33 +138,35 @@ void MacroAssemblerARM::convertDoubleToF
 void
 MacroAssemblerARM::convertDoubleToInt32(FloatRegister src, Register dest,
                                         Label* fail, bool negativeZeroCheck)
 {
     // Convert the floating point value to an integer, if it did not fit, then
     // when we convert it *back* to a float, it will have a different value,
     // which we can test.
     ScratchDoubleScope scratchDouble(asMasm());
+    ScratchRegisterScope scratch(asMasm());
+
     FloatRegister scratchSIntReg = scratchDouble.sintOverlay();
 
     ma_vcvt_F64_I32(src, scratchSIntReg);
     // Move the value into the dest register.
     ma_vxfer(scratchSIntReg, dest);
     ma_vcvt_I32_F64(scratchSIntReg, scratchDouble);
     ma_vcmp(src, scratchDouble);
     as_vmrs(pc);
     ma_b(fail, Assembler::VFP_NotEqualOrUnordered);
 
     if (negativeZeroCheck) {
-        ma_cmp(dest, Imm32(0));
+        as_cmp(dest, Imm8(0));
         // Test and bail for -0.0, when integer result is 0. Move the top word
         // of the double into the output reg, if it is non-zero, then the
         // original value was -0.0.
         as_vxfer(dest, InvalidReg, src, FloatToCore, Assembler::Equal, 1);
-        ma_cmp(dest, Imm32(0x80000000), Assembler::Equal);
+        ma_cmp(dest, Imm32(0x80000000), scratch, Assembler::Equal);
         ma_b(fail, Assembler::Equal);
     }
 }
 
 // Checks whether a float32 is representable as a 32-bit integer. If so, the
 // integer is written to the output register. Otherwise, a bailout is taken to
 // the given snapshot. This function overwrites the scratch float register.
 void
@@ -166,40 +174,41 @@ MacroAssemblerARM::convertFloat32ToInt32
                                          Label* fail, bool negativeZeroCheck)
 {
     // Converting the floating point value to an integer and then converting it
     // back to a float32 would not work, as float to int32 conversions are
     // clamping (e.g. float(INT32_MAX + 1) would get converted into INT32_MAX
     // and then back to float(INT32_MAX + 1)).  If this ever happens, we just
     // bail out.
     ScratchFloat32Scope scratchFloat(asMasm());
+    ScratchRegisterScope scratch(asMasm());
 
     FloatRegister ScratchSIntReg = scratchFloat.sintOverlay();
     ma_vcvt_F32_I32(src, ScratchSIntReg);
 
     // Store the result
     ma_vxfer(ScratchSIntReg, dest);
 
     ma_vcvt_I32_F32(ScratchSIntReg, scratchFloat);
     ma_vcmp(src, scratchFloat);
     as_vmrs(pc);
     ma_b(fail, Assembler::VFP_NotEqualOrUnordered);
 
     // Bail out in the clamped cases.
-    ma_cmp(dest, Imm32(0x7fffffff));
-    ma_cmp(dest, Imm32(0x80000000), Assembler::NotEqual);
+    ma_cmp(dest, Imm32(0x7fffffff), scratch);
+    ma_cmp(dest, Imm32(0x80000000), scratch, Assembler::NotEqual);
     ma_b(fail, Assembler::Equal);
 
     if (negativeZeroCheck) {
-        ma_cmp(dest, Imm32(0));
+        as_cmp(dest, Imm8(0));
         // Test and bail for -0.0, when integer result is 0. Move the float into
         // the output reg, and if it is non-zero then the original value was
         // -0.0
         as_vxfer(dest, InvalidReg, VFPRegister(src).singleOverlay(), FloatToCore, Assembler::Equal, 0);
-        ma_cmp(dest, Imm32(0x80000000), Assembler::Equal);
+        ma_cmp(dest, Imm32(0x80000000), scratch, Assembler::Equal);
         ma_b(fail, Assembler::Equal);
     }
 }
 
 void
 MacroAssemblerARM::convertFloat32ToDouble(FloatRegister src, FloatRegister dest)
 {
     MOZ_ASSERT(dest.isDouble());
@@ -214,17 +223,18 @@ MacroAssemblerARM::convertInt32ToFloat32
     as_vxfer(src, InvalidReg, dest.sintOverlay(), CoreToFloat);
     as_vcvt(dest.singleOverlay(), dest.sintOverlay());
 }
 
 void
 MacroAssemblerARM::convertInt32ToFloat32(const Address& src, FloatRegister dest)
 {
     ScratchFloat32Scope scratch(asMasm());
-    ma_vldr(src, scratch);
+    SecondScratchRegisterScope scratch2(asMasm());
+    ma_vldr(src, scratch, scratch2);
     as_vcvt(dest, VFPRegister(scratch).sintOverlay());
 }
 
 bool
 MacroAssemblerARM::alu_dbl(Register src1, Imm32 imm, Register dest, ALUOp op,
                            SBit s, Condition c)
 {
     if ((s == SetCC && ! condsAreSafe(op)) || !can_dbl(op))
@@ -241,22 +251,23 @@ MacroAssemblerARM::alu_dbl(Register src1
     // don't do second operation if first operation overflowed. This preserves
     // the overflow condition code. Unfortunately, it is horribly brittle.
     as_alu(dest, src1, Operand2(both.fst), interop, LeaveCC, c);
     as_alu(dest, dest, Operand2(both.snd), op, s, c);
     return true;
 }
 
 void
-MacroAssemblerARM::ma_alu(Register src1, Imm32 imm, Register dest,
+MacroAssemblerARM::ma_alu(Register src1, Imm32 imm, Register dest, AutoRegisterScope& scratch,
                           ALUOp op, SBit s, Condition c)
 {
     // ma_mov should be used for moves.
     MOZ_ASSERT(op != OpMov);
     MOZ_ASSERT(op != OpMvn);
+    MOZ_ASSERT(src1 != scratch);
 
     // As it turns out, if you ask for a compare-like instruction you *probably*
     // want it to set condition codes.
     if (dest == InvalidReg)
         MOZ_ASSERT(s == SetCC);
 
     // The operator gives us the ability to determine how this can be used.
     Imm8 imm8 = Imm8(imm.value);
@@ -264,24 +275,22 @@ MacroAssemblerARM::ma_alu(Register src1,
     if (!imm8.invalid) {
         as_alu(dest, src1, imm8, op, s, c);
         return;
     }
 
     // One instruction, negated:
     Imm32 negImm = imm;
     Register negDest;
-    ALUOp negOp = ALUNeg(op, dest, &negImm, &negDest);
+    ALUOp negOp = ALUNeg(op, dest, scratch, &negImm, &negDest);
     Imm8 negImm8 = Imm8(negImm.value);
-    // 'add r1, r2, -15' can be replaced with 'sub r1, r2, 15'. For bonus
-    // points, dest can be replaced (nearly always invalid => ScratchRegister)
+    // 'add r1, r2, -15' can be replaced with 'sub r1, r2, 15'.
+    // The dest can be replaced (InvalidReg => scratch).
     // This is useful if we wish to negate tst. tst has an invalid (aka not
-    // used) dest, but its negation is bic *requires* a dest. We can accomodate,
-    // but it will need to clobber *something*, and the scratch register isn't
-    // being used, so...
+    // used) dest, but its negation bic requires a dest.
     if (negOp != OpInvalid && !negImm8.invalid) {
         as_alu(negDest, src1, negImm8, negOp, s, c);
         return;
     }
 
     // Start by attempting to generate a two instruction form. Some things
     // cannot be made into two-inst forms correctly. Namely, adds dest, src,
     // 0xffff. Since we want the condition codes (and don't know which ones
@@ -291,29 +300,16 @@ MacroAssemblerARM::ma_alu(Register src1,
     // instruction variant.
     if (alu_dbl(src1, imm, dest, op, s, c))
         return;
 
     // And try with its negative.
     if (negOp != OpInvalid && alu_dbl(src1, negImm, negDest, negOp, s, c))
         return;
 
-    // Often this code is called with dest as the ScratchRegister.  The register
-    // is logically owned by the caller after this call.
-    const Register& scratch = ScratchRegister;
-    MOZ_ASSERT(src1 != scratch);
-#ifdef DEBUG
-    if (dest != scratch) {
-        // If the destination register is not the scratch register, double check
-        // that the current function does not erase the content of the scratch
-        // register.
-        ScratchRegisterScope assertScratch(asMasm());
-    }
-#endif
-
     ma_mov(imm, scratch, c);
     as_alu(dest, src1, O2Reg(scratch), op, s, c);
 }
 
 void
 MacroAssemblerARM::ma_alu(Register src1, Operand op2, Register dest, ALUOp op,
             SBit s, Assembler::Condition c)
 {
@@ -394,22 +390,19 @@ MacroAssemblerARM::ma_mov(Imm32 imm, Reg
     // Try mov with Imm8 operand.
     Imm8 imm8 = Imm8(imm.value);
     if (!imm8.invalid) {
         as_alu(dest, InvalidReg, imm8, OpMov, LeaveCC, c);
         return;
     }
 
     // Try mvn with Imm8 operand.
-    Imm32 negImm = imm;
-    Register negDest;
-    ALUOp negOp = ALUNeg(OpMov, dest, &negImm, &negDest);
-    Imm8 negImm8 = Imm8(negImm.value);
-    if (negOp != OpInvalid && !negImm8.invalid) {
-        as_alu(negDest, InvalidReg, negImm8, negOp, LeaveCC, c);
+    Imm8 negImm8 = Imm8(~imm.value);
+    if (!negImm8.invalid) {
+        as_alu(dest, InvalidReg, negImm8, OpMvn, LeaveCC, c);
         return;
     }
 
     // Try movw/movt.
     if (HasMOVWT()) {
         // ARMv7 supports movw/movt. movw zero-extends its 16 bit argument,
         // so we can set the register this way. movt leaves the bottom 16
         // bits in tact, so we always need a movw.
@@ -490,20 +483,19 @@ MacroAssemblerARM::ma_asr(Register shift
 
 void
 MacroAssemblerARM::ma_ror(Register shift, Register src, Register dst)
 {
     as_mov(dst, ror(src, shift));
 }
 
 void
-MacroAssemblerARM::ma_rol(Register shift, Register src, Register dst)
-{
-    ScratchRegisterScope scratch(asMasm());
-    ma_rsb(shift, Imm32(32), scratch);
+MacroAssemblerARM::ma_rol(Register shift, Register src, Register dst, AutoRegisterScope& scratch)
+{
+    as_rsb(scratch, shift, Imm8(32));
     as_mov(dst, ror(src, scratch));
 }
 
 // Move not (dest <- ~src)
 void
 MacroAssemblerARM::ma_mvn(Register src1, Register dest, SBit s, Assembler::Condition c)
 {
     as_alu(dest, InvalidReg, O2Reg(src1), OpMvn, s, c);
@@ -526,33 +518,34 @@ MacroAssemblerARM::ma_and(Register src, 
 void
 MacroAssemblerARM::ma_and(Register src1, Register src2, Register dest,
                           SBit s, Assembler::Condition c)
 {
     as_and(dest, src1, O2Reg(src2), s, c);
 }
 
 void
-MacroAssemblerARM::ma_and(Imm32 imm, Register dest, SBit s, Assembler::Condition c)
-{
-    ma_alu(dest, imm, dest, OpAnd, s, c);
-}
-
-void
-MacroAssemblerARM::ma_and(Imm32 imm, Register src1, Register dest,
+MacroAssemblerARM::ma_and(Imm32 imm, Register dest, AutoRegisterScope& scratch,
                           SBit s, Assembler::Condition c)
 {
-    ma_alu(src1, imm, dest, OpAnd, s, c);
+    ma_alu(dest, imm, dest, scratch, OpAnd, s, c);
+}
+
+void
+MacroAssemblerARM::ma_and(Imm32 imm, Register src1, Register dest, AutoRegisterScope& scratch,
+                          SBit s, Assembler::Condition c)
+{
+    ma_alu(src1, imm, dest, scratch, OpAnd, s, c);
 }
 
 // Bit clear (dest <- dest & ~imm) or (dest <- src1 & ~src2).
 void
-MacroAssemblerARM::ma_bic(Imm32 imm, Register dest, SBit s, Assembler::Condition c)
-{
-    ma_alu(dest, imm, dest, OpBic, s, c);
+MacroAssemblerARM::ma_bic(Imm32 imm, Register dest, AutoRegisterScope& scratch, SBit s, Assembler::Condition c)
+{
+    ma_alu(dest, imm, dest, scratch, OpBic, s, c);
 }
 
 // Exclusive or.
 void
 MacroAssemblerARM::ma_eor(Register src, Register dest, SBit s, Assembler::Condition c)
 {
     ma_eor(dest, src, dest, s, c);
 }
@@ -560,26 +553,26 @@ MacroAssemblerARM::ma_eor(Register src, 
 void
 MacroAssemblerARM::ma_eor(Register src1, Register src2, Register dest,
                           SBit s, Assembler::Condition c)
 {
     as_eor(dest, src1, O2Reg(src2), s, c);
 }
 
 void
-MacroAssemblerARM::ma_eor(Imm32 imm, Register dest, SBit s, Assembler::Condition c)
-{
-    ma_alu(dest, imm, dest, OpEor, s, c);
-}
-
-void
-MacroAssemblerARM::ma_eor(Imm32 imm, Register src1, Register dest,
+MacroAssemblerARM::ma_eor(Imm32 imm, Register dest, AutoRegisterScope& scratch, SBit s, Assembler::Condition c)
+{
+    ma_alu(dest, imm, dest, scratch, OpEor, s, c);
+}
+
+void
+MacroAssemblerARM::ma_eor(Imm32 imm, Register src1, Register dest, AutoRegisterScope& scratch,
        SBit s, Assembler::Condition c)
 {
-    ma_alu(src1, imm, dest, OpEor, s, c);
+    ma_alu(src1, imm, dest, scratch, OpEor, s, c);
 }
 
 // Or.
 void
 MacroAssemblerARM::ma_orr(Register src, Register dest, SBit s, Assembler::Condition c)
 {
     ma_orr(dest, src, dest, s, c);
 }
@@ -587,53 +580,53 @@ MacroAssemblerARM::ma_orr(Register src, 
 void
 MacroAssemblerARM::ma_orr(Register src1, Register src2, Register dest,
                           SBit s, Assembler::Condition c)
 {
     as_orr(dest, src1, O2Reg(src2), s, c);
 }
 
 void
-MacroAssemblerARM::ma_orr(Imm32 imm, Register dest, SBit s, Assembler::Condition c)
-{
-    ma_alu(dest, imm, dest, OpOrr, s, c);
-}
-
-void
-MacroAssemblerARM::ma_orr(Imm32 imm, Register src1, Register dest,
+MacroAssemblerARM::ma_orr(Imm32 imm, Register dest, AutoRegisterScope& scratch, SBit s, Assembler::Condition c)
+{
+    ma_alu(dest, imm, dest, scratch, OpOrr, s, c);
+}
+
+void
+MacroAssemblerARM::ma_orr(Imm32 imm, Register src1, Register dest, AutoRegisterScope& scratch,
                           SBit s, Assembler::Condition c)
 {
-    ma_alu(src1, imm, dest, OpOrr, s, c);
+    ma_alu(src1, imm, dest, scratch, OpOrr, s, c);
 }
 
 // Arithmetic-based ops.
 // Add with carry.
 void
-MacroAssemblerARM::ma_adc(Imm32 imm, Register dest, SBit s, Condition c)
-{
-    ma_alu(dest, imm, dest, OpAdc, s, c);
+MacroAssemblerARM::ma_adc(Imm32 imm, Register dest, AutoRegisterScope& scratch, SBit s, Condition c)
+{
+    ma_alu(dest, imm, dest, scratch, OpAdc, s, c);
 }
 
 void
 MacroAssemblerARM::ma_adc(Register src, Register dest, SBit s, Condition c)
 {
     as_alu(dest, dest, O2Reg(src), OpAdc, s, c);
 }
 
 void
 MacroAssemblerARM::ma_adc(Register src1, Register src2, Register dest, SBit s, Condition c)
 {
     as_alu(dest, src1, O2Reg(src2), OpAdc, s, c);
 }
 
 // Add.
 void
-MacroAssemblerARM::ma_add(Imm32 imm, Register dest, SBit s, Condition c)
-{
-    ma_alu(dest, imm, dest, OpAdd, s, c);
+MacroAssemblerARM::ma_add(Imm32 imm, Register dest, AutoRegisterScope& scratch, SBit s, Condition c)
+{
+    ma_alu(dest, imm, dest, scratch, OpAdd, s, c);
 }
 
 void
 MacroAssemblerARM::ma_add(Register src1, Register dest, SBit s, Condition c)
 {
     ma_alu(dest, O2Reg(src1), dest, OpAdd, s, c);
 }
 
@@ -645,45 +638,45 @@ MacroAssemblerARM::ma_add(Register src1,
 
 void
 MacroAssemblerARM::ma_add(Register src1, Operand op, Register dest, SBit s, Condition c)
 {
     ma_alu(src1, op, dest, OpAdd, s, c);
 }
 
 void
-MacroAssemblerARM::ma_add(Register src1, Imm32 op, Register dest, SBit s, Condition c)
-{
-    ma_alu(src1, op, dest, OpAdd, s, c);
+MacroAssemblerARM::ma_add(Register src1, Imm32 op, Register dest, AutoRegisterScope& scratch, SBit s, Condition c)
+{
+    ma_alu(src1, op, dest, scratch, OpAdd, s, c);
 }
 
 // Subtract with carry.
 void
-MacroAssemblerARM::ma_sbc(Imm32 imm, Register dest, SBit s, Condition c)
-{
-    ma_alu(dest, imm, dest, OpSbc, s, c);
+MacroAssemblerARM::ma_sbc(Imm32 imm, Register dest, AutoRegisterScope& scratch, SBit s, Condition c)
+{
+    ma_alu(dest, imm, dest, scratch, OpSbc, s, c);
 }
 
 void
 MacroAssemblerARM::ma_sbc(Register src1, Register dest, SBit s, Condition c)
 {
     as_alu(dest, dest, O2Reg(src1), OpSbc, s, c);
 }
 
 void
 MacroAssemblerARM::ma_sbc(Register src1, Register src2, Register dest, SBit s, Condition c)
 {
     as_alu(dest, src1, O2Reg(src2), OpSbc, s, c);
 }
 
 // Subtract.
 void
-MacroAssemblerARM::ma_sub(Imm32 imm, Register dest, SBit s, Condition c)
-{
-    ma_alu(dest, imm, dest, OpSub, s, c);
+MacroAssemblerARM::ma_sub(Imm32 imm, Register dest, AutoRegisterScope& scratch, SBit s, Condition c)
+{
+    ma_alu(dest, imm, dest, scratch, OpSub, s, c);
 }
 
 void
 MacroAssemblerARM::ma_sub(Register src1, Register dest, SBit s, Condition c)
 {
     ma_alu(dest, Operand(src1), dest, OpSub, s, c);
 }
 
@@ -695,51 +688,51 @@ MacroAssemblerARM::ma_sub(Register src1,
 
 void
 MacroAssemblerARM::ma_sub(Register src1, Operand op, Register dest, SBit s, Condition c)
 {
     ma_alu(src1, op, dest, OpSub, s, c);
 }
 
 void
-MacroAssemblerARM::ma_sub(Register src1, Imm32 op, Register dest, SBit s, Condition c)
-{
-    ma_alu(src1, op, dest, OpSub, s, c);
+MacroAssemblerARM::ma_sub(Register src1, Imm32 op, Register dest, AutoRegisterScope& scratch, SBit s, Condition c)
+{
+    ma_alu(src1, op, dest, scratch, OpSub, s, c);
 }
 
 // Reverse subtract.
 void
-MacroAssemblerARM::ma_rsb(Imm32 imm, Register dest, SBit s, Condition c)
-{
-    ma_alu(dest, imm, dest, OpRsb, s, c);
+MacroAssemblerARM::ma_rsb(Imm32 imm, Register dest, AutoRegisterScope& scratch, SBit s, Condition c)
+{
+    ma_alu(dest, imm, dest, scratch, OpRsb, s, c);
 }
 
 void
 MacroAssemblerARM::ma_rsb(Register src1, Register dest, SBit s, Condition c)
 {
     as_alu(dest, src1, O2Reg(dest), OpRsb, s, c);
 }
 
 void
 MacroAssemblerARM::ma_rsb(Register src1, Register src2, Register dest, SBit s, Condition c)
 {
     as_alu(dest, src1, O2Reg(src2), OpRsb, s, c);
 }
 
 void
-MacroAssemblerARM::ma_rsb(Register src1, Imm32 op2, Register dest, SBit s, Condition c)
-{
-    ma_alu(src1, op2, dest, OpRsb, s, c);
+MacroAssemblerARM::ma_rsb(Register src1, Imm32 op2, Register dest, AutoRegisterScope& scratch, SBit s, Condition c)
+{
+    ma_alu(src1, op2, dest, scratch, OpRsb, s, c);
 }
 
 // Reverse subtract with carry.
 void
-MacroAssemblerARM::ma_rsc(Imm32 imm, Register dest, SBit s, Condition c)
-{
-    ma_alu(dest, imm, dest, OpRsc, s, c);
+MacroAssemblerARM::ma_rsc(Imm32 imm, Register dest, AutoRegisterScope& scratch, SBit s, Condition c)
+{
+    ma_alu(dest, imm, dest, scratch, OpRsc, s, c);
 }
 
 void
 MacroAssemblerARM::ma_rsc(Register src1, Register dest, SBit s, Condition c)
 {
     as_alu(dest, dest, O2Reg(src1), OpRsc, s, c);
 }
 
@@ -747,102 +740,109 @@ void
 MacroAssemblerARM::ma_rsc(Register src1, Register src2, Register dest, SBit s, Condition c)
 {
     as_alu(dest, src1, O2Reg(src2), OpRsc, s, c);
 }
 
 // Compares/tests.
 // Compare negative (sets condition codes as src1 + src2 would).
 void
-MacroAssemblerARM::ma_cmn(Register src1, Imm32 imm, Condition c)
-{
-    ma_alu(src1, imm, InvalidReg, OpCmn, SetCC, c);
+MacroAssemblerARM::ma_cmn(Register src1, Imm32 imm, AutoRegisterScope& scratch, Condition c)
+{
+    ma_alu(src1, imm, InvalidReg, scratch, OpCmn, SetCC, c);
 }
 
 void
 MacroAssemblerARM::ma_cmn(Register src1, Register src2, Condition c)
 {
     as_alu(InvalidReg, src2, O2Reg(src1), OpCmn, SetCC, c);
 }
 
 void
 MacroAssemblerARM::ma_cmn(Register src1, Operand op, Condition c)
 {
     MOZ_CRASH("Feature NYI");
 }
 
 // Compare (src - src2).
 void
-MacroAssemblerARM::ma_cmp(Register src1, Imm32 imm, Condition c)
-{
-    ma_alu(src1, imm, InvalidReg, OpCmp, SetCC, c);
-}
-
-void
-MacroAssemblerARM::ma_cmp(Register src1, ImmWord ptr, Condition c)
-{
-    ma_cmp(src1, Imm32(ptr.value), c);
-}
-
-void
-MacroAssemblerARM::ma_cmp(Register src1, ImmGCPtr ptr, Condition c)
-{
-    ScratchRegisterScope scratch(asMasm());
+MacroAssemblerARM::ma_cmp(Register src1, Imm32 imm, AutoRegisterScope& scratch, Condition c)
+{
+    ma_alu(src1, imm, InvalidReg, scratch, OpCmp, SetCC, c);
+}
+
+void
+MacroAssemblerARM::ma_cmp(Register src1, ImmTag tag, Condition c)
+{
+    // ImmTag comparisons can always be done without use of a scratch register.
+    Imm8 negtag = Imm8(-tag.value);
+    MOZ_ASSERT(!negtag.invalid);
+    as_cmn(src1, negtag, c);
+}
+
+void
+MacroAssemblerARM::ma_cmp(Register src1, ImmWord ptr, AutoRegisterScope& scratch, Condition c)
+{
+    ma_cmp(src1, Imm32(ptr.value), scratch, c);
+}
+
+void
+MacroAssemblerARM::ma_cmp(Register src1, ImmGCPtr ptr, AutoRegisterScope& scratch, Condition c)
+{
     ma_mov(ptr, scratch);
     ma_cmp(src1, scratch, c);
 }
 
 void
-MacroAssemblerARM::ma_cmp(Register src1, Operand op, Condition c)
+MacroAssemblerARM::ma_cmp(Register src1, Operand op, AutoRegisterScope& scratch,
+                          AutoRegisterScope& scratch2, Condition c)
 {
     switch (op.getTag()) {
       case Operand::OP2:
         as_cmp(src1, op.toOp2(), c);
         break;
-      case Operand::MEM: {
-        ScratchRegisterScope scratch(asMasm());
-        ma_ldr(op.toAddress(), scratch);
+      case Operand::MEM:
+        ma_ldr(op.toAddress(), scratch, scratch2);
         as_cmp(src1, O2Reg(scratch), c);
         break;
-      }
       default:
         MOZ_CRASH("trying to compare FP and integer registers");
     }
 }
 
 void
 MacroAssemblerARM::ma_cmp(Register src1, Register src2, Condition c)
 {
     as_cmp(src1, O2Reg(src2), c);
 }
 
 // Test for equality, (src1 ^ src2).
 void
-MacroAssemblerARM::ma_teq(Register src1, Imm32 imm, Condition c)
-{
-    ma_alu(src1, imm, InvalidReg, OpTeq, SetCC, c);
+MacroAssemblerARM::ma_teq(Register src1, Imm32 imm, AutoRegisterScope& scratch, Condition c)
+{
+    ma_alu(src1, imm, InvalidReg, scratch, OpTeq, SetCC, c);
 }
 
 void
 MacroAssemblerARM::ma_teq(Register src1, Register src2, Condition c)
 {
     as_tst(src1, O2Reg(src2), c);
 }
 
 void
 MacroAssemblerARM::ma_teq(Register src1, Operand op, Condition c)
 {
     as_teq(src1, op.toOp2(), c);
 }
 
 // Test (src1 & src2).
 void
-MacroAssemblerARM::ma_tst(Register src1, Imm32 imm, Condition c)
-{
-    ma_alu(src1, imm, InvalidReg, OpTst, SetCC, c);
+MacroAssemblerARM::ma_tst(Register src1, Imm32 imm, AutoRegisterScope& scratch, Condition c)
+{
+    ma_alu(src1, imm, InvalidReg, scratch, OpTst, SetCC, c);
 }
 
 void
 MacroAssemblerARM::ma_tst(Register src1, Register src2, Condition c)
 {
     as_tst(src1, O2Reg(src2), c);
 }
 
@@ -854,81 +854,79 @@ MacroAssemblerARM::ma_tst(Register src1,
 
 void
 MacroAssemblerARM::ma_mul(Register src1, Register src2, Register dest)
 {
     as_mul(dest, src1, src2);
 }
 
 void
-MacroAssemblerARM::ma_mul(Register src1, Imm32 imm, Register dest)
-{
-    ScratchRegisterScope scratch(asMasm());
+MacroAssemblerARM::ma_mul(Register src1, Imm32 imm, Register dest, AutoRegisterScope& scratch)
+{
     ma_mov(imm, scratch);
     as_mul(dest, src1, scratch);
 }
 
 Assembler::Condition
-MacroAssemblerARM::ma_check_mul(Register src1, Register src2, Register dest, Condition cond)
-{
-    ScratchRegisterScope scratch(asMasm());
-
-    // TODO: this operation is illegal on armv6 and earlier if src2 ==
-    // ScratchRegister or src2 == dest.
+MacroAssemblerARM::ma_check_mul(Register src1, Register src2, Register dest,
+                                AutoRegisterScope& scratch, Condition cond)
+{
+    // TODO: this operation is illegal on armv6 and earlier
+    // if src2 == scratch or src2 == dest.
     if (cond == Equal || cond == NotEqual) {
         as_smull(scratch, dest, src1, src2, SetCC);
         return cond;
     }
 
     if (cond == Overflow) {
         as_smull(scratch, dest, src1, src2);
         as_cmp(scratch, asr(dest, 31));
         return NotEqual;
     }
 
     MOZ_CRASH("Condition NYI");
 }
 
 Assembler::Condition
-MacroAssemblerARM::ma_check_mul(Register src1, Imm32 imm, Register dest, Condition cond)
-{
-    ScratchRegisterScope scratch(asMasm());
-
+MacroAssemblerARM::ma_check_mul(Register src1, Imm32 imm, Register dest,
+                                AutoRegisterScope& scratch, Condition cond)
+{
     ma_mov(imm, scratch);
+
     if (cond == Equal || cond == NotEqual) {
         as_smull(scratch, dest, scratch, src1, SetCC);
         return cond;
     }
 
     if (cond == Overflow) {
         as_smull(scratch, dest, scratch, src1);
         as_cmp(scratch, asr(dest, 31));
         return NotEqual;
     }
 
     MOZ_CRASH("Condition NYI");
 }
 
 void
-MacroAssemblerARM::ma_umull(Register src1, Imm32 imm, Register destHigh, Register destLow)
-{
-    ScratchRegisterScope scratch(asMasm());
+MacroAssemblerARM::ma_umull(Register src1, Imm32 imm, Register destHigh, Register destLow,
+                            AutoRegisterScope& scratch)
+{
     ma_mov(imm, scratch);
     as_umull(destHigh, destLow, src1, scratch);
 }
 
 void
 MacroAssemblerARM::ma_umull(Register src1, Register src2, Register destHigh, Register destLow)
 {
     as_umull(destHigh, destLow, src1, src2);
 }
 
 void
 MacroAssemblerARM::ma_mod_mask(Register src, Register dest, Register hold, Register tmp,
-                               int32_t shift)
+                               AutoRegisterScope& scratch, AutoRegisterScope& scratch2, int32_t shift)
 {
     // We wish to compute x % (1<<y) - 1 for a known constant, y.
     //
     // 1. Let b = (1<<y) and C = (1<<y)-1, then think of the 32 bit dividend as
     // a number in base b, namely c_0*1 + c_1*b + c_2*b^2 ... c_n*b^n
     //
     // 2. Since both addition and multiplication commute with modulus:
     //   x % C == (c_0 + c_1*b + ... + c_n*b^n) % C ==
@@ -939,78 +937,70 @@ MacroAssemblerARM::ma_mod_mask(Register 
     //
     // Each c_n can easily be computed by a shift/bitextract, and the modulus
     // can be maintained by simply subtracting by C whenever the number gets
     // over C.
     int32_t mask = (1 << shift) - 1;
     Label head;
 
     // Register 'hold' holds -1 if the value was negative, 1 otherwise. The
-    // ScratchRegister holds the remaining bits that have not been processed lr
+    // scratch reg holds the remaining bits that have not been processed. lr
     // serves as a temporary location to store extracted bits into as well as
     // holding the trial subtraction as a temp value dest is the accumulator
     // (and holds the final result)
     //
     // Move the whole value into tmp, setting the codition codes so we can muck
     // with them later.
-    //
-    // Note that we cannot use ScratchRegister in place of tmp here, as ma_and
-    // below on certain architectures move the mask into ScratchRegister before
-    // performing the bitwise and.
     as_mov(tmp, O2Reg(src), SetCC);
     // Zero out the dest.
     ma_mov(Imm32(0), dest);
     // Set the hold appropriately.
     ma_mov(Imm32(1), hold);
     ma_mov(Imm32(-1), hold, Signed);
-    ma_rsb(Imm32(0), tmp, SetCC, Signed);
+    as_rsb(tmp, tmp, Imm8(0), SetCC, Signed);
 
     // Begin the main loop.
     bind(&head);
     {
-        AutoRegisterScope scratch2(asMasm(), secondScratchReg_);
-
-        // Extract the bottom bits into lr.
-        ma_and(Imm32(mask), tmp, scratch2);
+        // Extract the bottom bits.
+        ma_and(Imm32(mask), tmp, scratch, scratch2);
         // Add those bits to the accumulator.
-        ma_add(scratch2, dest, dest);
+        ma_add(scratch, dest, dest);
         // Do a trial subtraction, this is the same operation as cmp, but we store
         // the dest.
-        ma_sub(dest, Imm32(mask), scratch2, SetCC);
+        ma_sub(dest, Imm32(mask), scratch, scratch2, SetCC);
         // If (sum - C) > 0, store sum - C back into sum, thus performing a modulus.
-        ma_mov(scratch2, dest, LeaveCC, NotSigned);
+        ma_mov(scratch, dest, LeaveCC, NotSigned);
         // Get rid of the bits that we extracted before, and set the condition codes.
         as_mov(tmp, lsr(tmp, shift), SetCC);
         // If the shift produced zero, finish, otherwise, continue in the loop.
         ma_b(&head, NonZero);
     }
 
     // Check the hold to see if we need to negate the result. Hold can only be
     // 1 or -1, so this will never set the 0 flag.
-    ma_cmp(hold, Imm32(0));
+    as_cmp(hold, Imm8(0));
     // If the hold was non-zero, negate the result to be in line with what JS
     // wants this will set the condition codes if we try to negate.
-    ma_rsb(Imm32(0), dest, SetCC, Signed);
+    as_rsb(dest, dest, Imm8(0), SetCC, Signed);
     // Since the Zero flag is not set by the compare, we can *only* set the Zero
     // flag in the rsb, so Zero is set iff we negated zero (e.g. the result of
     // the computation was -0.0).
 }
 
 void
-MacroAssemblerARM::ma_smod(Register num, Register div, Register dest)
-{
-    ScratchRegisterScope scratch(asMasm());
+MacroAssemblerARM::ma_smod(Register num, Register div, Register dest, AutoRegisterScope& scratch)
+{
     as_sdiv(scratch, num, div);
     as_mls(dest, num, scratch, div);
 }
 
 void
-MacroAssemblerARM::ma_umod(Register num, Register div, Register dest)
-{
-    ScratchRegisterScope scratch(asMasm());
+MacroAssemblerARM::ma_umod(Register num, Register div, Register dest, AutoRegisterScope& scratch)
+{
     as_udiv(scratch, num, div);
     as_mls(dest, num, scratch, div);
 }
 
 // Division
 void
 MacroAssemblerARM::ma_sdiv(Register num, Register div, Register dest, Condition cond)
 {
@@ -1026,60 +1016,52 @@ MacroAssemblerARM::ma_udiv(Register num,
 // Miscellaneous instructions.
 void
 MacroAssemblerARM::ma_clz(Register src, Register dest, Condition cond)
 {
     as_clz(dest, src, cond);
 }
 
 void
-MacroAssemblerARM::ma_ctz(Register src, Register dest)
+MacroAssemblerARM::ma_ctz(Register src, Register dest, AutoRegisterScope& scratch)
 {
     // int c = __clz(a & -a);
     // return a ? 31 - c : c;
-
-    ScratchRegisterScope scratch(asMasm());
     as_rsb(scratch, src, Imm8(0), SetCC);
     as_and(dest, src, O2Reg(scratch), LeaveCC);
     as_clz(dest, dest);
     as_rsb(dest, dest, Imm8(0x1F), LeaveCC, Assembler::NotEqual);
 }
 
 // Memory.
 // Shortcut for when we know we're transferring 32 bits of data.
 void
 MacroAssemblerARM::ma_dtr(LoadStore ls, Register rn, Imm32 offset, Register rt,
-                          Index mode, Assembler::Condition cc)
-{
-    ma_dataTransferN(ls, 32, true, rn, offset, rt, mode, cc);
-}
-
-void
-MacroAssemblerARM::ma_dtr(LoadStore ls, Register rn, Register rm, Register rt,
-                          Index mode, Assembler::Condition cc)
-{
-    MOZ_CRASH("Feature NYI");
+                          AutoRegisterScope& scratch, Index mode, Assembler::Condition cc)
+{
+    ma_dataTransferN(ls, 32, true, rn, offset, rt, scratch, mode, cc);
+}
+
+void
+MacroAssemblerARM::ma_dtr(LoadStore ls, Register rt, const Address& addr,
+                          AutoRegisterScope& scratch, Index mode, Condition cc)
+{
+    ma_dataTransferN(ls, 32, true, addr.base, Imm32(addr.offset), rt, scratch, mode, cc);
 }
 
 void
 MacroAssemblerARM::ma_str(Register rt, DTRAddr addr, Index mode, Condition cc)
 {
     as_dtr(IsStore, 32, mode, rt, addr, cc);
 }
 
 void
-MacroAssemblerARM::ma_dtr(LoadStore ls, Register rt, const Address& addr, Index mode, Condition cc)
-{
-    ma_dataTransferN(ls, 32, true, addr.base, Imm32(addr.offset), rt, mode, cc);
-}
-
-void
-MacroAssemblerARM::ma_str(Register rt, const Address& addr, Index mode, Condition cc)
-{
-    ma_dtr(IsStore, rt, addr, mode, cc);
+MacroAssemblerARM::ma_str(Register rt, const Address& addr, AutoRegisterScope& scratch, Index mode, Condition cc)
+{
+    ma_dtr(IsStore, rt, addr, scratch, mode, cc);
 }
 
 void
 MacroAssemblerARM::ma_strd(Register rt, DebugOnly<Register> rt2, EDtrAddr addr, Index mode, Condition cc)
 {
     MOZ_ASSERT((rt.code() & 1) == 0);
     MOZ_ASSERT(rt2.value.code() == rt.code() + 1);
     as_extdtr(IsStore, 64, true, mode, rt, addr, cc);
@@ -1087,19 +1069,19 @@ MacroAssemblerARM::ma_strd(Register rt, 
 
 void
 MacroAssemblerARM::ma_ldr(DTRAddr addr, Register rt, Index mode, Condition cc)
 {
     as_dtr(IsLoad, 32, mode, rt, addr, cc);
 }
 
 void
-MacroAssemblerARM::ma_ldr(const Address& addr, Register rt, Index mode, Condition cc)
-{
-    ma_dtr(IsLoad, rt, addr, mode, cc);
+MacroAssemblerARM::ma_ldr(const Address& addr, Register rt, AutoRegisterScope& scratch, Index mode, Condition cc)
+{
+    ma_dtr(IsLoad, rt, addr, scratch, mode, cc);
 }
 
 void
 MacroAssemblerARM::ma_ldrb(DTRAddr addr, Register rt, Index mode, Condition cc)
 {
     as_dtr(IsLoad, 8, mode, rt, addr, cc);
 }
 
@@ -1142,50 +1124,64 @@ void
 MacroAssemblerARM::ma_strb(Register rt, DTRAddr addr, Index mode, Condition cc)
 {
     as_dtr(IsStore, 8, mode, rt, addr, cc);
 }
 
 // Specialty for moving N bits of data, where n == 8,16,32,64.
 BufferOffset
 MacroAssemblerARM::ma_dataTransferN(LoadStore ls, int size, bool IsSigned,
-                                    Register rn, Register rm, Register rt,
-                                    Index mode, Assembler::Condition cc, unsigned shiftAmount)
-{
+                                    Register rn, Register rm, Register rt, AutoRegisterScope& scratch,
+                                    Index mode, Assembler::Condition cc, Scale scale)
+{
+    MOZ_ASSERT(size == 8 || size == 16 || size == 32 || size == 64);
+
     if (size == 32 || (size == 8 && !IsSigned))
-        return as_dtr(ls, size, mode, rt, DTRAddr(rn, DtrRegImmShift(rm, LSL, shiftAmount)), cc);
-
-    ScratchRegisterScope scratch(asMasm());
-
-    if (shiftAmount != 0) {
-        MOZ_ASSERT(rn != scratch);
-        MOZ_ASSERT(rt != scratch);
-        ma_lsl(Imm32(shiftAmount), rm, scratch);
+        return as_dtr(ls, size, mode, rt, DTRAddr(rn, DtrRegImmShift(rm, LSL, scale)), cc);
+
+    if (scale != TimesOne) {
+        ma_lsl(Imm32(scale), rm, scratch);
         rm = scratch;
     }
 
     return as_extdtr(ls, size, IsSigned, mode, rt, EDtrAddr(rn, EDtrOffReg(rm)), cc);
 }
 
+// No scratch register is required if scale is TimesOne.
 BufferOffset
 MacroAssemblerARM::ma_dataTransferN(LoadStore ls, int size, bool IsSigned,
-                                    Register rn, Imm32 offset, Register rt,
+                                    Register rn, Register rm, Register rt,
                                     Index mode, Assembler::Condition cc)
 {
+    MOZ_ASSERT(size == 8 || size == 16 || size == 32 || size == 64);
+    if (size == 32 || (size == 8 && !IsSigned))
+        return as_dtr(ls, size, mode, rt, DTRAddr(rn, DtrRegImmShift(rm, LSL, TimesOne)), cc);
+    return as_extdtr(ls, size, IsSigned, mode, rt, EDtrAddr(rn, EDtrOffReg(rm)), cc);
+}
+
+
+BufferOffset
+MacroAssemblerARM::ma_dataTransferN(LoadStore ls, int size, bool IsSigned,
+                                    Register rn, Imm32 offset, Register rt, AutoRegisterScope& scratch,
+                                    Index mode, Assembler::Condition cc)
+{
+    MOZ_ASSERT(!(ls == IsLoad && mode == PostIndex && rt == pc),
+               "Large-offset PostIndex loading into PC requires special logic: see ma_popn_pc().");
+
     int off = offset.value;
 
     // We can encode this as a standard ldr.
     if (size == 32 || (size == 8 && !IsSigned) ) {
         if (off < 4096 && off > -4096) {
             // This encodes as a single instruction, Emulating mode's behavior
             // in a multi-instruction sequence is not necessary.
             return as_dtr(ls, size, mode, rt, DTRAddr(rn, DtrOffImm(off)), cc);
         }
 
-        // We cannot encode this offset in a a single ldr. For mode == index,
+        // We cannot encode this offset in a single ldr. For mode == index,
         // try to encode it as |add scratch, base, imm; ldr dest, [scratch, +offset]|.
         // This does not wark for mode == PreIndex or mode == PostIndex.
         // PreIndex is simple, just do the add into the base register first,
         // then do a PreIndex'ed load. PostIndexed loads can be tricky.
         // Normally, doing the load with an index of 0, then doing an add would
         // work, but if the destination is the PC, you don't get to execute the
         // instruction after the branch, which will lead to the base register
         // not being updated correctly. Explicitly handle this case, without
@@ -1193,56 +1189,23 @@ MacroAssemblerARM::ma_dataTransferN(Load
 
         // mode == Offset
         //  add   scratch, base, offset_hi
         //  ldr   dest, [scratch, +offset_lo]
         //
         // mode == PreIndex
         //  add   base, base, offset_hi
         //  ldr   dest, [base, +offset_lo]!
-        //
-        // mode == PostIndex, dest == pc
-        //  ldr   scratch, [base]
-        //  add   base, base, offset_hi
-        //  add   base, base, offset_lo
-        //  mov   dest, scratch
-        // PostIndex with the pc as the destination needs to be handled
-        // specially, since in the code below, the write into 'dest' is going to
-        // alter the control flow, so the following instruction would never get
-        // emitted.
-        //
-        // mode == PostIndex, dest != pc
-        //  ldr   dest, [base], offset_lo
-        //  add   base, base, offset_hi
-
-        if (rt == pc && mode == PostIndex && ls == IsLoad) {
-            ScratchRegisterScope scratch(asMasm());
-            ma_mov(rn, scratch);
-            ma_alu(rn, offset, rn, OpAdd);
-            return as_dtr(IsLoad, size, Offset, pc, DTRAddr(scratch, DtrOffImm(0)), cc);
-        }
-
-        // Often this code is called with rt as the ScratchRegister.
-        // The register is logically owned by the caller, so we cannot ask
-        // for exclusive ownership here. If full checking is desired,
-        // this function should take an explicit scratch register argument.
-        const Register& scratch = ScratchRegister;
-        MOZ_ASSERT(rn != scratch);
 
         int bottom = off & 0xfff;
         int neg_bottom = 0x1000 - bottom;
-        // For a regular offset, base == ScratchRegister does what we want.
-        // Modify the scratch register, leaving the actual base unscathed.
-        Register base = scratch;
-        // For the preindex case, we want to just re-use rn as the base
-        // register, so when the base register is updated *before* the load, rn
-        // is updated.
-        if (mode == PreIndex)
-            base = rn;
+
+        MOZ_ASSERT(rn != scratch);
         MOZ_ASSERT(mode != PostIndex);
+
         // At this point, both off - bottom and off + neg_bottom will be
         // reasonable-ish quantities.
         //
         // Note a neg_bottom of 0x1000 can not be encoded as an immediate
         // negative offset in the instruction and this occurs when bottom is
         // zero, so this case is guarded against below.
         if (off < 0) {
             Operand2 sub_off = Imm8(-(off - bottom)); // sub_off = bottom - off
@@ -1279,18 +1242,16 @@ MacroAssemblerARM::ma_dataTransferN(Load
                 as_add(scratch, rn, sub_off, LeaveCC,  cc);
                 return as_dtr(ls, size, Offset, rt, DTRAddr(scratch, DtrOffImm(-neg_bottom)), cc);
             }
         }
 
         ma_mov(offset, scratch);
         return as_dtr(ls, size, mode, rt, DTRAddr(rn, DtrRegImmShift(scratch, LSL, 0)));
     } else {
-        ScratchRegisterScope scratch(asMasm());
-
         // Should attempt to use the extended load/store instructions.
         if (off < 256 && off > -256)
             return as_extdtr(ls, size, IsSigned, mode, rt, EDtrAddr(rn, EDtrOffImm(off)), cc);
 
         // We cannot encode this offset in a single extldr. Try to encode it as
         // an add scratch, base, imm; extldr dest, [scratch, +offset].
         int bottom = off & 0xff;
         int neg_bottom = 0x100 - bottom;
@@ -1346,31 +1307,48 @@ MacroAssemblerARM::ma_dataTransferN(Load
         ma_mov(offset, scratch);
         return as_extdtr(ls, size, IsSigned, mode, rt, EDtrAddr(rn, EDtrOffReg(scratch)), cc);
     }
 }
 
 void
 MacroAssemblerARM::ma_pop(Register r)
 {
-    ma_dtr(IsLoad, sp, Imm32(4), r, PostIndex);
+    as_dtr(IsLoad, 32, PostIndex, r, DTRAddr(sp, DtrOffImm(4)));
+}
+
+void
+MacroAssemblerARM::ma_popn_pc(Imm32 n, AutoRegisterScope& scratch, AutoRegisterScope& scratch2)
+{
+    // pc <- [sp]; sp += n
+    int32_t nv = n.value;
+
+    if (nv < 4096 && nv >= -4096) {
+        as_dtr(IsLoad, 32, PostIndex, pc, DTRAddr(sp, DtrOffImm(nv)));
+    } else {
+        ma_mov(sp, scratch);
+        ma_add(Imm32(n), sp, scratch2);
+        as_dtr(IsLoad, 32, Offset, pc, DTRAddr(scratch, DtrOffImm(0)));
+    }
 }
 
 void
 MacroAssemblerARM::ma_push(Register r)
 {
-    // Pushing sp is not well defined: use two instructions.
-    if (r == sp) {
-        ScratchRegisterScope scratch(asMasm());
-        ma_mov(sp, scratch);
-        ma_dtr(IsStore, sp, Imm32(-4), scratch, PreIndex);
-        return;
-    }
-
-    ma_dtr(IsStore, sp, Imm32(-4), r, PreIndex);
+    MOZ_ASSERT(r != sp, "Use ma_push_sp().");
+    as_dtr(IsStore, 32, PreIndex, r, DTRAddr(sp, DtrOffImm(-4)));
+}
+
+void
+MacroAssemblerARM::ma_push_sp(Register r, AutoRegisterScope& scratch)
+{
+    // Pushing sp is not well-defined: use two instructions.
+    MOZ_ASSERT(r == sp);
+    ma_mov(sp, scratch);
+    as_dtr(IsStore, 32, PreIndex, scratch, DTRAddr(sp, DtrOffImm(-4)));
 }
 
 void
 MacroAssemblerARM::ma_vpop(VFPRegister r)
 {
     startFloatTransferM(IsLoad, sp, IA, WriteBack);
     transferFloatReg(r);
     finishFloatTransfer();
@@ -1720,26 +1698,25 @@ MacroAssemblerARM::ma_vxfer(Register src
 
 void
 MacroAssemblerARM::ma_vxfer(Register src1, Register src2, FloatRegister dest, Condition cc)
 {
     as_vxfer(src1, src2, VFPRegister(dest), CoreToFloat, cc);
 }
 
 BufferOffset
-MacroAssemblerARM::ma_vdtr(LoadStore ls, const Address& addr, VFPRegister rt, Condition cc)
+MacroAssemblerARM::ma_vdtr(LoadStore ls, const Address& addr, VFPRegister rt,
+                           AutoRegisterScope& scratch, Condition cc)
 {
     int off = addr.offset;
     MOZ_ASSERT((off & 3) == 0);
     Register base = addr.base;
     if (off > -1024 && off < 1024)
         return as_vdtr(ls, rt, Operand(addr).toVFPAddr(), cc);
 
-    ScratchRegisterScope scratch(asMasm());
-
     // We cannot encode this offset in a a single ldr. Try to encode it as an
     // add scratch, base, imm; ldr dest, [scratch, +offset].
     int bottom = off & (0xff << 2);
     int neg_bottom = (0x100 << 2) - bottom;
     // At this point, both off - bottom and off + neg_bottom will be
     // reasonable-ish quantities.
     //
     // Note a neg_bottom of 0x400 can not be encoded as an immediate negative
@@ -1775,60 +1752,70 @@ MacroAssemblerARM::ma_vdtr(LoadStore ls,
         if (!sub_off.invalid && bottom != 0) {
             // Guarded against by: bottom != 0
             MOZ_ASSERT(neg_bottom < 0x400);
             // sub_off = neg_bottom + off
             as_add(scratch, base, sub_off, LeaveCC, cc);
             return as_vdtr(ls, rt, VFPAddr(scratch, VFPOffImm(-neg_bottom)), cc);
         }
     }
-    ma_add(base, Imm32(off), scratch, LeaveCC, cc);
+
+    // Safe to use scratch as dest, since ma_add() overwrites dest at the end
+    // and can't use it as internal scratch since it may also == base.
+    ma_add(base, Imm32(off), scratch, scratch, LeaveCC, cc);
     return as_vdtr(ls, rt, VFPAddr(scratch, VFPOffImm(0)), cc);
 }
 
 BufferOffset
 MacroAssemblerARM::ma_vldr(VFPAddr addr, VFPRegister dest, Condition cc)
 {
     return as_vdtr(IsLoad, dest, addr, cc);
 }
 
 BufferOffset
-MacroAssemblerARM::ma_vldr(const Address& addr, VFPRegister dest, Condition cc)
-{
-    return ma_vdtr(IsLoad, addr, dest, cc);
+MacroAssemblerARM::ma_vldr(const Address& addr, VFPRegister dest, AutoRegisterScope& scratch, Condition cc)
+{
+    return ma_vdtr(IsLoad, addr, dest, scratch, cc);
 }
 
 BufferOffset
-MacroAssemblerARM::ma_vldr(VFPRegister src, Register base, Register index, int32_t shift,
-                           Condition cc)
-{
-    ScratchRegisterScope scratch(asMasm());
+MacroAssemblerARM::ma_vldr(VFPRegister src, Register base, Register index, AutoRegisterScope& scratch,
+                           int32_t shift, Condition cc)
+{
     as_add(scratch, base, lsl(index, shift), LeaveCC, cc);
-    return ma_vldr(Address(scratch, 0), src, cc);
+    return as_vdtr(IsLoad, src, Operand(Address(scratch, 0)).toVFPAddr(), cc);
 }
 
 BufferOffset
 MacroAssemblerARM::ma_vstr(VFPRegister src, VFPAddr addr, Condition cc)
 {
     return as_vdtr(IsStore, src, addr, cc);
 }
 
 BufferOffset
-MacroAssemblerARM::ma_vstr(VFPRegister src, const Address& addr, Condition cc)
-{
-    return ma_vdtr(IsStore, addr, src, cc);
+MacroAssemblerARM::ma_vstr(VFPRegister src, const Address& addr, AutoRegisterScope& scratch, Condition cc)
+{
+    return ma_vdtr(IsStore, addr, src, scratch, cc);
 }
 
 BufferOffset
-MacroAssemblerARM::ma_vstr(VFPRegister src, Register base, Register index, int32_t shift,
-                           int32_t offset, Condition cc)
-{
-    ScratchRegisterScope scratch(asMasm());
+MacroAssemblerARM::ma_vstr(VFPRegister src, Register base, Register index, AutoRegisterScope& scratch,
+                           AutoRegisterScope& scratch2, int32_t shift, int32_t offset, Condition cc)
+{
     as_add(scratch, base, lsl(index, shift), LeaveCC, cc);
-    return ma_vstr(src, Address(scratch, offset), cc);
+    return ma_vstr(src, Address(scratch, offset), scratch2, cc);
+}
+
+// Without an offset, no second scratch register is necessary.
+BufferOffset
+MacroAssemblerARM::ma_vstr(VFPRegister src, Register base, Register index, AutoRegisterScope& scratch,
+                           int32_t shift, Condition cc)
+{
+    as_add(scratch, base, lsl(index, shift), LeaveCC, cc);
+    return as_vdtr(IsStore, src, Operand(Address(scratch, 0)).toVFPAddr(), cc);
 }
 
 bool
 MacroAssemblerARMCompat::buildOOLFakeExitFrame(void* fakeReturnAddr)
 {
     DebugOnly<uint32_t> initialDepth = asMasm().framePushed();
     uint32_t descriptor = MakeFrameDescriptor(asMasm().framePushed(), JitFrame_IonJS,
                                               ExitFrameLayout::Size());
@@ -1892,117 +1879,126 @@ MacroAssemblerARMCompat::movePtr(wasm::S
 {
     append(AsmJSAbsoluteAddress(CodeOffset(currentOffset()), imm));
     ma_movPatchable(Imm32(-1), dest, Always);
 }
 
 void
 MacroAssemblerARMCompat::load8ZeroExtend(const Address& address, Register dest)
 {
-    ma_dataTransferN(IsLoad, 8, false, address.base, Imm32(address.offset), dest);
+    ScratchRegisterScope scratch(asMasm());
+    ma_dataTransferN(IsLoad, 8, false, address.base, Imm32(address.offset), dest, scratch);
 }
 
 void
 MacroAssemblerARMCompat::load8ZeroExtend(const BaseIndex& src, Register dest)
 {
     Register base = src.base;
     uint32_t scale = Imm32::ShiftOf(src.scale).value;
 
+    ScratchRegisterScope scratch(asMasm());
+    SecondScratchRegisterScope scratch2(asMasm());
+
     if (src.offset == 0) {
         ma_ldrb(DTRAddr(base, DtrRegImmShift(src.index, LSL, scale)), dest);
     } else {
-        ScratchRegisterScope scratch(asMasm());
-        ma_add(base, Imm32(src.offset), scratch);
+        ma_add(base, Imm32(src.offset), scratch, scratch2);
         ma_ldrb(DTRAddr(scratch, DtrRegImmShift(src.index, LSL, scale)), dest);
     }
 }
 
 void
 MacroAssemblerARMCompat::load8SignExtend(const Address& address, Register dest)
 {
-    ma_dataTransferN(IsLoad, 8, true, address.base, Imm32(address.offset), dest);
+    ScratchRegisterScope scratch(asMasm());
+    ma_dataTransferN(IsLoad, 8, true, address.base, Imm32(address.offset), dest, scratch);
 }
 
 void
 MacroAssemblerARMCompat::load8SignExtend(const BaseIndex& src, Register dest)
 {
     Register index = src.index;
 
     ScratchRegisterScope scratch(asMasm());
+    SecondScratchRegisterScope scratch2(asMasm());
 
     // ARMv7 does not have LSL on an index register with an extended load.
     if (src.scale != TimesOne) {
         ma_lsl(Imm32::ShiftOf(src.scale), index, scratch);
         index = scratch;
     }
 
     if (src.offset != 0) {
         if (index != scratch) {
             ma_mov(index, scratch);
             index = scratch;
         }
-        ma_add(Imm32(src.offset), index);
+        ma_add(Imm32(src.offset), index, scratch2);
     }
     ma_ldrsb(EDtrAddr(src.base, EDtrOffReg(index)), dest);
 }
 
 void
 MacroAssemblerARMCompat::load16ZeroExtend(const Address& address, Register dest)
 {
-    ma_dataTransferN(IsLoad, 16, false, address.base, Imm32(address.offset), dest);
+    ScratchRegisterScope scratch(asMasm());
+    ma_dataTransferN(IsLoad, 16, false, address.base, Imm32(address.offset), dest, scratch);
 }
 
 void
 MacroAssemblerARMCompat::load16ZeroExtend(const BaseIndex& src, Register dest)
 {
     Register index = src.index;
 
     ScratchRegisterScope scratch(asMasm());
+    SecondScratchRegisterScope scratch2(asMasm());
 
     // ARMv7 does not have LSL on an index register with an extended load.
     if (src.scale != TimesOne) {
         ma_lsl(Imm32::ShiftOf(src.scale), index, scratch);
         index = scratch;
     }
 
     if (src.offset != 0) {
         if (index != scratch) {
             ma_mov(index, scratch);
             index = scratch;
         }
-        ma_add(Imm32(src.offset), index);
+        ma_add(Imm32(src.offset), index, scratch2);
     }
     ma_ldrh(EDtrAddr(src.base, EDtrOffReg(index)), dest);
 }
 
 void
 MacroAssemblerARMCompat::load16SignExtend(const Address& address, Register dest)
 {
-    ma_dataTransferN(IsLoad, 16, true, address.base, Imm32(address.offset), dest);
+    ScratchRegisterScope scratch(asMasm());
+    ma_dataTransferN(IsLoad, 16, true, address.base, Imm32(address.offset), dest, scratch);
 }
 
 void
 MacroAssemblerARMCompat::load16SignExtend(const BaseIndex& src, Register dest)
 {
     Register index = src.index;
 
     ScratchRegisterScope scratch(asMasm());
+    SecondScratchRegisterScope scratch2(asMasm());
 
     // We don't have LSL on index register yet.
     if (src.scale != TimesOne) {
         ma_lsl(Imm32::ShiftOf(src.scale), index, scratch);
         index = scratch;
     }
 
     if (src.offset != 0) {
         if (index != scratch) {
             ma_mov(index, scratch);
             index = scratch;
         }
-        ma_add(Imm32(src.offset), index);
+        ma_add(Imm32(src.offset), index, scratch2);
     }
     ma_ldrsh(EDtrAddr(src.base, EDtrOffReg(index)), dest);
 }
 
 void
 MacroAssemblerARMCompat::load32(const Address& address, Register dest)
 {
     loadPtr(address, dest);
@@ -2018,34 +2014,35 @@ void
 MacroAssemblerARMCompat::load32(AbsoluteAddress address, Register dest)
 {
     loadPtr(address, dest);
 }
 
 void
 MacroAssemblerARMCompat::loadPtr(const Address& address, Register dest)
 {
-    ma_ldr(address, dest);
+    ScratchRegisterScope scratch(asMasm());
+    ma_ldr(address, dest, scratch);
 }
 
 void
 MacroAssemblerARMCompat::loadPtr(const BaseIndex& src, Register dest)
 {
     Register base = src.base;
     uint32_t scale = Imm32::ShiftOf(src.scale).value;
 
+    ScratchRegisterScope scratch(asMasm());
+    SecondScratchRegisterScope scratch2(asMasm());
+
     if (src.offset != 0) {
-        ScratchRegisterScope scratch(asMasm());
-        ma_mov(base, scratch);
-        ma_add(Imm32(src.offset), scratch);
+        ma_add(base, Imm32(src.offset), scratch, scratch2);
         ma_ldr(DTRAddr(scratch, DtrRegImmShift(src.index, LSL, scale)), dest);
-        return;
+    } else {
+        ma_ldr(DTRAddr(base, DtrRegImmShift(src.index, LSL, scale)), dest);
     }
-
-    ma_ldr(DTRAddr(base, DtrRegImmShift(src.index, LSL, scale)), dest);
 }
 
 void
 MacroAssemblerARMCompat::loadPtr(AbsoluteAddress address, Register dest)
 {
     MOZ_ASSERT(dest != pc); // Use dest as a scratch register.
     movePtr(ImmWord(uintptr_t(address.addr)), dest);
     loadPtr(Address(dest, 0), dest);
@@ -2057,160 +2054,202 @@ MacroAssemblerARMCompat::loadPtr(wasm::S
     MOZ_ASSERT(dest != pc); // Use dest as a scratch register.
     movePtr(address, dest);
     loadPtr(Address(dest, 0), dest);
 }
 
 void
 MacroAssemblerARMCompat::loadPrivate(const Address& address, Register dest)
 {
-    ma_ldr(ToPayload(address), dest);
+    ScratchRegisterScope scratch(asMasm());
+    ma_ldr(ToPayload(address), dest, scratch);
 }
 
 void
 MacroAssemblerARMCompat::loadDouble(const Address& address, FloatRegister dest)
 {
-    ma_vldr(address, dest);
+    ScratchRegisterScope scratch(asMasm());
+    ma_vldr(address, dest, scratch);
 }
 
 void
 MacroAssemblerARMCompat::loadDouble(const BaseIndex& src, FloatRegister dest)
 {
     // VFP instructions don't even support register Base + register Index modes,
     // so just add the index, then handle the offset like normal.
     Register base = src.base;
     Register index = src.index;
     uint32_t scale = Imm32::ShiftOf(src.scale).value;
     int32_t offset = src.offset;
 
     ScratchRegisterScope scratch(asMasm());
+    SecondScratchRegisterScope scratch2(asMasm());
+
     as_add(scratch, base, lsl(index, scale));
-    ma_vldr(Address(scratch, offset), dest);
+    ma_vldr(Address(scratch, offset), dest, scratch2);
 }
 
 void
 MacroAssemblerARMCompat::loadFloatAsDouble(const Address& address, FloatRegister dest)
 {
+    ScratchRegisterScope scratch(asMasm());
+
     VFPRegister rt = dest;
-    ma_vldr(address, rt.singleOverlay());
+    ma_vldr(address, rt.singleOverlay(), scratch);
     as_vcvt(rt, rt.singleOverlay());
 }
 
 void
 MacroAssemblerARMCompat::loadFloatAsDouble(const BaseIndex& src, FloatRegister dest)
 {
     // VFP instructions don't even support register Base + register Index modes,
     // so just add the index, then handle the offset like normal.
     Register base = src.base;
     Register index = src.index;
     uint32_t scale = Imm32::ShiftOf(src.scale).value;
     int32_t offset = src.offset;
     VFPRegister rt = dest;
 
     ScratchRegisterScope scratch(asMasm());
+    SecondScratchRegisterScope scratch2(asMasm());
+
     as_add(scratch, base, lsl(index, scale));
-    ma_vldr(Address(scratch, offset), rt.singleOverlay());
+    ma_vldr(Address(scratch, offset), rt.singleOverlay(), scratch2);
     as_vcvt(rt, rt.singleOverlay());
 }
 
 void
 MacroAssemblerARMCompat::loadFloat32(const Address& address, FloatRegister dest)
 {
-    ma_vldr(address, VFPRegister(dest).singleOverlay());
+    ScratchRegisterScope scratch(asMasm());
+    ma_vldr(address, VFPRegister(dest).singleOverlay(), scratch);
 }
 
 void
 MacroAssemblerARMCompat::loadFloat32(const BaseIndex& src, FloatRegister dest)
 {
     // VFP instructions don't even support register Base + register Index modes,
     // so just add the index, then handle the offset like normal.
     Register base = src.base;
     Register index = src.index;
     uint32_t scale = Imm32::ShiftOf(src.scale).value;
     int32_t offset = src.offset;
 
     ScratchRegisterScope scratch(asMasm());
+    SecondScratchRegisterScope scratch2(asMasm());
+
     as_add(scratch, base, lsl(index, scale));
-    ma_vldr(Address(scratch, offset), VFPRegister(dest).singleOverlay());
+    ma_vldr(Address(scratch, offset), VFPRegister(dest).singleOverlay(), scratch2);
 }
 
 void
 MacroAssemblerARMCompat::store8(Imm32 imm, const Address& address)
 {
-    AutoRegisterScope scratch2(asMasm(), secondScratchReg_);
+    SecondScratchRegisterScope scratch2(asMasm());
     ma_mov(imm, scratch2);
     store8(scratch2, address);
 }
 
 void
 MacroAssemblerARMCompat::store8(Register src, const Address& address)
 {
-    ma_dataTransferN(IsStore, 8, false, address.base, Imm32(address.offset), src);
+    ScratchRegisterScope scratch(asMasm());
+    ma_dataTransferN(IsStore, 8, false, address.base, Imm32(address.offset), src, scratch);
 }
 
 void
 MacroAssemblerARMCompat::store8(Imm32 imm, const BaseIndex& dest)
 {
-    AutoRegisterScope scratch2(asMasm(), secondScratchReg_);
-    ma_mov(imm, scratch2);
-    store8(scratch2, dest);
+    Register base = dest.base;
+    uint32_t scale = Imm32::ShiftOf(dest.scale).value;
+
+    ScratchRegisterScope scratch(asMasm());
+    SecondScratchRegisterScope scratch2(asMasm());
+
+    if (dest.offset != 0) {
+        ma_add(base, Imm32(dest.offset), scratch, scratch2);
+        ma_mov(imm, scratch2);
+        ma_strb(scratch2, DTRAddr(scratch, DtrRegImmShift(dest.index, LSL, scale)));
+    } else {
+        ma_mov(imm, scratch2);
+        ma_strb(scratch2, DTRAddr(base, DtrRegImmShift(dest.index, LSL, scale)));
+    }
 }
 
 void
 MacroAssemblerARMCompat::store8(Register src, const BaseIndex& dest)
 {
     Register base = dest.base;
     uint32_t scale = Imm32::ShiftOf(dest.scale).value;
 
     ScratchRegisterScope scratch(asMasm());
+    SecondScratchRegisterScope scratch2(asMasm());
 
     if (dest.offset != 0) {
-        ma_add(base, Imm32(dest.offset), scratch);
-        base = scratch;
+        ma_add(base, Imm32(dest.offset), scratch, scratch2);
+        ma_strb(src, DTRAddr(scratch, DtrRegImmShift(dest.index, LSL, scale)));
+    } else {
+        ma_strb(src, DTRAddr(base, DtrRegImmShift(dest.index, LSL, scale)));
     }
-    ma_strb(src, DTRAddr(base, DtrRegImmShift(dest.index, LSL, scale)));
 }
 
 void
 MacroAssemblerARMCompat::store16(Imm32 imm, const Address& address)
 {
-    AutoRegisterScope scratch2(asMasm(), secondScratchReg_);
+    SecondScratchRegisterScope scratch2(asMasm());
     ma_mov(imm, scratch2);
     store16(scratch2, address);
 }
 
 void
 MacroAssemblerARMCompat::store16(Register src, const Address& address)
 {
-    ma_dataTransferN(IsStore, 16, false, address.base, Imm32(address.offset), src);
+    ScratchRegisterScope scratch(asMasm());
+    ma_dataTransferN(IsStore, 16, false, address.base, Imm32(address.offset), src, scratch);
 }
 
 void
 MacroAssemblerARMCompat::store16(Imm32 imm, const BaseIndex& dest)
 {
-    AutoRegisterScope scratch2(asMasm(), secondScratchReg_);
+    Register index = dest.index;
+
+    ScratchRegisterScope scratch(asMasm());
+    SecondScratchRegisterScope scratch2(asMasm());
+
+    // We don't have LSL on index register yet.
+    if (dest.scale != TimesOne) {
+        ma_lsl(Imm32::ShiftOf(dest.scale), index, scratch);
+        index = scratch;
+    }
+
+    if (dest.offset != 0) {
+        ma_add(index, Imm32(dest.offset), scratch, scratch2);
+        index = scratch;
+    }
+
     ma_mov(imm, scratch2);
-    store16(scratch2, dest);
+    ma_strh(scratch2, EDtrAddr(dest.base, EDtrOffReg(index)));
 }
 
 void
 MacroAssemblerARMCompat::store16(Register src, const BaseIndex& address)
 {
     Register index = address.index;
 
     ScratchRegisterScope scratch(asMasm());
+    SecondScratchRegisterScope scratch2(asMasm());
 
     // We don't have LSL on index register yet.
     if (address.scale != TimesOne) {
         ma_lsl(Imm32::ShiftOf(address.scale), index, scratch);
         index = scratch;
     }
 
     if (address.offset != 0) {
-        ma_add(index, Imm32(address.offset), scratch);
+        ma_add(index, Imm32(address.offset), scratch, scratch2);
         index = scratch;
     }
     ma_strh(src, EDtrAddr(address.base, EDtrOffReg(index)));
 }
 
 void
 MacroAssemblerARMCompat::store32(Register src, AbsoluteAddress address)
 {
@@ -2221,42 +2260,57 @@ void
 MacroAssemblerARMCompat::store32(Register src, const Address& address)
 {
     storePtr(src, address);
 }
 
 void
 MacroAssemblerARMCompat::store32(Imm32 src, const Address& address)
 {
-    AutoRegisterScope scratch2(asMasm(), secondScratchReg_);
-    move32(src, scratch2);
-    storePtr(scratch2, address);
+    ScratchRegisterScope scratch(asMasm());
+    SecondScratchRegisterScope scratch2(asMasm());
+    move32(src, scratch);
+    ma_str(scratch, address, scratch2);
 }
 
 void
 MacroAssemblerARMCompat::store32(Imm32 imm, const BaseIndex& dest)
 {
+    Register base = dest.base;
+    uint32_t scale = Imm32::ShiftOf(dest.scale).value;
+
     ScratchRegisterScope scratch(asMasm());
-    ma_mov(imm, scratch);
-    store32(scratch, dest);
+    SecondScratchRegisterScope scratch2(asMasm());
+
+    if (dest.offset != 0) {
+        ma_add(base, Imm32(dest.offset), scratch, scratch2);
+        ma_mov(imm, scratch2);
+        ma_str(scratch2, DTRAddr(scratch, DtrRegImmShift(dest.index, LSL, scale)));
+    } else {
+        ma_mov(imm, scratch);
+        ma_str(scratch, DTRAddr(base, DtrRegImmShift(dest.index, LSL, scale)));
+    }
+
 }
 
 void
 MacroAssemblerARMCompat::store32(Register src, const BaseIndex& dest)
 {
     Register base = dest.base;
     uint32_t scale = Imm32::ShiftOf(dest.scale).value;
 
-    AutoRegisterScope scratch2(asMasm(), secondScratchReg_);
+    ScratchRegisterScope scratch(asMasm());
+    SecondScratchRegisterScope scratch2(asMasm());
 
     if (dest.offset != 0) {
-        ma_add(base, Imm32(dest.offset), scratch2);
-        base = scratch2;
+        ma_add(base, Imm32(dest.offset), scratch, scratch2);
+        ma_str(src, DTRAddr(scratch, DtrRegImmShift(dest.index, LSL, scale)));
+    } else {
+        ma_str(src, DTRAddr(base, DtrRegImmShift(dest.index, LSL, scale)));
     }
-    ma_str(src, DTRAddr(base, DtrRegImmShift(dest.index, LSL, scale)));
 }
 
 template <typename T>
 void
 MacroAssemblerARMCompat::storePtr(ImmWord imm, T address)
 {
     ScratchRegisterScope scratch(asMasm());
     movePtr(imm, scratch);
@@ -2286,31 +2340,32 @@ MacroAssemblerARMCompat::storePtr(ImmGCP
 }
 
 template void MacroAssemblerARMCompat::storePtr<Address>(ImmGCPtr imm, Address address);
 template void MacroAssemblerARMCompat::storePtr<BaseIndex>(ImmGCPtr imm, BaseIndex address);
 
 void
 MacroAssemblerARMCompat::storePtr(Register src, const Address& address)
 {
-    ma_str(src, address);
+    SecondScratchRegisterScope scratch2(asMasm());
+    ma_str(src, address, scratch2);
 }
 
 void
 MacroAssemblerARMCompat::storePtr(Register src, const BaseIndex& address)
 {
     store32(src, address);
 }
 
 void
 MacroAssemblerARMCompat::storePtr(Register src, AbsoluteAddress dest)
 {
     ScratchRegisterScope scratch(asMasm());
     movePtr(ImmWord(uintptr_t(dest.addr)), scratch);
-    storePtr(src, Address(scratch, 0));
+    ma_str(src, DTRAddr(scratch, DtrOffImm(0)));
 }
 
 // Note: this function clobbers the input register.
 void
 MacroAssembler::clampDoubleToUint8(FloatRegister input, Register output)
 {
     if (HasVFPv3()) {
         Label notSplit;
@@ -2326,33 +2381,32 @@ MacroAssembler::clampDoubleToUint8(Float
         }
 
         // Move the fixed point value into an integer register.
         {
             ScratchFloat32Scope scratchFloat(*this);
             as_vxfer(output, InvalidReg, scratchFloat.uintOverlay(), FloatToCore);
         }
 
+        ScratchRegisterScope scratch(*this);
+
         // See if this value *might* have been an exact integer after adding
         // 0.5. This tests the 1/2 through 1/16,777,216th places, but 0.5 needs
         // to be tested out to the 1/140,737,488,355,328th place.
-        ma_tst(output, Imm32(0x00ffffff));
+        ma_tst(output, Imm32(0x00ffffff), scratch);
         // Convert to a uint8 by shifting out all of the fraction bits.
         ma_lsr(Imm32(24), output, output);
         // If any of the bottom 24 bits were non-zero, then we're good, since
         // this number can't be exactly XX.0
         ma_b(&notSplit, NonZero);
-        {
-            ScratchRegisterScope scratch(*this);
-            as_vxfer(scratch, InvalidReg, input, FloatToCore);
-            ma_cmp(scratch, Imm32(0));
-        }
+        as_vxfer(scratch, InvalidReg, input, FloatToCore);
+        as_cmp(scratch, Imm8(0));
         // If the lower 32 bits of the double were 0, then this was an exact number,
         // and it should be even.
-        ma_bic(Imm32(1), output, LeaveCC, Zero);
+        as_bic(output, output, Imm8(1), LeaveCC, Zero);
         bind(&notSplit);
     } else {
         ScratchDoubleScope scratchDouble(*this);
         MOZ_ASSERT(input != scratchDouble);
         loadConstantDouble(0.5, scratchDouble);
 
         Label outOfRange;
         ma_vcmpz(input);
@@ -2360,115 +2414,120 @@ MacroAssembler::clampDoubleToUint8(Float
         ma_vadd(input, scratchDouble, input);
         // Do the conversion to an integer.
         as_vcvt(VFPRegister(scratchDouble).uintOverlay(), VFPRegister(input));
         // Copy the converted value out.
         as_vxfer(output, InvalidReg, scratchDouble, FloatToCore);
         as_vmrs(pc);
         ma_mov(Imm32(0), output, Overflow);  // NaN => 0
         ma_b(&outOfRange, Overflow);  // NaN
-        ma_cmp(output, Imm32(0xff));
+        as_cmp(output, Imm8(0xff));
         ma_mov(Imm32(0xff), output, Above);
         ma_b(&outOfRange, Above);
         // Convert it back to see if we got the same value back.
         as_vcvt(scratchDouble, VFPRegister(scratchDouble).uintOverlay());
         // Do the check.
         as_vcmp(scratchDouble, input);
         as_vmrs(pc);
-        ma_bic(Imm32(1), output, LeaveCC, Zero);
+        as_bic(output, output, Imm8(1), LeaveCC, Zero);
         bind(&outOfRange);
     }
 }
 
 void
 MacroAssemblerARMCompat::cmp32(Register lhs, Imm32 rhs)
 {
-    MOZ_ASSERT(lhs != ScratchRegister);
-    ma_cmp(lhs, rhs);
+    ScratchRegisterScope scratch(asMasm());
+    ma_cmp(lhs, rhs, scratch);
 }
 
 void
 MacroAssemblerARMCompat::cmp32(Register lhs, Register rhs)
 {
     ma_cmp(lhs, rhs);
 }
 
 void
 MacroAssemblerARMCompat::cmpPtr(Register lhs, ImmWord rhs)
 {
-    MOZ_ASSERT(lhs != ScratchRegister);
-    ma_cmp(lhs, Imm32(rhs.value));
+    cmp32(lhs, Imm32(rhs.value));
 }
 
 void
 MacroAssemblerARMCompat::cmpPtr(Register lhs, ImmPtr rhs)
 {
-    return cmpPtr(lhs, ImmWord(uintptr_t(rhs.value)));
+    cmpPtr(lhs, ImmWord(uintptr_t(rhs.value)));
 }
 
 void
 MacroAssemblerARMCompat::cmpPtr(Register lhs, Register rhs)
 {
     ma_cmp(lhs, rhs);
 }
 
 void
 MacroAssemblerARMCompat::cmpPtr(Register lhs, ImmGCPtr rhs)
 {
-    ma_cmp(lhs, rhs);
+    ScratchRegisterScope scratch(asMasm());
+    ma_cmp(lhs, rhs, scratch);
 }
 
 void
 MacroAssemblerARMCompat::cmpPtr(Register lhs, Imm32 rhs)
 {
-    ma_cmp(lhs, rhs);
+    cmp32(lhs, rhs);
 }
 
 void
 MacroAssemblerARMCompat::cmpPtr(const Address& lhs, Register rhs)
 {
     ScratchRegisterScope scratch(asMasm());
-    loadPtr(lhs, scratch);
-    cmpPtr(scratch, rhs);
+    SecondScratchRegisterScope scratch2(asMasm());
+    ma_ldr(lhs, scratch, scratch2);
+    ma_cmp(scratch, rhs);
 }
 
 void
 MacroAssemblerARMCompat::cmpPtr(const Address& lhs, ImmWord rhs)
 {
-    AutoRegisterScope scratch2(asMasm(), secondScratchReg_);
-    loadPtr(lhs, scratch2);
-    ma_cmp(scratch2, Imm32(rhs.value));
+    ScratchRegisterScope scratch(asMasm());
+    SecondScratchRegisterScope scratch2(asMasm());
+    ma_ldr(lhs, scratch, scratch2);
+    ma_cmp(scratch, Imm32(rhs.value), scratch2);
 }
 
 void
 MacroAssemblerARMCompat::cmpPtr(const Address& lhs, ImmPtr rhs)
 {
     cmpPtr(lhs, ImmWord(uintptr_t(rhs.value)));
 }
 
 void
 MacroAssemblerARMCompat::cmpPtr(const Address& lhs, ImmGCPtr rhs)
 {
-    AutoRegisterScope scratch2(asMasm(), secondScratchReg_);
-    loadPtr(lhs, scratch2);
-    ma_cmp(scratch2, rhs);
+    ScratchRegisterScope scratch(asMasm());
+    SecondScratchRegisterScope scratch2(asMasm());
+    ma_ldr(lhs, scratch, scratch2);
+    ma_cmp(scratch, rhs, scratch2);
 }
 
 void
 MacroAssemblerARMCompat::cmpPtr(const Address& lhs, Imm32 rhs)
 {
-    AutoRegisterScope scratch2(asMasm(), secondScratchReg_);
-    loadPtr(lhs, scratch2);
-    ma_cmp(scratch2, rhs);
+    ScratchRegisterScope scratch(asMasm());
+    SecondScratchRegisterScope scratch2(asMasm());
+    ma_ldr(lhs, scratch, scratch2);
+    ma_cmp(scratch, rhs, scratch2);
 }
 
 void
 MacroAssemblerARMCompat::setStackArg(Register reg, uint32_t arg)
 {
-    ma_dataTransferN(IsStore, 32, true, sp, Imm32(arg * sizeof(intptr_t)), reg);
+    ScratchRegisterScope scratch(asMasm());
+    ma_dataTransferN(IsStore, 32, true, sp, Imm32(arg * sizeof(intptr_t)), reg, scratch);
 }
 
 void
 MacroAssemblerARMCompat::minMaxDouble(FloatRegister srcDest, FloatRegister second, bool canBeNaN,
                                       bool isMax)
 {
     FloatRegister first = srcDest;
 
@@ -2605,17 +2664,18 @@ MacroAssemblerARMCompat::testBoolean(Ass
     return cond;
 }
 
 Assembler::Condition
 MacroAssemblerARMCompat::testDouble(Assembler::Condition cond, const ValueOperand& value)
 {
     MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
     Assembler::Condition actual = (cond == Equal) ? Below : AboveOrEqual;
-    ma_cmp(value.typeReg(), ImmTag(JSVAL_TAG_CLEAR));
+    ScratchRegisterScope scratch(asMasm());
+    ma_cmp(value.typeReg(), ImmTag(JSVAL_TAG_CLEAR), scratch);
     return actual;
 }
 
 Assembler::Condition
 MacroAssemblerARMCompat::testNull(Assembler::Condition cond, const ValueOperand& value)
 {
     MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
     ma_cmp(value.typeReg(), ImmType(JSVAL_TYPE_NULL));
@@ -2965,40 +3025,43 @@ MacroAssemblerARMCompat::unboxNonDouble(
 {
     if (operand.payloadReg() != dest)
         ma_mov(operand.payloadReg(), dest);
 }
 
 void
 MacroAssemblerARMCompat::unboxNonDouble(const Address& src, Register dest)
 {
-    ma_ldr(ToPayload(src), dest);
+    ScratchRegisterScope scratch(asMasm());
+    ma_ldr(ToPayload(src), dest, scratch);
 }
 
 void
 MacroAssemblerARMCompat::unboxNonDouble(const BaseIndex& src, Register dest)
 {
     ScratchRegisterScope scratch(asMasm());
+    SecondScratchRegisterScope scratch2(asMasm());
     ma_alu(src.base, lsl(src.index, src.scale), scratch, OpAdd);
-    ma_ldr(Address(scratch, src.offset), dest);
+    ma_ldr(Address(scratch, src.offset), dest, scratch2);
 }
 
 void
 MacroAssemblerARMCompat::unboxDouble(const ValueOperand& operand, FloatRegister dest)
 {
     MOZ_ASSERT(dest.isDouble());
     as_vxfer(operand.payloadReg(), operand.typeReg(),
              VFPRegister(dest), CoreToFloat);
 }
 
 void
 MacroAssemblerARMCompat::unboxDouble(const Address& src, FloatRegister dest)
 {
     MOZ_ASSERT(dest.isDouble());
-    ma_vldr(src, dest);
+    ScratchRegisterScope scratch(asMasm());
+    ma_vldr(src, dest, scratch);
 }
 
 void
 MacroAssemblerARMCompat::unboxValue(const ValueOperand& src, AnyRegister dest)
 {
     if (dest.isFloat()) {
         Label notInt32, end;
         asMasm().branchTestInt32(Assembler::NotEqual, src, &notInt32);
@@ -3031,17 +3094,17 @@ MacroAssemblerARMCompat::boxNonDouble(JS
     ma_mov(ImmType(type), dest.typeReg());
 }
 
 void
 MacroAssemblerARMCompat::boolValueToDouble(const ValueOperand& operand, FloatRegister dest)
 {
     VFPRegister d = VFPRegister(dest);
     loadConstantDouble(1.0, dest);
-    ma_cmp(operand.payloadReg(), Imm32(0));
+    as_cmp(operand.payloadReg(), Imm8(0));
     // If the source is 0, then subtract the dest from itself, producing 0.
     as_vsub(d, d, d, Equal);
 }
 
 void
 MacroAssemblerARMCompat::int32ValueToDouble(const ValueOperand& operand, FloatRegister dest)
 {
     VFPRegister vfpdest = VFPRegister(dest);
@@ -3053,17 +3116,17 @@ MacroAssemblerARMCompat::int32ValueToDou
     as_vcvt(vfpdest, scratch.sintOverlay());
 }
 
 void
 MacroAssemblerARMCompat::boolValueToFloat32(const ValueOperand& operand, FloatRegister dest)
 {
     VFPRegister d = VFPRegister(dest).singleOverlay();
     loadConstantFloat32(1.0, dest);
-    ma_cmp(operand.payloadReg(), Imm32(0));
+    as_cmp(operand.payloadReg(), Imm8(0));
     // If the source is 0, then subtract the dest from itself, producing 0.
     as_vsub(d, d, d, Equal);
 }
 
 void
 MacroAssemblerARMCompat::int32ValueToFloat32(const ValueOperand& operand, FloatRegister dest)
 {
     // Transfer the integral value to a floating point register.
@@ -3089,27 +3152,31 @@ MacroAssemblerARMCompat::loadConstantFlo
 void
 MacroAssemblerARMCompat::loadInt32OrDouble(const Address& src, FloatRegister dest)
 {
     Label notInt32, end;
 
     // If it's an int, convert to a double.
     {
         ScratchRegisterScope scratch(asMasm());
-
-        ma_ldr(ToType(src), scratch);
+        SecondScratchRegisterScope scratch2(asMasm());
+
+        ma_ldr(ToType(src), scratch, scratch2);
         asMasm().branchTestInt32(Assembler::NotEqual, scratch, &notInt32);
-        ma_ldr(ToPayload(src), scratch);
+        ma_ldr(ToPayload(src), scratch, scratch2);
         convertInt32ToDouble(scratch, dest);
         ma_b(&end);
     }
 
     // Not an int, just load as double.
     bind(&notInt32);
-    ma_vldr(src, dest);
+    {
+        ScratchRegisterScope scratch(asMasm());
+        ma_vldr(src, dest, scratch);
+    }
     bind(&end);
 }
 
 void
 MacroAssemblerARMCompat::loadInt32OrDouble(Register base, Register index,
                                            FloatRegister dest, int32_t shift)
 {
     Label notInt32, end;
@@ -3118,30 +3185,30 @@ MacroAssemblerARMCompat::loadInt32OrDoub
 
     ScratchRegisterScope scratch(asMasm());
 
     // If it's an int, convert it to double.
     ma_alu(base, lsl(index, shift), scratch, OpAdd);
 
     // Since we only have one scratch register, we need to stomp over it with
     // the tag.
-    ma_ldr(Address(scratch, NUNBOX32_TYPE_OFFSET), scratch);
+    ma_ldr(DTRAddr(scratch, DtrOffImm(NUNBOX32_TYPE_OFFSET)), scratch);
     asMasm().branchTestInt32(Assembler::NotEqual, scratch, &notInt32);
 
     // Implicitly requires NUNBOX32_PAYLOAD_OFFSET == 0: no offset provided
     ma_ldr(DTRAddr(base, DtrRegImmShift(index, LSL, shift)), scratch);
     convertInt32ToDouble(scratch, dest);
     ma_b(&end);
 
     // Not an int, just load as double.
     bind(&notInt32);
     // First, recompute the offset that had been stored in the scratch register
     // since the scratch register was overwritten loading in the type.
     ma_alu(base, lsl(index, shift), scratch, OpAdd);
-    ma_vldr(Address(scratch, 0), dest);
+    ma_vldr(VFPAddr(scratch, VFPOffImm(0)), dest);
     bind(&end);
 }
 
 void
 MacroAssemblerARMCompat::loadConstantDouble(double dp, FloatRegister dest)
 {
     loadConstantDouble(wasm::RawF64(dp), dest);
 }
@@ -3174,24 +3241,26 @@ MacroAssemblerARMCompat::testDoubleTruth
     as_vmrs(pc);
     as_cmp(r0, O2Reg(r0), Overflow);
     return truthy ? NonZero : Zero;
 }
 
 Register
 MacroAssemblerARMCompat::extractObject(const Address& address, Register scratch)
 {
-    ma_ldr(ToPayload(address), scratch);
+    SecondScratchRegisterScope scratch2(asMasm());
+    ma_ldr(ToPayload(address), scratch, scratch2);
     return scratch;
 }
 
 Register
 MacroAssemblerARMCompat::extractTag(const Address& address, Register scratch)
 {
-    ma_ldr(ToType(address), scratch);
+    SecondScratchRegisterScope scratch2(asMasm());
+    ma_ldr(ToType(address), scratch, scratch2);
     return scratch;
 }
 
 Register
 MacroAssemblerARMCompat::extractTag(const BaseIndex& address, Register scratch)
 {
     ma_alu(address.base, lsl(address.index, address.scale), scratch, OpAdd, LeaveCC);
     return extractTag(Address(scratch, address.offset), scratch);
@@ -3215,18 +3284,19 @@ MacroAssemblerARMCompat::moveValue(const
 }
 
 /////////////////////////////////////////////////////////////////
 // X86/X64-common (ARM too now) interface.
 /////////////////////////////////////////////////////////////////
 void
 MacroAssemblerARMCompat::storeValue(ValueOperand val, const Address& dst)
 {
-    ma_str(val.payloadReg(), ToPayload(dst));
-    ma_str(val.typeReg(), ToType(dst));
+    SecondScratchRegisterScope scratch2(asMasm());
+    ma_str(val.payloadReg(), ToPayload(dst), scratch2);
+    ma_str(val.typeReg(), ToType(dst), scratch2);
 }
 
 void
 MacroAssemblerARMCompat::storeValue(ValueOperand val, const BaseIndex& dest)
 {
     ScratchRegisterScope scratch(asMasm());
 
     if (isValueDTRDCandidate(val) && Abs(dest.offset) <= 255) {
@@ -3321,21 +3391,23 @@ MacroAssemblerARMCompat::loadValue(Addre
             transferReg(val.typeReg());
             finishDataTransfer();
             return;
         }
     }
     // Ensure that loading the payload does not erase the pointer to the Value
     // in memory.
     if (type.base != val.payloadReg()) {
-        ma_ldr(payload, val.payloadReg());
-        ma_ldr(type, val.typeReg());
+        SecondScratchRegisterScope scratch2(asMasm());
+        ma_ldr(payload, val.payloadReg(), scratch2);
+        ma_ldr(type, val.typeReg(), scratch2);
     } else {
-        ma_ldr(type, val.typeReg());
-        ma_ldr(payload, val.payloadReg());
+        SecondScratchRegisterScope scratch2(asMasm());
+        ma_ldr(type, val.typeReg(), scratch2);
+        ma_ldr(payload, val.payloadReg(), scratch2);
     }
 }
 
 void
 MacroAssemblerARMCompat::tagValue(JSValueType type, Register payload, ValueOperand dest)
 {
     MOZ_ASSERT(dest.typeReg() != dest.payloadReg());
     if (payload != dest.payloadReg())
@@ -3349,131 +3421,135 @@ MacroAssemblerARMCompat::pushValue(Value
     ma_push(val.typeReg());
     ma_push(val.payloadReg());
 }
 
 void
 MacroAssemblerARMCompat::pushValue(const Address& addr)
 {
     ScratchRegisterScope scratch(asMasm());
-    ma_ldr(ToType(addr), scratch);
+    SecondScratchRegisterScope scratch2(asMasm());
+
+    ma_ldr(ToType(addr), scratch, scratch2);
     ma_push(scratch);
-    ma_ldr(ToPayloadAfterStackPush(addr), scratch);
+    ma_ldr(ToPayloadAfterStackPush(addr), scratch, scratch2);
     ma_push(scratch);
 }
 
 void
 MacroAssemblerARMCompat::popValue(ValueOperand val)
 {
     ma_pop(val.payloadReg());
     ma_pop(val.typeReg());
 }
 
 void
 MacroAssemblerARMCompat::storePayload(const Value& val, const Address& dest)
 {
-    AutoRegisterScope scratch2(asMasm(), secondScratchReg_);
+    ScratchRegisterScope scratch(asMasm());
+    SecondScratchRegisterScope scratch2(asMasm());
 
     jsval_layout jv = JSVAL_TO_IMPL(val);
     if (val.isMarkable())
-        ma_mov(ImmGCPtr((gc::Cell*)jv.s.payload.ptr), scratch2);
+        ma_mov(ImmGCPtr((gc::Cell*)jv.s.payload.ptr), scratch);
     else
-        ma_mov(Imm32(jv.s.payload.i32), scratch2);
-    ma_str(scratch2, ToPayload(dest));
+        ma_mov(Imm32(jv.s.payload.i32), scratch);
+    ma_str(scratch, ToPayload(dest), scratch2);
 }
 
 void
 MacroAssemblerARMCompat::storePayload(Register src, const Address& dest)
 {
-    ma_str(src, ToPayload(dest));
+    ScratchRegisterScope scratch(asMasm());
+    ma_str(src, ToPayload(dest), scratch);
 }
 
 void
 MacroAssemblerARMCompat::storePayload(const Value& val, const BaseIndex& dest)
 {
     unsigned shift = ScaleToShift(dest.scale);
 
     ScratchRegisterScope scratch(asMasm());
+    SecondScratchRegisterScope scratch2(asMasm());
 
     jsval_layout jv = JSVAL_TO_IMPL(val);
     if (val.isMarkable())
         ma_mov(ImmGCPtr((gc::Cell*)jv.s.payload.ptr), scratch);
     else
         ma_mov(Imm32(jv.s.payload.i32), scratch);
 
     // If NUNBOX32_PAYLOAD_OFFSET is not zero, the memory operand [base + index
     // << shift + imm] cannot be encoded into a single instruction, and cannot
     // be integrated into the as_dtr call.
     JS_STATIC_ASSERT(NUNBOX32_PAYLOAD_OFFSET == 0);
 
     // If an offset is used, modify the base so that a [base + index << shift]
     // instruction format can be used.
     if (dest.offset != 0)
-        ma_add(dest.base, Imm32(dest.offset), dest.base);
+        ma_add(dest.base, Imm32(dest.offset), dest.base, scratch2);
 
     as_dtr(IsStore, 32, Offset, scratch,
            DTRAddr(dest.base, DtrRegImmShift(dest.index, LSL, shift)));
 
     // Restore the original value of the base, if necessary.
     if (dest.offset != 0)
-        ma_sub(dest.base, Imm32(dest.offset), dest.base);
+        ma_sub(dest.base, Imm32(dest.offset), dest.base, scratch);
 }
 
 void
 MacroAssemblerARMCompat::storePayload(Register src, const BaseIndex& dest)
 {
     unsigned shift = ScaleToShift(dest.scale);
     MOZ_ASSERT(shift < 32);
 
+    ScratchRegisterScope scratch(asMasm());
+
     // If NUNBOX32_PAYLOAD_OFFSET is not zero, the memory operand [base + index
     // << shift + imm] cannot be encoded into a single instruction, and cannot
     // be integrated into the as_dtr call.
     JS_STATIC_ASSERT(NUNBOX32_PAYLOAD_OFFSET == 0);
 
     // Save/restore the base if the BaseIndex has an offset, as above.
     if (dest.offset != 0)
-        ma_add(dest.base, Imm32(dest.offset), dest.base);
+        ma_add(dest.base, Imm32(dest.offset), dest.base, scratch);
 
     // Technically, shift > -32 can be handle by changing LSL to ASR, but should
     // never come up, and this is one less code path to get wrong.
     as_dtr(IsStore, 32, Offset, src, DTRAddr(dest.base, DtrRegImmShift(dest.index, LSL, shift)));
 
     if (dest.offset != 0)
-        ma_sub(dest.base, Imm32(dest.offset), dest.base);
+        ma_sub(dest.base, Imm32(dest.offset), dest.base, scratch);
 }
 
 void
 MacroAssemblerARMCompat::storeTypeTag(ImmTag tag, const Address& dest)
 {
-    AutoRegisterScope scratch2(asMasm(), secondScratchReg_);
-    ma_mov(tag, scratch2);
-    ma_str(scratch2, ToType(dest));
+    ScratchRegisterScope scratch(asMasm());
+    SecondScratchRegisterScope scratch2(asMasm());
+
+    ma_mov(tag, scratch);
+    ma_str(scratch, ToType(dest), scratch2);
 }
 
 void
 MacroAssemblerARMCompat::storeTypeTag(ImmTag tag, const BaseIndex& dest)
 {
     Register base = dest.base;
     Register index = dest.index;
     unsigned shift = ScaleToShift(dest.scale);
-    MOZ_ASSERT(base != ScratchRegister);
-    MOZ_ASSERT(index != ScratchRegister);
-
-    // A value needs to be store a value int base + index << shift + 4.
-    // ARM cannot handle this in a single operand, so a temp register is
-    // required. However, the scratch register is presently in use to hold the
-    // immediate that is being stored into said memory location. Work around
-    // this by modifying the base so the valid [base + index << shift] format
-    // can be used, then restore it.
-    ma_add(base, Imm32(NUNBOX32_TYPE_OFFSET + dest.offset), base);
 
     ScratchRegisterScope scratch(asMasm());
+    SecondScratchRegisterScope scratch2(asMasm());
+
+    MOZ_ASSERT(base != scratch && base != scratch2);
+    MOZ_ASSERT(index != scratch && index != scratch2);
+
+    ma_add(base, Imm32(dest.offset + NUNBOX32_TYPE_OFFSET), scratch2, scratch);
     ma_mov(tag, scratch);
-    ma_str(scratch, DTRAddr(base, DtrRegImmShift(index, LSL, shift)));
-    ma_sub(base, Imm32(NUNBOX32_TYPE_OFFSET + dest.offset), base);
+    ma_str(scratch, DTRAddr(scratch2, DtrRegImmShift(index, LSL, shift)));
 }
 
 void
 MacroAssemblerARM::ma_call(ImmPtr dest)
 {
     ma_movPatchable(dest, CallReg, Always);
     as_blx(CallReg);
 }
@@ -3523,78 +3599,94 @@ MacroAssemblerARMCompat::checkStackAlign
 }
 
 void
 MacroAssemblerARMCompat::handleFailureWithHandlerTail(void* handler)
 {
     // Reserve space for exception information.
     int size = (sizeof(ResumeFromException) + 7) & ~7;
 
-    ma_sub(Imm32(size), sp);
+    Imm8 size8(size);
+    as_sub(sp, sp, size8);
     ma_mov(sp, r0);
 
     // Call the handler.
     asMasm().setupUnalignedABICall(r1);
     asMasm().passABIArg(r0);
     asMasm().callWithABI(handler);
 
     Label entryFrame;
     Label catch_;
     Label finally;
     Label return_;
     Label bailout;
 
-    ma_ldr(Address(sp, offsetof(ResumeFromException, kind)), r0);
+    {
+        ScratchRegisterScope scratch(asMasm());
+        ma_ldr(Address(sp, offsetof(ResumeFromException, kind)), r0, scratch);
+    }
+
     asMasm().branch32(Assembler::Equal, r0, Imm32(ResumeFromException::RESUME_ENTRY_FRAME),
                       &entryFrame);
     asMasm().branch32(Assembler::Equal, r0, Imm32(ResumeFromException::RESUME_CATCH), &catch_);
     asMasm().branch32(Assembler::Equal, r0, Imm32(ResumeFromException::RESUME_FINALLY), &finally);
     asMasm().branch32(Assembler::Equal, r0, Imm32(ResumeFromException::RESUME_FORCED_RETURN),
                       &return_);
     asMasm().branch32(Assembler::Equal, r0, Imm32(ResumeFromException::RESUME_BAILOUT), &bailout);
 
     breakpoint(); // Invalid kind.
 
     // No exception handler. Load the error value, load the new stack pointer
     // and return from the entry frame.
     bind(&entryFrame);
     moveValue(MagicValue(JS_ION_ERROR), JSReturnOperand);
-    ma_ldr(Address(sp, offsetof(ResumeFromException, stackPointer)), sp);
+    {
+        ScratchRegisterScope scratch(asMasm());
+        ma_ldr(Address(sp, offsetof(ResumeFromException, stackPointer)), sp, scratch);
+    }
 
     // We're going to be returning by the ion calling convention, which returns
     // by ??? (for now, I think ldr pc, [sp]!)
     as_dtr(IsLoad, 32, PostIndex, pc, DTRAddr(sp, DtrOffImm(4)));
 
     // If we found a catch handler, this must be a baseline frame. Restore state
     // and jump to the catch block.
     bind(&catch_);
-    ma_ldr(Address(sp, offsetof(ResumeFromException, target)), r0);
-    ma_ldr(Address(sp, offsetof(ResumeFromException, framePointer)), r11);
-    ma_ldr(Address(sp, offsetof(ResumeFromException, stackPointer)), sp);
+    {
+        ScratchRegisterScope scratch(asMasm());
+        ma_ldr(Address(sp, offsetof(ResumeFromException, target)), r0, scratch);
+        ma_ldr(Address(sp, offsetof(ResumeFromException, framePointer)), r11, scratch);
+        ma_ldr(Address(sp, offsetof(ResumeFromException, stackPointer)), sp, scratch);
+    }
     jump(r0);
 
     // If we found a finally block, this must be a baseline frame. Push two
     // values expected by JSOP_RETSUB: BooleanValue(true) and the exception.
     bind(&finally);
     ValueOperand exception = ValueOperand(r1, r2);
     loadValue(Operand(sp, offsetof(ResumeFromException, exception)), exception);
-
-    ma_ldr(Address(sp, offsetof(ResumeFromException, target)), r0);
-    ma_ldr(Address(sp, offsetof(ResumeFromException, framePointer)), r11);
-    ma_ldr(Address(sp, offsetof(ResumeFromException, stackPointer)), sp);
+    {
+        ScratchRegisterScope scratch(asMasm());
+        ma_ldr(Address(sp, offsetof(ResumeFromException, target)), r0, scratch);
+        ma_ldr(Address(sp, offsetof(ResumeFromException, framePointer)), r11, scratch);
+        ma_ldr(Address(sp, offsetof(ResumeFromException, stackPointer)), sp, scratch);
+    }
 
     pushValue(BooleanValue(true));
     pushValue(exception);
     jump(r0);
 
     // Only used in debug mode. Return BaselineFrame->returnValue() to the
     // caller.
     bind(&return_);
-    ma_ldr(Address(sp, offsetof(ResumeFromException, framePointer)), r11);
-    ma_ldr(Address(sp, offsetof(ResumeFromException, stackPointer)), sp);
+    {
+        ScratchRegisterScope scratch(asMasm());
+        ma_ldr(Address(sp, offsetof(ResumeFromException, framePointer)), r11, scratch);
+        ma_ldr(Address(sp, offsetof(ResumeFromException, stackPointer)), sp, scratch);
+    }
     loadValue(Address(r11, BaselineFrame::reverseOffsetOfReturnValue()), JSReturnOperand);
     ma_mov(r11, sp);
     pop(r11);
 
     // If profiling is enabled, then update the lastProfilingFrame to refer to caller
     // frame before returning.
     {
         Label skipProfilingInstrumentation;
@@ -3606,29 +3698,34 @@ MacroAssemblerARMCompat::handleFailureWi
         bind(&skipProfilingInstrumentation);
     }
 
     ret();
 
     // If we are bailing out to baseline to handle an exception, jump to the
     // bailout tail stub.
     bind(&bailout);
-    ma_ldr(Address(sp, offsetof(ResumeFromException, bailoutInfo)), r2);
-    ma_mov(Imm32(BAILOUT_RETURN_OK), r0);
-    ma_ldr(Address(sp, offsetof(ResumeFromException, target)), r1);
+    {
+        ScratchRegisterScope scratch(asMasm());
+        ma_ldr(Address(sp, offsetof(ResumeFromException, bailoutInfo)), r2, scratch);
+        ma_mov(Imm32(BAILOUT_RETURN_OK), r0);
+        ma_ldr(Address(sp, offsetof(ResumeFromException, target)), r1, scratch);
+    }
     jump(r1);
 }
 
 Assembler::Condition
 MacroAssemblerARMCompat::testStringTruthy(bool truthy, const ValueOperand& value)
 {
     Register string = value.payloadReg();
     ScratchRegisterScope scratch(asMasm());
-    ma_dtr(IsLoad, string, Imm32(JSString::offsetOfLength()), scratch);
-    ma_cmp(scratch, Imm32(0));
+    SecondScratchRegisterScope scratch2(asMasm());
+
+    ma_dtr(IsLoad, string, Imm32(JSString::offsetOfLength()), scratch, scratch2);
+    as_cmp(scratch, Imm8(0));
     return truthy ? Assembler::NotEqual : Assembler::Equal;
 }
 
 void
 MacroAssemblerARMCompat::floor(FloatRegister input, Register output, Label* bail)
 {
     Label handleZero;
     Label handleNeg;
@@ -3651,31 +3748,31 @@ MacroAssemblerARMCompat::floor(FloatRegi
     ma_mov(output, output, SetCC);
     ma_b(bail, Signed);
     ma_b(&fin);
 
     bind(&handleZero);
     // Move the top word of the double into the output reg, if it is non-zero,
     // then the original value was -0.0.
     as_vxfer(output, InvalidReg, input, FloatToCore, Always, 1);
-    ma_cmp(output, Imm32(0));
+    as_cmp(output, Imm8(0));
     ma_b(bail, NonZero);
     ma_b(&fin);
 
     bind(&handleNeg);
     // Negative case, negate, then start dancing.
     ma_vneg(input, input);
     ma_vcvt_F64_U32(input, scratchDouble.uintOverlay());
     ma_vxfer(scratchDouble.uintOverlay(), output);
     ma_vcvt_U32_F64(scratchDouble.uintOverlay(), scratchDouble);
     compareDouble(scratchDouble, input);
-    ma_add(output, Imm32(1), output, LeaveCC, NotEqual);
+    as_add(output, output, Imm8(1), LeaveCC, NotEqual);
     // Negate the output. Since INT_MIN < -INT_MAX, even after adding 1, the
     // result will still be a negative number.
-    ma_rsb(output, Imm32(0), output, SetCC);
+    as_rsb(output, output, Imm8(0), SetCC);
     // Flip the negated input back to its original value.
     ma_vneg(input, input);
     // If the result looks non-negative, then this value didn't actually fit
     // into the int range, and special handling is required. Zero is also caught
     // by this case, but floor of a negative number should never be zero.
     ma_b(bail, NotSigned);
 
     bind(&fin);
@@ -3705,34 +3802,34 @@ MacroAssemblerARMCompat::floorf(FloatReg
     ma_mov(output, output, SetCC);
     ma_b(bail, Signed);
     ma_b(&fin);
 
     bind(&handleZero);
     // Move the top word of the double into the output reg, if it is non-zero,
     // then the original value was -0.0.
     as_vxfer(output, InvalidReg, VFPRegister(input).singleOverlay(), FloatToCore, Always, 0);
-    ma_cmp(output, Imm32(0));
+    as_cmp(output, Imm8(0));
     ma_b(bail, NonZero);
     ma_b(&fin);
 
     bind(&handleNeg);
     // Negative case, negate, then start dancing.
     {
         ScratchFloat32Scope scratch(asMasm());
         ma_vneg_f32(input, input);
         ma_vcvt_F32_U32(input, scratch.uintOverlay());
         ma_vxfer(VFPRegister(scratch).uintOverlay(), output);
         ma_vcvt_U32_F32(scratch.uintOverlay(), scratch);
         compareFloat(scratch, input);
-        ma_add(output, Imm32(1), output, LeaveCC, NotEqual);
+        as_add(output, output, Imm8(1), LeaveCC, NotEqual);
     }
     // Negate the output. Since INT_MIN < -INT_MAX, even after adding 1, the
     // result will still be a negative number.
-    ma_rsb(output, Imm32(0), output, SetCC);
+    as_rsb(output, output, Imm8(0), SetCC);
     // Flip the negated input back to its original value.
     ma_vneg_f32(input, input);
     // If the result looks non-negative, then this value didn't actually fit
     // into the int range, and special handling is required. Zero is also caught
     // by this case, but floor of a negative number should never be zero.
     ma_b(bail, NotSigned);
 
     bind(&fin);
@@ -3768,28 +3865,28 @@ MacroAssemblerARMCompat::ceil(FloatRegis
     ma_neg(output, output, SetCC);
     ma_b(bail, NotSigned);
     ma_b(&fin);
 
     // Test for 0.0 / -0.0: if the top word of the input double is not zero,
     // then it was -0 and we need to bail out.
     bind(&handleZero);
     as_vxfer(output, InvalidReg, input, FloatToCore, Always, 1);
-    ma_cmp(output, Imm32(0));
+    as_cmp(output, Imm8(0));
     ma_b(bail, NonZero);
     ma_b(&fin);
 
     // We are in the ]0; +inf] range: truncate integer values, maybe add 1 for
     // non integer values, maybe bail if overflow.
     bind(&handlePos);
     ma_vcvt_F64_U32(input, ScratchUIntReg);
     ma_vxfer(ScratchUIntReg, output);
     ma_vcvt_U32_F64(ScratchUIntReg, scratchDouble);
     compareDouble(scratchDouble, input);
-    ma_add(output, Imm32(1), output, LeaveCC, NotEqual);
+    as_add(output, output, Imm8(1), LeaveCC, NotEqual);
     // Bail out if the add overflowed or the result is non positive.
     ma_mov(output, output, SetCC);
     ma_b(bail, Signed);
     ma_b(bail, Zero);
 
     bind(&fin);
 }
 
@@ -3829,33 +3926,33 @@ MacroAssemblerARMCompat::ceilf(FloatRegi
         ma_b(bail, NotSigned);
         ma_b(&fin);
     }
 
     // Test for 0.0 / -0.0: if the top word of the input double is not zero,
     // then it was -0 and we need to bail out.
     bind(&handleZero);
     as_vxfer(output, InvalidReg, VFPRegister(input).singleOverlay(), FloatToCore, Always, 0);
-    ma_cmp(output, Imm32(0));
+    as_cmp(output, Imm8(0));
     ma_b(bail, NonZero);
     ma_b(&fin);
 
     // We are in the ]0; +inf] range: truncate integer values, maybe add 1 for
     // non integer values, maybe bail if overflow.
     bind(&handlePos);
     {
         ScratchDoubleScope scratchDouble(asMasm());
         FloatRegister scratchFloat = scratchDouble.asSingle();
         FloatRegister scratchUInt = scratchDouble.uintOverlay();
 
         ma_vcvt_F32_U32(input, scratchUInt);
         ma_vxfer(scratchUInt, output);
         ma_vcvt_U32_F32(scratchUInt, scratchFloat);
         compareFloat(scratchFloat, input);
-        ma_add(output, Imm32(1), output, LeaveCC, NotEqual);
+        as_add(output, output, Imm8(1), LeaveCC, NotEqual);
 
         // Bail on overflow or non-positive result.
         ma_mov(output, output, SetCC);
         ma_b(bail, Signed);
         ma_b(bail, Zero);
     }
 
     bind(&fin);
@@ -3921,17 +4018,17 @@ MacroAssemblerARMCompat::round(FloatRegi
     ma_mov(output, output, SetCC);
     ma_b(bail, Signed);
     ma_b(&fin);
 
     bind(&handleZero);
     // Move the top word of the double into the output reg, if it is non-zero,
     // then the original value was -0.0
     as_vxfer(output, InvalidReg, input, FloatToCore, Always, 1);
-    ma_cmp(output, Imm32(0));
+    as_cmp(output, Imm8(0));
     ma_b(bail, NonZero);
     ma_b(&fin);
 
     bind(&handleNeg);
     // Negative case, negate, then start dancing. This number may be positive,
     // since we added 0.5.
 
     // Add 0.5 to negative numbers, store the result into tmp
@@ -3941,20 +4038,20 @@ MacroAssemblerARMCompat::round(FloatRegi
     ma_vcvt_F64_U32(tmp, scratchDouble.uintOverlay());
     ma_vxfer(VFPRegister(scratchDouble).uintOverlay(), output);
 
     // -output is now a correctly rounded value, unless the original value was
     // exactly halfway between two integers, at which point, it has been rounded
     // away from zero, when it should be rounded towards \infty.
     ma_vcvt_U32_F64(scratchDouble.uintOverlay(), scratchDouble);
     compareDouble(scratchDouble, tmp);
-    ma_sub(output, Imm32(1), output, LeaveCC, Equal);
+    as_sub(output, output, Imm8(1), LeaveCC, Equal);
     // Negate the output. Since INT_MIN < -INT_MAX, even after adding 1, the
     // result will still be a negative number.
-    ma_rsb(output, Imm32(0), output, SetCC);
+    as_rsb(output, output, Imm8(0), SetCC);
 
     // If the result looks non-negative, then this value didn't actually fit
     // into the int range, and special handling is required, or it was zero,
     // which means the result is actually -0.0 which also requires special
     // handling.
     ma_b(bail, NotSigned);
 
     bind(&fin);
@@ -3998,17 +4095,17 @@ MacroAssemblerARMCompat::roundf(FloatReg
     ma_b(bail, Signed);
     ma_b(&fin);
 
     bind(&handleZero);
 
     // Move the whole float32 into the output reg, if it is non-zero, then the
     // original value was -0.0.
     as_vxfer(output, InvalidReg, input, FloatToCore, Always, 0);
-    ma_cmp(output, Imm32(0));
+    as_cmp(output, Imm8(0));
     ma_b(bail, NonZero);
     ma_b(&fin);
 
     bind(&handleNeg);
 
     // Add 0.5 to negative numbers, storing the result into tmp.
     ma_vneg_f32(input, tmp);
     loadConstantFloat32(0.5f, scratchFloat);
@@ -4029,22 +4126,22 @@ MacroAssemblerARMCompat::roundf(FloatReg
     Label flipSign;
     ma_b(&flipSign, Equal);
 
     // -output is now a correctly rounded value, unless the original value was
     // exactly halfway between two integers, at which point, it has been rounded
     // away from zero, when it should be rounded towards \infty.
     ma_vcvt_U32_F32(tmp.uintOverlay(), tmp);
     compareFloat(tmp, scratchFloat);
-    ma_sub(output, Imm32(1), output, LeaveCC, Equal);
+    as_sub(output, output, Imm8(1), LeaveCC, Equal);
 
     // Negate the output. Since INT_MIN < -INT_MAX, even after adding 1, the
     // result will still be a negative number.
     bind(&flipSign);
-    ma_rsb(output, Imm32(0), output, SetCC);
+    as_rsb(output, output, Imm8(0), SetCC);
 
     // If the result looks non-negative, then this value didn't actually fit
     // into the int range, and special handling is required, or it was zero,
     // which means the result is actually -0.0 which also requires special
     // handling.
     ma_b(bail, NotSigned);
 
     bind(&fin);
@@ -4067,29 +4164,33 @@ namespace jit {
 template<>
 Register
 MacroAssemblerARMCompat::computePointer<BaseIndex>(const BaseIndex& src, Register r)
 {
     Register base = src.base;
     Register index = src.index;
     uint32_t scale = Imm32::ShiftOf(src.scale).value;
     int32_t offset = src.offset;
+
+    ScratchRegisterScope scratch(asMasm());
+
     as_add(r, base, lsl(index, scale));
     if (offset != 0)
-        ma_add(r, Imm32(offset), r);
+        ma_add(r, Imm32(offset), r, scratch);
     return r;
 }
 
 template<>
 Register
 MacroAssemblerARMCompat::computePointer<Address>(const Address& src, Register r)
 {
+    ScratchRegisterScope scratch(asMasm());
     if (src.offset == 0)
         return src.base;
-    ma_add(src.base, Imm32(src.offset), r);
+    ma_add(src.base, Imm32(src.offset), r, scratch);
     return r;
 }
 
 } // namespace jit
 } // namespace js
 
 template<typename T>
 void
@@ -4132,17 +4233,17 @@ template<typename T>
 void
 MacroAssemblerARMCompat::compareExchangeARMv7(int nbytes, bool signExtend, const T& mem,
                                               Register oldval, Register newval, Register output)
 {
     Label again;
     Label done;
     ma_dmb(BarrierST);
 
-    AutoRegisterScope scratch2(asMasm(), secondScratchReg_);
+    SecondScratchRegisterScope scratch2(asMasm());
     Register ptr = computePointer(mem, scratch2);
 
     ScratchRegisterScope scratch(asMasm());
 
     bind(&again);
     switch (nbytes) {
       case 1:
         as_ldrexb(output, ptr);
@@ -4228,17 +4329,17 @@ template<typename T>
 void
 MacroAssemblerARMCompat::atomicExchangeARMv7(int nbytes, bool signExtend, const T& mem,
                                              Register value, Register output)
 {
     Label again;
     Label done;
     ma_dmb(BarrierST);
 
-    AutoRegisterScope scratch2(asMasm(), secondScratchReg_);
+    SecondScratchRegisterScope scratch2(asMasm());
     Register ptr = computePointer(mem, scratch2);
 
     ScratchRegisterScope scratch(asMasm());
 
     bind(&again);
     switch (nbytes) {
       case 1:
         as_ldrexb(output, ptr);
@@ -4337,17 +4438,17 @@ void
 MacroAssemblerARMCompat::atomicFetchOpARMv7(int nbytes, bool signExtend, AtomicOp op,
                                             const Register& value, const T& mem, Register flagTemp,
                                             Register output)
 {
     MOZ_ASSERT(flagTemp != InvalidReg);
 
     Label again;
 
-    AutoRegisterScope scratch2(asMasm(), secondScratchReg_);
+    SecondScratchRegisterScope scratch2(asMasm());
     Register ptr = computePointer(mem, scratch2);
 
     ma_dmb();
 
     ScratchRegisterScope scratch(asMasm());
 
     bind(&again);
     switch (nbytes) {
@@ -4454,17 +4555,17 @@ template<typename T>
 void
 MacroAssemblerARMCompat::atomicEffectOpARMv7(int nbytes, AtomicOp op, const Register& value,
                                              const T& mem, Register flagTemp)
 {
     MOZ_ASSERT(flagTemp != InvalidReg);
 
     Label again;
 
-    AutoRegisterScope scratch2(asMasm(), secondScratchReg_);
+    SecondScratchRegisterScope scratch2(asMasm());
     Register ptr = computePointer(mem, scratch2);
 
     ma_dmb();
 
     ScratchRegisterScope scratch(asMasm());
 
     bind(&again);
     switch (nbytes) {
@@ -4763,17 +4864,17 @@ MacroAssembler::PopRegsInMaskIgnore(Live
         freeStack(reservedG);
     }
     MOZ_ASSERT(diffG == 0);
 }
 
 void
 MacroAssembler::Push(Register reg)
 {
-    ma_push(reg);
+    push(reg);
     adjustFrame(sizeof(intptr_t));
 }
 
 void
 MacroAssembler::Push(const Imm32 imm)
 {
     push(imm);
     adjustFrame(sizeof(intptr_t));
@@ -4826,18 +4927,19 @@ MacroAssembler::Pop(const ValueOperand& 
 {
     popValue(val);
     adjustFrame(-sizeof(Value));
 }
 
 void
 MacroAssembler::reserveStack(uint32_t amount)
 {
+    ScratchRegisterScope scratch(*this);
     if (amount)
-        ma_sub(Imm32(amount), sp);
+        ma_sub(Imm32(amount), sp, scratch);
     adjustFrame(amount);
 }
 
 // ===============================================================
 // Simple call functions.
 
 CodeOffset
 MacroAssembler::call(Register reg)
@@ -4915,17 +5017,17 @@ MacroAssembler::thunkWithPatch()
 
     // Inhibit pools since these three words must be contiguous so that the offset
     // calculations below are valid.
     AutoForbidPools afp(this, 3);
 
     // When pc is used, the read value is the address of the instruction + 8.
     // This is exactly the address of the uint32 word we want to load.
     ScratchRegisterScope scratch(*this);
-    ma_ldr(Address(pc, 0), scratch);
+    ma_ldr(DTRAddr(pc, DtrOffImm(0)), scratch);
 
     // Branch by making pc the destination register.
     ma_add(pc, scratch, pc, LeaveCC, Always);
 
     // Allocate space which will be patched by patchThunk().
     CodeOffset u32Offset(currentOffset());
     writeInst(UINT32_MAX);
 
@@ -5000,17 +5102,17 @@ MacroAssembler::popReturnAddress()
 void
 MacroAssembler::setupUnalignedABICall(Register scratch)
 {
     setupABICall();
     dynamicAlignment_ = true;
 
     ma_mov(sp, scratch);
     // Force sp to be aligned.
-    ma_and(Imm32(~(ABIStackAlignment - 1)), sp, sp);
+    as_bic(sp, sp, Imm8(ABIStackAlignment - 1));
     ma_push(scratch);
 }
 
 void
 MacroAssembler::callWithABIPre(uint32_t* stackAdjust, bool callFromAsmJS)
 {
     MOZ_ASSERT(inCall_);
     uint32_t stackForCall = abiArgs_.stackBytesConsumedSoFar();
@@ -5099,17 +5201,20 @@ MacroAssembler::callWithABINoProfiler(Re
 }
 
 void
 MacroAssembler::callWithABINoProfiler(const Address& fun, MoveOp::Type result)
 {
     // Load the callee in r12, no instruction between the ldr and call should
     // clobber it. Note that we can't use fun.base because it may be one of the
     // IntArg registers clobbered before the call.
-    ma_ldr(fun, r12);
+    {
+        ScratchRegisterScope scratch(*this);
+        ma_ldr(fun, r12, scratch);
+    }
     uint32_t stackAdjust;
     callWithABIPre(&stackAdjust);
     call(r12);
     callWithABIPost(stackAdjust, result);
 }
 
 // ===============================================================
 // Jit Frames.
@@ -5137,17 +5242,17 @@ MacroAssembler::pushFakeReturnAddress(Re
 
 // ===============================================================
 // Branch functions
 
 void
 MacroAssembler::branchPtrInNurseryChunk(Condition cond, Register ptr, Register temp,
                                         Label* label)
 {
-    AutoRegisterScope scratch2(*this, secondScratchReg_);
+    SecondScratchRegisterScope scratch2(*this);
 
     MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
     MOZ_ASSERT(ptr != temp);
     MOZ_ASSERT(ptr != scratch2);
 
     ma_lsr(Imm32(gc::ChunkShift), ptr, scratch2);
     ma_lsl(Imm32(gc::ChunkShift), scratch2, scratch2);
     load32(Address(scratch2, gc::ChunkLocationOffset), scratch2);
@@ -5190,22 +5295,24 @@ MacroAssembler::branchTestValue(Conditio
     MOZ_ASSERT(cond == Equal || cond == NotEqual);
     // If cond == NotEqual, branch when a.payload != b.payload || a.tag !=
     // b.tag. If the payloads are equal, compare the tags. If the payloads are
     // not equal, short circuit true (NotEqual).
     //
     // If cand == Equal, branch when a.payload == b.payload && a.tag == b.tag.
     // If the payloads are equal, compare the tags. If the payloads are not
     // equal, short circuit false (NotEqual).
+    ScratchRegisterScope scratch(*this);
+
     jsval_layout jv = JSVAL_TO_IMPL(rhs);
     if (rhs.isMarkable())
-        ma_cmp(lhs.payloadReg(), ImmGCPtr(reinterpret_cast<gc::Cell*>(rhs.toGCThing())));
+        ma_cmp(lhs.payloadReg(), ImmGCPtr(reinterpret_cast<gc::Cell*>(rhs.toGCThing())), scratch);
     else
-        ma_cmp(lhs.payloadReg(), Imm32(jv.s.payload.i32));
-    ma_cmp(lhs.typeReg(), Imm32(jv.s.tag), Equal);
+        ma_cmp(lhs.payloadReg(), Imm32(jv.s.payload.i32), scratch);
+    ma_cmp(lhs.typeReg(), Imm32(jv.s.tag), scratch, Equal);
     ma_b(label, cond);
 }
 
 // ========================================================================
 // Memory access primitives.
 template <typename T>
 void
 MacroAssembler::storeUnboxedValue(const ConstantOrRegister& value, MIRType valueType,
@@ -5241,31 +5348,35 @@ MacroAssemblerARM::emitUnalignedLoad(boo
                                      Register dest, unsigned offset)
 {
     // Preconditions.
     MOZ_ASSERT(ptr != tmp);
     MOZ_ASSERT(ptr != dest);
     MOZ_ASSERT(tmp != dest);
     MOZ_ASSERT(byteSize <= 4);
 
+    ScratchRegisterScope scratch(asMasm());
+
     for (unsigned i = 0; i < byteSize; i++) {
         // Only the last byte load shall be signed, if needed.
         bool signedByteLoad = isSigned && (i == byteSize - 1);
-        ma_dataTransferN(IsLoad, 8, signedByteLoad, ptr, Imm32(offset + i), i ? tmp : dest);
+        ma_dataTransferN(IsLoad, 8, signedByteLoad, ptr, Imm32(offset + i), i ? tmp : dest, scratch);
         if (i)
             as_orr(dest, dest, lsl(tmp, 8 * i));
     }
 }
 
 void
 MacroAssemblerARM::emitUnalignedStore(unsigned byteSize, Register ptr, Register val,
                                       unsigned offset)
 {
     // Preconditions.
     MOZ_ASSERT(ptr != val);
     MOZ_ASSERT(byteSize <= 4);
 
+    ScratchRegisterScope scratch(asMasm());
+
     for (unsigned i = 0; i < byteSize; i++) {
-        ma_dataTransferN(IsStore, 8 /* bits */, /* signed */ false, ptr, Imm32(offset + i), val);
+        ma_dataTransferN(IsStore, 8 /* bits */, /* signed */ false, ptr, Imm32(offset + i), val, scratch);
         if (i < byteSize - 1)
             ma_lsr(Imm32(8), val, val);
     }
 }
--- a/js/src/jit/arm/MacroAssembler-arm.h
+++ b/js/src/jit/arm/MacroAssembler-arm.h
@@ -39,16 +39,21 @@ class MacroAssemblerARM : public Assembl
     // On ARM, some instructions require a second scratch register. This
     // register defaults to lr, since it's non-allocatable (as it can be
     // clobbered by some instructions). Allow the baseline compiler to override
     // this though, since baseline IC stubs rely on lr holding the return
     // address.
     Register secondScratchReg_;
 
   public:
+    Register getSecondScratchReg() const {
+        return secondScratchReg_;
+    }
+
+  public:
     // Higher level tag testing code.
     // TODO: Can probably remove the Operand versions.
     Operand ToPayload(Operand base) const {
         return Operand(Register::FromCode(base.base()), base.disp());
     }
     Address ToPayload(const Address& base) const {
         return base;
     }
@@ -98,22 +103,20 @@ class MacroAssemblerARM : public Assembl
     // Somewhat direct wrappers for the low-level assembler funcitons
     // bitops. Attempt to encode a virtual alu instruction using two real
     // instructions.
   private:
     bool alu_dbl(Register src1, Imm32 imm, Register dest, ALUOp op,
                  SBit s, Condition c);
 
   public:
+    void ma_alu(Register src1, Imm32 imm, Register dest, AutoRegisterScope& scratch,
+                ALUOp op, SBit s = LeaveCC, Condition c = Always);
     void ma_alu(Register src1, Operand2 op2, Register dest, ALUOp op,
                 SBit s = LeaveCC, Condition c = Always);
-    void ma_alu(Register src1, Imm32 imm, Register dest,
-                ALUOp op,
-                SBit s =  LeaveCC, Condition c = Always);
-
     void ma_alu(Register src1, Operand op2, Register dest, ALUOp op,
                 SBit s = LeaveCC, Condition c = Always);
     void ma_nop();
 
     void ma_movPatchable(Imm32 imm, Register dest, Assembler::Condition c);
     void ma_movPatchable(ImmPtr imm, Register dest, Assembler::Condition c);
 
     static void ma_mov_patch(Imm32 imm, Register dest, Assembler::Condition c,
@@ -136,193 +139,204 @@ class MacroAssemblerARM : public Assembl
     void ma_asr(Imm32 shift, Register src, Register dst);
     void ma_ror(Imm32 shift, Register src, Register dst);
     void ma_rol(Imm32 shift, Register src, Register dst);
 
     void ma_lsl(Register shift, Register src, Register dst);
     void ma_lsr(Register shift, Register src, Register dst);
     void ma_asr(Register shift, Register src, Register dst);
     void ma_ror(Register shift, Register src, Register dst);
-    void ma_rol(Register shift, Register src, Register dst);
+    void ma_rol(Register shift, Register src, Register dst, AutoRegisterScope& scratch);
 
     // Move not (dest <- ~src)
     void ma_mvn(Register src1, Register dest, SBit s = LeaveCC, Condition c = Always);
 
     // Negate (dest <- -src) implemented as rsb dest, src, 0
     void ma_neg(Register src, Register dest,
                 SBit s = LeaveCC, Condition c = Always);
 
     // And
     void ma_and(Register src, Register dest,
                 SBit s = LeaveCC, Condition c = Always);
 
     void ma_and(Register src1, Register src2, Register dest,
                 SBit s = LeaveCC, Condition c = Always);
 
-    void ma_and(Imm32 imm, Register dest,
+    void ma_and(Imm32 imm, Register dest, AutoRegisterScope& scratch,
                 SBit s = LeaveCC, Condition c = Always);
 
-    void ma_and(Imm32 imm, Register src1, Register dest,
+    void ma_and(Imm32 imm, Register src1, Register dest, AutoRegisterScope& scratch,
                 SBit s = LeaveCC, Condition c = Always);
 
     // Bit clear (dest <- dest & ~imm) or (dest <- src1 & ~src2)
-    void ma_bic(Imm32 imm, Register dest,
+    void ma_bic(Imm32 imm, Register dest, AutoRegisterScope& scratch,
                 SBit s = LeaveCC, Condition c = Always);
 
     // Exclusive or
     void ma_eor(Register src, Register dest,
                 SBit s = LeaveCC, Condition c = Always);
 
     void ma_eor(Register src1, Register src2, Register dest,
                 SBit s = LeaveCC, Condition c = Always);
 
-    void ma_eor(Imm32 imm, Register dest,
+    void ma_eor(Imm32 imm, Register dest, AutoRegisterScope& scratch,
                 SBit s = LeaveCC, Condition c = Always);
 
-    void ma_eor(Imm32 imm, Register src1, Register dest,
+    void ma_eor(Imm32 imm, Register src1, Register dest, AutoRegisterScope& scratch,
                 SBit s = LeaveCC, Condition c = Always);
 
-
     // Or
     void ma_orr(Register src, Register dest,
                 SBit s = LeaveCC, Condition c = Always);
 
     void ma_orr(Register src1, Register src2, Register dest,
                 SBit s = LeaveCC, Condition c = Always);
 
-    void ma_orr(Imm32 imm, Register dest,
+    void ma_orr(Imm32 imm, Register dest, AutoRegisterScope& scratch,
                 SBit s = LeaveCC, Condition c = Always);
 
-    void ma_orr(Imm32 imm, Register src1, Register dest,
+    void ma_orr(Imm32 imm, Register src1, Register dest, AutoRegisterScope& scratch,
                 SBit s = LeaveCC, Condition c = Always);
 
 
     // Arithmetic based ops.
     // Add with carry:
-    void ma_adc(Imm32 imm, Register dest, SBit s = LeaveCC, Condition c = Always);
+    void ma_adc(Imm32 imm, Register dest, AutoRegisterScope& scratch, SBit s = LeaveCC, Condition c = Always);
     void ma_adc(Register src, Register dest, SBit s = LeaveCC, Condition c = Always);
     void ma_adc(Register src1, Register src2, Register dest, SBit s = LeaveCC, Condition c = Always);
 
     // Add:
-    void ma_add(Imm32 imm, Register dest, SBit s = LeaveCC, Condition c = Always);
+    void ma_add(Imm32 imm, Register dest, AutoRegisterScope& scratch, SBit s = LeaveCC, Condition c = Always);
     void ma_add(Register src1, Register dest, SBit s = LeaveCC, Condition c = Always);
     void ma_add(Register src1, Register src2, Register dest, SBit s = LeaveCC, Condition c = Always);
     void ma_add(Register src1, Operand op, Register dest, SBit s = LeaveCC, Condition c = Always);
-    void ma_add(Register src1, Imm32 op, Register dest, SBit s = LeaveCC, Condition c = Always);
+    void ma_add(Register src1, Imm32 op, Register dest, AutoRegisterScope& scratch,
+                SBit s = LeaveCC, Condition c = Always);
 
     // Subtract with carry:
-    void ma_sbc(Imm32 imm, Register dest, SBit s = LeaveCC, Condition c = Always);
+    void ma_sbc(Imm32 imm, Register dest, AutoRegisterScope& scratch, SBit s = LeaveCC, Condition c = Always);
     void ma_sbc(Register src1, Register dest, SBit s = LeaveCC, Condition c = Always);
     void ma_sbc(Register src1, Register src2, Register dest, SBit s = LeaveCC, Condition c = Always);
 
     // Subtract:
-    void ma_sub(Imm32 imm, Register dest, SBit s = LeaveCC, Condition c = Always);
+    void ma_sub(Imm32 imm, Register dest, AutoRegisterScope& scratch, SBit s = LeaveCC, Condition c = Always);
     void ma_sub(Register src1, Register dest, SBit s = LeaveCC, Condition c = Always);
     void ma_sub(Register src1, Register src2, Register dest, SBit s = LeaveCC, Condition c = Always);
     void ma_sub(Register src1, Operand op, Register dest, SBit s = LeaveCC, Condition c = Always);
-    void ma_sub(Register src1, Imm32 op, Register dest, SBit s = LeaveCC, Condition c = Always);
+    void ma_sub(Register src1, Imm32 op, Register dest, AutoRegisterScope& scratch,
+                SBit s = LeaveCC, Condition c = Always);
 
     // Reverse subtract:
-    void ma_rsb(Imm32 imm, Register dest, SBit s = LeaveCC, Condition c = Always);
+    void ma_rsb(Imm32 imm, Register dest, AutoRegisterScope& scratch, SBit s = LeaveCC, Condition c = Always);
     void ma_rsb(Register src1, Register dest, SBit s = LeaveCC, Condition c = Always);
     void ma_rsb(Register src1, Register src2, Register dest, SBit s = LeaveCC, Condition c = Always);
-    void ma_rsb(Register src1, Imm32 op2, Register dest, SBit s = LeaveCC, Condition c = Always);
+    void ma_rsb(Register src1, Imm32 op2, Register dest, AutoRegisterScope& scratch,
+                SBit s = LeaveCC, Condition c = Always);
 
     // Reverse subtract with carry:
-    void ma_rsc(Imm32 imm, Register dest, SBit s = LeaveCC, Condition c = Always);
+    void ma_rsc(Imm32 imm, Register dest, AutoRegisterScope& scratch, SBit s = LeaveCC, Condition c = Always);
     void ma_rsc(Register src1, Register dest, SBit s = LeaveCC, Condition c = Always);
     void ma_rsc(Register src1, Register src2, Register dest, SBit s = LeaveCC, Condition c = Always);
 
     // Compares/tests.
     // Compare negative (sets condition codes as src1 + src2 would):
-    void ma_cmn(Register src1, Imm32 imm, Condition c = Always);
+    void ma_cmn(Register src1, Imm32 imm, AutoRegisterScope& scratch, Condition c = Always);
     void ma_cmn(Register src1, Register src2, Condition c = Always);
     void ma_cmn(Register src1, Operand op, Condition c = Always);
 
     // Compare (src - src2):
-    void ma_cmp(Register src1, Imm32 imm, Condition c = Always);
-    void ma_cmp(Register src1, ImmWord ptr, Condition c = Always);
-    void ma_cmp(Register src1, ImmGCPtr ptr, Condition c = Always);
-    void ma_cmp(Register src1, Operand op, Condition c = Always);
+    void ma_cmp(Register src1, Imm32 imm, AutoRegisterScope& scratch, Condition c = Always);
+    void ma_cmp(Register src1, ImmTag tag, Condition c = Always);
+    void ma_cmp(Register src1, ImmWord ptr, AutoRegisterScope& scratch, Condition c = Always);
+    void ma_cmp(Register src1, ImmGCPtr ptr, AutoRegisterScope& scratch, Condition c = Always);
+    void ma_cmp(Register src1, Operand op, AutoRegisterScope& scratch, AutoRegisterScope& scratch2,
+                Condition c = Always);
     void ma_cmp(Register src1, Register src2, Condition c = Always);
 
     // Test for equality, (src1 ^ src2):
-    void ma_teq(Register src1, Imm32 imm, Condition c = Always);
+    void ma_teq(Register src1, Imm32 imm, AutoRegisterScope& scratch, Condition c = Always);
     void ma_teq(Register src1, Register src2, Condition c = Always);
     void ma_teq(Register src1, Operand op, Condition c = Always);
 
     // Test (src1 & src2):
-    void ma_tst(Register src1, Imm32 imm, Condition c = Always);
+    void ma_tst(Register src1, Imm32 imm, AutoRegisterScope& scratch, Condition c = Always);
     void ma_tst(Register src1, Register src2, Condition c = Always);
     void ma_tst(Register src1, Operand op, Condition c = Always);
 
     // Multiplies. For now, there are only two that we care about.
     void ma_mul(Register src1, Register src2, Register dest);
-    void ma_mul(Register src1, Imm32 imm, Register dest);
-    Condition ma_check_mul(Register src1, Register src2, Register dest, Condition cond);
-    Condition ma_check_mul(Register src1, Imm32 imm, Register dest, Condition cond);
+    void ma_mul(Register src1, Imm32 imm, Register dest, AutoRegisterScope& scratch);
+    Condition ma_check_mul(Register src1, Register src2, Register dest,
+                           AutoRegisterScope& scratch, Condition cond);
+    Condition ma_check_mul(Register src1, Imm32 imm, Register dest,
+                           AutoRegisterScope& scratch, Condition cond);
 
+    void ma_umull(Register src1, Imm32 imm, Register destHigh, Register destLow, AutoRegisterScope& scratch);
     void ma_umull(Register src1, Register src2, Register destHigh, Register destLow);
-    void ma_umull(Register src1, Imm32 imm, Register destHigh, Register destLow);
 
     // Fast mod, uses scratch registers, and thus needs to be in the assembler
     // implicitly assumes that we can overwrite dest at the beginning of the
     // sequence.
     void ma_mod_mask(Register src, Register dest, Register hold, Register tmp,
-                     int32_t shift);
+                     AutoRegisterScope& scratch, AutoRegisterScope& scratch2, int32_t shift);
 
     // Mod - depends on integer divide instructions being supported.
-    void ma_smod(Register num, Register div, Register dest);
-    void ma_umod(Register num, Register div, Register dest);
+    void ma_smod(Register num, Register div, Register dest, AutoRegisterScope& scratch);
+    void ma_umod(Register num, Register div, Register dest, AutoRegisterScope& scratch);
 
     // Division - depends on integer divide instructions being supported.
     void ma_sdiv(Register num, Register div, Register dest, Condition cond = Always);
     void ma_udiv(Register num, Register div, Register dest, Condition cond = Always);
     // Misc operations
     void ma_clz(Register src, Register dest, Condition cond = Always);
-    void ma_ctz(Register src, Register dest);
+    void ma_ctz(Register src, Register dest, AutoRegisterScope& scratch);
     // Memory:
     // Shortcut for when we know we're transferring 32 bits of data.
-    void ma_dtr(LoadStore ls, Register rn, Imm32 offset, Register rt,
+    void ma_dtr(LoadStore ls, Register rn, Imm32 offset, Register rt, AutoRegisterScope& scratch,
+                Index mode = Offset, Condition cc = Always);
+    void ma_dtr(LoadStore ls, Register rt, const Address& addr, AutoRegisterScope& scratch,
+                Index mode, Condition cc);
+
+    void ma_str(Register rt, DTRAddr addr, Index mode = Offset, Condition cc = Always);
+    void ma_str(Register rt, const Address& addr, AutoRegisterScope& scratch,
                 Index mode = Offset, Condition cc = Always);
 
-    void ma_dtr(LoadStore ls, Register rn, Register rm, Register rt,
+    void ma_ldr(DTRAddr addr, Register rt, Index mode = Offset, Condition cc = Always);
+    void ma_ldr(const Address& addr, Register rt, AutoRegisterScope& scratch,
                 Index mode = Offset, Condition cc = Always);
 
-
-    void ma_str(Register rt, DTRAddr addr, Index mode = Offset, Condition cc = Always);
-    void ma_str(Register rt, const Address& addr, Index mode = Offset, Condition cc = Always);
-    void ma_dtr(LoadStore ls, Register rt, const Address& addr, Index mode, Condition cc);
-
-    void ma_ldr(DTRAddr addr, Register rt, Index mode = Offset, Condition cc = Always);
-    void ma_ldr(const Address& addr, Register rt, Index mode = Offset, Condition cc = Always);
-
     void ma_ldrb(DTRAddr addr, Register rt, Index mode = Offset, Condition cc = Always);
     void ma_ldrh(EDtrAddr addr, Register rt, Index mode = Offset, Condition cc = Always);
     void ma_ldrsh(EDtrAddr addr, Register rt, Index mode = Offset, Condition cc = Always);
     void ma_ldrsb(EDtrAddr addr, Register rt, Index mode = Offset, Condition cc = Always);
     void ma_ldrd(EDtrAddr addr, Register rt, DebugOnly<Register> rt2, Index mode = Offset,
                  Condition cc = Always);
     void ma_strb(Register rt, DTRAddr addr, Index mode = Offset, Condition cc = Always);
     void ma_strh(Register rt, EDtrAddr addr, Index mode = Offset, Condition cc = Always);
     void ma_strd(Register rt, DebugOnly<Register> rt2, EDtrAddr addr, Index mode = Offset,
                  Condition cc = Always);
 
     // Specialty for moving N bits of data, where n == 8,16,32,64.
     BufferOffset ma_dataTransferN(LoadStore ls, int size, bool IsSigned,
-                                  Register rn, Register rm, Register rt,
+                                  Register rn, Register rm, Register rt, AutoRegisterScope& scratch,
                                   Index mode = Offset, Condition cc = Always,
-                                  unsigned scale = TimesOne);
+                                  Scale scale = TimesOne);
 
     BufferOffset ma_dataTransferN(LoadStore ls, int size, bool IsSigned,
-                                  Register rn, Imm32 offset, Register rt,
+                                  Register rn, Register rm, Register rt,
+                                  Index mode = Offset, Condition cc = Always);
+
+    BufferOffset ma_dataTransferN(LoadStore ls, int size, bool IsSigned,
+                                  Register rn, Imm32 offset, Register rt, AutoRegisterScope& scratch,
                                   Index mode = Offset, Condition cc = Always);
 
     void ma_pop(Register r);
+    void ma_popn_pc(Imm32 n, AutoRegisterScope& scratch, AutoRegisterScope& scratch2);
     void ma_push(Register r);
+    void ma_push_sp(Register r, AutoRegisterScope& scratch);
 
     void ma_vpop(VFPRegister r);
     void ma_vpush(VFPRegister r);
 
     // Barriers.
     void ma_dmb(BarrierOption option=BarrierSY);
     void ma_dsb(BarrierOption option=BarrierSY);
 
@@ -392,27 +406,30 @@ class MacroAssemblerARM : public Assembl
     // Transfer (do not coerce) a double into a couple of gpr.
     void ma_vxfer(VFPRegister src, Register dest1, Register dest2, Condition cc = Always);
 
     // Transfer (do not coerce) a gpr into a float
     void ma_vxfer(Register src, FloatRegister dest, Condition cc = Always);
     // Transfer (do not coerce) a couple of gpr into a double
     void ma_vxfer(Register src1, Register src2, FloatRegister dest, Condition cc = Always);
 
-    BufferOffset ma_vdtr(LoadStore ls, const Address& addr, VFPRegister dest, Condition cc = Always);
+    BufferOffset ma_vdtr(LoadStore ls, const Address& addr, VFPRegister dest, AutoRegisterScope& scratch,
+                         Condition cc = Always);
 
     BufferOffset ma_vldr(VFPAddr addr, VFPRegister dest, Condition cc = Always);
-    BufferOffset ma_vldr(const Address& addr, VFPRegister dest, Condition cc = Always);
-    BufferOffset ma_vldr(VFPRegister src, Register base, Register index,
+    BufferOffset ma_vldr(const Address& addr, VFPRegister dest, AutoRegisterScope& scratch, Condition cc = Always);
+    BufferOffset ma_vldr(VFPRegister src, Register base, Register index, AutoRegisterScope& scratch,
                          int32_t shift = defaultShift, Condition cc = Always);
 
     BufferOffset ma_vstr(VFPRegister src, VFPAddr addr, Condition cc = Always);
-    BufferOffset ma_vstr(VFPRegister src, const Address& addr, Condition cc = Always);
-    BufferOffset ma_vstr(VFPRegister src, Register base, Register index, int32_t shift,
-                         int32_t offset, Condition cc = Always);
+    BufferOffset ma_vstr(VFPRegister src, const Address& addr, AutoRegisterScope& scratch, Condition cc = Always);
+    BufferOffset ma_vstr(VFPRegister src, Register base, Register index, AutoRegisterScope& scratch,
+                         AutoRegisterScope& scratch2, int32_t shift, int32_t offset, Condition cc = Always);
+    BufferOffset ma_vstr(VFPRegister src, Register base, Register index, AutoRegisterScope& scratch,
+                         int32_t shift, Condition cc = Always);
 
     void ma_call(ImmPtr dest);
 
     // Float registers can only be loaded/stored in continuous runs when using
     // vstm/vldm. This function breaks set into continuous runs and loads/stores
     // them at [rm]. rm will be modified and left in a state logically suitable
     // for the next load/store. Returns the offset from [dm] for the logical
     // next load/store.
@@ -534,66 +551,74 @@ class MacroAssemblerARMCompat : public M
     }
     void shortJumpSizedNop() {
         ma_nop();
     }
     void ret() {
         ma_pop(pc);
     }
     void retn(Imm32 n) {
-        // pc <- [sp]; sp += n
-        ma_dtr(IsLoad, sp, n, pc, PostIndex);
+        ScratchRegisterScope scratch(asMasm());
+        SecondScratchRegisterScope scratch2(asMasm());
+        ma_popn_pc(n, scratch, scratch2);
     }
     void push(Imm32 imm) {
         ScratchRegisterScope scratch(asMasm());
         ma_mov(imm, scratch);
         ma_push(scratch);
     }
     void push(ImmWord imm) {
         push(Imm32(imm.value));
     }
     void push(ImmGCPtr imm) {
         ScratchRegisterScope scratch(asMasm());
         ma_mov(imm, scratch);
         ma_push(scratch);
     }
     void push(const Address& addr) {
         ScratchRegisterScope scratch(asMasm());
-        ma_ldr(addr, scratch);
+        SecondScratchRegisterScope scratch2(asMasm());
+        ma_ldr(addr, scratch, scratch2);
         ma_push(scratch);
     }
     void push(Register reg) {
-        ma_push(reg);
+        if (reg == sp) {
+            ScratchRegisterScope scratch(asMasm());
+            ma_push_sp(reg, scratch);
+        } else {
+            ma_push(reg);
+        }
     }
     void push(FloatRegister reg) {
         ma_vpush(VFPRegister(reg));
     }
     void pushWithPadding(Register reg, const Imm32 extraSpace) {
+        ScratchRegisterScope scratch(asMasm());
         Imm32 totSpace = Imm32(extraSpace.value + 4);
-        ma_dtr(IsStore, sp, totSpace, reg, PreIndex);
+        ma_dtr(IsStore, sp, totSpace, reg, scratch, PreIndex);
     }
     void pushWithPadding(Imm32 imm, const Imm32 extraSpace) {
-        AutoRegisterScope scratch2(asMasm(), secondScratchReg_);
+        ScratchRegisterScope scratch(asMasm());
+        SecondScratchRegisterScope scratch2(asMasm());
         Imm32 totSpace = Imm32(extraSpace.value + 4);
-        // ma_dtr may need the scratch register to adjust the stack, so use the
-        // second scratch register.
-        ma_mov(imm, scratch2);
-        ma_dtr(IsStore, sp, totSpace, scratch2, PreIndex);
+        ma_mov(imm, scratch);
+        ma_dtr(IsStore, sp, totSpace, scratch, scratch2, PreIndex);
     }
 
     void pop(Register reg) {
         ma_pop(reg);
     }
     void pop(FloatRegister reg) {
         ma_vpop(VFPRegister(reg));
     }
 
     void popN(Register reg, Imm32 extraSpace) {
+        ScratchRegisterScope scratch(asMasm());
         Imm32 totSpace = Imm32(extraSpace.value + 4);
-        ma_dtr(IsLoad, sp, totSpace, reg, PostIndex);
+        ma_dtr(IsLoad, sp, totSpace, reg, scratch, PostIndex);
     }
 
     CodeOffset toggledJump(Label* label);
 
     // Emit a BLX or NOP instruction. ToggleCall can be used to patch this
     // instruction.
     CodeOffset toggledCall(JitCode* target, bool enabled);
 
@@ -619,36 +644,39 @@ class MacroAssemblerARMCompat : public M
     void jump(JitCode* code) {
         branch(code);
     }
     void jump(Register reg) {
         ma_bx(reg);
     }
     void jump(const Address& addr) {
         ScratchRegisterScope scratch(asMasm());
-        ma_ldr(addr, scratch);
+        SecondScratchRegisterScope scratch2(asMasm());
+        ma_ldr(addr, scratch, scratch2);
         ma_bx(scratch);
     }
     void jump(wasm::JumpTarget target) {
         as_b(target);
     }
 
     void negl(Register reg) {
         ma_neg(reg, reg, SetCC);
     }
     void test32(Register lhs, Register rhs) {
         ma_tst(lhs, rhs);
     }
     void test32(Register lhs, Imm32 imm) {
-        ma_tst(lhs, imm);
+        ScratchRegisterScope scratch(asMasm());
+        ma_tst(lhs, imm, scratch);
     }
     void test32(const Address& addr, Imm32 imm) {
         ScratchRegisterScope scratch(asMasm());
-        ma_ldr(addr, scratch);
-        ma_tst(scratch, imm);
+        SecondScratchRegisterScope scratch2(asMasm());
+        ma_ldr(addr, scratch, scratch2);
+        ma_tst(scratch, imm, scratch2);
     }
     void testPtr(Register lhs, Register rhs) {
         test32(lhs, rhs);
     }
 
     // Returns the register containing the type tag.
     Register splitTagForTest(const ValueOperand& value) {
         return value.typeReg();
@@ -720,17 +748,17 @@ class MacroAssemblerARMCompat : public M
     void unboxObject(const Address& src, Register dest) { unboxNonDouble(src, dest); }
     void unboxObject(const BaseIndex& src, Register dest) { unboxNonDouble(src, dest); }
     void unboxDouble(const ValueOperand& src, FloatRegister dest);
     void unboxDouble(const Address& src, FloatRegister dest);
     void unboxValue(const ValueOperand& src, AnyRegister dest);
     void unboxPrivate(const ValueOperand& src, Register dest);
 
     void notBoolean(const ValueOperand& val) {
-        ma_eor(Imm32(1), val.payloadReg());
+        as_eor(val.payloadReg(), val.payloadReg(), Imm8(1));
     }
 
     // Boxing code.
     void boxDouble(FloatRegister src, const ValueOperand& dest);
     void boxNonDouble(JSValueType type, Register src, const ValueOperand& dest);
 
     // Extended unboxing API. If the payload is already in a register, returns
     // that register. Otherwise, provides a move to the given scratch register,
@@ -774,20 +802,22 @@ class MacroAssemblerARMCompat : public M
 
     CodeOffsetJump jumpWithPatch(RepatchLabel* label, Condition cond = Always,
                                  Label* documentation = nullptr);
     CodeOffsetJump backedgeJump(RepatchLabel* label, Label* documentation) {
         return jumpWithPatch(label, Always, documentation);
     }
 
     void loadUnboxedValue(Address address, MIRType type, AnyRegister dest) {
-        if (dest.isFloat())
+        if (dest.isFloat()) {
             loadInt32OrDouble(address, dest.fpu());
-        else
-            ma_ldr(address, dest.gpr());
+        } else {
+            ScratchRegisterScope scratch(asMasm());
+            ma_ldr(address, dest.gpr(), scratch);
+        }
     }
 
     void loadUnboxedValue(BaseIndex address, MIRType type, AnyRegister dest) {
         if (dest.isFloat())
             loadInt32OrDouble(address.base, address.index, dest.fpu(), address.scale);
         else
             load32(address, dest.gpr());
     }
@@ -834,40 +864,100 @@ class MacroAssemblerARMCompat : public M
         if (s1 != d1)
             ma_mov(s1, d1);
     }
 
     void storeValue(ValueOperand val, const Address& dst);
     void storeValue(ValueOperand val, const BaseIndex& dest);
     void storeValue(JSValueType type, Register reg, BaseIndex dest) {
         ScratchRegisterScope scratch(asMasm());
+        SecondScratchRegisterScope scratch2(asMasm());
+
+        int32_t payloadoffset = dest.offset + NUNBOX32_PAYLOAD_OFFSET;
+        int32_t typeoffset = dest.offset + NUNBOX32_TYPE_OFFSET;
+
         ma_alu(dest.base, lsl(dest.index, dest.scale), scratch, OpAdd);
-        storeValue(type, reg, Address(scratch, dest.offset));
+
+        // Store the payload.
+        if (payloadoffset < 4096 && payloadoffset > -4096)
+            ma_str(reg, DTRAddr(scratch, DtrOffImm(payloadoffset)));
+        else
+            ma_str(reg, Address(scratch, payloadoffset), scratch2);
+
+        // Store the type.
+        if (typeoffset < 4096 && typeoffset > -4096) {
+            // Encodable as DTRAddr, so only two instructions needed.
+            ma_mov(ImmTag(JSVAL_TYPE_TO_TAG(type)), scratch2);
+            ma_str(scratch2, DTRAddr(scratch, DtrOffImm(typeoffset)));
+        } else {
+            // Since there are only two scratch registers, the offset must be
+            // applied early using a third instruction to be safe.
+            ma_add(Imm32(typeoffset), scratch, scratch2);
+            ma_mov(ImmTag(JSVAL_TYPE_TO_TAG(type)), scratch2);
+            ma_str(scratch2, DTRAddr(scratch, DtrOffImm(0)));
+        }
     }
     void storeValue(JSValueType type, Register reg, Address dest) {
-        ma_str(reg, dest);
-        AutoRegisterScope scratch2(asMasm(), secondScratchReg_);
-        ma_mov(ImmTag(JSVAL_TYPE_TO_TAG(type)), scratch2);
-        ma_str(scratch2, Address(dest.base, dest.offset + 4));
+        ScratchRegisterScope scratch(asMasm());
+        SecondScratchRegisterScope scratch2(asMasm());
+
+        ma_str(reg, dest, scratch2);
+        ma_mov(ImmTag(JSVAL_TYPE_TO_TAG(type)), scratch);
+        ma_str(scratch, Address(dest.base, dest.offset + NUNBOX32_TYPE_OFFSET), scratch2);
     }
     void storeValue(const Value& val, const Address& dest) {
-        AutoRegisterScope scratch2(asMasm(), secondScratchReg_);
+        ScratchRegisterScope scratch(asMasm());
+        SecondScratchRegisterScope scratch2(asMasm());
+
         jsval_layout jv = JSVAL_TO_IMPL(val);
-        ma_mov(Imm32(jv.s.tag), scratch2);
-        ma_str(scratch2, ToType(dest));
+        ma_mov(Imm32(jv.s.tag), scratch);
+        ma_str(scratch, ToType(dest), scratch2);
         if (val.isMarkable())
-            ma_mov(ImmGCPtr(reinterpret_cast<gc::Cell*>(val.toGCThing())), scratch2);
+            ma_mov(ImmGCPtr(reinterpret_cast<gc::Cell*>(val.toGCThing())), scratch);
         else
-            ma_mov(Imm32(jv.s.payload.i32), scratch2);
-        ma_str(scratch2, ToPayload(dest));
+            ma_mov(Imm32(jv.s.payload.i32), scratch);
+        ma_str(scratch, ToPayload(dest), scratch2);
     }
     void storeValue(const Value& val, BaseIndex dest) {
         ScratchRegisterScope scratch(asMasm());
+        SecondScratchRegisterScope scratch2(asMasm());
+        jsval_layout jv = JSVAL_TO_IMPL(val);
+
+        int32_t typeoffset = dest.offset + NUNBOX32_TYPE_OFFSET;
+        int32_t payloadoffset = dest.offset + NUNBOX32_PAYLOAD_OFFSET;
+
         ma_alu(dest.base, lsl(dest.index, dest.scale), scratch, OpAdd);
-        storeValue(val, Address(scratch, dest.offset));
+
+        // Store the type.
+        if (typeoffset < 4096 && typeoffset > -4096) {
+            ma_mov(Imm32(jv.s.tag), scratch2);
+            ma_str(scratch2, DTRAddr(scratch, DtrOffImm(typeoffset)));
+        } else {
+            ma_add(Imm32(typeoffset), scratch, scratch2);
+            ma_mov(Imm32(jv.s.tag), scratch2);
+            ma_str(scratch2, DTRAddr(scratch, DtrOffImm(0)));
+            // Restore scratch for the payload store.
+            ma_alu(dest.base, lsl(dest.index, dest.scale), scratch, OpAdd);
+        }
+
+        // Store the payload, marking if necessary.
+        if (payloadoffset < 4096 && payloadoffset > -4096) {
+            if (val.isMarkable())
+                ma_mov(ImmGCPtr(reinterpret_cast<gc::Cell*>(val.toGCThing())), scratch2);
+            else
+                ma_mov(Imm32(jv.s.payload.i32), scratch2);
+            ma_str(scratch2, DTRAddr(scratch, DtrOffImm(payloadoffset)));
+        } else {
+            ma_add(Imm32(payloadoffset), scratch, scratch2);
+            if (val.isMarkable())
+                ma_mov(ImmGCPtr(reinterpret_cast<gc::Cell*>(val.toGCThing())), scratch2);
+            else
+                ma_mov(Imm32(jv.s.payload.i32), scratch2);
+            ma_str(scratch2, DTRAddr(scratch, DtrOffImm(0)));
+        }
     }
     void storeValue(const Address& src, const Address& dest, Register temp) {
         load32(ToType(src), temp);
         store32(temp, ToType(dest));
 
         load32(ToPayload(src), temp);
         store32(temp, ToPayload(dest));
     }
@@ -1398,64 +1488,64 @@ class MacroAssemblerARMCompat : public M
     bool buildOOLFakeExitFrame(void* fakeReturnAddr);
 
   public:
     CodeOffset labelForPatch() {
         return CodeOffset(nextOffset().getOffset());
     }
 
     void computeEffectiveAddress(const Address& address, Register dest) {
-        ma_add(address.base, Imm32(address.offset), dest, LeaveCC);
+        ScratchRegisterScope scratch(asMasm());
+        ma_add(address.base, Imm32(address.offset), dest, scratch, LeaveCC);
     }
     void computeEffectiveAddress(const BaseIndex& address, Register dest) {
+        ScratchRegisterScope scratch(asMasm());
         ma_alu(address.base, lsl(address.index, address.scale), dest, OpAdd, LeaveCC);
         if (address.offset)
-            ma_add(dest, Imm32(address.offset), dest, LeaveCC);
+            ma_add(dest, Imm32(address.offset), dest, scratch, LeaveCC);
     }
     void floor(FloatRegister input, Register output, Label* handleNotAnInt);
     void floorf(FloatRegister input, Register output, Label* handleNotAnInt);
     void ceil(FloatRegister input, Register output, Label* handleNotAnInt);
     void ceilf(FloatRegister input, Register output, Label* handleNotAnInt);
     void round(FloatRegister input, Register output, Label* handleNotAnInt, FloatRegister tmp);
     void roundf(FloatRegister input, Register output, Label* handleNotAnInt, FloatRegister tmp);
 
     void clampCheck(Register r, Label* handleNotAnInt) {
         // Check explicitly for r == INT_MIN || r == INT_MAX
         // This is the instruction sequence that gcc generated for this
         // operation.
         ScratchRegisterScope scratch(asMasm());
-        ma_sub(r, Imm32(0x80000001), scratch);
-        ma_cmn(scratch, Imm32(3));
+        SecondScratchRegisterScope scratch2(asMasm());
+        ma_sub(r, Imm32(0x80000001), scratch, scratch2);
+        as_cmn(scratch, Imm8(3));
         ma_b(handleNotAnInt, Above);
     }
 
     void lea(Operand addr, Register dest) {
-        ma_add(addr.baseReg(), Imm32(addr.disp()), dest);
+        ScratchRegisterScope scratch(asMasm());
+        ma_add(addr.baseReg(), Imm32(addr.disp()), dest, scratch);
     }
 
     void abiret() {
         as_bx(lr);
     }
 
-    void ma_storeImm(Imm32 c, const Address& dest) {
-        ma_mov(c, lr);
-        ma_str(lr, dest);
-    }
-
     void moveFloat32(FloatRegister src, FloatRegister dest, Condition cc = Always) {
         as_vmov(VFPRegister(dest).singleOverlay(), VFPRegister(src).singleOverlay(), cc);
     }
 
     void loadWasmGlobalPtr(uint32_t globalDataOffset, Register dest) {
         loadPtr(Address(GlobalReg, globalDataOffset - AsmJSGlobalRegBias), dest);
     }
     void loadWasmPinnedRegsFromTls() {
-        loadPtr(Address(WasmTlsReg, offsetof(wasm::TlsData, memoryBase)), HeapReg);
-        loadPtr(Address(WasmTlsReg, offsetof(wasm::TlsData, globalData)), GlobalReg);
-        ma_add(Imm32(AsmJSGlobalRegBias), GlobalReg);
+        ScratchRegisterScope scratch(asMasm());
+        ma_ldr(Address(WasmTlsReg, offsetof(wasm::TlsData, memoryBase)), HeapReg, scratch);
+        ma_ldr(Address(WasmTlsReg, offsetof(wasm::TlsData, globalData)), GlobalReg, scratch);
+        ma_add(Imm32(AsmJSGlobalRegBias), GlobalReg, scratch);
     }
 
     // Instrumentation for entering and leaving the profiler.
     void profilerEnterFrame(Register framePtr, Register scratch);
     void profilerExitFrame();
 };
 
 typedef MacroAssemblerARMCompat MacroAssemblerSpecific;
--- a/js/src/jit/arm/MoveEmitter-arm.cpp
+++ b/js/src/jit/arm/MoveEmitter-arm.cpp
@@ -84,275 +84,289 @@ MoveEmitterARM::tempReg()
     // used for address computations, such as those found when we attempt to
     // access values more than 4096 off of the stack pointer. Instead, use lr,
     // the LinkRegister.
     spilledReg_ = r14;
     if (pushedAtSpill_ == -1) {
         masm.Push(spilledReg_);
         pushedAtSpill_ = masm.framePushed();
     } else {
-        masm.ma_str(spilledReg_, spillSlot());
+        ScratchRegisterScope scratch(masm);
+        masm.ma_str(spilledReg_, spillSlot(), scratch);
     }
     return spilledReg_;
 }
 
 void
 MoveEmitterARM::breakCycle(const MoveOperand& from, const MoveOperand& to,
                            MoveOp::Type type, uint32_t slotId)
 {
     // There is some pattern:
     //   (A -> B)
     //   (B -> A)
     //
     // This case handles (A -> B), which we reach first. We save B, then allow
     // the original move to continue.
+
+    ScratchRegisterScope scratch(masm);
+
     switch (type) {
       case MoveOp::FLOAT32:
         if (to.isMemory()) {
-            VFPRegister temp = ScratchFloat32Reg;
-            masm.ma_vldr(toAddress(to), temp);
+            ScratchFloat32Scope scratchFloat32(masm);
+            masm.ma_vldr(toAddress(to), scratchFloat32, scratch);
             // Since it is uncertain if the load will be aligned or not
             // just fill both of them with the same value.
-            masm.ma_vstr(temp, cycleSlot(slotId, 0));
-            masm.ma_vstr(temp, cycleSlot(slotId, 4));
+            masm.ma_vstr(scratchFloat32, cycleSlot(slotId, 0), scratch);
+            masm.ma_vstr(scratchFloat32, cycleSlot(slotId, 4), scratch);
         } else if (to.isGeneralReg()) {
             // Since it is uncertain if the load will be aligned or not
             // just fill both of them with the same value.
-            masm.ma_str(to.reg(), cycleSlot(slotId, 0));
-            masm.ma_str(to.reg(), cycleSlot(slotId, 4));
+            masm.ma_str(to.reg(), cycleSlot(slotId, 0), scratch);
+            masm.ma_str(to.reg(), cycleSlot(slotId, 4), scratch);
         } else {
             FloatRegister src = to.floatReg();
             // Just always store the largest possible size. Currently, this is
             // a double. When SIMD is added, two doubles will need to be stored.
-            masm.ma_vstr(src.doubleOverlay(), cycleSlot(slotId, 0));
+            masm.ma_vstr(src.doubleOverlay(), cycleSlot(slotId, 0), scratch);
         }
         break;
       case MoveOp::DOUBLE:
         if (to.isMemory()) {
-            ScratchDoubleScope scratch(masm);
-            masm.ma_vldr(toAddress(to), scratch);
-            masm.ma_vstr(scratch, cycleSlot(slotId, 0));
+            ScratchDoubleScope scratchDouble(masm);
+            masm.ma_vldr(toAddress(to), scratchDouble, scratch);
+            masm.ma_vstr(scratchDouble, cycleSlot(slotId, 0), scratch);
         } else if (to.isGeneralRegPair()) {
-            ScratchDoubleScope scratch(masm);
-            masm.ma_vxfer(to.evenReg(), to.oddReg(), scratch);
-            masm.ma_vstr(scratch, cycleSlot(slotId, 0));
+            ScratchDoubleScope scratchDouble(masm);
+            masm.ma_vxfer(to.evenReg(), to.oddReg(), scratchDouble);
+            masm.ma_vstr(scratchDouble, cycleSlot(slotId, 0), scratch);
         } else {
-            masm.ma_vstr(to.floatReg().doubleOverlay(), cycleSlot(slotId, 0));
+            masm.ma_vstr(to.floatReg().doubleOverlay(), cycleSlot(slotId, 0), scratch);
         }
         break;
       case MoveOp::INT32:
       case MoveOp::GENERAL:
         // an non-vfp value
         if (to.isMemory()) {
             Register temp = tempReg();
-            masm.ma_ldr(toAddress(to), temp);
-            masm.ma_str(temp, cycleSlot(0,0));
+            masm.ma_ldr(toAddress(to), temp, scratch);
+            masm.ma_str(temp, cycleSlot(0,0), scratch);
         } else {
             if (to.reg() == spilledReg_) {
                 // If the destination was spilled, restore it first.
-                masm.ma_ldr(spillSlot(), spilledReg_);
+                masm.ma_ldr(spillSlot(), spilledReg_, scratch);
                 spilledReg_ = InvalidReg;
             }
-            masm.ma_str(to.reg(), cycleSlot(0,0));
+            masm.ma_str(to.reg(), cycleSlot(0,0), scratch);
         }
         break;
       default:
         MOZ_CRASH("Unexpected move type");
     }
 }
 
 void
 MoveEmitterARM::completeCycle(const MoveOperand& from, const MoveOperand& to, MoveOp::Type type, uint32_t slotId)
 {
     // There is some pattern:
     //   (A -> B)
     //   (B -> A)
     //
     // This case handles (B -> A), which we reach last. We emit a move from the
     // saved value of B, to A.
+
+    ScratchRegisterScope scratch(masm);
+
     switch (type) {
       case MoveOp::FLOAT32:
         MOZ_ASSERT(!to.isGeneralRegPair());
         if (to.isMemory()) {
-            ScratchFloat32Scope scratch(masm);
-            masm.ma_vldr(cycleSlot(slotId, 0), scratch);
-            masm.ma_vstr(scratch, toAddress(to));
+            ScratchFloat32Scope scratchFloat32(masm);
+            masm.ma_vldr(cycleSlot(slotId, 0), scratchFloat32, scratch);
+            masm.ma_vstr(scratchFloat32, toAddress(to), scratch);
         } else if (to.isGeneralReg()) {
             MOZ_ASSERT(type == MoveOp::FLOAT32);
-            masm.ma_ldr(toAddress(from), to.reg());
+            masm.ma_ldr(toAddress(from), to.reg(), scratch);
         } else {
             uint32_t offset = 0;
             if ((!from.isMemory()) && from.floatReg().numAlignedAliased() == 1)
                 offset = sizeof(float);
-            masm.ma_vldr(cycleSlot(slotId, offset), to.floatReg());
+            masm.ma_vldr(cycleSlot(slotId, offset), to.floatReg(), scratch);
         }
         break;
       case MoveOp::DOUBLE:
         MOZ_ASSERT(!to.isGeneralReg());
         if (to.isMemory()) {
-            ScratchDoubleScope scratch(masm);
-            masm.ma_vldr(cycleSlot(slotId, 0), scratch);
-            masm.ma_vstr(scratch, toAddress(to));
+            ScratchDoubleScope scratchDouble(masm);
+            masm.ma_vldr(cycleSlot(slotId, 0), scratchDouble, scratch);
+            masm.ma_vstr(scratchDouble, toAddress(to), scratch);
         } else if (to.isGeneralRegPair()) {
             MOZ_ASSERT(type == MoveOp::DOUBLE);
-            ScratchDoubleScope scratch(masm);
-            masm.ma_vldr(toAddress(from), scratch);
-            masm.ma_vxfer(scratch, to.evenReg(), to.oddReg());
+            ScratchDoubleScope scratchDouble(masm);
+            masm.ma_vldr(toAddress(from), scratchDouble, scratch);
+            masm.ma_vxfer(scratchDouble, to.evenReg(), to.oddReg());
         } else {
             uint32_t offset = 0;
             if ((!from.isMemory()) && from.floatReg().numAlignedAliased() == 1)
                 offset = sizeof(float);
-            masm.ma_vldr(cycleSlot(slotId, offset), to.floatReg());
+            masm.ma_vldr(cycleSlot(slotId, offset), to.floatReg(), scratch);
         }
         break;
       case MoveOp::INT32:
       case MoveOp::GENERAL:
         MOZ_ASSERT(slotId == 0);
         if (to.isMemory()) {
             Register temp = tempReg();
-            masm.ma_ldr(cycleSlot(slotId, 0), temp);
-            masm.ma_str(temp, toAddress(to));
+            masm.ma_ldr(cycleSlot(slotId, 0), temp, scratch);
+            masm.ma_str(temp, toAddress(to), scratch);
         } else {
             if (to.reg() == spilledReg_) {
                 // Make sure we don't re-clobber the spilled register later.
                 spilledReg_ = InvalidReg;
             }
-            masm.ma_ldr(cycleSlot(slotId, 0), to.reg());
+            masm.ma_ldr(cycleSlot(slotId, 0), to.reg(), scratch);
         }
         break;
       default:
         MOZ_CRASH("Unexpected move type");
     }
 }
 
 void
 MoveEmitterARM::emitMove(const MoveOperand& from, const MoveOperand& to)
 {
     // Register pairs are used to store Double values during calls.
     MOZ_ASSERT(!from.isGeneralRegPair());
     MOZ_ASSERT(!to.isGeneralRegPair());
 
+    ScratchRegisterScope scratch(masm);
+
     if (to.isGeneralReg() && to.reg() == spilledReg_) {
         // If the destination is the spilled register, make sure we
         // don't re-clobber its value.
         spilledReg_ = InvalidReg;
     }
 
     if (from.isGeneralReg()) {
         if (from.reg() == spilledReg_) {
             // If the source is a register that has been spilled, make sure
             // to load the source back into that register.
-            masm.ma_ldr(spillSlot(), spilledReg_);
+            masm.ma_ldr(spillSlot(), spilledReg_, scratch);
             spilledReg_ = InvalidReg;
         }
         if (to.isMemoryOrEffectiveAddress())
-            masm.ma_str(from.reg(), toAddress(to));
+            masm.ma_str(from.reg(), toAddress(to), scratch);
         else
             masm.ma_mov(from.reg(), to.reg());
     } else if (to.isGeneralReg()) {
         MOZ_ASSERT(from.isMemoryOrEffectiveAddress());
         if (from.isMemory())
-            masm.ma_ldr(toAddress(from), to.reg());
+            masm.ma_ldr(toAddress(from), to.reg(), scratch);
         else
-            masm.ma_add(from.base(), Imm32(from.disp()), to.reg());
+            masm.ma_add(from.base(), Imm32(from.disp()), to.reg(), scratch);
     } else {
         // Memory to memory gpr move.
         Register reg = tempReg();
 
         MOZ_ASSERT(from.isMemoryOrEffectiveAddress());
         if (from.isMemory())
-            masm.ma_ldr(toAddress(from), reg);
+            masm.ma_ldr(toAddress(from), reg, scratch);
         else
-            masm.ma_add(from.base(), Imm32(from.disp()), reg);
+            masm.ma_add(from.base(), Imm32(from.disp()), reg, scratch);
         MOZ_ASSERT(to.base() != reg);
-        masm.ma_str(reg, toAddress(to));
+        masm.ma_str(reg, toAddress(to), scratch);
     }
 }
 
 void
 MoveEmitterARM::emitFloat32Move(const MoveOperand& from, const MoveOperand& to)
 {
     // Register pairs are used to store Double values during calls.
     MOZ_ASSERT(!from.isGeneralRegPair());
     MOZ_ASSERT(!to.isGeneralRegPair());
 
+    ScratchRegisterScope scratch(masm);
+
     if (from.isFloatReg()) {
         if (to.isFloatReg())
             masm.ma_vmov_f32(from.floatReg(), to.floatReg());
         else if (to.isGeneralReg())
             masm.ma_vxfer(from.floatReg(), to.reg());
         else
-            masm.ma_vstr(VFPRegister(from.floatReg()).singleOverlay(), toAddress(to));
+            masm.ma_vstr(VFPRegister(from.floatReg()).singleOverlay(), toAddress(to), scratch);
     } else if (from.isGeneralReg()) {
-        if (to.isFloatReg())
+        if (to.isFloatReg()) {
             masm.ma_vxfer(from.reg(), to.floatReg());
-        else if (to.isGeneralReg())
+        } else if (to.isGeneralReg()) {
             masm.ma_mov(from.reg(), to.reg());
-        else
-            masm.ma_str(from.reg(), toAddress(to));
+        } else {
+            masm.ma_str(from.reg(), toAddress(to), scratch);
+        }
     } else if (to.isFloatReg()) {
-        masm.ma_vldr(toAddress(from), VFPRegister(to.floatReg()).singleOverlay());
+        masm.ma_vldr(toAddress(from), VFPRegister(to.floatReg()).singleOverlay(), scratch);
     } else if (to.isGeneralReg()) {
-        masm.ma_ldr(toAddress(from), to.reg());
+        masm.ma_ldr(toAddress(from), to.reg(), scratch);
     } else {
         // Memory to memory move.
         MOZ_ASSERT(from.isMemory());
-        FloatRegister reg = ScratchFloat32Reg;
-        masm.ma_vldr(toAddress(from), VFPRegister(reg).singleOverlay());
-        masm.ma_vstr(VFPRegister(reg).singleOverlay(), toAddress(to));
+        ScratchFloat32Scope scratchFloat32(masm);
+        masm.ma_vldr(toAddress(from), scratchFloat32, scratch);
+        masm.ma_vstr(scratchFloat32, toAddress(to), scratch);
     }
 }
 
 void
 MoveEmitterARM::emitDoubleMove(const MoveOperand& from, const MoveOperand& to)
 {
     // Registers are used to store pointers / int32 / float32 values.
     MOZ_ASSERT(!from.isGeneralReg());
     MOZ_ASSERT(!to.isGeneralReg());
 
+    ScratchRegisterScope scratch(masm);
+
     if (from.isFloatReg()) {
         if (to.isFloatReg())
             masm.ma_vmov(from.floatReg(), to.floatReg());
         else if (to.isGeneralRegPair())
             masm.ma_vxfer(from.floatReg(), to.evenReg(), to.oddReg());
         else
-            masm.ma_vstr(from.floatReg(), toAddress(to));
+            masm.ma_vstr(from.floatReg(), toAddress(to), scratch);
     } else if (from.isGeneralRegPair()) {
         if (to.isFloatReg())
             masm.ma_vxfer(from.evenReg(), from.oddReg(), to.floatReg());
         else if (to.isGeneralRegPair()) {
             MOZ_ASSERT(!from.aliases(to));
             masm.ma_mov(from.evenReg(), to.evenReg());
             masm.ma_mov(from.oddReg(), to.oddReg());
         } else {
-            FloatRegister reg = ScratchDoubleReg;
-            masm.ma_vxfer(from.evenReg(), from.oddReg(), reg);
-            masm.ma_vstr(reg, toAddress(to));
+            ScratchDoubleScope scratchDouble(masm);
+            masm.ma_vxfer(from.evenReg(), from.oddReg(), scratchDouble);
+            masm.ma_vstr(scratchDouble, toAddress(to), scratch);
         }
     } else if (to.isFloatReg()) {
-        masm.ma_vldr(toAddress(from), to.floatReg());
+        masm.ma_vldr(toAddress(from), to.floatReg(), scratch);
     } else if (to.isGeneralRegPair()) {
         MOZ_ASSERT(from.isMemory());
         Address src = toAddress(from);
         // Note: We can safely use the MoveOperand's displacement here,
         // even if the base is SP: MoveEmitter::toOperand adjusts
         // SP-relative operands by the difference between the current
         // stack usage and stackAdjust, which emitter.finish() resets to
         // 0.
         //
         // Warning: if the offset isn't within [-255,+255] then this
         // will assert-fail (or, if non-debug, load the wrong words).
         // Nothing uses such an offset at the time of this writing.
         masm.ma_ldrd(EDtrAddr(src.base, EDtrOffImm(src.offset)), to.evenReg(), to.oddReg());
     } else {
         // Memory to memory move.
         MOZ_ASSERT(from.isMemory());
-        ScratchDoubleScope scratch(masm);
-        masm.ma_vldr(toAddress(from), scratch);
-        masm.ma_vstr(scratch, toAddress(to));
+        ScratchDoubleScope scratchDouble(masm);
+        masm.ma_vldr(toAddress(from), scratchDouble, scratch);
+        masm.ma_vstr(scratchDouble, toAddress(to), scratch);
     }
 }
 
 void
 MoveEmitterARM::emit(const MoveOp& move)
 {
     const MoveOperand& from = move.from();
     const MoveOperand& to = move.to();
@@ -400,12 +414,14 @@ MoveEmitterARM::assertDone()
     MOZ_ASSERT(inCycle_ == 0);
 }
 
 void
 MoveEmitterARM::finish()
 {
     assertDone();
 
-    if (pushedAtSpill_ != -1 && spilledReg_ != InvalidReg)
-        masm.ma_ldr(spillSlot(), spilledReg_);
+    if (pushedAtSpill_ != -1 && spilledReg_ != InvalidReg) {
+        ScratchRegisterScope scratch(masm);
+        masm.ma_ldr(spillSlot(), spilledReg_, scratch);
+    }
     masm.freeStack(masm.framePushed() - pushedAtStart_);
 }
--- a/js/src/jit/arm/SharedIC-arm.cpp
+++ b/js/src/jit/arm/SharedIC-arm.cpp
@@ -54,36 +54,40 @@ ICBinaryArith_Int32::Compiler::generateS
         masm.mov(scratchReg, R0.payloadReg());
         break;
       case JSOP_SUB:
         masm.ma_sub(R0.payloadReg(), R1.payloadReg(), scratchReg, SetCC);
         masm.j(Assembler::Overflow, &failure);
         masm.mov(scratchReg, R0.payloadReg());
         break;
       case JSOP_MUL: {
+        ScratchRegisterScope scratch(masm);
         Assembler::Condition cond = masm.ma_check_mul(R0.payloadReg(), R1.payloadReg(), scratchReg,
-                                                      Assembler::Overflow);
+                                                      scratch, Assembler::Overflow);
         masm.j(cond, &failure);
 
-        masm.ma_cmp(scratchReg, Imm32(0));
+        masm.as_cmp(scratchReg, Imm8(0));
         masm.j(Assembler::Equal, &maybeNegZero);
 
         masm.mov(scratchReg, R0.payloadReg());
         break;
       }
       case JSOP_DIV:
       case JSOP_MOD: {
         // Check for INT_MIN / -1, it results in a double.
-        masm.ma_cmp(R0.payloadReg(), Imm32(INT_MIN));
-        masm.ma_cmp(R1.payloadReg(), Imm32(-1), Assembler::Equal);
-        masm.j(Assembler::Equal, &failure);
+        {
+            ScratchRegisterScope scratch(masm);
+            masm.ma_cmp(R0.payloadReg(), Imm32(INT_MIN), scratch);
+            masm.ma_cmp(R1.payloadReg(), Imm32(-1), scratch, Assembler::Equal);
+            masm.j(Assembler::Equal, &failure);
+        }
 
         // Check for both division by zero and 0 / X with X < 0 (results in -0).
-        masm.ma_cmp(R1.payloadReg(), Imm32(0));
-        masm.ma_cmp(R0.payloadReg(), Imm32(0), Assembler::LessThan);
+        masm.as_cmp(R1.payloadReg(), Imm8(0));
+        masm.as_cmp(R0.payloadReg(), Imm8(0), Assembler::LessThan);
         masm.j(Assembler::Equal, &failure);
 
         // The call will preserve registers r4-r11. Save R0 and the link
         // register.
         MOZ_ASSERT(R1 == ValueOperand(r5, r4));
         MOZ_ASSERT(R0 == ValueOperand(r3, r2));
         masm.moveValue(R0, savedValue);
 
@@ -113,27 +117,27 @@ ICBinaryArith_Int32::Compiler::generateS
       case JSOP_BITXOR:
         masm.ma_eor(R1.payloadReg(), R0.payloadReg(), R0.payloadReg());
         break;
       case JSOP_BITAND:
         masm.ma_and(R1.payloadReg(), R0.payloadReg(), R0.payloadReg());
         break;
       case JSOP_LSH:
         // ARM will happily try to shift by more than 0x1f.
-        masm.ma_and(Imm32(0x1F), R1.payloadReg(), R1.payloadReg());
+        masm.as_and(R1.payloadReg(), R1.payloadReg(), Imm8(0x1F));
         masm.ma_lsl(R1.payloadReg(), R0.payloadReg(), R0.payloadReg());
         break;
       case JSOP_RSH:
-        masm.ma_and(Imm32(0x1F), R1.payloadReg(), R1.payloadReg());
+        masm.as_and(R1.payloadReg(), R1.payloadReg(), Imm8(0x1F));
         masm.ma_asr(R1.payloadReg(), R0.payloadReg(), R0.payloadReg());
         break;
       case JSOP_URSH:
-        masm.ma_and(Imm32(0x1F), R1.payloadReg(), scratchReg);
+        masm.as_and(scratchReg, R1.payloadReg(), Imm8(0x1F));
         masm.ma_lsr(scratchReg, R0.payloadReg(), scratchReg);
-        masm.ma_cmp(scratchReg, Imm32(0));
+        masm.as_cmp(scratchReg, Imm8(0));
         if (allowDouble_) {
             Label toUint;
             masm.j(Assembler::LessThan, &toUint);
 
             // Move result and box for return.
             masm.mov(scratchReg, R0.payloadReg());
             EmitReturnFromIC(masm);
 
@@ -191,17 +195,17 @@ ICUnaryArith_Int32::Compiler::generateSt
       case JSOP_BITNOT:
         masm.ma_mvn(R0.payloadReg(), R0.payloadReg());
         break;
       case JSOP_NEG:
         // Guard against 0 and MIN_INT, both result in a double.
         masm.branchTest32(Assembler::Zero, R0.payloadReg(), Imm32(0x7fffffff), &failure);
 
         // Compile -x as 0 - x.
-        masm.ma_rsb(R0.payloadReg(), Imm32(0), R0.payloadReg());
+        masm.as_rsb(R0.payloadReg(), R0.payloadReg(), Imm8(0));
         break;
       default:
         MOZ_CRASH("Unexpected op");
     }
 
     EmitReturnFromIC(masm);
 
     masm.bind(&failure);
--- a/js/src/jit/arm/SharedICHelpers-arm.h
+++ b/js/src/jit/arm/SharedICHelpers-arm.h
@@ -82,21 +82,24 @@ inline void
 EmitBaselineTailCallVM(JitCode* target, MacroAssembler& masm, uint32_t argSize)
 {
     // We assume during this that R0 and R1 have been pushed, and that R2 is
     // unused.
     MOZ_ASSERT(R2 == ValueOperand(r1, r0));
 
     // Compute frame size.
     masm.movePtr(BaselineFrameReg, r0);
-    masm.ma_add(Imm32(BaselineFrame::FramePointerOffset), r0);
+    masm.as_add(r0, r0, Imm8(BaselineFrame::FramePointerOffset));
     masm.ma_sub(BaselineStackReg, r0);
 
     // Store frame size without VMFunction arguments for GC marking.
-    masm.ma_sub(r0, Imm32(argSize), r1);
+    {
+        ScratchRegisterScope scratch(masm);
+        masm.ma_sub(r0, Imm32(argSize), r1, scratch);
+    }
     masm.store32(r1, Address(BaselineFrameReg, BaselineFrame::reverseOffsetOfFrameSize()));
 
     // Push frame descriptor and perform the tail call.
     // ICTailCallReg (lr) already contains the return address (as we keep
     // it there through the stub calls), but the VMWrapper code being called
     // expects the return address to also be pushed on the stack.
     MOZ_ASSERT(ICTailCallReg == lr);
     masm.makeFrameDescriptor(r0, JitFrame_BaselineJS, ExitFrameLayout::Size());
@@ -128,17 +131,17 @@ EmitIonTailCallVM(JitCode* target, Macro
 }
 
 inline void
 EmitBaselineCreateStubFrameDescriptor(MacroAssembler& masm, Register reg, uint32_t headerSize)
 {
     // Compute stub frame size. We have to add two pointers: the stub reg and
     // previous frame pointer pushed by EmitEnterStubFrame.
     masm.mov(BaselineFrameReg, reg);
-    masm.ma_add(Imm32(sizeof(void*) * 2), reg);
+    masm.as_add(reg, reg, Imm8(sizeof(void*) * 2));
     masm.ma_sub(BaselineStackReg, reg);
 
     masm.makeFrameDescriptor(reg, JitFrame_BaselineStub, headerSize);
 }
 
 inline void
 EmitBaselineCallVM(JitCode* target, MacroAssembler& masm)
 {
@@ -169,17 +172,17 @@ static const uint32_t STUB_FRAME_SAVED_S
 
 inline void
 EmitBaselineEnterStubFrame(MacroAssembler& masm, Register scratch)
 {
     MOZ_ASSERT(scratch != ICTailCallReg);
 
     // Compute frame size.
     masm.mov(BaselineFrameReg, scratch);
-    masm.ma_add(Imm32(BaselineFrame::FramePointerOffset), scratch);
+    masm.as_add(scratch, scratch, Imm8(BaselineFrame::FramePointerOffset));
     masm.ma_sub(BaselineStackReg, scratch);
 
     masm.store32(scratch, Address(BaselineFrameReg, BaselineFrame::reverseOffsetOfFrameSize()));
 
     // Note: when making changes here, don't forget to update STUB_FRAME_SIZE if
     // needed.
 
     // Push frame descriptor and return address.
--- a/js/src/jit/arm/Trampoline-arm.cpp
+++ b/js/src/jit/arm/Trampoline-arm.cpp
@@ -169,17 +169,17 @@ JitRuntime::generateEnterJIT(JSContext* 
     // enough space reserved for pushing the arguments, and the JitFrameLayout.
     // The stack pointer is also aligned on the alignment expected by the Jit
     // frames.
     //
     // At the end the register r4, is a pointer to the stack where the first
     // argument is expected by the Jit frame.
     //
     aasm->as_sub(r4, sp, O2RegImmShift(r1, LSL, 3));    // r4 = sp - argc*8
-    masm.ma_and(Imm32(~(JitStackAlignment - 1)), r4, r4);
+    aasm->as_bic(r4, r4, Imm8(JitStackAlignment - 1));
     // r4 is now the aligned on the bottom of the list of arguments.
     static_assert(sizeof(JitFrameLayout) % JitStackAlignment == 0,
       "No need to consider the JitFrameLayout for aligning the stack");
     // sp' = ~(JitStackAlignment - 1) & (sp - argc * sizeof(Value)) - sizeof(JitFrameLayout)
     aasm->as_sub(sp, r4, Imm8(sizeof(JitFrameLayout)));
 
     // Get a copy of the number of args to use as a decrement counter, also set
     // the zero condition code.
@@ -312,17 +312,17 @@ JitRuntime::generateEnterJIT(JSContext* 
         // If OSR-ing, then emit instrumentation for setting lastProfilerFrame
         // if profiler instrumentation is enabled.
         {
             Label skipProfilingInstrumentation;
             Register realFramePtr = numStackValues;
             AbsoluteAddress addressOfEnabled(cx->runtime()->spsProfiler.addressOfEnabled());
             masm.branch32(Assembler::Equal, addressOfEnabled, Imm32(0),
                           &skipProfilingInstrumentation);
-            masm.ma_add(framePtr, Imm32(sizeof(void*)), realFramePtr);
+            masm.as_add(realFramePtr, framePtr, Imm8(sizeof(void*)));
             masm.profilerEnterFrame(realFramePtr, scratch);
             masm.bind(&skipProfilingInstrumentation);
         }
 
         masm.jump(jitcode);
 
         // OOM: Load error value, discard return address and previous frame
         // pointer and return.
@@ -395,30 +395,31 @@ JitRuntime::generateInvalidator(JSContex
     // See large comment in x86's JitRuntime::generateInvalidator.
     MacroAssembler masm(cx);
     // At this point, one of two things has happened:
     // 1) Execution has just returned from C code, which left the stack aligned
     // 2) Execution has just returned from Ion code, which left the stack unaligned.
     // The old return address should not matter, but we still want the stack to
     // be aligned, and there is no good reason to automatically align it with a
     // call to setupUnalignedABICall.
-    masm.ma_and(Imm32(~7), sp, sp);
+    masm.as_bic(sp, sp, Imm8(7));
     masm.startDataTransferM(IsStore, sp, DB, WriteBack);
     // We don't have to push everything, but this is likely easier.
     // Setting regs_.
     for (uint32_t i = 0; i < Registers::Total; i++)
         masm.transferReg(Register::FromCode(i));
     masm.finishDataTransfer();
 
     // Since our datastructures for stack inspection are compile-time fixed,
     // if there are only 16 double registers, then we need to reserve
     // space on the stack for the missing 16.
     if (FloatRegisters::ActualTotalPhys() != FloatRegisters::TotalPhys) {
+        ScratchRegisterScope scratch(masm);
         int missingRegs = FloatRegisters::TotalPhys - FloatRegisters::ActualTotalPhys();
-        masm.ma_sub(Imm32(missingRegs * sizeof(double)), sp);
+        masm.ma_sub(Imm32(missingRegs * sizeof(double)), sp, scratch);
     }
 
     masm.startFloatTransferM(IsStore, sp, DB, WriteBack);
     for (uint32_t i = 0; i < FloatRegisters::ActualTotalPhys(); i++)
         masm.transferFloatReg(FloatRegister(i, FloatRegister::Double));
     masm.finishFloatTransfer();
 
     masm.ma_mov(sp, r0);
@@ -429,22 +430,28 @@ JitRuntime::generateInvalidator(JSContex
     masm.reserveStack(sizeOfBailoutInfo);
     masm.mov(sp, r2);
     masm.setupAlignedABICall();
     masm.passABIArg(r0);
     masm.passABIArg(r1);
     masm.passABIArg(r2);
     masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, InvalidationBailout));
 
-    masm.ma_ldr(Address(sp, 0), r2);
-    masm.ma_ldr(Address(sp, sizeOfBailoutInfo), r1);
+    masm.ma_ldr(DTRAddr(sp, DtrOffImm(0)), r2);
+    {
+        ScratchRegisterScope scratch(masm);
+        masm.ma_ldr(Address(sp, sizeOfBailoutInfo), r1, scratch);
+    }
     // Remove the return address, the IonScript, the register state
     // (InvaliationBailoutStack) and the space that was allocated for the return
     // value.
-    masm.ma_add(sp, Imm32(sizeof(InvalidationBailoutStack) + sizeOfRetval + sizeOfBailoutInfo), sp);
+    {
+        ScratchRegisterScope scratch(masm);
+        masm.ma_add(sp, Imm32(sizeof(InvalidationBailoutStack) + sizeOfRetval + sizeOfBailoutInfo), sp, scratch);
+    }
     // Remove the space that this frame was using before the bailout (computed
     // by InvalidationBailout)
     masm.ma_add(sp, r1, sp);
 
     // Jump to shared bailout tail. The BailoutInfo pointer has to be in r2.
     JitCode* bailoutTail = cx->runtime()->jitRuntime()->getBailoutTail();
     masm.branch(bailoutTail);
 
@@ -470,66 +477,72 @@ JitRuntime::generateArgumentsRectifier(J
     // Including |this|, there are (|nargs| + 1) arguments to copy.
     MOZ_ASSERT(ArgumentsRectifierReg == r8);
 
     // Copy number of actual arguments into r0.
     masm.ma_ldr(DTRAddr(sp, DtrOffImm(RectifierFrameLayout::offsetOfNumActualArgs())), r0);
 
     // Load the number of |undefined|s to push into r6.
     masm.ma_ldr(DTRAddr(sp, DtrOffImm(RectifierFrameLayout::offsetOfCalleeToken())), r1);
-    masm.ma_and(Imm32(CalleeTokenMask), r1, r6);
+    {
+        ScratchRegisterScope scratch(masm);
+        masm.ma_and(Imm32(CalleeTokenMask), r1, r6, scratch);
+    }
     masm.ma_ldrh(EDtrAddr(r6, EDtrOffImm(JSFunction::offsetOfNargs())), r6);
 
     masm.ma_sub(r6, r8, r2);
 
     // Get the topmost argument.
-    masm.ma_alu(sp, lsl(r8, 3), r3, OpAdd); // r3 <- r3 + nargs * 8
-    masm.ma_add(r3, Imm32(sizeof(RectifierFrameLayout)), r3);
+    {
+        ScratchRegisterScope scratch(masm);
+        masm.ma_alu(sp, lsl(r8, 3), r3, OpAdd); // r3 <- r3 + nargs * 8
+        masm.ma_add(r3, Imm32(sizeof(RectifierFrameLayout)), r3, scratch);
+    }
 
     {
         Label notConstructing;
 
         masm.branchTest32(Assembler::Zero, r1, Imm32(CalleeToken_FunctionConstructing),
                           &notConstructing);
 
         // Add sizeof(Value) to overcome |this|
-        masm.ma_dataTransferN(IsLoad, 64, true, r3, Imm32(8), r4, Offset);
-        masm.ma_dataTransferN(IsStore, 64, true, sp, Imm32(-8), r4, PreIndex);
+        masm.as_extdtr(IsLoad, 64, true, Offset, r4, EDtrAddr(r3, EDtrOffImm(8)));
+        masm.as_extdtr(IsStore, 64, true, PreIndex, r4, EDtrAddr(sp, EDtrOffImm(-8)));
 
         // Include the newly pushed newTarget value in the frame size
         // calculated below.
         masm.add32(Imm32(1), r6);
 
         masm.bind(&notConstructing);
     }
 
     // Push undefined.
     masm.moveValue(UndefinedValue(), r5, r4);
     {
         Label undefLoopTop;
         masm.bind(&undefLoopTop);
-        masm.ma_dataTransferN(IsStore, 64, true, sp, Imm32(-8), r4, PreIndex);
-        masm.ma_sub(r2, Imm32(1), r2, SetCC);
+        masm.as_extdtr(IsStore, 64, true, PreIndex, r4, EDtrAddr(sp, EDtrOffImm(-8)));
+        masm.as_sub(r2, r2, Imm8(1), SetCC);
 
         masm.ma_b(&undefLoopTop, Assembler::NonZero);
     }
 
     // Push arguments, |nargs| + 1 times (to include |this|).
     {
         Label copyLoopTop;
         masm.bind(&copyLoopTop);
-        masm.ma_dataTransferN(IsLoad, 64, true, r3, Imm32(-8), r4, PostIndex);
-        masm.ma_dataTransferN(IsStore, 64, true, sp, Imm32(-8), r4, PreIndex);
+        masm.as_extdtr(IsLoad, 64, true, PostIndex, r4, EDtrAddr(r3, EDtrOffImm(-8)));
+        masm.as_extdtr(IsStore, 64, true, PreIndex, r4, EDtrAddr(sp, EDtrOffImm(-8)));
 
-        masm.ma_sub(r8, Imm32(1), r8, SetCC);
+        masm.as_sub(r8, r8, Imm8(1), SetCC);
         masm.ma_b(&copyLoopTop, Assembler::NotSigned);
     }
 
     // translate the framesize from values into bytes
-    masm.ma_add(r6, Imm32(1), r6);
+    masm.as_add(r6, r6, Imm8(1));
     masm.ma_lsl(Imm32(3), r6, r6);
 
     // Construct sizeDescriptor.
     masm.makeFrameDescriptor(r6, JitFrame_Rectifier, JitFrameLayout::Size());
 
     // Construct JitFrameLayout.
     masm.ma_push(r0); // actual arguments.
     masm.ma_push(r1); // callee token
@@ -546,17 +559,20 @@ JitRuntime::generateArgumentsRectifier(J
     //  ...
     // argN
     // num actual args
     // callee token
     // sizeDescriptor     <- sp now
     // return address
 
     // Remove the rectifier frame.
-    masm.ma_dtr(IsLoad, sp, Imm32(12), r4, PostIndex);
+    {
+        ScratchRegisterScope scratch(masm);
+        masm.ma_dtr(IsLoad, sp, Imm32(12), r4, scratch, PostIndex);
+    }
 
     // arg1
     //  ...
     // argN               <- sp now; r4 <- frame descriptor
     // num actual args
     // callee token
     // sizeDescriptor
     // return address
@@ -595,22 +611,24 @@ PushBailoutFrame(MacroAssembler& masm, u
 
     masm.startDataTransferM(IsStore, sp, DB, WriteBack);
     // We don't have to push everything, but this is likely easier.
     // Setting regs_.
     for (uint32_t i = 0; i < Registers::Total; i++)
         masm.transferReg(Register::FromCode(i));
     masm.finishDataTransfer();
 
+    ScratchRegisterScope scratch(masm);
+
     // Since our datastructures for stack inspection are compile-time fixed,
     // if there are only 16 double registers, then we need to reserve
     // space on the stack for the missing 16.
     if (FloatRegisters::ActualTotalPhys() != FloatRegisters::TotalPhys) {
         int missingRegs = FloatRegisters::TotalPhys - FloatRegisters::ActualTotalPhys();
-        masm.ma_sub(Imm32(missingRegs * sizeof(double)), sp);
+        masm.ma_sub(Imm32(missingRegs * sizeof(double)), sp, scratch);
     }
     masm.startFloatTransferM(IsStore, sp, DB, WriteBack);
     for (uint32_t i = 0; i < FloatRegisters::ActualTotalPhys(); i++)
         masm.transferFloatReg(FloatRegister(i, FloatRegister::Double));
     masm.finishFloatTransfer();
 
     // STEP 1b: Push both the "return address" of the function call (the address
     //          of the instruction after the call that we used to get here) as
@@ -653,46 +671,52 @@ GenerateBailoutThunk(JSContext* cx, Macr
     // both the snapshotoffset as well as the: masm.as_sub(sp, sp, Imm8(4));
 
     // Set the old (4-byte aligned) value of the sp as the first argument.
     masm.passABIArg(r0);
     masm.passABIArg(r1);
 
     // Sp % 8 == 0
     masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, Bailout));
-    masm.ma_ldr(Address(sp, 0), r2);
-    masm.ma_add(sp, Imm32(sizeOfBailoutInfo), sp);
+    masm.ma_ldr(DTRAddr(sp, DtrOffImm(0)), r2);
+    {
+        ScratchRegisterScope scratch(masm);
+        masm.ma_add(sp, Imm32(sizeOfBailoutInfo), sp, scratch);
+    }
+
     // Common size of a bailout frame.
     uint32_t bailoutFrameSize = 0
         + sizeof(void*) // frameClass
         + sizeof(RegisterDump);
 
     if (frameClass == NO_FRAME_SIZE_CLASS_ID) {
         // Make sure the bailout frame size fits into the offset for a load.
         masm.as_dtr(IsLoad, 32, Offset,
                     r4, DTRAddr(sp, DtrOffImm(4)));
         // Used to be: offsetof(BailoutStack, frameSize_)
         // This structure is no longer available to us :(
         // We add 12 to the bailoutFrameSize because:
         // sizeof(uint32_t) for the tableOffset that was pushed onto the stack
         // sizeof(uintptr_t) for the snapshotOffset;
         // alignment to round the uintptr_t up to a multiple of 8 bytes.
-        masm.ma_add(sp, Imm32(bailoutFrameSize+12), sp);
+        ScratchRegisterScope scratch(masm);
+        masm.ma_add(sp, Imm32(bailoutFrameSize+12), sp, scratch);
         masm.as_add(sp, sp, O2Reg(r4));
     } else {
+        ScratchRegisterScope scratch(masm);
         uint32_t frameSize = FrameSizeClass::FromClass(frameClass).frameSize();
         masm.ma_add(Imm32(// The frame that was added when we entered the most
                           // recent function.
                           frameSize
                           // The size of the "return address" that was dumped on
                           // the stack.
                           + sizeof(void*)
                           // Everything else that was pushed on the stack.
                           + bailoutFrameSize)
-                    , sp);
+                    , sp, scratch);
     }
 
     // Jump to shared bailout tail. The BailoutInfo pointer has to be in r2.
     JitCode* bailoutTail = cx->runtime()->jitRuntime()->getBailoutTail();
     masm.branch(bailoutTail);
 }
 
 JitCode*
@@ -772,17 +796,18 @@ JitRuntime::generateVMWrapper(JSContext*
     masm.enterExitFrame(&f);
     masm.loadJSContext(cxreg);
 
     // Save the base of the argument set stored on the stack.
     Register argsBase = InvalidReg;
     if (f.explicitArgs) {
         argsBase = r5;
         regs.take(argsBase);
-        masm.ma_add(sp, Imm32(ExitFrameLayout::SizeWithFooter()), argsBase);
+        ScratchRegisterScope scratch(masm);
+        masm.ma_add(sp, Imm32(ExitFrameLayout::SizeWithFooter()), argsBase, scratch);
     }
 
     // Reserve space for the outparameter.
     Register outReg = InvalidReg;
     switch (f.outParam) {
       case Type_Value:
         outReg = r4;
         regs.take(outReg);
@@ -1150,17 +1175,20 @@ JitRuntime::generateProfilerExitFrameTai
 #endif
 
     // Load the frame descriptor into |scratch1|, figure out what to do depending on its type.
     masm.loadPtr(Address(StackPointer, JitFrameLayout::offsetOfDescriptor()), scratch1);
 
     // Going into the conditionals, we will have:
     //      FrameDescriptor.size in scratch1
     //      FrameDescriptor.type in scratch2
-    masm.ma_and(Imm32((1 << FRAMETYPE_BITS) - 1), scratch1, scratch2);
+    {
+        ScratchRegisterScope asmScratch(masm);
+        masm.ma_and(Imm32((1 << FRAMETYPE_BITS) - 1), scratch1, scratch2, asmScratch);
+    }
     masm.rshiftPtr(Imm32(FRAMESIZE_SHIFT), scratch1);
 
     // Handling of each case is dependent on FrameDescriptor.type
     Label handle_IonJS;
     Label handle_BaselineStub;
     Label handle_Rectifier;
     Label handle_IonAccessorIC;
     Label handle_Entry;
@@ -1196,17 +1224,17 @@ JitRuntime::generateProfilerExitFrameTai
         // returning directly to an IonJS frame.  Store return addr to frame
         // in lastProfilingCallSite.
         masm.loadPtr(Address(StackPointer, JitFrameLayout::offsetOfReturnAddress()), scratch2);
         masm.storePtr(scratch2, lastProfilingCallSite);
 
         // Store return frame in lastProfilingFrame.
         // scratch2 := StackPointer + Descriptor.size*1 + JitFrameLayout::Size();
         masm.ma_add(StackPointer, scratch1, scratch2);
-        masm.ma_add(scratch2, Imm32(JitFrameLayout::Size()), scratch2);
+        masm.as_add(scratch2, scratch2, Imm8(JitFrameLayout::Size()));
         masm.storePtr(scratch2, lastProfilingFrame);
         masm.ret();
     }
 
     //
     // JitFrame_BaselineStub
     //
     // Look past the stub and store the frame pointer to