bug 991153: Fix float32 on arm to handle aliased registers (r=jandem)
author: Marty Rosenberg <mrosenberg@mozilla.com>
Tue, 15 Jul 2014 03:34:08 -0400
changeset: 214835 651fde63cc765b100cc9d83cee1dd6f69c6e3d03
parent: 214834 414ac77b7f2d6eb48f96d991414b78459a3b42c5
child: 214836 d96127c37431bf98ab331b6e35c782f1cd6d5679
push id: 3857
push user: raliiev@mozilla.com
push date: Tue, 02 Sep 2014 16:39:23 +0000
treeherder: mozilla-beta@5638b907b505 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: jandem
bugs: 991153
milestone: 33.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
bug 991153: Fix float32 on arm to handle aliased registers (r=jandem)
js/src/jit/AsmJS.cpp
js/src/jit/CodeGenerator.cpp
js/src/jit/IonFrames.cpp
js/src/jit/IonFrames.h
js/src/jit/IonMacroAssembler.cpp
js/src/jit/LIR.h
js/src/jit/Lowering.cpp
js/src/jit/MoveResolver.cpp
js/src/jit/RegisterAllocator.cpp
js/src/jit/RegisterAllocator.h
js/src/jit/RegisterSets.h
js/src/jit/Registers.h
js/src/jit/StupidAllocator.cpp
js/src/jit/StupidAllocator.h
js/src/jit/arm/Architecture-arm.cpp
js/src/jit/arm/Architecture-arm.h
js/src/jit/arm/Assembler-arm.cpp
js/src/jit/arm/Assembler-arm.h
js/src/jit/arm/Bailouts-arm.cpp
js/src/jit/arm/CodeGenerator-arm.cpp
js/src/jit/arm/MacroAssembler-arm.cpp
js/src/jit/arm/MacroAssembler-arm.h
js/src/jit/arm/Simulator-arm.cpp
js/src/jit/arm/Trampoline-arm.cpp
js/src/jit/shared/CodeGenerator-shared.cpp
js/src/jit/x64/Architecture-x64.h
js/src/jit/x86/Architecture-x86.h
js/src/jsapi-tests/testJitMoveEmitterCycles.cpp
--- a/js/src/jit/AsmJS.cpp
+++ b/js/src/jit/AsmJS.cpp
@@ -5901,32 +5901,32 @@ StackDecrementForCall(MacroAssembler &ma
 }
 
 #if defined(JS_CODEGEN_ARM)
 // The ARM system ABI also includes d15 in the non volatile float registers.
 // Also exclude lr (a.k.a. r14) as we preserve it manually)
 static const RegisterSet NonVolatileRegs =
     RegisterSet(GeneralRegisterSet(Registers::NonVolatileMask &
                                    ~(uint32_t(1) << Registers::lr)),
-                FloatRegisterSet(FloatRegisters::NonVolatileMask | (1 << FloatRegisters::d15)));
+                FloatRegisterSet(FloatRegisters::NonVolatileMask | (1ULL << FloatRegisters::d15)));
 #else
 static const RegisterSet NonVolatileRegs =
     RegisterSet(GeneralRegisterSet(Registers::NonVolatileMask),
                 FloatRegisterSet(FloatRegisters::NonVolatileMask));
 #endif
 
 #if defined(JS_CODEGEN_MIPS)
 // Mips is using one more double slot due to stack alignment for double values.
 // Look at MacroAssembler::PushRegsInMask(RegisterSet set)
 static const unsigned FramePushedAfterSave = NonVolatileRegs.gprs().size() * sizeof(intptr_t) +
                                              NonVolatileRegs.fpus().size() * sizeof(double) +
                                              sizeof(double);
 #else
 static const unsigned FramePushedAfterSave = NonVolatileRegs.gprs().size() * sizeof(intptr_t) +
-                                             NonVolatileRegs.fpus().size() * sizeof(double);
+                                             NonVolatileRegs.fpus().getPushSizeInBytes();
 #endif
 
 static bool
 GenerateEntry(ModuleCompiler &m, const AsmJSModule::ExportedFunction &exportedFunc)
 {
     MacroAssembler &masm = m.masm();
 
     // In constrast to the system ABI, the Ion convention is that all registers
@@ -6648,17 +6648,17 @@ GenerateStackOverflowExit(ModuleCompiler
     // Don't worry about restoring the stack; throwLabel will pop everything.
     masm.jump(throwLabel);
     return !masm.oom();
 }
 
 static const RegisterSet AllRegsExceptSP =
     RegisterSet(GeneralRegisterSet(Registers::AllMask &
                                    ~(uint32_t(1) << Registers::StackPointer)),
-                FloatRegisterSet(FloatRegisters::AllMask));
+                FloatRegisterSet(FloatRegisters::AllDoubleMask));
 
 // The operation-callback exit is called from arbitrarily-interrupted asm.js
 // code. That means we must first save *all* registers and restore *all*
 // registers (except the stack pointer) when we resume. The address to resume to
 // (assuming that js::HandleExecutionInterrupt doesn't indicate that the
 // execution should be aborted) is stored in AsmJSActivation::resumePC_.
 // Unfortunately, loading this requires a scratch register which we don't have
 // after restoring all registers. To hack around this, push the resumePC on the
@@ -6771,22 +6771,22 @@ GenerateInterruptExit(ModuleCompiler &m,
     // Store resumePC into the return PC stack slot.
     LoadAsmJSActivationIntoRegister(masm, IntArgReg0);
     masm.loadPtr(Address(IntArgReg0, AsmJSActivation::offsetOfResumePC()), IntArgReg1);
     masm.storePtr(IntArgReg1, Address(r6, 14 * sizeof(uint32_t*)));
 
     // argument 0: cx
     masm.loadPtr(Address(IntArgReg0, AsmJSActivation::offsetOfContext()), IntArgReg0);
 
-    masm.PushRegsInMask(RegisterSet(GeneralRegisterSet(0), FloatRegisterSet(FloatRegisters::AllMask)));   // save all FP registers
+    masm.PushRegsInMask(RegisterSet(GeneralRegisterSet(0), FloatRegisterSet(FloatRegisters::AllDoubleMask)));   // save all FP registers
     masm.call(AsmJSImm_HandleExecutionInterrupt);
     masm.branchIfFalseBool(ReturnReg, throwLabel);
 
     // Restore the machine state to before the interrupt. this will set the pc!
-    masm.PopRegsInMask(RegisterSet(GeneralRegisterSet(0), FloatRegisterSet(FloatRegisters::AllMask)));   // restore all FP registers
+    masm.PopRegsInMask(RegisterSet(GeneralRegisterSet(0), FloatRegisterSet(FloatRegisters::AllDoubleMask)));   // restore all FP registers
     masm.mov(r6,sp);
     masm.as_vmsr(r5);
     masm.as_msr(r4);
     // Restore all GP registers
     masm.startDataTransferM(IsLoad, sp, IA, WriteBack);
     masm.transferReg(r0);
     masm.transferReg(r1);
     masm.transferReg(r2);
--- a/js/src/jit/CodeGenerator.cpp
+++ b/js/src/jit/CodeGenerator.cpp
@@ -1532,17 +1532,16 @@ CodeGenerator::visitMoveGroup(LMoveGroup
 
         const LAllocation *from = move.from();
         const LAllocation *to = move.to();
         LDefinition::Type type = move.type();
 
         // No bogus moves.
         JS_ASSERT(*from != *to);
         JS_ASSERT(!from->isConstant());
-
         MoveOp::Type moveType;
         switch (type) {
           case LDefinition::OBJECT:
           case LDefinition::SLOTS:
 #ifdef JS_NUNBOX32
           case LDefinition::TYPE:
           case LDefinition::PAYLOAD:
 #else
@@ -8543,22 +8542,32 @@ CodeGenerator::visitHasClass(LHasClass *
 
 bool
 CodeGenerator::visitAsmJSCall(LAsmJSCall *ins)
 {
     MAsmJSCall *mir = ins->mir();
 
 #if defined(JS_CODEGEN_ARM)
     if (!UseHardFpABI() && mir->callee().which() == MAsmJSCall::Callee::Builtin) {
+        // The soft ABI passes floating point arguments in GPRs. Since basically
+        // nothing is set up to handle this, the values are placed in the
+        // corresponding VFP registers, then transferred to GPRs immediately
+        // before the call. The mapping is sN <-> rN, where double registers
+        // can be treated as their two component single registers.
         for (unsigned i = 0, e = ins->numOperands(); i < e; i++) {
             LAllocation *a = ins->getOperand(i);
             if (a->isFloatReg()) {
                 FloatRegister fr = ToFloatRegister(a);
-                int srcId = fr.code() * 2;
-                masm.ma_vxfer(fr, Register::FromCode(srcId), Register::FromCode(srcId+1));
+                if (fr.isDouble()) {
+                    uint32_t srcId = fr.singleOverlay().id();
+                    masm.ma_vxfer(fr, Register::FromCode(srcId), Register::FromCode(srcId + 1));
+                } else {
+                    uint32_t srcId = fr.id();
+                    masm.ma_vxfer(fr, Register::FromCode(srcId));
+                }
             }
         }
     }
 #endif
 
     if (mir->spIncrement())
         masm.freeStack(mir->spIncrement());
 
--- a/js/src/jit/IonFrames.cpp
+++ b/js/src/jit/IonFrames.cpp
@@ -348,19 +348,29 @@ JitFrameIterator::machineState() const
     uintptr_t *spill = spillBase();
 
     MachineState machine;
     for (GeneralRegisterBackwardIterator iter(reader.allGprSpills()); iter.more(); iter++)
         machine.setRegisterLocation(*iter, --spill);
 
     uint8_t *spillAlign = alignDoubleSpillWithOffset(reinterpret_cast<uint8_t *>(spill), 0);
 
-    double *floatSpill = reinterpret_cast<double *>(spillAlign);
-    for (FloatRegisterBackwardIterator iter(reader.allFloatSpills()); iter.more(); iter++)
-        machine.setRegisterLocation(*iter, --floatSpill);
+    char *floatSpill = reinterpret_cast<char *>(spillAlign);
+    FloatRegisterSet fregs = reader.allFloatSpills();
+    fregs = fregs.reduceSetForPush();
+    for (FloatRegisterBackwardIterator iter(fregs); iter.more(); iter++) {
+        floatSpill -= (*iter).size();
+        for (uint32_t a = 0; a < (*iter).numAlignedAliased(); a++) {
+            // Only record this location for registers that begin at this address,
+            // e.g. d0 should not start at s1, only at s0.
+            FloatRegister ftmp;
+            (*iter).alignedAliased(a, &ftmp);
+            machine.setRegisterLocation(ftmp, (double*)floatSpill);
+        }
+    }
 
     return machine;
 }
 
 static void
 CloseLiveIterator(JSContext *cx, const InlineFrameIterator &frame, uint32_t localSlot)
 {
     SnapshotIterator si = frame.snapshotIterator();
@@ -1957,25 +1967,32 @@ InlineFrameIterator::computeScopeChain(V
 bool
 InlineFrameIterator::isFunctionFrame() const
 {
     return !!callee_;
 }
 
 MachineState
 MachineState::FromBailout(mozilla::Array<uintptr_t, Registers::Total> &regs,
-                          mozilla::Array<double, FloatRegisters::Total> &fpregs)
+                          mozilla::Array<double, FloatRegisters::TotalPhys> &fpregs)
 {
     MachineState machine;
 
     for (unsigned i = 0; i < Registers::Total; i++)
         machine.setRegisterLocation(Register::FromCode(i), &regs[i]);
+#ifdef JS_CODEGEN_ARM
+    float *fbase = (float*)&fpregs[0];
+    for (unsigned i = 0; i < FloatRegisters::TotalDouble; i++)
+        machine.setRegisterLocation(FloatRegister(i, FloatRegister::Double), &fpregs[i]);
+    for (unsigned i = 0; i < FloatRegisters::TotalSingle; i++)
+        machine.setRegisterLocation(FloatRegister(i, FloatRegister::Single), (double*)&fbase[i]);
+#else
     for (unsigned i = 0; i < FloatRegisters::Total; i++)
         machine.setRegisterLocation(FloatRegister::FromCode(i), &fpregs[i]);
-
+#endif
     return machine;
 }
 
 bool
 InlineFrameIterator::isConstructing() const
 {
     // Skip the current frame and look at the caller's.
     if (more()) {
--- a/js/src/jit/IonFrames.h
+++ b/js/src/jit/IonFrames.h
@@ -826,17 +826,17 @@ class IonBaselineStubFrameLayout : publi
         uint8_t *fp = reinterpret_cast<uint8_t *>(this);
         *reinterpret_cast<ICStub **>(fp + reverseOffsetOfStubPtr()) = stub;
     }
 };
 
 // An invalidation bailout stack is at the stack pointer for the callee frame.
 class InvalidationBailoutStack
 {
-    mozilla::Array<double, FloatRegisters::Total> fpregs_;
+    mozilla::Array<double, FloatRegisters::TotalPhys> fpregs_;
     mozilla::Array<uintptr_t, Registers::Total> regs_;
     IonScript   *ionScript_;
     uint8_t       *osiPointReturnAddress_;
 
   public:
     uint8_t *sp() const {
         return (uint8_t *) this + sizeof(InvalidationBailoutStack);
     }
--- a/js/src/jit/IonMacroAssembler.cpp
+++ b/js/src/jit/IonMacroAssembler.cpp
@@ -1573,19 +1573,24 @@ MacroAssembler::convertValueToFloatingPo
     boolValueToFloatingPoint(value, output, outputType);
     jump(&done);
 
     bind(&isInt32);
     int32ValueToFloatingPoint(value, output, outputType);
     jump(&done);
 
     bind(&isDouble);
-    unboxDouble(value, output);
+    FloatRegister tmp = output;
+    if (outputType == MIRType_Float32 && hasMultiAlias())
+        tmp = ScratchDoubleReg;
+
+    unboxDouble(value, tmp);
     if (outputType == MIRType_Float32)
-        convertDoubleToFloat32(output, output);
+        convertDoubleToFloat32(tmp, output);
+
     bind(&done);
 }
 
 bool
 MacroAssembler::convertValueToFloatingPoint(JSContext *cx, const Value &v, FloatRegister output,
                                             Label *fail, MIRType outputType)
 {
     if (v.isNumber() || v.isString()) {
--- a/js/src/jit/LIR.h
+++ b/js/src/jit/LIR.h
@@ -200,27 +200,27 @@ class LAllocation : public TempObject
 
 };
 
 class LUse : public LAllocation
 {
     static const uint32_t POLICY_BITS = 3;
     static const uint32_t POLICY_SHIFT = 0;
     static const uint32_t POLICY_MASK = (1 << POLICY_BITS) - 1;
-    static const uint32_t REG_BITS = 5;
+    static const uint32_t REG_BITS = 6;
     static const uint32_t REG_SHIFT = POLICY_SHIFT + POLICY_BITS;
     static const uint32_t REG_MASK = (1 << REG_BITS) - 1;
 
     // Whether the physical register for this operand may be reused for a def.
     static const uint32_t USED_AT_START_BITS = 1;
     static const uint32_t USED_AT_START_SHIFT = REG_SHIFT + REG_BITS;
     static const uint32_t USED_AT_START_MASK = (1 << USED_AT_START_BITS) - 1;
 
   public:
-    // Virtual registers get the remaining 20 bits.
+    // Virtual registers get the remaining 19 bits.
     static const uint32_t VREG_BITS = DATA_BITS - (USED_AT_START_SHIFT + USED_AT_START_BITS);
     static const uint32_t VREG_SHIFT = USED_AT_START_SHIFT + USED_AT_START_BITS;
     static const uint32_t VREG_MASK = (1 << VREG_BITS) - 1;
 
     enum Policy {
         // Input should be in a read-only register or stack slot.
         ANY,
 
@@ -479,17 +479,17 @@ class LDefinition
     Policy policy() const {
         return (Policy)((bits_ >> POLICY_SHIFT) & POLICY_MASK);
     }
     Type type() const {
         return (Type)((bits_ >> TYPE_SHIFT) & TYPE_MASK);
     }
     bool isCompatibleReg(const AnyRegister &r) const {
         if (isFloatReg() && r.isFloat()) {
-#if defined(JS_CODEGEN_ARM) && defined(EVERYONE_KNOWS_ABOUT_ALIASING)
+#if defined(JS_CODEGEN_ARM)
             if (type() == FLOAT32)
                 return r.fpu().isSingle();
             return r.fpu().isDouble();
 #else
             return true;
 #endif
         }
         return !isFloatReg() && !r.isFloat();
--- a/js/src/jit/Lowering.cpp
+++ b/js/src/jit/Lowering.cpp
@@ -3108,18 +3108,18 @@ LIRGenerator::visitAssertRange(MAssertRa
         lir = new(alloc()) LAssertRangeI(useRegisterAtStart(input));
         break;
 
       case MIRType_Double:
         lir = new(alloc()) LAssertRangeD(useRegister(input), tempDouble());
         break;
 
       case MIRType_Float32: {
-        LDefinition armtemp = hasMultiAlias() ? tempFloat32() : LDefinition::BogusTemp();
-        lir = new(alloc()) LAssertRangeF(useRegister(input), tempFloat32(), armtemp);
+        LDefinition armtemp = hasMultiAlias() ? tempDouble() : LDefinition::BogusTemp();
+        lir = new(alloc()) LAssertRangeF(useRegister(input), tempDouble(), armtemp);
         break;
       }
       case MIRType_Value:
         lir = new(alloc()) LAssertRangeV(tempToUnbox(), tempDouble(), tempDouble());
         if (!useBox(lir, LAssertRangeV::Input, input))
             return false;
         break;
 
--- a/js/src/jit/MoveResolver.cpp
+++ b/js/src/jit/MoveResolver.cpp
@@ -38,17 +38,17 @@ MoveResolver::addMove(const MoveOperand 
 // Given move (A -> B), this function attempts to find any move (B -> *) in the
 // pending move list, and returns the first one.
 MoveResolver::PendingMove *
 MoveResolver::findBlockingMove(const PendingMove *last)
 {
     for (PendingMoveIterator iter = pending_.begin(); iter != pending_.end(); iter++) {
         PendingMove *other = *iter;
 
-        if (other->from() == last->to()) {
+        if (other->from().aliases(last->to())) {
             // We now have pairs in the form (A -> X) (X -> y). The second pair
             // blocks the move in the first pair, so return it.
             return other;
         }
     }
 
     // No blocking moves found.
     return nullptr;
--- a/js/src/jit/RegisterAllocator.cpp
+++ b/js/src/jit/RegisterAllocator.cpp
@@ -263,16 +263,17 @@ AllocationIntegrityState::checkSafepoint
 
     if (ins->isCall() && alloc.isRegister())
         return true;
 
     if (alloc.isRegister()) {
         AnyRegister reg = alloc.toRegister();
         if (populateSafepoints)
             safepoint->addLiveRegister(reg);
+
         JS_ASSERT(safepoint->liveRegs().has(reg));
     }
 
     LDefinition::Type type = virtualRegisters[vreg]
                              ? virtualRegisters[vreg]->type()
                              : LDefinition::GENERAL;
 
     switch (type) {
--- a/js/src/jit/RegisterAllocator.h
+++ b/js/src/jit/RegisterAllocator.h
@@ -317,17 +317,18 @@ class RegisterAllocator
             allRegisters_.take(AnyRegister(FramePointer));
 #if defined(JS_CODEGEN_X64)
         if (mir->compilingAsmJS())
             allRegisters_.take(AnyRegister(HeapReg));
 #elif defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_MIPS)
         if (mir->compilingAsmJS()) {
             allRegisters_.take(AnyRegister(HeapReg));
             allRegisters_.take(AnyRegister(GlobalReg));
-            allRegisters_.take(AnyRegister(NANReg));
+            // Need to remove both NANReg, and its aliases.
+            allRegisters_.takeAllAliasedUnchecked(AnyRegister(NANReg));
         }
 #endif
     }
 
     bool init();
 
     TempAllocator &alloc() const {
         return mir->alloc();
--- a/js/src/jit/RegisterSets.h
+++ b/js/src/jit/RegisterSets.h
@@ -358,26 +358,43 @@ class TypedRegisterSet
     }
     static inline TypedRegisterSet Volatile() {
         return TypedRegisterSet(T::Codes::AllocatableMask & T::Codes::VolatileMask);
     }
     static inline TypedRegisterSet NonVolatile() {
         return TypedRegisterSet(T::Codes::AllocatableMask & T::Codes::NonVolatileMask);
     }
     bool has(T reg) const {
+        // When checking to see if a set has a register, we only want that exact
+        // register, not worrying about aliasing.
         return !!(bits_ & (SetType(1) << reg.code()));
     }
     void addUnchecked(T reg) {
         bits_ |= (SetType(1) << reg.code());
     }
+    void addAllAliasedUnchecked(T reg) {  // Sets the bit of each register reg.aliased() yields.
+        for (uint32_t a = 0; a < reg.numAliased(); a++) {  // unsigned, matching the loop in add()
+            T tmp;  // aliased() hands back its result through this out-param
+            reg.aliased(a, &tmp);
+            bits_ |= (SetType(1) << tmp.code());
+        }
+    }
 
     void add(T reg) {
-        JS_ASSERT(!has(reg));
+        // Make sure we don't add two overlapping registers.
+#ifdef DEBUG
+        for (uint32_t a = 0; a < reg.numAliased(); a++) {
+            T tmp;
+            reg.aliased(a, &tmp);
+            JS_ASSERT(!has(tmp));
+        }
+#endif
         addUnchecked(reg);
     }
+
     void add(ValueOperand value) {
 #if defined(JS_NUNBOX32)
         add(value.payloadReg());
         add(value.typeReg());
 #elif defined(JS_PUNBOX64)
         add(value.valueReg());
 #else
 #error "Bad architecture"
@@ -394,16 +411,23 @@ class TypedRegisterSet
     }
     void take(T reg) {
         JS_ASSERT(has(reg));
         takeUnchecked(reg);
     }
     void takeUnchecked(T reg) {
         bits_ &= ~(SetType(1) << reg.code());
     }
+    void takeAllAliasedUnchecked(T reg) {  // Clears the bit of each register reg.aliased() yields.
+        for (uint32_t a = 0; a < reg.numAliased(); a++) {  // unsigned, matching the loop in add()
+            T tmp;  // aliased() hands back its result through this out-param
+            reg.aliased(a, &tmp);
+            bits_ &= ~(SetType(1) << tmp.code());
+        }
+    }
     void take(ValueOperand value) {
 #if defined(JS_NUNBOX32)
         take(value.payloadReg());
         take(value.typeReg());
 #elif defined(JS_PUNBOX64)
         take(value.valueReg());
 #else
 #error "Bad architecture"
@@ -444,21 +468,21 @@ class TypedRegisterSet
         take(preclude);
         JS_ASSERT(!empty());
         T result = getAny();
         add(preclude);
         return result;
     }
     T getFirst() const {
         JS_ASSERT(!empty());
-        return T::FromCode(mozilla::CountTrailingZeroes32(bits_));
+        return T::FromCode(T::FirstBit(bits_));
     }
     T getLast() const {
         JS_ASSERT(!empty());
-        int ireg = 31 - mozilla::CountLeadingZeroes32(bits_);
+        int ireg = T::LastBit(bits_);
         return T::FromCode(ireg);
     }
     T takeAny() {
         JS_ASSERT(!empty());
         T reg = getAny();
         take(reg);
         return reg;
     }
@@ -598,16 +622,22 @@ class RegisterSet {
         fpu_.addUnchecked(reg);
     }
     void addUnchecked(AnyRegister any) {
         if (any.isFloat())
             addUnchecked(any.fpu());
         else
             addUnchecked(any.gpr());
     }
+    void addAllAliasedUnchecked(const AnyRegister &reg) {
+        if (reg.isFloat())
+            fpu_.addAllAliasedUnchecked(reg.fpu());
+        else
+            gpr_.addAllAliasedUnchecked(reg.gpr());
+    }
 
 
     bool empty(bool floats) const {
         return floats ? fpu_.empty() : gpr_.empty();
     }
     FloatRegister takeFloat() {
         return fpu_.takeAny();
     }
@@ -624,16 +654,22 @@ class RegisterSet {
 #endif
     }
     void take(AnyRegister reg) {
         if (reg.isFloat())
             fpu_.take(reg.fpu());
         else
             gpr_.take(reg.gpr());
     }
+    void takeAllAliasedUnchecked(AnyRegister reg) {
+        if (reg.isFloat())
+            fpu_.takeAllAliasedUnchecked(reg.fpu());
+        else
+            gpr_.takeAllAliasedUnchecked(reg.gpr());
+    }
     AnyRegister takeAny(bool isFloat) {
         if (isFloat)
             return AnyRegister(takeFloat());
         return AnyRegister(takeGeneral());
     }
     void clear() {
         gpr_.clear();
         fpu_.clear();
--- a/js/src/jit/Registers.h
+++ b/js/src/jit/Registers.h
@@ -65,42 +65,51 @@ struct Register {
 
     // N.B. FloatRegister is an explicit outparam here because msvc-2010
     // miscompiled it on win64 when the value was simply returned.  This
     // now has an explicit outparam for compatability.
     void aliased(uint32_t aliasIdx, Register *ret) const {
         JS_ASSERT(aliasIdx == 0);
         *ret = *this;
     }
+    static uint32_t SetSize(SetType x) {
+        return Codes::SetSize(x);
+    }
+    static uint32_t FirstBit(SetType x) {
+        return Codes::FirstBit(x);
+    }
+    static uint32_t LastBit(SetType x) {
+        return Codes::LastBit(x);
+    }
 };
 
 class RegisterDump
 {
   protected: // Silence Clang warning.
     mozilla::Array<uintptr_t, Registers::Total> regs_;
-    mozilla::Array<double, FloatRegisters::Total> fpregs_;
+    mozilla::Array<double, FloatRegisters::TotalPhys> fpregs_;
 
   public:
     static size_t offsetOfRegister(Register reg) {
         return offsetof(RegisterDump, regs_) + reg.code() * sizeof(uintptr_t);
     }
     static size_t offsetOfRegister(FloatRegister reg) {
-        return offsetof(RegisterDump, fpregs_) + reg.code() * sizeof(double);
+        return offsetof(RegisterDump, fpregs_) + reg.getRegisterDumpOffsetInBytes();
     }
 };
 
 // Information needed to recover machine register state.
 class MachineState
 {
     mozilla::Array<uintptr_t *, Registers::Total> regs_;
     mozilla::Array<double *, FloatRegisters::Total> fpregs_;
 
   public:
     static MachineState FromBailout(mozilla::Array<uintptr_t, Registers::Total> &regs,
-                                    mozilla::Array<double, FloatRegisters::Total> &fpregs);
+                                    mozilla::Array<double, FloatRegisters::TotalPhys> &fpregs);
 
     void setRegisterLocation(Register reg, uintptr_t *up) {
         regs_[reg.code()] = up;
     }
     void setRegisterLocation(FloatRegister reg, double *dp) {
         fpregs_[reg.code()] = dp;
     }
 
--- a/js/src/jit/StupidAllocator.cpp
+++ b/js/src/jit/StupidAllocator.cpp
@@ -72,34 +72,36 @@ StupidAllocator::init()
     }
 
     // Assign physical registers to the tracked allocation.
     {
         registerCount = 0;
         RegisterSet remainingRegisters(allRegisters_);
         while (!remainingRegisters.empty(/* float = */ false))
             registers[registerCount++].reg = AnyRegister(remainingRegisters.takeGeneral());
+
         while (!remainingRegisters.empty(/* float = */ true))
             registers[registerCount++].reg = AnyRegister(remainingRegisters.takeFloat());
+
         JS_ASSERT(registerCount <= MAX_REGISTERS);
     }
 
     return true;
 }
 
 bool
 StupidAllocator::allocationRequiresRegister(const LAllocation *alloc, AnyRegister reg)
 {
     if (alloc->isRegister() && alloc->toRegister() == reg)
         return true;
     if (alloc->isUse()) {
         const LUse *use = alloc->toUse();
         if (use->policy() == LUse::FIXED) {
             AnyRegister usedReg = GetFixedRegister(virtualRegisters[use->virtualRegister()], use);
-            if (usedReg == reg)
+            if (usedReg.aliases(reg))
                 return true;
         }
     }
     return false;
 }
 
 bool
 StupidAllocator::registerIsReserved(LInstruction *ins, AnyRegister reg)
@@ -124,17 +126,17 @@ AnyRegister
 StupidAllocator::ensureHasRegister(LInstruction *ins, uint32_t vreg)
 {
     // Ensure that vreg is held in a register before ins.
 
     // Check if the virtual register is already held in a physical register.
     RegisterIndex existing = findExistingRegister(vreg);
     if (existing != UINT32_MAX) {
         if (registerIsReserved(ins, registers[existing].reg)) {
-            evictRegister(ins, existing);
+            evictAliasedRegister(ins, existing);
         } else {
             registers[existing].age = ins->id();
             return registers[existing].reg;
         }
     }
 
     RegisterIndex best = allocateRegister(ins, vreg);
     loadRegister(ins, vreg, best, virtualRegisters[vreg]->type());
@@ -153,32 +155,32 @@ StupidAllocator::allocateRegister(LInstr
     LDefinition *def = virtualRegisters[vreg];
     JS_ASSERT(def);
 
     RegisterIndex best = UINT32_MAX;
 
     for (size_t i = 0; i < registerCount; i++) {
         AnyRegister reg = registers[i].reg;
 
-        if (reg.isFloat() != def->isFloatReg())
+        if (!def->isCompatibleReg(reg))
             continue;
 
         // Skip the register if it is in use for an allocated input or output.
         if (registerIsReserved(ins, reg))
             continue;
 
         if (registers[i].vreg == MISSING_ALLOCATION ||
             best == UINT32_MAX ||
             registers[best].age > registers[i].age)
         {
             best = i;
         }
     }
 
-    evictRegister(ins, best);
+    evictAliasedRegister(ins, best);
     return best;
 }
 
 void
 StupidAllocator::syncRegister(LInstruction *ins, RegisterIndex index)
 {
     if (registers[index].dirty) {
         LMoveGroup *input = getInputMoveGroup(ins->id());
@@ -195,16 +197,26 @@ StupidAllocator::syncRegister(LInstructi
 void
 StupidAllocator::evictRegister(LInstruction *ins, RegisterIndex index)
 {
     syncRegister(ins, index);
     registers[index].set(MISSING_ALLOCATION);
 }
 
 void
+StupidAllocator::evictAliasedRegister(LInstruction *ins, RegisterIndex index)
+{
+    // Evict every tracked register that registers[index].reg reports through aliased().
+    for (uint32_t i = 0; i < registers[index].reg.numAliased(); i++) {
+        RegisterIndex aindex = registerIndex(registers[index].reg.aliased(i));
+        evictRegister(ins, aindex);
+    }
+}
+
+void
 StupidAllocator::loadRegister(LInstruction *ins, uint32_t vreg, RegisterIndex index, LDefinition::Type type)
 {
     // Load a vreg from its stack location to a register.
     LMoveGroup *input = getInputMoveGroup(ins->id());
     LAllocation *source = stackLocation(vreg);
     LAllocation *dest = new(alloc()) LAllocation(registers[index].reg);
     input->addAfter(source, dest, type);
     registers[index].set(vreg, ins);
@@ -329,17 +341,19 @@ StupidAllocator::allocateForInstruction(
         uint32_t vreg = use->virtualRegister();
         if (use->policy() == LUse::REGISTER) {
             AnyRegister reg = ensureHasRegister(ins, vreg);
             alloc.replace(LAllocation(reg));
         } else if (use->policy() == LUse::FIXED) {
             AnyRegister reg = GetFixedRegister(virtualRegisters[vreg], use);
             RegisterIndex index = registerIndex(reg);
             if (registers[index].vreg != vreg) {
-                evictRegister(ins, index);
+                // Need to evict multiple registers
+                evictAliasedRegister(ins, registerIndex(reg));
+                // If this vreg is already held in some other register, evict it from there too.
                 RegisterIndex existing = findExistingRegister(vreg);
                 if (existing != UINT32_MAX)
                     evictRegister(ins, existing);
                 loadRegister(ins, vreg, index, virtualRegisters[vreg]->type());
             }
             alloc.replace(LAllocation(reg));
         } else {
             // Inputs which are not required to be in a register are not
--- a/js/src/jit/StupidAllocator.h
+++ b/js/src/jit/StupidAllocator.h
@@ -70,16 +70,17 @@ class StupidAllocator : public RegisterA
 
     RegisterIndex registerIndex(AnyRegister reg);
 
     AnyRegister ensureHasRegister(LInstruction *ins, uint32_t vreg);
     RegisterIndex allocateRegister(LInstruction *ins, uint32_t vreg);
 
     void syncRegister(LInstruction *ins, RegisterIndex index);
     void evictRegister(LInstruction *ins, RegisterIndex index);
+    void evictAliasedRegister(LInstruction *ins, RegisterIndex index);
     void loadRegister(LInstruction *ins, uint32_t vreg, RegisterIndex index, LDefinition::Type type);
 
     RegisterIndex findExistingRegister(uint32_t vreg);
 
     bool allocationRequiresRegister(const LAllocation *alloc, AnyRegister reg);
     bool registerIsReserved(LInstruction *ins, AnyRegister reg);
 };
 
--- a/js/src/jit/arm/Architecture-arm.cpp
+++ b/js/src/jit/arm/Architecture-arm.cpp
@@ -9,16 +9,17 @@
 #ifndef JS_ARM_SIMULATOR
 #include <elf.h>
 #endif
 
 #include <fcntl.h>
 #include <unistd.h>
 
 #include "jit/arm/Assembler-arm.h"
+#include "jit/RegisterSets.h"
 
 #define HWCAP_USE_HARDFP_ABI (1 << 27)
 
 #if !(defined(ANDROID) || defined(MOZ_B2G)) && !defined(JS_ARM_SIMULATOR)
 #define HWCAP_ARMv7 (1 << 28)
 #include <asm/hwcap.h>
 #else
 #define HWCAP_VFP      (1<<0)
@@ -350,10 +351,20 @@ VFPRegister::getRegisterDumpOffsetInByte
 {
     if (isSingle())
         return id() * sizeof(float);
     if (isDouble())
         return id() * sizeof(double);
     MOZ_ASSUME_UNREACHABLE();
 }
 
+uint32_t
+FloatRegisters::ActualTotalPhys()
+{
+    // Report 32 when Has32DP() holds and 16 otherwise.
+    const bool has32 = Has32DP();
+    return has32 ? 32 : 16;
+}
+
+
 } // namespace jit
 } // namespace js
+
--- a/js/src/jit/arm/Architecture-arm.h
+++ b/js/src/jit/arm/Architecture-arm.h
@@ -143,25 +143,64 @@ class Registers
         (1 << Registers::r1);  // Used for double-size returns.
 
     static const uint32_t AllocatableMask = AllMask & ~NonAllocatableMask;
     typedef uint32_t SetType;
     static uint32_t SetSize(SetType x) {
         static_assert(sizeof(SetType) == 4, "SetType must be 32 bits");
         return mozilla::CountPopulation32(x);
     }
+    static uint32_t FirstBit(SetType x) {
+        return mozilla::CountTrailingZeroes32(x);
+    }
+    static uint32_t LastBit(SetType x) {
+        return 31 - mozilla::CountLeadingZeroes32(x);
+    }
 };
 
 // Smallest integer type that can hold a register bitmask.
 typedef uint16_t PackedRegisterMask;
+
 
 class FloatRegisters
 {
   public:
     enum FPRegisterID {
+        s0,
+        s1,
+        s2,
+        s3,
+        s4,
+        s5,
+        s6,
+        s7,
+        s8,
+        s9,
+        s10,
+        s11,
+        s12,
+        s13,
+        s14,
+        s15,
+        s16,
+        s17,
+        s18,
+        s19,
+        s20,
+        s21,
+        s22,
+        s23,
+        s24,
+        s25,
+        s26,
+        s27,
+        s28,
+        s29,
+        s30,
+        s31,
         d0,
         d1,
         d2,
         d3,
         d4,
         d5,
         d6,
         d7,
@@ -183,61 +222,100 @@ class FloatRegisters
         d23,
         d24,
         d25,
         d26,
         d27,
         d28,
         d29,
         d30,
+        d31,
         invalid_freg
     };
+
     typedef FPRegisterID Code;
 
-    static const char *GetName(Code code) {
+
+    static const char *GetDoubleName(Code code) {
         static const char * const Names[] = { "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
-                                              "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15"};
+                                              "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
+                                              "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
+                                              "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31"};
         return Names[code];
     }
+    static const char *GetSingleName(Code code) {
+        static const char * const Names[] = { "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
+                                              "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15",
+                                              "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
+                                              "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31"};
+        return Names[code];
+    }
+
     static const char *GetName(uint32_t i) {
         JS_ASSERT(i < Total);
-        return GetName(Code(i));
+        return i < TotalSingle ? GetSingleName(Code(i)) : GetDoubleName(Code(i - TotalSingle));
     }
 
     static Code FromName(const char *name);
 
     static const Code Invalid = invalid_freg;
-
-    static const uint32_t Total = 16;
-    static const uint32_t Allocatable = 15;
-
-    static const uint32_t AllMask = (1 << Total) - 1;
+    static const uint32_t Total = 48;
+    static const uint32_t TotalDouble = 16;
+    static const uint32_t TotalSingle = 32;
+    static const uint32_t Allocatable = 45;
+    // There are only 32 places that we can put values.
+    static const uint32_t TotalPhys = 32;
+    static uint32_t ActualTotalPhys();
+    static const uint64_t AllDoubleMask = ((1ull << 16) - 1) << 32;
+    static const uint64_t AllMask = ((1ull << 48) - 1);
 
     // d15 is the ScratchFloatReg.
-    static const uint32_t NonVolatileMask =
-        (1 << d8) |
-        (1 << d9) |
-        (1 << d10) |
-        (1 << d11) |
-        (1 << d12) |
-        (1 << d13) |
-        (1 << d14);
+    static const uint64_t NonVolatileDoubleMask =
+         ((1ULL << d8) |
+          (1ULL << d9) |
+          (1ULL << d10) |
+          (1ULL << d11) |
+          (1ULL << d12) |
+          (1ULL << d13) |
+          (1ULL << d14));
+    // s30 and s31 alias d15.
+    static const uint64_t NonVolatileMask =
+        (NonVolatileDoubleMask |
+         ((1 << s16) |
+          (1 << s17) |
+          (1 << s18) |
+          (1 << s19) |
+          (1 << s20) |
+          (1 << s21) |
+          (1 << s22) |
+          (1 << s23) |
+          (1 << s24) |
+          (1 << s25) |
+          (1 << s26) |
+          (1 << s27) |
+          (1 << s28) |
+          (1 << s29) |
+          (1 << s30)));
 
-    static const uint32_t VolatileMask = AllMask & ~NonVolatileMask;
+    static const uint64_t VolatileMask = AllMask & ~NonVolatileMask;
+    static const uint64_t VolatileDoubleMask = AllDoubleMask & ~NonVolatileDoubleMask;
 
-    static const uint32_t WrapperMask = VolatileMask;
+    static const uint64_t WrapperMask = VolatileMask;
 
     // d15 is the ARM scratch float register.
-    static const uint32_t NonAllocatableMask = (1 << d15) | (1 << invalid_freg);
+    // s30 and s31 alias d15.
+    static const uint64_t NonAllocatableMask = ((1ULL << d15)) |
+                                                (1ULL << s30) |
+                                                (1ULL << s31);
 
     // Registers that can be allocated without being saved, generally.
-    static const uint32_t TempMask = VolatileMask & ~NonAllocatableMask;
+    static const uint64_t TempMask = VolatileMask & ~NonAllocatableMask;
 
-    static const uint32_t AllocatableMask = AllMask & ~NonAllocatableMask;
-    typedef uint32_t SetType;
+    static const uint64_t AllocatableMask = AllMask & ~NonAllocatableMask;
+    typedef uint64_t SetType;
 };
 
 template <typename T>
 class TypedRegisterSet;
 
 class VFPRegister
 {
   public:
@@ -257,17 +335,17 @@ class VFPRegister
   protected:
     RegType kind : 2;
     // ARM doesn't have more than 32 registers. Don't take more bits than we'll
     // need. Presently, we don't have plans to address the upper and lower
     // halves of the double registers seprately, so 5 bits should suffice. If we
     // do decide to address them seprately (vmov, I'm looking at you), we will
     // likely specify it as a separate field.
   public:
-    Code code_ : 5;
+    uint32_t code_ : 5;
   protected:
     bool _isInvalid : 1;
     bool _isMissing : 1;
 
   public:
     MOZ_CONSTEXPR VFPRegister(uint32_t r, RegType k)
       : kind(k), code_ (Code(r)), _isInvalid(false), _isMissing(false)
     { }
@@ -288,17 +366,17 @@ class VFPRegister
         return kind == other.kind && code_ == other.code_;
     }
     bool isDouble() const { return kind == Double; }
     bool isSingle() const { return kind == Single; }
     bool isFloat() const { return (kind == Double) || (kind == Single); }
     bool isInt() const { return (kind == UInt) || (kind == Int); }
     bool isSInt() const { return kind == Int; }
     bool isUInt() const { return kind == UInt; }
-    bool equiv(VFPRegister other) const { return other.kind == kind; }
+    bool equiv(const VFPRegister &other) const { return other.kind == kind; }
     size_t size() const { return (kind == Double) ? 8 : 4; }
     bool isInvalid() const;
     bool isMissing() const;
 
     VFPRegister doubleOverlay(unsigned int which = 0) const;
     VFPRegister singleOverlay(unsigned int which = 0) const;
     VFPRegister sintOverlay(unsigned int which = 0) const;
     VFPRegister uintOverlay(unsigned int which = 0) const;
@@ -322,52 +400,52 @@ class VFPRegister
         }
     };
 
     Code code() const {
         JS_ASSERT(!_isInvalid && !_isMissing);
         // This should only be used in areas where we only have doubles and
         // singles.
         JS_ASSERT(isFloat());
-        return Code(code_);
+        return Code(code_ | (kind << 5));
     }
     uint32_t id() const {
         return code_;
     }
     static VFPRegister FromCode(uint32_t i) {
         uint32_t code = i & 31;
-        return VFPRegister(code, Double);
+        uint32_t kind = i >> 5;
+        return VFPRegister(code, RegType(kind));
     }
     bool volatile_() const {
         if (isDouble())
             return !!((1 << (code_ >> 1)) & FloatRegisters::VolatileMask);
         return !!((1 << code_) & FloatRegisters::VolatileMask);
     }
     const char *name() const {
-        return FloatRegisters::GetName(code_);
+        if (isDouble())
+            return FloatRegisters::GetDoubleName(Code(code_));
+        return FloatRegisters::GetSingleName(Code(code_));
     }
     bool operator != (const VFPRegister &other) const {
         return other.kind != kind || code_ != other.code_;
     }
     bool aliases(const VFPRegister &other) {
         if (kind == other.kind)
             return code_ == other.code_;
         return doubleOverlay() == other.doubleOverlay();
     }
     static const int NumAliasedDoubles = 16;
     uint32_t numAliased() const {
-        return 1;
-#ifdef EVERYONE_KNOWS_ABOUT_ALIASING
         if (isDouble()) {
             if (code_ < NumAliasedDoubles)
                 return 3;
             return 1;
         }
         return 2;
-#endif
     }
 
     // N.B. FloatRegister is an explicit outparam here because msvc-2010
     // miscompiled it on win64 when the value was simply returned
     void aliased(uint32_t aliasIdx, VFPRegister *ret) {
         if (aliasIdx == 0) {
             *ret = *this;
             return;
@@ -408,26 +486,34 @@ class VFPRegister
             return;
         }
         JS_ASSERT((code_ & 1) == 0);
         *ret = doubleOverlay(aliasIdx - 1);
         return;
     }
     typedef FloatRegisters::SetType SetType;
     static uint32_t SetSize(SetType x) {
-        static_assert(sizeof(SetType) == 4, "SetType must be 32 bits");
-        return mozilla::CountPopulation32(x);
+        static_assert(sizeof(SetType) == 8, "SetType must be 64 bits");
+        // CountPopulation32 only looks at 32 bits, so count each half of the set.
+        return mozilla::CountPopulation32(uint32_t(x)) +
+               mozilla::CountPopulation32(uint32_t(x >> 32));
     }
     static Code FromName(const char *name) {
         return FloatRegisters::FromName(name);
     }
     static TypedRegisterSet<VFPRegister> ReduceSetForPush(const TypedRegisterSet<VFPRegister> &s);
     static uint32_t GetSizeInBytes(const TypedRegisterSet<VFPRegister> &s);
     static uint32_t GetPushSizeInBytes(const TypedRegisterSet<VFPRegister> &s);
     uint32_t getRegisterDumpOffsetInBytes();
+    static uint32_t FirstBit(SetType x) {
+        return mozilla::CountTrailingZeroes64(x);
+    }
+    static uint32_t LastBit(SetType x) {
+        return 63 - mozilla::CountLeadingZeroes64(x);
+    }
 
 };
 
 // The only floating point register set that we work with are the VFP Registers.
 typedef VFPRegister FloatRegister;
 
 uint32_t GetARMFlags();
 bool HasMOVWT();
--- a/js/src/jit/arm/Assembler-arm.cpp
+++ b/js/src/jit/arm/Assembler-arm.cpp
@@ -17,16 +17,18 @@
 #include "jit/arm/MacroAssembler-arm.h"
 #include "jit/JitCompartment.h"
 
 using namespace js;
 using namespace js::jit;
 
 using mozilla::CountLeadingZeroes32;
 
+void dbg_break() {}
+
 // Note this is used for inter-AsmJS calls and may pass arguments and results in
 // floating point registers even if the system ABI does not.
 ABIArgGenerator::ABIArgGenerator() :
     intRegIndex_(0),
     floatRegIndex_(0),
     stackOffset_(0),
     current_()
 {}
@@ -41,27 +43,39 @@ ABIArgGenerator::next(MIRType type)
             current_ = ABIArg(stackOffset_);
             stackOffset_ += sizeof(uint32_t);
             break;
         }
         current_ = ABIArg(Register::FromCode(intRegIndex_));
         intRegIndex_++;
         break;
       case MIRType_Float32:
-      case MIRType_Double:
         if (floatRegIndex_ == NumFloatArgRegs) {
             static const int align = sizeof(double) - 1;
             stackOffset_ = (stackOffset_ + align) & ~align;
             current_ = ABIArg(stackOffset_);
             stackOffset_ += sizeof(uint64_t);
             break;
         }
-        current_ = ABIArg(FloatRegister::FromCode(floatRegIndex_));
+        current_ = ABIArg(VFPRegister(floatRegIndex_, VFPRegister::Single));
         floatRegIndex_++;
         break;
+      case MIRType_Double:
+        // Bump the number of used registers up to the next multiple of two.
+        floatRegIndex_ = (floatRegIndex_ + 1) & ~1;
+        if (floatRegIndex_ == NumFloatArgRegs) {
+            static const int align = sizeof(double) - 1;
+            stackOffset_ = (stackOffset_ + align) & ~align;
+            current_ = ABIArg(stackOffset_);
+            stackOffset_ += sizeof(uint64_t);
+            break;
+        }
+        current_ = ABIArg(VFPRegister(floatRegIndex_ >> 1, VFPRegister::Double));
+        floatRegIndex_+=2;
+        break;
       default:
         MOZ_ASSUME_UNREACHABLE("Unexpected argument type");
     }
 
     return current_;
 }
 const Register ABIArgGenerator::NonArgReturnVolatileReg0 = r4;
 const Register ABIArgGenerator::NonArgReturnVolatileReg1 = r5;
@@ -1184,16 +1198,17 @@ BOffImm::getDest(Instruction *src)
     return &src[(((int32_t)data << 8) >> 8) + 2];
 }
 
 // VFPRegister implementation
 VFPRegister
 VFPRegister::doubleOverlay(unsigned int which) const
 {
     JS_ASSERT(!_isInvalid);
+    JS_ASSERT(which == 0);
     if (kind != Double)
         return VFPRegister(code_ >> 1, Double);
     return *this;
 }
 VFPRegister
 VFPRegister::singleOverlay(unsigned int which) const
 {
     JS_ASSERT(!_isInvalid);
@@ -1581,29 +1596,29 @@ class PoolHintData {
     void init(uint32_t index, Assembler::Condition cond, LoadType lt, const VFPRegister &destReg) {
         JS_ASSERT(destReg.isFloat());
         index_ = index;
         JS_ASSERT(index_ == index);
         cond_ = cond >> 28;
         JS_ASSERT(cond_ == cond >> 28);
         loadType_ = lt;
         ONES = ExpectedOnes;
-        destReg_ = destReg.isDouble() ? destReg.code() : destReg.doubleOverlay().code();
+        destReg_ = destReg.id();
         destType_ = destReg.isDouble();
     }
     Assembler::Condition getCond() {
         return Assembler::Condition(cond_ << 28);
     }
 
     Register getReg() {
         return Register::FromCode(destReg_);
     }
     VFPRegister getVFPReg() {
-        VFPRegister r = VFPRegister(FloatRegister::FromCode(destReg_));
-        return destType_ ? r : r.singleOverlay();
+        VFPRegister r = VFPRegister(destReg_, destType_ ? VFPRegister::Double : VFPRegister::Single);
+        return r;
     }
 
     int32_t getIndex() {
         return index_;
     }
     void setIndex(uint32_t index) {
         JS_ASSERT(ONES == ExpectedOnes && loadType_ != PoolBOGUS);
         index_ = index;
@@ -2314,17 +2329,16 @@ Assembler::retarget(Label *label, Label 
             JS_ASSERT((int32_t)prev == Label::INVALID_OFFSET);
         }
     }
     label->reset();
 
 }
 
 
-void dbg_break() {}
 static int stopBKPT = -1;
 void
 Assembler::as_bkpt()
 {
     // This is a count of how many times a breakpoint instruction has been
     // generated. It is embedded into the instruction for debugging
     // purposes. Gdb will print "bkpt xxx" when you attempt to dissassemble a
     // breakpoint with the number xxx embedded into it. If this breakpoint is
--- a/js/src/jit/arm/Assembler-arm.h
+++ b/js/src/jit/arm/Assembler-arm.h
@@ -77,29 +77,31 @@ class ABIArgGenerator
     uint32_t stackBytesConsumedSoFar() const { return stackOffset_; }
     static const Register NonArgReturnVolatileReg0;
     static const Register NonArgReturnVolatileReg1;
 };
 
 static MOZ_CONSTEXPR_VAR Register PreBarrierReg = r1;
 
 static MOZ_CONSTEXPR_VAR Register InvalidReg = { Registers::invalid_reg };
-static MOZ_CONSTEXPR_VAR FloatRegister InvalidFloatReg(FloatRegisters::invalid_freg);
+static MOZ_CONSTEXPR_VAR FloatRegister InvalidFloatReg;
 
 static MOZ_CONSTEXPR_VAR Register JSReturnReg_Type = r3;
 static MOZ_CONSTEXPR_VAR Register JSReturnReg_Data = r2;
 static MOZ_CONSTEXPR_VAR Register StackPointer = sp;
 static MOZ_CONSTEXPR_VAR Register FramePointer = InvalidReg;
 static MOZ_CONSTEXPR_VAR Register ReturnReg = r0;
-static MOZ_CONSTEXPR_VAR FloatRegister ReturnFloat32Reg(FloatRegisters::d0);
-static MOZ_CONSTEXPR_VAR FloatRegister ReturnDoubleReg(FloatRegisters::d0);
-static MOZ_CONSTEXPR_VAR FloatRegister ScratchFloat32Reg(FloatRegisters::d15);
-static MOZ_CONSTEXPR_VAR FloatRegister ScratchDoubleReg(FloatRegisters::d15);
+static MOZ_CONSTEXPR_VAR FloatRegister ReturnFloat32Reg = { FloatRegisters::s0, VFPRegister::Single };
+static MOZ_CONSTEXPR_VAR FloatRegister ReturnDoubleReg = { FloatRegisters::d0, VFPRegister::Double};
+static MOZ_CONSTEXPR_VAR FloatRegister ScratchFloat32Reg = { FloatRegisters::s30, VFPRegister::Single };
+static MOZ_CONSTEXPR_VAR FloatRegister ScratchDoubleReg = { FloatRegisters::d15, VFPRegister::Double };
+static MOZ_CONSTEXPR_VAR FloatRegister ScratchUIntReg = { FloatRegisters::d15, VFPRegister::UInt };
+static MOZ_CONSTEXPR_VAR FloatRegister ScratchIntReg = { FloatRegisters::d15, VFPRegister::Int };
 
-static MOZ_CONSTEXPR_VAR FloatRegister NANReg(FloatRegisters::d14);
+static MOZ_CONSTEXPR_VAR FloatRegister NANReg = { FloatRegisters::d14, VFPRegister::Double };
 
 // Registers used in the GenerateFFIIonExit Enable Activation block.
 static MOZ_CONSTEXPR_VAR Register AsmJSIonExitRegCallee = r4;
 static MOZ_CONSTEXPR_VAR Register AsmJSIonExitRegE0 = r0;
 static MOZ_CONSTEXPR_VAR Register AsmJSIonExitRegE1 = r1;
 static MOZ_CONSTEXPR_VAR Register AsmJSIonExitRegE2 = r2;
 static MOZ_CONSTEXPR_VAR Register AsmJSIonExitRegE3 = r3;
 
@@ -107,32 +109,33 @@ static MOZ_CONSTEXPR_VAR Register AsmJSI
 // None of these may be the second scratch register (lr).
 static MOZ_CONSTEXPR_VAR Register AsmJSIonExitRegReturnData = r2;
 static MOZ_CONSTEXPR_VAR Register AsmJSIonExitRegReturnType = r3;
 static MOZ_CONSTEXPR_VAR Register AsmJSIonExitRegD0 = r0;
 static MOZ_CONSTEXPR_VAR Register AsmJSIonExitRegD1 = r1;
 static MOZ_CONSTEXPR_VAR Register AsmJSIonExitRegD2 = r4;
 
 
-static MOZ_CONSTEXPR_VAR FloatRegister d0(FloatRegisters::d0);
-static MOZ_CONSTEXPR_VAR FloatRegister d1(FloatRegisters::d1);
-static MOZ_CONSTEXPR_VAR FloatRegister d2(FloatRegisters::d2);
-static MOZ_CONSTEXPR_VAR FloatRegister d3(FloatRegisters::d3);
-static MOZ_CONSTEXPR_VAR FloatRegister d4(FloatRegisters::d4);
-static MOZ_CONSTEXPR_VAR FloatRegister d5(FloatRegisters::d5);
-static MOZ_CONSTEXPR_VAR FloatRegister d6(FloatRegisters::d6);
-static MOZ_CONSTEXPR_VAR FloatRegister d7(FloatRegisters::d7);
-static MOZ_CONSTEXPR_VAR FloatRegister d8(FloatRegisters::d8);
-static MOZ_CONSTEXPR_VAR FloatRegister d9(FloatRegisters::d9);
-static MOZ_CONSTEXPR_VAR FloatRegister d10(FloatRegisters::d10);
-static MOZ_CONSTEXPR_VAR FloatRegister d11(FloatRegisters::d11);
-static MOZ_CONSTEXPR_VAR FloatRegister d12(FloatRegisters::d12);
-static MOZ_CONSTEXPR_VAR FloatRegister d13(FloatRegisters::d13);
-static MOZ_CONSTEXPR_VAR FloatRegister d14(FloatRegisters::d14);
-static MOZ_CONSTEXPR_VAR FloatRegister d15(FloatRegisters::d15);
+static MOZ_CONSTEXPR_VAR FloatRegister d0  = {FloatRegisters::d0, VFPRegister::Double};
+static MOZ_CONSTEXPR_VAR FloatRegister d1  = {FloatRegisters::d1, VFPRegister::Double};
+static MOZ_CONSTEXPR_VAR FloatRegister d2  = {FloatRegisters::d2, VFPRegister::Double};
+static MOZ_CONSTEXPR_VAR FloatRegister d3  = {FloatRegisters::d3, VFPRegister::Double};
+static MOZ_CONSTEXPR_VAR FloatRegister d4  = {FloatRegisters::d4, VFPRegister::Double};
+static MOZ_CONSTEXPR_VAR FloatRegister d5  = {FloatRegisters::d5, VFPRegister::Double};
+static MOZ_CONSTEXPR_VAR FloatRegister d6  = {FloatRegisters::d6, VFPRegister::Double};
+static MOZ_CONSTEXPR_VAR FloatRegister d7  = {FloatRegisters::d7, VFPRegister::Double};
+static MOZ_CONSTEXPR_VAR FloatRegister d8  = {FloatRegisters::d8, VFPRegister::Double};
+static MOZ_CONSTEXPR_VAR FloatRegister d9  = {FloatRegisters::d9, VFPRegister::Double};
+static MOZ_CONSTEXPR_VAR FloatRegister d10 = {FloatRegisters::d10, VFPRegister::Double};
+static MOZ_CONSTEXPR_VAR FloatRegister d11 = {FloatRegisters::d11, VFPRegister::Double};
+static MOZ_CONSTEXPR_VAR FloatRegister d12 = {FloatRegisters::d12, VFPRegister::Double};
+static MOZ_CONSTEXPR_VAR FloatRegister d13 = {FloatRegisters::d13, VFPRegister::Double};
+static MOZ_CONSTEXPR_VAR FloatRegister d14 = {FloatRegisters::d14, VFPRegister::Double};
+static MOZ_CONSTEXPR_VAR FloatRegister d15 = {FloatRegisters::d15, VFPRegister::Double};
+
 
 // For maximal awesomeness, 8 should be sufficent. ldrd/strd (dual-register
 // load/store) operate in a single cycle when the address they are dealing with
 // is 8 byte aligned. Also, the ARM abi wants the stack to be 8 byte aligned at
 // function boundaries. I'm trying to make sure this is always true.
 static const uint32_t StackAlignment = 8;
 static const uint32_t CodeAlignment = 8;
 static const bool StackKeptAligned = true;
@@ -1613,28 +1616,50 @@ class Assembler : public AssemblerShared
         } else {
             if (dtmDelta == 0) {
                 dtmDelta = rn.code() - dtmLastReg;
                 JS_ASSERT(dtmDelta == 1 || dtmDelta == -1);
             }
             JS_ASSERT(dtmLastReg >= 0);
             JS_ASSERT(rn.code() == unsigned(dtmLastReg) + dtmDelta);
         }
+
         dtmLastReg = rn.code();
     }
     void finishFloatTransfer() {
         JS_ASSERT(dtmActive);
         dtmActive = false;
         JS_ASSERT(dtmLastReg != -1);
         dtmDelta = dtmDelta ? dtmDelta : 1;
+        // The operand for the vstr/vldr instruction is the lowest register in the range.
+        int low = Min(dtmLastReg, vdtmFirstReg);
+        int high = Max(dtmLastReg, vdtmFirstReg);
         // Fencepost problem.
-        int len = dtmDelta * (dtmLastReg - vdtmFirstReg) + 1;
-        as_vdtm(dtmLoadStore, dtmBase,
-                VFPRegister(FloatRegister::FromCode(Min(vdtmFirstReg, dtmLastReg))),
-                len, dtmCond);
+        int len = high - low + 1;
+        // vdtm can only transfer 16 registers at once.  If we need to transfer more,
+        // then either hoops are necessary, or we need to be updating the register.
+        JS_ASSERT_IF(len > 16, dtmUpdate == WriteBack);
+
+        int adjustLow = dtmLoadStore == IsStore ? 0 : 1;
+        int adjustHigh = dtmLoadStore == IsStore ? -1 : 0;
+        while (len > 0) {
+            // Limit the instruction to 16 registers.
+            int curLen = Min(len, 16);
+            // If it is a store, we want to start at the high end and move down
+            // (e.g. vpush d16-d31; vpush d0-d15).
+            int curStart = (dtmLoadStore == IsStore) ? high - curLen + 1 : low;
+            as_vdtm(dtmLoadStore, dtmBase,
+                    VFPRegister(FloatRegister::FromCode(curStart)),
+                    curLen, dtmCond);
+            // Update the bounds.
+            low += adjustLow * curLen;
+            high += adjustHigh * curLen;
+            // Update the length parameter.
+            len -= curLen;
+        }
     }
 
   private:
     int dtmRegBitField;
     int vdtmFirstReg;
     int dtmLastReg;
     int dtmDelta;
     Register dtmBase;
@@ -1995,17 +2020,19 @@ class InstructionIterator {
         return cur();
     }
     Instruction *cur() const {
         return i;
     }
 };
 
 static const uint32_t NumIntArgRegs = 4;
-static const uint32_t NumFloatArgRegs = 8;
+// There are 16 *float* (single-precision) registers available for arguments.
+// If doubles are used, only half that number of registers is available.
+static const uint32_t NumFloatArgRegs = 16;
 
 static inline bool
 GetIntArgReg(uint32_t usedIntArgs, uint32_t usedFloatArgs, Register *out)
 {
     if (usedIntArgs >= NumIntArgRegs)
         return false;
     *out = Register::FromCode(usedIntArgs);
     return true;
@@ -2043,22 +2070,32 @@ GetArgStackDisp(uint32_t arg)
 }
 
 #endif
 
 
 #if defined(JS_CODEGEN_ARM_HARDFP) || defined(JS_ARM_SIMULATOR)
 
 static inline bool
-GetFloatArgReg(uint32_t usedIntArgs, uint32_t usedFloatArgs, FloatRegister *out)
+GetFloat32ArgReg(uint32_t usedIntArgs, uint32_t usedFloatArgs, FloatRegister *out)
 {
     JS_ASSERT(UseHardFpABI());
     if (usedFloatArgs >= NumFloatArgRegs)
         return false;
-    *out = FloatRegister::FromCode(usedFloatArgs);
+    *out = VFPRegister(usedFloatArgs, VFPRegister::Single);
+    return true;
+}
+static inline bool
+GetDoubleArgReg(uint32_t usedIntArgs, uint32_t usedFloatArgs, FloatRegister *out)
+{
+    JS_ASSERT(UseHardFpABI());
+    JS_ASSERT((usedFloatArgs % 2) == 0);
+    if (usedFloatArgs >= NumFloatArgRegs)
+        return false;
+    *out = VFPRegister(usedFloatArgs>>1, VFPRegister::Double);
     return true;
 }
 
 static inline uint32_t
 GetIntArgStackDisp(uint32_t usedIntArgs, uint32_t usedFloatArgs, uint32_t *padding)
 {
     JS_ASSERT(UseHardFpABI());
     JS_ASSERT(usedIntArgs >= NumIntArgRegs);
--- a/js/src/jit/arm/Bailouts-arm.cpp
+++ b/js/src/jit/arm/Bailouts-arm.cpp
@@ -25,17 +25,17 @@ class BailoutStack
     // with frameClassId_ above. This should be migrated to ip.
   public:
     union {
         uintptr_t frameSize_;
         uintptr_t tableOffset_;
     };
 
   protected: // Silence Clang warning about unused private fields.
-    mozilla::Array<double, FloatRegisters::Total> fpregs_;
+    mozilla::Array<double, FloatRegisters::TotalPhys> fpregs_;
     mozilla::Array<uintptr_t, Registers::Total> regs_;
 
     uintptr_t snapshotOffset_;
     uintptr_t padding_;
 
   public:
     FrameSizeClass frameClass() const {
         return FrameSizeClass::FromClass(frameClassId_);
--- a/js/src/jit/arm/CodeGenerator-arm.cpp
+++ b/js/src/jit/arm/CodeGenerator-arm.cpp
@@ -279,17 +279,17 @@ CodeGeneratorARM::visitMinMaxD(LMinMaxD 
     masm.ma_b(&nan, Assembler::VFP_Unordered);
     // Make sure we handle -0 and 0 right.
     masm.ma_b(&equal, Assembler::VFP_Equal);
     masm.ma_b(&returnSecond, cond);
     masm.ma_b(&done);
 
     // Check for zero.
     masm.bind(&equal);
-    masm.compareDouble(first, InvalidFloatReg);
+    masm.compareDouble(first, NoVFPRegister);
     // First wasn't 0 or -0, so just return it.
     masm.ma_b(&done, Assembler::VFP_NotEqualOrUnordered);
     // So now both operands are either -0 or 0.
     if (ins->mir()->isMax()) {
         // -0 + -0 = -0 and -0 + 0 = 0.
         masm.ma_vadd(second, first, first);
     } else {
         masm.ma_vneg(first, first);
--- a/js/src/jit/arm/MacroAssembler-arm.cpp
+++ b/js/src/jit/arm/MacroAssembler-arm.cpp
@@ -159,16 +159,18 @@ MacroAssemblerARM::convertFloat32ToInt32
         as_vxfer(dest, InvalidReg, VFPRegister(src).singleOverlay(), FloatToCore, Assembler::Equal, 0);
         ma_cmp(dest, Imm32(0x80000000), Assembler::Equal);
         ma_b(fail, Assembler::Equal);
     }
 }
 
 void
 MacroAssemblerARM::convertFloat32ToDouble(FloatRegister src, FloatRegister dest) {
+    JS_ASSERT(dest.isDouble());
+    JS_ASSERT(src.isSingle());
     as_vcvt(VFPRegister(dest), VFPRegister(src).singleOverlay());
 }
 
 void
 MacroAssemblerARM::branchTruncateFloat32(FloatRegister src, Register dest, Label *fail) {
     ma_vcvt_F32_I32(src, ScratchFloat32Reg.sintOverlay());
     ma_vxfer(ScratchFloat32Reg, dest);
     ma_cmp(dest, Imm32(0x7fffffff));
@@ -1603,31 +1605,39 @@ MacroAssemblerARM::ma_vcvt_U32_F64(Float
     JS_ASSERT(src.isUInt());
     JS_ASSERT(dest.isDouble());
     as_vcvt(dest, src, false, cc);
 }
 
 void
 MacroAssemblerARM::ma_vcvt_F32_I32(FloatRegister src, FloatRegister dest, Condition cc)
 {
+    JS_ASSERT(src.isSingle());
+    JS_ASSERT(dest.isSInt());
     as_vcvt(VFPRegister(dest).sintOverlay(), VFPRegister(src).singleOverlay(), false, cc);
 }
 void
 MacroAssemblerARM::ma_vcvt_F32_U32(FloatRegister src, FloatRegister dest, Condition cc)
 {
+    JS_ASSERT(src.isSingle());
+    JS_ASSERT(dest.isUInt());
     as_vcvt(VFPRegister(dest).uintOverlay(), VFPRegister(src).singleOverlay(), false, cc);
 }
 void
 MacroAssemblerARM::ma_vcvt_I32_F32(FloatRegister src, FloatRegister dest, Condition cc)
 {
+    JS_ASSERT(src.isSInt());
+    JS_ASSERT(dest.isSingle());
     as_vcvt(VFPRegister(dest).singleOverlay(), VFPRegister(src).sintOverlay(), false, cc);
 }
 void
 MacroAssemblerARM::ma_vcvt_U32_F32(FloatRegister src, FloatRegister dest, Condition cc)
 {
+    JS_ASSERT(src.isUInt());
+    JS_ASSERT(dest.isSingle());
     as_vcvt(VFPRegister(dest).singleOverlay(), VFPRegister(src).uintOverlay(), false, cc);
 }
 
 void
 MacroAssemblerARM::ma_vxfer(FloatRegister src, Register dest, Condition cc)
 {
     as_vxfer(dest, InvalidReg, VFPRegister(src).singleOverlay(), FloatToCore, cc);
 }
@@ -1864,17 +1874,17 @@ void
 MacroAssemblerARMCompat::freeStack(Register amount)
 {
     ma_add(amount, sp);
 }
 
 void
 MacroAssembler::PushRegsInMask(RegisterSet set)
 {
-    int32_t diffF = set.fpus().size() * sizeof(double);
+    int32_t diffF = set.fpus().getPushSizeInBytes();
     int32_t diffG = set.gprs().size() * sizeof(intptr_t);
 
     if (set.gprs().size() > 1) {
         adjustFrame(diffG);
         startDataTransferM(IsStore, StackPointer, DB, WriteBack);
         for (GeneralRegisterBackwardIterator iter(set.gprs()); iter.more(); iter++) {
             diffG -= sizeof(intptr_t);
             transferReg(*iter);
@@ -1893,17 +1903,17 @@ MacroAssembler::PushRegsInMask(RegisterS
     diffF += transferMultipleByRuns(set.fpus(), IsStore, StackPointer, DB);
     JS_ASSERT(diffF == 0);
 }
 
 void
 MacroAssembler::PopRegsInMaskIgnore(RegisterSet set, RegisterSet ignore)
 {
     int32_t diffG = set.gprs().size() * sizeof(intptr_t);
-    int32_t diffF = set.fpus().size() * sizeof(double);
+    int32_t diffF = set.fpus().getPushSizeInBytes();
     const int32_t reservedG = diffG;
     const int32_t reservedF = diffF;
 
     // ARM can load multiple registers at once, but only if we want back all
     // the registers we previously saved to the stack.
     if (ignore.empty(true)) {
         diffF -= transferMultipleByRuns(set.fpus(), IsLoad, StackPointer, IA);
         adjustFrame(-reservedF);
@@ -2666,17 +2676,17 @@ MacroAssemblerARMCompat::addPtr(Imm32 im
     addPtr(imm, ScratchRegister);
     storePtr(ScratchRegister, dest);
 }
 
 void
 MacroAssemblerARMCompat::compareDouble(FloatRegister lhs, FloatRegister rhs)
 {
     // Compare the doubles, setting vector status flags.
-    if (rhs == InvalidFloatReg)
+    if (rhs.isMissing())
         ma_vcmpz(lhs);
     else
         ma_vcmp(lhs, rhs);
 
     // Move vector status bits to normal status flags.
     as_vmrs(pc);
 }
 
@@ -2703,17 +2713,17 @@ MacroAssemblerARMCompat::branchDouble(Do
 
     ma_b(label, ConditionFromDoubleCondition(cond));
 }
 
 void
 MacroAssemblerARMCompat::compareFloat(FloatRegister lhs, FloatRegister rhs)
 {
     // Compare the doubles, setting vector status flags.
-    if (rhs == InvalidFloatReg)
+    if (rhs.isMissing())
         as_vcmpz(VFPRegister(lhs).singleOverlay());
     else
         as_vcmp(VFPRegister(lhs).singleOverlay(), VFPRegister(rhs).singleOverlay());
 
     // Move vector status bits to normal status flags.
     as_vmrs(pc);
 }
 
@@ -3148,23 +3158,25 @@ void
 MacroAssemblerARMCompat::unboxNonDouble(const Address &src, Register dest)
 {
     ma_ldr(payloadOf(src), dest);
 }
 
 void
 MacroAssemblerARMCompat::unboxDouble(const ValueOperand &operand, FloatRegister dest)
 {
+    MOZ_ASSERT(dest.isDouble());
     as_vxfer(operand.payloadReg(), operand.typeReg(),
              VFPRegister(dest), CoreToFloat);
 }
 
 void
 MacroAssemblerARMCompat::unboxDouble(const Address &src, FloatRegister dest)
 {
+    MOZ_ASSERT(dest.isDouble());
     ma_vldr(Operand(src), dest);
 }
 
 void
 MacroAssemblerARMCompat::unboxValue(const ValueOperand &src, AnyRegister dest)
 {
     if (dest.isFloat()) {
         Label notInt32, end;
@@ -3724,18 +3736,22 @@ MacroAssemblerARMCompat::setupABICall(ui
     usedIntSlots_ = 0;
 #if defined(JS_CODEGEN_ARM_HARDFP) || defined(JS_ARM_SIMULATOR)
     usedFloatSlots_ = 0;
     usedFloat32_ = false;
     padding_ = 0;
 #endif
     floatArgsInGPR[0] = MoveOperand();
     floatArgsInGPR[1] = MoveOperand();
+    floatArgsInGPR[2] = MoveOperand();
+    floatArgsInGPR[3] = MoveOperand();
     floatArgsInGPRValid[0] = false;
     floatArgsInGPRValid[1] = false;
+    floatArgsInGPRValid[2] = false;
+    floatArgsInGPRValid[3] = false;
 }
 
 void
 MacroAssemblerARMCompat::setupAlignedABICall(uint32_t args)
 {
     setupABICall(args);
 
     dynamicAlignment_ = false;
@@ -3758,71 +3774,74 @@ MacroAssemblerARMCompat::setupUnalignedA
 void
 MacroAssemblerARMCompat::passHardFpABIArg(const MoveOperand &from, MoveOp::Type type)
 {
     MoveOperand to;
     ++passedArgs_;
     if (!enoughMemory_)
         return;
     switch (type) {
-      case MoveOp::FLOAT32:
-      case MoveOp::DOUBLE: {
-        // N.B. This isn't a limitation of the ABI, it is a limitation of the
-        // compiler right now. There isn't a good way to handle odd numbered
-        // single registers, so everything goes to hell when we try. Current fix
-        // is to never use more than one float in a function call. Fix coming
-        // along with complete float32 support in bug 957504.
-        JS_ASSERT(!usedFloat32_);
-        if (type == MoveOp::FLOAT32)
-            usedFloat32_ = true;
+      case MoveOp::FLOAT32: {
         FloatRegister fr;
-        if (GetFloatArgReg(usedIntSlots_, usedFloatSlots_, &fr)) {
+        // Record the argument type exactly once, before any early return.
+        passedArgTypes_ = (passedArgTypes_ << ArgType_Shift) | ArgType_Float32;
+        if (GetFloat32ArgReg(usedIntSlots_, usedFloatSlots_, &fr)) {
             if (from.isFloatReg() && from.floatReg() == fr) {
                 // Nothing to do; the value is in the right register already.
                 usedFloatSlots_++;
-                if (type == MoveOp::FLOAT32)
-                    passedArgTypes_ = (passedArgTypes_ << ArgType_Shift) | ArgType_Float32;
-                else
-                    passedArgTypes_ = (passedArgTypes_ << ArgType_Shift) | ArgType_Double;
                 return;
             }
             to = MoveOperand(fr);
         } else {
             // If (and only if) the integer registers have started spilling, do
             // we need to take the register's alignment into account.
-            uint32_t disp = INT_MAX;
-            if (type == MoveOp::FLOAT32)
-                disp = GetFloat32ArgStackDisp(usedIntSlots_, usedFloatSlots_, &padding_);
-            else
-                disp = GetDoubleArgStackDisp(usedIntSlots_, usedFloatSlots_, &padding_);
+            uint32_t disp = GetFloat32ArgStackDisp(usedIntSlots_, usedFloatSlots_, &padding_);
             to = MoveOperand(sp, disp);
         }
         usedFloatSlots_++;
-        if (type == MoveOp::FLOAT32)
-            passedArgTypes_ = (passedArgTypes_ << ArgType_Shift) | ArgType_Float32;
-        else
-            passedArgTypes_ = (passedArgTypes_ << ArgType_Shift) | ArgType_Double;
         break;
       }
+
+      case MoveOp::DOUBLE: {
+        FloatRegister fr;
+        passedArgTypes_ = (passedArgTypes_ << ArgType_Shift) | ArgType_Double;
+        // Round the slot index up to an even value: a double consumes two
+        // float slots (see the "+= 2" below).
+        usedFloatSlots_ = (usedFloatSlots_ + 1) & -2;
+        if (GetDoubleArgReg(usedIntSlots_, usedFloatSlots_, &fr)) {
+            if (from.isFloatReg() && from.floatReg() == fr) {
+                // Nothing to do; the value is in the right register already.
+                usedFloatSlots_ += 2;
+                return;
+            }
+            to = MoveOperand(fr);
+        } else {
+            // If (and only if) the integer registers have started spilling, do
+            // we need to take the register's alignment into account.
+            uint32_t disp = GetDoubleArgStackDisp(usedIntSlots_, usedFloatSlots_, &padding_);
+            to = MoveOperand(sp, disp);
+        }
+        usedFloatSlots_ += 2;
+        break;
+      }
       case MoveOp::GENERAL: {
         Register r;
+        passedArgTypes_ = (passedArgTypes_ << ArgType_Shift) | ArgType_General;
         if (GetIntArgReg(usedIntSlots_, usedFloatSlots_, &r)) {
             if (from.isGeneralReg() && from.reg() == r) {
                 // Nothing to do; the value is in the right register already.
                 usedIntSlots_++;
-                passedArgTypes_ = (passedArgTypes_ << ArgType_Shift) | ArgType_General;
                 return;
             }
             to = MoveOperand(r);
         } else {
             uint32_t disp = GetIntArgStackDisp(usedIntSlots_, usedFloatSlots_, &padding_);
             to = MoveOperand(sp, disp);
         }
         usedIntSlots_++;
-        passedArgTypes_ = (passedArgTypes_ << ArgType_Shift) | ArgType_General;
         break;
       }
       default:
         MOZ_ASSUME_UNREACHABLE("Unexpected argument type");
     }
 
     enoughMemory_ = moveResolver_.addMove(from, to, type);
 }
@@ -3853,18 +3870,18 @@ MacroAssemblerARMCompat::passSoftFpABIAr
       default:
         MOZ_ASSUME_UNREACHABLE("Unexpected argument type");
     }
 
     Register destReg;
     MoveOperand dest;
     if (GetIntArgReg(usedIntSlots_, 0, &destReg)) {
         if (type == MoveOp::DOUBLE || type == MoveOp::FLOAT32) {
-            floatArgsInGPR[destReg.code() >> 1] = from;
-            floatArgsInGPRValid[destReg.code() >> 1] = true;
+            floatArgsInGPR[destReg.code()] = from;
+            floatArgsInGPRValid[destReg.code()] = true;
             useResolver = false;
         } else if (from.isGeneralReg() && from.reg() == destReg) {
             // No need to move anything.
             useResolver = false;
         } else {
             dest = MoveOperand(destReg);
         }
     } else {
@@ -3941,23 +3958,34 @@ MacroAssemblerARMCompat::callWithABIPre(
         enoughMemory_ = enoughMemory_ && moveResolver_.resolve();
         if (!enoughMemory_)
             return;
 
         MoveEmitter emitter(*this);
         emitter.emit(moveResolver_);
         emitter.finish();
     }
-    for (int i = 0; i < 2; i++) {
+    for (int i = 0; i < 4; i++) {
         if (floatArgsInGPRValid[i]) {
             MoveOperand from = floatArgsInGPR[i];
-            Register to0 = Register::FromCode(i * 2), to1 = Register::FromCode(i * 2 + 1);
+            Register to0 = Register::FromCode(i);
+            Register to1;
+
+            if (!from.isFloatReg() || from.floatReg().isDouble()) {
+                // Doubles need to be moved into a pair of aligned registers
+                // whether they come from the stack, or VFP registers.
+                to1 = Register::FromCode(i + 1);
+                MOZ_ASSERT(i % 2 == 0);
+            }
 
             if (from.isFloatReg()) {
-                ma_vxfer(VFPRegister(from.floatReg()), to0, to1);
+                if (from.floatReg().isDouble())
+                    ma_vxfer(from.floatReg(), to0, to1);
+                else
+                    ma_vxfer(from.floatReg(), to0);
             } else {
                 JS_ASSERT(from.isMemory());
                 // Note: We can safely use the MoveOperand's displacement here,
                 // even if the base is SP: MoveEmitter::toOperand adjusts
                 // SP-relative operands by the difference between the current
                 // stack usage and stackAdjust, which emitter.finish() resets to
                 // 0.
                 //
@@ -4090,16 +4118,17 @@ MacroAssemblerARMCompat::callWithABI(con
     callWithABIPost(stackAdjust, result);
 }
 
 void
 MacroAssemblerARMCompat::handleFailureWithHandler(void *handler)
 {
     // Reserve space for exception information.
     int size = (sizeof(ResumeFromException) + 7) & ~7;
+
     ma_sub(Imm32(size), sp);
     ma_mov(sp, r0);
 
     // Ask for an exception handler.
     setupUnalignedABICall(1, r1);
     passABIArg(r0);
     callWithABI(handler);
 
@@ -4186,17 +4215,17 @@ MacroAssemblerARMCompat::testStringTruth
 }
 
 void
 MacroAssemblerARMCompat::floor(FloatRegister input, Register output, Label *bail)
 {
     Label handleZero;
     Label handleNeg;
     Label fin;
-    compareDouble(input, InvalidFloatReg);
+    compareDouble(input, NoVFPRegister);
     ma_b(&handleZero, Assembler::Equal);
     ma_b(&handleNeg, Assembler::Signed);
     // NaN is always a bail condition, just bail directly.
     ma_b(bail, Assembler::Overflow);
 
     // The argument is a positive number, truncation is the path to glory. Since
     // it is known to be > 0.0, explicitly convert to a larger range, then a
     // value that rounds to INT_MAX is explicitly different from an argument
@@ -4237,17 +4266,17 @@ MacroAssemblerARMCompat::floor(FloatRegi
 }
 
 void
 MacroAssemblerARMCompat::floorf(FloatRegister input, Register output, Label *bail)
 {
     Label handleZero;
     Label handleNeg;
     Label fin;
-    compareFloat(input, InvalidFloatReg);
+    compareFloat(input, NoVFPRegister);
     ma_b(&handleZero, Assembler::Equal);
     ma_b(&handleNeg, Assembler::Signed);
     // NaN is always a bail condition, just bail directly.
     ma_b(bail, Assembler::Overflow);
 
     // The argument is a positive number, truncation is the path to glory; Since
     // it is known to be > 0.0, explicitly convert to a larger range, then a
     // value that rounds to INT_MAX is explicitly different from an argument
@@ -4289,17 +4318,17 @@ MacroAssemblerARMCompat::floorf(FloatReg
 
 void
 MacroAssemblerARMCompat::ceil(FloatRegister input, Register output, Label *bail)
 {
     Label handleZero;
     Label handlePos;
     Label fin;
 
-    compareDouble(input, InvalidFloatReg);
+    compareDouble(input, NoVFPRegister);
     // NaN is always a bail condition, just bail directly.
     ma_b(bail, Assembler::Overflow);
     ma_b(&handleZero, Assembler::Equal);
     ma_b(&handlePos, Assembler::NotSigned);
 
     // We are in the ]-Inf; 0[ range
     // If we are in the ]-1; 0[ range => bailout
     ma_vimm(-1.0, ScratchDoubleReg);
@@ -4342,17 +4371,17 @@ MacroAssemblerARMCompat::ceil(FloatRegis
 
 void
 MacroAssemblerARMCompat::ceilf(FloatRegister input, Register output, Label *bail)
 {
     Label handleZero;
     Label handlePos;
     Label fin;
 
-    compareFloat(input, InvalidFloatReg);
+    compareFloat(input, NoVFPRegister);
     // NaN is always a bail condition, just bail directly.
     ma_b(bail, Assembler::Overflow);
     ma_b(&handleZero, Assembler::Equal);
     ma_b(&handlePos, Assembler::NotSigned);
 
     // We are in the ]-Inf; 0[ range
     // If we are in the ]-1; 0[ range => bailout
     ma_vimm_f32(-1.f, ScratchFloat32Reg);
--- a/js/src/jit/arm/MacroAssembler-arm.h
+++ b/js/src/jit/arm/MacroAssembler-arm.h
@@ -428,30 +428,35 @@ private:
     // iterators for forward/backward traversals. The sign argument should be 1
     // if we traverse forwards, -1 if we traverse backwards.
     template<typename RegisterIterator> int32_t
     transferMultipleByRunsImpl(FloatRegisterSet set, LoadStore ls,
                                Register rm, DTMMode mode, int32_t sign)
     {
         JS_ASSERT(sign == 1 || sign == -1);
 
-        int32_t delta = sign * sizeof(double);
+        int32_t delta = sign * sizeof(float);
         int32_t offset = 0;
-        RegisterIterator iter(set);
+        // Build up a new set, which is the sum of all of the single and double
+        // registers. This set can have up to 48 registers in total:
+        // s0-s31 and d16-d31.
+        FloatRegisterSet mod = set.reduceSetForPush();
+
+        RegisterIterator iter(mod);
         while (iter.more()) {
             startFloatTransferM(ls, rm, mode, WriteBack);
-            int32_t reg = (*iter).code_;
+            int32_t reg = (*iter).code();
             do {
                 offset += delta;
+                if ((*iter).isDouble())
+                    offset += delta;
                 transferFloatReg(*iter);
-            } while ((++iter).more() && (*iter).code_ == (reg += sign));
+            } while ((++iter).more() && (*iter).code() == (reg += sign));
             finishFloatTransfer();
         }
-
-        JS_ASSERT(offset == static_cast<int32_t>(set.size() * sizeof(double)) * sign);
         return offset;
     }
 };
 
 class MacroAssemblerARMCompat : public MacroAssemblerARM
 {
     bool inCall_;
     // Number of bytes the stack is adjusted inside a call to C. Calls to C may
@@ -473,18 +478,18 @@ class MacroAssemblerARMCompat : public M
     uint32_t usedFloatSlots_;
     bool usedFloat32_;
     uint32_t padding_;
 #endif
     bool dynamicAlignment_;
 
     // Used to work around the move resolver's lack of support for moving into
     // register pairs, which the softfp ABI needs.
-    mozilla::Array<MoveOperand, 2> floatArgsInGPR;
-    mozilla::Array<bool, 2> floatArgsInGPRValid;
+    mozilla::Array<MoveOperand, 4> floatArgsInGPR;
+    mozilla::Array<bool, 4> floatArgsInGPRValid;
 
     // Compute space needed for the function call and set the properties of the
     // callee. It returns the space which has to be allocated for calling the
     // function.
     //
     // arg            Number of arguments of the function.
     void setupABICall(uint32_t arg);
 
--- a/js/src/jit/arm/Simulator-arm.cpp
+++ b/js/src/jit/arm/Simulator-arm.cpp
@@ -754,17 +754,17 @@ ArmDebugger::debug()
                                 i < 8 &&
                                 (i % 2) == 0) {
                                 dvalue = getRegisterPairDoubleValue(i);
                                 printf(" (%f)\n", dvalue);
                             } else {
                                 printf("\n");
                             }
                         }
-                        for (uint32_t i = 0; i < FloatRegisters::Total; i++) {
+                        for (uint32_t i = 0; i < FloatRegisters::TotalPhys; i++) {
                             dvalue = getVFPDoubleRegisterValue(i);
                             uint64_t as_words = mozilla::BitwiseCast<uint64_t>(dvalue);
                             printf("%3s: %f 0x%08x %08x\n",
                                    FloatRegister::FromCode(i).name(),
                                    dvalue,
                                    static_cast<uint32_t>(as_words >> 32),
                                    static_cast<uint32_t>(as_words & 0xffffffff));
                         }
@@ -1316,38 +1316,38 @@ Simulator::set_dw_register(int dreg, con
     MOZ_ASSERT(dreg >= 0 && dreg < num_d_registers);
     registers_[dreg] = dbl[0];
     registers_[dreg + 1] = dbl[1];
 }
 
 void
 Simulator::get_d_register(int dreg, uint64_t *value)
 {
-    MOZ_ASSERT(dreg >= 0 && dreg < int(FloatRegisters::Total));
+    MOZ_ASSERT(dreg >= 0 && dreg < int(FloatRegisters::TotalPhys));
     memcpy(value, vfp_registers_ + dreg * 2, sizeof(*value));
 }
 
 void
 Simulator::set_d_register(int dreg, const uint64_t *value)
 {
-    MOZ_ASSERT(dreg >= 0 && dreg < int(FloatRegisters::Total));
+    MOZ_ASSERT(dreg >= 0 && dreg < int(FloatRegisters::TotalPhys));
     memcpy(vfp_registers_ + dreg * 2, value, sizeof(*value));
 }
 
 void
 Simulator::get_d_register(int dreg, uint32_t *value)
 {
-    MOZ_ASSERT(dreg >= 0 && dreg < int(FloatRegisters::Total));
+    MOZ_ASSERT(dreg >= 0 && dreg < int(FloatRegisters::TotalPhys));
     memcpy(value, vfp_registers_ + dreg * 2, sizeof(*value) * 2);
 }
 
 void
 Simulator::set_d_register(int dreg, const uint32_t *value)
 {
-    MOZ_ASSERT(dreg >= 0 && dreg < int(FloatRegisters::Total));
+    MOZ_ASSERT(dreg >= 0 && dreg < int(FloatRegisters::TotalPhys));
     memcpy(vfp_registers_ + dreg * 2, value, sizeof(*value) * 2);
 }
 
 void
 Simulator::get_q_register(int qreg, uint64_t *value)
 {
     MOZ_ASSERT(qreg >= 0 && qreg < num_q_registers);
     memcpy(value, vfp_registers_ + qreg * 4, sizeof(*value) * 2);
@@ -1409,30 +1409,30 @@ Simulator::get_s_register(int sreg) cons
 }
 
 template<class InputType, int register_size>
 void
 Simulator::setVFPRegister(int reg_index, const InputType &value)
 {
     MOZ_ASSERT(reg_index >= 0);
     MOZ_ASSERT_IF(register_size == 1, reg_index < num_s_registers);
-    MOZ_ASSERT_IF(register_size == 2, reg_index < int(FloatRegisters::Total));
+    MOZ_ASSERT_IF(register_size == 2, reg_index < int(FloatRegisters::TotalPhys));
 
     char buffer[register_size * sizeof(vfp_registers_[0])];
     memcpy(buffer, &value, register_size * sizeof(vfp_registers_[0]));
     memcpy(&vfp_registers_[reg_index * register_size], buffer,
            register_size * sizeof(vfp_registers_[0]));
 }
 
 template<class ReturnType, int register_size>
 ReturnType Simulator::getFromVFPRegister(int reg_index)
 {
     MOZ_ASSERT(reg_index >= 0);
     MOZ_ASSERT_IF(register_size == 1, reg_index < num_s_registers);
-    MOZ_ASSERT_IF(register_size == 2, reg_index < int(FloatRegisters::Total));
+    MOZ_ASSERT_IF(register_size == 2, reg_index < int(FloatRegisters::TotalPhys));
 
     ReturnType value = 0;
     char buffer[register_size * sizeof(vfp_registers_[0])];
     memcpy(buffer, &vfp_registers_[register_size * reg_index],
            register_size * sizeof(vfp_registers_[0]));
     memcpy(&value, buffer, register_size * sizeof(vfp_registers_[0]));
     return value;
 }
@@ -2097,17 +2097,17 @@ Simulator::scratchVolatileRegisters(bool
     set_register(r3, scratch_value);
     set_register(r12, scratch_value); // Intra-Procedure-call scratch register.
     set_register(r14, scratch_value); // Link register.
 
     if (scratchFloat) {
         uint64_t scratch_value_d = 0x5a5a5a5a5a5a5a5aLU ^ uint64_t(icount_) ^ (uint64_t(icount_) << 30);
         for (uint32_t i = d0; i < d8; i++)
             set_d_register(i, &scratch_value_d);
-        for (uint32_t i = d16; i < FloatRegisters::Total; i++)
+        for (uint32_t i = d16; i < FloatRegisters::TotalPhys; i++)
             set_d_register(i, &scratch_value_d);
     }
 }
 
 // Software interrupt instructions are used by the simulator to call into C++.
 void
 Simulator::softwareInterrupt(SimInstruction *instr)
 {
--- a/js/src/jit/arm/Trampoline-arm.cpp
+++ b/js/src/jit/arm/Trampoline-arm.cpp
@@ -20,24 +20,24 @@
 #include "jit/VMFunctions.h"
 
 #include "jit/ExecutionMode-inl.h"
 
 using namespace js;
 using namespace js::jit;
 
 static const FloatRegisterSet NonVolatileFloatRegs =
-    FloatRegisterSet((1 << FloatRegisters::d8) |
-                     (1 << FloatRegisters::d9) |
-                     (1 << FloatRegisters::d10) |
-                     (1 << FloatRegisters::d11) |
-                     (1 << FloatRegisters::d12) |
-                     (1 << FloatRegisters::d13) |
-                     (1 << FloatRegisters::d14) |
-                     (1 << FloatRegisters::d15));
+    FloatRegisterSet((1ULL << FloatRegisters::d8) |
+                     (1ULL << FloatRegisters::d9) |
+                     (1ULL << FloatRegisters::d10) |
+                     (1ULL << FloatRegisters::d11) |
+                     (1ULL << FloatRegisters::d12) |
+                     (1ULL << FloatRegisters::d13) |
+                     (1ULL << FloatRegisters::d14) |
+                     (1ULL << FloatRegisters::d15));
 
 static void
 GenerateReturn(MacroAssembler &masm, int returnCode, SPSProfiler *prof)
 {
     // Restore non-volatile floating point registers.
     masm.transferMultipleByRuns(NonVolatileFloatRegs, IsLoad, StackPointer, IA);
 
     // Unwind the sps mark.
@@ -345,34 +345,41 @@ JitRuntime::generateEnterJIT(JSContext *
     return code;
 }
 
 JitCode *
 JitRuntime::generateInvalidator(JSContext *cx)
 {
     // See large comment in x86's JitRuntime::generateInvalidator.
     MacroAssembler masm(cx);
-    //masm.as_bkpt();
     // At this point, one of two things has happened:
     // 1) Execution has just returned from C code, which left the stack aligned
     // 2) Execution has just returned from Ion code, which left the stack unaligned.
     // The old return address should not matter, but we still want the stack to
     // be aligned, and there is no good reason to automatically align it with a
     // call to setupUnalignedABICall.
     masm.ma_and(Imm32(~7), sp, sp);
     masm.startDataTransferM(IsStore, sp, DB, WriteBack);
     // We don't have to push everything, but this is likely easier.
     // Setting regs_.
     for (uint32_t i = 0; i < Registers::Total; i++)
         masm.transferReg(Register::FromCode(i));
     masm.finishDataTransfer();
 
+    // Since our data structures for stack inspection are compile-time fixed,
+    // if there are only 16 double registers, then we need to reserve
+    // space on the stack for the missing 16.
+    if (FloatRegisters::ActualTotalPhys() != FloatRegisters::TotalPhys) {
+        int missingRegs = FloatRegisters::TotalPhys - FloatRegisters::ActualTotalPhys();
+        masm.ma_sub(Imm32(missingRegs * sizeof(double)), sp);
+    }
+
     masm.startFloatTransferM(IsStore, sp, DB, WriteBack);
-    for (uint32_t i = 0; i < FloatRegisters::Total; i++)
-        masm.transferFloatReg(FloatRegister::FromCode(i));
+    for (uint32_t i = 0; i < FloatRegisters::ActualTotalPhys(); i++)
+        masm.transferFloatReg(FloatRegister(i, FloatRegister::Double));
     masm.finishFloatTransfer();
 
     masm.ma_mov(sp, r0);
     const int sizeOfRetval = sizeof(size_t)*2;
     masm.reserveStack(sizeOfRetval);
     masm.mov(sp, r1);
     const int sizeOfBailoutInfo = sizeof(void *)*2;
     masm.reserveStack(sizeOfBailoutInfo);
@@ -524,26 +531,34 @@ PushBailoutFrame(MacroAssembler &masm, u
     // bailoutFrame.registersnapshot
     // bailoutFrame.fpsnapshot
     // bailoutFrame.snapshotOffset
     // bailoutFrame.frameSize
 
     // STEP 1a: Save our register sets to the stack so Bailout() can read
     // everything.
     // sp % 8 == 0
+
     masm.startDataTransferM(IsStore, sp, DB, WriteBack);
     // We don't have to push everything, but this is likely easier.
     // Setting regs_.
     for (uint32_t i = 0; i < Registers::Total; i++)
         masm.transferReg(Register::FromCode(i));
     masm.finishDataTransfer();
 
+    // Since our data structures for stack inspection are compile-time fixed,
+    // if there are only 16 double registers, then we need to reserve
+    // space on the stack for the missing 16.
+    if (FloatRegisters::ActualTotalPhys() != FloatRegisters::TotalPhys) {
+        int missingRegs = FloatRegisters::TotalPhys - FloatRegisters::ActualTotalPhys();
+        masm.ma_sub(Imm32(missingRegs * sizeof(double)), sp);
+    }
     masm.startFloatTransferM(IsStore, sp, DB, WriteBack);
-    for (uint32_t i = 0; i < FloatRegisters::Total; i++)
-        masm.transferFloatReg(FloatRegister::FromCode(i));
+    for (uint32_t i = 0; i < FloatRegisters::ActualTotalPhys(); i++)
+        masm.transferFloatReg(FloatRegister(i, FloatRegister::Double));
     masm.finishFloatTransfer();
 
     // STEP 1b: Push both the "return address" of the function call (the address
     //          of the instruction after the call that we used to get here) as
     //          well as the callee token onto the stack. The return address is
     //          currently in r14. We will proceed by loading the callee token
     //          into a sacrificial register <= r14, then pushing both onto the
     //          stack.
@@ -586,17 +601,17 @@ GenerateBailoutThunk(JSContext *cx, Macr
     masm.passABIArg(r1);
 
     // Sp % 8 == 0
     masm.callWithABI(JS_FUNC_TO_DATA_PTR(void *, Bailout));
     masm.ma_ldr(Address(sp, 0), r2);
     masm.ma_add(sp, Imm32(sizeOfBailoutInfo), sp);
     // Common size of a bailout frame.
     uint32_t bailoutFrameSize = sizeof(void *) + // frameClass
-                              sizeof(double) * FloatRegisters::Total +
+                              sizeof(double) * FloatRegisters::TotalPhys +
                               sizeof(void *) * Registers::Total;
 
     if (frameClass == NO_FRAME_SIZE_CLASS_ID) {
         // Make sure the bailout frame size fits into the offset for a load.
         masm.as_dtr(IsLoad, 32, Offset,
                     r4, DTRAddr(sp, DtrOffImm(4)));
         // Used to be: offsetof(BailoutStack, frameSize_)
         // This structure is no longer available to us :(
@@ -888,17 +903,17 @@ JitRuntime::generateVMWrapper(JSContext 
 JitCode *
 JitRuntime::generatePreBarrier(JSContext *cx, MIRType type)
 {
     MacroAssembler masm(cx);
 
     RegisterSet save;
     if (cx->runtime()->jitSupportsFloatingPoint) {
         save = RegisterSet(GeneralRegisterSet(Registers::VolatileMask),
-                           FloatRegisterSet(FloatRegisters::VolatileMask));
+                           FloatRegisterSet(FloatRegisters::VolatileDoubleMask));
     } else {
         save = RegisterSet(GeneralRegisterSet(Registers::VolatileMask),
                            FloatRegisterSet());
     }
     masm.PushRegsInMask(save);
 
     JS_ASSERT(PreBarrierReg == r1);
     masm.movePtr(ImmPtr(cx->runtime()), r0);
--- a/js/src/jit/shared/CodeGenerator-shared.cpp
+++ b/js/src/jit/shared/CodeGenerator-shared.cpp
@@ -764,33 +764,40 @@ CodeGeneratorShared::emitTruncateFloat32
 
 bool
 CodeGeneratorShared::visitOutOfLineTruncateSlow(OutOfLineTruncateSlow *ool)
 {
     FloatRegister src = ool->src();
     Register dest = ool->dest();
 
     saveVolatile(dest);
+#ifdef JS_CODEGEN_ARM
+    if (ool->needFloat32Conversion()) {
+        masm.convertFloat32ToDouble(src, ScratchDoubleReg);
+        src = ScratchDoubleReg;
+    }
 
+#else
     if (ool->needFloat32Conversion()) {
         masm.push(src);
         masm.convertFloat32ToDouble(src, src);
     }
-
+#endif
     masm.setupUnalignedABICall(1, dest);
     masm.passABIArg(src, MoveOp::DOUBLE);
     if (gen->compilingAsmJS())
         masm.callWithABI(AsmJSImm_ToInt32);
     else
         masm.callWithABI(JS_FUNC_TO_DATA_PTR(void *, js::ToInt32));
     masm.storeCallResult(dest);
 
+#ifndef JS_CODEGEN_ARM
     if (ool->needFloat32Conversion())
         masm.pop(src);
-
+#endif
     restoreVolatile(dest);
 
     masm.jump(ool->rejoin());
     return true;
 }
 
 bool
 CodeGeneratorShared::omitOverRecursedCheck() const
--- a/js/src/jit/x64/Architecture-x64.h
+++ b/js/src/jit/x64/Architecture-x64.h
@@ -27,16 +27,22 @@ static const uint32_t ShadowStackSpace =
 class Registers {
   public:
     typedef JSC::X86Registers::RegisterID Code;
     typedef uint32_t SetType;
     static uint32_t SetSize(SetType x) {
         static_assert(sizeof(SetType) == 4, "SetType must be 32 bits");
         return mozilla::CountPopulation32(x);
     }
+    static uint32_t FirstBit(SetType x) {
+        return mozilla::CountTrailingZeroes32(x);
+    }
+    static uint32_t LastBit(SetType x) {
+        return 31 - mozilla::CountLeadingZeroes32(x);
+    }
     static const char *GetName(Code code) {
         static const char * const Names[] = { "rax", "rcx", "rdx", "rbx",
                                               "rsp", "rbp", "rsi", "rdi",
                                               "r8",  "r9",  "r10", "r11",
                                               "r12", "r13", "r14", "r15" };
         return Names[code];
     }
 
@@ -175,17 +181,22 @@ class TypedRegisterSet;
 struct FloatRegister {
     typedef FloatRegisters Codes;
     typedef Codes::Code Code;
     typedef Codes::SetType SetType;
     static uint32_t SetSize(SetType x) {
         static_assert(sizeof(SetType) == 4, "SetType must be 32 bits");
         return mozilla::CountPopulation32(x);
     }
-
+    static uint32_t FirstBit(SetType x) {
+        return mozilla::CountTrailingZeroes32(x);
+    }
+    static uint32_t LastBit(SetType x) {
+        return 31 - mozilla::CountLeadingZeroes32(x);
+    }
     Code code_;
 
     static FloatRegister FromCode(uint32_t i) {
         JS_ASSERT(i < FloatRegisters::Total);
         FloatRegister r = { (FloatRegisters::Code)i };
         return r;
     }
     Code code() const {
--- a/js/src/jit/x86/Architecture-x86.h
+++ b/js/src/jit/x86/Architecture-x86.h
@@ -36,16 +36,22 @@ static const uint32_t BAILOUT_TABLE_ENTR
 class Registers {
   public:
     typedef JSC::X86Registers::RegisterID Code;
     typedef uint8_t SetType;
     static uint32_t SetSize(SetType x) {
         static_assert(sizeof(SetType) == 1, "SetType must be 8 bits");
         return mozilla::CountPopulation32(x);
     }
+    static uint32_t FirstBit(SetType x) {
+        return mozilla::CountTrailingZeroes32(x);
+    }
+    static uint32_t LastBit(SetType x) {
+        return 31 - mozilla::CountLeadingZeroes32(x);
+    }
     static const char *GetName(Code code) {
         static const char * const Names[] = { "eax", "ecx", "edx", "ebx",
                                               "esp", "ebp", "esi", "edi" };
         return Names[code];
     }
 
     static Code FromName(const char *name) {
         for (size_t i = 0; i < Total; i++) {
@@ -151,17 +157,22 @@ class TypedRegisterSet;
 struct FloatRegister {
     typedef FloatRegisters Codes;
     typedef Codes::Code Code;
     typedef Codes::SetType SetType;
     static uint32_t SetSize(SetType x) {
         static_assert(sizeof(SetType) == 4, "SetType must be 32 bits");
         return mozilla::CountPopulation32(x);
     }
-
+    static uint32_t FirstBit(SetType x) {
+        return mozilla::CountTrailingZeroes32(x);
+    }
+    static uint32_t LastBit(SetType x) {
+        return 31 - mozilla::CountLeadingZeroes32(x);
+    }
     Code code_;
 
     static FloatRegister FromCode(uint32_t i) {
         JS_ASSERT(i < FloatRegisters::Total);
         FloatRegister r = { (FloatRegisters::Code)i };
         return r;
     }
     Code code() const {
--- a/js/src/jsapi-tests/testJitMoveEmitterCycles.cpp
+++ b/js/src/jsapi-tests/testJitMoveEmitterCycles.cpp
@@ -1,16 +1,16 @@
 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
  * vim: set ts=8 sts=4 et sw=4 tw=99:
  */
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-#if defined(JS_ARM_SIMULATOR) && defined(EVERYONE_KNOWS_ABOUT_ALIASING)
+#if defined(JS_ARM_SIMULATOR)
 #include "jit/arm/Assembler-arm.h"
 #include "jit/arm/MoveEmitter-arm.h"
 #include "jit/arm/Simulator-arm.h"
 #include "jit/IonLinker.h"
 #include "jit/IonMacroAssembler.h"
 #include "jit/MoveResolver.h"
 
 #include "jsapi-tests/tests.h"