Bug 1145811 - Remove simdSet argument of PushRegsInMask. r=bbouvier
author Nicolas B. Pierron <nicolas.b.pierron@mozilla.com>
Sat, 28 Mar 2015 01:08:13 +0100
changeset 266654 8787eda5c93e9c62d6f4b86472b74cecdf4afff6
parent 266653 5092827680338c844b18a39b706f016278928961
child 266655 1628ecf1c71fb80533e39245a255d69554927d69
push id 830
push user raliiev@mozilla.com
push date Fri, 19 Jun 2015 19:24:37 +0000
treeherder mozilla-release@932614382a68 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers bbouvier
bugs 1145811
milestone 39.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1145811 - Remove simdSet argument of PushRegsInMask. r=bbouvier
js/src/asmjs/AsmJSValidate.cpp
js/src/jit/MacroAssembler.cpp
js/src/jit/MacroAssembler.h
js/src/jit/arm/MacroAssembler-arm.cpp
js/src/jit/mips/MacroAssembler-mips.cpp
js/src/jit/none/Trampoline-none.cpp
js/src/jit/shared/MacroAssembler-x86-shared.cpp
--- a/js/src/asmjs/AsmJSValidate.cpp
+++ b/js/src/asmjs/AsmJSValidate.cpp
@@ -8235,33 +8235,28 @@ static const LiveRegisterSet NonVolatile
                     FloatRegisterSet(FloatRegisters::NonVolatileMask
                                      | (1ULL << FloatRegisters::d15)
                                      | (1ULL << FloatRegisters::s31)));
 #else
 static const LiveRegisterSet NonVolatileRegs =
     LiveRegisterSet(GeneralRegisterSet(Registers::NonVolatileMask),
                     FloatRegisterSet(FloatRegisters::NonVolatileMask));
 #endif
-static const LiveFloatRegisterSet NonVolatileSimdRegs(
-    SupportsSimd ? NonVolatileRegs.fpus() : FloatRegisterSet());
 
 #if defined(JS_CODEGEN_MIPS)
 // Mips is using one more double slot due to stack alignment for double values.
 // Look at MacroAssembler::PushRegsInMask(RegisterSet set)
 static const unsigned FramePushedAfterSave = NonVolatileRegs.gprs().size() * sizeof(intptr_t) +
                                              NonVolatileRegs.fpus().getPushSizeInBytes() +
                                              sizeof(double);
 #elif defined(JS_CODEGEN_NONE)
 static const unsigned FramePushedAfterSave = 0;
 #else
-static const unsigned FramePushedAfterSave =
-   SupportsSimd ? NonVolatileRegs.gprs().size() * sizeof(intptr_t) +
-                  NonVolatileRegs.fpus().size() * Simd128DataSize
-                : NonVolatileRegs.gprs().size() * sizeof(intptr_t) +
-                  NonVolatileRegs.fpus().getPushSizeInBytes();
+static const unsigned FramePushedAfterSave = NonVolatileRegs.gprs().size() * sizeof(intptr_t)
+                                           + NonVolatileRegs.fpus().getPushSizeInBytes();
 #endif
 static const unsigned FramePushedForEntrySP = FramePushedAfterSave + sizeof(void*);
 
 static bool
 GenerateEntry(ModuleCompiler &m, unsigned exportIndex)
 {
     MacroAssembler &masm = m.masm();
 
@@ -8276,17 +8271,17 @@ GenerateEntry(ModuleCompiler &m, unsigne
     masm.push(ra);
 #elif defined(JS_CODEGEN_X86)
     static const unsigned EntryFrameSize = sizeof(void*);
 #endif
 
     // Save all caller non-volatile registers before we clobber them here and in
     // the asm.js callee (which does not preserve non-volatile registers).
     masm.setFramePushed(0);
-    masm.PushRegsInMask(NonVolatileRegs, NonVolatileSimdRegs);
+    masm.PushRegsInMask(NonVolatileRegs);
     MOZ_ASSERT(masm.framePushed() == FramePushedAfterSave);
 
     // ARM and MIPS have a globally-pinned GlobalReg (x64 uses RIP-relative
     // addressing, x86 uses immediates in effective addresses). For the
     // AsmJSGlobalRegBias addition, see Assembler-(mips,arm).h.
 #if defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_MIPS)
     masm.movePtr(IntArgReg1, GlobalReg);
     masm.addPtr(Imm32(AsmJSGlobalRegBias), GlobalReg);
@@ -8423,17 +8418,17 @@ GenerateEntry(ModuleCompiler &m, unsigne
         break;
       case RetType::Float32x4:
         // We don't have control on argv alignment, do an unaligned access.
         masm.storeUnalignedFloat32x4(ReturnFloat32x4Reg, Address(argv, 0));
         break;
     }
 
     // Restore clobbered non-volatile registers of the caller.
-    masm.PopRegsInMask(NonVolatileRegs, NonVolatileSimdRegs);
+    masm.PopRegsInMask(NonVolatileRegs);
     MOZ_ASSERT(masm.framePushed() == 0);
 
     masm.move32(Imm32(true), ReturnReg);
     masm.ret();
 
     return m.finishGeneratingEntry(exportIndex, &begin) && !masm.oom();
 }
 
@@ -9029,17 +9024,17 @@ GenerateOnOutOfBoundsLabelExit(ModuleCom
     masm.jump(throwLabel);
 
     return m.finishGeneratingInlineStub(&m.onOutOfBoundsLabel()) && !masm.oom();
 }
 
 static const LiveRegisterSet AllRegsExceptSP(
     GeneralRegisterSet(Registers::AllMask &
                        ~(uint32_t(1) << Registers::StackPointer)),
-    FloatRegisterSet(FloatRegisters::AllDoubleMask));
+    FloatRegisterSet(FloatRegisters::AllMask));
 
 // The async interrupt-callback exit is called from arbitrarily-interrupted asm.js
 // code. That means we must first save *all* registers and restore *all*
 // registers (except the stack pointer) when we resume. The address to resume to
 // (assuming that js::HandleExecutionInterrupt doesn't indicate that the
 // execution should be aborted) is stored in AsmJSActivation::resumePC_.
 // Unfortunately, loading this requires a scratch register which we don't have
 // after restoring all registers. To hack around this, push the resumePC on the
@@ -9053,18 +9048,17 @@ GenerateAsyncInterruptExit(ModuleCompile
 
 #if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
     // Be very careful here not to perturb the machine state before saving it
     // to the stack. In particular, add/sub instructions may set conditions in
     // the flags register.
     masm.push(Imm32(0));            // space for resumePC
     masm.pushFlags();               // after this we are safe to use sub
     masm.setFramePushed(0);         // set to zero so we can use masm.framePushed() below
-    LiveFloatRegisterSet simdSet(AllRegsExceptSP.fpus());
-    masm.PushRegsInMask(AllRegsExceptSP, simdSet); // save all GP/FP registers (except SP)
+    masm.PushRegsInMask(AllRegsExceptSP); // save all GP/FP registers (except SP)
 
     Register scratch = ABIArgGenerator::NonArgReturnReg0;
 
     // Store resumePC into the reserved space.
     masm.loadAsmJSActivation(scratch);
     masm.loadPtr(Address(scratch, AsmJSActivation::offsetOfResumePC()), scratch);
     masm.storePtr(scratch, Address(StackPointer, masm.framePushed() + sizeof(void*)));
 
@@ -9079,17 +9073,17 @@ GenerateAsyncInterruptExit(ModuleCompile
     masm.call(AsmJSImmPtr(AsmJSImm_HandleExecutionInterrupt));
 
     masm.branchIfFalseBool(ReturnReg, throwLabel);
 
     // Restore the StackPointer to it's position before the call.
     masm.mov(ABIArgGenerator::NonVolatileReg, StackPointer);
 
     // Restore the machine state to before the interrupt.
-    masm.PopRegsInMask(AllRegsExceptSP, simdSet); // restore all GP/FP registers (except SP)
+    masm.PopRegsInMask(AllRegsExceptSP); // restore all GP/FP registers (except SP)
     masm.popFlags();              // after this, nothing that sets conditions
     masm.ret();                   // pop resumePC into PC
 #elif defined(JS_CODEGEN_MIPS)
     // Reserve space to store resumePC.
     masm.subPtr(Imm32(sizeof(intptr_t)), StackPointer);
     // set to zero so we can use masm.framePushed() below.
     masm.setFramePushed(0);
     // When this platform supports SIMD extensions, we'll need to push high lanes
@@ -9149,28 +9143,28 @@ GenerateAsyncInterruptExit(ModuleCompile
     masm.storePtr(IntArgReg1, Address(r6, 14 * sizeof(uint32_t*)));
 
     // When this platform supports SIMD extensions, we'll need to push and pop
     // high lanes of SIMD registers as well.
 
     // Save all FP registers
     JS_STATIC_ASSERT(!SupportsSimd);
     masm.PushRegsInMask(LiveRegisterSet(GeneralRegisterSet(0),
-                                             FloatRegisterSet(FloatRegisters::AllDoubleMask)));
+                                        FloatRegisterSet(FloatRegisters::AllDoubleMask)));
 
     masm.assertStackAlignment(ABIStackAlignment);
     masm.call(AsmJSImm_HandleExecutionInterrupt);
 
     masm.branchIfFalseBool(ReturnReg, throwLabel);
 
     // Restore the machine state to before the interrupt. this will set the pc!
 
     // Restore all FP registers
     masm.PopRegsInMask(LiveRegisterSet(GeneralRegisterSet(0),
-                                            FloatRegisterSet(FloatRegisters::AllDoubleMask)));
+                                       FloatRegisterSet(FloatRegisters::AllDoubleMask)));
     masm.mov(r6,sp);
     masm.as_vmsr(r5);
     masm.as_msr(r4);
     // Restore all GP registers
     masm.startDataTransferM(IsLoad, sp, IA, WriteBack);
     masm.transferReg(r0);
     masm.transferReg(r1);
     masm.transferReg(r2);
@@ -9233,17 +9227,17 @@ GenerateThrowStub(ModuleCompiler &m, Lab
     // frame.
     Register scratch = ABIArgGenerator::NonArgReturnReg0;
     masm.loadAsmJSActivation(scratch);
     masm.storePtr(ImmWord(0), Address(scratch, AsmJSActivation::offsetOfFP()));
 
     masm.setFramePushed(FramePushedForEntrySP);
     masm.loadPtr(Address(scratch, AsmJSActivation::offsetOfEntrySP()), StackPointer);
     masm.Pop(scratch);
-    masm.PopRegsInMask(NonVolatileRegs, NonVolatileSimdRegs);
+    masm.PopRegsInMask(NonVolatileRegs);
     MOZ_ASSERT(masm.framePushed() == 0);
 
     masm.mov(ImmWord(0), ReturnReg);
     masm.ret();
 
     return m.finishGeneratingInlineStub(throwLabel) && !masm.oom();
 }
 
--- a/js/src/jit/MacroAssembler.cpp
+++ b/js/src/jit/MacroAssembler.cpp
@@ -2527,52 +2527,34 @@ MacroAssembler::alignJitStackBasedOnNArg
         andPtr(Imm32(~(JitStackAlignment - 1)), StackPointer);
     }
 }
 
 // ===============================================================
 // Stack manipulation functions.
 
 void
-MacroAssembler::PushRegsInMask(LiveRegisterSet set)
-{
-    PushRegsInMask(set, LiveFloatRegisterSet());
-}
-
-void
 MacroAssembler::PushRegsInMask(LiveGeneralRegisterSet set)
 {
     PushRegsInMask(LiveRegisterSet(set.set(), FloatRegisterSet()));
 }
 
 void
 MacroAssembler::PopRegsInMask(LiveRegisterSet set)
 {
     PopRegsInMaskIgnore(set, LiveRegisterSet());
 }
 
 void
-MacroAssembler::PopRegsInMask(LiveRegisterSet set, LiveFloatRegisterSet simdSet)
-{
-    PopRegsInMaskIgnore(set, LiveRegisterSet(), simdSet);
-}
-
-void
 MacroAssembler::PopRegsInMask(LiveGeneralRegisterSet set)
 {
     PopRegsInMask(LiveRegisterSet(set.set(), FloatRegisterSet()));
 }
 
 void
-MacroAssembler::PopRegsInMaskIgnore(LiveRegisterSet set, LiveRegisterSet ignore)
-{
-    PopRegsInMaskIgnore(set, ignore, LiveFloatRegisterSet());
-}
-
-void
 MacroAssembler::Push(jsid id, Register scratchReg)
 {
     if (JSID_IS_GCTHING(id)) {
         // If we're pushing a gcthing, then we can't just push the tagged jsid
         // value since the GC won't have any idea that the push instruction
         // carries a reference to a gcthing.  Need to unpack the pointer,
         // push it using ImmGCPtr, and then rematerialize the id at runtime.
 
--- a/js/src/jit/MacroAssembler.h
+++ b/js/src/jit/MacroAssembler.h
@@ -291,26 +291,22 @@ class MacroAssembler : public MacroAssem
     size_t instructionsSize() const {
         return size();
     }
 
   public:
     // ===============================================================
     // Stack manipulation functions.
 
-    void PushRegsInMask(LiveRegisterSet set, LiveFloatRegisterSet simdSet) PER_ARCH;
-    void PushRegsInMask(LiveRegisterSet set);
+    void PushRegsInMask(LiveRegisterSet set) PER_ARCH;
     void PushRegsInMask(LiveGeneralRegisterSet set);
 
     void PopRegsInMask(LiveRegisterSet set);
-    void PopRegsInMask(LiveRegisterSet set, LiveFloatRegisterSet simdSet);
     void PopRegsInMask(LiveGeneralRegisterSet set);
-    void PopRegsInMaskIgnore(LiveRegisterSet set, LiveRegisterSet ignore,
-                             LiveFloatRegisterSet simdSet) PER_ARCH;
-    void PopRegsInMaskIgnore(LiveRegisterSet set, LiveRegisterSet ignore);
+    void PopRegsInMaskIgnore(LiveRegisterSet set, LiveRegisterSet ignore) PER_ARCH;
 
     void Push(const Operand op) PER_ARCH ONLY_X86_X64;
     void Push(Register reg) PER_ARCH;
     void Push(const Imm32 imm) PER_ARCH;
     void Push(const ImmWord imm) PER_ARCH;
     void Push(const ImmPtr imm) PER_ARCH;
     void Push(const ImmGCPtr ptr) PER_ARCH;
     void Push(FloatRegister reg) PER_ARCH;
--- a/js/src/jit/arm/MacroAssembler-arm.cpp
+++ b/js/src/jit/arm/MacroAssembler-arm.cpp
@@ -5127,19 +5127,18 @@ MacroAssemblerARMCompat::asMasm() const
 {
     return *static_cast<const MacroAssembler *>(this);
 }
 
 // ===============================================================
 // Stack manipulation functions.
 
 void
-MacroAssembler::PushRegsInMask(LiveRegisterSet set, LiveFloatRegisterSet simdSet)
-{
-    MOZ_ASSERT(!SupportsSimd() && simdSet.set().size() == 0);
+MacroAssembler::PushRegsInMask(LiveRegisterSet set)
+{
     int32_t diffF = set.fpus().getPushSizeInBytes();
     int32_t diffG = set.gprs().size() * sizeof(intptr_t);
 
     if (set.gprs().size() > 1) {
         adjustFrame(diffG);
         startDataTransferM(IsStore, StackPointer, DB, WriteBack);
         for (GeneralRegisterBackwardIterator iter(set.gprs()); iter.more(); iter++) {
             diffG -= sizeof(intptr_t);
@@ -5156,20 +5155,18 @@ MacroAssembler::PushRegsInMask(LiveRegis
     MOZ_ASSERT(diffG == 0);
 
     adjustFrame(diffF);
     diffF += transferMultipleByRuns(set.fpus(), IsStore, StackPointer, DB);
     MOZ_ASSERT(diffF == 0);
 }
 
 void
-MacroAssembler::PopRegsInMaskIgnore(LiveRegisterSet set, LiveRegisterSet ignore,
-                                    LiveFloatRegisterSet simdSet)
-{
-    MOZ_ASSERT(!SupportsSimd() && simdSet.set().size() == 0);
+MacroAssembler::PopRegsInMaskIgnore(LiveRegisterSet set, LiveRegisterSet ignore)
+{
     int32_t diffG = set.gprs().size() * sizeof(intptr_t);
     int32_t diffF = set.fpus().getPushSizeInBytes();
     const int32_t reservedG = diffG;
     const int32_t reservedF = diffF;
 
     // ARM can load multiple registers at once, but only if we want back all
     // the registers we previously saved to the stack.
     if (ignore.emptyFloat()) {
--- a/js/src/jit/mips/MacroAssembler-mips.cpp
+++ b/js/src/jit/mips/MacroAssembler-mips.cpp
@@ -3663,19 +3663,18 @@ MacroAssemblerMIPSCompat::asMasm() const
 {
     return *static_cast<const MacroAssembler *>(this);
 }
 
 // ===============================================================
 // Stack manipulation functions.
 
 void
-MacroAssembler::PushRegsInMask(RegisterSet set, FloatRegisterSet simdSet)
-{
-    MOZ_ASSERT(!SupportsSimd() && simdSet.size() == 0);
+MacroAssembler::PushRegsInMask(LiveRegisterSet set)
+{
     int32_t diffF = set.fpus().getPushSizeInBytes();
     int32_t diffG = set.gprs().size() * sizeof(intptr_t);
 
     reserveStack(diffG);
     for (GeneralRegisterBackwardIterator iter(set.gprs()); iter.more(); iter++) {
         diffG -= sizeof(intptr_t);
         storePtr(*iter, Address(StackPointer, diffG));
     }
@@ -3691,19 +3690,18 @@ MacroAssembler::PushRegsInMask(RegisterS
         if ((*iter).code() % 2 == 0)
             as_sd(*iter, SecondScratchReg, -diffF);
         diffF -= sizeof(double);
     }
     MOZ_ASSERT(diffF == 0);
 }
 
 void
-MacroAssembler::PopRegsInMaskIgnore(RegisterSet set, RegisterSet ignore, FloatRegisterSet simdSet)
-{
-    MOZ_ASSERT(!SupportsSimd() && simdSet.size() == 0);
+MacroAssembler::PopRegsInMaskIgnore(LiveRegisterSet set, LiveRegisterSet ignore)
+{
     int32_t diffG = set.gprs().size() * sizeof(intptr_t);
     int32_t diffF = set.fpus().getPushSizeInBytes();
     const int32_t reservedG = diffG;
     const int32_t reservedF = diffF;
 
     // Read the buffer form the first aligned location.
     ma_addu(SecondScratchReg, sp, Imm32(reservedF + sizeof(double)));
     ma_and(SecondScratchReg, SecondScratchReg, Imm32(~(ABIStackAlignment - 1)));
--- a/js/src/jit/none/Trampoline-none.cpp
+++ b/js/src/jit/none/Trampoline-none.cpp
@@ -54,21 +54,18 @@ bool ICCompare_Int32::Compiler::generate
 bool ICCompare_Double::Compiler::generateStubCode(MacroAssembler &) { MOZ_CRASH(); }
 bool ICBinaryArith_Int32::Compiler::generateStubCode(MacroAssembler &) { MOZ_CRASH(); }
 bool ICUnaryArith_Int32::Compiler::generateStubCode(MacroAssembler &) { MOZ_CRASH(); }
 JitCode *JitRuntime::generateProfilerExitFrameTailStub(JSContext *) { MOZ_CRASH(); }
 
 // ===============================================================
 // Stack manipulation functions.
 
-void MacroAssembler::PushRegsInMask(LiveRegisterSet, LiveFloatRegisterSet) { MOZ_CRASH(); }
-void MacroAssembler::PopRegsInMaskIgnore(LiveRegisterSet, LiveRegisterSet, LiveFloatRegisterSet)
-{
-    MOZ_CRASH();
-}
+void MacroAssembler::PushRegsInMask(LiveRegisterSet) { MOZ_CRASH(); }
+void MacroAssembler::PopRegsInMaskIgnore(LiveRegisterSet, LiveRegisterSet) { MOZ_CRASH(); }
 
 void MacroAssembler::Push(Register reg) { MOZ_CRASH(); }
 void MacroAssembler::Push(const Imm32 imm) { MOZ_CRASH(); }
 void MacroAssembler::Push(const ImmWord imm) { MOZ_CRASH(); }
 void MacroAssembler::Push(const ImmPtr imm) { MOZ_CRASH(); }
 void MacroAssembler::Push(const ImmGCPtr ptr) { MOZ_CRASH(); }
 void MacroAssembler::Push(FloatRegister reg) { MOZ_CRASH(); }
 
--- a/js/src/jit/shared/MacroAssembler-x86-shared.cpp
+++ b/js/src/jit/shared/MacroAssembler-x86-shared.cpp
@@ -174,108 +174,86 @@ MacroAssemblerX86Shared::asMasm() const
 {
     return *static_cast<const MacroAssembler *>(this);
 }
 
 // ===============================================================
 // Stack manipulation functions.
 
 void
-MacroAssembler::PushRegsInMask(LiveRegisterSet set, LiveFloatRegisterSet simdSet)
+MacroAssembler::PushRegsInMask(LiveRegisterSet set)
 {
-    FloatRegisterSet doubleSet(FloatRegisterSet::Subtract(set.fpus(), simdSet.set()));
-    MOZ_ASSERT_IF(simdSet.empty(), doubleSet == set.fpus());
-    doubleSet = doubleSet.reduceSetForPush();
-    unsigned numSimd = simdSet.set().size();
-    unsigned numDouble = doubleSet.size();
-    int32_t diffF = doubleSet.getPushSizeInBytes() + numSimd * Simd128DataSize;
+    FloatRegisterSet fpuSet(set.fpus().reduceSetForPush());
+    unsigned numFpu = fpuSet.size();
+    int32_t diffF = fpuSet.getPushSizeInBytes();
     int32_t diffG = set.gprs().size() * sizeof(intptr_t);
 
     // On x86, always use push to push the integer registers, as it's fast
     // on modern hardware and it's a small instruction.
     for (GeneralRegisterBackwardIterator iter(set.gprs()); iter.more(); iter++) {
         diffG -= sizeof(intptr_t);
         Push(*iter);
     }
     MOZ_ASSERT(diffG == 0);
 
     reserveStack(diffF);
-    for (FloatRegisterBackwardIterator iter(doubleSet); iter.more(); iter++) {
+    for (FloatRegisterBackwardIterator iter(fpuSet); iter.more(); iter++) {
         FloatRegister reg = *iter;
         diffF -= reg.size();
-        numDouble -= 1;
+        numFpu -= 1;
         Address spillAddress(StackPointer, diffF);
         if (reg.isDouble())
             storeDouble(reg, spillAddress);
         else if (reg.isSingle())
             storeFloat32(reg, spillAddress);
         else if (reg.isInt32x4())
             storeUnalignedInt32x4(reg, spillAddress);
         else if (reg.isFloat32x4())
             storeUnalignedFloat32x4(reg, spillAddress);
         else
             MOZ_CRASH("Unknown register type.");
     }
-    MOZ_ASSERT(numDouble == 0);
-    for (FloatRegisterBackwardIterator iter(simdSet); iter.more(); iter++) {
-        diffF -= Simd128DataSize;
-        numSimd -= 1;
-        // XXX how to choose the right move type?
-        storeUnalignedInt32x4(*iter, Address(StackPointer, diffF));
-    }
-    MOZ_ASSERT(numSimd == 0);
+    MOZ_ASSERT(numFpu == 0);
     // x64 padding to keep the stack aligned on uintptr_t. Keep in sync with
     // GetPushBytesInSize.
     diffF -= diffF % sizeof(uintptr_t);
     MOZ_ASSERT(diffF == 0);
 }
 
 void
-MacroAssembler::PopRegsInMaskIgnore(LiveRegisterSet set, LiveRegisterSet ignore,
-                                    LiveFloatRegisterSet simdSet)
+MacroAssembler::PopRegsInMaskIgnore(LiveRegisterSet set, LiveRegisterSet ignore)
 {
-    FloatRegisterSet doubleSet(FloatRegisterSet::Subtract(set.fpus(), simdSet.set()));
-    MOZ_ASSERT_IF(simdSet.empty(), doubleSet == set.fpus());
-    doubleSet = doubleSet.reduceSetForPush();
-    unsigned numSimd = simdSet.set().size();
-    unsigned numDouble = doubleSet.size();
+    FloatRegisterSet fpuSet(set.fpus().reduceSetForPush());
+    unsigned numFpu = fpuSet.size();
     int32_t diffG = set.gprs().size() * sizeof(intptr_t);
-    int32_t diffF = doubleSet.getPushSizeInBytes() + numSimd * Simd128DataSize;
+    int32_t diffF = fpuSet.getPushSizeInBytes();
     const int32_t reservedG = diffG;
     const int32_t reservedF = diffF;
 
-    for (FloatRegisterBackwardIterator iter(simdSet); iter.more(); iter++) {
-        diffF -= Simd128DataSize;
-        numSimd -= 1;
-        if (!ignore.has(*iter))
-            // XXX how to choose the right move type?
-            loadUnalignedInt32x4(Address(StackPointer, diffF), *iter);
-    }
-    MOZ_ASSERT(numSimd == 0);
-    for (FloatRegisterBackwardIterator iter(doubleSet); iter.more(); iter++) {
+    for (FloatRegisterBackwardIterator iter(fpuSet); iter.more(); iter++) {
         FloatRegister reg = *iter;
         diffF -= reg.size();
-        numDouble -= 1;
+        numFpu -= 1;
         if (ignore.has(reg))
             continue;
 
         Address spillAddress(StackPointer, diffF);
         if (reg.isDouble())
             loadDouble(spillAddress, reg);
         else if (reg.isSingle())
             loadFloat32(spillAddress, reg);
         else if (reg.isInt32x4())
             loadUnalignedInt32x4(spillAddress, reg);
         else if (reg.isFloat32x4())
             loadUnalignedFloat32x4(spillAddress, reg);
         else
             MOZ_CRASH("Unknown register type.");
     }
     freeStack(reservedF);
-    MOZ_ASSERT(numDouble == 0);
+    MOZ_ASSERT(numFpu == 0);
     // x64 padding to keep the stack aligned on uintptr_t. Keep in sync with
     // GetPushBytesInSize.
     diffF -= diffF % sizeof(uintptr_t);
     MOZ_ASSERT(diffF == 0);
 
     // On x86, use pop to pop the integer registers, if we're not going to
     // ignore any slots, as it's fast on modern hardware and it's a small
     // instruction.