Bug 1436691: [MIPS32] Use ldc1 and sdc1 for double loads and stores; r=bbouvier
authorDragan Mladjenovic <dragan.mladjenovic>
Tue, 06 Feb 2018 14:35:49 +0100
changeset 457770 d7874deef1a5ebf93d4debce425f564b77f6d179
parent 457769 cc6edcbe8361ba4278d0f4133fd030200237231e
child 457771 eb1149743b446076c8aa38bb4be81623fe72845a
push id: 8799
push user: mtabara@mozilla.com
push date: Thu, 01 Mar 2018 16:46:23 +0000
treeherder: mozilla-beta@15334014dc67 [default view] [failures only]
perfherder: [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers: bbouvier
bugs: 1436691
milestone: 60.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1436691 : [MIPS32] Use ldc1 and sdc1 for double loads and stores; r=bbouvier
js/src/jit/mips-shared/Assembler-mips-shared.cpp
js/src/jit/mips-shared/Assembler-mips-shared.h
js/src/jit/mips-shared/MacroAssembler-mips-shared-inl.h
js/src/jit/mips-shared/MacroAssembler-mips-shared.cpp
js/src/jit/mips-shared/MacroAssembler-mips-shared.h
js/src/jit/mips32/Architecture-mips32.h
js/src/jit/mips32/MacroAssembler-mips32-inl.h
js/src/jit/mips32/MacroAssembler-mips32.cpp
js/src/jit/mips32/MacroAssembler-mips32.h
js/src/jit/mips32/Trampoline-mips32.cpp
js/src/jit/mips64/Architecture-mips64.h
js/src/jit/mips64/MacroAssembler-mips64-inl.h
js/src/jit/mips64/MacroAssembler-mips64.cpp
js/src/jit/mips64/MacroAssembler-mips64.h
js/src/jit/mips64/Trampoline-mips64.cpp
js/src/wasm/WasmBaselineCompile.cpp
--- a/js/src/jit/mips-shared/Assembler-mips-shared.cpp
+++ b/js/src/jit/mips-shared/Assembler-mips-shared.cpp
@@ -1212,45 +1212,45 @@ AssemblerMIPSShared::as_dextu(Register r
     rd = Register::FromCode(size - 1);
     spew("dextu  %3s,%3s, %d, %d", rt.name(), rs.name(), pos, size);
     MOZ_ASSERT(hasR2());
     return writeInst(InstReg(op_special3, rs, rt, rd, pos - 32, ff_dextu).encode());
 }
 
 // FP instructions
 BufferOffset
-AssemblerMIPSShared::as_ld(FloatRegister fd, Register base, int32_t off)
+AssemblerMIPSShared::as_ldc1(FloatRegister ft, Register base, int32_t off)
 {
     MOZ_ASSERT(Imm16::IsInSignedRange(off));
-    spew("ldc1   %3s, (0x%x)%2s", fd.name(), off, base.name());
-    return writeInst(InstImm(op_ldc1, base, fd, Imm16(off)).encode());
+    spew("ldc1   %3s, (0x%x)%2s", ft.name(), off, base.name());
+    return writeInst(InstImm(op_ldc1, base, ft, Imm16(off)).encode());
 }
 
 BufferOffset
-AssemblerMIPSShared::as_sd(FloatRegister fd, Register base, int32_t off)
+AssemblerMIPSShared::as_sdc1(FloatRegister ft, Register base, int32_t off)
 {
     MOZ_ASSERT(Imm16::IsInSignedRange(off));
-    spew("sdc1   %3s, (0x%x)%2s", fd.name(), off, base.name());
-    return writeInst(InstImm(op_sdc1, base, fd, Imm16(off)).encode());
+    spew("sdc1   %3s, (0x%x)%2s", ft.name(), off, base.name());
+    return writeInst(InstImm(op_sdc1, base, ft, Imm16(off)).encode());
 }
 
 BufferOffset
-AssemblerMIPSShared::as_ls(FloatRegister fd, Register base, int32_t off)
+AssemblerMIPSShared::as_lwc1(FloatRegister ft, Register base, int32_t off)
 {
     MOZ_ASSERT(Imm16::IsInSignedRange(off));
-    spew("lwc1   %3s, (0x%x)%2s", fd.name(), off, base.name());
-    return writeInst(InstImm(op_lwc1, base, fd, Imm16(off)).encode());
+    spew("lwc1   %3s, (0x%x)%2s", ft.name(), off, base.name());
+    return writeInst(InstImm(op_lwc1, base, ft, Imm16(off)).encode());
 }
 
 BufferOffset
-AssemblerMIPSShared::as_ss(FloatRegister fd, Register base, int32_t off)
+AssemblerMIPSShared::as_swc1(FloatRegister ft, Register base, int32_t off)
 {
     MOZ_ASSERT(Imm16::IsInSignedRange(off));
-    spew("swc1   %3s, (0x%x)%2s", fd.name(), off, base.name());
-    return writeInst(InstImm(op_swc1, base, fd, Imm16(off)).encode());
+    spew("swc1   %3s, (0x%x)%2s", ft.name(), off, base.name());
+    return writeInst(InstImm(op_swc1, base, ft, Imm16(off)).encode());
 }
 
 BufferOffset
 AssemblerMIPSShared::as_gsldl(FloatRegister fd, Register base, int32_t off)
 {
     MOZ_ASSERT(Imm8::IsInSignedRange(off));
     spew("gsldl  %3s, (0x%x)%2s", fd.name(), off, base.name());
     return writeInst(InstGS(op_lwc2, base, fd, Imm8(off), ff_gsxdlc1).encode());
--- a/js/src/jit/mips-shared/Assembler-mips-shared.h
+++ b/js/src/jit/mips-shared/Assembler-mips-shared.h
@@ -1134,23 +1134,21 @@ class AssemblerMIPSShared : public Assem
     BufferOffset as_dextu(Register rt, Register rs, uint16_t pos, uint16_t size);
 
     // Sign extend
     BufferOffset as_seb(Register rd, Register rt);
     BufferOffset as_seh(Register rd, Register rt);
 
     // FP instructions
 
-    // Use these two functions only when you are sure address is aligned.
-    // Otherwise, use ma_ld and ma_sd.
-    BufferOffset as_ld(FloatRegister fd, Register base, int32_t off);
-    BufferOffset as_sd(FloatRegister fd, Register base, int32_t off);
+    BufferOffset as_ldc1(FloatRegister ft, Register base, int32_t off);
+    BufferOffset as_sdc1(FloatRegister ft, Register base, int32_t off);
 
-    BufferOffset as_ls(FloatRegister fd, Register base, int32_t off);
-    BufferOffset as_ss(FloatRegister fd, Register base, int32_t off);
+    BufferOffset as_lwc1(FloatRegister ft, Register base, int32_t off);
+    BufferOffset as_swc1(FloatRegister ft, Register base, int32_t off);
 
     // Loongson-specific FP load and store instructions
     BufferOffset as_gsldl(FloatRegister fd, Register base, int32_t off);
     BufferOffset as_gsldr(FloatRegister fd, Register base, int32_t off);
     BufferOffset as_gssdl(FloatRegister fd, Register base, int32_t off);
     BufferOffset as_gssdr(FloatRegister fd, Register base, int32_t off);
     BufferOffset as_gslsl(FloatRegister fd, Register base, int32_t off);
     BufferOffset as_gslsr(FloatRegister fd, Register base, int32_t off);
--- a/js/src/jit/mips-shared/MacroAssembler-mips-shared-inl.h
+++ b/js/src/jit/mips-shared/MacroAssembler-mips-shared-inl.h
@@ -1033,16 +1033,38 @@ MacroAssembler::storeFloat32x3(FloatRegi
 }
 void
 MacroAssembler::storeFloat32x3(FloatRegister src, const BaseIndex& dest)
 {
     MOZ_CRASH("NYI");
 }
 
 void
+MacroAssembler::storeUncanonicalizedDouble(FloatRegister src, const Address& addr)
+{
+    ma_sd(src, addr);
+}
+void
+MacroAssembler::storeUncanonicalizedDouble(FloatRegister src, const BaseIndex& addr)
+{
+    ma_sd(src, addr);
+}
+
+void
+MacroAssembler::storeUncanonicalizedFloat32(FloatRegister src, const Address& addr)
+{
+    ma_ss(src, addr);
+}
+void
+MacroAssembler::storeUncanonicalizedFloat32(FloatRegister src, const BaseIndex& addr)
+{
+    ma_ss(src, addr);
+}
+
+void
 MacroAssembler::memoryBarrier(MemoryBarrierBits barrier)
 {
     as_sync();
 }
 
 // ===============================================================
 // Clamping functions.
 
--- a/js/src/jit/mips-shared/MacroAssembler-mips-shared.cpp
+++ b/js/src/jit/mips-shared/MacroAssembler-mips-shared.cpp
@@ -1114,24 +1114,16 @@ MacroAssemblerMIPSShared::ma_lis(FloatRe
         ma_li(ScratchRegister, imm);
         moveToFloat32(ScratchRegister, dest);
     } else {
         moveToFloat32(zero, dest);
     }
 }
 
 void
-MacroAssemblerMIPSShared::ma_liNegZero(FloatRegister dest)
-{
-    moveToDoubleLo(zero, dest);
-    ma_li(ScratchRegister, Imm32(INT_MIN));
-    asMasm().moveToDoubleHi(ScratchRegister, dest);
-}
-
-void
 MacroAssemblerMIPSShared::ma_sd(FloatRegister ft, BaseIndex address)
 {
     if (isLoongson() && Imm8::IsInSignedRange(address.offset)) {
         Register index = address.index;
 
         if (address.scale != TimesOne) {
             int32_t shift = Imm32::ShiftOf(address.scale).value;
 
@@ -1174,16 +1166,30 @@ MacroAssemblerMIPSShared::ma_ss(FloatReg
         return;
     }
 
     asMasm().computeScaledAddress(address, SecondScratchReg);
     asMasm().ma_ss(ft, Address(SecondScratchReg, address.offset));
 }
 
 void
+MacroAssemblerMIPSShared::ma_ld(FloatRegister ft, const BaseIndex& src)
+{
+    asMasm().computeScaledAddress(src, SecondScratchReg);
+    asMasm().ma_ld(ft, Address(SecondScratchReg, src.offset));
+}
+
+void
+MacroAssemblerMIPSShared::ma_ls(FloatRegister ft, const BaseIndex& src)
+{
+    asMasm().computeScaledAddress(src, SecondScratchReg);
+    asMasm().ma_ls(ft, Address(SecondScratchReg, src.offset));
+}
+
+void
 MacroAssemblerMIPSShared::ma_bc1s(FloatRegister lhs, FloatRegister rhs, Label* label,
                                   DoubleCondition c, JumpKind jumpKind, FPConditionBit fcc)
 {
     FloatTestKind testKind;
     compareFloatingPoint(SingleFloat, lhs, rhs, c, &testKind, fcc);
     asMasm().branchWithCode(getBranchCode(testKind, fcc), label, jumpKind);
 }
 
@@ -1286,16 +1292,54 @@ MacroAssemblerMIPSShared::minMaxFloat32(
 
     bind(&nan);
     asMasm().loadConstantFloat32(JS::GenericNaN(), srcDest);
 
     bind(&done);
 }
 
 void
+MacroAssemblerMIPSShared::loadDouble(const Address& address, FloatRegister dest)
+{
+    asMasm().ma_ld(dest, address);
+}
+
+void
+MacroAssemblerMIPSShared::loadDouble(const BaseIndex& src, FloatRegister dest)
+{
+    asMasm().ma_ld(dest, src);
+}
+
+void
+MacroAssemblerMIPSShared::loadFloatAsDouble(const Address& address, FloatRegister dest)
+{
+    asMasm().ma_ls(dest, address);
+    as_cvtds(dest, dest);
+}
+
+void
+MacroAssemblerMIPSShared::loadFloatAsDouble(const BaseIndex& src, FloatRegister dest)
+{
+    asMasm().loadFloat32(src, dest);
+    as_cvtds(dest, dest);
+}
+
+void
+MacroAssemblerMIPSShared::loadFloat32(const Address& address, FloatRegister dest)
+{
+    asMasm().ma_ls(dest, address);
+}
+
+void
+MacroAssemblerMIPSShared::loadFloat32(const BaseIndex& src, FloatRegister dest)
+{
+    asMasm().ma_ls(dest, src);
+}
+
+void
 MacroAssemblerMIPSShared::ma_call(ImmPtr dest)
 {
     asMasm().ma_liPatchable(CallReg, dest);
     as_jalr(CallReg);
     as_nop();
 }
 
 void
@@ -1366,45 +1410,45 @@ MacroAssembler::Push(const ImmGCPtr ptr)
     ma_push(ScratchRegister);
     adjustFrame(int32_t(sizeof(intptr_t)));
 }
 
 void
 MacroAssembler::Push(FloatRegister f)
 {
     ma_push(f);
-    adjustFrame(int32_t(sizeof(double)));
+    adjustFrame(int32_t(f.pushSize()));
 }
 
 void
 MacroAssembler::Pop(Register reg)
 {
     ma_pop(reg);
     adjustFrame(-int32_t(sizeof(intptr_t)));
 }
 
 void
 MacroAssembler::Pop(FloatRegister f)
 {
     ma_pop(f);
-    adjustFrame(-int32_t(sizeof(double)));
+    adjustFrame(-int32_t(f.pushSize()));
 }
 
 void
 MacroAssembler::Pop(const ValueOperand& val)
 {
     popValue(val);
-    framePushed_ -= sizeof(Value);
+    adjustFrame(-int32_t(sizeof(Value)));
 }
 
 void
 MacroAssembler::PopStackPtr()
 {
     loadPtr(Address(StackPointer, 0), StackPointer);
-    framePushed_ -= sizeof(intptr_t);
+    adjustFrame(-int32_t(sizeof(intptr_t)));
 }
 
 
 // ===============================================================
 // Simple call functions.
 
 CodeOffset
 MacroAssembler::call(Register reg)
@@ -1967,22 +2011,20 @@ MacroAssemblerMIPSShared::wasmLoadImpl(c
                                        static_cast<LoadStoreSize>(8 * byteSize),
                                        isSigned ? SignExtend : ZeroExtend);
         }
         return;
     }
 
     asMasm().memoryBarrierBefore(access.sync());
     if (isFloat) {
-        if (byteSize == 4) {
-            asMasm().loadFloat32(address, output.fpu());
-        } else {
-            asMasm().computeScaledAddress(address, SecondScratchReg);
-            asMasm().as_ld(output.fpu(), SecondScratchReg, 0);
-        }
+        if (byteSize == 4)
+            asMasm().ma_ls(output.fpu(), address);
+        else
+            asMasm().ma_ld(output.fpu(), address);
     } else {
         asMasm().ma_load(output.gpr(), address, static_cast<LoadStoreSize>(8 * byteSize),
                          isSigned ? SignExtend : ZeroExtend);
     }
     asMasm().append(access, asMasm().size() - 4, asMasm().framePushed());
     asMasm().memoryBarrierAfter(access.sync());
 }
 
@@ -2031,25 +2073,20 @@ MacroAssemblerMIPSShared::wasmStoreImpl(
                                         static_cast<LoadStoreSize>(8 * byteSize),
                                         isSigned ? SignExtend : ZeroExtend);
         }
         return;
     }
 
     asMasm().memoryBarrierBefore(access.sync());
     if (isFloat) {
-        if (byteSize == 4) {
-            asMasm().storeFloat32(value.fpu(), address);
-        } else {
-            //asMasm().storeDouble(value.fpu(), address);
-            // For time being storeDouble for mips32 uses two store instructions,
-            // so we emit only one to get correct behavior in case of OOB access.
-            asMasm().computeScaledAddress(address, SecondScratchReg);
-            asMasm().as_sd(value.fpu(), SecondScratchReg, 0);
-        }
+        if (byteSize == 4)
+            asMasm().ma_ss(value.fpu(), address);
+        else
+            asMasm().ma_sd(value.fpu(), address);
     } else {
         asMasm().ma_store(value.gpr(), address,
                       static_cast<LoadStoreSize>(8 * byteSize),
                       isSigned ? SignExtend : ZeroExtend);
     }
     // Only the last emitted instruction is a memory access.
     asMasm().append(access, asMasm().size() - 4, asMasm().framePushed());
     asMasm().memoryBarrierAfter(access.sync());
--- a/js/src/jit/mips-shared/MacroAssembler-mips-shared.h
+++ b/js/src/jit/mips-shared/MacroAssembler-mips-shared.h
@@ -167,42 +167,38 @@ class MacroAssemblerMIPSShared : public 
     void ma_b(Register lhs, T rhs, wasm::OldTrapDesc target, Condition c,
               JumpKind jumpKind = LongJump);
 
     void ma_b(Label* l, JumpKind jumpKind = LongJump);
     void ma_b(wasm::OldTrapDesc target, JumpKind jumpKind = LongJump);
 
     // fp instructions
     void ma_lis(FloatRegister dest, float value);
-    void ma_liNegZero(FloatRegister dest);
 
-    void ma_sd(FloatRegister fd, BaseIndex address);
-    void ma_ss(FloatRegister fd, BaseIndex address);
+    void ma_sd(FloatRegister src, BaseIndex address);
+    void ma_ss(FloatRegister src, BaseIndex address);
+
+    void ma_ld(FloatRegister dest, const BaseIndex& src);
+    void ma_ls(FloatRegister dest, const BaseIndex& src);
 
     //FP branches
     void ma_bc1s(FloatRegister lhs, FloatRegister rhs, Label* label, DoubleCondition c,
                  JumpKind jumpKind = LongJump, FPConditionBit fcc = FCC0);
     void ma_bc1d(FloatRegister lhs, FloatRegister rhs, Label* label, DoubleCondition c,
                  JumpKind jumpKind = LongJump, FPConditionBit fcc = FCC0);
 
     void ma_call(ImmPtr dest);
 
     void ma_jump(ImmPtr dest);
 
     void ma_cmp_set(Register dst, Register lhs, Register rhs, Condition c);
     void ma_cmp_set(Register dst, Register lhs, Imm32 imm, Condition c);
     void ma_cmp_set_double(Register dst, FloatRegister lhs, FloatRegister rhs, DoubleCondition c);
     void ma_cmp_set_float32(Register dst, FloatRegister lhs, FloatRegister rhs, DoubleCondition c);
 
-    BufferOffset ma_BoundsCheck(Register bounded) {
-        BufferOffset bo = m_buffer.nextOffset();
-        ma_liPatchable(bounded, Imm32(0));
-        return bo;
-    }
-
     void moveToDoubleLo(Register src, FloatRegister dest) {
         as_mtc1(src, dest);
     }
     void moveFromDoubleLo(FloatRegister src, Register dest) {
         as_mfc1(dest, src);
     }
 
     void moveToFloat32(Register src, FloatRegister dest) {
@@ -212,16 +208,26 @@ class MacroAssemblerMIPSShared : public 
         as_mfc1(dest, src);
     }
 
     // Evaluate srcDest = minmax<isMax>{Float32,Double}(srcDest, other).
     // Handle NaN specially if handleNaN is true.
     void minMaxDouble(FloatRegister srcDest, FloatRegister other, bool handleNaN, bool isMax);
     void minMaxFloat32(FloatRegister srcDest, FloatRegister other, bool handleNaN, bool isMax);
 
+    void loadDouble(const Address& addr, FloatRegister dest);
+    void loadDouble(const BaseIndex& src, FloatRegister dest);
+
+    // Load a float value into a register, then expand it to a double.
+    void loadFloatAsDouble(const Address& addr, FloatRegister dest);
+    void loadFloatAsDouble(const BaseIndex& src, FloatRegister dest);
+
+    void loadFloat32(const Address& addr, FloatRegister dest);
+    void loadFloat32(const BaseIndex& src, FloatRegister dest);
+
     void outOfLineWasmTruncateToInt32Check(FloatRegister input, Register output, MIRType fromType,
                                            TruncFlags flags, Label* rejoin,
                                            wasm::BytecodeOffset trapOffset);
     void outOfLineWasmTruncateToInt64Check(FloatRegister input, Register64 output, MIRType fromType,
                                            TruncFlags flags, Label* rejoin,
                                            wasm::BytecodeOffset trapOffset);
 
   protected:
--- a/js/src/jit/mips32/Architecture-mips32.h
+++ b/js/src/jit/mips32/Architecture-mips32.h
@@ -126,16 +126,17 @@ class FloatRegister : public FloatRegist
 
     bool operator==(const FloatRegister& other) const {
         MOZ_ASSERT(!isInvalid());
         MOZ_ASSERT(!other.isInvalid());
         return kind_ == other.kind_ && code_ == other.code_;
     }
     bool equiv(const FloatRegister& other) const { return other.kind_ == kind_; }
     size_t size() const { return (kind_ == Double) ? 8 : 4; }
+    size_t pushSize() const { return size(); }
     bool isInvalid() const {
         return code_ == FloatRegisters::invalid_freg;
     }
 
     bool isNotOdd() const { return !isInvalid() && ((code_ & 1) == 0); }
 
     bool isSingle() const { return kind_ == Single; }
     bool isDouble() const { return kind_ == Double; }
--- a/js/src/jit/mips32/MacroAssembler-mips32-inl.h
+++ b/js/src/jit/mips32/MacroAssembler-mips32-inl.h
@@ -1012,42 +1012,16 @@ MacroAssembler::branchTruncateFloat32May
     as_truncws(ScratchFloat32Reg, src);
     as_cfc1(ScratchRegister, Assembler::FCSR);
     moveFromFloat32(ScratchFloat32Reg, dest);
     ma_ext(ScratchRegister, ScratchRegister, Assembler::CauseV, 1);
     ma_b(ScratchRegister, Imm32(0), fail, Assembler::NotEqual);
 }
 
 // ========================================================================
-// Memory access primitives.
-void
-MacroAssembler::storeUncanonicalizedDouble(FloatRegister src, const Address& addr)
-{
-    ma_sd(src, addr);
-}
-void
-MacroAssembler::storeUncanonicalizedDouble(FloatRegister src, const BaseIndex& addr)
-{
-    MOZ_ASSERT(addr.offset == 0);
-    ma_sd(src, addr);
-}
-
-void
-MacroAssembler::storeUncanonicalizedFloat32(FloatRegister src, const Address& addr)
-{
-    ma_ss(src, addr);
-}
-void
-MacroAssembler::storeUncanonicalizedFloat32(FloatRegister src, const BaseIndex& addr)
-{
-    MOZ_ASSERT(addr.offset == 0);
-    ma_ss(src, addr);
-}
-
-// ========================================================================
 // wasm support
 
 template <class L>
 void
 MacroAssembler::wasmBoundsCheck(Condition cond, Register index, Register boundsCheckLimit, L label)
 {
      ma_b(index, boundsCheckLimit, label, cond);
 }
--- a/js/src/jit/mips32/MacroAssembler-mips32.cpp
+++ b/js/src/jit/mips32/MacroAssembler-mips32.cpp
@@ -865,95 +865,120 @@ MacroAssemblerMIPS::ma_mv(ValueOperand s
     moveToDoubleLo(src.payloadReg(), dest);
     moveToDoubleHi(src.typeReg(), dest);
 }
 
 void
 MacroAssemblerMIPS::ma_ls(FloatRegister ft, Address address)
 {
     if (Imm16::IsInSignedRange(address.offset)) {
-        as_ls(ft, address.base, address.offset);
+        as_lwc1(ft, address.base, address.offset);
     } else {
         MOZ_ASSERT(address.base != ScratchRegister);
         ma_li(ScratchRegister, Imm32(address.offset));
         if (isLoongson()) {
             as_gslsx(ft, address.base, ScratchRegister, 0);
         } else {
             as_addu(ScratchRegister, address.base, ScratchRegister);
-            as_ls(ft, ScratchRegister, 0);
+            as_lwc1(ft, ScratchRegister, 0);
         }
     }
 }
 
 void
 MacroAssemblerMIPS::ma_ld(FloatRegister ft, Address address)
 {
-    // Use single precision load instructions so we don't have to worry about
-    // alignment.
-
-    int32_t off = address.offset + PAYLOAD_OFFSET;
-    int32_t off2 = address.offset + TAG_OFFSET;
-    if (Imm16::IsInSignedRange(off) && Imm16::IsInSignedRange(off2)) {
-        as_ls(ft, address.base, off);
-        as_ls(getOddPair(ft), address.base, off2);
+    if (Imm16::IsInSignedRange(address.offset)) {
+        as_ldc1(ft, address.base, address.offset);
     } else {
         MOZ_ASSERT(address.base != ScratchRegister);
-        ma_li(ScratchRegister, Imm32(off));
-        as_addu(ScratchRegister, address.base, ScratchRegister);
-        as_ls(ft, ScratchRegister, PAYLOAD_OFFSET);
-        as_ls(getOddPair(ft), ScratchRegister, TAG_OFFSET);
+        ma_li(ScratchRegister, Imm32(address.offset));
+        if (isLoongson()) {
+            as_gsldx(ft, address.base, ScratchRegister, 0);
+        } else {
+            as_addu(ScratchRegister, address.base, ScratchRegister);
+            as_ldc1(ft, ScratchRegister, 0);
+        }
     }
 }
 
 void
 MacroAssemblerMIPS::ma_sd(FloatRegister ft, Address address)
 {
-    int32_t off = address.offset + PAYLOAD_OFFSET;
-    int32_t off2 = address.offset + TAG_OFFSET;
-    if (Imm16::IsInSignedRange(off) && Imm16::IsInSignedRange(off2)) {
-        as_ss(ft, address.base, off);
-        as_ss(getOddPair(ft), address.base, off2);
+    if (Imm16::IsInSignedRange(address.offset)) {
+        as_sdc1(ft, address.base, address.offset);
     } else {
         MOZ_ASSERT(address.base != ScratchRegister);
-        ma_li(ScratchRegister, Imm32(off));
-        as_addu(ScratchRegister, address.base, ScratchRegister);
-        as_ss(ft, ScratchRegister, PAYLOAD_OFFSET);
-        as_ss(getOddPair(ft), ScratchRegister, TAG_OFFSET);
+        ma_li(ScratchRegister, Imm32(address.offset));
+        if (isLoongson()) {
+            as_gssdx(ft, address.base, ScratchRegister, 0);
+        } else {
+            as_addu(ScratchRegister, address.base, ScratchRegister);
+            as_sdc1(ft, ScratchRegister, 0);
+        }
     }
 }
 
 void
 MacroAssemblerMIPS::ma_ss(FloatRegister ft, Address address)
 {
     if (Imm16::IsInSignedRange(address.offset)) {
-        as_ss(ft, address.base, address.offset);
+        as_swc1(ft, address.base, address.offset);
     } else {
         MOZ_ASSERT(address.base != ScratchRegister);
         ma_li(ScratchRegister, Imm32(address.offset));
         if (isLoongson()) {
             as_gsssx(ft, address.base, ScratchRegister, 0);
         } else {
             as_addu(ScratchRegister, address.base, ScratchRegister);
-            as_ss(ft, ScratchRegister, 0);
+            as_swc1(ft, ScratchRegister, 0);
         }
     }
 }
 
 void
-MacroAssemblerMIPS::ma_pop(FloatRegister fs)
+MacroAssemblerMIPS::ma_ldc1WordAligned(FloatRegister ft, Register base, int32_t off)
 {
-    ma_ld(fs.doubleOverlay(), Address(StackPointer, 0));
-    as_addiu(StackPointer, StackPointer, sizeof(double));
+    MOZ_ASSERT(Imm16::IsInSignedRange(off + PAYLOAD_OFFSET) &&
+               Imm16::IsInSignedRange(off + TAG_OFFSET));
+
+    as_lwc1(ft, base, off + PAYLOAD_OFFSET);
+    as_lwc1(getOddPair(ft), base, off + TAG_OFFSET);
+}
+
+void
+MacroAssemblerMIPS::ma_sdc1WordAligned(FloatRegister ft, Register base, int32_t off)
+{
+    MOZ_ASSERT(Imm16::IsInSignedRange(off + PAYLOAD_OFFSET) &&
+               Imm16::IsInSignedRange(off + TAG_OFFSET));
+
+    as_swc1(ft, base, off + PAYLOAD_OFFSET);
+    as_swc1(getOddPair(ft), base, off + TAG_OFFSET);
 }
 
 void
-MacroAssemblerMIPS::ma_push(FloatRegister fs)
+MacroAssemblerMIPS::ma_pop(FloatRegister f)
 {
-    as_addiu(StackPointer, StackPointer, -sizeof(double));
-    ma_sd(fs.doubleOverlay(), Address(StackPointer, 0));
+    if (f.isDouble())
+        ma_ldc1WordAligned(f, StackPointer, 0);
+    else
+        as_lwc1(f, StackPointer, 0);
+
+    as_addiu(StackPointer, StackPointer, f.size());
+}
+
+void
+MacroAssemblerMIPS::ma_push(FloatRegister f)
+{
+    as_addiu(StackPointer, StackPointer, -f.size());
+
+    if (f.isDouble())
+        ma_sdc1WordAligned(f, StackPointer, 0);
+    else
+        as_swc1(f, StackPointer, 0);
 }
 
 bool
 MacroAssemblerMIPSCompat::buildOOLFakeExitFrame(void* fakeReturnAddr)
 {
     uint32_t descriptor = MakeFrameDescriptor(asMasm().framePushed(), JitFrame_IonJS,
                                               ExitFrameLayout::Size());
 
@@ -1106,29 +1131,16 @@ MacroAssemblerMIPSCompat::loadPtr(wasm::
 
 void
 MacroAssemblerMIPSCompat::loadPrivate(const Address& address, Register dest)
 {
     ma_lw(dest, Address(address.base, address.offset + PAYLOAD_OFFSET));
 }
 
 void
-MacroAssemblerMIPSCompat::loadDouble(const Address& address, FloatRegister dest)
-{
-    ma_ld(dest, address);
-}
-
-void
-MacroAssemblerMIPSCompat::loadDouble(const BaseIndex& src, FloatRegister dest)
-{
-    computeScaledAddress(src, SecondScratchReg);
-    ma_ld(dest, Address(SecondScratchReg, src.offset));
-}
-
-void
 MacroAssemblerMIPSCompat::loadUnalignedDouble(const wasm::MemoryAccessDesc& access,
                                               const BaseIndex& src, Register temp, FloatRegister dest)
 {
     MOZ_ASSERT(MOZ_LITTLE_ENDIAN, "Wasm-only; wasm is disabled on big-endian.");
     computeScaledAddress(src, SecondScratchReg);
 
     uint32_t framePushed = asMasm().framePushed();
     BufferOffset load;
@@ -1151,43 +1163,16 @@ MacroAssemblerMIPSCompat::loadUnalignedD
         load = as_lwl(temp, ScratchRegister, INT64HIGH_OFFSET + 3);
         as_lwr(temp, ScratchRegister, INT64HIGH_OFFSET);
         append(access, load.getOffset(), framePushed);
         moveToDoubleHi(temp, dest);
     }
 }
 
 void
-MacroAssemblerMIPSCompat::loadFloatAsDouble(const Address& address, FloatRegister dest)
-{
-    ma_ls(dest, address);
-    as_cvtds(dest, dest);
-}
-
-void
-MacroAssemblerMIPSCompat::loadFloatAsDouble(const BaseIndex& src, FloatRegister dest)
-{
-    loadFloat32(src, dest);
-    as_cvtds(dest, dest);
-}
-
-void
-MacroAssemblerMIPSCompat::loadFloat32(const Address& address, FloatRegister dest)
-{
-    ma_ls(dest, address);
-}
-
-void
-MacroAssemblerMIPSCompat::loadFloat32(const BaseIndex& src, FloatRegister dest)
-{
-    computeScaledAddress(src, SecondScratchReg);
-    ma_ls(dest, Address(SecondScratchReg, src.offset));
-}
-
-void
 MacroAssemblerMIPSCompat::loadUnalignedFloat32(const wasm::MemoryAccessDesc& access,
                                                const BaseIndex& src, Register temp, FloatRegister dest)
 {
     MOZ_ASSERT(MOZ_LITTLE_ENDIAN, "Wasm-only; wasm is disabled on big-endian.");
     computeScaledAddress(src, SecondScratchReg);
     BufferOffset load;
     if (Imm16::IsInSignedRange(src.offset) && Imm16::IsInSignedRange(src.offset + 3)) {
         load = as_lwl(temp, SecondScratchReg, src.offset + 3);
@@ -2185,17 +2170,17 @@ MacroAssembler::PushRegsInMask(LiveRegis
         // start writing from the first aligned location.
         // We reserve a whole extra double so that the buffer has even size.
         ma_and(SecondScratchReg, sp, Imm32(~(ABIStackAlignment - 1)));
         reserveStack(diffF);
 
         diffF -= sizeof(double);
 
         for (FloatRegisterForwardIterator iter(set.fpus().reduceSetForPush()); iter.more(); ++iter) {
-            as_sd(*iter, SecondScratchReg, -diffF);
+            as_sdc1(*iter, SecondScratchReg, -diffF);
             diffF -= sizeof(double);
         }
 
         MOZ_ASSERT(diffF == 0);
     }
 }
 
 void
@@ -2211,17 +2196,17 @@ MacroAssembler::PopRegsInMaskIgnore(Live
         ma_addu(SecondScratchReg, sp, Imm32(reservedF));
         ma_and(SecondScratchReg, SecondScratchReg, Imm32(~(ABIStackAlignment - 1)));
 
         diffF -= sizeof(double);
 
         LiveFloatRegisterSet fpignore(ignore.fpus().reduceSetForPush());
         for (FloatRegisterForwardIterator iter(set.fpus().reduceSetForPush()); iter.more(); ++iter) {
             if (!ignore.has(*iter))
-                as_ld(*iter, SecondScratchReg, -diffF);
+                as_ldc1(*iter, SecondScratchReg, -diffF);
             diffF -= sizeof(double);
         }
         freeStack(reservedF);
         MOZ_ASSERT(diffF == 0);
     }
 
     for (GeneralRegisterBackwardIterator iter(set.gprs()); iter.more(); ++iter) {
         diffG -= sizeof(intptr_t);
@@ -2251,17 +2236,17 @@ MacroAssembler::storeRegsInMask(LiveRegi
     if (diffF > 0) {
 
         computeEffectiveAddress(dest, scratch);
         ma_and(scratch, scratch, Imm32(~(ABIStackAlignment - 1)));
 
         diffF -= sizeof(double);
 
         for (FloatRegisterForwardIterator iter(set.fpus().reduceSetForPush()); iter.more(); ++iter) {
-            as_sd(*iter, scratch, -diffF);
+            as_sdc1(*iter, scratch, -diffF);
             diffF -= sizeof(double);
         }
         MOZ_ASSERT(diffF == 0);
     }
 }
 // ===============================================================
 // ABI function calls.
 
--- a/js/src/jit/mips32/MacroAssembler-mips32.h
+++ b/js/src/jit/mips32/MacroAssembler-mips32.h
@@ -48,16 +48,18 @@ static const int32_t HIGH_32_OFFSET = 0;
 
 class MacroAssemblerMIPS : public MacroAssemblerMIPSShared
 {
   public:
     using MacroAssemblerMIPSShared::ma_b;
     using MacroAssemblerMIPSShared::ma_li;
     using MacroAssemblerMIPSShared::ma_ss;
     using MacroAssemblerMIPSShared::ma_sd;
+    using MacroAssemblerMIPSShared::ma_ls;
+    using MacroAssemblerMIPSShared::ma_ld;
     using MacroAssemblerMIPSShared::ma_load;
     using MacroAssemblerMIPSShared::ma_store;
     using MacroAssemblerMIPSShared::ma_cmp_set;
     using MacroAssemblerMIPSShared::ma_subTestOverflow;
     using MacroAssemblerMIPSShared::ma_liPatchable;
 
     void ma_li(Register dest, CodeOffset* label);
 
@@ -117,23 +119,26 @@ class MacroAssemblerMIPS : public MacroA
     void ma_bal(Label* l, DelaySlotFill delaySlotFill = FillDelaySlot);
 
     // fp instructions
     void ma_lid(FloatRegister dest, double value);
 
     void ma_mv(FloatRegister src, ValueOperand dest);
     void ma_mv(ValueOperand src, FloatRegister dest);
 
-    void ma_ls(FloatRegister fd, Address address);
-    void ma_ld(FloatRegister fd, Address address);
-    void ma_sd(FloatRegister fd, Address address);
-    void ma_ss(FloatRegister fd, Address address);
+    void ma_ls(FloatRegister ft, Address address);
+    void ma_ld(FloatRegister ft, Address address);
+    void ma_sd(FloatRegister ft, Address address);
+    void ma_ss(FloatRegister ft, Address address);
 
-    void ma_pop(FloatRegister fs);
-    void ma_push(FloatRegister fs);
+    void ma_ldc1WordAligned(FloatRegister ft, Register base, int32_t off);
+    void ma_sdc1WordAligned(FloatRegister ft, Register base, int32_t off);
+
+    void ma_pop(FloatRegister f);
+    void ma_push(FloatRegister f);
 
     void ma_cmp_set(Register dst, Register lhs, ImmPtr imm, Condition c) {
         ma_cmp_set(dst, lhs, Imm32(uint32_t(imm.value)), c);
     }
     void ma_cmp_set(Register rd, Register rs, Address addr, Condition c);
     void ma_cmp_set(Register dst, Address lhs, Register rhs, Condition c);
     void ma_cmp_set(Register dst, Address lhs, ImmPtr imm, Condition c) {
         ma_lw(ScratchRegister, lhs);
@@ -631,27 +636,19 @@ class MacroAssemblerMIPSCompat : public 
 
     void loadAlignedSimd128Float(const Address& addr, FloatRegister dest) { MOZ_CRASH("NYI"); }
     void storeAlignedSimd128Float(FloatRegister src, Address addr) { MOZ_CRASH("NYI"); }
     void loadUnalignedSimd128Float(const Address& addr, FloatRegister dest) { MOZ_CRASH("NYI"); }
     void loadUnalignedSimd128Float(const BaseIndex& addr, FloatRegister dest) { MOZ_CRASH("NYI"); }
     void storeUnalignedSimd128Float(FloatRegister src, Address addr) { MOZ_CRASH("NYI"); }
     void storeUnalignedSimd128Float(FloatRegister src, BaseIndex addr) { MOZ_CRASH("NYI"); }
 
-    void loadDouble(const Address& addr, FloatRegister dest);
-    void loadDouble(const BaseIndex& src, FloatRegister dest);
     void loadUnalignedDouble(const wasm::MemoryAccessDesc& access, const BaseIndex& src,
                              Register temp, FloatRegister dest);
 
-    // Load a float value into a register, then expand it to a double.
-    void loadFloatAsDouble(const Address& addr, FloatRegister dest);
-    void loadFloatAsDouble(const BaseIndex& src, FloatRegister dest);
-
-    void loadFloat32(const Address& addr, FloatRegister dest);
-    void loadFloat32(const BaseIndex& src, FloatRegister dest);
     void loadUnalignedFloat32(const wasm::MemoryAccessDesc& access, const BaseIndex& src,
                               Register temp, FloatRegister dest);
 
     void store8(Register src, const Address& address);
     void store8(Imm32 imm, const Address& address);
     void store8(Register src, const BaseIndex& address);
     void store8(Imm32 imm, const BaseIndex& address);
 
--- a/js/src/jit/mips32/Trampoline-mips32.cpp
+++ b/js/src/jit/mips32/Trampoline-mips32.cpp
@@ -65,64 +65,64 @@ struct EnterJITArgs
 };
 
 static void
 GenerateReturn(MacroAssembler& masm, int returnCode)
 {
     MOZ_ASSERT(masm.framePushed() == sizeof(EnterJITRegs));
 
     // Restore non-volatile registers
-    masm.loadPtr(Address(StackPointer, offsetof(EnterJITRegs, s0)), s0);
-    masm.loadPtr(Address(StackPointer, offsetof(EnterJITRegs, s1)), s1);
-    masm.loadPtr(Address(StackPointer, offsetof(EnterJITRegs, s2)), s2);
-    masm.loadPtr(Address(StackPointer, offsetof(EnterJITRegs, s3)), s3);
-    masm.loadPtr(Address(StackPointer, offsetof(EnterJITRegs, s4)), s4);
-    masm.loadPtr(Address(StackPointer, offsetof(EnterJITRegs, s5)), s5);
-    masm.loadPtr(Address(StackPointer, offsetof(EnterJITRegs, s6)), s6);
-    masm.loadPtr(Address(StackPointer, offsetof(EnterJITRegs, s7)), s7);
-    masm.loadPtr(Address(StackPointer, offsetof(EnterJITRegs, fp)), fp);
-    masm.loadPtr(Address(StackPointer, offsetof(EnterJITRegs, ra)), ra);
+    masm.as_lw(s0, StackPointer, offsetof(EnterJITRegs, s0));
+    masm.as_lw(s1, StackPointer, offsetof(EnterJITRegs, s1));
+    masm.as_lw(s2, StackPointer, offsetof(EnterJITRegs, s2));
+    masm.as_lw(s3, StackPointer, offsetof(EnterJITRegs, s3));
+    masm.as_lw(s4, StackPointer, offsetof(EnterJITRegs, s4));
+    masm.as_lw(s5, StackPointer, offsetof(EnterJITRegs, s5));
+    masm.as_lw(s6, StackPointer, offsetof(EnterJITRegs, s6));
+    masm.as_lw(s7, StackPointer, offsetof(EnterJITRegs, s7));
+    masm.as_lw(fp, StackPointer, offsetof(EnterJITRegs, fp));
+    masm.as_lw(ra, StackPointer, offsetof(EnterJITRegs, ra));
 
     // Restore non-volatile floating point registers
-    masm.loadDouble(Address(StackPointer, offsetof(EnterJITRegs, f20)), f20);
-    masm.loadDouble(Address(StackPointer, offsetof(EnterJITRegs, f22)), f22);
-    masm.loadDouble(Address(StackPointer, offsetof(EnterJITRegs, f24)), f24);
-    masm.loadDouble(Address(StackPointer, offsetof(EnterJITRegs, f26)), f26);
-    masm.loadDouble(Address(StackPointer, offsetof(EnterJITRegs, f28)), f28);
-    masm.loadDouble(Address(StackPointer, offsetof(EnterJITRegs, f30)), f30);
+    masm.as_ldc1(f20, StackPointer, offsetof(EnterJITRegs, f20));
+    masm.as_ldc1(f22, StackPointer, offsetof(EnterJITRegs, f22));
+    masm.as_ldc1(f24, StackPointer, offsetof(EnterJITRegs, f24));
+    masm.as_ldc1(f26, StackPointer, offsetof(EnterJITRegs, f26));
+    masm.as_ldc1(f28, StackPointer, offsetof(EnterJITRegs, f28));
+    masm.as_ldc1(f30, StackPointer, offsetof(EnterJITRegs, f30));
 
     masm.freeStack(sizeof(EnterJITRegs));
 
     masm.branch(ra);
 }
 
 static void
 GeneratePrologue(MacroAssembler& masm)
 {
     // Save non-volatile registers. These must be saved by the trampoline,
     // rather than the JIT'd code, because they are scanned by the conservative
     // scanner.
     masm.reserveStack(sizeof(EnterJITRegs));
-    masm.storePtr(s0, Address(StackPointer, offsetof(EnterJITRegs, s0)));
-    masm.storePtr(s1, Address(StackPointer, offsetof(EnterJITRegs, s1)));
-    masm.storePtr(s2, Address(StackPointer, offsetof(EnterJITRegs, s2)));
-    masm.storePtr(s3, Address(StackPointer, offsetof(EnterJITRegs, s3)));
-    masm.storePtr(s4, Address(StackPointer, offsetof(EnterJITRegs, s4)));
-    masm.storePtr(s5, Address(StackPointer, offsetof(EnterJITRegs, s5)));
-    masm.storePtr(s6, Address(StackPointer, offsetof(EnterJITRegs, s6)));
-    masm.storePtr(s7, Address(StackPointer, offsetof(EnterJITRegs, s7)));
-    masm.storePtr(fp, Address(StackPointer, offsetof(EnterJITRegs, fp)));
-    masm.storePtr(ra, Address(StackPointer, offsetof(EnterJITRegs, ra)));
+    masm.as_sw(s0, StackPointer, offsetof(EnterJITRegs, s0));
+    masm.as_sw(s1, StackPointer, offsetof(EnterJITRegs, s1));
+    masm.as_sw(s2, StackPointer, offsetof(EnterJITRegs, s2));
+    masm.as_sw(s3, StackPointer, offsetof(EnterJITRegs, s3));
+    masm.as_sw(s4, StackPointer, offsetof(EnterJITRegs, s4));
+    masm.as_sw(s5, StackPointer, offsetof(EnterJITRegs, s5));
+    masm.as_sw(s6, StackPointer, offsetof(EnterJITRegs, s6));
+    masm.as_sw(s7, StackPointer, offsetof(EnterJITRegs, s7));
+    masm.as_sw(fp, StackPointer, offsetof(EnterJITRegs, fp));
+    masm.as_sw(ra, StackPointer, offsetof(EnterJITRegs, ra));
 
-    masm.as_sd(f20, StackPointer, offsetof(EnterJITRegs, f20));
-    masm.as_sd(f22, StackPointer, offsetof(EnterJITRegs, f22));
-    masm.as_sd(f24, StackPointer, offsetof(EnterJITRegs, f24));
-    masm.as_sd(f26, StackPointer, offsetof(EnterJITRegs, f26));
-    masm.as_sd(f28, StackPointer, offsetof(EnterJITRegs, f28));
-    masm.as_sd(f30, StackPointer, offsetof(EnterJITRegs, f30));
+    masm.as_sdc1(f20, StackPointer, offsetof(EnterJITRegs, f20));
+    masm.as_sdc1(f22, StackPointer, offsetof(EnterJITRegs, f22));
+    masm.as_sdc1(f24, StackPointer, offsetof(EnterJITRegs, f24));
+    masm.as_sdc1(f26, StackPointer, offsetof(EnterJITRegs, f26));
+    masm.as_sdc1(f28, StackPointer, offsetof(EnterJITRegs, f28));
+    masm.as_sdc1(f30, StackPointer, offsetof(EnterJITRegs, f30));
 }
 
 
 /*
  * This method generates a trampoline for a c++ function with the following
  * signature:
  *   void enter(void* code, int argc, Value* argv, InterpreterFrame* fp,
  *              CalleeToken calleeToken, JSObject* scopeChain, Value* vp)
@@ -359,19 +359,20 @@ JitRuntime::generateInvalidator(MacroAss
     for (uint32_t i = 0; i < Registers::Total; i++) {
         Address address = Address(StackPointer, InvalidationBailoutStack::offsetOfRegs() +
                                                 i * sizeof(uintptr_t));
         masm.storePtr(Register::FromCode(i), address);
     }
 
     // Save floating point registers
-    // We can use as_sd because stack is alligned.
+    // We can use as_sdc1 because stack is aligned.
-    for (uint32_t i = 0; i < FloatRegisters::TotalDouble; i ++)
-        masm.as_sd(FloatRegister::FromIndex(i, FloatRegister::Double), StackPointer,
-                   InvalidationBailoutStack::offsetOfFpRegs() + i * sizeof(double));
+    for (uint32_t i = 0; i < FloatRegisters::TotalDouble; i ++) {
+        masm.as_sdc1(FloatRegister::FromIndex(i, FloatRegister::Double), StackPointer,
+                     InvalidationBailoutStack::offsetOfFpRegs() + i * sizeof(double));
+    }
 
     // Pass pointer to InvalidationBailoutStack structure.
     masm.movePtr(StackPointer, a0);
 
     // Reserve place for return value and BailoutInfo pointer
     masm.subPtr(Imm32(2 * sizeof(uintptr_t)), StackPointer);
     // Pass pointer to return value.
     masm.ma_addu(a1, StackPointer, Imm32(sizeof(uintptr_t)));
@@ -571,20 +572,21 @@ PushBailoutFrame(MacroAssembler& masm, u
 
     // Save general purpose registers.
     for (uint32_t i = 0; i < Registers::Total; i++) {
         uint32_t off = BailoutStack::offsetOfRegs() + i * sizeof(uintptr_t);
         masm.storePtr(Register::FromCode(i), Address(StackPointer, off));
     }
 
     // Save floating point registers
-    // We can use as_sd because stack is alligned.
-    for (uint32_t i = 0; i < FloatRegisters::TotalDouble; i++)
-        masm.as_sd(FloatRegister::FromIndex(i, FloatRegister::Double), StackPointer,
-                   BailoutStack::offsetOfFpRegs() + i * sizeof(double));
+    // We can use as_sdc1 because stack is aligned.
+    for (uint32_t i = 0; i < FloatRegisters::TotalDouble; i++) {
+        masm.as_sdc1(FloatRegister::FromIndex(i, FloatRegister::Double), StackPointer,
+                     BailoutStack::offsetOfFpRegs() + i * sizeof(double));
+    }
 
     // Store the frameSize_ or tableOffset_ stored in ra
     // See: JitRuntime::generateBailoutTable()
     // See: CodeGeneratorMIPS::generateOutOfLineCode()
     masm.storePtr(ra, Address(StackPointer, BailoutStack::offsetOfFrameSize()));
 
     // Put frame class to stack
     masm.storePtr(ImmWord(frameClass), Address(StackPointer, BailoutStack::offsetOfFrameClass()));
@@ -770,18 +772,18 @@ JitRuntime::generateVMWrapper(JSContext*
             break;
           case VMFunction::WordByRef:
             masm.passABIArg(MoveOperand(argsBase, argDisp, MoveOperand::EFFECTIVE_ADDRESS),
                             MoveOp::GENERAL);
             argDisp += sizeof(uint32_t);
             break;
           case VMFunction::DoubleByRef:
             // Copy double sized argument to aligned place.
-            masm.ma_ld(ScratchDoubleReg, Address(argsBase, argDisp));
-            masm.as_sd(ScratchDoubleReg, doubleArgs, doubleArgDisp);
+            masm.ma_ldc1WordAligned(ScratchDoubleReg, argsBase, argDisp);
+            masm.as_sdc1(ScratchDoubleReg, doubleArgs, doubleArgDisp);
             masm.passABIArg(MoveOperand(doubleArgs, doubleArgDisp, MoveOperand::EFFECTIVE_ADDRESS),
                             MoveOp::GENERAL);
             doubleArgDisp += sizeof(double);
             argDisp += sizeof(double);
             break;
         }
     }
 
@@ -836,17 +838,17 @@ JitRuntime::generateVMWrapper(JSContext*
 
       case Type_Bool:
         masm.load8ZeroExtend(Address(StackPointer, 0), ReturnReg);
         masm.freeStack(sizeof(uintptr_t));
         break;
 
       case Type_Double:
         if (cx->runtime()->jitSupportsFloatingPoint) {
-            masm.as_ld(ReturnDoubleReg, StackPointer, 0);
+            masm.as_ldc1(ReturnDoubleReg, StackPointer, 0);
         } else {
             masm.assumeUnreachable("Unable to load into float reg, with no FP support.");
         }
         masm.freeStack(sizeof(double));
         break;
 
       default:
         MOZ_ASSERT(f.outParam == Type_Void);
--- a/js/src/jit/mips64/Architecture-mips64.h
+++ b/js/src/jit/mips64/Architecture-mips64.h
@@ -117,16 +117,18 @@ class FloatRegister : public FloatRegist
 
     bool operator==(const FloatRegister& other) const {
         MOZ_ASSERT(!isInvalid());
         MOZ_ASSERT(!other.isInvalid());
         return kind_ == other.kind_ && reg_ == other.reg_;
     }
     bool equiv(const FloatRegister& other) const { return other.kind_ == kind_; }
     size_t size() const { return (kind_ == Codes::Double) ? sizeof(double) : sizeof (float); }
+    // Always push doubles to maintain 8-byte stack alignment.
+    size_t pushSize() const { return sizeof(double); }
     bool isInvalid() const {
         return reg_ == FloatRegisters::invalid_freg;
     }
 
     bool isSingle() const { return kind_ == Codes::Single; }
     bool isDouble() const { return kind_ == Codes::Double; }
 
     FloatRegister singleOverlay() const;
--- a/js/src/jit/mips64/MacroAssembler-mips64-inl.h
+++ b/js/src/jit/mips64/MacroAssembler-mips64-inl.h
@@ -764,57 +764,31 @@ MacroAssembler::branchTruncateFloat32May
     moveFromDouble(ScratchDoubleReg, dest);
     ma_ext(ScratchRegister, ScratchRegister, Assembler::CauseV, 1);
     ma_b(ScratchRegister, Imm32(0), fail, Assembler::NotEqual);
 
     as_sll(dest, dest, 0);
 }
 
 // ========================================================================
-// Memory access primitives.
-void
-MacroAssembler::storeUncanonicalizedDouble(FloatRegister src, const Address& addr)
-{
-    ma_sd(src, addr);
-}
-void
-MacroAssembler::storeUncanonicalizedDouble(FloatRegister src, const BaseIndex& addr)
-{
-    MOZ_ASSERT(addr.offset == 0);
-    ma_sd(src, addr);
-}
-
-void
-MacroAssembler::storeUncanonicalizedFloat32(FloatRegister src, const Address& addr)
-{
-    ma_ss(src, addr);
-}
-void
-MacroAssembler::storeUncanonicalizedFloat32(FloatRegister src, const BaseIndex& addr)
-{
-    MOZ_ASSERT(addr.offset == 0);
-    ma_ss(src, addr);
-}
-
-// ========================================================================
 // wasm support
 
 template <class L>
 void
 MacroAssembler::wasmBoundsCheck(Condition cond, Register index, Register boundsCheckLimit, L label)
 {
     ma_b(index, boundsCheckLimit, label, cond);
 }
 
 template <class L>
 void
 MacroAssembler::wasmBoundsCheck(Condition cond, Register index, Address boundsCheckLimit, L label)
 {
     SecondScratchRegisterScope scratch2(*this);
-    load32(boundsCheckLimit,SecondScratchReg);
+    load32(boundsCheckLimit, SecondScratchReg);
     ma_b(index, SecondScratchReg, label, cond);
 }
 
 //}}} check_macroassembler_style
 // ===============================================================
 
 // The specializations for cmpPtrSet are outside the braces because check_macroassembler_style can't yet
 // deal with specializations.
--- a/js/src/jit/mips64/MacroAssembler-mips64.cpp
+++ b/js/src/jit/mips64/MacroAssembler-mips64.cpp
@@ -845,92 +845,92 @@ MacroAssemblerMIPS64::ma_mv(ValueOperand
 {
     as_dmtc1(src.valueReg(), dest);
 }
 
 void
 MacroAssemblerMIPS64::ma_ls(FloatRegister ft, Address address)
 {
     if (Imm16::IsInSignedRange(address.offset)) {
-        as_ls(ft, address.base, address.offset);
+        as_lwc1(ft, address.base, address.offset);
     } else {
         MOZ_ASSERT(address.base != ScratchRegister);
         ma_li(ScratchRegister, Imm32(address.offset));
         if (isLoongson()) {
             as_gslsx(ft, address.base, ScratchRegister, 0);
         } else {
             as_daddu(ScratchRegister, address.base, ScratchRegister);
-            as_ls(ft, ScratchRegister, 0);
+            as_lwc1(ft, ScratchRegister, 0);
         }
     }
 }
 
 void
 MacroAssemblerMIPS64::ma_ld(FloatRegister ft, Address address)
 {
     if (Imm16::IsInSignedRange(address.offset)) {
-        as_ld(ft, address.base, address.offset);
+        as_ldc1(ft, address.base, address.offset);
     } else {
         MOZ_ASSERT(address.base != ScratchRegister);
         ma_li(ScratchRegister, Imm32(address.offset));
         if (isLoongson()) {
             as_gsldx(ft, address.base, ScratchRegister, 0);
         } else {
             as_daddu(ScratchRegister, address.base, ScratchRegister);
-            as_ld(ft, ScratchRegister, 0);
+            as_ldc1(ft, ScratchRegister, 0);
         }
     }
 }
 
 void
 MacroAssemblerMIPS64::ma_sd(FloatRegister ft, Address address)
 {
     if (Imm16::IsInSignedRange(address.offset)) {
-        as_sd(ft, address.base, address.offset);
+        as_sdc1(ft, address.base, address.offset);
     } else {
         MOZ_ASSERT(address.base != ScratchRegister);
         ma_li(ScratchRegister, Imm32(address.offset));
         if (isLoongson()) {
             as_gssdx(ft, address.base, ScratchRegister, 0);
         } else {
             as_daddu(ScratchRegister, address.base, ScratchRegister);
-            as_sd(ft, ScratchRegister, 0);
+            as_sdc1(ft, ScratchRegister, 0);
         }
     }
 }
 
 void
 MacroAssemblerMIPS64::ma_ss(FloatRegister ft, Address address)
 {
     if (Imm16::IsInSignedRange(address.offset)) {
-        as_ss(ft, address.base, address.offset);
+        as_swc1(ft, address.base, address.offset);
     } else {
         MOZ_ASSERT(address.base != ScratchRegister);
         ma_li(ScratchRegister, Imm32(address.offset));
         if (isLoongson()) {
             as_gsssx(ft, address.base, ScratchRegister, 0);
         } else {
             as_daddu(ScratchRegister, address.base, ScratchRegister);
-            as_ss(ft, ScratchRegister, 0);
+            as_swc1(ft, ScratchRegister, 0);
         }
     }
 }
 
 void
-MacroAssemblerMIPS64::ma_pop(FloatRegister fs)
+MacroAssemblerMIPS64::ma_pop(FloatRegister f)
 {
-    ma_ld(fs, Address(StackPointer, 0));
+    as_ldc1(f, StackPointer, 0);
     as_daddiu(StackPointer, StackPointer, sizeof(double));
 }
 
 void
-MacroAssemblerMIPS64::ma_push(FloatRegister fs)
+MacroAssemblerMIPS64::ma_push(FloatRegister f)
 {
     as_daddiu(StackPointer, StackPointer, (int32_t)-sizeof(double));
-    ma_sd(fs, Address(StackPointer, 0));
+    as_sdc1(f, StackPointer, 0);
 }
 
 bool
 MacroAssemblerMIPS64Compat::buildOOLFakeExitFrame(void* fakeReturnAddr)
 {
     uint32_t descriptor = MakeFrameDescriptor(asMasm().framePushed(), JitFrame_IonJS,
                                               ExitFrameLayout::Size());
 
@@ -1084,29 +1084,16 @@ MacroAssemblerMIPS64Compat::loadPtr(wasm
 void
 MacroAssemblerMIPS64Compat::loadPrivate(const Address& address, Register dest)
 {
     loadPtr(address, dest);
     ma_dsll(dest, dest, Imm32(1));
 }
 
 void
-MacroAssemblerMIPS64Compat::loadDouble(const Address& address, FloatRegister dest)
-{
-    ma_ld(dest, address);
-}
-
-void
-MacroAssemblerMIPS64Compat::loadDouble(const BaseIndex& src, FloatRegister dest)
-{
-    computeScaledAddress(src, SecondScratchReg);
-    ma_ld(dest, Address(SecondScratchReg, src.offset));
-}
-
-void
 MacroAssemblerMIPS64Compat::loadUnalignedDouble(const wasm::MemoryAccessDesc& access,
                                                 const BaseIndex& src, Register temp, FloatRegister dest)
 {
     computeScaledAddress(src, SecondScratchReg);
     BufferOffset load;
     if (Imm16::IsInSignedRange(src.offset) && Imm16::IsInSignedRange(src.offset + 7)) {
         load = as_ldl(temp, SecondScratchReg, src.offset + 7);
         as_ldr(temp, SecondScratchReg, src.offset);
@@ -1116,43 +1103,16 @@ MacroAssemblerMIPS64Compat::loadUnaligne
         load = as_ldl(temp, ScratchRegister, 7);
         as_ldr(temp, ScratchRegister, 0);
     }
     append(access, load.getOffset(), asMasm().framePushed());
     moveToDouble(temp, dest);
 }
 
 void
-MacroAssemblerMIPS64Compat::loadFloatAsDouble(const Address& address, FloatRegister dest)
-{
-    ma_ls(dest, address);
-    as_cvtds(dest, dest);
-}
-
-void
-MacroAssemblerMIPS64Compat::loadFloatAsDouble(const BaseIndex& src, FloatRegister dest)
-{
-    loadFloat32(src, dest);
-    as_cvtds(dest, dest);
-}
-
-void
-MacroAssemblerMIPS64Compat::loadFloat32(const Address& address, FloatRegister dest)
-{
-    ma_ls(dest, address);
-}
-
-void
-MacroAssemblerMIPS64Compat::loadFloat32(const BaseIndex& src, FloatRegister dest)
-{
-    computeScaledAddress(src, SecondScratchReg);
-    ma_ls(dest, Address(SecondScratchReg, src.offset));
-}
-
-void
 MacroAssemblerMIPS64Compat::loadUnalignedFloat32(const wasm::MemoryAccessDesc& access,
                                                  const BaseIndex& src, Register temp, FloatRegister dest)
 {
     computeScaledAddress(src, SecondScratchReg);
     BufferOffset load;
     if (Imm16::IsInSignedRange(src.offset) && Imm16::IsInSignedRange(src.offset + 3)) {
         load = as_lwl(temp, SecondScratchReg, src.offset + 3);
         as_lwr(temp, SecondScratchReg, src.offset);
--- a/js/src/jit/mips64/MacroAssembler-mips64.h
+++ b/js/src/jit/mips64/MacroAssembler-mips64.h
@@ -46,16 +46,18 @@ static_assert(1 << defaultShift == sizeo
 
 class MacroAssemblerMIPS64 : public MacroAssemblerMIPSShared
 {
   public:
     using MacroAssemblerMIPSShared::ma_b;
     using MacroAssemblerMIPSShared::ma_li;
     using MacroAssemblerMIPSShared::ma_ss;
     using MacroAssemblerMIPSShared::ma_sd;
+    using MacroAssemblerMIPSShared::ma_ls;
+    using MacroAssemblerMIPSShared::ma_ld;
     using MacroAssemblerMIPSShared::ma_load;
     using MacroAssemblerMIPSShared::ma_store;
     using MacroAssemblerMIPSShared::ma_cmp_set;
     using MacroAssemblerMIPSShared::ma_subTestOverflow;
 
     void ma_li(Register dest, CodeOffset* label);
     void ma_li(Register dest, ImmWord imm);
     void ma_liPatchable(Register dest, ImmPtr imm);
@@ -128,23 +130,23 @@ class MacroAssemblerMIPS64 : public Macr
     void ma_bal(Label* l, DelaySlotFill delaySlotFill = FillDelaySlot);
 
     // fp instructions
     void ma_lid(FloatRegister dest, double value);
 
     void ma_mv(FloatRegister src, ValueOperand dest);
     void ma_mv(ValueOperand src, FloatRegister dest);
 
-    void ma_ls(FloatRegister fd, Address address);
-    void ma_ld(FloatRegister fd, Address address);
-    void ma_sd(FloatRegister fd, Address address);
-    void ma_ss(FloatRegister fd, Address address);
+    void ma_ls(FloatRegister ft, Address address);
+    void ma_ld(FloatRegister ft, Address address);
+    void ma_sd(FloatRegister ft, Address address);
+    void ma_ss(FloatRegister ft, Address address);
 
-    void ma_pop(FloatRegister fs);
-    void ma_push(FloatRegister fs);
+    void ma_pop(FloatRegister f);
+    void ma_push(FloatRegister f);
 
     void ma_cmp_set(Register dst, Register lhs, ImmWord imm, Condition c);
     void ma_cmp_set(Register dst, Register lhs, ImmPtr imm, Condition c);
 
     // These functions abstract the access to high part of the double precision
     // float register. They are intended to work on both 32 bit and 64 bit
     // floating point coprocessor.
     void moveToDoubleHi(Register src, FloatRegister dest) {
@@ -655,27 +657,18 @@ class MacroAssemblerMIPS64Compat : publi
 
     void loadAlignedSimd128Float(const Address& addr, FloatRegister dest) { MOZ_CRASH("NYI"); }
     void storeAlignedSimd128Float(FloatRegister src, Address addr) { MOZ_CRASH("NYI"); }
     void loadUnalignedSimd128Float(const Address& addr, FloatRegister dest) { MOZ_CRASH("NYI"); }
     void loadUnalignedSimd128Float(const BaseIndex& addr, FloatRegister dest) { MOZ_CRASH("NYI"); }
     void storeUnalignedSimd128Float(FloatRegister src, Address addr) { MOZ_CRASH("NYI"); }
     void storeUnalignedSimd128Float(FloatRegister src, BaseIndex addr) { MOZ_CRASH("NYI"); }
 
-    void loadDouble(const Address& addr, FloatRegister dest);
-    void loadDouble(const BaseIndex& src, FloatRegister dest);
     void loadUnalignedDouble(const wasm::MemoryAccessDesc& access, const BaseIndex& src,
                              Register temp, FloatRegister dest);
-
-    // Load a float value into a register, then expand it to a double.
-    void loadFloatAsDouble(const Address& addr, FloatRegister dest);
-    void loadFloatAsDouble(const BaseIndex& src, FloatRegister dest);
-
-    void loadFloat32(const Address& addr, FloatRegister dest);
-    void loadFloat32(const BaseIndex& src, FloatRegister dest);
     void loadUnalignedFloat32(const wasm::MemoryAccessDesc& access, const BaseIndex& src,
                               Register temp, FloatRegister dest);
 
     void store8(Register src, const Address& address);
     void store8(Imm32 imm, const Address& address);
     void store8(Register src, const BaseIndex& address);
     void store8(Imm32 imm, const BaseIndex& address);
 
--- a/js/src/jit/mips64/Trampoline-mips64.cpp
+++ b/js/src/jit/mips64/Trampoline-mips64.cpp
@@ -59,16 +59,18 @@ struct EnterJITRegs
     uint64_t s0;
     // Save reg_vp(a7) on stack, use it after call jit code.
     uint64_t a7;
 };
 
 static void
 GenerateReturn(MacroAssembler& masm, int returnCode)
 {
+    MOZ_ASSERT(masm.framePushed() == sizeof(EnterJITRegs));
+
     if (isLoongson()) {
         // Restore non-volatile registers
         masm.as_ld(s0, StackPointer, offsetof(EnterJITRegs, s0));
         masm.as_gslq(s1, s2, StackPointer, offsetof(EnterJITRegs, s2));
         masm.as_gslq(s3, s4, StackPointer, offsetof(EnterJITRegs, s4));
         masm.as_gslq(s5, s6, StackPointer, offsetof(EnterJITRegs, s6));
         masm.as_gslq(s7, fp, StackPointer, offsetof(EnterJITRegs, fp));
         masm.as_ld(ra, StackPointer, offsetof(EnterJITRegs, ra));
@@ -87,24 +89,24 @@ GenerateReturn(MacroAssembler& masm, int
         masm.as_ld(s4, StackPointer, offsetof(EnterJITRegs, s4));
         masm.as_ld(s5, StackPointer, offsetof(EnterJITRegs, s5));
         masm.as_ld(s6, StackPointer, offsetof(EnterJITRegs, s6));
         masm.as_ld(s7, StackPointer, offsetof(EnterJITRegs, s7));
         masm.as_ld(fp, StackPointer, offsetof(EnterJITRegs, fp));
         masm.as_ld(ra, StackPointer, offsetof(EnterJITRegs, ra));
 
         // Restore non-volatile floating point registers
-        masm.as_ld(f24, StackPointer, offsetof(EnterJITRegs, f24));
-        masm.as_ld(f25, StackPointer, offsetof(EnterJITRegs, f25));
-        masm.as_ld(f26, StackPointer, offsetof(EnterJITRegs, f26));
-        masm.as_ld(f27, StackPointer, offsetof(EnterJITRegs, f27));
-        masm.as_ld(f28, StackPointer, offsetof(EnterJITRegs, f28));
-        masm.as_ld(f29, StackPointer, offsetof(EnterJITRegs, f29));
-        masm.as_ld(f30, StackPointer, offsetof(EnterJITRegs, f30));
-        masm.as_ld(f31, StackPointer, offsetof(EnterJITRegs, f31));
+        masm.as_ldc1(f24, StackPointer, offsetof(EnterJITRegs, f24));
+        masm.as_ldc1(f25, StackPointer, offsetof(EnterJITRegs, f25));
+        masm.as_ldc1(f26, StackPointer, offsetof(EnterJITRegs, f26));
+        masm.as_ldc1(f27, StackPointer, offsetof(EnterJITRegs, f27));
+        masm.as_ldc1(f28, StackPointer, offsetof(EnterJITRegs, f28));
+        masm.as_ldc1(f29, StackPointer, offsetof(EnterJITRegs, f29));
+        masm.as_ldc1(f30, StackPointer, offsetof(EnterJITRegs, f30));
+        masm.as_ldc1(f31, StackPointer, offsetof(EnterJITRegs, f31));
     }
 
     masm.freeStack(sizeof(EnterJITRegs));
 
     masm.branch(ra);
 }
 
 static void
@@ -134,24 +136,24 @@ GeneratePrologue(MacroAssembler& masm)
     masm.as_sd(s4, StackPointer, offsetof(EnterJITRegs, s4));
     masm.as_sd(s5, StackPointer, offsetof(EnterJITRegs, s5));
     masm.as_sd(s6, StackPointer, offsetof(EnterJITRegs, s6));
     masm.as_sd(s7, StackPointer, offsetof(EnterJITRegs, s7));
     masm.as_sd(fp, StackPointer, offsetof(EnterJITRegs, fp));
     masm.as_sd(ra, StackPointer, offsetof(EnterJITRegs, ra));
     masm.as_sd(a7, StackPointer, offsetof(EnterJITRegs, a7));
 
-    masm.as_sd(f24, StackPointer, offsetof(EnterJITRegs, f24));
-    masm.as_sd(f25, StackPointer, offsetof(EnterJITRegs, f25));
-    masm.as_sd(f26, StackPointer, offsetof(EnterJITRegs, f26));
-    masm.as_sd(f27, StackPointer, offsetof(EnterJITRegs, f27));
-    masm.as_sd(f28, StackPointer, offsetof(EnterJITRegs, f28));
-    masm.as_sd(f29, StackPointer, offsetof(EnterJITRegs, f29));
-    masm.as_sd(f30, StackPointer, offsetof(EnterJITRegs, f30));
-    masm.as_sd(f31, StackPointer, offsetof(EnterJITRegs, f31));
+    masm.as_sdc1(f24, StackPointer, offsetof(EnterJITRegs, f24));
+    masm.as_sdc1(f25, StackPointer, offsetof(EnterJITRegs, f25));
+    masm.as_sdc1(f26, StackPointer, offsetof(EnterJITRegs, f26));
+    masm.as_sdc1(f27, StackPointer, offsetof(EnterJITRegs, f27));
+    masm.as_sdc1(f28, StackPointer, offsetof(EnterJITRegs, f28));
+    masm.as_sdc1(f29, StackPointer, offsetof(EnterJITRegs, f29));
+    masm.as_sdc1(f30, StackPointer, offsetof(EnterJITRegs, f30));
+    masm.as_sdc1(f31, StackPointer, offsetof(EnterJITRegs, f31));
 }
 
 
 // Generates a trampoline for calling Jit compiled code from a C++ function.
 // The trampoline use the EnterJitCode signature, with the standard x64 fastcall
 // calling convention.
 void
 JitRuntime::generateEnterJIT(JSContext* cx, MacroAssembler& masm)
@@ -791,17 +793,17 @@ JitRuntime::generateVMWrapper(JSContext*
 
       case Type_Bool:
         masm.load8ZeroExtend(Address(StackPointer, 0), ReturnReg);
         masm.freeStack(2 * sizeof(int32_t));
         break;
 
       case Type_Double:
         if (cx->runtime()->jitSupportsFloatingPoint) {
-            masm.as_ld(ReturnDoubleReg, StackPointer, 0);
+            masm.as_ldc1(ReturnDoubleReg, StackPointer, 0);
         } else {
             masm.assumeUnreachable("Unable to load into float reg, with no FP support.");
         }
         masm.freeStack(sizeof(double));
         break;
 
       default:
         MOZ_ASSERT(f.outParam == Type_Void);
--- a/js/src/wasm/WasmBaselineCompile.cpp
+++ b/js/src/wasm/WasmBaselineCompile.cpp
@@ -1186,17 +1186,17 @@ class BaseStackFrame
     // The size values come from the implementations of Push() in
     // MacroAssembler-x86-shared.cpp and MacroAssembler-arm-shared.cpp, and from
     // VFPRegister::size() in Architecture-arm.h.
     //
     // On ARM unlike on x86 we push a single for float.
 
     static const size_t StackSizeOfPtr    = sizeof(intptr_t);
     static const size_t StackSizeOfInt64  = sizeof(int64_t);
-#ifdef JS_CODEGEN_ARM
+#if defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_MIPS32)
     static const size_t StackSizeOfFloat  = sizeof(float);
 #else
     static const size_t StackSizeOfFloat  = sizeof(double);
 #endif
     static const size_t StackSizeOfDouble = sizeof(double);
 
     // We won't know until after we've generated code how big the frame will be
     // (we may need arbitrary spill slots and outgoing param slots) so emit a