Bug 900756 - Ionmonkey (ARM): add float32 support r=mjrosenb
author Jon Coppeard <jcoppeard@mozilla.com>
Sun, 15 Sep 2013 00:48:10 +1000
changeset 162189 e38bff7fe9c04c888213b7236b55b85c05f88eba
parent 162188 3456780c25fab45eecc389d6220304fa88809429
child 162190 23de7d509e70cf9f4b2256f23e6d4ac373788257
push id 428
push user bbajaj@mozilla.com
push date Tue, 28 Jan 2014 00:16:25 +0000
treeherder mozilla-release@cd72a7ff3a75 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers mjrosenb
bugs 900756
milestone 26.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 900756 - Ionmonkey (ARM): add float32 support r=mjrosenb
js/src/jit/arm/Assembler-arm.cpp
js/src/jit/arm/Assembler-arm.h
js/src/jit/arm/CodeGenerator-arm.cpp
js/src/jit/arm/CodeGenerator-arm.h
js/src/jit/arm/Lowering-arm.cpp
js/src/jit/arm/Lowering-arm.h
js/src/jit/arm/MacroAssembler-arm.cpp
js/src/jit/arm/MacroAssembler-arm.h
--- a/js/src/jit/arm/Assembler-arm.cpp
+++ b/js/src/jit/arm/Assembler-arm.cpp
@@ -1189,22 +1189,25 @@ jit::ror(Register r, Register amt)
 }
 
 O2RegRegShift
 jit::asr (Register r, Register amt)
 {
     return O2RegRegShift(r, ASR, amt);
 }
 
+static js::jit::DoubleEncoder doubleEncoder;
+
+/* static */ const js::jit::VFPImm js::jit::VFPImm::one(0x3FF00000);
 
 js::jit::VFPImm::VFPImm(uint32_t top)
 {
     data = -1;
     datastore::Imm8VFPImmData tmp;
-    if (DoubleEncoder::lookup(top, &tmp))
+    if (doubleEncoder.lookup(top, &tmp))
         data = tmp.encode();
 }
 
 BOffImm::BOffImm(Instruction &inst)
   : data(inst.encode() & 0x00ffffff)
 {
 }
 
@@ -1212,18 +1215,16 @@ Instruction *
 BOffImm::getDest(Instruction *src)
 {
     // TODO: It is probably worthwhile to verify that src is actually a branch
     // NOTE: This does not explicitly shift the offset of the destination left by 2,
     // since it is indexing into an array of instruction sized objects.
     return &src[(((int32_t)data<<8)>>8) + 2];
 }
 
-js::jit::DoubleEncoder js::jit::DoubleEncoder::_this;
-
 //VFPRegister implementation
 VFPRegister
 VFPRegister::doubleOverlay()
 {
     JS_ASSERT(!_isInvalid);
     if (kind != Double)
         return VFPRegister(_code >> 1, Double);
     return *this;
@@ -1580,76 +1581,83 @@ class PoolHintData {
         // accidentally left somewhere.
         poolBOGUS  = 0,
         poolDTR    = 1,
         poolBranch = 2,
         poolVDTR   = 3
     };
 
   private:
-    uint32_t   index    : 17;
+    uint32_t   index    : 16;
     uint32_t   cond     : 4;
-    LoadType loadType : 2;
+    LoadType   loadType : 2;
     uint32_t   destReg  : 5;
+    uint32_t   destType : 1;
     uint32_t   ONES     : 4;
 
+    static const uint32_t expectedOnes = 0xfu;
+
   public:
     void init(uint32_t index_, Assembler::Condition cond_, LoadType lt, const Register &destReg_) {
         index = index_;
         JS_ASSERT(index == index_);
         cond = cond_ >> 28;
         JS_ASSERT(cond == cond_ >> 28);
         loadType = lt;
-        ONES = 0xfu;
+        ONES = expectedOnes;
         destReg = destReg_.code();
+        destType = 0;
     }
     void init(uint32_t index_, Assembler::Condition cond_, LoadType lt, const VFPRegister &destReg_) {
+        JS_ASSERT(destReg_.isFloat());
         index = index_;
         JS_ASSERT(index == index_);
         cond = cond_ >> 28;
         JS_ASSERT(cond == cond_ >> 28);
         loadType = lt;
-        ONES = 0xfu;
+        ONES = expectedOnes;
         destReg = destReg_.code();
+        destType = destReg_.isDouble();
     }
     Assembler::Condition getCond() {
         return Assembler::Condition(cond << 28);
     }
 
     Register getReg() {
         return Register::FromCode(destReg);
     }
     VFPRegister getVFPReg() {
-        return VFPRegister(FloatRegister::FromCode(destReg));
+        return VFPRegister(FloatRegister::FromCode(destReg),
+                           destType ? VFPRegister::Double : VFPRegister::Single);
     }
 
     int32_t getIndex() {
         return index;
     }
     void setIndex(uint32_t index_) {
-        JS_ASSERT(ONES == 0xf && loadType != poolBOGUS);
+        JS_ASSERT(ONES == expectedOnes && loadType != poolBOGUS);
         index = index_;
         JS_ASSERT(index == index_);
     }
 
     LoadType getLoadType() {
         // If this *was* a poolBranch, but the branch has already been bound
         // then this isn't going to look like a real poolhintdata, but we still
         // want to lie about it so everyone knows it *used* to be a branch.
-        if (ONES != 0xf)
+        if (ONES != expectedOnes)
             return PoolHintData::poolBranch;
         return loadType;
     }
 
     bool isValidPoolHint() {
         // Most instructions cannot have a condition that is 0xf. Notable exceptions are
         // blx and the entire NEON instruction set. For the purposes of pool loads, and
         // possibly patched branches, the possible instructions are ldr and b, neither of
         // which can have a condition code of 0xf.
-        return ONES == 0xf;
+        return ONES == expectedOnes;
     }
 };
 
 union PoolHintPun {
     PoolHintData phd;
     uint32_t raw;
 };
 
@@ -1700,16 +1708,17 @@ Assembler::as_dtm(LoadStore ls, Register
 
 BufferOffset
 Assembler::as_Imm32Pool(Register dest, uint32_t value, ARMBuffer::PoolEntry *pe, Condition c)
 {
     PoolHintPun php;
     php.phd.init(0, c, PoolHintData::poolDTR, dest);
     return m_buffer.insertEntry(4, (uint8_t*)&php.raw, int32Pool, (uint8_t*)&value, pe);
 }
+
 void
 Assembler::as_WritePoolEntry(Instruction *addr, Condition c, uint32_t data)
 {
     JS_ASSERT(addr->is<InstLDR>());
     int32_t offset = addr->encode() & 0xfff;
     if ((addr->encode() & IsUp) != IsUp)
         offset = -offset;
     char * rawAddr = reinterpret_cast<char*>(addr);
@@ -1734,25 +1743,47 @@ Assembler::as_BranchPool(uint32_t value,
         BufferOffset dest(label);
         as_b(dest.diffB<BOffImm>(next), c, next);
     } else {
         label->use(next.getOffset());
     }
     return ret;
 }
 
-
 BufferOffset
 Assembler::as_FImm64Pool(VFPRegister dest, double value, ARMBuffer::PoolEntry *pe, Condition c)
 {
     JS_ASSERT(dest.isDouble());
     PoolHintPun php;
     php.phd.init(0, c, PoolHintData::poolVDTR, dest);
     return m_buffer.insertEntry(4, (uint8_t*)&php.raw, doublePool, (uint8_t*)&value, pe);
 }
+
+struct PaddedFloat32
+{
+    float value;
+    uint32_t padding;
+};
+JS_STATIC_ASSERT(sizeof(PaddedFloat32) == sizeof(double));
+
+BufferOffset
+Assembler::as_FImm32Pool(VFPRegister dest, float value, ARMBuffer::PoolEntry *pe, Condition c)
+{
+    /*
+     * Insert floats into the double pool as they have the same limitations on
+     * immediate offset.  This wastes 4 bytes padding per float.  An alternative
+     * would be to have a separate pool for floats.
+     */
+    JS_ASSERT(dest.isSingle());
+    PoolHintPun php;
+    php.phd.init(0, c, PoolHintData::poolVDTR, dest);
+    PaddedFloat32 pf = { value, 0 };
+    return m_buffer.insertEntry(4, (uint8_t*)&php.raw, doublePool, (uint8_t*)&pf, pe);
+}
+
 // Pool callbacks stuff:
 void
 Assembler::insertTokenIntoTag(uint32_t instSize, uint8_t *load_, int32_t token)
 {
     uint32_t *load = (uint32_t*) load_;
     PoolHintPun php;
     php.raw = *load;
     php.phd.setIndex(token);
@@ -1782,25 +1813,24 @@ Assembler::patchConstantPoolLoad(void* l
         // then we want to make sure this is a load from the pool entry (and the pool entry
         // should be NULL so it will crash).
         if (data.isValidPoolHint()) {
             dummy->as_dtr(IsLoad, 32, Offset, pc,
                           DTRAddr(pc, DtrOffImm(offset+4*data.getIndex() - 8)),
                           data.getCond(), instAddr);
         }
         break;
-      case PoolHintData::poolVDTR:
-        if ((offset + (8 * data.getIndex()) - 8) < -1023 ||
-            (offset + (8 * data.getIndex()) - 8) > 1023)
-        {
+      case PoolHintData::poolVDTR: {
+        VFPRegister dest = data.getVFPReg();
+        int32_t imm = offset + (8 * data.getIndex()) - 8;
+        if (imm < -1023 || imm  > 1023)
             return false;
-        }
-        dummy->as_vdtr(IsLoad, data.getVFPReg(),
-                       VFPAddr(pc, VFPOffImm(offset+8*data.getIndex() - 8)), data.getCond(), instAddr);
+        dummy->as_vdtr(IsLoad, dest, VFPAddr(pc, VFPOffImm(imm)), data.getCond(), instAddr);
         break;
+      }
     }
     return true;
 }
 
 uint32_t
 Assembler::placeConstantPoolBarrier(int offset)
 {
     // BUG: 700526
@@ -1904,17 +1934,16 @@ Assembler::as_bl(BOffImm off, Condition 
 
 BufferOffset
 Assembler::as_bl(Label *l, Condition c)
 {
     if (m_buffer.oom()) {
         BufferOffset ret;
         return ret;
     }
-    //as_bkpt();
     m_buffer.markNextAsBranch();
     if (l->bound()) {
         BufferOffset ret = as_nop();
         as_bl(BufferOffset(l).diffB<BOffImm>(ret), c, ret);
         return ret;
     }
 
     int32_t old;
@@ -1970,17 +1999,18 @@ Assembler::writeVFPInst(vfp_size sz, uin
 
 // Unityped variants: all registers hold the same (ieee754 single/double)
 // notably not included are vcvt; vmov vd, #imm; vmov rt, vn.
 BufferOffset
 Assembler::as_vfp_float(VFPRegister vd, VFPRegister vn, VFPRegister vm,
                   VFPOp op, Condition c)
 {
     // Make sure we believe that all of our operands are the same kind
-    JS_ASSERT(vd.equiv(vn) && vd.equiv(vm));
+    JS_ASSERT_IF(!vn.isMissing(), vd.equiv(vn));
+    JS_ASSERT_IF(!vm.isMissing(), vd.equiv(vm));
     vfp_size sz = vd.isDouble() ? isDouble : isSingle;
     return writeVFPInst(sz, VD(vd) | VN(vn) | VM(vm) | op | vfp_arith | c);
 }
 
 BufferOffset
 Assembler::as_vadd(VFPRegister vd, VFPRegister vn, VFPRegister vm,
                  Condition c)
 {
@@ -2199,22 +2229,18 @@ Assembler::as_vdtm(LoadStore st, Registe
     return writeVFPInst(sz, dtmLoadStore | RN(rn) | VD(vd) |
                         length |
                         dtmMode | dtmUpdate | dtmCond);
 }
 
 BufferOffset
 Assembler::as_vimm(VFPRegister vd, VFPImm imm, Condition c)
 {
+    JS_ASSERT(imm.isValid());
     vfp_size sz = vd.isDouble() ? isDouble : isSingle;
-
-    // Don't know how to handle this right now.
-    if (!vd.isDouble())
-        MOZ_ASSUME_UNREACHABLE("non-double immediate");
-
     return writeVFPInst(sz,  c | imm.encode() | VD(vd) | 0x02B00000);
 
 }
 BufferOffset
 Assembler::as_vmrs(Register r, Condition c)
 {
     return writeInst(c | 0x0ef10a10 | RT(r));
 }
--- a/js/src/jit/arm/Assembler-arm.h
+++ b/js/src/jit/arm/Assembler-arm.h
@@ -202,24 +202,24 @@ class VFPRegister
         JS_ASSERT(_code == (unsigned)fr.code());
     }
 
     VFPRegister(FloatRegister fr, RegType k)
       : kind(k), _code (fr.code()), _isInvalid(false), _isMissing(false)
     {
         JS_ASSERT(_code == (unsigned)fr.code());
     }
-    bool isDouble() { return kind == Double; }
-    bool isSingle() { return kind == Single; }
-    bool isFloat() { return (kind == Double) || (kind == Single); }
-    bool isInt() { return (kind == UInt) || (kind == Int); }
-    bool isSInt()   { return kind == Int; }
-    bool isUInt()   { return kind == UInt; }
-    bool equiv(VFPRegister other) { return other.kind == kind; }
-    size_t size() { return (kind == Double) ? 8 : 4; }
+    bool isDouble() const { return kind == Double; }
+    bool isSingle() const { return kind == Single; }
+    bool isFloat() const { return (kind == Double) || (kind == Single); }
+    bool isInt() const { return (kind == UInt) || (kind == Int); }
+    bool isSInt() const { return kind == Int; }
+    bool isUInt() const { return kind == UInt; }
+    bool equiv(VFPRegister other) const { return other.kind == kind; }
+    size_t size() const { return (kind == Double) ? 8 : 4; }
     bool isInvalid();
     bool isMissing();
 
     VFPRegister doubleOverlay();
     VFPRegister singleOverlay();
     VFPRegister sintOverlay();
     VFPRegister uintOverlay();
 
@@ -942,17 +942,19 @@ class VFPAddr
         return data;
     }
 };
 
 class VFPImm {
     uint32_t data;
 
   public:
-    VFPImm(uint32_t top);
+    static const VFPImm one;
+
+    VFPImm(uint32_t topWordOfDouble);
 
     uint32_t encode() {
         return data;
     }
     bool isValid() {
         return data != -1U;
     }
 };
@@ -1483,16 +1485,19 @@ class Assembler
     void as_WritePoolEntry(Instruction *addr, Condition c, uint32_t data);
     // load a 32 bit immediate from a pool into a register
     BufferOffset as_Imm32Pool(Register dest, uint32_t value, ARMBuffer::PoolEntry *pe = NULL, Condition c = Always);
     // make a patchable jump that can target the entire 32 bit address space.
     BufferOffset as_BranchPool(uint32_t value, RepatchLabel *label, ARMBuffer::PoolEntry *pe = NULL, Condition c = Always);
 
     // load a 64 bit floating point immediate from a pool into a register
     BufferOffset as_FImm64Pool(VFPRegister dest, double value, ARMBuffer::PoolEntry *pe = NULL, Condition c = Always);
+    // load a 32 bit floating point immediate from a pool into a register
+    BufferOffset as_FImm32Pool(VFPRegister dest, float value, ARMBuffer::PoolEntry *pe = NULL, Condition c = Always);
+
     // Control flow stuff:
 
     // bx can *only* branch to a register
     // never to an immediate.
     BufferOffset as_bx(Register r, Condition c = Always, bool isPatchable = false);
 
     // Branch can branch to an immediate *or* to a register.
     // Branches to immediates are pc relative, branches to registers
@@ -2177,23 +2182,25 @@ GetTempRegForIntArg(uint32_t usedIntArgs
 static inline uint32_t
 GetArgStackDisp(uint32_t arg)
 {
     JS_ASSERT(arg >= NumIntArgRegs);
     return (arg - NumIntArgRegs) * STACK_SLOT_SIZE;
 }
 
 #endif
+
 class DoubleEncoder {
     uint32_t rep(bool b, uint32_t count) {
         uint32_t ret = 0;
         for (uint32_t i = 0; i < count; i++)
             ret = (ret << 1) | b;
         return ret;
     }
+
     uint32_t encode(uint8_t value) {
         //ARM ARM "VFP modified immediate constants"
         // aBbbbbbb bbcdefgh 000...
         // we want to return the top 32 bits of the double
         // the rest are 0.
         bool a = value >> 7;
         bool b = value >> 6 & 1;
         bool B = !b;
@@ -2211,32 +2218,31 @@ class DoubleEncoder {
 
         DoubleEntry()
           : dblTop(-1)
         { }
         DoubleEntry(uint32_t dblTop_, datastore::Imm8VFPImmData data_)
           : dblTop(dblTop_), data(data_)
         { }
     };
-    DoubleEntry table [256];
 
-    // grumble singleton, grumble
-    static DoubleEncoder _this;
+    DoubleEntry table[256];
+
+  public:
     DoubleEncoder()
     {
         for (int i = 0; i < 256; i++) {
             table[i] = DoubleEntry(encode(i), datastore::Imm8VFPImmData(i));
         }
     }
 
-  public:
-    static bool lookup(uint32_t top, datastore::Imm8VFPImmData *ret) {
+    bool lookup(uint32_t top, datastore::Imm8VFPImmData *ret) {
         for (int i = 0; i < 256; i++) {
-            if (_this.table[i].dblTop == top) {
-                *ret = _this.table[i].data;
+            if (table[i].dblTop == top) {
+                *ret = table[i].data;
                 return true;
             }
         }
         return false;
     }
 };
 
 class AutoForbidPools {
--- a/js/src/jit/arm/CodeGenerator-arm.cpp
+++ b/js/src/jit/arm/CodeGenerator-arm.cpp
@@ -990,17 +990,17 @@ MoveOperand
 CodeGeneratorARM::toMoveOperand(const LAllocation *a) const
 {
     if (a->isGeneralReg())
         return MoveOperand(ToRegister(a));
     if (a->isFloatReg())
         return MoveOperand(ToFloatRegister(a));
     JS_ASSERT((ToStackOffset(a) & 3) == 0);
     int32_t offset = ToStackOffset(a);
-    
+
     // The way the stack slots work, we assume that everything from depth == 0 downwards is writable
     // however, since our frame is included in this, ensure that the frame gets skipped
     if (gen->compilingAsmJS())
         offset -= AlignmentMidPrologue;
 
     return MoveOperand(StackPointer, offset);
 }
 
@@ -1130,16 +1130,42 @@ CodeGeneratorARM::visitMathD(LMathD *mat
         break;
       default:
         MOZ_ASSUME_UNREACHABLE("unexpected opcode");
     }
     return true;
 }
 
 bool
+CodeGeneratorARM::visitMathF(LMathF *math)
+{
+    const LAllocation *src1 = math->getOperand(0);
+    const LAllocation *src2 = math->getOperand(1);
+    const LDefinition *output = math->getDef(0);
+
+    switch (math->jsop()) {
+      case JSOP_ADD:
+        masm.ma_vadd_f32(ToFloatRegister(src1), ToFloatRegister(src2), ToFloatRegister(output));
+        break;
+      case JSOP_SUB:
+        masm.ma_vsub_f32(ToFloatRegister(src1), ToFloatRegister(src2), ToFloatRegister(output));
+        break;
+      case JSOP_MUL:
+        masm.ma_vmul_f32(ToFloatRegister(src1), ToFloatRegister(src2), ToFloatRegister(output));
+        break;
+      case JSOP_DIV:
+        masm.ma_vdiv_f32(ToFloatRegister(src1), ToFloatRegister(src2), ToFloatRegister(output));
+        break;
+      default:
+        MOZ_ASSUME_UNREACHABLE("unexpected opcode");
+    }
+    return true;
+}
+
+bool
 CodeGeneratorARM::visitFloor(LFloor *lir)
 {
     FloatRegister input = ToFloatRegister(lir->input());
     Register output = ToRegister(lir->output());
     Label bail;
     masm.floor(input, output, &bail);
     if (!bailoutFrom(&bail, lir->snapshot()))
         return false;
@@ -1304,16 +1330,24 @@ CodeGeneratorARM::visitDouble(LDouble *i
 {
 
     const LDefinition *out = ins->getDef(0);
 
     masm.ma_vimm(ins->getDouble(), ToFloatRegister(out));
     return true;
 }
 
+bool
+CodeGeneratorARM::visitFloat32(LFloat32 *ins)
+{
+    const LDefinition *out = ins->getDef(0);
+    masm.loadConstantFloat32(ins->getFloat(), ToFloatRegister(out));
+    return true;
+}
+
 Register
 CodeGeneratorARM::splitTagForTest(const ValueOperand &value)
 {
     return value.typeReg();
 }
 
 bool
 CodeGeneratorARM::visitTestDAndBranch(LTestDAndBranch *test)
@@ -2080,8 +2114,16 @@ CodeGeneratorARM::visitNegI(LNegI *ins)
 
 bool
 CodeGeneratorARM::visitNegD(LNegD *ins)
 {
     FloatRegister input = ToFloatRegister(ins->input());
     masm.ma_vneg(input, ToFloatRegister(ins->output()));
     return true;
 }
+
+bool
+CodeGeneratorARM::visitNegF(LNegF *ins)
+{
+    FloatRegister input = ToFloatRegister(ins->input());
+    masm.ma_vneg_f32(input, ToFloatRegister(ins->output()));
+    return true;
+}
--- a/js/src/jit/arm/CodeGenerator-arm.h
+++ b/js/src/jit/arm/CodeGenerator-arm.h
@@ -96,16 +96,17 @@ class CodeGeneratorARM : public CodeGene
     virtual bool visitCompareV(LCompareV *lir);
     virtual bool visitCompareVAndBranch(LCompareVAndBranch *lir);
     virtual bool visitBitAndAndBranch(LBitAndAndBranch *baab);
     virtual bool visitUInt32ToDouble(LUInt32ToDouble *lir);
     virtual bool visitNotI(LNotI *ins);
     virtual bool visitNotD(LNotD *ins);
 
     virtual bool visitMathD(LMathD *math);
+    virtual bool visitMathF(LMathF *math);
     virtual bool visitFloor(LFloor *lir);
     virtual bool visitRound(LRound *lir);
     virtual bool visitTruncateDToInt32(LTruncateDToInt32 *ins);
 
     // Out of line visitors.
     bool visitOutOfLineBailout(OutOfLineBailout *ool);
     bool visitOutOfLineTableSwitch(OutOfLineTableSwitch *ool);
 
@@ -130,32 +131,34 @@ class CodeGeneratorARM : public CodeGene
 
   public:
     bool visitBox(LBox *box);
     bool visitBoxFloatingPoint(LBoxFloatingPoint *box);
     bool visitUnbox(LUnbox *unbox);
     bool visitValue(LValue *value);
     bool visitOsrValue(LOsrValue *value);
     bool visitDouble(LDouble *ins);
+    bool visitFloat32(LFloat32 *ins);
 
     bool visitLoadSlotV(LLoadSlotV *load);
     bool visitLoadSlotT(LLoadSlotT *load);
     bool visitStoreSlotT(LStoreSlotT *load);
 
     bool visitLoadElementT(LLoadElementT *load);
 
     bool visitGuardShape(LGuardShape *guard);
     bool visitGuardObjectType(LGuardObjectType *guard);
     bool visitGuardClass(LGuardClass *guard);
     bool visitImplicitThis(LImplicitThis *lir);
 
     bool visitInterruptCheck(LInterruptCheck *lir);
 
     bool visitNegI(LNegI *lir);
     bool visitNegD(LNegD *lir);
+    bool visitNegF(LNegF *lir);
     bool visitLoadTypedArrayElementStatic(LLoadTypedArrayElementStatic *ins);
     bool visitStoreTypedArrayElementStatic(LStoreTypedArrayElementStatic *ins);
     bool visitAsmJSLoadHeap(LAsmJSLoadHeap *ins);
     bool visitAsmJSStoreHeap(LAsmJSStoreHeap *ins);
     bool visitAsmJSLoadGlobalVar(LAsmJSLoadGlobalVar *ins);
     bool visitAsmJSStoreGlobalVar(LAsmJSStoreGlobalVar *ins);
     bool visitAsmJSLoadFuncPtr(LAsmJSLoadFuncPtr *ins);
     bool visitAsmJSLoadFFIFunc(LAsmJSLoadFFIFunc *ins);
--- a/js/src/jit/arm/Lowering-arm.cpp
+++ b/js/src/jit/arm/Lowering-arm.cpp
@@ -57,22 +57,29 @@ LIRGeneratorARM::useByteOpRegisterOrNonD
 
 bool
 LIRGeneratorARM::lowerConstantDouble(double d, MInstruction *mir)
 {
     return define(new LDouble(d), mir);
 }
 
 bool
+LIRGeneratorARM::lowerConstantFloat32(float d, MInstruction *mir)
+{
+    return define(new LFloat32(d), mir);
+}
+
+bool
 LIRGeneratorARM::visitConstant(MConstant *ins)
 {
-    if (ins->type() == MIRType_Double) {
-        LDouble *lir = new LDouble(ins->value().toDouble());
-        return define(lir, ins);
-    }
+    if (ins->type() == MIRType_Double)
+        return lowerConstantDouble(ins->value().toDouble(), ins);
+
+    if (ins->type() == MIRType_Float32)
+        return lowerConstantFloat32(ins->value().toDouble(), ins);
 
     // Emit non-double constants at their uses.
     if (ins->canEmitAtUses())
         return emitAtUses(ins);
 
     return LIRGeneratorShared::visitConstant(ins);
 }
 
--- a/js/src/jit/arm/Lowering-arm.h
+++ b/js/src/jit/arm/Lowering-arm.h
@@ -47,56 +47,52 @@ class LIRGeneratorARM : public LIRGenera
                      MDefinition *lhs, MDefinition *rhs);
 
     bool lowerForFPU(LInstructionHelper<1, 1, 0> *ins, MDefinition *mir,
                      MDefinition *src);
     bool lowerForFPU(LInstructionHelper<1, 2, 0> *ins, MDefinition *mir,
                      MDefinition *lhs, MDefinition *rhs);
     bool lowerForBitAndAndBranch(LBitAndAndBranch *baab, MInstruction *mir,
                                  MDefinition *lhs, MDefinition *rhs);
-
+    bool lowerConstantDouble(double d, MInstruction *ins);
+    bool lowerConstantFloat32(float d, MInstruction *ins);
     bool lowerTruncateDToInt32(MTruncateToInt32 *ins);
-
-    bool lowerConstantDouble(double d, MInstruction *ins);
     bool lowerDivI(MDiv *div);
     bool lowerModI(MMod *mod);
     bool lowerMulI(MMul *mul, MDefinition *lhs, MDefinition *rhs);
     bool lowerUDiv(MInstruction *div);
     bool lowerUMod(MInstruction *mod);
     bool visitPowHalf(MPowHalf *ins);
     bool visitAsmJSNeg(MAsmJSNeg *ins);
     bool visitAsmJSUDiv(MAsmJSUDiv *ins);
     bool visitAsmJSUMod(MAsmJSUMod *ins);
 
     LTableSwitch *newLTableSwitch(const LAllocation &in, const LDefinition &inputCopy,
                                   MTableSwitch *ins);
     LTableSwitchV *newLTableSwitchV(MTableSwitch *ins);
     LGetPropertyCacheT *newLGetPropertyCacheT(MGetPropertyCache *ins);
     LGetElementCacheT *newLGetElementCacheT(MGetElementCache *ins);
 
-    bool lowerConstantFloat32(float d, MInstruction *ins) {
-        MOZ_ASSUME_UNREACHABLE("NYI");
-    }
-    bool lowerTruncateFToInt32(MTruncateToInt32 *ins) {
-        MOZ_ASSUME_UNREACHABLE("NYI");
-    }
-
   public:
     bool visitConstant(MConstant *ins);
     bool visitBox(MBox *box);
     bool visitUnbox(MUnbox *unbox);
     bool visitReturn(MReturn *ret);
     bool lowerPhi(MPhi *phi);
     bool visitGuardShape(MGuardShape *ins);
     bool visitGuardObjectType(MGuardObjectType *ins);
     bool visitAsmJSUnsignedToDouble(MAsmJSUnsignedToDouble *ins);
     bool visitAsmJSLoadHeap(MAsmJSLoadHeap *ins);
     bool visitAsmJSStoreHeap(MAsmJSStoreHeap *ins);
     bool visitAsmJSLoadFuncPtr(MAsmJSLoadFuncPtr *ins);
     bool visitStoreTypedArrayElementStatic(MStoreTypedArrayElementStatic *ins);
+
+    static bool allowFloat32Optimizations() {
+        return true;
+    }
 };
 
 typedef LIRGeneratorARM LIRGeneratorSpecific;
 
 } // namespace jit
 } // namespace js
 
 #endif /* jit_arm_Lowering_arm_h */
--- a/js/src/jit/arm/MacroAssembler-arm.cpp
+++ b/js/src/jit/arm/MacroAssembler-arm.cpp
@@ -47,18 +47,18 @@ MacroAssemblerARM::convertInt32ToDouble(
     as_vxfer(src, InvalidReg, dest.sintOverlay(),
              CoreToFloat);
     as_vcvt(dest, dest.sintOverlay());
 }
 
 void
 MacroAssemblerARM::convertInt32ToDouble(const Address &src, FloatRegister dest)
 {
-    ma_ldr(Operand(src), ScratchRegister);
-    convertInt32ToDouble(ScratchRegister, dest);
+    ma_vldr(Operand(src), ScratchFloatReg);
+    as_vcvt(dest, VFPRegister(ScratchFloatReg).sintOverlay());
 }
 
 void
 MacroAssemblerARM::convertUInt32ToDouble(const Register &src, const FloatRegister &dest_)
 {
     // direct conversions aren't possible.
     VFPRegister dest = VFPRegister(dest_);
     as_vxfer(src, InvalidReg, dest.uintOverlay(),
@@ -112,16 +112,45 @@ MacroAssemblerARM::convertDoubleToInt32(
         // then the original value was -0.0
         as_vxfer(dest, InvalidReg, src, FloatToCore, Assembler::Equal, 1);
         ma_cmp(dest, Imm32(0x80000000), Assembler::Equal);
         ma_b(fail, Assembler::Equal);
     }
 }
 
 void
+MacroAssemblerARM::convertFloatToDouble(const FloatRegister &src, const FloatRegister &dest) {
+    as_vcvt(VFPRegister(dest), VFPRegister(src).singleOverlay());
+}
+
+void
+MacroAssemblerARM::branchTruncateFloat32(const FloatRegister &src, const Register &dest, Label *fail) {
+    ma_vcvt_F32_I32(src, ScratchFloatReg);
+    ma_vxfer(ScratchFloatReg, dest);
+    ma_cmp(dest, Imm32(0x7fffffff));
+    ma_cmp(dest, Imm32(0x80000000), Assembler::NotEqual);
+    ma_b(fail, Assembler::Equal);
+}
+
+void
+MacroAssemblerARM::convertInt32ToFloat32(const Register &src, const FloatRegister &dest_) {
+    // direct conversions aren't possible.
+    VFPRegister dest = VFPRegister(dest_).singleOverlay();
+    as_vxfer(src, InvalidReg, dest.sintOverlay(),
+             CoreToFloat);
+    as_vcvt(dest, dest.sintOverlay());
+}
+
+void
+MacroAssemblerARM::convertInt32ToFloat32(const Address &src, FloatRegister dest) {
+    ma_vldr(Operand(src), ScratchFloatReg);
+    as_vcvt(dest, VFPRegister(ScratchFloatReg).sintOverlay());
+}
+
+void
 MacroAssemblerARM::addDouble(FloatRegister src, FloatRegister dest)
 {
     ma_vadd(dest, src, dest);
 }
 
 void
 MacroAssemblerARM::subDouble(FloatRegister src, FloatRegister dest)
 {
@@ -1292,93 +1321,170 @@ MacroAssemblerARM::ma_blx(Register reg, 
 // VFP/ALU
 void
 MacroAssemblerARM::ma_vadd(FloatRegister src1, FloatRegister src2, FloatRegister dst)
 {
     as_vadd(VFPRegister(dst), VFPRegister(src1), VFPRegister(src2));
 }
 
 void
+MacroAssemblerARM::ma_vadd_f32(FloatRegister src1, FloatRegister src2, FloatRegister dst)
+{
+    as_vadd(VFPRegister(dst).singleOverlay(), VFPRegister(src1).singleOverlay(),
+            VFPRegister(src2).singleOverlay());
+}
+
+void
 MacroAssemblerARM::ma_vsub(FloatRegister src1, FloatRegister src2, FloatRegister dst)
 {
     as_vsub(VFPRegister(dst), VFPRegister(src1), VFPRegister(src2));
 }
 
 void
+MacroAssemblerARM::ma_vsub_f32(FloatRegister src1, FloatRegister src2, FloatRegister dst)
+{
+    as_vsub(VFPRegister(dst).singleOverlay(), VFPRegister(src1).singleOverlay(),
+            VFPRegister(src2).singleOverlay());
+}
+
+void
 MacroAssemblerARM::ma_vmul(FloatRegister src1, FloatRegister src2, FloatRegister dst)
 {
     as_vmul(VFPRegister(dst), VFPRegister(src1), VFPRegister(src2));
 }
 
 void
+MacroAssemblerARM::ma_vmul_f32(FloatRegister src1, FloatRegister src2, FloatRegister dst)
+{
+    as_vmul(VFPRegister(dst).singleOverlay(), VFPRegister(src1).singleOverlay(),
+            VFPRegister(src2).singleOverlay());
+}
+
+void
 MacroAssemblerARM::ma_vdiv(FloatRegister src1, FloatRegister src2, FloatRegister dst)
 {
     as_vdiv(VFPRegister(dst), VFPRegister(src1), VFPRegister(src2));
 }
 
 void
+MacroAssemblerARM::ma_vdiv_f32(FloatRegister src1, FloatRegister src2, FloatRegister dst)
+{
+    as_vdiv(VFPRegister(dst).singleOverlay(), VFPRegister(src1).singleOverlay(),
+            VFPRegister(src2).singleOverlay());
+}
+
+void
 MacroAssemblerARM::ma_vmov(FloatRegister src, FloatRegister dest, Condition cc)
 {
     as_vmov(dest, src, cc);
 }
 
 void
 MacroAssemblerARM::ma_vneg(FloatRegister src, FloatRegister dest, Condition cc)
 {
     as_vneg(dest, src, cc);
 }
 
 void
+MacroAssemblerARM::ma_vneg_f32(FloatRegister src, FloatRegister dest, Condition cc)
+{
+    as_vneg(VFPRegister(dest).singleOverlay(), VFPRegister(src).singleOverlay(), cc);
+}
+
+void
 MacroAssemblerARM::ma_vabs(FloatRegister src, FloatRegister dest, Condition cc)
 {
     as_vabs(dest, src, cc);
 }
 
 void
 MacroAssemblerARM::ma_vsqrt(FloatRegister src, FloatRegister dest, Condition cc)
 {
     as_vsqrt(dest, src, cc);
 }
 
+union DoublePun
+{
+    struct
+    {
+#if defined(IS_LITTLE_ENDIAN)
+        uint32_t lo, hi;
+#else
+        uint32_t hi, lo;
+#endif
+    } s;
+    double d;
+};
+
+static inline uint32_t
+DoubleHighWord(const double& value)
+{
+    const DoublePun *dpun = reinterpret_cast<const DoublePun *>(&value);
+    return dpun->s.hi;
+}
+
+static inline uint32_t
+DoubleLowWord(const double& value)
+{
+    const DoublePun *dpun = reinterpret_cast<const DoublePun *>(&value);
+    return dpun->s.lo;
+}
+
 void
 MacroAssemblerARM::ma_vimm(double value, FloatRegister dest, Condition cc)
 {
-    union DoublePun {
-        struct {
-#if defined(IS_LITTLE_ENDIAN)
-            uint32_t lo, hi;
-#else
-            uint32_t hi, lo;
-#endif
-        } s;
-        double d;
-    } dpun;
-    dpun.d = value;
     if (hasVFPv3()) {
-        if (dpun.s.lo == 0) {
-            if (dpun.s.hi == 0) {
+        if (DoubleLowWord(value) == 0) {
+            if (DoubleHighWord(value) == 0) {
                 // To zero a register, load 1.0, then execute dN <- dN - dN
-                VFPImm dblEnc(0x3FF00000);
-                as_vimm(dest, dblEnc, cc);
+                as_vimm(dest, VFPImm::one, cc);
                 as_vsub(dest, dest, dest, cc);
                 return;
             }
 
-            VFPImm dblEnc(dpun.s.hi);
-            if (dblEnc.isValid()) {
-                as_vimm(dest, dblEnc, cc);
+            VFPImm enc(DoubleHighWord(value));
+            if (enc.isValid()) {
+                as_vimm(dest, enc, cc);
                 return;
             }
 
         }
     }
     // Fall back to putting the value in a pool.
     as_FImm64Pool(dest, value, NULL, cc);
 }
 
+static inline uint32_t
+Float32Word(const float& value)
+{
+    return *reinterpret_cast<const uint32_t*>(&value);
+}
+
+void
+MacroAssemblerARM::ma_vimm_f32(float value, FloatRegister dest, Condition cc)
+{
+    VFPRegister vd = VFPRegister(dest).singleOverlay();
+    if (hasVFPv3()) {
+        if (Float32Word(value) == 0) {
+            // To zero a register, load 1.0, then execute sN <- sN - sN
+            as_vimm(vd, VFPImm::one, cc);
+            as_vsub(vd, vd, vd, cc);
+            return;
+        }
+
+        VFPImm enc(DoubleHighWord(double(value)));
+        if (enc.isValid()) {
+            as_vimm(vd, enc, cc);
+            return;
+        }
+    }
+    // Fall back to putting the value in a pool.
+    as_FImm32Pool(vd, value, NULL, cc);
+}
+
 void
 MacroAssemblerARM::ma_vcmp(FloatRegister src1, FloatRegister src2, Condition cc)
 {
     as_vcmp(VFPRegister(src1), VFPRegister(src2), cc);
 }
 void
 MacroAssemblerARM::ma_vcmpz(FloatRegister src1, Condition cc)
 {
@@ -1402,16 +1508,37 @@ MacroAssemblerARM::ma_vcvt_I32_F64(Float
 }
 void
 MacroAssemblerARM::ma_vcvt_U32_F64(FloatRegister dest, FloatRegister src, Condition cc)
 {
     as_vcvt(VFPRegister(dest), VFPRegister(src).uintOverlay(), false, cc);
 }
 
 void
+MacroAssemblerARM::ma_vcvt_F32_I32(FloatRegister src, FloatRegister dest, Condition cc)
+{
+    // Source is viewed as a float32, destination as a signed 32-bit integer.
+    VFPRegister from = VFPRegister(src).singleOverlay();
+    VFPRegister to = VFPRegister(dest).sintOverlay();
+    as_vcvt(to, from, false, cc);
+}
+void
+MacroAssemblerARM::ma_vcvt_F32_U32(FloatRegister src, FloatRegister dest, Condition cc)
+{
+    // Source is viewed as a float32, destination as an unsigned 32-bit integer.
+    VFPRegister from = VFPRegister(src).singleOverlay();
+    VFPRegister to = VFPRegister(dest).uintOverlay();
+    as_vcvt(to, from, false, cc);
+}
+// Convert the signed 32-bit integer held in |src| to a float32 in |dest|.
+// The parameter order is (src, dest), matching the declaration in
+// MacroAssembler-arm.h; previously the two names were swapped here, so a
+// caller following the header had its operands exchanged.
+void
+MacroAssemblerARM::ma_vcvt_I32_F32(FloatRegister src, FloatRegister dest, Condition cc)
+{
+    as_vcvt(VFPRegister(dest).singleOverlay(), VFPRegister(src).sintOverlay(), false, cc);
+}
+// Convert the unsigned 32-bit integer held in |src| to a float32 in |dest|.
+// The parameter order is (src, dest), matching the declaration in
+// MacroAssembler-arm.h; previously the two names were swapped here, so a
+// caller following the header had its operands exchanged.
+void
+MacroAssemblerARM::ma_vcvt_U32_F32(FloatRegister src, FloatRegister dest, Condition cc)
+{
+    as_vcvt(VFPRegister(dest).singleOverlay(), VFPRegister(src).uintOverlay(), false, cc);
+}
+
+void
 MacroAssemblerARM::ma_vxfer(FloatRegister src, Register dest, Condition cc)
 {
     as_vxfer(dest, InvalidReg, VFPRegister(src).singleOverlay(), FloatToCore, cc);
 }
 
 void
 MacroAssemblerARM::ma_vxfer(FloatRegister src, Register dest1, Register dest2, Condition cc)
 {
@@ -1944,37 +2071,57 @@ MacroAssemblerARMCompat::loadDouble(cons
 
     ma_vldr(Operand(ScratchRegister, offset), dest);
 }
 
 void
 MacroAssemblerARMCompat::loadFloatAsDouble(const Address &address, const FloatRegister &dest)
 {
     VFPRegister rt = dest;
-    ma_vdtr(IsLoad, address, rt.singleOverlay());
+    ma_vldr(Operand(address), rt.singleOverlay());
     as_vcvt(rt, rt.singleOverlay());
 }
 
 void
 MacroAssemblerARMCompat::loadFloatAsDouble(const BaseIndex &src, const FloatRegister &dest)
 {
     // VFP instructions don't even support register Base + register Index modes, so
     // just add the index, then handle the offset like normal
     Register base = src.base;
     Register index = src.index;
     uint32_t scale = Imm32::ShiftOf(src.scale).value;
     int32_t offset = src.offset;
     VFPRegister rt = dest;
     as_add(ScratchRegister, base, lsl(index, scale));
 
-    ma_vdtr(IsLoad, Operand(ScratchRegister, offset), rt.singleOverlay());
+    ma_vldr(Operand(ScratchRegister, offset), rt.singleOverlay());
     as_vcvt(rt, rt.singleOverlay());
 }
 
 void
+MacroAssemblerARMCompat::loadFloat(const Address &address, const FloatRegister &dest)
+{
+    // Load into the single-precision overlay of dest; no widening is performed.
+    VFPRegister single = VFPRegister(dest).singleOverlay();
+    ma_vldr(Operand(address), single);
+}
+
+void
+MacroAssemblerARMCompat::loadFloat(const BaseIndex &src, const FloatRegister &dest)
+{
+    // VFP loads cannot address base register + index register, so the scaled
+    // index is first folded into ScratchRegister and the constant offset is
+    // then applied relative to that.
+    uint32_t shift = Imm32::ShiftOf(src.scale).value;
+    Register baseReg = src.base;
+    Register indexReg = src.index;
+    as_add(ScratchRegister, baseReg, lsl(indexReg, shift));
+
+    int32_t disp = src.offset;
+    VFPRegister single = VFPRegister(dest).singleOverlay();
+    ma_vldr(Operand(ScratchRegister, disp), single);
+}
+
+void
 MacroAssemblerARMCompat::store8(const Imm32 &imm, const Address &address)
 {
     ma_mov(imm, secondScratchReg_);
     store8(secondScratchReg_, address);
 }
 
 void
 MacroAssemblerARMCompat::store8(const Register &src, const Address &address)
@@ -2253,16 +2400,53 @@ MacroAssemblerARMCompat::branchDouble(Do
         ma_b(label, VFP_Unordered);
         ma_b(label, VFP_Equal);
         return;
     }
 
     ma_b(label, ConditionFromDoubleCondition(cond));
 }
 
+void
+MacroAssemblerARMCompat::compareFloat(FloatRegister lhs, FloatRegister rhs)
+{
+    // Compare the single-precision views of the operands, setting the vector
+    // status flags. An invalid rhs selects the compare-with-zero form.
+    VFPRegister left = VFPRegister(lhs).singleOverlay();
+    if (rhs == InvalidFloatReg) {
+        as_vcmpz(left);
+    } else {
+        VFPRegister right = VFPRegister(rhs).singleOverlay();
+        as_vcmp(left, right);
+    }
+
+    // Copy the vector status bits into the normal status flags.
+    as_vmrs(pc);
+}
+
+void
+MacroAssemblerARMCompat::branchFloat(DoubleCondition cond, const FloatRegister &lhs,
+                                     const FloatRegister &rhs, Label *label)
+{
+    // Emit the float32 comparison first; every path below branches on its flags.
+    compareFloat(lhs, rhs);
+
+    if (cond == DoubleNotEqual) {
+        // An unordered result must not take the branch: hop over it.
+        Label skip;
+        ma_b(&skip, VFP_Unordered);
+        ma_b(label, VFP_NotEqualOrUnordered);
+        bind(&skip);
+    } else if (cond == DoubleEqualOrUnordered) {
+        // Needs two branches: one for unordered, one for equal.
+        ma_b(label, VFP_Unordered);
+        ma_b(label, VFP_Equal);
+    } else {
+        // Every other condition maps onto a single conditional branch.
+        ma_b(label, ConditionFromDoubleCondition(cond));
+    }
+}
+
 // higher level tag testing code
 Operand ToPayload(Operand base) {
     return Operand(Register::FromCode(base.base()), base.disp());
 }
 Operand ToType(Operand base) {
     return Operand(Register::FromCode(base.base()), base.disp() + sizeof(void *));
 }
 
@@ -2688,16 +2872,49 @@ MacroAssemblerARMCompat::int32ValueToDou
     VFPRegister vfpdest = VFPRegister(dest);
     as_vxfer(operand.payloadReg(), InvalidReg,
              vfpdest.sintOverlay(), CoreToFloat);
     // convert the value to a double.
     as_vcvt(vfpdest, vfpdest.sintOverlay());
 }
 
 void
+MacroAssemblerARMCompat::boolValueToFloat32(const ValueOperand &operand, const FloatRegister &dest)
+{
+    // Start from 1.0, then test the boolean payload against zero.
+    ma_vimm_f32(1.0f, dest);
+    ma_cmp(operand.payloadReg(), Imm32(0));
+
+    // When the payload was 0, dest - dest turns the 1.0 into 0.
+    VFPRegister single = VFPRegister(dest).singleOverlay();
+    as_vsub(single, single, single, Equal);
+}
+
+void
+MacroAssemblerARMCompat::int32ValueToFloat32(const ValueOperand &operand, const FloatRegister &dest)
+{
+    VFPRegister asFloat = VFPRegister(dest).singleOverlay();
+    VFPRegister asInt = asFloat.sintOverlay();
+
+    // Move the integer payload from the core register into the VFP register...
+    as_vxfer(operand.payloadReg(), InvalidReg, asInt, CoreToFloat);
+    // ...then convert it in place to a float.
+    as_vcvt(asFloat, asInt);
+}
+
+void
+MacroAssemblerARMCompat::loadConstantFloat32(float f, const FloatRegister &dest)
+{
+    // Unconditionally materialise the float32 constant in dest.
+    ma_vimm_f32(f, dest, Always);
+}
+
+void
+MacroAssemblerARMCompat::loadStaticFloat32(const float *fp, const FloatRegister &dest)
+{
+    // The pointee is read now, at assembly time, and emitted as a constant.
+    const float constant = *fp;
+    loadConstantFloat32(constant, dest);
+}
+
+void
 MacroAssemblerARMCompat::loadInt32OrDouble(const Operand &src, const FloatRegister &dest)
 {
     Label notInt32, end;
     // If it's an int, convert it to double.
     ma_ldr(ToType(src), ScratchRegister);
     branchTestInt32(Assembler::NotEqual, ScratchRegister, &notInt32);
     ma_ldr(ToPayload(src), ScratchRegister);
     convertInt32ToDouble(ScratchRegister, dest);
--- a/js/src/jit/arm/MacroAssembler-arm.h
+++ b/js/src/jit/arm/MacroAssembler-arm.h
@@ -49,28 +49,20 @@ class MacroAssemblerARM : public Assembl
     void convertInt32ToDouble(const Register &src, const FloatRegister &dest);
     void convertInt32ToDouble(const Address &src, FloatRegister dest);
     void convertUInt32ToDouble(const Register &src, const FloatRegister &dest);
     void convertDoubleToFloat(const FloatRegister &src, const FloatRegister &dest);
     void branchTruncateDouble(const FloatRegister &src, const Register &dest, Label *fail);
     void convertDoubleToInt32(const FloatRegister &src, const Register &dest, Label *fail,
                               bool negativeZeroCheck = true);
 
-    void convertFloatToDouble(const FloatRegister &src, const FloatRegister &dest) {
-        MOZ_ASSUME_UNREACHABLE("NYI");
-    }
-    void branchTruncateFloat32(const FloatRegister &src, const Register &dest, Label *fail) {
-        MOZ_ASSUME_UNREACHABLE("NYI");
-    }
-    void convertInt32ToFloat32(const Register &src, const FloatRegister &dest) {
-        MOZ_ASSUME_UNREACHABLE("NYI");
-    }
-    void convertInt32ToFloat32(const Address &src, FloatRegister dest) {
-        MOZ_ASSUME_UNREACHABLE("NYI");
-    }
+    void convertFloatToDouble(const FloatRegister &src, const FloatRegister &dest);
+    void branchTruncateFloat32(const FloatRegister &src, const Register &dest, Label *fail);
+    void convertInt32ToFloat32(const Register &src, const FloatRegister &dest);
+    void convertInt32ToFloat32(const Address &src, FloatRegister dest);
 
     void addDouble(FloatRegister src, FloatRegister dest);
     void subDouble(FloatRegister src, FloatRegister dest);
     void mulDouble(FloatRegister src, FloatRegister dest);
     void divDouble(FloatRegister src, FloatRegister dest);
 
     void negateDouble(FloatRegister reg);
     void inc64(AbsoluteAddress dest);
@@ -322,28 +314,45 @@ class MacroAssemblerARM : public Assembl
 
     void ma_vneg(FloatRegister src, FloatRegister dest, Condition cc = Always);
     void ma_vmov(FloatRegister src, FloatRegister dest, Condition cc = Always);
     void ma_vabs(FloatRegister src, FloatRegister dest, Condition cc = Always);
 
     void ma_vsqrt(FloatRegister src, FloatRegister dest, Condition cc = Always);
 
     void ma_vimm(double value, FloatRegister dest, Condition cc = Always);
+    void ma_vimm_f32(float value, FloatRegister dest, Condition cc = Always);
 
     void ma_vcmp(FloatRegister src1, FloatRegister src2, Condition cc = Always);
     void ma_vcmpz(FloatRegister src1, Condition cc = Always);
 
+    void ma_vadd_f32(FloatRegister src1, FloatRegister src2, FloatRegister dst);
+    void ma_vsub_f32(FloatRegister src1, FloatRegister src2, FloatRegister dst);
+
+    void ma_vmul_f32(FloatRegister src1, FloatRegister src2, FloatRegister dst);
+    void ma_vdiv_f32(FloatRegister src1, FloatRegister src2, FloatRegister dst);
+
+    void ma_vneg_f32(FloatRegister src, FloatRegister dest, Condition cc = Always);
+
     // source is F64, dest is I32
     void ma_vcvt_F64_I32(FloatRegister src, FloatRegister dest, Condition cc = Always);
     void ma_vcvt_F64_U32(FloatRegister src, FloatRegister dest, Condition cc = Always);
 
     // source is I32, dest is F64
     void ma_vcvt_I32_F64(FloatRegister src, FloatRegister dest, Condition cc = Always);
     void ma_vcvt_U32_F64(FloatRegister src, FloatRegister dest, Condition cc = Always);
 
+    // source is F32, dest is I32
+    void ma_vcvt_F32_I32(FloatRegister src, FloatRegister dest, Condition cc = Always);
+    void ma_vcvt_F32_U32(FloatRegister src, FloatRegister dest, Condition cc = Always);
+
+    // source is I32, dest is F32
+    void ma_vcvt_I32_F32(FloatRegister src, FloatRegister dest, Condition cc = Always);
+    void ma_vcvt_U32_F32(FloatRegister src, FloatRegister dest, Condition cc = Always);
+
     void ma_vxfer(FloatRegister src, Register dest, Condition cc = Always);
     void ma_vxfer(FloatRegister src, Register dest1, Register dest2, Condition cc = Always);
 
     void ma_vxfer(VFPRegister src, Register dest, Condition cc = Always);
     void ma_vxfer(VFPRegister src, Register dest1, Register dest2, Condition cc = Always);
 
     void ma_vxfer(Register src1, Register src2, FloatRegister dest, Condition cc = Always);
 
@@ -770,28 +779,20 @@ class MacroAssemblerARMCompat : public M
     void loadStaticDouble(const double *dp, const FloatRegister &dest);
     void loadConstantDouble(double dp, const FloatRegister &dest);
     // treat the value as a boolean, and set condition codes accordingly
     Condition testInt32Truthy(bool truthy, const ValueOperand &operand);
     Condition testBooleanTruthy(bool truthy, const ValueOperand &operand);
     Condition testDoubleTruthy(bool truthy, const FloatRegister &reg);
     Condition testStringTruthy(bool truthy, const ValueOperand &value);
 
-    void boolValueToFloat32(const ValueOperand &operand, const FloatRegister &dest) {
-        MOZ_ASSUME_UNREACHABLE("NYI");
-    }
-    void int32ValueToFloat32(const ValueOperand &operand, const FloatRegister &dest) {
-        MOZ_ASSUME_UNREACHABLE("NYI");
-    }
-    void loadStaticFloat32(const float *dp, const FloatRegister &dest) {
-        MOZ_ASSUME_UNREACHABLE("NYI");
-    }
-    void loadConstantFloat32(const float dp, const FloatRegister &dest) {
-        MOZ_ASSUME_UNREACHABLE("NYI");
-    }
+    void boolValueToFloat32(const ValueOperand &operand, const FloatRegister &dest);
+    void int32ValueToFloat32(const ValueOperand &operand, const FloatRegister &dest);
+    void loadStaticFloat32(const float *fp, const FloatRegister &dest);
+    void loadConstantFloat32(float f, const FloatRegister &dest);
 
     template<typename T>
     void branchTestInt32(Condition cond, const T & t, Label *label) {
         Condition c = testInt32(cond, t);
         ma_b(label, c);
     }
     template<typename T>
     void branchTestBoolean(Condition cond, const T & t, Label *label) {
@@ -1235,22 +1236,18 @@ class MacroAssemblerARMCompat : public M
 
     void loadDouble(const Address &addr, const FloatRegister &dest);
     void loadDouble(const BaseIndex &src, const FloatRegister &dest);
 
     // Load a float value into a register, then expand it to a double.
     void loadFloatAsDouble(const Address &addr, const FloatRegister &dest);
     void loadFloatAsDouble(const BaseIndex &src, const FloatRegister &dest);
 
-    void loadFloat(const Address &addr, const FloatRegister &dest) {
-        MOZ_ASSUME_UNREACHABLE("NYI");
-    }
-    void loadFloat(const BaseIndex &src, const FloatRegister &dest) {
-        MOZ_ASSUME_UNREACHABLE("NYI");
-    }
+    void loadFloat(const Address &addr, const FloatRegister &dest);
+    void loadFloat(const BaseIndex &src, const FloatRegister &dest);
 
     void store8(const Register &src, const Address &address);
     void store8(const Imm32 &imm, const Address &address);
     void store8(const Register &src, const BaseIndex &address);
     void store8(const Imm32 &imm, const BaseIndex &address);
 
     void store16(const Register &src, const Address &address);
     void store16(const Imm32 &imm, const Address &address);
@@ -1329,20 +1326,19 @@ class MacroAssemblerARMCompat : public M
     void breakpoint();
     // conditional breakpoint
     void breakpoint(Condition cc);
 
     void compareDouble(FloatRegister lhs, FloatRegister rhs);
     void branchDouble(DoubleCondition cond, const FloatRegister &lhs, const FloatRegister &rhs,
                       Label *label);
 
+    void compareFloat(FloatRegister lhs, FloatRegister rhs);
     void branchFloat(DoubleCondition cond, const FloatRegister &lhs, const FloatRegister &rhs,
-                      Label *label) {
-        MOZ_ASSUME_UNREACHABLE("NYI");
-    }
+                     Label *label);
 
     void checkStackAlignment();
 
     void rshiftPtr(Imm32 imm, Register dest) {
         ma_lsr(imm, dest, dest);
     }
     void lshiftPtr(Imm32 imm, Register dest) {
         ma_lsl(imm, dest, dest);