Bug 930477: Specialize Math.floor for Float32; r=jandem,jonco
author Benjamin Bouvier <benj@benj.me>
Mon, 28 Oct 2013 14:24:19 +0100
changeset 152497 f55bec1817908cf80e0c9656a557e3465dc59ae9
parent 152496 75ea7b7bb293c16b0e7a6b101d66ecac64ff992c
child 152498 7f7d8aa3e66f5f2dcb284d45a64800b1de1aff6c
push id 25551
push user philringnalda@gmail.com
push date Mon, 28 Oct 2013 23:57:55 +0000
treeherder mozilla-central@518f5bff0ae4 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers jandem, jonco
bugs 930477
milestone 27.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 930477: Specialize Math.floor for Float32; r=jandem,jonco
js/src/assembler/assembler/X86Assembler.h
js/src/jit-test/tests/ion/testFloat32-correctness.js
js/src/jit/CodeGenerator.cpp
js/src/jit/LIR-Common.h
js/src/jit/LOpcodes.h
js/src/jit/Lowering.cpp
js/src/jit/MCallOptimize.cpp
js/src/jit/MIR.cpp
js/src/jit/MIR.h
js/src/jit/arm/CodeGenerator-arm.cpp
js/src/jit/arm/CodeGenerator-arm.h
js/src/jit/arm/MacroAssembler-arm.cpp
js/src/jit/arm/MacroAssembler-arm.h
js/src/jit/shared/Assembler-x86-shared.h
js/src/jit/shared/CodeGenerator-x86-shared.cpp
js/src/jit/shared/CodeGenerator-x86-shared.h
js/src/jit/x64/MacroAssembler-x64.cpp
js/src/jit/x64/MacroAssembler-x64.h
js/src/jit/x86/MacroAssembler-x86.cpp
js/src/jit/x86/MacroAssembler-x86.h
--- a/js/src/assembler/assembler/X86Assembler.h
+++ b/js/src/assembler/assembler/X86Assembler.h
@@ -317,16 +317,17 @@ private:
         OP2_MOVSX_GvEb      = 0xBE,
         OP2_MOVSX_GvEw      = 0xBF,
         OP2_MOVZX_GvEb      = 0xB6,
         OP2_MOVZX_GvEw      = 0xB7,
         OP2_PEXTRW_GdUdIb   = 0xC5
     } TwoByteOpcodeID;
 
     typedef enum {
+        OP3_ROUNDSS_VsdWsd  = 0x0A,
         OP3_ROUNDSD_VsdWsd  = 0x0B,
         OP3_PTEST_VdVd      = 0x17,
         OP3_PINSRD_VsdWsd   = 0x22
     } ThreeByteOpcodeID;
 
     typedef enum {
         ESCAPE_PTEST        = 0x38,
         ESCAPE_PINSRD       = 0x3A,
@@ -735,16 +736,21 @@ public:
         spew("fisttp     %s0x%x(%s)", PRETTY_PRINT_OFFSET(offset), nameIReg(base));
         m_formatter.oneByteOp(OP_FPU6, FPU6_OP_FISTTP, base, offset);
     }
     void fstp_m(int offset, RegisterID base)
     {
         spew("fstp       %s0x%x(%s)", PRETTY_PRINT_OFFSET(offset), nameIReg(base));
         m_formatter.oneByteOp(OP_FPU6, FPU6_OP_FSTP, base, offset);
     }
+    void fstp32_m(int offset, RegisterID base) // 32-bit counterpart of fstp_m: stores a float at offset(base)
+    {
+        spew("fstp32     %s0x%x(%s)", PRETTY_PRINT_OFFSET(offset), nameIReg(base));
+        m_formatter.oneByteOp(OP_FLD32, FPU6_OP_FSTP, base, offset); // same FPU6_OP_FSTP group digit as fstp_m, under the OP_FLD32 opcode byte
+    }
 
     void negl_r(RegisterID dst)
     {
         spew("negl       %s", nameIReg(4,dst));
         m_formatter.oneByteOp(OP_GROUP3_Ev, GROUP3_OP_NEG, dst);
     }
 
     void negl_m(int offset, RegisterID base)
@@ -2933,16 +2939,25 @@ public:
     {
         spew("roundsd    %s, %s, %d",
              nameFPReg(src), nameFPReg(dst), (int)mode);
         m_formatter.prefix(PRE_SSE_66);
         m_formatter.threeByteOp(OP3_ROUNDSD_VsdWsd, ESCAPE_ROUNDSD, (RegisterID)dst, (RegisterID)src);
         m_formatter.immediate8(mode);
     }
 
+    void roundss_rr(XMMRegisterID src, XMMRegisterID dst, RoundingMode mode) // single-precision sibling of roundsd_rr
+    {
+        spew("roundss    %s, %s, %d",
+             nameFPReg(src), nameFPReg(dst), (int)mode);
+        m_formatter.prefix(PRE_SSE_66);
+        m_formatter.threeByteOp(OP3_ROUNDSS_VsdWsd, ESCAPE_ROUNDSD, (RegisterID)dst, (RegisterID)src); // reuses roundsd's escape byte; only the opcode differs
+        m_formatter.immediate8(mode); // rounding-mode immediate: modes are the same for roundsd and roundss
+    }
+
     void pinsrd_rr(RegisterID src, XMMRegisterID dst)
     {
         spew("pinsrd     $1, %s, %s",
              nameIReg(src), nameFPReg(dst));
         m_formatter.prefix(PRE_SSE_66);
         m_formatter.threeByteOp(OP3_PINSRD_VsdWsd, ESCAPE_PINSRD, (RegisterID)dst, (RegisterID)src);
         m_formatter.immediate8(0x01); // the $1
     }
--- a/js/src/jit-test/tests/ion/testFloat32-correctness.js
+++ b/js/src/jit-test/tests/ion/testFloat32-correctness.js
@@ -222,8 +222,46 @@ test(setupTrigo, trigo);
 function otherMath() {
     for (var i = 0; i < 10; ++i) {
         assertNear(Math.fround(Math.exp(f32[i])), Math.exp(f32[i]));
         assertNear(Math.fround(Math.log(f32[i])), Math.log(f32[i]));
     }
 };
 test(setupComp, otherMath);
 
+function setupFloor() {
+    f32[0] = -5.5;
+    f32[1] = -0.5;
+    f32[2] = 0;
+    f32[3] = 1.5;
+}
+function setupFloorDouble() { // inputs whose floor is not an int32: NaN, -0, +/-Infinity and 2^31
+    f32[4] = NaN;
+    f32[5] = -0;
+    f32[6] = Infinity;
+    f32[7] = -Infinity;
+    f32[8] = Math.pow(2,31); // too big to fit into an int32
+}
+function testFloor() { // floor of the values written by setupFloor (indices 0..3) must agree on both paths
+    for (var i = 0; i < 4; ++i) {
+        var f = Math.floor(f32[i]);
+        assertFloat32(f, false); // f is an int32
+
+        var g = Math.floor(-0 + f32[i]); // NOTE(review): "-0 + x" presumably forces a double computation - confirm
+        assertFloat32(g, false);
+
+        assertEq(f, g);
+    }
+}
+function testFloorDouble() { // same comparison for the values written by setupFloorDouble (indices 4..8)
+    for (var i = 4; i < 9; ++i) {
+        var f = Math.fround(Math.floor(f32[i])); // result is wrapped in Math.fround, so a float32 is expected here
+        assertFloat32(f, true);
+
+        var g = Math.floor(-0 + f32[i]); // NOTE(review): "-0 + x" presumably forces a double computation - confirm
+        assertFloat32(g, false);
+
+        assertEq(f, g);
+    }
+}
+test(setupFloor, testFloor);
+test(setupFloorDouble, testFloorDouble);
+
--- a/js/src/jit/CodeGenerator.cpp
+++ b/js/src/jit/CodeGenerator.cpp
@@ -3894,29 +3894,30 @@ CodeGenerator::visitMathFunctionF(LMathF
     FloatRegister input = ToFloatRegister(ins->input());
     JS_ASSERT(ToFloatRegister(ins->output()) == ReturnFloatReg);
 
     masm.setupUnalignedABICall(1, temp);
     masm.passABIArg(input);
 
     void *funptr = nullptr;
     switch (ins->mir()->function()) {
-      case MMathFunction::Log:  funptr = JS_FUNC_TO_DATA_PTR(void *, logf);  break;
-      case MMathFunction::Sin:  funptr = JS_FUNC_TO_DATA_PTR(void *, sinf);  break;
-      case MMathFunction::Cos:  funptr = JS_FUNC_TO_DATA_PTR(void *, cosf);  break;
-      case MMathFunction::Exp:  funptr = JS_FUNC_TO_DATA_PTR(void *, expf);  break;
-      case MMathFunction::Tan:  funptr = JS_FUNC_TO_DATA_PTR(void *, tanf);  break;
-      case MMathFunction::ATan: funptr = JS_FUNC_TO_DATA_PTR(void *, atanf); break;
-      case MMathFunction::ASin: funptr = JS_FUNC_TO_DATA_PTR(void *, sinf);  break;
-      case MMathFunction::ACos: funptr = JS_FUNC_TO_DATA_PTR(void *, acosf); break;
+      case MMathFunction::Log:   funptr = JS_FUNC_TO_DATA_PTR(void *, logf);   break;
+      case MMathFunction::Sin:   funptr = JS_FUNC_TO_DATA_PTR(void *, sinf);   break;
+      case MMathFunction::Cos:   funptr = JS_FUNC_TO_DATA_PTR(void *, cosf);   break;
+      case MMathFunction::Exp:   funptr = JS_FUNC_TO_DATA_PTR(void *, expf);   break;
+      case MMathFunction::Tan:   funptr = JS_FUNC_TO_DATA_PTR(void *, tanf);   break;
+      case MMathFunction::ATan:  funptr = JS_FUNC_TO_DATA_PTR(void *, atanf);  break;
+      case MMathFunction::ASin:  funptr = JS_FUNC_TO_DATA_PTR(void *, asinf);  break;
+      case MMathFunction::ACos:  funptr = JS_FUNC_TO_DATA_PTR(void *, acosf);  break;
+      case MMathFunction::Floor: funptr = JS_FUNC_TO_DATA_PTR(void *, floorf); break;
       default:
         MOZ_ASSUME_UNREACHABLE("Unknown or unsupported float32 math function");
     }
 
-    masm.callWithABI(funptr, MacroAssembler::DOUBLE);
+    masm.callWithABI(funptr, MacroAssembler::FLOAT);
     return true;
 }
 
 bool
 CodeGenerator::visitModD(LModD *ins)
 {
     FloatRegister lhs = ToFloatRegister(ins->lhs());
     FloatRegister rhs = ToFloatRegister(ins->rhs());
--- a/js/src/jit/LIR-Common.h
+++ b/js/src/jit/LIR-Common.h
@@ -4361,28 +4361,35 @@ class LStringLength : public LInstructio
         setOperand(0, string);
     }
 
     const LAllocation *string() {
         return getOperand(0);
     }
 };
 
-// Take the floor of a number. Implements Math.floor().
+// Take the floor of a double precision number. Implements Math.floor().
 class LFloor : public LInstructionHelper<1, 1, 0>
 {
   public:
     LIR_HEADER(Floor)
 
     LFloor(const LAllocation &num) {
         setOperand(0, num);
     }
-
-    MRound *mir() const {
-        return mir_->toRound();
+};
+
+// Take the floor of a single precision number. Implements Math.floor().
+class LFloorF : public LInstructionHelper<1, 1, 0>
+{
+  public:
+    LIR_HEADER(FloorF)
+
+    LFloorF(const LAllocation &num) {
+        setOperand(0, num);
     }
 };
 
 // Round a number. Implements Math.round().
 class LRound : public LInstructionHelper<1, 1, 1>
 {
   public:
     LIR_HEADER(Round)
--- a/js/src/jit/LOpcodes.h
+++ b/js/src/jit/LOpcodes.h
@@ -241,16 +241,17 @@
     _(SetFrameArgumentC)            \
     _(SetFrameArgumentV)            \
     _(RunOncePrologue)              \
     _(Rest)                         \
     _(RestPar)                      \
     _(TypeOfV)                      \
     _(ToIdV)                        \
     _(Floor)                        \
+    _(FloorF)                       \
     _(Round)                        \
     _(In)                           \
     _(InArray)                      \
     _(InstanceOfO)                  \
     _(InstanceOfV)                  \
     _(CallInstanceOf)               \
     _(InterruptCheck)               \
     _(InterruptCheckImplicit)       \
--- a/js/src/jit/Lowering.cpp
+++ b/js/src/jit/Lowering.cpp
@@ -1124,18 +1124,27 @@ bool
 LIRGenerator::visitUrsh(MUrsh *ins)
 {
     return lowerShiftOp(JSOP_URSH, ins);
 }
 
 bool
 LIRGenerator::visitFloor(MFloor *ins)
 {
-    JS_ASSERT(ins->num()->type() == MIRType_Double);
-    LFloor *lir = new LFloor(useRegister(ins->num()));
+    MIRType type = ins->num()->type();
+    JS_ASSERT(IsFloatingPointType(type));
+
+    if (type == MIRType_Double) {
+        LFloor *lir = new LFloor(useRegister(ins->num()));
+        if (!assignSnapshot(lir))
+            return false;
+        return define(lir, ins);
+    }
+
+    LFloorF *lir = new LFloorF(useRegister(ins->num()));
     if (!assignSnapshot(lir))
         return false;
     return define(lir, ins);
 }
 
 bool
 LIRGenerator::visitRound(MRound *ins)
 {
--- a/js/src/jit/MCallOptimize.cpp
+++ b/js/src/jit/MCallOptimize.cpp
@@ -549,42 +549,41 @@ IonBuilder::inlineMathAbs(CallInfo &call
 
     current->push(ins);
     return InliningStatus_Inlined;
 }
 
 IonBuilder::InliningStatus
 IonBuilder::inlineMathFloor(CallInfo &callInfo)
 {
-
     if (callInfo.constructing())
         return InliningStatus_NotInlined;
 
     if (callInfo.argc() != 1)
         return InliningStatus_NotInlined;
 
     MIRType argType = callInfo.getArg(0)->type();
     MIRType returnType = getInlineReturnType();
 
     // Math.floor(int(x)) == int(x)
     if (argType == MIRType_Int32 && returnType == MIRType_Int32) {
         callInfo.unwrapArgs();
         current->push(callInfo.getArg(0));
         return InliningStatus_Inlined;
     }
 
-    if (argType == MIRType_Double && returnType == MIRType_Int32) {
+    if (IsFloatingPointType(argType) && returnType == MIRType_Int32) {
         callInfo.unwrapArgs();
         MFloor *ins = new MFloor(callInfo.getArg(0));
         current->add(ins);
         current->push(ins);
         return InliningStatus_Inlined;
     }
 
-    if (argType == MIRType_Double && returnType == MIRType_Double) {
+    if (IsFloatingPointType(argType) && returnType == MIRType_Double) {
         callInfo.unwrapArgs();
         MMathFunction *ins = MMathFunction::New(callInfo.getArg(0), MMathFunction::Floor, nullptr);
         current->add(ins);
         current->push(ins);
         return InliningStatus_Inlined;
     }
 
     return InliningStatus_NotInlined;
--- a/js/src/jit/MIR.cpp
+++ b/js/src/jit/MIR.cpp
@@ -25,16 +25,33 @@
 
 using namespace js;
 using namespace js::jit;
 
 using mozilla::DoublesAreIdentical;
 using mozilla::IsFloat32Representable;
 using mozilla::Maybe;
 
+template<size_t Op> static void
+ConvertDefinitionToDouble(MDefinition *def, MInstruction *consumer)
+{
+    MInstruction *replace = MToDouble::New(def);
+    consumer->replaceOperand(Op, replace);
+    consumer->block()->insertBefore(consumer, replace);
+}
+
+static bool
+CheckUsesAreFloat32Consumers(MInstruction *ins)
+{
+    bool allConsumerUses = true;
+    for (MUseDefIterator use(ins); allConsumerUses && use; use++)
+        allConsumerUses &= use.def()->canConsumeFloat32();
+    return allConsumerUses;
+}
+
 void
 MDefinition::PrintOpcodeName(FILE *fp, MDefinition::Opcode op)
 {
     static const char * const names[] =
     {
 #define NAME(x) #x,
         MIR_OPCODE_LIST(NAME)
 #undef NAME
@@ -645,16 +662,32 @@ MStringLength::foldsTo(bool useValueNumb
         size_t length = JS_GetStringLength(value.toString());
 
         return MConstant::New(Int32Value(length));
     }
 
     return this;
 }
 
+void
+MFloor::trySpecializeFloat32()
+{
+    // No need to look at the output, as it's an integer (see IonBuilder::inlineMathFloor)
+    if (!input()->canProduceFloat32()) {
+        if (input()->type() == MIRType_Float32)
+            ConvertDefinitionToDouble<0>(input(), this); // insert an MToDouble in front of operand 0
+        return;
+    }
+
+    if (type() == MIRType_Double) // NOTE(review): the MFloor constructor sets Int32, so this looks unreachable - confirm
+        setResultType(MIRType_Float32);
+
+    setPolicyType(MIRType_Float32); // switch the operand policy away from the Double set in the constructor
+}
+
 MTest *
 MTest::New(MDefinition *ins, MBasicBlock *ifTrue, MBasicBlock *ifFalse)
 {
     return new MTest(ins, ifTrue, ifFalse);
 }
 
 MCompare *
 MCompare::New(MDefinition *left, MDefinition *right, JSOp op)
@@ -1231,33 +1264,16 @@ MBinaryArithInstruction::foldsTo(bool us
         return this;
 
     if (IsConstant(lhs, getIdentity()))
         return rhs; // x op id => x
 
     return this;
 }
 
-template<size_t Op> static void
-ConvertDefinitionToDouble(MDefinition *def, MInstruction *consumer)
-{
-    MInstruction *replace = MToDouble::New(def);
-    consumer->replaceOperand(Op, replace);
-    consumer->block()->insertBefore(consumer, replace);
-}
-
-static bool
-CheckUsesAreFloat32Consumers(MInstruction *ins)
-{
-    bool allConsumerUses = true;
-    for (MUseDefIterator use(ins); allConsumerUses && use; use++)
-        allConsumerUses &= use.def()->canConsumeFloat32();
-    return allConsumerUses;
-}
-
 void
 MBinaryArithInstruction::trySpecializeFloat32()
 {
     MDefinition *left = lhs();
     MDefinition *right = rhs();
 
     if (!left->canProduceFloat32() || !right->canProduceFloat32()
         || !CheckUsesAreFloat32Consumers(this))
--- a/js/src/jit/MIR.h
+++ b/js/src/jit/MIR.h
@@ -3743,17 +3743,17 @@ class MMathFunction
 
     void printOpcode(FILE *fp) const;
 
     static const char *FunctionName(Function function);
 
     bool isFloat32Commutative() const {
         return function_ == Log || function_ == Sin || function_ == Cos
                || function_ == Exp || function_ == Tan || function_ == ATan
-               || function_ == ASin || function_ == ACos;
+               || function_ == ASin || function_ == ACos || function_ == Floor;
     }
     void trySpecializeFloat32();
 };
 
 class MAdd : public MBinaryArithInstruction
 {
     // Is this instruction really an int at heart?
     MAdd(MDefinition *left, MDefinition *right)
@@ -7590,37 +7590,47 @@ class MStringLength
     }
 
     void computeRange();
 };
 
 // Inlined version of Math.floor().
 class MFloor
   : public MUnaryInstruction,
-    public DoublePolicy<0>
+    public FloatingPointPolicy<0>
 {
   public:
     MFloor(MDefinition *num)
       : MUnaryInstruction(num)
     {
         setResultType(MIRType_Int32);
+        setPolicyType(MIRType_Double);
         setMovable();
     }
 
     INSTRUCTION_HEADER(Floor)
 
     MDefinition *num() const {
         return getOperand(0);
     }
     AliasSet getAliasSet() const {
         return AliasSet::None();
     }
     TypePolicy *typePolicy() {
         return this;
     }
+    bool isFloat32Commutative() const {
+        return true;
+    }
+    void trySpecializeFloat32();
+#ifdef DEBUG
+    bool isConsistentFloat32Use() const {
+        return true;
+    }
+#endif
 };
 
 // Inlined version of Math.round().
 class MRound
   : public MUnaryInstruction,
     public DoublePolicy<0>
 {
   public:
--- a/js/src/jit/arm/CodeGenerator-arm.cpp
+++ b/js/src/jit/arm/CodeGenerator-arm.cpp
@@ -1198,16 +1198,28 @@ CodeGeneratorARM::visitFloor(LFloor *lir
     Label bail;
     masm.floor(input, output, &bail);
     if (!bailoutFrom(&bail, lir->snapshot()))
         return false;
     return true;
 }
 
 bool
+CodeGeneratorARM::visitFloorF(LFloorF *lir)
+{
+    FloatRegister input = ToFloatRegister(lir->input());
+    Register output = ToRegister(lir->output());
+    Label bail;
+    masm.floorf(input, output, &bail);
+    if (!bailoutFrom(&bail, lir->snapshot()))
+        return false;
+    return true;
+}
+
+bool
 CodeGeneratorARM::visitRound(LRound *lir)
 {
     FloatRegister input = ToFloatRegister(lir->input());
     Register output = ToRegister(lir->output());
     FloatRegister tmp = ToFloatRegister(lir->temp());
     Label bail;
     // Output is either correct, or clamped.  All -0 cases have been translated to a clamped
     // case.a
--- a/js/src/jit/arm/CodeGenerator-arm.h
+++ b/js/src/jit/arm/CodeGenerator-arm.h
@@ -105,16 +105,17 @@ class CodeGeneratorARM : public CodeGene
     virtual bool visitAsmJSUInt32ToFloat32(LAsmJSUInt32ToFloat32 *lir);
     virtual bool visitNotI(LNotI *ins);
     virtual bool visitNotD(LNotD *ins);
     virtual bool visitNotF(LNotF *ins);
 
     virtual bool visitMathD(LMathD *math);
     virtual bool visitMathF(LMathF *math);
     virtual bool visitFloor(LFloor *lir);
+    virtual bool visitFloorF(LFloorF *lir);
     virtual bool visitRound(LRound *lir);
     virtual bool visitTruncateDToInt32(LTruncateDToInt32 *ins);
     virtual bool visitTruncateFToInt32(LTruncateFToInt32 *ins);
 
     // Out of line visitors.
     bool visitOutOfLineBailout(OutOfLineBailout *ool);
     bool visitOutOfLineTableSwitch(OutOfLineTableSwitch *ool);
 
--- a/js/src/jit/arm/MacroAssembler-arm.cpp
+++ b/js/src/jit/arm/MacroAssembler-arm.cpp
@@ -3825,16 +3825,68 @@ MacroAssemblerARMCompat::floor(FloatRegi
     // the int range, and special handling is required.
     // zero is also caught by this case, but floor of a negative number
     // should never be zero.
     ma_b(bail, Unsigned);
 
     bind(&fin);
 }
 
+void
+MacroAssemblerARMCompat::floorf(FloatRegister input, Register output, Label *bail) // float32 floor into output; jumps to bail for NaN, -0 and out-of-range inputs
+{
+    Label handleZero;
+    Label handleNeg;
+    Label fin;
+    compareFloat(input, InvalidFloatReg);
+    ma_b(&handleZero, Assembler::Equal);
+    ma_b(&handleNeg, Assembler::Signed);
+    // NaN is always a bail condition, just bail directly.
+    ma_b(bail, Assembler::Overflow);
+
+    // The argument is a positive number, truncation is the path to glory;
+    // Since it is known to be > 0.0, explicitly convert to a larger range,
+    // then a value that rounds to INT_MAX is explicitly different from an
+    // argument that clamps to INT_MAX
+    ma_vcvt_F32_U32(input, ScratchFloatReg);
+    ma_vxfer(VFPRegister(ScratchFloatReg).uintOverlay(), output);
+    ma_mov(output, output, SetCond);
+    ma_b(bail, Signed);
+    ma_b(&fin);
+
+    bind(&handleZero);
+    // Move the bits of the float32 into the output reg; if they are non-zero,
+    // then the original value was -0.0
+    as_vxfer(output, InvalidReg, VFPRegister(input).singleOverlay(), FloatToCore, Always, 0);
+    ma_cmp(output, Imm32(0));
+    ma_b(bail, NonZero);
+    ma_b(&fin);
+
+    bind(&handleNeg);
+    // Negative case, negate, then start dancing
+    ma_vneg_f32(input, input);
+    ma_vcvt_F32_U32(input, ScratchFloatReg);
+    ma_vxfer(VFPRegister(ScratchFloatReg).uintOverlay(), output);
+    ma_vcvt_U32_F32(ScratchFloatReg, ScratchFloatReg);
+    compareFloat(ScratchFloatReg, input);
+    ma_add(output, Imm32(1), output, NoSetCond, NotEqual);
+    // Negate the output.  Since INT_MIN < -INT_MAX, even after adding 1,
+    // the result will still be a negative number
+    ma_rsb(output, Imm32(0), output, SetCond);
+    // Flip the negated input back to its original value.
+    ma_vneg_f32(input, input);
+    // If the result looks non-negative, then this value didn't actually fit into
+    // the int range, and special handling is required.
+    // zero is also caught by this case, but floor of a negative number
+    // should never be zero.
+    ma_b(bail, Unsigned);
+
+    bind(&fin);
+}
+
 CodeOffsetLabel
 MacroAssemblerARMCompat::toggledJump(Label *label)
 {
     // Emit a B that can be toggled to a CMP. See ToggleToJmp(), ToggleToCmp().
     CodeOffsetLabel ret(nextOffset().getOffset());
     ma_b(label, Always, true);
     return ret;
 }
--- a/js/src/jit/arm/MacroAssembler-arm.h
+++ b/js/src/jit/arm/MacroAssembler-arm.h
@@ -1430,16 +1430,17 @@ class MacroAssemblerARMCompat : public M
         ma_add(address.base, Imm32(address.offset), dest, NoSetCond);
     }
     void computeEffectiveAddress(const BaseIndex &address, Register dest) {
         ma_alu(address.base, lsl(address.index, address.scale), dest, op_add, NoSetCond);
         if (address.offset)
             ma_add(dest, Imm32(address.offset), dest, NoSetCond);
     }
     void floor(FloatRegister input, Register output, Label *handleNotAnInt);
+    void floorf(FloatRegister input, Register output, Label *handleNotAnInt);
     void round(FloatRegister input, Register output, Label *handleNotAnInt, FloatRegister tmp);
 
     void clampCheck(Register r, Label *handleNotAnInt) {
         // check explicitly for r == INT_MIN || r == INT_MAX
         // this is the instruction sequence that gcc generated for this
         // operation.
         ma_sub(r, Imm32(0x80000001), ScratchRegister);
         ma_cmn(ScratchRegister, Imm32(3));
--- a/js/src/jit/shared/Assembler-x86-shared.h
+++ b/js/src/jit/shared/Assembler-x86-shared.h
@@ -1461,16 +1461,22 @@ class AssemblerX86Shared
         masm.sqrtss_rr(src.code(), dest.code());
     }
     void roundsd(const FloatRegister &src, const FloatRegister &dest,
                  JSC::X86Assembler::RoundingMode mode)
     {
         JS_ASSERT(HasSSE41());
         masm.roundsd_rr(src.code(), dest.code(), mode);
     }
+    void roundss(const FloatRegister &src, const FloatRegister &dest,
+                 JSC::X86Assembler::RoundingMode mode)
+    {
+        JS_ASSERT(HasSSE41());
+        masm.roundss_rr(src.code(), dest.code(), mode);
+    }
     void minsd(const FloatRegister &src, const FloatRegister &dest) {
         JS_ASSERT(HasSSE2());
         masm.minsd_rr(src.code(), dest.code());
     }
     void minsd(const Operand &src, const FloatRegister &dest) {
         JS_ASSERT(HasSSE2());
         switch (src.kind()) {
           case Operand::FPREG:
@@ -1523,16 +1529,25 @@ class AssemblerX86Shared
         switch (src.kind()) {
           case Operand::MEM_REG_DISP:
             masm.fstp_m(src.disp(), src.base());
             break;
           default:
             MOZ_ASSUME_UNREACHABLE("unexpected operand kind");
         }
     }
+    void fstp32(const Operand &src) {
+        switch (src.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.fstp32_m(src.disp(), src.base());
+            break;
+          default:
+            MOZ_ASSUME_UNREACHABLE("unexpected operand kind");
+        }
+    }
 
     // Defined for compatibility with ARM's assembler
     uint32_t actualOffset(uint32_t x) {
         return x;
     }
 
     uint32_t actualIndex(uint32_t x) {
         return x;
--- a/js/src/jit/shared/CodeGenerator-x86-shared.cpp
+++ b/js/src/jit/shared/CodeGenerator-x86-shared.cpp
@@ -1444,16 +1444,83 @@ CodeGeneratorX86Shared::visitFloor(LFloo
         }
 
         masm.bind(&end);
     }
     return true;
 }
 
 bool
+CodeGeneratorX86Shared::visitFloorF(LFloorF *lir)
+{
+    FloatRegister input = ToFloatRegister(lir->input());
+    FloatRegister scratch = ScratchFloatReg;
+    Register output = ToRegister(lir->output());
+
+    if (AssemblerX86Shared::HasSSE41()) {
+        // Bail on negative-zero.
+        Assembler::Condition bailCond = masm.testNegativeZeroFloat32(input, output);
+        if (!bailoutIf(bailCond, lir->snapshot()))
+            return false;
+
+        // Round toward -Infinity.
+        masm.roundss(input, scratch, JSC::X86Assembler::RoundDown);
+
+        masm.cvttss2si(scratch, output);
+        masm.cmp32(output, Imm32(INT_MIN));
+        if (!bailoutIf(Assembler::Equal, lir->snapshot()))
+            return false;
+    } else {
+        Label negative, end;
+
+        // Branch to a slow path for negative inputs. Doesn't catch NaN or -0.
+        masm.xorps(scratch, scratch);
+        masm.branchFloat(Assembler::DoubleLessThan, input, scratch, &negative);
+
+        // Bail on negative-zero.
+        Assembler::Condition bailCond = masm.testNegativeZeroFloat32(input, output);
+        if (!bailoutIf(bailCond, lir->snapshot()))
+            return false;
+
+        // Input is non-negative, so truncation correctly rounds.
+        masm.cvttss2si(input, output);
+        masm.cmp32(output, Imm32(INT_MIN));
+        if (!bailoutIf(Assembler::Equal, lir->snapshot()))
+            return false;
+
+        masm.jump(&end);
+
+        // Input is negative, but isn't -0.
+        // Negative values go on a comparatively expensive path, since no
+        // native rounding mode matches JS semantics. Still better than callVM.
+        masm.bind(&negative);
+        {
+            // Truncate and round toward zero.
+            // This is off-by-one for everything but integer-valued inputs.
+            masm.cvttss2si(input, output);
+            masm.cmp32(output, Imm32(INT_MIN));
+            if (!bailoutIf(Assembler::Equal, lir->snapshot()))
+                return false;
+
+            // Test whether the input float32 was integer-valued.
+            masm.convertInt32ToFloat32(output, scratch);
+            masm.branchFloat(Assembler::DoubleEqualOrUnordered, input, scratch, &end);
+
+            // Input is not integer-valued, so we rounded off-by-one in the
+            // wrong direction. Correct by subtraction.
+            masm.subl(Imm32(1), output);
+            // Cannot overflow: output was already checked against INT_MIN.
+        }
+
+        masm.bind(&end);
+    }
+    return true;
+}
+
+bool
 CodeGeneratorX86Shared::visitRound(LRound *lir)
 {
     FloatRegister input = ToFloatRegister(lir->input());
     FloatRegister temp = ToFloatRegister(lir->temp());
     FloatRegister scratch = ScratchFloatReg;
     Register output = ToRegister(lir->output());
 
     Label negative, end;
--- a/js/src/jit/shared/CodeGenerator-x86-shared.h
+++ b/js/src/jit/shared/CodeGenerator-x86-shared.h
@@ -109,16 +109,17 @@ class CodeGeneratorX86Shared : public Co
     virtual bool visitCompareFAndBranch(LCompareFAndBranch *comp);
     virtual bool visitBitAndAndBranch(LBitAndAndBranch *baab);
     virtual bool visitNotI(LNotI *comp);
     virtual bool visitNotD(LNotD *comp);
     virtual bool visitNotF(LNotF *comp);
     virtual bool visitMathD(LMathD *math);
     virtual bool visitMathF(LMathF *math);
     virtual bool visitFloor(LFloor *lir);
+    virtual bool visitFloorF(LFloorF *lir);
     virtual bool visitRound(LRound *lir);
     virtual bool visitGuardShape(LGuardShape *guard);
     virtual bool visitGuardObjectType(LGuardObjectType *guard);
     virtual bool visitGuardClass(LGuardClass *guard);
     virtual bool visitEffectiveAddress(LEffectiveAddress *ins);
     virtual bool visitUDivOrMod(LUDivOrMod *ins);
     virtual bool visitAsmJSPassStackArg(LAsmJSPassStackArg *ins);
 
--- a/js/src/jit/x64/MacroAssembler-x64.cpp
+++ b/js/src/jit/x64/MacroAssembler-x64.cpp
@@ -359,8 +359,16 @@ MacroAssemblerX64::handleFailureWithHand
 
 Assembler::Condition
 MacroAssemblerX64::testNegativeZero(const FloatRegister &reg, const Register &scratch)
 {
     movq(reg, scratch);
     cmpq(scratch, Imm32(1));
     return Overflow;
 }
+
+Assembler::Condition
+MacroAssemblerX64::testNegativeZeroFloat32(const FloatRegister &reg, const Register &scratch)
+{
+    movd(reg, scratch); // raw float32 bits into scratch (clobbered); -0.0f is 0x80000000, i.e. INT32_MIN
+    cmpl(scratch, Imm32(1)); // scratch - 1 overflows only when scratch is INT32_MIN
+    return Overflow; // condition that holds when reg is -0
+}
--- a/js/src/jit/x64/MacroAssembler-x64.h
+++ b/js/src/jit/x64/MacroAssembler-x64.h
@@ -76,17 +76,18 @@ class MacroAssemblerX64 : public MacroAs
     using MacroAssemblerX86Shared::call;
     using MacroAssemblerX86Shared::Push;
     using MacroAssemblerX86Shared::Pop;
     using MacroAssemblerX86Shared::callWithExitFrame;
     using MacroAssemblerX86Shared::branch32;
 
     enum Result {
         GENERAL,
-        DOUBLE
+        DOUBLE,
+        FLOAT
     };
 
     typedef MoveResolver::MoveOperand MoveOperand;
     typedef MoveResolver::Move Move;
 
     MacroAssemblerX64()
       : inCall_(false),
         enoughMemory_(true)
@@ -461,16 +462,17 @@ class MacroAssemblerX64 : public MacroAs
     void cmpPtr(const Register &lhs, const Register &rhs) {
         return cmpq(lhs, rhs);
     }
     void testPtr(const Register &lhs, const Register &rhs) {
         testq(lhs, rhs);
     }
 
     Condition testNegativeZero(const FloatRegister &reg, const Register &scratch);
+    Condition testNegativeZeroFloat32(const FloatRegister &reg, const Register &scratch);
 
     /////////////////////////////////////////////////////////////////
     // Common interface.
     /////////////////////////////////////////////////////////////////
     void reserveStack(uint32_t amount) {
         if (amount)
             subq(Imm32(amount), StackPointer);
         framePushed_ += amount;
--- a/js/src/jit/x86/MacroAssembler-x86.cpp
+++ b/js/src/jit/x86/MacroAssembler-x86.cpp
@@ -234,16 +234,22 @@ MacroAssemblerX86::callWithABIPost(uint3
 {
     freeStack(stackAdjust);
     if (result == DOUBLE) {
         reserveStack(sizeof(double));
         fstp(Operand(esp, 0));
         loadDouble(Operand(esp, 0), ReturnFloatReg);
         freeStack(sizeof(double));
     }
+    if (result == FLOAT) {
+        reserveStack(sizeof(float));
+        fstp32(Operand(esp, 0));
+        loadFloat(Operand(esp, 0), ReturnFloatReg);
+        freeStack(sizeof(float));
+    }
     if (dynamicAlignment_)
         pop(esp);
 
     JS_ASSERT(inCall_);
     inCall_ = false;
 }
 
 void
@@ -396,8 +402,16 @@ MacroAssemblerX86::testNegativeZero(cons
     // Input register is either zero or negative zero. Test sign bit.
     movmskpd(reg, scratch);
     // If reg is -0, then a test of Zero is true.
     cmpl(scratch, Imm32(1));
 
     bind(&nonZero);
     return Zero;
 }
+
+Assembler::Condition
+MacroAssemblerX86::testNegativeZeroFloat32(const FloatRegister &reg, const Register &scratch)
+{
+    movd(reg, scratch); // raw float32 bits into scratch (clobbered); -0.0f is 0x80000000, i.e. INT32_MIN
+    cmpl(scratch, Imm32(1)); // scratch - 1 overflows only when scratch is INT32_MIN
+    return Overflow; // condition that holds when reg is -0
+}
--- a/js/src/jit/x86/MacroAssembler-x86.h
+++ b/js/src/jit/x86/MacroAssembler-x86.h
@@ -67,17 +67,18 @@ class MacroAssemblerX86 : public MacroAs
   public:
     using MacroAssemblerX86Shared::Push;
     using MacroAssemblerX86Shared::Pop;
     using MacroAssemblerX86Shared::callWithExitFrame;
     using MacroAssemblerX86Shared::branch32;
 
     enum Result {
         GENERAL,
-        DOUBLE
+        DOUBLE,
+        FLOAT
     };
 
     typedef MoveResolver::MoveOperand MoveOperand;
     typedef MoveResolver::Move Move;
 
     MacroAssemblerX86()
       : inCall_(false),
         enoughMemory_(true)
@@ -494,16 +495,17 @@ class MacroAssemblerX86 : public MacroAs
     void cmpPtr(Register lhs, Register rhs) {
         cmpl(lhs, rhs);
     }
     void testPtr(Register lhs, Register rhs) {
         testl(lhs, rhs);
     }
 
     Condition testNegativeZero(const FloatRegister &reg, const Register &scratch);
+    Condition testNegativeZeroFloat32(const FloatRegister &reg, const Register &scratch);
 
     /////////////////////////////////////////////////////////////////
     // Common interface.
     /////////////////////////////////////////////////////////////////
     void reserveStack(uint32_t amount) {
         if (amount)
             subl(Imm32(amount), StackPointer);
         framePushed_ += amount;