Bug 1240796 - Implement Uint32x4 <==> Float32x4 conversions. r=sunfish
authorJakob Stoklund Olesen <jolesen@mozilla.com>
Mon, 01 Feb 2016 14:55:07 -0800
changeset 282642 564346366f94def3db4e03355263657274ec99d2
parent 282641 a3eee47dab19a77a251829c4bb0be1084b96d61d
child 282643 426fa86f579d918364e4f1fc74f6f5df5c8b4a59
push id17362
push usercbook@mozilla.com
push dateTue, 02 Feb 2016 10:54:53 +0000
treeherderfx-team@e5f1b4782e38 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewerssunfish
bugs1240796
milestone47.0a1
Bug 1240796 - Implement Uint32x4 <==> Float32x4 conversions. r=sunfish The conversion from Uint32x4 to Float32x4 is not available as an SSE instruction, so we need to expand into a larger instruction sequence lifted from LLVM. Make this expansion early when generating MIR so that it can be exposed to LICM and GVN optimizations. The conversion from Float32x4 to Uint32x4 can throw a RangeError. It is handled similarly to LFloat32x4ToInt32x4. This expansion depends on the details of the cvttps2dq instruction that can't be expressed in MIR, so it can't be expanded early.
js/src/jit-test/tests/SIMD/uconvert.js
js/src/jit/Lowering.cpp
js/src/jit/MCallOptimize.cpp
js/src/jit/MIR.cpp
js/src/jit/MIR.h
js/src/jit/arm/Assembler-arm.h
js/src/jit/arm64/Assembler-arm64.h
js/src/jit/mips32/Assembler-mips32.h
js/src/jit/mips64/Assembler-mips64.h
js/src/jit/none/Architecture-none.h
js/src/jit/shared/LIR-shared.h
js/src/jit/shared/LOpcodes-shared.h
js/src/jit/x86-shared/Architecture-x86-shared.h
js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp
js/src/jit/x86-shared/CodeGenerator-x86-shared.h
new file mode 100644
--- /dev/null
+++ b/js/src/jit-test/tests/SIMD/uconvert.js
@@ -0,0 +1,81 @@
+load(libdir + 'simd.js');
+
+setJitCompilerOption("ion.warmup.trigger", 30);
+
+// Testing Uint32 <-> Float32 conversions.
+// These conversions deserve special attention because SSE doesn't provide
+// simple conversion instructions.
+
+// Convert an Uint32Array to a Float32Array using scalar conversions.
+function cvt_utof_scalar(u32s, f32s) {
+    assertEq(u32s.length, f32s.length);
+    for (var i = 0; i < u32s.length; i++) {
+        f32s[i] = u32s[i];
+    }
+}
+
+// Convert an Uint32Array to a Float32Array using simd conversions.
+function cvt_utof_simd(u32s, f32s) {
+    assertEq(u32s.length, f32s.length);
+    for (var i = 0; i < u32s.length; i += 4) {
+        SIMD.Float32x4.store(f32s, i, SIMD.Float32x4.fromUint32x4(SIMD.Uint32x4.load(u32s, i)));
+    }
+}
+
+// Convert a Float32Array to an Uint32Array using scalar conversions.
+function cvt_ftou_scalar(f32s, u32s) {
+    assertEq(f32s.length, u32s.length);
+    for (var i = 0; i < f32s.length; i++) {
+        u32s[i] = f32s[i];
+    }
+}
+
+// Convert a Float32Array to an Uint32Array using simd conversions.
+function cvt_ftou_simd(f32s, u32s) {
+    assertEq(f32s.length, u32s.length);
+    for (var i = 0; i < f32s.length; i += 4) {
+        SIMD.Uint32x4.store(u32s, i, SIMD.Uint32x4.fromFloat32x4(SIMD.Float32x4.load(f32s, i)));
+    }
+}
+
+function check(a, b) {
+    assertEq(a.length, b.length);
+    for (var i = 0; i < a.length; i++) {
+        assertEq(a[i], b[i]);
+    }
+}
+
+// Uint32x4 --> Float32x4 tests.
+var src = new Uint32Array(8000);
+var dst1 = new Float32Array(8000);
+var dst2 = new Float32Array(8000);
+
+for (var i = 0; i < 2000; i++) {
+    src[i] = i;
+    src[i + 2000] = 0x7fffffff - i;
+    src[i + 4000] = 0x80000000 + i;
+    src[i + 6000] = 0xffffffff - i;
+}
+
+for (var n = 0; n < 10; n++) {
+    cvt_utof_scalar(src, dst1);
+    cvt_utof_simd(src, dst2);
+    check(dst1, dst2);
+}
+
+// Float32x4 --> Uint32x4 tests.
+var fsrc = dst1;
+var fdst1 = new Uint32Array(8000);
+var fdst2 = new Uint32Array(8000);
+
+// The 0xffffffff entries in fsrc round to 0x1.0p32f which throws.
+// Go as high as 0x0.ffffffp32f.
+for (var i = 0; i < 2000; i++) {
+    fsrc[i + 6000] = 0xffffff7f - i;
+}
+
+for (var n = 0; n < 10; n++) {
+    cvt_ftou_scalar(fsrc, fdst1);
+    cvt_ftou_simd(fsrc, fdst2);
+    check(fdst1, fdst2);
+}
--- a/js/src/jit/Lowering.cpp
+++ b/js/src/jit/Lowering.cpp
@@ -4096,22 +4096,38 @@ LIRGenerator::visitSimdConstant(MSimdCon
 void
 LIRGenerator::visitSimdConvert(MSimdConvert* ins)
 {
     MOZ_ASSERT(IsSimdType(ins->type()));
     MDefinition* input = ins->input();
     LUse use = useRegister(input);
     if (ins->type() == MIRType_Int32x4) {
         MOZ_ASSERT(input->type() == MIRType_Float32x4);
-        LFloat32x4ToInt32x4* lir = new(alloc()) LFloat32x4ToInt32x4(use, temp());
-        if (!gen->compilingAsmJS())
-            assignSnapshot(lir, Bailout_BoundsCheck);
-        define(lir, ins);
+        switch (ins->signedness()) {
+          case SimdSign::Signed: {
+              LFloat32x4ToInt32x4* lir = new(alloc()) LFloat32x4ToInt32x4(use, temp());
+              if (!gen->compilingAsmJS())
+                  assignSnapshot(lir, Bailout_BoundsCheck);
+              define(lir, ins);
+              break;
+          }
+          case SimdSign::Unsigned: {
+              LFloat32x4ToUint32x4* lir =
+                new (alloc()) LFloat32x4ToUint32x4(use, temp(), temp(LDefinition::INT32X4));
+              if (!gen->compilingAsmJS())
+                  assignSnapshot(lir, Bailout_BoundsCheck);
+              define(lir, ins);
+              break;
+          }
+          default:
+            MOZ_CRASH("Unexpected SimdConvert sign");
+        }
     } else if (ins->type() == MIRType_Float32x4) {
         MOZ_ASSERT(input->type() == MIRType_Int32x4);
+        MOZ_ASSERT(ins->signedness() == SimdSign::Signed, "Unexpected SimdConvert sign");
         define(new(alloc()) LInt32x4ToFloat32x4(use), ins);
     } else {
         MOZ_CRASH("Unknown SIMD kind when generating constant");
     }
 }
 
 void
 LIRGenerator::visitSimdReinterpretCast(MSimdReinterpretCast* ins)
--- a/js/src/jit/MCallOptimize.cpp
+++ b/js/src/jit/MCallOptimize.cpp
@@ -3372,17 +3372,20 @@ IonBuilder::inlineSimdCheck(CallInfo& ca
     return InliningStatus_Inlined;
 }
 
 IonBuilder::InliningStatus
 IonBuilder::boxSimd(CallInfo& callInfo, MInstruction* ins, InlineTypedObject* templateObj)
 {
     MSimdBox* obj = MSimdBox::New(alloc(), constraints(), ins, templateObj,
                                   templateObj->group()->initialHeap(constraints()));
-    current->add(ins);
+
+    // In some cases, ins has already been added to current.
+    if (!ins->block())
+        current->add(ins);
     current->add(obj);
     current->push(obj);
 
     callInfo.setImplicitlyUsedUnchecked();
     return InliningStatus_Inlined;
 }
 
 template<typename T>
@@ -3520,26 +3523,25 @@ IonBuilder::inlineSimdReplaceLane(CallIn
 IonBuilder::InliningStatus
 IonBuilder::inlineSimdConvert(CallInfo& callInfo, JSNative native, bool isCast,
                               MIRType fromType, MIRType toType, SimdSign sign)
 {
     InlineTypedObject* templateObj = nullptr;
     if (!canInlineSimd(callInfo, native, 1, &templateObj))
         return InliningStatus_NotInlined;
 
-    // TODO JSO: Implement unsigned integer conversions.
-    if (sign == SimdSign::Unsigned)
-        return InliningStatus_NotInlined;
-
     // See comment in inlineSimdBinary
     MInstruction* ins;
     if (isCast)
+        // Signed/Unsigned doesn't matter for bitcasts.
         ins = MSimdReinterpretCast::New(alloc(), callInfo.getArg(0), fromType, toType);
     else
-        ins = MSimdConvert::New(alloc(), callInfo.getArg(0), fromType, toType);
+        // Possibly expand into multiple instructions.
+        ins = MSimdConvert::AddLegalized(alloc(), current, callInfo.getArg(0),
+                                         fromType, toType, sign);
 
     return boxSimd(callInfo, ins, templateObj);
 }
 
 IonBuilder::InliningStatus
 IonBuilder::inlineSimdSelect(CallInfo& callInfo, JSNative native, MIRType mirType)
 {
     InlineTypedObject* templateObj = nullptr;
--- a/js/src/jit/MIR.cpp
+++ b/js/src/jit/MIR.cpp
@@ -1032,16 +1032,115 @@ MSimdGeneralShuffle::foldsTo(TempAllocat
 
     if (numVectors() == 1)
         return MSimdSwizzle::New(alloc, vector(0), type(), lanes[0], lanes[1], lanes[2], lanes[3]);
 
     MOZ_ASSERT(numVectors() == 2);
     return MSimdShuffle::New(alloc, vector(0), vector(1), type(), lanes[0], lanes[1], lanes[2], lanes[3]);
 }
 
+MInstruction*
+MSimdConvert::AddLegalized(TempAllocator& alloc, MBasicBlock* addTo, MDefinition* obj,
+                           MIRType fromType, MIRType toType, SimdSign sign)
+{
+    if (SupportsUint32x4FloatConversions || sign != SimdSign::Unsigned) {
+        MInstruction* ins = New(alloc, obj, fromType, toType, sign);
+        addTo->add(ins);
+        return ins;
+    }
+
+    // This architecture can't do Uint32x4 <-> Float32x4 conversions (Hi SSE!)
+    MOZ_ASSERT(sign == SimdSign::Unsigned);
+    if (fromType == MIRType_Int32x4 && toType == MIRType_Float32x4) {
+        // Converting Uint32x4 -> Float32x4. This algorithm is from LLVM.
+        //
+        // Split the input number into high and low parts:
+        //
+        // uint32_t hi = x >> 16;
+        // uint32_t lo = x & 0xffff;
+        //
+        // Insert these parts as the low mantissa bits in a float32 number with
+        // the corresponding exponent:
+        //
+        // float fhi = (bits-as-float)(hi | 0x53000000); // 0x1.0p39f + hi*2^16
+        // float flo = (bits-as-float)(lo | 0x4b000000); // 0x1.0p23f + lo
+        //
+        // Subtract the bias from the hi part:
+        //
+        // fhi -= (0x1.0p39 + 0x1.0p23) // hi*2^16 - 0x1.0p23
+        //
+        // And finally combine:
+        //
+        // result = flo + fhi // lo + hi*2^16.
+
+        // Compute hi = obj >> 16 (lane-wise unsigned shift).
+        MInstruction* c16 = MConstant::New(alloc, Int32Value(16));
+        addTo->add(c16);
+        MInstruction* hi = MSimdShift::New(alloc, obj, c16, MSimdShift::ursh, MIRType_Int32x4);
+        addTo->add(hi);
+
+        // Compute lo = obj & 0xffff (lane-wise).
+        MInstruction* m16 =
+          MSimdConstant::New(alloc, SimdConstant::SplatX4(0xffff), MIRType_Int32x4);
+        addTo->add(m16);
+        MInstruction* lo =
+          MSimdBinaryBitwise::New(alloc, obj, m16, MSimdBinaryBitwise::and_, MIRType_Int32x4);
+        addTo->add(lo);
+
+        // Mix in the exponents.
+        MInstruction* exphi =
+          MSimdConstant::New(alloc, SimdConstant::SplatX4(0x53000000), MIRType_Int32x4);
+        addTo->add(exphi);
+        MInstruction* mhi =
+          MSimdBinaryBitwise::New(alloc, hi, exphi, MSimdBinaryBitwise::or_, MIRType_Int32x4);
+        addTo->add(mhi);
+        MInstruction* explo =
+          MSimdConstant::New(alloc, SimdConstant::SplatX4(0x4b000000), MIRType_Int32x4);
+        addTo->add(explo);
+        MInstruction* mlo =
+          MSimdBinaryBitwise::New(alloc, lo, explo, MSimdBinaryBitwise::or_, MIRType_Int32x4);
+        addTo->add(mlo);
+
+        // Bit-cast both to Float32x4.
+        MInstruction* fhi =
+          MSimdReinterpretCast::New(alloc, mhi, MIRType_Int32x4, MIRType_Float32x4);
+        addTo->add(fhi);
+        MInstruction* flo =
+          MSimdReinterpretCast::New(alloc, mlo, MIRType_Int32x4, MIRType_Float32x4);
+        addTo->add(flo);
+
+        // Subtract out the bias: 0x1.0p39f + 0x1.0p23f.
+        // MSVC doesn't support the hexadecimal float syntax.
+        const float BiasValue = 549755813888.f + 8388608.f;
+        MInstruction* bias =
+          MSimdConstant::New(alloc, SimdConstant::SplatX4(BiasValue), MIRType_Float32x4);
+        addTo->add(bias);
+        MInstruction* fhi_debiased =
+          MSimdBinaryArith::New(alloc, fhi, bias, MSimdBinaryArith::Op_sub, MIRType_Float32x4);
+        addTo->add(fhi_debiased);
+
+        // Compute the final result.
+        MInstruction* result = MSimdBinaryArith::New(alloc, fhi_debiased, flo,
+                                                     MSimdBinaryArith::Op_add, MIRType_Float32x4);
+        addTo->add(result);
+
+        return result;
+    }
+
+    if (fromType == MIRType_Float32x4 && toType == MIRType_Int32x4) {
+        // The Float32x4 -> Uint32x4 conversion can throw if the input is out of
+        // range. This is handled by the LFloat32x4ToUint32x4 expansion.
+        MInstruction* ins = New(alloc, obj, fromType, toType, sign);
+        addTo->add(ins);
+        return ins;
+    }
+
+    MOZ_CRASH("Unhandled SIMD type conversion");
+}
+
 template <typename T>
 static void
 PrintOpcodeOperation(T* mir, GenericPrinter& out)
 {
     mir->MDefinition::printOpcode(out);
     out.printf(" (%s)", T::OperationName(mir->operation()));
 }
 
--- a/js/src/jit/MIR.h
+++ b/js/src/jit/MIR.h
@@ -1537,50 +1537,73 @@ class MSimdConstant
     ALLOW_CLONE(MSimdConstant)
 };
 
 // Converts all lanes of a given vector into the type of another vector
 class MSimdConvert
   : public MUnaryInstruction,
     public SimdPolicy<0>::Data
 {
-    MSimdConvert(MDefinition* obj, MIRType fromType, MIRType toType)
-      : MUnaryInstruction(obj)
+    // When either fromType or toType is an integer vector, should it be treated
+    // as signed or unsigned. Note that we don't support int-int conversions -
+    // use MSimdReinterpretCast for that.
+    SimdSign sign_;
+
+    MSimdConvert(MDefinition* obj, MIRType fromType, MIRType toType, SimdSign sign)
+      : MUnaryInstruction(obj), sign_(sign)
     {
         MOZ_ASSERT(IsSimdType(toType));
+        // All conversions are int <-> float, so signedness is required.
+        MOZ_ASSERT(sign != SimdSign::NotApplicable);
+
         setResultType(toType);
         specialization_ = fromType; // expects fromType as input
 
         setMovable();
         if (IsFloatingPointSimdType(fromType) && IsIntegerSimdType(toType)) {
             // Does the extra range check => do not remove
             setGuard();
         }
     }
 
   public:
     INSTRUCTION_HEADER(SimdConvert)
     static MSimdConvert* NewAsmJS(TempAllocator& alloc, MDefinition* obj, MIRType fromType,
                                   MIRType toType)
     {
         MOZ_ASSERT(IsSimdType(obj->type()) && fromType == obj->type());
-        return new(alloc) MSimdConvert(obj, fromType, toType);
+        // AsmJS only has signed integer vectors for now.
+        return new(alloc) MSimdConvert(obj, fromType, toType, SimdSign::Signed);
     }
 
     static MSimdConvert* New(TempAllocator& alloc, MDefinition* obj, MIRType fromType,
-                             MIRType toType)
-    {
-        return new(alloc) MSimdConvert(obj, fromType, toType);
-    }
-
-    AliasSet getAliasSet() const override {
-        return AliasSet::None();
-    }
-    bool congruentTo(const MDefinition* ins) const override {
-        return congruentIfOperandsEqual(ins);
+                             MIRType toType, SimdSign sign)
+    {
+        return new(alloc) MSimdConvert(obj, fromType, toType, sign);
+    }
+
+    // Create a MSimdConvert instruction and add it to the basic block.
+    // Possibly create and add an equivalent sequence of instructions instead if
+    // the current target doesn't support the requested conversion directly.
+    // Return the inserted MInstruction that computes the converted value.
+    static MInstruction* AddLegalized(TempAllocator& alloc, MBasicBlock* addTo, MDefinition* obj,
+                                      MIRType fromType, MIRType toType, SimdSign sign);
+
+    SimdSign signedness() const {
+        return sign_;
+    }
+
+    AliasSet getAliasSet() const override {
+        return AliasSet::None();
+    }
+    bool congruentTo(const MDefinition* ins) const override {
+        if (!congruentIfOperandsEqual(ins))
+            return false;
+        const MSimdConvert* other = ins->toSimdConvert();
+        return sign_ == other->sign_;
     }
     ALLOW_CLONE(MSimdConvert)
 };
 
 // Casts bits of a vector input to another SIMD type (doesn't generate code).
 class MSimdReinterpretCast
   : public MUnaryInstruction,
     public SimdPolicy<0>::Data
--- a/js/src/jit/arm/Assembler-arm.h
+++ b/js/src/jit/arm/Assembler-arm.h
@@ -215,16 +215,19 @@ static_assert(CodeAlignment % SimdMemory
   "alignment for SIMD constants.");
 
 static_assert(JitStackAlignment % SimdMemoryAlignment == 0,
   "Stack alignment should be larger than any of the alignments which are used for "
   "spilled values.  Thus it should be larger than the alignment for SIMD accesses.");
 
 static const uint32_t AsmJSStackAlignment = SimdMemoryAlignment;
 
+// Does this architecture support SIMD conversions between Uint32x4 and Float32x4?
+static MOZ_CONSTEXPR_VAR bool SupportsUint32x4FloatConversions = false;
+
 static const Scale ScalePointer = TimesFour;
 
 class Instruction;
 class InstBranchImm;
 uint32_t RM(Register r);
 uint32_t RS(Register r);
 uint32_t RD(Register r);
 uint32_t RT(Register r);
--- a/js/src/jit/arm64/Assembler-arm64.h
+++ b/js/src/jit/arm64/Assembler-arm64.h
@@ -171,16 +171,19 @@ static constexpr uint32_t SimdMemoryAlig
 static_assert(CodeAlignment % SimdMemoryAlignment == 0,
   "Code alignment should be larger than any of the alignments which are used for "
   "the constant sections of the code buffer.  Thus it should be larger than the "
   "alignment for SIMD constants.");
 
 static const uint32_t AsmJSStackAlignment = SimdMemoryAlignment;
 static const int32_t AsmJSGlobalRegBias = 1024;
 
+// Does this architecture support SIMD conversions between Uint32x4 and Float32x4?
+static MOZ_CONSTEXPR_VAR bool SupportsUint32x4FloatConversions = false;
+
 class Assembler : public vixl::Assembler
 {
   public:
     Assembler()
       : vixl::Assembler()
     { }
 
     typedef vixl::Condition Condition;
--- a/js/src/jit/mips32/Assembler-mips32.h
+++ b/js/src/jit/mips32/Assembler-mips32.h
@@ -93,16 +93,19 @@ static_assert(JitStackAlignment % sizeof
   "Stack alignment should be a non-zero multiple of sizeof(Value)");
 
 // TODO this is just a filler to prevent a build failure. The MIPS SIMD
 // alignment requirements still need to be explored.
 // TODO Copy the static_asserts from x64/x86 assembler files.
 static MOZ_CONSTEXPR_VAR uint32_t SimdMemoryAlignment = 8;
 static MOZ_CONSTEXPR_VAR uint32_t AsmJSStackAlignment = SimdMemoryAlignment;
 
+// Does this architecture support SIMD conversions between Uint32x4 and Float32x4?
+static MOZ_CONSTEXPR_VAR bool SupportsUint32x4FloatConversions = false;
+
 static MOZ_CONSTEXPR_VAR Scale ScalePointer = TimesFour;
 
 class Assembler : public AssemblerMIPSShared
 {
   public:
     Assembler()
       : AssemblerMIPSShared()
     { }
--- a/js/src/jit/mips64/Assembler-mips64.h
+++ b/js/src/jit/mips64/Assembler-mips64.h
@@ -104,16 +104,19 @@ static_assert(JitStackAlignment % sizeof
 
 // TODO this is just a filler to prevent a build failure. The MIPS SIMD
 // alignment requirements still need to be explored.
 // TODO Copy the static_asserts from x64/x86 assembler files.
 static MOZ_CONSTEXPR_VAR uint32_t SimdMemoryAlignment = 16;
 
 static MOZ_CONSTEXPR_VAR uint32_t AsmJSStackAlignment = SimdMemoryAlignment;
 
+// Does this architecture support SIMD conversions between Uint32x4 and Float32x4?
+static MOZ_CONSTEXPR_VAR bool SupportsUint32x4FloatConversions = false;
+
 static MOZ_CONSTEXPR_VAR Scale ScalePointer = TimesEight;
 
 class Assembler : public AssemblerMIPSShared
 {
   public:
     Assembler()
       : AssemblerMIPSShared()
     { }
--- a/js/src/jit/none/Architecture-none.h
+++ b/js/src/jit/none/Architecture-none.h
@@ -13,16 +13,19 @@
 
 namespace js {
 namespace jit {
 
 static const bool SupportsSimd = false;
 static const uint32_t SimdMemoryAlignment = 4; // Make it 4 to avoid a bunch of div-by-zero warnings
 static const uint32_t AsmJSStackAlignment = 8;
 
+// Does this architecture support SIMD conversions between Uint32x4 and Float32x4?
+static MOZ_CONSTEXPR_VAR bool SupportsUint32x4FloatConversions = false;
+
 class Registers
 {
   public:
     enum RegisterID {
         r0 = 0,
         invalid_reg
     };
     typedef uint8_t Code;
--- a/js/src/jit/shared/LIR-shared.h
+++ b/js/src/jit/shared/LIR-shared.h
@@ -3801,16 +3801,39 @@ class LFloat32x4ToInt32x4 : public LInst
     const LDefinition* temp() {
         return getTemp(0);
     }
     const MSimdConvert* mir() const {
         return mir_->toSimdConvert();
     }
 };
 
+// Float32x4 to Uint32x4 needs one GPR temp and one FloatReg temp.
+class LFloat32x4ToUint32x4 : public LInstructionHelper<1, 1, 2>
+{
+  public:
+    LIR_HEADER(Float32x4ToUint32x4);
+    explicit LFloat32x4ToUint32x4(const LAllocation& input, const LDefinition& tempR,
+                                  const LDefinition& tempF)
+    {
+        setOperand(0, input);
+        setTemp(0, tempR);
+        setTemp(1, tempF);
+    }
+    const LDefinition* tempR() {
+        return getTemp(0);
+    }
+    const LDefinition* tempF() {
+        return getTemp(1);
+    }
+    const MSimdConvert* mir() const {
+        return mir_->toSimdConvert();
+    }
+};
+
 // Double raised to a half power.
 class LPowHalfD : public LInstructionHelper<1, 1, 0>
 {
   public:
     LIR_HEADER(PowHalfD);
     explicit LPowHalfD(const LAllocation& input) {
         setOperand(0, input);
     }
--- a/js/src/jit/shared/LOpcodes-shared.h
+++ b/js/src/jit/shared/LOpcodes-shared.h
@@ -180,16 +180,17 @@
     _(TruncateFToInt32)             \
     _(BooleanToString)              \
     _(IntToString)                  \
     _(DoubleToString)               \
     _(ValueToString)                \
     _(ValueToObjectOrNull)          \
     _(Int32x4ToFloat32x4)           \
     _(Float32x4ToInt32x4)           \
+    _(Float32x4ToUint32x4)          \
     _(Start)                        \
     _(OsrEntry)                     \
     _(OsrValue)                     \
     _(OsrScopeChain)                \
     _(OsrReturnValue)               \
     _(OsrArgumentsObject)           \
     _(RegExp)                       \
     _(RegExpMatcher)                \
--- a/js/src/jit/x86-shared/Architecture-x86-shared.h
+++ b/js/src/jit/x86-shared/Architecture-x86-shared.h
@@ -15,16 +15,19 @@
 
 #include <string.h>
 
 #include "jit/x86-shared/Constants-x86-shared.h"
 
 namespace js {
 namespace jit {
 
+// Does this architecture support SIMD conversions between Uint32x4 and Float32x4?
+static const bool SupportsUint32x4FloatConversions = false;
+
 #if defined(JS_CODEGEN_X86)
 // In bytes: slots needed for potential memory->memory move spills.
 //   +8 for cycles
 //   +4 for gpr spills
 //   +8 for double spills
 static const uint32_t ION_FRAME_SLACK_SIZE    = 20;
 
 #elif defined(JS_CODEGEN_X64)
--- a/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp
+++ b/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp
@@ -2301,16 +2301,101 @@ CodeGeneratorX86Shared::visitOutOfLineSi
     masm.jump(ool->rejoin());
 
     if (bail.used()) {
         masm.bind(&bail);
         bailout(ool->ins()->snapshot());
     }
 }
 
+// Convert Float32x4 to Uint32x4.
+//
+// If any input lane value is out of range or NaN, bail out.
+void
+CodeGeneratorX86Shared::visitFloat32x4ToUint32x4(LFloat32x4ToUint32x4* ins)
+{
+    FloatRegister in = ToFloatRegister(ins->input());
+    FloatRegister out = ToFloatRegister(ins->output());
+    Register temp = ToRegister(ins->tempR());
+    FloatRegister tempF = ToFloatRegister(ins->tempF());
+
+    // Classify lane values into 4 disjoint classes:
+    //
+    //   N-lanes:             in < -0.0
+    //   A-lanes:     -0.0 <= in <= 0x0.ffffffp31
+    //   B-lanes: 0x1.0p31 <= in <= 0x0.ffffffp32
+    //   V-lanes: 0x1.0p32 <= in, or isnan(in)
+    //
+    // We need to bail out to throw a RangeError if we see any N-lanes or
+    // V-lanes.
+    //
+    // For A-lanes and B-lanes, we make two float -> int32 conversions:
+    //
+    //   A = cvttps2dq(in)
+    //   B = cvttps2dq(in - 0x1.0p31f)
+    //
+    // Note that the subtraction for the B computation is exact for B-lanes.
+    // There is no rounding, so B is the low 31 bits of the correctly converted
+    // result.
+    //
+    // The cvttps2dq instruction produces 0x80000000 when the input is NaN or
+    // out of range for a signed int32_t. This conveniently provides the missing
+    // high bit for B, so the desired result is A for A-lanes and A|B for
+    // B-lanes.
+
+    ScratchSimd128Scope scratch(masm);
+
+    // First we need to filter out N-lanes. We need to use a floating point
+    // comparison to do that because cvttps2dq maps the negative range
+    // [-0x0.ffffffp0;-0.0] to 0. We can't simply look at the sign bits of in
+    // because -0.0 is a valid input.
+    // TODO: It may be faster to let ool code deal with -0.0 and skip the
+    // vcmpleps here.
+    masm.zeroFloat32x4(scratch);
+    masm.vcmpleps(Operand(in), scratch, scratch);
+    masm.vmovmskps(scratch, temp);
+    masm.cmp32(temp, Imm32(15));
+    bailoutIf(Assembler::NotEqual, ins->snapshot());
+
+    // TODO: If the majority of lanes are A-lanes, it could be faster to compute
+    // A first, use vmovmskps to check for any non-A-lanes and handle them in
+    // ool code. OTOH, if we're wrong about the lane distribution, that would be
+    // slower.
+
+    // Compute B in |scratch|.
+    static const float Adjust = 0x80000000; // 0x1.0p31f for the benefit of MSVC.
+    static const SimdConstant Bias = SimdConstant::SplatX4(-Adjust);
+    masm.loadConstantFloat32x4(Bias, scratch);
+    masm.packedAddFloat32(Operand(in), scratch);
+    masm.convertFloat32x4ToInt32x4(scratch, scratch);
+
+    // Compute A in |out|. This is the last time we use |in| and the first time
+    // we use |out|, so we can tolerate if they are the same register.
+    masm.convertFloat32x4ToInt32x4(in, out);
+
+    // Since we filtered out N-lanes, we can identify A-lanes by the sign bits
+    // in A: Any A-lanes will be positive in A, and B-lanes and V-lanes will be
+    // 0x80000000 in A. Compute a mask of non-A-lanes into |tempF|.
+    masm.zeroFloat32x4(tempF);
+    masm.packedGreaterThanInt32x4(Operand(out), tempF);
+
+    // Clear the A-lanes in B.
+    masm.bitwiseAndX4(Operand(tempF), scratch);
+
+    // Compute the final result: A for A-lanes, A|B for B-lanes.
+    masm.bitwiseOrX4(Operand(scratch), out);
+
+    // We still need to filter out the V-lanes. They would show up as 0x80000000
+    // in both A and B. Since we cleared the valid A-lanes in B, the V-lanes are
+    // the remaining negative lanes in B.
+    masm.vmovmskps(scratch, temp);
+    masm.cmp32(temp, Imm32(0));
+    bailoutIf(Assembler::NotEqual, ins->snapshot());
+}
+
 void
 CodeGeneratorX86Shared::visitSimdValueInt32x4(LSimdValueInt32x4* ins)
 {
     MOZ_ASSERT(ins->mir()->type() == MIRType_Int32x4 || ins->mir()->type() == MIRType_Bool32x4);
 
     FloatRegister output = ToFloatRegister(ins->output());
     if (AssemblerX86Shared::HasSSE41()) {
         masm.vmovd(ToRegister(ins->getOperand(0)), output);
--- a/js/src/jit/x86-shared/CodeGenerator-x86-shared.h
+++ b/js/src/jit/x86-shared/CodeGenerator-x86-shared.h
@@ -256,16 +256,17 @@ class CodeGeneratorX86Shared : public Co
     // SIMD operators
     void visitSimdValueInt32x4(LSimdValueInt32x4* lir);
     void visitSimdValueFloat32x4(LSimdValueFloat32x4* lir);
     void visitSimdSplatX4(LSimdSplatX4* lir);
     void visitInt32x4(LInt32x4* ins);
     void visitFloat32x4(LFloat32x4* ins);
     void visitInt32x4ToFloat32x4(LInt32x4ToFloat32x4* ins);
     void visitFloat32x4ToInt32x4(LFloat32x4ToInt32x4* ins);
+    void visitFloat32x4ToUint32x4(LFloat32x4ToUint32x4* ins);
     void visitSimdReinterpretCast(LSimdReinterpretCast* lir);
     void visitSimdExtractElementB(LSimdExtractElementB* lir);
     void visitSimdExtractElementI(LSimdExtractElementI* lir);
     void visitSimdExtractElementF(LSimdExtractElementF* lir);
     void visitSimdInsertElementI(LSimdInsertElementI* lir);
     void visitSimdInsertElementF(LSimdInsertElementF* lir);
     void visitSimdSwizzleI(LSimdSwizzleI* lir);
     void visitSimdSwizzleF(LSimdSwizzleF* lir);