Bug 1135042: Inline SIMD loads in Ion; r=bhackett
author       Benjamin Bouvier <benj@benj.me>
date         Mon, 02 Mar 2015 12:11:19 +0100
changeset 260691 ddee53b10d77a87b6b553d1fe0a3de71b9448eb2
parent 260690 085b7d36e31d30904edf5742f420b8ab9eeba171
child 260692 3f54fe544025d1a49bbf51ca1f40761a2cbe3e5f
push id      4718
push user    raliiev@mozilla.com
push date    Mon, 11 May 2015 18:39:53 +0000
treeherder   mozilla-beta@c20c4ef55f08
reviewers    bhackett
bugs         1135042
milestone    39.0a1
Bug 1135042: Inline SIMD loads in Ion; r=bhackett
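This teaches Ion to inline SIMD.int32x4.load and SIMD.float32x4.load on typed arrays: IonBuilder emits a bounds check plus an MLoadTypedArrayElement whose read type is a 128-bit SIMD type, and the macro assemblers gain the unaligned vector loads the code generator needs. As a rough sketch (names and values are illustrative, mirroring the new jit-test), this is the kind of call that now compiles down to a single unaligned 16-byte load once the function is warm:

    var f32 = new Float32Array(16);
    for (var i = 0; i < 16; i++)
        f32[i] = i + 1;
    // Inlined by Ion after warm-up: a bounds check followed by one
    // unaligned 128-bit load (vmovups/vmovdqu on x86), lanes f32[4..7].
    var v = SIMD.float32x4.load(f32, 4);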
js/src/builtin/SIMD.h
js/src/jit-test/tests/SIMD/load.js
js/src/jit/CodeGenerator.cpp
js/src/jit/IonBuilder.h
js/src/jit/Lowering.cpp
js/src/jit/MCallOptimize.cpp
js/src/jit/MIR.h
js/src/jit/MacroAssembler.cpp
js/src/jit/RangeAnalysis.cpp
js/src/jit/arm/MacroAssembler-arm.h
js/src/jit/mips/MacroAssembler-mips.h
js/src/jit/shared/MacroAssembler-x86-shared.h
--- a/js/src/builtin/SIMD.h
+++ b/js/src/builtin/SIMD.h
@@ -237,26 +237,26 @@
     BITWISE_COMMONX4_SIMD_OP(_)      \
     WITH_COMMONX4_SIMD_OP(_)         \
     _(bitselect)                     \
     _(select)                        \
     _(splat)                         \
     _(not)                           \
     _(neg)                           \
     _(swizzle)                       \
+    _(load)                          \
+    _(store)                         \
     _(check)
 #define FOREACH_COMMONX4_SIMD_OP(_)  \
     ION_COMMONX4_SIMD_OP(_)          \
     COMP_COMMONX4_TO_INT32X4_SIMD_OP(_) \
     _(shuffle)                       \
-    _(load)                          \
     _(loadX)                         \
     _(loadXY)                        \
     _(loadXYZ)                       \
-    _(store)                         \
     _(storeX)                        \
     _(storeXY)                       \
     _(storeXYZ)
 #define FORALL_SIMD_OP(_)            \
     FOREACH_INT32X4_SIMD_OP(_)       \
     FOREACH_FLOAT32X4_SIMD_OP(_)     \
     FOREACH_COMMONX4_SIMD_OP(_)
 
new file mode 100644
--- /dev/null
+++ b/js/src/jit-test/tests/SIMD/load.js
@@ -0,0 +1,48 @@
+load(libdir + 'simd.js');
+
+setJitCompilerOption("ion.warmup.trigger", 40);
+
+function f() {
+    var f32 = new Float32Array(16);
+    for (var i = 0; i < 16; i++)
+        f32[i] = i + 1;
+
+    var f64 = new Float64Array(f32.buffer);
+    var i32 = new Int32Array(f32.buffer);
+    var u32 = new Uint32Array(f32.buffer);
+    var i16 = new Int16Array(f32.buffer);
+    var u16 = new Uint16Array(f32.buffer);
+    var i8  = new Int8Array(f32.buffer);
+    var u8  = new Uint8Array(f32.buffer);
+
+    var r;
+    for (var i = 0; i < 150; i++) {
+        assertEqX4(SIMD.float32x4.load(f64, 0), [1,2,3,4]);
+        assertEqX4(SIMD.float32x4.load(f32, 1), [2,3,4,5]);
+        assertEqX4(SIMD.float32x4.load(i32, 2), [3,4,5,6]);
+        assertEqX4(SIMD.float32x4.load(i16, 3 << 1), [4,5,6,7]);
+        assertEqX4(SIMD.float32x4.load(u16, 4 << 1), [5,6,7,8]);
+        assertEqX4(SIMD.float32x4.load(i8 , 5 << 2), [6,7,8,9]);
+        assertEqX4(SIMD.float32x4.load(u8 , 6 << 2), [7,8,9,10]);
+
+        assertEqX4(SIMD.float32x4.load(f64, (16 >> 1) - (4 >> 1)), [13,14,15,16]);
+        assertEqX4(SIMD.float32x4.load(f32, 16 - 4),               [13,14,15,16]);
+        assertEqX4(SIMD.float32x4.load(i32, 16 - 4),               [13,14,15,16]);
+        assertEqX4(SIMD.float32x4.load(i16, (16 << 1) - (4 << 1)), [13,14,15,16]);
+        assertEqX4(SIMD.float32x4.load(u16, (16 << 1) - (4 << 1)), [13,14,15,16]);
+        assertEqX4(SIMD.float32x4.load(i8,  (16 << 2) - (4 << 2)), [13,14,15,16]);
+        assertEqX4(SIMD.float32x4.load(u8,  (16 << 2) - (4 << 2)), [13,14,15,16]);
+
+        var caught = false;
+        try {
+            SIMD.float32x4.load(i8, (i < 149) ? 0 : (16 << 2) - (4 << 2) + 1);
+        } catch (e) {
+            caught = true;
+        }
+        assertEq(i < 149 || caught, true);
+    }
+    return r
+}
+
+f();
+
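The second argument to load above is an element index into the view being passed in, not a byte offset, which is why the test shifts indices when switching views: the same byte position is reached at k in a 32-bit view, k << 1 in a 16-bit view, and k << 2 in an 8-bit view. A minimal sketch of that equivalence (illustrative only):

    var buf = new Float32Array(16).buffer;
    var k = 3;  // float32 element index, i.e. byte offset k * 4 = 12
    // All three loads read the same 16 bytes starting at byte 12.
    SIMD.float32x4.load(new Float32Array(buf), k);
    SIMD.float32x4.load(new Int16Array(buf), k << 1);
    SIMD.float32x4.load(new Int8Array(buf), k << 2);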
--- a/js/src/jit/CodeGenerator.cpp
+++ b/js/src/jit/CodeGenerator.cpp
@@ -8649,27 +8649,28 @@ CodeGenerator::visitUnboxObjectOrNull(LU
 void
 CodeGenerator::visitLoadTypedArrayElement(LLoadTypedArrayElement *lir)
 {
     Register elements = ToRegister(lir->elements());
     Register temp = lir->temp()->isBogusTemp() ? InvalidReg : ToRegister(lir->temp());
     AnyRegister out = ToAnyRegister(lir->output());
 
     Scalar::Type arrayType = lir->mir()->arrayType();
+    Scalar::Type readType  = lir->mir()->readType();
     int width = Scalar::byteSize(arrayType);
 
     Label fail;
     if (lir->index()->isConstant()) {
         Address source(elements, ToInt32(lir->index()) * width + lir->mir()->offsetAdjustment());
-        masm.loadFromTypedArray(arrayType, source, out, temp, &fail,
+        masm.loadFromTypedArray(readType, source, out, temp, &fail,
                                 lir->mir()->canonicalizeDoubles());
     } else {
         BaseIndex source(elements, ToRegister(lir->index()), ScaleFromElemWidth(width),
                          lir->mir()->offsetAdjustment());
-        masm.loadFromTypedArray(arrayType, source, out, temp, &fail,
+        masm.loadFromTypedArray(readType, source, out, temp, &fail,
                                 lir->mir()->canonicalizeDoubles());
     }
 
     if (fail.used())
         bailoutFrom(&fail, lir->snapshot());
 }
 
 void
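The address computation above still scales the index by the view's own element width (Scalar::byteSize(arrayType)); only the kind of load emitted is chosen by readType, and the two differ solely on the inlined SIMD path. A hedged illustration of that split (values as in the new test):

    var u8 = new Uint8Array(new Float32Array([1,2,3,4,5,6,7,8]).buffer);
    // arrayType is Uint8, so the index is scaled by 1 byte; readType is
    // Float32x4, so a single unaligned 16-byte load is emitted.
    // Starting at byte 4, the lanes are [2, 3, 4, 5].
    SIMD.float32x4.load(u8, 4);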
--- a/js/src/jit/IonBuilder.h
+++ b/js/src/jit/IonBuilder.h
@@ -833,16 +833,17 @@ class IonBuilder
                                   SimdTypeDescr::Type type);
     InliningStatus inlineSimdSplat(CallInfo &callInfo, JSNative native, SimdTypeDescr::Type type);
     InliningStatus inlineSimdSwizzle(CallInfo &callInfo, JSNative native, SimdTypeDescr::Type type);
     InliningStatus inlineSimdCheck(CallInfo &callInfo, JSNative native, SimdTypeDescr::Type type);
     InliningStatus inlineSimdConvert(CallInfo &callInfo, JSNative native, bool isCast,
                                      SimdTypeDescr::Type from, SimdTypeDescr::Type to);
     InliningStatus inlineSimdSelect(CallInfo &callInfo, JSNative native, bool isElementWise,
                                     SimdTypeDescr::Type type);
+    InliningStatus inlineSimdLoad(CallInfo &callInfo, JSNative native, SimdTypeDescr::Type type);
 
     // Utility intrinsics.
     InliningStatus inlineIsCallable(CallInfo &callInfo);
     InliningStatus inlineIsObject(CallInfo &callInfo);
     InliningStatus inlineToObject(CallInfo &callInfo);
     InliningStatus inlineToInteger(CallInfo &callInfo);
     InliningStatus inlineToString(CallInfo &callInfo);
     InliningStatus inlineDump(CallInfo &callInfo);
--- a/js/src/jit/Lowering.cpp
+++ b/js/src/jit/Lowering.cpp
@@ -2883,17 +2883,18 @@ void
 LIRGenerator::visitLoadTypedArrayElement(MLoadTypedArrayElement *ins)
 {
     MOZ_ASSERT(IsValidElementsType(ins->elements(), ins->offsetAdjustment()));
     MOZ_ASSERT(ins->index()->type() == MIRType_Int32);
 
     const LUse elements = useRegister(ins->elements());
     const LAllocation index = useRegisterOrConstant(ins->index());
 
-    MOZ_ASSERT(IsNumberType(ins->type()) || ins->type() == MIRType_Boolean);
+    MOZ_ASSERT(IsNumberType(ins->type()) || IsSimdType(ins->type()) ||
+               ins->type() == MIRType_Boolean);
 
     // We need a temp register for Uint32Array with known double result.
     LDefinition tempDef = LDefinition::BogusTemp();
     if (ins->arrayType() == Scalar::Uint32 && IsFloatingPointType(ins->type()))
         tempDef = temp();
 
     if (ins->requiresMemoryBarrier()) {
         LMemoryBarrier *fence = new(alloc()) LMemoryBarrier(MembarBeforeLoad);
--- a/js/src/jit/MCallOptimize.cpp
+++ b/js/src/jit/MCallOptimize.cpp
@@ -349,16 +349,21 @@ IonBuilder::inlineNativeCall(CallInfo &c
     if (native == js::simd_float32x4_select)
         return inlineSimdSelect(callInfo, native, IsElementWise(true), SimdTypeDescr::TYPE_FLOAT32);
     if (native == js::simd_float32x4_bitselect)
         return inlineSimdSelect(callInfo, native, IsElementWise(false), SimdTypeDescr::TYPE_FLOAT32);
 
     if (native == js::simd_int32x4_swizzle)
         return inlineSimdSwizzle(callInfo, native, SimdTypeDescr::TYPE_INT32);
 
+    if (native == js::simd_int32x4_load)
+        return inlineSimdLoad(callInfo, native, SimdTypeDescr::TYPE_INT32);
+    if (native == js::simd_float32x4_load)
+        return inlineSimdLoad(callInfo, native, SimdTypeDescr::TYPE_FLOAT32);
+
     return InliningStatus_NotInlined;
 }
 
 IonBuilder::InliningStatus
 IonBuilder::inlineNativeGetter(CallInfo &callInfo, JSFunction *target)
 {
     MOZ_ASSERT(target->isNative());
     JSNative native = target->native();
@@ -3113,10 +3118,70 @@ IonBuilder::inlineSimdSwizzle(CallInfo &
     for (size_t i = 0; i < 4; i++)
         lanes[i] = callInfo.getArg(1 + i);
 
     MIRType mirType = SimdTypeDescrToMIRType(type);
     MSimdGeneralSwizzle *ins = MSimdGeneralSwizzle::New(alloc(), callInfo.getArg(0), lanes, mirType);
     return boxSimd(callInfo, ins, templateObj);
 }
 
+static Scalar::Type
+SimdTypeToScalarType(SimdTypeDescr::Type type)
+{
+    switch (type) {
+      case SimdTypeDescr::TYPE_FLOAT32: return Scalar::Float32x4;
+      case SimdTypeDescr::TYPE_INT32:   return Scalar::Int32x4;
+      case SimdTypeDescr::TYPE_FLOAT64: break;
+    }
+    MOZ_CRASH("unexpected simd type");
+}
+
+IonBuilder::InliningStatus
+IonBuilder::inlineSimdLoad(CallInfo &callInfo, JSNative native, SimdTypeDescr::Type type)
+{
+    InlineTypedObject *templateObj = nullptr;
+    if (!checkInlineSimd(callInfo, native, type, 2, &templateObj))
+        return InliningStatus_NotInlined;
+
+    MDefinition *array = callInfo.getArg(0);
+    MDefinition *index = callInfo.getArg(1);
+
+    Scalar::Type arrayType;
+    if (!ElementAccessIsAnyTypedArray(constraints(), array, index, &arrayType))
+        return InliningStatus_NotInlined;
+
+    MInstruction *indexAsInt32 = MToInt32::New(alloc(), index);
+    current->add(indexAsInt32);
+    index = indexAsInt32;
+
+    MDefinition *indexForBoundsCheck = index;
+
+    // Artificially make sure the index is in bounds by adding the number of
+    // extra slots needed (e.g. when reading from a Float32Array we need to be
+    // in bounds for 4 slots, so add 3, etc.).
+    MOZ_ASSERT(Simd128DataSize % Scalar::byteSize(arrayType) == 0);
+    int32_t suppSlotsNeeded = Simd128DataSize / Scalar::byteSize(arrayType) - 1;
+    if (suppSlotsNeeded) {
+        MConstant *suppSlots = constant(Int32Value(suppSlotsNeeded));
+        MAdd *addedIndex = MAdd::New(alloc(), index, suppSlots);
+        // Even if this addition overflows, we're fine because the code generated
+        // for the bounds check uses uint32 arithmetic
+        addedIndex->setInt32();
+        current->add(addedIndex);
+        indexForBoundsCheck = addedIndex;
+    }
+
+    MInstruction *length;
+    MInstruction *elements;
+    addTypedArrayLengthAndData(array, SkipBoundsCheck, &index, &length, &elements);
+
+    MInstruction *check = MBoundsCheck::New(alloc(), indexForBoundsCheck, length);
+    current->add(check);
+
+    MLoadTypedArrayElement *load = MLoadTypedArrayElement::New(alloc(), elements, index, arrayType);
+    load->setResultType(SimdTypeDescrToMIRType(type));
+    load->setReadType(SimdTypeToScalarType(type));
+
+    return boxSimd(callInfo, load, templateObj);
+}
+
 } // namespace jit
 } // namespace js
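The comment in inlineSimdLoad describes the bounds-check trick: instead of checking both ends of the 16-byte access, the builder checks one adjusted element index against the view's length, relying on uint32 arithmetic so that an overflowing addition still fails the check. Roughly, in JS terms (a sketch, not the engine's actual code):

    // byteSize is the element width of the view being loaded from.
    function simdLoadIndexInBounds(index, length, byteSize) {
        var suppSlots = 16 / byteSize - 1;  // 3 for Float32Array, 15 for Uint8Array
        return ((index + suppSlots) >>> 0) < length;  // uint32 compare, as MBoundsCheck does
    }
    // Matches the jit-test: a 64-byte buffer viewed as a Uint8Array allows
    // index 48 (48 + 15 = 63 < 64) but rejects index 49 (49 + 15 = 64).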
--- a/js/src/jit/MIR.h
+++ b/js/src/jit/MIR.h
@@ -8828,25 +8828,27 @@ enum MemoryBarrierRequirement
 
 // Also see comments above MMemoryBarrier, below.
 
 class MLoadTypedArrayElement
   : public MBinaryInstruction,
     public SingleObjectPolicy::Data
 {
     Scalar::Type arrayType_;
+    Scalar::Type readType_;
     bool requiresBarrier_;
     int32_t offsetAdjustment_;
     bool canonicalizeDoubles_;
 
     MLoadTypedArrayElement(MDefinition *elements, MDefinition *index,
                            Scalar::Type arrayType, MemoryBarrierRequirement requiresBarrier,
                            int32_t offsetAdjustment, bool canonicalizeDoubles)
       : MBinaryInstruction(elements, index),
         arrayType_(arrayType),
+        readType_(arrayType),
         requiresBarrier_(requiresBarrier == DoesRequireMemoryBarrier),
         offsetAdjustment_(offsetAdjustment),
         canonicalizeDoubles_(canonicalizeDoubles)
     {
         setResultType(MIRType_Value);
         if (requiresBarrier_)
             setGuard();         // Not removable or movable
         else
@@ -8865,16 +8867,23 @@ class MLoadTypedArrayElement
                                        int32_t offsetAdjustment = 0,
                                        bool canonicalizeDoubles = true)
     {
         return new(alloc) MLoadTypedArrayElement(elements, index, arrayType,
                                                  requiresBarrier, offsetAdjustment,
                                                  canonicalizeDoubles);
     }
 
+    void setReadType(Scalar::Type type) {
+        readType_ = type;
+    }
+    Scalar::Type readType() const {
+        return readType_;
+    }
+
     Scalar::Type arrayType() const {
         return arrayType_;
     }
     bool fallible() const {
         // Bailout if the result does not fit in an int32.
         return arrayType_ == Scalar::Uint32 && type() == MIRType_Int32;
     }
     bool requiresMemoryBarrier() const {
@@ -8903,16 +8912,18 @@ class MLoadTypedArrayElement
     bool congruentTo(const MDefinition *ins) const MOZ_OVERRIDE {
         if (requiresBarrier_)
             return false;
         if (!ins->isLoadTypedArrayElement())
             return false;
         const MLoadTypedArrayElement *other = ins->toLoadTypedArrayElement();
         if (arrayType_ != other->arrayType_)
             return false;
+        if (readType_ != other->readType_)
+            return false;
         if (offsetAdjustment() != other->offsetAdjustment())
             return false;
         if (canonicalizeDoubles() != other->canonicalizeDoubles())
             return false;
         return congruentIfOperandsEqual(other);
     }
 
     void printOpcode(FILE *fp) const MOZ_OVERRIDE;
--- a/js/src/jit/MacroAssembler.cpp
+++ b/js/src/jit/MacroAssembler.cpp
@@ -351,16 +351,22 @@ MacroAssembler::loadFromTypedArray(Scala
         loadFloat32(src, dest.fpu());
         canonicalizeFloat(dest.fpu());
         break;
       case Scalar::Float64:
         loadDouble(src, dest.fpu());
         if (canonicalizeDoubles)
             canonicalizeDouble(dest.fpu());
         break;
+      case Scalar::Int32x4:
+        loadUnalignedInt32x4(src, dest.fpu());
+        break;
+      case Scalar::Float32x4:
+        loadUnalignedFloat32x4(src, dest.fpu());
+        break;
       default:
         MOZ_CRASH("Invalid typed array type");
     }
 }
 
 template void MacroAssembler::loadFromTypedArray(Scalar::Type arrayType, const Address &src, AnyRegister dest,
                                                  Register temp, Label *fail, bool canonicalizeDoubles);
 template void MacroAssembler::loadFromTypedArray(Scalar::Type arrayType, const BaseIndex &src, AnyRegister dest,
--- a/js/src/jit/RangeAnalysis.cpp
+++ b/js/src/jit/RangeAnalysis.cpp
@@ -1643,17 +1643,18 @@ MToInt32::computeRange(TempAllocator &al
 
 void
 MLimitedTruncate::computeRange(TempAllocator &alloc)
 {
     Range *output = new(alloc) Range(input());
     setRange(output);
 }
 
-static Range *GetTypedArrayRange(TempAllocator &alloc, int type)
+static Range *
+GetTypedArrayRange(TempAllocator &alloc, Scalar::Type type)
 {
     switch (type) {
       case Scalar::Uint8Clamped:
       case Scalar::Uint8:
         return Range::NewUInt32Range(alloc, 0, UINT8_MAX);
       case Scalar::Uint16:
         return Range::NewUInt32Range(alloc, 0, UINT16_MAX);
       case Scalar::Uint32:
@@ -1663,28 +1664,30 @@ static Range *GetTypedArrayRange(TempAll
         return Range::NewInt32Range(alloc, INT8_MIN, INT8_MAX);
       case Scalar::Int16:
         return Range::NewInt32Range(alloc, INT16_MIN, INT16_MAX);
       case Scalar::Int32:
         return Range::NewInt32Range(alloc, INT32_MIN, INT32_MAX);
 
       case Scalar::Float32:
       case Scalar::Float64:
+      case Scalar::Float32x4:
+      case Scalar::Int32x4:
+      case Scalar::MaxTypedArrayViewType:
         break;
     }
-
-  return nullptr;
+    return nullptr;
 }
 
 void
 MLoadTypedArrayElement::computeRange(TempAllocator &alloc)
 {
     // We have an Int32 type and if this is a UInt32 load it may produce a value
     // outside of our range, but we have a bailout to handle those cases.
-    setRange(GetTypedArrayRange(alloc, arrayType()));
+    setRange(GetTypedArrayRange(alloc, readType()));
 }
 
 void
 MLoadTypedArrayElementStatic::computeRange(TempAllocator &alloc)
 {
     // We don't currently use MLoadTypedArrayElementStatic for uint32, so we
     // don't have to worry about it returning a value outside our type.
     MOZ_ASSERT(AnyTypedArrayType(someTypedArray_) != Scalar::Uint32);
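Using readType() rather than arrayType() here means GetTypedArrayRange returns nullptr for the inlined SIMD loads (Float32x4 and Int32x4 fall through to the break) instead of tagging the vector result with the element type's scalar range, which would be wrong: each lane is assembled from several elements. A small illustration (assuming the little-endian semantics exercised by the new test):

    var i8 = new Int8Array(16);
    i8[3] = -1;  // bytes 0..3 are 00 00 00 ff
    // Lane 0 is built from 4 little-endian bytes: 0xff000000 = -16777216,
    // far outside the Int8 range [-128, 127] that arrayType() would suggest.
    SIMD.int32x4.load(i8, 0);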
--- a/js/src/jit/arm/MacroAssembler-arm.h
+++ b/js/src/jit/arm/MacroAssembler-arm.h
@@ -1393,21 +1393,23 @@ class MacroAssemblerARMCompat : public M
     void loadPtr(AbsoluteAddress address, Register dest);
     void loadPtr(AsmJSAbsoluteAddress address, Register dest);
 
     void loadPrivate(const Address &address, Register dest);
 
     void loadAlignedInt32x4(const Address &addr, FloatRegister dest) { MOZ_CRASH("NYI"); }
     void storeAlignedInt32x4(FloatRegister src, Address addr) { MOZ_CRASH("NYI"); }
     void loadUnalignedInt32x4(const Address &addr, FloatRegister dest) { MOZ_CRASH("NYI"); }
+    void loadUnalignedInt32x4(const BaseIndex &addr, FloatRegister dest) { MOZ_CRASH("NYI"); }
     void storeUnalignedInt32x4(FloatRegister src, Address addr) { MOZ_CRASH("NYI"); }
 
     void loadAlignedFloat32x4(const Address &addr, FloatRegister dest) { MOZ_CRASH("NYI"); }
     void storeAlignedFloat32x4(FloatRegister src, Address addr) { MOZ_CRASH("NYI"); }
     void loadUnalignedFloat32x4(const Address &addr, FloatRegister dest) { MOZ_CRASH("NYI"); }
+    void loadUnalignedFloat32x4(const BaseIndex &addr, FloatRegister dest) { MOZ_CRASH("NYI"); }
     void storeUnalignedFloat32x4(FloatRegister src, Address addr) { MOZ_CRASH("NYI"); }
 
     void loadDouble(const Address &addr, FloatRegister dest);
     void loadDouble(const BaseIndex &src, FloatRegister dest);
 
     // Load a float value into a register, then expand it to a double.
     void loadFloatAsDouble(const Address &addr, FloatRegister dest);
     void loadFloatAsDouble(const BaseIndex &src, FloatRegister dest);
--- a/js/src/jit/mips/MacroAssembler-mips.h
+++ b/js/src/jit/mips/MacroAssembler-mips.h
@@ -1254,21 +1254,23 @@ public:
     void loadPtr(AbsoluteAddress address, Register dest);
     void loadPtr(AsmJSAbsoluteAddress address, Register dest);
 
     void loadPrivate(const Address &address, Register dest);
 
     void loadAlignedInt32x4(const Address &addr, FloatRegister dest) { MOZ_CRASH("NYI"); }
     void storeAlignedInt32x4(FloatRegister src, Address addr) { MOZ_CRASH("NYI"); }
     void loadUnalignedInt32x4(const Address &addr, FloatRegister dest) { MOZ_CRASH("NYI"); }
+    void loadUnalignedInt32x4(const BaseIndex &addr, FloatRegister dest) { MOZ_CRASH("NYI"); }
     void storeUnalignedInt32x4(FloatRegister src, Address addr) { MOZ_CRASH("NYI"); }
 
     void loadAlignedFloat32x4(const Address &addr, FloatRegister dest) { MOZ_CRASH("NYI"); }
     void storeAlignedFloat32x4(FloatRegister src, Address addr) { MOZ_CRASH("NYI"); }
     void loadUnalignedFloat32x4(const Address &addr, FloatRegister dest) { MOZ_CRASH("NYI"); }
+    void loadUnalignedFloat32x4(const BaseIndex &addr, FloatRegister dest) { MOZ_CRASH("NYI"); }
     void storeUnalignedFloat32x4(FloatRegister src, Address addr) { MOZ_CRASH("NYI"); }
 
     void loadDouble(const Address &addr, FloatRegister dest);
     void loadDouble(const BaseIndex &src, FloatRegister dest);
 
     // Load a float value into a register, then expand it to a double.
     void loadFloatAsDouble(const Address &addr, FloatRegister dest);
     void loadFloatAsDouble(const BaseIndex &src, FloatRegister dest);
--- a/js/src/jit/shared/MacroAssembler-x86-shared.h
+++ b/js/src/jit/shared/MacroAssembler-x86-shared.h
@@ -925,16 +925,19 @@ class MacroAssemblerX86Shared : public A
         if (HasAVX() && src.kind() == Operand::FPREG)
             return FloatRegister::FromCode(src.fpu());
         loadAlignedInt32x4(src, dest);
         return dest;
     }
     void loadUnalignedInt32x4(const Address &src, FloatRegister dest) {
         vmovdqu(Operand(src), dest);
     }
+    void loadUnalignedInt32x4(const BaseIndex &src, FloatRegister dest) {
+        vmovdqu(Operand(src), dest);
+    }
     void loadUnalignedInt32x4(const Operand &src, FloatRegister dest) {
         vmovdqu(src, dest);
     }
     void storeUnalignedInt32x4(FloatRegister src, const Address &dest) {
         vmovdqu(src, Operand(dest));
     }
     void storeUnalignedInt32x4(FloatRegister src, const Operand &dest) {
         vmovdqu(src, dest);
@@ -1006,16 +1009,19 @@ class MacroAssemblerX86Shared : public A
         if (HasAVX() && src.kind() == Operand::FPREG)
             return FloatRegister::FromCode(src.fpu());
         loadAlignedFloat32x4(src, dest);
         return dest;
     }
     void loadUnalignedFloat32x4(const Address &src, FloatRegister dest) {
         vmovups(Operand(src), dest);
     }
+    void loadUnalignedFloat32x4(const BaseIndex &src, FloatRegister dest) {
+        vmovups(Operand(src), dest);
+    }
     void loadUnalignedFloat32x4(const Operand &src, FloatRegister dest) {
         vmovups(src, dest);
     }
     void storeUnalignedFloat32x4(FloatRegister src, const Address &dest) {
         vmovups(src, Operand(dest));
     }
     void storeUnalignedFloat32x4(FloatRegister src, const Operand &dest) {
         vmovups(src, dest);