Bug 1135039: Implement general SIMD swizzle and use it; r=sunfish
author Benjamin Bouvier <benj@benj.me>
Fri, 27 Feb 2015 09:35:03 +0100
changeset 231528 b5cac236faad28d0a4c971e7c2f4a3499336ecae
parent 231527 376b134c1495543d5a57fc70de38d74e00629797
child 231529 756a0e4a9f48c974dadb68ac2b6eaa704ba1c325
push id 56290
push user benj@benj.me
push date Tue, 03 Mar 2015 10:15:52 +0000
treeherder mozilla-inbound@b7b0305f5747 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers sunfish
bugs 1135039
milestone 39.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1135039: Implement general SIMD swizzle and use it; r=sunfish
js/src/builtin/SIMD.h
js/src/jit/IonBuilder.h
js/src/jit/LIR-Common.h
js/src/jit/LOpcodes.h
js/src/jit/Lowering.cpp
js/src/jit/Lowering.h
js/src/jit/MCallOptimize.cpp
js/src/jit/MIR.cpp
js/src/jit/MIR.h
js/src/jit/MOpcodes.h
js/src/jit/TypePolicy.cpp
js/src/jit/TypePolicy.h
js/src/jit/arm/CodeGenerator-arm.h
js/src/jit/mips/CodeGenerator-mips.h
js/src/jit/shared/CodeGenerator-x86-shared.cpp
js/src/jit/shared/CodeGenerator-x86-shared.h
--- a/js/src/builtin/SIMD.h
+++ b/js/src/builtin/SIMD.h
@@ -236,21 +236,21 @@
     ARITH_COMMONX4_SIMD_OP(_)        \
     BITWISE_COMMONX4_SIMD_OP(_)      \
     WITH_COMMONX4_SIMD_OP(_)         \
     _(bitselect)                     \
     _(select)                        \
     _(splat)                         \
     _(not)                           \
     _(neg)                           \
+    _(swizzle)                       \
     _(check)
 #define FOREACH_COMMONX4_SIMD_OP(_)  \
     ION_COMMONX4_SIMD_OP(_)          \
     COMP_COMMONX4_TO_INT32X4_SIMD_OP(_) \
-    _(swizzle)                       \
     _(shuffle)                       \
     _(load)                          \
     _(loadX)                         \
     _(loadXY)                        \
     _(loadXYZ)                       \
     _(store)                         \
     _(storeX)                        \
     _(storeXY)                       \
--- a/js/src/jit/IonBuilder.h
+++ b/js/src/jit/IonBuilder.h
@@ -823,16 +823,17 @@ class IonBuilder
                                     typename T::Operation op, SimdTypeDescr::Type type);
     InliningStatus inlineCompSimd(CallInfo &callInfo, JSNative native,
                                   MSimdBinaryComp::Operation op, SimdTypeDescr::Type compType);
     InliningStatus inlineUnarySimd(CallInfo &callInfo, JSNative native,
                                    MSimdUnaryArith::Operation op, SimdTypeDescr::Type type);
     InliningStatus inlineSimdWith(CallInfo &callInfo, JSNative native, SimdLane lane,
                                   SimdTypeDescr::Type type);
     InliningStatus inlineSimdSplat(CallInfo &callInfo, JSNative native, SimdTypeDescr::Type type);
+    InliningStatus inlineSimdSwizzle(CallInfo &callInfo, JSNative native, SimdTypeDescr::Type type);
     InliningStatus inlineSimdCheck(CallInfo &callInfo, JSNative native, SimdTypeDescr::Type type);
     InliningStatus inlineSimdConvert(CallInfo &callInfo, JSNative native, bool isCast,
                                      SimdTypeDescr::Type from, SimdTypeDescr::Type to);
     InliningStatus inlineSimdSelect(CallInfo &callInfo, JSNative native, bool isElementWise,
                                     SimdTypeDescr::Type type);
 
     // Utility intrinsics.
     InliningStatus inlineIsCallable(CallInfo &callInfo);
--- a/js/src/jit/LIR-Common.h
+++ b/js/src/jit/LIR-Common.h
@@ -369,16 +369,59 @@ class LSimdSwizzleI : public LSimdSwizzl
 class LSimdSwizzleF : public LSimdSwizzleBase
 {
   public:
     LIR_HEADER(SimdSwizzleF);
     explicit LSimdSwizzleF(const LAllocation &base) : LSimdSwizzleBase(base)
     {}
 };
 
+class LSimdGeneralSwizzleBase : public LInstructionHelper<1, 5, 1>
+{
+  public:
+    LSimdGeneralSwizzleBase(const LAllocation &base, const LAllocation lanes[4],
+                            const LDefinition &temp)
+    {
+        setOperand(0, base);
+        for (size_t i = 0; i < 4; i++)
+            setOperand(1 + i, lanes[i]);
+        setTemp(0, temp);
+    }
+
+    const LAllocation *base() {
+        return getOperand(0);
+    }
+    const LAllocation *lane(size_t i) {
+        return getOperand(1 + i);
+    }
+    const LDefinition *temp() {
+        return getTemp(0);
+    }
+};
+
+class LSimdGeneralSwizzleI : public LSimdGeneralSwizzleBase
+{
+  public:
+    LIR_HEADER(SimdGeneralSwizzleI);
+    LSimdGeneralSwizzleI(const LAllocation &base, const LAllocation lanes[4],
+                         const LDefinition &temp)
+      : LSimdGeneralSwizzleBase(base, lanes, temp)
+    {}
+};
+
+class LSimdGeneralSwizzleF : public LSimdGeneralSwizzleBase
+{
+  public:
+    LIR_HEADER(SimdGeneralSwizzleF);
+    LSimdGeneralSwizzleF(const LAllocation &base, const LAllocation lanes[4],
+                         const LDefinition &temp)
+      : LSimdGeneralSwizzleBase(base, lanes, temp)
+    {}
+};
+
 // Base class for both int32x4 and float32x4 shuffle instructions.
 class LSimdShuffle : public LInstructionHelper<1, 2, 1>
 {
   public:
     LIR_HEADER(SimdShuffle);
     LSimdShuffle()
     {}
 
--- a/js/src/jit/LOpcodes.h
+++ b/js/src/jit/LOpcodes.h
@@ -23,16 +23,18 @@
     _(Int32x4)                      \
     _(Float32x4)                    \
     _(SimdReinterpretCast)          \
     _(SimdExtractElementI)          \
     _(SimdExtractElementF)          \
     _(SimdInsertElementI)           \
     _(SimdInsertElementF)           \
     _(SimdSignMaskX4)               \
+    _(SimdGeneralSwizzleI)          \
+    _(SimdGeneralSwizzleF)          \
     _(SimdSwizzleI)                 \
     _(SimdSwizzleF)                 \
     _(SimdShuffle)                  \
     _(SimdUnaryArithIx4)            \
     _(SimdUnaryArithFx4)            \
     _(SimdBinaryCompIx4)            \
     _(SimdBinaryCompFx4)            \
     _(SimdBinaryArithIx4)           \
--- a/js/src/jit/Lowering.cpp
+++ b/js/src/jit/Lowering.cpp
@@ -3942,16 +3942,39 @@ LIRGenerator::visitSimdSwizzle(MSimdSwiz
         LSimdSwizzleF *lir = new (alloc()) LSimdSwizzleF(use);
         define(lir, ins);
     } else {
         MOZ_CRASH("Unknown SIMD kind when getting lane");
     }
 }
 
 void
+LIRGenerator::visitSimdGeneralSwizzle(MSimdGeneralSwizzle *ins)
+{
+    MOZ_ASSERT(IsSimdType(ins->input()->type()));
+    MOZ_ASSERT(IsSimdType(ins->type()));
+
+    LAllocation lanesUses[4];
+    for (size_t i = 0; i < 4; i++)
+        lanesUses[i] = use(ins->lane(i));
+
+    if (ins->input()->type() == MIRType_Int32x4) {
+        LSimdGeneralSwizzleI *lir = new (alloc()) LSimdGeneralSwizzleI(useRegister(ins->input()),
+                                                                       lanesUses, temp());
+        define(lir, ins);
+    } else if (ins->input()->type() == MIRType_Float32x4) {
+        LSimdGeneralSwizzleF *lir = new (alloc()) LSimdGeneralSwizzleF(useRegister(ins->input()),
+                                                                       lanesUses, temp());
+        define(lir, ins);
+    } else {
+        MOZ_CRASH("Unknown SIMD kind when getting lane");
+    }
+}
+
+void
 LIRGenerator::visitSimdShuffle(MSimdShuffle *ins)
 {
     MOZ_ASSERT(IsSimdType(ins->lhs()->type()));
     MOZ_ASSERT(IsSimdType(ins->rhs()->type()));
     MOZ_ASSERT(IsSimdType(ins->type()));
     MOZ_ASSERT(ins->type() == MIRType_Int32x4 || ins->type() == MIRType_Float32x4);
 
     bool zFromLHS = ins->laneZ() < 4;
--- a/js/src/jit/Lowering.h
+++ b/js/src/jit/Lowering.h
@@ -266,16 +266,17 @@ class LIRGenerator : public LIRGenerator
     void visitRecompileCheck(MRecompileCheck *ins);
     void visitMemoryBarrier(MMemoryBarrier *ins);
     void visitSimdBox(MSimdBox *ins);
     void visitSimdUnbox(MSimdUnbox *ins);
     void visitSimdExtractElement(MSimdExtractElement *ins);
     void visitSimdInsertElement(MSimdInsertElement *ins);
     void visitSimdSignMask(MSimdSignMask *ins);
     void visitSimdSwizzle(MSimdSwizzle *ins);
+    void visitSimdGeneralSwizzle(MSimdGeneralSwizzle *ins);
     void visitSimdShuffle(MSimdShuffle *ins);
     void visitSimdUnaryArith(MSimdUnaryArith *ins);
     void visitSimdBinaryComp(MSimdBinaryComp *ins);
     void visitSimdBinaryBitwise(MSimdBinaryBitwise *ins);
     void visitSimdShift(MSimdShift *ins);
     void visitSimdConstant(MSimdConstant *ins);
     void visitSimdConvert(MSimdConvert *ins);
     void visitSimdReinterpretCast(MSimdReinterpretCast *ins);
--- a/js/src/jit/MCallOptimize.cpp
+++ b/js/src/jit/MCallOptimize.cpp
@@ -346,16 +346,19 @@ IonBuilder::inlineNativeCall(CallInfo &c
         return inlineSimdSelect(callInfo, native, IsElementWise(true), SimdTypeDescr::TYPE_INT32);
     if (native == js::simd_int32x4_bitselect)
         return inlineSimdSelect(callInfo, native, IsElementWise(false), SimdTypeDescr::TYPE_INT32);
     if (native == js::simd_float32x4_select)
         return inlineSimdSelect(callInfo, native, IsElementWise(true), SimdTypeDescr::TYPE_FLOAT32);
     if (native == js::simd_float32x4_bitselect)
         return inlineSimdSelect(callInfo, native, IsElementWise(false), SimdTypeDescr::TYPE_FLOAT32);
 
+    if (native == js::simd_int32x4_swizzle)
+        return inlineSimdSwizzle(callInfo, native, SimdTypeDescr::TYPE_INT32);
+
     return InliningStatus_NotInlined;
 }
 
 IonBuilder::InliningStatus
 IonBuilder::inlineNativeGetter(CallInfo &callInfo, JSFunction *target)
 {
     MOZ_ASSERT(target->isNative());
     JSNative native = target->native();
@@ -3094,10 +3097,26 @@ IonBuilder::inlineSimdCheck(CallInfo &ca
 
     current->add(unbox);
     current->push(callInfo.getArg(0));
 
     callInfo.setImplicitlyUsedUnchecked();
     return InliningStatus_Inlined;
 }
 
+IonBuilder::InliningStatus
+IonBuilder::inlineSimdSwizzle(CallInfo &callInfo, JSNative native, SimdTypeDescr::Type type)
+{
+    InlineTypedObject *templateObj = nullptr;
+    if (!checkInlineSimd(callInfo, native, type, 5, &templateObj))
+        return InliningStatus_NotInlined;
+
+    MDefinition *lanes[4];
+    for (size_t i = 0; i < 4; i++)
+        lanes[i] = callInfo.getArg(1 + i);
+
+    MIRType mirType = SimdTypeDescrToMIRType(type);
+    MSimdGeneralSwizzle *ins = MSimdGeneralSwizzle::New(alloc(), callInfo.getArg(0), lanes, mirType);
+    return boxSimd(callInfo, ins, templateObj);
+}
+
 } // namespace jit
 } // namespace js
--- a/js/src/jit/MIR.cpp
+++ b/js/src/jit/MIR.cpp
@@ -943,16 +943,30 @@ MSimdUnbox::foldsTo(TempAllocator &alloc
 MDefinition *
 MSimdSwizzle::foldsTo(TempAllocator &alloc)
 {
     if (lanesMatch(0, 1, 2, 3))
         return input();
     return this;
 }
 
+MDefinition *
+MSimdGeneralSwizzle::foldsTo(TempAllocator &alloc)
+{
+    int32_t lanes[4];
+    for (size_t i = 0; i < 4; i++) {
+        if (!lane(i)->isConstant() || lane(i)->type() != MIRType_Int32)
+            return this;
+        lanes[i] = lane(i)->toConstant()->value().toInt32();
+        if (lanes[i] < 0 || lanes[i] >= 4)
+            return this;
+    }
+    return MSimdSwizzle::New(alloc, input(), type(), lanes[0], lanes[1], lanes[2], lanes[3]);
+}
+
 template <typename T>
 static void
 PrintOpcodeOperation(T *mir, FILE *fp)
 {
     mir->MDefinition::printOpcode(fp);
     fprintf(fp, " (%s)", T::OperationName(mir->operation()));
 }
 
--- a/js/src/jit/MIR.h
+++ b/js/src/jit/MIR.h
@@ -1809,32 +1809,89 @@ class MSimdSwizzle
     INSTRUCTION_HEADER(SimdSwizzle)
 
     static MSimdSwizzle *NewAsmJS(TempAllocator &alloc, MDefinition *obj, MIRType type,
                                   uint32_t laneX, uint32_t laneY, uint32_t laneZ, uint32_t laneW)
     {
         return new(alloc) MSimdSwizzle(obj, type, laneX, laneY, laneZ, laneW);
     }
 
+    static MSimdSwizzle *New(TempAllocator &alloc, MDefinition *obj, MIRType type,
+                             uint32_t laneX, uint32_t laneY, uint32_t laneZ, uint32_t laneW)
+    {
+        return new(alloc) MSimdSwizzle(obj, type, laneX, laneY, laneZ, laneW);
+    }
+
     bool congruentTo(const MDefinition *ins) const MOZ_OVERRIDE {
         if (!ins->isSimdSwizzle())
             return false;
         const MSimdSwizzle *other = ins->toSimdSwizzle();
         return sameLanes(other) && congruentIfOperandsEqual(other);
     }
 
     AliasSet getAliasSet() const MOZ_OVERRIDE {
         return AliasSet::None();
     }
 
     MDefinition *foldsTo(TempAllocator &alloc) MOZ_OVERRIDE;
 
     ALLOW_CLONE(MSimdSwizzle)
 };
 
+// A "general swizzle" is a swizzle whose lane indices are not necessarily
+// constant. This is the one that Ion inlines; it is folded into a MSimdSwizzle
+// when all lane indices are constant int32 values in [0, 4). Performance of
+// general swizzle does not really matter, as we expect constant indices.
+class MSimdGeneralSwizzle :
+    public MAryInstruction<5>,
+    public SimdSwizzlePolicy::Data
+{
+  protected:
+    MSimdGeneralSwizzle(MDefinition *vec, MDefinition *lanes[4], MIRType type)
+    {
+        MOZ_ASSERT(IsSimdType(type));
+        MOZ_ASSERT(SimdTypeToLength(type) == 4);
+
+        initOperand(0, vec);
+        for (unsigned i = 0; i < 4; i++)
+            initOperand(1 + i, lanes[i]);
+
+        setResultType(type);
+        specialization_ = type;
+        setMovable();
+    }
+
+  public:
+    INSTRUCTION_HEADER(SimdGeneralSwizzle);
+    ALLOW_CLONE(MSimdGeneralSwizzle);
+
+    static MSimdGeneralSwizzle *New(TempAllocator &alloc, MDefinition *vec, MDefinition *lanes[4],
+                                    MIRType type)
+    {
+        return new(alloc) MSimdGeneralSwizzle(vec, lanes, type);
+    }
+
+    MDefinition *input() const {
+        return getOperand(0);
+    }
+    MDefinition *lane(size_t i) const {
+        return getOperand(1 + i);
+    }
+
+    bool congruentTo(const MDefinition *ins) const MOZ_OVERRIDE {
+        return congruentIfOperandsEqual(ins);
+    }
+
+    MDefinition *foldsTo(TempAllocator &alloc) MOZ_OVERRIDE;
+
+    AliasSet getAliasSet() const MOZ_OVERRIDE {
+        return AliasSet::None();
+    }
+};
+
 // Applies a shuffle operation to the inputs, selecting the 2 first lanes of the
 // output from lanes of the first input, and the 2 last lanes of the output from
 // lanes of the second input.
 class MSimdShuffle
   : public MBinaryInstruction,
     public MSimdShuffleBase,
     public NoTypePolicy::Data
 {
--- a/js/src/jit/MOpcodes.h
+++ b/js/src/jit/MOpcodes.h
@@ -19,16 +19,17 @@ namespace jit {
     _(SimdSplatX4)                                                          \
     _(SimdConstant)                                                         \
     _(SimdConvert)                                                          \
     _(SimdReinterpretCast)                                                  \
     _(SimdExtractElement)                                                   \
     _(SimdInsertElement)                                                    \
     _(SimdSignMask)                                                         \
     _(SimdSwizzle)                                                          \
+    _(SimdGeneralSwizzle)                                                   \
     _(SimdShuffle)                                                          \
     _(SimdUnaryArith)                                                       \
     _(SimdBinaryComp)                                                       \
     _(SimdBinaryArith)                                                      \
     _(SimdBinaryBitwise)                                                    \
     _(SimdShift)                                                            \
     _(SimdSelect)                                                           \
     _(CloneLiteral)                                                         \
--- a/js/src/jit/TypePolicy.cpp
+++ b/js/src/jit/TypePolicy.cpp
@@ -781,16 +781,41 @@ SimdPolicy<Op>::adjustInputs(TempAllocat
 {
     return MaybeSimdUnbox(alloc, ins, ins->typePolicySpecialization(), Op);
 }
 
 template bool
 SimdPolicy<0>::adjustInputs(TempAllocator &alloc, MInstruction *ins);
 
 bool
+SimdSwizzlePolicy::adjustInputs(TempAllocator &alloc, MInstruction *ins)
+{
+    MIRType specialization = ins->typePolicySpecialization();
+
+    // First input is the vector input.
+    if (!MaybeSimdUnbox(alloc, ins, specialization, 0))
+        return false;
+
+    // Next inputs are the lanes, which need to be int32
+    for (unsigned i = 0; i < 4; i++) {
+        MDefinition *in = ins->getOperand(i + 1);
+        if (in->type() == MIRType_Int32)
+            continue;
+
+        MInstruction *replace = MTruncateToInt32::New(alloc, in);
+        ins->block()->insertBefore(ins, replace);
+        ins->replaceOperand(i + 1, replace);
+        if (!replace->typePolicy()->adjustInputs(alloc, replace))
+            return false;
+    }
+
+    return true;
+}
+
+bool
 SimdSelectPolicy::adjustInputs(TempAllocator &alloc, MInstruction *ins)
 {
     MIRType specialization = ins->typePolicySpecialization();
 
     // First input is the mask, which has to be an int32x4 (for now).
     if (!MaybeSimdUnbox(alloc, ins, MIRType_Int32x4, 0))
         return false;
 
@@ -1080,16 +1105,17 @@ FilterTypeSetPolicy::adjustInputs(TempAl
     _(CallSetElementPolicy)                     \
     _(ClampPolicy)                              \
     _(ComparePolicy)                            \
     _(FilterTypeSetPolicy)                      \
     _(InstanceOfPolicy)                         \
     _(PowPolicy)                                \
     _(SimdAllPolicy)                            \
     _(SimdSelectPolicy)                         \
+    _(SimdSwizzlePolicy)                        \
     _(StoreTypedArrayElementStaticPolicy)       \
     _(StoreTypedArrayHolePolicy)                \
     _(StoreTypedArrayPolicy)                    \
     _(StoreUnboxedObjectOrNullPolicy)           \
     _(TestPolicy)                               \
     _(AllDoublePolicy)                          \
     _(ToDoublePolicy)                           \
     _(ToInt32Policy)                            \
--- a/js/src/jit/TypePolicy.h
+++ b/js/src/jit/TypePolicy.h
@@ -305,32 +305,39 @@ class SimdScalarPolicy MOZ_FINAL : publi
     static bool staticAdjustInputs(TempAllocator &alloc, MInstruction *def);
     virtual bool adjustInputs(TempAllocator &alloc, MInstruction *def) MOZ_OVERRIDE {
         return staticAdjustInputs(alloc, def);
     }
 };
 
 class SimdAllPolicy MOZ_FINAL : public TypePolicy
 {
-    public:
+  public:
     SPECIALIZATION_DATA_;
     virtual bool adjustInputs(TempAllocator &alloc, MInstruction *ins) MOZ_OVERRIDE;
 };
 
 template <unsigned Op>
 class SimdPolicy MOZ_FINAL : public TypePolicy
 {
-    public:
+  public:
     SPECIALIZATION_DATA_;
     virtual bool adjustInputs(TempAllocator &alloc, MInstruction *ins) MOZ_OVERRIDE;
 };
 
 class SimdSelectPolicy MOZ_FINAL : public TypePolicy
 {
-    public:
+  public:
+    SPECIALIZATION_DATA_;
+    virtual bool adjustInputs(TempAllocator &alloc, MInstruction *ins) MOZ_OVERRIDE;
+};
+
+class SimdSwizzlePolicy MOZ_FINAL : public TypePolicy
+{
+  public:
     SPECIALIZATION_DATA_;
     virtual bool adjustInputs(TempAllocator &alloc, MInstruction *ins) MOZ_OVERRIDE;
 };
 
 // SIMD value-type policy, use the returned type of the instruction to determine
 // how to unbox its operand.
 template <unsigned Op>
 class SimdSameAsReturnedTypePolicy MOZ_FINAL : public TypePolicy
--- a/js/src/jit/arm/CodeGenerator-arm.h
+++ b/js/src/jit/arm/CodeGenerator-arm.h
@@ -239,16 +239,18 @@ class CodeGeneratorARM : public CodeGene
     // Unimplemented SIMD instructions
     void visitSimdSplatX4(LSimdSplatX4 *lir) { MOZ_CRASH("NYI"); }
     void visitInt32x4(LInt32x4 *ins) { MOZ_CRASH("NYI"); }
     void visitFloat32x4(LFloat32x4 *ins) { MOZ_CRASH("NYI"); }
     void visitSimdReinterpretCast(LSimdReinterpretCast *ins) { MOZ_CRASH("NYI"); }
     void visitSimdExtractElementI(LSimdExtractElementI *ins) { MOZ_CRASH("NYI"); }
     void visitSimdExtractElementF(LSimdExtractElementF *ins) { MOZ_CRASH("NYI"); }
     void visitSimdSignMaskX4(LSimdSignMaskX4 *ins) { MOZ_CRASH("NYI"); }
+    void visitSimdGeneralSwizzleI(LSimdGeneralSwizzleI *lir) { MOZ_CRASH("NYI"); }
+    void visitSimdGeneralSwizzleF(LSimdGeneralSwizzleF *lir) { MOZ_CRASH("NYI"); }
     void visitSimdSwizzleI(LSimdSwizzleI *lir) { MOZ_CRASH("NYI"); }
     void visitSimdSwizzleF(LSimdSwizzleF *lir) { MOZ_CRASH("NYI"); }
     void visitSimdBinaryCompIx4(LSimdBinaryCompIx4 *lir) { MOZ_CRASH("NYI"); }
     void visitSimdBinaryCompFx4(LSimdBinaryCompFx4 *lir) { MOZ_CRASH("NYI"); }
     void visitSimdBinaryArithIx4(LSimdBinaryArithIx4 *lir) { MOZ_CRASH("NYI"); }
     void visitSimdBinaryArithFx4(LSimdBinaryArithFx4 *lir) { MOZ_CRASH("NYI"); }
     void visitSimdBinaryBitwiseX4(LSimdBinaryBitwiseX4 *lir) { MOZ_CRASH("NYI"); }
 };
--- a/js/src/jit/mips/CodeGenerator-mips.h
+++ b/js/src/jit/mips/CodeGenerator-mips.h
@@ -285,16 +285,18 @@ class CodeGeneratorMIPS : public CodeGen
     void visitSimdExtractElementI(LSimdExtractElementI *ins) { MOZ_CRASH("NYI"); }
     void visitSimdExtractElementF(LSimdExtractElementF *ins) { MOZ_CRASH("NYI"); }
     void visitSimdSignMaskX4(LSimdSignMaskX4 *ins) { MOZ_CRASH("NYI"); }
     void visitSimdBinaryCompIx4(LSimdBinaryCompIx4 *lir) { MOZ_CRASH("NYI"); }
     void visitSimdBinaryCompFx4(LSimdBinaryCompFx4 *lir) { MOZ_CRASH("NYI"); }
     void visitSimdBinaryArithIx4(LSimdBinaryArithIx4 *lir) { MOZ_CRASH("NYI"); }
     void visitSimdBinaryArithFx4(LSimdBinaryArithFx4 *lir) { MOZ_CRASH("NYI"); }
     void visitSimdBinaryBitwiseX4(LSimdBinaryBitwiseX4 *lir) { MOZ_CRASH("NYI"); }
+    void visitSimdGeneralSwizzleI(LSimdGeneralSwizzleI *lir) { MOZ_CRASH("NYI"); }
+    void visitSimdGeneralSwizzleF(LSimdGeneralSwizzleF *lir) { MOZ_CRASH("NYI"); }
 };
 
 typedef CodeGeneratorMIPS CodeGeneratorSpecific;
 
 // An out-of-line bailout thunk.
 class OutOfLineBailout : public OutOfLineCodeBase<CodeGeneratorMIPS>
 {
     LSnapshot *snapshot_;
--- a/js/src/jit/shared/CodeGenerator-x86-shared.cpp
+++ b/js/src/jit/shared/CodeGenerator-x86-shared.cpp
@@ -2375,16 +2375,103 @@ CodeGeneratorX86Shared::visitSimdSignMas
     FloatRegister input = ToFloatRegister(ins->input());
     Register output = ToRegister(ins->output());
 
     // For Float32x4 and Int32x4.
     masm.vmovmskps(input, output);
 }
 
 void
+CodeGeneratorX86Shared::visitSimdGeneralSwizzleI(LSimdGeneralSwizzleI *ins)
+{
+    FloatRegister input = ToFloatRegister(ins->base());
+    Register temp = ToRegister(ins->temp());
+
+    // This won't generate fast code, but it's fine because we expect users
+    // to have used constant indices (and thus MSimdGeneralSwizzle to be
+    // folded into MSimdSwizzle, which is fast).
+    masm.reserveStack(Simd128DataSize * 2);
+
+    masm.storeAlignedInt32x4(input, Address(StackPointer, Simd128DataSize));
+
+    for (size_t i = 0; i < 4; i++) {
+        Operand lane = ToOperand(ins->lane(i));
+
+        Label go, join;
+        masm.cmp32(lane, Imm32(4));
+        masm.j(Assembler::Below, &go);
+
+        {
+            masm.store32(Imm32(0), Address(StackPointer, i * sizeof(int32_t)));
+            masm.jump(&join);
+        }
+
+        masm.bind(&go);
+        if (lane.kind() == Operand::REG) {
+            masm.load32(Operand(StackPointer, ToRegister(ins->lane(i)), TimesFour, Simd128DataSize),
+                        temp);
+        } else {
+            masm.load32(lane, temp);
+            masm.load32(Operand(StackPointer, temp, TimesFour, Simd128DataSize), temp);
+        }
+
+        masm.store32(temp, Address(StackPointer, i * sizeof(int32_t)));
+        masm.bind(&join);
+    }
+
+    FloatRegister output = ToFloatRegister(ins->output());
+    masm.loadAlignedInt32x4(Address(StackPointer, 0), output);
+
+    masm.freeStack(Simd128DataSize * 2);
+}
+
+void
+CodeGeneratorX86Shared::visitSimdGeneralSwizzleF(LSimdGeneralSwizzleF *ins)
+{
+    FloatRegister input = ToFloatRegister(ins->base());
+    Register temp = ToRegister(ins->temp());
+
+    // See the comment in visitSimdGeneralSwizzleI.
+    masm.reserveStack(Simd128DataSize * 2);
+
+    masm.storeAlignedFloat32x4(input, Address(StackPointer, Simd128DataSize));
+
+    for (size_t i = 0; i < 4; i++) {
+        Operand lane = ToOperand(ins->lane(i));
+
+        Label go, join;
+        masm.cmp32(lane, Imm32(4));
+        masm.j(Assembler::Below, &go);
+
+        {
+            masm.loadConstantFloat32(float(GenericNaN()), ScratchFloat32Reg);
+            masm.storeFloat32(ScratchFloat32Reg, Address(StackPointer, i * sizeof(int32_t)));
+            masm.jump(&join);
+        }
+
+        masm.bind(&go);
+        if (lane.kind() == Operand::REG) {
+            masm.loadFloat32(Operand(StackPointer, ToRegister(ins->lane(i)), TimesFour, Simd128DataSize),
+                             ScratchFloat32Reg);
+        } else {
+            masm.load32(lane, temp);
+            masm.loadFloat32(Operand(StackPointer, temp, TimesFour, Simd128DataSize), ScratchFloat32Reg);
+        }
+
+        masm.storeFloat32(ScratchFloat32Reg, Address(StackPointer, i * sizeof(int32_t)));
+        masm.bind(&join);
+    }
+
+    FloatRegister output = ToFloatRegister(ins->output());
+    masm.loadAlignedFloat32x4(Address(StackPointer, 0), output);
+
+    masm.freeStack(Simd128DataSize * 2);
+}
+
+void
 CodeGeneratorX86Shared::visitSimdSwizzleI(LSimdSwizzleI *ins)
 {
     FloatRegister input = ToFloatRegister(ins->input());
     FloatRegister output = ToFloatRegister(ins->output());
 
     uint32_t x = ins->laneX();
     uint32_t y = ins->laneY();
     uint32_t z = ins->laneZ();
--- a/js/src/jit/shared/CodeGenerator-x86-shared.h
+++ b/js/src/jit/shared/CodeGenerator-x86-shared.h
@@ -253,16 +253,18 @@ class CodeGeneratorX86Shared : public Co
     void visitInt32x4ToFloat32x4(LInt32x4ToFloat32x4 *ins);
     void visitFloat32x4ToInt32x4(LFloat32x4ToInt32x4 *ins);
     void visitSimdReinterpretCast(LSimdReinterpretCast *lir);
     void visitSimdExtractElementI(LSimdExtractElementI *lir);
     void visitSimdExtractElementF(LSimdExtractElementF *lir);
     void visitSimdInsertElementI(LSimdInsertElementI *lir);
     void visitSimdInsertElementF(LSimdInsertElementF *lir);
     void visitSimdSignMaskX4(LSimdSignMaskX4 *ins);
+    void visitSimdGeneralSwizzleI(LSimdGeneralSwizzleI *lir);
+    void visitSimdGeneralSwizzleF(LSimdGeneralSwizzleF *lir);
     void visitSimdSwizzleI(LSimdSwizzleI *lir);
     void visitSimdSwizzleF(LSimdSwizzleF *lir);
     void visitSimdShuffle(LSimdShuffle *lir);
     void visitSimdUnaryArithIx4(LSimdUnaryArithIx4 *lir);
     void visitSimdUnaryArithFx4(LSimdUnaryArithFx4 *lir);
     void visitSimdBinaryCompIx4(LSimdBinaryCompIx4 *lir);
     void visitSimdBinaryCompFx4(LSimdBinaryCompFx4 *lir);
     void visitSimdBinaryArithIx4(LSimdBinaryArithIx4 *lir);