Backed out changeset dd24124194d5 (bug 1108825) for SM(e) failures
author Benjamin Bouvier <benj@benj.me>
Thu, 11 Dec 2014 12:57:24 +0100
changeset 246800 56d19efedf4fc80e5022b65930ad2723b4742684
parent 246799 824175640cafb430c15ef0f85fba827ecf1c80c9
child 246801 5d3fe12c63ed0bff4e6f01801a45d9406d175150
push id 698
push user jlund@mozilla.com
push date Mon, 23 Mar 2015 22:08:11 +0000
treeherder mozilla-release@b0c0ae7b02a3 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
bugs 1108825
milestone 37.0a1
backs out dd24124194d5dcdd9f1e40ad3aa57f4f86b42132
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Backed out changeset dd24124194d5 (bug 1108825) for SM(e) failures
js/src/builtin/SIMD.h
js/src/jit-test/tests/asm.js/testSIMD.js
js/src/jit/LIR-Common.h
js/src/jit/Lowering.cpp
js/src/jit/Lowering.h
js/src/jit/MIR.h
js/src/jit/arm/Lowering-arm.cpp
js/src/jit/arm/Lowering-arm.h
js/src/jit/mips/Lowering-mips.cpp
js/src/jit/mips/Lowering-mips.h
js/src/jit/shared/Assembler-x86-shared.h
js/src/jit/shared/BaseAssembler-x86-shared.h
js/src/jit/shared/CodeGenerator-x86-shared.cpp
js/src/jit/shared/Lowering-shared.cpp
js/src/jit/shared/Lowering-shared.h
js/src/jit/shared/Lowering-x86-shared.cpp
js/src/jit/shared/Lowering-x86-shared.h
--- a/js/src/builtin/SIMD.h
+++ b/js/src/builtin/SIMD.h
@@ -135,25 +135,25 @@
     _(shiftRightLogicalByScalar)
 #define FOREACH_FLOAT32X4_SIMD_OP(_) \
     _(abs)                           \
     _(sqrt)                          \
     _(reciprocal)                    \
     _(reciprocalSqrt)                \
     _(fromInt32x4)                   \
     _(fromInt32x4Bits)               \
+    _(mul)                           \
     _(div)                           \
     _(max)                           \
     _(min)                           \
     _(maxNum)                        \
     _(minNum)
 #define FOREACH_COMMONX4_SIMD_OP(_)  \
     _(add)                           \
     _(sub)                           \
-    _(mul)                           \
     _(lessThan)                      \
     _(lessThanOrEqual)               \
     _(equal)                         \
     _(notEqual)                      \
     _(greaterThan)                   \
     _(greaterThanOrEqual)            \
     _(and)                           \
     _(or)                            \
--- a/js/src/jit-test/tests/asm.js/testSIMD.js
+++ b/js/src/jit-test/tests/asm.js/testSIMD.js
@@ -7,17 +7,16 @@ const DEBUG = false;
 if (!isSimdAvailable() || typeof SIMD === 'undefined') {
     DEBUG && print("won't run tests as simd extensions aren't activated yet");
     quit(0);
 }
 
 const I32 = 'var i4 = glob.SIMD.int32x4;'
 const I32A = 'var i4a = i4.add;'
 const I32S = 'var i4s = i4.sub;'
-const I32M = 'var i4m = i4.mul;'
 const F32 = 'var f4 = glob.SIMD.float32x4;'
 const F32A = 'var f4a = f4.add;'
 const F32S = 'var f4s = f4.sub;'
 const F32M = 'var f4m = f4.mul;'
 const F32D = 'var f4d = f4.div;'
 const FROUND = 'var f32=glob.Math.fround;'
 
 const INT32_MAX = Math.pow(2, 31) - 1;
@@ -449,29 +448,19 @@ CheckI4(I32S, 'var x=i4(' + INT32_MIN + 
 CheckI4(I32S, 'var x=i4(' + INT32_MIN + ',2,3,4); var y=i4(1,1,0,3); x=i4(i4s(x,y))', [INT32_MAX,1,3,1]);
 
 CheckF4(F32S, 'var x=f4(1,2,3,4); x=f4s(x,x)', [0,0,0,0]);
 CheckF4(F32S, 'var x=f4(1,2,3,4); var y=f4(4,3,5,2); x=f4s(x,y)', [-3,-1,-2,2]);
 CheckF4(F32S, 'var x=f4(13.37,2,3,4); var y=f4(4,3,5,2); x=f4s(x,y)', [Math.fround(13.37) - 4,-1,-2,2]);
 CheckF4(F32S, 'var x=f4(13.37,2,3,4); var y=f4(4,3,5,2); x=f4(f4s(x,y))', [Math.fround(13.37) - 4,-1,-2,2]);
 
 // 2.3.3. Multiplications / Divisions
+assertAsmTypeFail('glob', USE_ASM + I32 + "var f4m=i4.mul; function f() {} return f");
 assertAsmTypeFail('glob', USE_ASM + I32 + "var f4d=i4.div; function f() {} return f");
 
-CheckI4(I32M, 'var x=i4(1,2,3,4); var y=i4(-1,1,0,2); x=i4m(x,y)', [-1,2,0,8]);
-CheckI4(I32M, 'var x=i4(5,4,3,2); var y=i4(1,2,3,4); x=i4m(x,y)', [5,8,9,8]);
-CheckI4(I32M, 'var x=i4(1,2,3,4); x=i4m(x,x)', [1,4,9,16]);
-(function() {
-    var m = INT32_MIN, M = INT32_MAX, imul = Math.imul;
-    CheckI4(I32M, `var x=i4(${m},${m}, ${M}, ${M}); var y=i4(2,-3,4,-5); x=i4m(x,y)`,
-            [imul(m, 2), imul(m, -3), imul(M, 4), imul(M, -5)]);
-    CheckI4(I32M, `var x=i4(${m},${m}, ${M}, ${M}); var y=i4(${m}, ${M}, ${m}, ${M}); x=i4m(x,y)`,
-            [imul(m, m), imul(m, M), imul(M, m), imul(M, M)]);
-})();
-
 CheckF4(F32M, 'var x=f4(1,2,3,4); x=f4m(x,x)', [1,4,9,16]);
 CheckF4(F32M, 'var x=f4(1,2,3,4); var y=f4(4,3,5,2); x=f4m(x,y)', [4,6,15,8]);
 CheckF4(F32M, 'var x=f4(13.37,2,3,4); var y=f4(4,3,5,2); x=f4m(x,y)', [Math.fround(13.37) * 4,6,15,8]);
 CheckF4(F32M, 'var x=f4(13.37,2,3,4); var y=f4(4,3,5,2); x=f4(f4m(x,y))', [Math.fround(13.37) * 4,6,15,8]);
 
 var f32x4 = SIMD.float32x4(0, NaN, -0, NaN);
 var another = SIMD.float32x4(NaN, -1, -0, NaN);
 assertEqX4(asmLink(asmCompile('glob', USE_ASM + F32 + F32M + "function f(x, y) {x=f4(x); y=f4(y); x=f4m(x,y); return f4(x);} return f"), this)(f32x4, another), [NaN, NaN, 0, NaN]);
--- a/js/src/jit/LIR-Common.h
+++ b/js/src/jit/LIR-Common.h
@@ -361,53 +361,55 @@ class LSimdBinaryCompIx4 : public LSimdB
 class LSimdBinaryCompFx4 : public LSimdBinaryComp
 {
   public:
     LIR_HEADER(SimdBinaryCompFx4);
     LSimdBinaryCompFx4() : LSimdBinaryComp() {}
 };
 
 // Binary SIMD arithmetic operation between two SIMD operands
-class LSimdBinaryArith : public LInstructionHelper<1, 2, 1>
+template<size_t Temps>
+class LSimdBinaryArith : public LInstructionHelper<1, 2, Temps>
 {
   public:
     LSimdBinaryArith() {}
 
     const LAllocation *lhs() {
         return this->getOperand(0);
     }
     const LAllocation *rhs() {
         return this->getOperand(1);
     }
-    const LDefinition *temp() {
-        return getTemp(0);
-    }
 
     MSimdBinaryArith::Operation operation() const {
         return this->mir_->toSimdBinaryArith()->operation();
     }
     const char *extraName() const {
         return MSimdBinaryArith::OperationName(operation());
     }
 };
 
 // Binary SIMD arithmetic operation between two Int32x4 operands
-class LSimdBinaryArithIx4 : public LSimdBinaryArith
+class LSimdBinaryArithIx4 : public LSimdBinaryArith<0>
 {
   public:
     LIR_HEADER(SimdBinaryArithIx4);
-    LSimdBinaryArithIx4() : LSimdBinaryArith() {}
+    LSimdBinaryArithIx4() : LSimdBinaryArith<0>() {}
 };
 
 // Binary SIMD arithmetic operation between two Float32x4 operands
-class LSimdBinaryArithFx4 : public LSimdBinaryArith
+class LSimdBinaryArithFx4 : public LSimdBinaryArith<1>
 {
   public:
     LIR_HEADER(SimdBinaryArithFx4);
-    LSimdBinaryArithFx4() : LSimdBinaryArith() {}
+    LSimdBinaryArithFx4() : LSimdBinaryArith<1>() {}
+
+    const LDefinition *temp() {
+        return getTemp(0);
+    }
 };
 
 // Unary SIMD arithmetic operation on a SIMD operand
 class LSimdUnaryArith : public LInstructionHelper<1, 1, 0>
 {
   public:
     explicit LSimdUnaryArith(const LAllocation &in) {
         setOperand(0, in);
--- a/js/src/jit/Lowering.cpp
+++ b/js/src/jit/Lowering.cpp
@@ -626,16 +626,73 @@ ReorderComparison(JSOp op, MDefinition *
     if (lhs->isConstant()) {
         *rhsp = lhs;
         *lhsp = rhs;
         return ReverseCompareOp(op);
     }
     return op;
 }
 
+static bool
+ShouldReorderCommutative(MDefinition *lhs, MDefinition *rhs, MInstruction *ins)
+{
+    // lhs and rhs are used by the commutative operator.
+    MOZ_ASSERT(lhs->hasDefUses());
+    MOZ_ASSERT(rhs->hasDefUses());
+
+    // Ensure that if there is a constant, then it is in rhs.
+    if (rhs->isConstant())
+        return false;
+    if (lhs->isConstant())
+        return true;
+
+    // Since clobbering binary operations clobber the left operand, prefer a
+    // non-constant lhs operand with no further uses. To be fully precise, we
+    // should check whether this is the *last* use, but checking hasOneDefUse()
+    // is a decent approximation which doesn't require any extra analysis.
+    bool rhsSingleUse = rhs->hasOneDefUse();
+    bool lhsSingleUse = lhs->hasOneDefUse();
+    if (rhsSingleUse) {
+        if (!lhsSingleUse)
+            return true;
+    } else {
+        if (lhsSingleUse)
+            return false;
+    }
+
+    // If this is a reduction-style computation, such as
+    //
+    //   sum = 0;
+    //   for (...)
+    //      sum += ...;
+    //
+    // put the phi on the left to promote coalescing. This is fairly specific.
+    if (rhsSingleUse &&
+        rhs->isPhi() &&
+        rhs->block()->isLoopHeader() &&
+        ins == rhs->toPhi()->getLoopBackedgeOperand())
+    {
+        return true;
+    }
+
+    return false;
+}
+
+static void
+ReorderCommutative(MDefinition **lhsp, MDefinition **rhsp, MInstruction *ins)
+{
+    MDefinition *lhs = *lhsp;
+    MDefinition *rhs = *rhsp;
+
+    if (ShouldReorderCommutative(lhs, rhs, ins)) {
+        *rhsp = lhs;
+        *lhsp = rhs;
+    }
+}
+
 void
 LIRGenerator::visitTest(MTest *test)
 {
     MDefinition *opd = test->getOperand(0);
     MBasicBlock *ifTrue = test->ifTrue();
     MBasicBlock *ifFalse = test->ifFalse();
 
     // String is converted to length of string in the type analysis phase (see
@@ -4022,16 +4079,44 @@ LIRGenerator::visitSimdBinaryComp(MSimdB
         LSimdBinaryCompFx4 *add = new(alloc()) LSimdBinaryCompFx4();
         lowerForCompFx4(add, ins, ins->lhs(), ins->rhs());
     } else {
         MOZ_CRASH("Unknown compare type when comparing values");
     }
 }
 
 void
+LIRGenerator::visitSimdBinaryArith(MSimdBinaryArith *ins)
+{
+    MOZ_ASSERT(IsSimdType(ins->type()));
+
+    MDefinition *lhs = ins->lhs();
+    MDefinition *rhs = ins->rhs();
+
+    if (ins->isCommutative())
+        ReorderCommutative(&lhs, &rhs, ins);
+
+    if (ins->type() == MIRType_Int32x4) {
+        lowerForFPU(new(alloc()) LSimdBinaryArithIx4(), ins, lhs, rhs);
+        return;
+    }
+
+    MOZ_ASSERT(ins->type() == MIRType_Float32x4, "unknown simd type on binary arith operation");
+
+    LSimdBinaryArithFx4 *lir = new(alloc()) LSimdBinaryArithFx4();
+
+    bool needsTemp = ins->operation() == MSimdBinaryArith::Max ||
+                     ins->operation() == MSimdBinaryArith::MinNum ||
+                     ins->operation() == MSimdBinaryArith::MaxNum;
+    lir->setTemp(0, needsTemp ? temp(LDefinition::FLOAT32X4) : LDefinition::BogusTemp());
+
+    lowerForFPU(lir, ins, lhs, rhs);
+}
+
+void
 LIRGenerator::visitSimdBinaryBitwise(MSimdBinaryBitwise *ins)
 {
     MOZ_ASSERT(IsSimdType(ins->type()));
 
     MDefinition *lhs = ins->lhs();
     MDefinition *rhs = ins->rhs();
     ReorderCommutative(&lhs, &rhs, ins);
 
--- a/js/src/jit/Lowering.h
+++ b/js/src/jit/Lowering.h
@@ -278,16 +278,17 @@ class LIRGenerator : public LIRGenerator
     void visitMemoryBarrier(MMemoryBarrier *ins);
     void visitSimdExtractElement(MSimdExtractElement *ins);
     void visitSimdInsertElement(MSimdInsertElement *ins);
     void visitSimdSignMask(MSimdSignMask *ins);
     void visitSimdSwizzle(MSimdSwizzle *ins);
     void visitSimdShuffle(MSimdShuffle *ins);
     void visitSimdUnaryArith(MSimdUnaryArith *ins);
     void visitSimdBinaryComp(MSimdBinaryComp *ins);
+    void visitSimdBinaryArith(MSimdBinaryArith *ins);
     void visitSimdBinaryBitwise(MSimdBinaryBitwise *ins);
     void visitSimdShift(MSimdShift *ins);
     void visitSimdConstant(MSimdConstant *ins);
     void visitSimdConvert(MSimdConvert *ins);
     void visitSimdReinterpretCast(MSimdReinterpretCast *ins);
     void visitPhi(MPhi *ins);
     void visitBeta(MBeta *ins);
     void visitObjectState(MObjectState *ins);
--- a/js/src/jit/MIR.h
+++ b/js/src/jit/MIR.h
@@ -1878,17 +1878,17 @@ class MSimdBinaryArith : public MBinaryI
     }
 
   private:
     Operation operation_;
 
     MSimdBinaryArith(MDefinition *left, MDefinition *right, Operation op, MIRType type)
       : MBinaryInstruction(left, right), operation_(op)
     {
-        MOZ_ASSERT_IF(type == MIRType_Int32x4, op == Add || op == Sub || op == Mul);
+        MOZ_ASSERT_IF(type == MIRType_Int32x4, op == Add || op == Sub);
         MOZ_ASSERT(IsSimdType(type));
         MOZ_ASSERT(left->type() == right->type());
         MOZ_ASSERT(left->type() == type);
         setResultType(type);
         setMovable();
         if (op == Add || op == Mul || op == Min || op == Max)
             setCommutative();
     }
--- a/js/src/jit/arm/Lowering-arm.cpp
+++ b/js/src/jit/arm/Lowering-arm.cpp
@@ -552,22 +552,16 @@ LIRGeneratorARM::visitStoreTypedArrayEle
 
 void
 LIRGeneratorARM::visitForkJoinGetSlice(MForkJoinGetSlice *ins)
 {
     MOZ_CRASH("NYI");
 }
 
 void
-LIRGeneratorARM::visitSimdBinaryArith(MSimdBinaryArith *ins)
-{
-    MOZ_CRASH("NYI");
-}
-
-void
 LIRGeneratorARM::visitSimdTernaryBitwise(MSimdTernaryBitwise *ins)
 {
     MOZ_CRASH("NYI");
 }
 
 void
 LIRGeneratorARM::visitSimdSplatX4(MSimdSplatX4 *ins)
 {
--- a/js/src/jit/arm/Lowering-arm.h
+++ b/js/src/jit/arm/Lowering-arm.h
@@ -102,17 +102,16 @@ class LIRGeneratorARM : public LIRGenera
     void visitAsmJSUnsignedToFloat32(MAsmJSUnsignedToFloat32 *ins);
     void visitAsmJSLoadHeap(MAsmJSLoadHeap *ins);
     void visitAsmJSStoreHeap(MAsmJSStoreHeap *ins);
     void visitAsmJSLoadFuncPtr(MAsmJSLoadFuncPtr *ins);
     void visitAsmJSCompareExchangeHeap(MAsmJSCompareExchangeHeap *ins);
     void visitAsmJSAtomicBinopHeap(MAsmJSAtomicBinopHeap *ins);
     void visitStoreTypedArrayElementStatic(MStoreTypedArrayElementStatic *ins);
     void visitForkJoinGetSlice(MForkJoinGetSlice *ins);
-    void visitSimdBinaryArith(MSimdBinaryArith *ins);
     void visitSimdTernaryBitwise(MSimdTernaryBitwise *ins);
     void visitSimdSplatX4(MSimdSplatX4 *ins);
     void visitSimdValueX4(MSimdValueX4 *ins);
     void visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins);
     void visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins);
     void visitSubstr(MSubstr *ins);
 };
 
--- a/js/src/jit/mips/Lowering-mips.cpp
+++ b/js/src/jit/mips/Lowering-mips.cpp
@@ -542,22 +542,16 @@ LIRGeneratorMIPS::visitStoreTypedArrayEl
 
 void
 LIRGeneratorMIPS::visitForkJoinGetSlice(MForkJoinGetSlice *ins)
 {
     MOZ_CRASH("NYI");
 }
 
 void
-LIRGeneratorMIPS::visitSimdBinaryArith(MSimdBinaryArith *ins)
-{
-    MOZ_CRASH("NYI");
-}
-
-void
 LIRGeneratorMIPS::visitSimdTernaryBitwise(MSimdTernaryBitwise *ins)
 {
     MOZ_CRASH("NYI");
 }
 
 void
 LIRGeneratorMIPS::visitSimdSplatX4(MSimdSplatX4 *ins)
 {
--- a/js/src/jit/mips/Lowering-mips.h
+++ b/js/src/jit/mips/Lowering-mips.h
@@ -102,17 +102,16 @@ class LIRGeneratorMIPS : public LIRGener
     void visitAsmJSUnsignedToFloat32(MAsmJSUnsignedToFloat32 *ins);
     void visitAsmJSLoadHeap(MAsmJSLoadHeap *ins);
     void visitAsmJSStoreHeap(MAsmJSStoreHeap *ins);
     void visitAsmJSCompareExchangeHeap(MAsmJSCompareExchangeHeap *ins);
     void visitAsmJSAtomicBinopHeap(MAsmJSAtomicBinopHeap *ins);
     void visitAsmJSLoadFuncPtr(MAsmJSLoadFuncPtr *ins);
     void visitStoreTypedArrayElementStatic(MStoreTypedArrayElementStatic *ins);
     void visitForkJoinGetSlice(MForkJoinGetSlice *ins);
-    void visitSimdBinaryArith(MSimdBinaryArith *ins);
     void visitSimdTernaryBitwise(MSimdTernaryBitwise *ins);
     void visitSimdSplatX4(MSimdSplatX4 *ins);
     void visitSimdValueX4(MSimdValueX4 *ins);
     void visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins);
     void visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins);
     void visitSubstr(MSubstr *ins);
 };
 
--- a/js/src/jit/shared/Assembler-x86-shared.h
+++ b/js/src/jit/shared/Assembler-x86-shared.h
@@ -597,19 +597,16 @@ class AssemblerX86Shared : public Assemb
             break;
           default:
             MOZ_CRASH("unexpected operand kind");
         }
     }
     void movdqa(const Operand &src, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
         switch (src.kind()) {
-          case Operand::FPREG:
-            masm.movdqa_rr(src.fpu(), dest.code());
-            break;
           case Operand::MEM_REG_DISP:
             masm.movdqa_mr(src.disp(), src.base(), dest.code());
             break;
           case Operand::MEM_SCALE:
             masm.movdqa_mr(src.disp(), src.base(), src.index(), src.scale(), dest.code());
             break;
           default:
             MOZ_CRASH("unexpected operand kind");
@@ -1810,36 +1807,16 @@ class AssemblerX86Shared : public Assemb
             break;
           case Operand::MEM_ADDRESS32:
             masm.psubd_mr(src.address(), dest.code());
             break;
           default:
             MOZ_CRASH("unexpected operand kind");
         }
     }
-    void pmuludq(FloatRegister src, FloatRegister dest) {
-        MOZ_ASSERT(HasSSE2());
-        masm.pmuludq_rr(src.code(), dest.code());
-    }
-    void pmulld(const Operand &src, FloatRegister dest) {
-        MOZ_ASSERT(HasSSE41());
-        switch (src.kind()) {
-          case Operand::FPREG:
-            masm.pmulld_rr(src.fpu(), dest.code());
-            break;
-          case Operand::MEM_REG_DISP:
-            masm.pmulld_mr(src.disp(), src.base(), dest.code());
-            break;
-          case Operand::MEM_ADDRESS32:
-            masm.pmulld_mr(src.address(), dest.code());
-            break;
-          default:
-            MOZ_CRASH("unexpected operand kind");
-        }
-    }
     void vaddps(const Operand &src1, FloatRegister src0, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
         switch (src1.kind()) {
           case Operand::FPREG:
             masm.vaddps_rr(src1.fpu(), src0.code(), dest.code());
             break;
           case Operand::MEM_REG_DISP:
             masm.vaddps_mr(src1.disp(), src1.base(), src0.code(), dest.code());
@@ -1999,32 +1976,16 @@ class AssemblerX86Shared : public Assemb
     void pxor(FloatRegister src, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
         masm.pxor_rr(src.code(), dest.code());
     }
     void pshufd(uint32_t mask, FloatRegister src, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
         masm.pshufd_irr(mask, src.code(), dest.code());
     }
-    void pshufd(uint32_t mask, const Operand &src, FloatRegister dest) {
-        MOZ_ASSERT(HasSSE2());
-        switch (src.kind()) {
-          case Operand::FPREG:
-            masm.pshufd_irr(mask, src.fpu(), dest.code());
-            break;
-          case Operand::MEM_REG_DISP:
-            masm.pshufd_imr(mask, src.disp(), src.base(), dest.code());
-            break;
-          case Operand::MEM_ADDRESS32:
-            masm.pshufd_imr(mask, src.address(), dest.code());
-            break;
-          default:
-            MOZ_CRASH("unexpected operand kind");
-        }
-    }
     void movhlps(FloatRegister src, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
         masm.movhlps_rr(src.code(), dest.code());
     }
     void movlhps(FloatRegister src, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
         masm.movlhps_rr(src.code(), dest.code());
     }
--- a/js/src/jit/shared/BaseAssembler-x86-shared.h
+++ b/js/src/jit/shared/BaseAssembler-x86-shared.h
@@ -382,37 +382,34 @@ private:
         OP2_XADD_EvGv       = 0xC1,
         OP2_CMPPS_VpsWps    = 0xC2,
         OP2_PEXTRW_GdUdIb   = 0xC5,
         OP2_SHUFPS_VpsWpsIb = 0xC6,
         OP2_PSRLD_VdqWdq    = 0xD2,
         OP2_PSRAD_VdqWdq    = 0xE2,
         OP2_PXORDQ_VdqWdq   = 0xEF,
         OP2_PSLLD_VdqWdq    = 0xF2,
-        OP2_PMULUDQ_VdqWdq  = 0xF4,
         OP2_PSUBD_VdqWdq    = 0xFA,
         OP2_PADDD_VdqWdq    = 0xFE
     } TwoByteOpcodeID;
 
     typedef enum {
         OP3_ROUNDSS_VsdWsd  = 0x0A,
         OP3_ROUNDSD_VsdWsd  = 0x0B,
         OP3_BLENDVPS_VdqWdq = 0x14,
         OP3_PEXTRD_EdVdqIb  = 0x16,
         OP3_BLENDPS_VpsWpsIb = 0x0C,
         OP3_PTEST_VdVd      = 0x17,
         OP3_INSERTPS_VpsUps = 0x21,
         OP3_PINSRD_VdqEdIb  = 0x22,
-        OP3_PMULLD_VdqWdq   = 0x40,
         OP3_VBLENDVPS_VdqWdq = 0x4A
     } ThreeByteOpcodeID;
 
     typedef enum {
         ESCAPE_BLENDVPS     = 0x38,
-        ESCAPE_PMULLD       = 0x38,
         ESCAPE_PTEST        = 0x38,
         ESCAPE_PINSRD       = 0x3A,
         ESCAPE_PEXTRD       = 0x3A,
         ESCAPE_ROUNDSD      = 0x3A,
         ESCAPE_INSERTPS     = 0x3A,
         ESCAPE_BLENDPS      = 0x3A,
         ESCAPE_VBLENDVPS    = 0x3A
     } ThreeByteEscape;
@@ -800,43 +797,16 @@ public:
     }
     void psubd_mr(const void* address, XMMRegisterID dst)
     {
         spew("psubd      %p, %s", address, nameFPReg(dst));
         m_formatter.prefix(PRE_SSE_66);
         m_formatter.twoByteOp(OP2_PSUBD_VdqWdq, address, (RegisterID)dst);
     }
 
-    void pmuludq_rr(XMMRegisterID src, XMMRegisterID dst)
-    {
-        spew("pmuludq     %s, %s", nameFPReg(src), nameFPReg(dst));
-        m_formatter.prefix(PRE_SSE_66);
-        m_formatter.twoByteOp(OP2_PMULUDQ_VdqWdq, (RegisterID)src, (RegisterID)dst);
-    }
-
-    void pmulld_rr(XMMRegisterID src, XMMRegisterID dst)
-    {
-        spew("pmulld      %s, %s", nameFPReg(src), nameFPReg(dst));
-        m_formatter.prefix(PRE_SSE_66);
-        m_formatter.threeByteOp(OP3_PMULLD_VdqWdq, ESCAPE_PMULLD, (RegisterID)src, (RegisterID)dst);
-    }
-    void pmulld_mr(int offset, RegisterID base, XMMRegisterID dst)
-    {
-        spew("pmulld      %s0x%x(%s), %s",
-             PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(dst));
-        m_formatter.prefix(PRE_SSE_66);
-        m_formatter.threeByteOp(OP3_PMULLD_VdqWdq, ESCAPE_PMULLD, offset, base, (RegisterID)dst);
-    }
-    void pmulld_mr(const void* address, XMMRegisterID dst)
-    {
-        spew("pmulld      %p, %s", address, nameFPReg(dst));
-        m_formatter.prefix(PRE_SSE_66);
-        m_formatter.threeByteOp(OP3_PMULLD_VdqWdq, ESCAPE_PMULLD, address, (RegisterID)dst);
-    }
-
     void vaddps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
     {
         twoByteOpSimd("vaddps", VEX_PS, OP2_ADDPS_VpsWps, src1, src0, dst);
     }
     void vaddps_mr(int offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
     {
         twoByteOpSimd("vaddps", VEX_PS, OP2_ADDPS_VpsWps, offset, base, src0, dst);
     }
@@ -2966,34 +2936,16 @@ public:
     {
         MOZ_ASSERT(mask < 256);
         spew("pshufd     0x%x, %s, %s", mask, nameFPReg(src), nameFPReg(dst));
         m_formatter.prefix(PRE_SSE_66);
         m_formatter.twoByteOp(OP2_PSHUFD_VdqWdqIb, (RegisterID)src, (RegisterID)dst);
         m_formatter.immediate8(uint8_t(mask));
     }
 
-    void pshufd_imr(uint32_t mask, int offset, RegisterID base, XMMRegisterID dst)
-    {
-        MOZ_ASSERT(mask < 256);
-        spew("pshufd     0x%x, %s0x%x(%s), %s",
-             mask, PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(dst));
-        m_formatter.prefix(PRE_SSE_66);
-        m_formatter.twoByteOp(OP2_PSHUFD_VdqWdqIb, offset, base, (RegisterID)dst);
-        m_formatter.immediate8(uint8_t(mask));
-    }
-
-    void pshufd_imr(uint32_t mask, const void* address, XMMRegisterID dst)
-    {
-        spew("pshufd     %x, %p, %s", mask, address, nameFPReg(dst));
-        m_formatter.prefix(PRE_SSE_66);
-        m_formatter.twoByteOp(OP2_PSHUFD_VdqWdqIb, address, (RegisterID)dst);
-        m_formatter.immediate8(uint8_t(mask));
-    }
-
     void shufps_irr(uint32_t mask, XMMRegisterID src, XMMRegisterID dst)
     {
         MOZ_ASSERT(mask < 256);
         spew("shufps     0x%x, %s, %s", mask, nameFPReg(src), nameFPReg(dst));
         m_formatter.twoByteOp(OP2_SHUFPS_VpsWpsIb, (RegisterID)src, (RegisterID)dst);
         m_formatter.immediate8(uint8_t(mask));
     }
 
@@ -3004,16 +2956,17 @@ public:
              mask, PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(dst));
         m_formatter.twoByteOp(OP2_SHUFPS_VpsWpsIb, offset, base, (RegisterID)dst);
         m_formatter.immediate8(uint8_t(mask));
     }
 
     void shufps_imr(uint32_t mask, const void* address, XMMRegisterID dst)
     {
         spew("shufps     %x, %p, %s", mask, address, nameFPReg(dst));
+        m_formatter.prefix(PRE_SSE_F3);
         m_formatter.twoByteOp(OP2_SHUFPS_VpsWpsIb, address, (RegisterID)dst);
         m_formatter.immediate8(uint8_t(mask));
     }
 
     void movhlps_rr(XMMRegisterID src, XMMRegisterID dst)
     {
         spew("movhlps    %s, %s", nameFPReg(src), nameFPReg(dst));
         m_formatter.twoByteOp(OP2_MOVHLPS_VqUq, (RegisterID)src, (RegisterID)dst);
@@ -4782,26 +4735,16 @@ private:
               case 0x38: m = 2; break; // 0x0F 0x38
               case 0x3A: m = 3; break; // 0x0F 0x3A
               default: MOZ_CRASH("unexpected escape");
             }
             threeOpVex(ty, r, x, b, m, w, v, l, opcode);
             memoryModRM(offset, base, reg);
         }
 
-        void threeByteOp(ThreeByteOpcodeID opcode, ThreeByteEscape escape, const void* address, int reg)
-        {
-            m_buffer.ensureSpace(maxInstructionSize);
-            emitRexIfNeeded(reg, 0, 0);
-            m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE);
-            m_buffer.putByteUnchecked(escape);
-            m_buffer.putByteUnchecked(opcode);
-            memoryModRM(address, reg);
-        }
-
         void vblendvOpVex(VexOperandType ty, ThreeByteOpcodeID opcode, ThreeByteEscape escape,
                           XMMRegisterID mask, RegisterID rm, XMMRegisterID src0, int reg)
         {
             int r = (reg >> 3), x = 0, b = (rm >> 3);
             int m = 0, w = 0, v = src0, l = 0;
             switch (escape) {
               case 0x38: m = 2; break; // 0x0F 0x38
               case 0x3A: m = 3; break; // 0x0F 0x3A
--- a/js/src/jit/shared/CodeGenerator-x86-shared.cpp
+++ b/js/src/jit/shared/CodeGenerator-x86-shared.cpp
@@ -2621,37 +2621,19 @@ CodeGeneratorX86Shared::visitSimdBinaryA
     MSimdBinaryArith::Operation op = ins->operation();
     switch (op) {
       case MSimdBinaryArith::Add:
         masm.packedAddInt32(rhs, lhs);
         return;
       case MSimdBinaryArith::Sub:
         masm.packedSubInt32(rhs, lhs);
         return;
-      case MSimdBinaryArith::Mul: {
-        if (AssemblerX86Shared::HasSSE41()) {
-            masm.pmulld(rhs, lhs);
-            return;
-        }
-
-        masm.loadAlignedInt32x4(rhs, ScratchSimdReg);
-        masm.pmuludq(lhs, ScratchSimdReg);
-        // ScratchSimdReg contains (Rx, _, Rz, _) where R is the resulting vector.
-
-        FloatRegister temp = ToFloatRegister(ins->temp());
-        masm.pshufd(MacroAssembler::ComputeShuffleMask(LaneY, LaneY, LaneW, LaneW), lhs, lhs);
-        masm.pshufd(MacroAssembler::ComputeShuffleMask(LaneY, LaneY, LaneW, LaneW), rhs, temp);
-        masm.pmuludq(temp, lhs);
-        // lhs contains (Ry, _, Rw, _) where R is the resulting vector.
-
-        masm.shufps(MacroAssembler::ComputeShuffleMask(LaneX, LaneZ, LaneX, LaneZ), ScratchSimdReg, lhs);
-        // lhs contains (Ry, Rw, Rx, Rz)
-        masm.shufps(MacroAssembler::ComputeShuffleMask(LaneZ, LaneX, LaneW, LaneY), lhs, lhs);
-        return;
-      }
+      case MSimdBinaryArith::Mul:
+        // we can do mul with a single instruction only if we have SSE4.1
+        // using the PMULLD instruction.
       case MSimdBinaryArith::Div:
         // x86 doesn't have SIMD i32 div.
         break;
       case MSimdBinaryArith::Max:
         // we can do max with a single instruction only if we have SSE4.1
         // using the PMAXSD instruction.
         break;
       case MSimdBinaryArith::Min:
--- a/js/src/jit/shared/Lowering-shared.cpp
+++ b/js/src/jit/shared/Lowering-shared.cpp
@@ -9,73 +9,16 @@
 #include "jit/LIR.h"
 #include "jit/MIR.h"
 
 #include "vm/Symbol.h"
 
 using namespace js;
 using namespace jit;
 
-bool
-LIRGeneratorShared::ShouldReorderCommutative(MDefinition *lhs, MDefinition *rhs, MInstruction *ins)
-{
-    // lhs and rhs are used by the commutative operator.
-    MOZ_ASSERT(lhs->hasDefUses());
-    MOZ_ASSERT(rhs->hasDefUses());
-
-    // Ensure that if there is a constant, then it is in rhs.
-    if (rhs->isConstant())
-        return false;
-    if (lhs->isConstant())
-        return true;
-
-    // Since clobbering binary operations clobber the left operand, prefer a
-    // non-constant lhs operand with no further uses. To be fully precise, we
-    // should check whether this is the *last* use, but checking hasOneDefUse()
-    // is a decent approximation which doesn't require any extra analysis.
-    bool rhsSingleUse = rhs->hasOneDefUse();
-    bool lhsSingleUse = lhs->hasOneDefUse();
-    if (rhsSingleUse) {
-        if (!lhsSingleUse)
-            return true;
-    } else {
-        if (lhsSingleUse)
-            return false;
-    }
-
-    // If this is a reduction-style computation, such as
-    //
-    //   sum = 0;
-    //   for (...)
-    //      sum += ...;
-    //
-    // put the phi on the left to promote coalescing. This is fairly specific.
-    if (rhsSingleUse &&
-        rhs->isPhi() &&
-        rhs->block()->isLoopHeader() &&
-        ins == rhs->toPhi()->getLoopBackedgeOperand())
-    {
-        return true;
-    }
-
-    return false;
-}
-
-void
-LIRGeneratorShared::ReorderCommutative(MDefinition **lhsp, MDefinition **rhsp, MInstruction *ins)
-{
-    MDefinition *lhs = *lhsp;
-    MDefinition *rhs = *rhsp;
-
-    if (ShouldReorderCommutative(lhs, rhs, ins)) {
-        *rhsp = lhs;
-        *lhsp = rhs;
-    }
-}
-
 void
 LIRGeneratorShared::visitConstant(MConstant *ins)
 {
     const Value &v = ins->value();
     switch (ins->type()) {
       case MIRType_Boolean:
         define(new(alloc()) LInteger(v.toBoolean()), ins);
         break;
--- a/js/src/jit/shared/Lowering-shared.h
+++ b/js/src/jit/shared/Lowering-shared.h
@@ -45,20 +45,16 @@ class LIRGeneratorShared : public MDefin
         osiPoint_(nullptr)
     { }
 
     MIRGenerator *mir() {
         return gen;
     }
 
   protected:
-
-    static void ReorderCommutative(MDefinition **lhsp, MDefinition **rhsp, MInstruction *ins);
-    static bool ShouldReorderCommutative(MDefinition *lhs, MDefinition *rhs, MInstruction *ins);
-
     // A backend can decide that an instruction should be emitted at its uses,
     // rather than at its definition. To communicate this, set the
     // instruction's virtual register set to 0. When using the instruction,
     // its virtual register is temporarily reassigned. To know to clear it
     // after constructing the use information, the worklist bit is temporarily
     // unset.
     //
     // The backend can use the worklist bit to determine whether or not a
--- a/js/src/jit/shared/Lowering-x86-shared.cpp
+++ b/js/src/jit/shared/Lowering-x86-shared.cpp
@@ -651,44 +651,16 @@ LIRGeneratorX86Shared::visitAsmJSAtomicB
 
     LAsmJSAtomicBinopHeap *lir =
         new(alloc()) LAsmJSAtomicBinopHeap(useRegister(ptr), value, tempDef);
 
     defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
 }
 
 void
-LIRGeneratorX86Shared::visitSimdBinaryArith(MSimdBinaryArith *ins)
-{
-    MOZ_ASSERT(IsSimdType(ins->type()));
-
-    MDefinition *lhs = ins->lhs();
-    MDefinition *rhs = ins->rhs();
-
-    if (ins->isCommutative())
-        ReorderCommutative(&lhs, &rhs, ins);
-
-    if (ins->type() == MIRType_Int32x4) {
-        lowerForFPU(new(alloc()) LSimdBinaryArithIx4(), ins, lhs, rhs);
-        return;
-    }
-
-    MOZ_ASSERT(ins->type() == MIRType_Float32x4, "unknown simd type on binary arith operation");
-
-    LSimdBinaryArithFx4 *lir = new(alloc()) LSimdBinaryArithFx4();
-
-    bool needsTemp = ins->operation() == MSimdBinaryArith::Max ||
-                     ins->operation() == MSimdBinaryArith::MinNum ||
-                     ins->operation() == MSimdBinaryArith::MaxNum;
-    lir->setTemp(0, needsTemp ? temp(LDefinition::FLOAT32X4) : LDefinition::BogusTemp());
-
-    lowerForFPU(lir, ins, lhs, rhs);
-}
-
-void
 LIRGeneratorX86Shared::visitSimdTernaryBitwise(MSimdTernaryBitwise *ins)
 {
     MOZ_ASSERT(IsSimdType(ins->type()));
 
     if (ins->type() == MIRType_Int32x4 || ins->type() == MIRType_Float32x4) {
         LSimdSelect *lins = new(alloc()) LSimdSelect;
 
         // This must be useRegisterAtStart() because it is destroyed.
--- a/js/src/jit/shared/Lowering-x86-shared.h
+++ b/js/src/jit/shared/Lowering-x86-shared.h
@@ -48,17 +48,16 @@ class LIRGeneratorX86Shared : public LIR
     void lowerUDiv(MDiv *div);
     void lowerUMod(MMod *mod);
     void lowerUrshD(MUrsh *mir);
     void lowerConstantDouble(double d, MInstruction *ins);
     void lowerConstantFloat32(float d, MInstruction *ins);
     void lowerTruncateDToInt32(MTruncateToInt32 *ins);
     void lowerTruncateFToInt32(MTruncateToInt32 *ins);
     void visitForkJoinGetSlice(MForkJoinGetSlice *ins);
-    void visitSimdBinaryArith(MSimdBinaryArith *ins);
     void visitSimdTernaryBitwise(MSimdTernaryBitwise *ins);
     void visitSimdSplatX4(MSimdSplatX4 *ins);
     void visitSimdValueX4(MSimdValueX4 *ins);
     void visitCompareExchangeTypedArrayElement(MCompareExchangeTypedArrayElement *ins);
     void visitAtomicTypedArrayElementBinop(MAtomicTypedArrayElementBinop *ins);
     void visitAsmJSCompareExchangeHeap(MAsmJSCompareExchangeHeap *ins);
     void visitAsmJSAtomicBinopHeap(MAsmJSAtomicBinopHeap *ins);
 };