Bug 1136226 - Implement compares for 8x16 and 16x8 SIMD types. r=sunfish
author: Jakob Stoklund Olesen <jolesen@mozilla.com>
date: Tue, 31 May 2016 09:00:19 -0700
changeset: 338733:8e4f48388c600edd5ee5292b014cedb0b8f7672f
parent: 338732:e5479106a7ab7033580c4114d96cd5d0d3c062d2
child: 338734:a019522f497b3d852ece421f4e09548e654d1e7e
push id: 6249
push user: jlund@mozilla.com
push date: Mon, 01 Aug 2016 13:59:36 +0000
treeherder: mozilla-beta@bad9d4f5bf7e
reviewers: sunfish
bugs: 1136226
milestone: 49.0a1
Bug 1136226 - Implement compares for 8x16 and 16x8 SIMD types. r=sunfish

Since SSE doesn't have unsigned comparisons, add a bias vector and use the signed comparisons instead, just like we do for the 32x4 unsigned vectors.

Use 'defineReuseInput' even when SIMD input and output types differ. This is fine now since the register allocator uses a single Simd128 class for all SIMD registers.
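
The bias trick is worth spelling out. Below is a minimal standalone sketch (plain C++, not SpiderMonkey code) of the per-lane transformation for 8-bit lanes: adding the signed minimum (0x80, with wraparound) maps the unsigned range [0, 255] monotonically onto the signed range [-128, 127], so SSE's signed PCMPGTB can answer unsigned order queries.

    #include <cstdint>
    #include <cstdio>

    // Signed greater-than on one lane: the only 8-bit order compare SSE2 has (PCMPGTB).
    static bool signedGT(int8_t a, int8_t b) { return a > b; }

    // Unsigned a > b via the bias: compare (a + 0x80) and (b + 0x80) as signed.
    // (The uint8_t -> int8_t conversion wraps on all targets SpiderMonkey supports.)
    static bool unsignedGT(uint8_t a, uint8_t b) {
        return signedGT(int8_t(uint8_t(a + 0x80)), int8_t(uint8_t(b + 0x80)));
    }

    int main() {
        for (unsigned a = 0; a < 256; a++) {
            for (unsigned b = 0; b < 256; b++) {
                if (unsignedGT(uint8_t(a), uint8_t(b)) != (a > b)) {
                    puts("mismatch");
                    return 1;
                }
            }
        }
        puts("bias trick agrees with unsigned > on every 8-bit lane");
        return 0;
    }
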
js/src/jit/Lowering.cpp
js/src/jit/MIR.cpp
js/src/jit/arm/Assembler-arm.h
js/src/jit/arm64/Assembler-arm64.h
js/src/jit/mips32/Assembler-mips32.h
js/src/jit/mips64/Assembler-mips64.h
js/src/jit/none/Architecture-none.h
js/src/jit/shared/LIR-shared.h
js/src/jit/shared/LOpcodes-shared.h
js/src/jit/x86-shared/Architecture-x86-shared.h
js/src/jit/x86-shared/Assembler-x86-shared.h
js/src/jit/x86-shared/BaseAssembler-x86-shared.h
js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp
js/src/jit/x86-shared/CodeGenerator-x86-shared.h
js/src/jit/x86-shared/Encoding-x86-shared.h
js/src/jit/x86-shared/Lowering-x86-shared.cpp
js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h
js/src/jit/x86-shared/MacroAssembler-x86-shared.h
--- a/js/src/jit/Lowering.cpp
+++ b/js/src/jit/Lowering.cpp
@@ -4440,25 +4440,42 @@ LIRGenerator::visitSimdBinaryComp(MSimdB
 {
     MOZ_ASSERT(IsSimdType(ins->lhs()->type()));
     MOZ_ASSERT(IsSimdType(ins->rhs()->type()));
     MOZ_ASSERT(IsBooleanSimdType(ins->type()));
 
     if (ShouldReorderCommutative(ins->lhs(), ins->rhs(), ins))
         ins->reverse();
 
-    if (ins->specialization() == MIRType::Int32x4) {
-        MOZ_ASSERT(ins->signedness() == SimdSign::Signed);
-        LSimdBinaryCompIx4* add = new(alloc()) LSimdBinaryCompIx4();
-        lowerForCompIx4(add, ins, ins->lhs(), ins->rhs());
-    } else if (ins->specialization() == MIRType::Float32x4) {
-        MOZ_ASSERT(ins->signedness() == SimdSign::NotApplicable);
-        LSimdBinaryCompFx4* add = new(alloc()) LSimdBinaryCompFx4();
-        lowerForCompFx4(add, ins, ins->lhs(), ins->rhs());
-    } else {
+    switch (ins->specialization()) {
+      case MIRType::Int8x16: {
+          MOZ_ASSERT(ins->signedness() == SimdSign::Signed);
+          LSimdBinaryCompIx16* lir = new (alloc()) LSimdBinaryCompIx16();
+          lowerForFPU(lir, ins, ins->lhs(), ins->rhs());
+          return;
+      }
+      case MIRType::Int16x8: {
+          MOZ_ASSERT(ins->signedness() == SimdSign::Signed);
+          LSimdBinaryCompIx8* lir = new (alloc()) LSimdBinaryCompIx8();
+          lowerForFPU(lir, ins, ins->lhs(), ins->rhs());
+          return;
+      }
+      case MIRType::Int32x4: {
+          MOZ_ASSERT(ins->signedness() == SimdSign::Signed);
+          LSimdBinaryCompIx4* lir = new (alloc()) LSimdBinaryCompIx4();
+          lowerForCompIx4(lir, ins, ins->lhs(), ins->rhs());
+          return;
+      }
+      case MIRType::Float32x4: {
+          MOZ_ASSERT(ins->signedness() == SimdSign::NotApplicable);
+          LSimdBinaryCompFx4* lir = new (alloc()) LSimdBinaryCompFx4();
+          lowerForCompFx4(lir, ins, ins->lhs(), ins->rhs());
+          return;
+      }
+      default:
         MOZ_CRASH("Unknown compare type when comparing values");
     }
 }
 
 void
 LIRGenerator::visitSimdBinaryBitwise(MSimdBinaryBitwise* ins)
 {
     MOZ_ASSERT(IsSimdType(ins->lhs()->type()));
--- a/js/src/jit/MIR.cpp
+++ b/js/src/jit/MIR.cpp
@@ -1333,38 +1333,52 @@ MInstruction*
 MSimdBinaryComp::AddLegalized(TempAllocator& alloc, MBasicBlock* addTo, MDefinition* left,
                               MDefinition* right, Operation op, SimdSign sign)
 {
     MOZ_ASSERT(left->type() == right->type());
     MIRType opType = left->type();
     MOZ_ASSERT(IsSimdType(opType));
     bool IsEquality = op == equal || op == notEqual;
 
-    if (!SupportsUint32x4Compares && sign == SimdSign::Unsigned && !IsEquality) {
-        MOZ_ASSERT(opType == MIRType::Int32x4);
+    // Check if this is an unsupported unsigned compare that needs to be biased.
+    // If so, put the bias vector in `bias`.
+    if (sign == SimdSign::Unsigned && !IsEquality) {
+        MInstruction* bias = nullptr;
+
         // This is an order comparison of unsigned integer vectors that this target
         // does not support. Offset |left| and |right| by the signed minimum, then compare signed.
-        MInstruction* bias =
-          MSimdConstant::New(alloc, SimdConstant::SplatX4(int32_t(0x80000000)), opType);
-        addTo->add(bias);
-
-        // Add the bias.
-        MInstruction* bleft =
-          MSimdBinaryArith::AddLegalized(alloc, addTo, left, bias, MSimdBinaryArith::Op_add);
-        MInstruction* bright =
-          MSimdBinaryArith::AddLegalized(alloc, addTo, right, bias, MSimdBinaryArith::Op_add);
-
-        // Do the equivalent signed comparison.
-        MInstruction* result = MSimdBinaryComp::New(alloc, bleft, bright, op, SimdSign::Signed);
-        addTo->add(result);
-
-        return result;
-    }
-
-    if (!SupportsUint32x4Compares && sign == SimdSign::Unsigned && opType == MIRType::Int32x4) {
+        if (!SupportsUint32x4Compares && opType == MIRType::Int32x4)
+            bias = MSimdConstant::New(alloc, SimdConstant::SplatX4(int32_t(0x80000000)), opType);
+        else if (!SupportsUint16x8Compares && opType == MIRType::Int16x8)
+            bias = MSimdConstant::New(alloc, SimdConstant::SplatX8(int16_t(0x8000)), opType);
+        else if (!SupportsUint8x16Compares && opType == MIRType::Int8x16)
+            bias = MSimdConstant::New(alloc, SimdConstant::SplatX16(int8_t(0x80)), opType);
+
+        if (bias) {
+            addTo->add(bias);
+
+            // Add the bias.
+            MInstruction* bleft =
+              MSimdBinaryArith::AddLegalized(alloc, addTo, left, bias, MSimdBinaryArith::Op_add);
+            MInstruction* bright =
+              MSimdBinaryArith::AddLegalized(alloc, addTo, right, bias, MSimdBinaryArith::Op_add);
+
+            // Do the equivalent signed comparison.
+            MInstruction* result =
+              MSimdBinaryComp::New(alloc, bleft, bright, op, SimdSign::Signed);
+            addTo->add(result);
+
+            return result;
+        }
+    }
+
+    if (sign == SimdSign::Unsigned &&
+        ((!SupportsUint32x4Compares && opType == MIRType::Int32x4) ||
+         (!SupportsUint16x8Compares && opType == MIRType::Int16x8) ||
+         (!SupportsUint8x16Compares && opType == MIRType::Int8x16))) {
         // The sign doesn't matter for equality tests. Flip it to make the
         // backend assertions happy.
         MOZ_ASSERT(IsEquality);
         sign = SimdSign::Signed;
     }
 
     // This is a legal operation already. Just create the instruction requested.
     MInstruction* result = MSimdBinaryComp::New(alloc, left, right, op, sign);
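
The "sign doesn't matter for equality tests" note above can be checked exhaustively for 8-bit lanes. A throwaway sketch (plain C++, not part of the patch):

    #include <cstdint>

    // Signed and unsigned lanes compare equal exactly when their bit patterns
    // match, so retagging an unsigned equal/notEqual as Signed is safe.
    int main() {
        for (unsigned a = 0; a < 256; a++) {
            for (unsigned b = 0; b < 256; b++) {
                bool uns = uint8_t(a) == uint8_t(b);
                bool sgn = int8_t(uint8_t(a)) == int8_t(uint8_t(b));
                if (uns != sgn)
                    return 1;
            }
        }
        return 0;
    }
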
--- a/js/src/jit/arm/Assembler-arm.h
+++ b/js/src/jit/arm/Assembler-arm.h
@@ -222,17 +222,19 @@ static_assert(JitStackAlignment % SimdMe
   "Stack alignment should be larger than any of the alignments which are used for "
   "spilled values.  Thus it should be larger than the alignment for SIMD accesses.");
 
 static const uint32_t AsmJSStackAlignment = SimdMemoryAlignment;
 
 // Does this architecture support SIMD conversions between Uint32x4 and Float32x4?
 static MOZ_CONSTEXPR_VAR bool SupportsUint32x4FloatConversions = false;
 
-// Does this architecture support comparisons of unsigned 32x4 integer vectors?
+// Does this architecture support comparisons of unsigned integer vectors?
+static MOZ_CONSTEXPR_VAR bool SupportsUint8x16Compares = false;
+static MOZ_CONSTEXPR_VAR bool SupportsUint16x8Compares = false;
 static MOZ_CONSTEXPR_VAR bool SupportsUint32x4Compares = false;
 
 static const Scale ScalePointer = TimesFour;
 
 class Instruction;
 class InstBranchImm;
 uint32_t RM(Register r);
 uint32_t RS(Register r);
--- a/js/src/jit/arm64/Assembler-arm64.h
+++ b/js/src/jit/arm64/Assembler-arm64.h
@@ -173,17 +173,19 @@ static_assert(CodeAlignment % SimdMemory
   "alignment for SIMD constants.");
 
 static const uint32_t AsmJSStackAlignment = SimdMemoryAlignment;
 static const int32_t AsmJSGlobalRegBias = 1024;
 
 // Does this architecture support SIMD conversions between Uint32x4 and Float32x4?
 static MOZ_CONSTEXPR_VAR bool SupportsUint32x4FloatConversions = false;
 
-// Does this architecture support comparisons of unsigned 32x4 integer vectors?
+// Does this architecture support comparisons of unsigned integer vectors?
+static MOZ_CONSTEXPR_VAR bool SupportsUint8x16Compares = false;
+static MOZ_CONSTEXPR_VAR bool SupportsUint16x8Compares = false;
 static MOZ_CONSTEXPR_VAR bool SupportsUint32x4Compares = false;
 
 class Assembler : public vixl::Assembler
 {
   public:
     Assembler()
       : vixl::Assembler()
     { }
--- a/js/src/jit/mips32/Assembler-mips32.h
+++ b/js/src/jit/mips32/Assembler-mips32.h
@@ -101,17 +101,19 @@ static_assert(JitStackAlignment % sizeof
 // alignment requirements still need to be explored.
 // TODO Copy the static_asserts from x64/x86 assembler files.
 static MOZ_CONSTEXPR_VAR uint32_t SimdMemoryAlignment = 8;
 static MOZ_CONSTEXPR_VAR uint32_t AsmJSStackAlignment = SimdMemoryAlignment;
 
 // Does this architecture support SIMD conversions between Uint32x4 and Float32x4?
 static MOZ_CONSTEXPR_VAR bool SupportsUint32x4FloatConversions = false;
 
-// Does this architecture support comparisons of unsigned 32x4 integer vectors?
+// Does this architecture support comparisons of unsigned integer vectors?
+static MOZ_CONSTEXPR_VAR bool SupportsUint8x16Compares = false;
+static MOZ_CONSTEXPR_VAR bool SupportsUint16x8Compares = false;
 static MOZ_CONSTEXPR_VAR bool SupportsUint32x4Compares = false;
 
 static MOZ_CONSTEXPR_VAR Scale ScalePointer = TimesFour;
 
 class Assembler : public AssemblerMIPSShared
 {
   public:
     Assembler()
--- a/js/src/jit/mips64/Assembler-mips64.h
+++ b/js/src/jit/mips64/Assembler-mips64.h
@@ -112,17 +112,19 @@ static_assert(JitStackAlignment % sizeof
 // TODO Copy the static_asserts from x64/x86 assembler files.
 static MOZ_CONSTEXPR_VAR uint32_t SimdMemoryAlignment = 16;
 
 static MOZ_CONSTEXPR_VAR uint32_t AsmJSStackAlignment = SimdMemoryAlignment;
 
 // Does this architecture support SIMD conversions between Uint32x4 and Float32x4?
 static MOZ_CONSTEXPR_VAR bool SupportsUint32x4FloatConversions = false;
 
-// Does this architecture support comparisons of unsigned 32x4 integer vectors?
+// Does this architecture support comparisons of unsigned integer vectors?
+static MOZ_CONSTEXPR_VAR bool SupportsUint8x16Compares = false;
+static MOZ_CONSTEXPR_VAR bool SupportsUint16x8Compares = false;
 static MOZ_CONSTEXPR_VAR bool SupportsUint32x4Compares = false;
 
 static MOZ_CONSTEXPR_VAR Scale ScalePointer = TimesEight;
 
 class Assembler : public AssemblerMIPSShared
 {
   public:
     Assembler()
--- a/js/src/jit/none/Architecture-none.h
+++ b/js/src/jit/none/Architecture-none.h
@@ -16,17 +16,19 @@ namespace jit {
 
 static const bool SupportsSimd = false;
 static const uint32_t SimdMemoryAlignment = 4; // Make it 4 to avoid a bunch of div-by-zero warnings
 static const uint32_t AsmJSStackAlignment = 8;
 
 // Does this architecture support SIMD conversions between Uint32x4 and Float32x4?
 static MOZ_CONSTEXPR_VAR bool SupportsUint32x4FloatConversions = false;
 
-// Does this architecture support comparisons of unsigned 32x4 integer vectors?
+// Does this architecture support comparisons of unsigned integer vectors?
+static MOZ_CONSTEXPR_VAR bool SupportsUint8x16Compares = false;
+static MOZ_CONSTEXPR_VAR bool SupportsUint16x8Compares = false;
 static MOZ_CONSTEXPR_VAR bool SupportsUint32x4Compares = false;
 
 class Registers
 {
   public:
     enum RegisterID {
         r0 = 0,
         invalid_reg
--- a/js/src/jit/shared/LIR-shared.h
+++ b/js/src/jit/shared/LIR-shared.h
@@ -505,17 +505,33 @@ public:
     MSimdBinaryComp::Operation operation() const {
         return mir_->toSimdBinaryComp()->operation();
     }
     const char* extraName() const {
         return MSimdBinaryComp::OperationName(operation());
     }
 };
 
-// Binary SIMD comparison operation between two Int32x4 operands
+// Binary SIMD comparison operation between two Int8x16 operands.
+class LSimdBinaryCompIx16 : public LSimdBinaryComp
+{
+  public:
+    LIR_HEADER(SimdBinaryCompIx16);
+    LSimdBinaryCompIx16() : LSimdBinaryComp() {}
+};
+
+// Binary SIMD comparison operation between two Int16x8 operands.
+class LSimdBinaryCompIx8 : public LSimdBinaryComp
+{
+  public:
+    LIR_HEADER(SimdBinaryCompIx8);
+    LSimdBinaryCompIx8() : LSimdBinaryComp() {}
+};
+
+// Binary SIMD comparison operation between two Int32x4 operands.
 class LSimdBinaryCompIx4 : public LSimdBinaryComp
 {
   public:
     LIR_HEADER(SimdBinaryCompIx4);
     LSimdBinaryCompIx4() : LSimdBinaryComp() {}
 };
 
 // Binary SIMD comparison operation between two Float32x4 operands
--- a/js/src/jit/shared/LOpcodes-shared.h
+++ b/js/src/jit/shared/LOpcodes-shared.h
@@ -39,16 +39,18 @@
     _(SimdSwizzleI)                 \
     _(SimdSwizzleF)                 \
     _(SimdShuffle)                  \
     _(SimdShuffleX4)                \
     _(SimdUnaryArithIx16)           \
     _(SimdUnaryArithIx8)            \
     _(SimdUnaryArithIx4)            \
     _(SimdUnaryArithFx4)            \
+    _(SimdBinaryCompIx16)           \
+    _(SimdBinaryCompIx8)            \
     _(SimdBinaryCompIx4)            \
     _(SimdBinaryCompFx4)            \
     _(SimdBinaryArithIx16)          \
     _(SimdBinaryArithIx8)           \
     _(SimdBinaryArithIx4)           \
     _(SimdBinaryArithFx4)           \
     _(SimdBinarySaturating)         \
     _(SimdBinaryBitwise)            \
--- a/js/src/jit/x86-shared/Architecture-x86-shared.h
+++ b/js/src/jit/x86-shared/Architecture-x86-shared.h
@@ -18,17 +18,19 @@
 #include "jit/x86-shared/Constants-x86-shared.h"
 
 namespace js {
 namespace jit {
 
 // Does this architecture support SIMD conversions between Uint32x4 and Float32x4?
 static MOZ_CONSTEXPR_VAR bool SupportsUint32x4FloatConversions = false;
 
-// Does this architecture support comparisons of unsigned 32x4 integer vectors?
+// Does this architecture support comparisons of unsigned integer vectors?
+static MOZ_CONSTEXPR_VAR bool SupportsUint8x16Compares = false;
+static MOZ_CONSTEXPR_VAR bool SupportsUint16x8Compares = false;
 static MOZ_CONSTEXPR_VAR bool SupportsUint32x4Compares = false;
 
 #if defined(JS_CODEGEN_X86)
 // In bytes: slots needed for potential memory->memory move spills.
 //   +8 for cycles
 //   +4 for gpr spills
 //   +8 for double spills
 static const uint32_t ION_FRAME_SLACK_SIZE    = 20;
--- a/js/src/jit/x86-shared/Assembler-x86-shared.h
+++ b/js/src/jit/x86-shared/Assembler-x86-shared.h
@@ -2264,20 +2264,83 @@ class AssemblerX86Shared : public Assemb
     void vucomisd(FloatRegister rhs, FloatRegister lhs) {
         MOZ_ASSERT(HasSSE2());
         masm.vucomisd_rr(rhs.encoding(), lhs.encoding());
     }
     void vucomiss(FloatRegister rhs, FloatRegister lhs) {
         MOZ_ASSERT(HasSSE2());
         masm.vucomiss_rr(rhs.encoding(), lhs.encoding());
     }
-    void vpcmpeqw(FloatRegister rhs, FloatRegister lhs, FloatRegister dst) {
+
+    void vpcmpeqb(const Operand& rhs, FloatRegister lhs, FloatRegister dest) {
+        MOZ_ASSERT(HasSSE2());
+        switch (rhs.kind()) {
+          case Operand::FPREG:
+            masm.vpcmpeqb_rr(rhs.fpu(), lhs.encoding(), dest.encoding());
+            break;
+          case Operand::MEM_REG_DISP:
+            masm.vpcmpeqb_mr(rhs.disp(), rhs.base(), lhs.encoding(), dest.encoding());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.vpcmpeqb_mr(rhs.address(), lhs.encoding(), dest.encoding());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
+    }
+    void vpcmpgtb(const Operand& rhs, FloatRegister lhs, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.vpcmpeqw_rr(rhs.encoding(), lhs.encoding(), dst.encoding());
-    }
+        switch (rhs.kind()) {
+          case Operand::FPREG:
+            masm.vpcmpgtb_rr(rhs.fpu(), lhs.encoding(), dest.encoding());
+            break;
+          case Operand::MEM_REG_DISP:
+            masm.vpcmpgtb_mr(rhs.disp(), rhs.base(), lhs.encoding(), dest.encoding());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.vpcmpgtb_mr(rhs.address(), lhs.encoding(), dest.encoding());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
+    }
+
+    void vpcmpeqw(const Operand& rhs, FloatRegister lhs, FloatRegister dest) {
+        MOZ_ASSERT(HasSSE2());
+        switch (rhs.kind()) {
+          case Operand::FPREG:
+            masm.vpcmpeqw_rr(rhs.fpu(), lhs.encoding(), dest.encoding());
+            break;
+          case Operand::MEM_REG_DISP:
+            masm.vpcmpeqw_mr(rhs.disp(), rhs.base(), lhs.encoding(), dest.encoding());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.vpcmpeqw_mr(rhs.address(), lhs.encoding(), dest.encoding());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
+    }
+    void vpcmpgtw(const Operand& rhs, FloatRegister lhs, FloatRegister dest) {
+        MOZ_ASSERT(HasSSE2());
+        switch (rhs.kind()) {
+          case Operand::FPREG:
+            masm.vpcmpgtw_rr(rhs.fpu(), lhs.encoding(), dest.encoding());
+            break;
+          case Operand::MEM_REG_DISP:
+            masm.vpcmpgtw_mr(rhs.disp(), rhs.base(), lhs.encoding(), dest.encoding());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.vpcmpgtw_mr(rhs.address(), lhs.encoding(), dest.encoding());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
+    }
+
     void vpcmpeqd(const Operand& rhs, FloatRegister lhs, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
         switch (rhs.kind()) {
           case Operand::FPREG:
             masm.vpcmpeqd_rr(rhs.fpu(), lhs.encoding(), dest.encoding());
             break;
           case Operand::MEM_REG_DISP:
             masm.vpcmpeqd_mr(rhs.disp(), rhs.base(), lhs.encoding(), dest.encoding());
@@ -2300,16 +2363,17 @@ class AssemblerX86Shared : public Assemb
             break;
           case Operand::MEM_ADDRESS32:
             masm.vpcmpgtd_mr(rhs.address(), lhs.encoding(), dest.encoding());
             break;
           default:
             MOZ_CRASH("unexpected operand kind");
         }
     }
+
     void vcmpps(uint8_t order, Operand src1, FloatRegister src0, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
         // :TODO: (Bug 1132894) See LIRGeneratorX86Shared::lowerForFPU
         // FIXME: This logic belongs in the MacroAssembler.
         if (!HasAVX() && !src0.aliases(dest)) {
             if (src1.kind() == Operand::FPREG &&
                 dest.aliases(FloatRegister::FromCode(src1.fpu())))
             {
--- a/js/src/jit/x86-shared/BaseAssembler-x86-shared.h
+++ b/js/src/jit/x86-shared/BaseAssembler-x86-shared.h
@@ -2569,19 +2569,66 @@ public:
         m_formatter.twoByteOp(jccRel32(cond));
         JmpSrc r = m_formatter.immediateRel32();
         spew("j%s        .Lfrom%d", CCName(cond), r.offset());
         return r;
     }
 
     // SSE operations:
 
+    void vpcmpeqb_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
+    {
+        twoByteOpSimd("vpcmpeqb", VEX_PD, OP2_PCMPEQB_VdqWdq, src1, src0, dst);
+    }
+    void vpcmpeqb_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
+    {
+        twoByteOpSimd("vpcmpeqb", VEX_PD, OP2_PCMPEQB_VdqWdq, offset, base, src0, dst);
+    }
+    void vpcmpeqb_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst)
+    {
+        twoByteOpSimd("vpcmpeqb", VEX_PD, OP2_PCMPEQB_VdqWdq, address, src0, dst);
+    }
+
+    void vpcmpgtb_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
+    {
+        twoByteOpSimd("vpcmpgtb", VEX_PD, OP2_PCMPGTB_VdqWdq, src1, src0, dst);
+    }
+    void vpcmpgtb_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
+    {
+        twoByteOpSimd("vpcmpgtb", VEX_PD, OP2_PCMPGTB_VdqWdq, offset, base, src0, dst);
+    }
+    void vpcmpgtb_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst)
+    {
+        twoByteOpSimd("vpcmpgtb", VEX_PD, OP2_PCMPGTB_VdqWdq, address, src0, dst);
+    }
+
     void vpcmpeqw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
     {
-        twoByteOpSimd("vpcmpeqw", VEX_PD, OP2_PCMPEQW, src1, src0, dst);
+        twoByteOpSimd("vpcmpeqw", VEX_PD, OP2_PCMPEQW_VdqWdq, src1, src0, dst);
+    }
+    void vpcmpeqw_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
+    {
+        twoByteOpSimd("vpcmpeqw", VEX_PD, OP2_PCMPEQW_VdqWdq, offset, base, src0, dst);
+    }
+    void vpcmpeqw_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst)
+    {
+        twoByteOpSimd("vpcmpeqw", VEX_PD, OP2_PCMPEQW_VdqWdq, address, src0, dst);
+    }
+
+    void vpcmpgtw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
+    {
+        twoByteOpSimd("vpcmpgtw", VEX_PD, OP2_PCMPGTW_VdqWdq, src1, src0, dst);
+    }
+    void vpcmpgtw_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
+    {
+        twoByteOpSimd("vpcmpgtw", VEX_PD, OP2_PCMPGTW_VdqWdq, offset, base, src0, dst);
+    }
+    void vpcmpgtw_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst)
+    {
+        twoByteOpSimd("vpcmpgtw", VEX_PD, OP2_PCMPGTW_VdqWdq, address, src0, dst);
     }
 
     void vpcmpeqd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
     {
         twoByteOpSimd("vpcmpeqd", VEX_PD, OP2_PCMPEQD_VdqWdq, src1, src0, dst);
     }
     void vpcmpeqd_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
     {
--- a/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp
+++ b/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp
@@ -3397,16 +3397,136 @@ CodeGeneratorX86Shared::visitSimdShuffle
     masm.vshufps(mask, rhs, lhs, lhs);
 
     mask = MacroAssembler::ComputeShuffleMask(secondMask[0], secondMask[1],
                                               secondMask[2], secondMask[3]);
     masm.vshufps(mask, lhs, lhs, lhs);
 }
 
 void
+CodeGeneratorX86Shared::visitSimdBinaryCompIx16(LSimdBinaryCompIx16* ins)
+{
+    static const SimdConstant allOnes = SimdConstant::SplatX16(-1);
+
+    FloatRegister lhs = ToFloatRegister(ins->lhs());
+    Operand rhs = ToOperand(ins->rhs());
+    FloatRegister output = ToFloatRegister(ins->output());
+    MOZ_ASSERT_IF(!Assembler::HasAVX(), output == lhs);
+
+    ScratchSimd128Scope scratch(masm);
+
+    MSimdBinaryComp::Operation op = ins->operation();
+    switch (op) {
+      case MSimdBinaryComp::greaterThan:
+        masm.vpcmpgtb(rhs, lhs, output);
+        return;
+      case MSimdBinaryComp::equal:
+        masm.vpcmpeqb(rhs, lhs, output);
+        return;
+      case MSimdBinaryComp::lessThan:
+        // src := rhs
+        if (rhs.kind() == Operand::FPREG)
+            masm.moveSimd128Int(ToFloatRegister(ins->rhs()), scratch);
+        else
+            masm.loadAlignedSimd128Int(rhs, scratch);
+
+        // src := src > lhs (i.e. lhs < rhs)
+        // This could be improved with custom lowering that ties rhs to the output register.
+        masm.vpcmpgtb(ToOperand(ins->lhs()), scratch, scratch);
+        masm.moveSimd128Int(scratch, output);
+        return;
+      case MSimdBinaryComp::notEqual:
+        // Ideally for notEqual, greaterThanOrEqual, and lessThanOrEqual, we
+        // should invert the comparison by, e.g., swapping the arms of a
+        // select when the comparison feeds one.
+        masm.loadConstantSimd128Int(allOnes, scratch);
+        masm.vpcmpeqb(rhs, lhs, output);
+        masm.bitwiseXorSimd128(Operand(scratch), output);
+        return;
+      case MSimdBinaryComp::greaterThanOrEqual:
+        // src := rhs
+        if (rhs.kind() == Operand::FPREG)
+            masm.moveSimd128Int(ToFloatRegister(ins->rhs()), scratch);
+        else
+            masm.loadAlignedSimd128Int(rhs, scratch);
+        masm.vpcmpgtb(ToOperand(ins->lhs()), scratch, scratch);
+        masm.loadConstantSimd128Int(allOnes, output);
+        masm.bitwiseXorSimd128(Operand(scratch), output);
+        return;
+      case MSimdBinaryComp::lessThanOrEqual:
+        // lhs <= rhs is equivalent to !(rhs < lhs), which we compute here.
+        masm.loadConstantSimd128Int(allOnes, scratch);
+        masm.vpcmpgtb(rhs, lhs, output);
+        masm.bitwiseXorSimd128(Operand(scratch), output);
+        return;
+    }
+    MOZ_CRASH("unexpected SIMD op");
+}
+
+void
+CodeGeneratorX86Shared::visitSimdBinaryCompIx8(LSimdBinaryCompIx8* ins)
+{
+    static const SimdConstant allOnes = SimdConstant::SplatX8(-1);
+
+    FloatRegister lhs = ToFloatRegister(ins->lhs());
+    Operand rhs = ToOperand(ins->rhs());
+    FloatRegister output = ToFloatRegister(ins->output());
+    MOZ_ASSERT_IF(!Assembler::HasAVX(), output == lhs);
+
+    ScratchSimd128Scope scratch(masm);
+
+    MSimdBinaryComp::Operation op = ins->operation();
+    switch (op) {
+      case MSimdBinaryComp::greaterThan:
+        masm.vpcmpgtw(rhs, lhs, output);
+        return;
+      case MSimdBinaryComp::equal:
+        masm.vpcmpeqw(rhs, lhs, output);
+        return;
+      case MSimdBinaryComp::lessThan:
+        // src := rhs
+        if (rhs.kind() == Operand::FPREG)
+            masm.moveSimd128Int(ToFloatRegister(ins->rhs()), scratch);
+        else
+            masm.loadAlignedSimd128Int(rhs, scratch);
+
+        // src := src > lhs (i.e. lhs < rhs)
+        // This could be improved with custom lowering that ties rhs to the output register.
+        masm.vpcmpgtw(ToOperand(ins->lhs()), scratch, scratch);
+        masm.moveSimd128Int(scratch, output);
+        return;
+      case MSimdBinaryComp::notEqual:
+        // Ideally for notEqual, greaterThanOrEqual, and lessThanOrEqual, we
+        // should invert the comparison by, e.g., swapping the arms of a
+        // select when the comparison feeds one.
+        masm.loadConstantSimd128Int(allOnes, scratch);
+        masm.vpcmpeqw(rhs, lhs, output);
+        masm.bitwiseXorSimd128(Operand(scratch), output);
+        return;
+      case MSimdBinaryComp::greaterThanOrEqual:
+        // src := rhs
+        if (rhs.kind() == Operand::FPREG)
+            masm.moveSimd128Int(ToFloatRegister(ins->rhs()), scratch);
+        else
+            masm.loadAlignedSimd128Int(rhs, scratch);
+        masm.vpcmpgtw(ToOperand(ins->lhs()), scratch, scratch);
+        masm.loadConstantSimd128Int(allOnes, output);
+        masm.bitwiseXorSimd128(Operand(scratch), output);
+        return;
+      case MSimdBinaryComp::lessThanOrEqual:
+        // lhs <= rhs is equivalent to !(rhs < lhs), which we compute here.
+        masm.loadConstantSimd128Int(allOnes, scratch);
+        masm.vpcmpgtw(rhs, lhs, output);
+        masm.bitwiseXorSimd128(Operand(scratch), output);
+        return;
+    }
+    MOZ_CRASH("unexpected SIMD op");
+}
+
+void
 CodeGeneratorX86Shared::visitSimdBinaryCompIx4(LSimdBinaryCompIx4* ins)
 {
     static const SimdConstant allOnes = SimdConstant::SplatX4(-1);
 
     FloatRegister lhs = ToFloatRegister(ins->lhs());
     Operand rhs = ToOperand(ins->rhs());
     MOZ_ASSERT(ToFloatRegister(ins->output()) == lhs);
 
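
All six compare cases above reduce to PCMPGT/PCMPEQ plus an operand swap and/or an XOR with all-ones (a bitwise NOT). A scalar sketch of the per-lane identities the code generator relies on (illustrative C++, not SpiderMonkey code):

    #include <cstdint>

    static int8_t GT(int8_t a, int8_t b) { return a > b ? -1 : 0; }  // PCMPGTB lane
    static int8_t EQ(int8_t a, int8_t b) { return a == b ? -1 : 0; } // PCMPEQB lane

    static int8_t LT(int8_t a, int8_t b) { return GT(b, a); }      // swap operands
    static int8_t NE(int8_t a, int8_t b) { return EQ(a, b) ^ -1; } // NOT(a == b)
    static int8_t GE(int8_t a, int8_t b) { return GT(b, a) ^ -1; } // NOT(a < b)
    static int8_t LE(int8_t a, int8_t b) { return GT(a, b) ^ -1; } // NOT(a > b)

    int main() {
        for (int a = -128; a < 128; a++) {
            for (int b = -128; b < 128; b++) {
                int8_t x = int8_t(a), y = int8_t(b);
                if (LT(x, y) != (a < b ? -1 : 0) || NE(x, y) != (a != b ? -1 : 0) ||
                    GE(x, y) != (a >= b ? -1 : 0) || LE(x, y) != (a <= b ? -1 : 0))
                    return 1;
            }
        }
        return 0;
    }
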
--- a/js/src/jit/x86-shared/CodeGenerator-x86-shared.h
+++ b/js/src/jit/x86-shared/CodeGenerator-x86-shared.h
@@ -309,16 +309,18 @@ class CodeGeneratorX86Shared : public Co
     void visitSimdSwizzleI(LSimdSwizzleI* lir);
     void visitSimdSwizzleF(LSimdSwizzleF* lir);
     void visitSimdShuffleX4(LSimdShuffleX4* lir);
     void visitSimdShuffle(LSimdShuffle* lir);
     void visitSimdUnaryArithIx16(LSimdUnaryArithIx16* lir);
     void visitSimdUnaryArithIx8(LSimdUnaryArithIx8* lir);
     void visitSimdUnaryArithIx4(LSimdUnaryArithIx4* lir);
     void visitSimdUnaryArithFx4(LSimdUnaryArithFx4* lir);
+    void visitSimdBinaryCompIx16(LSimdBinaryCompIx16* lir);
+    void visitSimdBinaryCompIx8(LSimdBinaryCompIx8* lir);
     void visitSimdBinaryCompIx4(LSimdBinaryCompIx4* lir);
     void visitSimdBinaryCompFx4(LSimdBinaryCompFx4* lir);
     void visitSimdBinaryArithIx16(LSimdBinaryArithIx16* lir);
     void visitSimdBinaryArithIx8(LSimdBinaryArithIx8* lir);
     void visitSimdBinaryArithIx4(LSimdBinaryArithIx4* lir);
     void visitSimdBinaryArithFx4(LSimdBinaryArithFx4* lir);
     void visitSimdBinarySaturating(LSimdBinarySaturating* lir);
     void visitSimdBinaryBitwise(LSimdBinaryBitwise* lir);
--- a/js/src/jit/x86-shared/Encoding-x86-shared.h
+++ b/js/src/jit/x86-shared/Encoding-x86-shared.h
@@ -207,31 +207,34 @@ enum TwoByteOpcodeID {
     OP2_SQRTSS_VssWss   = 0x51,
     OP2_SQRTPS_VpsWps   = 0x51,
     OP2_RSQRTPS_VpsWps  = 0x52,
     OP2_RCPPS_VpsWps    = 0x53,
     OP2_ANDPD_VpdWpd    = 0x54,
     OP2_ORPD_VpdWpd     = 0x56,
     OP2_XORPD_VpdWpd    = 0x57,
     OP2_PUNPCKLDQ       = 0x62,
+    OP2_PCMPGTB_VdqWdq  = 0x64,
+    OP2_PCMPGTW_VdqWdq  = 0x65,
     OP2_PCMPGTD_VdqWdq  = 0x66,
     OP2_MOVD_VdEd       = 0x6E,
     OP2_MOVDQ_VsdWsd    = 0x6F,
     OP2_MOVDQ_VdqWdq    = 0x6F,
     OP2_PSHUFD_VdqWdqIb = 0x70,
     OP2_PSHUFLW_VdqWdqIb = 0x70,
     OP2_PSHUFHW_VdqWdqIb = 0x70,
     OP2_PSLLW_UdqIb     = 0x71,
     OP2_PSRAW_UdqIb     = 0x71,
     OP2_PSRLW_UdqIb     = 0x71,
     OP2_PSLLD_UdqIb     = 0x72,
     OP2_PSRAD_UdqIb     = 0x72,
     OP2_PSRLD_UdqIb     = 0x72,
     OP2_PSRLDQ_Vd       = 0x73,
-    OP2_PCMPEQW         = 0x75,
+    OP2_PCMPEQB_VdqWdq  = 0x74,
+    OP2_PCMPEQW_VdqWdq  = 0x75,
     OP2_PCMPEQD_VdqWdq  = 0x76,
     OP2_HADDPD          = 0x7C,
     OP2_MOVD_EdVd       = 0x7E,
     OP2_MOVQ_VdWd       = 0x7E,
     OP2_MOVDQ_WdqVdq    = 0x7F,
     OP2_JCC_rel32       = 0x80,
     OP_SETCC            = 0x90,
     OP2_SHLD            = 0xA4,
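
For orientation: each new enum entry is the second byte of a legacy two-byte SSE opcode, and VEX_PD in the assembler corresponds to the mandatory 0x66 prefix. A hand-assembled sketch of the bytes for one such instruction (illustrative, not produced by this code):

    #include <cstdint>
    #include <cstdio>

    int main() {
        // pcmpgtb %xmm1, %xmm0 in the legacy SSE2 encoding:
        //   66  mandatory operand-size prefix (what VEX_PD selects)
        //   0F  two-byte opcode escape
        //   64  OP2_PCMPGTB_VdqWdq
        //   C1  ModRM: mod=11, reg=xmm0, rm=xmm1
        const uint8_t bytes[] = { 0x66, 0x0F, 0x64, 0xC1 };
        for (uint8_t b : bytes)
            printf("%02X ", b);
        printf("\n");
        return 0;
    }
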
--- a/js/src/jit/x86-shared/Lowering-x86-shared.cpp
+++ b/js/src/jit/x86-shared/Lowering-x86-shared.cpp
@@ -132,22 +132,17 @@ LIRGeneratorX86Shared::lowerForALUInt64(
 }
 
 template<size_t Temps>
 void
 LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, Temps>* ins, MDefinition* mir, MDefinition* lhs, MDefinition* rhs)
 {
     // Without AVX, we'll need to use the x86 encodings where one of the
     // inputs must be the same location as the output.
-    //
-    // :TODO: (Bug 1132894) Note, we might have to allocate a different
-    // registers if the MIRType of the reused operand differs from the MIRType
-    // of returned value, as MUST_REUSE_INPUT is not yet capable of reusing the
-    // same register but with a different register type.
-    if (!Assembler::HasAVX() && mir->type() == lhs->type()) {
+    if (!Assembler::HasAVX()) {
         ins->setOperand(0, useRegisterAtStart(lhs));
         ins->setOperand(1, lhs != rhs ? use(rhs) : useAtStart(rhs));
         defineReuseInput(ins, mir, 0);
     } else {
         ins->setOperand(0, useRegisterAtStart(lhs));
         ins->setOperand(1, useAtStart(rhs));
         define(ins, mir);
     }
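
The dropped condition only guarded against reusing an input whose MIRType differed from the output's; with a single Simd128 register class that distinction no longer matters. The remaining constraint is the usual x86 two-address one, modeled here as scalar loops (illustrative sketch, hypothetical helper names):

    #include <cstdint>

    // Legacy SSE shape: the destination is also the left source, so the lhs
    // lanes are destroyed; hence defineReuseInput pins the output to lhs.
    static void pcmpgtb_two_address(int8_t (&dstAndLhs)[16], const int8_t (&rhs)[16]) {
        for (int i = 0; i < 16; i++)
            dstAndLhs[i] = dstAndLhs[i] > rhs[i] ? -1 : 0;
    }

    // AVX (VEX) shape: a separate destination leaves both inputs intact, so a
    // fresh output register can simply be defined.
    static void vpcmpgtb_three_address(int8_t (&dst)[16], const int8_t (&lhs)[16],
                                       const int8_t (&rhs)[16]) {
        for (int i = 0; i < 16; i++)
            dst[i] = lhs[i] > rhs[i] ? -1 : 0;
    }

    int main() {
        int8_t a[16] = {1, -2, 3}, b[16] = {0, 0, 9}, out[16];
        vpcmpgtb_three_address(out, a, b); // a and b preserved
        pcmpgtb_two_address(a, b);         // a overwritten with the mask
        return out[0] == a[0] ? 0 : 1;     // both forms computed the same lanes
    }
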
--- a/js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h
+++ b/js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h
@@ -214,29 +214,29 @@ MacroAssembler::neg32(Register reg)
 {
     negl(reg);
 }
 
 void
 MacroAssembler::negateFloat(FloatRegister reg)
 {
     ScratchFloat32Scope scratch(*this);
-    vpcmpeqw(scratch, scratch, scratch);
+    vpcmpeqw(Operand(scratch), scratch, scratch);
     vpsllq(Imm32(31), scratch, scratch);
 
     // XOR the float in a float register with -0.0.
     vxorps(scratch, reg, reg); // s ^ 0x80000000
 }
 
 void
 MacroAssembler::negateDouble(FloatRegister reg)
 {
     // From MacroAssemblerX86Shared::maybeInlineDouble
     ScratchDoubleScope scratch(*this);
-    vpcmpeqw(scratch, scratch, scratch);
+    vpcmpeqw(Operand(scratch), scratch, scratch);
     vpsllq(Imm32(63), scratch, scratch);
 
     // XOR the double in a float register with -0.0.
     vxorpd(scratch, reg, reg); // s ^ 0x8000000000000000
 }
 
 // ===============================================================
 // Rotation instructions
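
The vpcmpeqw-then-shift idiom in negateFloat/negateDouble builds a sign-bit mask without a memory load: comparing a register with itself yields all-ones, and a 64-bit left shift keeps only the high bits. A quick arithmetic check of the two shift amounts (plain C++):

    #include <cstdint>

    int main() {
        uint64_t allOnes = ~uint64_t(0);        // vpcmpeqw x, x, x
        uint64_t floatMask = allOnes << 31;     // vpsllq $31: low dword 0x80000000
        uint64_t doubleMask = allOnes << 63;    // vpsllq $63: 0x8000000000000000
        // XORing a value with its sign-bit mask flips the sign; bits XORed into
        // the unused upper lanes are ignored by later scalar operations.
        return (uint32_t(floatMask) == 0x80000000u &&
                doubleMask == 0x8000000000000000ull) ? 0 : 1;
    }
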
--- a/js/src/jit/x86-shared/MacroAssembler-x86-shared.h
+++ b/js/src/jit/x86-shared/MacroAssembler-x86-shared.h
@@ -1239,17 +1239,17 @@ class MacroAssemblerX86Shared : public A
     bool maybeInlineSimd128Int(const SimdConstant& v, const FloatRegister& dest) {
         static const SimdConstant zero = SimdConstant::SplatX4(0);
         static const SimdConstant minusOne = SimdConstant::SplatX4(-1);
         if (v == zero) {
             zeroSimd128Int(dest);
             return true;
         }
         if (v == minusOne) {
-            vpcmpeqw(dest, dest, dest);
+            vpcmpeqw(Operand(dest), dest, dest);
             return true;
         }
         return false;
     }
     bool maybeInlineSimd128Float(const SimdConstant& v, const FloatRegister& dest) {
         static const SimdConstant zero = SimdConstant::SplatX4(0.f);
         if (v == zero) {
             // This won't get inlined if the SimdConstant v contains -0 in any