Bug 1025127: SIMD x86-x64 backend: implement Comparisons; a=ijibaja,dougc,bbouvier; r=sunfish
author Ivan Jibaja <ivan@cs.utexas.edu>
Fri, 25 Jul 2014 02:38:24 -0700
changeset 224512 5fa26de0a04fcf3f6e44de54f143fbb9cda38004
parent 224511 3fcf08e891faafb3dfd28253d1e47ba32dc611ae
child 224513 393c2341a26b5419d88cd17add68b9d4cf309423
push id 3979
push user raliiev@mozilla.com
push date Mon, 13 Oct 2014 16:35:44 +0000
treeherder mozilla-beta@30f2cc610691 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers ijibaja, dougc, bbouvier, sunfish
bugs 1025127
milestone 34.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1025127: SIMD x86-x64 backend: implement Comparisons; a=ijibaja,dougc,bbouvier; r=sunfish
js/src/jit/LIR-Common.h
js/src/jit/LOpcodes.h
js/src/jit/Lowering.cpp
js/src/jit/Lowering.h
js/src/jit/MIR.h
js/src/jit/MOpcodes.h
js/src/jit/ParallelSafetyAnalysis.cpp
js/src/jit/arm/CodeGenerator-arm.h
js/src/jit/shared/Assembler-x86-shared.h
js/src/jit/shared/BaseAssembler-x86-shared.h
js/src/jit/shared/CodeGenerator-x86-shared.cpp
js/src/jit/shared/CodeGenerator-x86-shared.h
js/src/jit/shared/MacroAssembler-x86-shared.h
--- a/js/src/jit/LIR-Common.h
+++ b/js/src/jit/LIR-Common.h
@@ -180,16 +180,61 @@ class LSimdExtractElementF : public LIns
     const LAllocation *getBase() {
         return getOperand(0);
     }
     SimdLane lane() const {
         return lane_;
     }
 };
 
+// Base LIR node for a lane-wise SIMD comparison of two SIMD operands (lhs, rhs).
+class LSimdBinaryComp: public LInstructionHelper<1, 2, 0>
+{
+  protected:
+    LSimdBinaryComp() {} // protected: instantiated through the typed subclasses
+
+public:
+    const LAllocation *lhs() { // first operand
+        return getOperand(0);
+    }
+    const LAllocation *rhs() { // second operand
+        return getOperand(1);
+    }
+    MSimdBinaryComp::Operation operation() const { // comparison kind, read from the MIR node
+        return mir_->toSimdBinaryComp()->operation();
+    }
+    const char *extraName() const { // printable name of operation()
+        switch (operation()) {
+          case MSimdBinaryComp::greaterThan: return "greaterThan";
+          case MSimdBinaryComp::greaterThanOrEqual: return "greaterThanOrEqual";
+          case MSimdBinaryComp::lessThan: return "lessThan";
+          case MSimdBinaryComp::lessThanOrEqual: return "lessThanOrEqual";
+          case MSimdBinaryComp::equal: return "equal";
+          case MSimdBinaryComp::notEqual: return "notEqual";
+        }
+        MOZ_CRASH("unexpected operation"); // every Operation value returns in the switch above
+    }
+};
+
+// Binary SIMD comparison between two Int32x4 operands (MIR compare type CompareInt32x4).
+class LSimdBinaryCompIx4 : public LSimdBinaryComp
+{
+  public:
+    LIR_HEADER(SimdBinaryCompIx4);
+    LSimdBinaryCompIx4() : LSimdBinaryComp() {} // operands are attached later, during lowering
+};
+
+// Binary SIMD comparison between two Float32x4 operands (MIR compare type CompareFloat32x4).
+class LSimdBinaryCompFx4 : public LSimdBinaryComp
+{
+  public:
+    LIR_HEADER(SimdBinaryCompFx4);
+    LSimdBinaryCompFx4() : LSimdBinaryComp() {} // operands are attached later, during lowering
+};
+
 // Binary SIMD arithmetic operation between two SIMD operands
 class LSimdBinaryArith : public LInstructionHelper<1, 2, 0>
 {
   public:
     LSimdBinaryArith() {}
 
     const LAllocation *lhs() {
         return getOperand(0);
--- a/js/src/jit/LOpcodes.h
+++ b/js/src/jit/LOpcodes.h
@@ -16,16 +16,18 @@
     _(Pointer)                      \
     _(Double)                       \
     _(Float32)                      \
     _(SimdValueX4)                  \
     _(Int32x4)                      \
     _(Float32x4)                    \
     _(SimdExtractElementI)          \
     _(SimdExtractElementF)          \
+    _(SimdBinaryCompIx4)            \
+    _(SimdBinaryCompFx4)            \
     _(SimdBinaryArithIx4)           \
     _(SimdBinaryArithFx4)           \
     _(Value)                        \
     _(CloneLiteral)                 \
     _(Parameter)                    \
     _(Callee)                       \
     _(TableSwitch)                  \
     _(TableSwitchV)                 \
--- a/js/src/jit/Lowering.cpp
+++ b/js/src/jit/Lowering.cpp
@@ -3723,16 +3723,35 @@ LIRGenerator::visitSimdExtractElement(MS
         return define(new(alloc()) LSimdExtractElementF(use, ins->lane()), ins);
     }
 
     MOZ_ASSUME_UNREACHABLE("Unknown SIMD kind when extracting element");
     return false;
 }
 
 bool
+LIRGenerator::visitSimdBinaryComp(MSimdBinaryComp *ins)
+{
+    MOZ_ASSERT(ins->type() == MIRType_Int32x4); // result is an Int32x4 mask for either operand type
+
+    if (ins->compareType() == MSimdBinaryComp::CompareInt32x4) {
+        LSimdBinaryCompIx4 *add = new(alloc()) LSimdBinaryCompIx4(); // NOTE: `add` names a comparison
+        return lowerForFPU(add, ins, ins->lhs(), ins->rhs());
+    }
+
+    if (ins->compareType() == MSimdBinaryComp::CompareFloat32x4) {
+        LSimdBinaryCompFx4 *add = new(alloc()) LSimdBinaryCompFx4(); // NOTE: `add` names a comparison
+        return lowerForFPU(add, ins, ins->lhs(), ins->rhs());
+    }
+
+    MOZ_CRASH("Unknown compare type when comparing values"); // CompareType has only the two values above
+    return false;
+}
+
+bool
 LIRGenerator::visitSimdBinaryArith(MSimdBinaryArith *ins)
 {
     JS_ASSERT(IsSimdType(ins->type()));
 
     if (ins->type() == MIRType_Int32x4) {
         LSimdBinaryArithIx4 *add = new(alloc()) LSimdBinaryArithIx4();
         return lowerForFPU(add, ins, ins->lhs(), ins->rhs());
     }
--- a/js/src/jit/Lowering.h
+++ b/js/src/jit/Lowering.h
@@ -263,16 +263,17 @@ class LIRGenerator : public LIRGenerator
     bool visitAsmJSVoidReturn(MAsmJSVoidReturn *ins);
     bool visitAsmJSPassStackArg(MAsmJSPassStackArg *ins);
     bool visitAsmJSCall(MAsmJSCall *ins);
     bool visitSetDOMProperty(MSetDOMProperty *ins);
     bool visitGetDOMProperty(MGetDOMProperty *ins);
     bool visitGetDOMMember(MGetDOMMember *ins);
     bool visitRecompileCheck(MRecompileCheck *ins);
     bool visitSimdExtractElement(MSimdExtractElement *ins);
+    bool visitSimdBinaryComp(MSimdBinaryComp *ins);
     bool visitSimdBinaryArith(MSimdBinaryArith *ins);
     bool visitSimdValueX4(MSimdValueX4 *ins);
     bool visitSimdConstant(MSimdConstant *ins);
     bool visitPhi(MPhi *ins);
     bool visitBeta(MBeta *ins);
     bool visitObjectState(MObjectState *ins);
     bool visitArrayState(MArrayState *ins);
 };
--- a/js/src/jit/MIR.h
+++ b/js/src/jit/MIR.h
@@ -1353,16 +1353,81 @@ class MSimdExtractElement : public MUnar
             return false;
         const MSimdExtractElement *other = ins->toSimdExtractElement();
         if (other->lane_ != lane_)
             return false;
         return congruentIfOperandsEqual(other);
     }
 };
 
+// Compares each value of a SIMD vector to each corresponding lane's value of
+// another SIMD vector, and returns an int32x4 vector containing the results of
+// the comparison: all bits are set to 1 if the comparison is true, 0 otherwise.
+class MSimdBinaryComp : public MBinaryInstruction
+{
+  public:
+    enum Operation { // comparison applied to each pair of lanes
+        greaterThan,
+        greaterThanOrEqual,
+        lessThan,
+        lessThanOrEqual,
+        equal,
+        notEqual
+    };
+
+    enum CompareType { // type of the operands being compared (not of the result)
+        CompareInt32x4,
+        CompareFloat32x4
+    };
+
+  private:
+    Operation operation_;
+    CompareType compareType_; // derived from left->type() in the constructor
+
+    MSimdBinaryComp(MDefinition *left, MDefinition *right, Operation op)
+      : MBinaryInstruction(left, right), operation_(op)
+    {
+        MOZ_ASSERT(IsSimdType(left->type()));
+        MOZ_ASSERT(left->type() == right->type()); // both operands must have the same SIMD type
+
+        if (left->type() == MIRType_Int32x4) {
+            compareType_ = CompareInt32x4;
+        } else {
+            MOZ_ASSERT(left->type() == MIRType_Float32x4);
+            compareType_ = CompareFloat32x4;
+        }
+
+        setResultType(MIRType_Int32x4); // result is an Int32x4 mask even for Float32x4 operands
+        setMovable();
+        if (op == equal || op == notEqual) // only these two are flagged commutative
+            setCommutative();
+    }
+
+  public:
+    INSTRUCTION_HEADER(SimdBinaryComp);
+    static MSimdBinaryComp *NewAsmJS(TempAllocator &alloc, MDefinition *left, MDefinition *right,
+                                     Operation op)
+    {
+        return new(alloc) MSimdBinaryComp(left, right, op); // only public way to construct (ctor is private)
+    }
+
+    AliasSet getAliasSet() const {
+        return AliasSet::None();
+    }
+
+    Operation operation() const { return operation_; }
+    CompareType compareType() const { return compareType_; }
+
+    bool congruentTo(const MDefinition *ins) const {
+        if (!binaryCongruentTo(ins))
+            return false;
+        // compareType_ is not compared; NOTE(review): presumably implied by binaryCongruentTo — confirm
+        return operation_ == ins->toSimdBinaryComp()->operation();
+    }
+};
+
 class MSimdBinaryArith : public MBinaryInstruction
 {
   public:
     enum Operation {
         Add,
         Sub,
         Mul,
         Div
--- a/js/src/jit/MOpcodes.h
+++ b/js/src/jit/MOpcodes.h
@@ -10,16 +10,17 @@
 namespace js {
 namespace jit {
 
 #define MIR_OPCODE_LIST(_)                                                  \
     _(Constant)                                                             \
     _(SimdValueX4)                                                          \
     _(SimdConstant)                                                         \
     _(SimdExtractElement)                                                   \
+    _(SimdBinaryComp)                                                       \
     _(SimdBinaryArith)                                                      \
     _(CloneLiteral)                                                         \
     _(Parameter)                                                            \
     _(Callee)                                                               \
     _(TableSwitch)                                                          \
     _(Goto)                                                                 \
     _(Test)                                                                 \
     _(TypeObjectDispatch)                                                   \
--- a/js/src/jit/ParallelSafetyAnalysis.cpp
+++ b/js/src/jit/ParallelSafetyAnalysis.cpp
@@ -110,16 +110,17 @@ class ParallelSafetyVisitor : public MDe
 
     // I am taking the policy of blacklisting everything that's not
     // obviously safe for now.  We can loosen as we need.
 
     SAFE_OP(Constant)
     SAFE_OP(SimdValueX4)
     SAFE_OP(SimdConstant)
     SAFE_OP(SimdExtractElement)
+    SAFE_OP(SimdBinaryComp)
     SAFE_OP(SimdBinaryArith)
     UNSAFE_OP(CloneLiteral)
     SAFE_OP(Parameter)
     SAFE_OP(Callee)
     SAFE_OP(TableSwitch)
     SAFE_OP(Goto)
     SAFE_OP(Test)
     SAFE_OP(Compare)
--- a/js/src/jit/arm/CodeGenerator-arm.h
+++ b/js/src/jit/arm/CodeGenerator-arm.h
@@ -229,16 +229,18 @@ class CodeGeneratorARM : public CodeGene
 
   public:
     // Unimplemented SIMD instructions
     bool visitSimdValueX4(LSimdValueX4 *lir) { MOZ_CRASH("NYI"); }
     bool visitInt32x4(LInt32x4 *ins) { MOZ_CRASH("NYI"); }
     bool visitFloat32x4(LFloat32x4 *ins) { MOZ_CRASH("NYI"); }
     bool visitSimdExtractElementI(LSimdExtractElementI *ins) { MOZ_CRASH("NYI"); }
     bool visitSimdExtractElementF(LSimdExtractElementF *ins) { MOZ_CRASH("NYI"); }
+    bool visitSimdBinaryCompIx4(LSimdBinaryCompIx4 *lir) { MOZ_CRASH("NYI"); }
+    bool visitSimdBinaryCompFx4(LSimdBinaryCompFx4 *lir) { MOZ_CRASH("NYI"); }
     bool visitSimdBinaryArithIx4(LSimdBinaryArithIx4 *lir) { MOZ_CRASH("NYI"); }
     bool visitSimdBinaryArithFx4(LSimdBinaryArithFx4 *lir) { MOZ_CRASH("NYI"); }
 };
 
 typedef CodeGeneratorARM CodeGeneratorSpecific;
 
 // An out-of-line bailout thunk.
 class OutOfLineBailout : public OutOfLineCodeBase<CodeGeneratorARM>
--- a/js/src/jit/shared/Assembler-x86-shared.h
+++ b/js/src/jit/shared/Assembler-x86-shared.h
@@ -1508,16 +1508,64 @@ class AssemblerX86Shared : public Assemb
     void ucomiss(FloatRegister lhs, FloatRegister rhs) {
         JS_ASSERT(HasSSE2());
         masm.ucomiss_rr(rhs.code(), lhs.code());
     }
     void pcmpeqw(FloatRegister lhs, FloatRegister rhs) {
         JS_ASSERT(HasSSE2());
         masm.pcmpeqw_rr(rhs.code(), lhs.code());
     }
+    void pcmpeqd(const Operand &src, FloatRegister dest) { // picks the encoding from src.kind()
+        MOZ_ASSERT(HasSSE2());
+        switch (src.kind()) {
+          case Operand::FPREG: // register source
+            masm.pcmpeqd_rr(src.fpu(), dest.code());
+            break;
+          case Operand::MEM_REG_DISP: // base register + displacement
+            masm.pcmpeqd_mr(src.disp(), src.base(), dest.code());
+            break;
+          case Operand::MEM_ADDRESS32: // absolute address
+            masm.pcmpeqd_mr(src.address(), dest.code());
+            break;
+          default: // any other operand kind is not supported here
+            MOZ_CRASH("unexpected operand kind");
+        }
+    }
+    void pcmpgtd(const Operand &src, FloatRegister dest) { // picks the encoding from src.kind()
+        MOZ_ASSERT(HasSSE2());
+        switch (src.kind()) {
+          case Operand::FPREG: // register source
+            masm.pcmpgtd_rr(src.fpu(), dest.code());
+            break;
+          case Operand::MEM_REG_DISP: // base register + displacement
+            masm.pcmpgtd_mr(src.disp(), src.base(), dest.code());
+            break;
+          case Operand::MEM_ADDRESS32: // absolute address
+            masm.pcmpgtd_mr(src.address(), dest.code());
+            break;
+          default: // any other operand kind is not supported here
+            MOZ_CRASH("unexpected operand kind");
+        }
+    }
+    void cmpps(const Operand &src, FloatRegister dest, uint8_t order) { // order: imm8 emitted after the op
+        MOZ_ASSERT(HasSSE2());
+        switch (src.kind()) {
+          case Operand::FPREG: // register source
+            masm.cmpps_rr(src.fpu(), dest.code(), order);
+            break;
+          case Operand::MEM_REG_DISP: // base register + displacement
+            masm.cmpps_mr(src.disp(), src.base(), dest.code(), order);
+            break;
+          case Operand::MEM_ADDRESS32: // absolute address
+            masm.cmpps_mr(src.address(), dest.code(), order);
+            break;
+          default: // any other operand kind is not supported here
+            MOZ_CRASH("unexpected operand kind");
+        }
+    }
     void movd(Register src, FloatRegister dest) {
         JS_ASSERT(HasSSE2());
         masm.movd_rr(src.code(), dest.code());
     }
     void movd(FloatRegister src, Register dest) {
         JS_ASSERT(HasSSE2());
         masm.movd_rr(src.code(), dest.code());
     }
--- a/js/src/jit/shared/BaseAssembler-x86-shared.h
+++ b/js/src/jit/shared/BaseAssembler-x86-shared.h
@@ -307,34 +307,37 @@ private:
         OP2_DIVSD_VsdWsd    = 0x5E,
         OP2_DIVPS_VpsWps    = 0x5E,
         OP2_MAXSD_VsdWsd    = 0x5F,
         OP2_SQRTSD_VsdWsd   = 0x51,
         OP2_SQRTSS_VssWss   = 0x51,
         OP2_ANDPD_VpdWpd    = 0x54,
         OP2_ORPD_VpdWpd     = 0x56,
         OP2_XORPD_VpdWpd    = 0x57,
+        OP2_PCMPGTD_VdqWdq  = 0x66,
         OP2_MOVD_VdEd       = 0x6E,
         OP2_MOVDQ_VsdWsd    = 0x6F,
         OP2_MOVDQ_VdqWdq    = 0x6F,
         OP2_PSHUFD_VdqWdqIb = 0x70,
         OP2_PSRLDQ_Vd       = 0x73,
         OP2_PCMPEQW         = 0x75,
+        OP2_PCMPEQD_VdqWdq  = 0x76,
         OP2_MOVD_EdVd       = 0x7E,
         OP2_MOVDQ_WdqVdq    = 0x7F,
         OP2_JCC_rel32       = 0x80,
         OP_SETCC            = 0x90,
         OP2_IMUL_GvEv       = 0xAF,
         OP2_CMPXCHG_GvEw    = 0xB1,
         OP2_BSR_GvEv        = 0xBD,
         OP2_MOVSX_GvEb      = 0xBE,
         OP2_MOVSX_GvEw      = 0xBF,
         OP2_MOVZX_GvEb      = 0xB6,
         OP2_MOVZX_GvEw      = 0xB7,
         OP2_XADD_EvGv       = 0xC1,
+        OP2_CMPPS_VpsWps    = 0xC2,
         OP2_PEXTRW_GdUdIb   = 0xC5,
         OP2_SHUFPS_VpsWpsIb = 0xC6,
         OP2_PXORDQ_VdqWdq   = 0xEF,
         OP2_PSUBD_VdqWdq    = 0xFA,
         OP2_PADDD_VdqWdq    = 0xFE
     } TwoByteOpcodeID;
 
     typedef enum {
@@ -2528,16 +2531,88 @@ public:
     void pcmpeqw_rr(XMMRegisterID src, XMMRegisterID dst)
     {
         spew("pcmpeqw    %s, %s",
              nameFPReg(src), nameFPReg(dst));
         m_formatter.prefix(PRE_SSE_66);
         m_formatter.twoByteOp(OP2_PCMPEQW, (RegisterID)dst, (RegisterID)src); /* right order ? */
     }
 
+    void pcmpeqd_rr(XMMRegisterID src, XMMRegisterID dst) // register-to-register form
+    {
+        spew("pcmpeqd   %s, %s",
+             nameFPReg(src), nameFPReg(dst));
+        m_formatter.prefix(PRE_SSE_66); // prefix byte goes out before the two-byte opcode
+        m_formatter.twoByteOp(OP2_PCMPEQD_VdqWdq, (RegisterID)dst, (RegisterID)src);
+    }
+
+    void pcmpeqd_mr(int offset, RegisterID base, XMMRegisterID dst) // source at offset(base)
+    {
+        spew("pcmpeqd   %s0x%x(%s), %s",
+             PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(dst));
+        m_formatter.prefix(PRE_SSE_66); // prefix byte goes out before the two-byte opcode
+        m_formatter.twoByteOp(OP2_PCMPEQD_VdqWdq, (RegisterID)dst, base, offset);
+    }
+
+    void pcmpeqd_mr(const void* address, XMMRegisterID dst) // source at an absolute address
+    {
+        spew("pcmpeqd   %p, %s",
+             address, nameFPReg(dst));
+        m_formatter.prefix(PRE_SSE_66); // prefix byte goes out before the two-byte opcode
+        m_formatter.twoByteOp(OP2_PCMPEQD_VdqWdq, (RegisterID)dst, address);
+    }
+
+    void pcmpgtd_rr(XMMRegisterID src, XMMRegisterID dst) // register-to-register form
+    {
+        spew("pcmpgtd   %s, %s",
+             nameFPReg(src), nameFPReg(dst));
+        m_formatter.prefix(PRE_SSE_66); // prefix byte goes out before the two-byte opcode
+        m_formatter.twoByteOp(OP2_PCMPGTD_VdqWdq, (RegisterID)dst, (RegisterID)src);
+    }
+
+    void pcmpgtd_mr(int offset, RegisterID base, XMMRegisterID dst) // source at offset(base)
+    {
+        spew("pcmpgtd   %s0x%x(%s), %s",
+             PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(dst));
+        m_formatter.prefix(PRE_SSE_66); // prefix byte goes out before the two-byte opcode
+        m_formatter.twoByteOp(OP2_PCMPGTD_VdqWdq, (RegisterID)dst, base, offset);
+    }
+
+    void pcmpgtd_mr(const void* address, XMMRegisterID dst) // source at an absolute address
+    {
+        spew("pcmpgtd   %p, %s",
+             address, nameFPReg(dst));
+        m_formatter.prefix(PRE_SSE_66); // prefix byte goes out before the two-byte opcode
+        m_formatter.twoByteOp(OP2_PCMPGTD_VdqWdq, (RegisterID)dst, address);
+    }
+
+    void cmpps_rr(XMMRegisterID src, XMMRegisterID dst, uint8_t order) // register-to-register form
+    {
+        spew("cmpps     %s, %s, %u",
+             nameFPReg(src), nameFPReg(dst), order);
+        m_formatter.twoByteOp(OP2_CMPPS_VpsWps, (RegisterID)dst, (RegisterID)src); // no prefix byte here
+        m_formatter.immediate8(order); // order is appended as a trailing 8-bit immediate
+    }
+
+    void cmpps_mr(int offset, RegisterID base, XMMRegisterID dst, uint8_t order) // source at offset(base)
+    {
+        spew("cmpps     %s0x%x(%s), %s, %u",
+             PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(dst), order);
+        m_formatter.twoByteOp(OP2_CMPPS_VpsWps, (RegisterID)dst, base, offset); // no prefix byte here
+        m_formatter.immediate8(order); // order is appended as a trailing 8-bit immediate
+    }
+
+    void cmpps_mr(const void* address, XMMRegisterID dst, uint8_t order) // source at an absolute address
+    {
+        spew("cmpps     %p, %s, %u",
+             address, nameFPReg(dst), order);
+        m_formatter.twoByteOp(OP2_CMPPS_VpsWps, (RegisterID)dst, address); // no prefix byte here
+        m_formatter.immediate8(order); // order is appended as a trailing 8-bit immediate
+    }
+
     void addsd_rr(XMMRegisterID src, XMMRegisterID dst)
     {
         spew("addsd      %s, %s",
              nameFPReg(src), nameFPReg(dst));
         m_formatter.prefix(PRE_SSE_F2);
         m_formatter.twoByteOp(OP2_ADDSD_VsdWsd, (RegisterID)dst, (RegisterID)src);
     }
 
--- a/js/src/jit/shared/CodeGenerator-x86-shared.cpp
+++ b/js/src/jit/shared/CodeGenerator-x86-shared.cpp
@@ -2188,16 +2188,83 @@ CodeGeneratorX86Shared::visitSimdExtract
         uint32_t mask = MacroAssembler::ComputeShuffleMask(lane);
         masm.shuffleFloat32(mask, input, output);
     }
     masm.canonicalizeFloat(output);
     return true;
 }
 
 bool
+CodeGeneratorX86Shared::visitSimdBinaryCompIx4(LSimdBinaryCompIx4 *ins)
+{
+    FloatRegister lhs = ToFloatRegister(ins->lhs());
+    Operand rhs = ToOperand(ins->rhs());
+    MOZ_ASSERT(ToFloatRegister(ins->output()) == lhs); // the result overwrites lhs in place
+
+    MSimdBinaryComp::Operation op = ins->operation();
+    switch (op) {
+      case MSimdBinaryComp::greaterThan:
+        masm.packedGreaterThanInt32x4(rhs, lhs); // lhs := lhs > rhs
+        return true;
+      case MSimdBinaryComp::equal:
+        masm.packedEqualInt32x4(rhs, lhs); // lhs := lhs == rhs
+        return true;
+      case MSimdBinaryComp::lessThan:
+        // scr := rhs
+        if (rhs.kind() == Operand::FPREG)
+            masm.moveAlignedInt32x4(ToFloatRegister(ins->rhs()), ScratchSimdReg);
+        else
+            masm.loadAlignedInt32x4(rhs, ScratchSimdReg);
+
+        // scr := scr > lhs (i.e. lhs < rhs)
+        // Improve by doing custom lowering (rhs is tied to the output register)
+        masm.packedGreaterThanInt32x4(ToOperand(ins->lhs()), ScratchSimdReg);
+        masm.moveAlignedInt32x4(ScratchSimdReg, lhs); // lhs := scr (same register written above)
+        return true;
+      case MSimdBinaryComp::notEqual:
+      case MSimdBinaryComp::greaterThanOrEqual:
+      case MSimdBinaryComp::lessThanOrEqual:
+        // These operations are not part of the spec. so are not implemented.
+        break;
+    }
+    MOZ_ASSUME_UNREACHABLE("unexpected SIMD op");
+}
+
+bool
+CodeGeneratorX86Shared::visitSimdBinaryCompFx4(LSimdBinaryCompFx4 *ins)
+{
+    FloatRegister lhs = ToFloatRegister(ins->lhs());
+    Operand rhs = ToOperand(ins->rhs());
+    MOZ_ASSERT(ToFloatRegister(ins->output()) == lhs); // the result overwrites lhs in place
+
+    MSimdBinaryComp::Operation op = ins->operation(); // each case passes a different imm8 to cmpps
+    switch (op) {
+      case MSimdBinaryComp::equal:
+        masm.cmpps(rhs, lhs, 0x0);
+        return true;
+      case MSimdBinaryComp::lessThan:
+        masm.cmpps(rhs, lhs, 0x1);
+        return true;
+      case MSimdBinaryComp::lessThanOrEqual:
+        masm.cmpps(rhs, lhs, 0x2);
+        return true;
+      case MSimdBinaryComp::notEqual:
+        masm.cmpps(rhs, lhs, 0x4);
+        return true;
+      case MSimdBinaryComp::greaterThanOrEqual:
+        masm.cmpps(rhs, lhs, 0x5); // NOTE(review): presumably "not less than"; true for NaN lanes? confirm
+        return true;
+      case MSimdBinaryComp::greaterThan:
+        masm.cmpps(rhs, lhs, 0x6); // NOTE(review): presumably "not less or equal"; true for NaN lanes? confirm
+        return true;
+    }
+    MOZ_ASSUME_UNREACHABLE("unexpected SIMD op");
+}
+
+bool
 CodeGeneratorX86Shared::visitSimdBinaryArithIx4(LSimdBinaryArithIx4 *ins)
 {
     FloatRegister lhs = ToFloatRegister(ins->lhs());
     Operand rhs = ToOperand(ins->rhs());
     JS_ASSERT(ToFloatRegister(ins->output()) == lhs);
 
     MSimdBinaryArith::Operation op = ins->operation();
     switch (op) {
--- a/js/src/jit/shared/CodeGenerator-x86-shared.h
+++ b/js/src/jit/shared/CodeGenerator-x86-shared.h
@@ -206,16 +206,18 @@ class CodeGeneratorX86Shared : public Co
     bool visitNegF(LNegF *lir);
 
     // SIMD operators
     bool visitSimdValueX4(LSimdValueX4 *lir);
     bool visitInt32x4(LInt32x4 *ins);
     bool visitFloat32x4(LFloat32x4 *ins);
     bool visitSimdExtractElementI(LSimdExtractElementI *lir);
     bool visitSimdExtractElementF(LSimdExtractElementF *lir);
+    bool visitSimdBinaryCompIx4(LSimdBinaryCompIx4 *lir);
+    bool visitSimdBinaryCompFx4(LSimdBinaryCompFx4 *lir);
     bool visitSimdBinaryArithIx4(LSimdBinaryArithIx4 *lir);
     bool visitSimdBinaryArithFx4(LSimdBinaryArithFx4 *lir);
 
     // Out of line visitors.
     bool visitOutOfLineBailout(OutOfLineBailout *ool);
     bool visitOutOfLineUndoALUOperation(OutOfLineUndoALUOperation *ool);
     bool visitMulNegativeZeroCheck(MulNegativeZeroCheck *ool);
     bool visitModOverflowCheck(ModOverflowCheck *ool);
--- a/js/src/jit/shared/MacroAssembler-x86-shared.h
+++ b/js/src/jit/shared/MacroAssembler-x86-shared.h
@@ -465,38 +465,50 @@ class MacroAssemblerX86Shared : public A
     }
     void convertDoubleToFloat32(FloatRegister src, FloatRegister dest) {
         cvtsd2ss(src, dest);
     }
 
     void loadAlignedInt32x4(const Address &src, FloatRegister dest) {
         movdqa(Operand(src), dest);
     }
+    void loadAlignedInt32x4(const Operand &src, FloatRegister dest) { // Operand overload
+        movdqa(src, dest); // same instruction as the Address overload, without wrapping src
+    }
     void storeAlignedInt32x4(FloatRegister src, const Address &dest) {
         movdqa(src, Operand(dest));
     }
     void moveAlignedInt32x4(FloatRegister src, FloatRegister dest) {
         movdqa(src, dest);
     }
     void loadUnalignedInt32x4(const Address &src, FloatRegister dest) {
         movdqu(Operand(src), dest);
     }
     void storeUnalignedInt32x4(FloatRegister src, const Address &dest) {
         movdqu(src, Operand(dest));
     }
+    void packedEqualInt32x4(const Operand &src, FloatRegister dest) { // Int32x4 "equal" comparison
+        pcmpeqd(src, dest); // thin wrapper: forwards both arguments unchanged
+    }
+    void packedGreaterThanInt32x4(const Operand &src, FloatRegister dest) { // Int32x4 "greater than"
+        pcmpgtd(src, dest); // thin wrapper: forwards both arguments unchanged
+    }
     void packedAddInt32(const Operand &src, FloatRegister dest) {
         paddd(src, dest);
     }
     void packedSubInt32(const Operand &src, FloatRegister dest) {
         psubd(src, dest);
     }
 
     void loadAlignedFloat32x4(const Address &src, FloatRegister dest) {
         movaps(Operand(src), dest);
     }
+    void loadAlignedFloat32x4(const Operand &src, FloatRegister dest) { // Operand overload
+        movaps(src, dest); // same instruction as the Address overload, without wrapping src
+    }
     void storeAlignedFloat32x4(FloatRegister src, const Address &dest) {
         movaps(src, Operand(dest));
     }
     void moveAlignedFloat32x4(FloatRegister src, FloatRegister dest) {
         movaps(src, dest);
     }
     void loadUnalignedFloat32x4(const Address &src, FloatRegister dest) {
         movups(Operand(src), dest);
@@ -511,17 +523,16 @@ class MacroAssemblerX86Shared : public A
         subps(src, dest);
     }
     void packedMulFloat32(const Operand &src, FloatRegister dest) {
         mulps(src, dest);
     }
     void packedDivFloat32(const Operand &src, FloatRegister dest) {
         divps(src, dest);
     }
-
     static uint32_t ComputeShuffleMask(SimdLane x, SimdLane y = LaneX,
                                        SimdLane z = LaneX, SimdLane w = LaneX)
     {
         uint32_t r = (uint32_t(w) << 6) |
                      (uint32_t(z) << 4) |
                      (uint32_t(y) << 2) |
                      uint32_t(x);
         JS_ASSERT(r < 256);