Bug 1025100: Implement MSimdInsertElement; r=sunfish
author Benjamin Bouvier <benj@benj.me>
Fri, 19 Sep 2014 14:53:25 +0200
changeset 206230 d418a4d0f8d5840b51c875599926074ee120ef97
parent 206229 70698c31fca1f24cd36753066b9c42d85ee2e214
child 206231 02e8c6942c85da6d1dae60a4455c9f87687486db
push id 27516
push user ryanvm@gmail.com
push date Fri, 19 Sep 2014 17:54:48 +0000
treeherder mozilla-central@b00bdb144e06 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers sunfish
bugs 1025100
milestone 35.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1025100: Implement MSimdInsertElement; r=sunfish
js/src/jit/LIR-Common.h
js/src/jit/LOpcodes.h
js/src/jit/Lowering.cpp
js/src/jit/Lowering.h
js/src/jit/MIR.h
js/src/jit/MOpcodes.h
js/src/jit/ParallelSafetyAnalysis.cpp
js/src/jit/shared/Assembler-x86-shared.h
js/src/jit/shared/CodeGenerator-x86-shared.cpp
js/src/jit/shared/CodeGenerator-x86-shared.h
--- a/js/src/jit/LIR-Common.h
+++ b/js/src/jit/LIR-Common.h
@@ -173,16 +173,57 @@ class LSimdExtractElementF : public LSim
 {
   public:
     LIR_HEADER(SimdExtractElementF);
     LSimdExtractElementF(const LAllocation &base)
       : LSimdExtractElementBase(base)
     {}
 };
 
+class LSimdInsertElementBase : public LInstructionHelper<1, 2, 0>
+{
+  protected:
+    LSimdInsertElementBase(const LAllocation &vec, const LAllocation &val)
+    {
+        setOperand(0, vec); // operand 0: the SIMD vector being modified
+        setOperand(1, val); // operand 1: the scalar written into the lane
+    }
+
+  public:
+    const LAllocation *vector() {
+        return getOperand(0);
+    }
+    const LAllocation *value() {
+        return getOperand(1);
+    }
+    SimdLane lane() const {
+        return mir_->toSimdInsertElement()->lane(); // the lane is stored on the MIR node
+    }
+};
+
+// Replace the element in a given lane of a SIMD int32x4 with a given value.
+class LSimdInsertElementI : public LSimdInsertElementBase
+{
+  public:
+    LIR_HEADER(SimdInsertElementI);
+    LSimdInsertElementI(const LAllocation &vec, const LAllocation &val)
+      : LSimdInsertElementBase(vec, val)
+    {}
+};
+
+// Replace the element in a given lane of a SIMD float32x4 with a given value.
+class LSimdInsertElementF : public LSimdInsertElementBase
+{
+  public:
+    LIR_HEADER(SimdInsertElementF);
+    LSimdInsertElementF(const LAllocation &vec, const LAllocation &val)
+      : LSimdInsertElementBase(vec, val)
+    {}
+};
+
 class LSimdSignMaskX4 : public LInstructionHelper<1, 1, 0>
 {
   public:
     LIR_HEADER(SimdSignMaskX4);
 
     explicit LSimdSignMaskX4(const LAllocation &input) {
         setOperand(0, input);
     }
--- a/js/src/jit/LOpcodes.h
+++ b/js/src/jit/LOpcodes.h
@@ -16,16 +16,18 @@
     _(Pointer)                      \
     _(Double)                       \
     _(Float32)                      \
     _(SimdSplatX4)                  \
     _(Int32x4)                      \
     _(Float32x4)                    \
     _(SimdExtractElementI)          \
     _(SimdExtractElementF)          \
+    _(SimdInsertElementI)           \
+    _(SimdInsertElementF)           \
     _(SimdSignMaskX4)               \
     _(SimdBinaryCompIx4)            \
     _(SimdBinaryCompFx4)            \
     _(SimdBinaryArithIx4)           \
     _(SimdBinaryArithFx4)           \
     _(SimdBinaryBitwiseX4)          \
     _(SimdSelect)                   \
     _(Value)                        \
--- a/js/src/jit/Lowering.cpp
+++ b/js/src/jit/Lowering.cpp
@@ -3734,16 +3734,31 @@ LIRGenerator::visitSimdExtractElement(MS
         LUse use = useRegisterAtStart(ins->input());
         return define(new(alloc()) LSimdExtractElementF(use), ins);
     }
 
     MOZ_CRASH("Unknown SIMD kind when extracting element");
 }
 
 bool
+LIRGenerator::visitSimdInsertElement(MSimdInsertElement *ins)
+{
+    MOZ_ASSERT(IsSimdType(ins->type()));
+
+    LUse vec = useRegisterAtStart(ins->vector()); // input 0, reused as the output below
+    LUse val = useRegister(ins->value());
+    if (ins->type() == MIRType_Int32x4)
+        return defineReuseInput(new(alloc()) LSimdInsertElementI(vec, val), ins, 0);
+    if (ins->type() == MIRType_Float32x4)
+        return defineReuseInput(new(alloc()) LSimdInsertElementF(vec, val), ins, 0);
+
+    MOZ_CRASH("Unknown SIMD kind when inserting element");
+}
+
+bool
 LIRGenerator::visitSimdSignMask(MSimdSignMask *ins)
 {
     MDefinition *input = ins->input();
     MOZ_ASSERT(IsSimdType(input->type()));
     MOZ_ASSERT(ins->type() == MIRType_Int32);
 
     LUse use = useRegisterAtStart(input);
 
--- a/js/src/jit/Lowering.h
+++ b/js/src/jit/Lowering.h
@@ -264,16 +264,17 @@ class LIRGenerator : public LIRGenerator
     bool visitAsmJSVoidReturn(MAsmJSVoidReturn *ins);
     bool visitAsmJSPassStackArg(MAsmJSPassStackArg *ins);
     bool visitAsmJSCall(MAsmJSCall *ins);
     bool visitSetDOMProperty(MSetDOMProperty *ins);
     bool visitGetDOMProperty(MGetDOMProperty *ins);
     bool visitGetDOMMember(MGetDOMMember *ins);
     bool visitRecompileCheck(MRecompileCheck *ins);
     bool visitSimdExtractElement(MSimdExtractElement *ins);
+    bool visitSimdInsertElement(MSimdInsertElement *ins);
     bool visitSimdSignMask(MSimdSignMask *ins);
     bool visitSimdBinaryComp(MSimdBinaryComp *ins);
     bool visitSimdBinaryArith(MSimdBinaryArith *ins);
     bool visitSimdBinaryBitwise(MSimdBinaryBitwise *ins);
     bool visitSimdConstant(MSimdConstant *ins);
     bool visitPhi(MPhi *ins);
     bool visitBeta(MBeta *ins);
     bool visitObjectState(MObjectState *ins);
--- a/js/src/jit/MIR.h
+++ b/js/src/jit/MIR.h
@@ -1397,16 +1397,60 @@ class MSimdExtractElement : public MUnar
             return false;
         const MSimdExtractElement *other = ins->toSimdExtractElement();
         if (other->lane_ != lane_)
             return false;
         return congruentIfOperandsEqual(other);
     }
 };
 
+// Produces a SIMD vector equal to |vec| except that lane |lane| holds the scalar |val|.
+class MSimdInsertElement : public MBinaryInstruction
+{
+  private:
+    SimdLane lane_;
+
+    MSimdInsertElement(MDefinition *vec, MDefinition *val, MIRType type, SimdLane lane)
+      : MBinaryInstruction(vec, val), lane_(lane)
+    {
+        MOZ_ASSERT(IsSimdType(type) && vec->type() == type); // result has the vector's own type
+        MOZ_ASSERT(SimdTypeToScalarType(type) == val->type()); // scalar must match the lane type
+
+        setMovable();
+        setResultType(type);
+    }
+
+  public:
+    INSTRUCTION_HEADER(SimdInsertElement)
+
+    static MSimdInsertElement *NewAsmJS(TempAllocator &alloc, MDefinition *vec, MDefinition *val,
+                                         MIRType type, SimdLane lane)
+    {
+        return new(alloc) MSimdInsertElement(vec, val, type, lane);
+    }
+
+    MDefinition *vector() {
+        return getOperand(0); // first constructor operand: the vector
+    }
+    MDefinition *value() {
+        return getOperand(1); // second constructor operand: the scalar
+    }
+    SimdLane lane() const {
+        return lane_;
+    }
+
+    AliasSet getAliasSet() const {
+        return AliasSet::None();
+    }
+
+    bool congruentTo(const MDefinition *ins) const {
+        // Congruent only if binaryCongruentTo() agrees and both nodes target the same lane.
+        return binaryCongruentTo(ins) && lane_ == ins->toSimdInsertElement()->lane();
+    }
+};
+
 // Extracts the sign bits from a given vector, returning an MIRType_Int32.
 class MSimdSignMask : public MUnaryInstruction
 {
   protected:
     explicit MSimdSignMask(MDefinition *obj)
       : MUnaryInstruction(obj)
     {
         MOZ_ASSERT(IsSimdType(obj->type()));
--- a/js/src/jit/MOpcodes.h
+++ b/js/src/jit/MOpcodes.h
@@ -11,16 +11,17 @@ namespace js {
 namespace jit {
 
 #define MIR_OPCODE_LIST(_)                                                  \
     _(Constant)                                                             \
     _(SimdValueX4)                                                          \
     _(SimdSplatX4)                                                          \
     _(SimdConstant)                                                         \
     _(SimdExtractElement)                                                   \
+    _(SimdInsertElement)                                                    \
     _(SimdSignMask)                                                         \
     _(SimdBinaryComp)                                                       \
     _(SimdBinaryArith)                                                      \
     _(SimdBinaryBitwise)                                                    \
     _(SimdTernaryBitwise)                                                   \
     _(CloneLiteral)                                                         \
     _(Parameter)                                                            \
     _(Callee)                                                               \
--- a/js/src/jit/ParallelSafetyAnalysis.cpp
+++ b/js/src/jit/ParallelSafetyAnalysis.cpp
@@ -110,16 +110,17 @@ class ParallelSafetyVisitor : public MDe
     // I am taking the policy of blacklisting everything that's not
     // obviously safe for now.  We can loosen as we need.
 
     SAFE_OP(Constant)
     SAFE_OP(SimdValueX4)
     SAFE_OP(SimdSplatX4)
     SAFE_OP(SimdConstant)
     SAFE_OP(SimdExtractElement)
+    SAFE_OP(SimdInsertElement)
     SAFE_OP(SimdSignMask)
     SAFE_OP(SimdBinaryComp)
     SAFE_OP(SimdBinaryArith)
     SAFE_OP(SimdBinaryBitwise)
     SAFE_OP(SimdTernaryBitwise)
     UNSAFE_OP(CloneLiteral)
     SAFE_OP(Parameter)
     SAFE_OP(Callee)
--- a/js/src/jit/shared/Assembler-x86-shared.h
+++ b/js/src/jit/shared/Assembler-x86-shared.h
@@ -520,43 +520,50 @@ class AssemblerX86Shared : public Assemb
           case Operand::MEM_SCALE:
             masm.movups_rm(src.code(), dest.disp(), dest.base(), dest.index(), dest.scale());
             break;
           default:
             MOZ_CRASH("unexpected operand kind");
         }
     }
 
-    // movsd and movss are only provided in load/store form since the
+    // movsd is only provided in load/store form since the
     // register-to-register form has different semantics (it doesn't clobber
     // the whole output register) and isn't needed currently.
     void movsd(const Address &src, FloatRegister dest) {
         masm.movsd_mr(src.offset, src.base.code(), dest.code());
     }
     void movsd(const BaseIndex &src, FloatRegister dest) {
         masm.movsd_mr(src.offset, src.base.code(), src.index.code(), src.scale, dest.code());
     }
     void movsd(FloatRegister src, const Address &dest) {
         masm.movsd_rm(src.code(), dest.offset, dest.base.code());
     }
     void movsd(FloatRegister src, const BaseIndex &dest) {
         masm.movsd_rm(src.code(), dest.offset, dest.base.code(), dest.index.code(), dest.scale);
     }
+    // Unlike movsd, movss is also provided in register-to-register form.
+    // That form leaves the upper lanes of the destination untouched, so it
+    // should only be used in contexts where preserving the higher lanes of
+    // the destination FloatRegister is the intended effect.
     void movss(const Address &src, FloatRegister dest) {
         masm.movss_mr(src.offset, src.base.code(), dest.code());
     }
     void movss(const BaseIndex &src, FloatRegister dest) {
         masm.movss_mr(src.offset, src.base.code(), src.index.code(), src.scale, dest.code());
     }
     void movss(FloatRegister src, const Address &dest) {
         masm.movss_rm(src.code(), dest.offset, dest.base.code());
     }
     void movss(FloatRegister src, const BaseIndex &dest) {
         masm.movss_rm(src.code(), dest.offset, dest.base.code(), dest.index.code(), dest.scale);
     }
+    void movss(FloatRegister src, FloatRegister dest) { // reg-to-reg: dest's upper lanes are kept
+        masm.movss_rr(src.code(), dest.code());
+    }
     void movdqu(const Operand &src, FloatRegister dest) {
         JS_ASSERT(HasSSE2());
         switch (src.kind()) {
           case Operand::MEM_REG_DISP:
             masm.movdqu_mr(src.disp(), src.base(), dest.code());
             break;
           case Operand::MEM_SCALE:
             masm.movdqu_mr(src.disp(), src.base(), src.index(), src.scale(), dest.code());
--- a/js/src/jit/shared/CodeGenerator-x86-shared.cpp
+++ b/js/src/jit/shared/CodeGenerator-x86-shared.cpp
@@ -2280,16 +2280,67 @@ CodeGeneratorX86Shared::visitSimdExtract
         uint32_t mask = MacroAssembler::ComputeShuffleMask(lane);
         masm.shuffleFloat32(mask, input, output);
     }
     masm.canonicalizeFloat(output);
     return true;
 }
 
 bool
+CodeGeneratorX86Shared::visitSimdInsertElementI(LSimdInsertElementI *ins)
+{
+    FloatRegister vector = ToFloatRegister(ins->vector());
+    Register value = ToRegister(ins->value());
+    FloatRegister output = ToFloatRegister(ins->output());
+    MOZ_ASSERT(vector == output); // defineReuseInput(0)
+
+    unsigned component = unsigned(ins->lane());
+
+    // Note that, unlike the float32x4 case, we cannot use movd when the
+    // inserted value goes into the first component, as movd clears out the
+    // higher lanes of the output.
+    if (AssemblerX86Shared::HasSSE41()) {
+        masm.pinsrd(component, value, output);
+        return true;
+    }
+
+    masm.reserveStack(Simd128DataSize); // no SSE4.1: patch the lane through a stack slot
+    masm.storeAlignedInt32x4(vector, Address(StackPointer, 0));
+    masm.store32(value, Address(StackPointer, component * sizeof(int32_t))); // overwrite one lane
+    masm.loadAlignedInt32x4(Address(StackPointer, 0), output);
+    masm.freeStack(Simd128DataSize);
+    return true;
+}
+
+bool
+CodeGeneratorX86Shared::visitSimdInsertElementF(LSimdInsertElementF *ins)
+{
+    FloatRegister vector = ToFloatRegister(ins->vector());
+    FloatRegister value = ToFloatRegister(ins->value());
+    FloatRegister output = ToFloatRegister(ins->output());
+    MOZ_ASSERT(vector == output); // defineReuseInput(0)
+
+    if (ins->lane() == SimdLane::LaneX) {
+        // As both operands are registers, movss doesn't modify the upper bits
+        // of the destination operand.
+        if (value != output)
+            masm.movss(value, output);
+        return true;
+    }
+
+    unsigned component = unsigned(ins->lane());
+    masm.reserveStack(Simd128DataSize); // other lanes: patch the lane through a stack slot
+    masm.storeAlignedFloat32x4(vector, Address(StackPointer, 0));
+    masm.storeFloat32(value, Address(StackPointer, component * sizeof(float))); // overwrite lane
+    masm.loadAlignedFloat32x4(Address(StackPointer, 0), output);
+    masm.freeStack(Simd128DataSize);
+    return true;
+}
+
+bool
 CodeGeneratorX86Shared::visitSimdSignMaskX4(LSimdSignMaskX4 *ins)
 {
     FloatRegister input = ToFloatRegister(ins->input());
     Register output = ToRegister(ins->output());
 
     // For Float32x4 and Int32x4.
     masm.movmskps(input, output);
     return true;
--- a/js/src/jit/shared/CodeGenerator-x86-shared.h
+++ b/js/src/jit/shared/CodeGenerator-x86-shared.h
@@ -209,16 +209,18 @@ class CodeGeneratorX86Shared : public Co
     // SIMD operators
     bool visitSimdValueInt32x4(LSimdValueInt32x4 *lir);
     bool visitSimdValueFloat32x4(LSimdValueFloat32x4 *lir);
     bool visitSimdSplatX4(LSimdSplatX4 *lir);
     bool visitInt32x4(LInt32x4 *ins);
     bool visitFloat32x4(LFloat32x4 *ins);
     bool visitSimdExtractElementI(LSimdExtractElementI *lir);
     bool visitSimdExtractElementF(LSimdExtractElementF *lir);
+    bool visitSimdInsertElementI(LSimdInsertElementI *lir);
+    bool visitSimdInsertElementF(LSimdInsertElementF *lir);
     bool visitSimdSignMaskX4(LSimdSignMaskX4 *ins);
     bool visitSimdBinaryCompIx4(LSimdBinaryCompIx4 *lir);
     bool visitSimdBinaryCompFx4(LSimdBinaryCompFx4 *lir);
     bool visitSimdBinaryArithIx4(LSimdBinaryArithIx4 *lir);
     bool visitSimdBinaryArithFx4(LSimdBinaryArithFx4 *lir);
     bool visitSimdBinaryBitwiseX4(LSimdBinaryBitwiseX4 *lir);
     bool visitSimdSelect(LSimdSelect *ins);