Bug 1240796 - Implement Uint32x4 extractLane in Ion. r=nbp
author Jakob Stoklund Olesen <jolesen@mozilla.com>
Mon, 01 Feb 2016 14:55:07 -0800
changeset 282644 84db96b7857f6f693331c1a78bf87cd0bfceb5e8
parent 282643 426fa86f579d918364e4f1fc74f6f5df5c8b4a59
child 282645 c537facdae50cf31e1a39cf23339edb84b3e7444
push id 17362
push user cbook@mozilla.com
push date Tue, 02 Feb 2016 10:54:53 +0000
treeherder fx-team@e5f1b4782e38 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers nbp
bugs 1240796
milestone 47.0a1
Bug 1240796 - Implement Uint32x4 extractLane in Ion. r=nbp

Since a Uint32 can't be represented in a MIRType_Int32, this function should return a MIRType_Double.

Allow MSimdExtractElement(Uint32x4) to return a MIRType_Int32 too. It will work like the double version followed by MTruncateToInt32, which bitcasts the Uint32 value range into the Int32 value range.
js/src/jit-test/tests/SIMD/getters.js
js/src/jit/Lowering.cpp
js/src/jit/MCallOptimize.cpp
js/src/jit/MIR.h
js/src/jit/shared/LIR-shared.h
js/src/jit/shared/LOpcodes-shared.h
js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp
js/src/jit/x86-shared/CodeGenerator-x86-shared.h
--- a/js/src/jit-test/tests/SIMD/getters.js
+++ b/js/src/jit-test/tests/SIMD/getters.js
@@ -1,29 +1,35 @@
 load(libdir + 'simd.js');
 
 setJitCompilerOption("ion.warmup.trigger", 50);
 
 function f() {
     var i4 = SIMD.Int32x4(1, -2, 3, -4);
+    var u4 = SIMD.Uint32x4(1, -2, 3, 0x88000000);
     var b4 = SIMD.Bool32x4(true, true, false, true);
 
 
     var bt4 = SIMD.Bool32x4(true, true, true, true);
     var bf4 = SIMD.Bool32x4(false, false, false, false);
 
     var v = Math.fround(13.37);
     var f4 = SIMD.Float32x4(13.37, NaN, Infinity, -0);
 
     for (var i = 0; i < 150; i++) {
         assertEq(SIMD.Int32x4.extractLane(i4, 0), 1);
         assertEq(SIMD.Int32x4.extractLane(i4, 1), -2);
         assertEq(SIMD.Int32x4.extractLane(i4, 2), 3);
         assertEq(SIMD.Int32x4.extractLane(i4, 3), -4);
 
+        assertEq(SIMD.Uint32x4.extractLane(u4, 0), 1);
+        assertEq(SIMD.Uint32x4.extractLane(u4, 1), -2 >>> 0);
+        assertEq(SIMD.Uint32x4.extractLane(u4, 2), 3);
+        assertEq(SIMD.Uint32x4.extractLane(u4, 3), 0x88000000);
+
         assertEq(SIMD.Float32x4.extractLane(f4, 0), v);
         assertEq(SIMD.Float32x4.extractLane(f4, 1), NaN);
         assertEq(SIMD.Float32x4.extractLane(f4, 2), Infinity);
         assertEq(SIMD.Float32x4.extractLane(f4, 3), -0);
 
         assertEq(SIMD.Bool32x4.extractLane(b4, 0), true);
         assertEq(SIMD.Bool32x4.extractLane(b4, 1), true);
         assertEq(SIMD.Bool32x4.extractLane(b4, 2), false);
--- a/js/src/jit/Lowering.cpp
+++ b/js/src/jit/Lowering.cpp
@@ -4144,28 +4144,37 @@ LIRGenerator::visitSimdReinterpretCast(M
 void
 LIRGenerator::visitSimdExtractElement(MSimdExtractElement* ins)
 {
     MOZ_ASSERT(IsSimdType(ins->input()->type()));
     MOZ_ASSERT(!IsSimdType(ins->type()));
 
     switch (ins->input()->type()) {
       case MIRType_Int32x4: {
+        MOZ_ASSERT(ins->signedness() != SimdSign::NotApplicable);
         // Note: there could be int16x8 in the future, which doesn't use the
         // same instruction. We either need to pass the arity or create new LIns.
         LUse use = useRegisterAtStart(ins->input());
-        define(new(alloc()) LSimdExtractElementI(use), ins);
+        if (ins->type() == MIRType_Double) {
+            // Extract an Uint32 lane into a double.
+            MOZ_ASSERT(ins->signedness() == SimdSign::Unsigned);
+            define(new (alloc()) LSimdExtractElementU2D(use, temp()), ins);
+        } else {
+            define(new (alloc()) LSimdExtractElementI(use), ins);
+        }
         break;
       }
       case MIRType_Float32x4: {
+        MOZ_ASSERT(ins->signedness() == SimdSign::NotApplicable);
         LUse use = useRegisterAtStart(ins->input());
         define(new(alloc()) LSimdExtractElementF(use), ins);
         break;
       }
       case MIRType_Bool32x4: {
+        MOZ_ASSERT(ins->signedness() == SimdSign::NotApplicable);
         LUse use = useRegisterAtStart(ins->input());
         define(new(alloc()) LSimdExtractElementB(use), ins);
         break;
       }
       default:
         MOZ_CRASH("Unknown SIMD kind when extracting element");
     }
 }
--- a/js/src/jit/MCallOptimize.cpp
+++ b/js/src/jit/MCallOptimize.cpp
@@ -3463,31 +3463,32 @@ IonBuilder::inlineSimdExtractLane(CallIn
 {
     // extractLane() returns a scalar, so don't use canInlineSimd() which looks
     // for a template object.
     if (callInfo.argc() != 2 || callInfo.constructing()) {
         trackOptimizationOutcome(TrackedOutcome::CantInlineNativeBadForm);
         return InliningStatus_NotInlined;
     }
 
-    // TODO JSO: Implement unsigned integer lane values.
-    if (sign == SimdSign::Unsigned)
-        return InliningStatus_NotInlined;
-
     MDefinition* arg = callInfo.getArg(1);
     if (!arg->isConstantValue() || arg->type() != MIRType_Int32)
         return InliningStatus_NotInlined;
     int32_t lane = callInfo.getArg(1)->constantValue().toInt32();
     if (lane < 0 || lane >= 4)
         return InliningStatus_NotInlined;
 
     // See comment in inlineSimdBinary
     MIRType laneType = SimdTypeToLaneType(vecType);
+
+    // An Uint32 lane can't be represented in MIRType_Int32. Get it as a double.
+    if (sign == SimdSign::Unsigned && vecType == MIRType_Int32x4)
+        laneType = MIRType_Double;
+
     MSimdExtractElement* ins = MSimdExtractElement::New(alloc(), callInfo.getArg(0),
-                                                        vecType, laneType, SimdLane(lane));
+                                                        vecType, laneType, SimdLane(lane), sign);
     current->add(ins);
     current->push(ins);
     callInfo.setImplicitlyUsedUnchecked();
     return InliningStatus_Inlined;
 }
 
 IonBuilder::InliningStatus
 IonBuilder::inlineSimdReplaceLane(CallInfo& callInfo, JSNative native, MIRType mirType)
--- a/js/src/jit/MIR.h
+++ b/js/src/jit/MIR.h
@@ -1637,66 +1637,88 @@ class MSimdReinterpretCast
     }
     bool congruentTo(const MDefinition* ins) const override {
         return congruentIfOperandsEqual(ins);
     }
     ALLOW_CLONE(MSimdReinterpretCast)
 };
 
 // Extracts a lane element from a given vector type, given by its lane symbol.
+//
+// For integer SIMD types, a SimdSign must be provided so the lane value can be
+// converted to a scalar correctly.
 class MSimdExtractElement
   : public MUnaryInstruction,
     public SimdPolicy<0>::Data
 {
   protected:
     SimdLane lane_;
-
-    MSimdExtractElement(MDefinition* obj, MIRType vecType, MIRType laneType, SimdLane lane)
-      : MUnaryInstruction(obj), lane_(lane)
+    SimdSign sign_;
+
+    MSimdExtractElement(MDefinition* obj, MIRType vecType, MIRType laneType, SimdLane lane,
+                        SimdSign sign)
+      : MUnaryInstruction(obj), lane_(lane), sign_(sign)
     {
         MOZ_ASSERT(IsSimdType(vecType));
         MOZ_ASSERT(uint32_t(lane) < SimdTypeToLength(vecType));
         MOZ_ASSERT(!IsSimdType(laneType));
+        MOZ_ASSERT((sign != SimdSign::NotApplicable) == IsIntegerSimdType(vecType),
+                   "Signedness must be specified for integer SIMD extractLanes");
         // The resulting type should match the lane type.
         // Allow extracting boolean lanes directly into an Int32 (for asm.js).
+        // Allow extracting Uint32 lanes into a double.
+        //
+        // We also allow extracting Uint32 lanes into a MIRType_Int32. This is
+        // equivalent to extracting the Uint32 lane to a double and then
+        // applying MTruncateToInt32, but it bypasses the conversion to/from
+        // double.
         MOZ_ASSERT(SimdTypeToLaneType(vecType) == laneType ||
-                   (IsBooleanSimdType(vecType) && laneType == MIRType_Int32));
+                   (IsBooleanSimdType(vecType) && laneType == MIRType_Int32) ||
+                   (vecType == MIRType_Int32x4 && laneType == MIRType_Double &&
+                    sign == SimdSign::Unsigned));
 
         setMovable();
         specialization_ = vecType;
         setResultType(laneType);
     }
 
   public:
     INSTRUCTION_HEADER(SimdExtractElement)
 
     static MSimdExtractElement* NewAsmJS(TempAllocator& alloc, MDefinition* obj, MIRType type,
                                          SimdLane lane)
     {
-        return new(alloc) MSimdExtractElement(obj, obj->type(), type, lane);
+        // Only signed integer types in AsmJS so far.
+        SimdSign sign =
+          IsIntegerSimdType(obj->type()) ? SimdSign::Signed : SimdSign::NotApplicable;
+        return new (alloc) MSimdExtractElement(obj, obj->type(), type, lane, sign);
     }
 
     static MSimdExtractElement* New(TempAllocator& alloc, MDefinition* obj, MIRType vecType,
-                                    MIRType scalarType, SimdLane lane)
-    {
-        return new(alloc) MSimdExtractElement(obj, vecType, scalarType, lane);
+                                    MIRType scalarType, SimdLane lane, SimdSign sign)
+    {
+        return new(alloc) MSimdExtractElement(obj, vecType, scalarType, lane, sign);
     }
 
     SimdLane lane() const {
         return lane_;
     }
 
+    SimdSign signedness() const {
+        return sign_;
+    }
+
     AliasSet getAliasSet() const override {
         return AliasSet::None();
     }
     bool congruentTo(const MDefinition* ins) const override {
         if (!ins->isSimdExtractElement())
             return false;
         const MSimdExtractElement* other = ins->toSimdExtractElement();
-        if (other->lane_ != lane_)
+        if (other->lane_ != lane_ || other->sign_ != sign_)
             return false;
         return congruentIfOperandsEqual(other);
     }
     ALLOW_CLONE(MSimdExtractElement)
 };
 
 // Replaces the datum in the given lane by a scalar value of the same type.
 class MSimdInsertElement
--- a/js/src/jit/shared/LIR-shared.h
+++ b/js/src/jit/shared/LIR-shared.h
@@ -275,16 +275,34 @@ class LSimdExtractElementF : public LSim
 {
   public:
     LIR_HEADER(SimdExtractElementF);
     explicit LSimdExtractElementF(const LAllocation& base)
       : LSimdExtractElementBase(base)
     {}
 };
 
+// Extracts an element from an Uint32x4 SIMD vector, converts to double.
+class LSimdExtractElementU2D : public LInstructionHelper<1, 1, 1>
+{
+  public:
+    LIR_HEADER(SimdExtractElementU2D);
+    explicit LSimdExtractElementU2D(const LAllocation& base, const LDefinition& temp) {
+        setOperand(0, base);
+        setTemp(0, temp);
+    }
+    SimdLane lane() const {
+        return mir_->toSimdExtractElement()->lane();
+    }
+    const LDefinition* temp() {
+        return getTemp(0);
+    }
+};
+
+
 class LSimdInsertElementBase : public LInstructionHelper<1, 2, 0>
 {
   protected:
     LSimdInsertElementBase(const LAllocation& vec, const LAllocation& val)
     {
         setOperand(0, vec);
         setOperand(1, val);
     }
--- a/js/src/jit/shared/LOpcodes-shared.h
+++ b/js/src/jit/shared/LOpcodes-shared.h
@@ -21,16 +21,17 @@
     _(SimdUnbox)                    \
     _(SimdSplatX4)                  \
     _(Int32x4)                      \
     _(Float32x4)                    \
     _(SimdAllTrue)                  \
     _(SimdAnyTrue)                  \
     _(SimdReinterpretCast)          \
     _(SimdExtractElementI)          \
+    _(SimdExtractElementU2D)        \
     _(SimdExtractElementB)          \
     _(SimdExtractElementF)          \
     _(SimdInsertElementI)           \
     _(SimdInsertElementF)           \
     _(SimdGeneralShuffleI)          \
     _(SimdGeneralShuffleF)          \
     _(SimdSwizzleI)                 \
     _(SimdSwizzleF)                 \
--- a/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp
+++ b/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp
@@ -2479,56 +2479,62 @@ CodeGeneratorX86Shared::visitSimdReinter
       case MIRType_Float32x4:
         masm.vmovaps(input, output);
         break;
       default:
         MOZ_CRASH("Unknown SIMD kind");
     }
 }
 
+// Extract an integer lane from the vector register |input| and place it in |output|.
 void
-CodeGeneratorX86Shared::visitSimdExtractElementB(LSimdExtractElementB* ins)
+CodeGeneratorX86Shared::emitSimdExtractLane(FloatRegister input, Register output, unsigned lane)
 {
-    FloatRegister input = ToFloatRegister(ins->input());
-    Register output = ToRegister(ins->output());
-
-    SimdLane lane = ins->lane();
     if (lane == LaneX) {
         // The value we want to extract is in the low double-word
         masm.moveLowInt32(input, output);
     } else if (AssemblerX86Shared::HasSSE41()) {
         masm.vpextrd(lane, input, output);
     } else {
         uint32_t mask = MacroAssembler::ComputeShuffleMask(lane);
         masm.shuffleInt32(mask, input, ScratchSimd128Reg);
         masm.moveLowInt32(ScratchSimd128Reg, output);
     }
+}
+
+void
+CodeGeneratorX86Shared::visitSimdExtractElementB(LSimdExtractElementB* ins)
+{
+    FloatRegister input = ToFloatRegister(ins->input());
+    Register output = ToRegister(ins->output());
+
+    emitSimdExtractLane(input, output, ins->lane());
 
     // We need to generate a 0/1 value. We have 0/-1.
     masm.and32(Imm32(1), output);
 }
 
 void
 CodeGeneratorX86Shared::visitSimdExtractElementI(LSimdExtractElementI* ins)
 {
     FloatRegister input = ToFloatRegister(ins->input());
     Register output = ToRegister(ins->output());
 
-    SimdLane lane = ins->lane();
-    if (lane == LaneX) {
-        // The value we want to extract is in the low double-word
-        masm.moveLowInt32(input, output);
-    } else if (AssemblerX86Shared::HasSSE41()) {
-        masm.vpextrd(lane, input, output);
-    } else {
-        uint32_t mask = MacroAssembler::ComputeShuffleMask(lane);
-        ScratchSimd128Scope scratch(masm);
-        masm.shuffleInt32(mask, input, scratch);
-        masm.moveLowInt32(scratch, output);
-    }
+    emitSimdExtractLane(input, output, ins->lane());
+}
+
+void
+CodeGeneratorX86Shared::visitSimdExtractElementU2D(LSimdExtractElementU2D* ins)
+{
+    FloatRegister input = ToFloatRegister(ins->input());
+    FloatRegister output = ToFloatRegister(ins->output());
+    Register temp = ToRegister(ins->temp());
+
+    emitSimdExtractLane(input, temp, ins->lane());
+    masm.convertUInt32ToDouble(temp, output);
 }
 
 void
 CodeGeneratorX86Shared::visitSimdExtractElementF(LSimdExtractElementF* ins)
 {
     FloatRegister input = ToFloatRegister(ins->input());
     FloatRegister output = ToFloatRegister(ins->output());
 
--- a/js/src/jit/x86-shared/CodeGenerator-x86-shared.h
+++ b/js/src/jit/x86-shared/CodeGenerator-x86-shared.h
@@ -180,16 +180,18 @@ class CodeGeneratorX86Shared : public Co
     {
         MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);
         masm.cmpPtr(reg, ImmWord(0));
         emitBranch(cond, ifTrue, ifFalse);
     }
 
     void emitTableSwitchDispatch(MTableSwitch* mir, Register index, Register base);
 
+    void emitSimdExtractLane(FloatRegister input, Register output, unsigned lane);
+
   public:
     CodeGeneratorX86Shared(MIRGenerator* gen, LIRGraph* graph, MacroAssembler* masm);
 
   public:
     // Instruction visitors.
     virtual void visitDouble(LDouble* ins);
     virtual void visitFloat32(LFloat32* ins);
     virtual void visitMinMaxD(LMinMaxD* ins);
@@ -260,16 +262,17 @@ class CodeGeneratorX86Shared : public Co
     void visitInt32x4(LInt32x4* ins);
     void visitFloat32x4(LFloat32x4* ins);
     void visitInt32x4ToFloat32x4(LInt32x4ToFloat32x4* ins);
     void visitFloat32x4ToInt32x4(LFloat32x4ToInt32x4* ins);
     void visitFloat32x4ToUint32x4(LFloat32x4ToUint32x4* ins);
     void visitSimdReinterpretCast(LSimdReinterpretCast* lir);
     void visitSimdExtractElementB(LSimdExtractElementB* lir);
     void visitSimdExtractElementI(LSimdExtractElementI* lir);
+    void visitSimdExtractElementU2D(LSimdExtractElementU2D* lir);
     void visitSimdExtractElementF(LSimdExtractElementF* lir);
     void visitSimdInsertElementI(LSimdInsertElementI* lir);
     void visitSimdInsertElementF(LSimdInsertElementF* lir);
     void visitSimdSwizzleI(LSimdSwizzleI* lir);
     void visitSimdSwizzleF(LSimdSwizzleF* lir);
     void visitSimdShuffle(LSimdShuffle* lir);
     void visitSimdUnaryArithIx4(LSimdUnaryArithIx4* lir);
     void visitSimdUnaryArithFx4(LSimdUnaryArithFx4* lir);