Bug 1110164: Extend MSimdSelect to also handle element-wise select; r=sunfish
author Benjamin Bouvier <benj@benj.me>
Mon, 29 Dec 2014 18:04:47 +0100
changeset 221512 f3e8355e98306060f1e73e61f6b1532c97f65158
parent 221511 a207bb51ae07cf7f33425d5d4f18b7ccd309d169
child 221513 f13a3dcc5d0fc4d6a5dbcd14a354c3f9ba42a555
push id 28032
push user kwierso@gmail.com
push date Tue, 30 Dec 2014 01:28:14 +0000
treeherder mozilla-central@67872ce17918 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers sunfish
bugs 1110164
milestone 37.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1110164: Extend MSimdSelect to also handle element-wise select; r=sunfish
js/src/asmjs/AsmJSValidate.cpp
js/src/jit-test/tests/asm.js/testSIMD.js
js/src/jit/LIR-Common.h
js/src/jit/MIR.h
js/src/jit/shared/CodeGenerator-x86-shared.cpp
--- a/js/src/asmjs/AsmJSValidate.cpp
+++ b/js/src/asmjs/AsmJSValidate.cpp
@@ -2651,17 +2651,17 @@ class FunctionCompiler
     {
         if (inDeadCode())
             return nullptr;
 
         MOZ_ASSERT(IsSimdType(mask->type()));
         MOZ_ASSERT(mask->type() == MIRType_Int32x4);
         MOZ_ASSERT(IsSimdType(lhs->type()) && rhs->type() == lhs->type());
         MOZ_ASSERT(lhs->type() == type);
-        MSimdSelect *ins = MSimdSelect::NewAsmJS(alloc(), mask, lhs, rhs, type);
+        MSimdSelect *ins = MSimdSelect::NewAsmJS(alloc(), mask, lhs, rhs, type, /* isElementWise */ true);
         curBlock_->add(ins);
         return ins;
     }
 
     template<class T>
     MDefinition *convertSimd(MDefinition *vec, MIRType from, MIRType to)
     {
         if (inDeadCode())
--- a/js/src/jit-test/tests/asm.js/testSIMD.js
+++ b/js/src/jit-test/tests/asm.js/testSIMD.js
@@ -831,23 +831,34 @@ const F32SEL = 'var f4sel = f4.select;'
 assertAsmTypeFail('glob', USE_ASM + F32 + I32SEL + "function f() {var x=f4(1,2,3,4); return i4(i4sel(x,x,x));} return f");
 assertAsmTypeFail('glob', USE_ASM + I32 + F32 + I32SEL + "function f() {var m=f4(1,2,3,4); var x=i4(1,2,3,4); return i4(i4sel(m,x,x));} return f");
 assertAsmTypeFail('glob', USE_ASM + I32 + F32 + I32SEL + "function f() {var m=f4(1,2,3,4); var x=f4(1,2,3,4); return i4(i4sel(m,x,x));} return f");
 assertAsmTypeFail('glob', USE_ASM + I32 + F32 + I32SEL + "function f() {var m=i4(1,2,3,4); var x=f4(1,2,3,4); return i4(i4sel(m,x,x));} return f");
 assertAsmTypeFail('glob', USE_ASM + I32 + F32 + I32SEL + "function f() {var m=i4(1,2,3,4); var x=f4(1,2,3,4); var y=i4(5,6,7,8); return i4(i4sel(m,x,y));} return f");
 assertAsmTypeFail('glob', USE_ASM + I32 + F32 + I32SEL + "function f() {var m=i4(1,2,3,4); var x=i4(1,2,3,4); var y=f4(5,6,7,8); return i4(i4sel(m,x,y));} return f");
 assertAsmTypeFail('glob', USE_ASM + I32 + F32 + I32SEL + "function f() {var m=i4(1,2,3,4); var x=f4(1,2,3,4); var y=f4(5,6,7,8); return i4(i4sel(m,x,y));} return f");
 
-
 assertAsmTypeFail('glob', USE_ASM + F32 + F32SEL + "function f() {var m=f4(1,2,3,4); return f4(f4sel(x,x,x));} return f");
 assertAsmTypeFail('glob', USE_ASM + I32 + F32 + F32SEL + "function f() {var m=f4(1,2,3,4); var x=i4(1,2,3,4); return f4(f4sel(m,x,x));} return f");
 assertAsmTypeFail('glob', USE_ASM + I32 + F32 + F32SEL + "function f() {var m=f4(1,2,3,4); var x=f4(1,2,3,4); return f4(f4sel(m,x,x));} return f");
 assertAsmTypeFail('glob', USE_ASM + I32 + F32 + F32SEL + "function f() {var m=i4(1,2,3,4); var x=f4(1,2,3,4); var y=i4(5,6,7,8); return f4(f4sel(m,x,y));} return f");
 assertAsmTypeFail('glob', USE_ASM + I32 + F32 + F32SEL + "function f() {var m=i4(1,2,3,4); var x=i4(1,2,3,4); var y=f4(5,6,7,8); return f4(f4sel(m,x,y));} return f");
 
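+// Element-wise select: only the sign bit of each mask lane matters, so some of these (e.g. the integer case with mask i4(-1,-2,-3,-42)) would fail with bitselect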
+assertEqX4(asmLink(asmCompile('glob', USE_ASM + I32 + I32SEL + "function f() {var m=i4(0,0,0,0); var x=i4(1,2,3,4); var y=i4(5,6,7,8); return i4(i4sel(m,x,y)); } return f"), this)(), [5, 6, 7, 8]);
+assertEqX4(asmLink(asmCompile('glob', USE_ASM + I32 + I32SEL + "function f() {var m=i4(-1,-2,-3,-42); var x=i4(1,2,3,4); var y=i4(5,6,7,8); return i4(i4sel(m,x,y)); } return f"), this)(), [1, 2, 3, 4]);
+assertEqX4(asmLink(asmCompile('glob', USE_ASM + I32 + I32SEL + "function f() {var m=i4(1,-1,2,-2); var x=i4(1,2,3,4); var y=i4(5,6,7,8); return i4(i4sel(m,x,y)); } return f"), this)(), [5, 2, 7, 4]);
+assertEqX4(asmLink(asmCompile('glob', USE_ASM + I32 + I32SEL + "function f() {var m=i4(42,45,-42,-47); var x=i4(1,2,3,4); var y=i4(5,6,7,8); return i4(i4sel(m,x,y)); } return f"), this)(), [5, 6, 3, 4]);
+
+assertEqX4(asmLink(asmCompile('glob', USE_ASM + I32 + F32 + F32SEL + "function f() {var m=i4(0,0,0,0); var x=f4(1,2,3,4); var y=f4(5,6,7,8); return f4(f4sel(m,x,y)); } return f"), this)(), [5, 6, 7, 8]);
+assertEqX4(asmLink(asmCompile('glob', USE_ASM + I32 + F32 + F32SEL + "function f() {var m=i4(-1,-2,-3,-42); var x=f4(1,2,3,4); var y=f4(5,6,7,8); return f4(f4sel(m,x,y)); } return f"), this)(), [1, 2, 3, 4]);
+assertEqX4(asmLink(asmCompile('glob', USE_ASM + I32 + F32 + F32SEL + "function f() {var m=i4(1,-1,2,-2); var x=f4(1,2,3,4); var y=f4(5,6,7,8); return f4(f4sel(m,x,y)); } return f"), this)(), [5, 2, 7, 4]);
+assertEqX4(asmLink(asmCompile('glob', USE_ASM + I32 + F32 + F32SEL + "function f() {var m=i4(42,45,-42,-47); var x=f4(1,2,3,4); var y=f4(5,6,7,8); return f4(f4sel(m,x,y)); } return f"), this)(), [5, 6, 3, 4]);
+
+// These pass for both select and bitselect
 assertEqX4(asmLink(asmCompile('glob', USE_ASM + I32 + I32SEL + "function f() {var m=i4(0,0,0,0); var x=i4(1,2,3,4); var y=i4(5,6,7,8); return i4(i4sel(m,x,y)); } return f"), this)(), [5, 6, 7, 8]);
 assertEqX4(asmLink(asmCompile('glob', USE_ASM + I32 + I32SEL + "function f() {var m=i4(0xffffffff,0xffffffff,0xffffffff,0xffffffff); var x=i4(1,2,3,4); var y=i4(5,6,7,8); return i4(i4sel(m,x,y)); } return f"), this)(), [1, 2, 3, 4]);
 assertEqX4(asmLink(asmCompile('glob', USE_ASM + I32 + I32SEL + "function f() {var m=i4(0,0xffffffff,0,0xffffffff); var x=i4(1,2,3,4); var y=i4(5,6,7,8); return i4(i4sel(m,x,y)); } return f"), this)(), [5, 2, 7, 4]);
 assertEqX4(asmLink(asmCompile('glob', USE_ASM + I32 + I32SEL + "function f() {var m=i4(0,0,0xffffffff,0xffffffff); var x=i4(1,2,3,4); var y=i4(5,6,7,8); return i4(i4sel(m,x,y)); } return f"), this)(), [5, 6, 3, 4]);
 
 assertEqX4(asmLink(asmCompile('glob', USE_ASM + I32 + F32 + F32SEL + "function f() {var m=i4(0,0,0,0); var x=f4(1,2,3,4); var y=f4(5,6,7,8); return f4(f4sel(m,x,y)); } return f"), this)(), [5, 6, 7, 8]);
 assertEqX4(asmLink(asmCompile('glob', USE_ASM + I32 + F32 + F32SEL + "function f() {var m=i4(0xffffffff,0xffffffff,0xffffffff,0xffffffff); var x=f4(1,2,3,4); var y=f4(5,6,7,8); return f4(f4sel(m,x,y)); } return f"), this)(), [1, 2, 3, 4]);
 assertEqX4(asmLink(asmCompile('glob', USE_ASM + I32 + F32 + F32SEL + "function f() {var m=i4(0,0xffffffff,0,0xffffffff); var x=f4(1,2,3,4); var y=f4(5,6,7,8); return f4(f4sel(m,x,y)); } return f"), this)(), [5, 2, 7, 4]);
--- a/js/src/jit/LIR-Common.h
+++ b/js/src/jit/LIR-Common.h
@@ -493,16 +493,19 @@ class LSimdSelect : public LInstructionH
         return getOperand(1);
     }
     const LAllocation *rhs() {
         return getOperand(2);
     }
     const LDefinition *temp() {
         return getTemp(0);
     }
+    MSimdSelect *mir() const {
+        return mir_->toSimdSelect();
+    }
 };
 
 // Constant 32-bit integer.
 class LInteger : public LInstructionHelper<1, 0, 0>
 {
     int32_t i32_;
 
   public:
--- a/js/src/jit/MIR.h
+++ b/js/src/jit/MIR.h
@@ -2018,38 +2018,54 @@ class MSimdShift : public MBinaryInstruc
         return operation_ == ins->toSimdShift()->operation();
     }
 
     ALLOW_CLONE(MSimdShift)
 };
 
 class MSimdSelect : public MTernaryInstruction
 {
-  private:
-    MSimdSelect(MDefinition *mask, MDefinition *lhs, MDefinition *rhs, MIRType type)
-      : MTernaryInstruction(mask, lhs, rhs)
+    bool isElementWise_;
+
+    MSimdSelect(MDefinition *mask, MDefinition *lhs, MDefinition *rhs, MIRType type,
+                bool isElementWise)
+      : MTernaryInstruction(mask, lhs, rhs), isElementWise_(isElementWise)
     {
         MOZ_ASSERT(IsSimdType(type));
         MOZ_ASSERT(mask->type() == MIRType_Int32x4);
         MOZ_ASSERT(lhs->type() == rhs->type());
         MOZ_ASSERT(lhs->type() == type);
         setResultType(type);
         setMovable();
     }
 
   public:
     INSTRUCTION_HEADER(SimdSelect);
     static MSimdSelect *NewAsmJS(TempAllocator &alloc, MDefinition *mask, MDefinition *lhs,
-                                 MDefinition *rhs, MIRType t)
-    {
-        return new(alloc) MSimdSelect(mask, lhs, rhs, t);
-    }
-
-    AliasSet getAliasSet() const {
-        return AliasSet::None();
+                                 MDefinition *rhs, MIRType t, bool isElementWise)
+    {
+        return new(alloc) MSimdSelect(mask, lhs, rhs, t, isElementWise);
+    }
+
+    MDefinition *mask() const {
+        return getOperand(0);
+    }
+
+    AliasSet getAliasSet() const {
+        return AliasSet::None();
+    }
+
+    bool isElementWise() const {
+        return isElementWise_;
+    }
+
+    bool congruentTo(const MDefinition *ins) const {
+        if (!congruentIfOperandsEqual(ins))
+            return false;
+        return isElementWise_ == ins->toSimdSelect()->isElementWise();
     }
 
     ALLOW_CLONE(MSimdSelect)
 };
 
 // Deep clone a constant JSObject.
 class MCloneLiteral
   : public MUnaryInstruction,
--- a/js/src/jit/shared/CodeGenerator-x86-shared.cpp
+++ b/js/src/jit/shared/CodeGenerator-x86-shared.cpp
@@ -2959,17 +2959,32 @@ CodeGeneratorX86Shared::visitSimdSelect(
     FloatRegister output = ToFloatRegister(ins->output());
     FloatRegister temp = ToFloatRegister(ins->temp());
 
     if (onTrue != output)
         masm.vmovaps(onTrue, output);
     if (mask != temp)
         masm.vmovaps(mask, temp);
 
-    masm.bitwiseAndX4(Operand(mask), output);
+    MSimdSelect *mir = ins->mir();
+    if (mir->isElementWise()) {
+        if (AssemblerX86Shared::HasAVX()) {
+            masm.vblendvps(mask, onTrue, onFalse, output);
+            return;
+        }
+
+        // SSE4.1 has plain blendvps which can do this, but it is awkward
+        // to use because it requires the mask to be in xmm0.
+
+        // Propagate sign to all bits of mask vector, if necessary.
+        if (!mir->mask()->isSimdBinaryComp())
+            masm.packedRightShiftByScalar(Imm32(31), temp);
+    }
+
+    masm.bitwiseAndX4(Operand(temp), output);
     masm.bitwiseAndNotX4(Operand(onFalse), temp);
     masm.bitwiseOrX4(Operand(temp), output);
 }
 
 void
 CodeGeneratorX86Shared::visitForkJoinGetSlice(LForkJoinGetSlice *ins)
 {
     MOZ_ASSERT(gen->info().executionMode() == ParallelExecution);