Bug 1136226 - Implement 8x16 SIMD shift operators. r=bbouvier
authorJakob Stoklund Olesen <jolesen@mozilla.com>
Tue, 31 May 2016 09:00:18 -0700
changeset 340737 6ea5fb073f4be87096890646bf58061c90b22fcd
parent 340736 25dc50270a77116f3e679f451e143b0031382cdd
child 340738 43882729d51c668fc0e57a7941cfbdc25664422d
push id1183
push userraliiev@mozilla.com
push dateMon, 05 Sep 2016 20:01:49 +0000
treeherdermozilla-release@3148731bed45 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersbbouvier
bugs1136226
milestone49.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1136226 - Implement 8x16 SIMD shift operators. r=bbouvier These operations don't have SSE instructions, so express them in terms of 16x8 shifts in MSimdShift::AddLegalized.
js/src/asmjs/WasmIonCompile.cpp
js/src/jit/MCallOptimize.cpp
js/src/jit/MIR.cpp
js/src/jit/MIR.h
js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp
--- a/js/src/asmjs/WasmIonCompile.cpp
+++ b/js/src/asmjs/WasmIonCompile.cpp
@@ -344,25 +344,22 @@ class FunctionCompiler
                                 SimdSign sign)
     {
         if (inDeadCode())
             return nullptr;
 
         return MSimdBinaryComp::AddLegalized(alloc(), curBlock_, lhs, rhs, op, sign);
     }
 
-    template<class T>
-    MDefinition* binarySimd(MDefinition* lhs, MDefinition* rhs, typename T::Operation op)
+    MDefinition* binarySimdShift(MDefinition* lhs, MDefinition* rhs, MSimdShift::Operation op)
     {
         if (inDeadCode())
             return nullptr;
 
-        T* ins = T::New(alloc(), lhs, rhs, op);
-        curBlock_->add(ins);
-        return ins;
+        return MSimdShift::AddLegalized(alloc(), curBlock_, lhs, rhs, op);
     }
 
     MDefinition* swizzleSimd(MDefinition* vector, const uint8_t lanes[], MIRType type)
     {
         if (inDeadCode())
             return nullptr;
 
         MOZ_ASSERT(vector->type() == type);
@@ -2401,17 +2398,17 @@ EmitSimdBinaryComp(FunctionCompiler& f, 
 static bool
 EmitSimdShift(FunctionCompiler& f, ValType operandType, MSimdShift::Operation op)
 {
     MDefinition* lhs;
     MDefinition* rhs;
     if (!f.iter().readSimdShiftByScalar(operandType, &lhs, &rhs))
         return false;
 
-    f.iter().setResult(f.binarySimd<MSimdShift>(lhs, rhs, op));
+    f.iter().setResult(f.binarySimdShift(lhs, rhs, op));
     return true;
 }
 
 static ValType
 SimdToLaneType(ValType type)
 {
     switch (type) {
       case ValType::I8x16:
--- a/js/src/jit/MCallOptimize.cpp
+++ b/js/src/jit/MCallOptimize.cpp
@@ -3569,17 +3569,17 @@ IonBuilder::inlineSimdShift(CallInfo& ca
                             SimdType type)
 {
     InlineTypedObject* templateObj = nullptr;
     if (!canInlineSimd(callInfo, native, 2, &templateObj))
         return InliningStatus_NotInlined;
 
     MDefinition* vec = unboxSimd(callInfo.getArg(0), type);
 
-    MInstruction* ins = MSimdShift::New(alloc(), vec, callInfo.getArg(1), op);
+    MInstruction* ins = MSimdShift::AddLegalized(alloc(), current, vec, callInfo.getArg(1), op);
     return boxSimd(callInfo, ins, templateObj);
 }
 
 IonBuilder::InliningStatus
 IonBuilder::inlineSimdComp(CallInfo& callInfo, JSNative native, MSimdBinaryComp::Operation op,
                            SimdType type)
 {
     InlineTypedObject* templateObj = nullptr;
--- a/js/src/jit/MIR.cpp
+++ b/js/src/jit/MIR.cpp
@@ -1262,18 +1262,17 @@ MSimdConvert::AddLegalized(TempAllocator
         //
         // And finally combine:
         //
         // result = flo + fhi // lo + hi*2^16.
 
         // Compute hi = obj >> 16 (lane-wise unsigned shift).
         MInstruction* c16 = MConstant::New(alloc, Int32Value(16));
         addTo->add(c16);
-        MInstruction* hi = MSimdShift::New(alloc, obj, c16, MSimdShift::ursh);
-        addTo->add(hi);
+        MInstruction* hi = MSimdShift::AddLegalized(alloc, addTo, obj, c16, MSimdShift::ursh);
 
         // Compute lo = obj & 0xffff (lane-wise).
         MInstruction* m16 =
           MSimdConstant::New(alloc, SimdConstant::SplatX4(0xffff), MIRType::Int32x4);
         addTo->add(m16);
         MInstruction* lo = MSimdBinaryBitwise::New(alloc, obj, m16, MSimdBinaryBitwise::and_);
         addTo->add(lo);
 
@@ -1362,16 +1361,104 @@ MSimdBinaryComp::AddLegalized(TempAlloca
     }
 
     // This is a legal operation already. Just create the instruction requested.
     MInstruction* result = MSimdBinaryComp::New(alloc, left, right, op, sign);
     addTo->add(result);
     return result;
 }
 
+MInstruction*
+MSimdShift::AddLegalized(TempAllocator& alloc, MBasicBlock* addTo, MDefinition* left,
+                         MDefinition* right, Operation op)
+{
+    MIRType opType = left->type();
+    MOZ_ASSERT(IsIntegerSimdType(opType));
+
+    // SSE does not provide 8x16 shift instructions.
+    if (opType == MIRType::Int8x16) {
+        // Express the shift in terms of Int16x8 shifts by splitting into even
+        // and odd lanes, place 8-bit lanes into the high bits of Int16x8
+        // vectors `even` and `odd`. Shift, mask, combine.
+        //
+        //   wide = Int16x8.fromInt8x16Bits(left);
+        //   shiftBy = right & 7
+        //   mask = Int16x8.splat(0xff00);
+        //
+        MInstruction* wide = MSimdReinterpretCast::New(alloc, left, MIRType::Int16x8);
+        addTo->add(wide);
+
+        // wide = yyxx yyxx yyxx yyxx yyxx yyxx yyxx yyxx
+
+        MInstruction* shiftMask = MConstant::New(alloc, Int32Value(7));
+        addTo->add(shiftMask);
+        MBinaryBitwiseInstruction* shiftBy = MBitAnd::New(alloc, right, shiftMask);
+        shiftBy->setInt32Specialization();
+        addTo->add(shiftBy);
+
+        // Move the even 8x16 lanes into the high bits of the 16x8 lanes.
+        MInstruction* eight = MConstant::New(alloc, Int32Value(8));
+        addTo->add(eight);
+        MInstruction* even = MSimdShift::AddLegalized(alloc, addTo, wide, eight, lsh);
+
+        // Leave the odd lanes in place.
+        MInstruction* odd = wide;
+
+        // even = xx00 xx00 xx00 xx00 xx00 xx00 xx00 xx00
+        // odd  = yyxx yyxx yyxx yyxx yyxx yyxx yyxx yyxx
+
+        MInstruction* mask =
+          MSimdConstant::New(alloc, SimdConstant::SplatX8(int16_t(0xff00)), MIRType::Int16x8);
+        addTo->add(mask);
+
+        // Left-shift: Clear the low bits in `odd` before shifting.
+        if (op == lsh) {
+            odd = MSimdBinaryBitwise::New(alloc, odd, mask, MSimdBinaryBitwise::and_);
+            addTo->add(odd);
+            // odd  = yy00 yy00 yy00 yy00 yy00 yy00 yy00 yy00
+        }
+
+        // Do the real shift twice: once for the even lanes, once for the odd
+        // lanes. This is a recursive call, but with a different type.
+        even = MSimdShift::AddLegalized(alloc, addTo, even, shiftBy, op);
+        odd = MSimdShift::AddLegalized(alloc, addTo, odd, shiftBy, op);
+
+        // even = XX~~ XX~~ XX~~ XX~~ XX~~ XX~~ XX~~ XX~~
+        // odd  = YY~~ YY~~ YY~~ YY~~ YY~~ YY~~ YY~~ YY~~
+
+        // Right-shift: Clear the low bits in `odd` after shifting.
+        if (op != lsh) {
+            odd = MSimdBinaryBitwise::New(alloc, odd, mask, MSimdBinaryBitwise::and_);
+            addTo->add(odd);
+            // odd  = YY00 YY00 YY00 YY00 YY00 YY00 YY00 YY00
+        }
+
+        // Move the even lanes back to their original place.
+        even = MSimdShift::AddLegalized(alloc, addTo, even, eight, ursh);
+
+        // Now, `odd` contains the odd lanes properly shifted, and `even`
+        // contains the even lanes properly shifted:
+        //
+        // even = 00XX 00XX 00XX 00XX 00XX 00XX 00XX 00XX
+        // odd  = YY00 YY00 YY00 YY00 YY00 YY00 YY00 YY00
+        //
+        // Combine:
+        MInstruction* result = MSimdBinaryBitwise::New(alloc, even, odd, MSimdBinaryBitwise::or_);
+        addTo->add(result);
+        result = MSimdReinterpretCast::New(alloc, result, opType);
+        addTo->add(result);
+        return result;
+    }
+
+    // This is a legal operation already. Just create the instruction requested.
+    MInstruction* result = MSimdShift::New(alloc, left, right, op);
+    addTo->add(result);
+    return result;
+}
+
 template <typename T>
 static void
 PrintOpcodeOperation(T* mir, GenericPrinter& out)
 {
     mir->MDefinition::printOpcode(out);
     out.printf(" (%s)", T::OperationName(mir->operation()));
 }
 
--- a/js/src/jit/MIR.h
+++ b/js/src/jit/MIR.h
@@ -2486,16 +2486,23 @@ class MSimdShift
     INSTRUCTION_HEADER(SimdShift)
 
     static MSimdShift* New(TempAllocator& alloc, MDefinition* left, MDefinition* right,
                            Operation op)
     {
         return new(alloc) MSimdShift(left, right, op);
     }
 
+    // Create an MSimdShift instruction and add it to the basic block. Possibly
+    // create and add an equivalent sequence of instructions instead if the
+    // current target doesn't support the requested shift operation directly.
+    // Return the inserted MInstruction that computes the shifted value.
+    static MInstruction* AddLegalized(TempAllocator& alloc, MBasicBlock* addTo, MDefinition* left,
+                                      MDefinition* right, Operation op);
+
     // Get the relevant right shift operation given the signedness of a type.
     static Operation rshForSign(SimdSign sign) {
         return sign == SimdSign::Unsigned ? ursh : rsh;
     }
 
     AliasSet getAliasSet() const override {
         return AliasSet::None();
     }
--- a/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp
+++ b/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp
@@ -2630,26 +2630,20 @@ void
 CodeGeneratorX86Shared::visitSimdReinterpretCast(LSimdReinterpretCast* ins)
 {
     FloatRegister input = ToFloatRegister(ins->input());
     FloatRegister output = ToFloatRegister(ins->output());
 
     if (input.aliases(output))
         return;
 
-    switch (ins->mir()->type()) {
-      case MIRType::Int32x4:
+    if (IsIntegerSimdType(ins->mir()->type()))
         masm.vmovdqa(input, output);
-        break;
-      case MIRType::Float32x4:
+    else
         masm.vmovaps(input, output);
-        break;
-      default:
-        MOZ_CRASH("Unknown SIMD kind");
-    }
 }
 
 // Extract an integer lane from the 32x4 vector register |input| and place it in
 // |output|.
 void
 CodeGeneratorX86Shared::emitSimdExtractLane32x4(FloatRegister input, Register output, unsigned lane)
 {
     if (lane == 0) {