Bug 1096684 - IonMonkey: Optimize with SSE4.1 pextrd r=bbouvier
author Dan Gohman <sunfish@mozilla.com>
Wed, 12 Nov 2014 12:38:32 -0800
changeset 215354 cf5beb0f5d259693e71046f40b3997f13c2ac079
parent 215353 191a52db5011606e73899b3782929f6a5b237419
child 215355 4c04203003c22c9b725bb9ecbe2b5731f5fc601f
push id 27813
push user kwierso@gmail.com
push date Thu, 13 Nov 2014 01:03:17 +0000
treeherder mozilla-central@64f1fb1e2f38 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers bbouvier
bugs 1096684
milestone 36.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1096684 - IonMonkey: Optimize with SSE4.1 pextrd r=bbouvier
js/src/jit/shared/Assembler-x86-shared.h
js/src/jit/shared/BaseAssembler-x86-shared.h
js/src/jit/shared/CodeGenerator-x86-shared.cpp
js/src/jit/x86/MacroAssembler-x86.h
--- a/js/src/jit/shared/Assembler-x86-shared.h
+++ b/js/src/jit/shared/Assembler-x86-shared.h
@@ -1498,16 +1498,33 @@ class AssemblerX86Shared : public Assemb
             break;
           case Operand::MEM_REG_DISP:
             masm.pinsrd_imr(lane, src.disp(), src.base(), dest.code());
             break;
           default:
             MOZ_CRASH("unexpected operand kind");
         }
     }
+    void pextrd(unsigned lane, FloatRegister src, Register dest) {
+        MOZ_ASSERT(HasSSE41()); // Callers must have checked for SSE4.1.
+        masm.pextrd_irr(lane, src.code(), dest.code()); // XMM source, GPR destination.
+    }
+    void pextrd(unsigned lane, FloatRegister src, const Operand &dest) {
+        MOZ_ASSERT(HasSSE41()); // Callers must have checked for SSE4.1.
+        switch (dest.kind()) {
+          case Operand::REG: // Destination is a register operand.
+            masm.pextrd_irr(lane, src.code(), dest.reg());
+            break;
+          case Operand::MEM_REG_DISP: // Destination is memory at disp(base).
+            masm.pextrd_imr(lane, src.code(), dest.disp(), dest.base());
+            break;
+          default: // Any other operand kind is not handled by this overload.
+            MOZ_CRASH("unexpected operand kind");
+        }
+    }
     void psrldq(Imm32 shift, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
         masm.psrldq_ir(shift.value, dest.code());
     }
     void psllq(Imm32 shift, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
         masm.psllq_ir(shift.value, dest.code());
     }
--- a/js/src/jit/shared/BaseAssembler-x86-shared.h
+++ b/js/src/jit/shared/BaseAssembler-x86-shared.h
@@ -375,25 +375,27 @@ private:
         OP2_PSLLD_VdqWdq    = 0xF2,
         OP2_PSUBD_VdqWdq    = 0xFA,
         OP2_PADDD_VdqWdq    = 0xFE
     } TwoByteOpcodeID;
 
     typedef enum {
         OP3_ROUNDSS_VsdWsd  = 0x0A,
         OP3_ROUNDSD_VsdWsd  = 0x0B,
         OP3_BLENDPS_VpsWpsIb = 0x0C,
+        OP3_PEXTRD_EdVdqIb  = 0x16, // Kept in ascending opcode order.
         OP3_PTEST_VdVd      = 0x17,
         OP3_INSERTPS_VpsUps = 0x21,
         OP3_PINSRD_VdqEdIb  = 0x22
     } ThreeByteOpcodeID;
 
     typedef enum {
         ESCAPE_PTEST        = 0x38,
         ESCAPE_PINSRD       = 0x3A,
+        ESCAPE_PEXTRD       = 0x3A,
         ESCAPE_ROUNDSD      = 0x3A,
         ESCAPE_INSERTPS     = 0x3A,
         ESCAPE_BLENDPS      = 0x3A
     } ThreeByteEscape;
 
     TwoByteOpcodeID jccRel32(Condition cond)
     {
         return (TwoByteOpcodeID)(OP2_JCC_rel32 + cond);
@@ -3703,32 +3705,51 @@ public:
         m_formatter.prefix(PRE_SSE_66);
         m_formatter.threeByteOp(OP3_INSERTPS_VpsUps, ESCAPE_INSERTPS, (RegisterID)dst, (RegisterID)src);
         m_formatter.immediate8(uint8_t(mask));
     }
 
     void pinsrd_irr(unsigned lane, RegisterID src, XMMRegisterID dst)
     {
         MOZ_ASSERT(lane < 4);
-        spew("pinsrd     $%x, %s, %s", lane, nameIReg(src), nameFPReg(dst));
+        spew("pinsrd     $%x, %s, %s", lane, nameIReg(4, src), nameFPReg(dst));
         m_formatter.prefix(PRE_SSE_66);
         m_formatter.threeByteOp(OP3_PINSRD_VdqEdIb, ESCAPE_PINSRD, (RegisterID)dst, (RegisterID)src);
         m_formatter.immediate8(uint8_t(lane));
     }
 
     void pinsrd_imr(unsigned lane, int offset, RegisterID base, XMMRegisterID dst)
     {
         MOZ_ASSERT(lane < 4);
         spew("pinsrd     $%x, %s0x%x(%s), %s", lane, PRETTY_PRINT_OFFSET(offset),
              nameIReg(base), nameFPReg(dst));
         m_formatter.prefix(PRE_SSE_66);
         m_formatter.threeByteOp(OP3_PINSRD_VdqEdIb, ESCAPE_PINSRD, (RegisterID)dst, base, offset);
         m_formatter.immediate8(uint8_t(lane));
     }
 
+    void pextrd_irr(unsigned lane, XMMRegisterID src, RegisterID dst)
+    {
+        MOZ_ASSERT(lane < 4); // Only lanes 0-3 are accepted.
+        spew("pextrd     $%x, %s, %s", lane, nameFPReg(src), nameIReg(4, dst));
+        m_formatter.prefix(PRE_SSE_66); // Prefix is emitted ahead of the opcode.
+        m_formatter.threeByteOp(OP3_PEXTRD_EdVdqIb, ESCAPE_PEXTRD, (RegisterID)src, (RegisterID)dst);
+        m_formatter.immediate8(uint8_t(lane)); // Lane index is the trailing imm8.
+    }
+
+    void pextrd_imr(unsigned lane, XMMRegisterID src, int offset, RegisterID base)
+    {
+        MOZ_ASSERT(lane < 4); // Only lanes 0-3 are accepted.
+        spew("pextrd     $%x, %s, %s0x%x(%s)", lane, nameFPReg(src),
+             PRETTY_PRINT_OFFSET(offset), nameIReg(base));
+        m_formatter.prefix(PRE_SSE_66); // Prefix is emitted ahead of the opcode.
+        m_formatter.threeByteOp(OP3_PEXTRD_EdVdqIb, ESCAPE_PEXTRD, (RegisterID)src, base, offset);
+        m_formatter.immediate8(uint8_t(lane)); // Lane index is the trailing imm8.
+    }
+
     void blendps_irr(unsigned imm, XMMRegisterID src, XMMRegisterID dst)
     {
         MOZ_ASSERT(imm < 16);
         spew("blendps    $%x, %s, %s", imm, nameFPReg(src), nameFPReg(dst));
         m_formatter.prefix(PRE_SSE_66);
         m_formatter.threeByteOp(OP3_BLENDPS_VpsWpsIb, ESCAPE_BLENDPS, (RegisterID)dst, (RegisterID)src);
         m_formatter.immediate8(uint8_t(imm));
     }
--- a/js/src/jit/shared/CodeGenerator-x86-shared.cpp
+++ b/js/src/jit/shared/CodeGenerator-x86-shared.cpp
@@ -2283,16 +2283,18 @@ CodeGeneratorX86Shared::visitSimdExtract
 {
     FloatRegister input = ToFloatRegister(ins->input());
     Register output = ToRegister(ins->output());
 
     SimdLane lane = ins->lane();
     if (lane == LaneX) {
         // The value we want to extract is in the low double-word
         masm.moveLowInt32(input, output);
+    } else if (AssemblerX86Shared::HasSSE41()) {
+        masm.pextrd(lane, input, output);
     } else {
         uint32_t mask = MacroAssembler::ComputeShuffleMask(lane);
         masm.shuffleInt32(mask, input, ScratchSimdReg);
         masm.moveLowInt32(ScratchSimdReg, output);
     }
     return true;
 }
 
--- a/js/src/jit/x86/MacroAssembler-x86.h
+++ b/js/src/jit/x86/MacroAssembler-x86.h
@@ -830,19 +830,24 @@ class MacroAssemblerX86 : public MacroAs
                               Label *label)
     {
         MOZ_ASSERT(cond == Equal || cond == NotEqual);
         branchTestValue(cond, val, MagicValue(why), label);
     }
 
     // Note: this function clobbers the source register.
     void boxDouble(FloatRegister src, const ValueOperand &dest) {
         movd(src, dest.payloadReg());
-        psrldq(Imm32(4), src);
-        movd(src, dest.typeReg());
+        if (Assembler::HasSSE41()) {
+            // pextrd reads lane 1 in place, so src is left intact here.
+            pextrd(1, src, dest.typeReg());
+        } else {
+            psrldq(Imm32(4), src);
+            movd(src, dest.typeReg());
+        }
     }
     void boxNonDouble(JSValueType type, Register src, const ValueOperand &dest) {
         if (src != dest.payloadReg())
             movl(src, dest.payloadReg());
         movl(ImmType(type), dest.typeReg());
     }
 
     void unboxNonDouble(const ValueOperand &src, Register dest) {