--- a/js/src/jit/shared/Assembler-x86-shared.h
+++ b/js/src/jit/shared/Assembler-x86-shared.h
@@ -1491,85 +1491,85 @@ class AssemblerX86Shared : public Assemb
}
void idiv(Register divisor) {
masm.idivl_r(divisor.code());
}
void udiv(Register divisor) {
masm.divl_r(divisor.code());
}
- void pinsrd(unsigned lane, Register src, FloatRegister dest) {
+ void vpinsrd(unsigned lane, Register src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE41());
- masm.pinsrd_irr(lane, src.code(), dest.code());
+ masm.vpinsrd_irr(lane, src1.code(), src0.code(), dest.code());
}
- void pinsrd(unsigned lane, const Operand &src, FloatRegister dest) {
+ void vpinsrd(unsigned lane, const Operand &src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE41());
- switch (src.kind()) {
+ switch (src1.kind()) {
case Operand::REG:
- masm.pinsrd_irr(lane, src.reg(), dest.code());
+ masm.vpinsrd_irr(lane, src1.reg(), src0.code(), dest.code());
break;
case Operand::MEM_REG_DISP:
- masm.pinsrd_imr(lane, src.disp(), src.base(), dest.code());
+ masm.vpinsrd_imr(lane, src1.disp(), src1.base(), src0.code(), dest.code());
break;
default:
MOZ_CRASH("unexpected operand kind");
}
}
- void pextrd(unsigned lane, FloatRegister src, Register dest) {
+ void vpextrd(unsigned lane, FloatRegister src, Register dest) {
MOZ_ASSERT(HasSSE41());
- masm.pextrd_irr(lane, src.code(), dest.code());
+ masm.vpextrd_irr(lane, src.code(), dest.code());
}
- void pextrd(unsigned lane, FloatRegister src, const Operand &dest) {
+ void vpextrd(unsigned lane, FloatRegister src, const Operand &dest) {
MOZ_ASSERT(HasSSE41());
switch (dest.kind()) {
case Operand::REG:
- masm.pextrd_irr(lane, src.code(), dest.reg());
+ masm.vpextrd_irr(lane, src.code(), dest.reg());
break;
case Operand::MEM_REG_DISP:
- masm.pextrd_imr(lane, src.code(), dest.disp(), dest.base());
+ masm.vpextrd_irm(lane, src.code(), dest.disp(), dest.base());
break;
default:
MOZ_CRASH("unexpected operand kind");
}
}
- void psrldq(Imm32 shift, FloatRegister dest) {
+ void vpsrldq(Imm32 shift, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
- masm.psrldq_ir(shift.value, dest.code());
+ masm.vpsrldq_ir(shift.value, src0.code(), dest.code());
}
- void psllq(Imm32 shift, FloatRegister dest) {
+ void vpsllq(Imm32 shift, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
- masm.psllq_ir(shift.value, dest.code());
+ masm.vpsllq_ir(shift.value, src0.code(), dest.code());
}
- void psrlq(Imm32 shift, FloatRegister dest) {
+ void vpsrlq(Imm32 shift, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
- masm.psrlq_ir(shift.value, dest.code());
+ masm.vpsrlq_ir(shift.value, src0.code(), dest.code());
}
void vpslld(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
masm.vpslld_rr(src1.code(), src0.code(), dest.code());
}
- void pslld(Imm32 count, FloatRegister dest) {
+ void vpslld(Imm32 count, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
- masm.pslld_ir(count.value, dest.code());
+ masm.vpslld_ir(count.value, src0.code(), dest.code());
}
void vpsrad(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
masm.vpsrad_rr(src1.code(), src0.code(), dest.code());
}
- void psrad(Imm32 count, FloatRegister dest) {
+ void vpsrad(Imm32 count, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
- masm.psrad_ir(count.value, dest.code());
+ masm.vpsrad_ir(count.value, src0.code(), dest.code());
}
void vpsrld(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
masm.vpsrld_rr(src1.code(), src0.code(), dest.code());
}
- void psrld(Imm32 count, FloatRegister dest) {
+ void vpsrld(Imm32 count, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
- masm.psrld_ir(count.value, dest.code());
+ masm.vpsrld_ir(count.value, src0.code(), dest.code());
}
void vcvtsi2sd(const Operand &src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
switch (src1.kind()) {
case Operand::REG:
masm.vcvtsi2sd_rr(src1.reg(), src0.code(), dest.code());
break;
@@ -2068,31 +2068,31 @@ class AssemblerX86Shared : public Assemb
case Operand::MEM_ADDRESS32:
masm.vpandn_mr(src1.address(), src0.code(), dest.code());
break;
default:
MOZ_CRASH("unexpected operand kind");
}
}
- void pshufd(uint32_t mask, FloatRegister src, FloatRegister dest) {
+ void vpshufd(uint32_t mask, FloatRegister src, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
- masm.pshufd_irr(mask, src.code(), dest.code());
+ masm.vpshufd_irr(mask, src.code(), dest.code());
}
- void pshufd(uint32_t mask, const Operand &src, FloatRegister dest) {
+ void vpshufd(uint32_t mask, const Operand &src1, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
- switch (src.kind()) {
+ switch (src1.kind()) {
case Operand::FPREG:
- masm.pshufd_irr(mask, src.fpu(), dest.code());
+ masm.vpshufd_irr(mask, src1.fpu(), dest.code());
break;
case Operand::MEM_REG_DISP:
- masm.pshufd_imr(mask, src.disp(), src.base(), dest.code());
+ masm.vpshufd_imr(mask, src1.disp(), src1.base(), dest.code());
break;
case Operand::MEM_ADDRESS32:
- masm.pshufd_imr(mask, src.address(), dest.code());
+ masm.vpshufd_imr(mask, src1.address(), dest.code());
break;
default:
MOZ_CRASH("unexpected operand kind");
}
}
void vmovhlps(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
masm.vmovhlps_rr(src1.code(), src0.code(), dest.code());
@@ -2104,31 +2104,31 @@ class AssemblerX86Shared : public Assemb
void vunpcklps(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
masm.vunpcklps_rr(src1.code(), src0.code(), dest.code());
}
void vunpckhps(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
masm.vunpckhps_rr(src1.code(), src0.code(), dest.code());
}
- void shufps(uint32_t mask, FloatRegister src, FloatRegister dest) {
+ void vshufps(uint32_t mask, FloatRegister src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
- masm.shufps_irr(mask, src.code(), dest.code());
+ masm.vshufps_irr(mask, src1.code(), src0.code(), dest.code());
}
- void shufps(uint32_t mask, const Operand &src, FloatRegister dest) {
+ void vshufps(uint32_t mask, const Operand &src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
- switch (src.kind()) {
+ switch (src1.kind()) {
case Operand::FPREG:
- masm.shufps_irr(mask, src.fpu(), dest.code());
+ masm.vshufps_irr(mask, src1.fpu(), src0.code(), dest.code());
break;
case Operand::MEM_REG_DISP:
- masm.shufps_imr(mask, src.disp(), src.base(), dest.code());
+ masm.vshufps_imr(mask, src1.disp(), src1.base(), src0.code(), dest.code());
break;
case Operand::MEM_ADDRESS32:
- masm.shufps_imr(mask, src.address(), dest.code());
+ masm.vshufps_imr(mask, src1.address(), src0.code(), dest.code());
break;
default:
MOZ_CRASH("unexpected operand kind");
}
}
void vaddsd(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
masm.vaddsd_rr(src1.code(), src0.code(), dest.code());
@@ -2298,38 +2298,38 @@ class AssemblerX86Shared : public Assemb
void vsqrtsd(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
masm.vsqrtsd_rr(src1.code(), src0.code(), dest.code());
}
void vsqrtss(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
masm.vsqrtss_rr(src1.code(), src0.code(), dest.code());
}
- void roundsd(X86Assembler::RoundingMode mode, FloatRegister src, FloatRegister dest) {
+ void vroundsd(X86Assembler::RoundingMode mode, FloatRegister src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE41());
- masm.roundsd_rr(mode, src.code(), dest.code());
+ masm.vroundsd_irr(mode, src1.code(), src0.code(), dest.code());
}
- void roundss(X86Assembler::RoundingMode mode, FloatRegister src, FloatRegister dest) {
+ void vroundss(X86Assembler::RoundingMode mode, FloatRegister src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE41());
- masm.roundss_rr(mode, src.code(), dest.code());
+ masm.vroundss_irr(mode, src1.code(), src0.code(), dest.code());
}
- unsigned insertpsMask(SimdLane sourceLane, SimdLane destLane, unsigned zeroMask = 0)
+ unsigned vinsertpsMask(SimdLane sourceLane, SimdLane destLane, unsigned zeroMask = 0)
{
// Note that the sourceLane bits are ignored in the case of a source
        // memory operand, and the source is the given 32-bit memory location.
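+        // The imm8 layout matches the INSERTPS encoding: bits 3:0 are the zero
+        // mask, bits 5:4 select the destination lane, and bits 7:6 select the
+        // source lane.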
MOZ_ASSERT(zeroMask < 16);
        unsigned ret = zeroMask;
ret |= unsigned(destLane) << 4;
ret |= unsigned(sourceLane) << 6;
MOZ_ASSERT(ret < 256);
return ret;
}
- void insertps(FloatRegister src, FloatRegister dest, unsigned mask) {
+ void vinsertps(uint32_t mask, FloatRegister src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE41());
- masm.insertps_irr(mask, src.code(), dest.code());
+ masm.vinsertps_irr(mask, src1.code(), src0.code(), dest.code());
}
unsigned blendpsMask(bool x, bool y, bool z, bool w) {
return x | (y << 1) | (z << 2) | (w << 3);
}
void vblendps(unsigned mask, FloatRegister src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE41());
masm.vblendps_irr(mask, src1.code(), src0.code(), dest.code());
}
--- a/js/src/jit/shared/BaseAssembler-x86-shared.h
+++ b/js/src/jit/shared/BaseAssembler-x86-shared.h
@@ -327,16 +327,25 @@ private:
PRE_SSE_F3 = 0xF3,
OP_HLT = 0xF4,
OP_GROUP3_EbIb = 0xF6,
OP_GROUP3_Ev = 0xF7,
OP_GROUP3_EvIz = 0xF7, // OP_GROUP3_Ev has an immediate, when instruction is a test.
OP_GROUP5_Ev = 0xFF
};
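+    // These values fill the ModRM reg field (the /digit) of the immediate-form
+    // packed shift opcodes, selecting which shift is performed.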
+ enum ShiftID {
+ Shift_vpsrld = 2,
+ Shift_vpsrlq = 2,
+ Shift_vpsrldq = 3,
+ Shift_vpsrad = 4,
+ Shift_vpslld = 6,
+ Shift_vpsllq = 6
+ };
+
enum TwoByteOpcodeID {
OP2_UD2 = 0x0B,
OP2_MOVSD_VsdWsd = 0x10,
OP2_MOVPS_VpsWps = 0x10,
OP2_MOVSD_WsdVsd = 0x11,
OP2_MOVPS_WpsVps = 0x11,
OP2_MOVHLPS_VqUq = 0x12,
OP2_MOVSLDUP_VpsWps = 0x12,
@@ -423,16 +432,17 @@ private:
};
// Test whether the given opcode should be printed with its operands reversed.
static inline bool IsXMMReversedOperands(TwoByteOpcodeID opcode) {
switch (opcode) {
case OP2_MOVSD_WsdVsd: // also OP2_MOVPS_WpsVps
case OP2_MOVAPS_WsdVsd:
case OP2_MOVDQ_WdqVdq:
+ case OP3_PEXTRD_EdVdqIb:
return true;
default:
break;
}
return false;
}
enum ThreeByteOpcodeID {
@@ -2750,28 +2760,25 @@ public:
}
void vpcmpgtd_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst)
{
twoByteOpSimd("vpcmpgtd", VEX_PD, OP2_PCMPGTD_VdqWdq, address, src0, dst);
}
void vcmpps_rr(uint8_t order, XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
{
- twoByteOpSimd("vcmpps", VEX_PS, OP2_CMPPS_VpsWps, src1, src0, dst);
- m_formatter.immediate8s(order);
+ twoByteOpImmSimd("vcmpps", VEX_PS, OP2_CMPPS_VpsWps, order, src1, src0, dst);
}
void vcmpps_mr(uint8_t order, int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
{
- twoByteOpSimd("vcmpps", VEX_PS, OP2_CMPPS_VpsWps, offset, base, src0, dst);
- m_formatter.immediate8s(order);
+ twoByteOpImmSimd("vcmpps", VEX_PS, OP2_CMPPS_VpsWps, order, offset, base, src0, dst);
}
void vcmpps_mr(uint8_t order, const void* address, XMMRegisterID src0, XMMRegisterID dst)
{
- twoByteOpSimd("vcmpps", VEX_PS, OP2_CMPPS_VpsWps, address, src0, dst);
- m_formatter.immediate8s(order);
+ twoByteOpImmSimd("vcmpps", VEX_PS, OP2_CMPPS_VpsWps, order, address, src0, dst);
}
void vrcpps_rr(XMMRegisterID src, XMMRegisterID dst) {
twoByteOpSimd("vrcpps", VEX_PS, OP2_RCPPS_VpsWps, src, X86Registers::invalid_xmm, dst);
}
void vrcpps_mr(int32_t offset, RegisterID base, XMMRegisterID dst) {
twoByteOpSimd("vrcpps", VEX_PS, OP2_RCPPS_VpsWps, offset, base, X86Registers::invalid_xmm, dst);
}
@@ -2972,133 +2979,101 @@ public:
{
twoByteOpSimd("vpandn", VEX_PD, OP2_PANDNDQ_VdqWdq, offset, base, src0, dst);
}
void vpandn_mr(const void *address, XMMRegisterID src0, XMMRegisterID dst)
{
twoByteOpSimd("vpandn", VEX_PD, OP2_PANDNDQ_VdqWdq, address, src0, dst);
}
- void pshufd_irr(uint32_t mask, XMMRegisterID src, XMMRegisterID dst)
- {
- spew("pshufd $0x%x, %s, %s", mask, nameFPReg(src), nameFPReg(dst));
- m_formatter.prefix(PRE_SSE_66);
- m_formatter.twoByteOp(OP2_PSHUFD_VdqWdqIb, (RegisterID)src, (RegisterID)dst);
- m_formatter.immediate8u(mask);
- }
-
- void pshufd_imr(uint32_t mask, int32_t offset, RegisterID base, XMMRegisterID dst)
- {
- MOZ_ASSERT(mask < 256);
- spew("pshufd $0x%x, " MEM_ob ", %s", mask, ADDR_ob(offset, base), nameFPReg(dst));
- m_formatter.prefix(PRE_SSE_66);
- m_formatter.twoByteOp(OP2_PSHUFD_VdqWdqIb, offset, base, (RegisterID)dst);
- m_formatter.immediate8u(mask);
- }
-
- void pshufd_imr(uint32_t mask, const void* address, XMMRegisterID dst)
- {
- spew("pshufd $0x%x, %p, %s", mask, address, nameFPReg(dst));
- m_formatter.prefix(PRE_SSE_66);
- m_formatter.twoByteOp(OP2_PSHUFD_VdqWdqIb, address, (RegisterID)dst);
- m_formatter.immediate8u(mask);
- }
-
- void shufps_irr(uint32_t mask, XMMRegisterID src, XMMRegisterID dst)
- {
- spew("shufps $0x%x, %s, %s", mask, nameFPReg(src), nameFPReg(dst));
- m_formatter.twoByteOp(OP2_SHUFPS_VpsWpsIb, (RegisterID)src, (RegisterID)dst);
- m_formatter.immediate8u(mask);
- }
-
- void shufps_imr(uint32_t mask, int32_t offset, RegisterID base, XMMRegisterID dst)
- {
- spew("shufps $0x%x, " MEM_ob ", %s", mask, ADDR_ob(offset, base), nameFPReg(dst));
- m_formatter.twoByteOp(OP2_SHUFPS_VpsWpsIb, offset, base, (RegisterID)dst);
- m_formatter.immediate8u(mask);
- }
-
- void shufps_imr(uint32_t mask, const void* address, XMMRegisterID dst)
- {
- spew("shufps $0x%x, %p, %s", mask, address, nameFPReg(dst));
- m_formatter.twoByteOp(OP2_SHUFPS_VpsWpsIb, address, (RegisterID)dst);
- m_formatter.immediate8u(mask);
+ void vpshufd_irr(uint32_t mask, XMMRegisterID src, XMMRegisterID dst)
+ {
+ twoByteOpImmSimd("vpshufd", VEX_PD, OP2_PSHUFD_VdqWdqIb, mask, src, X86Registers::invalid_xmm, dst);
+ }
+ void vpshufd_imr(uint32_t mask, int32_t offset, RegisterID base, XMMRegisterID dst)
+ {
+ twoByteOpImmSimd("vpshufd", VEX_PD, OP2_PSHUFD_VdqWdqIb, mask, offset, base, X86Registers::invalid_xmm, dst);
+ }
+ void vpshufd_imr(uint32_t mask, const void* address, XMMRegisterID dst)
+ {
+ twoByteOpImmSimd("vpshufd", VEX_PD, OP2_PSHUFD_VdqWdqIb, mask, address, X86Registers::invalid_xmm, dst);
+ }
+
+ void vshufps_irr(uint32_t mask, XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
+ {
+ twoByteOpImmSimd("vshufps", VEX_PS, OP2_SHUFPS_VpsWpsIb, mask, src1, src0, dst);
+ }
+ void vshufps_imr(uint32_t mask, int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
+ {
+ twoByteOpImmSimd("vshufps", VEX_PS, OP2_SHUFPS_VpsWpsIb, mask, offset, base, src0, dst);
+ }
+ void vshufps_imr(uint32_t mask, const void* address, XMMRegisterID src0, XMMRegisterID dst)
+ {
+ twoByteOpImmSimd("vshufps", VEX_PS, OP2_SHUFPS_VpsWpsIb, mask, address, src0, dst);
}
void vmovhlps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
{
twoByteOpSimd("vmovhlps", VEX_PS, OP2_MOVHLPS_VqUq, src1, src0, dst);
}
void vmovlhps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
{
twoByteOpSimd("vmovlhps", VEX_PS, OP2_MOVLHPS_VqUq, src1, src0, dst);
}
- void psrldq_ir(int shift, XMMRegisterID dest)
- {
- spew("psrldq $%d, %s", shift, nameFPReg(dest));
- m_formatter.prefix(PRE_SSE_66);
- m_formatter.twoByteOp(OP2_PSRLDQ_Vd, (RegisterID)dest, (RegisterID)3);
- m_formatter.immediate8s(shift);
- }
-
- void psllq_ir(int shift, XMMRegisterID dest)
- {
- spew("psllq $%d, %s", shift, nameFPReg(dest));
- m_formatter.prefix(PRE_SSE_66);
- m_formatter.twoByteOp(OP2_PSRLDQ_Vd, (RegisterID)dest, (RegisterID)6);
- m_formatter.immediate8s(shift);
- }
-
- void psrlq_ir(int shift, XMMRegisterID dest)
- {
- spew("psrlq $%d, %s", shift, nameFPReg(dest));
- m_formatter.prefix(PRE_SSE_66);
- m_formatter.twoByteOp(OP2_PSRLDQ_Vd, (RegisterID)dest, (RegisterID)2);
- m_formatter.immediate8s(shift);
+ void vpsrldq_ir(uint32_t count, XMMRegisterID src, XMMRegisterID dst)
+ {
+ MOZ_ASSERT(count < 16);
+ shiftOpImmSimd("vpsrldq", OP2_PSRLDQ_Vd, Shift_vpsrldq, count, src, dst);
+ }
+
+ void vpsllq_ir(uint32_t count, XMMRegisterID src, XMMRegisterID dst)
+ {
+ MOZ_ASSERT(count < 64);
+ shiftOpImmSimd("vpsllq", OP2_PSRLDQ_Vd, Shift_vpsllq, count, src, dst);
+ }
+
+ void vpsrlq_ir(uint32_t count, XMMRegisterID src, XMMRegisterID dst)
+ {
+ MOZ_ASSERT(count < 64);
+ shiftOpImmSimd("vpsrlq", OP2_PSRLDQ_Vd, Shift_vpsrlq, count, src, dst);
}
void vpslld_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
{
twoByteOpSimd("vpslld", VEX_PD, OP2_PSLLD_VdqWdq, src1, src0, dst);
}
- void pslld_ir(int32_t count, XMMRegisterID dst)
- {
- spew("pslld $%d, %s", count, nameFPReg(dst));
- m_formatter.prefix(PRE_SSE_66);
- m_formatter.twoByteOp(OP2_PSLLD_UdqIb, (RegisterID)dst, (RegisterID)6);
- m_formatter.immediate8s(int8_t(count));
+ void vpslld_ir(uint32_t count, XMMRegisterID src, XMMRegisterID dst)
+ {
+ MOZ_ASSERT(count < 32);
+ shiftOpImmSimd("vpslld", OP2_PSLLD_UdqIb, Shift_vpslld, count, src, dst);
}
void vpsrad_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
{
twoByteOpSimd("vpsrad", VEX_PD, OP2_PSRAD_VdqWdq, src1, src0, dst);
}
- void psrad_ir(int32_t count, XMMRegisterID dst)
- {
- spew("psrad $%d, %s", count, nameFPReg(dst));
- m_formatter.prefix(PRE_SSE_66);
- m_formatter.twoByteOp(OP2_PSRAD_UdqIb, (RegisterID)dst, (RegisterID)4);
- m_formatter.immediate8s(int8_t(count));
+    void vpsrad_ir(uint32_t count, XMMRegisterID src, XMMRegisterID dst)
+ {
+ MOZ_ASSERT(count < 32);
+ shiftOpImmSimd("vpsrad", OP2_PSRAD_UdqIb, Shift_vpsrad, count, src, dst);
}
void vpsrld_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
{
twoByteOpSimd("vpsrld", VEX_PD, OP2_PSRLD_VdqWdq, src1, src0, dst);
}
- void psrld_ir(int32_t count, XMMRegisterID dst)
- {
- spew("psrld $%d, %s", count, nameFPReg(dst));
- m_formatter.prefix(PRE_SSE_66);
- m_formatter.twoByteOp(OP2_PSRLD_UdqIb, (RegisterID)dst, (RegisterID)2);
- m_formatter.immediate8s(int8_t(count));
+ void vpsrld_ir(uint32_t count, XMMRegisterID src, XMMRegisterID dst)
+ {
+ MOZ_ASSERT(count < 32);
+ shiftOpImmSimd("vpsrld", OP2_PSRLD_UdqIb, Shift_vpsrld, count, src, dst);
}
void vmovmskpd_rr(XMMRegisterID src, RegisterID dst)
{
twoByteOpSimdInt32("vmovmskpd", VEX_PD, OP2_MOVMSKPD_EdVd, src, dst);
}
void vmovmskps_rr(XMMRegisterID src, RegisterID dst)
@@ -3427,22 +3402,20 @@ public:
twoByteOpSimd("vmulsd", VEX_SD, OP2_MULSD_VsdWsd, offset, base, src0, dst);
}
void vmulss_mr(int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
{
twoByteOpSimd("vmulss", VEX_SS, OP2_MULSD_VsdWsd, offset, base, src0, dst);
}
- void pextrw_irr(int whichWord, XMMRegisterID src, RegisterID dst)
- {
- FIXME_INSN_PRINTING;
- m_formatter.prefix(PRE_SSE_66);
- m_formatter.twoByteOp(OP2_PEXTRW_GdUdIb, (RegisterID)src, (RegisterID)dst);
- m_formatter.immediate8(whichWord);
+ void vpextrw_irr(uint32_t whichWord, XMMRegisterID src, RegisterID dst)
+ {
+ MOZ_ASSERT(whichWord < 8);
+ twoByteOpImmSimdInt32("vpextrw", VEX_PD, OP2_PEXTRW_GdUdIb, whichWord, src, dst);
}
void vsubsd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
{
twoByteOpSimd("vsubsd", VEX_SD, OP2_SUBSD_VsdWsd, src1, src0, dst);
}
void vsubss_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
@@ -3575,90 +3548,70 @@ public:
twoByteOpSimd("vsqrtsd", VEX_SD, OP2_SQRTSD_VsdWsd, src1, src0, dst);
}
void vsqrtss_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
{
twoByteOpSimd("vsqrtss", VEX_SS, OP2_SQRTSS_VssWss, src1, src0, dst);
}
- void roundsd_rr(RoundingMode mode, XMMRegisterID src, XMMRegisterID dst)
- {
- spew("roundsd $%d, %s, %s", (int)mode, nameFPReg(src), nameFPReg(dst));
- m_formatter.prefix(PRE_SSE_66);
- m_formatter.threeByteOp(OP3_ROUNDSD_VsdWsd, ESCAPE_ROUNDSD, (RegisterID)src, (RegisterID)dst);
- m_formatter.immediate8u(mode);
- }
-
- void roundss_rr(RoundingMode mode, XMMRegisterID src, XMMRegisterID dst)
- {
- spew("roundss $%d, %s, %s", (int)mode, nameFPReg(src), nameFPReg(dst));
- m_formatter.prefix(PRE_SSE_66);
- m_formatter.threeByteOp(OP3_ROUNDSS_VsdWsd, ESCAPE_ROUNDSD, (RegisterID)src, (RegisterID)dst);
- m_formatter.immediate8(mode); // modes are the same for roundsd and roundss
- }
-
- void insertps_irr(unsigned mask, XMMRegisterID src, XMMRegisterID dst)
- {
- spew("insertps $0x%x, %s, %s", mask, nameFPReg(src), nameFPReg(dst));
- m_formatter.prefix(PRE_SSE_66);
- m_formatter.threeByteOp(OP3_INSERTPS_VpsUps, ESCAPE_INSERTPS, (RegisterID)src, (RegisterID)dst);
- m_formatter.immediate8u(mask);
- }
-
- void pinsrd_irr(unsigned lane, RegisterID src, XMMRegisterID dst)
+ void vroundsd_irr(RoundingMode mode, XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
+ {
+ threeByteOpImmSimd("vroundsd", VEX_PD, OP3_ROUNDSD_VsdWsd, ESCAPE_ROUNDSD, mode, src1, src0, dst);
+ }
+
+ void vroundss_irr(RoundingMode mode, XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
+ {
+ threeByteOpImmSimd("vroundss", VEX_PD, OP3_ROUNDSS_VsdWsd, ESCAPE_ROUNDSD, mode, src1, src0, dst);
+ }
+
+ void vinsertps_irr(uint32_t mask, XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
+ {
+ threeByteOpImmSimd("vinsertps", VEX_PD, OP3_INSERTPS_VpsUps, ESCAPE_INSERTPS, mask, src1, src0, dst);
+ }
+
+ void vpinsrd_irr(unsigned lane, RegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
{
MOZ_ASSERT(lane < 4);
- spew("pinsrd $0x%x, %s, %s", lane, nameIReg(4, src), nameFPReg(dst));
- m_formatter.prefix(PRE_SSE_66);
- m_formatter.threeByteOp(OP3_PINSRD_VdqEdIb, ESCAPE_PINSRD, (RegisterID)src, (RegisterID)dst);
- m_formatter.immediate8u(lane);
- }
-
- void pinsrd_imr(unsigned lane, int32_t offset, RegisterID base, XMMRegisterID dst)
+ threeByteOpImmInt32Simd("vpinsrd", VEX_PD, OP3_PINSRD_VdqEdIb, ESCAPE_PINSRD, lane, src1, src0, dst);
+ }
+
+ void vpinsrd_imr(unsigned lane, int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
{
MOZ_ASSERT(lane < 4);
- spew("pinsrd $0x%x, " MEM_ob ", %s", lane, ADDR_ob(offset, base), nameFPReg(dst));
- m_formatter.prefix(PRE_SSE_66);
- m_formatter.threeByteOp(OP3_PINSRD_VdqEdIb, ESCAPE_PINSRD, offset, base, (RegisterID)dst);
- m_formatter.immediate8u(lane);
- }
-
- void pextrd_irr(unsigned lane, XMMRegisterID src, RegisterID dst)
+ threeByteOpImmInt32Simd("vpinsrd", VEX_PD, OP3_PINSRD_VdqEdIb, ESCAPE_PINSRD, lane, offset, base, src0, dst);
+ }
+
+ void vpextrd_irr(unsigned lane, XMMRegisterID src, RegisterID dst)
{
MOZ_ASSERT(lane < 4);
- spew("pextrd $0x%x, %s, %s", lane, nameFPReg(src), nameIReg(4, dst));
- m_formatter.prefix(PRE_SSE_66);
- m_formatter.threeByteOp(OP3_PEXTRD_EdVdqIb, ESCAPE_PEXTRD, (RegisterID)dst, (RegisterID)src);
- m_formatter.immediate8u(lane);
- }
-
- void pextrd_imr(unsigned lane, XMMRegisterID src, int32_t offset, RegisterID base)
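+        // PEXTRD encodes its integer destination in ModRM.rm and its xmm source
+        // in ModRM.reg, so the operands are passed to the helper swapped (with
+        // casts) relative to its (xmm src, integer dst) parameter order.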
+ threeByteOpImmSimdInt32("vpextrd", VEX_PD, OP3_PEXTRD_EdVdqIb, ESCAPE_PEXTRD, lane, (XMMRegisterID)dst, (RegisterID)src);
+ }
+
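+    // The memory-destination form below still emits only the legacy SSE
+    // encoding.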
+ void vpextrd_irm(unsigned lane, XMMRegisterID src, int32_t offset, RegisterID base)
{
MOZ_ASSERT(lane < 4);
spew("pextrd $0x%x, %s, " MEM_ob, lane, nameFPReg(src), ADDR_ob(offset, base));
m_formatter.prefix(PRE_SSE_66);
m_formatter.threeByteOp(OP3_PEXTRD_EdVdqIb, ESCAPE_PEXTRD, offset, base, (RegisterID)src);
m_formatter.immediate8u(lane);
}
void vblendps_irr(unsigned imm, XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
{
MOZ_ASSERT(imm < 16);
// Despite being a "ps" instruction, vblendps is encoded with the "pd" prefix.
- threeByteOpSimd("vblendps", VEX_PD, OP3_BLENDPS_VpsWpsIb, ESCAPE_BLENDPS, src1, src0, dst);
- m_formatter.immediate8u(imm);
+ threeByteOpImmSimd("vblendps", VEX_PD, OP3_BLENDPS_VpsWpsIb, ESCAPE_BLENDPS, imm, src1, src0, dst);
}
void vblendps_imr(unsigned imm, int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
{
MOZ_ASSERT(imm < 16);
// Despite being a "ps" instruction, vblendps is encoded with the "pd" prefix.
- threeByteOpSimd("vblendps", VEX_PD, OP3_BLENDPS_VpsWpsIb, ESCAPE_BLENDPS, offset, base, src0, dst);
- m_formatter.immediate8u(imm);
+        threeByteOpImmSimd("vblendps", VEX_PD, OP3_BLENDPS_VpsWpsIb, ESCAPE_BLENDPS, imm, offset, base, src0, dst);
}
void vblendvps_rr(XMMRegisterID mask, XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
vblendvOpSimd(mask, src1, src0, dst);
}
void vblendvps_mr(XMMRegisterID mask, int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) {
vblendvOpSimd(mask, offset, base, src0, dst);
}
@@ -4155,16 +4108,35 @@ private:
else
spew("%-11s%s, %s", name, nameFPReg(rm), nameFPReg(dst));
} else {
spew("%-11s%s, %s, %s", name, nameFPReg(rm), nameFPReg(src0), nameFPReg(dst));
}
m_formatter.twoByteOpVex(ty, opcode, (RegisterID)rm, src0, dst);
}
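+    // As twoByteOpSimd above, but appends an 8-bit immediate after the operands
+    // (used by vcmpps, vpshufd and vshufps).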
+ void twoByteOpImmSimd(const char *name, VexOperandType ty, TwoByteOpcodeID opcode,
+ uint32_t imm, XMMRegisterID rm, XMMRegisterID src0, XMMRegisterID dst)
+ {
+ if (useLegacySSEEncoding(src0, dst)) {
+ spew("%-11s$0x%x, %s, %s", legacySSEOpName(name), imm, nameFPReg(rm), nameFPReg(dst));
+ m_formatter.legacySSEPrefix(ty);
+ m_formatter.twoByteOp(opcode, (RegisterID)rm, dst);
+ m_formatter.immediate8u(imm);
+ return;
+ }
+
+ if (src0 == X86Registers::invalid_xmm)
+ spew("%-11s$0x%x, %s, %s", name, imm, nameFPReg(rm), nameFPReg(dst));
+ else
+ spew("%-11s$0x%x, %s, %s, %s", name, imm, nameFPReg(rm), nameFPReg(src0), nameFPReg(dst));
+ m_formatter.twoByteOpVex(ty, opcode, (RegisterID)rm, src0, dst);
+ m_formatter.immediate8u(imm);
+ }
+
void twoByteOpSimd(const char *name, VexOperandType ty, TwoByteOpcodeID opcode,
int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
{
if (useLegacySSEEncoding(src0, dst)) {
if (IsXMMReversedOperands(opcode)) {
spew("%-11s%s, " MEM_ob, legacySSEOpName(name),
nameFPReg(dst), ADDR_ob(offset, base));
} else {
@@ -4210,16 +4182,34 @@ private:
}
} else {
spew("%-11s" MEM_o32b ", %s, %s", name,
ADDR_o32b(offset, base), nameFPReg(src0), nameFPReg(dst));
}
m_formatter.twoByteOpVex_disp32(ty, opcode, offset, base, src0, dst);
}
+ void twoByteOpImmSimd(const char *name, VexOperandType ty, TwoByteOpcodeID opcode,
+ uint32_t imm, int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
+ {
+ if (useLegacySSEEncoding(src0, dst)) {
+ spew("%-11s$0x%x, " MEM_ob ", %s", legacySSEOpName(name), imm,
+ ADDR_ob(offset, base), nameFPReg(dst));
+ m_formatter.legacySSEPrefix(ty);
+ m_formatter.twoByteOp(opcode, offset, base, dst);
+ m_formatter.immediate8u(imm);
+ return;
+ }
+
+ spew("%-11s$0x%x, " MEM_ob ", %s, %s", name, imm, ADDR_ob(offset, base),
+ nameFPReg(src0), nameFPReg(dst));
+ m_formatter.twoByteOpVex(ty, opcode, offset, base, src0, dst);
+ m_formatter.immediate8u(imm);
+ }
+
void twoByteOpSimd(const char *name, VexOperandType ty, TwoByteOpcodeID opcode,
int32_t offset, RegisterID base, RegisterID index, int scale,
XMMRegisterID src0, XMMRegisterID dst)
{
if (useLegacySSEEncoding(src0, dst)) {
if (IsXMMReversedOperands(opcode)) {
spew("%-11s%s, " MEM_obs, legacySSEOpName(name),
nameFPReg(dst), ADDR_obs(offset, base, index, scale));
@@ -4266,16 +4256,32 @@ private:
else
spew("%-11s%p, %s", name, address, nameFPReg(dst));
} else {
spew("%-11s%p, %s, %s", name, address, nameFPReg(src0), nameFPReg(dst));
}
m_formatter.twoByteOpVex(ty, opcode, address, src0, dst);
}
+ void twoByteOpImmSimd(const char *name, VexOperandType ty, TwoByteOpcodeID opcode,
+ uint32_t imm, const void *address, XMMRegisterID src0, XMMRegisterID dst)
+ {
+ if (useLegacySSEEncoding(src0, dst)) {
+ spew("%-11s$0x%x, %p, %s", legacySSEOpName(name), imm, address, nameFPReg(dst));
+ m_formatter.legacySSEPrefix(ty);
+ m_formatter.twoByteOp(opcode, address, dst);
+ m_formatter.immediate8u(imm);
+ return;
+ }
+
+ spew("%-11s$0x%x, %p, %s, %s", name, imm, address, nameFPReg(src0), nameFPReg(dst));
+ m_formatter.twoByteOpVex(ty, opcode, address, src0, dst);
+ m_formatter.immediate8u(imm);
+ }
+
void twoByteOpInt32Simd(const char *name, VexOperandType ty, TwoByteOpcodeID opcode,
RegisterID rm, XMMRegisterID src0, XMMRegisterID dst)
{
if (useLegacySSEEncoding(src0, dst)) {
if (IsXMMReversedOperands(opcode))
spew("%-11s%s, %s", legacySSEOpName(name), nameFPReg(dst), nameIReg(4, rm));
else
spew("%-11s%s, %s", legacySSEOpName(name), nameIReg(4, rm), nameFPReg(dst));
@@ -4340,16 +4346,32 @@ private:
spew("%-11s%s, %s", name, nameIReg(4, dst), nameFPReg(rm));
else if (opcode == OP2_MOVD_EdVd)
spew("%-11s%s, %s", name, nameFPReg((XMMRegisterID)dst), nameIReg(4, (RegisterID)rm));
else
spew("%-11s%s, %s", name, nameFPReg(rm), nameIReg(4, dst));
m_formatter.twoByteOpVex(ty, opcode, (RegisterID)rm, X86Registers::invalid_xmm, dst);
}
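+    // As twoByteOpSimdInt32 above, but appends an 8-bit immediate (used by
+    // vpextrw).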
+ void twoByteOpImmSimdInt32(const char *name, VexOperandType ty, TwoByteOpcodeID opcode,
+ uint32_t imm, XMMRegisterID rm, RegisterID dst)
+ {
+ if (useLegacySSEEncodingForOtherOutput()) {
+ spew("%-11s$0x%x, %s, %s", legacySSEOpName(name), imm, nameFPReg(rm), nameIReg(4, dst));
+ m_formatter.legacySSEPrefix(ty);
+ m_formatter.twoByteOp(opcode, (RegisterID)rm, dst);
+ m_formatter.immediate8u(imm);
+ return;
+ }
+
+ spew("%-11s$0x%x, %s, %s", name, imm, nameFPReg(rm), nameIReg(4, dst));
+ m_formatter.twoByteOpVex(ty, opcode, (RegisterID)rm, X86Registers::invalid_xmm, dst);
+ m_formatter.immediate8u(imm);
+ }
+
#ifdef JS_CODEGEN_X64
void twoByteOpSimdInt64(const char *name, VexOperandType ty, TwoByteOpcodeID opcode,
XMMRegisterID rm, RegisterID dst)
{
if (useLegacySSEEncodingForOtherOutput()) {
if (IsXMMReversedOperands(opcode))
spew("%-11s%s, %s", legacySSEOpName(name), nameIReg(dst), nameFPReg(rm));
else if (opcode == OP2_MOVD_EdVd)
@@ -4411,16 +4433,33 @@ private:
m_formatter.threeByteOp(opcode, escape, (RegisterID)rm, dst);
return;
}
spew("%-11s%s, %s, %s", name, nameFPReg(rm), nameFPReg(src0), nameFPReg(dst));
m_formatter.threeByteOpVex(ty, opcode, escape, (RegisterID)rm, src0, dst);
}
+ void threeByteOpImmSimd(const char *name, VexOperandType ty, ThreeByteOpcodeID opcode,
+ ThreeByteEscape escape,
+ uint32_t imm, XMMRegisterID rm, XMMRegisterID src0, XMMRegisterID dst)
+ {
+ if (useLegacySSEEncoding(src0, dst)) {
+ spew("%-11s$0x%x, %s, %s", legacySSEOpName(name), imm, nameFPReg(rm), nameFPReg(dst));
+ m_formatter.legacySSEPrefix(ty);
+ m_formatter.threeByteOp(opcode, escape, (RegisterID)rm, dst);
+ m_formatter.immediate8u(imm);
+ return;
+ }
+
+ spew("%-11s$0x%x, %s, %s, %s", name, imm, nameFPReg(rm), nameFPReg(src0), nameFPReg(dst));
+ m_formatter.threeByteOpVex(ty, opcode, escape, (RegisterID)rm, src0, dst);
+ m_formatter.immediate8u(imm);
+ }
+
void threeByteOpSimd(const char *name, VexOperandType ty, ThreeByteOpcodeID opcode,
ThreeByteEscape escape,
int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
{
if (useLegacySSEEncoding(src0, dst)) {
spew("%-11s" MEM_ob ", %s", legacySSEOpName(name),
ADDR_ob(offset, base), nameFPReg(dst));
m_formatter.legacySSEPrefix(ty);
@@ -4428,31 +4467,121 @@ private:
return;
}
spew("%-11s" MEM_ob ", %s, %s", name,
ADDR_ob(offset, base), nameFPReg(src0), nameFPReg(dst));
m_formatter.threeByteOpVex(ty, opcode, escape, offset, base, src0, dst);
}
+ void threeByteOpImmSimd(const char *name, VexOperandType ty, ThreeByteOpcodeID opcode,
+ ThreeByteEscape escape,
+ uint32_t imm, int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
+ {
+ if (useLegacySSEEncoding(src0, dst)) {
+ spew("%-11s$0x%x, " MEM_ob ", %s", legacySSEOpName(name), imm,
+ ADDR_ob(offset, base), nameFPReg(dst));
+ m_formatter.legacySSEPrefix(ty);
+ m_formatter.threeByteOp(opcode, escape, offset, base, dst);
+ m_formatter.immediate8u(imm);
+ return;
+ }
+
+ spew("%-11s$0x%x, " MEM_ob ", %s, %s", name, imm, ADDR_ob(offset, base),
+ nameFPReg(src0), nameFPReg(dst));
+ m_formatter.threeByteOpVex(ty, opcode, escape, offset, base, src0, dst);
+ m_formatter.immediate8u(imm);
+ }
+
void threeByteOpSimd(const char *name, VexOperandType ty, ThreeByteOpcodeID opcode,
ThreeByteEscape escape,
const void *address, XMMRegisterID src0, XMMRegisterID dst)
{
if (useLegacySSEEncoding(src0, dst)) {
spew("%-11s%p, %s", legacySSEOpName(name), address, nameFPReg(dst));
m_formatter.legacySSEPrefix(ty);
m_formatter.threeByteOp(opcode, escape, address, dst);
return;
}
spew("%-11s%p, %s, %s", name, address, nameFPReg(src0), nameFPReg(dst));
m_formatter.threeByteOpVex(ty, opcode, escape, address, src0, dst);
}
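+    // As threeByteOpImmSimd, but the rm operand is a general-purpose register
+    // or memory location rather than an xmm register (used by vpinsrd).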
+ void threeByteOpImmInt32Simd(const char *name, VexOperandType ty, ThreeByteOpcodeID opcode,
+ ThreeByteEscape escape, uint32_t imm,
+ RegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
+ {
+ if (useLegacySSEEncoding(src0, dst)) {
+ spew("%-11s$0x%x, %s, %s", legacySSEOpName(name), imm, nameIReg(4, src1), nameFPReg(dst));
+ m_formatter.legacySSEPrefix(ty);
+ m_formatter.threeByteOp(opcode, escape, src1, dst);
+ m_formatter.immediate8u(imm);
+ return;
+ }
+
+ spew("%-11s$0x%x, %s, %s, %s", name, imm, nameIReg(4, src1), nameFPReg(src0), nameFPReg(dst));
+ m_formatter.threeByteOpVex(ty, opcode, escape, src1, src0, dst);
+ m_formatter.immediate8u(imm);
+ }
+
+ void threeByteOpImmInt32Simd(const char *name, VexOperandType ty, ThreeByteOpcodeID opcode,
+ ThreeByteEscape escape, uint32_t imm,
+ int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
+ {
+ if (useLegacySSEEncoding(src0, dst)) {
+ spew("%-11s$0x%x, " MEM_ob ", %s", legacySSEOpName(name), imm, ADDR_ob(offset, base), nameFPReg(dst));
+ m_formatter.legacySSEPrefix(ty);
+ m_formatter.threeByteOp(opcode, escape, offset, base, dst);
+ m_formatter.immediate8u(imm);
+ return;
+ }
+
+ spew("%-11s$0x%x, " MEM_ob ", %s, %s", name, imm, ADDR_ob(offset, base), nameFPReg(src0), nameFPReg(dst));
+ m_formatter.threeByteOpVex(ty, opcode, escape, offset, base, src0, dst);
+ m_formatter.immediate8u(imm);
+ }
+
+ void threeByteOpImmSimdInt32(const char *name, VexOperandType ty, ThreeByteOpcodeID opcode,
+ ThreeByteEscape escape, uint32_t imm,
+ XMMRegisterID src, RegisterID dst)
+ {
+ if (useLegacySSEEncodingForOtherOutput()) {
+ spew("%-11s$0x%x, %s, %s", legacySSEOpName(name), imm, nameFPReg(src), nameIReg(4, dst));
+ m_formatter.legacySSEPrefix(ty);
+ m_formatter.threeByteOp(opcode, escape, (RegisterID)src, dst);
+ m_formatter.immediate8u(imm);
+ return;
+ }
+
+ if (opcode == OP3_PEXTRD_EdVdqIb)
+ spew("%-11s$0x%x, %s, %s", name, imm, nameFPReg((XMMRegisterID)dst), nameIReg(4, (RegisterID)src));
+ else
+ spew("%-11s$0x%x, %s, %s", name, imm, nameFPReg(src), nameIReg(4, dst));
+ m_formatter.threeByteOpVex(ty, opcode, escape, (RegisterID)src, X86Registers::invalid_xmm, dst);
+ m_formatter.immediate8u(imm);
+ }
+
+ void threeByteOpImmSimdInt32(const char *name, VexOperandType ty, ThreeByteOpcodeID opcode,
+ ThreeByteEscape escape, uint32_t imm,
+ int32_t offset, RegisterID base, RegisterID dst)
+ {
+ if (useLegacySSEEncodingForOtherOutput()) {
+ spew("%-11s$0x%x, " MEM_ob ", %s", legacySSEOpName(name), imm, ADDR_ob(offset, base), nameIReg(4, dst));
+ m_formatter.legacySSEPrefix(ty);
+ m_formatter.threeByteOp(opcode, escape, offset, base, dst);
+ m_formatter.immediate8u(imm);
+ return;
+ }
+
+ spew("%-11s$0x%x, " MEM_ob ", %s", name, imm, ADDR_ob(offset, base), nameIReg(4, dst));
+ m_formatter.threeByteOpVex(ty, opcode, escape, offset, base, X86Registers::invalid_xmm, dst);
+ m_formatter.immediate8u(imm);
+ }
+
// Blendv is a three-byte op, but the VEX encoding has a different opcode
// than the SSE encoding, so we handle it specially.
void vblendvOpSimd(XMMRegisterID mask, XMMRegisterID rm, XMMRegisterID src0, XMMRegisterID dst)
{
if (useLegacySSEEncodingForVblendv(mask, src0, dst)) {
spew("blendvps %s, %s", nameFPReg(rm), nameFPReg(dst));
// Even though a "ps" instruction, vblendv is encoded with the "pd" prefix.
m_formatter.legacySSEPrefix(VEX_PD);
@@ -4479,16 +4608,32 @@ private:
spew("vblendvps %s, " MEM_ob ", %s, %s",
nameFPReg(mask), ADDR_ob(offset, base), nameFPReg(src0), nameFPReg(dst));
// Even though a "ps" instruction, vblendv is encoded with the "pd" prefix.
m_formatter.vblendvOpVex(VEX_PD, OP3_VBLENDVPS_VdqWdq, ESCAPE_VBLENDVPS,
mask, offset, base, src0, dst);
}
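+    // The immediate-form packed shifts use the ModRM reg field as an opcode
+    // extension (the ShiftID), so the register being shifted goes in ModRM.rm
+    // and, in the VEX encoding, the destination is named by VEX.vvvv.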
+ void shiftOpImmSimd(const char *name, TwoByteOpcodeID opcode, ShiftID shiftKind,
+ uint32_t imm, XMMRegisterID src, XMMRegisterID dst)
+ {
+ if (useLegacySSEEncoding(src, dst)) {
+ spew("%-11s$%d, %s", legacySSEOpName(name), imm, nameFPReg(dst));
+ m_formatter.legacySSEPrefix(VEX_PD);
+ m_formatter.twoByteOp(opcode, (RegisterID)dst, (int)shiftKind);
+ m_formatter.immediate8u(imm);
+ return;
+ }
+
+ spew("%-11s$%d, %s, %s", name, imm, nameFPReg(src), nameFPReg(dst));
+        m_formatter.twoByteOpVex(VEX_PD, opcode, (RegisterID)src, dst, (int)shiftKind);
+ m_formatter.immediate8u(imm);
+ }
+
static int32_t getInt32(void* where)
{
return reinterpret_cast<int32_t*>(where)[-1];
}
class X86InstructionFormatter {
static const int maxInstructionSize = 16;
--- a/js/src/jit/shared/CodeGenerator-x86-shared.cpp
+++ b/js/src/jit/shared/CodeGenerator-x86-shared.cpp
@@ -1591,17 +1591,17 @@ CodeGeneratorX86Shared::visitFloor(LFloo
Label bailout;
if (AssemblerX86Shared::HasSSE41()) {
// Bail on negative-zero.
masm.branchNegativeZero(input, output, &bailout);
bailoutFrom(&bailout, lir->snapshot());
// Round toward -Infinity.
- masm.roundsd(X86Assembler::RoundDown, input, scratch);
+ masm.vroundsd(X86Assembler::RoundDown, input, scratch, scratch);
bailoutCvttsd2si(scratch, output, lir->snapshot());
} else {
Label negative, end;
// Branch to a slow path for negative inputs. Doesn't catch NaN or -0.
masm.zeroDouble(scratch);
masm.branchDouble(Assembler::DoubleLessThan, input, scratch, &negative);
@@ -1648,17 +1648,17 @@ CodeGeneratorX86Shared::visitFloorF(LFlo
Label bailout;
if (AssemblerX86Shared::HasSSE41()) {
// Bail on negative-zero.
masm.branchNegativeZeroFloat32(input, output, &bailout);
bailoutFrom(&bailout, lir->snapshot());
// Round toward -Infinity.
- masm.roundss(X86Assembler::RoundDown, input, scratch);
+ masm.vroundss(X86Assembler::RoundDown, input, scratch, scratch);
bailoutCvttss2si(scratch, output, lir->snapshot());
} else {
Label negative, end;
// Branch to a slow path for negative inputs. Doesn't catch NaN or -0.
masm.zeroFloat32(scratch);
masm.branchFloat(Assembler::DoubleLessThan, input, scratch, &negative);
@@ -1713,17 +1713,17 @@ CodeGeneratorX86Shared::visitCeil(LCeil
masm.vmovmskpd(input, output);
masm.branchTest32(Assembler::NonZero, output, Imm32(1), &bailout);
bailoutFrom(&bailout, lir->snapshot());
if (AssemblerX86Shared::HasSSE41()) {
// x <= -1 or x > -0
masm.bind(&lessThanMinusOne);
// Round toward +Infinity.
- masm.roundsd(X86Assembler::RoundUp, input, scratch);
+ masm.vroundsd(X86Assembler::RoundUp, input, scratch, scratch);
bailoutCvttsd2si(scratch, output, lir->snapshot());
return;
}
// No SSE4.1
Label end;
// x >= 0 and x is not -0.0, we can truncate (resp. truncate and add 1) for
@@ -1765,17 +1765,17 @@ CodeGeneratorX86Shared::visitCeilF(LCeil
masm.vmovmskps(input, output);
masm.branchTest32(Assembler::NonZero, output, Imm32(1), &bailout);
bailoutFrom(&bailout, lir->snapshot());
if (AssemblerX86Shared::HasSSE41()) {
// x <= -1 or x > -0
masm.bind(&lessThanMinusOne);
// Round toward +Infinity.
- masm.roundss(X86Assembler::RoundUp, input, scratch);
+ masm.vroundss(X86Assembler::RoundUp, input, scratch, scratch);
bailoutCvttss2si(scratch, output, lir->snapshot());
return;
}
// No SSE4.1
Label end;
// x >= 0 and x is not -0.0, we can truncate (resp. truncate and add 1) for
@@ -1840,17 +1840,17 @@ CodeGeneratorX86Shared::visitRound(LRoun
// Input is negative.
masm.bind(&negative);
masm.loadConstantDouble(0.5, temp);
if (AssemblerX86Shared::HasSSE41()) {
// Add 0.5 and round toward -Infinity. The result is stored in the temp
// register (currently contains 0.5).
masm.addDouble(input, temp);
- masm.roundsd(X86Assembler::RoundDown, temp, scratch);
+ masm.vroundsd(X86Assembler::RoundDown, temp, scratch, scratch);
// Truncate.
bailoutCvttsd2si(scratch, output, lir->snapshot());
// If the result is positive zero, then the actual result is -0. Bail.
// Otherwise, the truncation will have produced the correct negative integer.
masm.test32(output, output);
bailoutIf(Assembler::Zero, lir->snapshot());
@@ -1923,17 +1923,17 @@ CodeGeneratorX86Shared::visitRoundF(LRou
// Input is negative.
masm.bind(&negative);
masm.loadConstantFloat32(0.5f, temp);
if (AssemblerX86Shared::HasSSE41()) {
// Add 0.5 and round toward -Infinity. The result is stored in the temp
// register (currently contains 0.5).
masm.addFloat32(input, temp);
- masm.roundss(X86Assembler::RoundDown, temp, scratch);
+ masm.vroundss(X86Assembler::RoundDown, temp, scratch, scratch);
// Truncate.
bailoutCvttss2si(scratch, output, lir->snapshot());
// If the result is positive zero, then the actual result is -0. Bail.
// Otherwise, the truncation will have produced the correct negative integer.
masm.test32(output, output);
bailoutIf(Assembler::Zero, lir->snapshot());
@@ -2088,17 +2088,17 @@ CodeGeneratorX86Shared::visitSimdValueIn
{
MOZ_ASSERT(ins->mir()->type() == MIRType_Int32x4);
FloatRegister output = ToFloatRegister(ins->output());
if (AssemblerX86Shared::HasSSE41()) {
masm.vmovd(ToRegister(ins->getOperand(0)), output);
for (size_t i = 1; i < 4; ++i) {
Register r = ToRegister(ins->getOperand(i));
- masm.pinsrd(i, r, output);
+ masm.vpinsrd(i, r, output, output);
}
return;
}
masm.reserveStack(Simd128DataSize);
for (size_t i = 0; i < 4; ++i) {
Register r = ToRegister(ins->getOperand(i));
masm.store32(r, Address(StackPointer, i * sizeof(int32_t)));
@@ -2135,24 +2135,24 @@ CodeGeneratorX86Shared::visitSimdSplatX4
MSimdSplatX4 *mir = ins->mir();
MOZ_ASSERT(IsSimdType(mir->type()));
JS_STATIC_ASSERT(sizeof(float) == sizeof(int32_t));
switch (mir->type()) {
case MIRType_Int32x4: {
Register r = ToRegister(ins->getOperand(0));
masm.vmovd(r, output);
- masm.pshufd(0, output, output);
+ masm.vpshufd(0, output, output);
break;
}
case MIRType_Float32x4: {
FloatRegister r = ToFloatRegister(ins->getOperand(0));
if (r != output)
masm.moveFloat32x4(r, output);
- masm.shufps(0, output, output);
+ masm.vshufps(0, output, output, output);
break;
}
default:
MOZ_CRASH("Unknown SIMD kind");
}
}
void
@@ -2161,17 +2161,17 @@ CodeGeneratorX86Shared::visitSimdExtract
FloatRegister input = ToFloatRegister(ins->input());
Register output = ToRegister(ins->output());
SimdLane lane = ins->lane();
if (lane == LaneX) {
// The value we want to extract is in the low double-word
masm.moveLowInt32(input, output);
} else if (AssemblerX86Shared::HasSSE41()) {
- masm.pextrd(lane, input, output);
+ masm.vpextrd(lane, input, output);
} else {
uint32_t mask = MacroAssembler::ComputeShuffleMask(lane);
masm.shuffleInt32(mask, input, ScratchSimdReg);
masm.moveLowInt32(ScratchSimdReg, output);
}
}
void
@@ -2203,17 +2203,18 @@ CodeGeneratorX86Shared::visitSimdInsertE
MOZ_ASSERT(vector == output); // defineReuseInput(0)
unsigned component = unsigned(ins->lane());
    // Note that, unlike float32x4, we cannot use vmovd if the inserted
    // value goes into the first component, as vmovd clears out the higher lanes
    // of the output.
if (AssemblerX86Shared::HasSSE41()) {
- masm.pinsrd(component, value, output);
+ // TODO: Teach Lowering that we don't need defineReuseInput if we have AVX.
+ masm.vpinsrd(component, value, vector, output);
return;
}
masm.reserveStack(Simd128DataSize);
masm.storeAlignedInt32x4(vector, Address(StackPointer, 0));
masm.store32(value, Address(StackPointer, component * sizeof(int32_t)));
masm.loadAlignedInt32x4(Address(StackPointer, 0), output);
masm.freeStack(Simd128DataSize);
@@ -2232,17 +2233,17 @@ CodeGeneratorX86Shared::visitSimdInsertE
// of the destination operand.
if (value != output)
masm.vmovss(value, vector, output);
return;
}
if (AssemblerX86Shared::HasSSE41()) {
// The input value is in the low float32 of the 'value' FloatRegister.
- masm.insertps(value, output, masm.insertpsMask(SimdLane::LaneX, ins->lane()));
+ masm.vinsertps(masm.vinsertpsMask(SimdLane::LaneX, ins->lane()), value, output, output);
return;
}
unsigned component = unsigned(ins->lane());
masm.reserveStack(Simd128DataSize);
masm.storeAlignedFloat32x4(vector, Address(StackPointer, 0));
masm.storeFloat32(value, Address(StackPointer, component * sizeof(int32_t)));
masm.loadAlignedFloat32x4(Address(StackPointer, 0), output);
@@ -2338,17 +2339,17 @@ CodeGeneratorX86Shared::visitSimdShuffle
uint32_t y = ins->laneY();
uint32_t z = ins->laneZ();
uint32_t w = ins->laneW();
// Check that lanes come from LHS in majority:
unsigned numLanesFromLHS = (x < 4) + (y < 4) + (z < 4) + (w < 4);
MOZ_ASSERT(numLanesFromLHS >= 2);
- // When reading this method, remember that shufps takes the two first
+ // When reading this method, remember that vshufps takes the two first
// inputs of the destination operand (right operand) and the two last
// inputs of the source operand (left operand).
//
// Legend for explanations:
// - L: LHS
// - R: RHS
// - T: temporary
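+    // Concretely, masm.vshufps(ComputeShuffleMask(a, b, c, d), src1, src0, dest)
+    // writes (src0[a], src0[b], src1[c], src1[d]) into dest.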
@@ -2371,17 +2372,17 @@ CodeGeneratorX86Shared::visitSimdShuffle
unsigned firstMask = -1, secondMask = -1;
// register-register vmovss preserves the high lanes.
if (ins->lanesMatch(4, 1, 2, 3)) {
masm.vmovss(rhs, lhs, out);
return;
}
- // SSE4.1 insertps can handle any single element.
+ // SSE4.1 vinsertps can handle any single element.
unsigned numLanesUnchanged = (x == 0) + (y == 1) + (z == 2) + (w == 3);
if (AssemblerX86Shared::HasSSE41() && numLanesUnchanged == 3) {
SimdLane srcLane;
SimdLane dstLane;
if (x >= 4) {
srcLane = SimdLane(x - 4);
dstLane = LaneX;
} else if (y >= 4) {
@@ -2390,62 +2391,62 @@ CodeGeneratorX86Shared::visitSimdShuffle
} else if (z >= 4) {
srcLane = SimdLane(z - 4);
dstLane = LaneZ;
} else {
MOZ_ASSERT(w >= 4);
srcLane = SimdLane(w - 4);
dstLane = LaneW;
}
- masm.insertps(rhs, out, masm.insertpsMask(srcLane, dstLane));
+ masm.vinsertps(masm.vinsertpsMask(srcLane, dstLane), rhs, out, out);
return;
}
FloatRegister rhsCopy = ToFloatRegister(ins->temp());
if (x < 4 && y < 4) {
if (w >= 4) {
w %= 4;
- // T = (Rw Rw Lz Lz) = shufps(firstMask, lhs, rhs)
+ // T = (Rw Rw Lz Lz) = vshufps(firstMask, lhs, rhs, rhs)
firstMask = MacroAssembler::ComputeShuffleMask(w, w, z, z);
- // (Lx Ly Lz Rw) = (Lx Ly Tz Tx) = shufps(secondMask, T, lhs)
+ // (Lx Ly Lz Rw) = (Lx Ly Tz Tx) = vshufps(secondMask, T, lhs, lhs)
secondMask = MacroAssembler::ComputeShuffleMask(x, y, LaneZ, LaneX);
} else {
MOZ_ASSERT(z >= 4);
z %= 4;
- // T = (Rz Rz Lw Lw) = shufps(firstMask, lhs, rhs)
+ // T = (Rz Rz Lw Lw) = vshufps(firstMask, lhs, rhs, rhs)
firstMask = MacroAssembler::ComputeShuffleMask(z, z, w, w);
- // (Lx Ly Rz Lw) = (Lx Ly Tx Tz) = shufps(secondMask, T, lhs)
+ // (Lx Ly Rz Lw) = (Lx Ly Tx Tz) = vshufps(secondMask, T, lhs, lhs)
secondMask = MacroAssembler::ComputeShuffleMask(x, y, LaneX, LaneZ);
}
- masm.shufps(firstMask, lhs, rhsCopy);
- masm.shufps(secondMask, rhsCopy, lhs);
+ masm.vshufps(firstMask, lhs, rhsCopy, rhsCopy);
+ masm.vshufps(secondMask, rhsCopy, lhs, lhs);
return;
}
MOZ_ASSERT(z < 4 && w < 4);
if (y >= 4) {
y %= 4;
- // T = (Ry Ry Lx Lx) = shufps(firstMask, lhs, rhs)
+ // T = (Ry Ry Lx Lx) = vshufps(firstMask, lhs, rhs, rhs)
firstMask = MacroAssembler::ComputeShuffleMask(y, y, x, x);
- // (Lx Ry Lz Lw) = (Tz Tx Lz Lw) = shufps(secondMask, lhs, T)
+ // (Lx Ry Lz Lw) = (Tz Tx Lz Lw) = vshufps(secondMask, lhs, T, T)
secondMask = MacroAssembler::ComputeShuffleMask(LaneZ, LaneX, z, w);
} else {
MOZ_ASSERT(x >= 4);
x %= 4;
- // T = (Rx Rx Ly Ly) = shufps(firstMask, lhs, rhs)
+ // T = (Rx Rx Ly Ly) = vshufps(firstMask, lhs, rhs, rhs)
firstMask = MacroAssembler::ComputeShuffleMask(x, x, y, y);
- // (Rx Ly Lz Lw) = (Tx Tz Lz Lw) = shufps(secondMask, lhs, T)
+ // (Rx Ly Lz Lw) = (Tx Tz Lz Lw) = vshufps(secondMask, lhs, T, T)
secondMask = MacroAssembler::ComputeShuffleMask(LaneX, LaneZ, z, w);
}
- masm.shufps(firstMask, lhs, rhsCopy);
- masm.shufps(secondMask, lhs, rhsCopy);
+ masm.vshufps(firstMask, lhs, rhsCopy, rhsCopy);
+ masm.vshufps(secondMask, lhs, rhsCopy, rhsCopy);
masm.moveFloat32x4(rhsCopy, out);
return;
}
// Two elements from one vector, two other elements from the other
MOZ_ASSERT(numLanesFromLHS == 2);
// TODO Here and below, symmetric case would be more handy to avoid a move,
@@ -2495,27 +2496,27 @@ CodeGeneratorX86Shared::visitSimdShuffle
} else {
masm.moveFloat32x4(rhs, ScratchSimdReg);
masm.vunpckhps(lhs, ScratchSimdReg, ScratchSimdReg);
masm.moveFloat32x4(ScratchSimdReg, out);
}
return;
}
- // In one shufps
+ // In one vshufps
if (x < 4 && y < 4) {
mask = MacroAssembler::ComputeShuffleMask(x, y, z % 4, w % 4);
- masm.shufps(mask, rhs, out);
+ masm.vshufps(mask, rhs, out, out);
return;
}
// At creation, we should have explicitly swapped in this case.
MOZ_ASSERT(!(z >= 4 && w >= 4));
- // In two shufps, for the most generic case:
+ // In two vshufps, for the most generic case:
uint32_t firstMask[4], secondMask[4];
unsigned i = 0, j = 2, k = 0;
#define COMPUTE_MASK(lane) \
if (lane >= 4) { \
firstMask[j] = lane % 4; \
secondMask[k++] = j++; \
} else { \
@@ -2528,21 +2529,21 @@ CodeGeneratorX86Shared::visitSimdShuffle
COMPUTE_MASK(z)
COMPUTE_MASK(w)
#undef COMPUTE_MASK
MOZ_ASSERT(i == 2 && j == 4 && k == 4);
mask = MacroAssembler::ComputeShuffleMask(firstMask[0], firstMask[1],
firstMask[2], firstMask[3]);
- masm.shufps(mask, rhs, lhs);
+ masm.vshufps(mask, rhs, lhs, lhs);
mask = MacroAssembler::ComputeShuffleMask(secondMask[0], secondMask[1],
secondMask[2], secondMask[3]);
- masm.shufps(mask, lhs, lhs);
+ masm.vshufps(mask, lhs, lhs, lhs);
}
void
CodeGeneratorX86Shared::visitSimdBinaryCompIx4(LSimdBinaryCompIx4 *ins)
{
static const SimdConstant allOnes = SimdConstant::SplatX4(-1);
FloatRegister lhs = ToFloatRegister(ins->lhs());
@@ -2648,24 +2649,24 @@ CodeGeneratorX86Shared::visitSimdBinaryA
return;
}
masm.loadAlignedInt32x4(rhs, ScratchSimdReg);
masm.vpmuludq(lhs, ScratchSimdReg, ScratchSimdReg);
// ScratchSimdReg contains (Rx, _, Rz, _) where R is the resulting vector.
FloatRegister temp = ToFloatRegister(ins->temp());
- masm.pshufd(MacroAssembler::ComputeShuffleMask(LaneY, LaneY, LaneW, LaneW), lhs, lhs);
- masm.pshufd(MacroAssembler::ComputeShuffleMask(LaneY, LaneY, LaneW, LaneW), rhs, temp);
+ masm.vpshufd(MacroAssembler::ComputeShuffleMask(LaneY, LaneY, LaneW, LaneW), lhs, lhs);
+ masm.vpshufd(MacroAssembler::ComputeShuffleMask(LaneY, LaneY, LaneW, LaneW), rhs, temp);
masm.vpmuludq(temp, lhs, lhs);
// lhs contains (Ry, _, Rw, _) where R is the resulting vector.
- masm.shufps(MacroAssembler::ComputeShuffleMask(LaneX, LaneZ, LaneX, LaneZ), ScratchSimdReg, lhs);
+ masm.vshufps(MacroAssembler::ComputeShuffleMask(LaneX, LaneZ, LaneX, LaneZ), ScratchSimdReg, lhs, lhs);
// lhs contains (Ry, Rw, Rx, Rz)
- masm.shufps(MacroAssembler::ComputeShuffleMask(LaneZ, LaneX, LaneW, LaneY), lhs, lhs);
+ masm.vshufps(MacroAssembler::ComputeShuffleMask(LaneZ, LaneX, LaneW, LaneY), lhs, lhs, lhs);
return;
}
case MSimdBinaryArith::Div:
// x86 doesn't have SIMD i32 div.
break;
case MSimdBinaryArith::Max:
// we can do max with a single instruction only if we have SSE4.1
// using the PMAXSD instruction.
@@ -2899,17 +2900,29 @@ CodeGeneratorX86Shared::visitSimdShift(L
// TODO: If the shift count is greater than 31, this will just zero all
// lanes by default for lsh and ursh, and set the count to 32 for rsh
// (which will just extend the sign bit to all bits). Plain JS doesn't do
// this: instead it only keeps the five low bits of the mask. Spec isn't
// clear about that topic so this might need to be fixed. See also bug
// 1068028.
const LAllocation *val = ins->value();
if (val->isConstant()) {
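+        // The vpslld_ir/vpsrad_ir/vpsrld_ir immediate forms now assert
+        // count < 32, so out-of-range constant counts are handled here: logical
+        // shifts by 32 or more zero every lane, and an arithmetic right shift
+        // by more than 31 behaves like a shift by 31.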
- Imm32 count(ToInt32(val));
+ int32_t c = ToInt32(val);
+ if (c > 31) {
+ switch (ins->operation()) {
+ case MSimdShift::lsh:
+ case MSimdShift::ursh:
+ masm.zeroInt32x4(out);
+ return;
+ default:
+ c = 31;
+ break;
+ }
+ }
+ Imm32 count(c);
switch (ins->operation()) {
case MSimdShift::lsh:
masm.packedLeftShiftByScalar(count, out);
return;
case MSimdShift::rsh:
masm.packedRightShiftByScalar(count, out);
return;
case MSimdShift::ursh: