--- a/js/src/assembler/assembler/X86Assembler.h
+++ b/js/src/assembler/assembler/X86Assembler.h
@@ -307,30 +307,32 @@ private:
OP2_SQRTSD_VsdWsd = 0x51,
OP2_SQRTSS_VssWss = 0x51,
OP2_ANDPD_VpdWpd = 0x54,
OP2_ORPD_VpdWpd = 0x56,
OP2_XORPD_VpdWpd = 0x57,
OP2_MOVD_VdEd = 0x6E,
OP2_MOVDQ_VsdWsd = 0x6F,
OP2_MOVDQ_VdqWdq = 0x6F,
+ OP2_PSHUFD_VdqWdqIb = 0x70,
OP2_PSRLDQ_Vd = 0x73,
OP2_PCMPEQW = 0x75,
OP2_MOVD_EdVd = 0x7E,
OP2_MOVDQ_WdqVdq = 0x7F,
OP2_JCC_rel32 = 0x80,
OP_SETCC = 0x90,
OP2_IMUL_GvEv = 0xAF,
OP2_CMPXCHG_GvEw = 0xB1,
OP2_MOVSX_GvEb = 0xBE,
OP2_MOVSX_GvEw = 0xBF,
OP2_MOVZX_GvEb = 0xB6,
OP2_MOVZX_GvEw = 0xB7,
OP2_XADD_EvGv = 0xC1,
- OP2_PEXTRW_GdUdIb = 0xC5
+ OP2_PEXTRW_GdUdIb = 0xC5,
+ OP2_SHUFPS_VpsWpsIb = 0xC6
} TwoByteOpcodeID;
typedef enum {
OP3_ROUNDSS_VsdWsd = 0x0A,
OP3_ROUNDSD_VsdWsd = 0x0B,
OP3_PTEST_VdVd = 0x17,
OP3_PINSRD_VsdWsd = 0x22
} ThreeByteOpcodeID;
@@ -2582,16 +2584,35 @@ public:
void movd_rr(RegisterID src, XMMRegisterID dst)
{
spew("movd %s, %s",
nameIReg(src), nameFPReg(dst));
m_formatter.prefix(PRE_SSE_66);
m_formatter.twoByteOp(OP2_MOVD_VdEd, (RegisterID)dst, src);
}
+    // Emits register-to-register PSHUFD: 66 prefix, two-byte opcode, then |mask| as imm8.
+    void pshufd_irr(uint32_t mask, XMMRegisterID src, XMMRegisterID dst)
+    {
+        JS_ASSERT(mask < 256); // Must fit the one-byte immediate emitted below.
+        spew("pshufd $0x%x, %s, %s", mask, nameFPReg(src), nameFPReg(dst));
+        m_formatter.prefix(PRE_SSE_66);
+        m_formatter.twoByteOp(OP2_PSHUFD_VdqWdqIb, (RegisterID)dst, (RegisterID)src);
+        m_formatter.immediate8(uint8_t(mask));
+    }
+
+    // Emits register-to-register SHUFPS: two-byte opcode (no prefix), then |mask| as imm8.
+    void shufps_irr(uint32_t mask, XMMRegisterID src, XMMRegisterID dst)
+    {
+        JS_ASSERT(mask < 256); // Must fit the one-byte immediate emitted below.
+        spew("shufps $0x%x, %s, %s", mask, nameFPReg(src), nameFPReg(dst));
+        m_formatter.twoByteOp(OP2_SHUFPS_VpsWpsIb, (RegisterID)dst, (RegisterID)src);
+        m_formatter.immediate8(uint8_t(mask));
+    }
+
void psrldq_ir(int shift, XMMRegisterID dest)
{
spew("psrldq $%d, %s",
shift, nameFPReg(dest));
m_formatter.prefix(PRE_SSE_66);
m_formatter.twoByteOp(OP2_PSRLDQ_Vd, (RegisterID)3, (RegisterID)dest);
m_formatter.immediate8(shift);
}
--- a/js/src/jit/IonTypes.h
+++ b/js/src/jit/IonTypes.h
@@ -446,16 +446,25 @@ SimdTypeToScalarType(MIRType type)
return MIRType_Int32;
case MIRType_Float32x4:
return MIRType_Float32;
default: break;
}
MOZ_ASSUME_UNREACHABLE("unexpected SIMD kind");
}
+// Identifies one lane of a SIMD register by its 0-based position: X is the
+// first lane, Y the second, Z the third (if any) and W the fourth (if any).
+enum SimdLane {
+    LaneX = 0,
+    LaneY = 1,
+    LaneZ = 2,
+    LaneW = 3
+};
+
#ifdef DEBUG
// Track the pipeline of opcodes which has produced a snapshot.
#define TRACK_SNAPSHOTS 1
// Make sure registers are not modified between an instruction and
// its OsiPoint.
#define CHECK_OSIPOINT_REGISTERS 1
--- a/js/src/jit/LIR-Common.h
+++ b/js/src/jit/LIR-Common.h
@@ -123,16 +123,54 @@ class LMoveGroup : public LInstructionHe
size_t numMoves() const {
return moves_.length();
}
const LMove &getMove(size_t i) const {
return moves_[i];
}
};
+// LIR node that extracts the int32 held in one lane of a SIMD int32x4 operand.
+class LSimdExtractElementI : public LInstructionHelper<1, 1, 0>
+{
+    SimdLane lane_; // Lane to extract; fixed at construction.
+
+  public:
+    LIR_HEADER(SimdExtractElementI);
+
+    LSimdExtractElementI(const LAllocation &base, SimdLane lane) : lane_(lane) {
+        setOperand(0, base); // Operand 0 is the vector to read from.
+    }
+    SimdLane lane() const {
+        return lane_;
+    }
+    const LAllocation *getBase() {
+        return getOperand(0);
+    }
+};
+
+// LIR node that extracts the float32 held in one lane of a SIMD float32x4 operand.
+class LSimdExtractElementF : public LInstructionHelper<1, 1, 0>
+{
+    SimdLane lane_; // Lane to extract; fixed at construction.
+
+  public:
+    LIR_HEADER(SimdExtractElementF);
+
+    LSimdExtractElementF(const LAllocation &base, SimdLane lane) : lane_(lane) {
+        setOperand(0, base); // Operand 0 is the vector to read from.
+    }
+    SimdLane lane() const {
+        return lane_;
+    }
+    const LAllocation *getBase() {
+        return getOperand(0);
+    }
+};
+
// Constant 32-bit integer.
class LInteger : public LInstructionHelper<1, 0, 0>
{
int32_t i32_;
public:
LIR_HEADER(Integer)
--- a/js/src/jit/LOpcodes.h
+++ b/js/src/jit/LOpcodes.h
@@ -11,16 +11,18 @@
_(Label) \
_(Nop) \
_(OsiPoint) \
_(MoveGroup) \
_(Integer) \
_(Pointer) \
_(Double) \
_(Float32) \
+ _(SimdExtractElementI) \
+ _(SimdExtractElementF) \
_(Value) \
_(CloneLiteral) \
_(Parameter) \
_(Callee) \
_(TableSwitch) \
_(TableSwitchV) \
_(Goto) \
_(NewArray) \
--- a/js/src/jit/Lowering.cpp
+++ b/js/src/jit/Lowering.cpp
@@ -3633,16 +3633,38 @@ bool
LIRGenerator::visitRecompileCheck(MRecompileCheck *ins)
{
LRecompileCheck *lir = new(alloc()) LRecompileCheck(temp());
if (!add(lir, ins))
return false;
return assignSafepoint(lir, ins);
}
+bool
+LIRGenerator::visitSimdExtractElement(MSimdExtractElement *ins)
+{
+    const MIRType vecType = ins->input()->type();
+    JS_ASSERT(IsSimdType(vecType));
+    JS_ASSERT(!IsSimdType(ins->type()));
+
+    // Note: there could be int16x8 in the future, which doesn't use the same
+    // instruction as int32x4. We either need to pass the arity or create new LIns.
+    if (vecType == MIRType_Int32x4) {
+        LUse vec = useRegisterAtStart(ins->input());
+        return define(new(alloc()) LSimdExtractElementI(vec, ins->lane()), ins);
+    }
+    if (vecType == MIRType_Float32x4) {
+        LUse vec = useRegisterAtStart(ins->input());
+        return define(new(alloc()) LSimdExtractElementF(vec, ins->lane()), ins);
+    }
+
+    MOZ_ASSUME_UNREACHABLE("Unknown SIMD kind when extracting element");
+    return false;
+}
+
static void
SpewResumePoint(MBasicBlock *block, MInstruction *ins, MResumePoint *resumePoint)
{
fprintf(IonSpewFile, "Current resume point %p details:\n", (void *)resumePoint);
fprintf(IonSpewFile, " frame count: %u\n", resumePoint->frameCount());
if (ins) {
fprintf(IonSpewFile, " taken after: ");
--- a/js/src/jit/Lowering.h
+++ b/js/src/jit/Lowering.h
@@ -258,14 +258,15 @@ class LIRGenerator : public LIRGenerator
bool visitAsmJSReturn(MAsmJSReturn *ins);
bool visitAsmJSVoidReturn(MAsmJSVoidReturn *ins);
bool visitAsmJSPassStackArg(MAsmJSPassStackArg *ins);
bool visitAsmJSCall(MAsmJSCall *ins);
bool visitSetDOMProperty(MSetDOMProperty *ins);
bool visitGetDOMProperty(MGetDOMProperty *ins);
bool visitGetDOMMember(MGetDOMMember *ins);
bool visitRecompileCheck(MRecompileCheck *ins);
+ bool visitSimdExtractElement(MSimdExtractElement *ins);
};
} // namespace jit
} // namespace js
#endif /* jit_Lowering_h */
--- a/js/src/jit/MIR.h
+++ b/js/src/jit/MIR.h
@@ -1228,16 +1228,57 @@ class MConstant : public MNullaryInstruc
void computeRange(TempAllocator &alloc);
bool truncate(TruncateKind kind);
bool canProduceFloat32() const;
ALLOW_CLONE(MConstant)
};
+// MIR node reading one lane, named by a SimdLane, out of a SIMD vector operand.
+class MSimdExtractElement : public MUnaryInstruction
+{
+  protected:
+    SimdLane lane_;
+
+    MSimdExtractElement(MDefinition *obj, MIRType type, SimdLane lane)
+      : MUnaryInstruction(obj), lane_(lane)
+    {
+        // |type| is the scalar result: the element type of |obj|'s SIMD type.
+        JS_ASSERT(IsSimdType(obj->type()));
+        JS_ASSERT(uint32_t(lane) < SimdTypeToLength(obj->type()));
+        JS_ASSERT(!IsSimdType(type));
+        JS_ASSERT(SimdTypeToScalarType(obj->type()) == type);
+        setResultType(type);
+    }
+
+  public:
+    INSTRUCTION_HEADER(SimdExtractElement);
+    static MSimdExtractElement *NewAsmJS(TempAllocator &alloc, MDefinition *obj, MIRType type,
+                                         SimdLane lane)
+    {
+        return new(alloc) MSimdExtractElement(obj, type, lane);
+    }
+
+    SimdLane lane() const {
+        return lane_;
+    }
+
+    AliasSet getAliasSet() const {
+        return AliasSet::None();
+    }
+    bool congruentTo(const MDefinition *ins) const {
+        // Congruent only to an extract of the same lane whose operands are equal.
+        if (!ins->isSimdExtractElement())
+            return false;
+        const MSimdExtractElement *that = ins->toSimdExtractElement();
+        return that->lane_ == lane_ && congruentIfOperandsEqual(that);
+    }
+};
+
// Deep clone a constant JSObject.
class MCloneLiteral
: public MUnaryInstruction,
public ObjectPolicy<0>
{
protected:
explicit MCloneLiteral(MDefinition *obj)
: MUnaryInstruction(obj)
--- a/js/src/jit/MOpcodes.h
+++ b/js/src/jit/MOpcodes.h
@@ -7,16 +7,17 @@
#ifndef jit_MOpcodes_h
#define jit_MOpcodes_h
namespace js {
namespace jit {
#define MIR_OPCODE_LIST(_) \
_(Constant) \
+ _(SimdExtractElement) \
_(CloneLiteral) \
_(Parameter) \
_(Callee) \
_(TableSwitch) \
_(Goto) \
_(Test) \
_(TypeObjectDispatch) \
_(FunctionDispatch) \
--- a/js/src/jit/ParallelSafetyAnalysis.cpp
+++ b/js/src/jit/ParallelSafetyAnalysis.cpp
@@ -107,16 +107,17 @@ class ParallelSafetyVisitor : public MDe
}
bool convertToBailout(MInstructionIterator &iter);
// I am taking the policy of blacklisting everything that's not
// obviously safe for now. We can loosen as we need.
SAFE_OP(Constant)
+ SAFE_OP(SimdExtractElement)
UNSAFE_OP(CloneLiteral)
SAFE_OP(Parameter)
SAFE_OP(Callee)
SAFE_OP(TableSwitch)
SAFE_OP(Goto)
SAFE_OP(Test)
SAFE_OP(Compare)
SAFE_OP(Phi)
--- a/js/src/jit/arm/CodeGenerator-arm.h
+++ b/js/src/jit/arm/CodeGenerator-arm.h
@@ -219,16 +219,21 @@ class CodeGeneratorARM : public CodeGene
}
}
}
bool visitEffectiveAddress(LEffectiveAddress *ins);
bool visitUDiv(LUDiv *ins);
bool visitUMod(LUMod *ins);
bool visitSoftUDivOrMod(LSoftUDivOrMod *ins);
+
+ public:
+ // SIMD element extraction is not implemented on ARM yet (NYI); these visitors must never run.
+ bool visitSimdExtractElementI(LSimdExtractElementI *ins) { MOZ_ASSUME_UNREACHABLE("NYI"); }
+ bool visitSimdExtractElementF(LSimdExtractElementF *ins) { MOZ_ASSUME_UNREACHABLE("NYI"); }
};
typedef CodeGeneratorARM CodeGeneratorSpecific;
// An out-of-line bailout thunk.
class OutOfLineBailout : public OutOfLineCodeBase<CodeGeneratorARM>
{
protected: // Silence Clang warning.
--- a/js/src/jit/shared/Assembler-x86-shared.h
+++ b/js/src/jit/shared/Assembler-x86-shared.h
@@ -1458,16 +1458,24 @@ class AssemblerX86Shared : public Assemb
void movd(Register src, FloatRegister dest) {
JS_ASSERT(HasSSE2());
masm.movd_rr(src.code(), dest.code());
}
void movd(FloatRegister src, Register dest) {
JS_ASSERT(HasSSE2());
masm.movd_rr(src.code(), dest.code());
}
+ void pshufd(uint32_t mask, FloatRegister src, FloatRegister dest) {
+ JS_ASSERT(HasSSE2());
+ masm.pshufd_irr(mask, src.code(), dest.code()); // mask must be < 256 (asserted by pshufd_irr).
+ }
+ void shufps(uint32_t mask, FloatRegister src, FloatRegister dest) {
+ JS_ASSERT(HasSSE2());
+ masm.shufps_irr(mask, src.code(), dest.code()); // mask must be < 256 (asserted by shufps_irr).
+ }
void addsd(FloatRegister src, FloatRegister dest) {
JS_ASSERT(HasSSE2());
masm.addsd_rr(src.code(), dest.code());
}
void addss(FloatRegister src, FloatRegister dest) {
JS_ASSERT(HasSSE2());
masm.addss_rr(src.code(), dest.code());
}
--- a/js/src/jit/shared/CodeGenerator-x86-shared.cpp
+++ b/js/src/jit/shared/CodeGenerator-x86-shared.cpp
@@ -2049,16 +2049,53 @@ CodeGeneratorX86Shared::visitNegF(LNegF
FloatRegister input = ToFloatRegister(ins->input());
JS_ASSERT(input == ToFloatRegister(ins->output()));
masm.negateFloat(input);
return true;
}
bool
+CodeGeneratorX86Shared::visitSimdExtractElementI(LSimdExtractElementI *ins)
+{
+    FloatRegister vec = ToFloatRegister(ins->input());
+    Register result = ToRegister(ins->output());
+
+    if (ins->lane() == LaneX) {
+        // Lane X already sits in the low double-word: move it out directly.
+        masm.moveLowInt32(vec, result);
+        return true;
+    }
+    // Other lanes: shuffle the wanted lane into the scratch register's low double-word first.
+    uint32_t mask = MacroAssembler::ComputeShuffleMask(ins->lane());
+    masm.shuffleInt32(mask, vec, ScratchSimdReg);
+    masm.moveLowInt32(ScratchSimdReg, result);
+    return true;
+}
+
+bool
+CodeGeneratorX86Shared::visitSimdExtractElementF(LSimdExtractElementF *ins)
+{
+    FloatRegister vec = ToFloatRegister(ins->input());
+    FloatRegister result = ToFloatRegister(ins->output());
+
+    if (ins->lane() != LaneX) {
+        // Bring the requested lane down into the low double-word of the result.
+        masm.shuffleFloat32(MacroAssembler::ComputeShuffleMask(ins->lane()), vec, result);
+    } else if (vec != result) {
+        // Lane X is already in the low double-word; a plain move is enough.
+        masm.moveFloat32(vec, result);
+    }
+
+    // Canonicalize the extracted value (NaN normalization, presumably -- confirm).
+    masm.canonicalizeFloat(result);
+    return true;
+}
+
+bool
CodeGeneratorX86Shared::visitForkJoinGetSlice(LForkJoinGetSlice *ins)
{
MOZ_ASSERT(gen->info().executionMode() == ParallelExecution);
MOZ_ASSERT(ToRegister(ins->forkJoinContext()) == ForkJoinGetSliceReg_cx);
MOZ_ASSERT(ToRegister(ins->temp1()) == eax);
MOZ_ASSERT(ToRegister(ins->temp2()) == edx);
MOZ_ASSERT(ToRegister(ins->temp3()) == ForkJoinGetSliceReg_temp0);
MOZ_ASSERT(ToRegister(ins->temp4()) == ForkJoinGetSliceReg_temp1);
--- a/js/src/jit/shared/CodeGenerator-x86-shared.h
+++ b/js/src/jit/shared/CodeGenerator-x86-shared.h
@@ -199,16 +199,20 @@ class CodeGeneratorX86Shared : public Co
bool visitOutOfLineLoadTypedArrayOutOfBounds(OutOfLineLoadTypedArrayOutOfBounds *ool);
bool visitForkJoinGetSlice(LForkJoinGetSlice *ins);
bool visitNegI(LNegI *lir);
bool visitNegD(LNegD *lir);
bool visitNegF(LNegF *lir);
+ // SIMD operators
+ bool visitSimdExtractElementI(LSimdExtractElementI *lir);
+ bool visitSimdExtractElementF(LSimdExtractElementF *lir);
+
// Out of line visitors.
bool visitOutOfLineBailout(OutOfLineBailout *ool);
bool visitOutOfLineUndoALUOperation(OutOfLineUndoALUOperation *ool);
bool visitMulNegativeZeroCheck(MulNegativeZeroCheck *ool);
bool visitModOverflowCheck(ModOverflowCheck *ool);
bool visitReturnZero(ReturnZero *ool);
bool visitOutOfLineTableSwitch(OutOfLineTableSwitch *ool);
bool generateInvalidateEpilogue();
--- a/js/src/jit/shared/MacroAssembler-x86-shared.h
+++ b/js/src/jit/shared/MacroAssembler-x86-shared.h
@@ -494,16 +494,45 @@ class MacroAssemblerX86Shared : public A
}
void loadUnalignedFloat32x4(const Address &src, FloatRegister dest) {
movups(Operand(src), dest);
}
void storeUnalignedFloat32x4(FloatRegister src, const Address &dest) {
movups(src, Operand(dest));
}
+    // Packs four lane selectors into one shuffle immediate, two bits each:
+    // x in bits 0-1, y in bits 2-3, z in bits 4-5 and w in bits 6-7.
+    static uint32_t ComputeShuffleMask(SimdLane x, SimdLane y = LaneX,
+                                       SimdLane z = LaneX, SimdLane w = LaneX)
+    {
+        uint32_t mask = uint32_t(x) | (uint32_t(y) << 2);
+        mask |= (uint32_t(z) << 4) | (uint32_t(w) << 6);
+        JS_ASSERT(mask < 256);
+        return mask;
+    }
+
+ void shuffleInt32(uint32_t mask, FloatRegister src, FloatRegister dest) {
+ pshufd(mask, src, dest); // mask: shuffle immediate, e.g. built with ComputeShuffleMask.
+ }
+ void moveLowInt32(FloatRegister src, Register dest) {
+ movd(src, dest); // movd into a GPR; callers use it to read the low double-word of src.
+ }
+
+ void shuffleFloat32(uint32_t mask, FloatRegister src, FloatRegister dest) {
+ // The x86 shuffle instruction (shufps) takes 2 lanes from its dest operand
+ // and 2 lanes from its src operand. To simplify things, first clobber the
+ // output with the input (skipped when they are the same register), then
+ // shuffle the output with itself.
+ // Note: this is useAtStart-safe because src isn't read after that copy.
+ if (src != dest)
+ moveAlignedFloat32x4(src, dest);
+ shufps(mask, dest, dest);
+ }
+
void moveFloatAsDouble(Register src, FloatRegister dest) {
movd(src, dest);
cvtss2sd(dest, dest);
}
void loadFloatAsDouble(const Address &src, FloatRegister dest) {
movss(src, dest);
cvtss2sd(dest, dest);
}