[JAEGER] Use SSE4.1 when available to load doubles faster.
b=582785, r=dvander.
--- a/js/src/assembler/assembler/MacroAssemblerX86Common.cpp
+++ b/js/src/assembler/assembler/MacroAssemblerX86Common.cpp
@@ -1,11 +1,51 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+ * vim: set ts=4 sw=4 et tw=99:
+ *
+ * ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Mozilla SpiderMonkey JavaScript 1.9 code, released
+ * May 28, 2008.
+ *
+ * The Initial Developer of the Original Code is
+ * Mozilla Corporation
+ *
+ * Contributor(s):
+ * Alex Miller <amiller@mozilla.com>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either of the GNU General Public License Version 2 or later (the "GPL"),
+ * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
#include "assembler/wtf/Platform.h"
-#if WTF_CPU_X86 && !WTF_PLATFORM_MAC
+/* SSE checks only make sense on x86 and x86-64 platforms. */
+#if WTF_CPU_X86 || WTF_CPU_X86_64
#include "MacroAssemblerX86Common.h"
using namespace JSC;
+MacroAssemblerX86Common::SSECheckState MacroAssemblerX86Common::s_sseCheckState = NotCheckedSSE;
-MacroAssemblerX86Common::SSE2CheckState MacroAssemblerX86Common::s_sse2CheckState = NotCheckedSSE2;
+#endif /* WTF_CPU_X86 || WTF_CPU_X86_64 */
-#endif
--- a/js/src/assembler/assembler/MacroAssemblerX86Common.h
+++ b/js/src/assembler/assembler/MacroAssemblerX86Common.h
@@ -1069,89 +1069,126 @@ public:
if (mask.m_value == -1)
m_assembler.cmpl_im(0, address.offset, address.base);
else
m_assembler.testl_i32m(mask.m_value, address.offset, address.base);
m_assembler.setCC_r(x86Condition(cond), dest);
m_assembler.movzbl_rr(dest, dest);
}
+    enum SSECheckState {
+        NotCheckedSSE = 0, // cpuid has not been queried yet
+        NoSSE = 1,         // queried, but neither the SSE2 nor the SSE4.1 feature bit was seen
+        HasSSE2 = 2,       // values are ordered so that callers can test ">= HasSSE2"
+        HasSSE4_1 = 3 // implies HasSSE2
+    };
+
+    // Returns the cached SSE support level, probing the CPU on first use.
+    static SSECheckState getSSEState()
+    {
+        // Lazy probe: only the first caller pays for the cpuid query.
+        if (s_sseCheckState == NotCheckedSSE)
+            setSSECheckState();
+
+        // The probe always settles on NoSSE, HasSSE2 or HasSSE4_1.
+        ASSERT(s_sseCheckState != NotCheckedSSE);
+        return s_sseCheckState;
+    }
+
protected:
X86Assembler::Condition x86Condition(Condition cond)
{
return static_cast<X86Assembler::Condition>(cond);
}
private:
- // Only MacroAssemblerX86 should be using the following method; SSE2 is always available on
- // x86_64, and clients & subclasses of MacroAssembler should be using 'supportsFloatingPoint()'.
friend class MacroAssemblerX86;
+ static SSECheckState s_sseCheckState;
+
+    // Runs cpuid function 1 and caches the best SSE level it reports in
+    // s_sseCheckState. Always stores NoSSE, HasSSE2 or HasSSE4_1, never
+    // NotCheckedSSE.
+    static void setSSECheckState()
+    {
+        static const int SSE2FeatureBit = 1 << 26;  // tested against edx
+        static const int SSE41FeatureBit = 1 << 19; // tested against ecx
+
+        // Default the flags value to zero; if no probe below is compiled in
+        // on 32-bit x86 we will read this as SSE2 not present.
+        int flags_edx = 0;
+        int flags_ecx = 0;
+#if WTF_CPU_X86_64
+        // SSE2 is always available on x86_64, so never report less than
+        // HasSSE2 there, even when no probe is compiled in (e.g. MSVC).
+        flags_edx = SSE2FeatureBit;
+#endif
+#if WTF_COMPILER_MSVC && WTF_CPU_X86
+        // Inline _asm blocks are only accepted by MSVC for 32-bit targets.
+        _asm {
+            mov eax, 1 // cpuid function 1 gives us the standard feature set
+            cpuid;
+            mov flags_ecx, ecx;
+            mov flags_edx, edx;
+        }
+#elif WTF_COMPILER_GCC && WTF_CPU_X86_64
+        // pushl/popl cannot be assembled in 64-bit mode, so instead of saving
+        // ebx by hand, declare all four cpuid registers as operands and let
+        // the compiler preserve whatever it needs.
+        int reg_eax = 1; // cpuid function 1 gives us the standard feature set
+        int reg_ebx = 0;
+        asm volatile (
+             "cpuid;"
+             : "+a" (reg_eax), "=b" (reg_ebx), "=c" (flags_ecx), "=d" (flags_edx)
+             );
+        (void) reg_eax;
+        (void) reg_ebx;
+#elif WTF_COMPILER_GCC
+        // 32-bit x86: ebx is saved and restored by hand rather than listed as
+        // a clobber (presumably because it can be the PIC register here).
+        asm (
+             "movl $0x1, %%eax;"
+             "pushl %%ebx;"
+             "cpuid;"
+             "popl %%ebx;"
+             "movl %%ecx, %0;"
+             "movl %%edx, %1;"
+             : "=g" (flags_ecx), "=g" (flags_edx)
+             :
+             : "%eax", "%ecx", "%edx"
+             );
+#endif
+        // Check the strongest level first; HasSSE4_1 implies HasSSE2.
+        if (flags_ecx & SSE41FeatureBit)
+            s_sseCheckState = HasSSE4_1;
+        else if (flags_edx & SSE2FeatureBit)
+            s_sseCheckState = HasSSE2;
+        else
+            s_sseCheckState = NoSSE;
+    }
+
#if WTF_CPU_X86
#if WTF_PLATFORM_MAC
// All X86 Macs are guaranteed to support at least SSE2,
static bool isSSE2Present()
{
return true;
}
#else // PLATFORM(MAC)
- enum SSE2CheckState {
- NotCheckedSSE2,
- HasSSE2,
- NoSSE2
- };
-
static bool isSSE2Present()
{
-        if (s_sse2CheckState == NotCheckedSSE2) {
-            // Default the flags value to zero; if the compiler is
-            // not MSVC or GCC we will read this as SSE2 not present.
-            int flags = 0;
-#if WTF_COMPILER_MSVC
-            _asm {
-                mov eax, 1 // cpuid function 1 gives us the standard feature set
-                cpuid;
-                mov flags, edx;
-            }
-#elif WTF_COMPILER_GCC
-            asm (
-                 "movl $0x1, %%eax;"
-                 "pushl %%ebx;"
-                 "cpuid;"
-                 "popl %%ebx;"
-                 "movl %%edx, %0;"
-                 : "=g" (flags)
-                 :
-                 : "%eax", "%ecx", "%edx"
-                 );
-#endif
-            static const int SSE2FeatureBit = 1 << 26;
-            s_sse2CheckState = (flags & SSE2FeatureBit) ? HasSSE2 : NoSSE2;
+        if (s_sseCheckState == NotCheckedSSE) {
+            setSSECheckState();
}
// Only check once.
-        ASSERT(s_sse2CheckState != NotCheckedSSE2);
+        ASSERT(s_sseCheckState != NotCheckedSSE);
-        return s_sse2CheckState == HasSSE2;
+        return s_sseCheckState >= HasSSE2; // ">=" so that HasSSE4_1 also counts as SSE2-capable
}
- static SSE2CheckState s_sse2CheckState;
#endif // PLATFORM(MAC)
#elif !defined(NDEBUG) // CPU(X86)
// On x86-64 we should never be checking for SSE2 in a non-debug build,
// but non debug add this method to keep the asserts above happy.
static bool isSSE2Present()
{
return true;
}
#endif
+
+    // True only when the CPU probe reported SSE4.1. getSSEState() performs
+    // the one-time probe and the "already checked" assertion.
+    static bool isSSE41Present()
+    {
+        return getSSEState() == HasSSE4_1;
+    }
+
};
} // namespace JSC
#endif // ENABLE(ASSEMBLER)
#endif // MacroAssemblerX86Common_h
--- a/js/src/assembler/assembler/X86Assembler.h
+++ b/js/src/assembler/assembler/X86Assembler.h
@@ -176,16 +176,17 @@ public:
private:
typedef enum {
OP_ADD_EvGv = 0x01,
OP_ADD_GvEv = 0x03,
OP_OR_EvGv = 0x09,
OP_OR_GvEv = 0x0B,
OP_2BYTE_ESCAPE = 0x0F,
+ OP_3BYTE_ESCAPE = 0x3A,
OP_AND_EvGv = 0x21,
OP_AND_GvEv = 0x23,
OP_SUB_EvGv = 0x29,
OP_SUB_GvEv = 0x2B,
PRE_PREDICT_BRANCH_NOT_TAKEN = 0x2E,
OP_XOR_EvGv = 0x31,
OP_XOR_GvEv = 0x33,
OP_CMP_EvGv = 0x39,
@@ -253,16 +254,20 @@ private:
OP2_JCC_rel32 = 0x80,
OP_SETCC = 0x90,
OP2_IMUL_GvEv = 0xAF,
OP2_MOVZX_GvEb = 0xB6,
OP2_MOVZX_GvEw = 0xB7,
OP2_PEXTRW_GdUdIb = 0xC5
} TwoByteOpcodeID;
+    typedef enum {
+        OP3_PINSRD_VsdWsd = 0x22 // pinsrd; follows the OP_2BYTE_ESCAPE, OP_3BYTE_ESCAPE bytes
+    } ThreeByteOpcodeID;
+
TwoByteOpcodeID jccRel32(Condition cond)
{
return (TwoByteOpcodeID)(OP2_JCC_rel32 + cond);
}
TwoByteOpcodeID setccOpcode(Condition cond)
{
return (TwoByteOpcodeID)(OP_SETCC + cond);
@@ -2001,16 +2006,26 @@ public:
{
js::JaegerSpew(js::JSpew_Insns,
IPFX "sqrtsd %s, %s\n", MAYBE_PAD,
nameFPReg(src), nameFPReg(dst));
m_formatter.prefix(PRE_SSE_F2);
m_formatter.twoByteOp(OP2_SQRTSD_VsdWsd, (RegisterID)dst, (RegisterID)src);
}
+    void pinsrd_rr(RegisterID src, XMMRegisterID dst) // SSE4.1 instruction: callers must check CPU support first
+    {
+        js::JaegerSpew(js::JSpew_Insns,
+                       IPFX "pinsrd $1, %s, %s\n", MAYBE_PAD,
+                       nameIReg(src), nameFPReg(dst));
+        m_formatter.prefix(PRE_SSE_66); // the prefix has to precede the escape bytes emitted by threeByteOp
+        m_formatter.threeByteOp(OP3_PINSRD_VsdWsd, (RegisterID)dst, (RegisterID)src); // reg operand = dst, rm operand = src
+        m_formatter.immediate8(0x01); // the $1: src is inserted as dword 1 of dst (presumably bits 32-63)
+    }
+
// Misc instructions:
void int3()
{
js::JaegerSpew(js::JSpew_Insns, IPFX "int3\n", MAYBE_PAD);
m_formatter.oneByteOp(OP_INT3);
}
@@ -2356,16 +2371,26 @@ private:
{
m_buffer.ensureSpace(maxInstructionSize);
m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE);
m_buffer.putByteUnchecked(opcode);
memoryModRM(reg, address);
}
#endif
+        void threeByteOp(ThreeByteOpcodeID opcode, int reg, RegisterID rm)
+        {
+            m_buffer.ensureSpace(maxInstructionSize); // reserve room first; the puts below are unchecked
+            emitRexIfNeeded(reg, 0, rm); // any mandatory prefix (e.g. PRE_SSE_66) is emitted by the caller beforehand
+            m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE); // 0x0F
+            m_buffer.putByteUnchecked(OP_3BYTE_ESCAPE); // 0x3A; fixed, so only opcodes behind this escape can be emitted
+            m_buffer.putByteUnchecked(opcode);
+            registerModRM(reg, rm); // register operands only
+        }
+
#if WTF_CPU_X86_64
// Quad-word-sized operands:
//
// Used to format 64-bit operantions, planting a REX.w prefix.
// When planting d64 or f64 instructions, not requiring a REX.w prefix,
// the normal (non-'64'-postfixed) formatters should be used.
void oneByteOp64(OneByteOpcodeID opcode)
--- a/js/src/methodjit/BaseAssembler.h
+++ b/js/src/methodjit/BaseAssembler.h
@@ -32,16 +32,17 @@
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
+
#if !defined jsjaeger_baseassembler_h__ && defined JS_METHODJIT
#define jsjaeger_baseassembler_h__
#include "jscntxt.h"
#include "jstl.h"
#include "assembler/assembler/MacroAssemblerCodeRef.h"
#include "assembler/assembler/MacroAssembler.h"
#include "assembler/assembler/RepatchBuffer.h"
@@ -176,19 +177,24 @@ static const JSC::MacroAssembler::Regist
#ifdef JS_CPU_X86
void idiv(RegisterID reg) {
m_assembler.cdq();
m_assembler.idivl_r(reg);
}
void fastLoadDouble(RegisterID lo, RegisterID hi, FPRegisterID fpReg) {
-        m_assembler.movd_rr(lo, fpReg);
-        m_assembler.movd_rr(hi, FPRegisters::Temp0);
-        m_assembler.unpcklps_rr(FPRegisters::Temp0, fpReg);
+        if (MacroAssemblerX86Common::getSSEState() >= HasSSE4_1) { // pinsrd is only emitted when SSE4.1 was detected
+            m_assembler.movd_rr(lo, fpReg);
+            m_assembler.pinsrd_rr(hi, fpReg); // merges hi straight into fpReg; no temporary FP register needed
+        } else {
+            m_assembler.movd_rr(lo, fpReg);
+            m_assembler.movd_rr(hi, FPRegisters::Temp0); // fallback: stage hi in Temp0 ...
+            m_assembler.unpcklps_rr(FPRegisters::Temp0, fpReg); // ... then combine it with lo in fpReg
+        }
}
#endif
/*
* Prepares for a stub call.
*/
void * getCallTarget(void *fun) {
#ifdef JS_CPU_ARM