Bug 1279248 - Part 21: Implement the 64bit variant of ToFloatingPoint on x86, r=sunfish
author Hannes Verschore <hv1989@gmail.com>
Fri, 29 Jul 2016 16:53:47 +0200
changeset 349412 0c40b01a4cba292b08577a71d5305325468a9c03
parent 349411 cb47a62a37a46a6081031a8d2fc78770ea4ba3e6
child 349413 c63714ab5d4d33a58bb1dc68e6e5f8cf80d7b39b
push id 1230
push user jlund@mozilla.com
push date Mon, 31 Oct 2016 18:13:35 +0000
treeherder mozilla-release@5e06e3766db2 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers sunfish
bugs 1279248
milestone 50.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1279248 - Part 21: Implement the 64bit variant of ToFloatingPoint on x86, r=sunfish
js/src/jit/x64/LIR-x64.h
js/src/jit/x86-shared/BaseAssembler-x86-shared.h
js/src/jit/x86-shared/Encoding-x86-shared.h
js/src/jit/x86-shared/LIR-x86-shared.h
js/src/jit/x86/Assembler-x86.h
js/src/jit/x86/BaseAssembler-x86.h
js/src/jit/x86/CodeGenerator-x86.cpp
js/src/jit/x86/CodeGenerator-x86.h
js/src/jit/x86/LOpcodes-x86.h
js/src/jit/x86/Lowering-x86.cpp
--- a/js/src/jit/x64/LIR-x64.h
+++ b/js/src/jit/x64/LIR-x64.h
@@ -159,30 +159,12 @@ class LWasmTruncateToInt64 : public LIns
         return mir_->toWasmTruncateToInt64();
     }
 
     const LDefinition* temp() {
         return getTemp(0);
     }
 };
 
-class LInt64ToFloatingPoint : public LInstructionHelper<1, INT64_PIECES, 0>
-{
-  public:
-    LIR_HEADER(Int64ToFloatingPoint);
-
-    explicit LInt64ToFloatingPoint(const LInt64Allocation& in) {
-        setInt64Operand(0, in);
-    }
-
-    MInt64ToFloatingPoint* mir() const {
-        return mir_->toInt64ToFloatingPoint();
-    }
-
-    const LDefinition* temp() {
-        return getTemp(0);
-    }
-};
-
 } // namespace jit
 } // namespace js
 
 #endif /* jit_x64_LIR_x64_h */
--- a/js/src/jit/x86-shared/BaseAssembler-x86-shared.h
+++ b/js/src/jit/x86-shared/BaseAssembler-x86-shared.h
@@ -1005,16 +1005,22 @@ public:
         spew("fld        " MEM_ob, ADDR_ob(offset, base));
         m_formatter.oneByteOp(OP_FPU6, offset, base, FPU6_OP_FLD);
     }
     void fld32_m(int32_t offset, RegisterID base)
     {
         spew("fld        " MEM_ob, ADDR_ob(offset, base));
         m_formatter.oneByteOp(OP_FPU6_F32, offset, base, FPU6_OP_FLD);
     }
+    void faddp()
+    { // x87 FADDP ST(1), ST(0): ST(1) += ST(0), then pop the x87 stack.
+        spew("faddp      ");
+        m_formatter.oneByteOp(OP_FPU6_ADDP);    // 0xDE escape byte
+        m_formatter.oneByteOp(OP_ADDP_ST0_ST1); // 0xC1 second byte, emitted raw (not a real opcode)
+    }
     void fisttp_m(int32_t offset, RegisterID base)
     {
         spew("fisttp     " MEM_ob, ADDR_ob(offset, base));
         m_formatter.oneByteOp(OP_FPU6, offset, base, FPU6_OP_FISTTP);
     }
     void fistp_m(int32_t offset, RegisterID base)
     {
         spew("fistp      " MEM_ob, ADDR_ob(offset, base));
--- a/js/src/jit/x86-shared/Encoding-x86-shared.h
+++ b/js/src/jit/x86-shared/Encoding-x86-shared.h
@@ -125,27 +125,29 @@ enum OneByteOpcodeID {
     OP_CDQ                          = 0x99,
     OP_MOV_EAXOv                    = 0xA1,
     OP_MOV_OvEAX                    = 0xA3,
     OP_TEST_EAXIb                   = 0xA8,
     OP_TEST_EAXIv                   = 0xA9,
     OP_MOV_EbIb                     = 0xB0,
     OP_MOV_EAXIv                    = 0xB8,
     OP_GROUP2_EvIb                  = 0xC1,
+    OP_ADDP_ST0_ST1                 = 0xC1,
     OP_RET_Iz                       = 0xC2,
     PRE_VEX_C4                      = 0xC4,
     PRE_VEX_C5                      = 0xC5,
     OP_RET                          = 0xC3,
     OP_GROUP11_EvIb                 = 0xC6,
     OP_GROUP11_EvIz                 = 0xC7,
     OP_INT3                         = 0xCC,
     OP_GROUP2_Ev1                   = 0xD1,
     OP_GROUP2_EvCL                  = 0xD3,
     OP_FPU6                         = 0xDD,
     OP_FPU6_F32                     = 0xD9,
+    OP_FPU6_ADDP                    = 0xDE,
     OP_FILD                         = 0xDF,
     OP_CALL_rel32                   = 0xE8,
     OP_JMP_rel32                    = 0xE9,
     OP_JMP_rel8                     = 0xEB,
     PRE_LOCK                        = 0xF0,
     PRE_SSE_F2                      = 0xF2,
     PRE_SSE_F3                      = 0xF3,
     OP_HLT                          = 0xF4,
@@ -372,16 +374,18 @@ enum GroupOpcodeID {
     GROUP3_OP_IDIV = 7,
 
     GROUP5_OP_INC   = 0,
     GROUP5_OP_DEC   = 1,
     GROUP5_OP_CALLN = 2,
     GROUP5_OP_JMPN  = 4,
     GROUP5_OP_PUSH  = 6,
 
+    FILD_OP_64      = 5,
+
     FPU6_OP_FLD     = 0,
     FPU6_OP_FISTTP  = 1,
     FPU6_OP_FSTP    = 3,
     FPU6_OP_FLDCW   = 5,
     FPU6_OP_FISTP   = 7,
 
     GROUP11_MOV = 0
 };
--- a/js/src/jit/x86-shared/LIR-x86-shared.h
+++ b/js/src/jit/x86-shared/LIR-x86-shared.h
@@ -380,12 +380,26 @@ class LSimdValueFloat32x4 : public LInst
         setTemp(0, copyY);
     }
 
     MSimdValueX4* mir() const {
         return mir_->toSimdValueX4();
     }
 };
 
+class LInt64ToFloatingPoint : public LInstructionHelper<1, INT64_PIECES, 0>
+{ // LIR instruction paired with MInt64ToFloatingPoint; its input is a single int64 allocation.
+  public:
+    LIR_HEADER(Int64ToFloatingPoint);
+
+    explicit LInt64ToFloatingPoint(const LInt64Allocation& in) {
+        setInt64Operand(0, in); // The int64 input is operand 0.
+    }
+
+    MInt64ToFloatingPoint* mir() const { // Returns mir_ as an MInt64ToFloatingPoint.
+        return mir_->toInt64ToFloatingPoint();
+    }
+};
+
 } // namespace jit
 } // namespace js
 
 #endif /* jit_x86_shared_LIR_x86_shared_h */
--- a/js/src/jit/x86/Assembler-x86.h
+++ b/js/src/jit/x86/Assembler-x86.h
@@ -340,16 +340,19 @@ class Assembler : public AssemblerX86Sha
         switch (src.kind()) {
           case Operand::MEM_REG_DISP:
             masm.fstp32_m(src.disp(), src.base());
             break;
           default:
             MOZ_CRASH("unexpected operand kind");
         }
     }
+    void faddp() { // Thin wrapper: forwards to the base assembler's faddp().
+        masm.faddp();
+    }
 
     void cmpl(ImmWord rhs, Register lhs) {
         masm.cmpl_ir(rhs.value, lhs.encoding());
     }
     void cmpl(ImmPtr rhs, Register lhs) {
         cmpl(ImmWord(uintptr_t(rhs.value)), lhs);
     }
     void cmpl(ImmGCPtr rhs, Register lhs) {
@@ -451,16 +454,26 @@ class Assembler : public AssemblerX86Sha
           case Operand::MEM_ADDRESS32:
             masm.vpunpckldq_mr(src1.address(), src0.encoding(), dest.encoding());
             break;
           default:
             MOZ_CRASH("unexpected operand kind");
         }
     }
 
+    void fild(const Operand& src) { // Forwards a base+displacement memory operand to masm.fild_m().
+        switch (src.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.fild_m(src.disp(), src.base());
+            break;
+          default: // Any other operand kind is rejected with a crash.
+            MOZ_CRASH("unexpected operand kind");
+        }
+    }
+
     void jmp(ImmPtr target, Relocation::Kind reloc = Relocation::HARDCODED) {
         JmpSrc src = masm.jmp();
         addPendingJump(src, target, reloc);
     }
     void j(Condition cond, ImmPtr target,
            Relocation::Kind reloc = Relocation::HARDCODED) {
         JmpSrc src = masm.jCC(static_cast<X86Encoding::Condition>(cond));
         addPendingJump(src, target, reloc);
--- a/js/src/jit/x86/BaseAssembler-x86.h
+++ b/js/src/jit/x86/BaseAssembler-x86.h
@@ -168,16 +168,23 @@ class BaseAssemblerX86 : public BaseAsse
     {
         twoByteOpSimd("vpunpckldq", VEX_PD, OP2_PUNPCKLDQ, offset, base, src0, dst);
     }
     void vpunpckldq_mr(const void* addr, XMMRegisterID src0, XMMRegisterID dst)
     {
         twoByteOpSimd("vpunpckldq", VEX_PD, OP2_PUNPCKLDQ, addr, src0, dst);
     }
 
+    // x87 FILD from a 64-bit integer at offset(base): opcode OP_FILD with /FILD_OP_64.
+    void fild_m(int32_t offset, RegisterID base)
+    {
+        spew("fild       " MEM_ob, ADDR_ob(offset, base));
+        m_formatter.oneByteOp(OP_FILD, offset, base, FILD_OP_64);
+    }
+
     // Misc instructions:
 
     void pusha()
     {
         spew("pusha");
         m_formatter.oneByteOp(OP_PUSHA);
     }
 
--- a/js/src/jit/x86/CodeGenerator-x86.cpp
+++ b/js/src/jit/x86/CodeGenerator-x86.cpp
@@ -1552,8 +1552,45 @@ CodeGeneratorX86::visitWasmTruncateToInt
             masm.truncateDoubleToInt64(Address(esp, 0), Address(esp, 0), temp);
     }
 
     // Load value into float register.
     masm.load64(Address(esp, 0), output);
 
     masm.freeStack(2*sizeof(int32_t));
 }
+
+void
+CodeGeneratorX86::visitInt64ToFloatingPoint(LInt64ToFloatingPoint* lir)
+{ // Converts an int64 register pair to Double/Float32 by going through the x87 stack.
+    Register64 input = ToRegister64(lir->getInt64Operand(0));
+    FloatRegister output = ToFloatRegister(lir->output());
+
+    MIRType outputType = lir->mir()->type();
+    MOZ_ASSERT(outputType == MIRType::Double || outputType == MIRType::Float32);
+
+    // Zero the output register to break dependencies, see convertInt32ToDouble.
+    if (outputType == MIRType::Double)
+        masm.zeroDouble(output);
+    else
+        masm.zeroFloat32(output);
+
+    masm.Push(input.high); // Pushed first so the low word ends up at [esp].
+    masm.Push(input.low);
+    masm.fild(Operand(esp, 0)); // Loads the 8-byte slot as a signed 64-bit integer.
+
+    if (lir->mir()->isUnsigned()) {
+        Label notNegative; // Taken when the high word's sign bit is clear; no fix-up needed.
+        masm.branch32(Assembler::NotSigned, input.high, Imm32(0), &notNegative);
+        double add_constant = 18446744073709551616.0; // 2^64
+        masm.store64(Imm64(mozilla::BitwiseCast<uint64_t>(add_constant)), Address(esp, 0));
+        masm.fld(Operand(esp, 0)); // The stack slot now holds the 2^64 constant.
+        masm.faddp(); // Adds 2^64 to the value that fild read as negative.
+        masm.bind(&notNegative);
+    }
+
+    masm.fstp(Operand(esp, 0)); // Write the x87 result back to the slot...
+    masm.vmovsd(Address(esp, 0), output); // ...and reload it into the output register.
+    masm.freeStack(2*sizeof(intptr_t)); // Releases the two words pushed above.
+
+    if (outputType == MIRType::Float32) // NOTE(review): int64 -> double -> float32 may round twice; confirm.
+        masm.convertDoubleToFloat32(output, output);
+}
--- a/js/src/jit/x86/CodeGenerator-x86.h
+++ b/js/src/jit/x86/CodeGenerator-x86.h
@@ -81,16 +81,17 @@ class CodeGeneratorX86 : public CodeGene
     void visitAsmReinterpretFromI64(LAsmReinterpretFromI64* lir);
     void visitAsmReinterpretToI64(LAsmReinterpretToI64* lir);
     void visitExtendInt32ToInt64(LExtendInt32ToInt64* lir);
     void visitWrapInt64ToInt32(LWrapInt64ToInt32* lir);
     void visitClzI64(LClzI64* lir);
     void visitCtzI64(LCtzI64* lir);
     void visitNotI64(LNotI64* lir);
     void visitWasmTruncateToInt64(LWasmTruncateToInt64* lir);
+    void visitInt64ToFloatingPoint(LInt64ToFloatingPoint* lir);
 
   private:
     void asmJSAtomicComputeAddress(Register addrTemp, Register ptrReg,
                                    const MWasmMemoryAccess* access);
 };
 
 typedef CodeGeneratorX86 CodeGeneratorSpecific;
 
--- a/js/src/jit/x86/LOpcodes-x86.h
+++ b/js/src/jit/x86/LOpcodes-x86.h
@@ -13,11 +13,12 @@
     _(BoxFloatingPoint)         \
     _(DivOrModConstantI)        \
     _(SimdValueInt32x4)         \
     _(SimdValueFloat32x4)       \
     _(UDivOrMod)                \
     _(UDivOrModConstant)        \
     _(UDivOrModI64)             \
     _(DivOrModI64)              \
-    _(WasmTruncateToInt64)
+    _(WasmTruncateToInt64)      \
+    _(Int64ToFloatingPoint)
 
 #endif /* jit_x86_LOpcodes_x86_h */
--- a/js/src/jit/x86/Lowering-x86.cpp
+++ b/js/src/jit/x86/Lowering-x86.cpp
@@ -596,17 +596,21 @@ LIRGeneratorX86::visitWasmTruncateToInt6
     LDefinition temp2 = tempDouble();
     LDefinition maybeTemp = ins->isUnsigned() ? tempDouble() : LDefinition::BogusTemp();
     defineInt64(new(alloc()) LWasmTruncateToInt64(useRegister(opd), temp1, temp2, maybeTemp), ins);
 }
 
 void
 LIRGeneratorX86::visitInt64ToFloatingPoint(MInt64ToFloatingPoint* ins)
 {
-    MOZ_CRASH("NY");
+    MDefinition* opd = ins->input(); // Asserted below: Int64 input, floating-point result.
+    MOZ_ASSERT(opd->type() == MIRType::Int64);
+    MOZ_ASSERT(IsFloatingPointType(ins->type()));
+
+    define(new(alloc()) LInt64ToFloatingPoint(useInt64Register(opd)), ins);
 }
 
 void
 LIRGeneratorX86::visitExtendInt32ToInt64(MExtendInt32ToInt64* ins)
 {
     if (ins->isUnsigned()) {
         defineInt64(new(alloc()) LExtendInt32ToInt64(useRegisterAtStart(ins->input())), ins);
     } else {