bug 601135. switch x86/x64 to use cvttsd2di. add documentation (+r nick)
author Werner Sharp (wsharp@adobe.com)
Wed, 03 Nov 2010 16:30:00 -0400
changeset 57697 3e76412d374912f45e00f05ddaa91040870bf397
parent 57696 104e20381ef1c09ded64ce4eeb3fcdb00d9c8796
child 57698 635225cbcaea36ad322fa6641a6775390c86d81a
push id 1
push user shaver@mozilla.com
push date Tue, 04 Jan 2011 17:58:04 +0000
bugs 601135
milestone 2.0b8pre
bug 601135. switch x86/x64 to use cvttsd2di. add documentation (+r nick)
js/src/nanojit/LIRopcode.tbl
js/src/nanojit/NativeX64.cpp
js/src/nanojit/NativeX64.h
js/src/nanojit/Nativei386.cpp
js/src/nanojit/Nativei386.h
--- a/js/src/nanojit/LIRopcode.tbl
+++ b/js/src/nanojit/LIRopcode.tbl
@@ -304,17 +304,33 @@ OP___(cmovd,   107, Op3,  D,    1)  // c
 // Conversions
 //---------------------------------------------------------------------------
 OP_64(i2q,     108, Op1,  Q,    1)  // sign-extend int to quad
 OP_64(ui2uq,   109, Op1,  Q,    1)  // zero-extend unsigned int to unsigned quad
 OP_64(q2i,     110, Op1,  I,    1)  // truncate quad to int (removes the high 32 bits)
 
 OP___(i2d,     111, Op1,  D,    1)  // convert int to double
 OP___(ui2d,    112, Op1,  D,    1)  // convert unsigned int to double
-OP___(d2i,     113, Op1,  I,    1)  // convert double to int (no exceptions raised, platform rounding rules)
+
+// The rounding behavior of LIR_d2i is platform specific.
+//
+// Platform     Asm code        Behavior
+// --------     --------        --------
+// x86 w/ x87   fist            uses current FP control word (default is round to nearest)
+// x86 w/ SSE   cvttsd2si       performs round to zero (truncate)
+// x64 (SSE)    cvttsd2si       performs round to zero (truncate)
+// PowerPC                      unsupported
+// ARM          ftosid          round to nearest
+// MIPS         trunc.w.d       performs round to zero (truncate)
+// SH4          frtc            performs round to zero (truncate)
+// SPARC        fdtoi           performs round to zero (truncate)
+//
+// round to zero examples:  1.9 -> 1, 1.1 -> 1, -1.1 -> -1, -1.9 -> -1
+// round to nearest examples: 1.9 -> 2, 1.1 -> 1, -1.1 -> -1, -1.9 -> -2
+OP___(d2i,     113, Op1,  I,    1)  // convert double to int (no exceptions raised)
 
 OP_64(dasq,    114, Op1,  Q,    1)  // interpret the bits of a double as a quad
 OP_64(qasd,    115, Op1,  D,    1)  // interpret the bits of a quad as a double
 
 //---------------------------------------------------------------------------
 // Overflow arithmetic
 //---------------------------------------------------------------------------
 // These all exit if overflow occurred.  The result is valid on either path.
--- a/js/src/nanojit/NativeX64.cpp
+++ b/js/src/nanojit/NativeX64.cpp
@@ -490,16 +490,17 @@ namespace nanojit
     void Assembler::MULSD(   R l, R r)  { emitprr(X64_mulsd,   l,r); asm_output("mulsd %s, %s",   RQ(l),RQ(r)); }
     void Assembler::ADDSD(   R l, R r)  { emitprr(X64_addsd,   l,r); asm_output("addsd %s, %s",   RQ(l),RQ(r)); }
     void Assembler::SUBSD(   R l, R r)  { emitprr(X64_subsd,   l,r); asm_output("subsd %s, %s",   RQ(l),RQ(r)); }
     void Assembler::CVTSQ2SD(R l, R r)  { emitprr(X64_cvtsq2sd,l,r); asm_output("cvtsq2sd %s, %s",RQ(l),RQ(r)); }
     void Assembler::CVTSI2SD(R l, R r)  { emitprr(X64_cvtsi2sd,l,r); asm_output("cvtsi2sd %s, %s",RQ(l),RL(r)); }
     void Assembler::CVTSS2SD(R l, R r)  { emitprr(X64_cvtss2sd,l,r); asm_output("cvtss2sd %s, %s",RQ(l),RL(r)); }
     void Assembler::CVTSD2SS(R l, R r)  { emitprr(X64_cvtsd2ss,l,r); asm_output("cvtsd2ss %s, %s",RL(l),RQ(r)); }
     void Assembler::CVTSD2SI(R l, R r)  { emitprr(X64_cvtsd2si,l,r); asm_output("cvtsd2si %s, %s",RL(l),RQ(r)); }
+    void Assembler::CVTTSD2SI(R l, R r) { emitprr(X64_cvttsd2si,l,r);asm_output("cvttsd2si %s, %s",RL(l),RQ(r));}
     void Assembler::UCOMISD( R l, R r)  { emitprr(X64_ucomisd, l,r); asm_output("ucomisd %s, %s", RQ(l),RQ(r)); }
     void Assembler::MOVQRX(  R l, R r)  { emitprr(X64_movqrx,  r,l); asm_output("movq %s, %s",    RQ(l),RQ(r)); } // Nb: r and l are deliberately reversed within the emitprr() call.
     void Assembler::MOVQXR(  R l, R r)  { emitprr(X64_movqxr,  l,r); asm_output("movq %s, %s",    RQ(l),RQ(r)); }
 
     // MOVI must not affect condition codes!
     void Assembler::MOVI(  R r, I32 i32)    { emitr_imm(X64_movi,  r,i32); asm_output("movl %s, %d",RL(r),i32); }
     void Assembler::ADDLRI(R r, I32 i32)    { emitr_imm(X64_addlri,r,i32); asm_output("addl %s, %d",RL(r),i32); }
     void Assembler::SUBLRI(R r, I32 i32)    { emitr_imm(X64_sublri,r,i32); asm_output("subl %s, %d",RL(r),i32); }
@@ -1140,17 +1141,17 @@ namespace nanojit
     }
 
     void Assembler::asm_d2i(LIns *ins) {
         LIns *a = ins->oprnd1();
         NanoAssert(ins->isI() && a->isD());
 
         Register rr = prepareResultReg(ins, GpRegs);
         Register rb = findRegFor(a, FpRegs);
-        CVTSD2SI(rr, rb);
+        CVTTSD2SI(rr, rb); 
         freeResourcesOf(ins);
     }
 
     void Assembler::asm_cmov(LIns *ins) {
         LIns* cond    = ins->oprnd1();
         LIns* iftrue  = ins->oprnd2();
         LIns* iffalse = ins->oprnd3();
         NanoAssert(cond->isCmp());
--- a/js/src/nanojit/NativeX64.h
+++ b/js/src/nanojit/NativeX64.h
@@ -196,17 +196,18 @@ namespace nanojit
         X64_cmplri  = 0xF881400000000003LL, // 32bit compare r,immI
         X64_cmpqri  = 0xF881480000000003LL, // 64bit compare r,int64(immI)
         X64_cmplr8  = 0x00F8834000000004LL, // 32bit compare r,imm8
         X64_cmpqr8  = 0x00F8834800000004LL, // 64bit compare r,int64(imm8)
         X64_cvtsi2sd= 0xC02A0F40F2000005LL, // convert int32 to double r = (double) b
         X64_cvtsq2sd= 0xC02A0F48F2000005LL, // convert int64 to double r = (double) b
         X64_cvtss2sd= 0xC05A0F40F3000005LL, // convert float to double r = (double) b
         X64_cvtsd2ss= 0xC05A0F40F2000005LL, // convert double to float r = (float) b
-        X64_cvtsd2si= 0xC02D0F40F2000005LL, // convert double to int32 r = (int32) b
+        X64_cvtsd2si= 0xC02D0F40F2000005LL, // convert double to int32 with rounding r = (int32) b
+        X64_cvttsd2si=0xC02C0F40F2000005LL, // convert double to int32 with truncation r = (int32) b
         X64_divsd   = 0xC05E0F40F2000005LL, // divide scalar double r /= b
         X64_mulsd   = 0xC0590F40F2000005LL, // multiply scalar double r *= b
         X64_addsd   = 0xC0580F40F2000005LL, // add scalar double r += b
         X64_idiv    = 0xF8F7400000000003LL, // 32bit signed div (rax = rdx:rax/r, rdx=rdx:rax%r)
         X64_imul    = 0xC0AF0F4000000004LL, // 32bit signed mul r *= b
         X64_imuli   = 0xC069400000000003LL, // 32bit signed mul r = b * immI
         X64_imul8   = 0x00C06B4000000004LL, // 32bit signed mul r = b * imm8
         X64_jmpi    = 0x0000000025FF0006LL, // jump *0(rip)
@@ -499,16 +500,17 @@ namespace nanojit
         void MULSD(Register l, Register r);\
         void ADDSD(Register l, Register r);\
         void SUBSD(Register l, Register r);\
         void CVTSQ2SD(Register l, Register r);\
         void CVTSI2SD(Register l, Register r);\
         void CVTSS2SD(Register l, Register r);\
         void CVTSD2SS(Register l, Register r);\
         void CVTSD2SI(Register l, Register r);\
+        void CVTTSD2SI(Register l, Register r);\
         void UCOMISD(Register l, Register r);\
         void MOVQRX(Register l, Register r);\
         void MOVQXR(Register l, Register r);\
         void MOVI(Register r, int32_t i32);\
         void ADDLRI(Register r, int32_t i32);\
         void SUBLRI(Register r, int32_t i32);\
         void ANDLRI(Register r, int32_t i32);\
         void ORLRI(Register r, int32_t i32);\
--- a/js/src/nanojit/Nativei386.cpp
+++ b/js/src/nanojit/Nativei386.cpp
@@ -713,16 +713,17 @@ namespace nanojit
     inline void Assembler::SSE_STQsib(I32 d, R rb, R ri, I32 scale, R rv) {
         count_stq();
         SSEsib(0x660fd6, rv, d, rb, ri, scale);
         asm_output("movq %d(%s+%s*%c),%s", d, gpn(rb), gpn(ri), SIBIDX(scale), gpn(rv));
     }
 
     inline void Assembler::SSE_CVTSI2SD(R xr, R gr)  { count_fpu(); SSE(0xf20f2a, xr, gr); asm_output("cvtsi2sd %s,%s", gpn(xr), gpn(gr)); }
     inline void Assembler::SSE_CVTSD2SI(R gr, R xr)  { count_fpu(); SSE(0xf20f2d, gr, xr); asm_output("cvtsd2si %s,%s", gpn(gr), gpn(xr)); }
+    inline void Assembler::SSE_CVTTSD2SI(R gr, R xr) { count_fpu(); SSE(0xf20f2c, gr, xr); asm_output("cvttsd2si %s,%s",gpn(gr), gpn(xr)); }
     inline void Assembler::SSE_CVTSD2SS(R xr, R gr)  { count_fpu(); SSE(0xf20f5a, xr, gr); asm_output("cvtsd2ss %s,%s", gpn(xr), gpn(gr)); }
     inline void Assembler::SSE_CVTSS2SD(R xr, R gr)  { count_fpu(); SSE(0xf30f5a, xr, gr); asm_output("cvtss2sd %s,%s", gpn(xr), gpn(gr)); }
     inline void Assembler::SSE_CVTDQ2PD(R d,  R r)   { count_fpu(); SSE(0xf30fe6, d,  r);  asm_output("cvtdq2pd %s,%s", gpn(d), gpn(r)); }
 
     // Move and zero-extend GP reg to XMM reg.
     inline void Assembler::SSE_MOVD(R d, R s) {
         count_mov();
         if (IsXmmReg(s)) {
@@ -2582,17 +2583,17 @@ namespace nanojit
 
     void Assembler::asm_d2i(LIns* ins)
     {
         LIns *lhs = ins->oprnd1();
 
         if (_config.i386_sse2) {
             Register rr = prepareResultReg(ins, GpRegs);
             Register ra = findRegFor(lhs, XmmRegs);
-            SSE_CVTSD2SI(rr, ra);
+            SSE_CVTTSD2SI(rr, ra); 
         } else {
             bool pop = !lhs->isInReg();
             findSpecificRegFor(lhs, FST0);
             if (ins->isInReg())
                 evict(ins);
             int d = findMemFor(ins);
             FIST(pop, d, FP);
         }
--- a/js/src/nanojit/Nativei386.h
+++ b/js/src/nanojit/Nativei386.h
@@ -396,16 +396,17 @@ namespace nanojit
         void SSE_LDQsib(Register r, int32_t d, Register rb, Register ri, int32_t scale); \
         void SSE_LDSSsib(Register r, int32_t d, Register rb, Register ri, int32_t scale); \
         void SSE_STSD(int32_t d, Register b, Register r); \
         void SSE_STQ( int32_t d, Register b, Register r); \
         void SSE_STSS(int32_t d, Register b, Register r); \
         void SSE_STQsib(int32_t d, Register rb, Register ri, int32_t scale, Register rv); \
         void SSE_CVTSI2SD(Register xr, Register gr); \
         void SSE_CVTSD2SI(Register gr, Register xr); \
+        void SSE_CVTTSD2SI(Register gr, Register xr); \
         void SSE_CVTSD2SS(Register xr, Register gr); \
         void SSE_CVTSS2SD(Register xr, Register gr); \
         void SSE_CVTDQ2PD(Register d, Register r); \
         void SSE_MOVD(Register d, Register s); \
         void SSE_MOVSD(Register rd, Register rs); \
         void SSE_ADDSD(Register rd, Register rs); \
         void SSE_ADDSDm(Register r, const double* addr); \
         void SSE_SUBSD(Register rd, Register rs); \