Bug 652054: tweak register allocation for 64-bit stores, r=njn
author       David Mandelin <dmandelin@mozilla.com>
date         Thu, 19 May 2011 10:55:36 -0700
changeset    77517 a9619f57c8a1fc99981e136e2588e45ed1388067
parent       77516 5f430b3d9aa4ac8c33a4123f3554a897d966a8ef
child        77518 f99e149090f5e16746e55878c65837df61a5939d
push id      78
push user    clegnitto@mozilla.com
push date    Fri, 16 Dec 2011 17:32:24 +0000
reviewers    njn
bugs         652054
milestone    9.0a1
js/src/jit-test/tests/basic/bug652054.js
js/src/nanojit/NativeX64.cpp
new file mode 100644
--- /dev/null
+++ b/js/src/jit-test/tests/basic/bug652054.js
@@ -0,0 +1,55 @@
+var M4x4 = {};
+M4x4.mul = function M4x4_mul(a, b, r) {
+    a11 = a[0]
+    a21 = a[1]
+    a31 = a[2]
+    a12 = a[4]
+    a22 = a[5]
+    a32 = a[6]
+    a13 = a[8]
+    a23 = a[9]
+    a33 = a[10]
+    a14 = a[12]
+    a24 = a[13]
+    a34 = a[14]
+    b[3]
+    b[4]
+    b13 = b[8]
+    b23 = b[9]
+    b33 = b[10]
+    b43 = b[11]
+    r[8] = a11 * b13 + a12 * b23 + a13 * b33 + a14 * b43
+    r[9] = a21 * b13 + a22 * b23 + a23 * b33 + a24 * b43
+    r[10] = a31 * b13 + a32 * b23 + a33 * b33 + a34 * b43
+    return r;
+};
+M4x4.scale3 = function M4x4_scale3(x, y, z, m) {
+    m[0] *= x;
+    m[3] *= x;
+    m[4] *= y;
+    m[11] *= z;
+};
+M4x4.makeLookAt = function M4x4_makeLookAt() {
+    tm1 = Float32Array(16);
+    tm2 = Float32Array(16);
+    r = new Float32Array(16)
+    return M4x4.mul(tm1, tm2, r);
+};
+var jellyfish = {};
+jellyfish.order = [];
+function jellyfishInstance() {}
+jellyfishInstance.prototype.drawShadow = function () {
+    pMatrix = M4x4.makeLookAt();
+    M4x4.mul(M4x4.makeLookAt(), pMatrix, pMatrix);
+    M4x4.scale3(6, 180, 0, pMatrix);
+}
+function drawScene() {
+    jellyfish.order.push([0, 0])
+    jellyfish[0] = new jellyfishInstance()
+    for (var i = 0, j = 0; i < jellyfish.count, j < 30; ++j) {
+        jellyfish.order[i][0]
+        jellyfish[0].drawShadow();
+    }
+}
+drawScene();
+
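(Each store into a Float32Array element narrows a JS double to a single-precision float, which is presumably what drives the compiled trace through the LIR_std2f path patched below.)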
--- a/js/src/nanojit/NativeX64.cpp
+++ b/js/src/nanojit/NativeX64.cpp
@@ -488,17 +488,17 @@ namespace nanojit
     void Assembler::XORPS(   R l, R r)  { emitrr(X64_xorps,    l,r); asm_output("xorps %s, %s",   RQ(l),RQ(r)); }
     void Assembler::DIVSD(   R l, R r)  { emitprr(X64_divsd,   l,r); asm_output("divsd %s, %s",   RQ(l),RQ(r)); }
     void Assembler::MULSD(   R l, R r)  { emitprr(X64_mulsd,   l,r); asm_output("mulsd %s, %s",   RQ(l),RQ(r)); }
     void Assembler::ADDSD(   R l, R r)  { emitprr(X64_addsd,   l,r); asm_output("addsd %s, %s",   RQ(l),RQ(r)); }
     void Assembler::SUBSD(   R l, R r)  { emitprr(X64_subsd,   l,r); asm_output("subsd %s, %s",   RQ(l),RQ(r)); }
     void Assembler::CVTSQ2SD(R l, R r)  { emitprr(X64_cvtsq2sd,l,r); asm_output("cvtsq2sd %s, %s",RQ(l),RQ(r)); }
     void Assembler::CVTSI2SD(R l, R r)  { emitprr(X64_cvtsi2sd,l,r); asm_output("cvtsi2sd %s, %s",RQ(l),RL(r)); }
     void Assembler::CVTSS2SD(R l, R r)  { emitprr(X64_cvtss2sd,l,r); asm_output("cvtss2sd %s, %s",RQ(l),RL(r)); }
-    void Assembler::CVTSD2SS(R l, R r)  { emitprr(X64_cvtsd2ss,l,r); asm_output("cvtsd2ss %s, %s",RL(l),RQ(r)); }
+    void Assembler::CVTSD2SS(R l, R r)  { emitprr(X64_cvtsd2ss,l,r); asm_output("cvtsd2ss %s, %s",RQ(l),RQ(r)); }
     void Assembler::CVTSD2SI(R l, R r)  { emitprr(X64_cvtsd2si,l,r); asm_output("cvtsd2si %s, %s",RL(l),RQ(r)); }
     void Assembler::CVTTSD2SI(R l, R r) { emitprr(X64_cvttsd2si,l,r);asm_output("cvttsd2si %s, %s",RL(l),RQ(r));}
     void Assembler::UCOMISD( R l, R r)  { emitprr(X64_ucomisd, l,r); asm_output("ucomisd %s, %s", RQ(l),RQ(r)); }
     void Assembler::MOVQRX(  R l, R r)  { emitprr(X64_movqrx,  r,l); asm_output("movq %s, %s",    RQ(l),RQ(r)); } // Nb: r and l are deliberately reversed within the emitprr() call.
     void Assembler::MOVQXR(  R l, R r)  { emitprr(X64_movqxr,  l,r); asm_output("movq %s, %s",    RQ(l),RQ(r)); }
 
     // MOVI must not affect condition codes!
     void Assembler::MOVI(  R r, I32 i32)    { emitr_imm(X64_movi,  r,i32); asm_output("movl %s, %d",RL(r),i32); }
@@ -1722,19 +1722,22 @@ namespace nanojit
             }
             case LIR_std: {
                 Register b = getBaseReg(base, d, BaseRegs);
                 Register r = findRegFor(value, FpRegs);
                 MOVSDMR(r, d, b);   // xmm store
                 break;
             }
             case LIR_std2f: {
-                Register b = getBaseReg(base, d, BaseRegs);
                 Register r = findRegFor(value, FpRegs);
                 Register t = registerAllocTmp(FpRegs & ~rmask(r));
+                // Here, it is safe to call getBaseReg after registerAllocTmp
+                // because BaseRegs does not overlap with FpRegs, so getBaseReg
+                // will not allocate register |t|.
+                Register b = getBaseReg(base, d, BaseRegs);
 
                 MOVSSMR(t, d, b);   // store
                 CVTSD2SS(t, r);     // cvt to single-precision
                 XORPS(t);           // break dependency chains
                 break;
             }
             default:
                 NanoAssertMsg(0, "asm_store64 should never receive this LIR opcode");
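The new comment in the LIR_std2f case states the invariant that makes the reordering legal: an allocation can only return, or evict, registers from the set it is given, so two allocation calls whose register sets are disjoint can be ordered freely. A minimal standalone sketch of that reasoning, assuming a simplified mask-based allocator (hypothetical types, masks, and alloc(); not nanojit's actual API):

#include <cassert>
#include <cstdint>

typedef uint32_t RegisterMask;

static const RegisterMask BaseRegs = 0x0000ffffu;  // stand-in for the GPRs
static const RegisterMask FpRegs   = 0xffff0000u;  // stand-in for the XMM regs

struct Allocator {
    RegisterMask free_;
    Allocator() : free_(BaseRegs | FpRegs) {}

    // Hand out a free register from |allow|. A real allocator would evict
    // an in-use register instead of asserting, but the eviction victim
    // would still come from |allow|.
    int alloc(RegisterMask allow) {
        RegisterMask avail = free_ & allow;
        assert(avail != 0);
        int r = 0;
        while (!(avail & (1u << r)))   // lowest free register in the set
            ++r;
        free_ &= ~(1u << r);
        return r;
    }
};

int main() {
    Allocator a;
    int r = a.alloc(FpRegs);               // register holding |value|
    int t = a.alloc(FpRegs & ~(1u << r));  // temp for the cvtsd2ss result
    // Safe even though |t| is already live: BaseRegs and FpRegs are
    // disjoint, so this allocation can neither return nor evict |t| or |r|.
    int b = a.alloc(BaseRegs);
    assert((BaseRegs & FpRegs) == 0);
    assert(t != r && b != t && b != r);
    return 0;
}

Under the old ordering, getBaseReg ran before the FP-side allocations; the patch moves it last, and the comment records why that is harmless on x64, where the base register always comes from the GPR file and the value and temp always come from the XMM file.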