Bug 869510 - x86 assembler spew improvements. r=sstangl
authorDan Gohman <sunfish@google.com>
Tue, 07 May 2013 11:39:33 -0700
changeset 142098 880dc52ab5192306c14a84137057413a8d35a9ae
parent 142097 e3dcea95f1613007334844001faf1980e428dc52
child 142099 05762bbaf26800ed44e9020540b6397a7700e1b0
push id2579
push userakeybl@mozilla.com
push dateMon, 24 Jun 2013 18:52:47 +0000
treeherdermozilla-beta@b69b7de8a05a [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewerssstangl
bugs869510
milestone23.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 869510 - x86 assembler spew improvements. r=sstangl
js/src/assembler/assembler/X86Assembler.h
--- a/js/src/assembler/assembler/X86Assembler.h
+++ b/js/src/assembler/assembler/X86Assembler.h
@@ -500,17 +500,17 @@ public:
         } else {
             m_formatter.oneByteOp(OP_GROUP1_EvIz, GROUP1_OP_ADD, dst);
             m_formatter.immediate32(imm);
         }
     }
 
     void addl_im(int imm, int offset, RegisterID base)
     {
-        spew("addl       %d, %s0x%x(%s)",
+        spew("addl       $%d, %s0x%x(%s)",
              imm, PRETTY_PRINT_OFFSET(offset), nameIReg(8,base));
         if (CAN_SIGN_EXTEND_8_32(imm)) {
             m_formatter.oneByteOp(OP_GROUP1_EvIb, GROUP1_OP_ADD, base, offset);
             m_formatter.immediate8(imm);
         } else {
             m_formatter.oneByteOp(OP_GROUP1_EvIz, GROUP1_OP_ADD, base, offset);
             m_formatter.immediate32(imm);
         }
@@ -901,17 +901,17 @@ public:
         } else {
             m_formatter.oneByteOp(OP_GROUP1_EvIz, GROUP1_OP_XOR, base, offset);
             m_formatter.immediate32(imm);
         }
     }
 
     void xorl_ir(int imm, RegisterID dst)
     {
-        spew("xorl       %d, %s",
+        spew("xorl       $%d, %s",
              imm, nameIReg(4,dst));
         if (CAN_SIGN_EXTEND_8_32(imm)) {
             m_formatter.oneByteOp(OP_GROUP1_EvIb, GROUP1_OP_XOR, dst);
             m_formatter.immediate8(imm);
         } else {
             m_formatter.oneByteOp(OP_GROUP1_EvIz, GROUP1_OP_XOR, dst);
             m_formatter.immediate32(imm);
         }
@@ -922,17 +922,17 @@ public:
     {
         spew("xorq       %s, %s",
              nameIReg(8,src), nameIReg(8, dst));
         m_formatter.oneByteOp64(OP_XOR_EvGv, src, dst);
     }
 
     void xorq_ir(int imm, RegisterID dst)
     {
-        spew("xorq       %d, %s",
+        spew("xorq       $%d, %s",
              imm, nameIReg(8,dst));
         if (CAN_SIGN_EXTEND_8_32(imm)) {
             m_formatter.oneByteOp64(OP_GROUP1_EvIb, GROUP1_OP_XOR, dst);
             m_formatter.immediate8(imm);
         } else {
             m_formatter.oneByteOp64(OP_GROUP1_EvIz, GROUP1_OP_XOR, dst);
             m_formatter.immediate32(imm);
         }
@@ -1041,17 +1041,17 @@ public:
     {
         spew("imull       %s0x%x(%s), %s",
              PRETTY_PRINT_OFFSET(offset), nameIReg(4,base), nameIReg(4,dst));
         m_formatter.twoByteOp(OP2_IMUL_GvEv, dst, base, offset);
     }
 
     void imull_i32r(RegisterID src, int32_t value, RegisterID dst)
     {
-        spew("imull      %d, %s, %s",
+        spew("imull      $%d, %s, %s",
              value, nameIReg(4, src), nameIReg(4, dst));
         m_formatter.oneByteOp(OP_IMUL_GvEvIz, dst, src);
         m_formatter.immediate32(value);
     }
 
     void idivl_r(RegisterID divisor)
     {
         spew("idivl      %s", 
@@ -1130,18 +1130,18 @@ public:
     void cmpb_im(int imm, int offset, RegisterID base, RegisterID index, int scale)
     {
         m_formatter.oneByteOp(OP_GROUP1_EbIb, GROUP1_OP_CMP, base, index, scale, offset);
         m_formatter.immediate8(imm);
     }
 
     void cmpl_im(int imm, int offset, RegisterID base, RegisterID index, int scale)
     {
-        spew("cmpl       %d, %d(%s,%s,%d)",
-             imm, offset, nameIReg(4,base), nameIReg(4,index), scale);
+        spew("cmpl       $%d, %d(%s,%s,%d)",
+             imm, offset, nameIReg(4,base), nameIReg(4,index), 1<<scale);
         if (CAN_SIGN_EXTEND_8_32(imm)) {
             m_formatter.oneByteOp(OP_GROUP1_EvIb, GROUP1_OP_CMP, base, index, scale, offset);
             m_formatter.immediate8(imm);
         } else {
             m_formatter.oneByteOp(OP_GROUP1_EvIz, GROUP1_OP_CMP, base, index, scale, offset);
             m_formatter.immediate32(imm);
         }
     }
@@ -1173,17 +1173,17 @@ public:
     {
         spew("cmpq       %d(%s), %s",
              offset, nameIReg(8, base), nameIReg(8, src));
         m_formatter.oneByteOp64(OP_CMP_GvEv, src, base, offset);
     }
 
     void cmpq_ir(int imm, RegisterID dst)
     {
-        spew("cmpq       %d, %s",
+        spew("cmpq       $%d, %s",
              imm, nameIReg(8, dst));
         if (CAN_SIGN_EXTEND_8_32(imm)) {
             m_formatter.oneByteOp64(OP_GROUP1_EvIb, GROUP1_OP_CMP, dst);
             m_formatter.immediate8(imm);
         } else {
             m_formatter.oneByteOp64(OP_GROUP1_EvIz, GROUP1_OP_CMP, dst);
             m_formatter.immediate32(imm);
         }
@@ -1438,25 +1438,25 @@ public:
     {
         FIXME_INSN_PRINTING;
         m_formatter.oneByteOp_disp32(OP_MOV_EvGv, src, base, offset);
     }
 
     void movw_rm(RegisterID src, int offset, RegisterID base, RegisterID index, int scale)
     {
         spew("movw       %s, %d(%s,%s,%d)", 
-             nameIReg(2, src), offset, nameIReg(base), nameIReg(index), scale);
+             nameIReg(2, src), offset, nameIReg(base), nameIReg(index), 1<<scale);
         m_formatter.prefix(PRE_OPERAND_SIZE);
         m_formatter.oneByteOp(OP_MOV_EvGv, src, base, index, scale, offset);
     }
 
     void movl_rm(RegisterID src, int offset, RegisterID base, RegisterID index, int scale)
     {
         spew("movl       %s, %d(%s,%s,%d)", 
-             nameIReg(4, src), offset, nameIReg(base), nameIReg(index), scale);
+             nameIReg(4, src), offset, nameIReg(base), nameIReg(index), 1<<scale);
         m_formatter.oneByteOp(OP_MOV_EvGv, src, base, index, scale, offset);
     }
 
     void movl_mEAX(void* addr)
     {
         FIXME_INSN_PRINTING;
         m_formatter.oneByteOp(OP_MOV_EAXOv);
 #if WTF_CPU_X86_64
@@ -1486,17 +1486,17 @@ public:
              int(base), nameIReg(index), scale, nameIReg(dst));
         m_formatter.oneByteOp_disp32(OP_MOV_GvEv, dst, index, scale, int(base));
     }
 #endif
 
     void movl_mr(int offset, RegisterID base, RegisterID index, int scale, RegisterID dst)
     {
         spew("movl       %d(%s,%s,%d), %s",
-             offset, nameIReg(base), nameIReg(index), scale, nameIReg(4, dst));
+             offset, nameIReg(base), nameIReg(index), 1<<scale, nameIReg(4, dst));
         m_formatter.oneByteOp(OP_MOV_GvEv, dst, base, index, scale, offset);
     }
 
     void movl_i32r(int imm, RegisterID dst)
     {
         spew("movl       $0x%x, %s",
              imm, nameIReg(dst));
         m_formatter.oneByteOp(OP_MOV_EAXIv, dst);
@@ -1509,17 +1509,17 @@ public:
              imm, PRETTY_PRINT_OFFSET(offset), nameIReg(base));
         m_formatter.oneByteOp(OP_GROUP11_EvIb, GROUP11_MOV, base, offset);
         m_formatter.immediate8(imm);
     }
 
     void movb_i8m(int imm, int offset, RegisterID base, RegisterID index, int scale)
     {
         spew("movb       $0x%x, %d(%s,%s,%d)",
-             imm, offset, nameIReg(base), nameIReg(index), scale);
+             imm, offset, nameIReg(base), nameIReg(index), 1<<scale);
         m_formatter.oneByteOp(OP_GROUP11_EvIb, GROUP11_MOV, base, index, scale, offset);
         m_formatter.immediate8(imm);
     }
 
     void movw_i16m(int imm, int offset, RegisterID base)
     {
         spew("movw       $0x%x, %s0x%x(%s)",
              imm, PRETTY_PRINT_OFFSET(offset), nameIReg(base));
@@ -1534,26 +1534,26 @@ public:
              imm, PRETTY_PRINT_OFFSET(offset), nameIReg(base));
         m_formatter.oneByteOp(OP_GROUP11_EvIz, GROUP11_MOV, base, offset);
         m_formatter.immediate32(imm);
     }
 
     void movw_i16m(int imm, int offset, RegisterID base, RegisterID index, int scale)
     {
         spew("movw       $0x%x, %d(%s,%s,%d)",
-             imm, offset, nameIReg(base), nameIReg(index), scale);
+             imm, offset, nameIReg(base), nameIReg(index), 1<<scale);
         m_formatter.prefix(PRE_OPERAND_SIZE);
         m_formatter.oneByteOp(OP_GROUP11_EvIz, GROUP11_MOV, base, index, scale, offset);
         m_formatter.immediate16(imm);
     }
 
     void movl_i32m(int imm, int offset, RegisterID base, RegisterID index, int scale)
     {
         spew("movl       $0x%x, %d(%s,%s,%d)",
-             imm, offset, nameIReg(base), nameIReg(index), scale);
+             imm, offset, nameIReg(base), nameIReg(index), 1<<scale);
         m_formatter.oneByteOp(OP_GROUP11_EvIz, GROUP11_MOV, base, index, scale, offset);
         m_formatter.immediate32(imm);
     }
 
     void movl_EAXm(void* addr)
     {
         FIXME_INSN_PRINTING;
         m_formatter.oneByteOp(OP_MOV_OvEAX);
@@ -1617,24 +1617,24 @@ public:
     {
         FIXME_INSN_PRINTING;
         m_formatter.oneByteOp64_disp32(OP_MOV_GvEv, dst, base, offset);
     }
 
     void movq_mr(int offset, RegisterID base, RegisterID index, int scale, RegisterID dst)
     {
         spew("movq       %d(%s,%s,%d), %s",
-             offset, nameIReg(base), nameIReg(index), scale, nameIReg(8,dst));
+             offset, nameIReg(base), nameIReg(index), 1<<scale, nameIReg(8,dst));
         m_formatter.oneByteOp64(OP_MOV_GvEv, dst, base, index, scale, offset);
     }
 
     void leaq_mr(int offset, RegisterID base, RegisterID index, int scale, RegisterID dst)
     {
         spew("leaq       %d(%s,%s,%d), %s",
-             offset, nameIReg(base), nameIReg(index), scale, nameIReg(8,dst)),
+             offset, nameIReg(base), nameIReg(index), 1<<scale, nameIReg(8,dst)),
         m_formatter.oneByteOp64(OP_LEA, dst, base, index, scale, offset);
     }
 
     void movq_i32m(int imm, int offset, RegisterID base)
     {
         spew("movq       $%d, %s0x%x(%s)",
              imm, PRETTY_PRINT_OFFSET(offset), nameIReg(base));
         m_formatter.oneByteOp64(OP_GROUP11_EvIz, GROUP11_MOV, base, offset);
@@ -1733,17 +1733,17 @@ public:
         spew("movb       %s, %s0x%x(%s)",
              nameIReg(1, src), PRETTY_PRINT_OFFSET(offset), nameIReg(base));
         m_formatter.oneByteOp8_disp32(OP_MOV_EbGv, src, base, offset);
     }
 
     void movb_rm(RegisterID src, int offset, RegisterID base, RegisterID index, int scale)
     {
         spew("movb       %s, %d(%s,%s,%d)",
-             nameIReg(1, src), offset, nameIReg(base), nameIReg(index), scale);
+             nameIReg(1, src), offset, nameIReg(base), nameIReg(index), 1<<scale);
         m_formatter.oneByteOp8(OP_MOV_EbGv, src, base, index, scale, offset);
     }
 
     void movzbl_mr(int offset, RegisterID base, RegisterID dst)
     {
         spew("movzbl     %s0x%x(%s), %s",
              PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameIReg(4, dst));
         m_formatter.twoByteOp(OP2_MOVZX_GvEb, dst, base, offset);
@@ -1754,17 +1754,17 @@ public:
         spew("movzbl     %s0x%x(%s), %s",
              PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameIReg(4, dst));
         m_formatter.twoByteOp_disp32(OP2_MOVZX_GvEb, dst, base, offset);
     }
 
     void movzbl_mr(int offset, RegisterID base, RegisterID index, int scale, RegisterID dst)
     {
         spew("movzbl     %d(%s,%s,%d), %s",
-             offset, nameIReg(base), nameIReg(index), scale, nameIReg(dst));
+             offset, nameIReg(base), nameIReg(index), 1<<scale, nameIReg(dst));
         m_formatter.twoByteOp(OP2_MOVZX_GvEb, dst, base, index, scale, offset);
     }
 
     void movxbl_mr(int offset, RegisterID base, RegisterID dst)
     {
         spew("movxbl     %s0x%x(%s), %s",
              PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameIReg(4, dst));
         m_formatter.twoByteOp(OP2_MOVSX_GvEb, dst, base, offset);
@@ -1775,17 +1775,17 @@ public:
         spew("movxbl     %s0x%x(%s), %s",
              PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameIReg(4, dst));
         m_formatter.twoByteOp_disp32(OP2_MOVSX_GvEb, dst, base, offset);
     }
 
     void movxbl_mr(int offset, RegisterID base, RegisterID index, int scale, RegisterID dst)
     {
         spew("movxbl     %d(%s,%s,%d), %s",
-             offset, nameIReg(base), nameIReg(index), scale, nameIReg(dst));
+             offset, nameIReg(base), nameIReg(index), 1<<scale, nameIReg(dst));
         m_formatter.twoByteOp(OP2_MOVSX_GvEb, dst, base, index, scale, offset);
     }
 
     void movzwl_mr(int offset, RegisterID base, RegisterID dst)
     {
         spew("movzwl     %s0x%x(%s), %s",
              PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameIReg(4, dst));
         m_formatter.twoByteOp(OP2_MOVZX_GvEw, dst, base, offset);
@@ -1796,17 +1796,17 @@ public:
         spew("movzwl     %s0x%x(%s), %s",
              PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameIReg(4, dst));
         m_formatter.twoByteOp_disp32(OP2_MOVZX_GvEw, dst, base, offset);
     }
 
     void movzwl_mr(int offset, RegisterID base, RegisterID index, int scale, RegisterID dst)
     {
         spew("movzwl     %d(%s,%s,%d), %s",
-             offset, nameIReg(base), nameIReg(index), scale, nameIReg(dst));
+             offset, nameIReg(base), nameIReg(index), 1<<scale, nameIReg(dst));
         m_formatter.twoByteOp(OP2_MOVZX_GvEw, dst, base, index, scale, offset);
     }
 
     void movxwl_mr(int offset, RegisterID base, RegisterID dst)
     {
         spew("movxwl     %s0x%x(%s), %s",
              PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameIReg(4, dst));
         m_formatter.twoByteOp(OP2_MOVSX_GvEw, dst, base, offset);
@@ -1817,34 +1817,34 @@ public:
         spew("movxwl     %s0x%x(%s), %s",
              PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameIReg(4, dst));
         m_formatter.twoByteOp_disp32(OP2_MOVSX_GvEw, dst, base, offset);
     }
 
     void movxwl_mr(int offset, RegisterID base, RegisterID index, int scale, RegisterID dst)
     {
         spew("movxwl     %d(%s,%s,%d), %s",
-             offset, nameIReg(base), nameIReg(index), scale, nameIReg(dst));
+             offset, nameIReg(base), nameIReg(index), 1<<scale, nameIReg(dst));
         m_formatter.twoByteOp(OP2_MOVSX_GvEw, dst, base, index, scale, offset);
     }
 
     void movzbl_rr(RegisterID src, RegisterID dst)
     {
         // In 64-bit, this may cause an unnecessary REX to be planted (if the dst register
         // is in the range ESP-EDI, and the src would not have required a REX).  Unneeded
         // REX prefixes are defined to be silently ignored by the processor.
         spew("movzbl      %s, %s",
              nameIReg(4,src), nameIReg(4,dst));
         m_formatter.twoByteOp8(OP2_MOVZX_GvEb, dst, src);
     }
 
     void leal_mr(int offset, RegisterID base, RegisterID index, int scale, RegisterID dst)
     {
         spew("leal       %d(%s,%s,%d), %s",
-             offset, nameIReg(base), nameIReg(index), scale, nameIReg(dst));
+             offset, nameIReg(base), nameIReg(index), 1<<scale, nameIReg(dst));
         m_formatter.oneByteOp(OP_LEA, dst, base, index, scale, offset);
     }
 
     void leal_mr(int offset, RegisterID base, RegisterID dst)
     {
         spew("leal       %s0x%x(%s), %s",
              PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameIReg(4,dst));
         m_formatter.oneByteOp(OP_LEA, dst, base, offset);
@@ -1881,17 +1881,17 @@ public:
         m_formatter.oneByteOp(OP_GROUP5_Ev, GROUP5_OP_CALLN, dst);
         JmpSrc r = JmpSrc(m_formatter.size());
         spew("call       *%s", nameIReg(dst));
         return r;
     }
     
     void call_m(int offset, RegisterID base)
     {
-        spew("call       %s0x%x(%s)",
+        spew("call       *%s0x%x(%s)",
              PRETTY_PRINT_OFFSET(offset), nameIReg(base));
         m_formatter.oneByteOp(OP_GROUP5_Ev, GROUP5_OP_CALLN, base, offset);
     }
 
     // Comparison of EAX against a 32-bit immediate. The immediate is patched
     // in as if it were a jump target. The intention is to toggle the first
     // byte of the instruction between a CMP and a JMP to produce a pseudo-NOP.
     JmpSrc cmp_eax()
@@ -1910,37 +1910,39 @@ public:
         return r;
     }
     
     // Return a JmpSrc so we have a label to the jump, so we can use this
     // To make a tail recursive call on x86-64.  The MacroAssembler
     // really shouldn't wrap this as a Jump, since it can't be linked. :-/
     JmpSrc jmp_r(RegisterID dst)
     {
-        spew("jmp        ((%s))",
+        spew("jmp        *%s",
              nameIReg(dst));
         m_formatter.oneByteOp(OP_GROUP5_Ev, GROUP5_OP_JMPN, dst);
         return JmpSrc(m_formatter.size());
     }
     
     void jmp_m(int offset, RegisterID base)
     {
-        FIXME_INSN_PRINTING;
+        spew("jmp       *%d(%s)",
+             offset, nameIReg(base));
         m_formatter.oneByteOp(OP_GROUP5_Ev, GROUP5_OP_JMPN, base, offset);
     }
 
     void jmp_m(int offset, RegisterID base, RegisterID index, int scale) {
-        spew("jmp       ((%d(%s,%s,%d)))",
-             offset, nameIReg(base), nameIReg(index), scale);
+        spew("jmp       *%d(%s,%s,%d)",
+             offset, nameIReg(base), nameIReg(index), 1<<scale);
         m_formatter.oneByteOp(OP_GROUP5_Ev, GROUP5_OP_JMPN, base, index, scale, offset);
     }
 
 #if WTF_CPU_X86_64
     void jmp_rip(int ripOffset) {
         // rip-relative addressing.
+        spew("jmp        *%d(%%rip)", ripOffset);
         m_formatter.oneByteRipOp(OP_GROUP5_Ev, GROUP5_OP_JMPN, ripOffset);
     }
 
     void immediate64(int64_t imm)
     {
         m_formatter.immediate64(imm);
     }
 #endif
@@ -2129,17 +2131,17 @@ public:
              PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(dst));
         m_formatter.prefix(PRE_SSE_F2);
         m_formatter.twoByteOp(OP2_CVTSI2SD_VsdEd, (RegisterID)dst, base, offset);
     }
 
     void cvtsi2sd_mr(int offset, RegisterID base, RegisterID index, int scale, XMMRegisterID dst)
     {
         spew("cvtsi2sd   %d(%s,%s,%d), %s",
-             offset, nameIReg(base), nameIReg(index), scale, nameFPReg(dst));
+             offset, nameIReg(base), nameIReg(index), 1<<scale, nameFPReg(dst));
         m_formatter.prefix(PRE_SSE_F2);
         m_formatter.twoByteOp(OP2_CVTSI2SD_VsdEd, (RegisterID)dst, base, index, scale, offset);
     }
 
 #if !WTF_CPU_X86_64
     void cvtsi2sd_mr(void* address, XMMRegisterID dst)
     {
         spew("cvtsi2sd   %p, %s",
@@ -2296,33 +2298,33 @@ public:
              PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(dst));
         m_formatter.prefix(PRE_SSE_F3);
         m_formatter.twoByteOp_disp32(OP2_MOVSD_VsdWsd, (RegisterID)dst, base, offset);
     }
 
     void movsd_rm(XMMRegisterID src, int offset, RegisterID base, RegisterID index, int scale)
     {
         spew("movsd       %s, %d(%s,%s,%d)", 
-             nameFPReg(src), offset, nameIReg(base), nameIReg(index), scale);
+             nameFPReg(src), offset, nameIReg(base), nameIReg(index), 1<<scale);
         m_formatter.prefix(PRE_SSE_F2);
         m_formatter.twoByteOp(OP2_MOVSD_WsdVsd, (RegisterID)src, base, index, scale, offset);
     }
 
     void movss_rm(XMMRegisterID src, int offset, RegisterID base, RegisterID index, int scale)
     {
         spew("movss       %s, %d(%s,%s,%d)", 
-             nameFPReg(src), offset, nameIReg(base), nameIReg(index), scale);
+             nameFPReg(src), offset, nameIReg(base), nameIReg(index), 1<<scale);
         m_formatter.prefix(PRE_SSE_F3);
         m_formatter.twoByteOp(OP2_MOVSD_WsdVsd, (RegisterID)src, base, index, scale, offset);
     }
 
     void movss_mr(int offset, RegisterID base, RegisterID index, int scale, XMMRegisterID dst)
     {
         spew("movss      %d(%s,%s,%d), %s",
-             offset, nameIReg(base), nameIReg(index), scale, nameFPReg(dst));
+             offset, nameIReg(base), nameIReg(index), 1<<scale, nameFPReg(dst));
         m_formatter.prefix(PRE_SSE_F3);
         m_formatter.twoByteOp(OP2_MOVSD_VsdWsd, (RegisterID)dst, base, index, scale, offset);
     }
 
     void movsd_mr(int offset, RegisterID base, XMMRegisterID dst)
     {
         spew("movsd      %s0x%x(%s), %s",
              PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(dst));
@@ -2336,17 +2338,17 @@ public:
              PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(dst));
         m_formatter.prefix(PRE_SSE_F2);
         m_formatter.twoByteOp_disp32(OP2_MOVSD_VsdWsd, (RegisterID)dst, base, offset);
     }
 
     void movsd_mr(int offset, RegisterID base, RegisterID index, int scale, XMMRegisterID dst)
     {
         spew("movsd      %d(%s,%s,%d), %s",
-             offset, nameIReg(base), nameIReg(index), scale, nameFPReg(dst));
+             offset, nameIReg(base), nameIReg(index), 1<<scale, nameFPReg(dst));
         m_formatter.prefix(PRE_SSE_F2);
         m_formatter.twoByteOp(OP2_MOVSD_VsdWsd, (RegisterID)dst, base, index, scale, offset);
     }
 
     void movsd_rr(XMMRegisterID src, XMMRegisterID dst)
     {
         spew("movsd      %s, %s",
              nameFPReg(src), nameFPReg(dst));
@@ -2395,33 +2397,33 @@ public:
              nameFPReg(src), PRETTY_PRINT_OFFSET(offset), nameIReg(base));
         m_formatter.prefix(PRE_SSE_66);
         m_formatter.twoByteOp(OP2_MOVDQA_WsdVsd, (RegisterID)src, base, offset);
     }
 
     void movdqa_rm(XMMRegisterID src, int offset, RegisterID base, RegisterID index, int scale)
     {
         spew("movdqa      %s, %d(%s,%s,%d)", 
-             nameFPReg(src), offset, nameIReg(base), nameIReg(index), scale);
+             nameFPReg(src), offset, nameIReg(base), nameIReg(index), 1<<scale);
         m_formatter.prefix(PRE_SSE_66);
         m_formatter.twoByteOp(OP2_MOVDQA_WsdVsd, (RegisterID)src, base, index, scale, offset);
     }
 
     void movdqa_mr(int offset, RegisterID base, XMMRegisterID dst)
     {
         spew("movdqa     %s0x%x(%s), %s",
              PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(dst));
         m_formatter.prefix(PRE_SSE_66);
         m_formatter.twoByteOp(OP2_MOVDQA_VsdWsd, (RegisterID)dst, base, offset);
     }
 
     void movdqa_mr(int offset, RegisterID base, RegisterID index, int scale, XMMRegisterID dst)
     {
         spew("movdqa     %d(%s,%s,%d), %s",
-             offset, nameIReg(base), nameIReg(index), scale, nameFPReg(dst));
+             offset, nameIReg(base), nameIReg(index), 1<<scale, nameFPReg(dst));
         m_formatter.prefix(PRE_SSE_66);
         m_formatter.twoByteOp(OP2_MOVDQA_VsdWsd, (RegisterID)dst, base, index, scale, offset);
     }
 
     void mulsd_rr(XMMRegisterID src, XMMRegisterID dst)
     {
         spew("mulsd      %s, %s",
              nameFPReg(src), nameFPReg(dst));
@@ -2564,17 +2566,17 @@ public:
     void ret()
     {
         spew("ret");
         m_formatter.oneByteOp(OP_RET);
     }
 
     void ret(int imm)
     {
-        spew("ret        %d",
+        spew("ret        $%d",
              imm);
         m_formatter.oneByteOp(OP_RET_Iz);
         m_formatter.immediate16(imm);
     }
 
     void predictNotTaken()
     {
         FIXME_INSN_PRINTING;
@@ -2610,17 +2612,17 @@ public:
     
     static JmpDst labelFor(JmpSrc jump, intptr_t offset = 0)
     {
         return JmpDst(jump.m_offset + offset);
     }
     
     JmpDst align(int alignment)
     {
-        FIXME_INSN_PRINTING;
+        spew(".balign %d", alignment);
         while (!m_formatter.isAligned(alignment))
             m_formatter.oneByteOp(OP_HLT);
 
         return label();
     }
 
     void jumpTablePointer(uintptr_t ptr)
     {