Bug 1131289 - SpiderMonkey: Use the vmovq encoding for vmovq r=bbouvier
author Dan Gohman <sunfish@mozilla.com>
Tue, 10 Feb 2015 08:10:31 -0800
changeset 228462 017859925cf32162460410fd0ead91b992acd710
parent 228461 9bbd3641e6f01ad07e756b227796a1a474c0983f
child 228463 dc194ba77d7f6fe202cef65038c4aed70547ebf2
push id unknown
push user unknown
push date unknown
reviewers bbouvier
bugs 1131289
milestone 38.0a1
Bug 1131289 - SpiderMonkey: Use the vmovq encoding for vmovq r=bbouvier
js/src/jit/shared/Assembler-x86-shared.h
js/src/jit/shared/BaseAssembler-x86-shared.h
js/src/jit/shared/Disassembler-x86-shared.cpp
js/src/jit/shared/Encoding-x86-shared.h
js/src/jit/x64/Assembler-x64.h
js/src/jit/x64/CodeGenerator-x64.cpp
js/src/jit/x86/Assembler-x86.h
js/src/jit/x86/CodeGenerator-x86.cpp
--- a/js/src/jit/shared/Assembler-x86-shared.h
+++ b/js/src/jit/shared/Assembler-x86-shared.h
@@ -1784,16 +1784,57 @@ class AssemblerX86Shared : public Assemb
         MOZ_ASSERT(HasSSE2());
         switch (dest.kind()) {
           case Operand::MEM_REG_DISP:
             masm.vmovd_rm(src.code(), dest.disp(), dest.base());
             break;
           case Operand::MEM_SCALE:
             masm.vmovd_rm(src.code(), dest.disp(), dest.base(), dest.index(), dest.scale());
             break;
+          case Operand::MEM_ADDRESS32:
+            // This is the vmovd store, so emit vmovd_rm here; vmovq_rm would
+            // store 8 bytes where the caller asked for a 4-byte vmovd.
+            masm.vmovd_rm(src.code(), dest.address());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
+    }
+    // Load from the memory operand |src| into |dest| with vmovq. Only memory
+    // operand kinds are accepted; anything else crashes.
+    void vmovq(const Operand &src, FloatRegister dest) {
+        MOZ_ASSERT(HasSSE2());
+        switch (src.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.vmovq_mr(src.disp(), src.base(), dest.code());
+            break;
+          case Operand::MEM_SCALE:
+            masm.vmovq_mr(src.disp(), src.base(), src.index(), src.scale(), dest.code());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.vmovq_mr(src.address(), dest.code());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
+    }
+    // Store from |src| to the memory operand |dest| with vmovq. Accepts the
+    // same three memory operand kinds as the load above.
+    void vmovq(FloatRegister src, const Operand &dest) {
+        MOZ_ASSERT(HasSSE2());
+        switch (dest.kind()) {
+          case Operand::MEM_REG_DISP:
+            masm.vmovq_rm(src.code(), dest.disp(), dest.base());
+            break;
+          case Operand::MEM_SCALE:
+            masm.vmovq_rm(src.code(), dest.disp(), dest.base(), dest.index(), dest.scale());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.vmovq_rm(src.code(), dest.address());
+            break;
           default:
             MOZ_CRASH("unexpected operand kind");
         }
     }
     void vpaddd(const Operand &src1, FloatRegister src0, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
         switch (src1.kind()) {
           case Operand::FPREG:
--- a/js/src/jit/shared/BaseAssembler-x86-shared.h
+++ b/js/src/jit/shared/BaseAssembler-x86-shared.h
@@ -1736,38 +1736,43 @@ public:
         if (src == rax && !IsAddressImmediate(addr)) {
             movq_EAXm(addr);
             return;
         }
 
         spew("movq       %s, %p", GPReg64Name(src), addr);
         m_formatter.oneByteOp64(OP_MOV_EvGv, addr, src);
     }
-
-    void movq_rm(XMMRegisterID src, int32_t offset, RegisterID base)
-    {
-        spew("movq       %s, " MEM_ob, XMMRegName(src), ADDR_ob(offset, base));
-        m_formatter.prefix(PRE_SSE_66);
-        m_formatter.twoByteOp64(OP2_MOVQ_EdVd, offset, base, src);
-    }
-
-    void movq_rm(XMMRegisterID src, int32_t offset, RegisterID base, RegisterID index, int scale)
-    {
-        spew("movq       %s, " MEM_obs, XMMRegName(src), ADDR_obs(offset, base, index, scale));
-        m_formatter.prefix(PRE_SSE_66);
-        m_formatter.twoByteOp64(OP2_MOVQ_EdVd, offset, base, index, scale, src);
-    }
-
-    void movq_rm(XMMRegisterID src, const void *addr)
-    {
-        spew("movq       %s, %p", XMMRegName(src), addr);
-        m_formatter.prefix(PRE_SSE_66);
-        m_formatter.twoByteOp64(OP2_MOVQ_EdVd, addr, src);
-    }
-
+#endif
+
+    void vmovq_rm(XMMRegisterID src, int32_t offset, RegisterID base)
+    {   // Store form: XMM |src| to the memory operand given by |offset| and |base|.
+        // vmovq_rm can be encoded either as a true vmovq or as a vmovd with a
+        // REX prefix modifying it to be 64-bit. We choose the vmovq encoding
+        // because it's smaller (when it doesn't need a REX prefix for other
+        // reasons) and because it works on 32-bit x86 too.
+        twoByteOpSimd("vmovq", VEX_PD, OP2_MOVQ_WdVd, offset, base, invalid_xmm, src);
+    }
+
+    void vmovq_rm_disp32(XMMRegisterID src, int32_t offset, RegisterID base)
+    {   // Same store as vmovq_rm, emitted through twoByteOpSimd_disp32; used by the x86 vmovqWithPatch.
+        twoByteOpSimd_disp32("vmovq", VEX_PD, OP2_MOVQ_WdVd, offset, base, invalid_xmm, src);
+    }
+
+    void vmovq_rm(XMMRegisterID src, int32_t offset, RegisterID base, RegisterID index, int scale)
+    {   // Store form for a memory operand described by offset, base, index and scale.
+        twoByteOpSimd("vmovq", VEX_PD, OP2_MOVQ_WdVd, offset, base, index, scale, invalid_xmm, src);
+    }
+
+    void vmovq_rm(XMMRegisterID src, const void *addr)
+    {   // Store form taking the destination as an address pointer |addr|.
+        twoByteOpSimd("vmovq", VEX_PD, OP2_MOVQ_WdVd, addr, invalid_xmm, src);
+    }
+
+#ifdef JS_CODEGEN_X64
     void movq_mEAX(const void *addr)
     {
         if (IsAddressImmediate(addr)) {
             movq_mr(addr, rax);
             return;
         }
 
         spew("movq       %p, %%rax", addr);
@@ -1810,38 +1815,43 @@ public:
         if (dst == rax && !IsAddressImmediate(addr)) {
             movq_mEAX(addr);
             return;
         }
 
         spew("movq       %p, %s", addr, GPReg64Name(dst));
         m_formatter.oneByteOp64(OP_MOV_GvEv, addr, dst);
     }
-
-    void movq_mr(int32_t offset, RegisterID base, XMMRegisterID dst)
-    {
-        spew("movq       " MEM_ob ", %s", ADDR_ob(offset, base), XMMRegName(dst));
-        m_formatter.prefix(PRE_SSE_66);
-        m_formatter.twoByteOp64(OP2_MOVQ_VdEd, offset, base, (RegisterID) dst);
-    }
-
-    void movq_mr(int32_t offset, RegisterID base, RegisterID index, int32_t scale, XMMRegisterID dst)
-    {
-        spew("movq       " MEM_obs ", %s", ADDR_obs(offset, base, index, scale), XMMRegName(dst));
-        m_formatter.prefix(PRE_SSE_66);
-        m_formatter.twoByteOp64(OP2_MOVQ_VdEd, offset, base, index, scale, (RegisterID) dst);
-    }
-
-    void movq_mr(const void *addr, XMMRegisterID dst)
-    {
-        spew("movq       %p, %s", addr, XMMRegName(dst));
-        m_formatter.prefix(PRE_SSE_66);
-        m_formatter.twoByteOp64(OP2_MOVQ_VdEd, addr, (RegisterID) dst);
-    }
-
+#endif
+
+    void vmovq_mr(int32_t offset, RegisterID base, XMMRegisterID dst)
+    {   // Load form: memory operand given by |offset| and |base| into XMM |dst|.
+        // vmovq_mr can be encoded either as a true vmovq or as a vmovd with a
+        // REX prefix modifying it to be 64-bit. We choose the vmovq encoding
+        // because it's smaller (when it doesn't need a REX prefix for other
+        // reasons) and because it works on 32-bit x86 too.
+        twoByteOpSimd("vmovq", VEX_SS, OP2_MOVQ_VdWd, offset, base, invalid_xmm, dst);
+    }
+
+    void vmovq_mr_disp32(int32_t offset, RegisterID base, XMMRegisterID dst)
+    {   // Same load as vmovq_mr, emitted through twoByteOpSimd_disp32; used by the x86 vmovqWithPatch.
+        twoByteOpSimd_disp32("vmovq", VEX_SS, OP2_MOVQ_VdWd, offset, base, invalid_xmm, dst);
+    }
+
+    void vmovq_mr(int32_t offset, RegisterID base, RegisterID index, int32_t scale, XMMRegisterID dst)
+    {   // Load form for a memory operand described by offset, base, index and scale.
+        twoByteOpSimd("vmovq", VEX_SS, OP2_MOVQ_VdWd, offset, base, index, scale, invalid_xmm, dst);
+    }
+
+    void vmovq_mr(const void *addr, XMMRegisterID dst)
+    {   // Load form taking the source as an address pointer |addr|.
+        twoByteOpSimd("vmovq", VEX_SS, OP2_MOVQ_VdWd, addr, invalid_xmm, dst);
+    }
+
+#ifdef JS_CODEGEN_X64
     void leaq_mr(int32_t offset, RegisterID base, RegisterID index, int scale, RegisterID dst)
     {
         spew("leaq       " MEM_obs ", %s", ADDR_obs(offset, base, index, scale), GPReg64Name(dst)),
         m_formatter.oneByteOp64(OP_LEA, offset, base, index, scale, dst);
     }
 
     void movq_i32m(int32_t imm, int32_t offset, RegisterID base)
     {
@@ -2657,21 +2667,25 @@ public:
     void vmovd_rm(XMMRegisterID src, const void *address)
     {
         twoByteOpSimd("vmovd", VEX_PD, OP2_MOVD_EdVd, address, invalid_xmm, src);
     }
 
 #ifdef JS_CODEGEN_X64
     void vmovq_rr(XMMRegisterID src, RegisterID dst)
     {
+        // While this is called "vmovq", it actually uses the vmovd encoding
+        // with a REX prefix modifying it to be 64-bit.
         twoByteOpSimdInt64("vmovq", VEX_PD, OP2_MOVD_EdVd, (XMMRegisterID)dst, (RegisterID)src);
     }
 
     void vmovq_rr(RegisterID src, XMMRegisterID dst)
     {
+        // While this is called "vmovq", it actually uses the vmovd encoding
+        // with a REX prefix modifying it to be 64-bit.
         twoByteOpInt64Simd("vmovq", VEX_PD, OP2_MOVD_VdEd, src, invalid_xmm, dst);
     }
 #endif
 
     void vmovsd_rm(XMMRegisterID src, int32_t offset, RegisterID base)
     {
         twoByteOpSimd("vmovsd", VEX_SD, OP2_MOVSD_WsdVsd, offset, base, invalid_xmm, src);
     }
--- a/js/src/jit/shared/Disassembler-x86-shared.cpp
+++ b/js/src/jit/shared/Disassembler-x86-shared.cpp
@@ -468,16 +468,43 @@ js::jit::Disassembler::DisassembleHeapAc
           case VEX_SS: memSize = 4; break;
           case VEX_SD: memSize = 8; break;
           case VEX_PS:
           case VEX_PD: memSize = 16; break;
           default: MOZ_CRASH("Unexpected VEX type");
         }
         kind = HeapAccess::Store;
         break;
+      case Pack2ByteOpcode(OP2_MOVD_VdEd):
+        MOZ_ASSERT(!haveImm);
+        otherOperand = OtherOperand(xmm);
+        switch (type) {
+          case VEX_PD: memSize = 4; break;
+          default: MOZ_CRASH("Unexpected VEX type");
+        }
+        kind = HeapAccess::Load;
+        break;
+      case Pack2ByteOpcode(OP2_MOVQ_WdVd):
+        MOZ_ASSERT(!haveImm);
+        otherOperand = OtherOperand(xmm);
+        switch (type) {
+          case VEX_PD: memSize = 8; break;
+          default: MOZ_CRASH("Unexpected VEX type");
+        }
+        kind = HeapAccess::Store;
+        break;
+      case Pack2ByteOpcode(OP2_MOVD_EdVd): // aka OP2_MOVQ_VdWd
+        MOZ_ASSERT(!haveImm);
+        otherOperand = OtherOperand(xmm);
+        switch (type) {
+          case VEX_SS: memSize = 8; kind = HeapAccess::Load; break;
+          case VEX_PD: memSize = 4; kind = HeapAccess::Store; break;
+          default: MOZ_CRASH("Unexpected VEX type");
+        }
+        break;
       default:
         MOZ_CRASH("Unable to disassemble instruction");
     }
 
     *access = HeapAccess(kind, memSize, addr, otherOperand);
     return ptr;
 }
 
--- a/js/src/jit/shared/Encoding-x86-shared.h
+++ b/js/src/jit/shared/Encoding-x86-shared.h
@@ -162,28 +162,27 @@ enum TwoByteOpcodeID {
     OP2_SQRTPS_VpsWps   = 0x51,
     OP2_RSQRTPS_VpsWps  = 0x52,
     OP2_RCPPS_VpsWps    = 0x53,
     OP2_ANDPD_VpdWpd    = 0x54,
     OP2_ORPD_VpdWpd     = 0x56,
     OP2_XORPD_VpdWpd    = 0x57,
     OP2_PCMPGTD_VdqWdq  = 0x66,
     OP2_MOVD_VdEd       = 0x6E,
-    OP2_MOVQ_VdEd       = 0x6E,
     OP2_MOVDQ_VsdWsd    = 0x6F,
     OP2_MOVDQ_VdqWdq    = 0x6F,
     OP2_PSHUFD_VdqWdqIb = 0x70,
     OP2_PSLLD_UdqIb     = 0x72,
     OP2_PSRAD_UdqIb     = 0x72,
     OP2_PSRLD_UdqIb     = 0x72,
     OP2_PSRLDQ_Vd       = 0x73,
     OP2_PCMPEQW         = 0x75,
     OP2_PCMPEQD_VdqWdq  = 0x76,
     OP2_MOVD_EdVd       = 0x7E,
-    OP2_MOVQ_EdVd       = 0x7E,
+    OP2_MOVQ_VdWd       = 0x7E,
     OP2_MOVDQ_WdqVdq    = 0x7F,
     OP2_JCC_rel32       = 0x80,
     OP_SETCC            = 0x90,
     OP_FENCE            = 0xAE,
     OP2_IMUL_GvEv       = 0xAF,
     OP2_CMPXCHG_GvEb    = 0xB0,
     OP2_CMPXCHG_GvEw    = 0xB1,
     OP2_BSR_GvEv        = 0xBD,
@@ -192,16 +191,17 @@ enum TwoByteOpcodeID {
     OP2_MOVZX_GvEb      = 0xB6,
     OP2_MOVZX_GvEw      = 0xB7,
     OP2_XADD_EbGb       = 0xC0,
     OP2_XADD_EvGv       = 0xC1,
     OP2_CMPPS_VpsWps    = 0xC2,
     OP2_PEXTRW_GdUdIb   = 0xC5,
     OP2_SHUFPS_VpsWpsIb = 0xC6,
     OP2_PSRLD_VdqWdq    = 0xD2,
+    OP2_MOVQ_WdVd       = 0xD6,
     OP2_PANDDQ_VdqWdq   = 0xDB,
     OP2_PANDNDQ_VdqWdq  = 0xDF,
     OP2_PSRAD_VdqWdq    = 0xE2,
     OP2_PORDQ_VdqWdq    = 0xEB,
     OP2_PXORDQ_VdqWdq   = 0xEF,
     OP2_PSLLD_VdqWdq    = 0xF2,
     OP2_PMULUDQ_VdqWdq  = 0xF4,
     OP2_PSUBD_VdqWdq    = 0xFA,
--- a/js/src/jit/x64/Assembler-x64.h
+++ b/js/src/jit/x64/Assembler-x64.h
@@ -252,16 +252,17 @@ class Assembler : public AssemblerX86Sha
   protected:
     size_t addPatchableJump(JmpSrc src, Relocation::Kind reloc);
 
   public:
     using AssemblerX86Shared::j;
     using AssemblerX86Shared::jmp;
     using AssemblerX86Shared::push;
     using AssemblerX86Shared::pop;
+    using AssemblerX86Shared::vmovq;
 
     static uint8_t *PatchableJumpAddress(JitCode *code, size_t index);
     static void PatchJumpEntry(uint8_t *entry, uint8_t *target);
 
     Assembler()
       : extendedJumpTable_(0)
     {
     }
@@ -356,31 +357,16 @@ class Assembler : public AssemblerX86Sha
             break;
           case Operand::MEM_ADDRESS32:
             masm.movq_mr(src.address(), dest.code());
             break;
           default:
             MOZ_CRASH("unexpected operand kind");
         }
     }
-    void movq(const Operand &src, FloatRegister dest) {
-        switch (src.kind()) {
-          case Operand::MEM_REG_DISP:
-            masm.movq_mr(src.disp(), src.base(), dest.code());
-            break;
-          case Operand::MEM_SCALE:
-            masm.movq_mr(src.disp(), src.base(), src.index(), src.scale(), dest.code());
-            break;
-          case Operand::MEM_ADDRESS32:
-            masm.movq_mr(src.address(), dest.code());
-            break;
-          default:
-            MOZ_CRASH("unexpected operand kind");
-        }
-    }
     void movq(Register src, const Operand &dest) {
         switch (dest.kind()) {
           case Operand::REG:
             masm.movq_rr(src.code(), dest.reg());
             break;
           case Operand::MEM_REG_DISP:
             masm.movq_rm(src.code(), dest.disp(), dest.base());
             break;
@@ -389,31 +375,16 @@ class Assembler : public AssemblerX86Sha
             break;
           case Operand::MEM_ADDRESS32:
             masm.movq_rm(src.code(), dest.address());
             break;
           default:
             MOZ_CRASH("unexpected operand kind");
         }
     }
-    void movq(FloatRegister src, const Operand &dest) {
-        switch (dest.kind()) {
-          case Operand::MEM_REG_DISP:
-            masm.movq_rm(src.code(), dest.disp(), dest.base());
-            break;
-          case Operand::MEM_SCALE:
-            masm.movq_rm(src.code(), dest.disp(), dest.base(), dest.index(), dest.scale());
-            break;
-          case Operand::MEM_ADDRESS32:
-            masm.movq_rm(src.code(), dest.address());
-            break;
-          default:
-            MOZ_CRASH("unexpected operand kind");
-        }
-    }
     void movq(Imm32 imm32, const Operand &dest) {
         switch (dest.kind()) {
           case Operand::REG:
             masm.movl_i32r(imm32.value, dest.reg());
             break;
           case Operand::MEM_REG_DISP:
             masm.movq_i32m(imm32.value, dest.disp(), dest.base());
             break;
--- a/js/src/jit/x64/CodeGenerator-x64.cpp
+++ b/js/src/jit/x64/CodeGenerator-x64.cpp
@@ -270,17 +270,17 @@ CodeGeneratorX64::loadSimd(Scalar::Type 
         }
         break;
       }
       case Scalar::Int32x4: {
         switch (numElems) {
           // In memory-to-register mode, movd zeroes out the high lanes.
           case 1: masm.vmovd(srcAddr, out); break;
           // See comment above, which also applies to movq.
-          case 2: masm.movq(srcAddr, out); break;
+          case 2: masm.vmovq(srcAddr, out); break;
           case 4: masm.loadUnalignedInt32x4(srcAddr, out); break;
           default: MOZ_CRASH("unexpected size for partial load");
         }
         break;
       }
       case Scalar::Int8:
       case Scalar::Uint8:
       case Scalar::Int16:
@@ -424,17 +424,17 @@ CodeGeneratorX64::storeSimd(Scalar::Type
         }
         break;
       }
       case Scalar::Int32x4: {
         switch (numElems) {
           // In memory-to-register mode, movd zeroes out the high lanes.
           case 1: masm.vmovd(in, dstAddr); break;
           // See comment above, which also applies to movq.
-          case 2: masm.movq(in, dstAddr); break;
+          case 2: masm.vmovq(in, dstAddr); break;
           case 4: masm.storeUnalignedInt32x4(in, dstAddr); break;
           default: MOZ_CRASH("unexpected size for partial load");
         }
         break;
       }
       case Scalar::Int8:
       case Scalar::Uint8:
       case Scalar::Int16:
--- a/js/src/jit/x86/Assembler-x86.h
+++ b/js/src/jit/x86/Assembler-x86.h
@@ -467,16 +467,21 @@ class Assembler : public AssemblerX86Sha
         masm.vmovss_mr_disp32(src.offset, src.base.code(), dest.code());
         return CodeOffsetLabel(masm.currentOffset());
     }
     CodeOffsetLabel vmovdWithPatch(Address src, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
         masm.vmovd_mr_disp32(src.offset, src.base.code(), dest.code());
         return CodeOffsetLabel(masm.currentOffset());
     }
+    CodeOffsetLabel vmovqWithPatch(Address src, FloatRegister dest) {
+        MOZ_ASSERT(HasSSE2()); // vmovq load from |src| into |dest| via the _disp32 emitter
+        masm.vmovq_mr_disp32(src.offset, src.base.code(), dest.code());
+        return CodeOffsetLabel(masm.currentOffset()); // offset read after the instruction is emitted
+    }
     CodeOffsetLabel vmovsdWithPatch(Address src, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
         masm.vmovsd_mr_disp32(src.offset, src.base.code(), dest.code());
         return CodeOffsetLabel(masm.currentOffset());
     }
     CodeOffsetLabel vmovupsWithPatch(Address src, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
         masm.vmovups_mr_disp32(src.offset, src.base.code(), dest.code());
@@ -501,16 +506,21 @@ class Assembler : public AssemblerX86Sha
         masm.movl_rm_disp32(src.code(), dest.offset, dest.base.code());
         return CodeOffsetLabel(masm.currentOffset());
     }
     CodeOffsetLabel vmovdWithPatch(FloatRegister src, Address dest) {
         MOZ_ASSERT(HasSSE2());
         masm.vmovd_rm_disp32(src.code(), dest.offset, dest.base.code());
         return CodeOffsetLabel(masm.currentOffset());
     }
+    CodeOffsetLabel vmovqWithPatch(FloatRegister src, Address dest) {
+        MOZ_ASSERT(HasSSE2()); // vmovq store from |src| to |dest| via the _disp32 emitter
+        masm.vmovq_rm_disp32(src.code(), dest.offset, dest.base.code());
+        return CodeOffsetLabel(masm.currentOffset()); // offset read after the instruction is emitted
+    }
     CodeOffsetLabel vmovssWithPatch(FloatRegister src, Address dest) {
         MOZ_ASSERT(HasSSE2());
         masm.vmovss_rm_disp32(src.code(), dest.offset, dest.base.code());
         return CodeOffsetLabel(masm.currentOffset());
     }
     CodeOffsetLabel vmovsdWithPatch(FloatRegister src, Address dest) {
         MOZ_ASSERT(HasSSE2());
         masm.vmovsd_rm_disp32(src.code(), dest.offset, dest.base.code());
@@ -561,16 +571,21 @@ class Assembler : public AssemblerX86Sha
         masm.vmovss_mr(src.addr, dest.code());
         return CodeOffsetLabel(masm.currentOffset());
     }
     CodeOffsetLabel vmovdWithPatch(PatchedAbsoluteAddress src, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
         masm.vmovd_mr(src.addr, dest.code());
         return CodeOffsetLabel(masm.currentOffset());
     }
+    CodeOffsetLabel vmovqWithPatch(PatchedAbsoluteAddress src, FloatRegister dest) {
+        MOZ_ASSERT(HasSSE2()); // vmovq load from the address |src.addr| into |dest|
+        masm.vmovq_mr(src.addr, dest.code());
+        return CodeOffsetLabel(masm.currentOffset()); // offset read after the instruction is emitted
+    }
     CodeOffsetLabel vmovsdWithPatch(PatchedAbsoluteAddress src, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
         masm.vmovsd_mr(src.addr, dest.code());
         return CodeOffsetLabel(masm.currentOffset());
     }
     CodeOffsetLabel vmovdqaWithPatch(PatchedAbsoluteAddress src, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
         masm.vmovdqa_mr(src.addr, dest.code());
@@ -610,16 +625,21 @@ class Assembler : public AssemblerX86Sha
         masm.vmovss_rm(src.code(), dest.addr);
         return CodeOffsetLabel(masm.currentOffset());
     }
     CodeOffsetLabel vmovdWithPatch(FloatRegister src, PatchedAbsoluteAddress dest) {
         MOZ_ASSERT(HasSSE2());
         masm.vmovd_rm(src.code(), dest.addr);
         return CodeOffsetLabel(masm.currentOffset());
     }
+    CodeOffsetLabel vmovqWithPatch(FloatRegister src, PatchedAbsoluteAddress dest) {
+        MOZ_ASSERT(HasSSE2()); // vmovq store from |src| to the address |dest.addr|
+        masm.vmovq_rm(src.code(), dest.addr);
+        return CodeOffsetLabel(masm.currentOffset()); // offset read after the instruction is emitted
+    }
     CodeOffsetLabel vmovsdWithPatch(FloatRegister src, PatchedAbsoluteAddress dest) {
         MOZ_ASSERT(HasSSE2());
         masm.vmovsd_rm(src.code(), dest.addr);
         return CodeOffsetLabel(masm.currentOffset());
     }
     CodeOffsetLabel vmovdqaWithPatch(FloatRegister src, PatchedAbsoluteAddress dest) {
         MOZ_ASSERT(HasSSE2());
         masm.vmovdqa_rm(src.code(), dest.addr);
--- a/js/src/jit/x86/CodeGenerator-x86.cpp
+++ b/js/src/jit/x86/CodeGenerator-x86.cpp
@@ -370,18 +370,17 @@ CodeGeneratorX86::loadSimd(Scalar::Type 
         }
         break;
       }
       case Scalar::Int32x4: {
         switch (numElems) {
           // In memory-to-register mode, movd zeroes out the high lanes.
           case 1: masm.vmovdWithPatch(srcAddr, out); break;
-          // See comment above, which also applies to movsd.
-          // TODO memory-to-xmm movq is encodable on x86 as well
-          case 2: masm.vmovsdWithPatch(srcAddr, out); break;
+          // See comment above, which also applies to movq.
+          case 2: masm.vmovqWithPatch(srcAddr, out); break;
           case 4: masm.vmovdquWithPatch(srcAddr, out); break;
           default: MOZ_CRASH("unexpected size for partial load");
         }
         break;
       }
       case Scalar::Int8:
       case Scalar::Uint8:
       case Scalar::Int16:
@@ -592,18 +591,17 @@ CodeGeneratorX86::storeSimd(Scalar::Type
         }
         break;
       }
       case Scalar::Int32x4: {
         switch (numElems) {
           // In memory-to-register mode, movd zeroes destAddr the high lanes.
           case 1: masm.vmovdWithPatch(in, destAddr); break;
-          // See comment above, which also applies to movsd.
-          // Cross-domain penalty here, as movq isn't encodable on x86.
-          case 2: masm.vmovsdWithPatch(in, destAddr); break;
+          // See comment above, which also applies to movq.
+          case 2: masm.vmovqWithPatch(in, destAddr); break;
           case 4: masm.vmovdquWithPatch(in, destAddr); break;
           default: MOZ_CRASH("unexpected size for partial load");
         }
         break;
       }
       case Scalar::Int8:
       case Scalar::Uint8:
       case Scalar::Int16: