Bug 1478269 - Use symbolic names as input operands. r=jrmuizel
author Mike Hommey <mh+mozilla@glandium.org>
Fri, 24 Aug 2018 10:07:16 +0900
changeset 488864 a69ed9d1f49c097a67d6f67bdc0e91d7f26e32b4
parent 488863 998cf7d22736ed2197fd018e1ee28a8257e3de96
child 488865 e471c532fd11675b0cfafa7c161a2e5e147944b3
push id 9734
push user shindli@mozilla.com
push date Thu, 30 Aug 2018 12:18:07 +0000
treeherder mozilla-beta@71c71ab3afae [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers jrmuizel
bugs 1478269
milestone 63.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 1478269 - Use symbolic names as input operands. r=jrmuizel Differential Revision: https://phabricator.services.mozilla.com/D4199
gfx/ycbcr/yuv_row_posix.cpp
--- a/gfx/ycbcr/yuv_row_posix.cpp
+++ b/gfx/ycbcr/yuv_row_posix.cpp
@@ -17,248 +17,248 @@ extern "C" {
 void FastConvertYUVToRGB32Row(const uint8* y_buf,  // rdi
                               const uint8* u_buf,  // rsi
                               const uint8* v_buf,  // rdx
                               uint8* rgb_buf,      // rcx
                               int width) {         // r8
   asm(
   "jmp    1f\n"
 "0:"
-  "movzb  (%1),%%r10\n"
-  "add    $0x1,%1\n"
-  "movzb  (%2),%%r11\n"
-  "add    $0x1,%2\n"
-  "movq   2048(%5,%%r10,8),%%xmm0\n"
-  "movzb  (%0),%%r10\n"
-  "movq   4096(%5,%%r11,8),%%xmm1\n"
-  "movzb  0x1(%0),%%r11\n"
+  "movzb  (%[u_buf]),%%r10\n"
+  "add    $0x1,%[u_buf]\n"
+  "movzb  (%[v_buf]),%%r11\n"
+  "add    $0x1,%[v_buf]\n"
+  "movq   2048(%[kCoefficientsRgbY],%%r10,8),%%xmm0\n"
+  "movzb  (%[y_buf]),%%r10\n"
+  "movq   4096(%[kCoefficientsRgbY],%%r11,8),%%xmm1\n"
+  "movzb  0x1(%[y_buf]),%%r11\n"
   "paddsw %%xmm1,%%xmm0\n"
-  "movq   (%5,%%r10,8),%%xmm2\n"
-  "add    $0x2,%0\n"
-  "movq   (%5,%%r11,8),%%xmm3\n"
+  "movq   (%[kCoefficientsRgbY],%%r10,8),%%xmm2\n"
+  "add    $0x2,%[y_buf]\n"
+  "movq   (%[kCoefficientsRgbY],%%r11,8),%%xmm3\n"
   "paddsw %%xmm0,%%xmm2\n"
   "paddsw %%xmm0,%%xmm3\n"
   "shufps $0x44,%%xmm3,%%xmm2\n"
   "psraw  $0x6,%%xmm2\n"
   "packuswb %%xmm2,%%xmm2\n"
-  "movq   %%xmm2,0x0(%3)\n"
-  "add    $0x8,%3\n"
+  "movq   %%xmm2,0x0(%[rgb_buf])\n"
+  "add    $0x8,%[rgb_buf]\n"
 "1:"
-  "sub    $0x2,%4\n"
+  "sub    $0x2,%[width]\n"
   "jns    0b\n"
 
 "2:"
-  "add    $0x1,%4\n"
+  "add    $0x1,%[width]\n"
   "js     3f\n"
 
-  "movzb  (%1),%%r10\n"
-  "movq   2048(%5,%%r10,8),%%xmm0\n"
-  "movzb  (%2),%%r10\n"
-  "movq   4096(%5,%%r10,8),%%xmm1\n"
+  "movzb  (%[u_buf]),%%r10\n"
+  "movq   2048(%[kCoefficientsRgbY],%%r10,8),%%xmm0\n"
+  "movzb  (%[v_buf]),%%r10\n"
+  "movq   4096(%[kCoefficientsRgbY],%%r10,8),%%xmm1\n"
   "paddsw %%xmm1,%%xmm0\n"
-  "movzb  (%0),%%r10\n"
-  "movq   (%5,%%r10,8),%%xmm1\n"
+  "movzb  (%[y_buf]),%%r10\n"
+  "movq   (%[kCoefficientsRgbY],%%r10,8),%%xmm1\n"
   "paddsw %%xmm0,%%xmm1\n"
   "psraw  $0x6,%%xmm1\n"
   "packuswb %%xmm1,%%xmm1\n"
-  "movd   %%xmm1,0x0(%3)\n"
+  "movd   %%xmm1,0x0(%[rgb_buf])\n"
 "3:"
   :
-  : "r"(y_buf),  // %0
-    "r"(u_buf),  // %1
-    "r"(v_buf),  // %2
-    "r"(rgb_buf),  // %3
-    "r"(width),  // %4
-    "r" (kCoefficientsRgbY)  // %5
+  : [y_buf] "r"(y_buf),
+    [u_buf] "r"(u_buf),
+    [v_buf] "r"(v_buf),
+    [rgb_buf] "r"(rgb_buf),
+    [width] "r"(width),
+    [kCoefficientsRgbY] "r" (kCoefficientsRgbY)
   : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"
 );
 }
 
 void ScaleYUVToRGB32Row(const uint8* y_buf,  // rdi
                         const uint8* u_buf,  // rsi
                         const uint8* v_buf,  // rdx
                         uint8* rgb_buf,      // rcx
                         int width,           // r8
                         int source_dx) {     // r9
   asm(
   "xor    %%r11,%%r11\n"
-  "sub    $0x2,%4\n"
+  "sub    $0x2,%[width]\n"
   "js     1f\n"
 
 "0:"
   "mov    %%r11,%%r10\n"
   "sar    $0x11,%%r10\n"
-  "movzb  (%1,%%r10,1),%%rax\n"
-  "movq   2048(%5,%%rax,8),%%xmm0\n"
-  "movzb  (%2,%%r10,1),%%rax\n"
-  "movq   4096(%5,%%rax,8),%%xmm1\n"
-  "lea    (%%r11,%6),%%r10\n"
+  "movzb  (%[u_buf],%%r10,1),%%rax\n"
+  "movq   2048(%[kCoefficientsRgbY],%%rax,8),%%xmm0\n"
+  "movzb  (%[v_buf],%%r10,1),%%rax\n"
+  "movq   4096(%[kCoefficientsRgbY],%%rax,8),%%xmm1\n"
+  "lea    (%%r11,%[source_dx]),%%r10\n"
   "sar    $0x10,%%r11\n"
-  "movzb  (%0,%%r11,1),%%rax\n"
+  "movzb  (%[y_buf],%%r11,1),%%rax\n"
   "paddsw %%xmm1,%%xmm0\n"
-  "movq   (%5,%%rax,8),%%xmm1\n"
-  "lea    (%%r10,%6),%%r11\n"
+  "movq   (%[kCoefficientsRgbY],%%rax,8),%%xmm1\n"
+  "lea    (%%r10,%[source_dx]),%%r11\n"
   "sar    $0x10,%%r10\n"
-  "movzb  (%0,%%r10,1),%%rax\n"
-  "movq   (%5,%%rax,8),%%xmm2\n"
+  "movzb  (%[y_buf],%%r10,1),%%rax\n"
+  "movq   (%[kCoefficientsRgbY],%%rax,8),%%xmm2\n"
   "paddsw %%xmm0,%%xmm1\n"
   "paddsw %%xmm0,%%xmm2\n"
   "shufps $0x44,%%xmm2,%%xmm1\n"
   "psraw  $0x6,%%xmm1\n"
   "packuswb %%xmm1,%%xmm1\n"
-  "movq   %%xmm1,0x0(%3)\n"
-  "add    $0x8,%3\n"
-  "sub    $0x2,%4\n"
+  "movq   %%xmm1,0x0(%[rgb_buf])\n"
+  "add    $0x8,%[rgb_buf]\n"
+  "sub    $0x2,%[width]\n"
   "jns    0b\n"
 
 "1:"
-  "add    $0x1,%4\n"
+  "add    $0x1,%[width]\n"
   "js     2f\n"
 
   "mov    %%r11,%%r10\n"
   "sar    $0x11,%%r10\n"
-  "movzb  (%1,%%r10,1),%%rax\n"
-  "movq   2048(%5,%%rax,8),%%xmm0\n"
-  "movzb  (%2,%%r10,1),%%rax\n"
-  "movq   4096(%5,%%rax,8),%%xmm1\n"
+  "movzb  (%[u_buf],%%r10,1),%%rax\n"
+  "movq   2048(%[kCoefficientsRgbY],%%rax,8),%%xmm0\n"
+  "movzb  (%[v_buf],%%r10,1),%%rax\n"
+  "movq   4096(%[kCoefficientsRgbY],%%rax,8),%%xmm1\n"
   "paddsw %%xmm1,%%xmm0\n"
   "sar    $0x10,%%r11\n"
-  "movzb  (%0,%%r11,1),%%rax\n"
-  "movq   (%5,%%rax,8),%%xmm1\n"
+  "movzb  (%[y_buf],%%r11,1),%%rax\n"
+  "movq   (%[kCoefficientsRgbY],%%rax,8),%%xmm1\n"
   "paddsw %%xmm0,%%xmm1\n"
   "psraw  $0x6,%%xmm1\n"
   "packuswb %%xmm1,%%xmm1\n"
-  "movd   %%xmm1,0x0(%3)\n"
+  "movd   %%xmm1,0x0(%[rgb_buf])\n"
 
 "2:"
   :
-  : "r"(y_buf),  // %0
-    "r"(u_buf),  // %1
-    "r"(v_buf),  // %2
-    "r"(rgb_buf),  // %3
-    "r"(width),  // %4
-    "r" (kCoefficientsRgbY),  // %5
-    "r"(static_cast<long>(source_dx))  // %6
+  : [y_buf] "r"(y_buf),
+    [u_buf] "r"(u_buf),
+    [v_buf] "r"(v_buf),
+    [rgb_buf] "r"(rgb_buf),
+    [width] "r"(width),
+    [kCoefficientsRgbY] "r" (kCoefficientsRgbY),
+    [source_dx] "r"(static_cast<long>(source_dx))
   : "memory", "r10", "r11", "rax", "xmm0", "xmm1", "xmm2"
 );
 }
 
 void LinearScaleYUVToRGB32Row(const uint8* y_buf,
                               const uint8* u_buf,
                               const uint8* v_buf,
                               uint8* rgb_buf,
                               int width,
                               int source_dx) {
   asm(
   "xor    %%r11,%%r11\n"   // x = 0
-  "sub    $0x2,%4\n"
+  "sub    $0x2,%[width]\n"
   "js     2f\n"
-  "cmp    $0x20000,%6\n"   // if source_dx >= 2.0
+  "cmp    $0x20000,%[source_dx]\n"   // if source_dx >= 2.0
   "jl     0f\n"
   "mov    $0x8000,%%r11\n" // x = 0.5 for 1/2 or less
 "0:"
 
 "1:"
   "mov    %%r11,%%r10\n"
   "sar    $0x11,%%r10\n"
 
-  "movzb  (%1, %%r10, 1), %%r13 \n"
-  "movzb  1(%1, %%r10, 1), %%r14 \n"
+  "movzb  (%[u_buf], %%r10, 1), %%r13 \n"
+  "movzb  1(%[u_buf], %%r10, 1), %%r14 \n"
   "mov    %%r11, %%rax \n"
   "and    $0x1fffe, %%rax \n"
   "imul   %%rax, %%r14 \n"
   "xor    $0x1fffe, %%rax \n"
   "imul   %%rax, %%r13 \n"
   "add    %%r14, %%r13 \n"
   "shr    $17, %%r13 \n"
-  "movq   2048(%5,%%r13,8), %%xmm0\n"
+  "movq   2048(%[kCoefficientsRgbY],%%r13,8), %%xmm0\n"
 
-  "movzb  (%2, %%r10, 1), %%r13 \n"
-  "movzb  1(%2, %%r10, 1), %%r14 \n"
+  "movzb  (%[v_buf], %%r10, 1), %%r13 \n"
+  "movzb  1(%[v_buf], %%r10, 1), %%r14 \n"
   "mov    %%r11, %%rax \n"
   "and    $0x1fffe, %%rax \n"
   "imul   %%rax, %%r14 \n"
   "xor    $0x1fffe, %%rax \n"
   "imul   %%rax, %%r13 \n"
   "add    %%r14, %%r13 \n"
   "shr    $17, %%r13 \n"
-  "movq   4096(%5,%%r13,8), %%xmm1\n"
+  "movq   4096(%[kCoefficientsRgbY],%%r13,8), %%xmm1\n"
 
   "mov    %%r11, %%rax \n"
-  "lea    (%%r11,%6),%%r10\n"
+  "lea    (%%r11,%[source_dx]),%%r10\n"
   "sar    $0x10,%%r11\n"
   "paddsw %%xmm1,%%xmm0\n"
 
-  "movzb  (%0, %%r11, 1), %%r13 \n"
-  "movzb  1(%0, %%r11, 1), %%r14 \n"
+  "movzb  (%[y_buf], %%r11, 1), %%r13 \n"
+  "movzb  1(%[y_buf], %%r11, 1), %%r14 \n"
   "and    $0xffff, %%rax \n"
   "imul   %%rax, %%r14 \n"
   "xor    $0xffff, %%rax \n"
   "imul   %%rax, %%r13 \n"
   "add    %%r14, %%r13 \n"
   "shr    $16, %%r13 \n"
-  "movq   (%5,%%r13,8),%%xmm1\n"
+  "movq   (%[kCoefficientsRgbY],%%r13,8),%%xmm1\n"
 
   "mov    %%r10, %%rax \n"
-  "lea    (%%r10,%6),%%r11\n"
+  "lea    (%%r10,%[source_dx]),%%r11\n"
   "sar    $0x10,%%r10\n"
 
-  "movzb  (%0,%%r10,1), %%r13 \n"
-  "movzb  1(%0,%%r10,1), %%r14 \n"
+  "movzb  (%[y_buf],%%r10,1), %%r13 \n"
+  "movzb  1(%[y_buf],%%r10,1), %%r14 \n"
   "and    $0xffff, %%rax \n"
   "imul   %%rax, %%r14 \n"
   "xor    $0xffff, %%rax \n"
   "imul   %%rax, %%r13 \n"
   "add    %%r14, %%r13 \n"
   "shr    $16, %%r13 \n"
-  "movq   (%5,%%r13,8),%%xmm2\n"
+  "movq   (%[kCoefficientsRgbY],%%r13,8),%%xmm2\n"
 
   "paddsw %%xmm0,%%xmm1\n"
   "paddsw %%xmm0,%%xmm2\n"
   "shufps $0x44,%%xmm2,%%xmm1\n"
   "psraw  $0x6,%%xmm1\n"
   "packuswb %%xmm1,%%xmm1\n"
-  "movq   %%xmm1,0x0(%3)\n"
-  "add    $0x8,%3\n"
-  "sub    $0x2,%4\n"
+  "movq   %%xmm1,0x0(%[rgb_buf])\n"
+  "add    $0x8,%[rgb_buf]\n"
+  "sub    $0x2,%[width]\n"
   "jns    1b\n"
 
 "2:"
-  "add    $0x1,%4\n"
+  "add    $0x1,%[width]\n"
   "js     3f\n"
 
   "mov    %%r11,%%r10\n"
   "sar    $0x11,%%r10\n"
 
-  "movzb  (%1,%%r10,1), %%r13 \n"
-  "movq   2048(%5,%%r13,8),%%xmm0\n"
+  "movzb  (%[u_buf],%%r10,1), %%r13 \n"
+  "movq   2048(%[kCoefficientsRgbY],%%r13,8),%%xmm0\n"
 
-  "movzb  (%2,%%r10,1), %%r13 \n"
-  "movq   4096(%5,%%r13,8),%%xmm1\n"
+  "movzb  (%[v_buf],%%r10,1), %%r13 \n"
+  "movq   4096(%[kCoefficientsRgbY],%%r13,8),%%xmm1\n"
 
   "paddsw %%xmm1,%%xmm0\n"
   "sar    $0x10,%%r11\n"
 
-  "movzb  (%0,%%r11,1), %%r13 \n"
-  "movq   (%5,%%r13,8),%%xmm1\n"
+  "movzb  (%[y_buf],%%r11,1), %%r13 \n"
+  "movq   (%[kCoefficientsRgbY],%%r13,8),%%xmm1\n"
 
   "paddsw %%xmm0,%%xmm1\n"
   "psraw  $0x6,%%xmm1\n"
   "packuswb %%xmm1,%%xmm1\n"
-  "movd   %%xmm1,0x0(%3)\n"
+  "movd   %%xmm1,0x0(%[rgb_buf])\n"
 
 "3:"
   :
-  : "r"(y_buf),  // %0
-    "r"(u_buf),  // %1
-    "r"(v_buf),  // %2
-    "r"(rgb_buf),  // %3
-    "r"(width),  // %4
-    "r" (kCoefficientsRgbY),  // %5
-    "r"(static_cast<long>(source_dx))  // %6
+  : [y_buf] "r"(y_buf),
+    [u_buf] "r"(u_buf),
+    [v_buf] "r"(v_buf),
+    [rgb_buf] "r"(rgb_buf),
+    [width] "r"(width),
+    [kCoefficientsRgbY] "r" (kCoefficientsRgbY),
+    [source_dx] "r"(static_cast<long>(source_dx))
   : "memory", "r10", "r11", "r13", "r14", "rax", "xmm0", "xmm1", "xmm2"
 );
 }
 
 #elif defined(MOZILLA_MAY_SUPPORT_SSE) && defined(ARCH_CPU_X86_32) && !defined(__PIC__)
 
 // PIC version is slower because less registers are available, so
 // non-PIC is used on platforms where it is possible.