Bug 572034 - Use Linux x86_64 YCbCr routines on x86_64 OS X. Convert Linux routines to local asm labels. r=doublec
author Matthew Gregan <kinetik@flim.org>
Mon, 21 Jun 2010 12:18:13 -0700
changeset 43902 d6c0afe65fc707ec3902b0556dc782e3953e9102
parent 43901 0c940e50d73806d617b31c83fa8e18a670804d99
child 43903 5e852b461c063cc77a241f3594a6d07f1a8ff484
push id 13954
push user me@kylehuey.com
push date Mon, 21 Jun 2010 19:20:12 +0000
treeherder mozilla-central@5e852b461c06 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers doublec
bugs 572034
milestone 1.9.3a6pre
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 572034 - Use Linux x86_64 YCbCr routines on x86_64 OS X. Convert Linux routines to local asm labels. r=doublec
gfx/ycbcr/Makefile.in
gfx/ycbcr/README
gfx/ycbcr/bug572034_mac_64bit.patch
gfx/ycbcr/update.sh
gfx/ycbcr/yuv_row_linux.cpp
--- a/gfx/ycbcr/Makefile.in
+++ b/gfx/ycbcr/Makefile.in
@@ -25,18 +25,23 @@ ifdef _MSC_VER
 CPPSRCS += yuv_row_win.cpp \
            $(NULL)
 else
 ifeq ($(OS_ARCH),Linux)
 CPPSRCS += yuv_row_linux.cpp \
            $(NULL)
 else
 ifeq ($(OS_ARCH),Darwin)
+ifeq ($(OS_TEST),x86_64)
+CPPSRCS += yuv_row_linux.cpp \
+           $(NULL)
+else
 CPPSRCS += yuv_row_mac.cpp \
            $(NULL)
+endif
 else
 CPPSRCS += yuv_row_other.cpp \
            $(NULL)
 endif # mac
 endif # linux
 endif # windows
 
 EXTRA_DSO_LDOPTS += \
--- a/gfx/ycbcr/README
+++ b/gfx/ycbcr/README
@@ -14,8 +14,9 @@ picture_region.patch: Change Chromium co
                       The YUV conversion will convert within this 
                       picture region only.
 
 remove_scale.patch: Removes Chromium scaling code.
 export.patch: Fix export for building on comm-central
 win64_mac64.patch: Fallback to C implementation on Windows and Mac OS X 64 bit
 yv24.patch: Adds YCbCr 4:4:4 support
 row_c_fix.patch: Fix broken C fallback code (See bug 561385).
+bug572034_mac_64bit.patch: Fix x86_64 linux code so it works on OS X.
new file mode 100644
--- /dev/null
+++ b/gfx/ycbcr/bug572034_mac_64bit.patch
@@ -0,0 +1,66 @@
+diff --git a/gfx/ycbcr/yuv_row_linux.cpp b/gfx/ycbcr/yuv_row_linux.cpp
+--- a/gfx/ycbcr/yuv_row_linux.cpp
++++ b/gfx/ycbcr/yuv_row_linux.cpp
+@@ -250,18 +250,18 @@ MMX_ALIGNED(int16 kCoefficientsRgbY[768]
+ 
+ // AMD64 ABI uses register paremters.
+ void FastConvertYUVToRGB32Row(const uint8* y_buf,  // rdi
+                               const uint8* u_buf,  // rsi
+                               const uint8* v_buf,  // rdx
+                               uint8* rgb_buf,      // rcx
+                               int width) {         // r8
+   asm(
+-  "jmp    convertend\n"
+-"convertloop:"
++  "jmp    Lconvertend\n"
++"Lconvertloop:"
+   "movzb  (%1),%%r10\n"
+   "add    $0x1,%1\n"
+   "movzb  (%2),%%r11\n"
+   "add    $0x1,%2\n"
+   "movq   2048(%5,%%r10,8),%%xmm0\n"
+   "movzb  (%0),%%r10\n"
+   "movq   4096(%5,%%r11,8),%%xmm1\n"
+   "movzb  0x1(%0),%%r11\n"
+@@ -271,36 +271,36 @@ void FastConvertYUVToRGB32Row(const uint
+   "movq   (%5,%%r11,8),%%xmm3\n"
+   "paddsw %%xmm0,%%xmm2\n"
+   "paddsw %%xmm0,%%xmm3\n"
+   "shufps $0x44,%%xmm3,%%xmm2\n"
+   "psraw  $0x6,%%xmm2\n"
+   "packuswb %%xmm2,%%xmm2\n"
+   "movq   %%xmm2,0x0(%3)\n"
+   "add    $0x8,%3\n"
+-"convertend:"
++"Lconvertend:"
+   "sub    $0x2,%4\n"
+-  "jns    convertloop\n"
++  "jns    Lconvertloop\n"
+ 
+-"convertnext:"
++"Lconvertnext:"
+   "add    $0x1,%4\n"
+-  "js     convertdone\n"
++  "js     Lconvertdone\n"
+ 
+   "movzb  (%1),%%r10\n"
+   "movq   2048(%5,%%r10,8),%%xmm0\n"
+   "movzb  (%2),%%r10\n"
+   "movq   4096(%5,%%r10,8),%%xmm1\n"
+   "paddsw %%xmm1,%%xmm0\n"
+   "movzb  (%0),%%r10\n"
+   "movq   (%5,%%r10,8),%%xmm1\n"
+   "paddsw %%xmm0,%%xmm1\n"
+   "psraw  $0x6,%%xmm1\n"
+   "packuswb %%xmm1,%%xmm1\n"
+   "movd   %%xmm1,0x0(%3)\n"
+-"convertdone:"
++"Lconvertdone:"
+   :
+   : "r"(y_buf),  // %0
+     "r"(u_buf),  // %1
+     "r"(v_buf),  // %2
+     "r"(rgb_buf),  // %3
+     "r"(width),  // %4
+     "r" (kCoefficientsRgbY)  // %5
+   : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"
--- a/gfx/ycbcr/update.sh
+++ b/gfx/ycbcr/update.sh
@@ -8,8 +8,9 @@ cp $1/media/base/yuv_row_win.cc yuv_row_
 cp $1/media/base/yuv_row_linux.cc yuv_row_c.cpp
 patch -p3 <convert.patch
 patch -p3 <picture_region.patch
 patch -p3 <remove_scale.patch
 patch -p3 <export.patch
 patch -p3 <win64_mac64.patch
 patch -p3 <yv24.patch
 patch -p3 <row_c_fix.patch
+patch -p3 <bug572034_mac_64bit.patch
--- a/gfx/ycbcr/yuv_row_linux.cpp
+++ b/gfx/ycbcr/yuv_row_linux.cpp
@@ -250,18 +250,18 @@ MMX_ALIGNED(int16 kCoefficientsRgbY[768]
 
 // AMD64 ABI uses register paremters.
 void FastConvertYUVToRGB32Row(const uint8* y_buf,  // rdi
                               const uint8* u_buf,  // rsi
                               const uint8* v_buf,  // rdx
                               uint8* rgb_buf,      // rcx
                               int width) {         // r8
   asm(
-  "jmp    convertend\n"
-"convertloop:"
+  "jmp    Lconvertend\n"
+"Lconvertloop:"
   "movzb  (%1),%%r10\n"
   "add    $0x1,%1\n"
   "movzb  (%2),%%r11\n"
   "add    $0x1,%2\n"
   "movq   2048(%5,%%r10,8),%%xmm0\n"
   "movzb  (%0),%%r10\n"
   "movq   4096(%5,%%r11,8),%%xmm1\n"
   "movzb  0x1(%0),%%r11\n"
@@ -271,36 +271,36 @@ void FastConvertYUVToRGB32Row(const uint
   "movq   (%5,%%r11,8),%%xmm3\n"
   "paddsw %%xmm0,%%xmm2\n"
   "paddsw %%xmm0,%%xmm3\n"
   "shufps $0x44,%%xmm3,%%xmm2\n"
   "psraw  $0x6,%%xmm2\n"
   "packuswb %%xmm2,%%xmm2\n"
   "movq   %%xmm2,0x0(%3)\n"
   "add    $0x8,%3\n"
-"convertend:"
+"Lconvertend:"
   "sub    $0x2,%4\n"
-  "jns    convertloop\n"
+  "jns    Lconvertloop\n"
 
-"convertnext:"
+"Lconvertnext:"
   "add    $0x1,%4\n"
-  "js     convertdone\n"
+  "js     Lconvertdone\n"
 
   "movzb  (%1),%%r10\n"
   "movq   2048(%5,%%r10,8),%%xmm0\n"
   "movzb  (%2),%%r10\n"
   "movq   4096(%5,%%r10,8),%%xmm1\n"
   "paddsw %%xmm1,%%xmm0\n"
   "movzb  (%0),%%r10\n"
   "movq   (%5,%%r10,8),%%xmm1\n"
   "paddsw %%xmm0,%%xmm1\n"
   "psraw  $0x6,%%xmm1\n"
   "packuswb %%xmm1,%%xmm1\n"
   "movd   %%xmm1,0x0(%3)\n"
-"convertdone:"
+"Lconvertdone:"
   :
   : "r"(y_buf),  // %0
     "r"(u_buf),  // %1
     "r"(v_buf),  // %2
     "r"(rgb_buf),  // %3
     "r"(width),  // %4
     "r" (kCoefficientsRgbY)  // %5
   : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"