Bug 572034 - Use Linux x86_64 Y'CbCr routines on x86_64 OS X. Convert Linux routines to local asm labels. r=chris.double
☠☠ backed out by 3e0eb9fe8f48 ☠ ☠
author Matthew Gregan <kinetik@flim.org>
Thu, 17 Jun 2010 18:16:28 +1200
changeset 43750 d268e54fbfcfbbfab0e3b890fe376cd058498bb8
parent 43749 4adab2629c3f76da345a60cb55a3925db6c6241f
child 43751 23f1170df612e052aab82489eb8fe73c7c1dae64
child 43823 3e0eb9fe8f481ce2257922ef08219bb364da24a7
push id unknown
push user unknown
push date unknown
reviewers chris.double
bugs 572034
milestone 1.9.3a6pre
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 572034 - Use Linux x86_64 Y'CbCr routines on x86_64 OS X. Convert Linux routines to local asm labels. r=chris.double
gfx/ycbcr/Makefile.in
gfx/ycbcr/README
gfx/ycbcr/bug572034_mac_64bit.patch
gfx/ycbcr/update.sh
gfx/ycbcr/yuv_row_linux.cpp
--- a/gfx/ycbcr/Makefile.in
+++ b/gfx/ycbcr/Makefile.in
@@ -25,18 +25,23 @@ ifdef _MSC_VER
 CPPSRCS += yuv_row_win.cpp \
            $(NULL)
 else
 ifeq ($(OS_ARCH),Linux)
 CPPSRCS += yuv_row_linux.cpp \
            $(NULL)
 else
 ifeq ($(OS_ARCH),Darwin)
+ifeq ($(OS_TEST),x86_64)
+CPPSRCS += yuv_row_linux.cpp \
+           $(NULL)
+else
 CPPSRCS += yuv_row_mac.cpp \
            $(NULL)
+endif
 else
 CPPSRCS += yuv_row_other.cpp \
            $(NULL)
 endif # mac
 endif # linux
 endif # windows
 
 EXTRA_DSO_LDOPTS += \
--- a/gfx/ycbcr/README
+++ b/gfx/ycbcr/README
@@ -14,8 +14,9 @@ picture_region.patch: Change Chromium co
                       The YUV conversion will convert within this 
                       picture region only.
 
 remove_scale.patch: Removes Chromium scaling code.
 export.patch: Fix export for building on comm-central
 win64_mac64.patch: Fallback to C implementation on Windows and Mac OS X 64 bit
 yv24.patch: Adds YCbCr 4:4:4 support
 row_c_fix.patch: Fix broken C fallback code (See bug 561385).
+bug572034_mac_64bit.patch: Fix x86_64 linux code so it works on OS X.
new file mode 100644
--- /dev/null
+++ b/gfx/ycbcr/bug572034_mac_64bit.patch
@@ -0,0 +1,66 @@
+diff --git a/gfx/ycbcr/yuv_row_linux.cpp b/gfx/ycbcr/yuv_row_linux.cpp
+--- a/gfx/ycbcr/yuv_row_linux.cpp
++++ b/gfx/ycbcr/yuv_row_linux.cpp
+@@ -250,18 +250,18 @@ MMX_ALIGNED(int16 kCoefficientsRgbY[768]
+ 
+ // AMD64 ABI uses register paremters.
+ void FastConvertYUVToRGB32Row(const uint8* y_buf,  // rdi
+                               const uint8* u_buf,  // rsi
+                               const uint8* v_buf,  // rdx
+                               uint8* rgb_buf,      // rcx
+                               int width) {         // r8
+   asm(
+-  "jmp    convertend\n"
+-"convertloop:"
++  "jmp    Lconvertend\n"
++"Lconvertloop:"
+   "movzb  (%1),%%r10\n"
+   "add    $0x1,%1\n"
+   "movzb  (%2),%%r11\n"
+   "add    $0x1,%2\n"
+   "movq   2048(%5,%%r10,8),%%xmm0\n"
+   "movzb  (%0),%%r10\n"
+   "movq   4096(%5,%%r11,8),%%xmm1\n"
+   "movzb  0x1(%0),%%r11\n"
+@@ -271,36 +271,36 @@ void FastConvertYUVToRGB32Row(const uint
+   "movq   (%5,%%r11,8),%%xmm3\n"
+   "paddsw %%xmm0,%%xmm2\n"
+   "paddsw %%xmm0,%%xmm3\n"
+   "shufps $0x44,%%xmm3,%%xmm2\n"
+   "psraw  $0x6,%%xmm2\n"
+   "packuswb %%xmm2,%%xmm2\n"
+   "movq   %%xmm2,0x0(%3)\n"
+   "add    $0x8,%3\n"
+-"convertend:"
++"Lconvertend:"
+   "sub    $0x2,%4\n"
+-  "jns    convertloop\n"
++  "jns    Lconvertloop\n"
+ 
+-"convertnext:"
++"Lconvertnext:"
+   "add    $0x1,%4\n"
+-  "js     convertdone\n"
++  "js     Lconvertdone\n"
+ 
+   "movzb  (%1),%%r10\n"
+   "movq   2048(%5,%%r10,8),%%xmm0\n"
+   "movzb  (%2),%%r10\n"
+   "movq   4096(%5,%%r10,8),%%xmm1\n"
+   "paddsw %%xmm1,%%xmm0\n"
+   "movzb  (%0),%%r10\n"
+   "movq   (%5,%%r10,8),%%xmm1\n"
+   "paddsw %%xmm0,%%xmm1\n"
+   "psraw  $0x6,%%xmm1\n"
+   "packuswb %%xmm1,%%xmm1\n"
+   "movd   %%xmm1,0x0(%3)\n"
+-"convertdone:"
++"Lconvertdone:"
+   :
+   : "r"(y_buf),  // %0
+     "r"(u_buf),  // %1
+     "r"(v_buf),  // %2
+     "r"(rgb_buf),  // %3
+     "r"(width),  // %4
+     "r" (kCoefficientsRgbY)  // %5
+   : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"
--- a/gfx/ycbcr/update.sh
+++ b/gfx/ycbcr/update.sh
@@ -8,8 +8,9 @@ cp $1/media/base/yuv_row_win.cc yuv_row_
 cp $1/media/base/yuv_row_linux.cc yuv_row_c.cpp
 patch -p3 <convert.patch
 patch -p3 <picture_region.patch
 patch -p3 <remove_scale.patch
 patch -p3 <export.patch
 patch -p3 <win64_mac64.patch
 patch -p3 <yv24.patch
 patch -p3 <row_c_fix.patch
+patch -p3 <bug572034_mac_64bit.patch
--- a/gfx/ycbcr/yuv_row_linux.cpp
+++ b/gfx/ycbcr/yuv_row_linux.cpp
@@ -250,18 +250,18 @@ MMX_ALIGNED(int16 kCoefficientsRgbY[768]
 
 // AMD64 ABI uses register paremters.
 void FastConvertYUVToRGB32Row(const uint8* y_buf,  // rdi
                               const uint8* u_buf,  // rsi
                               const uint8* v_buf,  // rdx
                               uint8* rgb_buf,      // rcx
                               int width) {         // r8
   asm(
-  "jmp    convertend\n"
-"convertloop:"
+  "jmp    Lconvertend\n"
+"Lconvertloop:"
   "movzb  (%1),%%r10\n"
   "add    $0x1,%1\n"
   "movzb  (%2),%%r11\n"
   "add    $0x1,%2\n"
   "movq   2048(%5,%%r10,8),%%xmm0\n"
   "movzb  (%0),%%r10\n"
   "movq   4096(%5,%%r11,8),%%xmm1\n"
   "movzb  0x1(%0),%%r11\n"
@@ -271,36 +271,36 @@ void FastConvertYUVToRGB32Row(const uint
   "movq   (%5,%%r11,8),%%xmm3\n"
   "paddsw %%xmm0,%%xmm2\n"
   "paddsw %%xmm0,%%xmm3\n"
   "shufps $0x44,%%xmm3,%%xmm2\n"
   "psraw  $0x6,%%xmm2\n"
   "packuswb %%xmm2,%%xmm2\n"
   "movq   %%xmm2,0x0(%3)\n"
   "add    $0x8,%3\n"
-"convertend:"
+"Lconvertend:"
   "sub    $0x2,%4\n"
-  "jns    convertloop\n"
+  "jns    Lconvertloop\n"
 
-"convertnext:"
+"Lconvertnext:"
   "add    $0x1,%4\n"
-  "js     convertdone\n"
+  "js     Lconvertdone\n"
 
   "movzb  (%1),%%r10\n"
   "movq   2048(%5,%%r10,8),%%xmm0\n"
   "movzb  (%2),%%r10\n"
   "movq   4096(%5,%%r10,8),%%xmm1\n"
   "paddsw %%xmm1,%%xmm0\n"
   "movzb  (%0),%%r10\n"
   "movq   (%5,%%r10,8),%%xmm1\n"
   "paddsw %%xmm0,%%xmm1\n"
   "psraw  $0x6,%%xmm1\n"
   "packuswb %%xmm1,%%xmm1\n"
   "movd   %%xmm1,0x0(%3)\n"
-"convertdone:"
+"Lconvertdone:"
   :
   : "r"(y_buf),  // %0
     "r"(u_buf),  // %1
     "r"(v_buf),  // %2
     "r"(rgb_buf),  // %3
     "r"(width),  // %4
     "r" (kCoefficientsRgbY)  // %5
   : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"