Bug 561739 Implement YCbCr conversion on Solaris r=doublec
author Ginn Chen <ginn.chen@sun.com>
Wed, 23 Jun 2010 17:30:48 +0800
changeset 44177 28962e3d0ed3bea6f36a218ee244dd3aac465707
parent 44176 35f1e03ce92849ff187e4f4134f6009eb0daf6a8
child 44178 4a598b0e35dc2fe969a2f9c8468cf925e1a8a66c
push id 14012
push user ginn.chen@sun.com
push date Wed, 23 Jun 2010 09:37:55 +0000
treeherder mozilla-central@8f05ab3aa198 [default view] [failures only]
perfherder [talos] [build metrics] [platform microbench] (compared to previous push)
reviewers doublec
bugs 561739
milestone 1.9.3a6pre
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 561739 Implement YCbCr conversion on Solaris r=doublec
gfx/ycbcr/Makefile.in
gfx/ycbcr/README
gfx/ycbcr/chromium_types.h
gfx/ycbcr/yuv_convert.cpp
gfx/ycbcr/yuv_row_linux.cpp
--- a/gfx/ycbcr/Makefile.in
+++ b/gfx/ycbcr/Makefile.in
@@ -24,28 +24,33 @@ CPPSRCS = yuv_convert.cpp \
 ifdef _MSC_VER
 CPPSRCS += yuv_row_win.cpp \
            $(NULL)
 else
 ifeq ($(OS_ARCH),Linux)
 CPPSRCS += yuv_row_linux.cpp \
            $(NULL)
 else
+ifeq ($(OS_ARCH),SunOS)
+CPPSRCS += yuv_row_linux.cpp \
+           $(NULL)
+else
 ifeq ($(OS_ARCH),Darwin)
 ifeq ($(OS_TEST),x86_64)
 CPPSRCS += yuv_row_linux.cpp \
            $(NULL)
 else
 CPPSRCS += yuv_row_mac.cpp \
            $(NULL)
 endif
 else
 CPPSRCS += yuv_row_other.cpp \
            $(NULL)
 endif # mac
+endif # SunOS
 endif # linux
 endif # windows
 
 EXTRA_DSO_LDOPTS += \
         $(LIBS_DIR) \
         $(EXTRA_DSO_LIBS) \
         $(XPCOM_LIBS) \
         $(NSPR_LIBS) \
--- a/gfx/ycbcr/README
+++ b/gfx/ycbcr/README
@@ -15,8 +15,9 @@ picture_region.patch: Change Chromium co
                       picture region only.
 
 remove_scale.patch: Removes Chromium scaling code.
 export.patch: Fix export for building on comm-central
 win64_mac64.patch: Fallback to C implementation on Windows and Mac OS X 64 bit
 yv24.patch: Adds YCbCr 4:4:4 support
 row_c_fix.patch: Fix broken C fallback code (See bug 561385).
 bug572034_mac_64bit.patch: Fix x86_64 linux code so it works on OS X.
+solaris.patch: Adds Solaris support, fallback to C implementation on SPARC
--- a/gfx/ycbcr/chromium_types.h
+++ b/gfx/ycbcr/chromium_types.h
@@ -47,25 +47,33 @@ typedef PRInt16 int16;
 // Processor architecture detection.  For more info on what's defined, see:
 //   http://msdn.microsoft.com/en-us/library/b0084kay.aspx
 //   http://www.agner.org/optimize/calling_conventions.pdf
 //   or with gcc, run: "echo | gcc -E -dM -"
 #if defined(_M_X64) || defined(__x86_64__)
 #define ARCH_CPU_X86_FAMILY 1
 #define ARCH_CPU_X86_64 1
 #define ARCH_CPU_64_BITS 1
-#elif defined(_M_IX86) || defined(__i386__)
+#elif defined(_M_IX86) || defined(__i386__) || defined(__i386)
 #define ARCH_CPU_X86_FAMILY 1
 #define ARCH_CPU_X86 1
 #define ARCH_CPU_32_BITS 1
 #elif defined(__ARMEL__)
 #define ARCH_CPU_ARM_FAMILY 1
 #define ARCH_CPU_ARMEL 1
 #define ARCH_CPU_32_BITS 1
 #elif defined(__ppc__)
 #define ARCH_CPU_PPC_FAMILY 1
 #define ARCH_CPU_PPC 1
 #define ARCH_CPU_32_BITS 1
+#elif defined(__sparcv9)  // 64-bit SPARC; tested first because __sparc is defined here too
+#define ARCH_CPU_SPARC_FAMILY 1
+#define ARCH_CPU_SPARC 1
+#define ARCH_CPU_64_BITS 1
+#elif defined(__sparc)  // remaining case: 32-bit SPARC
+#define ARCH_CPU_SPARC_FAMILY 1
+#define ARCH_CPU_SPARC 1
+#define ARCH_CPU_32_BITS 1
 #else
 #error Please add support for your architecture in chromium_types.h
 #endif
 
 #endif // GFX_CHROMIUMTYPES_H
--- a/gfx/ycbcr/yuv_convert.cpp
+++ b/gfx/ycbcr/yuv_convert.cpp
@@ -75,15 +75,17 @@ NS_GFX_(void) ConvertYCbCrToRGB32(const 
       FastConvertYUVToRGB32Row_C(y_ptr,
                                  u_ptr,
                                  v_ptr,
                                  rgb_row,
                                  x_width,
                                  x_shift);
   }
 
+#ifdef ARCH_CPU_X86_FAMILY
   // MMX used for FastConvertYUVToRGB32Row requires emms instruction.
   if (has_mmx)
     EMMS();
+#endif
 }
 
 }  // namespace gfx
 }  // namespace mozilla
--- a/gfx/ycbcr/yuv_row_linux.cpp
+++ b/gfx/ycbcr/yuv_row_linux.cpp
@@ -6,18 +6,18 @@
 
 #define DCHECK(a)
 
 // TODO(fbarchard): Move this to yuv_row_posix.cc to share with Mac.
 // TODO(fbarchard): Do 64 bit version.
 
 extern "C" {
 
-#if defined(ARCH_CPU_ARM_FAMILY)
-// ARM implementation uses C fallback
+#ifndef ARCH_CPU_X86_FAMILY
+// non-x86 implementation uses C fallback
 void FastConvertYUVToRGB32Row(const uint8* y_buf,
                               const uint8* u_buf,
                               const uint8* v_buf,
                               uint8* rgb_buf,
                               int width) {
   FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
 }
  
@@ -241,18 +241,82 @@ MMX_ALIGNED(int16 kCoefficientsRgbY[768]
   RGBV(0xE8), RGBV(0xE9), RGBV(0xEA), RGBV(0xEB),
   RGBV(0xEC), RGBV(0xED), RGBV(0xEE), RGBV(0xEF),
   RGBV(0xF0), RGBV(0xF1), RGBV(0xF2), RGBV(0xF3),
   RGBV(0xF4), RGBV(0xF5), RGBV(0xF6), RGBV(0xF7),
   RGBV(0xF8), RGBV(0xF9), RGBV(0xFA), RGBV(0xFB),
   RGBV(0xFC), RGBV(0xFD), RGBV(0xFE), RGBV(0xFF),
 };
 
+#ifdef __SUNPRO_CC
+#pragma align 16 (kCoefficientsRgbY)
+#endif
+
 #if defined(ARCH_CPU_X86_64)
 
+#ifdef __SUNPRO_CC
+// __SUNPRO_CC x86-64 variant, written with named labels. AMD64 ABI uses register parameters.
+void FastConvertYUVToRGB32Row(const uint8* y_buf,  // rdi
+                              const uint8* u_buf,  // rsi
+                              const uint8* v_buf,  // rdx
+                              uint8* rgb_buf,      // rcx
+                              int width) {         // r8
+  asm(
+  "jmp    convertend\n"  // enter at the loop test so width < 2 skips the pair loop
+"convertloop:"  // each pass consumes one U, one V, two Y bytes and writes 8 bytes
+  "movzbq (%1),%%r10\n"  // r10 = *u_buf (zero-extended)
+  "add    $0x1,%1\n"  // ++u_buf
+  "movzbq (%2),%%r11\n"  // r11 = *v_buf (zero-extended)
+  "add    $0x1,%2\n"  // ++v_buf
+  "movq   2048(%5,%%r10,8),%%xmm0\n"  // xmm0 = 8-byte table entry at byte offset 2048 + U*8
+  "movzbq (%0),%%r10\n"  // r10 = y_buf[0]
+  "movq   4096(%5,%%r11,8),%%xmm1\n"  // xmm1 = 8-byte table entry at byte offset 4096 + V*8
+  "movzbq 0x1(%0),%%r11\n"  // r11 = y_buf[1]
+  "paddsw %%xmm1,%%xmm0\n"  // xmm0 = U entry + V entry (signed saturating 16-bit add)
+  "movq   (%5,%%r10,8),%%xmm2\n"  // xmm2 = table entry for y_buf[0] (offset 0 + Y*8)
+  "add    $0x2,%0\n"  // y_buf += 2
+  "movq   (%5,%%r11,8),%%xmm3\n"  // xmm3 = table entry for y_buf[1]
+  "paddsw %%xmm0,%%xmm2\n"  // first sample: Y entry + chroma sum
+  "paddsw %%xmm0,%%xmm3\n"  // second sample: Y entry + the same chroma sum
+  "shufps $0x44,%%xmm3,%%xmm2\n"  // xmm2 = low 64 bits of xmm2 : low 64 bits of xmm3
+  "psraw  $0x6,%%xmm2\n"  // arithmetic shift each 16-bit lane right by 6
+  "packuswb %%xmm2,%%xmm2\n"  // narrow the 16-bit lanes to bytes with unsigned saturation
+  "movq   %%xmm2,0x0(%3)\n"  // store 8 bytes to rgb_buf
+  "add    $0x8,%3\n"  // rgb_buf += 8
+"convertend:"
+  "sub    $0x2,%4\n"  // width -= 2
+  "jns    convertloop\n"  // loop while the result is not negative (a full pair remained)
+
+"convertnext:"  // label is not referenced by any jump in this block
+  "add    $0x1,%4\n"  // width is now -1 (one sample left over) or -2 (none); add 1
+  "js     convertdone\n"  // still negative: nothing left to convert
+
+  "movzbq (%1),%%r10\n"  // leftover sample: r10 = *u_buf
+  "movq   2048(%5,%%r10,8),%%xmm0\n"  // xmm0 = table entry at offset 2048 + U*8
+  "movzbq (%2),%%r10\n"  // r10 = *v_buf
+  "movq   4096(%5,%%r10,8),%%xmm1\n"  // xmm1 = table entry at offset 4096 + V*8
+  "paddsw %%xmm1,%%xmm0\n"  // xmm0 = U entry + V entry
+  "movzbq (%0),%%r10\n"  // r10 = *y_buf
+  "movq   (%5,%%r10,8),%%xmm1\n"  // xmm1 = table entry for Y
+  "paddsw %%xmm0,%%xmm1\n"  // Y entry + chroma sum
+  "psraw  $0x6,%%xmm1\n"  // arithmetic shift each 16-bit lane right by 6
+  "packuswb %%xmm1,%%xmm1\n"  // narrow to bytes with unsigned saturation
+  "movd   %%xmm1,0x0(%3)\n"  // store 4 bytes to rgb_buf
+"convertdone:"
+  :  // no output operands; the input registers below are modified in place by the asm
+  : "r"(y_buf),  // %0
+    "r"(u_buf),  // %1
+    "r"(v_buf),  // %2
+    "r"(rgb_buf),  // %3
+    "r"(width),  // %4
+    "r" (&kCoefficientsRgbY)  // %5
+  : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"
+);
+}
+#else // __SUNPRO_CC
 // AMD64 ABI uses register paremters.
 void FastConvertYUVToRGB32Row(const uint8* y_buf,  // rdi
                               const uint8* u_buf,  // rsi
                               const uint8* v_buf,  // rdx
                               uint8* rgb_buf,      // rcx
                               int width) {         // r8
   asm(
   "jmp    1f\n"
@@ -301,19 +365,79 @@ void FastConvertYUVToRGB32Row(const uint
     "r"(u_buf),  // %1
     "r"(v_buf),  // %2
     "r"(rgb_buf),  // %3
     "r"(width),  // %4
     "r" (kCoefficientsRgbY)  // %5
   : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"
 );
 }
+#endif // __SUNPRO_CC
 
-#else
+#else // ARCH_CPU_X86_64
+
+#ifdef __SUNPRO_CC
+void FastConvertYUVToRGB32Row(const uint8* y_buf,  // __SUNPRO_CC 32-bit x86 variant using MMX
+                              const uint8* u_buf,
+                              const uint8* v_buf,
+                              uint8* rgb_buf,
+                              int width) {
+  asm(
+  "pusha\n"  // save all general-purpose registers; restored by popa at the end
+  "mov    %eax,%ebp\n"  // ebp = rgb_buf (bound to eax below), leaving eax free as scratch
+  "jmp    convertend\n"  // enter at the loop test so width < 2 skips the pair loop
 
+"convertloop:"  // each pass consumes one U, one V, two Y bytes and writes 8 bytes
+  "movzbl (%edi),%eax\n"  // eax = *u_buf (zero-extended)
+  "add    $0x1,%edi\n"  // ++u_buf
+  "movzbl (%esi),%ebx\n"  // ebx = *v_buf (zero-extended)
+  "add    $0x1,%esi\n"  // ++v_buf
+  "movq   kCoefficientsRgbY+2048(,%eax,8),%mm0\n"  // mm0 = table entry at offset 2048 + U*8
+  "movzbl (%edx),%eax\n"  // eax = y_buf[0]
+  "paddsw kCoefficientsRgbY+4096(,%ebx,8),%mm0\n"  // mm0 += table entry at offset 4096 + V*8
+  "movzbl 0x1(%edx),%ebx\n"  // ebx = y_buf[1]
+  "movq   kCoefficientsRgbY(,%eax,8),%mm1\n"  // mm1 = table entry for y_buf[0]
+  "add    $0x2,%edx\n"  // y_buf += 2
+  "movq   kCoefficientsRgbY(,%ebx,8),%mm2\n"  // mm2 = table entry for y_buf[1]
+  "paddsw %mm0,%mm1\n"  // first sample: Y entry + chroma sum (signed saturating)
+  "paddsw %mm0,%mm2\n"  // second sample: Y entry + the same chroma sum
+  "psraw  $0x6,%mm1\n"  // arithmetic shift each 16-bit lane right by 6
+  "psraw  $0x6,%mm2\n"  // same for the second sample
+  "packuswb %mm2,%mm1\n"  // pack both samples to bytes with unsigned saturation
+  "movntq %mm1,0x0(%ebp)\n"  // non-temporal 8-byte store to rgb_buf
+  "add    $0x8,%ebp\n"  // rgb_buf += 8
+"convertend:"
+  "sub    $0x2,%ecx\n"  // width -= 2
+  "jns    convertloop\n"  // loop while the result is not negative (a full pair remained)
+
+  "and    $0x1,%ecx\n"  // ecx is -1 or -2 here; the low bit is set only when one sample is left
+  "je     convertdone\n"  // low bit clear: nothing left to convert
+
+  "movzbl (%edi),%eax\n"  // leftover sample: eax = *u_buf
+  "movq   kCoefficientsRgbY+2048(,%eax,8),%mm0\n"  // mm0 = table entry at offset 2048 + U*8
+  "movzbl (%esi),%eax\n"  // eax = *v_buf
+  "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"  // mm0 += table entry at offset 4096 + V*8
+  "movzbl (%edx),%eax\n"  // eax = *y_buf
+  "movq   kCoefficientsRgbY(,%eax,8),%mm1\n"  // mm1 = table entry for Y
+  "paddsw %mm0,%mm1\n"  // Y entry + chroma sum
+  "psraw  $0x6,%mm1\n"  // arithmetic shift each 16-bit lane right by 6
+  "packuswb %mm1,%mm1\n"  // narrow to bytes with unsigned saturation
+  "movd   %mm1,0x0(%ebp)\n"  // store 4 bytes to rgb_buf
+"convertdone:"
+  "popa\n"  // restore the registers saved by pusha; no emms is issued in this block
+  :
+  : "d"(y_buf),  // %edx
+    "D"(u_buf),  // %edi
+    "S"(v_buf),  // %esi
+    "a"(rgb_buf),  // %eax
+    "c"(width)  // %ecx
+  : "memory"
+);
+}
+#else //  __SUNPRO_CC
 void FastConvertYUVToRGB32Row(const uint8* y_buf,
                               const uint8* u_buf,
                               const uint8* v_buf,
                               uint8* rgb_buf,
                               int width);
 
 // It's necessary to specify the correct section for the following code,
 // otherwise it will be placed in whatever the current section is as this unit
@@ -369,12 +493,13 @@ void FastConvertYUVToRGB32Row(const uint
   "packuswb %mm1,%mm1\n"
   "movd   %mm1,0x0(%ebp)\n"
 "2:"
   "popa\n"
   "ret\n"
   ".previous\n"
 );
 
-#endif
-#endif // ARCH_CPU_ARM_FAMILY
+#endif // __SUNPRO_CC
+#endif // ARCH_CPU_X86_64
+#endif // !ARCH_CPU_X86_FAMILY
 }  // extern "C"