Bug 577645 - Y'CbCr assembly uses movntq which is SSE, not MMX. Change runtime CPU detection to match. r=tterribe
author Matthew Gregan <kinetik@flim.org>
Mon, 02 Aug 2010 15:32:14 +1200
changeset 49182 5fa88be4c40188249426a095b99abe381e243e2e
parent 49181 9726125de121422b4366ec2bdb58de69a0b14233
child 49183 3afff341c3ee103d445441d4796b337e004bbf49
push id unknown
push user unknown
push date unknown
reviewers tterribe
bugs 577645
milestone 2.0b4pre
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 577645 - Y'CbCr assembly uses movntq which is SSE, not MMX. Change runtime CPU detection to match. r=tterribe
gfx/ycbcr/bug577645_movntq.patch
gfx/ycbcr/update.sh
gfx/ycbcr/yuv_convert.cpp
new file mode 100644
--- /dev/null
+++ b/gfx/ycbcr/bug577645_movntq.patch
@@ -0,0 +1,63 @@
+diff --git a/gfx/ycbcr/yuv_convert.cpp b/gfx/ycbcr/yuv_convert.cpp
+--- a/gfx/ycbcr/yuv_convert.cpp
++++ b/gfx/ycbcr/yuv_convert.cpp
+@@ -36,19 +36,21 @@ NS_GFX_(void) ConvertYCbCrToRGB32(const 
+                                   int pic_width,
+                                   int pic_height,
+                                   int y_pitch,
+                                   int uv_pitch,
+                                   int rgb_pitch,
+                                   YUVType yuv_type) {
+   unsigned int y_shift = yuv_type == YV12 ? 1 : 0;
+   unsigned int x_shift = yuv_type == YV24 ? 0 : 1;
+-  // There is no optimized YV24 MMX routine so we check for this and
++  // Test for SSE because the optimized code uses movntq, which is not part of MMX.
++  bool has_sse = supports_mmx() && supports_sse();
++  // There is no optimized YV24 SSE routine so we check for this and
+   // fall back to the C code.
+-  bool has_mmx = supports_mmx() && yuv_type != YV24;
++  has_sse &= yuv_type != YV24;
+   bool odd_pic_x = yuv_type != YV24 && pic_x % 2 != 0;
+   int x_width = odd_pic_x ? pic_width - 1 : pic_width;
+ 
+   for (int y = pic_y; y < pic_height + pic_y; ++y) {
+     uint8* rgb_row = rgb_buf + (y - pic_y) * rgb_pitch;
+     const uint8* y_ptr = y_buf + y * y_pitch + pic_x;
+     const uint8* u_ptr = u_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift);
+     const uint8* v_ptr = v_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift);
+@@ -60,32 +62,32 @@ NS_GFX_(void) ConvertYCbCrToRGB32(const 
+                                  u_ptr++,
+                                  v_ptr++,
+                                  rgb_row,
+                                  1,
+                                  x_shift);
+       rgb_row += 4;
+     }
+ 
+-    if (has_mmx)
++    if (has_sse)
+       FastConvertYUVToRGB32Row(y_ptr,
+                                u_ptr,
+                                v_ptr,
+                                rgb_row,
+                                x_width);
+     else
+       FastConvertYUVToRGB32Row_C(y_ptr,
+                                  u_ptr,
+                                  v_ptr,
+                                  rgb_row,
+                                  x_width,
+                                  x_shift);
+   }
+ 
+ #ifdef ARCH_CPU_X86_FAMILY
+-  // MMX used for FastConvertYUVToRGB32Row requires emms instruction.
+-  if (has_mmx)
++  // SSE used for FastConvertYUVToRGB32Row requires emms instruction.
++  if (has_sse)
+     EMMS();
+ #endif
+ }
+ 
+ }  // namespace gfx
+ }  // namespace mozilla
--- a/gfx/ycbcr/update.sh
+++ b/gfx/ycbcr/update.sh
@@ -9,8 +9,9 @@ cp $1/media/base/yuv_row_linux.cc yuv_ro
 patch -p3 <convert.patch
 patch -p3 <picture_region.patch
 patch -p3 <remove_scale.patch
 patch -p3 <export.patch
 patch -p3 <win64_mac64.patch
 patch -p3 <yv24.patch
 patch -p3 <row_c_fix.patch
 patch -p3 <bug572034_mac_64bit.patch
+patch -p3 <bug577645_movntq.patch
--- a/gfx/ycbcr/yuv_convert.cpp
+++ b/gfx/ycbcr/yuv_convert.cpp
@@ -36,19 +36,21 @@ NS_GFX_(void) ConvertYCbCrToRGB32(const 
                                   int pic_width,
                                   int pic_height,
                                   int y_pitch,
                                   int uv_pitch,
                                   int rgb_pitch,
                                   YUVType yuv_type) {
   unsigned int y_shift = yuv_type == YV12 ? 1 : 0;
   unsigned int x_shift = yuv_type == YV24 ? 0 : 1;
-  // There is no optimized YV24 MMX routine so we check for this and
+  // Test for SSE because the optimized code uses movntq, which is not part of MMX.
+  bool has_sse = supports_mmx() && supports_sse();
+  // There is no optimized YV24 SSE routine so we check for this and
   // fall back to the C code.
-  bool has_mmx = supports_mmx() && yuv_type != YV24;
+  has_sse &= yuv_type != YV24;
   bool odd_pic_x = yuv_type != YV24 && pic_x % 2 != 0;
   int x_width = odd_pic_x ? pic_width - 1 : pic_width;
 
   for (int y = pic_y; y < pic_height + pic_y; ++y) {
     uint8* rgb_row = rgb_buf + (y - pic_y) * rgb_pitch;
     const uint8* y_ptr = y_buf + y * y_pitch + pic_x;
     const uint8* u_ptr = u_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift);
     const uint8* v_ptr = v_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift);
@@ -60,32 +62,32 @@ NS_GFX_(void) ConvertYCbCrToRGB32(const 
                                  u_ptr++,
                                  v_ptr++,
                                  rgb_row,
                                  1,
                                  x_shift);
       rgb_row += 4;
     }
 
-    if (has_mmx)
+    if (has_sse)
       FastConvertYUVToRGB32Row(y_ptr,
                                u_ptr,
                                v_ptr,
                                rgb_row,
                                x_width);
     else
       FastConvertYUVToRGB32Row_C(y_ptr,
                                  u_ptr,
                                  v_ptr,
                                  rgb_row,
                                  x_width,
                                  x_shift);
   }
 
 #ifdef ARCH_CPU_X86_FAMILY
-  // MMX used for FastConvertYUVToRGB32Row requires emms instruction.
-  if (has_mmx)
+  // SSE used for FastConvertYUVToRGB32Row requires emms instruction.
+  if (has_sse)
     EMMS();
 #endif
 }
 
 }  // namespace gfx
 }  // namespace mozilla