Bug 577645 - Y'CbCr assembly uses movntq which is SSE, not MMX. Change runtime CPU detection to match. r=tterribe
author: Matthew Gregan <kinetik@flim.org>
Mon, 02 Aug 2010 15:32:14 +1200
changeset 49182 5fa88be4c40188249426a095b99abe381e243e2e
parent 49181 9726125de121422b4366ec2bdb58de69a0b14233
child 49183 3afff341c3ee103d445441d4796b337e004bbf49
push id: unknown
push user: unknown
push date: unknown
reviewers: tterribe
bugs: 577645
milestone: 2.0b4pre
Bug 577645 - Y'CbCr assembly uses movntq which is SSE, not MMX. Change runtime CPU detection to match. r=tterribe
gfx/ycbcr/bug577645_movntq.patch
gfx/ycbcr/update.sh
gfx/ycbcr/yuv_convert.cpp
new file mode 100644
--- /dev/null
+++ b/gfx/ycbcr/bug577645_movntq.patch
@@ -0,0 +1,63 @@
+diff --git a/gfx/ycbcr/yuv_convert.cpp b/gfx/ycbcr/yuv_convert.cpp
+--- a/gfx/ycbcr/yuv_convert.cpp
++++ b/gfx/ycbcr/yuv_convert.cpp
+@@ -36,19 +36,21 @@ NS_GFX_(void) ConvertYCbCrToRGB32(const 
+                                   int pic_width,
+                                   int pic_height,
+                                   int y_pitch,
+                                   int uv_pitch,
+                                   int rgb_pitch,
+                                   YUVType yuv_type) {
+   unsigned int y_shift = yuv_type == YV12 ? 1 : 0;
+   unsigned int x_shift = yuv_type == YV24 ? 0 : 1;
+-  // There is no optimized YV24 MMX routine so we check for this and
++  // Test for SSE because the optimized code uses movntq, which is not part of MMX.
++  bool has_sse = supports_mmx() && supports_sse();
++  // There is no optimized YV24 SSE routine so we check for this and
+   // fall back to the C code.
+-  bool has_mmx = supports_mmx() && yuv_type != YV24;
++  has_sse &= yuv_type != YV24;
+   bool odd_pic_x = yuv_type != YV24 && pic_x % 2 != 0;
+   int x_width = odd_pic_x ? pic_width - 1 : pic_width;
+ 
+   for (int y = pic_y; y < pic_height + pic_y; ++y) {
+     uint8* rgb_row = rgb_buf + (y - pic_y) * rgb_pitch;
+     const uint8* y_ptr = y_buf + y * y_pitch + pic_x;
+     const uint8* u_ptr = u_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift);
+     const uint8* v_ptr = v_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift);
+@@ -60,32 +62,32 @@ NS_GFX_(void) ConvertYCbCrToRGB32(const 
+                                  u_ptr++,
+                                  v_ptr++,
+                                  rgb_row,
+                                  1,
+                                  x_shift);
+       rgb_row += 4;
+     }
+ 
+-    if (has_mmx)
++    if (has_sse)
+       FastConvertYUVToRGB32Row(y_ptr,
+                                u_ptr,
+                                v_ptr,
+                                rgb_row,
+                                x_width);
+     else
+       FastConvertYUVToRGB32Row_C(y_ptr,
+                                  u_ptr,
+                                  v_ptr,
+                                  rgb_row,
+                                  x_width,
+                                  x_shift);
+   }
+ 
+ #ifdef ARCH_CPU_X86_FAMILY
+-  // MMX used for FastConvertYUVToRGB32Row requires emms instruction.
+-  if (has_mmx)
++  // SSE used for FastConvertYUVToRGB32Row requires emms instruction.
++  if (has_sse)
+     EMMS();
+ #endif
+ }
+ 
+ }  // namespace gfx
+ }  // namespace mozilla
--- a/gfx/ycbcr/update.sh
+++ b/gfx/ycbcr/update.sh
@@ -9,8 +9,9 @@ cp $1/media/base/yuv_row_linux.cc yuv_ro
 patch -p3 <convert.patch
 patch -p3 <picture_region.patch
 patch -p3 <remove_scale.patch
 patch -p3 <export.patch
 patch -p3 <win64_mac64.patch
 patch -p3 <yv24.patch
 patch -p3 <row_c_fix.patch
 patch -p3 <bug572034_mac_64bit.patch
+patch -p3 <bug577645_movntq.patch
--- a/gfx/ycbcr/yuv_convert.cpp
+++ b/gfx/ycbcr/yuv_convert.cpp
@@ -36,19 +36,21 @@ NS_GFX_(void) ConvertYCbCrToRGB32(const 
                                   int pic_width,
                                   int pic_height,
                                   int y_pitch,
                                   int uv_pitch,
                                   int rgb_pitch,
                                   YUVType yuv_type) {
   unsigned int y_shift = yuv_type == YV12 ? 1 : 0;
   unsigned int x_shift = yuv_type == YV24 ? 0 : 1;
-  // There is no optimized YV24 MMX routine so we check for this and
+  // Test for SSE because the optimized code uses movntq, which is not part of MMX.
+  bool has_sse = supports_mmx() && supports_sse();
+  // There is no optimized YV24 SSE routine so we check for this and
   // fall back to the C code.
-  bool has_mmx = supports_mmx() && yuv_type != YV24;
+  has_sse &= yuv_type != YV24;
   bool odd_pic_x = yuv_type != YV24 && pic_x % 2 != 0;
   int x_width = odd_pic_x ? pic_width - 1 : pic_width;
 
   for (int y = pic_y; y < pic_height + pic_y; ++y) {
     uint8* rgb_row = rgb_buf + (y - pic_y) * rgb_pitch;
     const uint8* y_ptr = y_buf + y * y_pitch + pic_x;
     const uint8* u_ptr = u_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift);
     const uint8* v_ptr = v_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift);
@@ -60,32 +62,32 @@ NS_GFX_(void) ConvertYCbCrToRGB32(const 
                                  u_ptr++,
                                  v_ptr++,
                                  rgb_row,
                                  1,
                                  x_shift);
       rgb_row += 4;
     }
 
-    if (has_mmx)
+    if (has_sse)
       FastConvertYUVToRGB32Row(y_ptr,
                                u_ptr,
                                v_ptr,
                                rgb_row,
                                x_width);
     else
       FastConvertYUVToRGB32Row_C(y_ptr,
                                  u_ptr,
                                  v_ptr,
                                  rgb_row,
                                  x_width,
                                  x_shift);
   }
 
 #ifdef ARCH_CPU_X86_FAMILY
-  // MMX used for FastConvertYUVToRGB32Row requires emms instruction.
-  if (has_mmx)
+  // SSE used for FastConvertYUVToRGB32Row requires emms instruction.
+  if (has_sse)
     EMMS();
 #endif
 }
 
 }  // namespace gfx
 }  // namespace mozilla