Bug 577645 - Y'CbCr assembly uses movntq which is SSE, not MMX. Change runtime CPU detection to match. r=tterribe
new file mode 100644
--- /dev/null
+++ b/gfx/ycbcr/bug577645_movntq.patch
@@ -0,0 +1,63 @@
+diff --git a/gfx/ycbcr/yuv_convert.cpp b/gfx/ycbcr/yuv_convert.cpp
+--- a/gfx/ycbcr/yuv_convert.cpp
++++ b/gfx/ycbcr/yuv_convert.cpp
+@@ -36,19 +36,21 @@ NS_GFX_(void) ConvertYCbCrToRGB32(const
+ int pic_width,
+ int pic_height,
+ int y_pitch,
+ int uv_pitch,
+ int rgb_pitch,
+ YUVType yuv_type) {
+ unsigned int y_shift = yuv_type == YV12 ? 1 : 0;
+ unsigned int x_shift = yuv_type == YV24 ? 0 : 1;
+- // There is no optimized YV24 MMX routine so we check for this and
++ // Test for SSE because the optimized code uses movntq, which is not part of MMX.
++ bool has_sse = supports_mmx() && supports_sse();
++ // There is no optimized YV24 SSE routine so we check for this and
+ // fall back to the C code.
+- bool has_mmx = supports_mmx() && yuv_type != YV24;
++ has_sse &= yuv_type != YV24;
+ bool odd_pic_x = yuv_type != YV24 && pic_x % 2 != 0;
+ int x_width = odd_pic_x ? pic_width - 1 : pic_width;
+
+ for (int y = pic_y; y < pic_height + pic_y; ++y) {
+ uint8* rgb_row = rgb_buf + (y - pic_y) * rgb_pitch;
+ const uint8* y_ptr = y_buf + y * y_pitch + pic_x;
+ const uint8* u_ptr = u_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift);
+ const uint8* v_ptr = v_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift);
+@@ -60,32 +62,32 @@ NS_GFX_(void) ConvertYCbCrToRGB32(const
+ u_ptr++,
+ v_ptr++,
+ rgb_row,
+ 1,
+ x_shift);
+ rgb_row += 4;
+ }
+
+- if (has_mmx)
++ if (has_sse)
+ FastConvertYUVToRGB32Row(y_ptr,
+ u_ptr,
+ v_ptr,
+ rgb_row,
+ x_width);
+ else
+ FastConvertYUVToRGB32Row_C(y_ptr,
+ u_ptr,
+ v_ptr,
+ rgb_row,
+ x_width,
+ x_shift);
+ }
+
+ #ifdef ARCH_CPU_X86_FAMILY
+- // MMX used for FastConvertYUVToRGB32Row requires emms instruction.
+- if (has_mmx)
++ // SSE used for FastConvertYUVToRGB32Row requires emms instruction.
++ if (has_sse)
+ EMMS();
+ #endif
+ }
+
+ } // namespace gfx
+ } // namespace mozilla
--- a/gfx/ycbcr/update.sh
+++ b/gfx/ycbcr/update.sh
@@ -9,8 +9,9 @@ cp $1/media/base/yuv_row_linux.cc yuv_ro
patch -p3 <convert.patch
patch -p3 <picture_region.patch
patch -p3 <remove_scale.patch
patch -p3 <export.patch
patch -p3 <win64_mac64.patch
patch -p3 <yv24.patch
patch -p3 <row_c_fix.patch
patch -p3 <bug572034_mac_64bit.patch
+patch -p3 <bug577645_movntq.patch
--- a/gfx/ycbcr/yuv_convert.cpp
+++ b/gfx/ycbcr/yuv_convert.cpp
@@ -36,19 +36,21 @@ NS_GFX_(void) ConvertYCbCrToRGB32(const
int pic_width,
int pic_height,
int y_pitch,
int uv_pitch,
int rgb_pitch,
YUVType yuv_type) {
unsigned int y_shift = yuv_type == YV12 ? 1 : 0;
unsigned int x_shift = yuv_type == YV24 ? 0 : 1;
- // There is no optimized YV24 MMX routine so we check for this and
+ // Test for SSE because the optimized code uses movntq, which is not part of MMX.
+ bool has_sse = supports_mmx() && supports_sse();
+ // There is no optimized YV24 SSE routine so we check for this and
// fall back to the C code.
- bool has_mmx = supports_mmx() && yuv_type != YV24;
+ has_sse &= yuv_type != YV24;
bool odd_pic_x = yuv_type != YV24 && pic_x % 2 != 0;
int x_width = odd_pic_x ? pic_width - 1 : pic_width;
for (int y = pic_y; y < pic_height + pic_y; ++y) {
uint8* rgb_row = rgb_buf + (y - pic_y) * rgb_pitch;
const uint8* y_ptr = y_buf + y * y_pitch + pic_x;
const uint8* u_ptr = u_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift);
const uint8* v_ptr = v_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift);
@@ -60,32 +62,32 @@ NS_GFX_(void) ConvertYCbCrToRGB32(const
u_ptr++,
v_ptr++,
rgb_row,
1,
x_shift);
rgb_row += 4;
}
- if (has_mmx)
+ if (has_sse)
FastConvertYUVToRGB32Row(y_ptr,
u_ptr,
v_ptr,
rgb_row,
x_width);
else
FastConvertYUVToRGB32Row_C(y_ptr,
u_ptr,
v_ptr,
rgb_row,
x_width,
x_shift);
}
#ifdef ARCH_CPU_X86_FAMILY
- // MMX used for FastConvertYUVToRGB32Row requires emms instruction.
- if (has_mmx)
+ // SSE used for FastConvertYUVToRGB32Row requires emms instruction.
+ if (has_sse)
EMMS();
#endif
}
} // namespace gfx
} // namespace mozilla