Bug 583138 - Update to latest Chromium YCbCr to RGB Conversion code - r=roc a=blocking2.0
author Chris Double <chris.double@double.co.nz>
Thu, 11 Nov 2010 12:54:27 +1300
changeset 57390 64901a1fcf9339c7497c0f203d9df1f95df3c738
parent 57389 8ecd9dc6684e04dd6a3d37b0bc8f40c5847e4e7a
child 57391 d2d6455065349ad0114f134e47746f18011df7cb
push id unknown
push user unknown
push date unknown
reviewers roc, blocking2.0
bugs 583138
milestone 2.0b8pre
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 583138 - Update to latest Chromium YCbCr to RGB Conversion code - r=roc a=blocking2.0
gfx/layers/basic/BasicImages.cpp
gfx/ycbcr/Makefile.in
gfx/ycbcr/README
gfx/ycbcr/add_scale.patch
gfx/ycbcr/bug572034_mac_64bit.patch
gfx/ycbcr/bug577645_movntq.patch
gfx/ycbcr/bustage.patch
gfx/ycbcr/chromium_types.h
gfx/ycbcr/convert.patch
gfx/ycbcr/export.patch
gfx/ycbcr/picture_region.patch
gfx/ycbcr/remove_scale.patch
gfx/ycbcr/row_c_fix.patch
gfx/ycbcr/update.sh
gfx/ycbcr/win64_mac64.patch
gfx/ycbcr/yuv_convert.cpp
gfx/ycbcr/yuv_convert.h
gfx/ycbcr/yuv_row.h
gfx/ycbcr/yuv_row_c.cpp
gfx/ycbcr/yuv_row_linux.cpp
gfx/ycbcr/yuv_row_mac.cpp
gfx/ycbcr/yuv_row_other.cpp
gfx/ycbcr/yuv_row_posix.cpp
gfx/ycbcr/yuv_row_table.cpp
gfx/ycbcr/yuv_row_win.cpp
gfx/ycbcr/yv24.patch
--- a/gfx/layers/basic/BasicImages.cpp
+++ b/gfx/layers/basic/BasicImages.cpp
@@ -169,17 +169,18 @@ BasicPlanarYCbCrImage::SetData(const Dat
                            aData.mPicSize.width,
                            aData.mPicSize.height,
                            size.width,
                            size.height,
                            aData.mYStride,
                            aData.mCbCrStride,
                            size.width*4,
                            type,
-                           gfx::ROTATE_0);
+                           gfx::ROTATE_0,
+                           gfx::FILTER_BILINEAR);
   }
   else {
     gfx::ConvertYCbCrToRGB32(aData.mYChannel,
                              aData.mCbChannel,
                              aData.mCrChannel,
                              mBuffer,
                              aData.mPicX,
                              aData.mPicY,
--- a/gfx/ycbcr/Makefile.in
+++ b/gfx/ycbcr/Makefile.in
@@ -14,42 +14,38 @@ DEFINES += -D_IMPL_NS_GFX
 
 EXPORTS      = chromium_types.h \
                yuv_convert.h \
                yuv_row.h \
                $(NULL)
 
 CPPSRCS = yuv_convert.cpp \
           yuv_row_c.cpp \
+          yuv_row_table.cpp \
           $(NULL)
 
 ifdef _MSC_VER
 CPPSRCS += yuv_row_win.cpp \
            $(NULL)
 else
 ifeq ($(OS_ARCH),Linux)
-CPPSRCS += yuv_row_linux.cpp \
+CPPSRCS += yuv_row_posix.cpp \
            $(NULL)
 else
 ifeq ($(OS_ARCH),SunOS)
-CPPSRCS += yuv_row_linux.cpp \
+CPPSRCS += yuv_row_posix.cpp \
            $(NULL)
 else
 ifeq ($(OS_ARCH),Darwin)
-ifeq ($(OS_TEST),x86_64)
-CPPSRCS += yuv_row_linux.cpp \
+CPPSRCS += yuv_row_posix.cpp \
            $(NULL)
 else
-CPPSRCS += yuv_row_mac.cpp \
-           $(NULL)
-endif
-else
 CPPSRCS += yuv_row_other.cpp \
            $(NULL)
-endif # mac
+endif # Darwin
 endif # SunOS
 endif # linux
 endif # windows
 
 EXTRA_DSO_LDOPTS += \
         $(LIBS_DIR) \
         $(EXTRA_DSO_LIBS) \
         $(XPCOM_LIBS) \
--- a/gfx/ycbcr/README
+++ b/gfx/ycbcr/README
@@ -1,24 +1,15 @@
 This color conversion code is from the Chromium open source project available here:
 
 http://code.google.com/chromium/
 
-The code comes from svn revision 40876.
+The code comes from svn revision 63840 on 2010-10-26.
 
 The code was copied from a Chromium svn checkout using the 'update.sh' script which then applies patches for our build and to add dynamic CPU detection.
 
 convert.patch: Change Chromium code to build using Mozilla build system.
                Add runtime CPU detection for MMX
                Move default C implementation to work on all platforms.
-
-picture_region.patch: Change Chromium code to allow a picture region.
-                      The YUV conversion will convert within this 
-                      picture region only.
-
-remove_scale.patch: Removes Chromium scaling code.
-export.patch: Fix export for building on comm-central
-win64_mac64.patch: Fallback to C implementation on Windows and Mac OS X 64 bit
-yv24.patch: Adds YCbCr 4:4:4 support
-row_c_fix.patch: Fix broken C fallback code (See bug 561385).
-bug572034_mac_64bit.patch: Fix x86_64 linux code so it works on OS X.
-solaris.patch: Adds Solaris support, fallback to C implementation on SPARC
-add_scale.patch: re-adds Chromium scaling code
+               Change Chromium code to allow a picture region.
+               The YUV conversion will convert within this 
+               picture region only.
+               Add YCbCr 4:4:4 support
deleted file mode 100644
--- a/gfx/ycbcr/add_scale.patch
+++ /dev/null
@@ -1,953 +0,0 @@
-diff --git a/gfx/ycbcr/yuv_convert.cpp b/gfx/ycbcr/yuv_convert.cpp
-index 40ce10f..7d46629 100644
---- a/gfx/ycbcr/yuv_convert.cpp
-+++ b/gfx/ycbcr/yuv_convert.cpp
-@@ -82,10 +82,139 @@ NS_GFX_(void) ConvertYCbCrToRGB32(const uint8* y_buf,
- 
- #ifdef ARCH_CPU_X86_FAMILY
-   // MMX used for FastConvertYUVToRGB32Row requires emms instruction.
-   if (has_mmx)
-     EMMS();
- #endif
- }
- 
-+// Scale a frame of YUV to 32 bit ARGB.
-+NS_GFX_(void) ScaleYCbCrToRGB32(const uint8* y_buf,
-+                                const uint8* u_buf,
-+                                const uint8* v_buf,
-+                                uint8* rgb_buf,
-+                                int width,
-+                                int height,
-+                                int scaled_width,
-+                                int scaled_height,
-+                                int y_pitch,
-+                                int uv_pitch,
-+                                int rgb_pitch,
-+                                YUVType yuv_type,
-+                                Rotate view_rotate) {
-+  unsigned int y_shift = yuv_type == YV12 ? 1 : 0;
-+  unsigned int x_shift = yuv_type == YV24 ? 0 : 1;
-+  bool has_mmx = supports_mmx();
-+  // Diagram showing origin and direction of source sampling.
-+  // ->0   4<-
-+  // 7       3
-+  //
-+  // 6       5
-+  // ->1   2<-
-+  // Rotations that start at right side of image.
-+  if ((view_rotate == ROTATE_180) ||
-+      (view_rotate == ROTATE_270) ||
-+      (view_rotate == MIRROR_ROTATE_0) ||
-+      (view_rotate == MIRROR_ROTATE_90)) {
-+    y_buf += width - 1;
-+    u_buf += width / 2 - 1;
-+    v_buf += width / 2 - 1;
-+    width = -width;
-+  }
-+  // Rotations that start at bottom of image.
-+  if ((view_rotate == ROTATE_90) ||
-+      (view_rotate == ROTATE_180) ||
-+      (view_rotate == MIRROR_ROTATE_90) ||
-+      (view_rotate == MIRROR_ROTATE_180)) {
-+    y_buf += (height - 1) * y_pitch;
-+    u_buf += ((height >> y_shift) - 1) * uv_pitch;
-+    v_buf += ((height >> y_shift) - 1) * uv_pitch;
-+    height = -height;
-+  }
-+
-+  // Handle zero sized destination.
-+  if (scaled_width == 0 || scaled_height == 0)
-+    return;
-+  int scaled_dx = width * 16 / scaled_width;
-+  int scaled_dy = height * 16 / scaled_height;
-+
-+  int scaled_dx_uv = scaled_dx;
-+
-+  if ((view_rotate == ROTATE_90) ||
-+      (view_rotate == ROTATE_270)) {
-+    int tmp = scaled_height;
-+    scaled_height = scaled_width;
-+    scaled_width = tmp;
-+    tmp = height;
-+    height = width;
-+    width = tmp;
-+    int original_dx = scaled_dx;
-+    int original_dy = scaled_dy;
-+    scaled_dx = ((original_dy >> 4) * y_pitch) << 4;
-+    scaled_dx_uv = ((original_dy >> 4) * uv_pitch) << 4;
-+    scaled_dy = original_dx;
-+    if (view_rotate == ROTATE_90) {
-+      y_pitch = -1;
-+      uv_pitch = -1;
-+      height = -height;
-+    } else {
-+      y_pitch = 1;
-+      uv_pitch = 1;
-+    }
-+  }
-+
-+  for (int y = 0; y < scaled_height; ++y) {
-+    uint8* dest_pixel = rgb_buf + y * rgb_pitch;
-+    int scaled_y = (y * height / scaled_height);
-+    const uint8* y_ptr = y_buf + scaled_y * y_pitch;
-+    const uint8* u_ptr = u_buf + (scaled_y >> y_shift) * uv_pitch;
-+    const uint8* v_ptr = v_buf + (scaled_y >> y_shift) * uv_pitch;
-+
-+#if defined(_MSC_VER) && defined(_M_IX86)
-+    if (scaled_width == (width * 2)) {
-+      DoubleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
-+                          dest_pixel, scaled_width);
-+    } else if ((scaled_dx & 15) == 0) {  // Scaling by integer scale factor.
-+      if (scaled_dx_uv == scaled_dx) {   // Not rotated.
-+        if (scaled_dx == 16) {           // Not scaled
-+          if (has_mmx)
-+            FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
-+                                     dest_pixel, scaled_width);
-+          else
-+            FastConvertYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr,
-+                                      dest_pixel, scaled_width, x_shift);
-+        } else {  // Simple scale down. ie half
-+          ConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
-+                               dest_pixel, scaled_width, scaled_dx >> 4);
-+        }
-+      } else {
-+        RotateConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
-+                                   dest_pixel, scaled_width,
-+                                   scaled_dx >> 4, scaled_dx_uv >> 4);
-+      }
-+#else
-+    if (scaled_dx == 16) {           // Not scaled
-+      if (has_mmx)
-+        FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
-+                                 dest_pixel, scaled_width);
-+      else
-+        FastConvertYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr,
-+                                   dest_pixel, scaled_width, x_shift);
-+#endif
-+    } else {
-+      if (has_mmx) 
-+        ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
-+                           dest_pixel, scaled_width, scaled_dx);
-+      else
-+        ScaleYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr,
-+                             dest_pixel, scaled_width, scaled_dx, x_shift);
-+
-+    }  
-+  }
-+
-+  // MMX used for FastConvertYUVToRGB32Row requires emms instruction.
-+  if (has_mmx)
-+    EMMS();
-+}
-+
- }  // namespace gfx
- }  // namespace mozilla
-diff --git a/gfx/ycbcr/yuv_convert.h b/gfx/ycbcr/yuv_convert.h
-index c0b678d..a7e5b68 100644
---- a/gfx/ycbcr/yuv_convert.h
-+++ b/gfx/ycbcr/yuv_convert.h
-@@ -15,27 +15,56 @@ namespace gfx {
- // Type of YUV surface.
- // The value of these enums matter as they are used to shift vertical indices.
- enum YUVType {
-   YV12 = 0,           // YV12 is half width and half height chroma channels.
-   YV16 = 1,           // YV16 is half width and full height chroma channels.
-   YV24 = 2            // YV24 is full width and full height chroma channels.
- };
- 
-+// Mirror means flip the image horizontally, as in looking in a mirror.
-+// Rotate happens after mirroring.
-+enum Rotate {
-+  ROTATE_0,           // Rotation off.
-+  ROTATE_90,          // Rotate clockwise.
-+  ROTATE_180,         // Rotate upside down.
-+  ROTATE_270,         // Rotate counter clockwise.
-+  MIRROR_ROTATE_0,    // Mirror horizontally.
-+  MIRROR_ROTATE_90,   // Mirror then Rotate clockwise.
-+  MIRROR_ROTATE_180,  // Mirror vertically.
-+  MIRROR_ROTATE_270   // Transpose.
-+};
-+
- // Convert a frame of YUV to 32 bit ARGB.
- // Pass in YV16/YV12 depending on source format
- NS_GFX_(void) ConvertYCbCrToRGB32(const uint8* yplane,
-                                   const uint8* uplane,
-                                   const uint8* vplane,
-                                   uint8* rgbframe,
-                                   int pic_x,
-                                   int pic_y,
-                                   int pic_width,
-                                   int pic_height,
-                                   int ystride,
-                                   int uvstride,
-                                   int rgbstride,
-                                   YUVType yuv_type);
- 
-+// Scale a frame of YUV to 32 bit ARGB.
-+// Supports rotation and mirroring.
-+NS_GFX_(void) ScaleYCbCrToRGB32(const uint8* yplane,
-+                                const uint8* uplane,
-+                                const uint8* vplane,
-+                                uint8* rgbframe,
-+                                int frame_width,
-+                                int frame_height,
-+                                int scaled_width,
-+                                int scaled_height,
-+                                int ystride,
-+                                int uvstride,
-+                                int rgbstride,
-+                                YUVType yuv_type,
-+                                Rotate view_rotate);
-+
- }  // namespace gfx
- }  // namespace mozilla
- 
- #endif  // MEDIA_BASE_YUV_CONVERT_H_
-diff --git a/gfx/ycbcr/yuv_row.h b/gfx/ycbcr/yuv_row.h
-index 8519008..96969ec 100644
---- a/gfx/ycbcr/yuv_row.h
-+++ b/gfx/ycbcr/yuv_row.h
-@@ -24,16 +24,64 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
- void FastConvertYUVToRGB32Row_C(const uint8* y_buf,
-                                 const uint8* u_buf,
-                                 const uint8* v_buf,
-                                 uint8* rgb_buf,
-                                 int width,
-                                 unsigned int x_shift);
- 
- 
-+// Can do 1x, half size or any scale down by an integer amount.
-+// Step can be negative (mirroring, rotate 180).
-+// This is the third fastest of the scalers.
-+void ConvertYUVToRGB32Row(const uint8* y_buf,
-+                          const uint8* u_buf,
-+                          const uint8* v_buf,
-+                          uint8* rgb_buf,
-+                          int width,
-+                          int step);
-+
-+// Rotate is like Convert, but applies different step to Y versus U and V.
-+// This allows rotation by 90 or 270, by stepping by stride.
-+// This is the fourth fastest of the scalers.
-+void RotateConvertYUVToRGB32Row(const uint8* y_buf,
-+                                const uint8* u_buf,
-+                                const uint8* v_buf,
-+                                uint8* rgb_buf,
-+                                int width,
-+                                int ystep,
-+                                int uvstep);
-+
-+// Doubler does 4 pixels at a time.  Each pixel is replicated.
-+// This is the fastest of the scalers.
-+void DoubleYUVToRGB32Row(const uint8* y_buf,
-+                         const uint8* u_buf,
-+                         const uint8* v_buf,
-+                         uint8* rgb_buf,
-+                         int width);
-+
-+// Handles arbitrary scaling up or down.
-+// Mirroring is supported, but not 90 or 270 degree rotation.
-+// Chroma is under sampled every 2 pixels for performance.
-+// This is the slowest of the scalers.
-+void ScaleYUVToRGB32Row(const uint8* y_buf,
-+                        const uint8* u_buf,
-+                        const uint8* v_buf,
-+                        uint8* rgb_buf,
-+                        int width,
-+                        int scaled_dx);
-+
-+void ScaleYUVToRGB32Row_C(const uint8* y_buf,
-+                          const uint8* u_buf,
-+                          const uint8* v_buf,
-+                          uint8* rgb_buf,
-+                          int width,
-+                          int scaled_dx,
-+                          unsigned int x_shift);
-+
- }  // extern "C"
- 
- // x64 uses MMX2 (SSE) so emms is not required.
- #if defined(ARCH_CPU_X86)
- #if defined(_MSC_VER)
- #define EMMS() __asm emms
- #else
- #define EMMS() asm("emms")
-diff --git a/gfx/ycbcr/yuv_row_c.cpp b/gfx/ycbcr/yuv_row_c.cpp
-index b5c0018..49eced2 100644
---- a/gfx/ycbcr/yuv_row_c.cpp
-+++ b/gfx/ycbcr/yuv_row_c.cpp
-@@ -172,10 +172,31 @@ void FastConvertYUVToRGB32Row_C(const uint8* y_buf,
-         v = v_buf[x + 1];
-       }
-       YuvPixel(y1, u, v, rgb_buf + 4);
-     }
-     rgb_buf += 8;  // Advance 2 pixels.
-   }
- }
- 
-+// 28.4 fixed point is used.  A shift by 4 isolates the integer.
-+// A shift by 5 is used to further subsample the chrominance channels.
-+// & 15 isolates the fixed point fraction.  >> 2 to get the upper 2 bits,
-+// for 1/4 pixel accurate interpolation.
-+void ScaleYUVToRGB32Row_C(const uint8* y_buf,
-+                        const uint8* u_buf,
-+                        const uint8* v_buf,
-+                        uint8* rgb_buf,
-+                        int width,
-+                        int scaled_dx,
-+                        unsigned int x_shift) {
-+  int scaled_x = 0;
-+  for (int x = 0; x < width; ++x) {
-+    uint8 u = u_buf[scaled_x >> (4 + x_shift)];
-+    uint8 v = v_buf[scaled_x >> (4 + x_shift)];
-+    uint8 y0 = y_buf[scaled_x >> 4];
-+    YuvPixel(y0, u, v, rgb_buf);
-+    rgb_buf += 4;
-+    scaled_x += scaled_dx;
-+  }
-+}
- }  // extern "C"
- 
-diff --git a/gfx/ycbcr/yuv_row_linux.cpp b/gfx/ycbcr/yuv_row_linux.cpp
-index 9f7625c..bff02b3 100644
---- a/gfx/ycbcr/yuv_row_linux.cpp
-+++ b/gfx/ycbcr/yuv_row_linux.cpp
-@@ -16,16 +16,24 @@ extern "C" {
- void FastConvertYUVToRGB32Row(const uint8* y_buf,
-                               const uint8* u_buf,
-                               const uint8* v_buf,
-                               uint8* rgb_buf,
-                               int width) {
-   FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
- }
-  
-+void ScaleYUVToRGB32Row(const uint8* y_buf,
-+                        const uint8* u_buf,
-+                        const uint8* v_buf,
-+                        uint8* rgb_buf,
-+                        int width,
-+                        int scaled_dx) {
-+  ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx, 1);
-+}
- #else
- 
- #define RGBY(i) { \
-   static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
-   static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
-   static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
-   0 \
- }
-@@ -365,16 +373,86 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,  // rdi
-     "r"(u_buf),  // %1
-     "r"(v_buf),  // %2
-     "r"(rgb_buf),  // %3
-     "r"(width),  // %4
-     "r" (kCoefficientsRgbY)  // %5
-   : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"
- );
- }
-+
-+void ScaleYUVToRGB32Row(const uint8* y_buf,  // rdi
-+                        const uint8* u_buf,  // rsi
-+                        const uint8* v_buf,  // rdx
-+                        uint8* rgb_buf,      // rcx
-+                        int width,           // r8
-+                        int scaled_dx) {     // r9
-+  asm(
-+  "xor    %%r11,%%r11\n"
-+  "sub    $0x2,%4\n"
-+  "js     scalenext\n"
-+
-+"scaleloop:"
-+  "mov    %%r11,%%r10\n"
-+  "sar    $0x5,%%r10\n"
-+  "movzb  (%1,%%r10,1),%%rax\n"
-+  "movq   2048(%5,%%rax,8),%%xmm0\n"
-+  "movzb  (%2,%%r10,1),%%rax\n"
-+  "movq   4096(%5,%%rax,8),%%xmm1\n"
-+  "lea    (%%r11,%6),%%r10\n"
-+  "sar    $0x4,%%r11\n"
-+  "movzb  (%0,%%r11,1),%%rax\n"
-+  "paddsw %%xmm1,%%xmm0\n"
-+  "movq   (%5,%%rax,8),%%xmm1\n"
-+  "lea    (%%r10,%6),%%r11\n"
-+  "sar    $0x4,%%r10\n"
-+  "movzb  (%0,%%r10,1),%%rax\n"
-+  "movq   (%5,%%rax,8),%%xmm2\n"
-+  "paddsw %%xmm0,%%xmm1\n"
-+  "paddsw %%xmm0,%%xmm2\n"
-+  "shufps $0x44,%%xmm2,%%xmm1\n"
-+  "psraw  $0x6,%%xmm1\n"
-+  "packuswb %%xmm1,%%xmm1\n"
-+  "movq   %%xmm1,0x0(%3)\n"
-+  "add    $0x8,%3\n"
-+  "sub    $0x2,%4\n"
-+  "jns    scaleloop\n"
-+
-+"scalenext:"
-+  "add    $0x1,%4\n"
-+  "js     scaledone\n"
-+
-+  "mov    %%r11,%%r10\n"
-+  "sar    $0x5,%%r10\n"
-+  "movzb  (%1,%%r10,1),%%rax\n"
-+  "movq   2048(%5,%%rax,8),%%xmm0\n"
-+  "movzb  (%2,%%r10,1),%%rax\n"
-+  "movq   4096(%5,%%rax,8),%%xmm1\n"
-+  "paddsw %%xmm1,%%xmm0\n"
-+  "sar    $0x4,%%r11\n"
-+  "movzb  (%0,%%r11,1),%%rax\n"
-+  "movq   (%5,%%rax,8),%%xmm1\n"
-+  "paddsw %%xmm0,%%xmm1\n"
-+  "psraw  $0x6,%%xmm1\n"
-+  "packuswb %%xmm1,%%xmm1\n"
-+  "movd   %%xmm1,0x0(%3)\n"
-+
-+"scaledone:"
-+  :
-+  : "r"(y_buf),  // %0
-+    "r"(u_buf),  // %1
-+    "r"(v_buf),  // %2
-+    "r"(rgb_buf),  // %3
-+    "r"(width),  // %4
-+    "r" (kCoefficientsRgbY),  // %5
-+    "r"(static_cast<long>(scaled_dx))  // %6
-+  : "memory", "r10", "r11", "rax", "xmm0", "xmm1", "xmm2"
-+);
-+}
-+
- #endif // __SUNPRO_CC
- 
- #else // ARCH_CPU_X86_64
- 
- #ifdef __SUNPRO_CC
- void FastConvertYUVToRGB32Row(const uint8* y_buf,
-                               const uint8* u_buf,
-                               const uint8* v_buf,
-@@ -493,13 +571,87 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
-   "packuswb %mm1,%mm1\n"
-   "movd   %mm1,0x0(%ebp)\n"
- "2:"
-   "popa\n"
-   "ret\n"
-   ".previous\n"
- );
- 
-+void ScaleYUVToRGB32Row(const uint8* y_buf,
-+                        const uint8* u_buf,
-+                        const uint8* v_buf,
-+                        uint8* rgb_buf,
-+                        int width,
-+                        int scaled_dx);
-+
-+  asm(
-+  ".global ScaleYUVToRGB32Row\n"
-+"ScaleYUVToRGB32Row:\n"
-+  "pusha\n"
-+  "mov    0x24(%esp),%edx\n"
-+  "mov    0x28(%esp),%edi\n"
-+  "mov    0x2c(%esp),%esi\n"
-+  "mov    0x30(%esp),%ebp\n"
-+  "mov    0x34(%esp),%ecx\n"
-+  "xor    %ebx,%ebx\n"
-+  "jmp    scaleend\n"
-+
-+"scaleloop:"
-+  "mov    %ebx,%eax\n"
-+  "sar    $0x5,%eax\n"
-+  "movzbl (%edi,%eax,1),%eax\n"
-+  "movq   kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
-+  "mov    %ebx,%eax\n"
-+  "sar    $0x5,%eax\n"
-+  "movzbl (%esi,%eax,1),%eax\n"
-+  "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
-+  "mov    %ebx,%eax\n"
-+  "add    0x38(%esp),%ebx\n"
-+  "sar    $0x4,%eax\n"
-+  "movzbl (%edx,%eax,1),%eax\n"
-+  "movq   kCoefficientsRgbY(,%eax,8),%mm1\n"
-+  "mov    %ebx,%eax\n"
-+  "add    0x38(%esp),%ebx\n"
-+  "sar    $0x4,%eax\n"
-+  "movzbl (%edx,%eax,1),%eax\n"
-+  "movq   kCoefficientsRgbY(,%eax,8),%mm2\n"
-+  "paddsw %mm0,%mm1\n"
-+  "paddsw %mm0,%mm2\n"
-+  "psraw  $0x6,%mm1\n"
-+  "psraw  $0x6,%mm2\n"
-+  "packuswb %mm2,%mm1\n"
-+  "movntq %mm1,0x0(%ebp)\n"
-+  "add    $0x8,%ebp\n"
-+"scaleend:"
-+  "sub    $0x2,%ecx\n"
-+  "jns    scaleloop\n"
-+
-+  "and    $0x1,%ecx\n"
-+  "je     scaledone\n"
-+
-+  "mov    %ebx,%eax\n"
-+  "sar    $0x5,%eax\n"
-+  "movzbl (%edi,%eax,1),%eax\n"
-+  "movq   kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
-+  "mov    %ebx,%eax\n"
-+  "sar    $0x5,%eax\n"
-+  "movzbl (%esi,%eax,1),%eax\n"
-+  "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
-+  "mov    %ebx,%eax\n"
-+  "sar    $0x4,%eax\n"
-+  "movzbl (%edx,%eax,1),%eax\n"
-+  "movq   kCoefficientsRgbY(,%eax,8),%mm1\n"
-+  "paddsw %mm0,%mm1\n"
-+  "psraw  $0x6,%mm1\n"
-+  "packuswb %mm1,%mm1\n"
-+  "movd   %mm1,0x0(%ebp)\n"
-+
-+"scaledone:"
-+  "popa\n"
-+  "ret\n"
-+);
-+
- #endif // __SUNPRO_CC
- #endif // ARCH_CPU_X86_64
- #endif // !ARCH_CPU_X86_FAMILY
- }  // extern "C"
- 
-diff --git a/gfx/ycbcr/yuv_row_mac.cpp b/gfx/ycbcr/yuv_row_mac.cpp
-index a1d0058..5acf825 100644
---- a/gfx/ycbcr/yuv_row_mac.cpp
-+++ b/gfx/ycbcr/yuv_row_mac.cpp
-@@ -16,16 +16,24 @@ extern "C" {
- void FastConvertYUVToRGB32Row(const uint8* y_buf,
-                               const uint8* u_buf,
-                               const uint8* v_buf,
-                               uint8* rgb_buf,
-                               int width) {
-   FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
- }
-  
-+void ScaleYUVToRGB32Row(const uint8* y_buf,
-+                        const uint8* u_buf,
-+                        const uint8* v_buf,
-+                        uint8* rgb_buf,
-+                        int width,
-+                        int scaled_dx) {
-+  ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx, 1);
-+}
- #else
- 
- #define RGBY(i) { \
-   static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
-   static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
-   static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
-   0 \
- }
-@@ -313,11 +321,96 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
-                               const uint8* u_buf,
-                               const uint8* v_buf,
-                               uint8* rgb_buf,
-                               int width) {
-   MacConvertYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width,
-                           &kCoefficientsRgbY[0][0]);
- }
- 
-+extern void MacScaleYUVToRGB32Row(const uint8* y_buf,
-+                               const uint8* u_buf,
-+                               const uint8* v_buf,
-+                               uint8* rgb_buf,
-+                               int width,
-+                               int scaled_dx,
-+                               int16 *kCoefficientsRgbY);
-+
-+  __asm__(
-+"_MacScaleYUVToRGB32Row:\n"
-+  "pusha\n"
-+  "mov    0x24(%esp),%edx\n"
-+  "mov    0x28(%esp),%edi\n"
-+  "mov    0x2c(%esp),%esi\n"
-+  "mov    0x30(%esp),%ebp\n"
-+  "mov    0x3c(%esp),%ecx\n"
-+  "xor    %ebx,%ebx\n"
-+  "jmp    Lscaleend\n"
-+
-+"Lscaleloop:"
-+  "mov    %ebx,%eax\n"
-+  "sar    $0x5,%eax\n"
-+  "movzbl (%edi,%eax,1),%eax\n"
-+  "movq   2048(%ecx,%eax,8),%mm0\n"
-+  "mov    %ebx,%eax\n"
-+  "sar    $0x5,%eax\n"
-+  "movzbl (%esi,%eax,1),%eax\n"
-+  "paddsw 4096(%ecx,%eax,8),%mm0\n"
-+  "mov    %ebx,%eax\n"
-+  "add    0x38(%esp),%ebx\n"
-+  "sar    $0x4,%eax\n"
-+  "movzbl (%edx,%eax,1),%eax\n"
-+  "movq   0(%ecx,%eax,8),%mm1\n"
-+  "mov    %ebx,%eax\n"
-+  "add    0x38(%esp),%ebx\n"
-+  "sar    $0x4,%eax\n"
-+  "movzbl (%edx,%eax,1),%eax\n"
-+  "movq   0(%ecx,%eax,8),%mm2\n"
-+  "paddsw %mm0,%mm1\n"
-+  "paddsw %mm0,%mm2\n"
-+  "psraw  $0x6,%mm1\n"
-+  "psraw  $0x6,%mm2\n"
-+  "packuswb %mm2,%mm1\n"
-+  "movntq %mm1,0x0(%ebp)\n"
-+  "add    $0x8,%ebp\n"
-+"Lscaleend:"
-+  "sub    $0x2,0x34(%esp)\n"
-+  "jns    Lscaleloop\n"
-+
-+  "and    $0x1,0x34(%esp)\n"
-+  "je     Lscaledone\n"
-+
-+  "mov    %ebx,%eax\n"
-+  "sar    $0x5,%eax\n"
-+  "movzbl (%edi,%eax,1),%eax\n"
-+  "movq   2048(%ecx,%eax,8),%mm0\n"
-+  "mov    %ebx,%eax\n"
-+  "sar    $0x5,%eax\n"
-+  "movzbl (%esi,%eax,1),%eax\n"
-+  "paddsw 4096(%ecx,%eax,8),%mm0\n"
-+  "mov    %ebx,%eax\n"
-+  "sar    $0x4,%eax\n"
-+  "movzbl (%edx,%eax,1),%eax\n"
-+  "movq   0(%ecx,%eax,8),%mm1\n"
-+  "paddsw %mm0,%mm1\n"
-+  "psraw  $0x6,%mm1\n"
-+  "packuswb %mm1,%mm1\n"
-+  "movd   %mm1,0x0(%ebp)\n"
-+
-+"Lscaledone:"
-+  "popa\n"
-+  "ret\n"
-+);
-+
-+void ScaleYUVToRGB32Row(const uint8* y_buf,
-+                        const uint8* u_buf,
-+                        const uint8* v_buf,
-+                        uint8* rgb_buf,
-+                        int width,
-+                        int scaled_dx) {
-+
-+  MacScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx,
-+                        &kCoefficientsRgbY[0][0]);
-+}
-+
- #endif // ARCH_CPU_PPC || ARCH_CPU_64_BITS
- }  // extern "C"
- 
-diff --git a/gfx/ycbcr/yuv_row_win.cpp b/gfx/ycbcr/yuv_row_win.cpp
-index 699ac77..a1700fc 100644
---- a/gfx/ycbcr/yuv_row_win.cpp
-+++ b/gfx/ycbcr/yuv_row_win.cpp
-@@ -11,17 +11,26 @@ extern "C" {
- // PPC implementation uses C fallback
- void FastConvertYUVToRGB32Row(const uint8* y_buf,
-                               const uint8* u_buf,
-                               const uint8* v_buf,
-                               uint8* rgb_buf,
-                               int width) {
-   FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
- }
-- 
-+
-+void ScaleYUVToRGB32Row(const uint8* y_buf,
-+                        const uint8* u_buf,
-+                        const uint8* v_buf,
-+                        uint8* rgb_buf,
-+                        int width,
-+                        int scaled_dx) {
-+  ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx, 1);
-+}
-+
- #else
- 
- 
- #define RGBY(i) { \
-   static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
-   static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
-   static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
-   0 \
-@@ -307,11 +316,280 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
-     movd      [ebp], mm1
-  convertdone :
- 
-     popad
-     ret
-   }
- }
- 
-+__declspec(naked)
-+void ConvertYUVToRGB32Row(const uint8* y_buf,
-+                          const uint8* u_buf,
-+                          const uint8* v_buf,
-+                          uint8* rgb_buf,
-+                          int width,
-+                          int step) {
-+  __asm {
-+    pushad
-+    mov       edx, [esp + 32 + 4]   // Y
-+    mov       edi, [esp + 32 + 8]   // U
-+    mov       esi, [esp + 32 + 12]  // V
-+    mov       ebp, [esp + 32 + 16]  // rgb
-+    mov       ecx, [esp + 32 + 20]  // width
-+    mov       ebx, [esp + 32 + 24]  // step
-+    jmp       wend
-+
-+ wloop :
-+    movzx     eax, byte ptr [edi]
-+    add       edi, ebx
-+    movq      mm0, [kCoefficientsRgbU + 8 * eax]
-+    movzx     eax, byte ptr [esi]
-+    add       esi, ebx
-+    paddsw    mm0, [kCoefficientsRgbV + 8 * eax]
-+    movzx     eax, byte ptr [edx]
-+    add       edx, ebx
-+    movq      mm1, [kCoefficientsRgbY + 8 * eax]
-+    movzx     eax, byte ptr [edx]
-+    add       edx, ebx
-+    movq      mm2, [kCoefficientsRgbY + 8 * eax]
-+    paddsw    mm1, mm0
-+    paddsw    mm2, mm0
-+    psraw     mm1, 6
-+    psraw     mm2, 6
-+    packuswb  mm1, mm2
-+    movntq    [ebp], mm1
-+    add       ebp, 8
-+ wend :
-+    sub       ecx, 2
-+    jns       wloop
-+
-+    and       ecx, 1  // odd number of pixels?
-+    jz        wdone
-+
-+    movzx     eax, byte ptr [edi]
-+    movq      mm0, [kCoefficientsRgbU + 8 * eax]
-+    movzx     eax, byte ptr [esi]
-+    paddsw    mm0, [kCoefficientsRgbV + 8 * eax]
-+    movzx     eax, byte ptr [edx]
-+    movq      mm1, [kCoefficientsRgbY + 8 * eax]
-+    paddsw    mm1, mm0
-+    psraw     mm1, 6
-+    packuswb  mm1, mm1
-+    movd      [ebp], mm1
-+ wdone :
-+
-+    popad
-+    ret
-+  }
-+}
-+
-+__declspec(naked)
-+void RotateConvertYUVToRGB32Row(const uint8* y_buf,
-+                                const uint8* u_buf,
-+                                const uint8* v_buf,
-+                                uint8* rgb_buf,
-+                                int width,
-+                                int ystep,
-+                                int uvstep) {
-+  __asm {
-+    pushad
-+    mov       edx, [esp + 32 + 4]   // Y
-+    mov       edi, [esp + 32 + 8]   // U
-+    mov       esi, [esp + 32 + 12]  // V
-+    mov       ebp, [esp + 32 + 16]  // rgb
-+    mov       ecx, [esp + 32 + 20]  // width
-+    jmp       wend
-+
-+ wloop :
-+    movzx     eax, byte ptr [edi]
-+    mov       ebx, [esp + 32 + 28]  // uvstep
-+    add       edi, ebx
-+    movq      mm0, [kCoefficientsRgbU + 8 * eax]
-+    movzx     eax, byte ptr [esi]
-+    add       esi, ebx
-+    paddsw    mm0, [kCoefficientsRgbV + 8 * eax]
-+    movzx     eax, byte ptr [edx]
-+    mov       ebx, [esp + 32 + 24]  // ystep
-+    add       edx, ebx
-+    movq      mm1, [kCoefficientsRgbY + 8 * eax]
-+    movzx     eax, byte ptr [edx]
-+    add       edx, ebx
-+    movq      mm2, [kCoefficientsRgbY + 8 * eax]
-+    paddsw    mm1, mm0
-+    paddsw    mm2, mm0
-+    psraw     mm1, 6
-+    psraw     mm2, 6
-+    packuswb  mm1, mm2
-+    movntq    [ebp], mm1
-+    add       ebp, 8
-+ wend :
-+    sub       ecx, 2
-+    jns       wloop
-+
-+    and       ecx, 1  // odd number of pixels?
-+    jz        wdone
-+
-+    movzx     eax, byte ptr [edi]
-+    movq      mm0, [kCoefficientsRgbU + 8 * eax]
-+    movzx     eax, byte ptr [esi]
-+    paddsw    mm0, [kCoefficientsRgbV + 8 * eax]
-+    movzx     eax, byte ptr [edx]
-+    movq      mm1, [kCoefficientsRgbY + 8 * eax]
-+    paddsw    mm1, mm0
-+    psraw     mm1, 6
-+    packuswb  mm1, mm1
-+    movd      [ebp], mm1
-+ wdone :
-+
-+    popad
-+    ret
-+  }
-+}
-+
-+__declspec(naked)  // No compiler prologue/epilogue: the asm below saves, restores and returns itself.
-+void DoubleYUVToRGB32Row(const uint8* y_buf,   // Source Y row.
-+                         const uint8* u_buf,   // Source U row (one sample is shared by two Y samples).
-+                         const uint8* v_buf,   // Source V row (one sample is shared by two Y samples).
-+                         uint8* rgb_buf,       // Destination; 4 bytes are stored per output pixel.
-+                         int width) {          // Number of output pixels; each Y sample is written twice.
-+  __asm {
-+    pushad                          // Saves 8 registers (32 bytes), hence the "+ 32" in the offsets below.
-+    mov       edx, [esp + 32 + 4]   // Y
-+    mov       edi, [esp + 32 + 8]   // U
-+    mov       esi, [esp + 32 + 12]  // V
-+    mov       ebp, [esp + 32 + 16]  // rgb
-+    mov       ecx, [esp + 32 + 20]  // width
-+    jmp       wend                  // Enter at the loop test so width < 4 skips the main loop.
-+
-+ wloop :                            // Main loop: 1 U, 1 V and 2 Y samples in, 4 output pixels out.
-+    movzx     eax, byte ptr [edi]   // eax = U sample
-+    add       edi, 1
-+    movzx     ebx, byte ptr [esi]   // ebx = V sample
-+    add       esi, 1
-+    movq      mm0, [kCoefficientsRgbU + 8 * eax]  // mm0 = U table entry (8 bytes per entry)
-+    movzx     eax, byte ptr [edx]   // eax = first Y sample
-+    paddsw    mm0, [kCoefficientsRgbV + 8 * ebx]  // mm0 = U + V contribution (saturating add)
-+    movq      mm1, [kCoefficientsRgbY + 8 * eax]
-+    paddsw    mm1, mm0              // mm1 = first Y + chroma
-+    psraw     mm1, 6                // Drop 6 low bits (presumably the tables' fixed-point fraction).
-+    packuswb  mm1, mm1              // Saturate the 16-bit lanes to unsigned bytes: one 32-bit pixel.
-+    punpckldq mm1, mm1              // Duplicate the low 32-bit pixel into the high half.
-+    movntq    [ebp], mm1            // Non-temporal 8-byte store: first pixel written twice.
-+
-+    movzx     ebx, byte ptr [edx + 1]  // ebx = second Y sample
-+    add       edx, 2
-+    paddsw    mm0, [kCoefficientsRgbY + 8 * ebx]  // mm0 = second Y + the same chroma
-+    psraw     mm0, 6
-+    packuswb  mm0, mm0
-+    punpckldq mm0, mm0
-+    movntq    [ebp+8], mm0          // Second pixel written twice.
-+    add       ebp, 16               // 4 output pixels (16 bytes) were stored.
-+ wend :
-+    sub       ecx, 4
-+    jns       wloop                 // Loop while at least 4 output pixels remained.
-+
-+    add       ecx, 4                // Undo the last subtraction: ecx = leftover pixels (0..3 for width >= 0).
-+    jz        wdone
-+
-+    movzx     eax, byte ptr [edi]   // Tail: convert a single pixel from the current U, V and Y samples.
-+    movq      mm0, [kCoefficientsRgbU + 8 * eax]
-+    movzx     eax, byte ptr [esi]
-+    paddsw    mm0, [kCoefficientsRgbV + 8 * eax]
-+    movzx     eax, byte ptr [edx]
-+    movq      mm1, [kCoefficientsRgbY + 8 * eax]
-+    paddsw    mm1, mm0
-+    psraw     mm1, 6
-+    packuswb  mm1, mm1
-+    jmp       wend1
-+
-+ wloop1 :                           // Store that same pixel once per leftover output pixel;
-+    movd      [ebp], mm1            // the source pointers are not advanced in this tail.
-+    add       ebp, 4
-+ wend1 :
-+    sub       ecx, 1
-+    jns       wloop1
-+ wdone :
-+    popad                           // Restore the registers saved by pushad.
-+    ret                             // Plain ret: the arguments stay on the stack for the caller.
-+  }
-+}
-+
-+// This version does general purpose scaling by any amount, up or down.
-+// The only thing it cannot do is rotation by 90 or 270.
-+// For performance the chroma is under sampled, reducing cost of a 3x
-+// 1080p scale from 8.4 ms to 5.4 ms.
-+__declspec(naked)  // No compiler prologue/epilogue: the asm below saves, restores and returns itself.
-+void ScaleYUVToRGB32Row(const uint8* y_buf,   // Source Y row, indexed by x >> 4.
-+                        const uint8* u_buf,   // Source U row, indexed by x >> 5.
-+                        const uint8* v_buf,   // Source V row, indexed by x >> 5.
-+                        uint8* rgb_buf,       // Destination; 4 bytes are stored per output pixel.
-+                        int width,            // Number of output pixels.
-+                        int dx) {             // Added to x per output pixel; the low 4 bits of x are fractional.
-+  __asm {
-+    pushad                          // Saves 8 registers (32 bytes), hence the "+ 32" in the offsets below.
-+    mov       edx, [esp + 32 + 4]   // Y
-+    mov       edi, [esp + 32 + 8]   // U
-+    mov       esi, [esp + 32 + 12]  // V
-+    mov       ebp, [esp + 32 + 16]  // rgb
-+    mov       ecx, [esp + 32 + 20]  // width
-+    xor       ebx, ebx              // x
-+    jmp       scaleend              // Enter at the loop test so width < 2 skips the main loop.
-+
-+ scaleloop :                        // Two output pixels per iteration, sharing one U/V sample.
-+    mov       eax, ebx
-+    sar       eax, 5                // U index = x >> 5
-+    movzx     eax, byte ptr [edi + eax]
-+    movq      mm0, [kCoefficientsRgbU + 8 * eax]
-+    mov       eax, ebx
-+    sar       eax, 5                // V index = x >> 5
-+    movzx     eax, byte ptr [esi + eax]
-+    paddsw    mm0, [kCoefficientsRgbV + 8 * eax]  // mm0 = U + V contribution (saturating add)
-+    mov       eax, ebx              // Keep the pre-increment x for the first Y lookup.
-+    add       ebx, [esp + 32 + 24]  // x += dx
-+    sar       eax, 4                // Y index = x >> 4
-+    movzx     eax, byte ptr [edx + eax]
-+    movq      mm1, [kCoefficientsRgbY + 8 * eax]
-+    mov       eax, ebx              // Keep the pre-increment x for the second Y lookup.
-+    add       ebx, [esp + 32 + 24]  // x += dx
-+    sar       eax, 4
-+    movzx     eax, byte ptr [edx + eax]
-+    movq      mm2, [kCoefficientsRgbY + 8 * eax]
-+    paddsw    mm1, mm0              // First pixel: Y + chroma.
-+    paddsw    mm2, mm0              // Second pixel: Y + the same chroma.
-+    psraw     mm1, 6                // Drop 6 low bits (presumably the tables' fixed-point fraction).
-+    psraw     mm2, 6
-+    packuswb  mm1, mm2              // Saturate to bytes: low half = first pixel, high half = second.
-+    movntq    [ebp], mm1            // Non-temporal 8-byte store of both pixels.
-+    add       ebp, 8
-+ scaleend :
-+    sub       ecx, 2
-+    jns       scaleloop             // Loop while at least 2 output pixels remained.
-+
-+    and       ecx, 1  // odd number of pixels? (ecx is -1 if one pixel is left, -2 if none)
-+    jz        scaledone
-+
-+    mov       eax, ebx              // Tail: convert the one remaining pixel at the current x.
-+    sar       eax, 5
-+    movzx     eax, byte ptr [edi + eax]
-+    movq      mm0, [kCoefficientsRgbU + 8 * eax]
-+    mov       eax, ebx
-+    sar       eax, 5
-+    movzx     eax, byte ptr [esi + eax]
-+    paddsw    mm0, [kCoefficientsRgbV + 8 * eax]
-+    mov       eax, ebx
-+    sar       eax, 4
-+    movzx     eax, byte ptr [edx + eax]
-+    movq      mm1, [kCoefficientsRgbY + 8 * eax]
-+    paddsw    mm1, mm0
-+    psraw     mm1, 6
-+    packuswb  mm1, mm1
-+    movd      [ebp], mm1            // 4-byte store of the single pixel.
-+
-+ scaledone :
-+    popad                           // Restore the registers saved by pushad.
-+    ret                             // Plain ret: the arguments stay on the stack for the caller.
-+  }
-+}
-+
- #endif // ARCH_CPU_64_BITS
- }  // extern "C"
- 
deleted file mode 100644
--- a/gfx/ycbcr/bug572034_mac_64bit.patch
+++ /dev/null
@@ -1,144 +0,0 @@
-diff --git a/gfx/ycbcr/yuv_row_linux.cpp b/gfx/ycbcr/yuv_row_linux.cpp
---- a/gfx/ycbcr/yuv_row_linux.cpp
-+++ b/gfx/ycbcr/yuv_row_linux.cpp
-@@ -250,18 +250,18 @@ MMX_ALIGNED(int16 kCoefficientsRgbY[768]
- 
- // AMD64 ABI uses register paremters.
- void FastConvertYUVToRGB32Row(const uint8* y_buf,  // rdi
-                               const uint8* u_buf,  // rsi
-                               const uint8* v_buf,  // rdx
-                               uint8* rgb_buf,      // rcx
-                               int width) {         // r8
-   asm(
--  "jmp    convertend\n"
--"convertloop:"
-+  "jmp    1f\n"
-+"0:"
-   "movzb  (%1),%%r10\n"
-   "add    $0x1,%1\n"
-   "movzb  (%2),%%r11\n"
-   "add    $0x1,%2\n"
-   "movq   2048(%5,%%r10,8),%%xmm0\n"
-   "movzb  (%0),%%r10\n"
-   "movq   4096(%5,%%r11,8),%%xmm1\n"
-   "movzb  0x1(%0),%%r11\n"
-@@ -271,36 +271,36 @@ void FastConvertYUVToRGB32Row(const uint
-   "movq   (%5,%%r11,8),%%xmm3\n"
-   "paddsw %%xmm0,%%xmm2\n"
-   "paddsw %%xmm0,%%xmm3\n"
-   "shufps $0x44,%%xmm3,%%xmm2\n"
-   "psraw  $0x6,%%xmm2\n"
-   "packuswb %%xmm2,%%xmm2\n"
-   "movq   %%xmm2,0x0(%3)\n"
-   "add    $0x8,%3\n"
--"convertend:"
-+"1:"
-   "sub    $0x2,%4\n"
--  "jns    convertloop\n"
-+  "jns    0b\n"
- 
--"convertnext:"
-+"2:"
-   "add    $0x1,%4\n"
--  "js     convertdone\n"
-+  "js     3f\n"
- 
-   "movzb  (%1),%%r10\n"
-   "movq   2048(%5,%%r10,8),%%xmm0\n"
-   "movzb  (%2),%%r10\n"
-   "movq   4096(%5,%%r10,8),%%xmm1\n"
-   "paddsw %%xmm1,%%xmm0\n"
-   "movzb  (%0),%%r10\n"
-   "movq   (%5,%%r10,8),%%xmm1\n"
-   "paddsw %%xmm0,%%xmm1\n"
-   "psraw  $0x6,%%xmm1\n"
-   "packuswb %%xmm1,%%xmm1\n"
-   "movd   %%xmm1,0x0(%3)\n"
--"convertdone:"
-+"3:"
-   :
-   : "r"(y_buf),  // %0
-     "r"(u_buf),  // %1
-     "r"(v_buf),  // %2
-     "r"(rgb_buf),  // %3
-     "r"(width),  // %4
-     "r" (kCoefficientsRgbY)  // %5
-   : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"
-@@ -309,28 +309,35 @@ void FastConvertYUVToRGB32Row(const uint
- 
- #else
- 
- void FastConvertYUVToRGB32Row(const uint8* y_buf,
-                               const uint8* u_buf,
-                               const uint8* v_buf,
-                               uint8* rgb_buf,
-                               int width);
-+
-+// It's necessary to specify the correct section for the following code,
-+// otherwise it will be placed in whatever the current section is as this unit
-+// is compiled.  Because GCC remembers the last section it emitted, we must
-+// also revert to the previous section state at the end of the asm block.
-   asm(
-+  ".section .text\n"
-   ".global FastConvertYUVToRGB32Row\n"
-+  ".type FastConvertYUVToRGB32Row, @function\n"
- "FastConvertYUVToRGB32Row:\n"
-   "pusha\n"
-   "mov    0x24(%esp),%edx\n"
-   "mov    0x28(%esp),%edi\n"
-   "mov    0x2c(%esp),%esi\n"
-   "mov    0x30(%esp),%ebp\n"
-   "mov    0x34(%esp),%ecx\n"
--  "jmp    convertend\n"
-+  "jmp    1f\n"
- 
--"convertloop:"
-+"0:"
-   "movzbl (%edi),%eax\n"
-   "add    $0x1,%edi\n"
-   "movzbl (%esi),%ebx\n"
-   "add    $0x1,%esi\n"
-   "movq   kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
-   "movzbl (%edx),%eax\n"
-   "paddsw kCoefficientsRgbY+4096(,%ebx,8),%mm0\n"
-   "movzbl 0x1(%edx),%ebx\n"
-@@ -339,34 +346,35 @@ void FastConvertYUVToRGB32Row(const uint
-   "movq   kCoefficientsRgbY(,%ebx,8),%mm2\n"
-   "paddsw %mm0,%mm1\n"
-   "paddsw %mm0,%mm2\n"
-   "psraw  $0x6,%mm1\n"
-   "psraw  $0x6,%mm2\n"
-   "packuswb %mm2,%mm1\n"
-   "movntq %mm1,0x0(%ebp)\n"
-   "add    $0x8,%ebp\n"
--"convertend:"
-+"1:"
-   "sub    $0x2,%ecx\n"
--  "jns    convertloop\n"
-+  "jns    0b\n"
- 
-   "and    $0x1,%ecx\n"
--  "je     convertdone\n"
-+  "je     2f\n"
- 
-   "movzbl (%edi),%eax\n"
-   "movq   kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
-   "movzbl (%esi),%eax\n"
-   "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
-   "movzbl (%edx),%eax\n"
-   "movq   kCoefficientsRgbY(,%eax,8),%mm1\n"
-   "paddsw %mm0,%mm1\n"
-   "psraw  $0x6,%mm1\n"
-   "packuswb %mm1,%mm1\n"
-   "movd   %mm1,0x0(%ebp)\n"
--"convertdone:"
-+"2:"
-   "popa\n"
-   "ret\n"
-+  ".previous\n"
- );
- 
- #endif
- #endif // ARCH_CPU_ARM_FAMILY
- }  // extern "C"
- 
deleted file mode 100644
--- a/gfx/ycbcr/bug577645_movntq.patch
+++ /dev/null
@@ -1,63 +0,0 @@
-diff --git a/gfx/ycbcr/yuv_convert.cpp b/gfx/ycbcr/yuv_convert.cpp
---- a/gfx/ycbcr/yuv_convert.cpp
-+++ b/gfx/ycbcr/yuv_convert.cpp
-@@ -36,19 +36,21 @@ NS_GFX_(void) ConvertYCbCrToRGB32(const 
-                                   int pic_width,
-                                   int pic_height,
-                                   int y_pitch,
-                                   int uv_pitch,
-                                   int rgb_pitch,
-                                   YUVType yuv_type) {
-   unsigned int y_shift = yuv_type == YV12 ? 1 : 0;
-   unsigned int x_shift = yuv_type == YV24 ? 0 : 1;
--  // There is no optimized YV24 MMX routine so we check for this and
-+  // Test for SSE because the optimized code uses movntq, which is not part of MMX.
-+  bool has_sse = supports_mmx() && supports_sse();
-+  // There is no optimized YV24 SSE routine so we check for this and
-   // fall back to the C code.
--  bool has_mmx = supports_mmx() && yuv_type != YV24;
-+  has_sse &= yuv_type != YV24;
-   bool odd_pic_x = yuv_type != YV24 && pic_x % 2 != 0;
-   int x_width = odd_pic_x ? pic_width - 1 : pic_width;
- 
-   for (int y = pic_y; y < pic_height + pic_y; ++y) {
-     uint8* rgb_row = rgb_buf + (y - pic_y) * rgb_pitch;
-     const uint8* y_ptr = y_buf + y * y_pitch + pic_x;
-     const uint8* u_ptr = u_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift);
-     const uint8* v_ptr = v_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift);
-@@ -60,32 +62,32 @@ NS_GFX_(void) ConvertYCbCrToRGB32(const 
-                                  u_ptr++,
-                                  v_ptr++,
-                                  rgb_row,
-                                  1,
-                                  x_shift);
-       rgb_row += 4;
-     }
- 
--    if (has_mmx)
-+    if (has_sse)
-       FastConvertYUVToRGB32Row(y_ptr,
-                                u_ptr,
-                                v_ptr,
-                                rgb_row,
-                                x_width);
-     else
-       FastConvertYUVToRGB32Row_C(y_ptr,
-                                  u_ptr,
-                                  v_ptr,
-                                  rgb_row,
-                                  x_width,
-                                  x_shift);
-   }
- 
- #ifdef ARCH_CPU_X86_FAMILY
--  // MMX used for FastConvertYUVToRGB32Row requires emms instruction.
--  if (has_mmx)
-+  // SSE used for FastConvertYUVToRGB32Row requires emms instruction.
-+  if (has_sse)
-     EMMS();
- #endif
- }
- 
- }  // namespace gfx
- }  // namespace mozilla
deleted file mode 100644
--- a/gfx/ycbcr/bustage.patch
+++ /dev/null
@@ -1,26 +0,0 @@
-diff --git a/gfx/ycbcr/yuv_row_linux.cpp b/gfx/ycbcr/yuv_row_linux.cpp
-index ce5ee89..455dd7b 100644
---- a/gfx/ycbcr/yuv_row_linux.cpp
-+++ b/gfx/ycbcr/yuv_row_linux.cpp
-@@ -18,7 +18,7 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
-                               const uint8* v_buf,
-                               uint8* rgb_buf,
-                               int width) {
--  FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width);
-+  FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
- }
-  
- #else
-diff --git a/gfx/ycbcr/yuv_row_mac.cpp b/gfx/ycbcr/yuv_row_mac.cpp
-index 3515ada..351466c 100644
---- a/gfx/ycbcr/yuv_row_mac.cpp
-+++ b/gfx/ycbcr/yuv_row_mac.cpp
-@@ -15,7 +15,7 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
-                               const uint8* v_buf,
-                               uint8* rgb_buf,
-                               int width) {
--  FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width);
-+  FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
- }
-  
- #else
--- a/gfx/ycbcr/chromium_types.h
+++ b/gfx/ycbcr/chromium_types.h
@@ -49,23 +49,24 @@ typedef PRInt16 int16;
 //   http://www.agner.org/optimize/calling_conventions.pdf
 //   or with gcc, run: "echo | gcc -E -dM -"
 #if defined(_M_X64) || defined(__x86_64__)
 #define ARCH_CPU_X86_FAMILY 1
 #define ARCH_CPU_X86_64 1
 #define ARCH_CPU_64_BITS 1
 #elif defined(_M_IX86) || defined(__i386__) || defined(__i386)
 #define ARCH_CPU_X86_FAMILY 1
+#define ARCH_CPU_X86_32 1
 #define ARCH_CPU_X86 1
 #define ARCH_CPU_32_BITS 1
 #elif defined(__ARMEL__)
 #define ARCH_CPU_ARM_FAMILY 1
 #define ARCH_CPU_ARMEL 1
 #define ARCH_CPU_32_BITS 1
-#elif defined(__ppc__)
+#elif defined(__ppc__) || defined(__powerpc) || defined(__PPC__)
 #define ARCH_CPU_PPC_FAMILY 1
 #define ARCH_CPU_PPC 1
 #define ARCH_CPU_32_BITS 1
 #elif defined(__sparc)
 #define ARCH_CPU_SPARC_FAMILY 1
 #define ARCH_CPU_SPARC 1
 #define ARCH_CPU_32_BITS 1
 #elif defined(__sparcv9)
--- a/gfx/ycbcr/convert.patch
+++ b/gfx/ycbcr/convert.patch
@@ -1,292 +1,438 @@
 diff --git b/gfx/ycbcr/yuv_convert.cpp a/gfx/ycbcr/yuv_convert.cpp
-index c73dfe4..c291d5c 100644
+index bea0e50..ab4f10a 100644
 --- b/gfx/ycbcr/yuv_convert.cpp
 +++ a/gfx/ycbcr/yuv_convert.cpp
-@@ -10,67 +10,80 @@
+@@ -6,77 +6,104 @@
+ // http://www.fourcc.org/yuv.php
+ // The actual conversion is best described here
+ // http://en.wikipedia.org/wiki/YUV
+ // An article on optimizing YUV conversion using tables instead of multiplies
  // http://lestourtereaux.free.fr/papers/data/yuvrgb.pdf
  //
  // YV12 is a full plane of Y and a half height, half width chroma planes
  // YV16 is a full plane of Y and a full height, half width chroma planes
++// YV24 is a full plane of Y and a full height, full width chroma planes
  //
  // ARGB pixel format is output, which on little endian is stored as BGRA.
  // The alpha is set to 255, allowing the application to use RGBA or RGB32.
  
 -#include "media/base/yuv_convert.h"
 +#include "yuv_convert.h"
  
  // Header for low level row functions.
 -#include "media/base/yuv_row.h"
 +#include "yuv_row.h"
++#define MOZILLA_SSE_INCLUDE_HEADER_FOR_SSE2
++#define MOZILLA_SSE_INCLUDE_HEADER_FOR_MMX
 +#include "mozilla/SSE.h"
  
+-#if USE_MMX
+-#if defined(_MSC_VER)
+-#include <intrin.h>
+-#else
+-#include <mmintrin.h>
+-#endif
+-#endif
+-
+-#if USE_SSE2
+-#include <emmintrin.h>
+-#endif
+-
 -namespace media {
 +namespace mozilla {
-+
+ 
 +namespace gfx {
++ 
+ // 16.16 fixed point arithmetic
+ const int kFractionBits = 16;
+ const int kFractionMax = 1 << kFractionBits;
+ const int kFractionMask = ((1 << kFractionBits) - 1);
  
  // Convert a frame of YUV to 32 bit ARGB.
 -void ConvertYUVToRGB32(const uint8* y_buf,
 -                       const uint8* u_buf,
 -                       const uint8* v_buf,
 -                       uint8* rgb_buf,
 -                       int width,
 -                       int height,
 -                       int y_pitch,
 -                       int uv_pitch,
 -                       int rgb_pitch,
 -                       YUVType yuv_type) {
-+void ConvertYCbCrToRGB32(const uint8* y_buf,
-+                         const uint8* u_buf,
-+                         const uint8* v_buf,
-+                         uint8* rgb_buf,
-+                         int width,
-+                         int height,
-+                         int y_pitch,
-+                         int uv_pitch,
-+                         int rgb_pitch,
-+                         YUVType yuv_type) {
-   unsigned int y_shift = yuv_type;
-+  bool has_mmx = supports_mmx();
-   for (int y = 0; y < height; ++y) {
-     uint8* rgb_row = rgb_buf + y * rgb_pitch;
-     const uint8* y_ptr = y_buf + y * y_pitch;
-     const uint8* u_ptr = u_buf + (y >> y_shift) * uv_pitch;
-     const uint8* v_ptr = v_buf + (y >> y_shift) * uv_pitch;
+-  unsigned int y_shift = yuv_type;
+-  for (int y = 0; y < height; ++y) {
+-    uint8* rgb_row = rgb_buf + y * rgb_pitch;
+-    const uint8* y_ptr = y_buf + y * y_pitch;
+-    const uint8* u_ptr = u_buf + (y >> y_shift) * uv_pitch;
+-    const uint8* v_ptr = v_buf + (y >> y_shift) * uv_pitch;
++NS_GFX_(void) ConvertYCbCrToRGB32(const uint8* y_buf,
++                                  const uint8* u_buf,
++                                  const uint8* v_buf,
++                                  uint8* rgb_buf,
++                                  int pic_x,
++                                  int pic_y,
++                                  int pic_width,
++                                  int pic_height,
++                                  int y_pitch,
++                                  int uv_pitch,
++                                  int rgb_pitch,
++                                  YUVType yuv_type) {
++  unsigned int y_shift = yuv_type == YV12 ? 1 : 0;
++  unsigned int x_shift = yuv_type == YV24 ? 0 : 1;
++  // Test for SSE because the optimized code uses movntq, which is not part of MMX.
++  bool has_sse = supports_mmx() && supports_sse();
++  // There is no optimized YV24 SSE routine so we check for this and
++  // fall back to the C code.
++  has_sse &= yuv_type != YV24;
++  bool odd_pic_x = yuv_type != YV24 && pic_x % 2 != 0;
++  int x_width = odd_pic_x ? pic_width - 1 : pic_width;
++
++  for (int y = pic_y; y < pic_height + pic_y; ++y) {
++    uint8* rgb_row = rgb_buf + (y - pic_y) * rgb_pitch;
++    const uint8* y_ptr = y_buf + y * y_pitch + pic_x;
++    const uint8* u_ptr = u_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift);
++    const uint8* v_ptr = v_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift);
  
 -    FastConvertYUVToRGB32Row(y_ptr,
 -                             u_ptr,
 -                             v_ptr,
 -                             rgb_row,
 -                             width);
-+    if (has_mmx)
++    if (odd_pic_x) {
++      // Handle the single odd pixel manually and use the
++      // fast routines for the remaining.
++      FastConvertYUVToRGB32Row_C(y_ptr++,
++                                 u_ptr++,
++                                 v_ptr++,
++                                 rgb_row,
++                                 1,
++                                 x_shift);
++      rgb_row += 4;
++    }
++
++    if (has_sse) {
 +      FastConvertYUVToRGB32Row(y_ptr,
 +                               u_ptr,
 +                               v_ptr,
 +                               rgb_row,
-+                               width);
-+    else
++                               x_width);
++    }
++    else {
 +      FastConvertYUVToRGB32Row_C(y_ptr,
 +                                 u_ptr,
 +                                 v_ptr,
 +                                 rgb_row,
-+                                 width);
++                                 x_width,
++                                 x_shift);
++    }
    }
  
    // MMX used for FastConvertYUVToRGB32Row requires emms instruction.
 -  EMMS();
-+  if (has_mmx)
++  if (has_sse)
 +    EMMS();
  }
  
+-#if USE_SSE2
++#if defined(MOZILLA_COMPILE_WITH_SSE2)
+ // FilterRows combines two rows of the image using linear interpolation.
+ // SSE2 version does 16 pixels at a time
+-
+ static void FilterRows(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
+                        int source_width, int source_y_fraction) {
+   __m128i zero = _mm_setzero_si128();
+   __m128i y1_fraction = _mm_set1_epi16(source_y_fraction);
+   __m128i y0_fraction = _mm_set1_epi16(256 - source_y_fraction);
+ 
+   const __m128i* y0_ptr128 = reinterpret_cast<const __m128i*>(y0_ptr);
+   const __m128i* y1_ptr128 = reinterpret_cast<const __m128i*>(y1_ptr);
+@@ -99,17 +126,17 @@ static void FilterRows(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
+     y0 = _mm_srli_epi16(y0, 8);
+     y2 = _mm_srli_epi16(y2, 8);
+     y0 = _mm_packus_epi16(y0, y2);
+     *dest128++ = y0;
+     ++y0_ptr128;
+     ++y1_ptr128;
+   } while (dest128 < end128);
+ }
+-#elif USE_MMX
++#elif defined(MOZILLA_COMPILE_WITH_MMX)
+ // MMX version does 8 pixels at a time
+ static void FilterRows(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
+                        int source_width, int source_y_fraction) {
+   __m64 zero = _mm_setzero_si64();
+   __m64 y1_fraction = _mm_set1_pi16(source_y_fraction);
+   __m64 y0_fraction = _mm_set1_pi16(256 - source_y_fraction);
+ 
+   const __m64* y0_ptr64 = reinterpret_cast<const __m64*>(y0_ptr);
+@@ -154,44 +181,45 @@ static void FilterRows(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
+     ybuf[7] = (y0_ptr[7] * y0_fraction + y1_ptr[7] * y1_fraction) >> 8;
+     y0_ptr += 8;
+     y1_ptr += 8;
+     ybuf += 8;
+   } while (ybuf < end);
+ }
+ #endif
+ 
+-
  // Scale a frame of YUV to 32 bit ARGB.
 -void ScaleYUVToRGB32(const uint8* y_buf,
 -                     const uint8* u_buf,
 -                     const uint8* v_buf,
 -                     uint8* rgb_buf,
+-                     int source_width,
+-                     int source_height,
 -                     int width,
 -                     int height,
--                     int scaled_width,
--                     int scaled_height,
 -                     int y_pitch,
 -                     int uv_pitch,
 -                     int rgb_pitch,
 -                     YUVType yuv_type,
--                     Rotate view_rotate) {
-+void ScaleYCbCrToRGB32(const uint8* y_buf,
-+                       const uint8* u_buf,
-+                       const uint8* v_buf,
-+                       uint8* rgb_buf,
-+                       int width,
-+                       int height,
-+                       int scaled_width,
-+                       int scaled_height,
-+                       int y_pitch,
-+                       int uv_pitch,
-+                       int rgb_pitch,
-+                       YUVType yuv_type,
-+                       Rotate view_rotate) {
-   unsigned int y_shift = yuv_type;
+-                     Rotate view_rotate,
+-                     ScaleFilter filter) {
++NS_GFX_(void) ScaleYCbCrToRGB32(const uint8* y_buf,
++                                const uint8* u_buf,
++                                const uint8* v_buf,
++                                uint8* rgb_buf,
++                                int source_width,
++                                int source_height,
++                                int width,
++                                int height,
++                                int y_pitch,
++                                int uv_pitch,
++                                int rgb_pitch,
++                                YUVType yuv_type,
++                                Rotate view_rotate,
++                                ScaleFilter filter) {
 +  bool has_mmx = supports_mmx();
++
+   // 4096 allows 3 buffers to fit in 12k.
+   // Helps performance on CPU with 16K L1 cache.
+   // Large enough for 3830x2160 and 30" displays which are 2560x1600.
+   const int kFilterBufferSize = 4096;
+   // Disable filtering if the screen is too big (to avoid buffer overflows).
+   // This should never happen to regular users: they don't have monitors
+   // wider than 4096 pixels.
+   // TODO(fbarchard): Allow rotated videos to filter.
+   if (source_width > kFilterBufferSize || view_rotate)
+     filter = FILTER_NONE;
+ 
+-  unsigned int y_shift = yuv_type;
++  unsigned int y_shift = yuv_type == YV12 ? 1 : 0;
    // Diagram showing origin and direction of source sampling.
    // ->0   4<-
    // 7       3
    //
    // 6       5
    // ->1   2<-
    // Rotations that start at right side of image.
    if ((view_rotate == ROTATE_180) ||
-@@ -126,42 +139,57 @@ void ScaleYUVToRGB32(const uint8* y_buf,
- 
-   for (int y = 0; y < scaled_height; ++y) {
-     uint8* dest_pixel = rgb_buf + y * rgb_pitch;
-     int scaled_y = (y * height / scaled_height);
-     const uint8* y_ptr = y_buf + scaled_y * y_pitch;
-     const uint8* u_ptr = u_buf + (scaled_y >> y_shift) * uv_pitch;
-     const uint8* v_ptr = v_buf + (scaled_y >> y_shift) * uv_pitch;
- 
--#if USE_MMX && defined(_MSC_VER)
-+#if defined(_MSC_VER)
-     if (scaled_width == (width * 2)) {
-       DoubleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
-                           dest_pixel, scaled_width);
-     } else if ((scaled_dx & 15) == 0) {  // Scaling by integer scale factor.
-       if (scaled_dx_uv == scaled_dx) {   // Not rotated.
-         if (scaled_dx == 16) {           // Not scaled
--          FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
--                                   dest_pixel, scaled_width);
-+          if (has_mmx)
-+            FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
-+                                     dest_pixel, scaled_width);
-+          else
-+            FastConvertYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr,
-+                                      dest_pixel, scaled_width);
-         } else {  // Simple scale down. ie half
-           ConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
-                                dest_pixel, scaled_width, scaled_dx >> 4);
-         }
-       } else {
-         RotateConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
-                                    dest_pixel, scaled_width,
-                                    scaled_dx >> 4, scaled_dx_uv >> 4);
-       }
- #else
-     if (scaled_dx == 16) {           // Not scaled
--      FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
--                               dest_pixel, scaled_width);
-+      if (has_mmx) 
-+        FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
-+                                 dest_pixel, scaled_width);
-+      else
-+        FastConvertYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr,
-+                                   dest_pixel, scaled_width);
- #endif
-     } else {
--      ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
--                         dest_pixel, scaled_width, scaled_dx);
--    }
-+      if (has_mmx) 
-+        ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
-+                           dest_pixel, scaled_width, scaled_dx);
-+      else
-+        ScaleYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr,
-+                             dest_pixel, scaled_width, scaled_dx);
-+
-+    }  
+@@ -243,17 +271,17 @@ void ScaleYUVToRGB32(const uint8* y_buf,
+       uv_pitch = 1;
+     }
    }
  
-   // MMX used for FastConvertYUVToRGB32Row requires emms instruction.
+   // Need padding because FilterRows() will write 1 to 16 extra pixels
+   // after the end for SSE2 version.
+   uint8 yuvbuf[16 + kFilterBufferSize * 3 + 16];
+   uint8* ybuf =
+-      reinterpret_cast<uint8*>(reinterpret_cast<uintptr_t>(yuvbuf + 15) & ~15);
++      reinterpret_cast<uint8*>(reinterpret_cast<PRUptrdiff>(yuvbuf + 15) & ~15);
+   uint8* ubuf = ybuf + kFilterBufferSize;
+   uint8* vbuf = ubuf + kFilterBufferSize;
+   // TODO(fbarchard): Fixed point math is off by 1 on negatives.
+   int yscale_fixed = (source_height << kFractionBits) / height;
+ 
+   // TODO(fbarchard): Split this into separate function for better efficiency.
+   for (int y = 0; y < height; ++y) {
+     uint8* dest_pixel = rgb_buf + y * rgb_pitch;
+@@ -276,17 +304,17 @@ void ScaleYUVToRGB32(const uint8* y_buf,
+     int source_uv_fraction =
+         ((source_y_subpixel >> y_shift) & kFractionMask) >> 8;
+ 
+     const uint8* y_ptr = y0_ptr;
+     const uint8* u_ptr = u0_ptr;
+     const uint8* v_ptr = v0_ptr;
+     // Apply vertical filtering if necessary.
+     // TODO(fbarchard): Remove memcpy when not necessary.
+-    if (filter & media::FILTER_BILINEAR_V) {
++    if (filter & mozilla::gfx::FILTER_BILINEAR_V) {
+       if (yscale_fixed != kFractionMax &&
+           source_y_fraction && ((source_y + 1) < source_height)) {
+         FilterRows(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction);
+       } else {
+         memcpy(ybuf, y0_ptr, source_width);
+       }
+       y_ptr = ybuf;
+       ybuf[source_width] = ybuf[source_width-1];
+@@ -309,17 +337,17 @@ void ScaleYUVToRGB32(const uint8* y_buf,
+       FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
+                                dest_pixel, width);
+     } else {
+       if (filter & FILTER_BILINEAR_H) {
+         LinearScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
+                                  dest_pixel, width, source_dx);
+     } else {
+ // Specialized scalers and rotation.
+-#if USE_MMX && defined(_MSC_VER)
++#if defined(_MSC_VER) && defined(_M_IX86)
+         if (width == (source_width * 2)) {
+           DoubleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
+                               dest_pixel, width);
+         } else if ((source_dx & kFractionMask) == 0) {
+           // Scaling by integer scale factor. ie half.
+           ConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
+                                dest_pixel, width,
+                                source_dx >> kFractionBits);
+@@ -331,16 +359,18 @@ void ScaleYUVToRGB32(const uint8* y_buf,
+                                      dest_pixel, width,
+                                      source_dx >> kFractionBits,
+                                      source_dx_uv >> kFractionBits);
+         }
+ #else
+         ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
+                            dest_pixel, width, source_dx);
+ #endif
+-      }
++      }      
+     }
+   }
+   // MMX used for FastConvertYUVToRGB32Row and FilterRows requires emms.
 -  EMMS();
 +  if (has_mmx)
 +    EMMS();
  }
  
 -}  // namespace media
 +}  // namespace gfx
 +}  // namespace mozilla
 diff --git b/gfx/ycbcr/yuv_convert.h a/gfx/ycbcr/yuv_convert.h
-index a757070..9d148a6 100644
+index 24a2c4e..eb99903 100644
 --- b/gfx/ycbcr/yuv_convert.h
 +++ a/gfx/ycbcr/yuv_convert.h
-@@ -1,63 +1,66 @@
- // Copyright (c) 2009 The Chromium Authors. All rights reserved.
+@@ -1,72 +1,79 @@
+ // Copyright (c) 2010 The Chromium Authors. All rights reserved.
  // Use of this source code is governed by a BSD-style license that can be
  // found in the LICENSE file.
  
  #ifndef MEDIA_BASE_YUV_CONVERT_H_
  #define MEDIA_BASE_YUV_CONVERT_H_
  
 -#include "base/basictypes.h"
+-
+-namespace media {
 +#include "chromium_types.h"
- 
--namespace media {
++#include "gfxCore.h"
++ 
 +namespace mozilla {
-+
+ 
 +namespace gfx {
- 
++ 
  // Type of YUV surface.
  // The value of these enums matter as they are used to shift vertical indices.
  enum YUVType {
-   YV16 = 0,           // YV16 is half width and full height chroma channels.
+-  YV16 = 0,           // YV16 is half width and full height chroma channels.
 -  YV12 = 1,           // YV12 is half width and half height chroma channels.
-+  YV12 = 1            // YV12 is half width and half height chroma channels.
++  YV12 = 0,           // YV12 is half width and half height chroma channels.
++  YV16 = 1,           // YV16 is half width and full height chroma channels.
++  YV24 = 2            // YV24 is full width and full height chroma channels.
  };
  
  // Mirror means flip the image horizontally, as in looking in a mirror.
  // Rotate happens after mirroring.
  enum Rotate {
    ROTATE_0,           // Rotation off.
    ROTATE_90,          // Rotate clockwise.
    ROTATE_180,         // Rotate upside down.
    ROTATE_270,         // Rotate counter clockwise.
    MIRROR_ROTATE_0,    // Mirror horizontally.
    MIRROR_ROTATE_90,   // Mirror then Rotate clockwise.
    MIRROR_ROTATE_180,  // Mirror vertically.
 -  MIRROR_ROTATE_270,  // Transpose.
 +  MIRROR_ROTATE_270   // Transpose.
  };
  
+ // Filter affects how scaling looks.
+ enum ScaleFilter {
+   FILTER_NONE = 0,        // No filter (point sampled).
+   FILTER_BILINEAR_H = 1,  // Bilinear horizontal filter.
+   FILTER_BILINEAR_V = 2,  // Bilinear vertical filter.
+-  FILTER_BILINEAR = 3,    // Bilinear filter.
++  FILTER_BILINEAR = 3     // Bilinear filter.
+ };
+ 
  // Convert a frame of YUV to 32 bit ARGB.
  // Pass in YV16/YV12 depending on source format
 -void ConvertYUVToRGB32(const uint8* yplane,
-+void ConvertYCbCrToRGB32(const uint8* yplane,
-+                         const uint8* uplane,
-+                         const uint8* vplane,
-+                         uint8* rgbframe,
-+                         int frame_width,
-+                         int frame_height,
-+                         int ystride,
-+                         int uvstride,
-+                         int rgbstride,
-+                         YUVType yuv_type);
-+
-+// Scale a frame of YUV to 32 bit ARGB.
-+// Supports rotation and mirroring.
-+void ScaleYCbCrToRGB32(const uint8* yplane,
-                        const uint8* uplane,
-                        const uint8* vplane,
-                        uint8* rgbframe,
-                        int frame_width,
-                        int frame_height,
-+                       int scaled_width,
-+                       int scaled_height,
-                        int ystride,
-                        int uvstride,
-                        int rgbstride,
+-                       const uint8* uplane,
+-                       const uint8* vplane,
+-                       uint8* rgbframe,
+-                       int width,
+-                       int height,
+-                       int ystride,
+-                       int uvstride,
+-                       int rgbstride,
 -                       YUVType yuv_type);
-+                       YUVType yuv_type,
-+                       Rotate view_rotate);
++NS_GFX_(void) ConvertYCbCrToRGB32(const uint8* yplane,
++                                  const uint8* uplane,
++                                  const uint8* vplane,
++                                  uint8* rgbframe,
++                                  int pic_x,
++                                  int pic_y,
++                                  int pic_width,
++                                  int pic_height,
++                                  int ystride,
++                                  int uvstride,
++                                  int rgbstride,
++                                  YUVType yuv_type);
  
--// Scale a frame of YUV to 32 bit ARGB.
--// Supports rotation and mirroring.
+ // Scale a frame of YUV to 32 bit ARGB.
+ // Supports rotation and mirroring.
 -void ScaleYUVToRGB32(const uint8* yplane,
 -                     const uint8* uplane,
 -                     const uint8* vplane,
 -                     uint8* rgbframe,
--                     int frame_width,
--                     int frame_height,
--                     int scaled_width,
--                     int scaled_height,
+-                     int source_width,
+-                     int source_height,
+-                     int width,
+-                     int height,
 -                     int ystride,
 -                     int uvstride,
 -                     int rgbstride,
 -                     YUVType yuv_type,
--                     Rotate view_rotate);
+-                     Rotate view_rotate,
+-                     ScaleFilter filter);
 -
 -}  // namespace media
++NS_GFX_(void) ScaleYCbCrToRGB32(const uint8* yplane,
++                                const uint8* uplane,
++                                const uint8* vplane,
++                                uint8* rgbframe,
++                                int source_width,
++                                int source_height,
++                                int width,
++                                int height,
++                                int ystride,
++                                int uvstride,
++                                int rgbstride,
++                                YUVType yuv_type,
++                                Rotate view_rotate,
++                                ScaleFilter filter);
+ 
 +}  // namespace gfx
 +}  // namespace mozilla
- 
++ 
  #endif  // MEDIA_BASE_YUV_CONVERT_H_
 diff --git b/gfx/ycbcr/yuv_row.h a/gfx/ycbcr/yuv_row.h
-index ac5c6fd..98efca6 100644
+index 0a2990b..4ce9eb8 100644
 --- b/gfx/ycbcr/yuv_row.h
 +++ a/gfx/ycbcr/yuv_row.h
-@@ -5,27 +5,34 @@
+@@ -5,27 +5,40 @@
  // yuv_row internal functions to handle YUV conversion and scaling to RGB.
  // These functions are used from both yuv_convert.cc and yuv_scale.cc.
  
  // TODO(fbarchard): Write function that can handle rotation and scaling.
  
  #ifndef MEDIA_BASE_YUV_ROW_H_
  #define MEDIA_BASE_YUV_ROW_H_
  
@@ -301,319 +447,144 @@ index ac5c6fd..98efca6 100644
                                const uint8* v_buf,
                                uint8* rgb_buf,
                                int width);
  
 +void FastConvertYUVToRGB32Row_C(const uint8* y_buf,
 +                                const uint8* u_buf,
 +                                const uint8* v_buf,
 +                                uint8* rgb_buf,
-+                                int width);
++                                int width,
++                                unsigned int x_shift);
 +
++void FastConvertYUVToRGB32Row(const uint8* y_buf,
++                              const uint8* u_buf,
++                              const uint8* v_buf,
++                              uint8* rgb_buf,
++                              int width);
 +
  // Can do 1x, half size or any scale down by an integer amount.
  // Step can be negative (mirroring, rotate 180).
  // This is the third fastest of the scalers.
  void ConvertYUVToRGB32Row(const uint8* y_buf,
                            const uint8* u_buf,
                            const uint8* v_buf,
                            uint8* rgb_buf,
                            int width,
-@@ -55,29 +62,28 @@ void DoubleYUVToRGB32Row(const uint8* y_buf,
+@@ -55,59 +68,67 @@ void DoubleYUVToRGB32Row(const uint8* y_buf,
  // Chroma is under sampled every 2 pixels for performance.
- // This is the slowest of the scalers.
  void ScaleYUVToRGB32Row(const uint8* y_buf,
                          const uint8* u_buf,
                          const uint8* v_buf,
                          uint8* rgb_buf,
                          int width,
-                         int scaled_dx);
--}  // extern "C"
+                         int source_dx);
+ 
++void ScaleYUVToRGB32Row(const uint8* y_buf,
++                        const uint8* u_buf,
++                        const uint8* v_buf,
++                        uint8* rgb_buf,
++                        int width,
++                        int source_dx);
++
++void ScaleYUVToRGB32Row_C(const uint8* y_buf,
++                          const uint8* u_buf,
++                          const uint8* v_buf,
++                          uint8* rgb_buf,
++                          int width,
++                          int source_dx);
++
+ // Handles arbitrary scaling up or down with bilinear filtering.
+ // Mirroring is supported, but not 90 or 270 degree rotation.
+ // Chroma is under sampled every 2 pixels for performance.
+ // This is the slowest of the scalers.
+ void LinearScaleYUVToRGB32Row(const uint8* y_buf,
+                               const uint8* u_buf,
+                               const uint8* v_buf,
+                               uint8* rgb_buf,
+                               int width,
+                               int source_dx);
  
++void LinearScaleYUVToRGB32Row(const uint8* y_buf,
++                              const uint8* u_buf,
++                              const uint8* v_buf,
++                              uint8* rgb_buf,
++                              int width,
++                              int source_dx);
++
++void LinearScaleYUVToRGB32Row_C(const uint8* y_buf,
++                                const uint8* u_buf,
++                                const uint8* v_buf,
++                                uint8* rgb_buf,
++                                int width,
++                                int source_dx);
++
++
+ #if defined(_MSC_VER)
+ #define SIMD_ALIGNED(var) __declspec(align(16)) var
+ #else
+ #define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
+ #endif
+ extern SIMD_ALIGNED(int16 kCoefficientsRgbY[768][4]);
+ 
+-// Method to force C version.
+-//#define USE_MMX 0
+-//#define USE_SSE2 0
+-
 -#if !defined(USE_MMX)
--// Windows, Mac and Linux use MMX
--#if defined(ARCH_CPU_X86) || (defined(ARCH_CPU_X86_64) && defined(OS_LINUX))
+-// Windows, Mac and Linux/BSD use MMX
+-#if defined(__MMX__) || defined(_MSC_VER)
 -#define USE_MMX 1
 -#else
 -#define USE_MMX 0
 -#endif
 -#endif
-+void ScaleYUVToRGB32Row_C(const uint8* y_buf,
-+                          const uint8* u_buf,
-+                          const uint8* v_buf,
-+                          uint8* rgb_buf,
-+                          int width,
-+                          int scaled_dx);
-+
-+}  // extern "C"
- 
+-
+-#if !defined(USE_SSE2)
+-#if defined(__SSE2__) || defined(ARCH_CPU_X86_64) || _M_IX86_FP==2
+-#define USE_SSE2 1
+-#else
+-#define USE_SSE2 0
+-#endif
+-#endif
+-
  // x64 uses MMX2 (SSE) so emms is not required.
+ // Warning C4799: function has no EMMS instruction.
+ // EMMS() is slow and should be called by the calling function once per image.
 -#if USE_MMX && !defined(ARCH_CPU_X86_64)
 +#if !defined(ARCH_CPU_X86_64)
  #if defined(_MSC_VER)
  #define EMMS() __asm emms
+ #pragma warning(disable: 4799)
  #else
  #define EMMS() asm("emms")
  #endif
  #else
  #define EMMS()
- #endif
 diff --git b/gfx/ycbcr/yuv_row_c.cpp a/gfx/ycbcr/yuv_row_c.cpp
-index 4a20777..a81416c 100644
+index a66fa7b..d327f85 100644
 --- b/gfx/ycbcr/yuv_row_c.cpp
 +++ a/gfx/ycbcr/yuv_row_c.cpp
-@@ -1,522 +1,20 @@
- // Copyright (c) 2009 The Chromium Authors. All rights reserved.
+@@ -1,812 +1,18 @@
+ // Copyright (c) 2010 The Chromium Authors. All rights reserved.
  // Use of this source code is governed by a BSD-style license that can be
  // found in the LICENSE file.
  
 -#include "media/base/yuv_row.h"
 +#include "yuv_row.h"
  
 -#ifdef _DEBUG
 -#include "base/logging.h"
 -#else
  #define DCHECK(a)
 -#endif
  
- // TODO(fbarchard): Move this to yuv_row_posix.cc to share with Mac.
- // TODO(fbarchard): Do 64 bit version.
+ extern "C" {
  
- extern "C" {
--#if USE_MMX
--
--#define RGBY(i) { \
--  static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
--  static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
--  static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
--  0 \
--}
--
--#define RGBU(i) { \
--  static_cast<int16>(2.018 * 64 * (i - 128) + 0.5), \
--  static_cast<int16>(-0.391 * 64 * (i - 128) + 0.5), \
--  0, \
--  static_cast<int16>(256 * 64 - 1) \
--}
--
--#define RGBV(i) { \
--  0, \
--  static_cast<int16>(-0.813 * 64 * (i - 128) + 0.5), \
--  static_cast<int16>(1.596 * 64 * (i - 128) + 0.5), \
--  0 \
--}
--
--#define MMX_ALIGNED(var) var __attribute__((aligned(16)))
--
--
--MMX_ALIGNED(int16 kCoefficientsRgbY[768][4]) = {
--  RGBY(0x00), RGBY(0x01), RGBY(0x02), RGBY(0x03),
--  RGBY(0x04), RGBY(0x05), RGBY(0x06), RGBY(0x07),
--  RGBY(0x08), RGBY(0x09), RGBY(0x0A), RGBY(0x0B),
--  RGBY(0x0C), RGBY(0x0D), RGBY(0x0E), RGBY(0x0F),
--  RGBY(0x10), RGBY(0x11), RGBY(0x12), RGBY(0x13),
--  RGBY(0x14), RGBY(0x15), RGBY(0x16), RGBY(0x17),
--  RGBY(0x18), RGBY(0x19), RGBY(0x1A), RGBY(0x1B),
--  RGBY(0x1C), RGBY(0x1D), RGBY(0x1E), RGBY(0x1F),
--  RGBY(0x20), RGBY(0x21), RGBY(0x22), RGBY(0x23),
--  RGBY(0x24), RGBY(0x25), RGBY(0x26), RGBY(0x27),
--  RGBY(0x28), RGBY(0x29), RGBY(0x2A), RGBY(0x2B),
--  RGBY(0x2C), RGBY(0x2D), RGBY(0x2E), RGBY(0x2F),
--  RGBY(0x30), RGBY(0x31), RGBY(0x32), RGBY(0x33),
--  RGBY(0x34), RGBY(0x35), RGBY(0x36), RGBY(0x37),
--  RGBY(0x38), RGBY(0x39), RGBY(0x3A), RGBY(0x3B),
--  RGBY(0x3C), RGBY(0x3D), RGBY(0x3E), RGBY(0x3F),
--  RGBY(0x40), RGBY(0x41), RGBY(0x42), RGBY(0x43),
--  RGBY(0x44), RGBY(0x45), RGBY(0x46), RGBY(0x47),
--  RGBY(0x48), RGBY(0x49), RGBY(0x4A), RGBY(0x4B),
--  RGBY(0x4C), RGBY(0x4D), RGBY(0x4E), RGBY(0x4F),
--  RGBY(0x50), RGBY(0x51), RGBY(0x52), RGBY(0x53),
--  RGBY(0x54), RGBY(0x55), RGBY(0x56), RGBY(0x57),
--  RGBY(0x58), RGBY(0x59), RGBY(0x5A), RGBY(0x5B),
--  RGBY(0x5C), RGBY(0x5D), RGBY(0x5E), RGBY(0x5F),
--  RGBY(0x60), RGBY(0x61), RGBY(0x62), RGBY(0x63),
--  RGBY(0x64), RGBY(0x65), RGBY(0x66), RGBY(0x67),
--  RGBY(0x68), RGBY(0x69), RGBY(0x6A), RGBY(0x6B),
--  RGBY(0x6C), RGBY(0x6D), RGBY(0x6E), RGBY(0x6F),
--  RGBY(0x70), RGBY(0x71), RGBY(0x72), RGBY(0x73),
--  RGBY(0x74), RGBY(0x75), RGBY(0x76), RGBY(0x77),
--  RGBY(0x78), RGBY(0x79), RGBY(0x7A), RGBY(0x7B),
--  RGBY(0x7C), RGBY(0x7D), RGBY(0x7E), RGBY(0x7F),
--  RGBY(0x80), RGBY(0x81), RGBY(0x82), RGBY(0x83),
--  RGBY(0x84), RGBY(0x85), RGBY(0x86), RGBY(0x87),
--  RGBY(0x88), RGBY(0x89), RGBY(0x8A), RGBY(0x8B),
--  RGBY(0x8C), RGBY(0x8D), RGBY(0x8E), RGBY(0x8F),
--  RGBY(0x90), RGBY(0x91), RGBY(0x92), RGBY(0x93),
--  RGBY(0x94), RGBY(0x95), RGBY(0x96), RGBY(0x97),
--  RGBY(0x98), RGBY(0x99), RGBY(0x9A), RGBY(0x9B),
--  RGBY(0x9C), RGBY(0x9D), RGBY(0x9E), RGBY(0x9F),
--  RGBY(0xA0), RGBY(0xA1), RGBY(0xA2), RGBY(0xA3),
--  RGBY(0xA4), RGBY(0xA5), RGBY(0xA6), RGBY(0xA7),
--  RGBY(0xA8), RGBY(0xA9), RGBY(0xAA), RGBY(0xAB),
--  RGBY(0xAC), RGBY(0xAD), RGBY(0xAE), RGBY(0xAF),
--  RGBY(0xB0), RGBY(0xB1), RGBY(0xB2), RGBY(0xB3),
--  RGBY(0xB4), RGBY(0xB5), RGBY(0xB6), RGBY(0xB7),
--  RGBY(0xB8), RGBY(0xB9), RGBY(0xBA), RGBY(0xBB),
--  RGBY(0xBC), RGBY(0xBD), RGBY(0xBE), RGBY(0xBF),
--  RGBY(0xC0), RGBY(0xC1), RGBY(0xC2), RGBY(0xC3),
--  RGBY(0xC4), RGBY(0xC5), RGBY(0xC6), RGBY(0xC7),
--  RGBY(0xC8), RGBY(0xC9), RGBY(0xCA), RGBY(0xCB),
--  RGBY(0xCC), RGBY(0xCD), RGBY(0xCE), RGBY(0xCF),
--  RGBY(0xD0), RGBY(0xD1), RGBY(0xD2), RGBY(0xD3),
--  RGBY(0xD4), RGBY(0xD5), RGBY(0xD6), RGBY(0xD7),
--  RGBY(0xD8), RGBY(0xD9), RGBY(0xDA), RGBY(0xDB),
--  RGBY(0xDC), RGBY(0xDD), RGBY(0xDE), RGBY(0xDF),
--  RGBY(0xE0), RGBY(0xE1), RGBY(0xE2), RGBY(0xE3),
--  RGBY(0xE4), RGBY(0xE5), RGBY(0xE6), RGBY(0xE7),
--  RGBY(0xE8), RGBY(0xE9), RGBY(0xEA), RGBY(0xEB),
--  RGBY(0xEC), RGBY(0xED), RGBY(0xEE), RGBY(0xEF),
--  RGBY(0xF0), RGBY(0xF1), RGBY(0xF2), RGBY(0xF3),
--  RGBY(0xF4), RGBY(0xF5), RGBY(0xF6), RGBY(0xF7),
--  RGBY(0xF8), RGBY(0xF9), RGBY(0xFA), RGBY(0xFB),
--  RGBY(0xFC), RGBY(0xFD), RGBY(0xFE), RGBY(0xFF),
--
--  // Chroma U table.
--  RGBU(0x00), RGBU(0x01), RGBU(0x02), RGBU(0x03),
--  RGBU(0x04), RGBU(0x05), RGBU(0x06), RGBU(0x07),
--  RGBU(0x08), RGBU(0x09), RGBU(0x0A), RGBU(0x0B),
--  RGBU(0x0C), RGBU(0x0D), RGBU(0x0E), RGBU(0x0F),
--  RGBU(0x10), RGBU(0x11), RGBU(0x12), RGBU(0x13),
--  RGBU(0x14), RGBU(0x15), RGBU(0x16), RGBU(0x17),
--  RGBU(0x18), RGBU(0x19), RGBU(0x1A), RGBU(0x1B),
--  RGBU(0x1C), RGBU(0x1D), RGBU(0x1E), RGBU(0x1F),
--  RGBU(0x20), RGBU(0x21), RGBU(0x22), RGBU(0x23),
--  RGBU(0x24), RGBU(0x25), RGBU(0x26), RGBU(0x27),
--  RGBU(0x28), RGBU(0x29), RGBU(0x2A), RGBU(0x2B),
--  RGBU(0x2C), RGBU(0x2D), RGBU(0x2E), RGBU(0x2F),
--  RGBU(0x30), RGBU(0x31), RGBU(0x32), RGBU(0x33),
--  RGBU(0x34), RGBU(0x35), RGBU(0x36), RGBU(0x37),
--  RGBU(0x38), RGBU(0x39), RGBU(0x3A), RGBU(0x3B),
--  RGBU(0x3C), RGBU(0x3D), RGBU(0x3E), RGBU(0x3F),
--  RGBU(0x40), RGBU(0x41), RGBU(0x42), RGBU(0x43),
--  RGBU(0x44), RGBU(0x45), RGBU(0x46), RGBU(0x47),
--  RGBU(0x48), RGBU(0x49), RGBU(0x4A), RGBU(0x4B),
--  RGBU(0x4C), RGBU(0x4D), RGBU(0x4E), RGBU(0x4F),
--  RGBU(0x50), RGBU(0x51), RGBU(0x52), RGBU(0x53),
--  RGBU(0x54), RGBU(0x55), RGBU(0x56), RGBU(0x57),
--  RGBU(0x58), RGBU(0x59), RGBU(0x5A), RGBU(0x5B),
--  RGBU(0x5C), RGBU(0x5D), RGBU(0x5E), RGBU(0x5F),
--  RGBU(0x60), RGBU(0x61), RGBU(0x62), RGBU(0x63),
--  RGBU(0x64), RGBU(0x65), RGBU(0x66), RGBU(0x67),
--  RGBU(0x68), RGBU(0x69), RGBU(0x6A), RGBU(0x6B),
--  RGBU(0x6C), RGBU(0x6D), RGBU(0x6E), RGBU(0x6F),
--  RGBU(0x70), RGBU(0x71), RGBU(0x72), RGBU(0x73),
--  RGBU(0x74), RGBU(0x75), RGBU(0x76), RGBU(0x77),
--  RGBU(0x78), RGBU(0x79), RGBU(0x7A), RGBU(0x7B),
--  RGBU(0x7C), RGBU(0x7D), RGBU(0x7E), RGBU(0x7F),
--  RGBU(0x80), RGBU(0x81), RGBU(0x82), RGBU(0x83),
--  RGBU(0x84), RGBU(0x85), RGBU(0x86), RGBU(0x87),
--  RGBU(0x88), RGBU(0x89), RGBU(0x8A), RGBU(0x8B),
--  RGBU(0x8C), RGBU(0x8D), RGBU(0x8E), RGBU(0x8F),
--  RGBU(0x90), RGBU(0x91), RGBU(0x92), RGBU(0x93),
--  RGBU(0x94), RGBU(0x95), RGBU(0x96), RGBU(0x97),
--  RGBU(0x98), RGBU(0x99), RGBU(0x9A), RGBU(0x9B),
--  RGBU(0x9C), RGBU(0x9D), RGBU(0x9E), RGBU(0x9F),
--  RGBU(0xA0), RGBU(0xA1), RGBU(0xA2), RGBU(0xA3),
--  RGBU(0xA4), RGBU(0xA5), RGBU(0xA6), RGBU(0xA7),
--  RGBU(0xA8), RGBU(0xA9), RGBU(0xAA), RGBU(0xAB),
--  RGBU(0xAC), RGBU(0xAD), RGBU(0xAE), RGBU(0xAF),
--  RGBU(0xB0), RGBU(0xB1), RGBU(0xB2), RGBU(0xB3),
--  RGBU(0xB4), RGBU(0xB5), RGBU(0xB6), RGBU(0xB7),
--  RGBU(0xB8), RGBU(0xB9), RGBU(0xBA), RGBU(0xBB),
--  RGBU(0xBC), RGBU(0xBD), RGBU(0xBE), RGBU(0xBF),
--  RGBU(0xC0), RGBU(0xC1), RGBU(0xC2), RGBU(0xC3),
--  RGBU(0xC4), RGBU(0xC5), RGBU(0xC6), RGBU(0xC7),
--  RGBU(0xC8), RGBU(0xC9), RGBU(0xCA), RGBU(0xCB),
--  RGBU(0xCC), RGBU(0xCD), RGBU(0xCE), RGBU(0xCF),
--  RGBU(0xD0), RGBU(0xD1), RGBU(0xD2), RGBU(0xD3),
--  RGBU(0xD4), RGBU(0xD5), RGBU(0xD6), RGBU(0xD7),
--  RGBU(0xD8), RGBU(0xD9), RGBU(0xDA), RGBU(0xDB),
--  RGBU(0xDC), RGBU(0xDD), RGBU(0xDE), RGBU(0xDF),
--  RGBU(0xE0), RGBU(0xE1), RGBU(0xE2), RGBU(0xE3),
--  RGBU(0xE4), RGBU(0xE5), RGBU(0xE6), RGBU(0xE7),
--  RGBU(0xE8), RGBU(0xE9), RGBU(0xEA), RGBU(0xEB),
--  RGBU(0xEC), RGBU(0xED), RGBU(0xEE), RGBU(0xEF),
--  RGBU(0xF0), RGBU(0xF1), RGBU(0xF2), RGBU(0xF3),
--  RGBU(0xF4), RGBU(0xF5), RGBU(0xF6), RGBU(0xF7),
--  RGBU(0xF8), RGBU(0xF9), RGBU(0xFA), RGBU(0xFB),
--  RGBU(0xFC), RGBU(0xFD), RGBU(0xFE), RGBU(0xFF),
--
--  // Chroma V table.
--  RGBV(0x00), RGBV(0x01), RGBV(0x02), RGBV(0x03),
--  RGBV(0x04), RGBV(0x05), RGBV(0x06), RGBV(0x07),
--  RGBV(0x08), RGBV(0x09), RGBV(0x0A), RGBV(0x0B),
--  RGBV(0x0C), RGBV(0x0D), RGBV(0x0E), RGBV(0x0F),
--  RGBV(0x10), RGBV(0x11), RGBV(0x12), RGBV(0x13),
--  RGBV(0x14), RGBV(0x15), RGBV(0x16), RGBV(0x17),
--  RGBV(0x18), RGBV(0x19), RGBV(0x1A), RGBV(0x1B),
--  RGBV(0x1C), RGBV(0x1D), RGBV(0x1E), RGBV(0x1F),
--  RGBV(0x20), RGBV(0x21), RGBV(0x22), RGBV(0x23),
--  RGBV(0x24), RGBV(0x25), RGBV(0x26), RGBV(0x27),
--  RGBV(0x28), RGBV(0x29), RGBV(0x2A), RGBV(0x2B),
--  RGBV(0x2C), RGBV(0x2D), RGBV(0x2E), RGBV(0x2F),
--  RGBV(0x30), RGBV(0x31), RGBV(0x32), RGBV(0x33),
--  RGBV(0x34), RGBV(0x35), RGBV(0x36), RGBV(0x37),
--  RGBV(0x38), RGBV(0x39), RGBV(0x3A), RGBV(0x3B),
--  RGBV(0x3C), RGBV(0x3D), RGBV(0x3E), RGBV(0x3F),
--  RGBV(0x40), RGBV(0x41), RGBV(0x42), RGBV(0x43),
--  RGBV(0x44), RGBV(0x45), RGBV(0x46), RGBV(0x47),
--  RGBV(0x48), RGBV(0x49), RGBV(0x4A), RGBV(0x4B),
--  RGBV(0x4C), RGBV(0x4D), RGBV(0x4E), RGBV(0x4F),
--  RGBV(0x50), RGBV(0x51), RGBV(0x52), RGBV(0x53),
--  RGBV(0x54), RGBV(0x55), RGBV(0x56), RGBV(0x57),
--  RGBV(0x58), RGBV(0x59), RGBV(0x5A), RGBV(0x5B),
--  RGBV(0x5C), RGBV(0x5D), RGBV(0x5E), RGBV(0x5F),
--  RGBV(0x60), RGBV(0x61), RGBV(0x62), RGBV(0x63),
--  RGBV(0x64), RGBV(0x65), RGBV(0x66), RGBV(0x67),
--  RGBV(0x68), RGBV(0x69), RGBV(0x6A), RGBV(0x6B),
--  RGBV(0x6C), RGBV(0x6D), RGBV(0x6E), RGBV(0x6F),
--  RGBV(0x70), RGBV(0x71), RGBV(0x72), RGBV(0x73),
--  RGBV(0x74), RGBV(0x75), RGBV(0x76), RGBV(0x77),
--  RGBV(0x78), RGBV(0x79), RGBV(0x7A), RGBV(0x7B),
--  RGBV(0x7C), RGBV(0x7D), RGBV(0x7E), RGBV(0x7F),
--  RGBV(0x80), RGBV(0x81), RGBV(0x82), RGBV(0x83),
--  RGBV(0x84), RGBV(0x85), RGBV(0x86), RGBV(0x87),
--  RGBV(0x88), RGBV(0x89), RGBV(0x8A), RGBV(0x8B),
--  RGBV(0x8C), RGBV(0x8D), RGBV(0x8E), RGBV(0x8F),
--  RGBV(0x90), RGBV(0x91), RGBV(0x92), RGBV(0x93),
--  RGBV(0x94), RGBV(0x95), RGBV(0x96), RGBV(0x97),
--  RGBV(0x98), RGBV(0x99), RGBV(0x9A), RGBV(0x9B),
--  RGBV(0x9C), RGBV(0x9D), RGBV(0x9E), RGBV(0x9F),
--  RGBV(0xA0), RGBV(0xA1), RGBV(0xA2), RGBV(0xA3),
--  RGBV(0xA4), RGBV(0xA5), RGBV(0xA6), RGBV(0xA7),
--  RGBV(0xA8), RGBV(0xA9), RGBV(0xAA), RGBV(0xAB),
--  RGBV(0xAC), RGBV(0xAD), RGBV(0xAE), RGBV(0xAF),
--  RGBV(0xB0), RGBV(0xB1), RGBV(0xB2), RGBV(0xB3),
--  RGBV(0xB4), RGBV(0xB5), RGBV(0xB6), RGBV(0xB7),
--  RGBV(0xB8), RGBV(0xB9), RGBV(0xBA), RGBV(0xBB),
--  RGBV(0xBC), RGBV(0xBD), RGBV(0xBE), RGBV(0xBF),
--  RGBV(0xC0), RGBV(0xC1), RGBV(0xC2), RGBV(0xC3),
--  RGBV(0xC4), RGBV(0xC5), RGBV(0xC6), RGBV(0xC7),
--  RGBV(0xC8), RGBV(0xC9), RGBV(0xCA), RGBV(0xCB),
--  RGBV(0xCC), RGBV(0xCD), RGBV(0xCE), RGBV(0xCF),
--  RGBV(0xD0), RGBV(0xD1), RGBV(0xD2), RGBV(0xD3),
--  RGBV(0xD4), RGBV(0xD5), RGBV(0xD6), RGBV(0xD7),
--  RGBV(0xD8), RGBV(0xD9), RGBV(0xDA), RGBV(0xDB),
--  RGBV(0xDC), RGBV(0xDD), RGBV(0xDE), RGBV(0xDF),
--  RGBV(0xE0), RGBV(0xE1), RGBV(0xE2), RGBV(0xE3),
--  RGBV(0xE4), RGBV(0xE5), RGBV(0xE6), RGBV(0xE7),
--  RGBV(0xE8), RGBV(0xE9), RGBV(0xEA), RGBV(0xEB),
--  RGBV(0xEC), RGBV(0xED), RGBV(0xEE), RGBV(0xEF),
--  RGBV(0xF0), RGBV(0xF1), RGBV(0xF2), RGBV(0xF3),
--  RGBV(0xF4), RGBV(0xF5), RGBV(0xF6), RGBV(0xF7),
--  RGBV(0xF8), RGBV(0xF9), RGBV(0xFA), RGBV(0xFB),
--  RGBV(0xFC), RGBV(0xFD), RGBV(0xFE), RGBV(0xFF),
--};
--
--#undef RGBY
--#undef RGBU
--#undef RGBV
--#undef MMX_ALIGNED
--
--#if defined(ARCH_CPU_X86_64)
+-#if USE_SSE2 && defined(ARCH_CPU_X86_64)
 -
 -// AMD64 ABI uses register paremters.
 -void FastConvertYUVToRGB32Row(const uint8* y_buf,  // rdi
 -                              const uint8* u_buf,  // rsi
 -                              const uint8* v_buf,  // rdx
 -                              uint8* rgb_buf,      // rcx
 -                              int width) {         // r8
 -  asm(
@@ -669,36 +640,36 @@ index 4a20777..a81416c 100644
 -);
 -}
 -
 -void ScaleYUVToRGB32Row(const uint8* y_buf,  // rdi
 -                        const uint8* u_buf,  // rsi
 -                        const uint8* v_buf,  // rdx
 -                        uint8* rgb_buf,      // rcx
 -                        int width,           // r8
--                        int scaled_dx) {     // r9
+-                        int source_dx) {     // r9
 -  asm(
 -  "xor    %%r11,%%r11\n"
 -  "sub    $0x2,%4\n"
 -  "js     scalenext\n"
 -
 -"scaleloop:"
 -  "mov    %%r11,%%r10\n"
--  "sar    $0x5,%%r10\n"
+-  "sar    $0x11,%%r10\n"
 -  "movzb  (%1,%%r10,1),%%rax\n"
 -  "movq   2048(%5,%%rax,8),%%xmm0\n"
 -  "movzb  (%2,%%r10,1),%%rax\n"
 -  "movq   4096(%5,%%rax,8),%%xmm1\n"
 -  "lea    (%%r11,%6),%%r10\n"
--  "sar    $0x4,%%r11\n"
+-  "sar    $0x10,%%r11\n"
 -  "movzb  (%0,%%r11,1),%%rax\n"
 -  "paddsw %%xmm1,%%xmm0\n"
 -  "movq   (%5,%%rax,8),%%xmm1\n"
 -  "lea    (%%r10,%6),%%r11\n"
--  "sar    $0x4,%%r10\n"
+-  "sar    $0x10,%%r10\n"
 -  "movzb  (%0,%%r10,1),%%rax\n"
 -  "movq   (%5,%%rax,8),%%xmm2\n"
 -  "paddsw %%xmm0,%%xmm1\n"
 -  "paddsw %%xmm0,%%xmm2\n"
 -  "shufps $0x44,%%xmm2,%%xmm1\n"
 -  "psraw  $0x6,%%xmm1\n"
 -  "packuswb %%xmm1,%%xmm1\n"
 -  "movq   %%xmm1,0x0(%3)\n"
@@ -706,52 +677,172 @@ index 4a20777..a81416c 100644
 -  "sub    $0x2,%4\n"
 -  "jns    scaleloop\n"
 -
 -"scalenext:"
 -  "add    $0x1,%4\n"
 -  "js     scaledone\n"
 -
 -  "mov    %%r11,%%r10\n"
--  "sar    $0x5,%%r10\n"
+-  "sar    $0x11,%%r10\n"
 -  "movzb  (%1,%%r10,1),%%rax\n"
 -  "movq   2048(%5,%%rax,8),%%xmm0\n"
 -  "movzb  (%2,%%r10,1),%%rax\n"
 -  "movq   4096(%5,%%rax,8),%%xmm1\n"
 -  "paddsw %%xmm1,%%xmm0\n"
--  "sar    $0x4,%%r11\n"
+-  "sar    $0x10,%%r11\n"
 -  "movzb  (%0,%%r11,1),%%rax\n"
 -  "movq   (%5,%%rax,8),%%xmm1\n"
 -  "paddsw %%xmm0,%%xmm1\n"
 -  "psraw  $0x6,%%xmm1\n"
 -  "packuswb %%xmm1,%%xmm1\n"
 -  "movd   %%xmm1,0x0(%3)\n"
 -
 -"scaledone:"
 -  :
 -  : "r"(y_buf),  // %0
 -    "r"(u_buf),  // %1
 -    "r"(v_buf),  // %2
 -    "r"(rgb_buf),  // %3
 -    "r"(width),  // %4
 -    "r" (kCoefficientsRgbY),  // %5
--    "r"(static_cast<long>(scaled_dx))  // %6
+-    "r"(static_cast<long>(source_dx))  // %6
 -  : "memory", "r10", "r11", "rax", "xmm0", "xmm1", "xmm2"
 -);
 -}
 -
--#else
+-void LinearScaleYUVToRGB32Row(const uint8* y_buf,
+-                              const uint8* u_buf,
+-                              const uint8* v_buf,
+-                              uint8* rgb_buf,
+-                              int width,
+-                              int source_dx) {
+-  asm(
+-  "xor    %%r11,%%r11\n"   // x = 0
+-  "sub    $0x2,%4\n"
+-  "js     .lscalenext\n"
+-  "cmp    $0x20000,%6\n"   // if source_dx >= 2.0
+-  "jl     .lscalehalf\n"
+-  "mov    $0x8000,%%r11\n" // x = 0.5 for 1/2 or less
+-".lscalehalf:"
+-
+-".lscaleloop:"
+-  "mov    %%r11,%%r10\n"
+-  "sar    $0x11,%%r10\n"
+-
+-  "movzb  (%1, %%r10, 1), %%r13 \n"
+-  "movzb  1(%1, %%r10, 1), %%r14 \n"
+-  "mov    %%r11, %%rax \n"
+-  "and    $0x1fffe, %%rax \n"
+-  "imul   %%rax, %%r14 \n"
+-  "xor    $0x1fffe, %%rax \n"
+-  "imul   %%rax, %%r13 \n"
+-  "add    %%r14, %%r13 \n"
+-  "shr    $17, %%r13 \n"
+-  "movq   2048(%5,%%r13,8), %%xmm0\n"
+-
+-  "movzb  (%2, %%r10, 1), %%r13 \n"
+-  "movzb  1(%2, %%r10, 1), %%r14 \n"
+-  "mov    %%r11, %%rax \n"
+-  "and    $0x1fffe, %%rax \n"
+-  "imul   %%rax, %%r14 \n"
+-  "xor    $0x1fffe, %%rax \n"
+-  "imul   %%rax, %%r13 \n"
+-  "add    %%r14, %%r13 \n"
+-  "shr    $17, %%r13 \n"
+-  "movq   4096(%5,%%r13,8), %%xmm1\n"
+-
+-  "mov    %%r11, %%rax \n"
+-  "lea    (%%r11,%6),%%r10\n"
+-  "sar    $0x10,%%r11\n"
+-  "paddsw %%xmm1,%%xmm0\n"
+-
+-  "movzb  (%0, %%r11, 1), %%r13 \n"
+-  "movzb  1(%0, %%r11, 1), %%r14 \n"
+-  "and    $0xffff, %%rax \n"
+-  "imul   %%rax, %%r14 \n"
+-  "xor    $0xffff, %%rax \n"
+-  "imul   %%rax, %%r13 \n"
+-  "add    %%r14, %%r13 \n"
+-  "shr    $16, %%r13 \n"
+-  "movq   (%5,%%r13,8),%%xmm1\n"
+-
+-  "mov    %%r10, %%rax \n"
+-  "lea    (%%r10,%6),%%r11\n"
+-  "sar    $0x10,%%r10\n"
+-
+-  "movzb  (%0,%%r10,1), %%r13 \n"
+-  "movzb  1(%0,%%r10,1), %%r14 \n"
+-  "and    $0xffff, %%rax \n"
+-  "imul   %%rax, %%r14 \n"
+-  "xor    $0xffff, %%rax \n"
+-  "imul   %%rax, %%r13 \n"
+-  "add    %%r14, %%r13 \n"
+-  "shr    $16, %%r13 \n"
+-  "movq   (%5,%%r13,8),%%xmm2\n"
+-
+-  "paddsw %%xmm0,%%xmm1\n"
+-  "paddsw %%xmm0,%%xmm2\n"
+-  "shufps $0x44,%%xmm2,%%xmm1\n"
+-  "psraw  $0x6,%%xmm1\n"
+-  "packuswb %%xmm1,%%xmm1\n"
+-  "movq   %%xmm1,0x0(%3)\n"
+-  "add    $0x8,%3\n"
+-  "sub    $0x2,%4\n"
+-  "jns    .lscaleloop\n"
+-
+-".lscalenext:"
+-  "add    $0x1,%4\n"
+-  "js     .lscaledone\n"
+-
+-  "mov    %%r11,%%r10\n"
+-  "sar    $0x11,%%r10\n"
+-
+-  "movzb  (%1,%%r10,1), %%r13 \n"
+-  "movq   2048(%5,%%r13,8),%%xmm0\n"
+-
+-  "movzb  (%2,%%r10,1), %%r13 \n"
+-  "movq   4096(%5,%%r13,8),%%xmm1\n"
+-
+-  "paddsw %%xmm1,%%xmm0\n"
+-  "sar    $0x10,%%r11\n"
+-
+-  "movzb  (%0,%%r11,1), %%r13 \n"
+-  "movq   (%5,%%r13,8),%%xmm1\n"
+-
+-  "paddsw %%xmm0,%%xmm1\n"
+-  "psraw  $0x6,%%xmm1\n"
+-  "packuswb %%xmm1,%%xmm1\n"
+-  "movd   %%xmm1,0x0(%3)\n"
+-
+-".lscaledone:"
+-  :
+-  : "r"(y_buf),  // %0
+-    "r"(u_buf),  // %1
+-    "r"(v_buf),  // %2
+-    "r"(rgb_buf),  // %3
+-    "r"(width),  // %4
+-    "r" (kCoefficientsRgbY),  // %5
+-    "r"(static_cast<long>(source_dx))  // %6
+-  : "memory", "r10", "r11", "r13", "r14", "rax", "xmm0", "xmm1", "xmm2"
+-);
+-}
+-
+-#elif USE_MMX && !defined(ARCH_CPU_X86_64) && !defined(__PIC__)
+-
+-// PIC version is slower because less registers are available, so
+-// non-PIC is used on platforms where it is possible.
 -
 -void FastConvertYUVToRGB32Row(const uint8* y_buf,
 -                              const uint8* u_buf,
 -                              const uint8* v_buf,
 -                              uint8* rgb_buf,
 -                              int width);
--
 -  asm(
+-  ".text\n"
 -  ".global FastConvertYUVToRGB32Row\n"
 -"FastConvertYUVToRGB32Row:\n"
 -  "pusha\n"
 -  "mov    0x24(%esp),%edx\n"
 -  "mov    0x28(%esp),%edi\n"
 -  "mov    0x2c(%esp),%esi\n"
 -  "mov    0x30(%esp),%ebp\n"
 -  "mov    0x34(%esp),%ecx\n"
@@ -799,47 +890,47 @@ index 4a20777..a81416c 100644
 -);
 -
 -
 -void ScaleYUVToRGB32Row(const uint8* y_buf,
 -                        const uint8* u_buf,
 -                        const uint8* v_buf,
 -                        uint8* rgb_buf,
 -                        int width,
--                        int scaled_dx);
--
+-                        int source_dx);
 -  asm(
+-  ".text\n"
 -  ".global ScaleYUVToRGB32Row\n"
 -"ScaleYUVToRGB32Row:\n"
 -  "pusha\n"
 -  "mov    0x24(%esp),%edx\n"
 -  "mov    0x28(%esp),%edi\n"
 -  "mov    0x2c(%esp),%esi\n"
 -  "mov    0x30(%esp),%ebp\n"
 -  "mov    0x34(%esp),%ecx\n"
 -  "xor    %ebx,%ebx\n"
 -  "jmp    scaleend\n"
 -
 -"scaleloop:"
 -  "mov    %ebx,%eax\n"
--  "sar    $0x5,%eax\n"
+-  "sar    $0x11,%eax\n"
 -  "movzbl (%edi,%eax,1),%eax\n"
 -  "movq   kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
 -  "mov    %ebx,%eax\n"
--  "sar    $0x5,%eax\n"
+-  "sar    $0x11,%eax\n"
 -  "movzbl (%esi,%eax,1),%eax\n"
 -  "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
 -  "mov    %ebx,%eax\n"
 -  "add    0x38(%esp),%ebx\n"
--  "sar    $0x4,%eax\n"
+-  "sar    $0x10,%eax\n"
 -  "movzbl (%edx,%eax,1),%eax\n"
 -  "movq   kCoefficientsRgbY(,%eax,8),%mm1\n"
 -  "mov    %ebx,%eax\n"
 -  "add    0x38(%esp),%ebx\n"
--  "sar    $0x4,%eax\n"
+-  "sar    $0x10,%eax\n"
 -  "movzbl (%edx,%eax,1),%eax\n"
 -  "movq   kCoefficientsRgbY(,%eax,8),%mm2\n"
 -  "paddsw %mm0,%mm1\n"
 -  "paddsw %mm0,%mm2\n"
 -  "psraw  $0x6,%mm1\n"
 -  "psraw  $0x6,%mm2\n"
 -  "packuswb %mm2,%mm1\n"
 -  "movntq %mm1,0x0(%ebp)\n"
@@ -847,393 +938,1641 @@ index 4a20777..a81416c 100644
 -"scaleend:"
 -  "sub    $0x2,%ecx\n"
 -  "jns    scaleloop\n"
 -
 -  "and    $0x1,%ecx\n"
 -  "je     scaledone\n"
 -
 -  "mov    %ebx,%eax\n"
--  "sar    $0x5,%eax\n"
+-  "sar    $0x11,%eax\n"
 -  "movzbl (%edi,%eax,1),%eax\n"
 -  "movq   kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
 -  "mov    %ebx,%eax\n"
--  "sar    $0x5,%eax\n"
+-  "sar    $0x11,%eax\n"
 -  "movzbl (%esi,%eax,1),%eax\n"
 -  "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
 -  "mov    %ebx,%eax\n"
--  "sar    $0x4,%eax\n"
+-  "sar    $0x10,%eax\n"
 -  "movzbl (%edx,%eax,1),%eax\n"
 -  "movq   kCoefficientsRgbY(,%eax,8),%mm1\n"
 -  "paddsw %mm0,%mm1\n"
 -  "psraw  $0x6,%mm1\n"
 -  "packuswb %mm1,%mm1\n"
 -  "movd   %mm1,0x0(%ebp)\n"
 -
 -"scaledone:"
 -  "popa\n"
 -  "ret\n"
 -);
 -
+-void LinearScaleYUVToRGB32Row(const uint8* y_buf,
+-                              const uint8* u_buf,
+-                              const uint8* v_buf,
+-                              uint8* rgb_buf,
+-                              int width,
+-                              int source_dx);
+-  asm(
+-  ".text\n"
+-  ".global LinearScaleYUVToRGB32Row\n"
+-"LinearScaleYUVToRGB32Row:\n"
+-  "pusha\n"
+-  "mov    0x24(%esp),%edx\n"
+-  "mov    0x28(%esp),%edi\n"
+-  "mov    0x30(%esp),%ebp\n"
+-
+-  // source_width = width * source_dx + ebx
+-  "mov    0x34(%esp), %ecx\n"
+-  "imull  0x38(%esp), %ecx\n"
+-  "mov    %ecx, 0x34(%esp)\n"
+-
+-  "mov    0x38(%esp), %ecx\n"
+-  "xor    %ebx,%ebx\n"     // x = 0
+-  "cmp    $0x20000,%ecx\n" // if source_dx >= 2.0
+-  "jl     .lscaleend\n"
+-  "mov    $0x8000,%ebx\n"  // x = 0.5 for 1/2 or less
+-  "jmp    .lscaleend\n"
+-
+-".lscaleloop:"
+-  "mov    %ebx,%eax\n"
+-  "sar    $0x11,%eax\n"
+-
+-  "movzbl (%edi,%eax,1),%ecx\n"
+-  "movzbl 1(%edi,%eax,1),%esi\n"
+-  "mov    %ebx,%eax\n"
+-  "andl   $0x1fffe, %eax \n"
+-  "imul   %eax, %esi \n"
+-  "xorl   $0x1fffe, %eax \n"
+-  "imul   %eax, %ecx \n"
+-  "addl   %esi, %ecx \n"
+-  "shrl   $17, %ecx \n"
+-  "movq   kCoefficientsRgbY+2048(,%ecx,8),%mm0\n"
+-
+-  "mov    0x2c(%esp),%esi\n"
+-  "mov    %ebx,%eax\n"
+-  "sar    $0x11,%eax\n"
+-
+-  "movzbl (%esi,%eax,1),%ecx\n"
+-  "movzbl 1(%esi,%eax,1),%esi\n"
+-  "mov    %ebx,%eax\n"
+-  "andl   $0x1fffe, %eax \n"
+-  "imul   %eax, %esi \n"
+-  "xorl   $0x1fffe, %eax \n"
+-  "imul   %eax, %ecx \n"
+-  "addl   %esi, %ecx \n"
+-  "shrl   $17, %ecx \n"
+-  "paddsw kCoefficientsRgbY+4096(,%ecx,8),%mm0\n"
+-
+-  "mov    %ebx,%eax\n"
+-  "sar    $0x10,%eax\n"
+-  "movzbl (%edx,%eax,1),%ecx\n"
+-  "movzbl 1(%edx,%eax,1),%esi\n"
+-  "mov    %ebx,%eax\n"
+-  "add    0x38(%esp),%ebx\n"
+-  "andl   $0xffff, %eax \n"
+-  "imul   %eax, %esi \n"
+-  "xorl   $0xffff, %eax \n"
+-  "imul   %eax, %ecx \n"
+-  "addl   %esi, %ecx \n"
+-  "shrl   $16, %ecx \n"
+-  "movq   kCoefficientsRgbY(,%ecx,8),%mm1\n"
+-
+-  "cmp    0x34(%esp), %ebx\n"
+-  "jge    .lscalelastpixel\n"
+-
+-  "mov    %ebx,%eax\n"
+-  "sar    $0x10,%eax\n"
+-  "movzbl (%edx,%eax,1),%ecx\n"
+-  "movzbl 1(%edx,%eax,1),%esi\n"
+-  "mov    %ebx,%eax\n"
+-  "add    0x38(%esp),%ebx\n"
+-  "andl   $0xffff, %eax \n"
+-  "imul   %eax, %esi \n"
+-  "xorl   $0xffff, %eax \n"
+-  "imul   %eax, %ecx \n"
+-  "addl   %esi, %ecx \n"
+-  "shrl   $16, %ecx \n"
+-  "movq   kCoefficientsRgbY(,%ecx,8),%mm2\n"
+-
+-  "paddsw %mm0,%mm1\n"
+-  "paddsw %mm0,%mm2\n"
+-  "psraw  $0x6,%mm1\n"
+-  "psraw  $0x6,%mm2\n"
+-  "packuswb %mm2,%mm1\n"
+-  "movntq %mm1,0x0(%ebp)\n"
+-  "add    $0x8,%ebp\n"
+-
+-".lscaleend:"
+-  "cmp    0x34(%esp), %ebx\n"
+-  "jl     .lscaleloop\n"
+-  "popa\n"
+-  "ret\n"
+-
+-".lscalelastpixel:"
+-  "paddsw %mm0, %mm1\n"
+-  "psraw $6, %mm1\n"
+-  "packuswb %mm1, %mm1\n"
+-  "movd %mm1, (%ebp)\n"
+-  "popa\n"
+-  "ret\n"
+-);
+-
+-#elif USE_MMX && !defined(ARCH_CPU_X86_64) && defined(__PIC__)
+-
+-extern void PICConvertYUVToRGB32Row(const uint8* y_buf,
+-                                    const uint8* u_buf,
+-                                    const uint8* v_buf,
+-                                    uint8* rgb_buf,
+-                                    int width,
+-                                    int16 *kCoefficientsRgbY);
+-  asm(
+-  ".text\n"
+-#if defined(OS_MACOSX)
+-"_PICConvertYUVToRGB32Row:\n"
+-#else
+-"PICConvertYUVToRGB32Row:\n"
 -#endif
+-  "pusha\n"
+-  "mov    0x24(%esp),%edx\n"
+-  "mov    0x28(%esp),%edi\n"
+-  "mov    0x2c(%esp),%esi\n"
+-  "mov    0x30(%esp),%ebp\n"
+-  "mov    0x38(%esp),%ecx\n"
+-
+-  "jmp    .Lconvertend\n"
+-
+-".Lconvertloop:"
+-  "movzbl (%edi),%eax\n"
+-  "add    $0x1,%edi\n"
+-  "movzbl (%esi),%ebx\n"
+-  "add    $0x1,%esi\n"
+-  "movq   2048(%ecx,%eax,8),%mm0\n"
+-  "movzbl (%edx),%eax\n"
+-  "paddsw 4096(%ecx,%ebx,8),%mm0\n"
+-  "movzbl 0x1(%edx),%ebx\n"
+-  "movq   0(%ecx,%eax,8),%mm1\n"
+-  "add    $0x2,%edx\n"
+-  "movq   0(%ecx,%ebx,8),%mm2\n"
+-  "paddsw %mm0,%mm1\n"
+-  "paddsw %mm0,%mm2\n"
+-  "psraw  $0x6,%mm1\n"
+-  "psraw  $0x6,%mm2\n"
+-  "packuswb %mm2,%mm1\n"
+-  "movntq %mm1,0x0(%ebp)\n"
+-  "add    $0x8,%ebp\n"
+-".Lconvertend:"
+-  "subl   $0x2,0x34(%esp)\n"
+-  "jns    .Lconvertloop\n"
+-
+-  "andl   $0x1,0x34(%esp)\n"
+-  "je     .Lconvertdone\n"
+-
+-  "movzbl (%edi),%eax\n"
+-  "movq   2048(%ecx,%eax,8),%mm0\n"
+-  "movzbl (%esi),%eax\n"
+-  "paddsw 4096(%ecx,%eax,8),%mm0\n"
+-  "movzbl (%edx),%eax\n"
+-  "movq   0(%ecx,%eax,8),%mm1\n"
+-  "paddsw %mm0,%mm1\n"
+-  "psraw  $0x6,%mm1\n"
+-  "packuswb %mm1,%mm1\n"
+-  "movd   %mm1,0x0(%ebp)\n"
+-".Lconvertdone:\n"
+-  "popa\n"
+-  "ret\n"
+-);
+-
+-void FastConvertYUVToRGB32Row(const uint8* y_buf,
+-                              const uint8* u_buf,
+-                              const uint8* v_buf,
+-                              uint8* rgb_buf,
+-                              int width) {
+-  PICConvertYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width,
+-                          &kCoefficientsRgbY[0][0]);
+-}
+-
+-extern void PICScaleYUVToRGB32Row(const uint8* y_buf,
+-                               const uint8* u_buf,
+-                               const uint8* v_buf,
+-                               uint8* rgb_buf,
+-                               int width,
+-                               int source_dx,
+-                               int16 *kCoefficientsRgbY);
+-
+-  asm(
+-  ".text\n"
+-#if defined(OS_MACOSX)
+-"_PICScaleYUVToRGB32Row:\n"
+-#else
+-"PICScaleYUVToRGB32Row:\n"
+-#endif
+-  "pusha\n"
+-  "mov    0x24(%esp),%edx\n"
+-  "mov    0x28(%esp),%edi\n"
+-  "mov    0x2c(%esp),%esi\n"
+-  "mov    0x30(%esp),%ebp\n"
+-  "mov    0x3c(%esp),%ecx\n"
+-  "xor    %ebx,%ebx\n"
+-  "jmp    Lscaleend\n"
+-
+-"Lscaleloop:"
+-  "mov    %ebx,%eax\n"
+-  "sar    $0x11,%eax\n"
+-  "movzbl (%edi,%eax,1),%eax\n"
+-  "movq   2048(%ecx,%eax,8),%mm0\n"
+-  "mov    %ebx,%eax\n"
+-  "sar    $0x11,%eax\n"
+-  "movzbl (%esi,%eax,1),%eax\n"
+-  "paddsw 4096(%ecx,%eax,8),%mm0\n"
+-  "mov    %ebx,%eax\n"
+-  "add    0x38(%esp),%ebx\n"
+-  "sar    $0x10,%eax\n"
+-  "movzbl (%edx,%eax,1),%eax\n"
+-  "movq   0(%ecx,%eax,8),%mm1\n"
+-  "mov    %ebx,%eax\n"
+-  "add    0x38(%esp),%ebx\n"
+-  "sar    $0x10,%eax\n"
+-  "movzbl (%edx,%eax,1),%eax\n"
+-  "movq   0(%ecx,%eax,8),%mm2\n"
+-  "paddsw %mm0,%mm1\n"
+-  "paddsw %mm0,%mm2\n"
+-  "psraw  $0x6,%mm1\n"
+-  "psraw  $0x6,%mm2\n"
+-  "packuswb %mm2,%mm1\n"
+-  "movntq %mm1,0x0(%ebp)\n"
+-  "add    $0x8,%ebp\n"
+-"Lscaleend:"
+-  "subl   $0x2,0x34(%esp)\n"
+-  "jns    Lscaleloop\n"
+-
+-  "andl   $0x1,0x34(%esp)\n"
+-  "je     Lscaledone\n"
+-
+-  "mov    %ebx,%eax\n"
+-  "sar    $0x11,%eax\n"
+-  "movzbl (%edi,%eax,1),%eax\n"
+-  "movq   2048(%ecx,%eax,8),%mm0\n"
+-  "mov    %ebx,%eax\n"
+-  "sar    $0x11,%eax\n"
+-  "movzbl (%esi,%eax,1),%eax\n"
+-  "paddsw 4096(%ecx,%eax,8),%mm0\n"
+-  "mov    %ebx,%eax\n"
+-  "sar    $0x10,%eax\n"
+-  "movzbl (%edx,%eax,1),%eax\n"
+-  "movq   0(%ecx,%eax,8),%mm1\n"
+-  "paddsw %mm0,%mm1\n"
+-  "psraw  $0x6,%mm1\n"
+-  "packuswb %mm1,%mm1\n"
+-  "movd   %mm1,0x0(%ebp)\n"
+-
+-"Lscaledone:"
+-  "popa\n"
+-  "ret\n"
+-);
+-
+-
+-void ScaleYUVToRGB32Row(const uint8* y_buf,
+-                        const uint8* u_buf,
+-                        const uint8* v_buf,
+-                        uint8* rgb_buf,
+-                        int width,
+-                        int source_dx) {
+-  PICScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, source_dx,
+-                        &kCoefficientsRgbY[0][0]);
+-}
+-
+-void PICLinearScaleYUVToRGB32Row(const uint8* y_buf,
+-                                 const uint8* u_buf,
+-                                 const uint8* v_buf,
+-                                 uint8* rgb_buf,
+-                                 int width,
+-                                 int source_dx,
+-                                 int16 *kCoefficientsRgbY);
+-  asm(
+-  ".text\n"
+-#if defined(OS_MACOSX)
+-"_PICLinearScaleYUVToRGB32Row:\n"
+-#else
+-"PICLinearScaleYUVToRGB32Row:\n"
+-#endif
+-  "pusha\n"
+-  "mov    0x24(%esp),%edx\n"
+-  "mov    0x30(%esp),%ebp\n"
+-  "mov    0x34(%esp),%ecx\n"
+-  "mov    0x3c(%esp),%edi\n"
+-  "xor    %ebx,%ebx\n"
+-
+-  // source_width = width * source_dx + ebx
+-  "mov    0x34(%esp), %ecx\n"
+-  "imull  0x38(%esp), %ecx\n"
+-  "mov    %ecx, 0x34(%esp)\n"
+-
+-  "mov    0x38(%esp), %ecx\n"
+-  "xor    %ebx,%ebx\n"     // x = 0
+-  "cmp    $0x20000,%ecx\n" // if source_dx >= 2.0
+-  "jl     .lscaleend\n"
+-  "mov    $0x8000,%ebx\n"  // x = 0.5 for 1/2 or less
+-  "jmp    .lscaleend\n"
+-
+-".lscaleloop:"
+-  "mov    0x28(%esp),%esi\n"
+-  "mov    %ebx,%eax\n"
+-  "sar    $0x11,%eax\n"
+-
+-  "movzbl (%esi,%eax,1),%ecx\n"
+-  "movzbl 1(%esi,%eax,1),%esi\n"
+-  "mov    %ebx,%eax\n"
+-  "andl   $0x1fffe, %eax \n"
+-  "imul   %eax, %esi \n"
+-  "xorl   $0x1fffe, %eax \n"
+-  "imul   %eax, %ecx \n"
+-  "addl   %esi, %ecx \n"
+-  "shrl   $17, %ecx \n"
+-  "movq   2048(%edi,%ecx,8),%mm0\n"
+-
+-  "mov    0x2c(%esp),%esi\n"
+-  "mov    %ebx,%eax\n"
+-  "sar    $0x11,%eax\n"
+-
+-  "movzbl (%esi,%eax,1),%ecx\n"
+-  "movzbl 1(%esi,%eax,1),%esi\n"
+-  "mov    %ebx,%eax\n"
+-  "andl   $0x1fffe, %eax \n"
+-  "imul   %eax, %esi \n"
+-  "xorl   $0x1fffe, %eax \n"
+-  "imul   %eax, %ecx \n"
+-  "addl   %esi, %ecx \n"
+-  "shrl   $17, %ecx \n"
+-  "paddsw 4096(%edi,%ecx,8),%mm0\n"
+-
+-  "mov    %ebx,%eax\n"
+-  "sar    $0x10,%eax\n"
+-  "movzbl (%edx,%eax,1),%ecx\n"
+-  "movzbl 1(%edx,%eax,1),%esi\n"
+-  "mov    %ebx,%eax\n"
+-  "add    0x38(%esp),%ebx\n"
+-  "andl   $0xffff, %eax \n"
+-  "imul   %eax, %esi \n"
+-  "xorl   $0xffff, %eax \n"
+-  "imul   %eax, %ecx \n"
+-  "addl   %esi, %ecx \n"
+-  "shrl   $16, %ecx \n"
+-  "movq   (%edi,%ecx,8),%mm1\n"
+-
+-  "cmp    0x34(%esp), %ebx\n"
+-  "jge    .lscalelastpixel\n"
+-
+-  "mov    %ebx,%eax\n"
+-  "sar    $0x10,%eax\n"
+-  "movzbl (%edx,%eax,1),%ecx\n"
+-  "movzbl 1(%edx,%eax,1),%esi\n"
+-  "mov    %ebx,%eax\n"
+-  "add    0x38(%esp),%ebx\n"
+-  "andl   $0xffff, %eax \n"
+-  "imul   %eax, %esi \n"
+-  "xorl   $0xffff, %eax \n"
+-  "imul   %eax, %ecx \n"
+-  "addl   %esi, %ecx \n"
+-  "shrl   $16, %ecx \n"
+-  "movq   (%edi,%ecx,8),%mm2\n"
+-
+-  "paddsw %mm0,%mm1\n"
+-  "paddsw %mm0,%mm2\n"
+-  "psraw  $0x6,%mm1\n"
+-  "psraw  $0x6,%mm2\n"
+-  "packuswb %mm2,%mm1\n"
+-  "movntq %mm1,0x0(%ebp)\n"
+-  "add    $0x8,%ebp\n"
+-
+-".lscaleend:"
+-  "cmp    %ebx, 0x34(%esp)\n"
+-  "jg     .lscaleloop\n"
+-  "popa\n"
+-  "ret\n"
+-
+-".lscalelastpixel:"
+-  "paddsw %mm0, %mm1\n"
+-  "psraw $6, %mm1\n"
+-  "packuswb %mm1, %mm1\n"
+-  "movd %mm1, (%ebp)\n"
+-  "popa\n"
+-  "ret\n"
+-);
+-
+-void LinearScaleYUVToRGB32Row(const uint8* y_buf,
+-                        const uint8* u_buf,
+-                        const uint8* v_buf,
+-                        uint8* rgb_buf,
+-                        int width,
+-                        int source_dx) {
+-  PICLinearScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, source_dx,
+-                              &kCoefficientsRgbY[0][0]);
+-}
 -
 -#else  // USE_MMX
- 
- // Reference version of YUV converter.
- static const int kClipTableSize = 256;
- static const int kClipOverflow = 288;  // Cb max is 535.
+-
+ // C reference code that mimic the YUV assembly.
+ #define packuswb(x) ((x) < 0 ? 0 : ((x) > 255 ? 255 : (x)))
+ #define paddsw(x, y) (((x) + (y)) < -32768 ? -32768 : \
+     (((x) + (y)) > 32767 ? 32767 : ((x) + (y))))
  
- static uint8 kRgbClipTable[kClipOverflow +
-                            kClipTableSize +
-                            kClipOverflow] = {
-@@ -651,17 +149,17 @@ static inline void YuvPixel(uint8 y,
+ static inline void YuvPixel(uint8 y,
+                             uint8 u,
+                             uint8 v,
+@@ -833,66 +39,71 @@ static inline void YuvPixel(uint8 y,
+   a >>= 6;
  
-   int32 C298a = ((static_cast<int32>(y) - 16) * 298 + 128);
-   *reinterpret_cast<uint32*>(rgb_buf) = (clip(C298a + cb)) |
-                                         (clip(C298a + cg) << 8) |
-                                         (clip(C298a + cr) << 16) |
-                                         (0xff000000);
+   *reinterpret_cast<uint32*>(rgb_buf) = (packuswb(b)) |
+                                         (packuswb(g) << 8) |
+                                         (packuswb(r) << 16) |
+                                         (packuswb(a) << 24);
  }
  
 -void FastConvertYUVToRGB32Row(const uint8* y_buf,
+-                              const uint8* u_buf,
+-                              const uint8* v_buf,
+-                              uint8* rgb_buf,
+-                              int width) {
 +void FastConvertYUVToRGB32Row_C(const uint8* y_buf,
-                               const uint8* u_buf,
-                               const uint8* v_buf,
-                               uint8* rgb_buf,
-                               int width) {
++                                const uint8* u_buf,
++                                const uint8* v_buf,
++                                uint8* rgb_buf,
++                                int width,
++                                unsigned int x_shift) {
    for (int x = 0; x < width; x += 2) {
-     uint8 u = u_buf[x >> 1];
-     uint8 v = v_buf[x >> 1];
+-    uint8 u = u_buf[x >> 1];
+-    uint8 v = v_buf[x >> 1];
++    uint8 u = u_buf[x >> x_shift];
++    uint8 v = v_buf[x >> x_shift];
      uint8 y0 = y_buf[x];
-@@ -673,27 +171,26 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
+     YuvPixel(y0, u, v, rgb_buf);
+     if ((x + 1) < width) {
+       uint8 y1 = y_buf[x + 1];
++      if (x_shift == 0) {
++        u = u_buf[x + 1];
++        v = v_buf[x + 1];
++      }
+       YuvPixel(y1, u, v, rgb_buf + 4);
+     }
      rgb_buf += 8;  // Advance 2 pixels.
    }
  }
  
- // 28.4 fixed point is used.  A shift by 4 isolates the integer.
- // A shift by 5 is used to further subsample the chrominence channels.
- // & 15 isolates the fixed point fraction.  >> 2 to get the upper 2 bits,
- // for 1/4 pixel accurate interpolation.
+ // 16.16 fixed point is used.  A shift by 16 isolates the integer.
+ // A shift by 17 is used to further subsample the chrominence channels.
+ // & 0xffff isolates the fixed point fraction.  >> 2 to get the upper 2 bits,
+ // for 1/65536 pixel accurate interpolation.
 -void ScaleYUVToRGB32Row(const uint8* y_buf,
+-                        const uint8* u_buf,
+-                        const uint8* v_buf,
+-                        uint8* rgb_buf,
+-                        int width,
+-                        int source_dx) {
 +void ScaleYUVToRGB32Row_C(const uint8* y_buf,
-                         const uint8* u_buf,
-                         const uint8* v_buf,
-                         uint8* rgb_buf,
-                         int width,
-                         int scaled_dx) {
-   int scaled_x = 0;
-   for (int x = 0; x < width; ++x) {
-     uint8 u = u_buf[scaled_x >> 5];
-     uint8 v = v_buf[scaled_x >> 5];
-     uint8 y0 = y_buf[scaled_x >> 4];
-     YuvPixel(y0, u, v, rgb_buf);
-     rgb_buf += 4;
-     scaled_x += scaled_dx;
++                          const uint8* u_buf,
++                          const uint8* v_buf,
++                          uint8* rgb_buf,
++                          int width,
++                          int source_dx) {
+   int x = 0;
+   for (int i = 0; i < width; i += 2) {
+     int y = y_buf[x >> 16];
+     int u = u_buf[(x >> 17)];
+     int v = v_buf[(x >> 17)];
+     YuvPixel(y, u, v, rgb_buf);
+     x += source_dx;
+     if ((i + 1) < width) {
+       y = y_buf[x >> 16];
+       YuvPixel(y, u, v, rgb_buf+4);
+       x += source_dx;
+     }
+     rgb_buf += 8;
    }
  }
+ 
+-void LinearScaleYUVToRGB32Row(const uint8* y_buf,
+-                              const uint8* u_buf,
+-                              const uint8* v_buf,
+-                              uint8* rgb_buf,
+-                              int width,
+-                              int source_dx) {
++void LinearScaleYUVToRGB32Row_C(const uint8* y_buf,
++                                const uint8* u_buf,
++                                const uint8* v_buf,
++                                uint8* rgb_buf,
++                                int width,
++                                int source_dx) {
+   int x = 0;
+   if (source_dx >= 0x20000) {
+     x = 32768;
+   }
+   for (int i = 0; i < width; i += 2) {
+     int y0 = y_buf[x >> 16];
+     int y1 = y_buf[(x >> 16) + 1];
+     int u0 = u_buf[(x >> 17)];
+@@ -913,11 +124,10 @@ void LinearScaleYUVToRGB32Row(const uint8* y_buf,
+       y = (y_frac * y1 + (y_frac ^ 65535) * y0) >> 16;
+       YuvPixel(y, u, v, rgb_buf+4);
+       x += source_dx;
+     }
+     rgb_buf += 8;
+   }
+ }
+ 
 -#endif  // USE_MMX
  }  // extern "C"
  
-diff --git b/gfx/ycbcr/yuv_row_linux.cpp a/gfx/ycbcr/yuv_row_linux.cpp
-index 4a20777..eeb78f6 100644
---- b/gfx/ycbcr/yuv_row_linux.cpp
-+++ a/gfx/ycbcr/yuv_row_linux.cpp
-@@ -1,25 +1,20 @@
- // Copyright (c) 2009 The Chromium Authors. All rights reserved.
+diff --git b/gfx/ycbcr/yuv_row_posix.cpp a/gfx/ycbcr/yuv_row_posix.cpp
+index a66fa7b..382c2bd 100644
+--- b/gfx/ycbcr/yuv_row_posix.cpp
++++ a/gfx/ycbcr/yuv_row_posix.cpp
+@@ -1,33 +1,29 @@
+ // Copyright (c) 2010 The Chromium Authors. All rights reserved.
  // Use of this source code is governed by a BSD-style license that can be
  // found in the LICENSE file.
  
 -#include "media/base/yuv_row.h"
 +#include "yuv_row.h"
  
 -#ifdef _DEBUG
 -#include "base/logging.h"
 -#else
  #define DCHECK(a)
 -#endif
  
- // TODO(fbarchard): Move this to yuv_row_posix.cc to share with Mac.
- // TODO(fbarchard): Do 64 bit version.
- 
  extern "C" {
--#if USE_MMX
- 
- #define RGBY(i) { \
-   static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
-   static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
-   static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
-   0 \
- }
  
-@@ -234,21 +229,16 @@ MMX_ALIGNED(int16 kCoefficientsRgbY[768][4]) = {
-   RGBV(0xE8), RGBV(0xE9), RGBV(0xEA), RGBV(0xEB),
-   RGBV(0xEC), RGBV(0xED), RGBV(0xEE), RGBV(0xEF),
-   RGBV(0xF0), RGBV(0xF1), RGBV(0xF2), RGBV(0xF3),
-   RGBV(0xF4), RGBV(0xF5), RGBV(0xF6), RGBV(0xF7),
-   RGBV(0xF8), RGBV(0xF9), RGBV(0xFA), RGBV(0xFB),
-   RGBV(0xFC), RGBV(0xFD), RGBV(0xFE), RGBV(0xFF),
- };
- 
--#undef RGBY
--#undef RGBU
--#undef RGBV
--#undef MMX_ALIGNED
--
- #if defined(ARCH_CPU_X86_64)
+-#if USE_SSE2 && defined(ARCH_CPU_X86_64)
++#if defined(ARCH_CPU_X86_64)
  
  // AMD64 ABI uses register paremters.
  void FastConvertYUVToRGB32Row(const uint8* y_buf,  // rdi
                                const uint8* u_buf,  // rsi
                                const uint8* v_buf,  // rdx
                                uint8* rgb_buf,      // rcx
                                int width) {         // r8
-@@ -376,17 +366,16 @@ void ScaleYUVToRGB32Row(const uint8* y_buf,  // rdi
+   asm(
+-  "jmp    convertend\n"
+-"convertloop:"
++  "jmp    1f\n"
++"0:"
+   "movzb  (%1),%%r10\n"
+   "add    $0x1,%1\n"
+   "movzb  (%2),%%r11\n"
+   "add    $0x1,%2\n"
+   "movq   2048(%5,%%r10,8),%%xmm0\n"
+   "movzb  (%0),%%r10\n"
+   "movq   4096(%5,%%r11,8),%%xmm1\n"
+   "movzb  0x1(%0),%%r11\n"
+@@ -37,36 +33,36 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,  // rdi
+   "movq   (%5,%%r11,8),%%xmm3\n"
+   "paddsw %%xmm0,%%xmm2\n"
+   "paddsw %%xmm0,%%xmm3\n"
+   "shufps $0x44,%%xmm3,%%xmm2\n"
+   "psraw  $0x6,%%xmm2\n"
+   "packuswb %%xmm2,%%xmm2\n"
+   "movq   %%xmm2,0x0(%3)\n"
+   "add    $0x8,%3\n"
+-"convertend:"
++"1:"
+   "sub    $0x2,%4\n"
+-  "jns    convertloop\n"
++  "jns    0b\n"
+ 
+-"convertnext:"
++"2:"
+   "add    $0x1,%4\n"
+-  "js     convertdone\n"
++  "js     3f\n"
+ 
+   "movzb  (%1),%%r10\n"
+   "movq   2048(%5,%%r10,8),%%xmm0\n"
+   "movzb  (%2),%%r10\n"
+   "movq   4096(%5,%%r10,8),%%xmm1\n"
+   "paddsw %%xmm1,%%xmm0\n"
+   "movzb  (%0),%%r10\n"
+   "movq   (%5,%%r10,8),%%xmm1\n"
+   "paddsw %%xmm0,%%xmm1\n"
+   "psraw  $0x6,%%xmm1\n"
+   "packuswb %%xmm1,%%xmm1\n"
+   "movd   %%xmm1,0x0(%3)\n"
+-"convertdone:"
++"3:"
+   :
+   : "r"(y_buf),  // %0
+     "r"(u_buf),  // %1
+     "r"(v_buf),  // %2
+     "r"(rgb_buf),  // %3
+     "r"(width),  // %4
+     "r" (kCoefficientsRgbY)  // %5
+   : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"
+@@ -77,19 +73,19 @@ void ScaleYUVToRGB32Row(const uint8* y_buf,  // rdi
+                         const uint8* u_buf,  // rsi
+                         const uint8* v_buf,  // rdx
+                         uint8* rgb_buf,      // rcx
+                         int width,           // r8
+                         int source_dx) {     // r9
+   asm(
+   "xor    %%r11,%%r11\n"
+   "sub    $0x2,%4\n"
+-  "js     scalenext\n"
++  "js     1f\n"
+ 
+-"scaleloop:"
++"0:"
+   "mov    %%r11,%%r10\n"
+   "sar    $0x11,%%r10\n"
+   "movzb  (%1,%%r10,1),%%rax\n"
+   "movq   2048(%5,%%rax,8),%%xmm0\n"
+   "movzb  (%2,%%r10,1),%%rax\n"
+   "movq   4096(%5,%%rax,8),%%xmm1\n"
+   "lea    (%%r11,%6),%%r10\n"
+   "sar    $0x10,%%r11\n"
+@@ -103,38 +99,38 @@ void ScaleYUVToRGB32Row(const uint8* y_buf,  // rdi
+   "paddsw %%xmm0,%%xmm1\n"
+   "paddsw %%xmm0,%%xmm2\n"
+   "shufps $0x44,%%xmm2,%%xmm1\n"
+   "psraw  $0x6,%%xmm1\n"
+   "packuswb %%xmm1,%%xmm1\n"
+   "movq   %%xmm1,0x0(%3)\n"
+   "add    $0x8,%3\n"
+   "sub    $0x2,%4\n"
+-  "jns    scaleloop\n"
++  "jns    0b\n"
+ 
+-"scalenext:"
++"1:"
+   "add    $0x1,%4\n"
+-  "js     scaledone\n"
++  "js     2f\n"
  
- #else
+   "mov    %%r11,%%r10\n"
+   "sar    $0x11,%%r10\n"
+   "movzb  (%1,%%r10,1),%%rax\n"
+   "movq   2048(%5,%%rax,8),%%xmm0\n"
+   "movzb  (%2,%%r10,1),%%rax\n"
+   "movq   4096(%5,%%rax,8),%%xmm1\n"
+   "paddsw %%xmm1,%%xmm0\n"
+   "sar    $0x10,%%r11\n"
+   "movzb  (%0,%%r11,1),%%rax\n"
+   "movq   (%5,%%rax,8),%%xmm1\n"
+   "paddsw %%xmm0,%%xmm1\n"
+   "psraw  $0x6,%%xmm1\n"
+   "packuswb %%xmm1,%%xmm1\n"
+   "movd   %%xmm1,0x0(%3)\n"
+ 
+-"scaledone:"
++"2:"
+   :
+   : "r"(y_buf),  // %0
+     "r"(u_buf),  // %1
+     "r"(v_buf),  // %2
+     "r"(rgb_buf),  // %3
+     "r"(width),  // %4
+     "r" (kCoefficientsRgbY),  // %5
+     "r"(static_cast<long>(source_dx))  // %6
+@@ -146,23 +142,23 @@ void LinearScaleYUVToRGB32Row(const uint8* y_buf,
+                               const uint8* u_buf,
+                               const uint8* v_buf,
+                               uint8* rgb_buf,
+                               int width,
+                               int source_dx) {
+   asm(
+   "xor    %%r11,%%r11\n"   // x = 0
+   "sub    $0x2,%4\n"
+-  "js     .lscalenext\n"
++  "js     2f\n"
+   "cmp    $0x20000,%6\n"   // if source_dx >= 2.0
+-  "jl     .lscalehalf\n"
++  "jl     0f\n"
+   "mov    $0x8000,%%r11\n" // x = 0.5 for 1/2 or less
+-".lscalehalf:"
++"0:"
+ 
+-".lscaleloop:"
++"1:"
+   "mov    %%r11,%%r10\n"
+   "sar    $0x11,%%r10\n"
  
+   "movzb  (%1, %%r10, 1), %%r13 \n"
+   "movzb  1(%1, %%r10, 1), %%r14 \n"
+   "mov    %%r11, %%rax \n"
+   "and    $0x1fffe, %%rax \n"
+   "imul   %%rax, %%r14 \n"
+@@ -215,21 +211,21 @@ void LinearScaleYUVToRGB32Row(const uint8* y_buf,
+   "paddsw %%xmm0,%%xmm1\n"
+   "paddsw %%xmm0,%%xmm2\n"
+   "shufps $0x44,%%xmm2,%%xmm1\n"
+   "psraw  $0x6,%%xmm1\n"
+   "packuswb %%xmm1,%%xmm1\n"
+   "movq   %%xmm1,0x0(%3)\n"
+   "add    $0x8,%3\n"
+   "sub    $0x2,%4\n"
+-  "jns    .lscaleloop\n"
++  "jns    1b\n"
+ 
+-".lscalenext:"
++"2:"
+   "add    $0x1,%4\n"
+-  "js     .lscaledone\n"
++  "js     3f\n"
+ 
+   "mov    %%r11,%%r10\n"
+   "sar    $0x11,%%r10\n"
+ 
+   "movzb  (%1,%%r10,1), %%r13 \n"
+   "movq   2048(%5,%%r13,8),%%xmm0\n"
+ 
+   "movzb  (%2,%%r10,1), %%r13 \n"
+@@ -241,52 +237,52 @@ void LinearScaleYUVToRGB32Row(const uint8* y_buf,
+   "movzb  (%0,%%r11,1), %%r13 \n"
+   "movq   (%5,%%r13,8),%%xmm1\n"
+ 
+   "paddsw %%xmm0,%%xmm1\n"
+   "psraw  $0x6,%%xmm1\n"
+   "packuswb %%xmm1,%%xmm1\n"
+   "movd   %%xmm1,0x0(%3)\n"
+ 
+-".lscaledone:"
++"3:"
+   :
+   : "r"(y_buf),  // %0
+     "r"(u_buf),  // %1
+     "r"(v_buf),  // %2
+     "r"(rgb_buf),  // %3
+     "r"(width),  // %4
+     "r" (kCoefficientsRgbY),  // %5
+     "r"(static_cast<long>(source_dx))  // %6
+   : "memory", "r10", "r11", "r13", "r14", "rax", "xmm0", "xmm1", "xmm2"
+ );
+ }
+ 
+-#elif USE_MMX && !defined(ARCH_CPU_X86_64) && !defined(__PIC__)
++#elif defined(ARCH_CPU_X86_32) && !defined(__PIC__)
+ 
+ // PIC version is slower because less registers are available, so
+ // non-PIC is used on platforms where it is possible.
+-
  void FastConvertYUVToRGB32Row(const uint8* y_buf,
                                const uint8* u_buf,
                                const uint8* v_buf,
                                uint8* rgb_buf,
                                int width);
--
    asm(
+   ".text\n"
    ".global FastConvertYUVToRGB32Row\n"
++  ".type FastConvertYUVToRGB32Row, @function\n"
  "FastConvertYUVToRGB32Row:\n"
    "pusha\n"
    "mov    0x24(%esp),%edx\n"
    "mov    0x28(%esp),%edi\n"
    "mov    0x2c(%esp),%esi\n"
    "mov    0x30(%esp),%ebp\n"
-@@ -505,195 +494,10 @@ void ScaleYUVToRGB32Row(const uint8* y_buf,
+   "mov    0x34(%esp),%ecx\n"
+-  "jmp    convertend\n"
++  "jmp    1f\n"
+ 
+-"convertloop:"
++"0:"
+   "movzbl (%edi),%eax\n"
+   "add    $0x1,%edi\n"
+   "movzbl (%esi),%ebx\n"
+   "add    $0x1,%esi\n"
+   "movq   kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
+   "movzbl (%edx),%eax\n"
+   "paddsw kCoefficientsRgbY+4096(,%ebx,8),%mm0\n"
+   "movzbl 0x1(%edx),%ebx\n"
+@@ -295,59 +291,63 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
+   "movq   kCoefficientsRgbY(,%ebx,8),%mm2\n"
+   "paddsw %mm0,%mm1\n"
+   "paddsw %mm0,%mm2\n"
+   "psraw  $0x6,%mm1\n"
+   "psraw  $0x6,%mm2\n"
+   "packuswb %mm2,%mm1\n"
+   "movntq %mm1,0x0(%ebp)\n"
+   "add    $0x8,%ebp\n"
+-"convertend:"
++"1:"
+   "sub    $0x2,%ecx\n"
+-  "jns    convertloop\n"
++  "jns    0b\n"
+ 
+   "and    $0x1,%ecx\n"
+-  "je     convertdone\n"
++  "je     2f\n"
+ 
+   "movzbl (%edi),%eax\n"
+   "movq   kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
+   "movzbl (%esi),%eax\n"
+   "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
+   "movzbl (%edx),%eax\n"
+   "movq   kCoefficientsRgbY(,%eax,8),%mm1\n"
+   "paddsw %mm0,%mm1\n"
+   "psraw  $0x6,%mm1\n"
+   "packuswb %mm1,%mm1\n"
+   "movd   %mm1,0x0(%ebp)\n"
+-"convertdone:"
++"2:"
+   "popa\n"
+   "ret\n"
++#if !defined(XP_MACOSX)
++  ".previous\n"
++#endif
+ );
+ 
+ 
+ void ScaleYUVToRGB32Row(const uint8* y_buf,
+                         const uint8* u_buf,
+                         const uint8* v_buf,
+                         uint8* rgb_buf,
+                         int width,
+                         int source_dx);
+   asm(
+   ".text\n"
+   ".global ScaleYUVToRGB32Row\n"
++  ".type ScaleYUVToRGB32Row, @function\n"
+ "ScaleYUVToRGB32Row:\n"
+   "pusha\n"
+   "mov    0x24(%esp),%edx\n"
+   "mov    0x28(%esp),%edi\n"
+   "mov    0x2c(%esp),%esi\n"
+   "mov    0x30(%esp),%ebp\n"
+   "mov    0x34(%esp),%ecx\n"
+   "xor    %ebx,%ebx\n"
+-  "jmp    scaleend\n"
++  "jmp    1f\n"
+ 
+-"scaleloop:"
++"0:"
+   "mov    %ebx,%eax\n"
+   "sar    $0x11,%eax\n"
+   "movzbl (%edi,%eax,1),%eax\n"
+   "movq   kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
+   "mov    %ebx,%eax\n"
+   "sar    $0x11,%eax\n"
+   "movzbl (%esi,%eax,1),%eax\n"
+   "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
+@@ -363,22 +363,22 @@ void ScaleYUVToRGB32Row(const uint8* y_buf,
+   "movq   kCoefficientsRgbY(,%eax,8),%mm2\n"
+   "paddsw %mm0,%mm1\n"
+   "paddsw %mm0,%mm2\n"
+   "psraw  $0x6,%mm1\n"
+   "psraw  $0x6,%mm2\n"
+   "packuswb %mm2,%mm1\n"
+   "movntq %mm1,0x0(%ebp)\n"
+   "add    $0x8,%ebp\n"
+-"scaleend:"
++"1:"
+   "sub    $0x2,%ecx\n"
+-  "jns    scaleloop\n"
++  "jns    0b\n"
+ 
+   "and    $0x1,%ecx\n"
+-  "je     scaledone\n"
++  "je     2f\n"
+ 
+   "mov    %ebx,%eax\n"
+   "sar    $0x11,%eax\n"
+   "movzbl (%edi,%eax,1),%eax\n"
+   "movq   kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
+   "mov    %ebx,%eax\n"
+   "sar    $0x11,%eax\n"
+   "movzbl (%esi,%eax,1),%eax\n"
+@@ -387,49 +387,53 @@ void ScaleYUVToRGB32Row(const uint8* y_buf,
+   "sar    $0x10,%eax\n"
+   "movzbl (%edx,%eax,1),%eax\n"
+   "movq   kCoefficientsRgbY(,%eax,8),%mm1\n"
+   "paddsw %mm0,%mm1\n"
+   "psraw  $0x6,%mm1\n"
+   "packuswb %mm1,%mm1\n"
    "movd   %mm1,0x0(%ebp)\n"
  
- "scaledone:"
+-"scaledone:"
++"2:"
+   "popa\n"
+   "ret\n"
++#if !defined(XP_MACOSX)
++  ".previous\n"
++#endif
+ );
+ 
+ void LinearScaleYUVToRGB32Row(const uint8* y_buf,
+                               const uint8* u_buf,
+                               const uint8* v_buf,
+                               uint8* rgb_buf,
+                               int width,
+                               int source_dx);
+   asm(
+   ".text\n"
+   ".global LinearScaleYUVToRGB32Row\n"
++  ".type LinearScaleYUVToRGB32Row, @function\n"
+ "LinearScaleYUVToRGB32Row:\n"
+   "pusha\n"
+   "mov    0x24(%esp),%edx\n"
+   "mov    0x28(%esp),%edi\n"
+   "mov    0x30(%esp),%ebp\n"
+ 
+   // source_width = width * source_dx + ebx
+   "mov    0x34(%esp), %ecx\n"
+   "imull  0x38(%esp), %ecx\n"
+   "mov    %ecx, 0x34(%esp)\n"
+ 
+   "mov    0x38(%esp), %ecx\n"
+   "xor    %ebx,%ebx\n"     // x = 0
+   "cmp    $0x20000,%ecx\n" // if source_dx >= 2.0
+-  "jl     .lscaleend\n"
++  "jl     1f\n"
+   "mov    $0x8000,%ebx\n"  // x = 0.5 for 1/2 or less
+-  "jmp    .lscaleend\n"
++  "jmp    1f\n"
+ 
+-".lscaleloop:"
++"0:"
+   "mov    %ebx,%eax\n"
+   "sar    $0x11,%eax\n"
+ 
+   "movzbl (%edi,%eax,1),%ecx\n"
+   "movzbl 1(%edi,%eax,1),%esi\n"
+   "mov    %ebx,%eax\n"
+   "andl   $0x1fffe, %eax \n"
+   "imul   %eax, %esi \n"
+@@ -464,17 +468,17 @@ void LinearScaleYUVToRGB32Row(const uint8* y_buf,
+   "imul   %eax, %esi \n"
+   "xorl   $0xffff, %eax \n"
+   "imul   %eax, %ecx \n"
+   "addl   %esi, %ecx \n"
+   "shrl   $16, %ecx \n"
+   "movq   kCoefficientsRgbY(,%ecx,8),%mm1\n"
+ 
+   "cmp    0x34(%esp), %ebx\n"
+-  "jge    .lscalelastpixel\n"
++  "jge    2f\n"
+ 
+   "mov    %ebx,%eax\n"
+   "sar    $0x10,%eax\n"
+   "movzbl (%edx,%eax,1),%ecx\n"
+   "movzbl 1(%edx,%eax,1),%esi\n"
+   "mov    %ebx,%eax\n"
+   "add    0x38(%esp),%ebx\n"
+   "andl   $0xffff, %eax \n"
+@@ -488,56 +492,60 @@ void LinearScaleYUVToRGB32Row(const uint8* y_buf,
+   "paddsw %mm0,%mm1\n"
+   "paddsw %mm0,%mm2\n"
+   "psraw  $0x6,%mm1\n"
+   "psraw  $0x6,%mm2\n"
+   "packuswb %mm2,%mm1\n"
+   "movntq %mm1,0x0(%ebp)\n"
+   "add    $0x8,%ebp\n"
+ 
+-".lscaleend:"
++"1:"
+   "cmp    0x34(%esp), %ebx\n"
+-  "jl     .lscaleloop\n"
++  "jl     0b\n"
+   "popa\n"
+   "ret\n"
+ 
+-".lscalelastpixel:"
++"2:"
+   "paddsw %mm0, %mm1\n"
+   "psraw $6, %mm1\n"
+   "packuswb %mm1, %mm1\n"
+   "movd %mm1, (%ebp)\n"
+   "popa\n"
+   "ret\n"
++#if !defined(XP_MACOSX)
++  ".previous\n"
++#endif
+ );
+ 
+-#elif USE_MMX && !defined(ARCH_CPU_X86_64) && defined(__PIC__)
++#elif defined(ARCH_CPU_X86_32) && defined(__PIC__)
++
++void PICConvertYUVToRGB32Row(const uint8* y_buf,
++                             const uint8* u_buf,
++                             const uint8* v_buf,
++                             uint8* rgb_buf,
++                             int width,
++                             int16 *kCoefficientsRgbY);
+ 
+-extern void PICConvertYUVToRGB32Row(const uint8* y_buf,
+-                                    const uint8* u_buf,
+-                                    const uint8* v_buf,
+-                                    uint8* rgb_buf,
+-                                    int width,
+-                                    int16 *kCoefficientsRgbY);
+   asm(
+   ".text\n"
+-#if defined(OS_MACOSX)
++#if defined(XP_MACOSX)
+ "_PICConvertYUVToRGB32Row:\n"
+ #else
+ "PICConvertYUVToRGB32Row:\n"
+ #endif
+   "pusha\n"
+   "mov    0x24(%esp),%edx\n"
+   "mov    0x28(%esp),%edi\n"
+   "mov    0x2c(%esp),%esi\n"
+   "mov    0x30(%esp),%ebp\n"
+   "mov    0x38(%esp),%ecx\n"
+ 
+-  "jmp    .Lconvertend\n"
++  "jmp    1f\n"
+ 
+-".Lconvertloop:"
++"0:"
+   "movzbl (%edi),%eax\n"
+   "add    $0x1,%edi\n"
+   "movzbl (%esi),%ebx\n"
+   "add    $0x1,%esi\n"
+   "movq   2048(%ecx,%eax,8),%mm0\n"
+   "movzbl (%edx),%eax\n"
+   "paddsw 4096(%ecx,%ebx,8),%mm0\n"
+   "movzbl 0x1(%edx),%ebx\n"
+@@ -546,72 +554,75 @@ extern void PICConvertYUVToRGB32Row(const uint8* y_buf,
+   "movq   0(%ecx,%ebx,8),%mm2\n"
+   "paddsw %mm0,%mm1\n"
+   "paddsw %mm0,%mm2\n"
+   "psraw  $0x6,%mm1\n"
+   "psraw  $0x6,%mm2\n"
+   "packuswb %mm2,%mm1\n"
+   "movntq %mm1,0x0(%ebp)\n"
+   "add    $0x8,%ebp\n"
+-".Lconvertend:"
++"1:"
+   "subl   $0x2,0x34(%esp)\n"
+-  "jns    .Lconvertloop\n"
++  "jns    0b\n"
+ 
+   "andl   $0x1,0x34(%esp)\n"
+-  "je     .Lconvertdone\n"
++  "je     2f\n"
+ 
+   "movzbl (%edi),%eax\n"
+   "movq   2048(%ecx,%eax,8),%mm0\n"
+   "movzbl (%esi),%eax\n"
+   "paddsw 4096(%ecx,%eax,8),%mm0\n"
+   "movzbl (%edx),%eax\n"
+   "movq   0(%ecx,%eax,8),%mm1\n"
+   "paddsw %mm0,%mm1\n"
+   "psraw  $0x6,%mm1\n"
+   "packuswb %mm1,%mm1\n"
+   "movd   %mm1,0x0(%ebp)\n"
+-".Lconvertdone:\n"
++"2:"
    "popa\n"
    "ret\n"
++#if !defined(XP_MACOSX)
++  ".previous\n"
++#endif
+ );
+ 
+ void FastConvertYUVToRGB32Row(const uint8* y_buf,
+                               const uint8* u_buf,
+                               const uint8* v_buf,
+                               uint8* rgb_buf,
+                               int width) {
+   PICConvertYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width,
+                           &kCoefficientsRgbY[0][0]);
+ }
+ 
+-extern void PICScaleYUVToRGB32Row(const uint8* y_buf,
+-                               const uint8* u_buf,
+-                               const uint8* v_buf,
+-                               uint8* rgb_buf,
+-                               int width,
+-                               int source_dx,
+-                               int16 *kCoefficientsRgbY);
++void PICScaleYUVToRGB32Row(const uint8* y_buf,
++                           const uint8* u_buf,
++                           const uint8* v_buf,
++                           uint8* rgb_buf,
++                           int width,
++                           int source_dx,
++                           int16 *kCoefficientsRgbY);
+ 
+   asm(
+   ".text\n"
+-#if defined(OS_MACOSX)
++#if defined(XP_MACOSX)
+ "_PICScaleYUVToRGB32Row:\n"
+ #else
+ "PICScaleYUVToRGB32Row:\n"
+ #endif
+   "pusha\n"
+   "mov    0x24(%esp),%edx\n"
+   "mov    0x28(%esp),%edi\n"
+   "mov    0x2c(%esp),%esi\n"
+   "mov    0x30(%esp),%ebp\n"
+   "mov    0x3c(%esp),%ecx\n"
+   "xor    %ebx,%ebx\n"
+-  "jmp    Lscaleend\n"
++  "jmp    1f\n"
+ 
+-"Lscaleloop:"
++"0:"
+   "mov    %ebx,%eax\n"
+   "sar    $0x11,%eax\n"
+   "movzbl (%edi,%eax,1),%eax\n"
+   "movq   2048(%ecx,%eax,8),%mm0\n"
+   "mov    %ebx,%eax\n"
+   "sar    $0x11,%eax\n"
+   "movzbl (%esi,%eax,1),%eax\n"
+   "paddsw 4096(%ecx,%eax,8),%mm0\n"
+@@ -627,22 +638,22 @@ extern void PICScaleYUVToRGB32Row(const uint8* y_buf,
+   "movq   0(%ecx,%eax,8),%mm2\n"
+   "paddsw %mm0,%mm1\n"
+   "paddsw %mm0,%mm2\n"
+   "psraw  $0x6,%mm1\n"
+   "psraw  $0x6,%mm2\n"
+   "packuswb %mm2,%mm1\n"
+   "movntq %mm1,0x0(%ebp)\n"
+   "add    $0x8,%ebp\n"
+-"Lscaleend:"
++"1:"
+   "subl   $0x2,0x34(%esp)\n"
+-  "jns    Lscaleloop\n"
++  "jns    0b\n"
+ 
+   "andl   $0x1,0x34(%esp)\n"
+-  "je     Lscaledone\n"
++  "je     2f\n"
+ 
+   "mov    %ebx,%eax\n"
+   "sar    $0x11,%eax\n"
+   "movzbl (%edi,%eax,1),%eax\n"
+   "movq   2048(%ecx,%eax,8),%mm0\n"
+   "mov    %ebx,%eax\n"
+   "sar    $0x11,%eax\n"
+   "movzbl (%esi,%eax,1),%eax\n"
+@@ -651,22 +662,24 @@ extern void PICScaleYUVToRGB32Row(const uint8* y_buf,
+   "sar    $0x10,%eax\n"
+   "movzbl (%edx,%eax,1),%eax\n"
+   "movq   0(%ecx,%eax,8),%mm1\n"
+   "paddsw %mm0,%mm1\n"
+   "psraw  $0x6,%mm1\n"
+   "packuswb %mm1,%mm1\n"
+   "movd   %mm1,0x0(%ebp)\n"
+ 
+-"Lscaledone:"
++"2:"
+   "popa\n"
+   "ret\n"
++#if !defined(XP_MACOSX)
++  ".previous\n"
++#endif
  );
  
+-
+ void ScaleYUVToRGB32Row(const uint8* y_buf,
+                         const uint8* u_buf,
+                         const uint8* v_buf,
+                         uint8* rgb_buf,
+                         int width,
+                         int source_dx) {
+   PICScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, source_dx,
+                         &kCoefficientsRgbY[0][0]);
+@@ -674,19 +687,20 @@ void ScaleYUVToRGB32Row(const uint8* y_buf,
+ 
+ void PICLinearScaleYUVToRGB32Row(const uint8* y_buf,
+                                  const uint8* u_buf,
+                                  const uint8* v_buf,
+                                  uint8* rgb_buf,
+                                  int width,
+                                  int source_dx,
+                                  int16 *kCoefficientsRgbY);
++
+   asm(
+   ".text\n"
+-#if defined(OS_MACOSX)
++#if defined(XP_MACOSX)
+ "_PICLinearScaleYUVToRGB32Row:\n"
+ #else
+ "PICLinearScaleYUVToRGB32Row:\n"
  #endif
+   "pusha\n"
+   "mov    0x24(%esp),%edx\n"
+   "mov    0x30(%esp),%ebp\n"
+   "mov    0x34(%esp),%ecx\n"
+@@ -696,21 +710,21 @@ void PICLinearScaleYUVToRGB32Row(const uint8* y_buf,
+   // source_width = width * source_dx + ebx
+   "mov    0x34(%esp), %ecx\n"
+   "imull  0x38(%esp), %ecx\n"
+   "mov    %ecx, 0x34(%esp)\n"
+ 
+   "mov    0x38(%esp), %ecx\n"
+   "xor    %ebx,%ebx\n"     // x = 0
+   "cmp    $0x20000,%ecx\n" // if source_dx >= 2.0
+-  "jl     .lscaleend\n"
++  "jl     1f\n"
+   "mov    $0x8000,%ebx\n"  // x = 0.5 for 1/2 or less
+-  "jmp    .lscaleend\n"
++  "jmp    1f\n"
+ 
+-".lscaleloop:"
++"0:"
+   "mov    0x28(%esp),%esi\n"
+   "mov    %ebx,%eax\n"
+   "sar    $0x11,%eax\n"
+ 
+   "movzbl (%esi,%eax,1),%ecx\n"
+   "movzbl 1(%esi,%eax,1),%esi\n"
+   "mov    %ebx,%eax\n"
+   "andl   $0x1fffe, %eax \n"
+@@ -746,17 +760,17 @@ void PICLinearScaleYUVToRGB32Row(const uint8* y_buf,
+   "imul   %eax, %esi \n"
+   "xorl   $0xffff, %eax \n"
+   "imul   %eax, %ecx \n"
+   "addl   %esi, %ecx \n"
+   "shrl   $16, %ecx \n"
+   "movq   (%edi,%ecx,8),%mm1\n"
+ 
+   "cmp    0x34(%esp), %ebx\n"
+-  "jge    .lscalelastpixel\n"
++  "jge    2f\n"
+ 
+   "mov    %ebx,%eax\n"
+   "sar    $0x10,%eax\n"
+   "movzbl (%edx,%eax,1),%ecx\n"
+   "movzbl 1(%edx,%eax,1),%esi\n"
+   "mov    %ebx,%eax\n"
+   "add    0x38(%esp),%ebx\n"
+   "andl   $0xffff, %eax \n"
+@@ -770,154 +784,66 @@ void PICLinearScaleYUVToRGB32Row(const uint8* y_buf,
+   "paddsw %mm0,%mm1\n"
+   "paddsw %mm0,%mm2\n"
+   "psraw  $0x6,%mm1\n"
+   "psraw  $0x6,%mm2\n"
+   "packuswb %mm2,%mm1\n"
+   "movntq %mm1,0x0(%ebp)\n"
+   "add    $0x8,%ebp\n"
+ 
+-".lscaleend:"
++"1:"
+   "cmp    %ebx, 0x34(%esp)\n"
+-  "jg     .lscaleloop\n"
++  "jg     0b\n"
+   "popa\n"
+   "ret\n"
+ 
+-".lscalelastpixel:"
++"2:"
+   "paddsw %mm0, %mm1\n"
+   "psraw $6, %mm1\n"
+   "packuswb %mm1, %mm1\n"
+   "movd %mm1, (%ebp)\n"
+   "popa\n"
+   "ret\n"
++#if !defined(XP_MACOSX)
++  ".previous\n"
++#endif
+ );
+ 
++
+ void LinearScaleYUVToRGB32Row(const uint8* y_buf,
+-                        const uint8* u_buf,
+-                        const uint8* v_buf,
+-                        uint8* rgb_buf,
+-                        int width,
+-                        int source_dx) {
++                              const uint8* u_buf,
++                              const uint8* v_buf,
++                              uint8* rgb_buf,
++                              int width,
++                              int source_dx) {
+   PICLinearScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, source_dx,
+                               &kCoefficientsRgbY[0][0]);
+ }
 -
 -#else  // USE_MMX
 -
--// Reference version of YUV converter.
--static const int kClipTableSize = 256;
--static const int kClipOverflow = 288;  // Cb max is 535.
--
--static uint8 kRgbClipTable[kClipOverflow +
--                           kClipTableSize +
--                           kClipOverflow] = {
--  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 288 underflow values
--  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // clipped to 0.
--  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
--  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
--  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
--  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
--  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
--  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
--  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
--  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
--  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
--  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
--  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
--  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
--  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
--  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
--  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
--  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
--  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
--  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
--  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
--  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
--  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
--  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
--  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
--  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
--  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
--  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
--  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
--  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
--  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
--  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
--  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
--  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
--  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
--  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
--  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,  // Unclipped values.
--  0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
--  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
--  0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
--  0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
--  0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
--  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
--  0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
--  0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
--  0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
--  0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
--  0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
--  0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
--  0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
--  0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
--  0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
--  0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
--  0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
--  0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
--  0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
--  0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
--  0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
--  0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7,
--  0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
--  0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
--  0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
--  0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
--  0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
--  0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
--  0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
--  0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
--  0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
--  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,  // 288 overflow values
--  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,  // clipped to 255.
--  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
--  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
--  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
--  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
--  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
--  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
--  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
--  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
--  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
--  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
--  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
--  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
--  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
--  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
--  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
--  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
--  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
--  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
--  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
--  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
--  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
--  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
--  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
--  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
--  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
--  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
--  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
--  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
--  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
--  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
--  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
--  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
--  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
--  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
--};
--
--// Clip an rgb channel value to 0..255 range.
--// Source is signed fixed point 8.8.
--// Table allows for values to underflow or overflow by 128.
--// Therefore source range is -128 to 384.
--// Output clips to unsigned 0 to 255.
--static inline uint32 clip(int32 value) {
--  DCHECK(((value >> 8) + kClipOverflow) >= 0);
--  DCHECK(((value >> 8) + kClipOverflow) <
--         (kClipOverflow + kClipTableSize + kClipOverflow));
--  return static_cast<uint32>(kRgbClipTable[((value) >> 8) + kClipOverflow]);
--}
+-// C reference code that mimic the YUV assembly.
+-#define packuswb(x) ((x) < 0 ? 0 : ((x) > 255 ? 255 : (x)))
+-#define paddsw(x, y) (((x) + (y)) < -32768 ? -32768 : \
+-    (((x) + (y)) > 32767 ? 32767 : ((x) + (y))))
 -
 -static inline void YuvPixel(uint8 y,
 -                            uint8 u,
 -                            uint8 v,
 -                            uint8* rgb_buf) {
--  int32 d = static_cast<int32>(u) - 128;
--  int32 e = static_cast<int32>(v) - 128;
+-
+-  int b = kCoefficientsRgbY[256+u][0];
+-  int g = kCoefficientsRgbY[256+u][1];
+-  int r = kCoefficientsRgbY[256+u][2];
+-  int a = kCoefficientsRgbY[256+u][3];
+-
+-  b = paddsw(b, kCoefficientsRgbY[512+v][0]);
+-  g = paddsw(g, kCoefficientsRgbY[512+v][1]);
+-  r = paddsw(r, kCoefficientsRgbY[512+v][2]);
+-  a = paddsw(a, kCoefficientsRgbY[512+v][3]);
 -
--  int32 cb =   (516 * d + 128);
--  int32 cg = (- 100 * d - 208 * e + 128);
--  int32 cr =             (409 * e + 128);
+-  b = paddsw(b, kCoefficientsRgbY[y][0]);
+-  g = paddsw(g, kCoefficientsRgbY[y][1]);
+-  r = paddsw(r, kCoefficientsRgbY[y][2]);
+-  a = paddsw(a, kCoefficientsRgbY[y][3]);
 -
--  int32 C298a = ((static_cast<int32>(y) - 16) * 298 + 128);
--  *reinterpret_cast<uint32*>(rgb_buf) = (clip(C298a + cb)) |
--                                        (clip(C298a + cg) << 8) |
--                                        (clip(C298a + cr) << 16) |
--                                        (0xff000000);
+-  b >>= 6;
+-  g >>= 6;
+-  r >>= 6;
+-  a >>= 6;
+-
+-  *reinterpret_cast<uint32*>(rgb_buf) = (packuswb(b)) |
+-                                        (packuswb(g) << 8) |
+-                                        (packuswb(r) << 16) |
+-                                        (packuswb(a) << 24);
 -}
 -
--void FastConvertYUVToRGB32Row(const uint8* y_buf,
--                              const uint8* u_buf,
--                              const uint8* v_buf,
--                              uint8* rgb_buf,
--                              int width) {
++#else
+ void FastConvertYUVToRGB32Row(const uint8* y_buf,
+                               const uint8* u_buf,
+                               const uint8* v_buf,
+                               uint8* rgb_buf,
+                               int width) {
 -  for (int x = 0; x < width; x += 2) {
 -    uint8 u = u_buf[x >> 1];
 -    uint8 v = v_buf[x >> 1];
 -    uint8 y0 = y_buf[x];
 -    YuvPixel(y0, u, v, rgb_buf);
 -    if ((x + 1) < width) {
 -      uint8 y1 = y_buf[x + 1];
 -      YuvPixel(y1, u, v, rgb_buf + 4);
 -    }
 -    rgb_buf += 8;  // Advance 2 pixels.
 -  }
--}
--
--// 28.4 fixed point is used.  A shift by 4 isolates the integer.
--// A shift by 5 is used to further subsample the chrominence channels.
--// & 15 isolates the fixed point fraction.  >> 2 to get the upper 2 bits,
--// for 1/4 pixel accurate interpolation.
--void ScaleYUVToRGB32Row(const uint8* y_buf,
--                        const uint8* u_buf,
--                        const uint8* v_buf,
--                        uint8* rgb_buf,
--                        int width,
--                        int scaled_dx) {
--  int scaled_x = 0;
--  for (int x = 0; x < width; ++x) {
--    uint8 u = u_buf[scaled_x >> 5];
--    uint8 v = v_buf[scaled_x >> 5];
--    uint8 y0 = y_buf[scaled_x >> 4];
--    YuvPixel(y0, u, v, rgb_buf);
--    rgb_buf += 4;
--    scaled_x += scaled_dx;
++  FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
+ }
+ 
+-// 16.16 fixed point is used.  A shift by 16 isolates the integer.
+-// A shift by 17 is used to further subsample the chrominence channels.
+-// & 0xffff isolates the fixed point fraction.  >> 2 to get the upper 2 bits,
+-// for 1/65536 pixel accurate interpolation.
+ void ScaleYUVToRGB32Row(const uint8* y_buf,
+                         const uint8* u_buf,
+                         const uint8* v_buf,
+                         uint8* rgb_buf,
+                         int width,
+                         int source_dx) {
+-  int x = 0;
+-  for (int i = 0; i < width; i += 2) {
+-    int y = y_buf[x >> 16];
+-    int u = u_buf[(x >> 17)];
+-    int v = v_buf[(x >> 17)];
+-    YuvPixel(y, u, v, rgb_buf);
+-    x += source_dx;
+-    if ((i + 1) < width) {
+-      y = y_buf[x >> 16];
+-      YuvPixel(y, u, v, rgb_buf+4);
+-      x += source_dx;
+-    }
+-    rgb_buf += 8;
 -  }
--}
++  ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
+ }
+ 
+ void LinearScaleYUVToRGB32Row(const uint8* y_buf,
+                               const uint8* u_buf,
+                               const uint8* v_buf,
+                               uint8* rgb_buf,
+                               int width,
+                               int source_dx) {
+-  int x = 0;
+-  if (source_dx >= 0x20000) {
+-    x = 32768;
+-  }
+-  for (int i = 0; i < width; i += 2) {
+-    int y0 = y_buf[x >> 16];
+-    int y1 = y_buf[(x >> 16) + 1];
+-    int u0 = u_buf[(x >> 17)];
+-    int u1 = u_buf[(x >> 17) + 1];
+-    int v0 = v_buf[(x >> 17)];
+-    int v1 = v_buf[(x >> 17) + 1];
+-    int y_frac = (x & 65535);
+-    int uv_frac = ((x >> 1) & 65535);
+-    int y = (y_frac * y1 + (y_frac ^ 65535) * y0) >> 16;
+-    int u = (uv_frac * u1 + (uv_frac ^ 65535) * u0) >> 16;
+-    int v = (uv_frac * v1 + (uv_frac ^ 65535) * v0) >> 16;
+-    YuvPixel(y, u, v, rgb_buf);
+-    x += source_dx;
+-    if ((i + 1) < width) {
+-      y0 = y_buf[x >> 16];
+-      y1 = y_buf[(x >> 16) + 1];
+-      y_frac = (x & 65535);
+-      y = (y_frac * y1 + (y_frac ^ 65535) * y0) >> 16;
+-      YuvPixel(y, u, v, rgb_buf+4);
+-      x += source_dx;
+-    }
+-    rgb_buf += 8;
+-  }
++  LinearScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
+ }
++#endif
+ 
 -#endif  // USE_MMX
  }  // extern "C"
  
-diff --git b/gfx/ycbcr/yuv_row_mac.cpp a/gfx/ycbcr/yuv_row_mac.cpp
-index 39c252d..aa40aac 100644
---- b/gfx/ycbcr/yuv_row_mac.cpp
-+++ a/gfx/ycbcr/yuv_row_mac.cpp
+diff --git b/gfx/ycbcr/yuv_row_table.cpp a/gfx/ycbcr/yuv_row_table.cpp
+index 296380b..ad71341 100644
+--- b/gfx/ycbcr/yuv_row_table.cpp
++++ a/gfx/ycbcr/yuv_row_table.cpp
 @@ -1,13 +1,13 @@
- // Copyright (c) 2009 The Chromium Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style license that can be
- // found in the LICENSE file.
- 
--#include "media/base/yuv_row.h"
-+#include "yuv_row.h"
- 
- // TODO(fbarchard): Do 64 bit version.
- 
- extern "C" {
- 
- #define RGBY(i) { \
-   static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
-   static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
-diff --git b/gfx/ycbcr/yuv_row_win.cpp a/gfx/ycbcr/yuv_row_win.cpp
-index 1e11d94..a77a16f 100644
---- b/gfx/ycbcr/yuv_row_win.cpp
-+++ a/gfx/ycbcr/yuv_row_win.cpp
-@@ -1,13 +1,13 @@
- // Copyright (c) 2009 The Chromium Authors. All rights reserved.
+ // Copyright (c) 2010 The Chromium Authors. All rights reserved.
  // Use of this source code is governed by a BSD-style license that can be
  // found in the LICENSE file.
  
 -#include "media/base/yuv_row.h"
 +#include "yuv_row.h"
  
  extern "C" {
+ 
  #define RGBY(i) { \
    static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
    static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
    static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
    0 \
+diff --git b/gfx/ycbcr/yuv_row_win.cpp a/gfx/ycbcr/yuv_row_win.cpp
+index b5049a5..627b8cb 100644
+--- b/gfx/ycbcr/yuv_row_win.cpp
++++ a/gfx/ycbcr/yuv_row_win.cpp
+@@ -1,20 +1,23 @@
+ // Copyright (c) 2010 The Chromium Authors. All rights reserved.
+ // Use of this source code is governed by a BSD-style license that can be
+ // found in the LICENSE file.
+ 
+-#include "media/base/yuv_row.h"
++#include "yuv_row.h"
++#define MOZILLA_SSE_INCLUDE_HEADER_FOR_SSE2
++#define MOZILLA_SSE_INCLUDE_HEADER_FOR_MMX
++#include "mozilla/SSE.h"
++
+ 
+ #define kCoefficientsRgbU kCoefficientsRgbY + 2048
+ #define kCoefficientsRgbV kCoefficientsRgbY + 4096
+ 
+ extern "C" {
+-
+-#if USE_MMX
++#if defined(MOZILLA_COMPILE_WITH_SSE2)
+ __declspec(naked)
+ void FastConvertYUVToRGB32Row(const uint8* y_buf,
+                               const uint8* u_buf,
+                               const uint8* v_buf,
+                               uint8* rgb_buf,
+                               int width) {
+   __asm {
+     pushad
+@@ -438,152 +441,37 @@ lscalelastpixel:
+     paddsw    mm1, mm0
+     psraw     mm1, 6
+     packuswb  mm1, mm1
+     movd      [ebp], mm1
+     popad
+     ret
+   };
  }
+-#else  // USE_MMX
+-
+-// C reference code that mimic the YUV assembly.
+-#define packuswb(x) ((x) < 0 ? 0 : ((x) > 255 ? 255 : (x)))
+-#define paddsw(x, y) (((x) + (y)) < -32768 ? -32768 : \
+-    (((x) + (y)) > 32767 ? 32767 : ((x) + (y))))
+-
+-static inline void YuvPixel(uint8 y,
+-                            uint8 u,
+-                            uint8 v,
+-                            uint8* rgb_buf) {
+-
+-  int b = kCoefficientsRgbY[256+u][0];
+-  int g = kCoefficientsRgbY[256+u][1];
+-  int r = kCoefficientsRgbY[256+u][2];
+-  int a = kCoefficientsRgbY[256+u][3];
+-
+-  b = paddsw(b, kCoefficientsRgbY[512+v][0]);
+-  g = paddsw(g, kCoefficientsRgbY[512+v][1]);
+-  r = paddsw(r, kCoefficientsRgbY[512+v][2]);
+-  a = paddsw(a, kCoefficientsRgbY[512+v][3]);
+-
+-  b = paddsw(b, kCoefficientsRgbY[y][0]);
+-  g = paddsw(g, kCoefficientsRgbY[y][1]);
+-  r = paddsw(r, kCoefficientsRgbY[y][2]);
+-  a = paddsw(a, kCoefficientsRgbY[y][3]);
+-
+-  b >>= 6;
+-  g >>= 6;
+-  r >>= 6;
+-  a >>= 6;
+-
+-  *reinterpret_cast<uint32*>(rgb_buf) = (packuswb(b)) |
+-                                        (packuswb(g) << 8) |
+-                                        (packuswb(r) << 16) |
+-                                        (packuswb(a) << 24);
+-}
+-
+-#if TEST_MMX_YUV
+-static inline void YuvPixel(uint8 y,
+-                            uint8 u,
+-                            uint8 v,
+-                            uint8* rgb_buf) {
+-
+-  __asm {
+-    movzx     eax, u
+-    movq      mm0, [kCoefficientsRgbY+2048 + 8 * eax]
+-    movzx     eax, v
+-    paddsw    mm0, [kCoefficientsRgbY+4096 + 8 * eax]
+-    movzx     eax, y
+-    movq      mm1, [kCoefficientsRgbY + 8 * eax]
+-    paddsw    mm1, mm0
+-    psraw     mm1, 6
+-    packuswb  mm1, mm1
+-    mov       eax, rgb_buf
+-    movd      [eax], mm1
+-    emms
+-  }
+-}
+-#endif
+-
++#else // MOZILLA_COMPILE_WITH_SSE2
+ void FastConvertYUVToRGB32Row(const uint8* y_buf,
+                               const uint8* u_buf,
+                               const uint8* v_buf,
+                               uint8* rgb_buf,
+                               int width) {
+-  for (int x = 0; x < width; x += 2) {
+-    uint8 u = u_buf[x >> 1];
+-    uint8 v = v_buf[x >> 1];
+-    uint8 y0 = y_buf[x];
+-    YuvPixel(y0, u, v, rgb_buf);
+-    if ((x + 1) < width) {
+-      uint8 y1 = y_buf[x + 1];
+-      YuvPixel(y1, u, v, rgb_buf + 4);
+-    }
+-    rgb_buf += 8;  // Advance 2 pixels.
+-  }
++  FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
+ }
+ 
+-// 16.16 fixed point is used.  A shift by 16 isolates the integer.
+-// A shift by 17 is used to further subsample the chrominence channels.
+-// & 0xffff isolates the fixed point fraction.  >> 2 to get the upper 2 bits,
+-// for 1/65536 pixel accurate interpolation.
+ void ScaleYUVToRGB32Row(const uint8* y_buf,
+                         const uint8* u_buf,
+                         const uint8* v_buf,
+                         uint8* rgb_buf,
+                         int width,
+                         int source_dx) {
+-  int x = 0;
+-  for (int i = 0; i < width; i += 2) {
+-    int y = y_buf[x >> 16];
+-    int u = u_buf[(x >> 17)];
+-    int v = v_buf[(x >> 17)];
+-    YuvPixel(y, u, v, rgb_buf);
+-    x += source_dx;
+-    if ((i + 1) < width) {
+-      y = y_buf[x >> 16];
+-      YuvPixel(y, u, v, rgb_buf+4);
+-      x += source_dx;
+-    }
+-    rgb_buf += 8;
+-  }
++  ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
+ }
+ 
+ void LinearScaleYUVToRGB32Row(const uint8* y_buf,
+                               const uint8* u_buf,
+                               const uint8* v_buf,
+                               uint8* rgb_buf,
+                               int width,
+                               int source_dx) {
+-  int x = 0;
+-  if (source_dx >= 0x20000) {
+-    x = 32768;
+-  }
+-  for (int i = 0; i < width; i += 2) {
+-    int y0 = y_buf[x >> 16];
+-    int y1 = y_buf[(x >> 16) + 1];
+-    int u0 = u_buf[(x >> 17)];
+-    int u1 = u_buf[(x >> 17) + 1];
+-    int v0 = v_buf[(x >> 17)];
+-    int v1 = v_buf[(x >> 17) + 1];
+-    int y_frac = (x & 65535);
+-    int uv_frac = ((x >> 1) & 65535);
+-    int y = (y_frac * y1 + (y_frac ^ 65535) * y0) >> 16;
+-    int u = (uv_frac * u1 + (uv_frac ^ 65535) * u0) >> 16;
+-    int v = (uv_frac * v1 + (uv_frac ^ 65535) * v0) >> 16;
+-    YuvPixel(y, u, v, rgb_buf);
+-    x += source_dx;
+-    if ((i + 1) < width) {
+-      y0 = y_buf[x >> 16];
+-      y1 = y_buf[(x >> 16) + 1];
+-      y_frac = (x & 65535);
+-      y = (y_frac * y1 + (y_frac ^ 65535) * y0) >> 16;
+-      YuvPixel(y, u, v, rgb_buf+4);
+-      x += source_dx;
+-    }
+-    rgb_buf += 8;
+-  }
++  LinearScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
+ }
+-
+-#endif  // USE_MMX
++#endif
+ }  // extern "C"
+ 
deleted file mode 100644
--- a/gfx/ycbcr/export.patch
+++ /dev/null
@@ -1,43 +0,0 @@
-diff --git a/gfx/ycbcr/yuv_convert.h b/gfx/ycbcr/yuv_convert.h
-index 6735b77..e624168 100644
---- a/gfx/ycbcr/yuv_convert.h
-+++ b/gfx/ycbcr/yuv_convert.h
-@@ -6,6 +6,7 @@
- #define MEDIA_BASE_YUV_CONVERT_H_
- 
- #include "chromium_types.h"
-+#include "gfxCore.h"
- 
- namespace mozilla {
- 
-@@ -20,18 +21,18 @@ enum YUVType {
- 
- // Convert a frame of YUV to 32 bit ARGB.
- // Pass in YV16/YV12 depending on source format
--void ConvertYCbCrToRGB32(const uint8* yplane,
--                         const uint8* uplane,
--                         const uint8* vplane,
--                         uint8* rgbframe,
--                         int pic_x,
--                         int pic_y,
--                         int pic_width,
--                         int pic_height,
--                         int ystride,
--                         int uvstride,
--                         int rgbstride,
--                         YUVType yuv_type);
-+NS_GFX_(void) ConvertYCbCrToRGB32(const uint8* yplane,
-+                                  const uint8* uplane,
-+                                  const uint8* vplane,
-+                                  uint8* rgbframe,
-+                                  int pic_x,
-+                                  int pic_y,
-+                                  int pic_width,
-+                                  int pic_height,
-+                                  int ystride,
-+                                  int uvstride,
-+                                  int rgbstride,
-+                                  YUVType yuv_type);
- 
- }  // namespace gfx
- }  // namespace mozilla
deleted file mode 100644
--- a/gfx/ycbcr/picture_region.patch
+++ /dev/null
@@ -1,99 +0,0 @@
-diff --git a/gfx/ycbcr/yuv_convert.cpp b/gfx/ycbcr/yuv_convert.cpp
-index c291d5c..ff7267e 100644
---- a/gfx/ycbcr/yuv_convert.cpp
-+++ b/gfx/ycbcr/yuv_convert.cpp
-@@ -25,42 +25,58 @@ namespace mozilla {
- 
- namespace gfx {
- 
- // Convert a frame of YUV to 32 bit ARGB.
- void ConvertYCbCrToRGB32(const uint8* y_buf,
-                          const uint8* u_buf,
-                          const uint8* v_buf,
-                          uint8* rgb_buf,
--                         int width,
--                         int height,
-+                         int pic_x,
-+                         int pic_y,
-+                         int pic_width,
-+                         int pic_height,
-                          int y_pitch,
-                          int uv_pitch,
-                          int rgb_pitch,
-                          YUVType yuv_type) {
-   unsigned int y_shift = yuv_type;
-   bool has_mmx = supports_mmx();
--  for (int y = 0; y < height; ++y) {
--    uint8* rgb_row = rgb_buf + y * rgb_pitch;
--    const uint8* y_ptr = y_buf + y * y_pitch;
--    const uint8* u_ptr = u_buf + (y >> y_shift) * uv_pitch;
--    const uint8* v_ptr = v_buf + (y >> y_shift) * uv_pitch;
-+  bool odd_pic_x = pic_x % 2 != 0;
-+  int x_width = odd_pic_x ? pic_width - 1 : pic_width;
-+
-+  for (int y = pic_y; y < pic_height + pic_y; ++y) {
-+    uint8* rgb_row = rgb_buf + (y - pic_y) * rgb_pitch;
-+    const uint8* y_ptr = y_buf + y * y_pitch + pic_x;
-+    const uint8* u_ptr = u_buf + (y >> y_shift) * uv_pitch + (pic_x >> 1);
-+    const uint8* v_ptr = v_buf + (y >> y_shift) * uv_pitch + (pic_x >> 1);
-+
-+    if (odd_pic_x) {
-+      // Handle the single odd pixel manually and use the
-+      // fast routines for the remaining.
-+      FastConvertYUVToRGB32Row_C(y_ptr++,
-+                                 u_ptr++,
-+                                 v_ptr++,
-+                                 rgb_row,
-+                                 1);
-+      rgb_row += 4;
-+    }
- 
-     if (has_mmx)
-       FastConvertYUVToRGB32Row(y_ptr,
-                                u_ptr,
-                                v_ptr,
-                                rgb_row,
--                               width);
-+                               x_width);
-     else
-       FastConvertYUVToRGB32Row_C(y_ptr,
-                                  u_ptr,
-                                  v_ptr,
-                                  rgb_row,
--                                 width);
-+                                 x_width);
-   }
- 
-   // MMX used for FastConvertYUVToRGB32Row requires emms instruction.
-   if (has_mmx)
-     EMMS();
- }
- 
- // Scale a frame of YUV to 32 bit ARGB.
-diff --git a/gfx/ycbcr/yuv_convert.h b/gfx/ycbcr/yuv_convert.h
-index 9d148a6..77ca8e6 100644
---- a/gfx/ycbcr/yuv_convert.h
-+++ b/gfx/ycbcr/yuv_convert.h
-@@ -32,18 +32,20 @@ enum Rotate {
- };
- 
- // Convert a frame of YUV to 32 bit ARGB.
- // Pass in YV16/YV12 depending on source format
- void ConvertYCbCrToRGB32(const uint8* yplane,
-                          const uint8* uplane,
-                          const uint8* vplane,
-                          uint8* rgbframe,
--                         int frame_width,
--                         int frame_height,
-+                         int pic_x,
-+                         int pic_y,
-+                         int pic_width,
-+                         int pic_height,
-                          int ystride,
-                          int uvstride,
-                          int rgbstride,
-                          YUVType yuv_type);
- 
- // Scale a frame of YUV to 32 bit ARGB.
- // Supports rotation and mirroring.
- void ScaleYCbCrToRGB32(const uint8* yplane,
deleted file mode 100644
--- a/gfx/ycbcr/remove_scale.patch
+++ /dev/null
@@ -1,839 +0,0 @@
-diff --git a/gfx/ycbcr/yuv_convert.cpp b/gfx/ycbcr/yuv_convert.cpp
-index eec578d..de91f79 100644
---- a/gfx/ycbcr/yuv_convert.cpp
-+++ b/gfx/ycbcr/yuv_convert.cpp
-@@ -81,133 +81,5 @@ void ConvertYCbCrToRGB32(const uint8* y_buf,
-     EMMS();
- }
- 
--// Scale a frame of YUV to 32 bit ARGB.
--void ScaleYCbCrToRGB32(const uint8* y_buf,
--                       const uint8* u_buf,
--                       const uint8* v_buf,
--                       uint8* rgb_buf,
--                       int width,
--                       int height,
--                       int scaled_width,
--                       int scaled_height,
--                       int y_pitch,
--                       int uv_pitch,
--                       int rgb_pitch,
--                       YUVType yuv_type,
--                       Rotate view_rotate) {
--  unsigned int y_shift = yuv_type;
--  bool has_mmx = supports_mmx();
--  // Diagram showing origin and direction of source sampling.
--  // ->0   4<-
--  // 7       3
--  //
--  // 6       5
--  // ->1   2<-
--  // Rotations that start at right side of image.
--  if ((view_rotate == ROTATE_180) ||
--      (view_rotate == ROTATE_270) ||
--      (view_rotate == MIRROR_ROTATE_0) ||
--      (view_rotate == MIRROR_ROTATE_90)) {
--    y_buf += width - 1;
--    u_buf += width / 2 - 1;
--    v_buf += width / 2 - 1;
--    width = -width;
--  }
--  // Rotations that start at bottom of image.
--  if ((view_rotate == ROTATE_90) ||
--      (view_rotate == ROTATE_180) ||
--      (view_rotate == MIRROR_ROTATE_90) ||
--      (view_rotate == MIRROR_ROTATE_180)) {
--    y_buf += (height - 1) * y_pitch;
--    u_buf += ((height >> y_shift) - 1) * uv_pitch;
--    v_buf += ((height >> y_shift) - 1) * uv_pitch;
--    height = -height;
--  }
--
--  // Handle zero sized destination.
--  if (scaled_width == 0 || scaled_height == 0)
--    return;
--  int scaled_dx = width * 16 / scaled_width;
--  int scaled_dy = height * 16 / scaled_height;
--
--  int scaled_dx_uv = scaled_dx;
--
--  if ((view_rotate == ROTATE_90) ||
--      (view_rotate == ROTATE_270)) {
--    int tmp = scaled_height;
--    scaled_height = scaled_width;
--    scaled_width = tmp;
--    tmp = height;
--    height = width;
--    width = tmp;
--    int original_dx = scaled_dx;
--    int original_dy = scaled_dy;
--    scaled_dx = ((original_dy >> 4) * y_pitch) << 4;
--    scaled_dx_uv = ((original_dy >> 4) * uv_pitch) << 4;
--    scaled_dy = original_dx;
--    if (view_rotate == ROTATE_90) {
--      y_pitch = -1;
--      uv_pitch = -1;
--      height = -height;
--    } else {
--      y_pitch = 1;
--      uv_pitch = 1;
--    }
--  }
--
--  for (int y = 0; y < scaled_height; ++y) {
--    uint8* dest_pixel = rgb_buf + y * rgb_pitch;
--    int scaled_y = (y * height / scaled_height);
--    const uint8* y_ptr = y_buf + scaled_y * y_pitch;
--    const uint8* u_ptr = u_buf + (scaled_y >> y_shift) * uv_pitch;
--    const uint8* v_ptr = v_buf + (scaled_y >> y_shift) * uv_pitch;
--
--#if defined(_MSC_VER)
--    if (scaled_width == (width * 2)) {
--      DoubleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
--                          dest_pixel, scaled_width);
--    } else if ((scaled_dx & 15) == 0) {  // Scaling by integer scale factor.
--      if (scaled_dx_uv == scaled_dx) {   // Not rotated.
--        if (scaled_dx == 16) {           // Not scaled
--          if (has_mmx)
--            FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
--                                     dest_pixel, scaled_width);
--          else
--            FastConvertYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr,
--                                      dest_pixel, scaled_width);
--        } else {  // Simple scale down. ie half
--          ConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
--                               dest_pixel, scaled_width, scaled_dx >> 4);
--        }
--      } else {
--        RotateConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
--                                   dest_pixel, scaled_width,
--                                   scaled_dx >> 4, scaled_dx_uv >> 4);
--      }
--#else
--    if (scaled_dx == 16) {           // Not scaled
--      if (has_mmx) 
--        FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
--                                 dest_pixel, scaled_width);
--      else
--        FastConvertYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr,
--                                   dest_pixel, scaled_width);
--#endif
--    } else {
--      if (has_mmx) 
--        ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
--                           dest_pixel, scaled_width, scaled_dx);
--      else
--        ScaleYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr,
--                             dest_pixel, scaled_width, scaled_dx);
--
--    }  
--  }
--
--  // MMX used for FastConvertYUVToRGB32Row requires emms instruction.
--  if (has_mmx)
--    EMMS();
--}
--
- }  // namespace gfx
- }  // namespace mozilla
-diff --git a/gfx/ycbcr/yuv_convert.h b/gfx/ycbcr/yuv_convert.h
-index 7962af7..c9bf7e0 100644
---- a/gfx/ycbcr/yuv_convert.h
-+++ b/gfx/ycbcr/yuv_convert.h
-@@ -18,19 +18,6 @@ enum YUVType {
-   YV12 = 1            // YV12 is half width and half height chroma channels.
- };
- 
--// Mirror means flip the image horizontally, as in looking in a mirror.
--// Rotate happens after mirroring.
--enum Rotate {
--  ROTATE_0,           // Rotation off.
--  ROTATE_90,          // Rotate clockwise.
--  ROTATE_180,         // Rotate upside down.
--  ROTATE_270,         // Rotate counter clockwise.
--  MIRROR_ROTATE_0,    // Mirror horizontally.
--  MIRROR_ROTATE_90,   // Mirror then Rotate clockwise.
--  MIRROR_ROTATE_180,  // Mirror vertically.
--  MIRROR_ROTATE_270   // Transpose.
--};
--
- // Convert a frame of YUV to 32 bit ARGB.
- // Pass in YV16/YV12 depending on source format
- void ConvertYCbCrToRGB32(const uint8* yplane,
-@@ -48,22 +35,6 @@ void ConvertYCbCrToRGB32(const uint8* yplane,
-                          int rgbstride,
-                          YUVType yuv_type);
- 
--// Scale a frame of YUV to 32 bit ARGB.
--// Supports rotation and mirroring.
--void ScaleYCbCrToRGB32(const uint8* yplane,
--                       const uint8* uplane,
--                       const uint8* vplane,
--                       uint8* rgbframe,
--                       int frame_width,
--                       int frame_height,
--                       int scaled_width,
--                       int scaled_height,
--                       int ystride,
--                       int uvstride,
--                       int rgbstride,
--                       YUVType yuv_type,
--                       Rotate view_rotate);
--
- }  // namespace gfx
- }  // namespace mozilla
- 
-diff --git a/gfx/ycbcr/yuv_row.h b/gfx/ycbcr/yuv_row.h
-index c43f713..2a82972 100644
---- a/gfx/ycbcr/yuv_row.h
-+++ b/gfx/ycbcr/yuv_row.h
-@@ -28,53 +28,6 @@ void FastConvertYUVToRGB32Row_C(const uint8* y_buf,
-                                 int width);
- 
- 
--// Can do 1x, half size or any scale down by an integer amount.
--// Step can be negative (mirroring, rotate 180).
--// This is the third fastest of the scalers.
--void ConvertYUVToRGB32Row(const uint8* y_buf,
--                          const uint8* u_buf,
--                          const uint8* v_buf,
--                          uint8* rgb_buf,
--                          int width,
--                          int step);
--
--// Rotate is like Convert, but applies different step to Y versus U and V.
--// This allows rotation by 90 or 270, by stepping by stride.
--// This is the fourth fastest of the scalers.
--void RotateConvertYUVToRGB32Row(const uint8* y_buf,
--                                const uint8* u_buf,
--                                const uint8* v_buf,
--                                uint8* rgb_buf,
--                                int width,
--                                int ystep,
--                                int uvstep);
--
--// Doubler does 4 pixels at a time.  Each pixel is replicated.
--// This is the fastest of the scalers.
--void DoubleYUVToRGB32Row(const uint8* y_buf,
--                         const uint8* u_buf,
--                         const uint8* v_buf,
--                         uint8* rgb_buf,
--                         int width);
--
--// Handles arbitrary scaling up or down.
--// Mirroring is supported, but not 90 or 270 degree rotation.
--// Chroma is under sampled every 2 pixels for performance.
--// This is the slowest of the scalers.
--void ScaleYUVToRGB32Row(const uint8* y_buf,
--                        const uint8* u_buf,
--                        const uint8* v_buf,
--                        uint8* rgb_buf,
--                        int width,
--                        int scaled_dx);
--
--void ScaleYUVToRGB32Row_C(const uint8* y_buf,
--                          const uint8* u_buf,
--                          const uint8* v_buf,
--                          uint8* rgb_buf,
--                          int width,
--                          int scaled_dx);
--
- }  // extern "C"
- 
- // x64 uses MMX2 (SSE) so emms is not required.
-diff --git a/gfx/ycbcr/yuv_row_c.cpp b/gfx/ycbcr/yuv_row_c.cpp
-index a81416c..d3bdab4 100644
---- a/gfx/ycbcr/yuv_row_c.cpp
-+++ b/gfx/ycbcr/yuv_row_c.cpp
-@@ -172,25 +172,5 @@ void FastConvertYUVToRGB32Row_C(const uint8* y_buf,
-   }
- }
- 
--// 28.4 fixed point is used.  A shift by 4 isolates the integer.
--// A shift by 5 is used to further subsample the chrominance channels.
--// & 15 isolates the fixed point fraction.  >> 2 to get the upper 2 bits,
--// for 1/4 pixel accurate interpolation.
--void ScaleYUVToRGB32Row_C(const uint8* y_buf,
--                        const uint8* u_buf,
--                        const uint8* v_buf,
--                        uint8* rgb_buf,
--                        int width,
--                        int scaled_dx) {
--  int scaled_x = 0;
--  for (int x = 0; x < width; ++x) {
--    uint8 u = u_buf[scaled_x >> 5];
--    uint8 v = v_buf[scaled_x >> 5];
--    uint8 y0 = y_buf[scaled_x >> 4];
--    YuvPixel(y0, u, v, rgb_buf);
--    rgb_buf += 4;
--    scaled_x += scaled_dx;
--  }
--}
- }  // extern "C"
- 
-diff --git a/gfx/ycbcr/yuv_row_linux.cpp b/gfx/ycbcr/yuv_row_linux.cpp
-index 5fb2bc4..ce5ee89 100644
---- a/gfx/ycbcr/yuv_row_linux.cpp
-+++ b/gfx/ycbcr/yuv_row_linux.cpp
-@@ -21,14 +21,6 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
-   FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width);
- }
-  
--void ScaleYUVToRGB32Row(const uint8* y_buf,
--                        const uint8* u_buf,
--                        const uint8* v_buf,
--                        uint8* rgb_buf,
--                        int width,
--                        int scaled_dx) {
--  ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx);
--}
- #else
- 
- #define RGBY(i) { \
-@@ -315,75 +307,6 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,  // rdi
- );
- }
- 
--void ScaleYUVToRGB32Row(const uint8* y_buf,  // rdi
--                        const uint8* u_buf,  // rsi
--                        const uint8* v_buf,  // rdx
--                        uint8* rgb_buf,      // rcx
--                        int width,           // r8
--                        int scaled_dx) {     // r9
--  asm(
--  "xor    %%r11,%%r11\n"
--  "sub    $0x2,%4\n"
--  "js     scalenext\n"
--
--"scaleloop:"
--  "mov    %%r11,%%r10\n"
--  "sar    $0x5,%%r10\n"
--  "movzb  (%1,%%r10,1),%%rax\n"
--  "movq   2048(%5,%%rax,8),%%xmm0\n"
--  "movzb  (%2,%%r10,1),%%rax\n"
--  "movq   4096(%5,%%rax,8),%%xmm1\n"
--  "lea    (%%r11,%6),%%r10\n"
--  "sar    $0x4,%%r11\n"
--  "movzb  (%0,%%r11,1),%%rax\n"
--  "paddsw %%xmm1,%%xmm0\n"
--  "movq   (%5,%%rax,8),%%xmm1\n"
--  "lea    (%%r10,%6),%%r11\n"
--  "sar    $0x4,%%r10\n"
--  "movzb  (%0,%%r10,1),%%rax\n"
--  "movq   (%5,%%rax,8),%%xmm2\n"
--  "paddsw %%xmm0,%%xmm1\n"
--  "paddsw %%xmm0,%%xmm2\n"
--  "shufps $0x44,%%xmm2,%%xmm1\n"
--  "psraw  $0x6,%%xmm1\n"
--  "packuswb %%xmm1,%%xmm1\n"
--  "movq   %%xmm1,0x0(%3)\n"
--  "add    $0x8,%3\n"
--  "sub    $0x2,%4\n"
--  "jns    scaleloop\n"
--
--"scalenext:"
--  "add    $0x1,%4\n"
--  "js     scaledone\n"
--
--  "mov    %%r11,%%r10\n"
--  "sar    $0x5,%%r10\n"
--  "movzb  (%1,%%r10,1),%%rax\n"
--  "movq   2048(%5,%%rax,8),%%xmm0\n"
--  "movzb  (%2,%%r10,1),%%rax\n"
--  "movq   4096(%5,%%rax,8),%%xmm1\n"
--  "paddsw %%xmm1,%%xmm0\n"
--  "sar    $0x4,%%r11\n"
--  "movzb  (%0,%%r11,1),%%rax\n"
--  "movq   (%5,%%rax,8),%%xmm1\n"
--  "paddsw %%xmm0,%%xmm1\n"
--  "psraw  $0x6,%%xmm1\n"
--  "packuswb %%xmm1,%%xmm1\n"
--  "movd   %%xmm1,0x0(%3)\n"
--
--"scaledone:"
--  :
--  : "r"(y_buf),  // %0
--    "r"(u_buf),  // %1
--    "r"(v_buf),  // %2
--    "r"(rgb_buf),  // %3
--    "r"(width),  // %4
--    "r" (kCoefficientsRgbY),  // %5
--    "r"(static_cast<long>(scaled_dx))  // %6
--  : "memory", "r10", "r11", "rax", "xmm0", "xmm1", "xmm2"
--);
--}
--
- #else
- 
- void FastConvertYUVToRGB32Row(const uint8* y_buf,
-@@ -443,81 +366,6 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
-   "ret\n"
- );
- 
--
--void ScaleYUVToRGB32Row(const uint8* y_buf,
--                        const uint8* u_buf,
--                        const uint8* v_buf,
--                        uint8* rgb_buf,
--                        int width,
--                        int scaled_dx);
--
--  asm(
--  ".global ScaleYUVToRGB32Row\n"
--"ScaleYUVToRGB32Row:\n"
--  "pusha\n"
--  "mov    0x24(%esp),%edx\n"
--  "mov    0x28(%esp),%edi\n"
--  "mov    0x2c(%esp),%esi\n"
--  "mov    0x30(%esp),%ebp\n"
--  "mov    0x34(%esp),%ecx\n"
--  "xor    %ebx,%ebx\n"
--  "jmp    scaleend\n"
--
--"scaleloop:"
--  "mov    %ebx,%eax\n"
--  "sar    $0x5,%eax\n"
--  "movzbl (%edi,%eax,1),%eax\n"
--  "movq   kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
--  "mov    %ebx,%eax\n"
--  "sar    $0x5,%eax\n"
--  "movzbl (%esi,%eax,1),%eax\n"
--  "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
--  "mov    %ebx,%eax\n"
--  "add    0x38(%esp),%ebx\n"
--  "sar    $0x4,%eax\n"
--  "movzbl (%edx,%eax,1),%eax\n"
--  "movq   kCoefficientsRgbY(,%eax,8),%mm1\n"
--  "mov    %ebx,%eax\n"
--  "add    0x38(%esp),%ebx\n"
--  "sar    $0x4,%eax\n"
--  "movzbl (%edx,%eax,1),%eax\n"
--  "movq   kCoefficientsRgbY(,%eax,8),%mm2\n"
--  "paddsw %mm0,%mm1\n"
--  "paddsw %mm0,%mm2\n"
--  "psraw  $0x6,%mm1\n"
--  "psraw  $0x6,%mm2\n"
--  "packuswb %mm2,%mm1\n"
--  "movntq %mm1,0x0(%ebp)\n"
--  "add    $0x8,%ebp\n"
--"scaleend:"
--  "sub    $0x2,%ecx\n"
--  "jns    scaleloop\n"
--
--  "and    $0x1,%ecx\n"
--  "je     scaledone\n"
--
--  "mov    %ebx,%eax\n"
--  "sar    $0x5,%eax\n"
--  "movzbl (%edi,%eax,1),%eax\n"
--  "movq   kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
--  "mov    %ebx,%eax\n"
--  "sar    $0x5,%eax\n"
--  "movzbl (%esi,%eax,1),%eax\n"
--  "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
--  "mov    %ebx,%eax\n"
--  "sar    $0x4,%eax\n"
--  "movzbl (%edx,%eax,1),%eax\n"
--  "movq   kCoefficientsRgbY(,%eax,8),%mm1\n"
--  "paddsw %mm0,%mm1\n"
--  "psraw  $0x6,%mm1\n"
--  "packuswb %mm1,%mm1\n"
--  "movd   %mm1,0x0(%ebp)\n"
--
--"scaledone:"
--  "popa\n"
--  "ret\n"
--);
--
- #endif
- #endif // ARCH_CPU_ARM_FAMILY
- }  // extern "C"
-diff --git a/gfx/ycbcr/yuv_row_mac.cpp b/gfx/ycbcr/yuv_row_mac.cpp
-index a7e8243..3515ada 100644
---- a/gfx/ycbcr/yuv_row_mac.cpp
-+++ b/gfx/ycbcr/yuv_row_mac.cpp
-@@ -18,14 +18,6 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
-   FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width);
- }
-  
--void ScaleYUVToRGB32Row(const uint8* y_buf,
--                        const uint8* u_buf,
--                        const uint8* v_buf,
--                        uint8* rgb_buf,
--                        int width,
--                        int scaled_dx) {
--  ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx);
--}
- #else
- 
- #define RGBY(i) { \
-@@ -323,91 +315,6 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
-                           &kCoefficientsRgbY[0][0]);
- }
- 
--extern void MacScaleYUVToRGB32Row(const uint8* y_buf,
--                               const uint8* u_buf,
--                               const uint8* v_buf,
--                               uint8* rgb_buf,
--                               int width,
--                               int scaled_dx,
--                               int16 *kCoefficientsRgbY);
--
--  __asm__(
--"_MacScaleYUVToRGB32Row:\n"
--  "pusha\n"
--  "mov    0x24(%esp),%edx\n"
--  "mov    0x28(%esp),%edi\n"
--  "mov    0x2c(%esp),%esi\n"
--  "mov    0x30(%esp),%ebp\n"
--  "mov    0x3c(%esp),%ecx\n"
--  "xor    %ebx,%ebx\n"
--  "jmp    Lscaleend\n"
--
--"Lscaleloop:"
--  "mov    %ebx,%eax\n"
--  "sar    $0x5,%eax\n"
--  "movzbl (%edi,%eax,1),%eax\n"
--  "movq   2048(%ecx,%eax,8),%mm0\n"
--  "mov    %ebx,%eax\n"
--  "sar    $0x5,%eax\n"
--  "movzbl (%esi,%eax,1),%eax\n"
--  "paddsw 4096(%ecx,%eax,8),%mm0\n"
--  "mov    %ebx,%eax\n"
--  "add    0x38(%esp),%ebx\n"
--  "sar    $0x4,%eax\n"
--  "movzbl (%edx,%eax,1),%eax\n"
--  "movq   0(%ecx,%eax,8),%mm1\n"
--  "mov    %ebx,%eax\n"
--  "add    0x38(%esp),%ebx\n"
--  "sar    $0x4,%eax\n"
--  "movzbl (%edx,%eax,1),%eax\n"
--  "movq   0(%ecx,%eax,8),%mm2\n"
--  "paddsw %mm0,%mm1\n"
--  "paddsw %mm0,%mm2\n"
--  "psraw  $0x6,%mm1\n"
--  "psraw  $0x6,%mm2\n"
--  "packuswb %mm2,%mm1\n"
--  "movntq %mm1,0x0(%ebp)\n"
--  "add    $0x8,%ebp\n"
--"Lscaleend:"
--  "sub    $0x2,0x34(%esp)\n"
--  "jns    Lscaleloop\n"
--
--  "and    $0x1,0x34(%esp)\n"
--  "je     Lscaledone\n"
--
--  "mov    %ebx,%eax\n"
--  "sar    $0x5,%eax\n"
--  "movzbl (%edi,%eax,1),%eax\n"
--  "movq   2048(%ecx,%eax,8),%mm0\n"
--  "mov    %ebx,%eax\n"
--  "sar    $0x5,%eax\n"
--  "movzbl (%esi,%eax,1),%eax\n"
--  "paddsw 4096(%ecx,%eax,8),%mm0\n"
--  "mov    %ebx,%eax\n"
--  "sar    $0x4,%eax\n"
--  "movzbl (%edx,%eax,1),%eax\n"
--  "movq   0(%ecx,%eax,8),%mm1\n"
--  "paddsw %mm0,%mm1\n"
--  "psraw  $0x6,%mm1\n"
--  "packuswb %mm1,%mm1\n"
--  "movd   %mm1,0x0(%ebp)\n"
--
--"Lscaledone:"
--  "popa\n"
--  "ret\n"
--);
--
--
--void ScaleYUVToRGB32Row(const uint8* y_buf,
--                        const uint8* u_buf,
--                        const uint8* v_buf,
--                        uint8* rgb_buf,
--                        int width,
--                        int scaled_dx) {
--
--  MacScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx,
--                        &kCoefficientsRgbY[0][0]);
--}
- #endif // ARCH_CPU_PPC 
- }  // extern "C"
- 
-diff --git a/gfx/ycbcr/yuv_row_win.cpp b/gfx/ycbcr/yuv_row_win.cpp
-index a77a16f..f994931 100644
---- a/gfx/ycbcr/yuv_row_win.cpp
-+++ b/gfx/ycbcr/yuv_row_win.cpp
-@@ -297,273 +297,5 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
-   }
- }
- 
--__declspec(naked)
--void ConvertYUVToRGB32Row(const uint8* y_buf,
--                          const uint8* u_buf,
--                          const uint8* v_buf,
--                          uint8* rgb_buf,
--                          int width,
--                          int step) {
--  __asm {
--    pushad
--    mov       edx, [esp + 32 + 4]   // Y
--    mov       edi, [esp + 32 + 8]   // U
--    mov       esi, [esp + 32 + 12]  // V
--    mov       ebp, [esp + 32 + 16]  // rgb
--    mov       ecx, [esp + 32 + 20]  // width
--    mov       ebx, [esp + 32 + 24]  // step
--    jmp       wend
--
-- wloop :
--    movzx     eax, byte ptr [edi]
--    add       edi, ebx
--    movq      mm0, [kCoefficientsRgbU + 8 * eax]
--    movzx     eax, byte ptr [esi]
--    add       esi, ebx
--    paddsw    mm0, [kCoefficientsRgbV + 8 * eax]
--    movzx     eax, byte ptr [edx]
--    add       edx, ebx
--    movq      mm1, [kCoefficientsRgbY + 8 * eax]
--    movzx     eax, byte ptr [edx]
--    add       edx, ebx
--    movq      mm2, [kCoefficientsRgbY + 8 * eax]
--    paddsw    mm1, mm0
--    paddsw    mm2, mm0
--    psraw     mm1, 6
--    psraw     mm2, 6
--    packuswb  mm1, mm2
--    movntq    [ebp], mm1
--    add       ebp, 8
-- wend :
--    sub       ecx, 2
--    jns       wloop
--
--    and       ecx, 1  // odd number of pixels?
--    jz        wdone
--
--    movzx     eax, byte ptr [edi]
--    movq      mm0, [kCoefficientsRgbU + 8 * eax]
--    movzx     eax, byte ptr [esi]
--    paddsw    mm0, [kCoefficientsRgbV + 8 * eax]
--    movzx     eax, byte ptr [edx]
--    movq      mm1, [kCoefficientsRgbY + 8 * eax]
--    paddsw    mm1, mm0
--    psraw     mm1, 6
--    packuswb  mm1, mm1
--    movd      [ebp], mm1
-- wdone :
--
--    popad
--    ret
--  }
--}
--
--__declspec(naked)
--void RotateConvertYUVToRGB32Row(const uint8* y_buf,
--                                const uint8* u_buf,
--                                const uint8* v_buf,
--                                uint8* rgb_buf,
--                                int width,
--                                int ystep,
--                                int uvstep) {
--  __asm {
--    pushad
--    mov       edx, [esp + 32 + 4]   // Y
--    mov       edi, [esp + 32 + 8]   // U
--    mov       esi, [esp + 32 + 12]  // V
--    mov       ebp, [esp + 32 + 16]  // rgb
--    mov       ecx, [esp + 32 + 20]  // width
--    jmp       wend
--
-- wloop :
--    movzx     eax, byte ptr [edi]
--    mov       ebx, [esp + 32 + 28]  // uvstep
--    add       edi, ebx
--    movq      mm0, [kCoefficientsRgbU + 8 * eax]
--    movzx     eax, byte ptr [esi]
--    add       esi, ebx
--    paddsw    mm0, [kCoefficientsRgbV + 8 * eax]
--    movzx     eax, byte ptr [edx]
--    mov       ebx, [esp + 32 + 24]  // ystep
--    add       edx, ebx
--    movq      mm1, [kCoefficientsRgbY + 8 * eax]
--    movzx     eax, byte ptr [edx]
--    add       edx, ebx
--    movq      mm2, [kCoefficientsRgbY + 8 * eax]
--    paddsw    mm1, mm0
--    paddsw    mm2, mm0
--    psraw     mm1, 6
--    psraw     mm2, 6
--    packuswb  mm1, mm2
--    movntq    [ebp], mm1
--    add       ebp, 8
-- wend :
--    sub       ecx, 2
--    jns       wloop
--
--    and       ecx, 1  // odd number of pixels?
--    jz        wdone
--
--    movzx     eax, byte ptr [edi]
--    movq      mm0, [kCoefficientsRgbU + 8 * eax]
--    movzx     eax, byte ptr [esi]
--    paddsw    mm0, [kCoefficientsRgbV + 8 * eax]
--    movzx     eax, byte ptr [edx]
--    movq      mm1, [kCoefficientsRgbY + 8 * eax]
--    paddsw    mm1, mm0
--    psraw     mm1, 6
--    packuswb  mm1, mm1
--    movd      [ebp], mm1
-- wdone :
--
--    popad
--    ret
--  }
--}
--
--__declspec(naked)
--void DoubleYUVToRGB32Row(const uint8* y_buf,
--                         const uint8* u_buf,
--                         const uint8* v_buf,
--                         uint8* rgb_buf,
--                         int width) {
--  __asm {
--    pushad
--    mov       edx, [esp + 32 + 4]   // Y
--    mov       edi, [esp + 32 + 8]   // U
--    mov       esi, [esp + 32 + 12]  // V
--    mov       ebp, [esp + 32 + 16]  // rgb
--    mov       ecx, [esp + 32 + 20]  // width
--    jmp       wend
--
-- wloop :
--    movzx     eax, byte ptr [edi]
--    add       edi, 1
--    movzx     ebx, byte ptr [esi]
--    add       esi, 1
--    movq      mm0, [kCoefficientsRgbU + 8 * eax]
--    movzx     eax, byte ptr [edx]
--    paddsw    mm0, [kCoefficientsRgbV + 8 * ebx]
--    movq      mm1, [kCoefficientsRgbY + 8 * eax]
--    paddsw    mm1, mm0
--    psraw     mm1, 6
--    packuswb  mm1, mm1
--    punpckldq mm1, mm1
--    movntq    [ebp], mm1
--
--    movzx     ebx, byte ptr [edx + 1]
--    add       edx, 2
--    paddsw    mm0, [kCoefficientsRgbY + 8 * ebx]
--    psraw     mm0, 6
--    packuswb  mm0, mm0
--    punpckldq mm0, mm0
--    movntq    [ebp+8], mm0
--    add       ebp, 16
-- wend :
--    sub       ecx, 4
--    jns       wloop
--
--    add       ecx, 4
--    jz        wdone
--
--    movzx     eax, byte ptr [edi]
--    movq      mm0, [kCoefficientsRgbU + 8 * eax]
--    movzx     eax, byte ptr [esi]
--    paddsw    mm0, [kCoefficientsRgbV + 8 * eax]
--    movzx     eax, byte ptr [edx]
--    movq      mm1, [kCoefficientsRgbY + 8 * eax]
--    paddsw    mm1, mm0
--    psraw     mm1, 6
--    packuswb  mm1, mm1
--    jmp       wend1
--
-- wloop1 :
--    movd      [ebp], mm1
--    add       ebp, 4
-- wend1 :
--    sub       ecx, 1
--    jns       wloop1
-- wdone :
--    popad
--    ret
--  }
--}
--
--// This version does general purpose scaling by any amount, up or down.
--// The only thing it cannot do is rotation by 90 or 270.
--// For performance the chroma is under sampled, reducing cost of a 3x
--// 1080p scale from 8.4 ms to 5.4 ms.
--__declspec(naked)
--void ScaleYUVToRGB32Row(const uint8* y_buf,
--                        const uint8* u_buf,
--                        const uint8* v_buf,
--                        uint8* rgb_buf,
--                        int width,
--                        int dx) {
--  __asm {
--    pushad
--    mov       edx, [esp + 32 + 4]   // Y
--    mov       edi, [esp + 32 + 8]   // U
--    mov       esi, [esp + 32 + 12]  // V
--    mov       ebp, [esp + 32 + 16]  // rgb
--    mov       ecx, [esp + 32 + 20]  // width
--    xor       ebx, ebx              // x
--    jmp       scaleend
--
-- scaleloop :
--    mov       eax, ebx
--    sar       eax, 5
--    movzx     eax, byte ptr [edi + eax]
--    movq      mm0, [kCoefficientsRgbU + 8 * eax]
--    mov       eax, ebx
--    sar       eax, 5
--    movzx     eax, byte ptr [esi + eax]
--    paddsw    mm0, [kCoefficientsRgbV + 8 * eax]
--    mov       eax, ebx
--    add       ebx, [esp + 32 + 24]  // x += dx
--    sar       eax, 4
--    movzx     eax, byte ptr [edx + eax]
--    movq      mm1, [kCoefficientsRgbY + 8 * eax]
--    mov       eax, ebx
--    add       ebx, [esp + 32 + 24]  // x += dx
--    sar       eax, 4
--    movzx     eax, byte ptr [edx + eax]
--    movq      mm2, [kCoefficientsRgbY + 8 * eax]
--    paddsw    mm1, mm0
--    paddsw    mm2, mm0
--    psraw     mm1, 6
--    psraw     mm2, 6
--    packuswb  mm1, mm2
--    movntq    [ebp], mm1
--    add       ebp, 8
-- scaleend :
--    sub       ecx, 2
--    jns       scaleloop
--
--    and       ecx, 1  // odd number of pixels?
--    jz        scaledone
--
--    mov       eax, ebx
--    sar       eax, 5
--    movzx     eax, byte ptr [edi + eax]
--    movq      mm0, [kCoefficientsRgbU + 8 * eax]
--    mov       eax, ebx
--    sar       eax, 5
--    movzx     eax, byte ptr [esi + eax]
--    paddsw    mm0, [kCoefficientsRgbV + 8 * eax]
--    mov       eax, ebx
--    sar       eax, 4
--    movzx     eax, byte ptr [edx + eax]
--    movq      mm1, [kCoefficientsRgbY + 8 * eax]
--    paddsw    mm1, mm0
--    psraw     mm1, 6
--    packuswb  mm1, mm1
--    movd      [ebp], mm1
--
-- scaledone :
--    popad
--    ret
--  }
--}
- }  // extern "C"
- 
deleted file mode 100644
--- a/gfx/ycbcr/row_c_fix.patch
+++ /dev/null
@@ -1,23 +0,0 @@
-diff --git a/gfx/ycbcr/yuv_row_c.cpp b/gfx/ycbcr/yuv_row_c.cpp
-index 36d9bda..b5c0018 100644
---- a/gfx/ycbcr/yuv_row_c.cpp
-+++ b/gfx/ycbcr/yuv_row_c.cpp
-@@ -142,17 +142,17 @@ static inline void YuvPixel(uint8 y,
-                             uint8* rgb_buf) {
-   int32 d = static_cast<int32>(u) - 128;
-   int32 e = static_cast<int32>(v) - 128;
- 
-   int32 cb =   (516 * d + 128);
-   int32 cg = (- 100 * d - 208 * e + 128);
-   int32 cr =             (409 * e + 128);
- 
--  int32 C298a = ((static_cast<int32>(y) - 16) * 298 + 128);
-+  int32 C298a = ((static_cast<int32>(y) - 16) * 298);
-   *reinterpret_cast<uint32*>(rgb_buf) = (clip(C298a + cb)) |
-                                         (clip(C298a + cg) << 8) |
-                                         (clip(C298a + cr) << 16) |
-                                         (0xff000000);
- }
- 
- void FastConvertYUVToRGB32Row_C(const uint8* y_buf,
-                               const uint8* u_buf,
--- a/gfx/ycbcr/update.sh
+++ b/gfx/ycbcr/update.sh
@@ -1,18 +1,9 @@
 # update.sh <chromium-src-directory>
 cp $1/media/base/yuv_convert.h .
 cp $1/media/base/yuv_convert.cc yuv_convert.cpp
 cp $1/media/base/yuv_row.h .
-cp $1/media/base/yuv_row_linux.cc yuv_row_linux.cpp
-cp $1/media/base/yuv_row_mac.cc yuv_row_mac.cpp
+cp $1/media/base/yuv_row_table.cc yuv_row_table.cpp
+cp $1/media/base/yuv_row_posix.cc yuv_row_posix.cpp
 cp $1/media/base/yuv_row_win.cc yuv_row_win.cpp
-cp $1/media/base/yuv_row_linux.cc yuv_row_c.cpp
+cp $1/media/base/yuv_row_posix.cc yuv_row_c.cpp
 patch -p3 <convert.patch
-patch -p3 <picture_region.patch
-patch -p3 <remove_scale.patch
-patch -p3 <export.patch
-patch -p3 <win64_mac64.patch
-patch -p3 <yv24.patch
-patch -p3 <row_c_fix.patch
-patch -p3 <bug572034_mac_64bit.patch
-patch -p3 <bug577645_movntq.patch
-patch -p3 <add_scale.patch
deleted file mode 100644
--- a/gfx/ycbcr/win64_mac64.patch
+++ /dev/null
@@ -1,57 +0,0 @@
-diff --git a/gfx/ycbcr/yuv_row_mac.cpp b/gfx/ycbcr/yuv_row_mac.cpp
-index 351466c..2a679cc 100644
---- a/gfx/ycbcr/yuv_row_mac.cpp
-+++ b/gfx/ycbcr/yuv_row_mac.cpp
-@@ -8,7 +8,10 @@
- 
- extern "C" {
- 
--#if defined(ARCH_CPU_PPC)
-+// PPC and 64 Bit builds use the C fallback. Optimized code
-+// needs to be fixed for 64 bit builds. PPC has no optimized code
-+// option at all.
-+#if defined(ARCH_CPU_PPC) || defined(ARCH_CPU_64_BITS)
- // PPC implementation uses C fallback
- void FastConvertYUVToRGB32Row(const uint8* y_buf,
-                               const uint8* u_buf,
-@@ -315,6 +318,6 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
-                           &kCoefficientsRgbY[0][0]);
- }
- 
--#endif // ARCH_CPU_PPC 
-+#endif // ARCH_CPU_PPC || ARCH_CPU_64_BITS
- }  // extern "C"
- 
-diff --git a/gfx/ycbcr/yuv_row_win.cpp b/gfx/ycbcr/yuv_row_win.cpp
-index f994931..708ef14 100644
---- a/gfx/ycbcr/yuv_row_win.cpp
-+++ b/gfx/ycbcr/yuv_row_win.cpp
-@@ -5,6 +5,21 @@
- #include "yuv_row.h"
- 
- extern "C" {
-+// 64 Bit builds use the C fallback. Optimized code
-+// needs to be fixed for 64 bit builds.
-+#if defined(ARCH_CPU_64_BITS)
-+// 64 bit implementation uses C fallback
-+void FastConvertYUVToRGB32Row(const uint8* y_buf,
-+                              const uint8* u_buf,
-+                              const uint8* v_buf,
-+                              uint8* rgb_buf,
-+                              int width) {
-+  FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width);
-+}
-+ 
-+#else
-+
-+
- #define RGBY(i) { \
-   static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
-   static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
-@@ -297,5 +312,6 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
-   }
- }
- 
-+#endif // ARCH_CPU_64_BITS
- }  // extern "C"
- 
--- a/gfx/ycbcr/yuv_convert.cpp
+++ b/gfx/ycbcr/yuv_convert.cpp
@@ -1,9 +1,9 @@
-// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
 // This webpage shows layout of YV12 and other YUV formats
 // http://www.fourcc.org/yuv.php
 // The actual conversion is best described here
 // http://en.wikipedia.org/wiki/YUV
 // An article on optimizing YUV conversion using tables instead of multiplies
@@ -15,21 +15,28 @@
 //
 // ARGB pixel format is output, which on little endian is stored as BGRA.
 // The alpha is set to 255, allowing the application to use RGBA or RGB32.
 
 #include "yuv_convert.h"
 
 // Header for low level row functions.
 #include "yuv_row.h"
+#define MOZILLA_SSE_INCLUDE_HEADER_FOR_SSE2
+#define MOZILLA_SSE_INCLUDE_HEADER_FOR_MMX
 #include "mozilla/SSE.h"
 
 namespace mozilla {
 
 namespace gfx {
+ 
+// 16.16 fixed point arithmetic
+const int kFractionBits = 16;
+const int kFractionMax = 1 << kFractionBits;
+const int kFractionMask = ((1 << kFractionBits) - 1);
 
 // Convert a frame of YUV to 32 bit ARGB.
 NS_GFX_(void) ConvertYCbCrToRGB32(const uint8* y_buf,
                                   const uint8* u_buf,
                                   const uint8* v_buf,
                                   uint8* rgb_buf,
                                   int pic_x,
                                   int pic_y,
@@ -62,161 +69,308 @@ NS_GFX_(void) ConvertYCbCrToRGB32(const 
                                  u_ptr++,
                                  v_ptr++,
                                  rgb_row,
                                  1,
                                  x_shift);
       rgb_row += 4;
     }
 
-    if (has_sse)
+    if (has_sse) {
       FastConvertYUVToRGB32Row(y_ptr,
                                u_ptr,
                                v_ptr,
                                rgb_row,
                                x_width);
-    else
+    }
+    else {
       FastConvertYUVToRGB32Row_C(y_ptr,
                                  u_ptr,
                                  v_ptr,
                                  rgb_row,
                                  x_width,
                                  x_shift);
+    }
   }
 
-#ifdef ARCH_CPU_X86_FAMILY
-  // SSE used for FastConvertYUVToRGB32Row requires emms instruction.
+  // MMX used for FastConvertYUVToRGB32Row requires emms instruction.
   if (has_sse)
     EMMS();
+}
+
+#if defined(MOZILLA_COMPILE_WITH_SSE2)
+// FilterRows combines two rows of the image using linear interpolation.
+// SSE2 version does 16 pixels at a time
+static void FilterRows(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
+                       int source_width, int source_y_fraction) {
+  __m128i zero = _mm_setzero_si128();
+  __m128i y1_fraction = _mm_set1_epi16(source_y_fraction);
+  __m128i y0_fraction = _mm_set1_epi16(256 - source_y_fraction);
+
+  const __m128i* y0_ptr128 = reinterpret_cast<const __m128i*>(y0_ptr);
+  const __m128i* y1_ptr128 = reinterpret_cast<const __m128i*>(y1_ptr);
+  __m128i* dest128 = reinterpret_cast<__m128i*>(ybuf);
+  __m128i* end128 = reinterpret_cast<__m128i*>(ybuf + source_width);
+
+  do {
+    __m128i y0 = _mm_loadu_si128(y0_ptr128);
+    __m128i y1 = _mm_loadu_si128(y1_ptr128);
+    __m128i y2 = _mm_unpackhi_epi8(y0, zero);
+    __m128i y3 = _mm_unpackhi_epi8(y1, zero);
+    y0 = _mm_unpacklo_epi8(y0, zero);
+    y1 = _mm_unpacklo_epi8(y1, zero);
+    y0 = _mm_mullo_epi16(y0, y0_fraction);
+    y1 = _mm_mullo_epi16(y1, y1_fraction);
+    y2 = _mm_mullo_epi16(y2, y0_fraction);
+    y3 = _mm_mullo_epi16(y3, y1_fraction);
+    y0 = _mm_add_epi16(y0, y1);
+    y2 = _mm_add_epi16(y2, y3);
+    y0 = _mm_srli_epi16(y0, 8);
+    y2 = _mm_srli_epi16(y2, 8);
+    y0 = _mm_packus_epi16(y0, y2);
+    *dest128++ = y0;
+    ++y0_ptr128;
+    ++y1_ptr128;
+  } while (dest128 < end128);
+}
+#elif defined(MOZILLA_COMPILE_WITH_MMX)
+// MMX version does 8 pixels at a time
+static void FilterRows(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
+                       int source_width, int source_y_fraction) {
+  __m64 zero = _mm_setzero_si64();
+  __m64 y1_fraction = _mm_set1_pi16(source_y_fraction);
+  __m64 y0_fraction = _mm_set1_pi16(256 - source_y_fraction);
+
+  const __m64* y0_ptr64 = reinterpret_cast<const __m64*>(y0_ptr);
+  const __m64* y1_ptr64 = reinterpret_cast<const __m64*>(y1_ptr);
+  __m64* dest64 = reinterpret_cast<__m64*>(ybuf);
+  __m64* end64 = reinterpret_cast<__m64*>(ybuf + source_width);
+
+  do {
+    __m64 y0 = *y0_ptr64++;
+    __m64 y1 = *y1_ptr64++;
+    __m64 y2 = _mm_unpackhi_pi8(y0, zero);
+    __m64 y3 = _mm_unpackhi_pi8(y1, zero);
+    y0 = _mm_unpacklo_pi8(y0, zero);
+    y1 = _mm_unpacklo_pi8(y1, zero);
+    y0 = _mm_mullo_pi16(y0, y0_fraction);
+    y1 = _mm_mullo_pi16(y1, y1_fraction);
+    y2 = _mm_mullo_pi16(y2, y0_fraction);
+    y3 = _mm_mullo_pi16(y3, y1_fraction);
+    y0 = _mm_add_pi16(y0, y1);
+    y2 = _mm_add_pi16(y2, y3);
+    y0 = _mm_srli_pi16(y0, 8);
+    y2 = _mm_srli_pi16(y2, 8);
+    y0 = _mm_packs_pu16(y0, y2);
+    *dest64++ = y0;
+  } while (dest64 < end64);
+}
+#else  // no MMX or SSE2
+// C version does 8 at a time to mimic MMX code
+static void FilterRows(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
+                       int source_width, int source_y_fraction) {
+  int y1_fraction = source_y_fraction;
+  int y0_fraction = 256 - y1_fraction;
+  uint8* end = ybuf + source_width;
+  do {
+    ybuf[0] = (y0_ptr[0] * y0_fraction + y1_ptr[0] * y1_fraction) >> 8;
+    ybuf[1] = (y0_ptr[1] * y0_fraction + y1_ptr[1] * y1_fraction) >> 8;
+    ybuf[2] = (y0_ptr[2] * y0_fraction + y1_ptr[2] * y1_fraction) >> 8;
+    ybuf[3] = (y0_ptr[3] * y0_fraction + y1_ptr[3] * y1_fraction) >> 8;
+    ybuf[4] = (y0_ptr[4] * y0_fraction + y1_ptr[4] * y1_fraction) >> 8;
+    ybuf[5] = (y0_ptr[5] * y0_fraction + y1_ptr[5] * y1_fraction) >> 8;
+    ybuf[6] = (y0_ptr[6] * y0_fraction + y1_ptr[6] * y1_fraction) >> 8;
+    ybuf[7] = (y0_ptr[7] * y0_fraction + y1_ptr[7] * y1_fraction) >> 8;
+    y0_ptr += 8;
+    y1_ptr += 8;
+    ybuf += 8;
+  } while (ybuf < end);
+}
 #endif
-}
 
 // Scale a frame of YUV to 32 bit ARGB.
 NS_GFX_(void) ScaleYCbCrToRGB32(const uint8* y_buf,
                                 const uint8* u_buf,
                                 const uint8* v_buf,
                                 uint8* rgb_buf,
+                                int source_width,
+                                int source_height,
                                 int width,
                                 int height,
-                                int scaled_width,
-                                int scaled_height,
                                 int y_pitch,
                                 int uv_pitch,
                                 int rgb_pitch,
                                 YUVType yuv_type,
-                                Rotate view_rotate) {
+                                Rotate view_rotate,
+                                ScaleFilter filter) {
+  bool has_mmx = supports_mmx();
+
+  // 4096 allows 3 buffers to fit in 12k.
+  // Helps performance on CPU with 16K L1 cache.
+  // Large enough for 3840x2160 and 30" displays which are 2560x1600.
+  const int kFilterBufferSize = 4096;
+  // Disable filtering if the screen is too big (to avoid buffer overflows).
+  // This should never happen to regular users: they don't have monitors
+  // wider than 4096 pixels.
+  // TODO(fbarchard): Allow rotated videos to filter.
+  if (source_width > kFilterBufferSize || view_rotate)
+    filter = FILTER_NONE;
+
   unsigned int y_shift = yuv_type == YV12 ? 1 : 0;
-  unsigned int x_shift = yuv_type == YV24 ? 0 : 1;
-  bool has_mmx = supports_mmx();
   // Diagram showing origin and direction of source sampling.
   // ->0   4<-
   // 7       3
   //
   // 6       5
   // ->1   2<-
   // Rotations that start at right side of image.
   if ((view_rotate == ROTATE_180) ||
       (view_rotate == ROTATE_270) ||
       (view_rotate == MIRROR_ROTATE_0) ||
       (view_rotate == MIRROR_ROTATE_90)) {
-    y_buf += width - 1;
-    u_buf += width / 2 - 1;
-    v_buf += width / 2 - 1;
-    width = -width;
+    y_buf += source_width - 1;
+    u_buf += source_width / 2 - 1;
+    v_buf += source_width / 2 - 1;
+    source_width = -source_width;
   }
   // Rotations that start at bottom of image.
   if ((view_rotate == ROTATE_90) ||
       (view_rotate == ROTATE_180) ||
       (view_rotate == MIRROR_ROTATE_90) ||
       (view_rotate == MIRROR_ROTATE_180)) {
-    y_buf += (height - 1) * y_pitch;
-    u_buf += ((height >> y_shift) - 1) * uv_pitch;
-    v_buf += ((height >> y_shift) - 1) * uv_pitch;
-    height = -height;
+    y_buf += (source_height - 1) * y_pitch;
+    u_buf += ((source_height >> y_shift) - 1) * uv_pitch;
+    v_buf += ((source_height >> y_shift) - 1) * uv_pitch;
+    source_height = -source_height;
   }
 
   // Handle zero sized destination.
-  if (scaled_width == 0 || scaled_height == 0)
+  if (width == 0 || height == 0)
     return;
-  int scaled_dx = width * 16 / scaled_width;
-  int scaled_dy = height * 16 / scaled_height;
-
-  int scaled_dx_uv = scaled_dx;
+  int source_dx = source_width * kFractionMax / width;
+  int source_dy = source_height * kFractionMax / height;
+  int source_dx_uv = source_dx;
 
   if ((view_rotate == ROTATE_90) ||
       (view_rotate == ROTATE_270)) {
-    int tmp = scaled_height;
-    scaled_height = scaled_width;
-    scaled_width = tmp;
-    tmp = height;
+    int tmp = height;
     height = width;
     width = tmp;
-    int original_dx = scaled_dx;
-    int original_dy = scaled_dy;
-    scaled_dx = ((original_dy >> 4) * y_pitch) << 4;
-    scaled_dx_uv = ((original_dy >> 4) * uv_pitch) << 4;
-    scaled_dy = original_dx;
+    tmp = source_height;
+    source_height = source_width;
+    source_width = tmp;
+    int original_dx = source_dx;
+    int original_dy = source_dy;
+    source_dx = ((original_dy >> kFractionBits) * y_pitch) << kFractionBits;
+    source_dx_uv = ((original_dy >> kFractionBits) * uv_pitch) << kFractionBits;
+    source_dy = original_dx;
     if (view_rotate == ROTATE_90) {
       y_pitch = -1;
       uv_pitch = -1;
-      height = -height;
+      source_height = -source_height;
     } else {
       y_pitch = 1;
       uv_pitch = 1;
     }
   }
 
-  for (int y = 0; y < scaled_height; ++y) {
+  // Need padding because FilterRows() will write 1 to 16 extra pixels
+  // after the end for SSE2 version.
+  uint8 yuvbuf[16 + kFilterBufferSize * 3 + 16];
+  uint8* ybuf =
+      reinterpret_cast<uint8*>(reinterpret_cast<PRUptrdiff>(yuvbuf + 15) & ~15);
+  uint8* ubuf = ybuf + kFilterBufferSize;
+  uint8* vbuf = ubuf + kFilterBufferSize;
+  // TODO(fbarchard): Fixed point math is off by 1 on negatives.
+  int yscale_fixed = (source_height << kFractionBits) / height;
+
+  // TODO(fbarchard): Split this into separate function for better efficiency.
+  for (int y = 0; y < height; ++y) {
     uint8* dest_pixel = rgb_buf + y * rgb_pitch;
-    int scaled_y = (y * height / scaled_height);
-    const uint8* y_ptr = y_buf + scaled_y * y_pitch;
-    const uint8* u_ptr = u_buf + (scaled_y >> y_shift) * uv_pitch;
-    const uint8* v_ptr = v_buf + (scaled_y >> y_shift) * uv_pitch;
+    int source_y_subpixel = (y * yscale_fixed);
+    if (yscale_fixed >= (kFractionMax * 2)) {
+      source_y_subpixel += kFractionMax / 2;  // For 1/2 or less, center filter.
+    }
+    int source_y = source_y_subpixel >> kFractionBits;
 
-#if defined(_MSC_VER) && defined(_M_IX86)
-    if (scaled_width == (width * 2)) {
-      DoubleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
-                          dest_pixel, scaled_width);
-    } else if ((scaled_dx & 15) == 0) {  // Scaling by integer scale factor.
-      if (scaled_dx_uv == scaled_dx) {   // Not rotated.
-        if (scaled_dx == 16) {           // Not scaled
-          if (has_mmx)
-            FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
-                                     dest_pixel, scaled_width);
-          else
-            FastConvertYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr,
-                                      dest_pixel, scaled_width, x_shift);
-        } else {  // Simple scale down. ie half
-          ConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
-                               dest_pixel, scaled_width, scaled_dx >> 4);
-        }
+    const uint8* y0_ptr = y_buf + source_y * y_pitch;
+    const uint8* y1_ptr = y0_ptr + y_pitch;
+
+    const uint8* u0_ptr = u_buf + (source_y >> y_shift) * uv_pitch;
+    const uint8* u1_ptr = u0_ptr + uv_pitch;
+    const uint8* v0_ptr = v_buf + (source_y >> y_shift) * uv_pitch;
+    const uint8* v1_ptr = v0_ptr + uv_pitch;
+
+    // vertical scaler uses 16.8 fixed point
+    int source_y_fraction = (source_y_subpixel & kFractionMask) >> 8;
+    int source_uv_fraction =
+        ((source_y_subpixel >> y_shift) & kFractionMask) >> 8;
+
+    const uint8* y_ptr = y0_ptr;
+    const uint8* u_ptr = u0_ptr;
+    const uint8* v_ptr = v0_ptr;
+    // Apply vertical filtering if necessary.
+    // TODO(fbarchard): Remove memcpy when not necessary.
+    if (filter & mozilla::gfx::FILTER_BILINEAR_V) {
+      if (yscale_fixed != kFractionMax &&
+          source_y_fraction && ((source_y + 1) < source_height)) {
+        FilterRows(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction);
       } else {
-        RotateConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
-                                   dest_pixel, scaled_width,
-                                   scaled_dx >> 4, scaled_dx_uv >> 4);
+        memcpy(ybuf, y0_ptr, source_width);
+      }
+      y_ptr = ybuf;
+      ybuf[source_width] = ybuf[source_width-1];
+      int uv_source_width = (source_width + 1) / 2;
+      if (yscale_fixed != kFractionMax &&
+          source_uv_fraction &&
+          (((source_y >> y_shift) + 1) < (source_height >> y_shift))) {
+        FilterRows(ubuf, u0_ptr, u1_ptr, uv_source_width, source_uv_fraction);
+        FilterRows(vbuf, v0_ptr, v1_ptr, uv_source_width, source_uv_fraction);
+      } else {
+        memcpy(ubuf, u0_ptr, uv_source_width);
+        memcpy(vbuf, v0_ptr, uv_source_width);
       }
+      u_ptr = ubuf;
+      v_ptr = vbuf;
+      ubuf[uv_source_width] = ubuf[uv_source_width - 1];
+      vbuf[uv_source_width] = vbuf[uv_source_width - 1];
+    }
+    if (source_dx == kFractionMax) {  // Not scaled
+      FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
+                               dest_pixel, width);
+    } else {
+      if (filter & FILTER_BILINEAR_H) {
+        LinearScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
+                                 dest_pixel, width, source_dx);
+    } else {
+// Specialized scalers and rotation.
+#if defined(_MSC_VER) && defined(_M_IX86)
+        if (width == (source_width * 2)) {
+          DoubleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
+                              dest_pixel, width);
+        } else if ((source_dx & kFractionMask) == 0) {
+          // Scaling by integer scale factor. ie half.
+          ConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
+                               dest_pixel, width,
+                               source_dx >> kFractionBits);
+        } else if (source_dx_uv == source_dx) {  // Not rotated.
+          ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
+                             dest_pixel, width, source_dx);
+        } else {
+          RotateConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
+                                     dest_pixel, width,
+                                     source_dx >> kFractionBits,
+                                     source_dx_uv >> kFractionBits);
+        }
 #else
-    if (scaled_dx == 16) {           // Not scaled
-      if (has_mmx)
-        FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
-                                 dest_pixel, scaled_width);
-      else
-        FastConvertYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr,
-                                   dest_pixel, scaled_width, x_shift);
+        ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
+                           dest_pixel, width, source_dx);
 #endif
-    } else {
-      if (has_mmx) 
-        ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
-                           dest_pixel, scaled_width, scaled_dx);
-      else
-        ScaleYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr,
-                             dest_pixel, scaled_width, scaled_dx, x_shift);
-
-    }  
+      }      
+    }
   }
-
-  // MMX used for FastConvertYUVToRGB32Row requires emms instruction.
+  // MMX used for FastConvertYUVToRGB32Row and FilterRows requires emms.
   if (has_mmx)
     EMMS();
 }
 
 }  // namespace gfx
 }  // namespace mozilla
--- a/gfx/ycbcr/yuv_convert.h
+++ b/gfx/ycbcr/yuv_convert.h
@@ -1,22 +1,22 @@
-// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
 #ifndef MEDIA_BASE_YUV_CONVERT_H_
 #define MEDIA_BASE_YUV_CONVERT_H_
 
 #include "chromium_types.h"
 #include "gfxCore.h"
-
+ 
 namespace mozilla {
 
 namespace gfx {
-
+ 
 // Type of YUV surface.
 // The value of these enums matter as they are used to shift vertical indices.
 enum YUVType {
   YV12 = 0,           // YV12 is half width and half height chroma channels.
   YV16 = 1,           // YV16 is half width and full height chroma channels.
   YV24 = 2            // YV24 is full width and full height chroma channels.
 };
 
@@ -28,16 +28,24 @@ enum Rotate {
   ROTATE_180,         // Rotate upside down.
   ROTATE_270,         // Rotate counter clockwise.
   MIRROR_ROTATE_0,    // Mirror horizontally.
   MIRROR_ROTATE_90,   // Mirror then Rotate clockwise.
   MIRROR_ROTATE_180,  // Mirror vertically.
   MIRROR_ROTATE_270   // Transpose.
 };
 
+// Filter affects how scaling looks.
+enum ScaleFilter {
+  FILTER_NONE = 0,        // No filter (point sampled).
+  FILTER_BILINEAR_H = 1,  // Bilinear horizontal filter.
+  FILTER_BILINEAR_V = 2,  // Bilinear vertical filter.
+  FILTER_BILINEAR = 3     // Bilinear filter.
+};
+
 // Convert a frame of YUV to 32 bit ARGB.
 // Pass in YV16/YV12 depending on source format
 NS_GFX_(void) ConvertYCbCrToRGB32(const uint8* yplane,
                                   const uint8* uplane,
                                   const uint8* vplane,
                                   uint8* rgbframe,
                                   int pic_x,
                                   int pic_y,
@@ -49,22 +57,23 @@ NS_GFX_(void) ConvertYCbCrToRGB32(const 
                                   YUVType yuv_type);
 
 // Scale a frame of YUV to 32 bit ARGB.
 // Supports rotation and mirroring.
 NS_GFX_(void) ScaleYCbCrToRGB32(const uint8* yplane,
                                 const uint8* uplane,
                                 const uint8* vplane,
                                 uint8* rgbframe,
-                                int frame_width,
-                                int frame_height,
-                                int scaled_width,
-                                int scaled_height,
+                                int source_width,
+                                int source_height,
+                                int width,
+                                int height,
                                 int ystride,
                                 int uvstride,
                                 int rgbstride,
                                 YUVType yuv_type,
-                                Rotate view_rotate);
+                                Rotate view_rotate,
+                                ScaleFilter filter);
 
 }  // namespace gfx
 }  // namespace mozilla
-
+ 
 #endif  // MEDIA_BASE_YUV_CONVERT_H_
--- a/gfx/ycbcr/yuv_row.h
+++ b/gfx/ycbcr/yuv_row.h
@@ -1,9 +1,9 @@
-// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
 // yuv_row internal functions to handle YUV conversion and scaling to RGB.
 // These functions are used from both yuv_convert.cc and yuv_scale.cc.
 
 // TODO(fbarchard): Write function that can handle rotation and scaling.
 
@@ -23,16 +23,21 @@ void FastConvertYUVToRGB32Row(const uint
 
 void FastConvertYUVToRGB32Row_C(const uint8* y_buf,
                                 const uint8* u_buf,
                                 const uint8* v_buf,
                                 uint8* rgb_buf,
                                 int width,
                                 unsigned int x_shift);
 
+void FastConvertYUVToRGB32Row(const uint8* y_buf,
+                              const uint8* u_buf,
+                              const uint8* v_buf,
+                              uint8* rgb_buf,
+                              int width);
 
 // Can do 1x, half size or any scale down by an integer amount.
 // Step can be negative (mirroring, rotate 180).
 // This is the third fastest of the scalers.
 void ConvertYUVToRGB32Row(const uint8* y_buf,
                           const uint8* u_buf,
                           const uint8* v_buf,
                           uint8* rgb_buf,
@@ -56,38 +61,79 @@ void DoubleYUVToRGB32Row(const uint8* y_
                          const uint8* u_buf,
                          const uint8* v_buf,
                          uint8* rgb_buf,
                          int width);
 
 // Handles arbitrary scaling up or down.
 // Mirroring is supported, but not 90 or 270 degree rotation.
 // Chroma is under sampled every 2 pixels for performance.
-// This is the slowest of the scalers.
 void ScaleYUVToRGB32Row(const uint8* y_buf,
                         const uint8* u_buf,
                         const uint8* v_buf,
                         uint8* rgb_buf,
                         int width,
-                        int scaled_dx);
+                        int source_dx);
+
+void ScaleYUVToRGB32Row(const uint8* y_buf,
+                        const uint8* u_buf,
+                        const uint8* v_buf,
+                        uint8* rgb_buf,
+                        int width,
+                        int source_dx);
 
 void ScaleYUVToRGB32Row_C(const uint8* y_buf,
                           const uint8* u_buf,
                           const uint8* v_buf,
                           uint8* rgb_buf,
                           int width,
-                          int scaled_dx,
-                          unsigned int x_shift);
+                          int source_dx);
+
+// Handles arbitrary scaling up or down with bilinear filtering.
+// Mirroring is supported, but not 90 or 270 degree rotation.
+// Chroma is under sampled every 2 pixels for performance.
+// This is the slowest of the scalers.
+void LinearScaleYUVToRGB32Row(const uint8* y_buf,
+                              const uint8* u_buf,
+                              const uint8* v_buf,
+                              uint8* rgb_buf,
+                              int width,
+                              int source_dx);
 
-}  // extern "C"
+void LinearScaleYUVToRGB32Row(const uint8* y_buf,
+                              const uint8* u_buf,
+                              const uint8* v_buf,
+                              uint8* rgb_buf,
+                              int width,
+                              int source_dx);
+
+void LinearScaleYUVToRGB32Row_C(const uint8* y_buf,
+                                const uint8* u_buf,
+                                const uint8* v_buf,
+                                uint8* rgb_buf,
+                                int width,
+                                int source_dx);
+
+
+#if defined(_MSC_VER)
+#define SIMD_ALIGNED(var) __declspec(align(16)) var
+#else
+#define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
+#endif
+extern SIMD_ALIGNED(int16 kCoefficientsRgbY[768][4]);
 
 // x64 uses MMX2 (SSE) so emms is not required.
-#if defined(ARCH_CPU_X86)
+// Warning C4799: function has no EMMS instruction.
+// EMMS() is slow and should be called by the calling function once per image.
+#if !defined(ARCH_CPU_X86_64)
 #if defined(_MSC_VER)
 #define EMMS() __asm emms
+#pragma warning(disable: 4799)
 #else
 #define EMMS() asm("emms")
 #endif
 #else
 #define EMMS()
 #endif
 
+}  // extern "C"
+
 #endif  // MEDIA_BASE_YUV_ROW_H_
--- a/gfx/ycbcr/yuv_row_c.cpp
+++ b/gfx/ycbcr/yuv_row_c.cpp
@@ -1,170 +1,60 @@
-// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
 #include "yuv_row.h"
 
 #define DCHECK(a)
 
-// TODO(fbarchard): Move this to yuv_row_posix.cc to share with Mac.
-// TODO(fbarchard): Do 64 bit version.
-
 extern "C" {
 
-// Reference version of YUV converter.
-static const int kClipTableSize = 256;
-static const int kClipOverflow = 288;  // Cb max is 535.
-
-static uint8 kRgbClipTable[kClipOverflow +
-                           kClipTableSize +
-                           kClipOverflow] = {
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 288 underflow values
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // clipped to 0.
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,  // Unclipped values.
-  0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
-  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
-  0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
-  0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
-  0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
-  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
-  0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
-  0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
-  0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
-  0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
-  0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
-  0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
-  0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
-  0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
-  0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
-  0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
-  0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
-  0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
-  0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
-  0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
-  0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
-  0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7,
-  0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
-  0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
-  0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
-  0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
-  0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
-  0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
-  0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
-  0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
-  0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
-  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,  // 288 overflow values
-  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,  // clipped to 255.
-  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-};
-
-// Clip an rgb channel value to 0..255 range.
-// Source is signed fixed point 8.8.
-// Table allows for values to underflow or overflow by 128.
-// Therefore source range is -128 to 384.
-// Output clips to unsigned 0 to 255.
-static inline uint32 clip(int32 value) {
-  DCHECK(((value >> 8) + kClipOverflow) >= 0);
-  DCHECK(((value >> 8) + kClipOverflow) <
-         (kClipOverflow + kClipTableSize + kClipOverflow));
-  return static_cast<uint32>(kRgbClipTable[((value) >> 8) + kClipOverflow]);
-}
+// C reference code that mimics the YUV assembly.
+#define packuswb(x) ((x) < 0 ? 0 : ((x) > 255 ? 255 : (x)))
+#define paddsw(x, y) (((x) + (y)) < -32768 ? -32768 : \
+    (((x) + (y)) > 32767 ? 32767 : ((x) + (y))))
 
 static inline void YuvPixel(uint8 y,
                             uint8 u,
                             uint8 v,
                             uint8* rgb_buf) {
-  int32 d = static_cast<int32>(u) - 128;
-  int32 e = static_cast<int32>(v) - 128;
+
+  int b = kCoefficientsRgbY[256+u][0];
+  int g = kCoefficientsRgbY[256+u][1];
+  int r = kCoefficientsRgbY[256+u][2];
+  int a = kCoefficientsRgbY[256+u][3];
+
+  b = paddsw(b, kCoefficientsRgbY[512+v][0]);
+  g = paddsw(g, kCoefficientsRgbY[512+v][1]);
+  r = paddsw(r, kCoefficientsRgbY[512+v][2]);
+  a = paddsw(a, kCoefficientsRgbY[512+v][3]);
 
-  int32 cb =   (516 * d + 128);
-  int32 cg = (- 100 * d - 208 * e + 128);
-  int32 cr =             (409 * e + 128);
+  b = paddsw(b, kCoefficientsRgbY[y][0]);
+  g = paddsw(g, kCoefficientsRgbY[y][1]);
+  r = paddsw(r, kCoefficientsRgbY[y][2]);
+  a = paddsw(a, kCoefficientsRgbY[y][3]);
 
-  int32 C298a = ((static_cast<int32>(y) - 16) * 298);
-  *reinterpret_cast<uint32*>(rgb_buf) = (clip(C298a + cb)) |
-                                        (clip(C298a + cg) << 8) |
-                                        (clip(C298a + cr) << 16) |
-                                        (0xff000000);
+  b >>= 6;
+  g >>= 6;
+  r >>= 6;
+  a >>= 6;
+
+  *reinterpret_cast<uint32*>(rgb_buf) = (packuswb(b)) |
+                                        (packuswb(g) << 8) |
+                                        (packuswb(r) << 16) |
+                                        (packuswb(a) << 24);
 }
 
 void FastConvertYUVToRGB32Row_C(const uint8* y_buf,
-                              const uint8* u_buf,
-                              const uint8* v_buf,
-                              uint8* rgb_buf,
-                              int width,
-                              unsigned int x_shift) {
+                                const uint8* u_buf,
+                                const uint8* v_buf,
+                                uint8* rgb_buf,
+                                int width,
+                                unsigned int x_shift) {
   for (int x = 0; x < width; x += 2) {
     uint8 u = u_buf[x >> x_shift];
     uint8 v = v_buf[x >> x_shift];
     uint8 y0 = y_buf[x];
     YuvPixel(y0, u, v, rgb_buf);
     if ((x + 1) < width) {
       uint8 y1 = y_buf[x + 1];
       if (x_shift == 0) {
@@ -172,31 +62,72 @@ void FastConvertYUVToRGB32Row_C(const ui
         v = v_buf[x + 1];
       }
       YuvPixel(y1, u, v, rgb_buf + 4);
     }
     rgb_buf += 8;  // Advance 2 pixels.
   }
 }
 
-// 28.4 fixed point is used.  A shift by 4 isolates the integer.
-// A shift by 5 is used to further subsample the chrominence channels.
-// & 15 isolates the fixed point fraction.  >> 2 to get the upper 2 bits,
-// for 1/4 pixel accurate interpolation.
+// 16.16 fixed point is used.  A shift by 16 isolates the integer.
+// A shift by 17 is used to further subsample the chrominance channels.
+// & 0xffff isolates the fixed point fraction, which linear scaling uses
+// in full for 1/65536 pixel accurate interpolation.
 void ScaleYUVToRGB32Row_C(const uint8* y_buf,
-                        const uint8* u_buf,
-                        const uint8* v_buf,
-                        uint8* rgb_buf,
-                        int width,
-                        int scaled_dx,
-                        unsigned int x_shift) {
-  int scaled_x = 0;
-  for (int x = 0; x < width; ++x) {
-    uint8 u = u_buf[scaled_x >> (4 + x_shift)];
-    uint8 v = v_buf[scaled_x >> (4 + x_shift)];
-    uint8 y0 = y_buf[scaled_x >> 4];
-    YuvPixel(y0, u, v, rgb_buf);
-    rgb_buf += 4;
-    scaled_x += scaled_dx;
+                          const uint8* u_buf,  // Indexed at x >> 17.
+                          const uint8* v_buf,  // Indexed at x >> 17.
+                          uint8* rgb_buf,  // Output, 4 bytes per pixel.
+                          int width,  // Number of output pixels.
+                          int source_dx) {  // 16.16 step per output pixel.
+  int x = 0;  // Source position in 16.16 fixed point.
+  for (int i = 0; i < width; i += 2) {  // Two output pixels per pass.
+    int y = y_buf[x >> 16];  // Nearest sample: fraction is dropped.
+    int u = u_buf[(x >> 17)];  // Chroma index is half the luma index.
+    int v = v_buf[(x >> 17)];
+    YuvPixel(y, u, v, rgb_buf);
+    x += source_dx;
+    if ((i + 1) < width) {  // Skip the second pixel when width is odd.
+      y = y_buf[x >> 16];
+      YuvPixel(y, u, v, rgb_buf+4);  // Reuses u/v from the first pixel.
+      x += source_dx;
+    }
+    rgb_buf += 8;  // Advance 2 pixels, even after an odd final pass.
   }
 }
+
+void LinearScaleYUVToRGB32Row_C(const uint8* y_buf,  // Indexed at x >> 16.
+                                const uint8* u_buf,  // Indexed at x >> 17.
+                                const uint8* v_buf,  // Indexed at x >> 17.
+                                uint8* rgb_buf,  // Output, 4 bytes per pixel.
+                                int width,  // Number of output pixels.
+                                int source_dx) {  // 16.16 step per pixel.
+  int x = 0;  // Source position in 16.16 fixed point.
+  if (source_dx >= 0x20000) {  // Step is 2.0 source pixels or more.
+    x = 32768;  // Start at 0.5 (in 16.16) instead of 0.
+  }
+  for (int i = 0; i < width; i += 2) {  // Two output pixels per pass.
+    int y0 = y_buf[x >> 16];
+    int y1 = y_buf[(x >> 16) + 1];  // NOTE(review): +1 neighbour read;
+    int u0 = u_buf[(x >> 17)];  // confirm rows have a valid sample there.
+    int u1 = u_buf[(x >> 17) + 1];
+    int v0 = v_buf[(x >> 17)];
+    int v1 = v_buf[(x >> 17) + 1];
+    int y_frac = (x & 65535);  // Fractional part of the luma position.
+    int uv_frac = ((x >> 1) & 65535);  // Fraction of the halved position.
+    int y = (y_frac * y1 + (y_frac ^ 65535) * y0) >> 16;  // frac ^ 65535 is
+    int u = (uv_frac * u1 + (uv_frac ^ 65535) * u0) >> 16;  // 65535 - frac,
+    int v = (uv_frac * v1 + (uv_frac ^ 65535) * v0) >> 16;  // the left weight.
+    YuvPixel(y, u, v, rgb_buf);
+    x += source_dx;
+    if ((i + 1) < width) {  // Skip the second pixel when width is odd.
+      y0 = y_buf[x >> 16];
+      y1 = y_buf[(x >> 16) + 1];
+      y_frac = (x & 65535);
+      y = (y_frac * y1 + (y_frac ^ 65535) * y0) >> 16;
+      YuvPixel(y, u, v, rgb_buf+4);  // u/v are not re-interpolated here.
+      x += source_dx;
+    }
+    rgb_buf += 8;  // Advance 2 pixels, even after an odd final pass.
+  }
+}
+
 }  // extern "C"
 
deleted file mode 100644
--- a/gfx/ycbcr/yuv_row_linux.cpp
+++ /dev/null
@@ -1,657 +0,0 @@
-// Copyright (c) 2009 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "yuv_row.h"
-
-#define DCHECK(a)
-
-// TODO(fbarchard): Move this to yuv_row_posix.cc to share with Mac.
-// TODO(fbarchard): Do 64 bit version.
-
-extern "C" {
-
-#ifndef ARCH_CPU_X86_FAMILY
-// non-x86 implementation uses C fallback
-void FastConvertYUVToRGB32Row(const uint8* y_buf,
-                              const uint8* u_buf,
-                              const uint8* v_buf,
-                              uint8* rgb_buf,
-                              int width) {
-  FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
-}
- 
-void ScaleYUVToRGB32Row(const uint8* y_buf,
-                        const uint8* u_buf,
-                        const uint8* v_buf,
-                        uint8* rgb_buf,
-                        int width,
-                        int scaled_dx) {
-  ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx, 1);
-}
-#else
-
-#define RGBY(i) { \
-  static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
-  static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
-  static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
-  0 \
-}
-
-#define RGBU(i) { \
-  static_cast<int16>(2.018 * 64 * (i - 128) + 0.5), \
-  static_cast<int16>(-0.391 * 64 * (i - 128) + 0.5), \
-  0, \
-  static_cast<int16>(256 * 64 - 1) \
-}
-
-#define RGBV(i) { \
-  0, \
-  static_cast<int16>(-0.813 * 64 * (i - 128) + 0.5), \
-  static_cast<int16>(1.596 * 64 * (i - 128) + 0.5), \
-  0 \
-}
-
-#define MMX_ALIGNED(var) var __attribute__((aligned(16)))
-
-
-MMX_ALIGNED(int16 kCoefficientsRgbY[768][4]) = {
-  RGBY(0x00), RGBY(0x01), RGBY(0x02), RGBY(0x03),
-  RGBY(0x04), RGBY(0x05), RGBY(0x06), RGBY(0x07),
-  RGBY(0x08), RGBY(0x09), RGBY(0x0A), RGBY(0x0B),
-  RGBY(0x0C), RGBY(0x0D), RGBY(0x0E), RGBY(0x0F),
-  RGBY(0x10), RGBY(0x11), RGBY(0x12), RGBY(0x13),
-  RGBY(0x14), RGBY(0x15), RGBY(0x16), RGBY(0x17),
-  RGBY(0x18), RGBY(0x19), RGBY(0x1A), RGBY(0x1B),
-  RGBY(0x1C), RGBY(0x1D), RGBY(0x1E), RGBY(0x1F),
-  RGBY(0x20), RGBY(0x21), RGBY(0x22), RGBY(0x23),
-  RGBY(0x24), RGBY(0x25), RGBY(0x26), RGBY(0x27),
-  RGBY(0x28), RGBY(0x29), RGBY(0x2A), RGBY(0x2B),
-  RGBY(0x2C), RGBY(0x2D), RGBY(0x2E), RGBY(0x2F),
-  RGBY(0x30), RGBY(0x31), RGBY(0x32), RGBY(0x33),
-  RGBY(0x34), RGBY(0x35), RGBY(0x36), RGBY(0x37),
-  RGBY(0x38), RGBY(0x39), RGBY(0x3A), RGBY(0x3B),
-  RGBY(0x3C), RGBY(0x3D), RGBY(0x3E), RGBY(0x3F),
-  RGBY(0x40), RGBY(0x41), RGBY(0x42), RGBY(0x43),
-  RGBY(0x44), RGBY(0x45), RGBY(0x46), RGBY(0x47),
-  RGBY(0x48), RGBY(0x49), RGBY(0x4A), RGBY(0x4B),
-  RGBY(0x4C), RGBY(0x4D), RGBY(0x4E), RGBY(0x4F),
-  RGBY(0x50), RGBY(0x51), RGBY(0x52), RGBY(0x53),
-  RGBY(0x54), RGBY(0x55), RGBY(0x56), RGBY(0x57),
-  RGBY(0x58), RGBY(0x59), RGBY(0x5A), RGBY(0x5B),
-  RGBY(0x5C), RGBY(0x5D), RGBY(0x5E), RGBY(0x5F),
-  RGBY(0x60), RGBY(0x61), RGBY(0x62), RGBY(0x63),
-  RGBY(0x64), RGBY(0x65), RGBY(0x66), RGBY(0x67),
-  RGBY(0x68), RGBY(0x69), RGBY(0x6A), RGBY(0x6B),
-  RGBY(0x6C), RGBY(0x6D), RGBY(0x6E), RGBY(0x6F),
-  RGBY(0x70), RGBY(0x71), RGBY(0x72), RGBY(0x73),
-  RGBY(0x74), RGBY(0x75), RGBY(0x76), RGBY(0x77),
-  RGBY(0x78), RGBY(0x79), RGBY(0x7A), RGBY(0x7B),
-  RGBY(0x7C), RGBY(0x7D), RGBY(0x7E), RGBY(0x7F),
-  RGBY(0x80), RGBY(0x81), RGBY(0x82), RGBY(0x83),
-  RGBY(0x84), RGBY(0x85), RGBY(0x86), RGBY(0x87),
-  RGBY(0x88), RGBY(0x89), RGBY(0x8A), RGBY(0x8B),
-  RGBY(0x8C), RGBY(0x8D), RGBY(0x8E), RGBY(0x8F),
-  RGBY(0x90), RGBY(0x91), RGBY(0x92), RGBY(0x93),
-  RGBY(0x94), RGBY(0x95), RGBY(0x96), RGBY(0x97),
-  RGBY(0x98), RGBY(0x99), RGBY(0x9A), RGBY(0x9B),
-  RGBY(0x9C), RGBY(0x9D), RGBY(0x9E), RGBY(0x9F),
-  RGBY(0xA0), RGBY(0xA1), RGBY(0xA2), RGBY(0xA3),
-  RGBY(0xA4), RGBY(0xA5), RGBY(0xA6), RGBY(0xA7),
-  RGBY(0xA8), RGBY(0xA9), RGBY(0xAA), RGBY(0xAB),
-  RGBY(0xAC), RGBY(0xAD), RGBY(0xAE), RGBY(0xAF),
-  RGBY(0xB0), RGBY(0xB1), RGBY(0xB2), RGBY(0xB3),
-  RGBY(0xB4), RGBY(0xB5), RGBY(0xB6), RGBY(0xB7),
-  RGBY(0xB8), RGBY(0xB9), RGBY(0xBA), RGBY(0xBB),
-  RGBY(0xBC), RGBY(0xBD), RGBY(0xBE), RGBY(0xBF),
-  RGBY(0xC0), RGBY(0xC1), RGBY(0xC2), RGBY(0xC3),
-  RGBY(0xC4), RGBY(0xC5), RGBY(0xC6), RGBY(0xC7),
-  RGBY(0xC8), RGBY(0xC9), RGBY(0xCA), RGBY(0xCB),
-  RGBY(0xCC), RGBY(0xCD), RGBY(0xCE), RGBY(0xCF),
-  RGBY(0xD0), RGBY(0xD1), RGBY(0xD2), RGBY(0xD3),
-  RGBY(0xD4), RGBY(0xD5), RGBY(0xD6), RGBY(0xD7),
-  RGBY(0xD8), RGBY(0xD9), RGBY(0xDA), RGBY(0xDB),
-  RGBY(0xDC), RGBY(0xDD), RGBY(0xDE), RGBY(0xDF),
-  RGBY(0xE0), RGBY(0xE1), RGBY(0xE2), RGBY(0xE3),
-  RGBY(0xE4), RGBY(0xE5), RGBY(0xE6), RGBY(0xE7),
-  RGBY(0xE8), RGBY(0xE9), RGBY(0xEA), RGBY(0xEB),
-  RGBY(0xEC), RGBY(0xED), RGBY(0xEE), RGBY(0xEF),
-  RGBY(0xF0), RGBY(0xF1), RGBY(0xF2), RGBY(0xF3),
-  RGBY(0xF4), RGBY(0xF5), RGBY(0xF6), RGBY(0xF7),
-  RGBY(0xF8), RGBY(0xF9), RGBY(0xFA), RGBY(0xFB),
-  RGBY(0xFC), RGBY(0xFD), RGBY(0xFE), RGBY(0xFF),
-
-  // Chroma U table.
-  RGBU(0x00), RGBU(0x01), RGBU(0x02), RGBU(0x03),
-  RGBU(0x04), RGBU(0x05), RGBU(0x06), RGBU(0x07),
-  RGBU(0x08), RGBU(0x09), RGBU(0x0A), RGBU(0x0B),
-  RGBU(0x0C), RGBU(0x0D), RGBU(0x0E), RGBU(0x0F),
-  RGBU(0x10), RGBU(0x11), RGBU(0x12), RGBU(0x13),
-  RGBU(0x14), RGBU(0x15), RGBU(0x16), RGBU(0x17),
-  RGBU(0x18), RGBU(0x19), RGBU(0x1A), RGBU(0x1B),
-  RGBU(0x1C), RGBU(0x1D), RGBU(0x1E), RGBU(0x1F),
-  RGBU(0x20), RGBU(0x21), RGBU(0x22), RGBU(0x23),
-  RGBU(0x24), RGBU(0x25), RGBU(0x26), RGBU(0x27),
-  RGBU(0x28), RGBU(0x29), RGBU(0x2A), RGBU(0x2B),
-  RGBU(0x2C), RGBU(0x2D), RGBU(0x2E), RGBU(0x2F),
-  RGBU(0x30), RGBU(0x31), RGBU(0x32), RGBU(0x33),
-  RGBU(0x34), RGBU(0x35), RGBU(0x36), RGBU(0x37),
-  RGBU(0x38), RGBU(0x39), RGBU(0x3A), RGBU(0x3B),
-  RGBU(0x3C), RGBU(0x3D), RGBU(0x3E), RGBU(0x3F),
-  RGBU(0x40), RGBU(0x41), RGBU(0x42), RGBU(0x43),
-  RGBU(0x44), RGBU(0x45), RGBU(0x46), RGBU(0x47),
-  RGBU(0x48), RGBU(0x49), RGBU(0x4A), RGBU(0x4B),
-  RGBU(0x4C), RGBU(0x4D), RGBU(0x4E), RGBU(0x4F),
-  RGBU(0x50), RGBU(0x51), RGBU(0x52), RGBU(0x53),
-  RGBU(0x54), RGBU(0x55), RGBU(0x56), RGBU(0x57),
-  RGBU(0x58), RGBU(0x59), RGBU(0x5A), RGBU(0x5B),
-  RGBU(0x5C), RGBU(0x5D), RGBU(0x5E), RGBU(0x5F),
-  RGBU(0x60), RGBU(0x61), RGBU(0x62), RGBU(0x63),
-  RGBU(0x64), RGBU(0x65), RGBU(0x66), RGBU(0x67),
-  RGBU(0x68), RGBU(0x69), RGBU(0x6A), RGBU(0x6B),
-  RGBU(0x6C), RGBU(0x6D), RGBU(0x6E), RGBU(0x6F),
-  RGBU(0x70), RGBU(0x71), RGBU(0x72), RGBU(0x73),
-  RGBU(0x74), RGBU(0x75), RGBU(0x76), RGBU(0x77),
-  RGBU(0x78), RGBU(0x79), RGBU(0x7A), RGBU(0x7B),
-  RGBU(0x7C), RGBU(0x7D), RGBU(0x7E), RGBU(0x7F),
-  RGBU(0x80), RGBU(0x81), RGBU(0x82), RGBU(0x83),
-  RGBU(0x84), RGBU(0x85), RGBU(0x86), RGBU(0x87),
-  RGBU(0x88), RGBU(0x89), RGBU(0x8A), RGBU(0x8B),
-  RGBU(0x8C), RGBU(0x8D), RGBU(0x8E), RGBU(0x8F),
-  RGBU(0x90), RGBU(0x91), RGBU(0x92), RGBU(0x93),
-  RGBU(0x94), RGBU(0x95), RGBU(0x96), RGBU(0x97),
-  RGBU(0x98), RGBU(0x99), RGBU(0x9A), RGBU(0x9B),
-  RGBU(0x9C), RGBU(0x9D), RGBU(0x9E), RGBU(0x9F),
-  RGBU(0xA0), RGBU(0xA1), RGBU(0xA2), RGBU(0xA3),
-  RGBU(0xA4), RGBU(0xA5), RGBU(0xA6), RGBU(0xA7),
-  RGBU(0xA8), RGBU(0xA9), RGBU(0xAA), RGBU(0xAB),
-  RGBU(0xAC), RGBU(0xAD), RGBU(0xAE), RGBU(0xAF),
-  RGBU(0xB0), RGBU(0xB1), RGBU(0xB2), RGBU(0xB3),
-  RGBU(0xB4), RGBU(0xB5), RGBU(0xB6), RGBU(0xB7),
-  RGBU(0xB8), RGBU(0xB9), RGBU(0xBA), RGBU(0xBB),
-  RGBU(0xBC), RGBU(0xBD), RGBU(0xBE), RGBU(0xBF),
-  RGBU(0xC0), RGBU(0xC1), RGBU(0xC2), RGBU(0xC3),
-  RGBU(0xC4), RGBU(0xC5), RGBU(0xC6), RGBU(0xC7),
-  RGBU(0xC8), RGBU(0xC9), RGBU(0xCA), RGBU(0xCB),
-  RGBU(0xCC), RGBU(0xCD), RGBU(0xCE), RGBU(0xCF),
-  RGBU(0xD0), RGBU(0xD1), RGBU(0xD2), RGBU(0xD3),
-  RGBU(0xD4), RGBU(0xD5), RGBU(0xD6), RGBU(0xD7),
-  RGBU(0xD8), RGBU(0xD9), RGBU(0xDA), RGBU(0xDB),
-  RGBU(0xDC), RGBU(0xDD), RGBU(0xDE), RGBU(0xDF),
-  RGBU(0xE0), RGBU(0xE1), RGBU(0xE2), RGBU(0xE3),
-  RGBU(0xE4), RGBU(0xE5), RGBU(0xE6), RGBU(0xE7),
-  RGBU(0xE8), RGBU(0xE9), RGBU(0xEA), RGBU(0xEB),
-  RGBU(0xEC), RGBU(0xED), RGBU(0xEE), RGBU(0xEF),
-  RGBU(0xF0), RGBU(0xF1), RGBU(0xF2), RGBU(0xF3),
-  RGBU(0xF4), RGBU(0xF5), RGBU(0xF6), RGBU(0xF7),
-  RGBU(0xF8), RGBU(0xF9), RGBU(0xFA), RGBU(0xFB),
-  RGBU(0xFC), RGBU(0xFD), RGBU(0xFE), RGBU(0xFF),
-
-  // Chroma V table.
-  RGBV(0x00), RGBV(0x01), RGBV(0x02), RGBV(0x03),
-  RGBV(0x04), RGBV(0x05), RGBV(0x06), RGBV(0x07),
-  RGBV(0x08), RGBV(0x09), RGBV(0x0A), RGBV(0x0B),
-  RGBV(0x0C), RGBV(0x0D), RGBV(0x0E), RGBV(0x0F),
-  RGBV(0x10), RGBV(0x11), RGBV(0x12), RGBV(0x13),
-  RGBV(0x14), RGBV(0x15), RGBV(0x16), RGBV(0x17),
-  RGBV(0x18), RGBV(0x19), RGBV(0x1A), RGBV(0x1B),
-  RGBV(0x1C), RGBV(0x1D), RGBV(0x1E), RGBV(0x1F),
-  RGBV(0x20), RGBV(0x21), RGBV(0x22), RGBV(0x23),
-  RGBV(0x24), RGBV(0x25), RGBV(0x26), RGBV(0x27),
-  RGBV(0x28), RGBV(0x29), RGBV(0x2A), RGBV(0x2B),
-  RGBV(0x2C), RGBV(0x2D), RGBV(0x2E), RGBV(0x2F),
-  RGBV(0x30), RGBV(0x31), RGBV(0x32), RGBV(0x33),
-  RGBV(0x34), RGBV(0x35), RGBV(0x36), RGBV(0x37),
-  RGBV(0x38), RGBV(0x39), RGBV(0x3A), RGBV(0x3B),
-  RGBV(0x3C), RGBV(0x3D), RGBV(0x3E), RGBV(0x3F),
-  RGBV(0x40), RGBV(0x41), RGBV(0x42), RGBV(0x43),
-  RGBV(0x44), RGBV(0x45), RGBV(0x46), RGBV(0x47),
-  RGBV(0x48), RGBV(0x49), RGBV(0x4A), RGBV(0x4B),
-  RGBV(0x4C), RGBV(0x4D), RGBV(0x4E), RGBV(0x4F),
-  RGBV(0x50), RGBV(0x51), RGBV(0x52), RGBV(0x53),
-  RGBV(0x54), RGBV(0x55), RGBV(0x56), RGBV(0x57),
-  RGBV(0x58), RGBV(0x59), RGBV(0x5A), RGBV(0x5B),
-  RGBV(0x5C), RGBV(0x5D), RGBV(0x5E), RGBV(0x5F),
-  RGBV(0x60), RGBV(0x61), RGBV(0x62), RGBV(0x63),
-  RGBV(0x64), RGBV(0x65), RGBV(0x66), RGBV(0x67),
-  RGBV(0x68), RGBV(0x69), RGBV(0x6A), RGBV(0x6B),
-  RGBV(0x6C), RGBV(0x6D), RGBV(0x6E), RGBV(0x6F),
-  RGBV(0x70), RGBV(0x71), RGBV(0x72), RGBV(0x73),
-  RGBV(0x74), RGBV(0x75), RGBV(0x76), RGBV(0x77),
-  RGBV(0x78), RGBV(0x79), RGBV(0x7A), RGBV(0x7B),
-  RGBV(0x7C), RGBV(0x7D), RGBV(0x7E), RGBV(0x7F),
-  RGBV(0x80), RGBV(0x81), RGBV(0x82), RGBV(0x83),
-  RGBV(0x84), RGBV(0x85), RGBV(0x86), RGBV(0x87),
-  RGBV(0x88), RGBV(0x89), RGBV(0x8A), RGBV(0x8B),
-  RGBV(0x8C), RGBV(0x8D), RGBV(0x8E), RGBV(0x8F),
-  RGBV(0x90), RGBV(0x91), RGBV(0x92), RGBV(0x93),
-  RGBV(0x94), RGBV(0x95), RGBV(0x96), RGBV(0x97),
-  RGBV(0x98), RGBV(0x99), RGBV(0x9A), RGBV(0x9B),
-  RGBV(0x9C), RGBV(0x9D), RGBV(0x9E), RGBV(0x9F),
-  RGBV(0xA0), RGBV(0xA1), RGBV(0xA2), RGBV(0xA3),
-  RGBV(0xA4), RGBV(0xA5), RGBV(0xA6), RGBV(0xA7),
-  RGBV(0xA8), RGBV(0xA9), RGBV(0xAA), RGBV(0xAB),
-  RGBV(0xAC), RGBV(0xAD), RGBV(0xAE), RGBV(0xAF),
-  RGBV(0xB0), RGBV(0xB1), RGBV(0xB2), RGBV(0xB3),
-  RGBV(0xB4), RGBV(0xB5), RGBV(0xB6), RGBV(0xB7),
-  RGBV(0xB8), RGBV(0xB9), RGBV(0xBA), RGBV(0xBB),
-  RGBV(0xBC), RGBV(0xBD), RGBV(0xBE), RGBV(0xBF),
-  RGBV(0xC0), RGBV(0xC1), RGBV(0xC2), RGBV(0xC3),
-  RGBV(0xC4), RGBV(0xC5), RGBV(0xC6), RGBV(0xC7),
-  RGBV(0xC8), RGBV(0xC9), RGBV(0xCA), RGBV(0xCB),
-  RGBV(0xCC), RGBV(0xCD), RGBV(0xCE), RGBV(0xCF),
-  RGBV(0xD0), RGBV(0xD1), RGBV(0xD2), RGBV(0xD3),
-  RGBV(0xD4), RGBV(0xD5), RGBV(0xD6), RGBV(0xD7),
-  RGBV(0xD8), RGBV(0xD9), RGBV(0xDA), RGBV(0xDB),
-  RGBV(0xDC), RGBV(0xDD), RGBV(0xDE), RGBV(0xDF),
-  RGBV(0xE0), RGBV(0xE1), RGBV(0xE2), RGBV(0xE3),
-  RGBV(0xE4), RGBV(0xE5), RGBV(0xE6), RGBV(0xE7),
-  RGBV(0xE8), RGBV(0xE9), RGBV(0xEA), RGBV(0xEB),
-  RGBV(0xEC), RGBV(0xED), RGBV(0xEE), RGBV(0xEF),
-  RGBV(0xF0), RGBV(0xF1), RGBV(0xF2), RGBV(0xF3),
-  RGBV(0xF4), RGBV(0xF5), RGBV(0xF6), RGBV(0xF7),
-  RGBV(0xF8), RGBV(0xF9), RGBV(0xFA), RGBV(0xFB),
-  RGBV(0xFC), RGBV(0xFD), RGBV(0xFE), RGBV(0xFF),
-};
-
-#ifdef __SUNPRO_CC
-#pragma align 16 (kCoefficientsRgbY)
-#endif
-
-#if defined(ARCH_CPU_X86_64)
-
-#ifdef __SUNPRO_CC
-// AMD64 ABI uses register parameters.
-void FastConvertYUVToRGB32Row(const uint8* y_buf,  // rdi
-                              const uint8* u_buf,  // rsi
-                              const uint8* v_buf,  // rdx
-                              uint8* rgb_buf,      // rcx
-                              int width) {         // r8
-  asm(
-  "jmp    convertend\n"
-"convertloop:"
-  "movzbq (%1),%%r10\n"
-  "add    $0x1,%1\n"
-  "movzbq (%2),%%r11\n"
-  "add    $0x1,%2\n"
-  "movq   2048(%5,%%r10,8),%%xmm0\n"
-  "movzbq (%0),%%r10\n"
-  "movq   4096(%5,%%r11,8),%%xmm1\n"
-  "movzbq 0x1(%0),%%r11\n"
-  "paddsw %%xmm1,%%xmm0\n"
-  "movq   (%5,%%r10,8),%%xmm2\n"
-  "add    $0x2,%0\n"
-  "movq   (%5,%%r11,8),%%xmm3\n"
-  "paddsw %%xmm0,%%xmm2\n"
-  "paddsw %%xmm0,%%xmm3\n"
-  "shufps $0x44,%%xmm3,%%xmm2\n"
-  "psraw  $0x6,%%xmm2\n"
-  "packuswb %%xmm2,%%xmm2\n"
-  "movq   %%xmm2,0x0(%3)\n"
-  "add    $0x8,%3\n"
-"convertend:"
-  "sub    $0x2,%4\n"
-  "jns    convertloop\n"
-
-"convertnext:"
-  "add    $0x1,%4\n"
-  "js     convertdone\n"
-
-  "movzbq (%1),%%r10\n"
-  "movq   2048(%5,%%r10,8),%%xmm0\n"
-  "movzbq (%2),%%r10\n"
-  "movq   4096(%5,%%r10,8),%%xmm1\n"
-  "paddsw %%xmm1,%%xmm0\n"
-  "movzbq (%0),%%r10\n"
-  "movq   (%5,%%r10,8),%%xmm1\n"
-  "paddsw %%xmm0,%%xmm1\n"
-  "psraw  $0x6,%%xmm1\n"
-  "packuswb %%xmm1,%%xmm1\n"
-  "movd   %%xmm1,0x0(%3)\n"
-"convertdone:"
-  :
-  : "r"(y_buf),  // %0
-    "r"(u_buf),  // %1
-    "r"(v_buf),  // %2
-    "r"(rgb_buf),  // %3
-    "r"(width),  // %4
-    "r" (&kCoefficientsRgbY)  // %5
-  : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"
-);
-}
-#else // __SUNPRO_CC
-// AMD64 ABI uses register paremters.
-void FastConvertYUVToRGB32Row(const uint8* y_buf,  // rdi
-                              const uint8* u_buf,  // rsi
-                              const uint8* v_buf,  // rdx
-                              uint8* rgb_buf,      // rcx
-                              int width) {         // r8
-  asm(
-  "jmp    1f\n"
-"0:"
-  "movzb  (%1),%%r10\n"
-  "add    $0x1,%1\n"
-  "movzb  (%2),%%r11\n"
-  "add    $0x1,%2\n"
-  "movq   2048(%5,%%r10,8),%%xmm0\n"
-  "movzb  (%0),%%r10\n"
-  "movq   4096(%5,%%r11,8),%%xmm1\n"
-  "movzb  0x1(%0),%%r11\n"
-  "paddsw %%xmm1,%%xmm0\n"
-  "movq   (%5,%%r10,8),%%xmm2\n"
-  "add    $0x2,%0\n"
-  "movq   (%5,%%r11,8),%%xmm3\n"
-  "paddsw %%xmm0,%%xmm2\n"
-  "paddsw %%xmm0,%%xmm3\n"
-  "shufps $0x44,%%xmm3,%%xmm2\n"
-  "psraw  $0x6,%%xmm2\n"
-  "packuswb %%xmm2,%%xmm2\n"
-  "movq   %%xmm2,0x0(%3)\n"
-  "add    $0x8,%3\n"
-"1:"
-  "sub    $0x2,%4\n"
-  "jns    0b\n"
-
-"2:"
-  "add    $0x1,%4\n"
-  "js     3f\n"
-
-  "movzb  (%1),%%r10\n"
-  "movq   2048(%5,%%r10,8),%%xmm0\n"
-  "movzb  (%2),%%r10\n"
-  "movq   4096(%5,%%r10,8),%%xmm1\n"
-  "paddsw %%xmm1,%%xmm0\n"
-  "movzb  (%0),%%r10\n"
-  "movq   (%5,%%r10,8),%%xmm1\n"
-  "paddsw %%xmm0,%%xmm1\n"
-  "psraw  $0x6,%%xmm1\n"
-  "packuswb %%xmm1,%%xmm1\n"
-  "movd   %%xmm1,0x0(%3)\n"
-"3:"
-  :
-  : "r"(y_buf),  // %0
-    "r"(u_buf),  // %1
-    "r"(v_buf),  // %2
-    "r"(rgb_buf),  // %3
-    "r"(width),  // %4
-    "r" (kCoefficientsRgbY)  // %5
-  : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"
-);
-}
-
-void ScaleYUVToRGB32Row(const uint8* y_buf,  // rdi
-                        const uint8* u_buf,  // rsi
-                        const uint8* v_buf,  // rdx
-                        uint8* rgb_buf,      // rcx
-                        int width,           // r8
-                        int scaled_dx) {     // r9
-  asm(
-  "xor    %%r11,%%r11\n"
-  "sub    $0x2,%4\n"
-  "js     scalenext\n"
-
-"scaleloop:"
-  "mov    %%r11,%%r10\n"
-  "sar    $0x5,%%r10\n"
-  "movzb  (%1,%%r10,1),%%rax\n"
-  "movq   2048(%5,%%rax,8),%%xmm0\n"
-  "movzb  (%2,%%r10,1),%%rax\n"
-  "movq   4096(%5,%%rax,8),%%xmm1\n"
-  "lea    (%%r11,%6),%%r10\n"
-  "sar    $0x4,%%r11\n"
-  "movzb  (%0,%%r11,1),%%rax\n"
-  "paddsw %%xmm1,%%xmm0\n"
-  "movq   (%5,%%rax,8),%%xmm1\n"
-  "lea    (%%r10,%6),%%r11\n"
-  "sar    $0x4,%%r10\n"
-  "movzb  (%0,%%r10,1),%%rax\n"
-  "movq   (%5,%%rax,8),%%xmm2\n"
-  "paddsw %%xmm0,%%xmm1\n"
-  "paddsw %%xmm0,%%xmm2\n"
-  "shufps $0x44,%%xmm2,%%xmm1\n"
-  "psraw  $0x6,%%xmm1\n"
-  "packuswb %%xmm1,%%xmm1\n"
-  "movq   %%xmm1,0x0(%3)\n"
-  "add    $0x8,%3\n"
-  "sub    $0x2,%4\n"
-  "jns    scaleloop\n"
-
-"scalenext:"
-  "add    $0x1,%4\n"
-  "js     scaledone\n"
-
-  "mov    %%r11,%%r10\n"
-  "sar    $0x5,%%r10\n"
-  "movzb  (%1,%%r10,1),%%rax\n"
-  "movq   2048(%5,%%rax,8),%%xmm0\n"
-  "movzb  (%2,%%r10,1),%%rax\n"
-  "movq   4096(%5,%%rax,8),%%xmm1\n"
-  "paddsw %%xmm1,%%xmm0\n"
-  "sar    $0x4,%%r11\n"
-  "movzb  (%0,%%r11,1),%%rax\n"
-  "movq   (%5,%%rax,8),%%xmm1\n"
-  "paddsw %%xmm0,%%xmm1\n"
-  "psraw  $0x6,%%xmm1\n"
-  "packuswb %%xmm1,%%xmm1\n"
-  "movd   %%xmm1,0x0(%3)\n"
-
-"scaledone:"
-  :
-  : "r"(y_buf),  // %0
-    "r"(u_buf),  // %1
-    "r"(v_buf),  // %2
-    "r"(rgb_buf),  // %3
-    "r"(width),  // %4
-    "r" (kCoefficientsRgbY),  // %5
-    "r"(static_cast<long>(scaled_dx))  // %6
-  : "memory", "r10", "r11", "rax", "xmm0", "xmm1", "xmm2"
-);
-}
-
-#endif // __SUNPRO_CC
-
-#else // ARCH_CPU_X86_64
-
-#ifdef __SUNPRO_CC
-void FastConvertYUVToRGB32Row(const uint8* y_buf,
-                              const uint8* u_buf,
-                              const uint8* v_buf,
-                              uint8* rgb_buf,
-                              int width) {
-  asm(
-  "pusha\n"
-  "mov    %eax,%ebp\n"
-  "jmp    convertend\n"
-
-"convertloop:"
-  "movzbl (%edi),%eax\n"
-  "add    $0x1,%edi\n"
-  "movzbl (%esi),%ebx\n"
-  "add    $0x1,%esi\n"
-  "movq   kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
-  "movzbl (%edx),%eax\n"
-  "paddsw kCoefficientsRgbY+4096(,%ebx,8),%mm0\n"
-  "movzbl 0x1(%edx),%ebx\n"
-  "movq   kCoefficientsRgbY(,%eax,8),%mm1\n"
-  "add    $0x2,%edx\n"
-  "movq   kCoefficientsRgbY(,%ebx,8),%mm2\n"
-  "paddsw %mm0,%mm1\n"
-  "paddsw %mm0,%mm2\n"
-  "psraw  $0x6,%mm1\n"
-  "psraw  $0x6,%mm2\n"
-  "packuswb %mm2,%mm1\n"
-  "movntq %mm1,0x0(%ebp)\n"
-  "add    $0x8,%ebp\n"
-"convertend:"
-  "sub    $0x2,%ecx\n"
-  "jns    convertloop\n"
-
-  "and    $0x1,%ecx\n"
-  "je     convertdone\n"
-
-  "movzbl (%edi),%eax\n"
-  "movq   kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
-  "movzbl (%esi),%eax\n"
-  "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
-  "movzbl (%edx),%eax\n"
-  "movq   kCoefficientsRgbY(,%eax,8),%mm1\n"
-  "paddsw %mm0,%mm1\n"
-  "psraw  $0x6,%mm1\n"
-  "packuswb %mm1,%mm1\n"
-  "movd   %mm1,0x0(%ebp)\n"
-"convertdone:"
-  "popa\n"
-  :
-  : "d"(y_buf),  // %edx
-    "D"(u_buf),  // %edi
-    "S"(v_buf),  // %esi
-    "a"(rgb_buf),  // %eax
-    "c"(width)  // %ecx
-  : "memory"
-);
-}
-#else //  __SUNPRO_CC
-void FastConvertYUVToRGB32Row(const uint8* y_buf,
-                              const uint8* u_buf,
-                              const uint8* v_buf,
-                              uint8* rgb_buf,
-                              int width);
-
-// It's necessary to specify the correct section for the following code,
-// otherwise it will be placed in whatever the current section is as this unit
-// is compiled.  Because GCC remembers the last section it emitted, we must
-// also revert to the previous section state at the end of the asm block.
-  asm(
-  ".section .text\n"
-  ".global FastConvertYUVToRGB32Row\n"
-  ".type FastConvertYUVToRGB32Row, @function\n"
-"FastConvertYUVToRGB32Row:\n"
-  "pusha\n"
-  "mov    0x24(%esp),%edx\n"
-  "mov    0x28(%esp),%edi\n"
-  "mov    0x2c(%esp),%esi\n"
-  "mov    0x30(%esp),%ebp\n"
-  "mov    0x34(%esp),%ecx\n"
-  "jmp    1f\n"
-
-"0:"
-  "movzbl (%edi),%eax\n"
-  "add    $0x1,%edi\n"
-  "movzbl (%esi),%ebx\n"
-  "add    $0x1,%esi\n"
-  "movq   kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
-  "movzbl (%edx),%eax\n"
-  "paddsw kCoefficientsRgbY+4096(,%ebx,8),%mm0\n"
-  "movzbl 0x1(%edx),%ebx\n"
-  "movq   kCoefficientsRgbY(,%eax,8),%mm1\n"
-  "add    $0x2,%edx\n"
-  "movq   kCoefficientsRgbY(,%ebx,8),%mm2\n"
-  "paddsw %mm0,%mm1\n"
-  "paddsw %mm0,%mm2\n"
-  "psraw  $0x6,%mm1\n"
-  "psraw  $0x6,%mm2\n"
-  "packuswb %mm2,%mm1\n"
-  "movntq %mm1,0x0(%ebp)\n"
-  "add    $0x8,%ebp\n"
-"1:"
-  "sub    $0x2,%ecx\n"
-  "jns    0b\n"
-
-  "and    $0x1,%ecx\n"
-  "je     2f\n"
-
-  "movzbl (%edi),%eax\n"
-  "movq   kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
-  "movzbl (%esi),%eax\n"
-  "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
-  "movzbl (%edx),%eax\n"
-  "movq   kCoefficientsRgbY(,%eax,8),%mm1\n"
-  "paddsw %mm0,%mm1\n"
-  "psraw  $0x6,%mm1\n"
-  "packuswb %mm1,%mm1\n"
-  "movd   %mm1,0x0(%ebp)\n"
-"2:"
-  "popa\n"
-  "ret\n"
-  ".previous\n"
-);
-
-void ScaleYUVToRGB32Row(const uint8* y_buf,
-                        const uint8* u_buf,
-                        const uint8* v_buf,
-                        uint8* rgb_buf,
-                        int width,
-                        int scaled_dx);
-
-  asm(
-  ".global ScaleYUVToRGB32Row\n"
-"ScaleYUVToRGB32Row:\n"
-  "pusha\n"
-  "mov    0x24(%esp),%edx\n"
-  "mov    0x28(%esp),%edi\n"
-  "mov    0x2c(%esp),%esi\n"
-  "mov    0x30(%esp),%ebp\n"
-  "mov    0x34(%esp),%ecx\n"
-  "xor    %ebx,%ebx\n"
-  "jmp    scaleend\n"
-
-"scaleloop:"
-  "mov    %ebx,%eax\n"
-  "sar    $0x5,%eax\n"
-  "movzbl (%edi,%eax,1),%eax\n"
-  "movq   kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
-  "mov    %ebx,%eax\n"
-  "sar    $0x5,%eax\n"
-  "movzbl (%esi,%eax,1),%eax\n"
-  "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
-  "mov    %ebx,%eax\n"
-  "add    0x38(%esp),%ebx\n"
-  "sar    $0x4,%eax\n"
-  "movzbl (%edx,%eax,1),%eax\n"
-  "movq   kCoefficientsRgbY(,%eax,8),%mm1\n"
-  "mov    %ebx,%eax\n"
-  "add    0x38(%esp),%ebx\n"
-  "sar    $0x4,%eax\n"
-  "movzbl (%edx,%eax,1),%eax\n"
-  "movq   kCoefficientsRgbY(,%eax,8),%mm2\n"
-  "paddsw %mm0,%mm1\n"
-  "paddsw %mm0,%mm2\n"
-  "psraw  $0x6,%mm1\n"
-  "psraw  $0x6,%mm2\n"
-  "packuswb %mm2,%mm1\n"
-  "movntq %mm1,0x0(%ebp)\n"
-  "add    $0x8,%ebp\n"
-"scaleend:"
-  "sub    $0x2,%ecx\n"
-  "jns    scaleloop\n"
-
-  "and    $0x1,%ecx\n"
-  "je     scaledone\n"
-
-  "mov    %ebx,%eax\n"
-  "sar    $0x5,%eax\n"
-  "movzbl (%edi,%eax,1),%eax\n"
-  "movq   kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
-  "mov    %ebx,%eax\n"
-  "sar    $0x5,%eax\n"
-  "movzbl (%esi,%eax,1),%eax\n"
-  "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
-  "mov    %ebx,%eax\n"
-  "sar    $0x4,%eax\n"
-  "movzbl (%edx,%eax,1),%eax\n"
-  "movq   kCoefficientsRgbY(,%eax,8),%mm1\n"
-  "paddsw %mm0,%mm1\n"
-  "psraw  $0x6,%mm1\n"
-  "packuswb %mm1,%mm1\n"
-  "movd   %mm1,0x0(%ebp)\n"
-
-"scaledone:"
-  "popa\n"
-  "ret\n"
-);
-
-#endif // __SUNPRO_CC
-#endif // ARCH_CPU_X86_64
-#endif // !ARCH_CPU_X86_FAMILY
-}  // extern "C"
-
deleted file mode 100644
--- a/gfx/ycbcr/yuv_row_mac.cpp
+++ /dev/null
@@ -1,416 +0,0 @@
-// Copyright (c) 2009 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "yuv_row.h"
-
-// TODO(fbarchard): Do 64 bit version.
-
-extern "C" {
-
-// PPC and 64 Bit builds use the C fallback. Optimized code
-// needs to be fixed for 64 bit builds. PPC has no optimized code
-// option at all.
-#if defined(ARCH_CPU_PPC) || defined(ARCH_CPU_64_BITS)
-// PPC implementation uses C fallback
-void FastConvertYUVToRGB32Row(const uint8* y_buf,
-                              const uint8* u_buf,
-                              const uint8* v_buf,
-                              uint8* rgb_buf,
-                              int width) {
-  FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
-}
- 
-void ScaleYUVToRGB32Row(const uint8* y_buf,
-                        const uint8* u_buf,
-                        const uint8* v_buf,
-                        uint8* rgb_buf,
-                        int width,
-                        int scaled_dx) {
-  ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx, 1);
-}
-#else
-
-#define RGBY(i) { \
-  static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
-  static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
-  static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
-  0 \
-}
-
-#define RGBU(i) { \
-  static_cast<int16>(2.018 * 64 * (i - 128) + 0.5), \
-  static_cast<int16>(-0.391 * 64 * (i - 128) + 0.5), \
-  0, \
-  static_cast<int16>(256 * 64 - 1) \
-}
-
-#define RGBV(i) { \
-  0, \
-  static_cast<int16>(-0.813 * 64 * (i - 128) + 0.5), \
-  static_cast<int16>(1.596 * 64 * (i - 128) + 0.5), \
-  0 \
-}
-
-#define MMX_ALIGNED(var) \
-  var __attribute__ ((section ("__TEXT,__text"))) __attribute__ ((aligned(16)))
-
-MMX_ALIGNED(int16 kCoefficientsRgbY[768][4]) = {
-  RGBY(0x00), RGBY(0x01), RGBY(0x02), RGBY(0x03),
-  RGBY(0x04), RGBY(0x05), RGBY(0x06), RGBY(0x07),
-  RGBY(0x08), RGBY(0x09), RGBY(0x0A), RGBY(0x0B),
-  RGBY(0x0C), RGBY(0x0D), RGBY(0x0E), RGBY(0x0F),
-  RGBY(0x10), RGBY(0x11), RGBY(0x12), RGBY(0x13),
-  RGBY(0x14), RGBY(0x15), RGBY(0x16), RGBY(0x17),
-  RGBY(0x18), RGBY(0x19), RGBY(0x1A), RGBY(0x1B),
-  RGBY(0x1C), RGBY(0x1D), RGBY(0x1E), RGBY(0x1F),
-  RGBY(0x20), RGBY(0x21), RGBY(0x22), RGBY(0x23),
-  RGBY(0x24), RGBY(0x25), RGBY(0x26), RGBY(0x27),
-  RGBY(0x28), RGBY(0x29), RGBY(0x2A), RGBY(0x2B),
-  RGBY(0x2C), RGBY(0x2D), RGBY(0x2E), RGBY(0x2F),
-  RGBY(0x30), RGBY(0x31), RGBY(0x32), RGBY(0x33),
-  RGBY(0x34), RGBY(0x35), RGBY(0x36), RGBY(0x37),
-  RGBY(0x38), RGBY(0x39), RGBY(0x3A), RGBY(0x3B),
-  RGBY(0x3C), RGBY(0x3D), RGBY(0x3E), RGBY(0x3F),
-  RGBY(0x40), RGBY(0x41), RGBY(0x42), RGBY(0x43),
-  RGBY(0x44), RGBY(0x45), RGBY(0x46), RGBY(0x47),
-  RGBY(0x48), RGBY(0x49), RGBY(0x4A), RGBY(0x4B),
-  RGBY(0x4C), RGBY(0x4D), RGBY(0x4E), RGBY(0x4F),
-  RGBY(0x50), RGBY(0x51), RGBY(0x52), RGBY(0x53),
-  RGBY(0x54), RGBY(0x55), RGBY(0x56), RGBY(0x57),
-  RGBY(0x58), RGBY(0x59), RGBY(0x5A), RGBY(0x5B),
-  RGBY(0x5C), RGBY(0x5D), RGBY(0x5E), RGBY(0x5F),
-  RGBY(0x60), RGBY(0x61), RGBY(0x62), RGBY(0x63),
-  RGBY(0x64), RGBY(0x65), RGBY(0x66), RGBY(0x67),
-  RGBY(0x68), RGBY(0x69), RGBY(0x6A), RGBY(0x6B),
-  RGBY(0x6C), RGBY(0x6D), RGBY(0x6E), RGBY(0x6F),
-  RGBY(0x70), RGBY(0x71), RGBY(0x72), RGBY(0x73),
-  RGBY(0x74), RGBY(0x75), RGBY(0x76), RGBY(0x77),
-  RGBY(0x78), RGBY(0x79), RGBY(0x7A), RGBY(0x7B),
-  RGBY(0x7C), RGBY(0x7D), RGBY(0x7E), RGBY(0x7F),
-  RGBY(0x80), RGBY(0x81), RGBY(0x82), RGBY(0x83),
-  RGBY(0x84), RGBY(0x85), RGBY(0x86), RGBY(0x87),
-  RGBY(0x88), RGBY(0x89), RGBY(0x8A), RGBY(0x8B),
-  RGBY(0x8C), RGBY(0x8D), RGBY(0x8E), RGBY(0x8F),
-  RGBY(0x90), RGBY(0x91), RGBY(0x92), RGBY(0x93),
-  RGBY(0x94), RGBY(0x95), RGBY(0x96), RGBY(0x97),
-  RGBY(0x98), RGBY(0x99), RGBY(0x9A), RGBY(0x9B),
-  RGBY(0x9C), RGBY(0x9D), RGBY(0x9E), RGBY(0x9F),
-  RGBY(0xA0), RGBY(0xA1), RGBY(0xA2), RGBY(0xA3),
-  RGBY(0xA4), RGBY(0xA5), RGBY(0xA6), RGBY(0xA7),
-  RGBY(0xA8), RGBY(0xA9), RGBY(0xAA), RGBY(0xAB),
-  RGBY(0xAC), RGBY(0xAD), RGBY(0xAE), RGBY(0xAF),
-  RGBY(0xB0), RGBY(0xB1), RGBY(0xB2), RGBY(0xB3),
-  RGBY(0xB4), RGBY(0xB5), RGBY(0xB6), RGBY(0xB7),
-  RGBY(0xB8), RGBY(0xB9), RGBY(0xBA), RGBY(0xBB),
-  RGBY(0xBC), RGBY(0xBD), RGBY(0xBE), RGBY(0xBF),
-  RGBY(0xC0), RGBY(0xC1), RGBY(0xC2), RGBY(0xC3),
-  RGBY(0xC4), RGBY(0xC5), RGBY(0xC6), RGBY(0xC7),
-  RGBY(0xC8), RGBY(0xC9), RGBY(0xCA), RGBY(0xCB),
-  RGBY(0xCC), RGBY(0xCD), RGBY(0xCE), RGBY(0xCF),
-  RGBY(0xD0), RGBY(0xD1), RGBY(0xD2), RGBY(0xD3),
-  RGBY(0xD4), RGBY(0xD5), RGBY(0xD6), RGBY(0xD7),
-  RGBY(0xD8), RGBY(0xD9), RGBY(0xDA), RGBY(0xDB),
-  RGBY(0xDC), RGBY(0xDD), RGBY(0xDE), RGBY(0xDF),
-  RGBY(0xE0), RGBY(0xE1), RGBY(0xE2), RGBY(0xE3),
-  RGBY(0xE4), RGBY(0xE5), RGBY(0xE6), RGBY(0xE7),
-  RGBY(0xE8), RGBY(0xE9), RGBY(0xEA), RGBY(0xEB),
-  RGBY(0xEC), RGBY(0xED), RGBY(0xEE), RGBY(0xEF),
-  RGBY(0xF0), RGBY(0xF1), RGBY(0xF2), RGBY(0xF3),
-  RGBY(0xF4), RGBY(0xF5), RGBY(0xF6), RGBY(0xF7),
-  RGBY(0xF8), RGBY(0xF9), RGBY(0xFA), RGBY(0xFB),
-  RGBY(0xFC), RGBY(0xFD), RGBY(0xFE), RGBY(0xFF),
-
-  // Chroma U table.
-  RGBU(0x00), RGBU(0x01), RGBU(0x02), RGBU(0x03),
-  RGBU(0x04), RGBU(0x05), RGBU(0x06), RGBU(0x07),
-  RGBU(0x08), RGBU(0x09), RGBU(0x0A), RGBU(0x0B),
-  RGBU(0x0C), RGBU(0x0D), RGBU(0x0E), RGBU(0x0F),
-  RGBU(0x10), RGBU(0x11), RGBU(0x12), RGBU(0x13),
-  RGBU(0x14), RGBU(0x15), RGBU(0x16), RGBU(0x17),
-  RGBU(0x18), RGBU(0x19), RGBU(0x1A), RGBU(0x1B),
-  RGBU(0x1C), RGBU(0x1D), RGBU(0x1E), RGBU(0x1F),
-  RGBU(0x20), RGBU(0x21), RGBU(0x22), RGBU(0x23),
-  RGBU(0x24), RGBU(0x25), RGBU(0x26), RGBU(0x27),
-  RGBU(0x28), RGBU(0x29), RGBU(0x2A), RGBU(0x2B),
-  RGBU(0x2C), RGBU(0x2D), RGBU(0x2E), RGBU(0x2F),
-  RGBU(0x30), RGBU(0x31), RGBU(0x32), RGBU(0x33),
-  RGBU(0x34), RGBU(0x35), RGBU(0x36), RGBU(0x37),
-  RGBU(0x38), RGBU(0x39), RGBU(0x3A), RGBU(0x3B),
-  RGBU(0x3C), RGBU(0x3D), RGBU(0x3E), RGBU(0x3F),
-  RGBU(0x40), RGBU(0x41), RGBU(0x42), RGBU(0x43),
-  RGBU(0x44), RGBU(0x45), RGBU(0x46), RGBU(0x47),
-  RGBU(0x48), RGBU(0x49), RGBU(0x4A), RGBU(0x4B),
-  RGBU(0x4C), RGBU(0x4D), RGBU(0x4E), RGBU(0x4F),
-  RGBU(0x50), RGBU(0x51), RGBU(0x52), RGBU(0x53),
-  RGBU(0x54), RGBU(0x55), RGBU(0x56), RGBU(0x57),
-  RGBU(0x58), RGBU(0x59), RGBU(0x5A), RGBU(0x5B),
-  RGBU(0x5C), RGBU(0x5D), RGBU(0x5E), RGBU(0x5F),
-  RGBU(0x60), RGBU(0x61), RGBU(0x62), RGBU(0x63),
-  RGBU(0x64), RGBU(0x65), RGBU(0x66), RGBU(0x67),
-  RGBU(0x68), RGBU(0x69), RGBU(0x6A), RGBU(0x6B),
-  RGBU(0x6C), RGBU(0x6D), RGBU(0x6E), RGBU(0x6F),
-  RGBU(0x70), RGBU(0x71), RGBU(0x72), RGBU(0x73),
-  RGBU(0x74), RGBU(0x75), RGBU(0x76), RGBU(0x77),
-  RGBU(0x78), RGBU(0x79), RGBU(0x7A), RGBU(0x7B),
-  RGBU(0x7C), RGBU(0x7D), RGBU(0x7E), RGBU(0x7F),
-  RGBU(0x80), RGBU(0x81), RGBU(0x82), RGBU(0x83),
-  RGBU(0x84), RGBU(0x85), RGBU(0x86), RGBU(0x87),
-  RGBU(0x88), RGBU(0x89), RGBU(0x8A), RGBU(0x8B),
-  RGBU(0x8C), RGBU(0x8D), RGBU(0x8E), RGBU(0x8F),
-  RGBU(0x90), RGBU(0x91), RGBU(0x92), RGBU(0x93),
-  RGBU(0x94), RGBU(0x95), RGBU(0x96), RGBU(0x97),
-  RGBU(0x98), RGBU(0x99), RGBU(0x9A), RGBU(0x9B),
-  RGBU(0x9C), RGBU(0x9D), RGBU(0x9E), RGBU(0x9F),
-  RGBU(0xA0), RGBU(0xA1), RGBU(0xA2), RGBU(0xA3),
-  RGBU(0xA4), RGBU(0xA5), RGBU(0xA6), RGBU(0xA7),
-  RGBU(0xA8), RGBU(0xA9), RGBU(0xAA), RGBU(0xAB),
-  RGBU(0xAC), RGBU(0xAD), RGBU(0xAE), RGBU(0xAF),
-  RGBU(0xB0), RGBU(0xB1), RGBU(0xB2), RGBU(0xB3),
-  RGBU(0xB4), RGBU(0xB5), RGBU(0xB6), RGBU(0xB7),
-  RGBU(0xB8), RGBU(0xB9), RGBU(0xBA), RGBU(0xBB),
-  RGBU(0xBC), RGBU(0xBD), RGBU(0xBE), RGBU(0xBF),
-  RGBU(0xC0), RGBU(0xC1), RGBU(0xC2), RGBU(0xC3),
-  RGBU(0xC4), RGBU(0xC5), RGBU(0xC6), RGBU(0xC7),
-  RGBU(0xC8), RGBU(0xC9), RGBU(0xCA), RGBU(0xCB),
-  RGBU(0xCC), RGBU(0xCD), RGBU(0xCE), RGBU(0xCF),
-  RGBU(0xD0), RGBU(0xD1), RGBU(0xD2), RGBU(0xD3),
-  RGBU(0xD4), RGBU(0xD5), RGBU(0xD6), RGBU(0xD7),
-  RGBU(0xD8), RGBU(0xD9), RGBU(0xDA), RGBU(0xDB),
-  RGBU(0xDC), RGBU(0xDD), RGBU(0xDE), RGBU(0xDF),
-  RGBU(0xE0), RGBU(0xE1), RGBU(0xE2), RGBU(0xE3),
-  RGBU(0xE4), RGBU(0xE5), RGBU(0xE6), RGBU(0xE7),
-  RGBU(0xE8), RGBU(0xE9), RGBU(0xEA), RGBU(0xEB),
-  RGBU(0xEC), RGBU(0xED), RGBU(0xEE), RGBU(0xEF),
-  RGBU(0xF0), RGBU(0xF1), RGBU(0xF2), RGBU(0xF3),
-  RGBU(0xF4), RGBU(0xF5), RGBU(0xF6), RGBU(0xF7),
-  RGBU(0xF8), RGBU(0xF9), RGBU(0xFA), RGBU(0xFB),
-  RGBU(0xFC), RGBU(0xFD), RGBU(0xFE), RGBU(0xFF),
-
-  // Chroma V table.
-  RGBV(0x00), RGBV(0x01), RGBV(0x02), RGBV(0x03),
-  RGBV(0x04), RGBV(0x05), RGBV(0x06), RGBV(0x07),
-  RGBV(0x08), RGBV(0x09), RGBV(0x0A), RGBV(0x0B),
-  RGBV(0x0C), RGBV(0x0D), RGBV(0x0E), RGBV(0x0F),
-  RGBV(0x10), RGBV(0x11), RGBV(0x12), RGBV(0x13),
-  RGBV(0x14), RGBV(0x15), RGBV(0x16), RGBV(0x17),
-  RGBV(0x18), RGBV(0x19), RGBV(0x1A), RGBV(0x1B),
-  RGBV(0x1C), RGBV(0x1D), RGBV(0x1E), RGBV(0x1F),
-  RGBV(0x20), RGBV(0x21), RGBV(0x22), RGBV(0x23),
-  RGBV(0x24), RGBV(0x25), RGBV(0x26), RGBV(0x27),
-  RGBV(0x28), RGBV(0x29), RGBV(0x2A), RGBV(0x2B),
-  RGBV(0x2C), RGBV(0x2D), RGBV(0x2E), RGBV(0x2F),
-  RGBV(0x30), RGBV(0x31), RGBV(0x32), RGBV(0x33),
-  RGBV(0x34), RGBV(0x35), RGBV(0x36), RGBV(0x37),
-  RGBV(0x38), RGBV(0x39), RGBV(0x3A), RGBV(0x3B),
-  RGBV(0x3C), RGBV(0x3D), RGBV(0x3E), RGBV(0x3F),
-  RGBV(0x40), RGBV(0x41), RGBV(0x42), RGBV(0x43),
-  RGBV(0x44), RGBV(0x45), RGBV(0x46), RGBV(0x47),
-  RGBV(0x48), RGBV(0x49), RGBV(0x4A), RGBV(0x4B),
-  RGBV(0x4C), RGBV(0x4D), RGBV(0x4E), RGBV(0x4F),
-  RGBV(0x50), RGBV(0x51), RGBV(0x52), RGBV(0x53),
-  RGBV(0x54), RGBV(0x55), RGBV(0x56), RGBV(0x57),
-  RGBV(0x58), RGBV(0x59), RGBV(0x5A), RGBV(0x5B),
-  RGBV(0x5C), RGBV(0x5D), RGBV(0x5E), RGBV(0x5F),
-  RGBV(0x60), RGBV(0x61), RGBV(0x62), RGBV(0x63),
-  RGBV(0x64), RGBV(0x65), RGBV(0x66), RGBV(0x67),
-  RGBV(0x68), RGBV(0x69), RGBV(0x6A), RGBV(0x6B),
-  RGBV(0x6C), RGBV(0x6D), RGBV(0x6E), RGBV(0x6F),
-  RGBV(0x70), RGBV(0x71), RGBV(0x72), RGBV(0x73),
-  RGBV(0x74), RGBV(0x75), RGBV(0x76), RGBV(0x77),
-  RGBV(0x78), RGBV(0x79), RGBV(0x7A), RGBV(0x7B),
-  RGBV(0x7C), RGBV(0x7D), RGBV(0x7E), RGBV(0x7F),
-  RGBV(0x80), RGBV(0x81), RGBV(0x82), RGBV(0x83),
-  RGBV(0x84), RGBV(0x85), RGBV(0x86), RGBV(0x87),
-  RGBV(0x88), RGBV(0x89), RGBV(0x8A), RGBV(0x8B),
-  RGBV(0x8C), RGBV(0x8D), RGBV(0x8E), RGBV(0x8F),
-  RGBV(0x90), RGBV(0x91), RGBV(0x92), RGBV(0x93),
-  RGBV(0x94), RGBV(0x95), RGBV(0x96), RGBV(0x97),
-  RGBV(0x98), RGBV(0x99), RGBV(0x9A), RGBV(0x9B),
-  RGBV(0x9C), RGBV(0x9D), RGBV(0x9E), RGBV(0x9F),
-  RGBV(0xA0), RGBV(0xA1), RGBV(0xA2), RGBV(0xA3),
-  RGBV(0xA4), RGBV(0xA5), RGBV(0xA6), RGBV(0xA7),
-  RGBV(0xA8), RGBV(0xA9), RGBV(0xAA), RGBV(0xAB),
-  RGBV(0xAC), RGBV(0xAD), RGBV(0xAE), RGBV(0xAF),
-  RGBV(0xB0), RGBV(0xB1), RGBV(0xB2), RGBV(0xB3),
-  RGBV(0xB4), RGBV(0xB5), RGBV(0xB6), RGBV(0xB7),
-  RGBV(0xB8), RGBV(0xB9), RGBV(0xBA), RGBV(0xBB),
-  RGBV(0xBC), RGBV(0xBD), RGBV(0xBE), RGBV(0xBF),
-  RGBV(0xC0), RGBV(0xC1), RGBV(0xC2), RGBV(0xC3),
-  RGBV(0xC4), RGBV(0xC5), RGBV(0xC6), RGBV(0xC7),
-  RGBV(0xC8), RGBV(0xC9), RGBV(0xCA), RGBV(0xCB),
-  RGBV(0xCC), RGBV(0xCD), RGBV(0xCE), RGBV(0xCF),
-  RGBV(0xD0), RGBV(0xD1), RGBV(0xD2), RGBV(0xD3),
-  RGBV(0xD4), RGBV(0xD5), RGBV(0xD6), RGBV(0xD7),
-  RGBV(0xD8), RGBV(0xD9), RGBV(0xDA), RGBV(0xDB),
-  RGBV(0xDC), RGBV(0xDD), RGBV(0xDE), RGBV(0xDF),
-  RGBV(0xE0), RGBV(0xE1), RGBV(0xE2), RGBV(0xE3),
-  RGBV(0xE4), RGBV(0xE5), RGBV(0xE6), RGBV(0xE7),
-  RGBV(0xE8), RGBV(0xE9), RGBV(0xEA), RGBV(0xEB),
-  RGBV(0xEC), RGBV(0xED), RGBV(0xEE), RGBV(0xEF),
-  RGBV(0xF0), RGBV(0xF1), RGBV(0xF2), RGBV(0xF3),
-  RGBV(0xF4), RGBV(0xF5), RGBV(0xF6), RGBV(0xF7),
-  RGBV(0xF8), RGBV(0xF9), RGBV(0xFA), RGBV(0xFB),
-  RGBV(0xFC), RGBV(0xFD), RGBV(0xFE), RGBV(0xFF),
-};
-
-#undef RGBY
-#undef RGBU
-#undef RGBV
-#undef MMX_ALIGNED
-
-extern void MacConvertYUVToRGB32Row(const uint8* y_buf,
-                                    const uint8* u_buf,
-                                    const uint8* v_buf,
-                                    uint8* rgb_buf,
-                                    int width,
-                                    int16 *kCoefficientsRgbY);
-  __asm__(
-"_MacConvertYUVToRGB32Row:\n"
-  "pusha\n"
-  "mov    0x24(%esp),%edx\n"
-  "mov    0x28(%esp),%edi\n"
-  "mov    0x2c(%esp),%esi\n"
-  "mov    0x30(%esp),%ebp\n"
-  "mov    0x38(%esp),%ecx\n"
-
-  "jmp    Lconvertend\n"
-
-"Lconvertloop:"
-  "movzbl (%edi),%eax\n"
-  "add    $0x1,%edi\n"
-  "movzbl (%esi),%ebx\n"
-  "add    $0x1,%esi\n"
-  "movq   2048(%ecx,%eax,8),%mm0\n"
-  "movzbl (%edx),%eax\n"
-  "paddsw 4096(%ecx,%ebx,8),%mm0\n"
-  "movzbl 0x1(%edx),%ebx\n"
-  "movq   0(%ecx,%eax,8),%mm1\n"
-  "add    $0x2,%edx\n"
-  "movq   0(%ecx,%ebx,8),%mm2\n"
-  "paddsw %mm0,%mm1\n"
-  "paddsw %mm0,%mm2\n"
-  "psraw  $0x6,%mm1\n"
-  "psraw  $0x6,%mm2\n"
-  "packuswb %mm2,%mm1\n"
-  "movntq %mm1,0x0(%ebp)\n"
-  "add    $0x8,%ebp\n"
-"Lconvertend:"
-  "sub    $0x2,0x34(%esp)\n"
-  "jns    Lconvertloop\n"
-
-  "and    $0x1,0x34(%esp)\n"
-  "je     Lconvertdone\n"
-
-  "movzbl (%edi),%eax\n"
-  "movq   2048(%ecx,%eax,8),%mm0\n"
-  "movzbl (%esi),%eax\n"
-  "paddsw 4096(%ecx,%eax,8),%mm0\n"
-  "movzbl (%edx),%eax\n"
-  "movq   0(%ecx,%eax,8),%mm1\n"
-  "paddsw %mm0,%mm1\n"
-  "psraw  $0x6,%mm1\n"
-  "packuswb %mm1,%mm1\n"
-  "movd   %mm1,0x0(%ebp)\n"
-"Lconvertdone:\n"
-  "popa\n"
-  "ret\n"
-);
-
-void FastConvertYUVToRGB32Row(const uint8* y_buf,
-                              const uint8* u_buf,
-                              const uint8* v_buf,
-                              uint8* rgb_buf,
-                              int width) {
-  MacConvertYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width,
-                          &kCoefficientsRgbY[0][0]);
-}
-
-extern void MacScaleYUVToRGB32Row(const uint8* y_buf,
-                               const uint8* u_buf,
-                               const uint8* v_buf,
-                               uint8* rgb_buf,
-                               int width,
-                               int scaled_dx,
-                               int16 *kCoefficientsRgbY);
-
-  __asm__(
-"_MacScaleYUVToRGB32Row:\n"
-  "pusha\n"
-  "mov    0x24(%esp),%edx\n"
-  "mov    0x28(%esp),%edi\n"
-  "mov    0x2c(%esp),%esi\n"
-  "mov    0x30(%esp),%ebp\n"
-  "mov    0x3c(%esp),%ecx\n"
-  "xor    %ebx,%ebx\n"
-  "jmp    Lscaleend\n"
-
-"Lscaleloop:"
-  "mov    %ebx,%eax\n"
-  "sar    $0x5,%eax\n"
-  "movzbl (%edi,%eax,1),%eax\n"
-  "movq   2048(%ecx,%eax,8),%mm0\n"
-  "mov    %ebx,%eax\n"
-  "sar    $0x5,%eax\n"
-  "movzbl (%esi,%eax,1),%eax\n"
-  "paddsw 4096(%ecx,%eax,8),%mm0\n"
-  "mov    %ebx,%eax\n"
-  "add    0x38(%esp),%ebx\n"
-  "sar    $0x4,%eax\n"
-  "movzbl (%edx,%eax,1),%eax\n"
-  "movq   0(%ecx,%eax,8),%mm1\n"
-  "mov    %ebx,%eax\n"
-  "add    0x38(%esp),%ebx\n"
-  "sar    $0x4,%eax\n"
-  "movzbl (%edx,%eax,1),%eax\n"
-  "movq   0(%ecx,%eax,8),%mm2\n"
-  "paddsw %mm0,%mm1\n"
-  "paddsw %mm0,%mm2\n"
-  "psraw  $0x6,%mm1\n"
-  "psraw  $0x6,%mm2\n"
-  "packuswb %mm2,%mm1\n"
-  "movntq %mm1,0x0(%ebp)\n"
-  "add    $0x8,%ebp\n"
-"Lscaleend:"
-  "sub    $0x2,0x34(%esp)\n"
-  "jns    Lscaleloop\n"
-
-  "and    $0x1,0x34(%esp)\n"
-  "je     Lscaledone\n"
-
-  "mov    %ebx,%eax\n"
-  "sar    $0x5,%eax\n"
-  "movzbl (%edi,%eax,1),%eax\n"
-  "movq   2048(%ecx,%eax,8),%mm0\n"
-  "mov    %ebx,%eax\n"
-  "sar    $0x5,%eax\n"
-  "movzbl (%esi,%eax,1),%eax\n"
-  "paddsw 4096(%ecx,%eax,8),%mm0\n"
-  "mov    %ebx,%eax\n"
-  "sar    $0x4,%eax\n"
-  "movzbl (%edx,%eax,1),%eax\n"
-  "movq   0(%ecx,%eax,8),%mm1\n"
-  "paddsw %mm0,%mm1\n"
-  "psraw  $0x6,%mm1\n"
-  "packuswb %mm1,%mm1\n"
-  "movd   %mm1,0x0(%ebp)\n"
-
-"Lscaledone:"
-  "popa\n"
-  "ret\n"
-);
-
-void ScaleYUVToRGB32Row(const uint8* y_buf,
-                        const uint8* u_buf,
-                        const uint8* v_buf,
-                        uint8* rgb_buf,
-                        int width,
-                        int scaled_dx) {
-
-  MacScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx,
-                        &kCoefficientsRgbY[0][0]);
-}
-
-#endif // ARCH_CPU_PPC || ARCH_CPU_64_BITS
-}  // extern "C"
-
--- a/gfx/ycbcr/yuv_row_other.cpp
+++ b/gfx/ycbcr/yuv_row_other.cpp
@@ -7,9 +7,28 @@
 extern "C" {
 void FastConvertYUVToRGB32Row(const uint8* y_buf,
                               const uint8* u_buf,
                               const uint8* v_buf,
                               uint8* rgb_buf,
                               int width) {
   FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
 }
+
+void ScaleYUVToRGB32Row(const uint8* y_buf,  // Entry point without assembly in this file:
+                        const uint8* u_buf,  // all six arguments are forwarded, unchanged
+                        const uint8* v_buf,  // and in order, to ScaleYUVToRGB32Row_C.
+                        uint8* rgb_buf,
+                        int width,
+                        int source_dx) {
+  ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
+}
+
+void LinearScaleYUVToRGB32Row(const uint8* y_buf,  // Entry point without assembly in this file:
+                              const uint8* u_buf,  // all six arguments are forwarded, unchanged
+                              const uint8* v_buf,  // and in order, to LinearScaleYUVToRGB32Row_C.
+                              uint8* rgb_buf,
+                              int width,
+                              int source_dx) {
+  LinearScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
+}
+
 } 
new file mode 100644
--- /dev/null
+++ b/gfx/ycbcr/yuv_row_posix.cpp
@@ -0,0 +1,849 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "yuv_row.h"
+
+#define DCHECK(a)
+
+extern "C" {
+
+#if defined(ARCH_CPU_X86_64)
+
+// AMD64 ABI uses register parameters.
+void FastConvertYUVToRGB32Row(const uint8* y_buf,  // rdi
+                              const uint8* u_buf,  // rsi
+                              const uint8* v_buf,  // rdx
+                              uint8* rgb_buf,      // rcx
+                              int width) {         // r8
+  asm(  // Writes 4 bytes per pixel to rgb_buf; one U/V byte pair covers two Y bytes.
+  "jmp    1f\n"                       // start at the loop test
+"0:"                                  // main loop: two pixels per pass
+  "movzb  (%1),%%r10\n"               // r10 = *u_buf
+  "add    $0x1,%1\n"
+  "movzb  (%2),%%r11\n"               // r11 = *v_buf
+  "add    $0x1,%2\n"
+  "movq   2048(%5,%%r10,8),%%xmm0\n"  // 8-byte table entry at offset 2048 + 8*u
+  "movzb  (%0),%%r10\n"               // r10 = y_buf[0]
+  "movq   4096(%5,%%r11,8),%%xmm1\n"  // 8-byte table entry at offset 4096 + 8*v
+  "movzb  0x1(%0),%%r11\n"            // r11 = y_buf[1]
+  "paddsw %%xmm1,%%xmm0\n"            // xmm0 = chroma term shared by both pixels
+  "movq   (%5,%%r10,8),%%xmm2\n"      // table entry at offset 8*y0
+  "add    $0x2,%0\n"
+  "movq   (%5,%%r11,8),%%xmm3\n"      // table entry at offset 8*y1
+  "paddsw %%xmm0,%%xmm2\n"
+  "paddsw %%xmm0,%%xmm3\n"
+  "shufps $0x44,%%xmm3,%%xmm2\n"      // xmm2 = low half of xmm2 : low half of xmm3
+  "psraw  $0x6,%%xmm2\n"              // arithmetic >> 6 on every 16-bit lane
+  "packuswb %%xmm2,%%xmm2\n"          // saturate each lane to an unsigned byte
+  "movq   %%xmm2,0x0(%3)\n"           // store two pixels (8 bytes)
+  "add    $0x8,%3\n"
+"1:"
+  "sub    $0x2,%4\n"
+  "jns    0b\n"
+  // Here width is -1 when one pixel is left over and -2 when none is.
+"2:"
+  "add    $0x1,%4\n"
+  "js     3f\n"
+
+  "movzb  (%1),%%r10\n"               // leftover pixel: same lookups, one Y byte
+  "movq   2048(%5,%%r10,8),%%xmm0\n"
+  "movzb  (%2),%%r10\n"
+  "movq   4096(%5,%%r10,8),%%xmm1\n"
+  "paddsw %%xmm1,%%xmm0\n"
+  "movzb  (%0),%%r10\n"
+  "movq   (%5,%%r10,8),%%xmm1\n"
+  "paddsw %%xmm0,%%xmm1\n"
+  "psraw  $0x6,%%xmm1\n"
+  "packuswb %%xmm1,%%xmm1\n"
+  "movd   %%xmm1,0x0(%3)\n"           // store one pixel (4 bytes)
+"3:"
+  // %0-%4 are changed by the code above, so they must be "+r" rather than "r".
+  : "+r"(y_buf),  // %0
+    "+r"(u_buf),  // %1
+    "+r"(v_buf),  // %2
+    "+r"(rgb_buf),  // %3
+    "+r"(width)  // %4
+  : "r" (kCoefficientsRgbY)  // %5
+  : "memory", "cc", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"
+);
+}
+
+void ScaleYUVToRGB32Row(const uint8* y_buf,  // rdi
+                        const uint8* u_buf,  // rsi
+                        const uint8* v_buf,  // rdx
+                        uint8* rgb_buf,      // rcx
+                        int width,           // r8
+                        int source_dx) {     // r9
+  asm(  // Point-samples the source row: r11 holds x, which grows by source_dx per pixel.
+  "xor    %%r11,%%r11\n"                 // x = 0
+  "sub    $0x2,%4\n"
+  "js     1f\n"                          // fewer than two pixels: skip the pair loop
+  // Pair loop: Y is indexed by x >> 16, U and V by x >> 17.
+"0:"
+  "mov    %%r11,%%r10\n"
+  "sar    $0x11,%%r10\n"                 // r10 = x >> 17
+  "movzb  (%1,%%r10,1),%%rax\n"          // rax = u_buf[x >> 17]
+  "movq   2048(%5,%%rax,8),%%xmm0\n"     // table entry at offset 2048 + 8*u
+  "movzb  (%2,%%r10,1),%%rax\n"          // rax = v_buf[x >> 17]
+  "movq   4096(%5,%%rax,8),%%xmm1\n"     // table entry at offset 4096 + 8*v
+  "lea    (%%r11,%6),%%r10\n"            // r10 = x + source_dx (x of second pixel)
+  "sar    $0x10,%%r11\n"
+  "movzb  (%0,%%r11,1),%%rax\n"          // rax = y_buf[x >> 16]
+  "paddsw %%xmm1,%%xmm0\n"               // xmm0 = chroma term shared by both pixels
+  "movq   (%5,%%rax,8),%%xmm1\n"
+  "lea    (%%r10,%6),%%r11\n"            // r11 = x for the next pass
+  "sar    $0x10,%%r10\n"
+  "movzb  (%0,%%r10,1),%%rax\n"          // second Y sample
+  "movq   (%5,%%rax,8),%%xmm2\n"
+  "paddsw %%xmm0,%%xmm1\n"
+  "paddsw %%xmm0,%%xmm2\n"
+  "shufps $0x44,%%xmm2,%%xmm1\n"         // xmm1 = low half of xmm1 : low half of xmm2
+  "psraw  $0x6,%%xmm1\n"                 // arithmetic >> 6 on every 16-bit lane
+  "packuswb %%xmm1,%%xmm1\n"             // saturate each lane to an unsigned byte
+  "movq   %%xmm1,0x0(%3)\n"              // store two pixels (8 bytes)
+  "add    $0x8,%3\n"
+  "sub    $0x2,%4\n"
+  "jns    0b\n"
+
+"1:"
+  "add    $0x1,%4\n"                     // width is -1 (one pixel left) or -2 (none)
+  "js     2f\n"
+
+  "mov    %%r11,%%r10\n"
+  "sar    $0x11,%%r10\n"
+  "movzb  (%1,%%r10,1),%%rax\n"
+  "movq   2048(%5,%%rax,8),%%xmm0\n"
+  "movzb  (%2,%%r10,1),%%rax\n"
+  "movq   4096(%5,%%rax,8),%%xmm1\n"
+  "paddsw %%xmm1,%%xmm0\n"
+  "sar    $0x10,%%r11\n"
+  "movzb  (%0,%%r11,1),%%rax\n"
+  "movq   (%5,%%rax,8),%%xmm1\n"
+  "paddsw %%xmm0,%%xmm1\n"
+  "psraw  $0x6,%%xmm1\n"
+  "packuswb %%xmm1,%%xmm1\n"
+  "movd   %%xmm1,0x0(%3)\n"              // store one pixel (4 bytes)
+
+"2:"
+  // %3 and %4 are changed above; %0-%4 are all "+r" so the %N numbering is kept.
+  : "+r"(y_buf),  // %0
+    "+r"(u_buf),  // %1
+    "+r"(v_buf),  // %2
+    "+r"(rgb_buf),  // %3
+    "+r"(width)  // %4
+  : "r" (kCoefficientsRgbY),  // %5
+    "r"(static_cast<long>(source_dx))  // %6
+  : "memory", "cc", "r10", "r11", "rax", "xmm0", "xmm1", "xmm2"
+);
+}
+
+void LinearScaleYUVToRGB32Row(const uint8* y_buf,
+                              const uint8* u_buf,
+                              const uint8* v_buf,
+                              uint8* rgb_buf,
+                              int width,
+                              int source_dx) {
+  asm(  // Like the point-sampling scaler, but blends each sample with its right neighbour.
+  "xor    %%r11,%%r11\n"   // x = 0
+  "sub    $0x2,%4\n"
+  "js     2f\n"
+  "cmp    $0x20000,%6\n"   // if source_dx >= 2.0
+  "jl     0f\n"
+  "mov    $0x8000,%%r11\n" // x = 0.5 for 1/2 or less
+"0:"
+  // Pair loop: r11 = x of the first pixel, r10 = chroma index, then x of the second.
+"1:"
+  "mov    %%r11,%%r10\n"
+  "sar    $0x11,%%r10\n"
+  // U = (u[i] * (0x1fffe - f) + u[i + 1] * f) >> 17, with i = x >> 17, f = x & 0x1fffe.
+  "movzb  (%1, %%r10, 1), %%r13 \n"
+  "movzb  1(%1, %%r10, 1), %%r14 \n"
+  "mov    %%r11, %%rax \n"
+  "and    $0x1fffe, %%rax \n"
+  "imul   %%rax, %%r14 \n"
+  "xor    $0x1fffe, %%rax \n"
+  "imul   %%rax, %%r13 \n"
+  "add    %%r14, %%r13 \n"
+  "shr    $17, %%r13 \n"
+  "movq   2048(%5,%%r13,8), %%xmm0\n"
+  // V: the same blend applied to v_buf.
+  "movzb  (%2, %%r10, 1), %%r13 \n"
+  "movzb  1(%2, %%r10, 1), %%r14 \n"
+  "mov    %%r11, %%rax \n"
+  "and    $0x1fffe, %%rax \n"
+  "imul   %%rax, %%r14 \n"
+  "xor    $0x1fffe, %%rax \n"
+  "imul   %%rax, %%r13 \n"
+  "add    %%r14, %%r13 \n"
+  "shr    $17, %%r13 \n"
+  "movq   4096(%5,%%r13,8), %%xmm1\n"
+
+  "mov    %%r11, %%rax \n"
+  "lea    (%%r11,%6),%%r10\n"            // r10 = x + source_dx (x of second pixel)
+  "sar    $0x10,%%r11\n"
+  "paddsw %%xmm1,%%xmm0\n"               // xmm0 = chroma term shared by both pixels
+  // First Y = (y[j] * (0xffff - g) + y[j + 1] * g) >> 16, with j = x >> 16, g = x & 0xffff.
+  "movzb  (%0, %%r11, 1), %%r13 \n"
+  "movzb  1(%0, %%r11, 1), %%r14 \n"
+  "and    $0xffff, %%rax \n"
+  "imul   %%rax, %%r14 \n"
+  "xor    $0xffff, %%rax \n"
+  "imul   %%rax, %%r13 \n"
+  "add    %%r14, %%r13 \n"
+  "shr    $16, %%r13 \n"
+  "movq   (%5,%%r13,8),%%xmm1\n"
+
+  "mov    %%r10, %%rax \n"
+  "lea    (%%r10,%6),%%r11\n"            // r11 = x for the next pass
+  "sar    $0x10,%%r10\n"
+  // Second Y: the same blend at the advanced x.
+  "movzb  (%0,%%r10,1), %%r13 \n"
+  "movzb  1(%0,%%r10,1), %%r14 \n"
+  "and    $0xffff, %%rax \n"
+  "imul   %%rax, %%r14 \n"
+  "xor    $0xffff, %%rax \n"
+  "imul   %%rax, %%r13 \n"
+  "add    %%r14, %%r13 \n"
+  "shr    $16, %%r13 \n"
+  "movq   (%5,%%r13,8),%%xmm2\n"
+
+  "paddsw %%xmm0,%%xmm1\n"
+  "paddsw %%xmm0,%%xmm2\n"
+  "shufps $0x44,%%xmm2,%%xmm1\n"         // xmm1 = low half of xmm1 : low half of xmm2
+  "psraw  $0x6,%%xmm1\n"                 // arithmetic >> 6 on every 16-bit lane
+  "packuswb %%xmm1,%%xmm1\n"             // saturate each lane to an unsigned byte
+  "movq   %%xmm1,0x0(%3)\n"              // store two pixels (8 bytes)
+  "add    $0x8,%3\n"
+  "sub    $0x2,%4\n"
+  "jns    1b\n"
+
+"2:"
+  "add    $0x1,%4\n"                     // width is -1 (one pixel left) or -2 (none)
+  "js     3f\n"
+  // Leftover pixel: single samples are used here, without blending.
+  "mov    %%r11,%%r10\n"
+  "sar    $0x11,%%r10\n"
+
+  "movzb  (%1,%%r10,1), %%r13 \n"
+  "movq   2048(%5,%%r13,8),%%xmm0\n"
+
+  "movzb  (%2,%%r10,1), %%r13 \n"
+  "movq   4096(%5,%%r13,8),%%xmm1\n"
+
+  "paddsw %%xmm1,%%xmm0\n"
+  "sar    $0x10,%%r11\n"
+
+  "movzb  (%0,%%r11,1), %%r13 \n"
+  "movq   (%5,%%r13,8),%%xmm1\n"
+
+  "paddsw %%xmm0,%%xmm1\n"
+  "psraw  $0x6,%%xmm1\n"
+  "packuswb %%xmm1,%%xmm1\n"
+  "movd   %%xmm1,0x0(%3)\n"              // store one pixel (4 bytes)
+
+"3:"
+  // %3 and %4 are changed above; %0-%4 are all "+r" so the %N numbering is kept.
+  : "+r"(y_buf),  // %0
+    "+r"(u_buf),  // %1
+    "+r"(v_buf),  // %2
+    "+r"(rgb_buf),  // %3
+    "+r"(width)  // %4
+  : "r" (kCoefficientsRgbY),  // %5
+    "r"(static_cast<long>(source_dx))  // %6
+  : "memory", "cc", "r10", "r11", "r13", "r14", "rax", "xmm0", "xmm1", "xmm2"
+);
+}
+
+#elif defined(ARCH_CPU_X86_32) && !defined(__PIC__)
+
+// The PIC version is slower because fewer registers are available, so
+// the non-PIC version is used on platforms where it is possible.
+void FastConvertYUVToRGB32Row(const uint8* y_buf,  // Declaration only: the body is the
+                              const uint8* u_buf,  // file-scope assembly that follows.
+                              const uint8* v_buf,
+                              uint8* rgb_buf,
+                              int width);
+  asm(
+  ".text\n"
+  ".global FastConvertYUVToRGB32Row\n"
+  ".type FastConvertYUVToRGB32Row, @function\n"
+"FastConvertYUVToRGB32Row:\n"
+  "pusha\n"                      // 32 bytes of saved registers: first argument is at 0x24(%esp)
+  "mov    0x24(%esp),%edx\n"     // edx = y_buf
+  "mov    0x28(%esp),%edi\n"     // edi = u_buf
+  "mov    0x2c(%esp),%esi\n"     // esi = v_buf
+  "mov    0x30(%esp),%ebp\n"     // ebp = rgb_buf
+  "mov    0x34(%esp),%ecx\n"     // ecx = width
+  "jmp    1f\n"
+  // Main loop: two pixels per pass, sharing one U byte and one V byte.
+"0:"
+  "movzbl (%edi),%eax\n"
+  "add    $0x1,%edi\n"
+  "movzbl (%esi),%ebx\n"
+  "add    $0x1,%esi\n"
+  "movq   kCoefficientsRgbY+2048(,%eax,8),%mm0\n"    // 8-byte entry at offset 2048 + 8*u
+  "movzbl (%edx),%eax\n"
+  "paddsw kCoefficientsRgbY+4096(,%ebx,8),%mm0\n"    // plus entry at offset 4096 + 8*v
+  "movzbl 0x1(%edx),%ebx\n"
+  "movq   kCoefficientsRgbY(,%eax,8),%mm1\n"         // entry for y_buf[0]
+  "add    $0x2,%edx\n"
+  "movq   kCoefficientsRgbY(,%ebx,8),%mm2\n"         // entry for y_buf[1]
+  "paddsw %mm0,%mm1\n"
+  "paddsw %mm0,%mm2\n"
+  "psraw  $0x6,%mm1\n"
+  "psraw  $0x6,%mm2\n"
+  "packuswb %mm2,%mm1\n"         // saturate both pixels to bytes
+  "movntq %mm1,0x0(%ebp)\n"      // non-temporal store of two pixels (8 bytes)
+  "add    $0x8,%ebp\n"
+"1:"
+  "sub    $0x2,%ecx\n"
+  "jns    0b\n"
+
+  "and    $0x1,%ecx\n"           // ecx was -1 (one pixel left) or -2 (none)
+  "je     2f\n"
+
+  "movzbl (%edi),%eax\n"
+  "movq   kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
+  "movzbl (%esi),%eax\n"
+  "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
+  "movzbl (%edx),%eax\n"
+  "movq   kCoefficientsRgbY(,%eax,8),%mm1\n"
+  "paddsw %mm0,%mm1\n"
+  "psraw  $0x6,%mm1\n"
+  "packuswb %mm1,%mm1\n"
+  "movd   %mm1,0x0(%ebp)\n"      // store the leftover pixel (4 bytes)
+"2:"
+  "popa\n"
+  "ret\n"                        // NOTE(review): no emms here; presumably the caller issues it - confirm
+#if !defined(XP_MACOSX)
+  ".previous\n"
+#endif
+);
+
+
+void ScaleYUVToRGB32Row(const uint8* y_buf,  // Declaration only: the body is the
+                        const uint8* u_buf,  // file-scope assembly that follows.
+                        const uint8* v_buf,
+                        uint8* rgb_buf,
+                        int width,
+                        int source_dx);
+  asm(
+  ".text\n"
+  ".global ScaleYUVToRGB32Row\n"
+  ".type ScaleYUVToRGB32Row, @function\n"
+"ScaleYUVToRGB32Row:\n"
+  "pusha\n"                      // 32 bytes of saved registers: first argument is at 0x24(%esp)
+  "mov    0x24(%esp),%edx\n"     // edx = y_buf
+  "mov    0x28(%esp),%edi\n"     // edi = u_buf
+  "mov    0x2c(%esp),%esi\n"     // esi = v_buf
+  "mov    0x30(%esp),%ebp\n"     // ebp = rgb_buf
+  "mov    0x34(%esp),%ecx\n"     // ecx = width
+  "xor    %ebx,%ebx\n"           // ebx = x = 0
+  "jmp    1f\n"
+  // Main loop: two pixels per pass; Y is indexed by x >> 16, U and V by x >> 17.
+"0:"
+  "mov    %ebx,%eax\n"
+  "sar    $0x11,%eax\n"
+  "movzbl (%edi,%eax,1),%eax\n"
+  "movq   kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
+  "mov    %ebx,%eax\n"
+  "sar    $0x11,%eax\n"
+  "movzbl (%esi,%eax,1),%eax\n"
+  "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
+  "mov    %ebx,%eax\n"
+  "add    0x38(%esp),%ebx\n"     // x += source_dx
+  "sar    $0x10,%eax\n"
+  "movzbl (%edx,%eax,1),%eax\n"
+  "movq   kCoefficientsRgbY(,%eax,8),%mm1\n"
+  "mov    %ebx,%eax\n"
+  "add    0x38(%esp),%ebx\n"     // x += source_dx
+  "sar    $0x10,%eax\n"
+  "movzbl (%edx,%eax,1),%eax\n"
+  "movq   kCoefficientsRgbY(,%eax,8),%mm2\n"
+  "paddsw %mm0,%mm1\n"
+  "paddsw %mm0,%mm2\n"
+  "psraw  $0x6,%mm1\n"
+  "psraw  $0x6,%mm2\n"
+  "packuswb %mm2,%mm1\n"
+  "movntq %mm1,0x0(%ebp)\n"      // non-temporal store of two pixels (8 bytes)
+  "add    $0x8,%ebp\n"
+"1:"
+  "sub    $0x2,%ecx\n"
+  "jns    0b\n"
+
+  "and    $0x1,%ecx\n"           // ecx was -1 (one pixel left) or -2 (none)
+  "je     2f\n"
+
+  "mov    %ebx,%eax\n"
+  "sar    $0x11,%eax\n"
+  "movzbl (%edi,%eax,1),%eax\n"
+  "movq   kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
+  "mov    %ebx,%eax\n"
+  "sar    $0x11,%eax\n"
+  "movzbl (%esi,%eax,1),%eax\n"
+  "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
+  "mov    %ebx,%eax\n"
+  "sar    $0x10,%eax\n"
+  "movzbl (%edx,%eax,1),%eax\n"
+  "movq   kCoefficientsRgbY(,%eax,8),%mm1\n"
+  "paddsw %mm0,%mm1\n"
+  "psraw  $0x6,%mm1\n"
+  "packuswb %mm1,%mm1\n"
+  "movd   %mm1,0x0(%ebp)\n"      // store the leftover pixel (4 bytes)
+
+"2:"
+  "popa\n"
+  "ret\n"                        // NOTE(review): no emms here; presumably the caller issues it - confirm
+#if !defined(XP_MACOSX)
+  ".previous\n"
+#endif
+);
+
+void LinearScaleYUVToRGB32Row(const uint8* y_buf,  // Declaration only: the body is the
+                              const uint8* u_buf,  // file-scope assembly that follows.
+                              const uint8* v_buf,
+                              uint8* rgb_buf,
+                              int width,
+                              int source_dx);
+  asm(
+  ".text\n"
+  ".global LinearScaleYUVToRGB32Row\n"
+  ".type LinearScaleYUVToRGB32Row, @function\n"
+"LinearScaleYUVToRGB32Row:\n"
+  "pusha\n"                      // 32 bytes of saved registers: first argument is at 0x24(%esp)
+  "mov    0x24(%esp),%edx\n"     // edx = y_buf
+  "mov    0x28(%esp),%edi\n"     // edi = u_buf (v_buf is reloaded from 0x2c(%esp) each pass)
+  "mov    0x30(%esp),%ebp\n"     // ebp = rgb_buf
+
+  // source_width = width * source_dx, written back over the width argument slot
+  "mov    0x34(%esp), %ecx\n"
+  "imull  0x38(%esp), %ecx\n"
+  "mov    %ecx, 0x34(%esp)\n"
+
+  "mov    0x38(%esp), %ecx\n"
+  "xor    %ebx,%ebx\n"     // x = 0
+  "cmp    $0x20000,%ecx\n" // if source_dx >= 2.0
+  "jl     1f\n"
+  "mov    $0x8000,%ebx\n"  // x = 0.5 for 1/2 or less
+  "jmp    1f\n"
+  // Main loop: runs while x < source_width and normally emits two pixels per pass.
+"0:"
+  "mov    %ebx,%eax\n"
+  "sar    $0x11,%eax\n"
+  // U = (u[i] * (0x1fffe - f) + u[i + 1] * f) >> 17, with i = x >> 17, f = x & 0x1fffe.
+  "movzbl (%edi,%eax,1),%ecx\n"
+  "movzbl 1(%edi,%eax,1),%esi\n"
+  "mov    %ebx,%eax\n"
+  "andl   $0x1fffe, %eax \n"
+  "imul   %eax, %esi \n"
+  "xorl   $0x1fffe, %eax \n"
+  "imul   %eax, %ecx \n"
+  "addl   %esi, %ecx \n"
+  "shrl   $17, %ecx \n"
+  "movq   kCoefficientsRgbY+2048(,%ecx,8),%mm0\n"
+  // V: the same blend on v_buf; esi doubles as the pointer and a scratch register.
+  "mov    0x2c(%esp),%esi\n"
+  "mov    %ebx,%eax\n"
+  "sar    $0x11,%eax\n"
+
+  "movzbl (%esi,%eax,1),%ecx\n"
+  "movzbl 1(%esi,%eax,1),%esi\n"
+  "mov    %ebx,%eax\n"
+  "andl   $0x1fffe, %eax \n"
+  "imul   %eax, %esi \n"
+  "xorl   $0x1fffe, %eax \n"
+  "imul   %eax, %ecx \n"
+  "addl   %esi, %ecx \n"
+  "shrl   $17, %ecx \n"
+  "paddsw kCoefficientsRgbY+4096(,%ecx,8),%mm0\n"
+  // First Y = (y[j] * (0xffff - g) + y[j + 1] * g) >> 16, with j = x >> 16, g = x & 0xffff.
+  "mov    %ebx,%eax\n"
+  "sar    $0x10,%eax\n"
+  "movzbl (%edx,%eax,1),%ecx\n"
+  "movzbl 1(%edx,%eax,1),%esi\n"
+  "mov    %ebx,%eax\n"
+  "add    0x38(%esp),%ebx\n"     // x += source_dx
+  "andl   $0xffff, %eax \n"
+  "imul   %eax, %esi \n"
+  "xorl   $0xffff, %eax \n"
+  "imul   %eax, %ecx \n"
+  "addl   %esi, %ecx \n"
+  "shrl   $16, %ecx \n"
+  "movq   kCoefficientsRgbY(,%ecx,8),%mm1\n"
+  // If x has reached source_width, only this first pixel is emitted (label 2).
+  "cmp    0x34(%esp), %ebx\n"
+  "jge    2f\n"
+  // Second Y: the same blend at the advanced x.
+  "mov    %ebx,%eax\n"
+  "sar    $0x10,%eax\n"
+  "movzbl (%edx,%eax,1),%ecx\n"
+  "movzbl 1(%edx,%eax,1),%esi\n"
+  "mov    %ebx,%eax\n"
+  "add    0x38(%esp),%ebx\n"     // x += source_dx
+  "andl   $0xffff, %eax \n"
+  "imul   %eax, %esi \n"
+  "xorl   $0xffff, %eax \n"
+  "imul   %eax, %ecx \n"
+  "addl   %esi, %ecx \n"
+  "shrl   $16, %ecx \n"
+  "movq   kCoefficientsRgbY(,%ecx,8),%mm2\n"
+
+  "paddsw %mm0,%mm1\n"
+  "paddsw %mm0,%mm2\n"
+  "psraw  $0x6,%mm1\n"
+  "psraw  $0x6,%mm2\n"
+  "packuswb %mm2,%mm1\n"
+  "movntq %mm1,0x0(%ebp)\n"      // non-temporal store of two pixels (8 bytes)
+  "add    $0x8,%ebp\n"
+
+"1:"
+  "cmp    0x34(%esp), %ebx\n"
+  "jl     0b\n"
+  "popa\n"
+  "ret\n"
+  // Exit path for a final single pixel whose luma entry is already in mm1.
+"2:"
+  "paddsw %mm0, %mm1\n"
+  "psraw $6, %mm1\n"
+  "packuswb %mm1, %mm1\n"
+  "movd %mm1, (%ebp)\n"
+  "popa\n"
+  "ret\n"
+#if !defined(XP_MACOSX)
+  ".previous\n"
+#endif
+);
+
+#elif defined(ARCH_CPU_X86_32) && defined(__PIC__)
+
+void PICConvertYUVToRGB32Row(const uint8* y_buf,  // Declaration only: the body is the
+                             const uint8* u_buf,  // file-scope assembly that follows.
+                             const uint8* v_buf,
+                             uint8* rgb_buf,
+                             int width,
+                             int16 *kCoefficientsRgbY);  // table base, passed as an argument
+
+  asm(
+  ".text\n"
+#if defined(XP_MACOSX)
+"_PICConvertYUVToRGB32Row:\n"
+#else
+"PICConvertYUVToRGB32Row:\n"
+#endif
+  "pusha\n"                      // 32 bytes of saved registers: first argument is at 0x24(%esp)
+  "mov    0x24(%esp),%edx\n"     // edx = y_buf
+  "mov    0x28(%esp),%edi\n"     // edi = u_buf
+  "mov    0x2c(%esp),%esi\n"     // esi = v_buf
+  "mov    0x30(%esp),%ebp\n"     // ebp = rgb_buf
+  "mov    0x38(%esp),%ecx\n"     // ecx = table base; width stays in its stack slot
+
+  "jmp    1f\n"
+  // Main loop: two pixels per pass, sharing one U byte and one V byte.
+"0:"
+  "movzbl (%edi),%eax\n"
+  "add    $0x1,%edi\n"
+  "movzbl (%esi),%ebx\n"
+  "add    $0x1,%esi\n"
+  "movq   2048(%ecx,%eax,8),%mm0\n"      // 8-byte entry at offset 2048 + 8*u
+  "movzbl (%edx),%eax\n"
+  "paddsw 4096(%ecx,%ebx,8),%mm0\n"      // plus entry at offset 4096 + 8*v
+  "movzbl 0x1(%edx),%ebx\n"
+  "movq   0(%ecx,%eax,8),%mm1\n"         // entry for y_buf[0]
+  "add    $0x2,%edx\n"
+  "movq   0(%ecx,%ebx,8),%mm2\n"         // entry for y_buf[1]
+  "paddsw %mm0,%mm1\n"
+  "paddsw %mm0,%mm2\n"
+  "psraw  $0x6,%mm1\n"
+  "psraw  $0x6,%mm2\n"
+  "packuswb %mm2,%mm1\n"
+  "movntq %mm1,0x0(%ebp)\n"      // non-temporal store of two pixels (8 bytes)
+  "add    $0x8,%ebp\n"
+"1:"
+  "subl   $0x2,0x34(%esp)\n"     // width -= 2, done in memory
+  "jns    0b\n"
+
+  "andl   $0x1,0x34(%esp)\n"     // width was -1 (one pixel left) or -2 (none)
+  "je     2f\n"
+
+  "movzbl (%edi),%eax\n"
+  "movq   2048(%ecx,%eax,8),%mm0\n"
+  "movzbl (%esi),%eax\n"
+  "paddsw 4096(%ecx,%eax,8),%mm0\n"
+  "movzbl (%edx),%eax\n"
+  "movq   0(%ecx,%eax,8),%mm1\n"
+  "paddsw %mm0,%mm1\n"
+  "psraw  $0x6,%mm1\n"
+  "packuswb %mm1,%mm1\n"
+  "movd   %mm1,0x0(%ebp)\n"      // store the leftover pixel (4 bytes)
+"2:"
+  "popa\n"
+  "ret\n"                        // NOTE(review): no emms here; presumably the caller issues it - confirm
+#if !defined(XP_MACOSX)
+  ".previous\n"
+#endif
+);
+
+// Variant for the ARCH_CPU_X86_32 && __PIC__ configuration: resolves the
+// address of the coefficient table in C++ and forwards everything to the
+// assembly routine, which receives the table as its last argument.
+void FastConvertYUVToRGB32Row(const uint8* y_buf,
+                              const uint8* u_buf,
+                              const uint8* v_buf,
+                              uint8* rgb_buf,
+                              int width) {
+  int16* const table_base = &kCoefficientsRgbY[0][0];
+  PICConvertYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, table_base);
+}
+
+// Converts one row of planar YUV to 32-bit pixels while resampling
+// horizontally by picking source samples (no interpolation). Two output
+// pixels are produced per loop iteration and share one chroma lookup.
+//
+// %ebx holds the running source position x, starting at 0 and advanced by
+// source_dx after every luma sample. The luma index is x >> 16 and the
+// chroma index is x >> 17, i.e. x carries 16 fractional bits and chroma is
+// addressed at half the luma index.
+//
+// kCoefficientsRgbY is the base address of the coefficient table (8-byte
+// entries; Y at byte offset 0, U at 2048, V at 4096).
+//
+// Stack offsets below are relative to %esp after "pusha" has pushed 32 bytes
+// (assumes arguments are passed on the stack in declaration order):
+//   0x24 y_buf, 0x28 u_buf, 0x2c v_buf, 0x30 rgb_buf, 0x34 width,
+//   0x38 source_dx, 0x3c table.
+// The width slot on the stack doubles as the loop counter and is modified.
+//
+// No "emms" is executed in this routine.
+// NOTE(review): presumably the caller clears the MMX state -- confirm.
+void PICScaleYUVToRGB32Row(const uint8* y_buf,
+                           const uint8* u_buf,
+                           const uint8* v_buf,
+                           uint8* rgb_buf,
+                           int width,
+                           int source_dx,
+                           int16 *kCoefficientsRgbY);
+
+  asm(
+  ".text\n"
+#if defined(XP_MACOSX)
+"_PICScaleYUVToRGB32Row:\n"
+#else
+"PICScaleYUVToRGB32Row:\n"
+#endif
+  "pusha\n"
+  // edx = y_buf, edi = u_buf, esi = v_buf, ebp = rgb_buf, ecx = table base,
+  // ebx = x = 0.
+  "mov    0x24(%esp),%edx\n"
+  "mov    0x28(%esp),%edi\n"
+  "mov    0x2c(%esp),%esi\n"
+  "mov    0x30(%esp),%ebp\n"
+  "mov    0x3c(%esp),%ecx\n"
+  "xor    %ebx,%ebx\n"
+  "jmp    1f\n"
+
+  // Main loop: mm0 = U entry + V entry for the chroma sample at x >> 17
+  // (taken at the position of the first pixel of the pair), then two luma
+  // lookups at x >> 16, each followed by x += source_dx.
+"0:"
+  "mov    %ebx,%eax\n"
+  "sar    $0x11,%eax\n"
+  "movzbl (%edi,%eax,1),%eax\n"
+  "movq   2048(%ecx,%eax,8),%mm0\n"
+  "mov    %ebx,%eax\n"
+  "sar    $0x11,%eax\n"
+  "movzbl (%esi,%eax,1),%eax\n"
+  "paddsw 4096(%ecx,%eax,8),%mm0\n"
+  "mov    %ebx,%eax\n"
+  "add    0x38(%esp),%ebx\n"
+  "sar    $0x10,%eax\n"
+  "movzbl (%edx,%eax,1),%eax\n"
+  "movq   0(%ecx,%eax,8),%mm1\n"
+  "mov    %ebx,%eax\n"
+  "add    0x38(%esp),%ebx\n"
+  "sar    $0x10,%eax\n"
+  "movzbl (%edx,%eax,1),%eax\n"
+  "movq   0(%ecx,%eax,8),%mm2\n"
+  "paddsw %mm0,%mm1\n"
+  "paddsw %mm0,%mm2\n"
+  "psraw  $0x6,%mm1\n"
+  "psraw  $0x6,%mm2\n"
+  "packuswb %mm2,%mm1\n"
+  "movntq %mm1,0x0(%ebp)\n"
+  "add    $0x8,%ebp\n"
+  // Loop test: subtract 2 from the width slot and continue while the result
+  // is non-negative.
+"1:"
+  "subl   $0x2,0x34(%esp)\n"
+  "jns    0b\n"
+
+  // The counter is now -1 (odd width) or -2 (even width); bit 0 tells whether
+  // one pixel is still pending.
+  "andl   $0x1,0x34(%esp)\n"
+  "je     2f\n"
+
+  // Tail: one last pixel at the current x, stored as 4 bytes with movd.
+  "mov    %ebx,%eax\n"
+  "sar    $0x11,%eax\n"
+  "movzbl (%edi,%eax,1),%eax\n"
+  "movq   2048(%ecx,%eax,8),%mm0\n"
+  "mov    %ebx,%eax\n"
+  "sar    $0x11,%eax\n"
+  "movzbl (%esi,%eax,1),%eax\n"
+  "paddsw 4096(%ecx,%eax,8),%mm0\n"
+  "mov    %ebx,%eax\n"
+  "sar    $0x10,%eax\n"
+  "movzbl (%edx,%eax,1),%eax\n"
+  "movq   0(%ecx,%eax,8),%mm1\n"
+  "paddsw %mm0,%mm1\n"
+  "psraw  $0x6,%mm1\n"
+  "packuswb %mm1,%mm1\n"
+  "movd   %mm1,0x0(%ebp)\n"
+
+"2:"
+  "popa\n"
+  "ret\n"
+#if !defined(XP_MACOSX)
+  ".previous\n"
+#endif
+);
+
+// Variant for the ARCH_CPU_X86_32 && __PIC__ configuration: resolves the
+// address of the coefficient table in C++ and forwards everything to the
+// assembly routine, which receives the table as its last argument.
+void ScaleYUVToRGB32Row(const uint8* y_buf,
+                        const uint8* u_buf,
+                        const uint8* v_buf,
+                        uint8* rgb_buf,
+                        int width,
+                        int source_dx) {
+  int16* const table_base = &kCoefficientsRgbY[0][0];
+  PICScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf,
+                        width, source_dx, table_base);
+}
+
+// Converts one row of planar YUV to 32-bit pixels while resampling
+// horizontally with linear interpolation between neighbouring source
+// samples. Two output pixels are produced per loop iteration and share one
+// interpolated chroma value.
+//
+// %ebx holds the running source position x with 16 fractional bits. For
+// luma the sample index is x >> 16 and the blend weight is x & 0xffff; for
+// chroma the index is x >> 17 and the weight is x & 0x1fffe. Each blend is
+//   (s[idx] * (weight ^ mask) + s[idx + 1] * weight) >> shift
+// and the result is used as the index into the coefficient table.
+//
+// NOTE(review): every blend reads s[idx + 1]; presumably the caller
+// guarantees a readable byte after the last sample of each plane -- confirm.
+//
+// kCoefficientsRgbY is the base address of the coefficient table (8-byte
+// entries; Y at byte offset 0, U at 2048, V at 4096).
+//
+// Stack offsets below are relative to %esp after "pusha" has pushed 32 bytes
+// (assumes arguments are passed on the stack in declaration order):
+//   0x24 y_buf, 0x28 u_buf, 0x2c v_buf, 0x30 rgb_buf, 0x34 width,
+//   0x38 source_dx, 0x3c table.
+// The width slot on the stack is overwritten with width * source_dx.
+//
+// No "emms" is executed in this routine.
+// NOTE(review): presumably the caller clears the MMX state -- confirm.
+void PICLinearScaleYUVToRGB32Row(const uint8* y_buf,
+                                 const uint8* u_buf,
+                                 const uint8* v_buf,
+                                 uint8* rgb_buf,
+                                 int width,
+                                 int source_dx,
+                                 int16 *kCoefficientsRgbY);
+
+  asm(
+  ".text\n"
+#if defined(XP_MACOSX)
+"_PICLinearScaleYUVToRGB32Row:\n"
+#else
+"PICLinearScaleYUVToRGB32Row:\n"
+#endif
+  "pusha\n"
+  // edx = y_buf, ebp = rgb_buf, edi = table base. u_buf and v_buf are not
+  // kept in registers; they are reloaded into %esi inside the loop.
+  // The load of %ecx and the clearing of %ebx in this group are repeated
+  // just below before either register is read.
+  "mov    0x24(%esp),%edx\n"
+  "mov    0x30(%esp),%ebp\n"
+  "mov    0x34(%esp),%ecx\n"
+  "mov    0x3c(%esp),%edi\n"
+  "xor    %ebx,%ebx\n"
+
+  // source_width = width * source_dx, stored back into the width slot on the
+  // stack. This is the end position that x is compared against.
+  "mov    0x34(%esp), %ecx\n"
+  "imull  0x38(%esp), %ecx\n"
+  "mov    %ecx, 0x34(%esp)\n"
+
+  "mov    0x38(%esp), %ecx\n"
+  "xor    %ebx,%ebx\n"     // x = 0
+  "cmp    $0x20000,%ecx\n" // if source_dx >= 2.0
+  "jl     1f\n"
+  "mov    $0x8000,%ebx\n"  // x = 0.5 for 1/2 or less
+  "jmp    1f\n"
+
+"0:"
+  // Interpolated U sample -> mm0 = U table entry.
+  "mov    0x28(%esp),%esi\n"
+  "mov    %ebx,%eax\n"
+  "sar    $0x11,%eax\n"
+
+  "movzbl (%esi,%eax,1),%ecx\n"
+  "movzbl 1(%esi,%eax,1),%esi\n"
+  "mov    %ebx,%eax\n"
+  "andl   $0x1fffe, %eax \n"
+  "imul   %eax, %esi \n"
+  "xorl   $0x1fffe, %eax \n"
+  "imul   %eax, %ecx \n"
+  "addl   %esi, %ecx \n"
+  "shrl   $17, %ecx \n"
+  "movq   2048(%edi,%ecx,8),%mm0\n"
+
+  // Interpolated V sample -> mm0 += V table entry (saturating).
+  "mov    0x2c(%esp),%esi\n"
+  "mov    %ebx,%eax\n"
+  "sar    $0x11,%eax\n"
+
+  "movzbl (%esi,%eax,1),%ecx\n"
+  "movzbl 1(%esi,%eax,1),%esi\n"
+  "mov    %ebx,%eax\n"
+  "andl   $0x1fffe, %eax \n"
+  "imul   %eax, %esi \n"
+  "xorl   $0x1fffe, %eax \n"
+  "imul   %eax, %ecx \n"
+  "addl   %esi, %ecx \n"
+  "shrl   $17, %ecx \n"
+  "paddsw 4096(%edi,%ecx,8),%mm0\n"
+
+  // First pixel: interpolated Y sample -> mm1 = Y table entry; x += source_dx.
+  "mov    %ebx,%eax\n"
+  "sar    $0x10,%eax\n"
+  "movzbl (%edx,%eax,1),%ecx\n"
+  "movzbl 1(%edx,%eax,1),%esi\n"
+  "mov    %ebx,%eax\n"
+  "add    0x38(%esp),%ebx\n"
+  "andl   $0xffff, %eax \n"
+  "imul   %eax, %esi \n"
+  "xorl   $0xffff, %eax \n"
+  "imul   %eax, %ecx \n"
+  "addl   %esi, %ecx \n"
+  "shrl   $16, %ecx \n"
+  "movq   (%edi,%ecx,8),%mm1\n"
+
+  // If x has reached source_width there is no second pixel in this pair:
+  // finish with the single-pixel store at label 2.
+  "cmp    0x34(%esp), %ebx\n"
+  "jge    2f\n"
+
+  // Second pixel: interpolated Y sample -> mm2 = Y table entry; x += source_dx.
+  "mov    %ebx,%eax\n"
+  "sar    $0x10,%eax\n"
+  "movzbl (%edx,%eax,1),%ecx\n"
+  "movzbl 1(%edx,%eax,1),%esi\n"
+  "mov    %ebx,%eax\n"
+  "add    0x38(%esp),%ebx\n"
+  "andl   $0xffff, %eax \n"
+  "imul   %eax, %esi \n"
+  "xorl   $0xffff, %eax \n"
+  "imul   %eax, %ecx \n"
+  "addl   %esi, %ecx \n"
+  "shrl   $16, %ecx \n"
+  "movq   (%edi,%ecx,8),%mm2\n"
+
+  // Add chroma to both pixels, shift right by 6, pack to unsigned bytes with
+  // saturation and write 8 bytes with a non-temporal store.
+  "paddsw %mm0,%mm1\n"
+  "paddsw %mm0,%mm2\n"
+  "psraw  $0x6,%mm1\n"
+  "psraw  $0x6,%mm2\n"
+  "packuswb %mm2,%mm1\n"
+  "movntq %mm1,0x0(%ebp)\n"
+  "add    $0x8,%ebp\n"
+
+  // Loop test: continue while source_width > x.
+"1:"
+  "cmp    %ebx, 0x34(%esp)\n"
+  "jg     0b\n"
+  "popa\n"
+  "ret\n"
+
+  // Odd tail: only mm1 holds a pixel; store 4 bytes with movd and return.
+"2:"
+  "paddsw %mm0, %mm1\n"
+  "psraw $6, %mm1\n"
+  "packuswb %mm1, %mm1\n"
+  "movd %mm1, (%ebp)\n"
+  "popa\n"
+  "ret\n"
+#if !defined(XP_MACOSX)
+  ".previous\n"
+#endif
+);
+
+
+// Variant for the ARCH_CPU_X86_32 && __PIC__ configuration: resolves the
+// address of the coefficient table in C++ and forwards everything to the
+// assembly routine, which receives the table as its last argument.
+void LinearScaleYUVToRGB32Row(const uint8* y_buf,
+                              const uint8* u_buf,
+                              const uint8* v_buf,
+                              uint8* rgb_buf,
+                              int width,
+                              int source_dx) {
+  int16* const table_base = &kCoefficientsRgbY[0][0];
+  PICLinearScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf,
+                              width, source_dx, table_base);
+}
+#else
+// Fallback for configurations not matched by the preceding #if/#elif
+// branches: forwards the row conversion to the C implementation.
+// NOTE(review): the meaning of the trailing argument 1 is defined by the
+// declaration of FastConvertYUVToRGB32Row_C, which is not visible here.
+void FastConvertYUVToRGB32Row(const uint8* y_buf,
+                              const uint8* u_buf,
+                              const uint8* v_buf,
+                              uint8* rgb_buf,
+                              int width) {
+  FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
+}
+
+// Fallback for configurations not matched by the preceding #if/#elif
+// branches: forwards all arguments unchanged to the C implementation.
+void ScaleYUVToRGB32Row(const uint8* y_buf,
+                        const uint8* u_buf,
+                        const uint8* v_buf,
+                        uint8* rgb_buf,
+                        int width,
+                        int source_dx) {
+  ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
+}
+
+// Fallback for configurations not matched by the preceding #if/#elif
+// branches: forwards all arguments unchanged to the C implementation.
+void LinearScaleYUVToRGB32Row(const uint8* y_buf,
+                              const uint8* u_buf,
+                              const uint8* v_buf,
+                              uint8* rgb_buf,
+                              int width,
+                              int source_dx) {
+  LinearScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
+}
+#endif
+
+}  // extern "C"
+
new file mode 100644
--- /dev/null
+++ b/gfx/ycbcr/yuv_row_table.cpp
@@ -0,0 +1,233 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "yuv_row.h"
+
+extern "C" {
+
+// Row generators for the coefficient table below. Each macro expands to a
+// brace-enclosed initializer of four int16 values for sample value i. The
+// coefficients are multiplied by 64 (6 fractional bits) and 0.5 is added
+// before the truncating cast to int16.
+//
+// NOTE(review): the lane order appears to be blue, green, red, alpha (2.018
+// and 1.596 are the usual blue-from-U and red-from-V factors) -- confirm
+// against the consumers of the table.
+// NOTE(review): the cast truncates toward zero, so "+ 0.5" rounds to nearest
+// only for non-negative products.
+// NOTE(review): the argument i is not parenthesized in the expansions; pass
+// only simple literals, as the table below does.
+
+// Y contribution: the same value 1.164 * (i - 16) in the first three lanes,
+// 0 in the fourth.
+#define RGBY(i) { \
+  static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
+  static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
+  static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
+  0 \
+}
+
+// U contribution: 2.018 * (i - 128) and -0.391 * (i - 128) in the first two
+// lanes, 0 in the third. The fourth lane is the constant 256 * 64 - 1, which
+// does not depend on i.
+#define RGBU(i) { \
+  static_cast<int16>(2.018 * 64 * (i - 128) + 0.5), \
+  static_cast<int16>(-0.391 * 64 * (i - 128) + 0.5), \
+  0, \
+  static_cast<int16>(256 * 64 - 1) \
+}
+
+// V contribution: 0 in the first lane, -0.813 * (i - 128) and
+// 1.596 * (i - 128) in the second and third lanes, 0 in the fourth.
+#define RGBV(i) { \
+  0, \
+  static_cast<int16>(-0.813 * 64 * (i - 128) + 0.5), \
+  static_cast<int16>(1.596 * 64 * (i - 128) + 0.5), \
+  0 \
+}
+
+// Combined coefficient table: 256 * 3 rows of four int16 values each.
+// Rows 0..255 are the Y rows, rows 256..511 the U rows and rows 512..767 the
+// V rows, each indexed by the 8-bit sample value. Keeping the three
+// sub-tables in one array lets code reach the U and V rows as fixed offsets
+// from the Y rows.
+SIMD_ALIGNED(int16 kCoefficientsRgbY[256 * 3][4]) = {
+  // Y table.
+  RGBY(0x00), RGBY(0x01), RGBY(0x02), RGBY(0x03),
+  RGBY(0x04), RGBY(0x05), RGBY(0x06), RGBY(0x07),
+  RGBY(0x08), RGBY(0x09), RGBY(0x0A), RGBY(0x0B),
+  RGBY(0x0C), RGBY(0x0D), RGBY(0x0E), RGBY(0x0F),
+  RGBY(0x10), RGBY(0x11), RGBY(0x12), RGBY(0x13),
+  RGBY(0x14), RGBY(0x15), RGBY(0x16), RGBY(0x17),
+  RGBY(0x18), RGBY(0x19), RGBY(0x1A), RGBY(0x1B),
+  RGBY(0x1C), RGBY(0x1D), RGBY(0x1E), RGBY(0x1F),
+  RGBY(0x20), RGBY(0x21), RGBY(0x22), RGBY(0x23),
+  RGBY(0x24), RGBY(0x25), RGBY(0x26), RGBY(0x27),
+  RGBY(0x28), RGBY(0x29), RGBY(0x2A), RGBY(0x2B),
+  RGBY(0x2C), RGBY(0x2D), RGBY(0x2E), RGBY(0x2F),
+  RGBY(0x30), RGBY(0x31), RGBY(0x32), RGBY(0x33),
+  RGBY(0x34), RGBY(0x35), RGBY(0x36), RGBY(0x37),
+  RGBY(0x38), RGBY(0x39), RGBY(0x3A), RGBY(0x3B),
+  RGBY(0x3C), RGBY(0x3D), RGBY(0x3E), RGBY(0x3F),
+  RGBY(0x40), RGBY(0x41), RGBY(0x42), RGBY(0x43),
+  RGBY(0x44), RGBY(0x45), RGBY(0x46), RGBY(0x47),
+  RGBY(0x48), RGBY(0x49), RGBY(0x4A), RGBY(0x4B),
+  RGBY(0x4C), RGBY(0x4D), RGBY(0x4E), RGBY(0x4F),
+  RGBY(0x50), RGBY(0x51), RGBY(0x52), RGBY(0x53),
+  RGBY(0x54), RGBY(0x55), RGBY(0x56), RGBY(0x57),
+  RGBY(0x58), RGBY(0x59), RGBY(0x5A), RGBY(0x5B),
+  RGBY(0x5C), RGBY(0x5D), RGBY(0x5E), RGBY(0x5F),
+  RGBY(0x60), RGBY(0x61), RGBY(0x62), RGBY(0x63),
+  RGBY(0x64), RGBY(0x65), RGBY(0x66), RGBY(0x67),
+  RGBY(0x68), RGBY(0x69), RGBY(0x6A), RGBY(0x6B),
+  RGBY(0x6C), RGBY(0x6D), RGBY(0x6E), RGBY(0x6F),
+  RGBY(0x70), RGBY(0x71), RGBY(0x72), RGBY(0x73),
+  RGBY(0x74), RGBY(0x75), RGBY(0x76), RGBY(0x77),
+  RGBY(0x78), RGBY(0x79), RGBY(0x7A), RGBY(0x7B),
+  RGBY(0x7C), RGBY(0x7D), RGBY(0x7E), RGBY(0x7F),
+  RGBY(0x80), RGBY(0x81), RGBY(0x82), RGBY(0x83),
+  RGBY(0x84), RGBY(0x85), RGBY(0x86), RGBY(0x87),
+  RGBY(0x88), RGBY(0x89), RGBY(0x8A), RGBY(0x8B),
+  RGBY(0x8C), RGBY(0x8D), RGBY(0x8E), RGBY(0x8F),
+  RGBY(0x90), RGBY(0x91), RGBY(0x92), RGBY(0x93),
+  RGBY(0x94), RGBY(0x95), RGBY(0x96), RGBY(0x97),
+  RGBY(0x98), RGBY(0x99), RGBY(0x9A), RGBY(0x9B),
+  RGBY(0x9C), RGBY(0x9D), RGBY(0x9E), RGBY(0x9F),
+  RGBY(0xA0), RGBY(0xA1), RGBY(0xA2), RGBY(0xA3),
+  RGBY(0xA4), RGBY(0xA5), RGBY(0xA6), RGBY(0xA7),
+  RGBY(0xA8), RGBY(0xA9), RGBY(0xAA), RGBY(0xAB),
+  RGBY(0xAC), RGBY(0xAD), RGBY(0xAE), RGBY(0xAF),
+  RGBY(0xB0), RGBY(0xB1), RGBY(0xB2), RGBY(0xB3),
+  RGBY(0xB4), RGBY(0xB5), RGBY(0xB6), RGBY(0xB7),
+  RGBY(0xB8), RGBY(0xB9), RGBY(0xBA), RGBY(0xBB),
+  RGBY(0xBC), RGBY(0xBD), RGBY(0xBE), RGBY(0xBF),
+  RGBY(0xC0), RGBY(0xC1), RGBY(0xC2), RGBY(0xC3),
+  RGBY(0xC4), RGBY(0xC5), RGBY(0xC6), RGBY(0xC7),
+  RGBY(0xC8), RGBY(0xC9), RGBY(0xCA), RGBY(0xCB),
+  RGBY(0xCC), RGBY(0xCD), RGBY(0xCE), RGBY(0xCF),
+  RGBY(0xD0), RGBY(0xD1), RGBY(0xD2), RGBY(0xD3),
+  RGBY(0xD4), RGBY(0xD5), RGBY(0xD6), RGBY(0xD7),
+  RGBY(0xD8), RGBY(0xD9), RGBY(0xDA), RGBY(0xDB),
+  RGBY(0xDC), RGBY(0xDD), RGBY(0xDE), RGBY(0xDF),
+  RGBY(0xE0), RGBY(0xE1), RGBY(0xE2), RGBY(0xE3),
+  RGBY(0xE4), RGBY(0xE5), RGBY(0xE6), RGBY(0xE7),
+  RGBY(0xE8), RGBY(0xE9), RGBY(0xEA), RGBY(0xEB),
+  RGBY(0xEC), RGBY(0xED), RGBY(0xEE), RGBY(0xEF),
+  RGBY(0xF0), RGBY(0xF1), RGBY(0xF2), RGBY(0xF3),
+  RGBY(0xF4), RGBY(0xF5), RGBY(0xF6), RGBY(0xF7),
+  RGBY(0xF8), RGBY(0xF9), RGBY(0xFA), RGBY(0xFB),
+  RGBY(0xFC), RGBY(0xFD), RGBY(0xFE), RGBY(0xFF),
+
+  // Chroma U table.
+  RGBU(0x00), RGBU(0x01), RGBU(0x02), RGBU(0x03),
+  RGBU(0x04), RGBU(0x05), RGBU(0x06), RGBU(0x07),
+  RGBU(0x08), RGBU(0x09), RGBU(0x0A), RGBU(0x0B),
+  RGBU(0x0C), RGBU(0x0D), RGBU(0x0E), RGBU(0x0F),
+  RGBU(0x10), RGBU(0x11), RGBU(0x12), RGBU(0x13),
+  RGBU(0x14), RGBU(0x15), RGBU(0x16), RGBU(0x17),
+  RGBU(0x18), RGBU(0x19), RGBU(0x1A), RGBU(0x1B),
+  RGBU(0x1C), RGBU(0x1D), RGBU(0x1E), RGBU(0x1F),
+  RGBU(0x20), RGBU(0x21), RGBU(0x22), RGBU(0x23),
+  RGBU(0x24), RGBU(0x25), RGBU(0x26), RGBU(0x27),
+  RGBU(0x28), RGBU(0x29), RGBU(0x2A), RGBU(0x2B),
+  RGBU(0x2C), RGBU(0x2D), RGBU(0x2E), RGBU(0x2F),
+  RGBU(0x30), RGBU(0x31), RGBU(0x32), RGBU(0x33),
+  RGBU(0x34), RGBU(0x35), RGBU(0x36), RGBU(0x37),
+  RGBU(0x38), RGBU(0x39), RGBU(0x3A), RGBU(0x3B),
+  RGBU(0x3C), RGBU(0x3D), RGBU(0x3E), RGBU(0x3F),
+  RGBU(0x40), RGBU(0x41), RGBU(0x42), RGBU(0x43),
+  RGBU(0x44), RGBU(0x45), RGBU(0x46), RGBU(0x47),
+  RGBU(0x48), RGBU(0x49), RGBU(0x4A), RGBU(0x4B),
+  RGBU(0x4C), RGBU(0x4D), RGBU(0x4E), RGBU(0x4F),
+  RGBU(0x50), RGBU(0x51), RGBU(0x52), RGBU(0x53),
+  RGBU(0x54), RGBU(0x55), RGBU(0x56), RGBU(0x57),
+  RGBU(0x58), RGBU(0x59), RGBU(0x5A), RGBU(0x5B),
+  RGBU(0x5C), RGBU(0x5D), RGBU(0x5E), RGBU(0x5F),
+  RGBU(0x60), RGBU(0x61), RGBU(0x62), RGBU(0x63),
+  RGBU(0x64), RGBU(0x65), RGBU(0x66), RGBU(0x67),
+  RGBU(0x68), RGBU(0x69), RGBU(0x6A), RGBU(0x6B),
+  RGBU(0x6C), RGBU(0x6D), RGBU(0x6E), RGBU(0x6F),
+  RGBU(0x70), RGBU(0x71), RGBU(0x72), RGBU(0x73),
+  RGBU(0x74), RGBU(0x75), RGBU(0x76), RGBU(0x77),
+  RGBU(0x78), RGBU(0x79), RGBU(0x7A), RGBU(0x7B),
+  RGBU(0x7C), RGBU(0x7D), RGBU(0x7E), RGBU(0x7F),
+  RGBU(0x80), RGBU(0x81), RGBU(0x82), RGBU(0x83),
+  RGBU(0x84), RGBU(0x85), RGBU(0x86), RGBU(0x87),
+  RGBU(0x88), RGBU(0x89), RGBU(0x8A), RGBU(0x8B),
+  RGBU(0x8C), RGBU(0x8D), RGBU(0x8E), RGBU(0x8F),
+  RGBU(0x90), RGBU(0x91), RGBU(0x92), RGBU(0x93),
+  RGBU(0x94), RGBU(0x95), RGBU(0x96), RGBU(0x97),
+  RGBU(0x98), RGBU(0x99), RGBU(0x9A), RGBU(0x9B),
+  RGBU(0x9C), RGBU(0x9D), RGBU(0x9E), RGBU(0x9F),
+  RGBU(0xA0), RGBU(0xA1), RGBU(0xA2), RGBU(0xA3),
+  RGBU(0xA4), RGBU(0xA5), RGBU(0xA6), RGBU(0xA7),
+  RGBU(0xA8), RGBU(0xA9), RGBU(0xAA), RGBU(0xAB),
+  RGBU(0xAC), RGBU(0xAD), RGBU(0xAE), RGBU(0xAF),
+  RGBU(0xB0), RGBU(0xB1), RGBU(0xB2), RGBU(0xB3),
+  RGBU(0xB4), RGBU(0xB5), RGBU(0xB6), RGBU(0xB7),
+  RGBU(0xB8), RGBU(0xB9), RGBU(0xBA), RGBU(0xBB),
+  RGBU(0xBC), RGBU(0xBD), RGBU(0xBE), RGBU(0xBF),
+  RGBU(0xC0), RGBU(0xC1), RGBU(0xC2), RGBU(0xC3),
+  RGBU(0xC4), RGBU(0xC5), RGBU(0xC6), RGBU(0xC7),
+  RGBU(0xC8), RGBU(0xC9), RGBU(0xCA), RGBU(0xCB),
+  RGBU(0xCC), RGBU(0xCD), RGBU(0xCE), RGBU(0xCF),
+  RGBU(0xD0), RGBU(0xD1), RGBU(0xD2), RGBU(0xD3),
+  RGBU(0xD4), RGBU(0xD5), RGBU(0xD6), RGBU(0xD7),
+  RGBU(0xD8), RGBU(0xD9), RGBU(0xDA), RGBU(0xDB),
+  RGBU(0xDC), RGBU(0xDD), RGBU(0xDE), RGBU(0xDF),
+  RGBU(0xE0), RGBU(0xE1), RGBU(0xE2), RGBU(0xE3),
+  RGBU(0xE4), RGBU(0xE5), RGBU(0xE6), RGBU(0xE7),
+  RGBU(0xE8), RGBU(0xE9), RGBU(0xEA), RGBU(0xEB),
+  RGBU(0xEC), RGBU(0xED), RGBU(0xEE), RGBU(0xEF),
+  RGBU(0xF0), RGBU(0xF1), RGBU(0xF2), RGBU(0xF3),
+  RGBU(0xF4), RGBU(0xF5), RGBU(0xF6), RGBU(0xF7),
+  RGBU(0xF8), RGBU(0xF9), RGBU(0xFA), RGBU(0xFB),
+  RGBU(0xFC), RGBU(0xFD), RGBU(0xFE), RGBU(0xFF),
+
+  // Chroma V table.
+  RGBV(0x00), RGBV(0x01), RGBV(0x02), RGBV(0x03),
+  RGBV(0x04), RGBV(0x05), RGBV(0x06), RGBV(0x07),
+  RGBV(0x08), RGBV(0x09), RGBV(0x0A), RGBV(0x0B),
+  RGBV(0x0C), RGBV(0x0D), RGBV(0x0E), RGBV(0x0F),
+  RGBV(0x10), RGBV(0x11), RGBV(0x12), RGBV(0x13),
+  RGBV(0x14), RGBV(0x15), RGBV(0x16), RGBV(0x17),
+  RGBV(0x18), RGBV(0x19), RGBV(0x1A), RGBV(0x1B),
+  RGBV(0x1C), RGBV(0x1D), RGBV(0x1E), RGBV(0x1F),
+  RGBV(0x20), RGBV(0x21), RGBV(0x22), RGBV(0x23),
+  RGBV(0x24), RGBV(0x25), RGBV(0x26), RGBV(0x27),
+  RGBV(0x28), RGBV(0x29), RGBV(0x2A), RGBV(0x2B),
+  RGBV(0x2C), RGBV(0x2D), RGBV(0x2E), RGBV(0x2F),
+  RGBV(0x30), RGBV(0x31), RGBV(0x32), RGBV(0x33),
+  RGBV(0x34), RGBV(0x35), RGBV(0x36), RGBV(0x37),
+  RGBV(0x38), RGBV(0x39), RGBV(0x3A), RGBV(0x3B),
+  RGBV(0x3C), RGBV(0x3D), RGBV(0x3E), RGBV(0x3F),
+  RGBV(0x40), RGBV(0x41), RGBV(0x42), RGBV(0x43),
+  RGBV(0x44), RGBV(0x45), RGBV(0x46), RGBV(0x47),
+  RGBV(0x48), RGBV(0x49), RGBV(0x4A), RGBV(0x4B),
+  RGBV(0x4C), RGBV(0x4D), RGBV(0x4E), RGBV(0x4F),
+  RGBV(0x50), RGBV(0x51), RGBV(0x52), RGBV(0x53),
+  RGBV(0x54), RGBV(0x55), RGBV(0x56), RGBV(0x57),
+  RGBV(0x58), RGBV(0x59), RGBV(0x5A), RGBV(0x5B),
+  RGBV(0x5C), RGBV(0x5D), RGBV(0x5E), RGBV(0x5F),
+  RGBV(0x60), RGBV(0x61), RGBV(0x62), RGBV(0x63),
+  RGBV(0x64), RGBV(0x65), RGBV(0x66), RGBV(0x67),
+  RGBV(0x68), RGBV(0x69), RGBV(0x6A), RGBV(0x6B),
+  RGBV(0x6C), RGBV(0x6D), RGBV(0x6E), RGBV(0x6F),
+  RGBV(0x70), RGBV(0x71), RGBV(0x72), RGBV(0x73),
+  RGBV(0x74), RGBV(0x75), RGBV(0x76), RGBV(0x77),
+  RGBV(0x78), RGBV(0x79), RGBV(0x7A), RGBV(0x7B),
+  RGBV(0x7C), RGBV(0x7D), RGBV(0x7E), RGBV(0x7F),
+  RGBV(0x80), RGBV(0x81), RGBV(0x82), RGBV(0x83),
+  RGBV(0x84), RGBV(0x85), RGBV(0x86), RGBV(0x87),
+  RGBV(0x88), RGBV(0x89), RGBV(0x8A), RGBV(0x8B),
+  RGBV(0x8C), RGBV(0x8D), RGBV(0x8E), RGBV(0x8F),
+  RGBV(0x90), RGBV(0x91), RGBV(0x92), RGBV(0x93),
+  RGBV(0x94), RGBV(0x95), RGBV(0x96), RGBV(0x97),
+  RGBV(0x98), RGBV(0x99), RGBV(0x9A), RGBV(0x9B),
+  RGBV(0x9C), RGBV(0x9D), RGBV(0x9E), RGBV(0x9F),
+  RGBV(0xA0), RGBV(0xA1), RGBV(0xA2), RGBV(0xA3),
+  RGBV(0xA4), RGBV(0xA5), RGBV(0xA6), RGBV(0xA7),
+  RGBV(0xA8), RGBV(0xA9), RGBV(0xAA), RGBV(0xAB),
+  RGBV(0xAC), RGBV(0xAD), RGBV(0xAE), RGBV(0xAF),
+  RGBV(0xB0), RGBV(0xB1), RGBV(0xB2), RGBV(0xB3),
+  RGBV(0xB4), RGBV(0xB5), RGBV(0xB6), RGBV(0xB7),
+  RGBV(0xB8), RGBV(0xB9), RGBV(0xBA), RGBV(0xBB),
+  RGBV(0xBC), RGBV(0xBD), RGBV(0xBE), RGBV(0xBF),
+  RGBV(0xC0), RGBV(0xC1), RGBV(0xC2), RGBV(0xC3),
+  RGBV(0xC4), RGBV(0xC5), RGBV(0xC6), RGBV(0xC7),
+  RGBV(0xC8), RGBV(0xC9), RGBV(0xCA), RGBV(0xCB),
+  RGBV(0xCC), RGBV(0xCD), RGBV(0xCE), RGBV(0xCF),
+  RGBV(0xD0), RGBV(0xD1), RGBV(0xD2), RGBV(0xD3),
+  RGBV(0xD4), RGBV(0xD5), RGBV(0xD6), RGBV(0xD7),
+  RGBV(0xD8), RGBV(0xD9), RGBV(0xDA), RGBV(0xDB),
+  RGBV(0xDC), RGBV(0xDD), RGBV(0xDE), RGBV(0xDF),
+  RGBV(0xE0), RGBV(0xE1), RGBV(0xE2), RGBV(0xE3),
+  RGBV(0xE4), RGBV(0xE5), RGBV(0xE6), RGBV(0xE7),
+  RGBV(0xE8), RGBV(0xE9), RGBV(0xEA), RGBV(0xEB),
+  RGBV(0xEC), RGBV(0xED), RGBV(0xEE), RGBV(0xEF),
+  RGBV(0xF0), RGBV(0xF1), RGBV(0xF2), RGBV(0xF3),
+  RGBV(0xF4), RGBV(0xF5), RGBV(0xF6), RGBV(0xF7),
+  RGBV(0xF8), RGBV(0xF9), RGBV(0xFA), RGBV(0xFB),
+  RGBV(0xFC), RGBV(0xFD), RGBV(0xFE), RGBV(0xFF),
+};
+
+#undef RGBY
+#undef RGBU
+#undef RGBV
+
+}  // extern "C"
--- a/gfx/ycbcr/yuv_row_win.cpp
+++ b/gfx/ycbcr/yuv_row_win.cpp
@@ -1,273 +1,23 @@
-// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
 #include "yuv_row.h"
-
-extern "C" {
-// 64 Bit builds use the C fallback. Optimized code
-// needs to be fixed for 64 bit builds.
-#if defined(ARCH_CPU_64_BITS)
-// PPC implementation uses C fallback
-void FastConvertYUVToRGB32Row(const uint8* y_buf,
-                              const uint8* u_buf,
-                              const uint8* v_buf,
-                              uint8* rgb_buf,
-                              int width) {
-  FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
-}
-
-void ScaleYUVToRGB32Row(const uint8* y_buf,
-                        const uint8* u_buf,
-                        const uint8* v_buf,
-                        uint8* rgb_buf,
-                        int width,
-                        int scaled_dx) {
-  ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx, 1);
-}
-
-#else
+#define MOZILLA_SSE_INCLUDE_HEADER_FOR_SSE2
+#define MOZILLA_SSE_INCLUDE_HEADER_FOR_MMX
+#include "mozilla/SSE.h"
 
 
-#define RGBY(i) { \
-  static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
-  static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
-  static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
-  0 \
-}
-
-#define RGBU(i) { \
-  static_cast<int16>(2.018 * 64 * (i - 128) + 0.5), \
-  static_cast<int16>(-0.391 * 64 * (i - 128) + 0.5), \
-  0, \
-  static_cast<int16>(256 * 64 - 1) \
-}
-
-#define RGBV(i) { \
-  0, \
-  static_cast<int16>(-0.813 * 64 * (i - 128) + 0.5), \
-  static_cast<int16>(1.596 * 64 * (i - 128) + 0.5), \
-  0 \
-}
-
-#define MMX_ALIGNED(var) __declspec(align(16)) var
-
-MMX_ALIGNED(int16 kCoefficientsRgbY[256][4]) = {
-  RGBY(0x00), RGBY(0x01), RGBY(0x02), RGBY(0x03),
-  RGBY(0x04), RGBY(0x05), RGBY(0x06), RGBY(0x07),
-  RGBY(0x08), RGBY(0x09), RGBY(0x0A), RGBY(0x0B),
-  RGBY(0x0C), RGBY(0x0D), RGBY(0x0E), RGBY(0x0F),
-  RGBY(0x10), RGBY(0x11), RGBY(0x12), RGBY(0x13),
-  RGBY(0x14), RGBY(0x15), RGBY(0x16), RGBY(0x17),
-  RGBY(0x18), RGBY(0x19), RGBY(0x1A), RGBY(0x1B),
-  RGBY(0x1C), RGBY(0x1D), RGBY(0x1E), RGBY(0x1F),
-  RGBY(0x20), RGBY(0x21), RGBY(0x22), RGBY(0x23),
-  RGBY(0x24), RGBY(0x25), RGBY(0x26), RGBY(0x27),
-  RGBY(0x28), RGBY(0x29), RGBY(0x2A), RGBY(0x2B),
-  RGBY(0x2C), RGBY(0x2D), RGBY(0x2E), RGBY(0x2F),
-  RGBY(0x30), RGBY(0x31), RGBY(0x32), RGBY(0x33),
-  RGBY(0x34), RGBY(0x35), RGBY(0x36), RGBY(0x37),
-  RGBY(0x38), RGBY(0x39), RGBY(0x3A), RGBY(0x3B),
-  RGBY(0x3C), RGBY(0x3D), RGBY(0x3E), RGBY(0x3F),
-  RGBY(0x40), RGBY(0x41), RGBY(0x42), RGBY(0x43),
-  RGBY(0x44), RGBY(0x45), RGBY(0x46), RGBY(0x47),
-  RGBY(0x48), RGBY(0x49), RGBY(0x4A), RGBY(0x4B),
-  RGBY(0x4C), RGBY(0x4D), RGBY(0x4E), RGBY(0x4F),
-  RGBY(0x50), RGBY(0x51), RGBY(0x52), RGBY(0x53),
-  RGBY(0x54), RGBY(0x55), RGBY(0x56), RGBY(0x57),
-  RGBY(0x58), RGBY(0x59), RGBY(0x5A), RGBY(0x5B),
-  RGBY(0x5C), RGBY(0x5D), RGBY(0x5E), RGBY(0x5F),
-  RGBY(0x60), RGBY(0x61), RGBY(0x62), RGBY(0x63),
-  RGBY(0x64), RGBY(0x65), RGBY(0x66), RGBY(0x67),
-  RGBY(0x68), RGBY(0x69), RGBY(0x6A), RGBY(0x6B),
-  RGBY(0x6C), RGBY(0x6D), RGBY(0x6E), RGBY(0x6F),
-  RGBY(0x70), RGBY(0x71), RGBY(0x72), RGBY(0x73),
-  RGBY(0x74), RGBY(0x75), RGBY(0x76), RGBY(0x77),
-  RGBY(0x78), RGBY(0x79), RGBY(0x7A), RGBY(0x7B),
-  RGBY(0x7C), RGBY(0x7D), RGBY(0x7E), RGBY(0x7F),
-  RGBY(0x80), RGBY(0x81), RGBY(0x82), RGBY(0x83),
-  RGBY(0x84), RGBY(0x85), RGBY(0x86), RGBY(0x87),
-  RGBY(0x88), RGBY(0x89), RGBY(0x8A), RGBY(0x8B),
-  RGBY(0x8C), RGBY(0x8D), RGBY(0x8E), RGBY(0x8F),
-  RGBY(0x90), RGBY(0x91), RGBY(0x92), RGBY(0x93),
-  RGBY(0x94), RGBY(0x95), RGBY(0x96), RGBY(0x97),
-  RGBY(0x98), RGBY(0x99), RGBY(0x9A), RGBY(0x9B),
-  RGBY(0x9C), RGBY(0x9D), RGBY(0x9E), RGBY(0x9F),
-  RGBY(0xA0), RGBY(0xA1), RGBY(0xA2), RGBY(0xA3),
-  RGBY(0xA4), RGBY(0xA5), RGBY(0xA6), RGBY(0xA7),
-  RGBY(0xA8), RGBY(0xA9), RGBY(0xAA), RGBY(0xAB),
-  RGBY(0xAC), RGBY(0xAD), RGBY(0xAE), RGBY(0xAF),
-  RGBY(0xB0), RGBY(0xB1), RGBY(0xB2), RGBY(0xB3),
-  RGBY(0xB4), RGBY(0xB5), RGBY(0xB6), RGBY(0xB7),
-  RGBY(0xB8), RGBY(0xB9), RGBY(0xBA), RGBY(0xBB),
-  RGBY(0xBC), RGBY(0xBD), RGBY(0xBE), RGBY(0xBF),
-  RGBY(0xC0), RGBY(0xC1), RGBY(0xC2), RGBY(0xC3),
-  RGBY(0xC4), RGBY(0xC5), RGBY(0xC6), RGBY(0xC7),
-  RGBY(0xC8), RGBY(0xC9), RGBY(0xCA), RGBY(0xCB),
-  RGBY(0xCC), RGBY(0xCD), RGBY(0xCE), RGBY(0xCF),
-  RGBY(0xD0), RGBY(0xD1), RGBY(0xD2), RGBY(0xD3),
-  RGBY(0xD4), RGBY(0xD5), RGBY(0xD6), RGBY(0xD7),
-  RGBY(0xD8), RGBY(0xD9), RGBY(0xDA), RGBY(0xDB),
-  RGBY(0xDC), RGBY(0xDD), RGBY(0xDE), RGBY(0xDF),
-  RGBY(0xE0), RGBY(0xE1), RGBY(0xE2), RGBY(0xE3),
-  RGBY(0xE4), RGBY(0xE5), RGBY(0xE6), RGBY(0xE7),
-  RGBY(0xE8), RGBY(0xE9), RGBY(0xEA), RGBY(0xEB),
-  RGBY(0xEC), RGBY(0xED), RGBY(0xEE), RGBY(0xEF),
-  RGBY(0xF0), RGBY(0xF1), RGBY(0xF2), RGBY(0xF3),
-  RGBY(0xF4), RGBY(0xF5), RGBY(0xF6), RGBY(0xF7),
-  RGBY(0xF8), RGBY(0xF9), RGBY(0xFA), RGBY(0xFB),
-  RGBY(0xFC), RGBY(0xFD), RGBY(0xFE), RGBY(0xFF),
-};
+// The U and V coefficients live in the same array as the Y coefficients
+// (kCoefficientsRgbY[256 * 3][4]). These names expand to the Y symbol plus a
+// constant and are used inside the __asm address expressions in this file,
+// e.g. [kCoefficientsRgbU + 8 * eax], where 2048 and 4096 act as byte
+// offsets (256 entries of 8 bytes each).
+// NOTE(review): the expansions are not parenthesized, and in a C++
+// expression "+ 2048" would count array rows rather than bytes; keep these
+// names confined to the inline assembly.
+#define kCoefficientsRgbU kCoefficientsRgbY + 2048
+#define kCoefficientsRgbV kCoefficientsRgbY + 4096
 
-MMX_ALIGNED(int16 kCoefficientsRgbU[256][4]) = {
-  RGBU(0x00), RGBU(0x01), RGBU(0x02), RGBU(0x03),
-  RGBU(0x04), RGBU(0x05), RGBU(0x06), RGBU(0x07),
-  RGBU(0x08), RGBU(0x09), RGBU(0x0A), RGBU(0x0B),
-  RGBU(0x0C), RGBU(0x0D), RGBU(0x0E), RGBU(0x0F),
-  RGBU(0x10), RGBU(0x11), RGBU(0x12), RGBU(0x13),
-  RGBU(0x14), RGBU(0x15), RGBU(0x16), RGBU(0x17),
-  RGBU(0x18), RGBU(0x19), RGBU(0x1A), RGBU(0x1B),
-  RGBU(0x1C), RGBU(0x1D), RGBU(0x1E), RGBU(0x1F),
-  RGBU(0x20), RGBU(0x21), RGBU(0x22), RGBU(0x23),
-  RGBU(0x24), RGBU(0x25), RGBU(0x26), RGBU(0x27),
-  RGBU(0x28), RGBU(0x29), RGBU(0x2A), RGBU(0x2B),
-  RGBU(0x2C), RGBU(0x2D), RGBU(0x2E), RGBU(0x2F),
-  RGBU(0x30), RGBU(0x31), RGBU(0x32), RGBU(0x33),
-  RGBU(0x34), RGBU(0x35), RGBU(0x36), RGBU(0x37),
-  RGBU(0x38), RGBU(0x39), RGBU(0x3A), RGBU(0x3B),
-  RGBU(0x3C), RGBU(0x3D), RGBU(0x3E), RGBU(0x3F),
-  RGBU(0x40), RGBU(0x41), RGBU(0x42), RGBU(0x43),
-  RGBU(0x44), RGBU(0x45), RGBU(0x46), RGBU(0x47),
-  RGBU(0x48), RGBU(0x49), RGBU(0x4A), RGBU(0x4B),
-  RGBU(0x4C), RGBU(0x4D), RGBU(0x4E), RGBU(0x4F),
-  RGBU(0x50), RGBU(0x51), RGBU(0x52), RGBU(0x53),
-  RGBU(0x54), RGBU(0x55), RGBU(0x56), RGBU(0x57),
-  RGBU(0x58), RGBU(0x59), RGBU(0x5A), RGBU(0x5B),
-  RGBU(0x5C), RGBU(0x5D), RGBU(0x5E), RGBU(0x5F),
-  RGBU(0x60), RGBU(0x61), RGBU(0x62), RGBU(0x63),
-  RGBU(0x64), RGBU(0x65), RGBU(0x66), RGBU(0x67),
-  RGBU(0x68), RGBU(0x69), RGBU(0x6A), RGBU(0x6B),
-  RGBU(0x6C), RGBU(0x6D), RGBU(0x6E), RGBU(0x6F),
-  RGBU(0x70), RGBU(0x71), RGBU(0x72), RGBU(0x73),
-  RGBU(0x74), RGBU(0x75), RGBU(0x76), RGBU(0x77),
-  RGBU(0x78), RGBU(0x79), RGBU(0x7A), RGBU(0x7B),
-  RGBU(0x7C), RGBU(0x7D), RGBU(0x7E), RGBU(0x7F),
-  RGBU(0x80), RGBU(0x81), RGBU(0x82), RGBU(0x83),
-  RGBU(0x84), RGBU(0x85), RGBU(0x86), RGBU(0x87),
-  RGBU(0x88), RGBU(0x89), RGBU(0x8A), RGBU(0x8B),
-  RGBU(0x8C), RGBU(0x8D), RGBU(0x8E), RGBU(0x8F),
-  RGBU(0x90), RGBU(0x91), RGBU(0x92), RGBU(0x93),
-  RGBU(0x94), RGBU(0x95), RGBU(0x96), RGBU(0x97),
-  RGBU(0x98), RGBU(0x99), RGBU(0x9A), RGBU(0x9B),
-  RGBU(0x9C), RGBU(0x9D), RGBU(0x9E), RGBU(0x9F),
-  RGBU(0xA0), RGBU(0xA1), RGBU(0xA2), RGBU(0xA3),
-  RGBU(0xA4), RGBU(0xA5), RGBU(0xA6), RGBU(0xA7),
-  RGBU(0xA8), RGBU(0xA9), RGBU(0xAA), RGBU(0xAB),
-  RGBU(0xAC), RGBU(0xAD), RGBU(0xAE), RGBU(0xAF),
-  RGBU(0xB0), RGBU(0xB1), RGBU(0xB2), RGBU(0xB3),
-  RGBU(0xB4), RGBU(0xB5), RGBU(0xB6), RGBU(0xB7),
-  RGBU(0xB8), RGBU(0xB9), RGBU(0xBA), RGBU(0xBB),
-  RGBU(0xBC), RGBU(0xBD), RGBU(0xBE), RGBU(0xBF),
-  RGBU(0xC0), RGBU(0xC1), RGBU(0xC2), RGBU(0xC3),
-  RGBU(0xC4), RGBU(0xC5), RGBU(0xC6), RGBU(0xC7),
-  RGBU(0xC8), RGBU(0xC9), RGBU(0xCA), RGBU(0xCB),
-  RGBU(0xCC), RGBU(0xCD), RGBU(0xCE), RGBU(0xCF),
-  RGBU(0xD0), RGBU(0xD1), RGBU(0xD2), RGBU(0xD3),
-  RGBU(0xD4), RGBU(0xD5), RGBU(0xD6), RGBU(0xD7),
-  RGBU(0xD8), RGBU(0xD9), RGBU(0xDA), RGBU(0xDB),
-  RGBU(0xDC), RGBU(0xDD), RGBU(0xDE), RGBU(0xDF),
-  RGBU(0xE0), RGBU(0xE1), RGBU(0xE2), RGBU(0xE3),
-  RGBU(0xE4), RGBU(0xE5), RGBU(0xE6), RGBU(0xE7),
-  RGBU(0xE8), RGBU(0xE9), RGBU(0xEA), RGBU(0xEB),
-  RGBU(0xEC), RGBU(0xED), RGBU(0xEE), RGBU(0xEF),
-  RGBU(0xF0), RGBU(0xF1), RGBU(0xF2), RGBU(0xF3),
-  RGBU(0xF4), RGBU(0xF5), RGBU(0xF6), RGBU(0xF7),
-  RGBU(0xF8), RGBU(0xF9), RGBU(0xFA), RGBU(0xFB),
-  RGBU(0xFC), RGBU(0xFD), RGBU(0xFE), RGBU(0xFF),
-};
-
-MMX_ALIGNED(int16 kCoefficientsRgbV[256][4]) = {
-  RGBV(0x00), RGBV(0x01), RGBV(0x02), RGBV(0x03),
-  RGBV(0x04), RGBV(0x05), RGBV(0x06), RGBV(0x07),
-  RGBV(0x08), RGBV(0x09), RGBV(0x0A), RGBV(0x0B),
-  RGBV(0x0C), RGBV(0x0D), RGBV(0x0E), RGBV(0x0F),
-  RGBV(0x10), RGBV(0x11), RGBV(0x12), RGBV(0x13),
-  RGBV(0x14), RGBV(0x15), RGBV(0x16), RGBV(0x17),
-  RGBV(0x18), RGBV(0x19), RGBV(0x1A), RGBV(0x1B),
-  RGBV(0x1C), RGBV(0x1D), RGBV(0x1E), RGBV(0x1F),
-  RGBV(0x20), RGBV(0x21), RGBV(0x22), RGBV(0x23),
-  RGBV(0x24), RGBV(0x25), RGBV(0x26), RGBV(0x27),
-  RGBV(0x28), RGBV(0x29), RGBV(0x2A), RGBV(0x2B),
-  RGBV(0x2C), RGBV(0x2D), RGBV(0x2E), RGBV(0x2F),
-  RGBV(0x30), RGBV(0x31), RGBV(0x32), RGBV(0x33),
-  RGBV(0x34), RGBV(0x35), RGBV(0x36), RGBV(0x37),
-  RGBV(0x38), RGBV(0x39), RGBV(0x3A), RGBV(0x3B),
-  RGBV(0x3C), RGBV(0x3D), RGBV(0x3E), RGBV(0x3F),
-  RGBV(0x40), RGBV(0x41), RGBV(0x42), RGBV(0x43),
-  RGBV(0x44), RGBV(0x45), RGBV(0x46), RGBV(0x47),
-  RGBV(0x48), RGBV(0x49), RGBV(0x4A), RGBV(0x4B),
-  RGBV(0x4C), RGBV(0x4D), RGBV(0x4E), RGBV(0x4F),
-  RGBV(0x50), RGBV(0x51), RGBV(0x52), RGBV(0x53),
-  RGBV(0x54), RGBV(0x55), RGBV(0x56), RGBV(0x57),
-  RGBV(0x58), RGBV(0x59), RGBV(0x5A), RGBV(0x5B),
-  RGBV(0x5C), RGBV(0x5D), RGBV(0x5E), RGBV(0x5F),
-  RGBV(0x60), RGBV(0x61), RGBV(0x62), RGBV(0x63),
-  RGBV(0x64), RGBV(0x65), RGBV(0x66), RGBV(0x67),
-  RGBV(0x68), RGBV(0x69), RGBV(0x6A), RGBV(0x6B),
-  RGBV(0x6C), RGBV(0x6D), RGBV(0x6E), RGBV(0x6F),
-  RGBV(0x70), RGBV(0x71), RGBV(0x72), RGBV(0x73),
-  RGBV(0x74), RGBV(0x75), RGBV(0x76), RGBV(0x77),
-  RGBV(0x78), RGBV(0x79), RGBV(0x7A), RGBV(0x7B),
-  RGBV(0x7C), RGBV(0x7D), RGBV(0x7E), RGBV(0x7F),
-  RGBV(0x80), RGBV(0x81), RGBV(0x82), RGBV(0x83),
-  RGBV(0x84), RGBV(0x85), RGBV(0x86), RGBV(0x87),
-  RGBV(0x88), RGBV(0x89), RGBV(0x8A), RGBV(0x8B),
-  RGBV(0x8C), RGBV(0x8D), RGBV(0x8E), RGBV(0x8F),
-  RGBV(0x90), RGBV(0x91), RGBV(0x92), RGBV(0x93),
-  RGBV(0x94), RGBV(0x95), RGBV(0x96), RGBV(0x97),
-  RGBV(0x98), RGBV(0x99), RGBV(0x9A), RGBV(0x9B),
-  RGBV(0x9C), RGBV(0x9D), RGBV(0x9E), RGBV(0x9F),
-  RGBV(0xA0), RGBV(0xA1), RGBV(0xA2), RGBV(0xA3),
-  RGBV(0xA4), RGBV(0xA5), RGBV(0xA6), RGBV(0xA7),
-  RGBV(0xA8), RGBV(0xA9), RGBV(0xAA), RGBV(0xAB),
-  RGBV(0xAC), RGBV(0xAD), RGBV(0xAE), RGBV(0xAF),
-  RGBV(0xB0), RGBV(0xB1), RGBV(0xB2), RGBV(0xB3),
-  RGBV(0xB4), RGBV(0xB5), RGBV(0xB6), RGBV(0xB7),
-  RGBV(0xB8), RGBV(0xB9), RGBV(0xBA), RGBV(0xBB),
-  RGBV(0xBC), RGBV(0xBD), RGBV(0xBE), RGBV(0xBF),
-  RGBV(0xC0), RGBV(0xC1), RGBV(0xC2), RGBV(0xC3),
-  RGBV(0xC4), RGBV(0xC5), RGBV(0xC6), RGBV(0xC7),
-  RGBV(0xC8), RGBV(0xC9), RGBV(0xCA), RGBV(0xCB),
-  RGBV(0xCC), RGBV(0xCD), RGBV(0xCE), RGBV(0xCF),
-  RGBV(0xD0), RGBV(0xD1), RGBV(0xD2), RGBV(0xD3),
-  RGBV(0xD4), RGBV(0xD5), RGBV(0xD6), RGBV(0xD7),
-  RGBV(0xD8), RGBV(0xD9), RGBV(0xDA), RGBV(0xDB),
-  RGBV(0xDC), RGBV(0xDD), RGBV(0xDE), RGBV(0xDF),
-  RGBV(0xE0), RGBV(0xE1), RGBV(0xE2), RGBV(0xE3),
-  RGBV(0xE4), RGBV(0xE5), RGBV(0xE6), RGBV(0xE7),
-  RGBV(0xE8), RGBV(0xE9), RGBV(0xEA), RGBV(0xEB),
-  RGBV(0xEC), RGBV(0xED), RGBV(0xEE), RGBV(0xEF),
-  RGBV(0xF0), RGBV(0xF1), RGBV(0xF2), RGBV(0xF3),
-  RGBV(0xF4), RGBV(0xF5), RGBV(0xF6), RGBV(0xF7),
-  RGBV(0xF8), RGBV(0xF9), RGBV(0xFA), RGBV(0xFB),
-  RGBV(0xFC), RGBV(0xFD), RGBV(0xFE), RGBV(0xFF),
-};
-
-#undef RGBHY
-#undef RGBY
-#undef RGBU
-#undef RGBV
-#undef MMX_ALIGNED
-
-// Warning C4799: function has no EMMS instruction.
-// EMMS() is slow and should be called by the calling function once per image.
-#pragma warning(disable: 4799)
-
+extern "C" {
+#if defined(MOZILLA_COMPILE_WITH_SSE2)
 __declspec(naked)
 void FastConvertYUVToRGB32Row(const uint8* y_buf,
                               const uint8* u_buf,
                               const uint8* v_buf,
                               uint8* rgb_buf,
                               int width) {
   __asm {
     pushad
@@ -518,44 +268,44 @@ void DoubleYUVToRGB32Row(const uint8* y_
 // For performance the chroma is under sampled, reducing cost of a 3x
 // 1080p scale from 8.4 ms to 5.4 ms.
 __declspec(naked)
 void ScaleYUVToRGB32Row(const uint8* y_buf,
                         const uint8* u_buf,
                         const uint8* v_buf,
                         uint8* rgb_buf,
                         int width,
-                        int dx) {
+                        int source_dx) {
   __asm {
     pushad
     mov       edx, [esp + 32 + 4]   // Y
     mov       edi, [esp + 32 + 8]   // U
     mov       esi, [esp + 32 + 12]  // V
     mov       ebp, [esp + 32 + 16]  // rgb
     mov       ecx, [esp + 32 + 20]  // width
     xor       ebx, ebx              // x
     jmp       scaleend
 
  scaleloop :
     mov       eax, ebx
-    sar       eax, 5
+    sar       eax, 17
     movzx     eax, byte ptr [edi + eax]
     movq      mm0, [kCoefficientsRgbU + 8 * eax]
     mov       eax, ebx
-    sar       eax, 5
+    sar       eax, 17
     movzx     eax, byte ptr [esi + eax]
     paddsw    mm0, [kCoefficientsRgbV + 8 * eax]
     mov       eax, ebx
-    add       ebx, [esp + 32 + 24]  // x += dx
-    sar       eax, 4
+    add       ebx, [esp + 32 + 24]  // x += source_dx
+    sar       eax, 16
     movzx     eax, byte ptr [edx + eax]
     movq      mm1, [kCoefficientsRgbY + 8 * eax]
     mov       eax, ebx
-    add       ebx, [esp + 32 + 24]  // x += dx
-    sar       eax, 4
+    add       ebx, [esp + 32 + 24]  // x += source_dx
+    sar       eax, 16
     movzx     eax, byte ptr [edx + eax]
     movq      mm2, [kCoefficientsRgbY + 8 * eax]
     paddsw    mm1, mm0
     paddsw    mm2, mm0
     psraw     mm1, 6
     psraw     mm2, 6
     packuswb  mm1, mm2
     movntq    [ebp], mm1
@@ -563,33 +313,165 @@ void ScaleYUVToRGB32Row(const uint8* y_b
  scaleend :
     sub       ecx, 2
     jns       scaleloop
 
     and       ecx, 1  // odd number of pixels?
     jz        scaledone
 
     mov       eax, ebx
-    sar       eax, 5
+    sar       eax, 17
     movzx     eax, byte ptr [edi + eax]
     movq      mm0, [kCoefficientsRgbU + 8 * eax]
     mov       eax, ebx
-    sar       eax, 5
+    sar       eax, 17
     movzx     eax, byte ptr [esi + eax]
     paddsw    mm0, [kCoefficientsRgbV + 8 * eax]
     mov       eax, ebx
-    sar       eax, 4
+    sar       eax, 16
     movzx     eax, byte ptr [edx + eax]
     movq      mm1, [kCoefficientsRgbY + 8 * eax]
     paddsw    mm1, mm0
     psraw     mm1, 6
     packuswb  mm1, mm1
     movd      [ebp], mm1
 
  scaledone :
     popad
     ret
   }
 }
 
-#endif // ARCH_CPU_64_BITS
+__declspec(naked)
+void LinearScaleYUVToRGB32Row(const uint8* y_buf,
+                              const uint8* u_buf,
+                              const uint8* v_buf,
+                              uint8* rgb_buf,
+                              int width,
+                              int source_dx) {
+  __asm {
+    pushad
+    mov       edx, [esp + 32 + 4]  // Y
+    mov       edi, [esp + 32 + 8]  // U
+                // [esp + 32 + 12] // V (not loaded here; fetched into esi inside the loop)
+    mov       ebp, [esp + 32 + 16] // rgb
+    mov       ecx, [esp + 32 + 20] // width
+    imul      ecx, [esp + 32 + 24] // source_dx
+    mov       [esp + 32 + 20], ecx // source_width = width * source_dx
+    mov       ecx, [esp + 32 + 24] // source_dx
+    xor       ebx, ebx             // x = 0
+    cmp       ecx, 0x20000
+    jl        lscaleend
+    mov       ebx, 0x8000          // x = 0.5 (16.16 fixed point) when source_dx >= 2.0, i.e. scaling to 1/2 size or less
+    jmp       lscaleend
+lscaleloop:
+    mov       eax, ebx
+    sar       eax, 0x11
+
+    movzx     ecx, byte ptr [edi + eax]
+    movzx     esi, byte ptr [edi + eax + 1]
+    mov       eax, ebx
+    and       eax, 0x1fffe
+    imul      esi, eax
+    xor       eax, 0x1fffe
+    imul      ecx, eax
+    add       ecx, esi
+    shr       ecx, 17
+    movq      mm0, [kCoefficientsRgbU + 8 * ecx]
+
+    mov       esi, [esp + 32 + 12]
+    mov       eax, ebx
+    sar       eax, 0x11
+
+    movzx     ecx, byte ptr [esi + eax]
+    movzx     esi, byte ptr [esi + eax + 1]
+    mov       eax, ebx
+    and       eax, 0x1fffe
+    imul      esi, eax
+    xor       eax, 0x1fffe
+    imul      ecx, eax
+    add       ecx, esi
+    shr       ecx, 17
+    paddsw    mm0, [kCoefficientsRgbV + 8 * ecx]
+
+    mov       eax, ebx
+    sar       eax, 0x10
+    movzx     ecx, byte ptr [edx + eax]
+    movzx     esi, byte ptr [1 + edx + eax]
+    mov       eax, ebx
+    add       ebx, [esp + 32 + 24]
+    and       eax, 0xffff
+    imul      esi, eax
+    xor       eax, 0xffff
+    imul      ecx, eax
+    add       ecx, esi
+    shr       ecx, 16
+    movq      mm1, [kCoefficientsRgbY + 8 * ecx]
+
+    cmp       ebx, [esp + 32 + 20]
+    jge       lscalelastpixel
+
+    mov       eax, ebx
+    sar       eax, 0x10
+    movzx     ecx, byte ptr [edx + eax]
+    movzx     esi, byte ptr [edx + eax + 1]
+    mov       eax, ebx
+    add       ebx, [esp + 32 + 24]
+    and       eax, 0xffff
+    imul      esi, eax
+    xor       eax, 0xffff
+    imul      ecx, eax
+    add       ecx, esi
+    shr       ecx, 16
+    movq      mm2, [kCoefficientsRgbY + 8 * ecx]
+
+    paddsw    mm1, mm0
+    paddsw    mm2, mm0
+    psraw     mm1, 0x6
+    psraw     mm2, 0x6
+    packuswb  mm1, mm2
+    movntq    [ebp], mm1
+    add       ebp, 0x8
+
+lscaleend:
+    cmp       ebx, [esp + 32 + 20]
+    jl        lscaleloop
+    popad
+    ret
+
+lscalelastpixel:
+    paddsw    mm1, mm0
+    psraw     mm1, 6
+    packuswb  mm1, mm1
+    movd      [ebp], mm1
+    popad
+    ret
+  };
+}
+#else // MOZILLA_COMPILE_WITH_SSE2
+void FastConvertYUVToRGB32Row(const uint8* y_buf,
+                              const uint8* u_buf,
+                              const uint8* v_buf,
+                              uint8* rgb_buf,
+                              int width) {
+  FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
+}
+
+void ScaleYUVToRGB32Row(const uint8* y_buf,
+                        const uint8* u_buf,
+                        const uint8* v_buf,
+                        uint8* rgb_buf,
+                        int width,
+                        int source_dx) {
+  ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
+}
+
+void LinearScaleYUVToRGB32Row(const uint8* y_buf,
+                              const uint8* u_buf,
+                              const uint8* v_buf,
+                              uint8* rgb_buf,
+                              int width,
+                              int source_dx) {
+  LinearScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
+}
+#endif
 }  // extern "C"
 
deleted file mode 100644
--- a/gfx/ycbcr/yv24.patch
+++ /dev/null
@@ -1,241 +0,0 @@
-diff --git a/gfx/ycbcr/yuv_convert.cpp b/gfx/ycbcr/yuv_convert.cpp
-index b22e778..cdbb040 100644
---- a/gfx/ycbcr/yuv_convert.cpp
-+++ b/gfx/ycbcr/yuv_convert.cpp
-@@ -6,16 +6,17 @@
- // http://www.fourcc.org/yuv.php
- // The actual conversion is best described here
- // http://en.wikipedia.org/wiki/YUV
- // An article on optimizing YUV conversion using tables instead of multiplies
- // http://lestourtereaux.free.fr/papers/data/yuvrgb.pdf
- //
- // YV12 is a full plane of Y and a half height, half width chroma planes
- // YV16 is a full plane of Y and a full height, half width chroma planes
-+// YV24 is a full plane of Y and a full height, full width chroma planes
- //
- // ARGB pixel format is output, which on little endian is stored as BGRA.
- // The alpha is set to 255, allowing the application to use RGBA or RGB32.
- 
- #include "yuv_convert.h"
- 
- // Header for low level row functions.
- #include "yuv_row.h"
-@@ -33,50 +34,55 @@ void ConvertYCbCrToRGB32(const uint8* y_buf,
-                          int pic_x,
-                          int pic_y,
-                          int pic_width,
-                          int pic_height,
-                          int y_pitch,
-                          int uv_pitch,
-                          int rgb_pitch,
-                          YUVType yuv_type) {
--  unsigned int y_shift = yuv_type;
--  bool has_mmx = supports_mmx();
--  bool odd_pic_x = pic_x % 2 != 0;
-+  unsigned int y_shift = yuv_type == YV12 ? 1 : 0;
-+  unsigned int x_shift = yuv_type == YV24 ? 0 : 1;
-+  // There is no optimized YV24 MMX routine so we check for this and
-+  // fall back to the C code.
-+  bool has_mmx = supports_mmx() && yuv_type != YV24;
-+  bool odd_pic_x = yuv_type != YV24 && pic_x % 2 != 0;
-   int x_width = odd_pic_x ? pic_width - 1 : pic_width;
- 
-   for (int y = pic_y; y < pic_height + pic_y; ++y) {
-     uint8* rgb_row = rgb_buf + (y - pic_y) * rgb_pitch;
-     const uint8* y_ptr = y_buf + y * y_pitch + pic_x;
--    const uint8* u_ptr = u_buf + (y >> y_shift) * uv_pitch + (pic_x >> 1);
--    const uint8* v_ptr = v_buf + (y >> y_shift) * uv_pitch + (pic_x >> 1);
-+    const uint8* u_ptr = u_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift);
-+    const uint8* v_ptr = v_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift);
- 
-     if (odd_pic_x) {
-       // Handle the single odd pixel manually and use the
-       // fast routines for the remaining.
-       FastConvertYUVToRGB32Row_C(y_ptr++,
-                                  u_ptr++,
-                                  v_ptr++,
-                                  rgb_row,
--                                 1);
-+                                 1,
-+                                 x_shift);
-       rgb_row += 4;
-     }
- 
-     if (has_mmx)
-       FastConvertYUVToRGB32Row(y_ptr,
-                                u_ptr,
-                                v_ptr,
-                                rgb_row,
-                                x_width);
-     else
-       FastConvertYUVToRGB32Row_C(y_ptr,
-                                  u_ptr,
-                                  v_ptr,
-                                  rgb_row,
--                                 x_width);
-+                                 x_width,
-+                                 x_shift);
-   }
- 
-   // MMX used for FastConvertYUVToRGB32Row requires emms instruction.
-   if (has_mmx)
-     EMMS();
- }
- 
- }  // namespace gfx
-diff --git a/gfx/ycbcr/yuv_convert.h b/gfx/ycbcr/yuv_convert.h
-index e624168..c0b678d 100644
---- a/gfx/ycbcr/yuv_convert.h
-+++ b/gfx/ycbcr/yuv_convert.h
-@@ -10,18 +10,19 @@
- 
- namespace mozilla {
- 
- namespace gfx {
- 
- // Type of YUV surface.
- // The value of these enums matter as they are used to shift vertical indices.
- enum YUVType {
--  YV16 = 0,           // YV16 is half width and full height chroma channels.
--  YV12 = 1            // YV12 is half width and half height chroma channels.
-+  YV12 = 0,           // YV12 is half width and half height chroma channels.
-+  YV16 = 1,           // YV16 is half width and full height chroma channels.
-+  YV24 = 2            // YV24 is full width and full height chroma channels.
- };
- 
- // Convert a frame of YUV to 32 bit ARGB.
- // Pass in YV16/YV12 depending on source format
- NS_GFX_(void) ConvertYCbCrToRGB32(const uint8* yplane,
-                                   const uint8* uplane,
-                                   const uint8* vplane,
-                                   uint8* rgbframe,
-diff --git a/gfx/ycbcr/yuv_row.h b/gfx/ycbcr/yuv_row.h
-index 2a82972..d776dac 100644
---- a/gfx/ycbcr/yuv_row.h
-+++ b/gfx/ycbcr/yuv_row.h
-@@ -20,17 +20,18 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
-                               const uint8* v_buf,
-                               uint8* rgb_buf,
-                               int width);
- 
- void FastConvertYUVToRGB32Row_C(const uint8* y_buf,
-                                 const uint8* u_buf,
-                                 const uint8* v_buf,
-                                 uint8* rgb_buf,
--                                int width);
-+                                int width,
-+                                unsigned int x_shift);
- 
- 
- }  // extern "C"
- 
- // x64 uses MMX2 (SSE) so emms is not required.
- #if !defined(ARCH_CPU_X86_64) && !defined(ARCH_CPU_PPC)
- #if defined(_MSC_VER)
- #define EMMS() __asm emms
-diff --git a/gfx/ycbcr/yuv_row_c.cpp b/gfx/ycbcr/yuv_row_c.cpp
-index d3bdab4..36d9bda 100644
---- a/gfx/ycbcr/yuv_row_c.cpp
-+++ b/gfx/ycbcr/yuv_row_c.cpp
-@@ -153,24 +153,29 @@ static inline void YuvPixel(uint8 y,
-                                         (clip(C298a + cr) << 16) |
-                                         (0xff000000);
- }
- 
- void FastConvertYUVToRGB32Row_C(const uint8* y_buf,
-                               const uint8* u_buf,
-                               const uint8* v_buf,
-                               uint8* rgb_buf,
--                              int width) {
-+                              int width,
-+                              unsigned int x_shift) {
-   for (int x = 0; x < width; x += 2) {
--    uint8 u = u_buf[x >> 1];
--    uint8 v = v_buf[x >> 1];
-+    uint8 u = u_buf[x >> x_shift];
-+    uint8 v = v_buf[x >> x_shift];
-     uint8 y0 = y_buf[x];
-     YuvPixel(y0, u, v, rgb_buf);
-     if ((x + 1) < width) {
-       uint8 y1 = y_buf[x + 1];
-+      if (x_shift == 0) {
-+        u = u_buf[x + 1];
-+        v = v_buf[x + 1];
-+      }
-       YuvPixel(y1, u, v, rgb_buf + 4);
-     }
-     rgb_buf += 8;  // Advance 2 pixels.
-   }
- }
- 
- }  // extern "C"
- 
-diff --git a/gfx/ycbcr/yuv_row_linux.cpp b/gfx/ycbcr/yuv_row_linux.cpp
-index ce5ee89..455dd7b 100644
---- a/gfx/ycbcr/yuv_row_linux.cpp
-+++ b/gfx/ycbcr/yuv_row_linux.cpp
-@@ -13,17 +13,17 @@ extern "C" {
- 
- #if defined(ARCH_CPU_ARM_FAMILY)
- // ARM implementation uses C fallback
- void FastConvertYUVToRGB32Row(const uint8* y_buf,
-                               const uint8* u_buf,
-                               const uint8* v_buf,
-                               uint8* rgb_buf,
-                               int width) {
--  FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width);
-+  FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
- }
-  
- #else
- 
- #define RGBY(i) { \
-   static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
-   static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
-   static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
-diff --git a/gfx/ycbcr/yuv_row_mac.cpp b/gfx/ycbcr/yuv_row_mac.cpp
-index 34ecdc1..2a679cc 100644
---- a/gfx/ycbcr/yuv_row_mac.cpp
-+++ b/gfx/ycbcr/yuv_row_mac.cpp
-@@ -13,17 +13,17 @@ extern "C" {
- // option at all.
- #if defined(ARCH_CPU_PPC) || defined(ARCH_CPU_64_BITS)
- // PPC implementation uses C fallback
- void FastConvertYUVToRGB32Row(const uint8* y_buf,
-                               const uint8* u_buf,
-                               const uint8* v_buf,
-                               uint8* rgb_buf,
-                               int width) {
--  FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width);
-+  FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
- }
-  
- #else
- 
- #define RGBY(i) { \
-   static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
-   static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
-   static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
-diff --git a/gfx/ycbcr/yuv_row_win.cpp b/gfx/ycbcr/yuv_row_win.cpp
-index d2b82c4..708ef14 100644
---- a/gfx/ycbcr/yuv_row_win.cpp
-+++ b/gfx/ycbcr/yuv_row_win.cpp
-@@ -9,17 +9,17 @@ extern "C" {
- // needs to be fixed for 64 bit builds.
- #if defined(ARCH_CPU_64_BITS)
- // PPC implementation uses C fallback
- void FastConvertYUVToRGB32Row(const uint8* y_buf,
-                               const uint8* u_buf,
-                               const uint8* v_buf,
-                               uint8* rgb_buf,
-                               int width) {
--  FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width);
-+  FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
- }
-  
- #else
- 
- 
- #define RGBY(i) { \
-   static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
-   static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \