--- a/content/media/nsMediaDecoder.cpp
+++ b/content/media/nsMediaDecoder.cpp
@@ -45,28 +45,23 @@
#include "nsIDocument.h"
#include "nsThreadUtils.h"
#include "nsIDOMHTMLMediaElement.h"
#include "nsNetUtil.h"
#include "nsHTMLMediaElement.h"
#include "nsAutoLock.h"
#include "nsIRenderingContext.h"
#include "gfxContext.h"
-#include "gfxImageSurface.h"
#include "nsPresContext.h"
#include "nsDOMError.h"
#include "nsDisplayList.h"
#ifdef MOZ_SVG
#include "nsSVGEffects.h"
#endif
-#if defined(XP_MACOSX)
-#include "gfxQuartzImageSurface.h"
-#endif
-
// Number of milliseconds between progress events as defined by spec
#define PROGRESS_MS 350
// Number of milliseconds of no data before a stall event is fired as defined by spec
#define STALL_MS 3000
nsMediaDecoder::nsMediaDecoder() :
mElement(0),
--- a/gfx/layers/ImageLayers.h
+++ b/gfx/layers/ImageLayers.h
@@ -109,16 +109,17 @@ protected:
* (because layers can only be used on the main thread) and we want to
* be able to set the current Image from any thread, to facilitate
* video playback without involving the main thread, for example.
*/
class THEBES_API ImageContainer {
THEBES_INLINE_DECL_THREADSAFE_REFCOUNTING(ImageContainer)
public:
+ ImageContainer() {}
virtual ~ImageContainer() {}
/**
* Create an Image in one of the given formats.
* Picks the "best" format from the list and creates an Image of that
* format.
* Returns null if this backend does not support any of the formats.
*/
@@ -174,16 +175,23 @@ public:
/**
* Set a new layer manager for this image container. It must be
* either of the same type as the container's current layer manager,
* or null. TRUE is returned on success.
*/
virtual PRBool SetLayerManager(LayerManager *aManager) = 0;
+ /**
+ * Sets a size that the image is expected to be rendered at.
+ * This is a hint for image backends to optimize scaling.
+ * Default implementation in this class is to ignore the hint.
+ */
+ virtual void SetScaleHint(const gfxIntSize& /* aScaleHint */) { }
+
protected:
LayerManager* mManager;
ImageContainer(LayerManager* aManager) : mManager(aManager) {}
};
/**
* A Layer which renders an Image.
--- a/gfx/layers/basic/BasicImages.cpp
+++ b/gfx/layers/basic/BasicImages.cpp
@@ -99,39 +99,50 @@ protected:
/**
* We handle YCbCr by converting to RGB when the image is initialized
* (which should be done off the main thread). The RGB results are stored
* in a memory buffer and converted to a cairo surface lazily.
*/
class BasicPlanarYCbCrImage : public PlanarYCbCrImage, public BasicImageImplData {
public:
- BasicPlanarYCbCrImage() :
- PlanarYCbCrImage(static_cast<BasicImageImplData*>(this))
+ /**
+ * aScaleHint is a size that the image is expected to be rendered at.
+ * This is a hint for image backends to optimize scaling.
+ */
+ BasicPlanarYCbCrImage(const gfxIntSize& aScaleHint) :
+ PlanarYCbCrImage(static_cast<BasicImageImplData*>(this)),
+ mScaleHint(aScaleHint)
{}
virtual void SetData(const Data& aData);
virtual already_AddRefed<gfxASurface> GetAsSurface();
protected:
nsAutoArrayPtr<PRUint8> mBuffer;
nsCountedRef<nsMainThreadSurfaceRef> mSurface;
+ gfxIntSize mScaleHint;
};
void
BasicPlanarYCbCrImage::SetData(const Data& aData)
{
// Do some sanity checks to prevent integer overflow
if (aData.mYSize.width > 16384 || aData.mYSize.height > 16384) {
NS_ERROR("Illegal width or height");
return;
}
- size_t size = aData.mPicSize.width*aData.mPicSize.height*4;
- mBuffer = new PRUint8[size];
+ // 'prescale' is true if the scaling is to be done as part of the
+ // YCbCr to RGB conversion rather than on the RGB data when rendered.
+ PRBool prescale = mScaleHint.width > 0 && mScaleHint.height > 0;
+ gfxIntSize size(prescale ? mScaleHint.width : aData.mPicSize.width,
+ prescale ? mScaleHint.height : aData.mPicSize.height);
+
+ mBuffer = new PRUint8[size.width * size.height * 4];
if (!mBuffer) {
// out of memory
return;
}
gfx::YUVType type = gfx::YV12;
if (aData.mYSize.width == aData.mCbCrSize.width &&
aData.mYSize.height == aData.mCbCrSize.height) {
@@ -144,30 +155,47 @@ BasicPlanarYCbCrImage::SetData(const Dat
else if (aData.mYSize.width / 2 == aData.mCbCrSize.width &&
aData.mYSize.height / 2 == aData.mCbCrSize.height ) {
type = gfx::YV12;
}
else {
NS_ERROR("YCbCr format not supported");
}
- // Convert from YCbCr to RGB now
- gfx::ConvertYCbCrToRGB32(aData.mYChannel,
+ // Convert from YCbCr to RGB now, scaling the image if needed.
+ if (size != aData.mPicSize) {
+ gfx::ScaleYCbCrToRGB32(aData.mYChannel,
aData.mCbChannel,
aData.mCrChannel,
mBuffer,
- aData.mPicX,
- aData.mPicY,
aData.mPicSize.width,
aData.mPicSize.height,
+ size.width,
+ size.height,
aData.mYStride,
aData.mCbCrStride,
- aData.mPicSize.width*4,
- type);
- mSize = aData.mPicSize;
+ size.width*4,
+ type,
+ gfx::ROTATE_0);
+ }
+ else {
+ gfx::ConvertYCbCrToRGB32(aData.mYChannel,
+ aData.mCbChannel,
+ aData.mCrChannel,
+ mBuffer,
+ aData.mPicX,
+ aData.mPicY,
+ aData.mPicSize.width,
+ aData.mPicSize.height,
+ aData.mYStride,
+ aData.mCbCrStride,
+ aData.mPicSize.width*4,
+ type);
+ }
+ mSize = size;
}
static cairo_user_data_key_t imageSurfaceDataKey;
static void
DestroyBuffer(void* aBuffer)
{
delete[] static_cast<PRUint8*>(aBuffer);
@@ -213,29 +241,32 @@ BasicPlanarYCbCrImage::GetAsSurface()
/**
* Our image container is very simple. It's really just a factory
* for the image objects. We use a Monitor to synchronize access to
* mImage.
*/
class BasicImageContainer : public ImageContainer {
public:
BasicImageContainer(BasicLayerManager* aManager) :
- ImageContainer(aManager), mMonitor("BasicImageContainer")
+ ImageContainer(aManager), mMonitor("BasicImageContainer"),
+ mScaleHint(-1, -1)
{}
virtual already_AddRefed<Image> CreateImage(const Image::Format* aFormats,
PRUint32 aNumFormats);
virtual void SetCurrentImage(Image* aImage);
virtual already_AddRefed<Image> GetCurrentImage();
virtual already_AddRefed<gfxASurface> GetCurrentAsSurface(gfxIntSize* aSize);
virtual gfxIntSize GetCurrentSize();
virtual PRBool SetLayerManager(LayerManager *aManager);
+ virtual void SetScaleHint(const gfxIntSize& aScaleHint);
protected:
Monitor mMonitor;
nsRefPtr<Image> mImage;
+ gfxIntSize mScaleHint;
};
/**
* Returns true if aFormat is in the given format array.
*/
static PRBool
FormatInList(const Image::Format* aFormats, PRUint32 aNumFormats,
Image::Format aFormat)
@@ -252,17 +283,18 @@ already_AddRefed<Image>
BasicImageContainer::CreateImage(const Image::Format* aFormats,
PRUint32 aNumFormats)
{
nsRefPtr<Image> image;
// Prefer cairo surfaces because they're native for us
if (FormatInList(aFormats, aNumFormats, Image::CAIRO_SURFACE)) {
image = new BasicCairoImage();
} else if (FormatInList(aFormats, aNumFormats, Image::PLANAR_YCBCR)) {
- image = new BasicPlanarYCbCrImage();
+ MonitorAutoEnter mon(mMonitor);
+ image = new BasicPlanarYCbCrImage(mScaleHint);
}
return image.forget();
}
void
BasicImageContainer::SetCurrentImage(Image* aImage)
{
MonitorAutoEnter mon(mMonitor);
@@ -298,16 +330,22 @@ BasicImageContainer::GetCurrentAsSurface
gfxIntSize
BasicImageContainer::GetCurrentSize()
{
MonitorAutoEnter mon(mMonitor);
return !mImage ? gfxIntSize(0,0) : ToImageData(mImage)->GetSize();
}
+void BasicImageContainer::SetScaleHint(const gfxIntSize& aScaleHint)
+{
+ MonitorAutoEnter mon(mMonitor);
+ mScaleHint = aScaleHint;
+}
+
PRBool
BasicImageContainer::SetLayerManager(LayerManager *aManager)
{
if (aManager &&
aManager->GetBackendType() != LayerManager::LAYERS_BASIC)
{
return PR_FALSE;
}
--- a/gfx/ycbcr/README
+++ b/gfx/ycbcr/README
@@ -16,8 +16,9 @@ picture_region.patch: Change Chromium co
remove_scale.patch: Removes Chromium scaling code.
export.patch: Fix export for building on comm-central
win64_mac64.patch: Fallback to C implementation on Windows and Mac OS X 64 bit
yv24.patch: Adds YCbCr 4:4:4 support
row_c_fix.patch: Fix broken C fallback code (See bug 561385).
bug572034_mac_64bit.patch: Fix x86_64 linux code so it works on OS X.
solaris.patch: Adds Solaris support, fallback to C implementation on SPARC
+add_scale.patch: re-adds Chromium scaling code
new file mode 100644
--- /dev/null
+++ b/gfx/ycbcr/add_scale.patch
@@ -0,0 +1,953 @@
+diff --git a/gfx/ycbcr/yuv_convert.cpp b/gfx/ycbcr/yuv_convert.cpp
+index 40ce10f..7d46629 100644
+--- a/gfx/ycbcr/yuv_convert.cpp
++++ b/gfx/ycbcr/yuv_convert.cpp
+@@ -82,10 +82,139 @@ NS_GFX_(void) ConvertYCbCrToRGB32(const uint8* y_buf,
+
+ #ifdef ARCH_CPU_X86_FAMILY
+ // MMX used for FastConvertYUVToRGB32Row requires emms instruction.
+ if (has_mmx)
+ EMMS();
+ #endif
+ }
+
++// Scale a frame of YUV to 32 bit ARGB.
++void ScaleYCbCrToRGB32(const uint8* y_buf,
++ const uint8* u_buf,
++ const uint8* v_buf,
++ uint8* rgb_buf,
++ int width,
++ int height,
++ int scaled_width,
++ int scaled_height,
++ int y_pitch,
++ int uv_pitch,
++ int rgb_pitch,
++ YUVType yuv_type,
++ Rotate view_rotate) {
++ unsigned int y_shift = yuv_type == YV12 ? 1 : 0;
++ unsigned int x_shift = yuv_type == YV24 ? 0 : 1;
++ bool has_mmx = supports_mmx();
++ // Diagram showing origin and direction of source sampling.
++ // ->0 4<-
++ // 7 3
++ //
++ // 6 5
++ // ->1 2<-
++ // Rotations that start at right side of image.
++ if ((view_rotate == ROTATE_180) ||
++ (view_rotate == ROTATE_270) ||
++ (view_rotate == MIRROR_ROTATE_0) ||
++ (view_rotate == MIRROR_ROTATE_90)) {
++ y_buf += width - 1;
++ u_buf += width / 2 - 1;
++ v_buf += width / 2 - 1;
++ width = -width;
++ }
++ // Rotations that start at bottom of image.
++ if ((view_rotate == ROTATE_90) ||
++ (view_rotate == ROTATE_180) ||
++ (view_rotate == MIRROR_ROTATE_90) ||
++ (view_rotate == MIRROR_ROTATE_180)) {
++ y_buf += (height - 1) * y_pitch;
++ u_buf += ((height >> y_shift) - 1) * uv_pitch;
++ v_buf += ((height >> y_shift) - 1) * uv_pitch;
++ height = -height;
++ }
++
++ // Handle zero sized destination.
++ if (scaled_width == 0 || scaled_height == 0)
++ return;
++ int scaled_dx = width * 16 / scaled_width;
++ int scaled_dy = height * 16 / scaled_height;
++
++ int scaled_dx_uv = scaled_dx;
++
++ if ((view_rotate == ROTATE_90) ||
++ (view_rotate == ROTATE_270)) {
++ int tmp = scaled_height;
++ scaled_height = scaled_width;
++ scaled_width = tmp;
++ tmp = height;
++ height = width;
++ width = tmp;
++ int original_dx = scaled_dx;
++ int original_dy = scaled_dy;
++ scaled_dx = ((original_dy >> 4) * y_pitch) << 4;
++ scaled_dx_uv = ((original_dy >> 4) * uv_pitch) << 4;
++ scaled_dy = original_dx;
++ if (view_rotate == ROTATE_90) {
++ y_pitch = -1;
++ uv_pitch = -1;
++ height = -height;
++ } else {
++ y_pitch = 1;
++ uv_pitch = 1;
++ }
++ }
++
++ for (int y = 0; y < scaled_height; ++y) {
++ uint8* dest_pixel = rgb_buf + y * rgb_pitch;
++ int scaled_y = (y * height / scaled_height);
++ const uint8* y_ptr = y_buf + scaled_y * y_pitch;
++ const uint8* u_ptr = u_buf + (scaled_y >> y_shift) * uv_pitch;
++ const uint8* v_ptr = v_buf + (scaled_y >> y_shift) * uv_pitch;
++
++#if defined(_MSC_VER)
++ if (scaled_width == (width * 2)) {
++ DoubleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
++ dest_pixel, scaled_width);
++ } else if ((scaled_dx & 15) == 0) { // Scaling by integer scale factor.
++ if (scaled_dx_uv == scaled_dx) { // Not rotated.
++ if (scaled_dx == 16) { // Not scaled
++ if (has_mmx)
++ FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
++ dest_pixel, scaled_width);
++ else
++ FastConvertYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr,
++ dest_pixel, scaled_width, x_shift);
++ } else { // Simple scale down. ie half
++ ConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
++ dest_pixel, scaled_width, scaled_dx >> 4);
++ }
++ } else {
++ RotateConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
++ dest_pixel, scaled_width,
++ scaled_dx >> 4, scaled_dx_uv >> 4);
++ }
++#else
++ if (scaled_dx == 16) { // Not scaled
++ if (has_mmx)
++ FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
++ dest_pixel, scaled_width);
++ else
++ FastConvertYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr,
++ dest_pixel, scaled_width, x_shift);
++#endif
++ } else {
++ if (has_mmx)
++ ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
++ dest_pixel, scaled_width, scaled_dx);
++ else
++ ScaleYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr,
++ dest_pixel, scaled_width, scaled_dx, x_shift);
++
++ }
++ }
++
++ // MMX used for FastConvertYUVToRGB32Row requires emms instruction.
++ if (has_mmx)
++ EMMS();
++}
++
+ } // namespace gfx
+ } // namespace mozilla
+diff --git a/gfx/ycbcr/yuv_convert.h b/gfx/ycbcr/yuv_convert.h
+index c0b678d..a7e5b68 100644
+--- a/gfx/ycbcr/yuv_convert.h
++++ b/gfx/ycbcr/yuv_convert.h
+@@ -15,27 +15,56 @@ namespace gfx {
+ // Type of YUV surface.
+ // The value of these enums matter as they are used to shift vertical indices.
+ enum YUVType {
+ YV12 = 0, // YV12 is half width and half height chroma channels.
+ YV16 = 1, // YV16 is half width and full height chroma channels.
+ YV24 = 2 // YV24 is full width and full height chroma channels.
+ };
+
++// Mirror means flip the image horizontally, as in looking in a mirror.
++// Rotate happens after mirroring.
++enum Rotate {
++ ROTATE_0, // Rotation off.
++ ROTATE_90, // Rotate clockwise.
++ ROTATE_180, // Rotate upside down.
++ ROTATE_270, // Rotate counter clockwise.
++ MIRROR_ROTATE_0, // Mirror horizontally.
++ MIRROR_ROTATE_90, // Mirror then Rotate clockwise.
++ MIRROR_ROTATE_180, // Mirror vertically.
++ MIRROR_ROTATE_270 // Transpose.
++};
++
+ // Convert a frame of YUV to 32 bit ARGB.
+ // Pass in YV16/YV12 depending on source format
+ NS_GFX_(void) ConvertYCbCrToRGB32(const uint8* yplane,
+ const uint8* uplane,
+ const uint8* vplane,
+ uint8* rgbframe,
+ int pic_x,
+ int pic_y,
+ int pic_width,
+ int pic_height,
+ int ystride,
+ int uvstride,
+ int rgbstride,
+ YUVType yuv_type);
+
++// Scale a frame of YUV to 32 bit ARGB.
++// Supports rotation and mirroring.
++void ScaleYCbCrToRGB32(const uint8* yplane,
++ const uint8* uplane,
++ const uint8* vplane,
++ uint8* rgbframe,
++ int frame_width,
++ int frame_height,
++ int scaled_width,
++ int scaled_height,
++ int ystride,
++ int uvstride,
++ int rgbstride,
++ YUVType yuv_type,
++ Rotate view_rotate);
++
+ } // namespace gfx
+ } // namespace mozilla
+
+ #endif // MEDIA_BASE_YUV_CONVERT_H_
+diff --git a/gfx/ycbcr/yuv_row.h b/gfx/ycbcr/yuv_row.h
+index 8519008..96969ec 100644
+--- a/gfx/ycbcr/yuv_row.h
++++ b/gfx/ycbcr/yuv_row.h
+@@ -24,16 +24,64 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
+ void FastConvertYUVToRGB32Row_C(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width,
+ unsigned int x_shift);
+
+
++// Can do 1x, half size or any scale down by an integer amount.
++// Step can be negative (mirroring, rotate 180).
++// This is the third fastest of the scalers.
++void ConvertYUVToRGB32Row(const uint8* y_buf,
++ const uint8* u_buf,
++ const uint8* v_buf,
++ uint8* rgb_buf,
++ int width,
++ int step);
++
++// Rotate is like Convert, but applies different step to Y versus U and V.
++// This allows rotation by 90 or 270, by stepping by stride.
++// This is the fourth fastest of the scalers.
++void RotateConvertYUVToRGB32Row(const uint8* y_buf,
++ const uint8* u_buf,
++ const uint8* v_buf,
++ uint8* rgb_buf,
++ int width,
++ int ystep,
++ int uvstep);
++
++// Doubler does 4 pixels at a time. Each pixel is replicated.
++// This is the fastest of the scalers.
++void DoubleYUVToRGB32Row(const uint8* y_buf,
++ const uint8* u_buf,
++ const uint8* v_buf,
++ uint8* rgb_buf,
++ int width);
++
++// Handles arbitrary scaling up or down.
++// Mirroring is supported, but not 90 or 270 degree rotation.
++// Chroma is under sampled every 2 pixels for performance.
++// This is the slowest of the scalers.
++void ScaleYUVToRGB32Row(const uint8* y_buf,
++ const uint8* u_buf,
++ const uint8* v_buf,
++ uint8* rgb_buf,
++ int width,
++ int scaled_dx);
++
++void ScaleYUVToRGB32Row_C(const uint8* y_buf,
++ const uint8* u_buf,
++ const uint8* v_buf,
++ uint8* rgb_buf,
++ int width,
++ int scaled_dx,
++ unsigned int x_shift);
++
+ } // extern "C"
+
+ // x64 uses MMX2 (SSE) so emms is not required.
+ #if defined(ARCH_CPU_X86)
+ #if defined(_MSC_VER)
+ #define EMMS() __asm emms
+ #else
+ #define EMMS() asm("emms")
+diff --git a/gfx/ycbcr/yuv_row_c.cpp b/gfx/ycbcr/yuv_row_c.cpp
+index b5c0018..49eced2 100644
+--- a/gfx/ycbcr/yuv_row_c.cpp
++++ b/gfx/ycbcr/yuv_row_c.cpp
+@@ -172,10 +172,31 @@ void FastConvertYUVToRGB32Row_C(const uint8* y_buf,
+ v = v_buf[x + 1];
+ }
+ YuvPixel(y1, u, v, rgb_buf + 4);
+ }
+ rgb_buf += 8; // Advance 2 pixels.
+ }
+ }
+
++// 28.4 fixed point is used. A shift by 4 isolates the integer.
++// A shift by 5 is used to further subsample the chrominance channels.
++// & 15 isolates the fixed point fraction. >> 2 to get the upper 2 bits,
++// for 1/4 pixel accurate interpolation.
++void ScaleYUVToRGB32Row_C(const uint8* y_buf,
++ const uint8* u_buf,
++ const uint8* v_buf,
++ uint8* rgb_buf,
++ int width,
++ int scaled_dx,
++ unsigned int x_shift) {
++ int scaled_x = 0;
++ for (int x = 0; x < width; ++x) {
++ uint8 u = u_buf[scaled_x >> (4 + x_shift)];
++ uint8 v = v_buf[scaled_x >> (4 + x_shift)];
++ uint8 y0 = y_buf[scaled_x >> 4];
++ YuvPixel(y0, u, v, rgb_buf);
++ rgb_buf += 4;
++ scaled_x += scaled_dx;
++ }
++}
+ } // extern "C"
+
+diff --git a/gfx/ycbcr/yuv_row_linux.cpp b/gfx/ycbcr/yuv_row_linux.cpp
+index 9f7625c..bff02b3 100644
+--- a/gfx/ycbcr/yuv_row_linux.cpp
++++ b/gfx/ycbcr/yuv_row_linux.cpp
+@@ -16,16 +16,24 @@ extern "C" {
+ void FastConvertYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width) {
+ FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
+ }
+
++void ScaleYUVToRGB32Row(const uint8* y_buf,
++ const uint8* u_buf,
++ const uint8* v_buf,
++ uint8* rgb_buf,
++ int width,
++ int scaled_dx) {
++ ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx, 1);
++}
+ #else
+
+ #define RGBY(i) { \
+ static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
+ static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
+ static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
+ 0 \
+ }
+@@ -365,16 +373,86 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi
+ "r"(u_buf), // %1
+ "r"(v_buf), // %2
+ "r"(rgb_buf), // %3
+ "r"(width), // %4
+ "r" (kCoefficientsRgbY) // %5
+ : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"
+ );
+ }
++
++void ScaleYUVToRGB32Row(const uint8* y_buf, // rdi
++ const uint8* u_buf, // rsi
++ const uint8* v_buf, // rdx
++ uint8* rgb_buf, // rcx
++ int width, // r8
++ int scaled_dx) { // r9
++ asm(
++ "xor %%r11,%%r11\n"
++ "sub $0x2,%4\n"
++ "js scalenext\n"
++
++"scaleloop:"
++ "mov %%r11,%%r10\n"
++ "sar $0x5,%%r10\n"
++ "movzb (%1,%%r10,1),%%rax\n"
++ "movq 2048(%5,%%rax,8),%%xmm0\n"
++ "movzb (%2,%%r10,1),%%rax\n"
++ "movq 4096(%5,%%rax,8),%%xmm1\n"
++ "lea (%%r11,%6),%%r10\n"
++ "sar $0x4,%%r11\n"
++ "movzb (%0,%%r11,1),%%rax\n"
++ "paddsw %%xmm1,%%xmm0\n"
++ "movq (%5,%%rax,8),%%xmm1\n"
++ "lea (%%r10,%6),%%r11\n"
++ "sar $0x4,%%r10\n"
++ "movzb (%0,%%r10,1),%%rax\n"
++ "movq (%5,%%rax,8),%%xmm2\n"
++ "paddsw %%xmm0,%%xmm1\n"
++ "paddsw %%xmm0,%%xmm2\n"
++ "shufps $0x44,%%xmm2,%%xmm1\n"
++ "psraw $0x6,%%xmm1\n"
++ "packuswb %%xmm1,%%xmm1\n"
++ "movq %%xmm1,0x0(%3)\n"
++ "add $0x8,%3\n"
++ "sub $0x2,%4\n"
++ "jns scaleloop\n"
++
++"scalenext:"
++ "add $0x1,%4\n"
++ "js scaledone\n"
++
++ "mov %%r11,%%r10\n"
++ "sar $0x5,%%r10\n"
++ "movzb (%1,%%r10,1),%%rax\n"
++ "movq 2048(%5,%%rax,8),%%xmm0\n"
++ "movzb (%2,%%r10,1),%%rax\n"
++ "movq 4096(%5,%%rax,8),%%xmm1\n"
++ "paddsw %%xmm1,%%xmm0\n"
++ "sar $0x4,%%r11\n"
++ "movzb (%0,%%r11,1),%%rax\n"
++ "movq (%5,%%rax,8),%%xmm1\n"
++ "paddsw %%xmm0,%%xmm1\n"
++ "psraw $0x6,%%xmm1\n"
++ "packuswb %%xmm1,%%xmm1\n"
++ "movd %%xmm1,0x0(%3)\n"
++
++"scaledone:"
++ :
++ : "r"(y_buf), // %0
++ "r"(u_buf), // %1
++ "r"(v_buf), // %2
++ "r"(rgb_buf), // %3
++ "r"(width), // %4
++ "r" (kCoefficientsRgbY), // %5
++ "r"(static_cast<long>(scaled_dx)) // %6
++ : "memory", "r10", "r11", "rax", "xmm0", "xmm1", "xmm2"
++);
++}
++
+ #endif // __SUNPRO_CC
+
+ #else // ARCH_CPU_X86_64
+
+ #ifdef __SUNPRO_CC
+ void FastConvertYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+@@ -493,13 +571,87 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
+ "packuswb %mm1,%mm1\n"
+ "movd %mm1,0x0(%ebp)\n"
+ "2:"
+ "popa\n"
+ "ret\n"
+ ".previous\n"
+ );
+
++void ScaleYUVToRGB32Row(const uint8* y_buf,
++ const uint8* u_buf,
++ const uint8* v_buf,
++ uint8* rgb_buf,
++ int width,
++ int scaled_dx);
++
++ asm(
++ ".global ScaleYUVToRGB32Row\n"
++"ScaleYUVToRGB32Row:\n"
++ "pusha\n"
++ "mov 0x24(%esp),%edx\n"
++ "mov 0x28(%esp),%edi\n"
++ "mov 0x2c(%esp),%esi\n"
++ "mov 0x30(%esp),%ebp\n"
++ "mov 0x34(%esp),%ecx\n"
++ "xor %ebx,%ebx\n"
++ "jmp scaleend\n"
++
++"scaleloop:"
++ "mov %ebx,%eax\n"
++ "sar $0x5,%eax\n"
++ "movzbl (%edi,%eax,1),%eax\n"
++ "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
++ "mov %ebx,%eax\n"
++ "sar $0x5,%eax\n"
++ "movzbl (%esi,%eax,1),%eax\n"
++ "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
++ "mov %ebx,%eax\n"
++ "add 0x38(%esp),%ebx\n"
++ "sar $0x4,%eax\n"
++ "movzbl (%edx,%eax,1),%eax\n"
++ "movq kCoefficientsRgbY(,%eax,8),%mm1\n"
++ "mov %ebx,%eax\n"
++ "add 0x38(%esp),%ebx\n"
++ "sar $0x4,%eax\n"
++ "movzbl (%edx,%eax,1),%eax\n"
++ "movq kCoefficientsRgbY(,%eax,8),%mm2\n"
++ "paddsw %mm0,%mm1\n"
++ "paddsw %mm0,%mm2\n"
++ "psraw $0x6,%mm1\n"
++ "psraw $0x6,%mm2\n"
++ "packuswb %mm2,%mm1\n"
++ "movntq %mm1,0x0(%ebp)\n"
++ "add $0x8,%ebp\n"
++"scaleend:"
++ "sub $0x2,%ecx\n"
++ "jns scaleloop\n"
++
++ "and $0x1,%ecx\n"
++ "je scaledone\n"
++
++ "mov %ebx,%eax\n"
++ "sar $0x5,%eax\n"
++ "movzbl (%edi,%eax,1),%eax\n"
++ "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
++ "mov %ebx,%eax\n"
++ "sar $0x5,%eax\n"
++ "movzbl (%esi,%eax,1),%eax\n"
++ "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
++ "mov %ebx,%eax\n"
++ "sar $0x4,%eax\n"
++ "movzbl (%edx,%eax,1),%eax\n"
++ "movq kCoefficientsRgbY(,%eax,8),%mm1\n"
++ "paddsw %mm0,%mm1\n"
++ "psraw $0x6,%mm1\n"
++ "packuswb %mm1,%mm1\n"
++ "movd %mm1,0x0(%ebp)\n"
++
++"scaledone:"
++ "popa\n"
++ "ret\n"
++);
++
+ #endif // __SUNPRO_CC
+ #endif // ARCH_CPU_X86_64
+ #endif // !ARCH_CPU_X86_FAMILY
+ } // extern "C"
+
+diff --git a/gfx/ycbcr/yuv_row_mac.cpp b/gfx/ycbcr/yuv_row_mac.cpp
+index a1d0058..5acf825 100644
+--- a/gfx/ycbcr/yuv_row_mac.cpp
++++ b/gfx/ycbcr/yuv_row_mac.cpp
+@@ -16,16 +16,24 @@ extern "C" {
+ void FastConvertYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width) {
+ FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
+ }
+
++void ScaleYUVToRGB32Row(const uint8* y_buf,
++ const uint8* u_buf,
++ const uint8* v_buf,
++ uint8* rgb_buf,
++ int width,
++ int scaled_dx) {
++ ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx, 1);
++}
+ #else
+
+ #define RGBY(i) { \
+ static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
+ static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
+ static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
+ 0 \
+ }
+@@ -313,11 +321,96 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width) {
+ MacConvertYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width,
+ &kCoefficientsRgbY[0][0]);
+ }
+
++extern void MacScaleYUVToRGB32Row(const uint8* y_buf,
++ const uint8* u_buf,
++ const uint8* v_buf,
++ uint8* rgb_buf,
++ int width,
++ int scaled_dx,
++ int16 *kCoefficientsRgbY);
++
++ __asm__(
++"_MacScaleYUVToRGB32Row:\n"
++ "pusha\n"
++ "mov 0x24(%esp),%edx\n"
++ "mov 0x28(%esp),%edi\n"
++ "mov 0x2c(%esp),%esi\n"
++ "mov 0x30(%esp),%ebp\n"
++ "mov 0x3c(%esp),%ecx\n"
++ "xor %ebx,%ebx\n"
++ "jmp Lscaleend\n"
++
++"Lscaleloop:"
++ "mov %ebx,%eax\n"
++ "sar $0x5,%eax\n"
++ "movzbl (%edi,%eax,1),%eax\n"
++ "movq 2048(%ecx,%eax,8),%mm0\n"
++ "mov %ebx,%eax\n"
++ "sar $0x5,%eax\n"
++ "movzbl (%esi,%eax,1),%eax\n"
++ "paddsw 4096(%ecx,%eax,8),%mm0\n"
++ "mov %ebx,%eax\n"
++ "add 0x38(%esp),%ebx\n"
++ "sar $0x4,%eax\n"
++ "movzbl (%edx,%eax,1),%eax\n"
++ "movq 0(%ecx,%eax,8),%mm1\n"
++ "mov %ebx,%eax\n"
++ "add 0x38(%esp),%ebx\n"
++ "sar $0x4,%eax\n"
++ "movzbl (%edx,%eax,1),%eax\n"
++ "movq 0(%ecx,%eax,8),%mm2\n"
++ "paddsw %mm0,%mm1\n"
++ "paddsw %mm0,%mm2\n"
++ "psraw $0x6,%mm1\n"
++ "psraw $0x6,%mm2\n"
++ "packuswb %mm2,%mm1\n"
++ "movntq %mm1,0x0(%ebp)\n"
++ "add $0x8,%ebp\n"
++"Lscaleend:"
++ "sub $0x2,0x34(%esp)\n"
++ "jns Lscaleloop\n"
++
++ "and $0x1,0x34(%esp)\n"
++ "je Lscaledone\n"
++
++ "mov %ebx,%eax\n"
++ "sar $0x5,%eax\n"
++ "movzbl (%edi,%eax,1),%eax\n"
++ "movq 2048(%ecx,%eax,8),%mm0\n"
++ "mov %ebx,%eax\n"
++ "sar $0x5,%eax\n"
++ "movzbl (%esi,%eax,1),%eax\n"
++ "paddsw 4096(%ecx,%eax,8),%mm0\n"
++ "mov %ebx,%eax\n"
++ "sar $0x4,%eax\n"
++ "movzbl (%edx,%eax,1),%eax\n"
++ "movq 0(%ecx,%eax,8),%mm1\n"
++ "paddsw %mm0,%mm1\n"
++ "psraw $0x6,%mm1\n"
++ "packuswb %mm1,%mm1\n"
++ "movd %mm1,0x0(%ebp)\n"
++
++"Lscaledone:"
++ "popa\n"
++ "ret\n"
++);
++
++void ScaleYUVToRGB32Row(const uint8* y_buf,
++ const uint8* u_buf,
++ const uint8* v_buf,
++ uint8* rgb_buf,
++ int width,
++ int scaled_dx) {
++
++ MacScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx,
++ &kCoefficientsRgbY[0][0]);
++}
++
+ #endif // ARCH_CPU_PPC || ARCH_CPU_64_BITS
+ } // extern "C"
+
+diff --git a/gfx/ycbcr/yuv_row_win.cpp b/gfx/ycbcr/yuv_row_win.cpp
+index 699ac77..a1700fc 100644
+--- a/gfx/ycbcr/yuv_row_win.cpp
++++ b/gfx/ycbcr/yuv_row_win.cpp
+@@ -11,17 +11,26 @@ extern "C" {
+ // PPC implementation uses C fallback
+ void FastConvertYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width) {
+ FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
+ }
+-
++
++void ScaleYUVToRGB32Row(const uint8* y_buf,
++ const uint8* u_buf,
++ const uint8* v_buf,
++ uint8* rgb_buf,
++ int width,
++ int scaled_dx) {
++ ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx, 1);
++}
++
+ #else
+
+
+ #define RGBY(i) { \
+ static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
+ static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
+ static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
+ 0 \
+@@ -307,11 +316,280 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
+ movd [ebp], mm1
+ convertdone :
+
+ popad
+ ret
+ }
+ }
+
++__declspec(naked)
++void ConvertYUVToRGB32Row(const uint8* y_buf,
++ const uint8* u_buf,
++ const uint8* v_buf,
++ uint8* rgb_buf,
++ int width,
++ int step) {
++ __asm {
++ pushad
++ mov edx, [esp + 32 + 4] // Y
++ mov edi, [esp + 32 + 8] // U
++ mov esi, [esp + 32 + 12] // V
++ mov ebp, [esp + 32 + 16] // rgb
++ mov ecx, [esp + 32 + 20] // width
++ mov ebx, [esp + 32 + 24] // step
++ jmp wend
++
++ wloop :
++ movzx eax, byte ptr [edi]
++ add edi, ebx
++ movq mm0, [kCoefficientsRgbU + 8 * eax]
++ movzx eax, byte ptr [esi]
++ add esi, ebx
++ paddsw mm0, [kCoefficientsRgbV + 8 * eax]
++ movzx eax, byte ptr [edx]
++ add edx, ebx
++ movq mm1, [kCoefficientsRgbY + 8 * eax]
++ movzx eax, byte ptr [edx]
++ add edx, ebx
++ movq mm2, [kCoefficientsRgbY + 8 * eax]
++ paddsw mm1, mm0
++ paddsw mm2, mm0
++ psraw mm1, 6
++ psraw mm2, 6
++ packuswb mm1, mm2
++ movntq [ebp], mm1
++ add ebp, 8
++ wend :
++ sub ecx, 2
++ jns wloop
++
++ and ecx, 1 // odd number of pixels?
++ jz wdone
++
++ movzx eax, byte ptr [edi]
++ movq mm0, [kCoefficientsRgbU + 8 * eax]
++ movzx eax, byte ptr [esi]
++ paddsw mm0, [kCoefficientsRgbV + 8 * eax]
++ movzx eax, byte ptr [edx]
++ movq mm1, [kCoefficientsRgbY + 8 * eax]
++ paddsw mm1, mm0
++ psraw mm1, 6
++ packuswb mm1, mm1
++ movd [ebp], mm1
++ wdone :
++
++ popad
++ ret
++ }
++}
++
++__declspec(naked)
++void RotateConvertYUVToRGB32Row(const uint8* y_buf,
++ const uint8* u_buf,
++ const uint8* v_buf,
++ uint8* rgb_buf,
++ int width,
++ int ystep,
++ int uvstep) {
++ __asm {
++ pushad
++ mov edx, [esp + 32 + 4] // Y
++ mov edi, [esp + 32 + 8] // U
++ mov esi, [esp + 32 + 12] // V
++ mov ebp, [esp + 32 + 16] // rgb
++ mov ecx, [esp + 32 + 20] // width
++ jmp wend
++
++ wloop :
++ movzx eax, byte ptr [edi]
++ mov ebx, [esp + 32 + 28] // uvstep
++ add edi, ebx
++ movq mm0, [kCoefficientsRgbU + 8 * eax]
++ movzx eax, byte ptr [esi]
++ add esi, ebx
++ paddsw mm0, [kCoefficientsRgbV + 8 * eax]
++ movzx eax, byte ptr [edx]
++ mov ebx, [esp + 32 + 24] // ystep
++ add edx, ebx
++ movq mm1, [kCoefficientsRgbY + 8 * eax]
++ movzx eax, byte ptr [edx]
++ add edx, ebx
++ movq mm2, [kCoefficientsRgbY + 8 * eax]
++ paddsw mm1, mm0
++ paddsw mm2, mm0
++ psraw mm1, 6
++ psraw mm2, 6
++ packuswb mm1, mm2
++ movntq [ebp], mm1
++ add ebp, 8
++ wend :
++ sub ecx, 2
++ jns wloop
++
++ and ecx, 1 // odd number of pixels?
++ jz wdone
++
++ movzx eax, byte ptr [edi]
++ movq mm0, [kCoefficientsRgbU + 8 * eax]
++ movzx eax, byte ptr [esi]
++ paddsw mm0, [kCoefficientsRgbV + 8 * eax]
++ movzx eax, byte ptr [edx]
++ movq mm1, [kCoefficientsRgbY + 8 * eax]
++ paddsw mm1, mm0
++ psraw mm1, 6
++ packuswb mm1, mm1
++ movd [ebp], mm1
++ wdone :
++
++ popad
++ ret
++ }
++}
++
++__declspec(naked)
++void DoubleYUVToRGB32Row(const uint8* y_buf,
++ const uint8* u_buf,
++ const uint8* v_buf,
++ uint8* rgb_buf,
++ int width) {
++ __asm {
++ pushad
++ mov edx, [esp + 32 + 4] // Y
++ mov edi, [esp + 32 + 8] // U
++ mov esi, [esp + 32 + 12] // V
++ mov ebp, [esp + 32 + 16] // rgb
++ mov ecx, [esp + 32 + 20] // width
++ jmp wend
++
++ wloop :
++ movzx eax, byte ptr [edi]
++ add edi, 1
++ movzx ebx, byte ptr [esi]
++ add esi, 1
++ movq mm0, [kCoefficientsRgbU + 8 * eax]
++ movzx eax, byte ptr [edx]
++ paddsw mm0, [kCoefficientsRgbV + 8 * ebx]
++ movq mm1, [kCoefficientsRgbY + 8 * eax]
++ paddsw mm1, mm0
++ psraw mm1, 6
++ packuswb mm1, mm1
++ punpckldq mm1, mm1
++ movntq [ebp], mm1
++
++ movzx ebx, byte ptr [edx + 1]
++ add edx, 2
++ paddsw mm0, [kCoefficientsRgbY + 8 * ebx]
++ psraw mm0, 6
++ packuswb mm0, mm0
++ punpckldq mm0, mm0
++ movntq [ebp+8], mm0
++ add ebp, 16
++ wend :
++ sub ecx, 4
++ jns wloop
++
++ add ecx, 4
++ jz wdone
++
++ movzx eax, byte ptr [edi]
++ movq mm0, [kCoefficientsRgbU + 8 * eax]
++ movzx eax, byte ptr [esi]
++ paddsw mm0, [kCoefficientsRgbV + 8 * eax]
++ movzx eax, byte ptr [edx]
++ movq mm1, [kCoefficientsRgbY + 8 * eax]
++ paddsw mm1, mm0
++ psraw mm1, 6
++ packuswb mm1, mm1
++ jmp wend1
++
++ wloop1 :
++ movd [ebp], mm1
++ add ebp, 4
++ wend1 :
++ sub ecx, 1
++ jns wloop1
++ wdone :
++ popad
++ ret
++ }
++}
++
++// This version does general purpose scaling by any amount, up or down.
++// The only thing it cannot do is rotation by 90 or 270.
++// For performance the chroma is under sampled, reducing cost of a 3x
++// 1080p scale from 8.4 ms to 5.4 ms.
++__declspec(naked)
++void ScaleYUVToRGB32Row(const uint8* y_buf,
++ const uint8* u_buf,
++ const uint8* v_buf,
++ uint8* rgb_buf,
++ int width,
++ int dx) {
++ __asm {
++ pushad
++ mov edx, [esp + 32 + 4] // Y
++ mov edi, [esp + 32 + 8] // U
++ mov esi, [esp + 32 + 12] // V
++ mov ebp, [esp + 32 + 16] // rgb
++ mov ecx, [esp + 32 + 20] // width
++ xor ebx, ebx // x
++ jmp scaleend
++
++ scaleloop :
++ mov eax, ebx
++ sar eax, 5
++ movzx eax, byte ptr [edi + eax]
++ movq mm0, [kCoefficientsRgbU + 8 * eax]
++ mov eax, ebx
++ sar eax, 5
++ movzx eax, byte ptr [esi + eax]
++ paddsw mm0, [kCoefficientsRgbV + 8 * eax]
++ mov eax, ebx
++ add ebx, [esp + 32 + 24] // x += dx
++ sar eax, 4
++ movzx eax, byte ptr [edx + eax]
++ movq mm1, [kCoefficientsRgbY + 8 * eax]
++ mov eax, ebx
++ add ebx, [esp + 32 + 24] // x += dx
++ sar eax, 4
++ movzx eax, byte ptr [edx + eax]
++ movq mm2, [kCoefficientsRgbY + 8 * eax]
++ paddsw mm1, mm0
++ paddsw mm2, mm0
++ psraw mm1, 6
++ psraw mm2, 6
++ packuswb mm1, mm2
++ movntq [ebp], mm1
++ add ebp, 8
++ scaleend :
++ sub ecx, 2
++ jns scaleloop
++
++ and ecx, 1 // odd number of pixels?
++ jz scaledone
++
++ mov eax, ebx
++ sar eax, 5
++ movzx eax, byte ptr [edi + eax]
++ movq mm0, [kCoefficientsRgbU + 8 * eax]
++ mov eax, ebx
++ sar eax, 5
++ movzx eax, byte ptr [esi + eax]
++ paddsw mm0, [kCoefficientsRgbV + 8 * eax]
++ mov eax, ebx
++ sar eax, 4
++ movzx eax, byte ptr [edx + eax]
++ movq mm1, [kCoefficientsRgbY + 8 * eax]
++ paddsw mm1, mm0
++ psraw mm1, 6
++ packuswb mm1, mm1
++ movd [ebp], mm1
++
++ scaledone :
++ popad
++ ret
++ }
++}
++
+ #endif // ARCH_CPU_64_BITS
+ } // extern "C"
+
--- a/gfx/ycbcr/update.sh
+++ b/gfx/ycbcr/update.sh
@@ -10,8 +10,9 @@ patch -p3 <convert.patch
patch -p3 <picture_region.patch
patch -p3 <remove_scale.patch
patch -p3 <export.patch
patch -p3 <win64_mac64.patch
patch -p3 <yv24.patch
patch -p3 <row_c_fix.patch
patch -p3 <bug572034_mac_64bit.patch
patch -p3 <bug577645_movntq.patch
+patch -p3 <add_scale.patch
--- a/gfx/ycbcr/yuv_convert.cpp
+++ b/gfx/ycbcr/yuv_convert.cpp
@@ -84,10 +84,139 @@ NS_GFX_(void) ConvertYCbCrToRGB32(const
#ifdef ARCH_CPU_X86_FAMILY
// SSE used for FastConvertYUVToRGB32Row requires emms instruction.
if (has_sse)
EMMS();
#endif
}
+// Scale a frame of YUV to 32 bit ARGB.
+void ScaleYCbCrToRGB32(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width,
+ int height,
+ int scaled_width,
+ int scaled_height,
+ int y_pitch,
+ int uv_pitch,
+ int rgb_pitch,
+ YUVType yuv_type,
+ Rotate view_rotate) {
+ unsigned int y_shift = yuv_type == YV12 ? 1 : 0;
+ unsigned int x_shift = yuv_type == YV24 ? 0 : 1;
+ bool has_mmx = supports_mmx();
+ // Diagram showing origin and direction of source sampling.
+ // ->0 4<-
+ // 7 3
+ //
+ // 6 5
+ // ->1 2<-
+ // Rotations that start at right side of image.
+ if ((view_rotate == ROTATE_180) ||
+ (view_rotate == ROTATE_270) ||
+ (view_rotate == MIRROR_ROTATE_0) ||
+ (view_rotate == MIRROR_ROTATE_90)) {
+ y_buf += width - 1;
+ u_buf += width / 2 - 1;
+ v_buf += width / 2 - 1;
+ width = -width;
+ }
+ // Rotations that start at bottom of image.
+ if ((view_rotate == ROTATE_90) ||
+ (view_rotate == ROTATE_180) ||
+ (view_rotate == MIRROR_ROTATE_90) ||
+ (view_rotate == MIRROR_ROTATE_180)) {
+ y_buf += (height - 1) * y_pitch;
+ u_buf += ((height >> y_shift) - 1) * uv_pitch;
+ v_buf += ((height >> y_shift) - 1) * uv_pitch;
+ height = -height;
+ }
+
+ // Handle zero sized destination.
+ if (scaled_width == 0 || scaled_height == 0)
+ return;
+ int scaled_dx = width * 16 / scaled_width;
+ int scaled_dy = height * 16 / scaled_height;
+
+ int scaled_dx_uv = scaled_dx;
+
+ if ((view_rotate == ROTATE_90) ||
+ (view_rotate == ROTATE_270)) {
+ int tmp = scaled_height;
+ scaled_height = scaled_width;
+ scaled_width = tmp;
+ tmp = height;
+ height = width;
+ width = tmp;
+ int original_dx = scaled_dx;
+ int original_dy = scaled_dy;
+ scaled_dx = ((original_dy >> 4) * y_pitch) << 4;
+ scaled_dx_uv = ((original_dy >> 4) * uv_pitch) << 4;
+ scaled_dy = original_dx;
+ if (view_rotate == ROTATE_90) {
+ y_pitch = -1;
+ uv_pitch = -1;
+ height = -height;
+ } else {
+ y_pitch = 1;
+ uv_pitch = 1;
+ }
+ }
+
+ for (int y = 0; y < scaled_height; ++y) {
+ uint8* dest_pixel = rgb_buf + y * rgb_pitch;
+ int scaled_y = (y * height / scaled_height);
+ const uint8* y_ptr = y_buf + scaled_y * y_pitch;
+ const uint8* u_ptr = u_buf + (scaled_y >> y_shift) * uv_pitch;
+ const uint8* v_ptr = v_buf + (scaled_y >> y_shift) * uv_pitch;
+
+#if defined(_MSC_VER)
+ if (scaled_width == (width * 2)) {
+ DoubleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
+ dest_pixel, scaled_width);
+ } else if ((scaled_dx & 15) == 0) { // Scaling by integer scale factor.
+ if (scaled_dx_uv == scaled_dx) { // Not rotated.
+ if (scaled_dx == 16) { // Not scaled
+ if (has_mmx)
+ FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
+ dest_pixel, scaled_width);
+ else
+ FastConvertYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr,
+ dest_pixel, scaled_width, x_shift);
+ } else { // Simple scale down. ie half
+ ConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
+ dest_pixel, scaled_width, scaled_dx >> 4);
+ }
+ } else {
+ RotateConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
+ dest_pixel, scaled_width,
+ scaled_dx >> 4, scaled_dx_uv >> 4);
+ }
+#else
+ if (scaled_dx == 16) { // Not scaled
+ if (has_mmx)
+ FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
+ dest_pixel, scaled_width);
+ else
+ FastConvertYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr,
+ dest_pixel, scaled_width, x_shift);
+#endif
+ } else {
+ if (has_mmx)
+ ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
+ dest_pixel, scaled_width, scaled_dx);
+ else
+ ScaleYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr,
+ dest_pixel, scaled_width, scaled_dx, x_shift);
+
+ }
+ }
+
+ // MMX used for FastConvertYUVToRGB32Row requires emms instruction.
+ if (has_mmx)
+ EMMS();
+}
+
} // namespace gfx
} // namespace mozilla
--- a/gfx/ycbcr/yuv_convert.h
+++ b/gfx/ycbcr/yuv_convert.h
@@ -15,27 +15,56 @@ namespace gfx {
// Type of YUV surface.
// The value of these enums matter as they are used to shift vertical indices.
enum YUVType {
YV12 = 0, // YV12 is half width and half height chroma channels.
YV16 = 1, // YV16 is half width and full height chroma channels.
YV24 = 2 // YV24 is full width and full height chroma channels.
};
+// Mirror means flip the image horizontally, as in looking in a mirror.
+// Rotate happens after mirroring.
+enum Rotate {
+ ROTATE_0, // Rotation off.
+ ROTATE_90, // Rotate clockwise.
+ ROTATE_180, // Rotate upside down.
+ ROTATE_270, // Rotate counter clockwise.
+ MIRROR_ROTATE_0, // Mirror horizontally.
+ MIRROR_ROTATE_90, // Mirror then Rotate clockwise.
+ MIRROR_ROTATE_180, // Mirror vertically.
+ MIRROR_ROTATE_270 // Transpose.
+};
+
// Convert a frame of YUV to 32 bit ARGB.
// Pass in YV16/YV12 depending on source format
NS_GFX_(void) ConvertYCbCrToRGB32(const uint8* yplane,
const uint8* uplane,
const uint8* vplane,
uint8* rgbframe,
int pic_x,
int pic_y,
int pic_width,
int pic_height,
int ystride,
int uvstride,
int rgbstride,
YUVType yuv_type);
+// Scale a frame of YUV to 32 bit ARGB.
+// Supports rotation and mirroring.
+void ScaleYCbCrToRGB32(const uint8* yplane,
+ const uint8* uplane,
+ const uint8* vplane,
+ uint8* rgbframe,
+ int frame_width,
+ int frame_height,
+ int scaled_width,
+ int scaled_height,
+ int ystride,
+ int uvstride,
+ int rgbstride,
+ YUVType yuv_type,
+ Rotate view_rotate);
+
} // namespace gfx
} // namespace mozilla
#endif // MEDIA_BASE_YUV_CONVERT_H_
--- a/gfx/ycbcr/yuv_row.h
+++ b/gfx/ycbcr/yuv_row.h
@@ -24,16 +24,64 @@ void FastConvertYUVToRGB32Row(const uint
void FastConvertYUVToRGB32Row_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
unsigned int x_shift);
+// Can do 1x, half size or any scale down by an integer amount.
+// Step can be negative (mirroring, rotate 180).
+// This is the third fastest of the scalers.
+void ConvertYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width,
+ int step);
+
+// Rotate is like Convert, but applies different step to Y versus U and V.
+// This allows rotation by 90 or 270, by stepping by stride.
+// This is the fourth fastest of the scalers.
+void RotateConvertYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width,
+ int ystep,
+ int uvstep);
+
+// Doubler does 4 pixels at a time. Each pixel is replicated.
+// This is the fastest of the scalers.
+void DoubleYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width);
+
+// Handles arbitrary scaling up or down.
+// Mirroring is supported, but not 90 or 270 degree rotation.
+// Chroma is under sampled every 2 pixels for performance.
+// This is the slowest of the scalers.
+void ScaleYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width,
+ int scaled_dx);
+
+void ScaleYUVToRGB32Row_C(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width,
+ int scaled_dx,
+ unsigned int x_shift);
+
} // extern "C"
// x64 uses MMX2 (SSE) so emms is not required.
#if defined(ARCH_CPU_X86)
#if defined(_MSC_VER)
#define EMMS() __asm emms
#else
#define EMMS() asm("emms")
--- a/gfx/ycbcr/yuv_row_c.cpp
+++ b/gfx/ycbcr/yuv_row_c.cpp
@@ -172,10 +172,31 @@ void FastConvertYUVToRGB32Row_C(const ui
v = v_buf[x + 1];
}
YuvPixel(y1, u, v, rgb_buf + 4);
}
rgb_buf += 8; // Advance 2 pixels.
}
}
+// 28.4 fixed point is used. A shift by 4 isolates the integer.
+// A shift by 5 is used to further subsample the chrominance channels.
+// & 15 isolates the fixed point fraction. >> 2 to get the upper 2 bits,
+// for 1/4 pixel accurate interpolation.
+void ScaleYUVToRGB32Row_C(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width,
+ int scaled_dx,
+ unsigned int x_shift) {
+ int scaled_x = 0;
+ for (int x = 0; x < width; ++x) {
+ uint8 u = u_buf[scaled_x >> (4 + x_shift)];
+ uint8 v = v_buf[scaled_x >> (4 + x_shift)];
+ uint8 y0 = y_buf[scaled_x >> 4];
+ YuvPixel(y0, u, v, rgb_buf);
+ rgb_buf += 4;
+ scaled_x += scaled_dx;
+ }
+}
} // extern "C"
--- a/gfx/ycbcr/yuv_row_linux.cpp
+++ b/gfx/ycbcr/yuv_row_linux.cpp
@@ -16,16 +16,24 @@ extern "C" {
void FastConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
}
+void ScaleYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width,
+ int scaled_dx) {
+ ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx, 1);
+}
#else
#define RGBY(i) { \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
0 \
}
@@ -365,16 +373,86 @@ void FastConvertYUVToRGB32Row(const uint
"r"(u_buf), // %1
"r"(v_buf), // %2
"r"(rgb_buf), // %3
"r"(width), // %4
"r" (kCoefficientsRgbY) // %5
: "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"
);
}
+
+void ScaleYUVToRGB32Row(const uint8* y_buf, // rdi
+ const uint8* u_buf, // rsi
+ const uint8* v_buf, // rdx
+ uint8* rgb_buf, // rcx
+ int width, // r8
+ int scaled_dx) { // r9
+ asm(
+ "xor %%r11,%%r11\n"
+ "sub $0x2,%4\n"
+ "js scalenext\n"
+
+"scaleloop:"
+ "mov %%r11,%%r10\n"
+ "sar $0x5,%%r10\n"
+ "movzb (%1,%%r10,1),%%rax\n"
+ "movq 2048(%5,%%rax,8),%%xmm0\n"
+ "movzb (%2,%%r10,1),%%rax\n"
+ "movq 4096(%5,%%rax,8),%%xmm1\n"
+ "lea (%%r11,%6),%%r10\n"
+ "sar $0x4,%%r11\n"
+ "movzb (%0,%%r11,1),%%rax\n"
+ "paddsw %%xmm1,%%xmm0\n"
+ "movq (%5,%%rax,8),%%xmm1\n"
+ "lea (%%r10,%6),%%r11\n"
+ "sar $0x4,%%r10\n"
+ "movzb (%0,%%r10,1),%%rax\n"
+ "movq (%5,%%rax,8),%%xmm2\n"
+ "paddsw %%xmm0,%%xmm1\n"
+ "paddsw %%xmm0,%%xmm2\n"
+ "shufps $0x44,%%xmm2,%%xmm1\n"
+ "psraw $0x6,%%xmm1\n"
+ "packuswb %%xmm1,%%xmm1\n"
+ "movq %%xmm1,0x0(%3)\n"
+ "add $0x8,%3\n"
+ "sub $0x2,%4\n"
+ "jns scaleloop\n"
+
+"scalenext:"
+ "add $0x1,%4\n"
+ "js scaledone\n"
+
+ "mov %%r11,%%r10\n"
+ "sar $0x5,%%r10\n"
+ "movzb (%1,%%r10,1),%%rax\n"
+ "movq 2048(%5,%%rax,8),%%xmm0\n"
+ "movzb (%2,%%r10,1),%%rax\n"
+ "movq 4096(%5,%%rax,8),%%xmm1\n"
+ "paddsw %%xmm1,%%xmm0\n"
+ "sar $0x4,%%r11\n"
+ "movzb (%0,%%r11,1),%%rax\n"
+ "movq (%5,%%rax,8),%%xmm1\n"
+ "paddsw %%xmm0,%%xmm1\n"
+ "psraw $0x6,%%xmm1\n"
+ "packuswb %%xmm1,%%xmm1\n"
+ "movd %%xmm1,0x0(%3)\n"
+
+"scaledone:"
+ :
+ : "r"(y_buf), // %0
+ "r"(u_buf), // %1
+ "r"(v_buf), // %2
+ "r"(rgb_buf), // %3
+ "r"(width), // %4
+ "r" (kCoefficientsRgbY), // %5
+ "r"(static_cast<long>(scaled_dx)) // %6
+ : "memory", "r10", "r11", "rax", "xmm0", "xmm1", "xmm2"
+);
+}
+
#endif // __SUNPRO_CC
#else // ARCH_CPU_X86_64
#ifdef __SUNPRO_CC
void FastConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
@@ -493,13 +571,87 @@ void FastConvertYUVToRGB32Row(const uint
"packuswb %mm1,%mm1\n"
"movd %mm1,0x0(%ebp)\n"
"2:"
"popa\n"
"ret\n"
".previous\n"
);
+void ScaleYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width,
+ int scaled_dx);
+
+ asm(
+ ".global ScaleYUVToRGB32Row\n"
+"ScaleYUVToRGB32Row:\n"
+ "pusha\n"
+ "mov 0x24(%esp),%edx\n"
+ "mov 0x28(%esp),%edi\n"
+ "mov 0x2c(%esp),%esi\n"
+ "mov 0x30(%esp),%ebp\n"
+ "mov 0x34(%esp),%ecx\n"
+ "xor %ebx,%ebx\n"
+ "jmp scaleend\n"
+
+"scaleloop:"
+ "mov %ebx,%eax\n"
+ "sar $0x5,%eax\n"
+ "movzbl (%edi,%eax,1),%eax\n"
+ "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
+ "mov %ebx,%eax\n"
+ "sar $0x5,%eax\n"
+ "movzbl (%esi,%eax,1),%eax\n"
+ "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
+ "mov %ebx,%eax\n"
+ "add 0x38(%esp),%ebx\n"
+ "sar $0x4,%eax\n"
+ "movzbl (%edx,%eax,1),%eax\n"
+ "movq kCoefficientsRgbY(,%eax,8),%mm1\n"
+ "mov %ebx,%eax\n"
+ "add 0x38(%esp),%ebx\n"
+ "sar $0x4,%eax\n"
+ "movzbl (%edx,%eax,1),%eax\n"
+ "movq kCoefficientsRgbY(,%eax,8),%mm2\n"
+ "paddsw %mm0,%mm1\n"
+ "paddsw %mm0,%mm2\n"
+ "psraw $0x6,%mm1\n"
+ "psraw $0x6,%mm2\n"
+ "packuswb %mm2,%mm1\n"
+ "movntq %mm1,0x0(%ebp)\n"
+ "add $0x8,%ebp\n"
+"scaleend:"
+ "sub $0x2,%ecx\n"
+ "jns scaleloop\n"
+
+ "and $0x1,%ecx\n"
+ "je scaledone\n"
+
+ "mov %ebx,%eax\n"
+ "sar $0x5,%eax\n"
+ "movzbl (%edi,%eax,1),%eax\n"
+ "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
+ "mov %ebx,%eax\n"
+ "sar $0x5,%eax\n"
+ "movzbl (%esi,%eax,1),%eax\n"
+ "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
+ "mov %ebx,%eax\n"
+ "sar $0x4,%eax\n"
+ "movzbl (%edx,%eax,1),%eax\n"
+ "movq kCoefficientsRgbY(,%eax,8),%mm1\n"
+ "paddsw %mm0,%mm1\n"
+ "psraw $0x6,%mm1\n"
+ "packuswb %mm1,%mm1\n"
+ "movd %mm1,0x0(%ebp)\n"
+
+"scaledone:"
+ "popa\n"
+ "ret\n"
+);
+
#endif // __SUNPRO_CC
#endif // ARCH_CPU_X86_64
#endif // !ARCH_CPU_X86_FAMILY
} // extern "C"
--- a/gfx/ycbcr/yuv_row_mac.cpp
+++ b/gfx/ycbcr/yuv_row_mac.cpp
@@ -16,16 +16,24 @@ extern "C" {
void FastConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
}
+void ScaleYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width,
+ int scaled_dx) {
+ ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx, 1);
+}
#else
#define RGBY(i) { \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
0 \
}
@@ -313,11 +321,96 @@ void FastConvertYUVToRGB32Row(const uint
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
MacConvertYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width,
&kCoefficientsRgbY[0][0]);
}
+extern void MacScaleYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width,
+ int scaled_dx,
+ int16 *kCoefficientsRgbY);
+
+ __asm__(
+"_MacScaleYUVToRGB32Row:\n"
+ "pusha\n"
+ "mov 0x24(%esp),%edx\n"
+ "mov 0x28(%esp),%edi\n"
+ "mov 0x2c(%esp),%esi\n"
+ "mov 0x30(%esp),%ebp\n"
+ "mov 0x3c(%esp),%ecx\n"
+ "xor %ebx,%ebx\n"
+ "jmp Lscaleend\n"
+
+"Lscaleloop:"
+ "mov %ebx,%eax\n"
+ "sar $0x5,%eax\n"
+ "movzbl (%edi,%eax,1),%eax\n"
+ "movq 2048(%ecx,%eax,8),%mm0\n"
+ "mov %ebx,%eax\n"
+ "sar $0x5,%eax\n"
+ "movzbl (%esi,%eax,1),%eax\n"
+ "paddsw 4096(%ecx,%eax,8),%mm0\n"
+ "mov %ebx,%eax\n"
+ "add 0x38(%esp),%ebx\n"
+ "sar $0x4,%eax\n"
+ "movzbl (%edx,%eax,1),%eax\n"
+ "movq 0(%ecx,%eax,8),%mm1\n"
+ "mov %ebx,%eax\n"
+ "add 0x38(%esp),%ebx\n"
+ "sar $0x4,%eax\n"
+ "movzbl (%edx,%eax,1),%eax\n"
+ "movq 0(%ecx,%eax,8),%mm2\n"
+ "paddsw %mm0,%mm1\n"
+ "paddsw %mm0,%mm2\n"
+ "psraw $0x6,%mm1\n"
+ "psraw $0x6,%mm2\n"
+ "packuswb %mm2,%mm1\n"
+ "movntq %mm1,0x0(%ebp)\n"
+ "add $0x8,%ebp\n"
+"Lscaleend:"
+ "sub $0x2,0x34(%esp)\n"
+ "jns Lscaleloop\n"
+
+ "and $0x1,0x34(%esp)\n"
+ "je Lscaledone\n"
+
+ "mov %ebx,%eax\n"
+ "sar $0x5,%eax\n"
+ "movzbl (%edi,%eax,1),%eax\n"
+ "movq 2048(%ecx,%eax,8),%mm0\n"
+ "mov %ebx,%eax\n"
+ "sar $0x5,%eax\n"
+ "movzbl (%esi,%eax,1),%eax\n"
+ "paddsw 4096(%ecx,%eax,8),%mm0\n"
+ "mov %ebx,%eax\n"
+ "sar $0x4,%eax\n"
+ "movzbl (%edx,%eax,1),%eax\n"
+ "movq 0(%ecx,%eax,8),%mm1\n"
+ "paddsw %mm0,%mm1\n"
+ "psraw $0x6,%mm1\n"
+ "packuswb %mm1,%mm1\n"
+ "movd %mm1,0x0(%ebp)\n"
+
+"Lscaledone:"
+ "popa\n"
+ "ret\n"
+);
+
+void ScaleYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width,
+ int scaled_dx) {
+
+ MacScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx,
+ &kCoefficientsRgbY[0][0]);
+}
+
#endif // ARCH_CPU_PPC || ARCH_CPU_64_BITS
} // extern "C"
--- a/gfx/ycbcr/yuv_row_win.cpp
+++ b/gfx/ycbcr/yuv_row_win.cpp
@@ -11,17 +11,26 @@ extern "C" {
// PPC implementation uses C fallback
void FastConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
}
-
+
+void ScaleYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width,
+ int scaled_dx) {
+ ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx, 1);
+}
+
#else
#define RGBY(i) { \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
0 \
@@ -307,11 +316,280 @@ void FastConvertYUVToRGB32Row(const uint
movd [ebp], mm1
convertdone :
popad
ret
}
}
+__declspec(naked)
+void ConvertYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width,
+ int step) {
+ __asm {
+ pushad
+ mov edx, [esp + 32 + 4] // Y
+ mov edi, [esp + 32 + 8] // U
+ mov esi, [esp + 32 + 12] // V
+ mov ebp, [esp + 32 + 16] // rgb
+ mov ecx, [esp + 32 + 20] // width
+ mov ebx, [esp + 32 + 24] // step
+ jmp wend
+
+ wloop :
+ movzx eax, byte ptr [edi]
+ add edi, ebx
+ movq mm0, [kCoefficientsRgbU + 8 * eax]
+ movzx eax, byte ptr [esi]
+ add esi, ebx
+ paddsw mm0, [kCoefficientsRgbV + 8 * eax]
+ movzx eax, byte ptr [edx]
+ add edx, ebx
+ movq mm1, [kCoefficientsRgbY + 8 * eax]
+ movzx eax, byte ptr [edx]
+ add edx, ebx
+ movq mm2, [kCoefficientsRgbY + 8 * eax]
+ paddsw mm1, mm0
+ paddsw mm2, mm0
+ psraw mm1, 6
+ psraw mm2, 6
+ packuswb mm1, mm2
+ movntq [ebp], mm1
+ add ebp, 8
+ wend :
+ sub ecx, 2
+ jns wloop
+
+ and ecx, 1 // odd number of pixels?
+ jz wdone
+
+ movzx eax, byte ptr [edi]
+ movq mm0, [kCoefficientsRgbU + 8 * eax]
+ movzx eax, byte ptr [esi]
+ paddsw mm0, [kCoefficientsRgbV + 8 * eax]
+ movzx eax, byte ptr [edx]
+ movq mm1, [kCoefficientsRgbY + 8 * eax]
+ paddsw mm1, mm0
+ psraw mm1, 6
+ packuswb mm1, mm1
+ movd [ebp], mm1
+ wdone :
+
+ popad
+ ret
+ }
+}
+
+__declspec(naked)
+void RotateConvertYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width,
+ int ystep,
+ int uvstep) {
+ __asm {
+ pushad
+ mov edx, [esp + 32 + 4] // Y
+ mov edi, [esp + 32 + 8] // U
+ mov esi, [esp + 32 + 12] // V
+ mov ebp, [esp + 32 + 16] // rgb
+ mov ecx, [esp + 32 + 20] // width
+ jmp wend
+
+ wloop :
+ movzx eax, byte ptr [edi]
+ mov ebx, [esp + 32 + 28] // uvstep
+ add edi, ebx
+ movq mm0, [kCoefficientsRgbU + 8 * eax]
+ movzx eax, byte ptr [esi]
+ add esi, ebx
+ paddsw mm0, [kCoefficientsRgbV + 8 * eax]
+ movzx eax, byte ptr [edx]
+ mov ebx, [esp + 32 + 24] // ystep
+ add edx, ebx
+ movq mm1, [kCoefficientsRgbY + 8 * eax]
+ movzx eax, byte ptr [edx]
+ add edx, ebx
+ movq mm2, [kCoefficientsRgbY + 8 * eax]
+ paddsw mm1, mm0
+ paddsw mm2, mm0
+ psraw mm1, 6
+ psraw mm2, 6
+ packuswb mm1, mm2
+ movntq [ebp], mm1
+ add ebp, 8
+ wend :
+ sub ecx, 2
+ jns wloop
+
+ and ecx, 1 // odd number of pixels?
+ jz wdone
+
+ movzx eax, byte ptr [edi]
+ movq mm0, [kCoefficientsRgbU + 8 * eax]
+ movzx eax, byte ptr [esi]
+ paddsw mm0, [kCoefficientsRgbV + 8 * eax]
+ movzx eax, byte ptr [edx]
+ movq mm1, [kCoefficientsRgbY + 8 * eax]
+ paddsw mm1, mm0
+ psraw mm1, 6
+ packuswb mm1, mm1
+ movd [ebp], mm1
+ wdone :
+
+ popad
+ ret
+ }
+}
+
+__declspec(naked)
+void DoubleYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width) {
+ __asm {
+ pushad
+ mov edx, [esp + 32 + 4] // Y
+ mov edi, [esp + 32 + 8] // U
+ mov esi, [esp + 32 + 12] // V
+ mov ebp, [esp + 32 + 16] // rgb
+ mov ecx, [esp + 32 + 20] // width
+ jmp wend
+
+ wloop :
+ movzx eax, byte ptr [edi]
+ add edi, 1
+ movzx ebx, byte ptr [esi]
+ add esi, 1
+ movq mm0, [kCoefficientsRgbU + 8 * eax]
+ movzx eax, byte ptr [edx]
+ paddsw mm0, [kCoefficientsRgbV + 8 * ebx]
+ movq mm1, [kCoefficientsRgbY + 8 * eax]
+ paddsw mm1, mm0
+ psraw mm1, 6
+ packuswb mm1, mm1
+ punpckldq mm1, mm1
+ movntq [ebp], mm1
+
+ movzx ebx, byte ptr [edx + 1]
+ add edx, 2
+ paddsw mm0, [kCoefficientsRgbY + 8 * ebx]
+ psraw mm0, 6
+ packuswb mm0, mm0
+ punpckldq mm0, mm0
+ movntq [ebp+8], mm0
+ add ebp, 16
+ wend :
+ sub ecx, 4
+ jns wloop
+
+ add ecx, 4
+ jz wdone
+
+ movzx eax, byte ptr [edi]
+ movq mm0, [kCoefficientsRgbU + 8 * eax]
+ movzx eax, byte ptr [esi]
+ paddsw mm0, [kCoefficientsRgbV + 8 * eax]
+ movzx eax, byte ptr [edx]
+ movq mm1, [kCoefficientsRgbY + 8 * eax]
+ paddsw mm1, mm0
+ psraw mm1, 6
+ packuswb mm1, mm1
+ jmp wend1
+
+ wloop1 :
+ movd [ebp], mm1
+ add ebp, 4
+ wend1 :
+ sub ecx, 1
+ jns wloop1
+ wdone :
+ popad
+ ret
+ }
+}
+
+// This version does general purpose scaling by any amount, up or down.
+// The only thing it cannot do is rotation by 90 or 270.
+// For performance the chroma is undersampled, reducing the cost of a 3x
+// 1080p scale from 8.4 ms to 5.4 ms.
+__declspec(naked)
+void ScaleYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width,
+ int dx) {
+ __asm {
+ pushad
+ mov edx, [esp + 32 + 4] // Y
+ mov edi, [esp + 32 + 8] // U
+ mov esi, [esp + 32 + 12] // V
+ mov ebp, [esp + 32 + 16] // rgb
+ mov ecx, [esp + 32 + 20] // width
+ xor ebx, ebx // x
+ jmp scaleend
+
+ scaleloop :
+ mov eax, ebx
+ sar eax, 5
+ movzx eax, byte ptr [edi + eax]
+ movq mm0, [kCoefficientsRgbU + 8 * eax]
+ mov eax, ebx
+ sar eax, 5
+ movzx eax, byte ptr [esi + eax]
+ paddsw mm0, [kCoefficientsRgbV + 8 * eax]
+ mov eax, ebx
+ add ebx, [esp + 32 + 24] // x += dx
+ sar eax, 4
+ movzx eax, byte ptr [edx + eax]
+ movq mm1, [kCoefficientsRgbY + 8 * eax]
+ mov eax, ebx
+ add ebx, [esp + 32 + 24] // x += dx
+ sar eax, 4
+ movzx eax, byte ptr [edx + eax]
+ movq mm2, [kCoefficientsRgbY + 8 * eax]
+ paddsw mm1, mm0
+ paddsw mm2, mm0
+ psraw mm1, 6
+ psraw mm2, 6
+ packuswb mm1, mm2
+ movntq [ebp], mm1
+ add ebp, 8
+ scaleend :
+ sub ecx, 2
+ jns scaleloop
+
+ and ecx, 1 // odd number of pixels?
+ jz scaledone
+
+ mov eax, ebx
+ sar eax, 5
+ movzx eax, byte ptr [edi + eax]
+ movq mm0, [kCoefficientsRgbU + 8 * eax]
+ mov eax, ebx
+ sar eax, 5
+ movzx eax, byte ptr [esi + eax]
+ paddsw mm0, [kCoefficientsRgbV + 8 * eax]
+ mov eax, ebx
+ sar eax, 4
+ movzx eax, byte ptr [edx + eax]
+ movq mm1, [kCoefficientsRgbY + 8 * eax]
+ paddsw mm1, mm0
+ psraw mm1, 6
+ packuswb mm1, mm1
+ movd [ebp], mm1
+
+ scaledone :
+ popad
+ ret
+ }
+}
+
#endif // ARCH_CPU_64_BITS
} // extern "C"
--- a/layout/generic/nsVideoFrame.cpp
+++ b/layout/generic/nsVideoFrame.cpp
@@ -248,16 +248,20 @@ nsVideoFrame::BuildLayer(nsDisplayListBu
// the largest rectangle that fills our content-box and has the
// correct aspect ratio.
nsPresContext* presContext = PresContext();
gfxRect r = gfxRect(presContext->AppUnitsToGfxUnits(area.x),
presContext->AppUnitsToGfxUnits(area.y),
presContext->AppUnitsToGfxUnits(area.width),
presContext->AppUnitsToGfxUnits(area.height));
r = CorrectForAspectRatio(r, videoSize);
+ r.Round();
+ gfxIntSize scaleHint(static_cast<PRInt32>(r.Width()),
+ static_cast<PRInt32>(r.Height()));
+ container->SetScaleHint(scaleHint);
nsRefPtr<ImageLayer> layer = static_cast<ImageLayer*>
(aBuilder->LayerBuilder()->GetLeafLayerFor(aBuilder, aManager, aItem));
if (!layer) {
layer = aManager->CreateImageLayer();
if (!layer)
return nsnull;
}