Bug 732985 - Part 1: Add image scaling code to Azure. r=jrmuizel
authorBas Schouten <bschouten@mozilla.com>
Tue, 08 May 2012 04:31:30 +0200
changeset 93430 94ffc045ba89b4d4cea8aba91d789c58de41294b
parent 93429 56ffdd5c9388ab76c9c25cef55b91500e658b391
child 93431 f0ca5eb320c41133f51dc51a5e188551c217e80d
push id9146
push userbschouten@mozilla.com
push dateTue, 08 May 2012 02:32:02 +0000
treeherdermozilla-inbound@f0ca5eb320c4 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersjrmuizel
bugs732985
milestone15.0a1
first release with
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
last release without
nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
Bug 732985 - Part 1: Add image scaling code to Azure. r=jrmuizel
gfx/2d/2D.h
gfx/2d/Factory.cpp
gfx/2d/ImageScaling.cpp
gfx/2d/ImageScaling.h
gfx/2d/ImageScalingSSE2.cpp
gfx/2d/Makefile.in
gfx/2d/gfx2d.vcxproj
gfx/2d/unittest/Main.cpp
gfx/2d/unittest/TestPoint.h
gfx/2d/unittest/TestScaling.cpp
gfx/2d/unittest/TestScaling.h
gfx/2d/unittest/unittest.vcxproj
--- a/gfx/2d/2D.h
+++ b/gfx/2d/2D.h
@@ -799,16 +799,18 @@ protected:
   bool mTransformDirty : 1;
 
   SurfaceFormat mFormat;
 };
 
 class GFX2D_API Factory
 {
 public:
+  static bool HasSSE2();
+
   static TemporaryRef<DrawTarget> CreateDrawTargetForCairoSurface(cairo_surface_t* aSurface);
 
   static TemporaryRef<DrawTarget>
     CreateDrawTarget(BackendType aBackend, const IntSize &aSize, SurfaceFormat aFormat);
   
   static TemporaryRef<DrawTarget>
     CreateDrawTargetForData(BackendType aBackend, unsigned char* aData, const IntSize &aSize, int32_t aStride, SurfaceFormat aFormat);
 
--- a/gfx/2d/Factory.cpp
+++ b/gfx/2d/Factory.cpp
@@ -70,26 +70,114 @@
 #include "DrawTargetDual.h"
 
 #include "Logging.h"
 
 #ifdef PR_LOGGING
 PRLogModuleInfo *sGFX2DLog = PR_NewLogModule("gfx2d");
 #endif
 
+// The following code was largely taken from xpcom/glue/SSE.cpp and
+// made a little simpler.
+enum CPUIDRegister { eax = 0, ebx = 1, ecx = 2, edx = 3 };
+
+#ifdef HAVE_CPUID_H
+
+// cpuid.h is available on gcc 4.3 and higher on i386 and x86_64
+#include <cpuid.h>
+
+// Returns true when __get_cpuid() succeeds for |level| and the output register
+// selected by |reg| has any of the bits in |bit| set. Note that |bit| is a
+// mask, not a bit index.
+static bool
+HasCPUIDBit(unsigned int level, CPUIDRegister reg, unsigned int bit)
+{
+  unsigned int regs[4];
+  if (!__get_cpuid(level, &regs[0], &regs[1], &regs[2], &regs[3])) {
+    return false;
+  }
+  return (regs[reg] & bit) != 0;
+}
+
+#else
+
+#if defined(_MSC_VER) && _MSC_VER >= 1400 && (defined(_M_IX86) || defined(_M_AMD64))
+// MSVC 2005 or newer on x86-32 or x86-64
+#include <intrin.h>
+#elif defined(__SUNPRO_CC) && (defined(__i386) || defined(__x86_64__))
+
+// Define a function identical to MSVC function.
+#ifdef __i386
+// 32-bit variant: executes CPUID with |InfoType| in %eax and stores the
+// resulting %eax, %ebx, %ecx and %edx, in that order, into CPUInfo[0..3].
+// %ebx is swapped with %esi before the instruction and swapped back after
+// the results are stored, so it holds its original value again on exit.
+static void
+__cpuid(int CPUInfo[4], int InfoType)
+{
+  asm (
+    "xchg %esi, %ebx\n"
+    "cpuid\n"
+    "movl %eax, (%edi)\n"
+    "movl %ebx, 4(%edi)\n"
+    "movl %ecx, 8(%edi)\n"
+    "movl %edx, 12(%edi)\n"
+    "xchg %esi, %ebx\n"
+    :
+    : "a"(InfoType), // %eax
+      "D"(CPUInfo) // %edi
+    : "%ecx", "%edx", "%esi"
+  );
+}
+#else
+// 64-bit variant: executes CPUID with |InfoType| in %eax and stores the
+// resulting %eax, %ebx, %ecx and %edx, in that order, into CPUInfo[0..3].
+// %rbx is swapped with %rsi before the instruction and swapped back after
+// the results are stored, so it holds its original value again on exit.
+static void
+__cpuid(int CPUInfo[4], int InfoType)
+{
+  asm (
+    "xchg %rsi, %rbx\n"
+    "cpuid\n"
+    "movl %eax, (%rdi)\n"
+    "movl %ebx, 4(%rdi)\n"
+    "movl %ecx, 8(%rdi)\n"
+    "movl %edx, 12(%rdi)\n"
+    "xchg %rsi, %rbx\n"
+    :
+    : "a"(InfoType), // %eax
+      "D"(CPUInfo) // %rdi
+    : "%ecx", "%edx", "%rsi"
+  );
+}
+#endif
+#endif
+
+// Returns true when CPUID leaf |level| is reported as supported and the
+// output register selected by |reg| has any of the bits in |bit| set.
+// |bit| is a mask, not a bit index.
+static bool
+HasCPUIDBit(unsigned int level, CPUIDRegister reg, unsigned int bit)
+{
+  // Check that the level in question is supported.
+  // Masking with 0x80000000 queries leaf 0 or leaf 0x80000000, i.e. the first
+  // leaf of the range |level| falls in; regs[0] (eax) is then compared
+  // against |level| as the highest available leaf.
+  // NOTE(review): regs is volatile, presumably so the compiler re-reads the
+  // values written by __cpuid rather than assuming them unchanged - confirm.
+  volatile int regs[4];
+  __cpuid((int *)regs, level & 0x80000000u);
+  if (unsigned(regs[0]) < level)
+    return false;
+  __cpuid((int *)regs, level);
+  return !!(unsigned(regs[reg]) & bit);
+}
+
+#endif
+
 namespace mozilla {
 namespace gfx {
 
 // XXX - Need to define an API to set this.
 int sGfxLogLevel = LOG_DEBUG;
 
 #ifdef WIN32
 ID3D10Device1 *Factory::mD3D10Device;
 #endif
 
+// Reports whether the SSE2 code paths may be used: always false when the
+// build did not define USE_SSE2, otherwise the result of testing bit 26 of
+// edx from CPUID leaf 1.
+bool
+Factory::HasSSE2()
+{
+#ifndef USE_SSE2
+  return false;
+#else
+  return HasCPUIDBit(1u, edx, (1u<<26));
+#endif
+}
+
 TemporaryRef<DrawTarget>
 Factory::CreateDrawTarget(BackendType aBackend, const IntSize &aSize, SurfaceFormat aFormat)
 {
   switch (aBackend) {
 #ifdef WIN32
   case BACKEND_DIRECT2D:
     {
       RefPtr<DrawTargetD2D> newTarget;
new file mode 100644
--- /dev/null
+++ b/gfx/2d/ImageScaling.cpp
@@ -0,0 +1,276 @@
+/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Mozilla Corporation code.
+ *
+ * The Initial Developer of the Original Code is Mozilla Foundation.
+ * Portions created by the Initial Developer are Copyright (C) 2011
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ *   Bas Schouten <bschouten@mozilla.com>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#include "ImageScaling.h"
+#include "2D.h"
+
+#include <math.h>
+#include <algorithm>
+
+using namespace std;
+
+namespace mozilla {
+namespace gfx {
+
+// Averages four packed pixels byte by byte without letting one byte spill
+// into its neighbour.
+inline uint32_t Avg2x2(uint32_t a, uint32_t b, uint32_t c, uint32_t d)
+{
+  // Clearing the lowest bit of every byte before a right shift stops that bit
+  // from moving into the byte below.
+  const uint32_t kLowBitsOff = 0xfefefefe;
+
+  // Bitwise adder over a, b and c: a + b + c == partial + 2 * majority.
+  const uint32_t partial = a ^ b ^ c;
+  const uint32_t majority = (a & b) | (a & c) | (b & c);
+
+  // Per-byte truncated average of the partial sum and d.
+  const uint32_t halved = (((partial ^ d) & kLowBitsOff) >> 1) + (partial & d);
+
+  // That value now has the same weight as the carry term; average the two.
+  return (((halved ^ majority) & kLowBitsOff) >> 1) + (halved & majority);
+}
+
+// Truncated per-byte average of two packed pixels.
+inline uint32_t Avg2(uint32_t a, uint32_t b)
+{
+  // a + b == (a ^ b) + 2 * (a & b), so the average is ((a ^ b) >> 1) + (a & b).
+  // The lowest bit of every byte is cleared before the shift so that it
+  // cannot move into the byte below.
+  const uint32_t kLowBitsOff = 0xfefefefe;
+  const uint32_t halfOfDiffering = ((a ^ b) & kLowBitsOff) >> 1;
+
+  return halfOfDiffering + (a & b);
+}
+
+// Halves the original image as often as possible without making either
+// dimension smaller than or equal to the corresponding dimension of aSize:
+// a dimension is halved only while half of it is still strictly larger than
+// the requested one. Width and height are handled independently.
+//
+// If no halving is possible the function returns early and leaves mData,
+// mSize and mStride untouched. Otherwise the result is available through
+// mData / mSize / mStride afterwards.
+void
+ImageHalfScaler::ScaleForSize(const IntSize &aSize)
+{
+  uint32_t horizontalDownscales = 0;
+  uint32_t verticalDownscales = 0;
+
+  // Count how many halvings each dimension can take.
+  IntSize scaleSize = mOrigSize;
+  while ((scaleSize.height / 2) > aSize.height) {
+    verticalDownscales++;
+    scaleSize.height /= 2;
+  }
+
+  while ((scaleSize.width / 2) > aSize.width) {
+    horizontalDownscales++;
+    scaleSize.width /= 2;
+  }
+
+  if (scaleSize == mOrigSize) {
+    return;
+  }
+
+  // The buffer must hold the output of the first pass, which is the largest
+  // image ever written to it: half the original in every dimension that is
+  // scaled, the full original extent in a dimension that is not.
+  IntSize internalSurfSize;
+
+  internalSurfSize.width = max(scaleSize.width, mOrigSize.width / 2);
+  internalSurfSize.height = max(scaleSize.height, mOrigSize.height / 2);
+
+  // 4 bytes per pixel, rounded up to a multiple of 16 bytes.
+  mStride = internalSurfSize.width * 4;
+  if (mStride % 16) {
+    mStride += 16 - (mStride % 16);
+  }
+
+  delete [] mDataStorage;
+  // Allocate 15 bytes extra to make sure we can get 16 byte alignment. We
+  // should add tools for this, see bug 751696.
+  mDataStorage = new uint8_t[internalSurfSize.height * mStride + 15];
+
+  if (uintptr_t(mDataStorage) % 16) {
+    // Our storage does not start at a 16-byte boundary. Make sure mData does!
+    mData = (uint8_t*)(uintptr_t(mDataStorage) +
+      (16 - (uintptr_t(mDataStorage) % 16)));
+  } else {
+    mData = mDataStorage;
+  }
+
+  mSize = scaleSize;
+
+  /* The surface we sample from might not be even sized, if it's not we will
+   * ignore the last row/column. This means we lose some data but it keeps the
+   * code very simple. There's also no perfect answer that provides a better
+   * solution.
+   */
+  // The first pass reads from the caller's buffer; every later pass reads
+  // from and writes to mData in place.
+  IntSize currentSampledSize = mOrigSize;
+  uint32_t currentSampledStride = mOrigStride;
+  uint8_t *currentSampledData = mOrigData;
+  
+  // Halve both dimensions at once for as long as both still need it.
+  while (verticalDownscales && horizontalDownscales) {
+    if (currentSampledSize.width % 2) {
+      currentSampledSize.width -= 1;
+    }
+    if (currentSampledSize.height % 2) {
+      currentSampledSize.height -= 1;
+    }
+
+    HalfImage2D(currentSampledData, currentSampledStride, currentSampledSize,
+                mData, mStride);
+
+    verticalDownscales--;
+    horizontalDownscales--;
+    currentSampledSize.width /= 2;
+    currentSampledSize.height /= 2;
+    currentSampledData = mData;
+    currentSampledStride = mStride;
+  }
+
+  // Remaining height-only halvings.
+  while (verticalDownscales) {
+    if (currentSampledSize.height % 2) {
+      currentSampledSize.height -= 1;
+    }
+
+    HalfImageVertical(currentSampledData, currentSampledStride, currentSampledSize,
+                      mData, mStride);
+
+    verticalDownscales--;
+    currentSampledSize.height /= 2;
+    currentSampledData = mData;
+    currentSampledStride = mStride;
+  }
+
+
+  // Remaining width-only halvings.
+  while (horizontalDownscales) {
+    if (currentSampledSize.width % 2) {
+      currentSampledSize.width -= 1;
+    }
+
+    HalfImageHorizontal(currentSampledData, currentSampledStride, currentSampledSize,
+                        mData, mStride);
+
+    horizontalDownscales--;
+    currentSampledSize.width /= 2;
+    currentSampledData = mData;
+    currentSampledStride = mStride;
+  }
+}
+
+// Halves the image in both dimensions. Uses the SSE2 implementation when it
+// was compiled in and Factory::HasSSE2() reports support, and the plain C
+// implementation otherwise.
+void
+ImageHalfScaler::HalfImage2D(uint8_t *aSource, int32_t aSourceStride,
+                             const IntSize &aSourceSize, uint8_t *aDest,
+                             uint32_t aDestStride)
+{
+#ifdef USE_SSE2
+  if (Factory::HasSSE2()) {
+    HalfImage2D_SSE2(aSource, aSourceStride, aSourceSize, aDest, aDestStride);
+    return;
+  }
+#endif
+
+  HalfImage2D_C(aSource, aSourceStride, aSourceSize, aDest, aDestStride);
+}
+
+// Halves the image height only. Uses the SSE2 implementation when it was
+// compiled in and Factory::HasSSE2() reports support, and the plain C
+// implementation otherwise.
+void
+ImageHalfScaler::HalfImageVertical(uint8_t *aSource, int32_t aSourceStride,
+                                   const IntSize &aSourceSize, uint8_t *aDest,
+                                   uint32_t aDestStride)
+{
+#ifdef USE_SSE2
+  if (Factory::HasSSE2()) {
+    HalfImageVertical_SSE2(aSource, aSourceStride, aSourceSize, aDest, aDestStride);
+    return;
+  }
+#endif
+
+  HalfImageVertical_C(aSource, aSourceStride, aSourceSize, aDest, aDestStride);
+}
+
+// Halves the image width only. Uses the SSE2 implementation when it was
+// compiled in and Factory::HasSSE2() reports support, and the plain C
+// implementation otherwise.
+void
+ImageHalfScaler::HalfImageHorizontal(uint8_t *aSource, int32_t aSourceStride,
+                                     const IntSize &aSourceSize, uint8_t *aDest,
+                                     uint32_t aDestStride)
+{
+#ifdef USE_SSE2
+  if (Factory::HasSSE2()) {
+    HalfImageHorizontal_SSE2(aSource, aSourceStride, aSourceSize, aDest, aDestStride);
+    return;
+  }
+#endif
+
+  HalfImageHorizontal_C(aSource, aSourceStride, aSourceSize, aDest, aDestStride);
+}
+
+// Portable 2D halving: every output pixel is the Avg2x2() of one 2x2 block of
+// 32-bit source pixels. Source rows y and y + 1 feed output row y / 2.
+void
+ImageHalfScaler::HalfImage2D_C(uint8_t *aSource, int32_t aSourceStride,
+                               const IntSize &aSourceSize, uint8_t *aDest,
+                               uint32_t aDestStride)
+{
+  for (int srcY = 0; srcY < aSourceSize.height; srcY += 2) {
+    uint32_t *out = (uint32_t*)(aDest + (srcY / 2) * aDestStride);
+    for (int srcX = 0; srcX < aSourceSize.width; srcX += 2) {
+      uint32_t *top = (uint32_t*)(aSource + (srcY * aSourceStride + srcX * 4));
+      uint32_t *bottom = (uint32_t*)(aSource + ((srcY + 1) * aSourceStride + srcX * 4));
+
+      *out = Avg2x2(top[0], top[1], bottom[0], bottom[1]);
+      out++;
+    }
+  }
+}
+
+// Portable vertical halving: every output pixel is the Avg2() of the two
+// vertically adjacent 32-bit source pixels in rows y and y + 1.
+void
+ImageHalfScaler::HalfImageVertical_C(uint8_t *aSource, int32_t aSourceStride,
+                                     const IntSize &aSourceSize, uint8_t *aDest,
+                                     uint32_t aDestStride)
+{
+  for (int srcY = 0; srcY < aSourceSize.height; srcY += 2) {
+    uint32_t *out = (uint32_t*)(aDest + (srcY / 2) * aDestStride);
+    for (int col = 0; col < aSourceSize.width; col++) {
+      uint32_t *above = (uint32_t*)(aSource + (srcY * aSourceStride + col * 4));
+      uint32_t *below = (uint32_t*)(aSource + ((srcY + 1) * aSourceStride + col * 4));
+
+      out[col] = Avg2(*above, *below);
+    }
+  }
+}
+
+// Portable horizontal halving: every output pixel is the Avg2() of two
+// horizontally adjacent 32-bit source pixels of the same row.
+void
+ImageHalfScaler::HalfImageHorizontal_C(uint8_t *aSource, int32_t aSourceStride,
+                                       const IntSize &aSourceSize, uint8_t *aDest,
+                                       uint32_t aDestStride)
+{
+  for (int row = 0; row < aSourceSize.height; row++) {
+    uint32_t *out = (uint32_t*)(aDest + row * aDestStride);
+    for (int col = 0; col < aSourceSize.width; col += 2) {
+      uint32_t *pair = (uint32_t*)(aSource + (row * aSourceStride + col * 4));
+
+      *out = Avg2(pair[0], pair[1]);
+      out++;
+    }
+  }
+}
+
+}
+}
new file mode 100644
--- /dev/null
+++ b/gfx/2d/ImageScaling.h
@@ -0,0 +1,108 @@
+/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Mozilla Corporation code.
+ *
+ * The Initial Developer of the Original Code is Mozilla Foundation.
+ * Portions created by the Initial Developer are Copyright (C) 2011
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ *   Bas Schouten <bschouten@mozilla.com>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#ifndef _MOZILLA_GFX_IMAGESCALING_H
+#define _MOZILLA_GFX_IMAGESCALING_H
+
+#include "Types.h"
+
+#include <vector>
+#include "Point.h"
+
+namespace mozilla {
+namespace gfx {
+
+// Downscales a 4-bytes-per-pixel image by repeatedly halving it.
+//
+// The source buffer handed to the constructor is only referenced: it is
+// neither copied nor freed by this class. The scaled result is kept in
+// storage owned by this object and released in the destructor.
+class ImageHalfScaler
+{
+public:
+  // aData/aStride/aSize describe the source image. No scaling happens until
+  // ScaleForSize() is called; until then the scaled-data pointer is NULL and
+  // the scaled stride is 0.
+  ImageHalfScaler(uint8_t *aData, int32_t aStride, const IntSize &aSize)
+    : mOrigData(aData), mOrigStride(aStride), mOrigSize(aSize)
+    , mDataStorage(NULL), mData(NULL), mStride(0)
+  {
+  }
+
+  ~ImageHalfScaler()
+  {
+    delete [] mDataStorage;
+  }
+
+  // Halves the source as often as possible without going below aSize.
+  void ScaleForSize(const IntSize &aSize);
+
+  // Accessors for the scaled image. GetScaledData() returns NULL when
+  // ScaleForSize() has not produced a scaled image.
+  uint8_t *GetScaledData() const { return mData; }
+  IntSize GetSize() const { return mSize; }
+  uint32_t GetStride() const { return mStride; }
+
+private:
+  // Dispatchers that select the SSE2 or the C implementation.
+  void HalfImage2D(uint8_t *aSource, int32_t aSourceStride, const IntSize &aSourceSize,
+                   uint8_t *aDest, uint32_t aDestStride);
+  void HalfImageVertical(uint8_t *aSource, int32_t aSourceStride, const IntSize &aSourceSize,
+                         uint8_t *aDest, uint32_t aDestStride);
+  void HalfImageHorizontal(uint8_t *aSource, int32_t aSourceStride, const IntSize &aSourceSize,
+                           uint8_t *aDest, uint32_t aDestStride);
+
+  // This is our SSE2 scaling function. Our destination must always be 16-byte
+  // aligned and use a 16-byte aligned stride.
+  void HalfImage2D_SSE2(uint8_t *aSource, int32_t aSourceStride, const IntSize &aSourceSize,
+                        uint8_t *aDest, uint32_t aDestStride);
+  void HalfImageVertical_SSE2(uint8_t *aSource, int32_t aSourceStride, const IntSize &aSourceSize,
+                              uint8_t *aDest, uint32_t aDestStride);
+  void HalfImageHorizontal_SSE2(uint8_t *aSource, int32_t aSourceStride, const IntSize &aSourceSize,
+                                uint8_t *aDest, uint32_t aDestStride);
+
+  // Portable fallbacks.
+  void HalfImage2D_C(uint8_t *aSource, int32_t aSourceStride, const IntSize &aSourceSize,
+                     uint8_t *aDest, uint32_t aDestStride);
+  void HalfImageVertical_C(uint8_t *aSource, int32_t aSourceStride, const IntSize &aSourceSize,
+                           uint8_t *aDest, uint32_t aDestStride);
+  void HalfImageHorizontal_C(uint8_t *aSource, int32_t aSourceStride, const IntSize &aSourceSize,
+                             uint8_t *aDest, uint32_t aDestStride);
+
+  // Source image; not owned.
+  uint8_t *mOrigData;
+  int32_t mOrigStride;
+  IntSize mOrigSize;
+
+  // Raw allocation backing mData; owned.
+  uint8_t *mDataStorage;
+  // Guaranteed 16-byte aligned
+  uint8_t *mData;
+  IntSize mSize;
+  // Guaranteed 16-byte aligned
+  uint32_t mStride;
+};
+
+}
+}
+
+#endif
new file mode 100644
--- /dev/null
+++ b/gfx/2d/ImageScalingSSE2.cpp
@@ -0,0 +1,377 @@
+/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Mozilla Corporation code.
+ *
+ * The Initial Developer of the Original Code is Mozilla Foundation.
+ * Portions created by the Initial Developer are Copyright (C) 2011
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ *   Bas Schouten <bschouten@mozilla.com>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#include "ImageScaling.h"
+#include "mozilla/Attributes.h"
+
+/* The functions below use the following system for averaging 4 pixels:
+ *
+ * The first observation is that a half-adder is implemented as follows:
+ * R = S + 2C or in the case of a and b (a ^ b) + ((a & b) << 1);
+ *
+ * This can be trivially extended to three pixels by observing that when
+ * doing (a ^ b ^ c) as the sum, the carry is simply the bitwise-or of the
+ * carries of the individual numbers, since the sum of 3 bits can only ever
+ * have a carry of one.
+ *
+ * We then observe that the average is ((carry << 1) + sum) >> 1, or,
+ * assuming overflows and underflows are eliminated, carry + (sum >> 1).
+ *
+ * We now average our existing sum with the fourth number, so we get:
+ * sum2 = (sum + d) >> 1 or (sum >> 1) + (d >> 1).
+ *
+ * We now observe that our sum has been moved into place relative to the
+ * carry, so we can now average with the carry to get the final 4 input
+ * average: avg = (sum2 + carry) >> 1;
+ *
+ * Or to reverse the proof:
+ * avg = ((sum >> 1) + carry + d >> 1) >> 1
+ * avg = ((a + b + c) >> 1 + d >> 1) >> 1
+ * avg = ((a + b + c + d) >> 2)
+ *
+ * An additional fact used in the SSE versions is the concept that we can
+ * trivially convert a rounded average to a truncated average:
+ *
+ * We have:
+ * f(a, b) = (a + b + 1) >> 1
+ *
+ * And want:
+ * g(a, b) = (a + b) >> 1
+ *
+ * Observe:
+ * ~f(~a, ~b) == ~((~a + ~b + 1) >> 1)
+ *            == ~((-a - 1 + -b - 1 + 1) >> 1)
+ *            == ~((-a - 1 + -b) >> 1)
+ *            == ~((-(a + b) - 1) >> 1)
+ *            == ~((~(a + b)) >> 1)
+ *            == (a + b) >> 1
+ *            == g(a, b)
+ */
+
+// Bitwise complement of all 128 bits. This is deliberately not called "not":
+// in ISO C++ that is an alternative token for '!' and cannot be used as an
+// identifier.
+MOZ_ALWAYS_INLINE __m128i not_si128(__m128i arg)
+{
+  __m128i minusone = _mm_set1_epi32(0xffffffff);
+  return _mm_xor_si128(arg, minusone);
+}
+
+/* Averages 2x2 blocks of an 8x2 pixel area into 4 output pixels. *a and *b
+ * hold 8 consecutive pixels of the upper row, *c and *d the 8 pixels below
+ * them. The arguments are overwritten with shuffled data.
+ *
+ * We have to pass pointers here, MSVC does not allow passing more than 3
+ * __m128i arguments on the stack. And it does not allow 16-byte aligned
+ * stack variables. This inlines properly on MSVC 2010. It does -not- inline
+ * with just the inline directive.
+ */
+MOZ_ALWAYS_INLINE __m128i avg_sse2_8x2(__m128i *a, __m128i *b, __m128i *c, __m128i *d)
+{
+#define shuf1 _MM_SHUFFLE(2, 0, 2, 0)
+#define shuf2 _MM_SHUFFLE(3, 1, 3, 1)
+
+// This cannot be an inline function as the __Imm argument to _mm_shuffle_ps
+// needs to be a compile time constant.
+#define shuffle_si128(arga, argb, imm) \
+  _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps((arga)), _mm_castsi128_ps((argb)), (imm)))
+
+  // De-interleave each row: afterwards *a / *c hold the even-numbered pixels
+  // and *b / *d the odd-numbered ones, both in source order.
+  __m128i t = shuffle_si128(*a, *b, shuf1);
+  *b = shuffle_si128(*a, *b, shuf2);
+  *a = t;
+  t = shuffle_si128(*c, *d, shuf1);
+  *d = shuffle_si128(*c, *d, shuf2);
+  *c = t;
+
+#undef shuf1
+#undef shuf2
+#undef shuffle_si128
+
+  __m128i sum = _mm_xor_si128(*a, _mm_xor_si128(*b, *c));
+
+  __m128i carry = _mm_or_si128(_mm_and_si128(*a, *b), _mm_or_si128(_mm_and_si128(*a, *c), _mm_and_si128(*b, *c)));
+
+  // _mm_avg_epu8 rounds up; complementing inputs and output turns it into a
+  // truncating average.
+  sum = _mm_avg_epu8(not_si128(sum), not_si128(*d));
+
+  return not_si128(_mm_avg_epu8(sum, not_si128(carry)));
+}
+
+// Truncating per-byte average of 4 pixels in a with the 4 pixels in b.
+MOZ_ALWAYS_INLINE __m128i avg_sse2_4x2_4x1(__m128i a, __m128i b)
+{
+  return not_si128(_mm_avg_epu8(not_si128(a), not_si128(b)));
+}
+
+// Averages horizontally adjacent pairs of the 8 consecutive pixels held in a
+// (pixels 0-3) and b (pixels 4-7), producing 4 output pixels in source order.
+MOZ_ALWAYS_INLINE __m128i avg_sse2_8x1_4x1(__m128i a, __m128i b)
+{
+  // _mm_shuffle_ps takes its two low lanes from the first operand, so a must
+  // come first for the averages of pixels 0-3 to end up in the low lanes.
+  __m128i t = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _MM_SHUFFLE(3, 1, 3, 1)));
+  b = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _MM_SHUFFLE(2, 0, 2, 0)));
+  a = t;
+
+  return not_si128(_mm_avg_epu8(not_si128(a), not_si128(b)));
+}
+
+/* Before Nehalem _mm_loadu_si128 could be very slow, this trick is a little
+ * faster. Once enough people are on architectures where _mm_loadu_si128 is
+ * fast we can migrate to it.
+ */
+// Loads 16 bytes from aSource as two 8-byte halves.
+MOZ_ALWAYS_INLINE __m128i loadUnaligned128(const __m128i *aSource)
+{
+  // The zero passed to the first load is only a placeholder: the low half is
+  // replaced here and the high half by the second load, so none of it
+  // survives in the result.
+  __m128 res = _mm_loadl_pi(_mm_set1_ps(0), (const __m64*)aSource);
+  return _mm_castps_si128(_mm_loadh_pi(res, ((const __m64*)(aSource)) + 1));
+}
+
+// Scalar per-byte average of four packed pixels, used for the pixels left
+// over after the vectorised loops.
+MOZ_ALWAYS_INLINE uint32_t Avg2x2(uint32_t a, uint32_t b, uint32_t c, uint32_t d)
+{
+  // Not having a byte based average instruction means the lowest bit of every
+  // byte has to be masked off before shifting, or it would end up in the
+  // neighbouring byte.
+  const uint32_t kLowBitsOff = 0xfefefefe;
+
+  // a + b + c == partial + 2 * majority, bit by bit.
+  const uint32_t partial = a ^ b ^ c;
+  const uint32_t majority = (a & b) | (a & c) | (b & c);
+
+  const uint32_t halved = (((partial ^ d) & kLowBitsOff) >> 1) + (partial & d);
+
+  return (((halved ^ majority) & kLowBitsOff) >> 1) + (halved & majority);
+}
+
+// Scalar truncated per-byte average of two packed pixels.
+MOZ_ALWAYS_INLINE uint32_t Avg2(uint32_t a, uint32_t b)
+{
+  // The lowest bit of every byte is cleared before the shift so that it
+  // cannot move into the byte below.
+  const uint32_t kLowBitsOff = 0xfefefefe;
+  const uint32_t halfOfDiffering = ((a ^ b) & kLowBitsOff) >> 1;
+
+  return halfOfDiffering + (a & b);
+}
+
+namespace mozilla {
+namespace gfx {
+
+// SSE2 2D halving. Each pair of source rows is processed 8 source pixels
+// (4 output pixels) at a time; whatever is left at the end of the row is
+// handled two source pixels at a time with the scalar Avg2x2(). Output is
+// written with aligned 16-byte stores.
+void
+ImageHalfScaler::HalfImage2D_SSE2(uint8_t *aSource, int32_t aSourceStride,
+                                  const IntSize &aSourceSize, uint8_t *aDest,
+                                  uint32_t aDestStride)
+{
+  const int Bpp = 4;
+
+  for (int y = 0; y < aSourceSize.height; y += 2) {
+    __m128i *storage = (__m128i*)(aDest + (y / 2) * aDestStride);
+    int x = 0;
+    // Run a loop depending on alignment.
+    // The four variants differ only in which of the two source rows may be
+    // read with aligned loads.
+    if (!(uintptr_t(aSource + (y * aSourceStride)) % 16) &&
+        !(uintptr_t(aSource + ((y + 1) * aSourceStride)) % 16)) {
+      for (; x < (aSourceSize.width - 7); x += 8) {
+        __m128i *upperRow = (__m128i*)(aSource + (y * aSourceStride + x * Bpp));
+        __m128i *lowerRow = (__m128i*)(aSource + ((y + 1) * aSourceStride + x * Bpp));
+
+        __m128i a = _mm_load_si128(upperRow);
+        __m128i b = _mm_load_si128(upperRow + 1);
+        __m128i c = _mm_load_si128(lowerRow);
+        __m128i d = _mm_load_si128(lowerRow + 1);
+
+        *storage++ = avg_sse2_8x2(&a, &b, &c, &d);
+      }
+    } else if (!(uintptr_t(aSource + (y * aSourceStride)) % 16)) {
+      // Only the upper row is aligned.
+      for (; x < (aSourceSize.width - 7); x += 8) {
+        __m128i *upperRow = (__m128i*)(aSource + (y * aSourceStride + x * Bpp));
+        __m128i *lowerRow = (__m128i*)(aSource + ((y + 1) * aSourceStride + x * Bpp));
+
+        __m128i a = _mm_load_si128(upperRow);
+        __m128i b = _mm_load_si128(upperRow + 1);
+        __m128i c = loadUnaligned128(lowerRow);
+        __m128i d = loadUnaligned128(lowerRow + 1);
+
+        *storage++ = avg_sse2_8x2(&a, &b, &c, &d);
+      }
+    } else if (!(uintptr_t(aSource + ((y + 1) * aSourceStride)) % 16)) {
+      // Only the lower row is aligned.
+      for (; x < (aSourceSize.width - 7); x += 8) {
+        __m128i *upperRow = (__m128i*)(aSource + (y * aSourceStride + x * Bpp));
+        __m128i *lowerRow = (__m128i*)(aSource + ((y + 1) * aSourceStride + x * Bpp));
+
+        __m128i a = loadUnaligned128((__m128i*)upperRow);
+        __m128i b = loadUnaligned128((__m128i*)upperRow + 1);
+        __m128i c = _mm_load_si128((__m128i*)lowerRow);
+        __m128i d = _mm_load_si128((__m128i*)lowerRow + 1);
+
+        *storage++ = avg_sse2_8x2(&a, &b, &c, &d);
+      }
+    } else {
+      // Neither row is aligned.
+      for (; x < (aSourceSize.width - 7); x += 8) {
+        __m128i *upperRow = (__m128i*)(aSource + (y * aSourceStride + x * Bpp));
+        __m128i *lowerRow = (__m128i*)(aSource + ((y + 1) * aSourceStride + x * Bpp));
+
+        __m128i a = loadUnaligned128(upperRow);
+        __m128i b = loadUnaligned128(upperRow + 1);
+        __m128i c = loadUnaligned128(lowerRow);
+        __m128i d = loadUnaligned128(lowerRow + 1);
+
+        *storage++ = avg_sse2_8x2(&a, &b, &c, &d);
+      }
+    }
+
+    uint32_t *unalignedStorage = (uint32_t*)storage;
+    // Take care of the final pixels, we know there's an even number of pixels
+    // in the source rectangle. We use a 2x2 'simd' implementation for this.
+    //
+    // Potentially we only have to do this in the last row since overflowing
+    // 8 pixels in an earlier row would appear to be harmless as it doesn't
+    // touch invalid memory, even when reading and writing to the same surface.
+    // In practice we only do this when doing an additional downscale pass, and
+    // in this situation we have unused stride to write into harmlessly.
+    // I do not believe the additional code complexity would be worth it though.
+    for (; x < aSourceSize.width; x += 2) {
+      uint8_t *upperRow = aSource + (y * aSourceStride + x * Bpp);
+      uint8_t *lowerRow = aSource + ((y + 1) * aSourceStride + x * Bpp);
+
+      *unalignedStorage++ = Avg2x2(*(uint32_t*)upperRow, *((uint32_t*)upperRow + 1),
+                                   *(uint32_t*)lowerRow, *((uint32_t*)lowerRow + 1));
+    }
+  }
+}
+
+// SSE2 vertical halving. Each pair of source rows is averaged 4 pixels at a
+// time; the pixels left at the end of the row are handled one at a time with
+// the scalar Avg2(). Output is written with aligned 16-byte stores.
+void
+ImageHalfScaler::HalfImageVertical_SSE2(uint8_t *aSource, int32_t aSourceStride,
+                                        const IntSize &aSourceSize, uint8_t *aDest,
+                                        uint32_t aDestStride)
+{
+  for (int y = 0; y < aSourceSize.height; y += 2) {
+    __m128i *storage = (__m128i*)(aDest + (y / 2) * aDestStride);
+    int x = 0;
+    // Run a loop depending on alignment.
+    if (!(uintptr_t(aSource + (y * aSourceStride)) % 16) &&
+        !(uintptr_t(aSource + ((y + 1) * aSourceStride)) % 16)) {
+      // Both rows are aligned.
+      for (; x < (aSourceSize.width - 3); x += 4) {
+        uint8_t *upperRow = aSource + (y * aSourceStride + x * 4);
+        uint8_t *lowerRow = aSource + ((y + 1) * aSourceStride + x * 4);
+
+        __m128i a = _mm_load_si128((__m128i*)upperRow);
+        __m128i b = _mm_load_si128((__m128i*)lowerRow);
+
+        *storage++ = avg_sse2_4x2_4x1(a, b);
+      }
+    } else if (!(uintptr_t(aSource + (y * aSourceStride)) % 16)) {
+      // Only the upper row is aligned.
+      for (; x < (aSourceSize.width - 3); x += 4) {
+        uint8_t *upperRow = aSource + (y * aSourceStride + x * 4);
+        uint8_t *lowerRow = aSource + ((y + 1) * aSourceStride + x * 4);
+
+        __m128i a = _mm_load_si128((__m128i*)upperRow);
+        __m128i b = loadUnaligned128((__m128i*)lowerRow);
+
+        *storage++ = avg_sse2_4x2_4x1(a, b);
+      }
+    } else if (!(uintptr_t(aSource + ((y + 1) * aSourceStride)) % 16)) {
+      // Only the lower row is aligned. This must test row y + 1: testing row
+      // y again would repeat the previous condition and never be taken.
+      for (; x < (aSourceSize.width - 3); x += 4) {
+        uint8_t *upperRow = aSource + (y * aSourceStride + x * 4);
+        uint8_t *lowerRow = aSource + ((y + 1) * aSourceStride + x * 4);
+
+        __m128i a = loadUnaligned128((__m128i*)upperRow);
+        __m128i b = _mm_load_si128((__m128i*)lowerRow);
+
+        *storage++ = avg_sse2_4x2_4x1(a, b);
+      }
+    } else {
+      // Neither row is aligned.
+      for (; x < (aSourceSize.width - 3); x += 4) {
+        uint8_t *upperRow = aSource + (y * aSourceStride + x * 4);
+        uint8_t *lowerRow = aSource + ((y + 1) * aSourceStride + x * 4);
+
+        __m128i a = loadUnaligned128((__m128i*)upperRow);
+        __m128i b = loadUnaligned128((__m128i*)lowerRow);
+
+        *storage++ = avg_sse2_4x2_4x1(a, b);
+      }
+    }
+
+    uint32_t *unalignedStorage = (uint32_t*)storage;
+    // Take care of the final pixels (up to 3 per row), one column at a time.
+    //
+    // Processing a full vector past the end of an earlier row instead would
+    // raise the same overflow questions as in the 2D version; the scalar loop
+    // avoids them.
+    for (; x < aSourceSize.width; x++) {
+      uint8_t *upperRow = aSource + (y * aSourceStride + x * 4);
+      uint8_t *lowerRow = aSource + ((y + 1) * aSourceStride + x * 4);
+
+      *unalignedStorage++ = Avg2(*(uint32_t*)upperRow, *(uint32_t*)lowerRow);
+    }
+  }
+}
+
+// SSE2 horizontal halving. Each source row is processed 8 source pixels
+// (4 output pixels) at a time; the pixels left at the end of the row are
+// handled two at a time with the scalar Avg2(). Output is written with
+// aligned 16-byte stores.
+void
+ImageHalfScaler::HalfImageHorizontal_SSE2(uint8_t *aSource, int32_t aSourceStride,
+                                          const IntSize &aSourceSize, uint8_t *aDest,
+                                          uint32_t aDestStride)
+{
+  for (int y = 0; y < aSourceSize.height; y++) {
+    __m128i *storage = (__m128i*)(aDest + (y * aDestStride));
+    int x = 0;
+    // Run a loop depending on alignment.
+    if (!(uintptr_t(aSource + (y * aSourceStride)) % 16)) {
+      // The source row is aligned.
+      for (; x < (aSourceSize.width - 7); x += 8) {
+        __m128i* pixels = (__m128i*)(aSource + (y * aSourceStride + x * 4));
+
+        __m128i a = _mm_load_si128(pixels);
+        __m128i b = _mm_load_si128(pixels + 1);
+
+        *storage++ = avg_sse2_8x1_4x1(a, b);
+      }
+    } else {
+      // The source row is not aligned.
+      for (; x < (aSourceSize.width - 7); x += 8) {
+        __m128i* pixels = (__m128i*)(aSource + (y * aSourceStride + x * 4));
+
+        __m128i a = loadUnaligned128(pixels);
+        __m128i b = loadUnaligned128(pixels + 1);
+
+        *storage++ = avg_sse2_8x1_4x1(a, b);
+      }
+    }
+
+    uint32_t *unalignedStorage = (uint32_t*)storage;
+    // Take care of the final pixels, we know there's an even number of pixels
+    // in the source rectangle.
+    //
+    // Similar overflow considerations are valid as in the previous function.
+    for (; x < aSourceSize.width; x += 2) {
+      uint32_t *pixels = (uint32_t*)(aSource + (y * aSourceStride + x * 4));
+
+      *unalignedStorage++ = Avg2(*pixels, *(pixels + 1));
+    }
+  }
+}
+
+}
+}
--- a/gfx/2d/Makefile.in
+++ b/gfx/2d/Makefile.in
@@ -69,16 +69,17 @@ CPPSRCS	= \
 	Factory.cpp \
         Matrix.cpp \
         DrawTargetCairo.cpp \
         SourceSurfaceCairo.cpp \
         PathCairo.cpp \
         Blur.cpp \
         ScaledFontBase.cpp \
         DrawTargetDual.cpp \
+        ImageScaling.cpp \
         $(NULL)
 
 ifeq (cocoa,$(MOZ_WIDGET_TOOLKIT))
 CPPSRCS	+= \
 	   SourceSurfaceCG.cpp \
 	   DrawTargetCG.cpp \
 	   PathCG.cpp \
 	   $(NULL)
@@ -112,16 +113,22 @@ DEFINES += -DSK_BUILD_FOR_ANDROID_NDK
 endif
 
 DEFINES += -DSK_A32_SHIFT=24 -DSK_R32_SHIFT=16 -DSK_G32_SHIFT=8 -DSK_B32_SHIFT=0
 
 ifdef MOZ_DEBUG
 DEFINES += -DGFX_LOG_DEBUG -DGFX_LOG_WARNING
 endif
 
+# Are we targeting x86 or x64?  If so, build SSE2 files.
+ifneq (,$(INTEL_ARCHITECTURE))
+CPPSRCS += ImageScalingSSE2.cpp
+DEFINES += -DUSE_SSE2
+endif
+
 ifeq ($(MOZ_WIDGET_TOOLKIT),windows)
 CPPSRCS	+= \
         DrawTargetD2D.cpp \
         SourceSurfaceD2D.cpp \
         SourceSurfaceD2DTarget.cpp \
         PathD2D.cpp \
         ScaledFontDWrite.cpp \
         $(NULL)
@@ -139,11 +146,23 @@ endif
 #        DrawTargetCG.cpp \
 #        SourceSurfaceCG.cpp \
 #	$(NULL)
 #
 ## Always link with OpenGL/AGL
 #EXTRA_DSO_LDOPTS += -framework OpenGL -framework AGL -framework QuickTime -framework AppKit -framework QuartzCore
 #endif
 
+# The file uses SSE2 intrinsics, so it needs special compile flags on some
+# compilers.
+ifneq (,$(INTEL_ARCHITECTURE))
+ifdef GNU_CC
+ImageScalingSSE2.$(OBJ_SUFFIX): CXXFLAGS+=-msse2
+endif
+
+ifdef SOLARIS_SUNPRO_CXX
+ImageScalingSSE2.$(OBJ_SUFFIX): OS_CXXFLAGS += -xarch=sse2 -xO4
+endif
+endif
+
 include $(topsrcdir)/config/rules.mk
 
 CXXFLAGS += $(MOZ_CAIRO_CFLAGS)
--- a/gfx/2d/gfx2d.vcxproj
+++ b/gfx/2d/gfx2d.vcxproj
@@ -36,17 +36,17 @@
     <LinkIncremental>true</LinkIncremental>
     <ExecutablePath>$(DXSDK_DIR)\Utilities\bin\x86;$(ExecutablePath)</ExecutablePath>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <LinkIncremental>true</LinkIncremental>
   </PropertyGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <ClCompile>
-      <PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;%(PreprocessorDefinitions);GFX_LOG_DEBUG;GFX_LOG_WARNING;MFBT_STAND_ALONE;XP_WIN</PreprocessorDefinitions>
+      <PreprocessorDefinitions>USE_SSE2;WIN32;_DEBUG;_WINDOWS;%(PreprocessorDefinitions);GFX_LOG_DEBUG;GFX_LOG_WARNING;MFBT_STAND_ALONE;XP_WIN</PreprocessorDefinitions>
       <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
       <WarningLevel>Level3</WarningLevel>
       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
       <Optimization>Disabled</Optimization>
     </ClCompile>
     <Link>
       <TargetMachine>MachineX86</TargetMachine>
       <GenerateDebugInformation>true</GenerateDebugInformation>
@@ -75,32 +75,35 @@
     <ClInclude Include="BaseMargin.h" />
     <ClInclude Include="BasePoint.h" />
     <ClInclude Include="BaseRect.h" />
     <ClInclude Include="BaseSize.h" />
     <ClInclude Include="DrawTargetD2D.h" />
     <ClInclude Include="DrawTargetDual.h" />
     <ClInclude Include="GradientStopsD2D.h" />
     <ClInclude Include="HelpersD2D.h" />
+    <ClInclude Include="ImageScaling.h" />
     <ClInclude Include="Logging.h" />
     <ClInclude Include="Matrix.h" />
     <ClInclude Include="PathD2D.h" />
     <ClInclude Include="Point.h" />
     <ClInclude Include="Rect.h" />
     <ClInclude Include="ScaledFontDWrite.h" />
     <ClInclude Include="SourceSurfaceD2D.h" />
     <ClInclude Include="SourceSurfaceD2DTarget.h" />
     <ClInclude Include="Tools.h" />
     <ClInclude Include="Types.h" />
     <ClInclude Include="UserData.h" />
   </ItemGroup>
   <ItemGroup>
     <ClCompile Include="DrawTargetD2D.cpp" />
     <ClCompile Include="DrawTargetDual.cpp" />
     <ClCompile Include="Factory.cpp" />
+    <ClCompile Include="ImageScaling.cpp" />
+    <ClCompile Include="ImageScalingSSE2.cpp" />
     <ClCompile Include="Matrix.cpp" />
     <ClCompile Include="PathD2D.cpp" />
     <ClCompile Include="ScaledFontDWrite.cpp" />
     <ClCompile Include="SourceSurfaceD2D.cpp" />
     <ClCompile Include="SourceSurfaceD2DTarget.cpp" />
   </ItemGroup>
   <ItemGroup>
     <None Include="Makefile.in" />
--- a/gfx/2d/unittest/Main.cpp
+++ b/gfx/2d/unittest/Main.cpp
@@ -32,16 +32,17 @@
  * and other provisions required by the GPL or the LGPL. If you do not delete
  * the provisions above, a recipient may use your version of this file under
  * the terms of any one of the MPL, the GPL or the LGPL.
  *
  * ***** END LICENSE BLOCK ***** */
 
 #include "SanityChecks.h"
 #include "TestPoint.h"
+#include "TestScaling.h"
 #ifdef WIN32
 #include "TestDrawTargetD2D.h"
 #endif
 
 #include <string>
 #include <sstream>
 
 struct TestObject {
@@ -56,17 +57,18 @@ int
 main()
 {
   TestObject tests[] = 
   {
     { new SanityChecks(), "Sanity Checks" },
   #ifdef WIN32
     { new TestDrawTargetD2D(), "DrawTarget (D2D)" },
   #endif
-    { new TestPoint(), "Point Tests" }
+    { new TestPoint(), "Point Tests" },
+    { new TestScaling(), "Scaling Tests" }
   };
 
   int totalFailures = 0;
   int totalTests = 0;
   stringstream message;
   printf("------ STARTING RUNNING TESTS ------\n");
   for (int i = 0; i < sizeof(tests) / sizeof(TestObject); i++) {
     message << "--- RUNNING TESTS: " << tests[i].name << " ---\n";
--- a/gfx/2d/unittest/TestPoint.h
+++ b/gfx/2d/unittest/TestPoint.h
@@ -30,16 +30,18 @@
  * use your version of this file under the terms of the MPL, indicate your
  * decision by deleting the provisions above and replace them with the notice
  * and other provisions required by the GPL or the LGPL. If you do not delete
  * the provisions above, a recipient may use your version of this file under
  * the terms of any one of the MPL, the GPL or the LGPL.
  *
  * ***** END LICENSE BLOCK ***** */
 
+#pragma once
+
 #include "TestBase.h"
 
 class TestPoint : public TestBase
 {
 public:
   TestPoint();
 
   void Addition();
new file mode 100644
--- /dev/null
+++ b/gfx/2d/unittest/TestScaling.cpp
@@ -0,0 +1,275 @@
+/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Mozilla Corporation code.
+ *
+ * The Initial Developer of the Original Code is Mozilla Foundation.
+ * Portions created by the Initial Developer are Copyright (C) 2011
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ *   Bas Schouten <bschouten@mozilla.com>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#include "TestScaling.h"
+
+#include "ImageScaling.h"
+
+using namespace mozilla::gfx;
+// Registers each scaling test via REGISTER_TEST (macro presumably provided by TestBase.h).
+TestScaling::TestScaling()
+{
+  REGISTER_TEST(TestScaling, BasicHalfScale);
+  REGISTER_TEST(TestScaling, DoubleHalfScale);
+  REGISTER_TEST(TestScaling, UnevenHalfScale);
+  REGISTER_TEST(TestScaling, OddStrideHalfScale);
+  REGISTER_TEST(TestScaling, VerticalHalfScale);
+  REGISTER_TEST(TestScaling, HorizontalHalfScale);
+  REGISTER_TEST(TestScaling, MixedHalfScale);
+}
+// A 500x500 image requested at 220x240 is expected to be halved once per axis, to 250x250.
+void
+TestScaling::BasicHalfScale()
+{
+  std::vector<uint8_t> data;
+  data.resize(500 * 500 * 4);
+  // Tile the image with one fixed 2x2 pattern so every 2x2 block has the same average.
+  uint32_t *pixels = (uint32_t*)data.data();
+  for (int y = 0; y < 500; y += 2) {
+    for (int x = 0; x < 500; x += 2) {
+      pixels[y * 500 + x] = 0xff00ff00;
+      pixels[y * 500 + x + 1] = 0xff00ffff;
+      pixels[(y + 1) * 500 + x] = 0xff000000;
+      pixels[(y + 1) * 500 + x + 1] = 0xff0000ff;
+    }
+  }
+  ImageHalfScaler scaler(data.data(), 500 * 4, IntSize(500, 500));
+
+  scaler.ScaleForSize(IntSize(220, 240));
+
+  VERIFY(scaler.GetSize().width == 250);
+  VERIFY(scaler.GetSize().height == 250);
+
+  pixels = (uint32_t*)scaler.GetScaledData();
+  // Each output pixel should be the average of one tile: two 0xff and two 0x00 give 0x7f.
+  for (int y = 0; y < 250; y++) {
+    for (int x = 0; x < 250; x++) {
+      VERIFY(pixels[y * (scaler.GetStride() / 4) + x] == 0xff007f7f);
+    }
+  }
+}
+// A 500x500 image requested at 120x110 is expected to be halved twice per axis, to 125x125.
+void
+TestScaling::DoubleHalfScale()
+{
+  std::vector<uint8_t> data;
+  data.resize(500 * 500 * 4);
+  // Tile the image with one fixed 2x2 pattern so every 2x2 block has the same average.
+  uint32_t *pixels = (uint32_t*)data.data();
+  for (int y = 0; y < 500; y += 2) {
+    for (int x = 0; x < 500; x += 2) {
+      pixels[y * 500 + x] = 0xff00ff00;
+      pixels[y * 500 + x + 1] = 0xff00ffff;
+      pixels[(y + 1) * 500 + x] = 0xff000000;
+      pixels[(y + 1) * 500 + x + 1] = 0xff0000ff;
+    }
+  }
+  ImageHalfScaler scaler(data.data(), 500 * 4, IntSize(500, 500));
+
+  scaler.ScaleForSize(IntSize(120, 110));
+  VERIFY(scaler.GetSize().width == 125);
+  VERIFY(scaler.GetSize().height == 125);
+
+  pixels = (uint32_t*)scaler.GetScaledData();
+  // The uniform tile average is expected to be unchanged by the second halving.
+  for (int y = 0; y < 125; y++) {
+    for (int x = 0; x < 125; x++) {
+      VERIFY(pixels[y * (scaler.GetStride() / 4) + x] == 0xff007f7f);
+    }
+  }
+}
+// A 499x499 image (odd dimensions) stored with a 500-pixel stride is expected to scale to 249x249.
+void
+TestScaling::UnevenHalfScale()
+{
+  std::vector<uint8_t> data;
+  // Use a 16-byte aligned stride still, we test non-aligned strides
+  // separately.
+  data.resize(499 * 500 * 4);
+  // The buffer holds 499 rows of 500 pixels; the guards below skip column 499 and row 499.
+  uint32_t *pixels = (uint32_t*)data.data();
+  for (int y = 0; y < 500; y += 2) {
+    for (int x = 0; x < 500; x += 2) {
+      pixels[y * 500 + x] = 0xff00ff00;
+      if (x < 498) {
+        pixels[y * 500 + x + 1] = 0xff00ffff;
+      }
+      if (y < 498) {
+        pixels[(y + 1) * 500 + x] = 0xff000000;
+        if (x < 498) {
+          pixels[(y + 1) * 500 + x + 1] = 0xff0000ff;
+        }
+      }
+    }
+  }
+  ImageHalfScaler scaler(data.data(), 500 * 4, IntSize(499, 499));
+
+  scaler.ScaleForSize(IntSize(220, 220));
+  VERIFY(scaler.GetSize().width == 249);
+  VERIFY(scaler.GetSize().height == 249);
+
+  pixels = (uint32_t*)scaler.GetScaledData();
+
+  for (int y = 0; y < 249; y++) {
+    for (int x = 0; x < 249; x++) {
+      VERIFY(pixels[y * (scaler.GetStride() / 4) + x] == 0xff007f7f);
+    }
+  }
+}
+// Like UnevenHalfScale, but with a 499-pixel (1996-byte) stride that is not a multiple of 16.
+void
+TestScaling::OddStrideHalfScale()
+{
+  std::vector<uint8_t> data;
+  // Use a 4-byte aligned stride to test if that doesn't cause any issues.
+  data.resize(499 * 499 * 4);
+  // The loops step up to 500, so the guards below keep writes inside the 499x499 buffer.
+  uint32_t *pixels = (uint32_t*)data.data();
+  for (int y = 0; y < 500; y += 2) {
+    for (int x = 0; x < 500; x += 2) {
+      pixels[y * 499 + x] = 0xff00ff00;
+      if (x < 498) {
+        pixels[y * 499 + x + 1] = 0xff00ffff;
+      }
+      if (y < 498) {
+        pixels[(y + 1) * 499 + x] = 0xff000000;
+        if (x < 498) {
+          pixels[(y + 1) * 499 + x + 1] = 0xff0000ff;
+        }
+      }
+    }
+  }
+  ImageHalfScaler scaler(data.data(), 499 * 4, IntSize(499, 499));
+
+  scaler.ScaleForSize(IntSize(220, 220));
+  VERIFY(scaler.GetSize().width == 249);
+  VERIFY(scaler.GetSize().height == 249);
+
+  pixels = (uint32_t*)scaler.GetScaledData();
+
+  for (int y = 0; y < 249; y++) {
+    for (int x = 0; x < 249; x++) {
+      VERIFY(pixels[y * (scaler.GetStride() / 4) + x] == 0xff007f7f);
+    }
+  }
+}
+void
+TestScaling::VerticalHalfScale()
+{
+  std::vector<uint8_t> data;
+  data.resize(500 * 500 * 4);
+  // 500x500 requested at 400x240: only the height is expected to be halved (500x250).
+  uint32_t *pixels = (uint32_t*)data.data();
+  for (int y = 0; y < 500; y += 2) {
+    for (int x = 0; x < 500; x += 2) {
+      pixels[y * 500 + x] = 0xff00ff00;
+      pixels[y * 500 + x + 1] = 0xff00ffff;
+      pixels[(y + 1) * 500 + x] = 0xff000000;
+      pixels[(y + 1) * 500 + x + 1] = 0xff0000ff;
+    }
+  }
+  ImageHalfScaler scaler(data.data(), 500 * 4, IntSize(500, 500));
+
+  scaler.ScaleForSize(IntSize(400, 240));
+  VERIFY(scaler.GetSize().width == 500);
+  VERIFY(scaler.GetSize().height == 250);
+
+  pixels = (uint32_t*)scaler.GetScaledData();
+  // Only vertical pairs are averaged, so the two columns of each tile keep distinct values.
+  for (int y = 0; y < 250; y++) {
+    for (int x = 0; x < 500; x += 2) {
+      VERIFY(pixels[y * (scaler.GetStride() / 4) + x] == 0xff007f00);
+      VERIFY(pixels[y * (scaler.GetStride() / 4) + x + 1] == 0xff007fff);
+    }
+  }
+}
+// A 500x500 image requested at 240x400 is expected to be halved horizontally only, to 250x500.
+void
+TestScaling::HorizontalHalfScale()
+{
+  std::vector<uint8_t> data;
+  data.resize(500 * 500 * 4);
+  // Tile the image with one fixed 2x2 pattern whose two rows differ.
+  uint32_t *pixels = (uint32_t*)data.data();
+  for (int y = 0; y < 500; y += 2) {
+    for (int x = 0; x < 500; x += 2) {
+      pixels[y * 500 + x] = 0xff00ff00;
+      pixels[y * 500 + x + 1] = 0xff00ffff;
+      pixels[(y + 1) * 500 + x] = 0xff000000;
+      pixels[(y + 1) * 500 + x + 1] = 0xff0000ff;
+    }
+  }
+  ImageHalfScaler scaler(data.data(), 500 * 4, IntSize(500, 500));
+
+  scaler.ScaleForSize(IntSize(240, 400));
+  VERIFY(scaler.GetSize().width == 250);
+  VERIFY(scaler.GetSize().height == 500);
+
+  pixels = (uint32_t*)scaler.GetScaledData();
+  // Only horizontal pairs are averaged, so the two rows of each tile keep distinct values.
+  for (int y = 0; y < 500; y += 2) {
+    for (int x = 0; x < 250; x++) {
+      VERIFY(pixels[y * (scaler.GetStride() / 4) + x] == 0xff00ff7f);
+      VERIFY(pixels[(y + 1) * (scaler.GetStride() / 4) + x] == 0xff00007f);
+    }
+  }
+}
+// Checks the resulting sizes when the two axes need a different number of halvings.
+void
+TestScaling::MixedHalfScale()
+{
+  std::vector<uint8_t> data;
+  data.resize(500 * 500 * 4);
+
+  uint32_t *pixels = (uint32_t*)data.data();
+  for (int y = 0; y < 500; y += 2) {
+    for (int x = 0; x < 500; x += 2) {
+      pixels[y * 500 + x] = 0xff00ff00;
+      pixels[y * 500 + x + 1] = 0xff00ffff;
+      pixels[(y + 1) * 500 + x] = 0xff000000;
+      pixels[(y + 1) * 500 + x + 1] = 0xff0000ff;
+    }
+  }
+  ImageHalfScaler scaler(data.data(), 500 * 4, IntSize(500, 500));
+  // Only sizes are checked; the second call is expected to scale from the 500x500 source again.
+  scaler.ScaleForSize(IntSize(120, 240));
+  VERIFY(scaler.GetSize().width == 125);
+  VERIFY(scaler.GetSize().height == 250);
+  scaler.ScaleForSize(IntSize(240, 120));
+  VERIFY(scaler.GetSize().width == 250);
+  VERIFY(scaler.GetSize().height == 125);
+}
new file mode 100644
--- /dev/null
+++ b/gfx/2d/unittest/TestScaling.h
@@ -0,0 +1,54 @@
+/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Mozilla Corporation code.
+ *
+ * The Initial Developer of the Original Code is Mozilla Foundation.
+ * Portions created by the Initial Developer are Copyright (C) 2011
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ *   Bas Schouten <bschouten@mozilla.com>
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#pragma once
+
+#include "TestBase.h"
+// Unit tests for ImageHalfScaler; each method below is one test case defined in TestScaling.cpp.
+class TestScaling : public TestBase
+{
+public:
+  TestScaling();
+
+  void BasicHalfScale();
+  void DoubleHalfScale();
+  void UnevenHalfScale();
+  void OddStrideHalfScale();
+  void VerticalHalfScale();
+  void HorizontalHalfScale();
+  void MixedHalfScale();
+};
--- a/gfx/2d/unittest/unittest.vcxproj
+++ b/gfx/2d/unittest/unittest.vcxproj
@@ -72,20 +72,22 @@
   </ItemDefinitionGroup>
   <ItemGroup>
     <ClCompile Include="Main.cpp" />
     <ClCompile Include="SanityChecks.cpp" />
     <ClCompile Include="TestBase.cpp" />
     <ClCompile Include="TestDrawTargetBase.cpp" />
     <ClCompile Include="TestDrawTargetD2D.cpp" />
     <ClCompile Include="TestPoint.cpp" />
+    <ClCompile Include="TestScaling.cpp" />
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="TestDrawTargetBase.h" />
     <ClInclude Include="SanityChecks.h" />
     <ClInclude Include="TestBase.h" />
     <ClInclude Include="TestDrawTargetD2D.h" />
     <ClInclude Include="TestPoint.h" />
+    <ClInclude Include="TestScaling.h" />
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
 </Project>
\ No newline at end of file