b=451621; push new pixman with arm fast-paths; r=me
author Vladimir Vukicevic <vladimir@pobox.com>
Wed, 17 Sep 2008 14:15:01 -0700
changeset 19351 5d807b6163785bba4c635a999dffb93994ca803b
parent 19350 b375e5cab4dc0a4fe45734df4f01a0da59a63e5d
child 19352 84210947a1027ce74e908dff27a768a651edc877
push id unknown
push user unknown
push date unknown
reviewers me
bugs 451621
milestone 1.9.1b1pre
b=451621; push new pixman with arm fast-paths; r=me
config/autoconf.mk.in
configure.in
gfx/cairo/README
gfx/cairo/libpixman/src/Makefile.in
gfx/cairo/libpixman/src/pixman-arm.c
gfx/cairo/libpixman/src/pixman-arm.h
gfx/cairo/libpixman/src/pixman-image.c
gfx/cairo/libpixman/src/pixman-pict.c
gfx/cairo/libpixman/src/pixman-sse2.c
gfx/cairo/libpixman/src/pixman-transformed.c
gfx/cairo/libpixman/src/pixman-version.h
gfx/cairo/libpixman/src/pixman.h
--- a/config/autoconf.mk.in
+++ b/config/autoconf.mk.in
@@ -639,8 +639,10 @@ HAVE_DTRACE= @HAVE_DTRACE@
 VISIBILITY_FLAGS = @VISIBILITY_FLAGS@
 WRAP_SYSTEM_INCLUDES = @WRAP_SYSTEM_INCLUDES@
 
 MOZ_V1_STRING_ABI = @MOZ_V1_STRING_ABI@
 
 MOZ_EMBEDDING_LEVEL_DEFAULT = @MOZ_EMBEDDING_LEVEL_DEFAULT@
 MOZ_EMBEDDING_LEVEL_BASIC = @MOZ_EMBEDDING_LEVEL_BASIC@
 MOZ_EMBEDDING_LEVEL_MINIMAL = @MOZ_EMBEDDING_LEVEL_MINIMAL@
+
+HAVE_ARM_SIMD= @HAVE_ARM_SIMD@
--- a/configure.in
+++ b/configure.in
@@ -2986,16 +2986,27 @@ AC_CHECK_HEADERS(X11/XKBlib.h)
 dnl These are all the places some variant of statfs can be hiding.
 AC_CHECK_HEADERS(sys/statvfs.h sys/statfs.h sys/vfs.h sys/mount.h)
 
 dnl Try for MMX support
 dnl NB - later gcc versions require -mmmx for this header to be successfully
 dnl included (or another option which implies it, such as -march=pentium-mmx)
 AC_CHECK_HEADERS(mmintrin.h)
 
+AC_MSG_CHECKING(for ARM SIMD support)
+AC_TRY_COMPILE([],
+               [asm("uqadd8 r1, r1, r2");],
+               result="yes", result="no")
+AC_MSG_RESULT("$result")
+if test "$result" = "yes"; then
+    AC_DEFINE(HAVE_ARM_SIMD)
+    HAVE_ARM_SIMD=1
+fi
+AC_SUBST(HAVE_ARM_SIMD)
+
 dnl Check whether the compiler supports the new-style C++ standard
 dnl library headers (i.e. <new>) or needs the old "new.h"
 AC_LANG_CPLUSPLUS
 NEW_H=new.h
 AC_CHECK_HEADER(new, [NEW_H=new])
 AC_DEFINE_UNQUOTED(NEW_H, <$NEW_H>)
 AC_LANG_C
 
--- a/gfx/cairo/README
+++ b/gfx/cairo/README
@@ -3,17 +3,17 @@ Snapshots of cairo and glitz for mozilla
 We only include the relevant parts of each release (generally, src/*.[ch]),
 as we have Makefile.in's that integrate into the Mozilla build system.  For
 documentation and similar, please see the official tarballs at
 http://www.cairographics.org/.
 
 VERSIONS:
 
   cairo (1.7.4-136-g5ea2555)
-  pixman (pixman-0.11.8-17-gf9d3f37)
+  pixman (pixman-0.11.10-8-g7180230)
 
 ***** NOTE FOR VISUAL C++ 6.0 *****
 
 VC6 is not supported.  Please upgrade to VC8.
 
 ==== Patches ====
 
 Some specific things:
--- a/gfx/cairo/libpixman/src/Makefile.in
+++ b/gfx/cairo/libpixman/src/Makefile.in
@@ -76,16 +76,21 @@ MMX_CFLAGS=-mmmx -Winline
 # See bug 410509 why we can't use SSE2 yet on linux
 #USE_SSE2=1
 #MMX_CFLAGS+=-msse -msse2
 ifneq ($(MOZ_WIDGET_TOOLKIT),os2)
 MMX_CFLAGS+=--param inline-unit-growth=10000 --param large-function-growth=10000
 endif
 endif
 endif
+ifeq (arm,$(findstring arm,$(OS_TEST)))
+ifdef HAVE_ARM_SIMD
+USE_ARM=1
+endif
+endif
 endif
 
 
 CSRCS	= \
 	pixman-access.c \
 	pixman-access-accessors.c \
 	pixman-combine32.c \
 	pixman-combine64.c \
@@ -115,16 +120,21 @@ CSRCS += pixman-sse2.c
 DEFINES += -DUSE_SSE -DUSE_SSE2
 endif
 
 ifdef USE_VMX
 CSRCS += pixman-vmx.c
 DEFINES += -DUSE_VMX
 endif
 
+ifdef USE_ARM
+CSRCS += pixman-arm.c
+DEFINES += -DUSE_ARM
+endif
+
 EXPORTS		= pixman.h pixman-version.h
 
 LOCAL_INCLUDES	+= -I$(srcdir) -I$(srcdir)/../../cairo/src
 
 FORCE_STATIC_LIB = 1
 # This library is used by other shared libs in a static build
 FORCE_USE_PIC = 1
 
new file mode 100644
--- /dev/null
+++ b/gfx/cairo/libpixman/src/pixman-arm.c
@@ -0,0 +1,409 @@
+/*
+ * Copyright © 2008 Mozilla Corporation
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Mozilla Corporation not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission.  Mozilla Corporation makes no
+ * representations about the suitability of this software for any purpose.  It
+ * is provided "as is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ *
+ * Author:  Jeff Muizelaar (jeff@infidigm.net)
+ *
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "pixman-arm.h"
+
+void
+fbCompositeSrcAdd_8000x8000arm (pixman_op_t op,
+				pixman_image_t * pSrc,
+				pixman_image_t * pMask,
+				pixman_image_t * pDst,
+				int16_t      xSrc,
+				int16_t      ySrc,
+				int16_t      xMask,
+				int16_t      yMask,
+				int16_t      xDst,
+				int16_t      yDst,
+				uint16_t     width,
+				uint16_t     height)
+{
+    uint8_t	*dstLine, *dst;
+    uint8_t	*srcLine, *src;
+    int	dstStride, srcStride;
+    uint16_t	w;
+    uint8_t	s, d;
+    /* Adds every source byte into the matching destination byte with uqadd8 (saturating add); op and the mask arguments are not read. */
+    fbComposeGetStart (pSrc, xSrc, ySrc, uint8_t, srcStride, srcLine, 1);
+    fbComposeGetStart (pDst, xDst, yDst, uint8_t, dstStride, dstLine, 1);
+
+    while (height--)
+    {
+	dst = dstLine;
+	dstLine += dstStride;
+	src = srcLine;
+	srcLine += srcStride;
+	w = width;
+	/* head: single bytes until dst reaches a 4-byte boundary or the row ends */
+	while (w && (unsigned long)dst & 3)
+	{
+	    s = *src;
+	    d = *dst;
+	    asm("uqadd8 %0, %1, %2" : "+r"(d) : "r"(s));
+	    *dst = d;
+
+	    dst++;
+	    src++;
+	    w--;
+	}
+	/* body: four bytes per uqadd8 through 32-bit accesses; only dst was aligned above, src is not checked */
+	while (w >= 4)
+	{
+	    asm("uqadd8 %0, %1, %2" : "=r"(*(uint32_t*)dst) : "r"(*(uint32_t*)src), "r"(*(uint32_t*)dst));
+	    dst += 4;
+	    src += 4;
+	    w -= 4;
+	}
+	/* tail: the remaining 0-3 bytes, one at a time */
+	while (w)
+	{
+	    s = *src;
+	    d = *dst;
+	    asm("uqadd8 %0, %1, %2" : "+r"(d) : "r"(s));
+	    *dst = d;
+
+	    dst++;
+	    src++;
+	    w--;
+	}
+    }
+
+}
+
+void
+fbCompositeSrc_8888x8888arm (pixman_op_t op,
+			 pixman_image_t * pSrc,
+			 pixman_image_t * pMask,
+			 pixman_image_t * pDst,
+			 int16_t      xSrc,
+			 int16_t      ySrc,
+			 int16_t      xMask,
+			 int16_t      yMask,
+			 int16_t      xDst,
+			 int16_t      yDst,
+			 uint16_t     width,
+			 uint16_t     height)
+{
+    uint32_t	*dstLine, *dst;
+    uint32_t	*srcLine, *src;
+    int	dstStride, srcStride;
+    uint16_t	w;
+    uint32_t component_half = 0x800080;
+    uint32_t upper_component_mask = 0xff00ff00;
+    uint32_t alpha_mask = 0xff;
+    /* Per pixel: dest = uqadd8 (src, dest scaled by (255 - src alpha)); op and the mask arguments are not read. */
+    fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
+    fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1);
+
+    while (height--)
+    {
+	dst = dstLine;
+	dstLine += dstStride;
+	src = srcLine;
+	srcLine += srcStride;
+	w = width;
+
+//#define inner_branch
+	asm volatile (
+			"cmp %[w], #0\n\t"
+			"beq 2f\n\t"
+			"1:\n\t"
+			/* load source pixel into r5 and advance src */
+			"ldr r5, [%[src]], #4\n\t"
+#ifdef inner_branch
+			/* We can avoid doing the multiplication in two cases: 0x0 or 0xff.
+			 * The 0x0 case also allows us to avoid doing an unnecessary data
+			 * write which is more valuable so we only check for that */
+			"cmp r5, #0x1000000\n\t"
+			"blt 3f\n\t"
+			/* NOTE(review): blt is a signed compare and label 3 sits before the str, so this disabled path still writes r5 -- verify before enabling */
+			/* = 255 - alpha */
+			"sub r8, %[alpha_mask], r5, lsr #24\n\t"
+
+			"ldr r4, [%[dest]] \n\t"
+
+#else
+			"ldr r4, [%[dest]] \n\t"
+
+			/* = 255 - alpha */
+			"sub r8, %[alpha_mask], r5, lsr #24\n\t"
+#endif
+			"uxtb16 r6, r4\n\t"
+			"uxtb16 r7, r4, ror #8\n\t"
+
+			/* multiply by 257 and divide by 65536 */
+			"mla r6, r6, r8, %[component_half]\n\t"
+			"mla r7, r7, r8, %[component_half]\n\t"
+
+			"uxtab16 r6, r6, r6, ror #8\n\t"
+			"uxtab16 r7, r7, r7, ror #8\n\t"
+
+			/* recombine the 0xff00ff00 bytes of r6 and r7 */
+			"and r7, %[upper_component_mask]\n\t"
+			"uxtab16 r6, r7, r6, ror #8\n\t"
+
+			"uqadd8 r5, r6, r5\n\t"
+
+#ifdef inner_branch
+			"3:\n\t"
+
+#endif
+			"str r5, [%[dest]], #4\n\t"
+			/* decrement the pixel counter and loop while pixels remain */
+			"subs	%[w], %[w], #1\n\t"
+			"bne	1b\n\t"
+			"2:\n\t"
+			: [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src)
+			: [component_half] "r" (component_half), [upper_component_mask] "r" (upper_component_mask),
+			  [alpha_mask] "r" (alpha_mask)
+			: "r4", "r5", "r6", "r7", "r8", "cc", "memory"
+			);
+    }
+}
+
+void
+fbCompositeSrc_8888x8x8888arm (pixman_op_t op,
+			       pixman_image_t * pSrc,
+			       pixman_image_t * pMask,
+			       pixman_image_t * pDst,
+			       int16_t	xSrc,
+			       int16_t	ySrc,
+			       int16_t      xMask,
+			       int16_t      yMask,
+			       int16_t      xDst,
+			       int16_t      yDst,
+			       uint16_t     width,
+			       uint16_t     height)
+{
+    uint32_t	*dstLine, *dst;
+    uint32_t	*srcLine, *src;
+    uint32_t	mask;
+    int	dstStride, srcStride;
+    uint16_t	w;
+    uint32_t component_half = 0x800080;
+    uint32_t alpha_mask = 0xff;
+    /* Per pixel: scale src by the solid mask alpha, then dest = uqadd8 (scaled src, dest scaled by (255 - scaled src alpha)). */
+    fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
+    fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1);
+
+    fbComposeGetSolid (pMask, mask, pDst->bits.format);
+    mask = (mask) >> 24;
+
+    while (height--)
+    {
+	dst = dstLine;
+	dstLine += dstStride;
+	src = srcLine;
+	srcLine += srcStride;
+	w = width;
+
+//#define inner_branch
+	asm volatile (
+			"cmp %[w], #0\n\t"
+			"beq 2f\n\t"
+			"1:\n\t"
+			/* load source pixel into r5 and advance src */
+			"ldr r5, [%[src]], #4\n\t"
+#ifdef inner_branch
+			/* We can avoid doing the multiplication in two cases: 0x0 or 0xff.
+			 * The 0x0 case also allows us to avoid doing an unnecessary data
+			 * write which is more valuable so we only check for that */
+			"cmp r5, #0x1000000\n\t"
+			"blt 3f\n\t"
+			/* NOTE(review): blt is a signed compare and label 3 sits before the str, so this disabled path still writes r5 -- verify before enabling */
+#endif
+			"ldr r4, [%[dest]] \n\t"
+
+			"uxtb16 r6, r5\n\t"
+			"uxtb16 r7, r5, ror #8\n\t"
+
+			/* multiply source by the mask alpha, then by 257 and divide by 65536 */
+			"mla r6, r6, %[mask_alpha], %[component_half]\n\t"
+			"mla r7, r7, %[mask_alpha], %[component_half]\n\t"
+
+			"uxtab16 r6, r6, r6, ror #8\n\t"
+			"uxtab16 r7, r7, r7, ror #8\n\t"
+
+			"uxtb16 r6, r6, ror #8\n\t"
+			"uxtb16 r7, r7, ror #8\n\t"
+
+			/* recombine */
+			"orr r5, r6, r7, lsl #8\n\t"
+
+			"uxtb16 r6, r4\n\t"
+			"uxtb16 r7, r4, ror #8\n\t"
+
+			/* 255 - alpha */
+			"sub r8, %[alpha_mask], r5, lsr #24\n\t"
+
+			/* multiply by alpha (r8) then by 257 and divide by 65536 */
+			"mla r6, r6, r8, %[component_half]\n\t"
+			"mla r7, r7, r8, %[component_half]\n\t"
+
+			"uxtab16 r6, r6, r6, ror #8\n\t"
+			"uxtab16 r7, r7, r7, ror #8\n\t"
+
+			"uxtb16 r6, r6, ror #8\n\t"
+			"uxtb16 r7, r7, ror #8\n\t"
+
+			/* recombine */
+			"orr r6, r6, r7, lsl #8\n\t"
+
+			"uqadd8 r5, r6, r5\n\t"
+
+#ifdef inner_branch
+			"3:\n\t"
+
+#endif
+			"str r5, [%[dest]], #4\n\t"
+			/* decrement the pixel counter and loop while pixels remain */
+			"subs	%[w], %[w], #1\n\t"
+			"bne	1b\n\t"
+			"2:\n\t"
+			: [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src)
+			: [component_half] "r" (component_half), [mask_alpha] "r" (mask),
+			  [alpha_mask] "r" (alpha_mask)
+			: "r4", "r5", "r6", "r7", "r8", "cc", "memory"
+			);
+    }
+}
+
+void
+fbCompositeSolidMask_nx8x8888arm (pixman_op_t      op,
+			       pixman_image_t * pSrc,
+			       pixman_image_t * pMask,
+			       pixman_image_t * pDst,
+			       int16_t      xSrc,
+			       int16_t      ySrc,
+			       int16_t      xMask,
+			       int16_t      yMask,
+			       int16_t      xDst,
+			       int16_t      yDst,
+			       uint16_t     width,
+			       uint16_t     height)
+{
+    uint32_t	 src;
+    uint32_t	*dstLine, *dst;
+    uint8_t	*maskLine, *mask;
+    int		 dstStride, maskStride;
+    uint16_t	 w;
+
+    fbComposeGetSolid(pSrc, src, pDst->bits.format);
+
+    /* an all-zero solid source leaves the destination unchanged */
+    if (src == 0)
+	return;
+
+    uint32_t component_mask = 0xff00ff;
+    uint32_t component_half = 0x800080;
+
+    uint32_t src_hi = (src >> 8) & component_mask;
+    uint32_t src_lo = src & component_mask;
+
+    fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
+    fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1);
+
+    while (height--)
+    {
+	dst = dstLine;
+	dstLine += dstStride;
+	mask = maskLine;
+	maskLine += maskStride;
+	w = width;
+
+//#define inner_branch
+	asm volatile (
+			"cmp %[w], #0\n\t"
+			"beq 2f\n\t"
+			"1:\n\t"
+			/* load mask */
+			"ldrb r5, [%[mask]], #1\n\t"
+#ifdef inner_branch
+			/* We can avoid doing the multiplication in two cases: 0x0 or 0xff.
+			 * The 0x0 case also allows us to avoid doing an unnecessary data
+			 * write which is more valuable so we only check for that */
+			/* NOTE(review): label 3 sits before the str below, so taking this
+			 * branch would store the zero mask byte in r5 to dest -- verify before enabling */
+			"cmp r5, #0x0\n\t"
+			"beq 3f\n\t"
+
+#endif
+			"ldr r4, [%[dest]] \n\t"
+
+			/* multiply the solid source by the mask byte (r5), then by 257 and divide by 65536 */
+			"mla r6, %[src_lo], r5, %[component_half]\n\t"
+			"mla r7, %[src_hi], r5, %[component_half]\n\t"
+
+			"uxtab16 r6, r6, r6, ror #8\n\t"
+			"uxtab16 r7, r7, r7, ror #8\n\t"
+
+			"uxtb16 r6, r6, ror #8\n\t"
+			"uxtb16 r7, r7, ror #8\n\t"
+
+			/* recombine */
+			"orr r5, r6, r7, lsl #8\n\t"
+
+			"uxtb16 r6, r4\n\t"
+			"uxtb16 r7, r4, ror #8\n\t"
+
+			/* we could simplify this to use 'sub' if we were
+			 * willing to give up a register for alpha_mask */
+			"mvn r8, r5\n\t"
+			"mov r8, r8, lsr #24\n\t"
+
+			/* multiply by alpha (r8) then by 257 and divide by 65536 */
+			"mla r6, r6, r8, %[component_half]\n\t"
+			"mla r7, r7, r8, %[component_half]\n\t"
+
+			"uxtab16 r6, r6, r6, ror #8\n\t"
+			"uxtab16 r7, r7, r7, ror #8\n\t"
+
+			"uxtb16 r6, r6, ror #8\n\t"
+			"uxtb16 r7, r7, ror #8\n\t"
+
+			/* recombine */
+			"orr r6, r6, r7, lsl #8\n\t"
+
+			"uqadd8 r5, r6, r5\n\t"
+
+#ifdef inner_branch
+			"3:\n\t"
+
+#endif
+			"str r5, [%[dest]], #4\n\t"
+			/* decrement the pixel counter and loop while pixels remain */
+			"subs	%[w], %[w], #1\n\t"
+			"bne	1b\n\t"
+			"2:\n\t"
+			: [w] "+r" (w), [dest] "+r" (dst), [mask] "+r" (mask)
+			: [component_half] "r" (component_half),
+			  [src_hi] "r" (src_hi), [src_lo] "r" (src_lo)
+			: "r4", "r5", "r6", "r7", "r8", "cc", "memory"
+			);
+    }
+}
new file mode 100644
--- /dev/null
+++ b/gfx/cairo/libpixman/src/pixman-arm.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright © 2008 Mozilla Corporation
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Mozilla Corporation not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission.  Mozilla Corporation makes no
+ * representations about the suitability of this software for any purpose.  It
+ * is provided "as is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ *
+ * Author:  Jeff Muizelaar (jeff@infidigm.net)
+ *
+ */
+
+#include "pixman-private.h"
+
+#ifdef USE_ARM
+/* With USE_ARM defined this unconditionally returns TRUE; no runtime CPU check is made here. */
+static inline pixman_bool_t pixman_have_arm(void) { return TRUE; }
+/* Without USE_ARM the call expands to the constant FALSE. */
+#else
+#define pixman_have_arm() FALSE
+#endif
+
+#ifdef USE_ARM
+
+void
+fbCompositeSrcAdd_8000x8000arm (pixman_op_t op,
+				pixman_image_t * pSrc,
+				pixman_image_t * pMask,
+				pixman_image_t * pDst,
+				int16_t      xSrc,
+				int16_t      ySrc,
+				int16_t      xMask,
+				int16_t      yMask,
+				int16_t      xDst,
+				int16_t      yDst,
+				uint16_t     width,
+				uint16_t     height);
+void
+fbCompositeSrc_8888x8888arm (pixman_op_t op,
+			 pixman_image_t * pSrc,
+			 pixman_image_t * pMask,
+			 pixman_image_t * pDst,
+			 int16_t      xSrc,
+			 int16_t      ySrc,
+			 int16_t      xMask,
+			 int16_t      yMask,
+			 int16_t      xDst,
+			 int16_t      yDst,
+			 uint16_t     width,
+			 uint16_t     height);
+
+void
+fbCompositeSrc_8888x8x8888arm (pixman_op_t op,
+			 pixman_image_t * pSrc,
+			 pixman_image_t * pMask,
+			 pixman_image_t * pDst,
+			 int16_t      xSrc,
+			 int16_t      ySrc,
+			 int16_t      xMask,
+			 int16_t      yMask,
+			 int16_t      xDst,
+			 int16_t      yDst,
+			 uint16_t     width,
+			 uint16_t     height);
+void
+fbCompositeSolidMask_nx8x8888arm (pixman_op_t op,
+			 pixman_image_t * pSrc,
+			 pixman_image_t * pMask,
+			 pixman_image_t * pDst,
+			 int16_t      xSrc,
+			 int16_t      ySrc,
+			 int16_t      xMask,
+			 int16_t      yMask,
+			 int16_t      xDst,
+			 int16_t      yDst,
+			 uint16_t     width,
+			 uint16_t     height);
+
+
+#endif /* USE_ARM */
--- a/gfx/cairo/libpixman/src/pixman-image.c
+++ b/gfx/cairo/libpixman/src/pixman-image.c
@@ -813,12 +813,16 @@ pixman_image_is_opaque(pixman_image_t *i
 
     if (image->common.repeat == PIXMAN_REPEAT_NONE)
     {
         if (image->common.filter != PIXMAN_FILTER_NEAREST)
             return FALSE;
 
         if (image->common.transform)
             return FALSE;
+
+	/* Gradients do not necessarily cover the entire compositing area */
+	if (image->type == LINEAR || image->type == CONICAL || image->type == RADIAL)
+	    return FALSE;
     }
 
      return TRUE;
 }
--- a/gfx/cairo/libpixman/src/pixman-pict.c
+++ b/gfx/cairo/libpixman/src/pixman-pict.c
@@ -29,16 +29,17 @@
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include "pixman-private.h"
 #include "pixman-mmx.h"
 #include "pixman-vmx.h"
 #include "pixman-sse2.h"
+#include "pixman-arm.h"
 #include "pixman-combine32.h"
 
 #ifdef __GNUC__
 #   define inline __inline__ __attribute__ ((__always_inline__))
 #endif
 
 #define FbFullMask(n)   ((n) == 32 ? (uint32_t)-1 : ((((uint32_t) 1) << n) - 1))
 
@@ -753,16 +754,56 @@ fbCompositeSrc_8888x0565 (pixman_op_t op
 		}
 		WRITE(pDst, dst, cvt8888to0565(d));
 	    }
 	    dst++;
 	}
     }
 }
 
+
+void
+fbCompositeSrc_x888x0565 (pixman_op_t op,
+                          pixman_image_t * pSrc,
+                          pixman_image_t * pMask,
+                          pixman_image_t * pDst,
+                          int16_t      xSrc,
+                          int16_t      ySrc,
+                          int16_t      xMask,
+                          int16_t      yMask,
+                          int16_t      xDst,
+                          int16_t      yDst,
+                          uint16_t     width,
+                          uint16_t     height)
+{
+    /* Row-by-row copy of 32-bit source pixels into a 16-bit destination,
+     * converting every pixel with cvt8888to0565. */
+    uint32_t	*srcRow, *srcPx, pixel;
+    uint16_t	*dstRow, *dstPx;
+    int	srcRowStride, dstRowStride;
+    uint16_t	remaining, rowsLeft;
+
+    fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcRowStride, srcRow, 1);
+    fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstRowStride, dstRow, 1);
+    for (rowsLeft = height; rowsLeft != 0; rowsLeft--)
+    {
+	srcPx = srcRow;
+	dstPx = dstRow;
+	for (remaining = width; remaining != 0; remaining--)
+	{
+	    pixel = READ(pSrc, srcPx);
+	    srcPx++;
+	    WRITE(pDst, dstPx, cvt8888to0565(pixel));
+	    dstPx++;
+	}
+	srcRow += srcRowStride;
+	dstRow += dstRowStride;
+    }
+}
+
 void
 fbCompositeSrcAdd_8000x8000 (pixman_op_t	op,
 			     pixman_image_t * pSrc,
 			     pixman_image_t * pMask,
 			     pixman_image_t * pDst,
 			     int16_t      xSrc,
 			     int16_t      ySrc,
 			     int16_t      xMask,
@@ -1474,16 +1515,36 @@ static const FastPathInfo sse2_fast_path
 
 #ifdef USE_VMX
 static const FastPathInfo vmx_fast_paths[] =
 {
     { PIXMAN_OP_NONE },
 };
 #endif
 
+#ifdef USE_ARM
+static const FastPathInfo arm_fast_paths[] =
+{
+    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_a8r8g8b8, fbCompositeSrc_8888x8888arm,      0 },
+    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,	PIXMAN_x8r8g8b8, fbCompositeSrc_8888x8888arm,	   0 },
+    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,	PIXMAN_a8b8g8r8, fbCompositeSrc_8888x8888arm,	   0 },
+    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,	PIXMAN_x8b8g8r8, fbCompositeSrc_8888x8888arm,	   0 },
+    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8,       PIXMAN_a8r8g8b8, fbCompositeSrc_8888x8x8888arm,    NEED_SOLID_MASK },
+    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8,       PIXMAN_x8r8g8b8, fbCompositeSrc_8888x8x8888arm,	   NEED_SOLID_MASK },
+    /* ADD on 8-bit alpha-only images */
+    { PIXMAN_OP_ADD, PIXMAN_a8,        PIXMAN_null,     PIXMAN_a8,       fbCompositeSrcAdd_8000x8000arm,   0 },
+    /* OVER with a solid source through an a8 mask */
+    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_a8r8g8b8, fbCompositeSolidMask_nx8x8888arm,     0 },
+    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_x8r8g8b8, fbCompositeSolidMask_nx8x8888arm,     0 },
+    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_a8b8g8r8, fbCompositeSolidMask_nx8x8888arm,     0 },
+    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_x8b8g8r8, fbCompositeSolidMask_nx8x8888arm,     0 },
+    /* presumably the end-of-table marker, as in the other fast-path tables -- TODO confirm in get_fast_path */
+    { PIXMAN_OP_NONE },
+};
+#endif
 
 static const FastPathInfo c_fast_paths[] =
 {
     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_r5g6b5,   fbCompositeSolidMask_nx8x0565, 0 },
     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_b5g6r5,   fbCompositeSolidMask_nx8x0565, 0 },
     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_r8g8b8,   fbCompositeSolidMask_nx8x0888, 0 },
     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_b8g8r8,   fbCompositeSolidMask_nx8x0888, 0 },
     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_a8r8g8b8, fbCompositeSolidMask_nx8x8888, 0 },
@@ -1542,16 +1603,20 @@ static const FastPathInfo c_fast_paths[]
     /* FIXME */
     { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8,  PIXMAN_null,	PIXMAN_a8r8g8b8, fbCompositeSrcSrc_nxn, 0 },
     { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8,  PIXMAN_null,	PIXMAN_a8b8g8r8, fbCompositeSrcSrc_nxn, 0 },
     { PIXMAN_OP_SRC, PIXMAN_x8r8g8b8,  PIXMAN_null,	PIXMAN_x8r8g8b8, fbCompositeSrcSrc_nxn, 0 },
     { PIXMAN_OP_SRC, PIXMAN_x8b8g8r8,  PIXMAN_null,	PIXMAN_x8b8g8r8, fbCompositeSrcSrc_nxn, 0 },
     { PIXMAN_OP_SRC, PIXMAN_r5g6b5,    PIXMAN_null,     PIXMAN_r5g6b5,   fbCompositeSrcSrc_nxn, 0 },
     { PIXMAN_OP_SRC, PIXMAN_b5g6r5,    PIXMAN_null,     PIXMAN_b5g6r5,   fbCompositeSrcSrc_nxn, 0 },
 #endif
+    { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8,  PIXMAN_null,     PIXMAN_r5g6b5,   fbCompositeSrc_x888x0565, 0 },
+    { PIXMAN_OP_SRC, PIXMAN_x8r8g8b8,  PIXMAN_null,     PIXMAN_r5g6b5,   fbCompositeSrc_x888x0565, 0 },
+    { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8,  PIXMAN_null,     PIXMAN_b5g6r5,   fbCompositeSrc_x888x0565, 0 },
+    { PIXMAN_OP_SRC, PIXMAN_x8b8g8r8,  PIXMAN_null,     PIXMAN_b5g6r5,   fbCompositeSrc_x888x0565, 0 },
     { PIXMAN_OP_IN,  PIXMAN_a8,        PIXMAN_null,     PIXMAN_a8,       fbCompositeSrcIn_8x8,   0 },
     { PIXMAN_OP_IN,  PIXMAN_solid,     PIXMAN_a8,	PIXMAN_a8,	 fbCompositeSolidMaskIn_nx8x8, 0 },
     { PIXMAN_OP_NONE },
 };
 
 static pixman_bool_t
 mask_is_solid (pixman_image_t *mask)
 {
@@ -1824,16 +1889,22 @@ pixman_image_composite (pixman_op_t     
 	    info = get_fast_path (mmx_fast_paths, op, pSrc, pMask, pDst, pixbuf);
 #endif
 
 #ifdef USE_VMX
 
 	if (!info && pixman_have_vmx())
 	    info = get_fast_path (vmx_fast_paths, op, pSrc, pMask, pDst, pixbuf);
 #endif
+
+#ifdef USE_ARM
+	if (!info && pixman_have_arm())
+	    info = get_fast_path (arm_fast_paths, op, pSrc, pMask, pDst, pixbuf);
+#endif
+
         if (!info)
 	    info = get_fast_path (c_fast_paths, op, pSrc, pMask, pDst, pixbuf);
 
 	if (info)
 	{
 	    func = info->func;
 
 	    if (info->src_format == PIXMAN_solid)
--- a/gfx/cairo/libpixman/src/pixman-sse2.c
+++ b/gfx/cairo/libpixman/src/pixman-sse2.c
@@ -68,52 +68,66 @@ static __m128i MaskAlpha;
 
 static __m128i Mask565r;
 static __m128i Mask565g1, Mask565g2;
 static __m128i Mask565b;
 static __m128i MaskRed;
 static __m128i MaskGreen;
 static __m128i MaskBlue;
 
+static __m128i Mask565FixRB;
+static __m128i Mask565FixG;
+
 /* -------------------------------------------------------------------------------------------------
  * SSE2 Inlines
  */
 static inline __m128i
 unpack_32_1x128 (uint32_t data)
 {
     return _mm_unpacklo_epi8 (_mm_cvtsi32_si128 (data), _mm_setzero_si128());
 }
 
 static inline void
 unpack_128_2x128 (__m128i data, __m128i* dataLo, __m128i* dataHi)
 {
     *dataLo = _mm_unpacklo_epi8 (data, _mm_setzero_si128 ());
     *dataHi = _mm_unpackhi_epi8 (data, _mm_setzero_si128 ());
 }
 
+static inline __m128i
+unpack565to8888 (__m128i lo)
+{
+    /* Shift each 565 field to the position selected by its mask. */
+    const __m128i red   = _mm_and_si128 (_mm_slli_epi32 (lo, 8), MaskRed);
+    const __m128i green = _mm_and_si128 (_mm_slli_epi32 (lo, 5), MaskGreen);
+    const __m128i blue  = _mm_and_si128 (_mm_slli_epi32 (lo, 3), MaskBlue);
+    const __m128i redBlue = _mm_or_si128 (red, blue);
+
+    /* Bits picked by Mask565FixRB, moved down by 5, are ORed back in. */
+    const __m128i redBlueFix =
+	_mm_srli_epi32 (_mm_and_si128 (redBlue, Mask565FixRB), 5);
+
+    /* Same for green with Mask565FixG and a shift of 6. */
+    const __m128i greenFix =
+	_mm_srli_epi32 (_mm_and_si128 (green, Mask565FixG), 6);
+
+    return _mm_or_si128 (_mm_or_si128 (redBlue, redBlueFix),
+			 _mm_or_si128 (green, greenFix));
+}
+
 static inline void
 unpack565_128_4x128 (__m128i data, __m128i* data0, __m128i* data1, __m128i* data2, __m128i* data3)
 {
     __m128i lo, hi;
-    __m128i r, g, b;
 
     lo = _mm_unpacklo_epi16 (data, _mm_setzero_si128 ());
     hi = _mm_unpackhi_epi16 (data, _mm_setzero_si128 ());
 
-    r = _mm_and_si128 (_mm_slli_epi32 (lo, 8), MaskRed);
-    g = _mm_and_si128 (_mm_slli_epi32 (lo, 5), MaskGreen);
-    b = _mm_and_si128 (_mm_slli_epi32 (lo, 3), MaskBlue);
-
-    lo = _mm_or_si128 (_mm_or_si128 (r, g), b);
-
-    r = _mm_and_si128 (_mm_slli_epi32 (hi, 8), MaskRed);
-    g = _mm_and_si128 (_mm_slli_epi32 (hi, 5), MaskGreen);
-    b = _mm_and_si128 (_mm_slli_epi32 (hi, 3), MaskBlue);
-
-    hi = _mm_or_si128 (_mm_or_si128 (r, g), b);
+    lo = unpack565to8888 (lo);
+    hi = unpack565to8888 (hi);
 
     unpack_128_2x128 (lo, data0, data1);
     unpack_128_2x128 (hi, data2, data3);
 }
 
 static inline uint16_t
 pack565_32_16 (uint32_t pixel)
 {
@@ -239,19 +253,21 @@ invertColors_2x128 (__m128i dataLo, __m1
     hi = _mm_shufflelo_epi16 (dataHi, _MM_SHUFFLE(3, 0, 1, 2));
     *invLo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE(3, 0, 1, 2));
     *invHi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE(3, 0, 1, 2));
 }
 
 static inline void
 over_2x128 (__m128i* srcLo, __m128i* srcHi, __m128i* alphaLo, __m128i* alphaHi, __m128i* dstLo, __m128i* dstHi)
 {
-    negate_2x128 (*alphaLo, *alphaHi, alphaLo, alphaHi);
-
-    pixMultiply_2x128 (dstLo, dstHi, alphaLo, alphaHi, dstLo, dstHi);
+    __m128i t1, t2;
+
+    negate_2x128 (*alphaLo, *alphaHi, &t1, &t2);
+
+    pixMultiply_2x128 (dstLo, dstHi, &t1, &t2, dstLo, dstHi);
 
     *dstLo = _mm_adds_epu8 (*srcLo, *dstLo);
     *dstHi = _mm_adds_epu8 (*srcHi, *dstHi);
 }
 
 static inline void
 overRevNonPre_2x128 (__m128i srcLo, __m128i srcHi, __m128i* dstLo, __m128i* dstHi)
 {
@@ -2290,17 +2306,18 @@ fbComposeSetupSSE2(void)
         /* SSE2 constants */
         Mask565r  = createMask_2x32_128 (0x00f80000, 0x00f80000);
         Mask565g1 = createMask_2x32_128 (0x00070000, 0x00070000);
         Mask565g2 = createMask_2x32_128 (0x000000e0, 0x000000e0);
         Mask565b  = createMask_2x32_128 (0x0000001f, 0x0000001f);
         MaskRed   = createMask_2x32_128 (0x00f80000, 0x00f80000);
         MaskGreen = createMask_2x32_128 (0x0000fc00, 0x0000fc00);
         MaskBlue  = createMask_2x32_128 (0x000000f8, 0x000000f8);
-
+	Mask565FixRB = createMask_2x32_128 (0x00e000e0, 0x00e000e0);
+	Mask565FixG = createMask_2x32_128  (0x0000c000, 0x0000c000);
         Mask0080 = createMask_16_128 (0x0080);
         Mask00ff = createMask_16_128 (0x00ff);
         Mask0101 = createMask_16_128 (0x0101);
         Maskffff = createMask_16_128 (0xffff);
         Maskff000000 = createMask_2x32_128 (0xff000000, 0xff000000);
         MaskAlpha = createMask_2x32_128 (0x00ff0000, 0x00000000);
 
         /* MMX constants */
@@ -2477,39 +2494,39 @@ fbCompositeSolid_nx0565sse2 (pixman_op_t
         cachePrefetch ((__m128i*)dst);
 
         dstLine += dstStride;
         w = width;
 
         while (w && (unsigned long)dst & 15)
         {
             d = *dst;
+
             *dst++ = pack565_32_16 (pack_1x64_32 (over_1x64 (_mm_movepi64_pi64 (xmmSrc),
                                                              _mm_movepi64_pi64 (xmmAlpha),
                                                              expand565_16_1x64 (d))));
             w--;
         }
 
         /* call prefetch hint to optimize cache load*/
         cachePrefetch ((__m128i*)dst);
 
         while (w >= 8)
         {
             /* fill cache line with next memory */
             cachePrefetchNext ((__m128i*)dst);
 
-            xmmDst = load128Aligned ((__m128i*)dst);
-
-            unpack565_128_4x128 (xmmDst, &xmmDst0, &xmmDst1, &xmmDst2, &xmmDst3);
-
+	    xmmDst = load128Aligned ((__m128i*)dst);
+	    
+	    unpack565_128_4x128 (xmmDst, &xmmDst0, &xmmDst1, &xmmDst2, &xmmDst3);
+	    
             over_2x128 (&xmmSrc, &xmmSrc, &xmmAlpha, &xmmAlpha, &xmmDst0, &xmmDst1);
             over_2x128 (&xmmSrc, &xmmSrc, &xmmAlpha, &xmmAlpha, &xmmDst2, &xmmDst3);
 
             xmmDst = pack565_4x128_128 (&xmmDst0, &xmmDst1, &xmmDst2, &xmmDst3);
-
             save128Aligned ((__m128i*)dst, xmmDst);
 
             dst += 8;
             w -= 8;
         }
 
         while (w--)
         {
--- a/gfx/cairo/libpixman/src/pixman-transformed.c
+++ b/gfx/cairo/libpixman/src/pixman-transformed.c
@@ -588,26 +588,25 @@ ACCESS(fbFetchTransformed)(bits_image_t 
     {
         unit.vector[0] = pixman_fixed_1;
         unit.vector[1] = 0;
         unit.vector[2] = 0;
     }
 
     /* This allows filtering code to pretend that pixels are located at integer coordinates */
     adjust (&v, &unit, -(pixman_fixed_1 / 2));
-    
+
     if (pict->common.filter == PIXMAN_FILTER_NEAREST || pict->common.filter == PIXMAN_FILTER_FAST)
     {
 	/* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */
 	adjust (&v, &unit, pixman_fixed_1 / 2 - pixman_fixed_e);
 	
         if (pict->common.repeat == PIXMAN_REPEAT_NORMAL)
         {
             fbFetchTransformed_Nearest_Normal(pict, width, buffer, mask, maskBits, affine, v, unit);
-
         }
         else if (pict->common.repeat == PIXMAN_REPEAT_PAD)
         {
             fbFetchTransformed_Nearest_Pad(pict, width, buffer, mask, maskBits, affine, v, unit);
         }
         else
         {
             fbFetchTransformed_Nearest_General(pict, width, buffer, mask, maskBits, affine, v, unit);
--- a/gfx/cairo/libpixman/src/pixman-version.h
+++ b/gfx/cairo/libpixman/src/pixman-version.h
@@ -27,20 +27,20 @@
 #ifndef PIXMAN_VERSION_H__
 #define PIXMAN_VERSION_H__
 
 #ifndef PIXMAN_H__
 #  error pixman-version.h should only be included by pixman.h
 #endif
 
 #define PIXMAN_VERSION_MAJOR 0
-#define PIXMAN_VERSION_MINOR 11
-#define PIXMAN_VERSION_MICRO 9
+#define PIXMAN_VERSION_MINOR 12
+#define PIXMAN_VERSION_MICRO 0
 
-#define PIXMAN_VERSION_STRING "0.11.9"
+#define PIXMAN_VERSION_STRING "0.12.0"
 
 #define PIXMAN_VERSION_ENCODE(major, minor, micro) (	\
 	  ((major) * 10000)				\
 	+ ((minor) *   100)				\
 	+ ((micro) *     1))
 
 #define PIXMAN_VERSION PIXMAN_VERSION_ENCODE(	\
 	PIXMAN_VERSION_MAJOR,			\
--- a/gfx/cairo/libpixman/src/pixman.h
+++ b/gfx/cairo/libpixman/src/pixman.h
@@ -69,17 +69,17 @@ SOFTWARE.
 #ifndef PIXMAN_H__
 #define PIXMAN_H__
 
 #include <pixman-version.h>
 
 /*
  * Standard integers
  */
-#if defined (_SVR4) || defined (SVR4) || defined (__OpenBSD__) || defined (_sgi)
+#if defined (_SVR4) || defined (SVR4) || defined (__OpenBSD__) || defined (_sgi) || defined (__sun) || defined (sun)
 #  include <inttypes.h>
 #elif defined (_MSC_VER)
 typedef __int8 int8_t;
 typedef unsigned __int8 uint8_t;
 typedef __int16 int16_t;
 typedef unsigned __int16 uint16_t;
 typedef __int32 int32_t;
 typedef unsigned __int32 uint32_t;