b=419715, upgrade cairo to 1.6 or as-close-as-possible -- imported patch pixman-upgrade.patch ; r=me
authorvladimir@pobox.com
Sun, 06 Apr 2008 22:07:38 -0700
changeset 13978 fc5e72f68a0492b790ac1adf97c2720e5aac8115
parent 13977 649aa78bbbd48bc61388fe5e200df52cc0e8c28b
child 13979 afc8b5ed1b2d5aae0624a50bed106ec7e00faca7
push id1
push userroot
push dateTue, 26 Apr 2011 22:38:44 +0000
treeherdermozilla-beta@bfdb6e623a36 [default view] [failures only]
perfherder[talos] [build metrics] [platform microbench] (compared to previous push)
reviewersme
bugs419715
milestone1.9pre
b=419715, upgrade cairo to 1.6 or as-close-as-possible -- imported patch pixman-upgrade.patch ; r=me
gfx/cairo/README
gfx/cairo/libpixman/src/Makefile.in
gfx/cairo/libpixman/src/pixman-access-accessors.c
gfx/cairo/libpixman/src/pixman-access.c
gfx/cairo/libpixman/src/pixman-combine.c
gfx/cairo/libpixman/src/pixman-compose.c
gfx/cairo/libpixman/src/pixman-mmx.c
gfx/cairo/libpixman/src/pixman-mmx.h
gfx/cairo/libpixman/src/pixman-pict.c
gfx/cairo/libpixman/src/pixman-private.h
gfx/cairo/libpixman/src/pixman-source.c
gfx/cairo/libpixman/src/pixman-sse.c
gfx/cairo/libpixman/src/pixman-sse.h
gfx/cairo/libpixman/src/pixman-transformed-accessors.c
gfx/cairo/libpixman/src/pixman-transformed.c
gfx/cairo/libpixman/src/pixman-trap.c
gfx/cairo/libpixman/src/pixman-utils.c
gfx/cairo/libpixman/src/pixman-version.h
gfx/cairo/libpixman/src/pixman.h
--- a/gfx/cairo/README
+++ b/gfx/cairo/README
@@ -2,18 +2,18 @@ Snapshots of cairo and glitz for mozilla
 
 We only include the relevant parts of each release (generally, src/*.[ch]),
 as we have Makefile.in's that integrate into the Mozilla build system.  For
 documentation and similar, please see the official tarballs at
 http://www.cairographics.org/.
 
 VERSIONS:
 
-  cairo (1.5.x - 1.5.12-56-ga33351f)
-  pixman (0.9.x - pixman-0.9.6-43-gddfb69a)
+  cairo (1.6.x - 1.5.12-56-ga33351f)
+  pixman (0.10.x - pixman-0.10.0-5-g4cde088)
   glitz 0.5.2 (cvs - 2006-01-10)
 
 ***** NOTE FOR VISUAL C++ 6.0 *****
 
 VC6 is not supported.  Please upgrade to VC8.
 
 ==== Patches ====
 
--- a/gfx/cairo/libpixman/src/Makefile.in
+++ b/gfx/cairo/libpixman/src/Makefile.in
@@ -78,34 +78,40 @@ MMX_CFLAGS=-mmmx -msse -Winline
 ifneq ($(MOZ_WIDGET_TOOLKIT),os2)
 MMX_CFLAGS+=--param inline-unit-growth=10000 --param large-function-growth=10000
 endif
 endif
 endif
 
 
 CSRCS	= \
+	pixman-access.c \
+	pixman-access-accessors.c \
+	pixman-combine.c \
 	pixman-compose.c \
 	pixman-compose-accessors.c \
 	pixman-compute-region.c \
 	pixman-edge.c \
 	pixman-edge-accessors.c \
 	pixman-image.c \
 	pixman-pict.c \
 	pixman-region.c \
+	pixman-source.c \
+	pixman-transformed.c \
+	pixman-transformed-accessors.c \
 	pixman-trap.c \
 	pixman-utils.c \
 	$(NULL)
 
 ifdef USE_MMX
 CSRCS += pixman-mmx.c
 DEFINES += -DUSE_MMX
 endif
 
-EXPORTS		= pixman.h pixman-remap.h
+EXPORTS		= pixman.h pixman-remap.h pixman-version.h
 
 LOCAL_INCLUDES	+= -I$(srcdir) -I$(srcdir)/../../cairo/src
 
 FORCE_STATIC_LIB = 1
 # This library is used by other shared libs in a static build
 FORCE_USE_PIC = 1
 
 include $(topsrcdir)/config/rules.mk
new file mode 100644
--- /dev/null
+++ b/gfx/cairo/libpixman/src/pixman-access-accessors.c
@@ -0,0 +1,3 @@
+#define PIXMAN_FB_ACCESSORS
+
+#include "pixman-access.c"
new file mode 100644
--- /dev/null
+++ b/gfx/cairo/libpixman/src/pixman-access.c
@@ -0,0 +1,1686 @@
+/*
+ *
+ * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
+ *             2005 Lars Knoll & Zack Rusin, Trolltech
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Keith Packard not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission.  Keith Packard makes no
+ * representations about the suitability of this software for any purpose.  It
+ * is provided "as is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <string.h>
+
+#include "pixman-private.h"
+
+#ifdef PIXMAN_FB_ACCESSORS
+#define FETCH_PROC_FOR_PICTURE pixman_fetchProcForPicture_accessors
+#define FETCH_PIXEL_PROC_FOR_PICTURE pixman_fetchPixelProcForPicture_accessors
+#define STORE_PROC_FOR_PICTURE pixman_storeProcForPicture_accessors
+#else
+#define FETCH_PROC_FOR_PICTURE pixman_fetchProcForPicture
+#define FETCH_PIXEL_PROC_FOR_PICTURE pixman_fetchPixelProcForPicture
+#define STORE_PROC_FOR_PICTURE pixman_storeProcForPicture
+#endif
+
+/*
+ * YV12 setup and access macros
+ */
+
+#define YV12_SETUP(pict) \
+	uint32_t *bits = pict->bits; \
+	int stride = pict->rowstride; \
+	int offset0 = stride < 0 ? \
+		((-stride) >> 1) * ((pict->height - 1) >> 1) - stride : \
+		stride * pict->height; \
+	int offset1 = stride < 0 ? \
+		offset0 + ((-stride) >> 1) * ((pict->height) >> 1) : \
+		offset0 + (offset0 >> 2)
+/* Note n trailing semicolon on the above macro; if it's there, then
+ * the typical usage of YV12_SETUP(pict); will have an extra trailing ;
+ * that some compilers will interpret as a statement -- and then any further
+ * variable declarations will cause an error.
+ */
+
+#define YV12_Y(line)		\
+    ((uint8_t *) ((bits) + (stride) * (line)))
+
+#define YV12_U(line)	      \
+    ((uint8_t *) ((bits) + offset1 + \
+		((stride) >> 1) * ((line) >> 1)))
+
+#define YV12_V(line)	      \
+    ((uint8_t *) ((bits) + offset0 + \
+		((stride) >> 1) * ((line) >> 1)))
+
+/*********************************** Fetch ************************************/
+
+static FASTCALL void
+fbFetch_a8r8g8b8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    MEMCPY_WRAPPED(pict,
+                   buffer, (const uint32_t *)bits + x,
+		   width*sizeof(uint32_t));
+}
+
+static FASTCALL void
+fbFetch_x8r8g8b8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const uint32_t *pixel = (const uint32_t *)bits + x;
+    const uint32_t *end = pixel + width;
+    while (pixel < end) {
+	*buffer++ = READ(pict, pixel++) | 0xff000000;
+    }
+}
+
+static FASTCALL void
+fbFetch_a8b8g8r8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const uint32_t *pixel = (uint32_t *)bits + x;
+    const uint32_t *end = pixel + width;
+    while (pixel < end) {
+	uint32_t p = READ(pict, pixel++);
+	*buffer++ = (p & 0xff00ff00) |
+	            ((p >> 16) & 0xff) |
+	    ((p & 0xff) << 16);
+    }
+}
+
+static FASTCALL void
+fbFetch_x8b8g8r8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const uint32_t *pixel = (uint32_t *)bits + x;
+    const uint32_t *end = pixel + width;
+    while (pixel < end) {
+	uint32_t p = READ(pict, pixel++);
+	*buffer++ = 0xff000000 |
+	    (p & 0x0000ff00) |
+	    ((p >> 16) & 0xff) |
+	    ((p & 0xff) << 16);
+    }
+}
+
+static FASTCALL void
+fbFetch_r8g8b8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const uint8_t *pixel = (const uint8_t *)bits + 3*x;
+    const uint8_t *end = pixel + 3*width;
+    while (pixel < end) {
+	uint32_t b = Fetch24(pict, pixel) | 0xff000000;
+	pixel += 3;
+	*buffer++ = b;
+    }
+}
+
+static FASTCALL void
+fbFetch_b8g8r8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const uint8_t *pixel = (const uint8_t *)bits + 3*x;
+    const uint8_t *end = pixel + 3*width;
+    while (pixel < end) {
+	uint32_t b = 0xff000000;
+#if IMAGE_BYTE_ORDER == MSBFirst
+	b |= (READ(pict, pixel++));
+	b |= (READ(pict, pixel++) << 8);
+	b |= (READ(pict, pixel++) << 16);
+#else
+	b |= (READ(pict, pixel++) << 16);
+	b |= (READ(pict, pixel++) << 8);
+	b |= (READ(pict, pixel++));
+#endif
+	*buffer++ = b;
+    }
+}
+
+static FASTCALL void
+fbFetch_r5g6b5 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const uint16_t *pixel = (const uint16_t *)bits + x;
+    const uint16_t *end = pixel + width;
+    while (pixel < end) {
+	uint32_t p = READ(pict, pixel++);
+	uint32_t r = (((p) << 3) & 0xf8) |
+	    (((p) << 5) & 0xfc00) |
+	    (((p) << 8) & 0xf80000);
+	r |= (r >> 5) & 0x70007;
+	r |= (r >> 6) & 0x300;
+	*buffer++ = 0xff000000 | r;
+    }
+}
+
+static FASTCALL void
+fbFetch_b5g6r5 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{
+    uint32_t  r,g,b;
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const uint16_t *pixel = (const uint16_t *)bits + x;
+    const uint16_t *end = pixel + width;
+    while (pixel < end) {
+	uint32_t  p = READ(pict, pixel++);
+	b = ((p & 0xf800) | ((p & 0xe000) >> 5)) >> 8;
+	g = ((p & 0x07e0) | ((p & 0x0600) >> 6)) << 5;
+	r = ((p & 0x001c) | ((p & 0x001f) << 5)) << 14;
+	*buffer++ = 0xff000000 | r | g | b;
+    }
+}
+
+static FASTCALL void
+fbFetch_a1r5g5b5 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{
+    uint32_t  r,g,b, a;
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const uint16_t *pixel = (const uint16_t *)bits + x;
+    const uint16_t *end = pixel + width;
+    while (pixel < end) {
+	uint32_t  p = READ(pict, pixel++);
+
+	a = (uint32_t) ((uint8_t) (0 - ((p & 0x8000) >> 15))) << 24;
+	r = ((p & 0x7c00) | ((p & 0x7000) >> 5)) << 9;
+	g = ((p & 0x03e0) | ((p & 0x0380) >> 5)) << 6;
+	b = ((p & 0x001c) | ((p & 0x001f) << 5)) >> 2;
+	*buffer++ = a | r | g | b;
+    }
+}
+
+static FASTCALL void
+fbFetch_x1r5g5b5 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{
+    uint32_t  r,g,b;
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const uint16_t *pixel = (const uint16_t *)bits + x;
+    const uint16_t *end = pixel + width;
+    while (pixel < end) {
+	uint32_t  p = READ(pict, pixel++);
+
+	r = ((p & 0x7c00) | ((p & 0x7000) >> 5)) << 9;
+	g = ((p & 0x03e0) | ((p & 0x0380) >> 5)) << 6;
+	b = ((p & 0x001c) | ((p & 0x001f) << 5)) >> 2;
+	*buffer++ = 0xff000000 | r | g | b;
+    }
+}
+
+static FASTCALL void
+fbFetch_a1b5g5r5 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{
+    uint32_t  r,g,b, a;
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const uint16_t *pixel = (const uint16_t *)bits + x;
+    const uint16_t *end = pixel + width;
+    while (pixel < end) {
+	uint32_t  p = READ(pict, pixel++);
+
+	a = (uint32_t) ((uint8_t) (0 - ((p & 0x8000) >> 15))) << 24;
+	b = ((p & 0x7c00) | ((p & 0x7000) >> 5)) >> 7;
+	g = ((p & 0x03e0) | ((p & 0x0380) >> 5)) << 6;
+	r = ((p & 0x001c) | ((p & 0x001f) << 5)) << 14;
+	*buffer++ = a | r | g | b;
+    }
+}
+
+static FASTCALL void
+fbFetch_x1b5g5r5 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{
+    uint32_t  r,g,b;
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const uint16_t *pixel = (const uint16_t *)bits + x;
+    const uint16_t *end = pixel + width;
+    while (pixel < end) {
+	uint32_t  p = READ(pict, pixel++);
+
+	b = ((p & 0x7c00) | ((p & 0x7000) >> 5)) >> 7;
+	g = ((p & 0x03e0) | ((p & 0x0380) >> 5)) << 6;
+	r = ((p & 0x001c) | ((p & 0x001f) << 5)) << 14;
+	*buffer++ = 0xff000000 | r | g | b;
+    }
+}
+
+static FASTCALL void
+fbFetch_a4r4g4b4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{
+    uint32_t  r,g,b, a;
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const uint16_t *pixel = (const uint16_t *)bits + x;
+    const uint16_t *end = pixel + width;
+    while (pixel < end) {
+	uint32_t  p = READ(pict, pixel++);
+
+	a = ((p & 0xf000) | ((p & 0xf000) >> 4)) << 16;
+	r = ((p & 0x0f00) | ((p & 0x0f00) >> 4)) << 12;
+	g = ((p & 0x00f0) | ((p & 0x00f0) >> 4)) << 8;
+	b = ((p & 0x000f) | ((p & 0x000f) << 4));
+	*buffer++ = a | r | g | b;
+    }
+}
+
+static FASTCALL void
+fbFetch_x4r4g4b4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{
+    uint32_t  r,g,b;
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const uint16_t *pixel = (const uint16_t *)bits + x;
+    const uint16_t *end = pixel + width;
+    while (pixel < end) {
+	uint32_t  p = READ(pict, pixel++);
+
+	r = ((p & 0x0f00) | ((p & 0x0f00) >> 4)) << 12;
+	g = ((p & 0x00f0) | ((p & 0x00f0) >> 4)) << 8;
+	b = ((p & 0x000f) | ((p & 0x000f) << 4));
+	*buffer++ = 0xff000000 | r | g | b;
+    }
+}
+
+static FASTCALL void
+fbFetch_a4b4g4r4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{
+    uint32_t  r,g,b, a;
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const uint16_t *pixel = (const uint16_t *)bits + x;
+    const uint16_t *end = pixel + width;
+    while (pixel < end) {
+	uint32_t  p = READ(pict, pixel++);
+
+	a = ((p & 0xf000) | ((p & 0xf000) >> 4)) << 16;
+	b = ((p & 0x0f00) | ((p & 0x0f00) >> 4)) >> 4;
+	g = ((p & 0x00f0) | ((p & 0x00f0) >> 4)) << 8;
+	r = ((p & 0x000f) | ((p & 0x000f) << 4)) << 16;
+	*buffer++ = a | r | g | b;
+    }
+}
+
+static FASTCALL void
+fbFetch_x4b4g4r4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{
+    uint32_t  r,g,b;
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const uint16_t *pixel = (const uint16_t *)bits + x;
+    const uint16_t *end = pixel + width;
+    while (pixel < end) {
+	uint32_t  p = READ(pict, pixel++);
+
+	b = ((p & 0x0f00) | ((p & 0x0f00) >> 4)) >> 4;
+	g = ((p & 0x00f0) | ((p & 0x00f0) >> 4)) << 8;
+	r = ((p & 0x000f) | ((p & 0x000f) << 4)) << 16;
+	*buffer++ = 0xff000000 | r | g | b;
+    }
+}
+
+static FASTCALL void
+fbFetch_a8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const uint8_t *pixel = (const uint8_t *)bits + x;
+    const uint8_t *end = pixel + width;
+    while (pixel < end) {
+	*buffer++ = READ(pict, pixel++) << 24;
+    }
+}
+
+static FASTCALL void
+fbFetch_r3g3b2 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{
+    uint32_t  r,g,b;
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const uint8_t *pixel = (const uint8_t *)bits + x;
+    const uint8_t *end = pixel + width;
+    while (pixel < end) {
+	uint32_t  p = READ(pict, pixel++);
+
+	r = ((p & 0xe0) | ((p & 0xe0) >> 3) | ((p & 0xc0) >> 6)) << 16;
+	g = ((p & 0x1c) | ((p & 0x18) >> 3) | ((p & 0x1c) << 3)) << 8;
+	b = (((p & 0x03)     ) |
+	     ((p & 0x03) << 2) |
+	     ((p & 0x03) << 4) |
+	     ((p & 0x03) << 6));
+	*buffer++ = 0xff000000 | r | g | b;
+    }
+}
+
+static FASTCALL void
+fbFetch_b2g3r3 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{
+    uint32_t  r,g,b;
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const uint8_t *pixel = (const uint8_t *)bits + x;
+    const uint8_t *end = pixel + width;
+    while (pixel < end) {
+	uint32_t  p = READ(pict, pixel++);
+
+	b = (((p & 0xc0)     ) |
+	     ((p & 0xc0) >> 2) |
+	     ((p & 0xc0) >> 4) |
+	     ((p & 0xc0) >> 6));
+	g = ((p & 0x38) | ((p & 0x38) >> 3) | ((p & 0x30) << 2)) << 8;
+	r = (((p & 0x07)     ) |
+	     ((p & 0x07) << 3) |
+	     ((p & 0x06) << 6)) << 16;
+	*buffer++ = 0xff000000 | r | g | b;
+    }
+}
+
+static FASTCALL void
+fbFetch_a2r2g2b2 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{
+    uint32_t   a,r,g,b;
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const uint8_t *pixel = (const uint8_t *)bits + x;
+    const uint8_t *end = pixel + width;
+    while (pixel < end) {
+	uint32_t  p = READ(pict, pixel++);
+
+	a = ((p & 0xc0) * 0x55) << 18;
+	r = ((p & 0x30) * 0x55) << 12;
+	g = ((p & 0x0c) * 0x55) << 6;
+	b = ((p & 0x03) * 0x55);
+	*buffer++ = a|r|g|b;
+    }
+}
+
+static FASTCALL void
+fbFetch_a2b2g2r2 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{
+    uint32_t   a,r,g,b;
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const uint8_t *pixel = (const uint8_t *)bits + x;
+    const uint8_t *end = pixel + width;
+    while (pixel < end) {
+	uint32_t  p = READ(pict, pixel++);
+
+	a = ((p & 0xc0) * 0x55) << 18;
+	b = ((p & 0x30) * 0x55) >> 6;
+	g = ((p & 0x0c) * 0x55) << 6;
+	r = ((p & 0x03) * 0x55) << 16;
+	*buffer++ = a|r|g|b;
+    }
+}
+
+static FASTCALL void
+fbFetch_c8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const pixman_indexed_t * indexed = pict->indexed;
+    const uint8_t *pixel = (const uint8_t *)bits + x;
+    const uint8_t *end = pixel + width;
+    while (pixel < end) {
+	uint32_t  p = READ(pict, pixel++);
+	*buffer++ = indexed->rgba[p];
+    }
+}
+
+static FASTCALL void
+fbFetch_x4a4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const uint8_t *pixel = (const uint8_t *)bits + x;
+    const uint8_t *end = pixel + width;
+    while (pixel < end) {
+	uint8_t p = READ(pict, pixel++) & 0xf;
+	*buffer++ = (p | (p << 4)) << 24;
+    }
+}
+
+#define Fetch8(img,l,o)    (READ(img, (uint8_t *)(l) + ((o) >> 2)))
+#if IMAGE_BYTE_ORDER == MSBFirst
+#define Fetch4(img,l,o)    ((o) & 2 ? Fetch8(img,l,o) & 0xf : Fetch8(img,l,o) >> 4)
+#else
+#define Fetch4(img,l,o)    ((o) & 2 ? Fetch8(img,l,o) >> 4 : Fetch8(img,l,o) & 0xf)
+#endif
+
+static FASTCALL void
+fbFetch_a4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    int i;
+    for (i = 0; i < width; ++i) {
+	uint32_t  p = Fetch4(pict, bits, i + x);
+
+	p |= p << 4;
+	*buffer++ = p << 24;
+    }
+}
+
+static FASTCALL void
+fbFetch_r1g2b1 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{
+    uint32_t  r,g,b;
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    int i;
+    for (i = 0; i < width; ++i) {
+	uint32_t  p = Fetch4(pict, bits, i + x);
+
+	r = ((p & 0x8) * 0xff) << 13;
+	g = ((p & 0x6) * 0x55) << 7;
+	b = ((p & 0x1) * 0xff);
+	*buffer++ = 0xff000000|r|g|b;
+    }
+}
+
+static FASTCALL void
+fbFetch_b1g2r1 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{
+    uint32_t  r,g,b;
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    int i;
+    for (i = 0; i < width; ++i) {
+	uint32_t  p = Fetch4(pict, bits, i + x);
+
+	b = ((p & 0x8) * 0xff) >> 3;
+	g = ((p & 0x6) * 0x55) << 7;
+	r = ((p & 0x1) * 0xff) << 16;
+	*buffer++ = 0xff000000|r|g|b;
+    }
+}
+
+static FASTCALL void
+fbFetch_a1r1g1b1 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{
+    uint32_t  a,r,g,b;
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    int i;
+    for (i = 0; i < width; ++i) {
+	uint32_t  p = Fetch4(pict, bits, i + x);
+
+	a = ((p & 0x8) * 0xff) << 21;
+	r = ((p & 0x4) * 0xff) << 14;
+	g = ((p & 0x2) * 0xff) << 7;
+	b = ((p & 0x1) * 0xff);
+	*buffer++ = a|r|g|b;
+    }
+}
+
+static FASTCALL void
+fbFetch_a1b1g1r1 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{
+    uint32_t  a,r,g,b;
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    int i;
+    for (i = 0; i < width; ++i) {
+	uint32_t  p = Fetch4(pict, bits, i + x);
+
+	a = ((p & 0x8) * 0xff) << 21;
+	r = ((p & 0x4) * 0xff) >> 3;
+	g = ((p & 0x2) * 0xff) << 7;
+	b = ((p & 0x1) * 0xff) << 16;
+	*buffer++ = a|r|g|b;
+    }
+}
+
+static FASTCALL void
+fbFetch_c4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const pixman_indexed_t * indexed = pict->indexed;
+    int i;
+    for (i = 0; i < width; ++i) {
+	uint32_t  p = Fetch4(pict, bits, i + x);
+
+	*buffer++ = indexed->rgba[p];
+    }
+}
+
+
+static FASTCALL void
+fbFetch_a1 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    int i;
+    for (i = 0; i < width; ++i) {
+	uint32_t  p = READ(pict, bits + ((i + x) >> 5));
+	uint32_t  a;
+#if BITMAP_BIT_ORDER == MSBFirst
+	a = p >> (0x1f - ((i+x) & 0x1f));
+#else
+	a = p >> ((i+x) & 0x1f);
+#endif
+	a = a & 1;
+	a |= a << 1;
+	a |= a << 2;
+	a |= a << 4;
+	*buffer++ = a << 24;
+    }
+}
+
+static FASTCALL void
+fbFetch_g1 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const pixman_indexed_t * indexed = pict->indexed;
+    int i;
+    for (i = 0; i < width; ++i) {
+	uint32_t p = READ(pict, bits + ((i+x) >> 5));
+	uint32_t a;
+#if BITMAP_BIT_ORDER == MSBFirst
+	a = p >> (0x1f - ((i+x) & 0x1f));
+#else
+	a = p >> ((i+x) & 0x1f);
+#endif
+	a = a & 1;
+	*buffer++ = indexed->rgba[a];
+    }
+}
+
+static FASTCALL void
+fbFetch_yuy2 (bits_image_t *pict, int x, int line, int width, uint32_t *buffer)
+{
+    int16_t y, u, v;
+    int32_t r, g, b;
+    int   i;
+
+    const uint32_t *bits = pict->bits + pict->rowstride * line;
+
+    for (i = 0; i < width; i++)
+    {
+	y = ((uint8_t *) bits)[(x + i) << 1] - 16;
+	u = ((uint8_t *) bits)[(((x + i) << 1) & -4) + 1] - 128;
+	v = ((uint8_t *) bits)[(((x + i) << 1) & -4) + 3] - 128;
+
+	/* R = 1.164(Y - 16) + 1.596(V - 128) */
+	r = 0x012b27 * y + 0x019a2e * v;
+	/* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */
+	g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u;
+	/* B = 1.164(Y - 16) + 2.018(U - 128) */
+	b = 0x012b27 * y + 0x0206a2 * u;
+
+    WRITE(pict, buffer++, 0xff000000 |
+	(r >= 0 ? r < 0x1000000 ? r         & 0xff0000 : 0xff0000 : 0) |
+	(g >= 0 ? g < 0x1000000 ? (g >> 8)  & 0x00ff00 : 0x00ff00 : 0) |
+	(b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0));
+    }
+}
+
+static FASTCALL void
+fbFetch_yv12 (bits_image_t *pict, int x, int line, int width, uint32_t *buffer)
+{
+    YV12_SETUP(pict);
+    uint8_t *pY = YV12_Y (line);
+    uint8_t *pU = YV12_U (line);
+    uint8_t *pV = YV12_V (line);
+    int16_t y, u, v;
+    int32_t r, g, b;
+    int   i;
+
+    for (i = 0; i < width; i++)
+    {
+	y = pY[x + i] - 16;
+	u = pU[(x + i) >> 1] - 128;
+	v = pV[(x + i) >> 1] - 128;
+
+	/* R = 1.164(Y - 16) + 1.596(V - 128) */
+	r = 0x012b27 * y + 0x019a2e * v;
+	/* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */
+	g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u;
+	/* B = 1.164(Y - 16) + 2.018(U - 128) */
+	b = 0x012b27 * y + 0x0206a2 * u;
+
+	WRITE(pict, buffer++, 0xff000000 |
+	    (r >= 0 ? r < 0x1000000 ? r         & 0xff0000 : 0xff0000 : 0) |
+	    (g >= 0 ? g < 0x1000000 ? (g >> 8)  & 0x00ff00 : 0x00ff00 : 0) |
+	    (b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0));
+    }
+}
+
+fetchProc FETCH_PROC_FOR_PICTURE (bits_image_t * pict)
+{
+    switch(pict->format) {
+    case PIXMAN_a8r8g8b8: return fbFetch_a8r8g8b8;
+    case PIXMAN_x8r8g8b8: return fbFetch_x8r8g8b8;
+    case PIXMAN_a8b8g8r8: return fbFetch_a8b8g8r8;
+    case PIXMAN_x8b8g8r8: return fbFetch_x8b8g8r8;
+
+        /* 24bpp formats */
+    case PIXMAN_r8g8b8: return fbFetch_r8g8b8;
+    case PIXMAN_b8g8r8: return fbFetch_b8g8r8;
+
+        /* 16bpp formats */
+    case PIXMAN_r5g6b5: return fbFetch_r5g6b5;
+    case PIXMAN_b5g6r5: return fbFetch_b5g6r5;
+
+    case PIXMAN_a1r5g5b5: return fbFetch_a1r5g5b5;
+    case PIXMAN_x1r5g5b5: return fbFetch_x1r5g5b5;
+    case PIXMAN_a1b5g5r5: return fbFetch_a1b5g5r5;
+    case PIXMAN_x1b5g5r5: return fbFetch_x1b5g5r5;
+    case PIXMAN_a4r4g4b4: return fbFetch_a4r4g4b4;
+    case PIXMAN_x4r4g4b4: return fbFetch_x4r4g4b4;
+    case PIXMAN_a4b4g4r4: return fbFetch_a4b4g4r4;
+    case PIXMAN_x4b4g4r4: return fbFetch_x4b4g4r4;
+
+        /* 8bpp formats */
+    case PIXMAN_a8: return  fbFetch_a8;
+    case PIXMAN_r3g3b2: return fbFetch_r3g3b2;
+    case PIXMAN_b2g3r3: return fbFetch_b2g3r3;
+    case PIXMAN_a2r2g2b2: return fbFetch_a2r2g2b2;
+    case PIXMAN_a2b2g2r2: return fbFetch_a2b2g2r2;
+    case PIXMAN_c8: return  fbFetch_c8;
+    case PIXMAN_g8: return  fbFetch_c8;
+    case PIXMAN_x4a4: return fbFetch_x4a4;
+
+        /* 4bpp formats */
+    case PIXMAN_a4: return  fbFetch_a4;
+    case PIXMAN_r1g2b1: return fbFetch_r1g2b1;
+    case PIXMAN_b1g2r1: return fbFetch_b1g2r1;
+    case PIXMAN_a1r1g1b1: return fbFetch_a1r1g1b1;
+    case PIXMAN_a1b1g1r1: return fbFetch_a1b1g1r1;
+    case PIXMAN_c4: return  fbFetch_c4;
+    case PIXMAN_g4: return  fbFetch_c4;
+
+        /* 1bpp formats */
+    case PIXMAN_a1: return  fbFetch_a1;
+    case PIXMAN_g1: return  fbFetch_g1;
+
+        /* YUV formats */
+    case PIXMAN_yuy2: return fbFetch_yuy2;
+    case PIXMAN_yv12: return fbFetch_yv12;
+    }
+
+    return NULL;
+}
+
+/**************************** Pixel wise fetching *****************************/
+
+static FASTCALL uint32_t
+fbFetchPixel_a8r8g8b8 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t *bits = pict->bits + line*pict->rowstride;
+    return READ(pict, (uint32_t *)bits + offset);
+}
+
+static FASTCALL uint32_t
+fbFetchPixel_x8r8g8b8 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t *bits = pict->bits + line*pict->rowstride;
+    return READ(pict, (uint32_t *)bits + offset) | 0xff000000;
+}
+
+static FASTCALL uint32_t
+fbFetchPixel_a8b8g8r8 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t *bits = pict->bits + line*pict->rowstride;
+    uint32_t  pixel = READ(pict, (uint32_t *)bits + offset);
+
+    return ((pixel & 0xff000000) |
+	    ((pixel >> 16) & 0xff) |
+	    (pixel & 0x0000ff00) |
+	    ((pixel & 0xff) << 16));
+}
+
+static FASTCALL uint32_t
+fbFetchPixel_x8b8g8r8 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t *bits = pict->bits + line*pict->rowstride;
+    uint32_t  pixel = READ(pict, (uint32_t *)bits + offset);
+
+    return ((0xff000000) |
+	    ((pixel >> 16) & 0xff) |
+	    (pixel & 0x0000ff00) |
+	    ((pixel & 0xff) << 16));
+}
+
+static FASTCALL uint32_t
+fbFetchPixel_r8g8b8 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t *bits = pict->bits + line*pict->rowstride;
+    uint8_t   *pixel = ((uint8_t *) bits) + (offset*3);
+#if IMAGE_BYTE_ORDER == MSBFirst
+    return (0xff000000 |
+	    (READ(pict, pixel + 0) << 16) |
+	    (READ(pict, pixel + 1) << 8) |
+	    (READ(pict, pixel + 2)));
+#else
+    return (0xff000000 |
+	    (READ(pict, pixel + 2) << 16) |
+	    (READ(pict, pixel + 1) << 8) |
+	    (READ(pict, pixel + 0)));
+#endif
+}
+
+static FASTCALL uint32_t
+fbFetchPixel_b8g8r8 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t *bits = pict->bits + line*pict->rowstride;
+    uint8_t   *pixel = ((uint8_t *) bits) + (offset*3);
+#if IMAGE_BYTE_ORDER == MSBFirst
+    return (0xff000000 |
+	    (READ(pict, pixel + 2) << 16) |
+	    (READ(pict, pixel + 1) << 8) |
+	    (READ(pict, pixel + 0)));
+#else
+    return (0xff000000 |
+	    (READ(pict, pixel + 0) << 16) |
+	    (READ(pict, pixel + 1) << 8) |
+	    (READ(pict, pixel + 2)));
+#endif
+}
+
+static FASTCALL uint32_t
+fbFetchPixel_r5g6b5 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t  r,g,b;
+    uint32_t *bits = pict->bits + line*pict->rowstride;
+    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
+
+    r = ((pixel & 0xf800) | ((pixel & 0xe000) >> 5)) << 8;
+    g = ((pixel & 0x07e0) | ((pixel & 0x0600) >> 6)) << 5;
+    b = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) >> 2;
+    return (0xff000000 | r | g | b);
+}
+
+static FASTCALL uint32_t
+fbFetchPixel_b5g6r5 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t  r,g,b;
+    uint32_t *bits = pict->bits + line*pict->rowstride;
+    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
+
+    b = ((pixel & 0xf800) | ((pixel & 0xe000) >> 5)) >> 8;
+    g = ((pixel & 0x07e0) | ((pixel & 0x0600) >> 6)) << 5;
+    r = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) << 14;
+    return (0xff000000 | r | g | b);
+}
+
+static FASTCALL uint32_t
+fbFetchPixel_a1r5g5b5 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t  a,r,g,b;
+    uint32_t *bits = pict->bits + line*pict->rowstride;
+    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
+
+    a = (uint32_t) ((uint8_t) (0 - ((pixel & 0x8000) >> 15))) << 24;
+    r = ((pixel & 0x7c00) | ((pixel & 0x7000) >> 5)) << 9;
+    g = ((pixel & 0x03e0) | ((pixel & 0x0380) >> 5)) << 6;
+    b = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) >> 2;
+    return (a | r | g | b);
+}
+
+static FASTCALL uint32_t
+fbFetchPixel_x1r5g5b5 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t  r,g,b;
+    uint32_t *bits = pict->bits + line*pict->rowstride;
+    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
+
+    r = ((pixel & 0x7c00) | ((pixel & 0x7000) >> 5)) << 9;
+    g = ((pixel & 0x03e0) | ((pixel & 0x0380) >> 5)) << 6;
+    b = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) >> 2;
+    return (0xff000000 | r | g | b);
+}
+
+static FASTCALL uint32_t
+fbFetchPixel_a1b5g5r5 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t  a,r,g,b;
+    uint32_t *bits = pict->bits + line*pict->rowstride;
+    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
+
+    a = (uint32_t) ((uint8_t) (0 - ((pixel & 0x8000) >> 15))) << 24;
+    b = ((pixel & 0x7c00) | ((pixel & 0x7000) >> 5)) >> 7;
+    g = ((pixel & 0x03e0) | ((pixel & 0x0380) >> 5)) << 6;
+    r = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) << 14;
+    return (a | r | g | b);
+}
+
+static FASTCALL uint32_t
+fbFetchPixel_x1b5g5r5 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t  r,g,b;
+    uint32_t *bits = pict->bits + line*pict->rowstride;
+    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
+
+    b = ((pixel & 0x7c00) | ((pixel & 0x7000) >> 5)) >> 7;
+    g = ((pixel & 0x03e0) | ((pixel & 0x0380) >> 5)) << 6;
+    r = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) << 14;
+    return (0xff000000 | r | g | b);
+}
+
+static FASTCALL uint32_t
+fbFetchPixel_a4r4g4b4 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t  a,r,g,b;
+    uint32_t *bits = pict->bits + line*pict->rowstride;
+    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
+
+    a = ((pixel & 0xf000) | ((pixel & 0xf000) >> 4)) << 16;
+    r = ((pixel & 0x0f00) | ((pixel & 0x0f00) >> 4)) << 12;
+    g = ((pixel & 0x00f0) | ((pixel & 0x00f0) >> 4)) << 8;
+    b = ((pixel & 0x000f) | ((pixel & 0x000f) << 4));
+    return (a | r | g | b);
+}
+
+static FASTCALL uint32_t
+fbFetchPixel_x4r4g4b4 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t  r,g,b;
+    uint32_t *bits = pict->bits + line*pict->rowstride;
+    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
+
+    r = ((pixel & 0x0f00) | ((pixel & 0x0f00) >> 4)) << 12;
+    g = ((pixel & 0x00f0) | ((pixel & 0x00f0) >> 4)) << 8;
+    b = ((pixel & 0x000f) | ((pixel & 0x000f) << 4));
+    return (0xff000000 | r | g | b);
+}
+
+static FASTCALL uint32_t
+fbFetchPixel_a4b4g4r4 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t  a,r,g,b;
+    uint32_t *bits = pict->bits + line*pict->rowstride;
+    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
+
+    a = ((pixel & 0xf000) | ((pixel & 0xf000) >> 4)) << 16;
+    b = ((pixel & 0x0f00) | ((pixel & 0x0f00) >> 4)) >> 4;
+    g = ((pixel & 0x00f0) | ((pixel & 0x00f0) >> 4)) << 8;
+    r = ((pixel & 0x000f) | ((pixel & 0x000f) << 4)) << 16;
+    return (a | r | g | b);
+}
+
+static FASTCALL uint32_t
+fbFetchPixel_x4b4g4r4 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t  r,g,b;
+    uint32_t *bits = pict->bits + line*pict->rowstride;
+    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
+
+    b = ((pixel & 0x0f00) | ((pixel & 0x0f00) >> 4)) >> 4;
+    g = ((pixel & 0x00f0) | ((pixel & 0x00f0) >> 4)) << 8;
+    r = ((pixel & 0x000f) | ((pixel & 0x000f) << 4)) << 16;
+    return (0xff000000 | r | g | b);
+}
+
+static FASTCALL uint32_t
+fbFetchPixel_a8 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t *bits = pict->bits + line*pict->rowstride;
+    uint32_t   pixel = READ(pict, (uint8_t *) bits + offset);
+
+    return pixel << 24;
+}
+
+static FASTCALL uint32_t
+fbFetchPixel_r3g3b2 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t  r,g,b;
+    uint32_t *bits = pict->bits + line*pict->rowstride;
+    uint32_t   pixel = READ(pict, (uint8_t *) bits + offset);
+
+    r = ((pixel & 0xe0) | ((pixel & 0xe0) >> 3) | ((pixel & 0xc0) >> 6)) << 16;
+    g = ((pixel & 0x1c) | ((pixel & 0x18) >> 3) | ((pixel & 0x1c) << 3)) << 8;
+    b = (((pixel & 0x03)     ) |
+	 ((pixel & 0x03) << 2) |
+	 ((pixel & 0x03) << 4) |
+	 ((pixel & 0x03) << 6));
+    return (0xff000000 | r | g | b);
+}
+
+static FASTCALL uint32_t
+fbFetchPixel_b2g3r3 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t  r,g,b;
+    uint32_t *bits = pict->bits + line*pict->rowstride;
+    uint32_t   pixel = READ(pict, (uint8_t *) bits + offset);
+
+    b = (((pixel & 0xc0)     ) |
+	 ((pixel & 0xc0) >> 2) |
+	 ((pixel & 0xc0) >> 4) |
+	 ((pixel & 0xc0) >> 6));
+    g = ((pixel & 0x38) | ((pixel & 0x38) >> 3) | ((pixel & 0x30) << 2)) << 8;
+    r = (((pixel & 0x07)     ) |
+	 ((pixel & 0x07) << 3) |
+	 ((pixel & 0x06) << 6)) << 16;
+    return (0xff000000 | r | g | b);
+}
+
+static FASTCALL uint32_t
+fbFetchPixel_a2r2g2b2 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t   a,r,g,b;
+    uint32_t *bits = pict->bits + line*pict->rowstride;
+    uint32_t   pixel = READ(pict, (uint8_t *) bits + offset);
+
+    a = ((pixel & 0xc0) * 0x55) << 18;
+    r = ((pixel & 0x30) * 0x55) << 12;
+    g = ((pixel & 0x0c) * 0x55) << 6;
+    b = ((pixel & 0x03) * 0x55);
+    return a|r|g|b;
+}
+
+static FASTCALL uint32_t
+fbFetchPixel_a2b2g2r2 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t   a,r,g,b;
+    uint32_t *bits = pict->bits + line*pict->rowstride;
+    uint32_t   pixel = READ(pict, (uint8_t *) bits + offset);
+
+    a = ((pixel & 0xc0) * 0x55) << 18;
+    b = ((pixel & 0x30) * 0x55) >> 6;
+    g = ((pixel & 0x0c) * 0x55) << 6;
+    r = ((pixel & 0x03) * 0x55) << 16;
+    return a|r|g|b;
+}
+
+static FASTCALL uint32_t
+fbFetchPixel_c8 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t *bits = pict->bits + line*pict->rowstride;
+    uint32_t   pixel = READ(pict, (uint8_t *) bits + offset);
+    const pixman_indexed_t * indexed = pict->indexed;
+    return indexed->rgba[pixel];
+}
+
+static FASTCALL uint32_t
+fbFetchPixel_x4a4 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t *bits = pict->bits + line*pict->rowstride;
+    uint32_t   pixel = READ(pict, (uint8_t *) bits + offset);
+
+    return ((pixel & 0xf) | ((pixel & 0xf) << 4)) << 24;
+}
+
+static FASTCALL uint32_t
+fbFetchPixel_a4 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t *bits = pict->bits + line*pict->rowstride;
+    uint32_t  pixel = Fetch4(pict, bits, offset);
+
+    pixel |= pixel << 4;
+    return pixel << 24;
+}
+
+static FASTCALL uint32_t
+fbFetchPixel_r1g2b1 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t  r,g,b;
+    uint32_t *bits = pict->bits + line*pict->rowstride;
+    uint32_t  pixel = Fetch4(pict, bits, offset);
+
+    r = ((pixel & 0x8) * 0xff) << 13;
+    g = ((pixel & 0x6) * 0x55) << 7;
+    b = ((pixel & 0x1) * 0xff);
+    return 0xff000000|r|g|b;
+}
+
+static FASTCALL uint32_t
+fbFetchPixel_b1g2r1 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t  r,g,b;
+    uint32_t *bits = pict->bits + line*pict->rowstride;
+    uint32_t  pixel = Fetch4(pict, bits, offset);
+
+    b = ((pixel & 0x8) * 0xff) >> 3;
+    g = ((pixel & 0x6) * 0x55) << 7;
+    r = ((pixel & 0x1) * 0xff) << 16;
+    return 0xff000000|r|g|b;
+}
+
+static FASTCALL uint32_t
+fbFetchPixel_a1r1g1b1 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t  a,r,g,b;
+    uint32_t *bits = pict->bits + line*pict->rowstride;
+    uint32_t  pixel = Fetch4(pict, bits, offset);
+
+    a = ((pixel & 0x8) * 0xff) << 21;
+    r = ((pixel & 0x4) * 0xff) << 14;
+    g = ((pixel & 0x2) * 0xff) << 7;
+    b = ((pixel & 0x1) * 0xff);
+    return a|r|g|b;
+}
+
+static FASTCALL uint32_t
+fbFetchPixel_a1b1g1r1 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t  a,r,g,b;
+    uint32_t *bits = pict->bits + line*pict->rowstride;
+    uint32_t  pixel = Fetch4(pict, bits, offset);
+
+    a = ((pixel & 0x8) * 0xff) << 21;
+    r = ((pixel & 0x4) * 0xff) >> 3;
+    g = ((pixel & 0x2) * 0xff) << 7;
+    b = ((pixel & 0x1) * 0xff) << 16;
+    return a|r|g|b;
+}
+
+static FASTCALL uint32_t
+fbFetchPixel_c4 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t *bits = pict->bits + line*pict->rowstride;
+    uint32_t  pixel = Fetch4(pict, bits, offset);
+    const pixman_indexed_t * indexed = pict->indexed;
+
+    return indexed->rgba[pixel];
+}
+
+
+static FASTCALL uint32_t
+fbFetchPixel_a1 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t *bits = pict->bits + line*pict->rowstride;
+    uint32_t  pixel = READ(pict, bits + (offset >> 5));
+    uint32_t  a;
+#if BITMAP_BIT_ORDER == MSBFirst
+    a = pixel >> (0x1f - (offset & 0x1f));
+#else
+    a = pixel >> (offset & 0x1f);
+#endif
+    a = a & 1;
+    a |= a << 1;
+    a |= a << 2;
+    a |= a << 4;
+    return a << 24;
+}
+
+static FASTCALL uint32_t
+fbFetchPixel_g1 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t *bits = pict->bits + line*pict->rowstride;
+    uint32_t pixel = READ(pict, bits + (offset >> 5));
+    const pixman_indexed_t * indexed = pict->indexed;
+    uint32_t a;
+#if BITMAP_BIT_ORDER == MSBFirst
+    a = pixel >> (0x1f - (offset & 0x1f));
+#else
+    a = pixel >> (offset & 0x1f);
+#endif
+    a = a & 1;
+    return indexed->rgba[a];
+}
+
+static FASTCALL uint32_t
+fbFetchPixel_yuy2 (bits_image_t *pict, int offset, int line)
+{
+    int16_t y, u, v;
+    int32_t r, g, b;
+
+    const uint32_t *bits = pict->bits + pict->rowstride * line;
+
+    y = ((uint8_t *) bits)[offset << 1] - 16;
+    u = ((uint8_t *) bits)[((offset << 1) & -4) + 1] - 128;
+    v = ((uint8_t *) bits)[((offset << 1) & -4) + 3] - 128;
+
+    /* R = 1.164(Y - 16) + 1.596(V - 128) */
+    r = 0x012b27 * y + 0x019a2e * v;
+    /* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */
+    g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u;
+    /* B = 1.164(Y - 16) + 2.018(U - 128) */
+    b = 0x012b27 * y + 0x0206a2 * u;
+
+    return 0xff000000 |
+	(r >= 0 ? r < 0x1000000 ? r         & 0xff0000 : 0xff0000 : 0) |
+	(g >= 0 ? g < 0x1000000 ? (g >> 8)  & 0x00ff00 : 0x00ff00 : 0) |
+	(b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0);
+}
+
+static FASTCALL uint32_t
+fbFetchPixel_yv12 (bits_image_t *pict, int offset, int line)
+{
+    YV12_SETUP(pict);
+    int16_t y = YV12_Y (line)[offset] - 16;
+    int16_t u = YV12_U (line)[offset >> 1] - 128;
+    int16_t v = YV12_V (line)[offset >> 1] - 128;
+    int32_t r, g, b;
+
+    /* R = 1.164(Y - 16) + 1.596(V - 128) */
+    r = 0x012b27 * y + 0x019a2e * v;
+    /* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */
+    g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u;
+    /* B = 1.164(Y - 16) + 2.018(U - 128) */
+    b = 0x012b27 * y + 0x0206a2 * u;
+
+    return 0xff000000 |
+	(r >= 0 ? r < 0x1000000 ? r         & 0xff0000 : 0xff0000 : 0) |
+	(g >= 0 ? g < 0x1000000 ? (g >> 8)  & 0x00ff00 : 0x00ff00 : 0) |
+	(b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0);
+}
+
+fetchPixelProc FETCH_PIXEL_PROC_FOR_PICTURE (bits_image_t * pict)
+{
+    switch(pict->format) {
+    case PIXMAN_a8r8g8b8: return fbFetchPixel_a8r8g8b8;
+    case PIXMAN_x8r8g8b8: return fbFetchPixel_x8r8g8b8;
+    case PIXMAN_a8b8g8r8: return fbFetchPixel_a8b8g8r8;
+    case PIXMAN_x8b8g8r8: return fbFetchPixel_x8b8g8r8;
+
+        /* 24bpp formats */
+    case PIXMAN_r8g8b8: return fbFetchPixel_r8g8b8;
+    case PIXMAN_b8g8r8: return fbFetchPixel_b8g8r8;
+
+        /* 16bpp formats */
+    case PIXMAN_r5g6b5: return fbFetchPixel_r5g6b5;
+    case PIXMAN_b5g6r5: return fbFetchPixel_b5g6r5;
+
+    case PIXMAN_a1r5g5b5: return fbFetchPixel_a1r5g5b5;
+    case PIXMAN_x1r5g5b5: return fbFetchPixel_x1r5g5b5;
+    case PIXMAN_a1b5g5r5: return fbFetchPixel_a1b5g5r5;
+    case PIXMAN_x1b5g5r5: return fbFetchPixel_x1b5g5r5;
+    case PIXMAN_a4r4g4b4: return fbFetchPixel_a4r4g4b4;
+    case PIXMAN_x4r4g4b4: return fbFetchPixel_x4r4g4b4;
+    case PIXMAN_a4b4g4r4: return fbFetchPixel_a4b4g4r4;
+    case PIXMAN_x4b4g4r4: return fbFetchPixel_x4b4g4r4;
+
+        /* 8bpp formats */
+    case PIXMAN_a8: return  fbFetchPixel_a8;
+    case PIXMAN_r3g3b2: return fbFetchPixel_r3g3b2;
+    case PIXMAN_b2g3r3: return fbFetchPixel_b2g3r3;
+    case PIXMAN_a2r2g2b2: return fbFetchPixel_a2r2g2b2;
+    case PIXMAN_a2b2g2r2: return fbFetchPixel_a2b2g2r2;
+    case PIXMAN_c8: return  fbFetchPixel_c8;
+    case PIXMAN_g8: return  fbFetchPixel_c8;
+    case PIXMAN_x4a4: return fbFetchPixel_x4a4;
+
+        /* 4bpp formats */
+    case PIXMAN_a4: return  fbFetchPixel_a4;
+    case PIXMAN_r1g2b1: return fbFetchPixel_r1g2b1;
+    case PIXMAN_b1g2r1: return fbFetchPixel_b1g2r1;
+    case PIXMAN_a1r1g1b1: return fbFetchPixel_a1r1g1b1;
+    case PIXMAN_a1b1g1r1: return fbFetchPixel_a1b1g1r1;
+    case PIXMAN_c4: return  fbFetchPixel_c4;
+    case PIXMAN_g4: return  fbFetchPixel_c4;
+
+        /* 1bpp formats */
+    case PIXMAN_a1: return  fbFetchPixel_a1;
+    case PIXMAN_g1: return  fbFetchPixel_g1;
+
+        /* YUV formats */
+    case PIXMAN_yuy2: return fbFetchPixel_yuy2;
+    case PIXMAN_yv12: return fbFetchPixel_yv12;
+    }
+
+    return NULL;
+}
+
+/*********************************** Store ************************************/
+
+#define Splita(v)	uint32_t	a = ((v) >> 24), r = ((v) >> 16) & 0xff, g = ((v) >> 8) & 0xff, b = (v) & 0xff
+#define Split(v)	uint32_t	r = ((v) >> 16) & 0xff, g = ((v) >> 8) & 0xff, b = (v) & 0xff
+
+static FASTCALL void
+fbStore_a8r8g8b8 (pixman_image_t *image,
+		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    MEMCPY_WRAPPED(image, ((uint32_t *)bits) + x, values, width*sizeof(uint32_t));
+}
+
+static FASTCALL void
+fbStore_x8r8g8b8 (pixman_image_t *image,
+		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    int i;
+    uint32_t *pixel = (uint32_t *)bits + x;
+    for (i = 0; i < width; ++i)
+	WRITE(image, pixel++, values[i] & 0xffffff);
+}
+
+static FASTCALL void
+fbStore_a8b8g8r8 (pixman_image_t *image,
+		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    int i;
+    uint32_t *pixel = (uint32_t *)bits + x;
+    for (i = 0; i < width; ++i)
+	WRITE(image, pixel++, (values[i] & 0xff00ff00) | ((values[i] >> 16) & 0xff) | ((values[i] & 0xff) << 16));
+}
+
+static FASTCALL void
+fbStore_x8b8g8r8 (pixman_image_t *image,
+		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    int i;
+    uint32_t *pixel = (uint32_t *)bits + x;
+    for (i = 0; i < width; ++i)
+	WRITE(image, pixel++, (values[i] & 0x0000ff00) | ((values[i] >> 16) & 0xff) | ((values[i] & 0xff) << 16));
+}
+
+static FASTCALL void
+fbStore_r8g8b8 (pixman_image_t *image,
+		uint32_t *bits, const uint32_t *values, int x, int width,
+		const pixman_indexed_t * indexed)
+{
+    int i;
+    uint8_t *pixel = ((uint8_t *) bits) + 3*x;
+    for (i = 0; i < width; ++i) {
+	Store24(image, pixel, values[i]);
+	pixel += 3;
+    }
+}
+
+static FASTCALL void
+fbStore_b8g8r8 (pixman_image_t *image,
+		uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    int i;
+    uint8_t *pixel = ((uint8_t *) bits) + 3*x;
+    for (i = 0; i < width; ++i) {
+	uint32_t val = values[i];
+#if IMAGE_BYTE_ORDER == MSBFirst
+	WRITE(image, pixel++, Blue(val));
+	WRITE(image, pixel++, Green(val));
+	WRITE(image, pixel++, Red(val));
+#else
+	WRITE(image, pixel++, Red(val));
+	WRITE(image, pixel++, Green(val));
+	WRITE(image, pixel++, Blue(val));
+#endif
+    }
+}
+
+static FASTCALL void
+fbStore_r5g6b5 (pixman_image_t *image,
+		uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    int i;
+    uint16_t *pixel = ((uint16_t *) bits) + x;
+    for (i = 0; i < width; ++i) {
+	uint32_t s = values[i];
+	WRITE(image, pixel++, ((s >> 3) & 0x001f) |
+	      ((s >> 5) & 0x07e0) |
+	      ((s >> 8) & 0xf800));
+    }
+}
+
+static FASTCALL void
+fbStore_b5g6r5 (pixman_image_t *image,
+		uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    int i;
+    uint16_t  *pixel = ((uint16_t *) bits) + x;
+    for (i = 0; i < width; ++i) {
+	Split(values[i]);
+	WRITE(image, pixel++, ((b << 8) & 0xf800) |
+	      ((g << 3) & 0x07e0) |
+	      ((r >> 3)         ));
+    }
+}
+
+static FASTCALL void
+fbStore_a1r5g5b5 (pixman_image_t *image,
+		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    int i;
+    uint16_t  *pixel = ((uint16_t *) bits) + x;
+    for (i = 0; i < width; ++i) {
+	Splita(values[i]);
+	WRITE(image, pixel++, ((a << 8) & 0x8000) |
+	      ((r << 7) & 0x7c00) |
+	      ((g << 2) & 0x03e0) |
+	      ((b >> 3)         ));
+    }
+}
+
+static FASTCALL void
+fbStore_x1r5g5b5 (pixman_image_t *image,
+		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    int i;
+    uint16_t  *pixel = ((uint16_t *) bits) + x;
+    for (i = 0; i < width; ++i) {
+	Split(values[i]);
+	WRITE(image, pixel++, ((r << 7) & 0x7c00) |
+	      ((g << 2) & 0x03e0) |
+	      ((b >> 3)         ));
+    }
+}
+
+static FASTCALL void
+fbStore_a1b5g5r5 (pixman_image_t *image,
+		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    int i;
+    uint16_t  *pixel = ((uint16_t *) bits) + x;
+    for (i = 0; i < width; ++i) {
+	Splita(values[i]);
+	WRITE(image, pixel++, ((a << 8) & 0x8000) |
+	      ((b << 7) & 0x7c00) |
+	      ((g << 2) & 0x03e0) |
+	      ((r >> 3)         ));
+    }
+}
+
+static FASTCALL void
+fbStore_x1b5g5r5 (pixman_image_t *image,
+		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    int i;
+    uint16_t  *pixel = ((uint16_t *) bits) + x;
+    for (i = 0; i < width; ++i) {
+	Split(values[i]);
+	WRITE(image, pixel++, ((b << 7) & 0x7c00) |
+	      ((g << 2) & 0x03e0) |
+	      ((r >> 3)         ));
+    }
+}
+
+static FASTCALL void
+fbStore_a4r4g4b4 (pixman_image_t *image,
+		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    int i;
+    uint16_t  *pixel = ((uint16_t *) bits) + x;
+    for (i = 0; i < width; ++i) {
+	Splita(values[i]);
+	WRITE(image, pixel++, ((a << 8) & 0xf000) |
+	      ((r << 4) & 0x0f00) |
+	      ((g     ) & 0x00f0) |
+	      ((b >> 4)         ));
+    }
+}
+
+static FASTCALL void
+fbStore_x4r4g4b4 (pixman_image_t *image,
+		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    int i;
+    uint16_t  *pixel = ((uint16_t *) bits) + x;
+    for (i = 0; i < width; ++i) {
+	Split(values[i]);
+	WRITE(image, pixel++, ((r << 4) & 0x0f00) |
+	      ((g     ) & 0x00f0) |
+	      ((b >> 4)         ));
+    }
+}
+
+static FASTCALL void
+fbStore_a4b4g4r4 (pixman_image_t *image,
+		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    int i;
+    uint16_t  *pixel = ((uint16_t *) bits) + x;
+    for (i = 0; i < width; ++i) {
+	Splita(values[i]);
+	WRITE(image, pixel++, ((a << 8) & 0xf000) |
+	      ((b << 4) & 0x0f00) |
+	      ((g     ) & 0x00f0) |
+	      ((r >> 4)         ));
+    }
+}
+
+static FASTCALL void
+fbStore_x4b4g4r4 (pixman_image_t *image,
+		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    int i;
+    uint16_t  *pixel = ((uint16_t *) bits) + x;
+    for (i = 0; i < width; ++i) {
+	Split(values[i]);
+	WRITE(image, pixel++, ((b << 4) & 0x0f00) |
+	      ((g     ) & 0x00f0) |
+	      ((r >> 4)         ));
+    }
+}
+
+static FASTCALL void
+fbStore_a8 (pixman_image_t *image,
+	    uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    int i;
+    uint8_t   *pixel = ((uint8_t *) bits) + x;
+    for (i = 0; i < width; ++i) {
+	WRITE(image, pixel++, values[i] >> 24);
+    }
+}
+
+static FASTCALL void
+fbStore_r3g3b2 (pixman_image_t *image,
+		uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    int i;
+    uint8_t   *pixel = ((uint8_t *) bits) + x;
+    for (i = 0; i < width; ++i) {
+	Split(values[i]);
+	WRITE(image, pixel++,
+	      ((r     ) & 0xe0) |
+	      ((g >> 3) & 0x1c) |
+	      ((b >> 6)       ));
+    }
+}
+
+static FASTCALL void
+fbStore_b2g3r3 (pixman_image_t *image,
+		uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    int i;
+    uint8_t   *pixel = ((uint8_t *) bits) + x;
+    for (i = 0; i < width; ++i) {
+	Split(values[i]);
+	WRITE(image, pixel++,
+	      ((b     ) & 0xc0) |
+	      ((g >> 2) & 0x1c) |
+	      ((r >> 5)       ));
+    }
+}
+
+static FASTCALL void
+fbStore_a2r2g2b2 (pixman_image_t *image,
+		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    int i;
+    uint8_t   *pixel = ((uint8_t *) bits) + x;
+    for (i = 0; i < width; ++i) {
+	Splita(values[i]);
+	WRITE(image, pixel++, ((a     ) & 0xc0) |
+	      ((r >> 2) & 0x30) |
+	      ((g >> 4) & 0x0c) |
+	      ((b >> 6)       ));
+    }
+}
+
+static FASTCALL void
+fbStore_c8 (pixman_image_t *image,
+	    uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    int i;
+    uint8_t   *pixel = ((uint8_t *) bits) + x;
+    for (i = 0; i < width; ++i) {
+	WRITE(image, pixel++, miIndexToEnt24(indexed,values[i]));
+    }
+}
+
+static FASTCALL void
+fbStore_x4a4 (pixman_image_t *image,
+	      uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    int i;
+    uint8_t   *pixel = ((uint8_t *) bits) + x;
+    for (i = 0; i < width; ++i) {
+	WRITE(image, pixel++, values[i] >> 28);
+    }
+}
+
+#define Store8(img,l,o,v)  (WRITE(img, (uint8_t *)(l) + ((o) >> 3), (v)))
+#if IMAGE_BYTE_ORDER == MSBFirst
+#define Store4(img,l,o,v)  Store8(img,l,o,((o) & 4 ?				\
+				   (Fetch8(img,l,o) & 0xf0) | (v) :		\
+				   (Fetch8(img,l,o) & 0x0f) | ((v) << 4)))
+#else
+#define Store4(img,l,o,v)  Store8(img,l,o,((o) & 4 ?			       \
+				   (Fetch8(img,l,o) & 0x0f) | ((v) << 4) : \
+				   (Fetch8(img,l,o) & 0xf0) | (v)))
+#endif
+
+static FASTCALL void
+fbStore_a4 (pixman_image_t *image,
+	    uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+	Store4(image, bits, i + x, values[i]>>28);
+    }
+}
+
+static FASTCALL void
+fbStore_r1g2b1 (pixman_image_t *image,
+		uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+	uint32_t  pixel;
+
+	Split(values[i]);
+	pixel = (((r >> 4) & 0x8) |
+		 ((g >> 5) & 0x6) |
+		 ((b >> 7)      ));
+	Store4(image, bits, i + x, pixel);
+    }
+}
+
+static FASTCALL void
+fbStore_b1g2r1 (pixman_image_t *image,
+		uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+	uint32_t  pixel;
+
+	Split(values[i]);
+	pixel = (((b >> 4) & 0x8) |
+		 ((g >> 5) & 0x6) |
+		 ((r >> 7)      ));
+	Store4(image, bits, i + x, pixel);
+    }
+}
+
+static FASTCALL void
+fbStore_a1r1g1b1 (pixman_image_t *image,
+		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+	uint32_t  pixel;
+	Splita(values[i]);
+	pixel = (((a >> 4) & 0x8) |
+		 ((r >> 5) & 0x4) |
+		 ((g >> 6) & 0x2) |
+		 ((b >> 7)      ));
+	Store4(image, bits, i + x, pixel);
+    }
+}
+
+static FASTCALL void
+fbStore_a1b1g1r1 (pixman_image_t *image,
+		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+	uint32_t  pixel;
+	Splita(values[i]);
+	pixel = (((a >> 4) & 0x8) |
+		 ((b >> 5) & 0x4) |
+		 ((g >> 6) & 0x2) |
+		 ((r >> 7)      ));
+	Store4(image, bits, i + x, pixel);
+    }
+}
+
+static FASTCALL void
+fbStore_c4 (pixman_image_t *image,
+	    uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+	uint32_t  pixel;
+
+	pixel = miIndexToEnt24(indexed, values[i]);
+	Store4(image, bits, i + x, pixel);
+    }
+}
+
+static FASTCALL void
+fbStore_a1 (pixman_image_t *image,
+	    uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+	uint32_t  *pixel = ((uint32_t *) bits) + ((i+x) >> 5);
+	uint32_t  mask = FbStipMask((i+x) & 0x1f, 1);
+
+	uint32_t v = values[i] & 0x80000000 ? mask : 0;
+	WRITE(image, pixel, (READ(image, pixel) & ~mask) | v);
+    }
+}
+
+static FASTCALL void
+fbStore_g1 (pixman_image_t *image,
+	    uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+	uint32_t  *pixel = ((uint32_t *) bits) + ((i+x) >> 5);
+	uint32_t  mask = FbStipMask((i+x) & 0x1f, 1);
+
+	uint32_t v = miIndexToEntY24(indexed,values[i]) ? mask : 0;
+	WRITE(image, pixel, (READ(image, pixel) & ~mask) | v);
+    }
+}
+
+
+storeProc STORE_PROC_FOR_PICTURE (bits_image_t * pict)
+{
+    switch(pict->format) {
+    case PIXMAN_a8r8g8b8: return fbStore_a8r8g8b8;
+    case PIXMAN_x8r8g8b8: return fbStore_x8r8g8b8;
+    case PIXMAN_a8b8g8r8: return fbStore_a8b8g8r8;
+    case PIXMAN_x8b8g8r8: return fbStore_x8b8g8r8;
+
+        /* 24bpp formats */
+    case PIXMAN_r8g8b8: return fbStore_r8g8b8;
+    case PIXMAN_b8g8r8: return fbStore_b8g8r8;
+
+        /* 16bpp formats */
+    case PIXMAN_r5g6b5: return fbStore_r5g6b5;
+    case PIXMAN_b5g6r5: return fbStore_b5g6r5;
+
+    case PIXMAN_a1r5g5b5: return fbStore_a1r5g5b5;
+    case PIXMAN_x1r5g5b5: return fbStore_x1r5g5b5;
+    case PIXMAN_a1b5g5r5: return fbStore_a1b5g5r5;
+    case PIXMAN_x1b5g5r5: return fbStore_x1b5g5r5;
+    case PIXMAN_a4r4g4b4: return fbStore_a4r4g4b4;
+    case PIXMAN_x4r4g4b4: return fbStore_x4r4g4b4;
+    case PIXMAN_a4b4g4r4: return fbStore_a4b4g4r4;
+    case PIXMAN_x4b4g4r4: return fbStore_x4b4g4r4;
+
+        /* 8bpp formats */
+    case PIXMAN_a8: return  fbStore_a8;
+    case PIXMAN_r3g3b2: return fbStore_r3g3b2;
+    case PIXMAN_b2g3r3: return fbStore_b2g3r3;
+    case PIXMAN_a2r2g2b2: return fbStore_a2r2g2b2;
+    case PIXMAN_c8: return  fbStore_c8;
+    case PIXMAN_g8: return  fbStore_c8;
+    case PIXMAN_x4a4: return fbStore_x4a4;
+
+        /* 4bpp formats */
+    case PIXMAN_a4: return  fbStore_a4;
+    case PIXMAN_r1g2b1: return fbStore_r1g2b1;
+    case PIXMAN_b1g2r1: return fbStore_b1g2r1;
+    case PIXMAN_a1r1g1b1: return fbStore_a1r1g1b1;
+    case PIXMAN_a1b1g1r1: return fbStore_a1b1g1r1;
+    case PIXMAN_c4: return  fbStore_c4;
+    case PIXMAN_g4: return  fbStore_c4;
+
+        /* 1bpp formats */
+    case PIXMAN_a1: return  fbStore_a1;
+    case PIXMAN_g1: return  fbStore_g1;
+    default:
+        return NULL;
+    }
+}
new file mode 100644
--- /dev/null
+++ b/gfx/cairo/libpixman/src/pixman-combine.c
@@ -0,0 +1,1266 @@
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <string.h>
+
+#include "pixman-private.h"
+/*
+ * There are two ways of handling alpha -- either as a single unified value or
+ * a separate value for each component, hence each macro must have two
+ * versions.  The unified alpha version has a 'U' at the end of the name,
+ * the component version has a 'C'.  Similarly, functions which deal with
+ * this difference will have two versions using the same convention.
+ */
+
+
+/*
+ * Combine src and mask
+ */
+FASTCALL static void
+pixman_fbCombineMaskU (uint32_t *src, const uint32_t *mask, int width)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+        uint32_t a = *(mask + i) >> 24;
+        uint32_t s = *(src + i);
+        FbByteMul(s, a);
+        *(src + i) = s;
+    }
+}
+
+/*
+ * All of the composing functions
+ */
+
+FASTCALL static void
+fbCombineClear (uint32_t *dest, const uint32_t *src, int width)
+{
+    memset(dest, 0, width*sizeof(uint32_t));
+}
+
+FASTCALL static void
+fbCombineSrcU (uint32_t *dest, const uint32_t *src, int width)
+{
+    memcpy(dest, src, width*sizeof(uint32_t));
+}
+
+/* if the Src is opaque, call fbCombineSrcU */
+FASTCALL static void
+fbCombineOverU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+        uint32_t s = *(src + i);
+        uint32_t d = *(dest + i);
+        uint32_t ia = Alpha(~s);
+
+        FbByteMulAdd(d, ia, s);
+	*(dest + i) = d;
+    }
+}
+
+/* if the Dst is opaque, this is a noop */
+FASTCALL static void
+fbCombineOverReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+        uint32_t s = *(src + i);
+        uint32_t d = *(dest + i);
+        uint32_t ia = Alpha(~*(dest + i));
+        FbByteMulAdd(s, ia, d);
+	*(dest + i) = s;
+    }
+}
+
+/* if the Dst is opaque, call fbCombineSrcU */
+FASTCALL static void
+fbCombineInU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+        uint32_t s = *(src + i);
+        uint32_t a = Alpha(*(dest + i));
+        FbByteMul(s, a);
+	*(dest + i) = s;
+    }
+}
+
+/* if the Src is opaque, this is a noop */
+FASTCALL static void
+fbCombineInReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+        uint32_t d = *(dest + i);
+        uint32_t a = Alpha(*(src + i));
+        FbByteMul(d, a);
+	*(dest + i) = d;
+    }
+}
+
+/* if the Dst is opaque, call fbCombineClear */
+FASTCALL static void
+fbCombineOutU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+        uint32_t s = *(src + i);
+        uint32_t a = Alpha(~*(dest + i));
+        FbByteMul(s, a);
+	*(dest + i) = s;
+    }
+}
+
+/* if the Src is opaque, call fbCombineClear */
+FASTCALL static void
+fbCombineOutReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+        uint32_t d = *(dest + i);
+        uint32_t a = Alpha(~*(src + i));
+        FbByteMul(d, a);
+	*(dest + i) = d;
+    }
+}
+
+/* if the Src is opaque, call fbCombineInU */
+/* if the Dst is opaque, call fbCombineOverU */
+/* if both the Src and Dst are opaque, call fbCombineSrcU */
+FASTCALL static void
+fbCombineAtopU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+        uint32_t s = *(src + i);
+        uint32_t d = *(dest + i);
+        uint32_t dest_a = Alpha(d);
+        uint32_t src_ia = Alpha(~s);
+
+        FbByteAddMul(s, dest_a, d, src_ia);
+	*(dest + i) = s;
+    }
+}
+
+/* if the Src is opaque, call fbCombineOverReverseU */
+/* if the Dst is opaque, call fbCombineInReverseU */
+/* if both the Src and Dst are opaque, call fbCombineDstU */
+FASTCALL static void
+fbCombineAtopReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+        uint32_t s = *(src + i);
+        uint32_t d = *(dest + i);
+        uint32_t src_a = Alpha(s);
+        uint32_t dest_ia = Alpha(~d);
+
+        FbByteAddMul(s, dest_ia, d, src_a);
+	*(dest + i) = s;
+    }
+}
+
+/* if the Src is opaque, call fbCombineOverU */
+/* if the Dst is opaque, call fbCombineOverReverseU */
+/* if both the Src and Dst are opaque, call fbCombineClear */
+FASTCALL static void
+fbCombineXorU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+        uint32_t s = *(src + i);
+        uint32_t d = *(dest + i);
+        uint32_t src_ia = Alpha(~s);
+        uint32_t dest_ia = Alpha(~d);
+
+        FbByteAddMul(s, dest_ia, d, src_ia);
+	*(dest + i) = s;
+    }
+}
+
+FASTCALL static void
+fbCombineAddU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+        uint32_t s = *(src + i);
+        uint32_t d = *(dest + i);
+        FbByteAdd(d, s);
+	*(dest + i) = d;
+    }
+}
+
+/* if the Src is opaque, call fbCombineAddU */
+/* if the Dst is opaque, call fbCombineAddU */
+/* if both the Src and Dst are opaque, call fbCombineAddU */
+FASTCALL static void
+fbCombineSaturateU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+        uint32_t  s = *(src + i);
+        uint32_t d = *(dest + i);
+        uint16_t  sa, da;
+
+        sa = s >> 24;
+        da = ~d >> 24;
+        if (sa > da)
+        {
+            sa = FbIntDiv(da, sa);
+            FbByteMul(s, sa);
+        };
+        FbByteAdd(d, s);
+	*(dest + i) = d;
+    }
+}
+
+
+/*
+ * All of the disjoint composing functions
+
+ The four entries in the first column indicate what source contributions
+ come from each of the four areas of the picture -- areas covered by neither
+ A nor B, areas covered only by A, areas covered only by B and finally
+ areas covered by both A and B.
+
+ Disjoint			Conjoint
+ Fa		Fb		Fa		Fb
+ (0,0,0,0)	0		0		0		0
+ (0,A,0,A)	1		0		1		0
+ (0,0,B,B)	0		1		0		1
+ (0,A,B,A)	1		min((1-a)/b,1)	1		max(1-a/b,0)
+ (0,A,B,B)	min((1-b)/a,1)	1		max(1-b/a,0)	1
+ (0,0,0,A)	max(1-(1-b)/a,0) 0		min(1,b/a)	0
+ (0,0,0,B)	0		max(1-(1-a)/b,0) 0		min(a/b,1)
+ (0,A,0,0)	min(1,(1-b)/a)	0		max(1-b/a,0)	0
+ (0,0,B,0)	0		min(1,(1-a)/b)	0		max(1-a/b,0)
+ (0,0,B,A)	max(1-(1-b)/a,0) min(1,(1-a)/b)	 min(1,b/a)	max(1-a/b,0)
+ (0,A,0,B)	min(1,(1-b)/a)	max(1-(1-a)/b,0) max(1-b/a,0)	min(1,a/b)
+ (0,A,B,0)	min(1,(1-b)/a)	min(1,(1-a)/b)	max(1-b/a,0)	max(1-a/b,0)
+
+*/
+
+#define CombineAOut 1
+#define CombineAIn  2
+#define CombineBOut 4
+#define CombineBIn  8
+
+#define CombineClear	0
+#define CombineA	(CombineAOut|CombineAIn)
+#define CombineB	(CombineBOut|CombineBIn)
+#define CombineAOver	(CombineAOut|CombineBOut|CombineAIn)
+#define CombineBOver	(CombineAOut|CombineBOut|CombineBIn)
+#define CombineAAtop	(CombineBOut|CombineAIn)
+#define CombineBAtop	(CombineAOut|CombineBIn)
+#define CombineXor	(CombineAOut|CombineBOut)
+
+/* portion covered by a but not b */
+FASTCALL static uint8_t
+fbCombineDisjointOutPart (uint8_t a, uint8_t b)
+{
+    /* min (1, (1-b) / a) */
+
+    b = ~b;		    /* 1 - b */
+    if (b >= a)		    /* 1 - b >= a -> (1-b)/a >= 1 */
+	return 0xff;	    /* 1 */
+    return FbIntDiv(b,a);   /* (1-b) / a */
+}
+
+/* portion covered by both a and b */
+FASTCALL static uint8_t
+fbCombineDisjointInPart (uint8_t a, uint8_t b)
+{
+    /* max (1-(1-b)/a,0) */
+    /*  = - min ((1-b)/a - 1, 0) */
+    /*  = 1 - min (1, (1-b)/a) */
+
+    b = ~b;		    /* 1 - b */
+    if (b >= a)		    /* 1 - b >= a -> (1-b)/a >= 1 */
+	return 0;	    /* 1 - 1 */
+    return ~FbIntDiv(b,a);  /* 1 - (1-b) / a */
+}
+
+/* portion covered by a but not b */
+FASTCALL static uint8_t
+fbCombineConjointOutPart (uint8_t a, uint8_t b)
+{
+    /* max (1-b/a,0) */
+    /* = 1-min(b/a,1) */
+
+    /* min (1, (1-b) / a) */
+
+    if (b >= a)		    /* b >= a -> b/a >= 1 */
+	return 0x00;	    /* 0 */
+    return ~FbIntDiv(b,a);   /* 1 - b/a */
+}
+
+/* portion covered by both a and b */
+FASTCALL static uint8_t
+fbCombineConjointInPart (uint8_t a, uint8_t b)
+{
+    /* min (1,b/a) */
+
+    if (b >= a)		    /* b >= a -> b/a >= 1 */
+	return 0xff;	    /* 1 */
+    return FbIntDiv(b,a);   /* b/a */
+}
+
+FASTCALL static void
+fbCombineDisjointGeneralU (uint32_t *dest, const uint32_t *src, int width, uint8_t combine)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+        uint32_t s = *(src + i);
+        uint32_t d = *(dest + i);
+        uint32_t m,n,o,p;
+        uint16_t Fa, Fb, t, u, v;
+        uint8_t sa = s >> 24;
+        uint8_t da = d >> 24;
+
+        switch (combine & CombineA) {
+        default:
+            Fa = 0;
+            break;
+        case CombineAOut:
+            Fa = fbCombineDisjointOutPart (sa, da);
+            break;
+        case CombineAIn:
+            Fa = fbCombineDisjointInPart (sa, da);
+            break;
+        case CombineA:
+            Fa = 0xff;
+            break;
+        }
+
+        switch (combine & CombineB) {
+        default:
+            Fb = 0;
+            break;
+        case CombineBOut:
+            Fb = fbCombineDisjointOutPart (da, sa);
+            break;
+        case CombineBIn:
+            Fb = fbCombineDisjointInPart (da, sa);
+            break;
+        case CombineB:
+            Fb = 0xff;
+            break;
+        }
+        m = FbGen (s,d,0,Fa,Fb,t, u, v);
+        n = FbGen (s,d,8,Fa,Fb,t, u, v);
+        o = FbGen (s,d,16,Fa,Fb,t, u, v);
+        p = FbGen (s,d,24,Fa,Fb,t, u, v);
+        s = m|n|o|p;
+	*(dest + i) = s;
+    }
+}
+
+FASTCALL static void
+fbCombineDisjointOverU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+        uint32_t  s = *(src + i);
+        uint16_t  a = s >> 24;
+
+        if (a != 0x00)
+        {
+            if (a != 0xff)
+            {
+                uint32_t d = *(dest + i);
+                a = fbCombineDisjointOutPart (d >> 24, a);
+                FbByteMulAdd(d, a, s);
+                s = d;
+            }
+	    *(dest + i) = s;
+        }
+    }
+}
+
+FASTCALL static void
+fbCombineDisjointInU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineDisjointGeneralU (dest, src, width, CombineAIn);
+}
+
+FASTCALL static void
+fbCombineDisjointInReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineDisjointGeneralU (dest, src, width, CombineBIn);
+}
+
+FASTCALL static void
+fbCombineDisjointOutU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineDisjointGeneralU (dest, src, width, CombineAOut);
+}
+
+FASTCALL static void
+fbCombineDisjointOutReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineDisjointGeneralU (dest, src, width, CombineBOut);
+}
+
+FASTCALL static void
+fbCombineDisjointAtopU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineDisjointGeneralU (dest, src, width, CombineAAtop);
+}
+
+FASTCALL static void
+fbCombineDisjointAtopReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineDisjointGeneralU (dest, src, width, CombineBAtop);
+}
+
+FASTCALL static void
+fbCombineDisjointXorU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineDisjointGeneralU (dest, src, width, CombineXor);
+}
+
+FASTCALL static void
+fbCombineConjointGeneralU (uint32_t *dest, const uint32_t *src, int width, uint8_t combine)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+        uint32_t  s = *(src + i);
+        uint32_t d = *(dest + i);
+        uint32_t  m,n,o,p;
+        uint16_t  Fa, Fb, t, u, v;
+        uint8_t sa = s >> 24;
+        uint8_t da = d >> 24;
+
+        switch (combine & CombineA) {
+        default:
+            Fa = 0;
+            break;
+        case CombineAOut:
+            Fa = fbCombineConjointOutPart (sa, da);
+            break;
+        case CombineAIn:
+            Fa = fbCombineConjointInPart (sa, da);
+            break;
+        case CombineA:
+            Fa = 0xff;
+            break;
+        }
+
+        switch (combine & CombineB) {
+        default:
+            Fb = 0;
+            break;
+        case CombineBOut:
+            Fb = fbCombineConjointOutPart (da, sa);
+            break;
+        case CombineBIn:
+            Fb = fbCombineConjointInPart (da, sa);
+            break;
+        case CombineB:
+            Fb = 0xff;
+            break;
+        }
+        m = FbGen (s,d,0,Fa,Fb,t, u, v);
+        n = FbGen (s,d,8,Fa,Fb,t, u, v);
+        o = FbGen (s,d,16,Fa,Fb,t, u, v);
+        p = FbGen (s,d,24,Fa,Fb,t, u, v);
+        s = m|n|o|p;
+	*(dest + i) = s;
+    }
+}
+
+FASTCALL static void
+fbCombineConjointOverU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineConjointGeneralU (dest, src, width, CombineAOver);
+}
+
+
+FASTCALL static void
+fbCombineConjointOverReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineConjointGeneralU (dest, src, width, CombineBOver);
+}
+
+
+FASTCALL static void
+fbCombineConjointInU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineConjointGeneralU (dest, src, width, CombineAIn);
+}
+
+
+FASTCALL static void
+fbCombineConjointInReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineConjointGeneralU (dest, src, width, CombineBIn);
+}
+
+FASTCALL static void
+fbCombineConjointOutU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineConjointGeneralU (dest, src, width, CombineAOut);
+}
+
+FASTCALL static void
+fbCombineConjointOutReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineConjointGeneralU (dest, src, width, CombineBOut);
+}
+
+FASTCALL static void
+fbCombineConjointAtopU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineConjointGeneralU (dest, src, width, CombineAAtop);
+}
+
+FASTCALL static void
+fbCombineConjointAtopReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineConjointGeneralU (dest, src, width, CombineBAtop);
+}
+
+FASTCALL static void
+fbCombineConjointXorU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineConjointGeneralU (dest, src, width, CombineXor);
+}
+
+/********************************************************************************/
+/*************************** Per Channel functions ******************************/
+/********************************************************************************/
+
+FASTCALL static void
+fbCombineMaskC (uint32_t *src, uint32_t *mask)
+{
+    uint32_t a = *mask;
+
+    uint32_t	x;
+    uint16_t	xa;
+
+    if (!a)
+    {
+	*(src) = 0;
+	return;
+    }
+
+    x = *(src);
+    if (a == 0xffffffff)
+    {
+	x = x >> 24;
+	x |= x << 8;
+	x |= x << 16;
+	*(mask) = x;
+	return;
+    }
+
+    xa = x >> 24;
+    FbByteMulC(x, a);
+    *(src) = x;
+    FbByteMul(a, xa);
+    *(mask) = a;
+}
+
+FASTCALL static void
+fbCombineMaskValueC (uint32_t *src, const uint32_t *mask)
+{
+    uint32_t a = *mask;
+    uint32_t	x;
+
+    if (!a)
+    {
+	*(src) = 0;
+	return;
+    }
+
+    if (a == 0xffffffff)
+	return;
+
+    x = *(src);
+    FbByteMulC(x, a);
+    *(src) =x;
+}
+
+FASTCALL static void
+fbCombineMaskAlphaC (const uint32_t *src, uint32_t *mask)
+{
+    uint32_t a = *(mask);
+    uint32_t	x;
+
+    if (!a)
+	return;
+
+    x = *(src) >> 24;
+    if (x == 0xff)
+	return;
+    if (a == 0xffffffff)
+    {
+	x = x >> 24;
+	x |= x << 8;
+	x |= x << 16;
+	*(mask) = x;
+	return;
+    }
+
+    FbByteMul(a, x);
+    *(mask) = a;
+}
+
+
+
+FASTCALL static void
+fbCombineClearC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    memset(dest, 0, width*sizeof(uint32_t));
+}
+
+FASTCALL static void
+fbCombineSrcC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+	uint32_t s = *(src + i);
+	uint32_t m = *(mask + i);
+
+	fbCombineMaskValueC (&s, &m);
+
+	*(dest) = s;
+    }
+}
+
+FASTCALL static void
+fbCombineOverC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+	uint32_t s = *(src + i);
+	uint32_t m = *(mask + i);
+	uint32_t a;
+
+	fbCombineMaskC (&s, &m);
+
+	a = ~m;
+        if (a != 0xffffffff)
+        {
+            if (a)
+            {
+                uint32_t d = *(dest + i);
+                FbByteMulAddC(d, a, s);
+                s = d;
+            }
+	    *(dest + i) = s;
+        }
+    }
+}
+
+FASTCALL static void
+fbCombineOverReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t d = *(dest + i);
+        uint32_t a = ~d >> 24;
+
+        if (a)
+        {
+            uint32_t s = *(src + i);
+	    uint32_t m = *(mask + i);
+
+	    fbCombineMaskValueC (&s, &m);
+
+            if (a != 0xff)
+            {
+                FbByteMulAdd(s, a, d);
+            }
+	    *(dest + i) = s;
+        }
+    }
+}
+
+FASTCALL static void
+fbCombineInC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t d = *(dest + i);
+        uint16_t a = d >> 24;
+        uint32_t s = 0;
+        if (a)
+        {
+	    uint32_t m = *(mask + i);
+
+	    s = *(src + i);
+	    fbCombineMaskValueC (&s, &m);
+            if (a != 0xff)
+            {
+                FbByteMul(s, a);
+            }
+        }
+	*(dest + i) = s;
+    }
+}
+
+FASTCALL static void
+fbCombineInReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t s = *(src + i);
+        uint32_t m = *(mask + i);
+        uint32_t a;
+
+	fbCombineMaskAlphaC (&s, &m);
+
+	a = m;
+        if (a != 0xffffffff)
+        {
+            uint32_t d = 0;
+            if (a)
+            {
+                d = *(dest + i);
+                FbByteMulC(d, a);
+            }
+	    *(dest + i) = d;
+        }
+    }
+}
+
+FASTCALL static void
+fbCombineOutC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t d = *(dest + i);
+        uint16_t a = ~d >> 24;
+        uint32_t s = 0;
+        if (a)
+        {
+	    uint32_t m = *(mask + i);
+
+	    s = *(src + i);
+	    fbCombineMaskValueC (&s, &m);
+
+            if (a != 0xff)
+            {
+                FbByteMul(s, a);
+            }
+        }
+	*(dest + i) = s;
+    }
+}
+
+FASTCALL static void
+fbCombineOutReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+	uint32_t s = *(src + i);
+	uint32_t m = *(mask + i);
+	uint32_t a;
+
+	fbCombineMaskAlphaC (&s, &m);
+
+        a = ~m;
+        if (a != 0xffffffff)
+        {
+            uint32_t d = 0;
+            if (a)
+            {
+                d = *(dest + i);
+                FbByteMulC(d, a);
+            }
+	    *(dest + i) = d;
+        }
+    }
+}
+
+FASTCALL static void
+fbCombineAtopC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t d = *(dest + i);
+        uint32_t s = *(src + i);
+        uint32_t m = *(mask + i);
+        uint32_t ad;
+        uint16_t as = d >> 24;
+
+	fbCombineMaskC (&s, &m);
+
+        ad = ~m;
+
+        FbByteAddMulC(d, ad, s, as);
+	*(dest + i) = d;
+    }
+}
+
+FASTCALL static void
+fbCombineAtopReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+
+        uint32_t d = *(dest + i);
+        uint32_t s = *(src + i);
+        uint32_t m = *(mask + i);
+        uint32_t ad;
+        uint16_t as = ~d >> 24;
+
+	fbCombineMaskC (&s, &m);
+
+	ad = m;
+
+        FbByteAddMulC(d, ad, s, as);
+	*(dest + i) = d;
+    }
+}
+
+FASTCALL static void
+fbCombineXorC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t d = *(dest + i);
+        uint32_t s = *(src + i);
+        uint32_t m = *(mask + i);
+        uint32_t ad;
+        uint16_t as = ~d >> 24;
+
+	fbCombineMaskC (&s, &m);
+
+	ad = ~m;
+
+        FbByteAddMulC(d, ad, s, as);
+	*(dest + i) = d;
+    }
+}
+
+FASTCALL static void
+fbCombineAddC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t s = *(src + i);
+        uint32_t m = *(mask + i);
+        uint32_t d = *(dest + i);
+
+	fbCombineMaskValueC (&s, &m);
+
+        FbByteAdd(d, s);
+	*(dest + i) = d;
+    }
+}
+
+FASTCALL static void
+fbCombineSaturateC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t  s, d;
+        uint16_t  sa, sr, sg, sb, da;
+        uint16_t  t, u, v;
+        uint32_t  m,n,o,p;
+
+        d = *(dest + i);
+        s = *(src + i);
+	m = *(mask + i);
+
+	fbCombineMaskC (&s, &m);
+
+        sa = (m >> 24);
+        sr = (m >> 16) & 0xff;
+        sg = (m >>  8) & 0xff;
+        sb = (m      ) & 0xff;
+        da = ~d >> 24;
+
+        if (sb <= da)
+            m = FbAdd(s,d,0,t);
+        else
+            m = FbGen (s, d, 0, (da << 8) / sb, 0xff, t, u, v);
+
+        if (sg <= da)
+            n = FbAdd(s,d,8,t);
+        else
+            n = FbGen (s, d, 8, (da << 8) / sg, 0xff, t, u, v);
+
+        if (sr <= da)
+            o = FbAdd(s,d,16,t);
+        else
+            o = FbGen (s, d, 16, (da << 8) / sr, 0xff, t, u, v);
+
+        if (sa <= da)
+            p = FbAdd(s,d,24,t);
+        else
+            p = FbGen (s, d, 24, (da << 8) / sa, 0xff, t, u, v);
+
+	*(dest + i) = m|n|o|p;
+    }
+}
+
+FASTCALL static void
+fbCombineDisjointGeneralC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width, uint8_t combine)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t  s, d;
+        uint32_t  m,n,o,p;
+        uint32_t  Fa, Fb;
+        uint16_t  t, u, v;
+        uint32_t  sa;
+        uint8_t   da;
+
+        s = *(src + i);
+        m = *(mask + i);
+        d = *(dest + i);
+        da = d >> 24;
+
+	fbCombineMaskC (&s, &m);
+
+	sa = m;
+
+        switch (combine & CombineA) {
+        default:
+            Fa = 0;
+            break;
+        case CombineAOut:
+            m = fbCombineDisjointOutPart ((uint8_t) (sa >> 0), da);
+            n = fbCombineDisjointOutPart ((uint8_t) (sa >> 8), da) << 8;
+            o = fbCombineDisjointOutPart ((uint8_t) (sa >> 16), da) << 16;
+            p = fbCombineDisjointOutPart ((uint8_t) (sa >> 24), da) << 24;
+            Fa = m|n|o|p;
+            break;
+        case CombineAIn:
+            m = fbCombineDisjointInPart ((uint8_t) (sa >> 0), da);
+            n = fbCombineDisjointInPart ((uint8_t) (sa >> 8), da) << 8;
+            o = fbCombineDisjointInPart ((uint8_t) (sa >> 16), da) << 16;
+            p = fbCombineDisjointInPart ((uint8_t) (sa >> 24), da) << 24;
+            Fa = m|n|o|p;
+            break;
+        case CombineA:
+            Fa = 0xffffffff;
+            break;
+        }
+
+        switch (combine & CombineB) {
+        default:
+            Fb = 0;
+            break;
+        case CombineBOut:
+            m = fbCombineDisjointOutPart (da, (uint8_t) (sa >> 0));
+            n = fbCombineDisjointOutPart (da, (uint8_t) (sa >> 8)) << 8;
+            o = fbCombineDisjointOutPart (da, (uint8_t) (sa >> 16)) << 16;
+            p = fbCombineDisjointOutPart (da, (uint8_t) (sa >> 24)) << 24;
+            Fb = m|n|o|p;
+            break;
+        case CombineBIn:
+            m = fbCombineDisjointInPart (da, (uint8_t) (sa >> 0));
+            n = fbCombineDisjointInPart (da, (uint8_t) (sa >> 8)) << 8;
+            o = fbCombineDisjointInPart (da, (uint8_t) (sa >> 16)) << 16;
+            p = fbCombineDisjointInPart (da, (uint8_t) (sa >> 24)) << 24;
+            Fb = m|n|o|p;
+            break;
+        case CombineB:
+            Fb = 0xffffffff;
+            break;
+        }
+        m = FbGen (s,d,0,FbGet8(Fa,0),FbGet8(Fb,0),t, u, v);
+        n = FbGen (s,d,8,FbGet8(Fa,8),FbGet8(Fb,8),t, u, v);
+        o = FbGen (s,d,16,FbGet8(Fa,16),FbGet8(Fb,16),t, u, v);
+        p = FbGen (s,d,24,FbGet8(Fa,24),FbGet8(Fb,24),t, u, v);
+        s = m|n|o|p;
+	*(dest + i) = s;
+    }
+}
+
+FASTCALL static void
+fbCombineDisjointOverC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineDisjointGeneralC (dest, src, mask, width, CombineAOver);
+}
+
+FASTCALL static void
+fbCombineDisjointInC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineDisjointGeneralC (dest, src, mask, width, CombineAIn);
+}
+
+FASTCALL static void
+fbCombineDisjointInReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineDisjointGeneralC (dest, src, mask, width, CombineBIn);
+}
+
+FASTCALL static void
+fbCombineDisjointOutC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineDisjointGeneralC (dest, src, mask, width, CombineAOut);
+}
+
+FASTCALL static void
+fbCombineDisjointOutReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineDisjointGeneralC (dest, src, mask, width, CombineBOut);
+}
+
+FASTCALL static void
+fbCombineDisjointAtopC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineDisjointGeneralC (dest, src, mask, width, CombineAAtop);
+}
+
+FASTCALL static void
+fbCombineDisjointAtopReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineDisjointGeneralC (dest, src, mask, width, CombineBAtop);
+}
+
+FASTCALL static void
+fbCombineDisjointXorC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineDisjointGeneralC (dest, src, mask, width, CombineXor);
+}
+
+FASTCALL static void
+fbCombineConjointGeneralC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width, uint8_t combine)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t  s, d;
+        uint32_t  m,n,o,p;
+        uint32_t  Fa, Fb;
+        uint16_t  t, u, v;
+        uint32_t  sa;
+        uint8_t   da;
+
+        s = *(src + i);
+        m = *(mask + i);
+        d = *(dest + i);
+        da = d >> 24;
+
+	fbCombineMaskC (&s, &m);
+
+        sa = m;
+
+        switch (combine & CombineA) {
+        default:
+            Fa = 0;
+            break;
+        case CombineAOut:
+            m = fbCombineConjointOutPart ((uint8_t) (sa >> 0), da);
+            n = fbCombineConjointOutPart ((uint8_t) (sa >> 8), da) << 8;
+            o = fbCombineConjointOutPart ((uint8_t) (sa >> 16), da) << 16;
+            p = fbCombineConjointOutPart ((uint8_t) (sa >> 24), da) << 24;
+            Fa = m|n|o|p;
+            break;
+        case CombineAIn:
+            m = fbCombineConjointInPart ((uint8_t) (sa >> 0), da);
+            n = fbCombineConjointInPart ((uint8_t) (sa >> 8), da) << 8;
+            o = fbCombineConjointInPart ((uint8_t) (sa >> 16), da) << 16;
+            p = fbCombineConjointInPart ((uint8_t) (sa >> 24), da) << 24;
+            Fa = m|n|o|p;
+            break;
+        case CombineA:
+            Fa = 0xffffffff;
+            break;
+        }
+
+        switch (combine & CombineB) {
+        default:
+            Fb = 0;
+            break;
+        case CombineBOut:
+            m = fbCombineConjointOutPart (da, (uint8_t) (sa >> 0));
+            n = fbCombineConjointOutPart (da, (uint8_t) (sa >> 8)) << 8;
+            o = fbCombineConjointOutPart (da, (uint8_t) (sa >> 16)) << 16;
+            p = fbCombineConjointOutPart (da, (uint8_t) (sa >> 24)) << 24;
+            Fb = m|n|o|p;
+            break;
+        case CombineBIn:
+            m = fbCombineConjointInPart (da, (uint8_t) (sa >> 0));
+            n = fbCombineConjointInPart (da, (uint8_t) (sa >> 8)) << 8;
+            o = fbCombineConjointInPart (da, (uint8_t) (sa >> 16)) << 16;
+            p = fbCombineConjointInPart (da, (uint8_t) (sa >> 24)) << 24;
+            Fb = m|n|o|p;
+            break;
+        case CombineB:
+            Fb = 0xffffffff;
+            break;
+        }
+        m = FbGen (s,d,0,FbGet8(Fa,0),FbGet8(Fb,0),t, u, v);
+        n = FbGen (s,d,8,FbGet8(Fa,8),FbGet8(Fb,8),t, u, v);
+        o = FbGen (s,d,16,FbGet8(Fa,16),FbGet8(Fb,16),t, u, v);
+        p = FbGen (s,d,24,FbGet8(Fa,24),FbGet8(Fb,24),t, u, v);
+        s = m|n|o|p;
+	*(dest + i) = s;
+    }
+}
+
+FASTCALL static void
+fbCombineConjointOverC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineConjointGeneralC (dest, src, mask, width, CombineAOver);
+}
+
+FASTCALL static void
+fbCombineConjointOverReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineConjointGeneralC (dest, src, mask, width, CombineBOver);
+}
+
+FASTCALL static void
+fbCombineConjointInC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineConjointGeneralC (dest, src, mask, width, CombineAIn);
+}
+
+FASTCALL static void
+fbCombineConjointInReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineConjointGeneralC (dest, src, mask, width, CombineBIn);
+}
+
+FASTCALL static void
+fbCombineConjointOutC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineConjointGeneralC (dest, src, mask, width, CombineAOut);
+}
+
+FASTCALL static void
+fbCombineConjointOutReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineConjointGeneralC (dest, src, mask, width, CombineBOut);
+}
+
+FASTCALL static void
+fbCombineConjointAtopC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineConjointGeneralC (dest, src, mask, width, CombineAAtop);
+}
+
+FASTCALL static void
+fbCombineConjointAtopReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineConjointGeneralC (dest, src, mask, width, CombineBAtop);
+}
+
+FASTCALL static void
+fbCombineConjointXorC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineConjointGeneralC (dest, src, mask, width, CombineXor);
+}
+
+static CombineFuncU pixman_fbCombineFuncU[] = {
+    fbCombineClear,
+    fbCombineSrcU,
+    NULL, /* CombineDst */
+    fbCombineOverU,
+    fbCombineOverReverseU,
+    fbCombineInU,
+    fbCombineInReverseU,
+    fbCombineOutU,
+    fbCombineOutReverseU,
+    fbCombineAtopU,
+    fbCombineAtopReverseU,
+    fbCombineXorU,
+    fbCombineAddU,
+    fbCombineSaturateU,
+    NULL,
+    NULL,
+    fbCombineClear,
+    fbCombineSrcU,
+    NULL, /* CombineDst */
+    fbCombineDisjointOverU,
+    fbCombineSaturateU, /* DisjointOverReverse */
+    fbCombineDisjointInU,
+    fbCombineDisjointInReverseU,
+    fbCombineDisjointOutU,
+    fbCombineDisjointOutReverseU,
+    fbCombineDisjointAtopU,
+    fbCombineDisjointAtopReverseU,
+    fbCombineDisjointXorU,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    fbCombineClear,
+    fbCombineSrcU,
+    NULL, /* CombineDst */
+    fbCombineConjointOverU,
+    fbCombineConjointOverReverseU,
+    fbCombineConjointInU,
+    fbCombineConjointInReverseU,
+    fbCombineConjointOutU,
+    fbCombineConjointOutReverseU,
+    fbCombineConjointAtopU,
+    fbCombineConjointAtopReverseU,
+    fbCombineConjointXorU,
+};
+
+static CombineFuncC pixman_fbCombineFuncC[] = {
+    fbCombineClearC,
+    fbCombineSrcC,
+    NULL, /* Dest */
+    fbCombineOverC,
+    fbCombineOverReverseC,
+    fbCombineInC,
+    fbCombineInReverseC,
+    fbCombineOutC,
+    fbCombineOutReverseC,
+    fbCombineAtopC,
+    fbCombineAtopReverseC,
+    fbCombineXorC,
+    fbCombineAddC,
+    fbCombineSaturateC,
+    NULL,
+    NULL,
+    fbCombineClearC,	    /* 0x10 */
+    fbCombineSrcC,
+    NULL, /* Dest */
+    fbCombineDisjointOverC,
+    fbCombineSaturateC, /* DisjointOverReverse */
+    fbCombineDisjointInC,
+    fbCombineDisjointInReverseC,
+    fbCombineDisjointOutC,
+    fbCombineDisjointOutReverseC,
+    fbCombineDisjointAtopC,
+    fbCombineDisjointAtopReverseC,
+    fbCombineDisjointXorC,  /* 0x1b */
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    fbCombineClearC,
+    fbCombineSrcC,
+    NULL, /* Dest */
+    fbCombineConjointOverC,
+    fbCombineConjointOverReverseC,
+    fbCombineConjointInC,
+    fbCombineConjointInReverseC,
+    fbCombineConjointOutC,
+    fbCombineConjointOutReverseC,
+    fbCombineConjointAtopC,
+    fbCombineConjointAtopReverseC,
+    fbCombineConjointXorC,
+};
+
+FbComposeFunctions pixman_composeFunctions = {
+    pixman_fbCombineFuncU,
+    pixman_fbCombineFuncC,
+    pixman_fbCombineMaskU
+};
--- a/gfx/cairo/libpixman/src/pixman-compose.c
+++ b/gfx/cairo/libpixman/src/pixman-compose.c
@@ -30,29 +30,40 @@
 #include <stdlib.h>
 #include <string.h>
 #include <math.h>
 #include <assert.h>
 #include <limits.h>
 
 #include "pixman-private.h"
 
-/*
- *    FIXME:
- *		The stuff here is added just to get it to compile. Something sensible needs to
- *              be done before this can be used.
- *
- *   we should go through this code and clean up some of the weird stuff that have
- *   resulted from unmacro-ifying it.
- *
- */
-#define INLINE inline
+#ifdef PIXMAN_FB_ACCESSORS
+#define PIXMAN_COMPOSITE_RECT_GENERAL pixman_composite_rect_general_accessors
+
+#define FETCH_PROC_FOR_PICTURE pixman_fetchProcForPicture_accessors
+#define FETCH_PIXEL_PROC_FOR_PICTURE pixman_fetchPixelProcForPicture_accessors
+#define STORE_PROC_FOR_PICTURE pixman_storeProcForPicture_accessors
+
+#define FB_FETCH_TRANSFORMED fbFetchTransformed_accessors
+#define FB_FETCH_EXTERNAL_ALPHA fbFetchExternalAlpha_accessors
+#define FB_STORE_EXTERNAL_ALPHA fbStoreExternalAlpha_accessors
 
-/*   End of stuff added to get it to compile
- */
+#else
+
+#define PIXMAN_COMPOSITE_RECT_GENERAL pixman_composite_rect_general_no_accessors
+
+#define FETCH_PROC_FOR_PICTURE pixman_fetchProcForPicture
+#define FETCH_PIXEL_PROC_FOR_PICTURE pixman_fetchPixelProcForPicture
+#define STORE_PROC_FOR_PICTURE pixman_storeProcForPicture
+
+#define FB_FETCH_TRANSFORMED fbFetchTransformed
+#define FB_FETCH_EXTERNAL_ALPHA fbFetchExternalAlpha
+#define FB_STORE_EXTERNAL_ALPHA fbStoreExternalAlpha
+
+#endif
 
 static unsigned int
 SourcePictureClassify (source_image_t *pict,
 		       int	       x,
 		       int	       y,
 		       int	       width,
 		       int	       height)
 {
@@ -104,4273 +115,48 @@ SourcePictureClassify (source_image_t *p
 	    pict->class = SOURCE_IMAGE_CLASS_HORIZONTAL;
 	else if (factors[1] == factors[0])
 	    pict->class = SOURCE_IMAGE_CLASS_VERTICAL;
     }
 
     return pict->class;
 }
 
-#define SCANLINE_BUFFER_LENGTH 2048
-
-/*
- * YV12 setup and access macros
- */
-
-#define YV12_SETUP(pict) \
-	uint32_t *bits = pict->bits; \
-	int stride = pict->rowstride; \
-	int offset0 = stride < 0 ? \
-		((-stride) >> 1) * ((pict->height - 1) >> 1) - stride : \
-		stride * pict->height; \
-	int offset1 = stride < 0 ? \
-		offset0 + ((-stride) >> 1) * ((pict->height) >> 1) : \
-		offset0 + (offset0 >> 2)
-/* Note n trailing semicolon on the above macro; if it's there, then
- * the typical usage of YV12_SETUP(pict); will have an extra trailing ;
- * that some compilers will interpret as a statement -- and then any further
- * variable declarations will cause an error.
- */
-
-#define YV12_Y(line)		\
-    ((uint8_t *) ((bits) + (stride) * (line)))
-
-#define YV12_U(line)	      \
-    ((uint8_t *) ((bits) + offset1 + \
-		((stride) >> 1) * ((line) >> 1)))
-
-#define YV12_V(line)	      \
-    ((uint8_t *) ((bits) + offset0 + \
-		((stride) >> 1) * ((line) >> 1)))
-
-typedef FASTCALL void (*fetchProc)(bits_image_t *pict, int x, int y, int width, uint32_t *buffer);
-
-/*
- * All of the fetch functions
- */
-
-static FASTCALL void
-fbFetch_a8r8g8b8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    MEMCPY_WRAPPED(pict,
-                   buffer, (const uint32_t *)bits + x,
-		   width*sizeof(uint32_t));
-}
-
-static FASTCALL void
-fbFetch_x8r8g8b8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const uint32_t *pixel = (const uint32_t *)bits + x;
-    const uint32_t *end = pixel + width;
-    while (pixel < end) {
-	*buffer++ = READ(pict, pixel++) | 0xff000000;
-    }
-}
-
-static FASTCALL void
-fbFetch_a8b8g8r8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const uint32_t *pixel = (uint32_t *)bits + x;
-    const uint32_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t p = READ(pict, pixel++);
-	*buffer++ = (p & 0xff00ff00) |
-	            ((p >> 16) & 0xff) |
-	    ((p & 0xff) << 16);
-    }
-}
-
-static FASTCALL void
-fbFetch_x8b8g8r8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const uint32_t *pixel = (uint32_t *)bits + x;
-    const uint32_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t p = READ(pict, pixel++);
-	*buffer++ = 0xff000000 |
-	    (p & 0x0000ff00) |
-	    ((p >> 16) & 0xff) |
-	    ((p & 0xff) << 16);
-    }
-}
-
-static FASTCALL void
-fbFetch_r8g8b8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const uint8_t *pixel = (const uint8_t *)bits + 3*x;
-    const uint8_t *end = pixel + 3*width;
-    while (pixel < end) {
-	uint32_t b = Fetch24(pict, pixel) | 0xff000000;
-	pixel += 3;
-	*buffer++ = b;
-    }
-}
-
-static FASTCALL void
-fbFetch_b8g8r8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const uint8_t *pixel = (const uint8_t *)bits + 3*x;
-    const uint8_t *end = pixel + 3*width;
-    while (pixel < end) {
-	uint32_t b = 0xff000000;
-#if IMAGE_BYTE_ORDER == MSBFirst
-	b |= (READ(pict, pixel++));
-	b |= (READ(pict, pixel++) << 8);
-	b |= (READ(pict, pixel++) << 16);
-#else
-	b |= (READ(pict, pixel++) << 16);
-	b |= (READ(pict, pixel++) << 8);
-	b |= (READ(pict, pixel++));
-#endif
-	*buffer++ = b;
-    }
-}
-
-static FASTCALL void
-fbFetch_r5g6b5 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const uint16_t *pixel = (const uint16_t *)bits + x;
-    const uint16_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t p = READ(pict, pixel++);
-	uint32_t r = (((p) << 3) & 0xf8) |
-	    (((p) << 5) & 0xfc00) |
-	    (((p) << 8) & 0xf80000);
-	r |= (r >> 5) & 0x70007;
-	r |= (r >> 6) & 0x300;
-	*buffer++ = 0xff000000 | r;
-    }
-}
-
-static FASTCALL void
-fbFetch_b5g6r5 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    uint32_t  r,g,b;
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const uint16_t *pixel = (const uint16_t *)bits + x;
-    const uint16_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t  p = READ(pict, pixel++);
-	b = ((p & 0xf800) | ((p & 0xe000) >> 5)) >> 8;
-	g = ((p & 0x07e0) | ((p & 0x0600) >> 6)) << 5;
-	r = ((p & 0x001c) | ((p & 0x001f) << 5)) << 14;
-	*buffer++ = 0xff000000 | r | g | b;
-    }
-}
-
-static FASTCALL void
-fbFetch_a1r5g5b5 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    uint32_t  r,g,b, a;
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const uint16_t *pixel = (const uint16_t *)bits + x;
-    const uint16_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t  p = READ(pict, pixel++);
-
-	a = (uint32_t) ((uint8_t) (0 - ((p & 0x8000) >> 15))) << 24;
-	r = ((p & 0x7c00) | ((p & 0x7000) >> 5)) << 9;
-	g = ((p & 0x03e0) | ((p & 0x0380) >> 5)) << 6;
-	b = ((p & 0x001c) | ((p & 0x001f) << 5)) >> 2;
-	*buffer++ = a | r | g | b;
-    }
-}
-
-static FASTCALL void
-fbFetch_x1r5g5b5 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    uint32_t  r,g,b;
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const uint16_t *pixel = (const uint16_t *)bits + x;
-    const uint16_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t  p = READ(pict, pixel++);
-
-	r = ((p & 0x7c00) | ((p & 0x7000) >> 5)) << 9;
-	g = ((p & 0x03e0) | ((p & 0x0380) >> 5)) << 6;
-	b = ((p & 0x001c) | ((p & 0x001f) << 5)) >> 2;
-	*buffer++ = 0xff000000 | r | g | b;
-    }
-}
-
-static FASTCALL void
-fbFetch_a1b5g5r5 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    uint32_t  r,g,b, a;
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const uint16_t *pixel = (const uint16_t *)bits + x;
-    const uint16_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t  p = READ(pict, pixel++);
-
-	a = (uint32_t) ((uint8_t) (0 - ((p & 0x8000) >> 15))) << 24;
-	b = ((p & 0x7c00) | ((p & 0x7000) >> 5)) >> 7;
-	g = ((p & 0x03e0) | ((p & 0x0380) >> 5)) << 6;
-	r = ((p & 0x001c) | ((p & 0x001f) << 5)) << 14;
-	*buffer++ = a | r | g | b;
-    }
-}
-
-static FASTCALL void
-fbFetch_x1b5g5r5 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    uint32_t  r,g,b;
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const uint16_t *pixel = (const uint16_t *)bits + x;
-    const uint16_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t  p = READ(pict, pixel++);
-
-	b = ((p & 0x7c00) | ((p & 0x7000) >> 5)) >> 7;
-	g = ((p & 0x03e0) | ((p & 0x0380) >> 5)) << 6;
-	r = ((p & 0x001c) | ((p & 0x001f) << 5)) << 14;
-	*buffer++ = 0xff000000 | r | g | b;
-    }
-}
-
-static FASTCALL void
-fbFetch_a4r4g4b4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    uint32_t  r,g,b, a;
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const uint16_t *pixel = (const uint16_t *)bits + x;
-    const uint16_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t  p = READ(pict, pixel++);
-
-	a = ((p & 0xf000) | ((p & 0xf000) >> 4)) << 16;
-	r = ((p & 0x0f00) | ((p & 0x0f00) >> 4)) << 12;
-	g = ((p & 0x00f0) | ((p & 0x00f0) >> 4)) << 8;
-	b = ((p & 0x000f) | ((p & 0x000f) << 4));
-	*buffer++ = a | r | g | b;
-    }
-}
-
-static FASTCALL void
-fbFetch_x4r4g4b4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    uint32_t  r,g,b;
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const uint16_t *pixel = (const uint16_t *)bits + x;
-    const uint16_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t  p = READ(pict, pixel++);
-
-	r = ((p & 0x0f00) | ((p & 0x0f00) >> 4)) << 12;
-	g = ((p & 0x00f0) | ((p & 0x00f0) >> 4)) << 8;
-	b = ((p & 0x000f) | ((p & 0x000f) << 4));
-	*buffer++ = 0xff000000 | r | g | b;
-    }
-}
-
-static FASTCALL void
-fbFetch_a4b4g4r4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    uint32_t  r,g,b, a;
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const uint16_t *pixel = (const uint16_t *)bits + x;
-    const uint16_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t  p = READ(pict, pixel++);
-
-	a = ((p & 0xf000) | ((p & 0xf000) >> 4)) << 16;
-	b = ((p & 0x0f00) | ((p & 0x0f00) >> 4)) >> 4;
-	g = ((p & 0x00f0) | ((p & 0x00f0) >> 4)) << 8;
-	r = ((p & 0x000f) | ((p & 0x000f) << 4)) << 16;
-	*buffer++ = a | r | g | b;
-    }
-}
-
-static FASTCALL void
-fbFetch_x4b4g4r4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    uint32_t  r,g,b;
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const uint16_t *pixel = (const uint16_t *)bits + x;
-    const uint16_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t  p = READ(pict, pixel++);
-
-	b = ((p & 0x0f00) | ((p & 0x0f00) >> 4)) >> 4;
-	g = ((p & 0x00f0) | ((p & 0x00f0) >> 4)) << 8;
-	r = ((p & 0x000f) | ((p & 0x000f) << 4)) << 16;
-	*buffer++ = 0xff000000 | r | g | b;
-    }
-}
-
-static FASTCALL void
-fbFetch_a8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const uint8_t *pixel = (const uint8_t *)bits + x;
-    const uint8_t *end = pixel + width;
-    while (pixel < end) {
-	*buffer++ = READ(pict, pixel++) << 24;
-    }
-}
-
-static FASTCALL void
-fbFetch_r3g3b2 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    uint32_t  r,g,b;
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const uint8_t *pixel = (const uint8_t *)bits + x;
-    const uint8_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t  p = READ(pict, pixel++);
-
-	r = ((p & 0xe0) | ((p & 0xe0) >> 3) | ((p & 0xc0) >> 6)) << 16;
-	g = ((p & 0x1c) | ((p & 0x18) >> 3) | ((p & 0x1c) << 3)) << 8;
-	b = (((p & 0x03)     ) |
-	     ((p & 0x03) << 2) |
-	     ((p & 0x03) << 4) |
-	     ((p & 0x03) << 6));
-	*buffer++ = 0xff000000 | r | g | b;
-    }
-}
-
-static FASTCALL void
-fbFetch_b2g3r3 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    uint32_t  r,g,b;
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const uint8_t *pixel = (const uint8_t *)bits + x;
-    const uint8_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t  p = READ(pict, pixel++);
-
-	b = (((p & 0xc0)     ) |
-	     ((p & 0xc0) >> 2) |
-	     ((p & 0xc0) >> 4) |
-	     ((p & 0xc0) >> 6));
-	g = ((p & 0x38) | ((p & 0x38) >> 3) | ((p & 0x30) << 2)) << 8;
-	r = (((p & 0x07)     ) |
-	     ((p & 0x07) << 3) |
-	     ((p & 0x06) << 6)) << 16;
-	*buffer++ = 0xff000000 | r | g | b;
-    }
-}
-
-static FASTCALL void
-fbFetch_a2r2g2b2 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    uint32_t   a,r,g,b;
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const uint8_t *pixel = (const uint8_t *)bits + x;
-    const uint8_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t  p = READ(pict, pixel++);
-
-	a = ((p & 0xc0) * 0x55) << 18;
-	r = ((p & 0x30) * 0x55) << 12;
-	g = ((p & 0x0c) * 0x55) << 6;
-	b = ((p & 0x03) * 0x55);
-	*buffer++ = a|r|g|b;
-    }
-}
-
-static FASTCALL void
-fbFetch_a2b2g2r2 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    uint32_t   a,r,g,b;
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const uint8_t *pixel = (const uint8_t *)bits + x;
-    const uint8_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t  p = READ(pict, pixel++);
-
-	a = ((p & 0xc0) * 0x55) << 18;
-	b = ((p & 0x30) * 0x55) >> 6;
-	g = ((p & 0x0c) * 0x55) << 6;
-	r = ((p & 0x03) * 0x55) << 16;
-	*buffer++ = a|r|g|b;
-    }
-}
-
-static FASTCALL void
-fbFetch_c8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const pixman_indexed_t * indexed = pict->indexed;
-    const uint8_t *pixel = (const uint8_t *)bits + x;
-    const uint8_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t  p = READ(pict, pixel++);
-	*buffer++ = indexed->rgba[p];
-    }
-}
-
-static FASTCALL void
-fbFetch_x4a4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const uint8_t *pixel = (const uint8_t *)bits + x;
-    const uint8_t *end = pixel + width;
-    while (pixel < end) {
-	uint8_t p = READ(pict, pixel++) & 0xf;
-	*buffer++ = (p | (p << 4)) << 24;
-    }
-}
-
-#define Fetch8(img,l,o)    (READ(img, (uint8_t *)(l) + ((o) >> 2)))
-#if IMAGE_BYTE_ORDER == MSBFirst
-#define Fetch4(img,l,o)    ((o) & 2 ? Fetch8(img,l,o) & 0xf : Fetch8(img,l,o) >> 4)
-#else
-#define Fetch4(img,l,o)    ((o) & 2 ? Fetch8(img,l,o) >> 4 : Fetch8(img,l,o) & 0xf)
-#endif
-
-static FASTCALL void
-fbFetch_a4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    int i;
-    for (i = 0; i < width; ++i) {
-	uint32_t  p = Fetch4(pict, bits, i + x);
-
-	p |= p << 4;
-	*buffer++ = p << 24;
-    }
-}
-
-static FASTCALL void
-fbFetch_r1g2b1 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    uint32_t  r,g,b;
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    int i;
-    for (i = 0; i < width; ++i) {
-	uint32_t  p = Fetch4(pict, bits, i + x);
-
-	r = ((p & 0x8) * 0xff) << 13;
-	g = ((p & 0x6) * 0x55) << 7;
-	b = ((p & 0x1) * 0xff);
-	*buffer++ = 0xff000000|r|g|b;
-    }
-}
-
-static FASTCALL void
-fbFetch_b1g2r1 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    uint32_t  r,g,b;
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    int i;
-    for (i = 0; i < width; ++i) {
-	uint32_t  p = Fetch4(pict, bits, i + x);
-
-	b = ((p & 0x8) * 0xff) >> 3;
-	g = ((p & 0x6) * 0x55) << 7;
-	r = ((p & 0x1) * 0xff) << 16;
-	*buffer++ = 0xff000000|r|g|b;
-    }
-}
-
-static FASTCALL void
-fbFetch_a1r1g1b1 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    uint32_t  a,r,g,b;
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    int i;
-    for (i = 0; i < width; ++i) {
-	uint32_t  p = Fetch4(pict, bits, i + x);
-
-	a = ((p & 0x8) * 0xff) << 21;
-	r = ((p & 0x4) * 0xff) << 14;
-	g = ((p & 0x2) * 0xff) << 7;
-	b = ((p & 0x1) * 0xff);
-	*buffer++ = a|r|g|b;
-    }
-}
-
-static FASTCALL void
-fbFetch_a1b1g1r1 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    uint32_t  a,r,g,b;
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    int i;
-    for (i = 0; i < width; ++i) {
-	uint32_t  p = Fetch4(pict, bits, i + x);
-
-	a = ((p & 0x8) * 0xff) << 21;
-	r = ((p & 0x4) * 0xff) >> 3;
-	g = ((p & 0x2) * 0xff) << 7;
-	b = ((p & 0x1) * 0xff) << 16;
-	*buffer++ = a|r|g|b;
-    }
-}
-
-static FASTCALL void
-fbFetch_c4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const pixman_indexed_t * indexed = pict->indexed;
-    int i;
-    for (i = 0; i < width; ++i) {
-	uint32_t  p = Fetch4(pict, bits, i + x);
-
-	*buffer++ = indexed->rgba[p];
-    }
-}
-
-
-static FASTCALL void
-fbFetch_a1 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    int i;
-    for (i = 0; i < width; ++i) {
-	uint32_t  p = READ(pict, bits + ((i + x) >> 5));
-	uint32_t  a;
-#if BITMAP_BIT_ORDER == MSBFirst
-	a = p >> (0x1f - ((i+x) & 0x1f));
-#else
-	a = p >> ((i+x) & 0x1f);
-#endif
-	a = a & 1;
-	a |= a << 1;
-	a |= a << 2;
-	a |= a << 4;
-	*buffer++ = a << 24;
-    }
-}
-
-static FASTCALL void
-fbFetch_g1 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const pixman_indexed_t * indexed = pict->indexed;
-    int i;
-    for (i = 0; i < width; ++i) {
-	uint32_t p = READ(pict, bits + ((i+x) >> 5));
-	uint32_t a;
-#if BITMAP_BIT_ORDER == MSBFirst
-	a = p >> (0x1f - ((i+x) & 0x1f));
-#else
-	a = p >> ((i+x) & 0x1f);
-#endif
-	a = a & 1;
-	*buffer++ = indexed->rgba[a];
-    }
-}
-
-static FASTCALL void
-fbFetch_yuy2 (bits_image_t *pict, int x, int line, int width, uint32_t *buffer)
-{
-    int16_t y, u, v;
-    int32_t r, g, b;
-    int   i;
-
-    const uint32_t *bits = pict->bits + pict->rowstride * line;
-
-    for (i = 0; i < width; i++)
-    {
-	y = ((uint8_t *) bits)[(x + i) << 1] - 16;
-	u = ((uint8_t *) bits)[(((x + i) << 1) & -4) + 1] - 128;
-	v = ((uint8_t *) bits)[(((x + i) << 1) & -4) + 3] - 128;
-
-	/* R = 1.164(Y - 16) + 1.596(V - 128) */
-	r = 0x012b27 * y + 0x019a2e * v;
-	/* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */
-	g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u;
-	/* B = 1.164(Y - 16) + 2.018(U - 128) */
-	b = 0x012b27 * y + 0x0206a2 * u;
-
-    WRITE(pict, buffer++, 0xff000000 |
-	(r >= 0 ? r < 0x1000000 ? r         & 0xff0000 : 0xff0000 : 0) |
-	(g >= 0 ? g < 0x1000000 ? (g >> 8)  & 0x00ff00 : 0x00ff00 : 0) |
-	(b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0));
-    }
-}
-
-static FASTCALL void
-fbFetch_yv12 (bits_image_t *pict, int x, int line, int width, uint32_t *buffer)
-{
-    YV12_SETUP(pict);
-    uint8_t *pY = YV12_Y (line);
-    uint8_t *pU = YV12_U (line);
-    uint8_t *pV = YV12_V (line);
-    int16_t y, u, v;
-    int32_t r, g, b;
-    int   i;
-
-    for (i = 0; i < width; i++)
-    {
-	y = pY[x + i] - 16;
-	u = pU[(x + i) >> 1] - 128;
-	v = pV[(x + i) >> 1] - 128;
-
-	/* R = 1.164(Y - 16) + 1.596(V - 128) */
-	r = 0x012b27 * y + 0x019a2e * v;
-	/* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */
-	g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u;
-	/* B = 1.164(Y - 16) + 2.018(U - 128) */
-	b = 0x012b27 * y + 0x0206a2 * u;
-
-	WRITE(pict, buffer++, 0xff000000 |
-	    (r >= 0 ? r < 0x1000000 ? r         & 0xff0000 : 0xff0000 : 0) |
-	    (g >= 0 ? g < 0x1000000 ? (g >> 8)  & 0x00ff00 : 0x00ff00 : 0) |
-	    (b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0));
-    }
-}
-
-static fetchProc fetchProcForPicture (bits_image_t * pict)
-{
-    switch(pict->format) {
-    case PIXMAN_a8r8g8b8: return fbFetch_a8r8g8b8;
-    case PIXMAN_x8r8g8b8: return fbFetch_x8r8g8b8;
-    case PIXMAN_a8b8g8r8: return fbFetch_a8b8g8r8;
-    case PIXMAN_x8b8g8r8: return fbFetch_x8b8g8r8;
-
-        /* 24bpp formats */
-    case PIXMAN_r8g8b8: return fbFetch_r8g8b8;
-    case PIXMAN_b8g8r8: return fbFetch_b8g8r8;
-
-        /* 16bpp formats */
-    case PIXMAN_r5g6b5: return fbFetch_r5g6b5;
-    case PIXMAN_b5g6r5: return fbFetch_b5g6r5;
-
-    case PIXMAN_a1r5g5b5: return fbFetch_a1r5g5b5;
-    case PIXMAN_x1r5g5b5: return fbFetch_x1r5g5b5;
-    case PIXMAN_a1b5g5r5: return fbFetch_a1b5g5r5;
-    case PIXMAN_x1b5g5r5: return fbFetch_x1b5g5r5;
-    case PIXMAN_a4r4g4b4: return fbFetch_a4r4g4b4;
-    case PIXMAN_x4r4g4b4: return fbFetch_x4r4g4b4;
-    case PIXMAN_a4b4g4r4: return fbFetch_a4b4g4r4;
-    case PIXMAN_x4b4g4r4: return fbFetch_x4b4g4r4;
-
-        /* 8bpp formats */
-    case PIXMAN_a8: return  fbFetch_a8;
-    case PIXMAN_r3g3b2: return fbFetch_r3g3b2;
-    case PIXMAN_b2g3r3: return fbFetch_b2g3r3;
-    case PIXMAN_a2r2g2b2: return fbFetch_a2r2g2b2;
-    case PIXMAN_a2b2g2r2: return fbFetch_a2b2g2r2;
-    case PIXMAN_c8: return  fbFetch_c8;
-    case PIXMAN_g8: return  fbFetch_c8;
-    case PIXMAN_x4a4: return fbFetch_x4a4;
-
-        /* 4bpp formats */
-    case PIXMAN_a4: return  fbFetch_a4;
-    case PIXMAN_r1g2b1: return fbFetch_r1g2b1;
-    case PIXMAN_b1g2r1: return fbFetch_b1g2r1;
-    case PIXMAN_a1r1g1b1: return fbFetch_a1r1g1b1;
-    case PIXMAN_a1b1g1r1: return fbFetch_a1b1g1r1;
-    case PIXMAN_c4: return  fbFetch_c4;
-    case PIXMAN_g4: return  fbFetch_c4;
-
-        /* 1bpp formats */
-    case PIXMAN_a1: return  fbFetch_a1;
-    case PIXMAN_g1: return  fbFetch_g1;
-
-        /* YUV formats */
-    case PIXMAN_yuy2: return fbFetch_yuy2;
-    case PIXMAN_yv12: return fbFetch_yv12;
-    }
-
-    return NULL;
-}
-
-/*
- * Pixel wise fetching
- */
-
-typedef FASTCALL uint32_t (*fetchPixelProc)(bits_image_t *pict, int offset, int line);
-
-static FASTCALL uint32_t
-fbFetchPixel_a8r8g8b8 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    return READ(pict, (uint32_t *)bits + offset);
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_x8r8g8b8 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    return READ(pict, (uint32_t *)bits + offset) | 0xff000000;
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_a8b8g8r8 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = READ(pict, (uint32_t *)bits + offset);
-
-    return ((pixel & 0xff000000) |
-	    ((pixel >> 16) & 0xff) |
-	    (pixel & 0x0000ff00) |
-	    ((pixel & 0xff) << 16));
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_x8b8g8r8 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = READ(pict, (uint32_t *)bits + offset);
-
-    return ((0xff000000) |
-	    ((pixel >> 16) & 0xff) |
-	    (pixel & 0x0000ff00) |
-	    ((pixel & 0xff) << 16));
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_r8g8b8 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint8_t   *pixel = ((uint8_t *) bits) + (offset*3);
-#if IMAGE_BYTE_ORDER == MSBFirst
-    return (0xff000000 |
-	    (READ(pict, pixel + 0) << 16) |
-	    (READ(pict, pixel + 1) << 8) |
-	    (READ(pict, pixel + 2)));
-#else
-    return (0xff000000 |
-	    (READ(pict, pixel + 2) << 16) |
-	    (READ(pict, pixel + 1) << 8) |
-	    (READ(pict, pixel + 0)));
-#endif
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_b8g8r8 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint8_t   *pixel = ((uint8_t *) bits) + (offset*3);
-#if IMAGE_BYTE_ORDER == MSBFirst
-    return (0xff000000 |
-	    (READ(pict, pixel + 2) << 16) |
-	    (READ(pict, pixel + 1) << 8) |
-	    (READ(pict, pixel + 0)));
-#else
-    return (0xff000000 |
-	    (READ(pict, pixel + 0) << 16) |
-	    (READ(pict, pixel + 1) << 8) |
-	    (READ(pict, pixel + 2)));
-#endif
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_r5g6b5 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t  r,g,b;
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
-
-    r = ((pixel & 0xf800) | ((pixel & 0xe000) >> 5)) << 8;
-    g = ((pixel & 0x07e0) | ((pixel & 0x0600) >> 6)) << 5;
-    b = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) >> 2;
-    return (0xff000000 | r | g | b);
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_b5g6r5 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t  r,g,b;
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
-
-    b = ((pixel & 0xf800) | ((pixel & 0xe000) >> 5)) >> 8;
-    g = ((pixel & 0x07e0) | ((pixel & 0x0600) >> 6)) << 5;
-    r = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) << 14;
-    return (0xff000000 | r | g | b);
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_a1r5g5b5 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t  a,r,g,b;
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
-
-    a = (uint32_t) ((uint8_t) (0 - ((pixel & 0x8000) >> 15))) << 24;
-    r = ((pixel & 0x7c00) | ((pixel & 0x7000) >> 5)) << 9;
-    g = ((pixel & 0x03e0) | ((pixel & 0x0380) >> 5)) << 6;
-    b = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) >> 2;
-    return (a | r | g | b);
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_x1r5g5b5 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t  r,g,b;
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
-
-    r = ((pixel & 0x7c00) | ((pixel & 0x7000) >> 5)) << 9;
-    g = ((pixel & 0x03e0) | ((pixel & 0x0380) >> 5)) << 6;
-    b = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) >> 2;
-    return (0xff000000 | r | g | b);
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_a1b5g5r5 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t  a,r,g,b;
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
-
-    a = (uint32_t) ((uint8_t) (0 - ((pixel & 0x8000) >> 15))) << 24;
-    b = ((pixel & 0x7c00) | ((pixel & 0x7000) >> 5)) >> 7;
-    g = ((pixel & 0x03e0) | ((pixel & 0x0380) >> 5)) << 6;
-    r = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) << 14;
-    return (a | r | g | b);
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_x1b5g5r5 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t  r,g,b;
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
-
-    b = ((pixel & 0x7c00) | ((pixel & 0x7000) >> 5)) >> 7;
-    g = ((pixel & 0x03e0) | ((pixel & 0x0380) >> 5)) << 6;
-    r = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) << 14;
-    return (0xff000000 | r | g | b);
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_a4r4g4b4 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t  a,r,g,b;
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
-
-    a = ((pixel & 0xf000) | ((pixel & 0xf000) >> 4)) << 16;
-    r = ((pixel & 0x0f00) | ((pixel & 0x0f00) >> 4)) << 12;
-    g = ((pixel & 0x00f0) | ((pixel & 0x00f0) >> 4)) << 8;
-    b = ((pixel & 0x000f) | ((pixel & 0x000f) << 4));
-    return (a | r | g | b);
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_x4r4g4b4 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t  r,g,b;
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
-
-    r = ((pixel & 0x0f00) | ((pixel & 0x0f00) >> 4)) << 12;
-    g = ((pixel & 0x00f0) | ((pixel & 0x00f0) >> 4)) << 8;
-    b = ((pixel & 0x000f) | ((pixel & 0x000f) << 4));
-    return (0xff000000 | r | g | b);
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_a4b4g4r4 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t  a,r,g,b;
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
-
-    a = ((pixel & 0xf000) | ((pixel & 0xf000) >> 4)) << 16;
-    b = ((pixel & 0x0f00) | ((pixel & 0x0f00) >> 4)) >> 4;
-    g = ((pixel & 0x00f0) | ((pixel & 0x00f0) >> 4)) << 8;
-    r = ((pixel & 0x000f) | ((pixel & 0x000f) << 4)) << 16;
-    return (a | r | g | b);
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_x4b4g4r4 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t  r,g,b;
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
-
-    b = ((pixel & 0x0f00) | ((pixel & 0x0f00) >> 4)) >> 4;
-    g = ((pixel & 0x00f0) | ((pixel & 0x00f0) >> 4)) << 8;
-    r = ((pixel & 0x000f) | ((pixel & 0x000f) << 4)) << 16;
-    return (0xff000000 | r | g | b);
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_a8 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t   pixel = READ(pict, (uint8_t *) bits + offset);
-
-    return pixel << 24;
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_r3g3b2 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t  r,g,b;
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t   pixel = READ(pict, (uint8_t *) bits + offset);
-
-    r = ((pixel & 0xe0) | ((pixel & 0xe0) >> 3) | ((pixel & 0xc0) >> 6)) << 16;
-    g = ((pixel & 0x1c) | ((pixel & 0x18) >> 3) | ((pixel & 0x1c) << 3)) << 8;
-    b = (((pixel & 0x03)     ) |
-	 ((pixel & 0x03) << 2) |
-	 ((pixel & 0x03) << 4) |
-	 ((pixel & 0x03) << 6));
-    return (0xff000000 | r | g | b);
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_b2g3r3 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t  r,g,b;
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t   pixel = READ(pict, (uint8_t *) bits + offset);
-
-    b = (((pixel & 0xc0)     ) |
-	 ((pixel & 0xc0) >> 2) |
-	 ((pixel & 0xc0) >> 4) |
-	 ((pixel & 0xc0) >> 6));
-    g = ((pixel & 0x38) | ((pixel & 0x38) >> 3) | ((pixel & 0x30) << 2)) << 8;
-    r = (((pixel & 0x07)     ) |
-	 ((pixel & 0x07) << 3) |
-	 ((pixel & 0x06) << 6)) << 16;
-    return (0xff000000 | r | g | b);
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_a2r2g2b2 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t   a,r,g,b;
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t   pixel = READ(pict, (uint8_t *) bits + offset);
-
-    a = ((pixel & 0xc0) * 0x55) << 18;
-    r = ((pixel & 0x30) * 0x55) << 12;
-    g = ((pixel & 0x0c) * 0x55) << 6;
-    b = ((pixel & 0x03) * 0x55);
-    return a|r|g|b;
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_a2b2g2r2 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t   a,r,g,b;
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t   pixel = READ(pict, (uint8_t *) bits + offset);
-
-    a = ((pixel & 0xc0) * 0x55) << 18;
-    b = ((pixel & 0x30) * 0x55) >> 6;
-    g = ((pixel & 0x0c) * 0x55) << 6;
-    r = ((pixel & 0x03) * 0x55) << 16;
-    return a|r|g|b;
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_c8 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t   pixel = READ(pict, (uint8_t *) bits + offset);
-    const pixman_indexed_t * indexed = pict->indexed;
-    return indexed->rgba[pixel];
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_x4a4 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t   pixel = READ(pict, (uint8_t *) bits + offset);
-
-    return ((pixel & 0xf) | ((pixel & 0xf) << 4)) << 24;
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_a4 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = Fetch4(pict, bits, offset);
-
-    pixel |= pixel << 4;
-    return pixel << 24;
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_r1g2b1 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t  r,g,b;
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = Fetch4(pict, bits, offset);
-
-    r = ((pixel & 0x8) * 0xff) << 13;
-    g = ((pixel & 0x6) * 0x55) << 7;
-    b = ((pixel & 0x1) * 0xff);
-    return 0xff000000|r|g|b;
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_b1g2r1 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t  r,g,b;
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = Fetch4(pict, bits, offset);
-
-    b = ((pixel & 0x8) * 0xff) >> 3;
-    g = ((pixel & 0x6) * 0x55) << 7;
-    r = ((pixel & 0x1) * 0xff) << 16;
-    return 0xff000000|r|g|b;
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_a1r1g1b1 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t  a,r,g,b;
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = Fetch4(pict, bits, offset);
-
-    a = ((pixel & 0x8) * 0xff) << 21;
-    r = ((pixel & 0x4) * 0xff) << 14;
-    g = ((pixel & 0x2) * 0xff) << 7;
-    b = ((pixel & 0x1) * 0xff);
-    return a|r|g|b;
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_a1b1g1r1 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t  a,r,g,b;
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = Fetch4(pict, bits, offset);
-
-    a = ((pixel & 0x8) * 0xff) << 21;
-    r = ((pixel & 0x4) * 0xff) >> 3;
-    g = ((pixel & 0x2) * 0xff) << 7;
-    b = ((pixel & 0x1) * 0xff) << 16;
-    return a|r|g|b;
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_c4 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = Fetch4(pict, bits, offset);
-    const pixman_indexed_t * indexed = pict->indexed;
-
-    return indexed->rgba[pixel];
-}
-
-
-static FASTCALL uint32_t
-fbFetchPixel_a1 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = READ(pict, bits + (offset >> 5));
-    uint32_t  a;
-#if BITMAP_BIT_ORDER == MSBFirst
-    a = pixel >> (0x1f - (offset & 0x1f));
-#else
-    a = pixel >> (offset & 0x1f);
-#endif
-    a = a & 1;
-    a |= a << 1;
-    a |= a << 2;
-    a |= a << 4;
-    return a << 24;
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_g1 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t pixel = READ(pict, bits + (offset >> 5));
-    const pixman_indexed_t * indexed = pict->indexed;
-    uint32_t a;
-#if BITMAP_BIT_ORDER == MSBFirst
-    a = pixel >> (0x1f - (offset & 0x1f));
-#else
-    a = pixel >> (offset & 0x1f);
-#endif
-    a = a & 1;
-    return indexed->rgba[a];
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_yuy2 (bits_image_t *pict, int offset, int line)
-{
-    int16_t y, u, v;
-    int32_t r, g, b;
-
-    const uint32_t *bits = pict->bits + pict->rowstride * line;
-
-    y = ((uint8_t *) bits)[offset << 1] - 16;
-    u = ((uint8_t *) bits)[((offset << 1) & -4) + 1] - 128;
-    v = ((uint8_t *) bits)[((offset << 1) & -4) + 3] - 128;
-
-    /* R = 1.164(Y - 16) + 1.596(V - 128) */
-    r = 0x012b27 * y + 0x019a2e * v;
-    /* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */
-    g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u;
-    /* B = 1.164(Y - 16) + 2.018(U - 128) */
-    b = 0x012b27 * y + 0x0206a2 * u;
-
-    return 0xff000000 |
-	(r >= 0 ? r < 0x1000000 ? r         & 0xff0000 : 0xff0000 : 0) |
-	(g >= 0 ? g < 0x1000000 ? (g >> 8)  & 0x00ff00 : 0x00ff00 : 0) |
-	(b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0);
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_yv12 (bits_image_t *pict, int offset, int line)
-{
-    YV12_SETUP(pict);
-    int16_t y = YV12_Y (line)[offset] - 16;
-    int16_t u = YV12_U (line)[offset >> 1] - 128;
-    int16_t v = YV12_V (line)[offset >> 1] - 128;
-    int32_t r, g, b;
-
-    /* R = 1.164(Y - 16) + 1.596(V - 128) */
-    r = 0x012b27 * y + 0x019a2e * v;
-    /* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */
-    g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u;
-    /* B = 1.164(Y - 16) + 2.018(U - 128) */
-    b = 0x012b27 * y + 0x0206a2 * u;
-
-    return 0xff000000 |
-	(r >= 0 ? r < 0x1000000 ? r         & 0xff0000 : 0xff0000 : 0) |
-	(g >= 0 ? g < 0x1000000 ? (g >> 8)  & 0x00ff00 : 0x00ff00 : 0) |
-	(b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0);
-}
-
-static fetchPixelProc fetchPixelProcForPicture (bits_image_t * pict)
-{
-    switch(pict->format) {
-    case PIXMAN_a8r8g8b8: return fbFetchPixel_a8r8g8b8;
-    case PIXMAN_x8r8g8b8: return fbFetchPixel_x8r8g8b8;
-    case PIXMAN_a8b8g8r8: return fbFetchPixel_a8b8g8r8;
-    case PIXMAN_x8b8g8r8: return fbFetchPixel_x8b8g8r8;
-
-        /* 24bpp formats */
-    case PIXMAN_r8g8b8: return fbFetchPixel_r8g8b8;
-    case PIXMAN_b8g8r8: return fbFetchPixel_b8g8r8;
-
-        /* 16bpp formats */
-    case PIXMAN_r5g6b5: return fbFetchPixel_r5g6b5;
-    case PIXMAN_b5g6r5: return fbFetchPixel_b5g6r5;
-
-    case PIXMAN_a1r5g5b5: return fbFetchPixel_a1r5g5b5;
-    case PIXMAN_x1r5g5b5: return fbFetchPixel_x1r5g5b5;
-    case PIXMAN_a1b5g5r5: return fbFetchPixel_a1b5g5r5;
-    case PIXMAN_x1b5g5r5: return fbFetchPixel_x1b5g5r5;
-    case PIXMAN_a4r4g4b4: return fbFetchPixel_a4r4g4b4;
-    case PIXMAN_x4r4g4b4: return fbFetchPixel_x4r4g4b4;
-    case PIXMAN_a4b4g4r4: return fbFetchPixel_a4b4g4r4;
-    case PIXMAN_x4b4g4r4: return fbFetchPixel_x4b4g4r4;
-
-        /* 8bpp formats */
-    case PIXMAN_a8: return  fbFetchPixel_a8;
-    case PIXMAN_r3g3b2: return fbFetchPixel_r3g3b2;
-    case PIXMAN_b2g3r3: return fbFetchPixel_b2g3r3;
-    case PIXMAN_a2r2g2b2: return fbFetchPixel_a2r2g2b2;
-    case PIXMAN_a2b2g2r2: return fbFetchPixel_a2b2g2r2;
-    case PIXMAN_c8: return  fbFetchPixel_c8;
-    case PIXMAN_g8: return  fbFetchPixel_c8;
-    case PIXMAN_x4a4: return fbFetchPixel_x4a4;
-
-        /* 4bpp formats */
-    case PIXMAN_a4: return  fbFetchPixel_a4;
-    case PIXMAN_r1g2b1: return fbFetchPixel_r1g2b1;
-    case PIXMAN_b1g2r1: return fbFetchPixel_b1g2r1;
-    case PIXMAN_a1r1g1b1: return fbFetchPixel_a1r1g1b1;
-    case PIXMAN_a1b1g1r1: return fbFetchPixel_a1b1g1r1;
-    case PIXMAN_c4: return  fbFetchPixel_c4;
-    case PIXMAN_g4: return  fbFetchPixel_c4;
-
-        /* 1bpp formats */
-    case PIXMAN_a1: return  fbFetchPixel_a1;
-    case PIXMAN_g1: return  fbFetchPixel_g1;
-
-        /* YUV formats */
-    case PIXMAN_yuy2: return fbFetchPixel_yuy2;
-    case PIXMAN_yv12: return fbFetchPixel_yv12;
-    }
-
-    return NULL;
-}
-
-
-/*
- * All the store functions
- */
-
-typedef FASTCALL void (*storeProc) (pixman_image_t *image,
-				    uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed);
-
-#define Splita(v)	uint32_t	a = ((v) >> 24), r = ((v) >> 16) & 0xff, g = ((v) >> 8) & 0xff, b = (v) & 0xff
-#define Split(v)	uint32_t	r = ((v) >> 16) & 0xff, g = ((v) >> 8) & 0xff, b = (v) & 0xff
-
-static FASTCALL void
-fbStore_a8r8g8b8 (pixman_image_t *image,
-		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    MEMCPY_WRAPPED(image, ((uint32_t *)bits) + x, values, width*sizeof(uint32_t));
-}
-
-static FASTCALL void
-fbStore_x8r8g8b8 (pixman_image_t *image,
-		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    uint32_t *pixel = (uint32_t *)bits + x;
-    for (i = 0; i < width; ++i)
-	WRITE(image, pixel++, values[i] & 0xffffff);
-}
-
-static FASTCALL void
-fbStore_a8b8g8r8 (pixman_image_t *image,
-		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    uint32_t *pixel = (uint32_t *)bits + x;
-    for (i = 0; i < width; ++i)
-	WRITE(image, pixel++, (values[i] & 0xff00ff00) | ((values[i] >> 16) & 0xff) | ((values[i] & 0xff) << 16));
-}
-
-static FASTCALL void
-fbStore_x8b8g8r8 (pixman_image_t *image,
-		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    uint32_t *pixel = (uint32_t *)bits + x;
-    for (i = 0; i < width; ++i)
-	WRITE(image, pixel++, (values[i] & 0x0000ff00) | ((values[i] >> 16) & 0xff) | ((values[i] & 0xff) << 16));
-}
-
-static FASTCALL void
-fbStore_r8g8b8 (pixman_image_t *image,
-		uint32_t *bits, const uint32_t *values, int x, int width,
-		const pixman_indexed_t * indexed)
-{
-    int i;
-    uint8_t *pixel = ((uint8_t *) bits) + 3*x;
-    for (i = 0; i < width; ++i) {
-	Store24(image, pixel, values[i]);
-	pixel += 3;
-    }
-}
-
-static FASTCALL void
-fbStore_b8g8r8 (pixman_image_t *image,
-		uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    uint8_t *pixel = ((uint8_t *) bits) + 3*x;
-    for (i = 0; i < width; ++i) {
-	uint32_t val = values[i];
-#if IMAGE_BYTE_ORDER == MSBFirst
-	WRITE(image, pixel++, Blue(val));
-	WRITE(image, pixel++, Green(val));
-	WRITE(image, pixel++, Red(val));
-#else
-	WRITE(image, pixel++, Red(val));
-	WRITE(image, pixel++, Green(val));
-	WRITE(image, pixel++, Blue(val));
-#endif
-    }
-}
-
-static FASTCALL void
-fbStore_r5g6b5 (pixman_image_t *image,
-		uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    uint16_t *pixel = ((uint16_t *) bits) + x;
-    for (i = 0; i < width; ++i) {
-	uint32_t s = values[i];
-	WRITE(image, pixel++, ((s >> 3) & 0x001f) |
-	      ((s >> 5) & 0x07e0) |
-	      ((s >> 8) & 0xf800));
-    }
-}
-
-static FASTCALL void
-fbStore_b5g6r5 (pixman_image_t *image,
-		uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    uint16_t  *pixel = ((uint16_t *) bits) + x;
-    for (i = 0; i < width; ++i) {
-	Split(values[i]);
-	WRITE(image, pixel++, ((b << 8) & 0xf800) |
-	      ((g << 3) & 0x07e0) |
-	      ((r >> 3)         ));
-    }
-}
-
-static FASTCALL void
-fbStore_a1r5g5b5 (pixman_image_t *image,
-		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    uint16_t  *pixel = ((uint16_t *) bits) + x;
-    for (i = 0; i < width; ++i) {
-	Splita(values[i]);
-	WRITE(image, pixel++, ((a << 8) & 0x8000) |
-	      ((r << 7) & 0x7c00) |
-	      ((g << 2) & 0x03e0) |
-	      ((b >> 3)         ));
-    }
-}
-
-static FASTCALL void
-fbStore_x1r5g5b5 (pixman_image_t *image,
-		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    uint16_t  *pixel = ((uint16_t *) bits) + x;
-    for (i = 0; i < width; ++i) {
-	Split(values[i]);
-	WRITE(image, pixel++, ((r << 7) & 0x7c00) |
-	      ((g << 2) & 0x03e0) |
-	      ((b >> 3)         ));
-    }
-}
-
-static FASTCALL void
-fbStore_a1b5g5r5 (pixman_image_t *image,
-		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    uint16_t  *pixel = ((uint16_t *) bits) + x;
-    for (i = 0; i < width; ++i) {
-	Splita(values[i]);
-	WRITE(image, pixel++, ((a << 8) & 0x8000) |
-	      ((b << 7) & 0x7c00) |
-	      ((g << 2) & 0x03e0) |
-	      ((r >> 3)         ));
-    }
-}
-
-static FASTCALL void
-fbStore_x1b5g5r5 (pixman_image_t *image,
-		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    uint16_t  *pixel = ((uint16_t *) bits) + x;
-    for (i = 0; i < width; ++i) {
-	Split(values[i]);
-	WRITE(image, pixel++, ((b << 7) & 0x7c00) |
-	      ((g << 2) & 0x03e0) |
-	      ((r >> 3)         ));
-    }
-}
-
-static FASTCALL void
-fbStore_a4r4g4b4 (pixman_image_t *image,
-		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    uint16_t  *pixel = ((uint16_t *) bits) + x;
-    for (i = 0; i < width; ++i) {
-	Splita(values[i]);
-	WRITE(image, pixel++, ((a << 8) & 0xf000) |
-	      ((r << 4) & 0x0f00) |
-	      ((g     ) & 0x00f0) |
-	      ((b >> 4)         ));
-    }
-}
-
-static FASTCALL void
-fbStore_x4r4g4b4 (pixman_image_t *image,
-		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    uint16_t  *pixel = ((uint16_t *) bits) + x;
-    for (i = 0; i < width; ++i) {
-	Split(values[i]);
-	WRITE(image, pixel++, ((r << 4) & 0x0f00) |
-	      ((g     ) & 0x00f0) |
-	      ((b >> 4)         ));
-    }
-}
-
-static FASTCALL void
-fbStore_a4b4g4r4 (pixman_image_t *image,
-		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    uint16_t  *pixel = ((uint16_t *) bits) + x;
-    for (i = 0; i < width; ++i) {
-	Splita(values[i]);
-	WRITE(image, pixel++, ((a << 8) & 0xf000) |
-	      ((b << 4) & 0x0f00) |
-	      ((g     ) & 0x00f0) |
-	      ((r >> 4)         ));
-    }
-}
-
-static FASTCALL void
-fbStore_x4b4g4r4 (pixman_image_t *image,
-		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    uint16_t  *pixel = ((uint16_t *) bits) + x;
-    for (i = 0; i < width; ++i) {
-	Split(values[i]);
-	WRITE(image, pixel++, ((b << 4) & 0x0f00) |
-	      ((g     ) & 0x00f0) |
-	      ((r >> 4)         ));
-    }
-}
-
-static FASTCALL void
-fbStore_a8 (pixman_image_t *image,
-	    uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    uint8_t   *pixel = ((uint8_t *) bits) + x;
-    for (i = 0; i < width; ++i) {
-	WRITE(image, pixel++, values[i] >> 24);
-    }
-}
-
-static FASTCALL void
-fbStore_r3g3b2 (pixman_image_t *image,
-		uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    uint8_t   *pixel = ((uint8_t *) bits) + x;
-    for (i = 0; i < width; ++i) {
-	Split(values[i]);
-	WRITE(image, pixel++,
-	      ((r     ) & 0xe0) |
-	      ((g >> 3) & 0x1c) |
-	      ((b >> 6)       ));
-    }
-}
-
-static FASTCALL void
-fbStore_b2g3r3 (pixman_image_t *image,
-		uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    uint8_t   *pixel = ((uint8_t *) bits) + x;
-    for (i = 0; i < width; ++i) {
-	Split(values[i]);
-	WRITE(image, pixel++,
-	      ((b     ) & 0xc0) |
-	      ((g >> 2) & 0x1c) |
-	      ((r >> 5)       ));
-    }
-}
-
-static FASTCALL void
-fbStore_a2r2g2b2 (pixman_image_t *image,
-		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    uint8_t   *pixel = ((uint8_t *) bits) + x;
-    for (i = 0; i < width; ++i) {
-	Splita(values[i]);
-	WRITE(image, pixel++, ((a     ) & 0xc0) |
-	      ((r >> 2) & 0x30) |
-	      ((g >> 4) & 0x0c) |
-	      ((b >> 6)       ));
-    }
-}
-
-static FASTCALL void
-fbStore_c8 (pixman_image_t *image,
-	    uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    uint8_t   *pixel = ((uint8_t *) bits) + x;
-    for (i = 0; i < width; ++i) {
-	WRITE(image, pixel++, miIndexToEnt24(indexed,values[i]));
-    }
-}
-
-static FASTCALL void
-fbStore_x4a4 (pixman_image_t *image,
-	      uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    uint8_t   *pixel = ((uint8_t *) bits) + x;
-    for (i = 0; i < width; ++i) {
-	WRITE(image, pixel++, values[i] >> 28);
-    }
-}
-
-#define Store8(img,l,o,v)  (WRITE(img, (uint8_t *)(l) + ((o) >> 3), (v)))
-#if IMAGE_BYTE_ORDER == MSBFirst
-#define Store4(img,l,o,v)  Store8(img,l,o,((o) & 4 ?				\
-				   (Fetch8(img,l,o) & 0xf0) | (v) :		\
-				   (Fetch8(img,l,o) & 0x0f) | ((v) << 4)))
-#else
-#define Store4(img,l,o,v)  Store8(img,l,o,((o) & 4 ?			       \
-				   (Fetch8(img,l,o) & 0x0f) | ((v) << 4) : \
-				   (Fetch8(img,l,o) & 0xf0) | (v)))
-#endif
-
-static FASTCALL void
-fbStore_a4 (pixman_image_t *image,
-	    uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-	Store4(image, bits, i + x, values[i]>>28);
-    }
-}
-
-static FASTCALL void
-fbStore_r1g2b1 (pixman_image_t *image,
-		uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-	uint32_t  pixel;
-
-	Split(values[i]);
-	pixel = (((r >> 4) & 0x8) |
-		 ((g >> 5) & 0x6) |
-		 ((b >> 7)      ));
-	Store4(image, bits, i + x, pixel);
-    }
-}
-
-static FASTCALL void
-fbStore_b1g2r1 (pixman_image_t *image,
-		uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-	uint32_t  pixel;
-
-	Split(values[i]);
-	pixel = (((b >> 4) & 0x8) |
-		 ((g >> 5) & 0x6) |
-		 ((r >> 7)      ));
-	Store4(image, bits, i + x, pixel);
-    }
-}
-
-static FASTCALL void
-fbStore_a1r1g1b1 (pixman_image_t *image,
-		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-	uint32_t  pixel;
-	Splita(values[i]);
-	pixel = (((a >> 4) & 0x8) |
-		 ((r >> 5) & 0x4) |
-		 ((g >> 6) & 0x2) |
-		 ((b >> 7)      ));
-	Store4(image, bits, i + x, pixel);
-    }
-}
-
-static FASTCALL void
-fbStore_a1b1g1r1 (pixman_image_t *image,
-		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-	uint32_t  pixel;
-	Splita(values[i]);
-	pixel = (((a >> 4) & 0x8) |
-		 ((b >> 5) & 0x4) |
-		 ((g >> 6) & 0x2) |
-		 ((r >> 7)      ));
-	Store4(image, bits, i + x, pixel);
-    }
-}
-
-static FASTCALL void
-fbStore_c4 (pixman_image_t *image,
-	    uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-	uint32_t  pixel;
-
-	pixel = miIndexToEnt24(indexed, values[i]);
-	Store4(image, bits, i + x, pixel);
-    }
-}
-
-static FASTCALL void
-fbStore_a1 (pixman_image_t *image,
-	    uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-	uint32_t  *pixel = ((uint32_t *) bits) + ((i+x) >> 5);
-	uint32_t  mask = FbStipMask((i+x) & 0x1f, 1);
-
-	uint32_t v = values[i] & 0x80000000 ? mask : 0;
-	WRITE(image, pixel, (READ(image, pixel) & ~mask) | v);
-    }
-}
-
-static FASTCALL void
-fbStore_g1 (pixman_image_t *image,
-	    uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-	uint32_t  *pixel = ((uint32_t *) bits) + ((i+x) >> 5);
-	uint32_t  mask = FbStipMask((i+x) & 0x1f, 1);
-
-	uint32_t v = miIndexToEntY24(indexed,values[i]) ? mask : 0;
-	WRITE(image, pixel, (READ(image, pixel) & ~mask) | v);
-    }
-}
-
-
-static storeProc storeProcForPicture (bits_image_t * pict)
-{
-    switch(pict->format) {
-    case PIXMAN_a8r8g8b8: return fbStore_a8r8g8b8;
-    case PIXMAN_x8r8g8b8: return fbStore_x8r8g8b8;
-    case PIXMAN_a8b8g8r8: return fbStore_a8b8g8r8;
-    case PIXMAN_x8b8g8r8: return fbStore_x8b8g8r8;
-
-        /* 24bpp formats */
-    case PIXMAN_r8g8b8: return fbStore_r8g8b8;
-    case PIXMAN_b8g8r8: return fbStore_b8g8r8;
-
-        /* 16bpp formats */
-    case PIXMAN_r5g6b5: return fbStore_r5g6b5;
-    case PIXMAN_b5g6r5: return fbStore_b5g6r5;
-
-    case PIXMAN_a1r5g5b5: return fbStore_a1r5g5b5;
-    case PIXMAN_x1r5g5b5: return fbStore_x1r5g5b5;
-    case PIXMAN_a1b5g5r5: return fbStore_a1b5g5r5;
-    case PIXMAN_x1b5g5r5: return fbStore_x1b5g5r5;
-    case PIXMAN_a4r4g4b4: return fbStore_a4r4g4b4;
-    case PIXMAN_x4r4g4b4: return fbStore_x4r4g4b4;
-    case PIXMAN_a4b4g4r4: return fbStore_a4b4g4r4;
-    case PIXMAN_x4b4g4r4: return fbStore_x4b4g4r4;
-
-        /* 8bpp formats */
-    case PIXMAN_a8: return  fbStore_a8;
-    case PIXMAN_r3g3b2: return fbStore_r3g3b2;
-    case PIXMAN_b2g3r3: return fbStore_b2g3r3;
-    case PIXMAN_a2r2g2b2: return fbStore_a2r2g2b2;
-    case PIXMAN_c8: return  fbStore_c8;
-    case PIXMAN_g8: return  fbStore_c8;
-    case PIXMAN_x4a4: return fbStore_x4a4;
-
-        /* 4bpp formats */
-    case PIXMAN_a4: return  fbStore_a4;
-    case PIXMAN_r1g2b1: return fbStore_r1g2b1;
-    case PIXMAN_b1g2r1: return fbStore_b1g2r1;
-    case PIXMAN_a1r1g1b1: return fbStore_a1r1g1b1;
-    case PIXMAN_a1b1g1r1: return fbStore_a1b1g1r1;
-    case PIXMAN_c4: return  fbStore_c4;
-    case PIXMAN_g4: return  fbStore_c4;
-
-        /* 1bpp formats */
-    case PIXMAN_a1: return  fbStore_a1;
-    case PIXMAN_g1: return  fbStore_g1;
-    default:
-        return NULL;
-    }
-}
-
-
-/*
- * Combine src and mask
- */
-static FASTCALL void
-pixman_fbCombineMaskU (uint32_t *src, const uint32_t *mask, int width)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-        uint32_t a = *(mask + i) >> 24;
-        uint32_t s = *(src + i);
-        FbByteMul(s, a);
-        *(src + i) = s;
-    }
-}
-
-/*
- * All of the composing functions
- */
-
-static FASTCALL void
-fbCombineClear (uint32_t *dest, const uint32_t *src, int width)
-{
-    memset(dest, 0, width*sizeof(uint32_t));
-}
-
-static FASTCALL void
-fbCombineSrcU (uint32_t *dest, const uint32_t *src, int width)
-{
-    memcpy(dest, src, width*sizeof(uint32_t));
-}
-
-
-static FASTCALL void
-fbCombineOverU (uint32_t *dest, const uint32_t *src, int width)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-        uint32_t s = *(src + i);
-        uint32_t d = *(dest + i);
-        uint32_t ia = Alpha(~s);
-
-        FbByteMulAdd(d, ia, s);
-	*(dest + i) = d;
-    }
-}
-
-static FASTCALL void
-fbCombineOverReverseU (uint32_t *dest, const uint32_t *src, int width)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-        uint32_t s = *(src + i);
-        uint32_t d = *(dest + i);
-        uint32_t ia = Alpha(~*(dest + i));
-        FbByteMulAdd(s, ia, d);
-	*(dest + i) = s;
-    }
-}
-
-static FASTCALL void
-fbCombineInU (uint32_t *dest, const uint32_t *src, int width)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-        uint32_t s = *(src + i);
-        uint32_t a = Alpha(*(dest + i));
-        FbByteMul(s, a);
-	*(dest + i) = s;
-    }
-}
-
-static FASTCALL void
-fbCombineInReverseU (uint32_t *dest, const uint32_t *src, int width)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-        uint32_t d = *(dest + i);
-        uint32_t a = Alpha(*(src + i));
-        FbByteMul(d, a);
-	*(dest + i) = d;
-    }
-}
-
-static FASTCALL void
-fbCombineOutU (uint32_t *dest, const uint32_t *src, int width)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-        uint32_t s = *(src + i);
-        uint32_t a = Alpha(~*(dest + i));
-        FbByteMul(s, a);
-	*(dest + i) = s;
-    }
-}
-
-static FASTCALL void
-fbCombineOutReverseU (uint32_t *dest, const uint32_t *src, int width)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-        uint32_t d = *(dest + i);
-        uint32_t a = Alpha(~*(src + i));
-        FbByteMul(d, a);
-	*(dest + i) = d;
-    }
-}
-
-static FASTCALL void
-fbCombineAtopU (uint32_t *dest, const uint32_t *src, int width)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-        uint32_t s = *(src + i);
-        uint32_t d = *(dest + i);
-        uint32_t dest_a = Alpha(d);
-        uint32_t src_ia = Alpha(~s);
-
-        FbByteAddMul(s, dest_a, d, src_ia);
-	*(dest + i) = s;
-    }
-}
-
-static FASTCALL void
-fbCombineAtopReverseU (uint32_t *dest, const uint32_t *src, int width)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-        uint32_t s = *(src + i);
-        uint32_t d = *(dest + i);
-        uint32_t src_a = Alpha(s);
-        uint32_t dest_ia = Alpha(~d);
-
-        FbByteAddMul(s, dest_ia, d, src_a);
-	*(dest + i) = s;
-    }
-}
-
-static FASTCALL void
-fbCombineXorU (uint32_t *dest, const uint32_t *src, int width)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-        uint32_t s = *(src + i);
-        uint32_t d = *(dest + i);
-        uint32_t src_ia = Alpha(~s);
-        uint32_t dest_ia = Alpha(~d);
-
-        FbByteAddMul(s, dest_ia, d, src_ia);
-	*(dest + i) = s;
-    }
-}
-
-static FASTCALL void
-fbCombineAddU (uint32_t *dest, const uint32_t *src, int width)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-        uint32_t s = *(src + i);
-        uint32_t d = *(dest + i);
-        FbByteAdd(d, s);
-	*(dest + i) = d;
-    }
-}
-
-static FASTCALL void
-fbCombineSaturateU (uint32_t *dest, const uint32_t *src, int width)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-        uint32_t  s = *(src + i);
-        uint32_t d = *(dest + i);
-        uint16_t  sa, da;
-
-        sa = s >> 24;
-        da = ~d >> 24;
-        if (sa > da)
-        {
-            sa = FbIntDiv(da, sa);
-            FbByteMul(s, sa);
-        };
-        FbByteAdd(d, s);
-	*(dest + i) = d;
-    }
-}
-
-/*
- * All of the disjoint composing functions
-
- The four entries in the first column indicate what source contributions
- come from each of the four areas of the picture -- areas covered by neither
- A nor B, areas covered only by A, areas covered only by B and finally
- areas covered by both A and B.
-
- Disjoint			Conjoint
- Fa		Fb		Fa		Fb
- (0,0,0,0)	0		0		0		0
- (0,A,0,A)	1		0		1		0
- (0,0,B,B)	0		1		0		1
- (0,A,B,A)	1		min((1-a)/b,1)	1		max(1-a/b,0)
- (0,A,B,B)	min((1-b)/a,1)	1		max(1-b/a,0)	1
- (0,0,0,A)	max(1-(1-b)/a,0) 0		min(1,b/a)	0
- (0,0,0,B)	0		max(1-(1-a)/b,0) 0		min(a/b,1)
- (0,A,0,0)	min(1,(1-b)/a)	0		max(1-b/a,0)	0
- (0,0,B,0)	0		min(1,(1-a)/b)	0		max(1-a/b,0)
- (0,0,B,A)	max(1-(1-b)/a,0) min(1,(1-a)/b)	 min(1,b/a)	max(1-a/b,0)
- (0,A,0,B)	min(1,(1-b)/a)	max(1-(1-a)/b,0) max(1-b/a,0)	min(1,a/b)
- (0,A,B,0)	min(1,(1-b)/a)	min(1,(1-a)/b)	max(1-b/a,0)	max(1-a/b,0)
-
-*/
-
-#define CombineAOut 1
-#define CombineAIn  2
-#define CombineBOut 4
-#define CombineBIn  8
-
-#define CombineClear	0
-#define CombineA	(CombineAOut|CombineAIn)
-#define CombineB	(CombineBOut|CombineBIn)
-#define CombineAOver	(CombineAOut|CombineBOut|CombineAIn)
-#define CombineBOver	(CombineAOut|CombineBOut|CombineBIn)
-#define CombineAAtop	(CombineBOut|CombineAIn)
-#define CombineBAtop	(CombineAOut|CombineBIn)
-#define CombineXor	(CombineAOut|CombineBOut)
-
-/* portion covered by a but not b */
-static INLINE uint8_t
-fbCombineDisjointOutPart (uint8_t a, uint8_t b)
-{
-    /* min (1, (1-b) / a) */
-
-    b = ~b;		    /* 1 - b */
-    if (b >= a)		    /* 1 - b >= a -> (1-b)/a >= 1 */
-	return 0xff;	    /* 1 */
-    return FbIntDiv(b,a);   /* (1-b) / a */
-}
-
-/* portion covered by both a and b */
-static INLINE uint8_t
-fbCombineDisjointInPart (uint8_t a, uint8_t b)
-{
-    /* max (1-(1-b)/a,0) */
-    /*  = - min ((1-b)/a - 1, 0) */
-    /*  = 1 - min (1, (1-b)/a) */
-
-    b = ~b;		    /* 1 - b */
-    if (b >= a)		    /* 1 - b >= a -> (1-b)/a >= 1 */
-	return 0;	    /* 1 - 1 */
-    return ~FbIntDiv(b,a);  /* 1 - (1-b) / a */
-}
-
-static FASTCALL void
-fbCombineDisjointGeneralU (uint32_t *dest, const uint32_t *src, int width, uint8_t combine)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-        uint32_t s = *(src + i);
-        uint32_t d = *(dest + i);
-        uint32_t m,n,o,p;
-        uint16_t Fa, Fb, t, u, v;
-        uint8_t sa = s >> 24;
-        uint8_t da = d >> 24;
-
-        switch (combine & CombineA) {
-        default:
-            Fa = 0;
-            break;
-        case CombineAOut:
-            Fa = fbCombineDisjointOutPart (sa, da);
-            break;
-        case CombineAIn:
-            Fa = fbCombineDisjointInPart (sa, da);
-            break;
-        case CombineA:
-            Fa = 0xff;
-            break;
-        }
-
-        switch (combine & CombineB) {
-        default:
-            Fb = 0;
-            break;
-        case CombineBOut:
-            Fb = fbCombineDisjointOutPart (da, sa);
-            break;
-        case CombineBIn:
-            Fb = fbCombineDisjointInPart (da, sa);
-            break;
-        case CombineB:
-            Fb = 0xff;
-            break;
-        }
-        m = FbGen (s,d,0,Fa,Fb,t, u, v);
-        n = FbGen (s,d,8,Fa,Fb,t, u, v);
-        o = FbGen (s,d,16,Fa,Fb,t, u, v);
-        p = FbGen (s,d,24,Fa,Fb,t, u, v);
-        s = m|n|o|p;
-	*(dest + i) = s;
-    }
-}
-
-static FASTCALL void
-fbCombineDisjointOverU (uint32_t *dest, const uint32_t *src, int width)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-        uint32_t  s = *(src + i);
-        uint16_t  a = s >> 24;
-
-        if (a != 0x00)
-        {
-            if (a != 0xff)
-            {
-                uint32_t d = *(dest + i);
-                a = fbCombineDisjointOutPart (d >> 24, a);
-                FbByteMulAdd(d, a, s);
-                s = d;
-            }
-	    *(dest + i) = s;
-        }
-    }
-}
-
-static FASTCALL void
-fbCombineDisjointInU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineDisjointGeneralU (dest, src, width, CombineAIn);
-}
-
-static FASTCALL void
-fbCombineDisjointInReverseU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineDisjointGeneralU (dest, src, width, CombineBIn);
-}
-
-static FASTCALL void
-fbCombineDisjointOutU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineDisjointGeneralU (dest, src, width, CombineAOut);
-}
-
-static FASTCALL void
-fbCombineDisjointOutReverseU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineDisjointGeneralU (dest, src, width, CombineBOut);
-}
-
-static FASTCALL void
-fbCombineDisjointAtopU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineDisjointGeneralU (dest, src, width, CombineAAtop);
-}
-
-static FASTCALL void
-fbCombineDisjointAtopReverseU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineDisjointGeneralU (dest, src, width, CombineBAtop);
-}
-
-static FASTCALL void
-fbCombineDisjointXorU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineDisjointGeneralU (dest, src, width, CombineXor);
-}
-
-/* portion covered by a but not b */
-static INLINE uint8_t
-fbCombineConjointOutPart (uint8_t a, uint8_t b)
-{
-    /* max (1-b/a,0) */
-    /* = 1-min(b/a,1) */
-
-    /* min (1, (1-b) / a) */
-
-    if (b >= a)		    /* b >= a -> b/a >= 1 */
-	return 0x00;	    /* 0 */
-    return ~FbIntDiv(b,a);   /* 1 - b/a */
-}
-
-/* portion covered by both a and b */
-static INLINE uint8_t
-fbCombineConjointInPart (uint8_t a, uint8_t b)
-{
-    /* min (1,b/a) */
-
-    if (b >= a)		    /* b >= a -> b/a >= 1 */
-	return 0xff;	    /* 1 */
-    return FbIntDiv(b,a);   /* b/a */
-}
-
-static FASTCALL void
-fbCombineConjointGeneralU (uint32_t *dest, const uint32_t *src, int width, uint8_t combine)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-        uint32_t  s = *(src + i);
-        uint32_t d = *(dest + i);
-        uint32_t  m,n,o,p;
-        uint16_t  Fa, Fb, t, u, v;
-        uint8_t sa = s >> 24;
-        uint8_t da = d >> 24;
-
-        switch (combine & CombineA) {
-        default:
-            Fa = 0;
-            break;
-        case CombineAOut:
-            Fa = fbCombineConjointOutPart (sa, da);
-            break;
-        case CombineAIn:
-            Fa = fbCombineConjointInPart (sa, da);
-            break;
-        case CombineA:
-            Fa = 0xff;
-            break;
-        }
-
-        switch (combine & CombineB) {
-        default:
-            Fb = 0;
-            break;
-        case CombineBOut:
-            Fb = fbCombineConjointOutPart (da, sa);
-            break;
-        case CombineBIn:
-            Fb = fbCombineConjointInPart (da, sa);
-            break;
-        case CombineB:
-            Fb = 0xff;
-            break;
-        }
-        m = FbGen (s,d,0,Fa,Fb,t, u, v);
-        n = FbGen (s,d,8,Fa,Fb,t, u, v);
-        o = FbGen (s,d,16,Fa,Fb,t, u, v);
-        p = FbGen (s,d,24,Fa,Fb,t, u, v);
-        s = m|n|o|p;
-	*(dest + i) = s;
-    }
-}
-
-static FASTCALL void
-fbCombineConjointOverU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineConjointGeneralU (dest, src, width, CombineAOver);
-}
-
-
-static FASTCALL void
-fbCombineConjointOverReverseU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineConjointGeneralU (dest, src, width, CombineBOver);
-}
-
-
-static FASTCALL void
-fbCombineConjointInU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineConjointGeneralU (dest, src, width, CombineAIn);
-}
-
-
-static FASTCALL void
-fbCombineConjointInReverseU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineConjointGeneralU (dest, src, width, CombineBIn);
-}
-
-static FASTCALL void
-fbCombineConjointOutU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineConjointGeneralU (dest, src, width, CombineAOut);
-}
-
-static FASTCALL void
-fbCombineConjointOutReverseU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineConjointGeneralU (dest, src, width, CombineBOut);
-}
-
-static FASTCALL void
-fbCombineConjointAtopU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineConjointGeneralU (dest, src, width, CombineAAtop);
-}
-
-static FASTCALL void
-fbCombineConjointAtopReverseU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineConjointGeneralU (dest, src, width, CombineBAtop);
-}
-
-static FASTCALL void
-fbCombineConjointXorU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineConjointGeneralU (dest, src, width, CombineXor);
-}
-
-static CombineFuncU pixman_fbCombineFuncU[] = {
-    fbCombineClear,
-    fbCombineSrcU,
-    NULL, /* CombineDst */
-    fbCombineOverU,
-    fbCombineOverReverseU,
-    fbCombineInU,
-    fbCombineInReverseU,
-    fbCombineOutU,
-    fbCombineOutReverseU,
-    fbCombineAtopU,
-    fbCombineAtopReverseU,
-    fbCombineXorU,
-    fbCombineAddU,
-    fbCombineSaturateU,
-    NULL,
-    NULL,
-    fbCombineClear,
-    fbCombineSrcU,
-    NULL, /* CombineDst */
-    fbCombineDisjointOverU,
-    fbCombineSaturateU, /* DisjointOverReverse */
-    fbCombineDisjointInU,
-    fbCombineDisjointInReverseU,
-    fbCombineDisjointOutU,
-    fbCombineDisjointOutReverseU,
-    fbCombineDisjointAtopU,
-    fbCombineDisjointAtopReverseU,
-    fbCombineDisjointXorU,
-    NULL,
-    NULL,
-    NULL,
-    NULL,
-    fbCombineClear,
-    fbCombineSrcU,
-    NULL, /* CombineDst */
-    fbCombineConjointOverU,
-    fbCombineConjointOverReverseU,
-    fbCombineConjointInU,
-    fbCombineConjointInReverseU,
-    fbCombineConjointOutU,
-    fbCombineConjointOutReverseU,
-    fbCombineConjointAtopU,
-    fbCombineConjointAtopReverseU,
-    fbCombineConjointXorU,
-};
-
-static INLINE void
-fbCombineMaskC (uint32_t *src, uint32_t *mask)
-{
-    uint32_t a = *mask;
-
-    uint32_t	x;
-    uint16_t	xa;
-
-    if (!a)
-    {
-	*(src) = 0;
-	return;
-    }
-
-    x = *(src);
-    if (a == 0xffffffff)
-    {
-	x = x >> 24;
-	x |= x << 8;
-	x |= x << 16;
-	*(mask) = x;
-	return;
-    }
-
-    xa = x >> 24;
-    FbByteMulC(x, a);
-    *(src) = x;
-    FbByteMul(a, xa);
-    *(mask) = a;
-}
-
-static INLINE void
-fbCombineMaskValueC (uint32_t *src, const uint32_t *mask)
-{
-    uint32_t a = *mask;
-    uint32_t	x;
-
-    if (!a)
-    {
-	*(src) = 0;
-	return;
-    }
-
-    if (a == 0xffffffff)
-	return;
-
-    x = *(src);
-    FbByteMulC(x, a);
-    *(src) =x;
-}
-
-static INLINE void
-fbCombineMaskAlphaC (const uint32_t *src, uint32_t *mask)
-{
-    uint32_t a = *(mask);
-    uint32_t	x;
-
-    if (!a)
-	return;
-
-    x = *(src) >> 24;
-    if (x == 0xff)
-	return;
-    if (a == 0xffffffff)
-    {
-	x = x >> 24;
-	x |= x << 8;
-	x |= x << 16;
-	*(mask) = x;
-	return;
-    }
-
-    FbByteMul(a, x);
-    *(mask) = a;
-}
-
-static FASTCALL void
-fbCombineClearC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    memset(dest, 0, width*sizeof(uint32_t));
-}
-
-static FASTCALL void
-fbCombineSrcC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i) {
-	uint32_t s = *(src + i);
-	uint32_t m = *(mask + i);
-
-	fbCombineMaskValueC (&s, &m);
-
-	*(dest) = s;
-    }
-}
-
-static FASTCALL void
-fbCombineOverC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i) {
-	uint32_t s = *(src + i);
-	uint32_t m = *(mask + i);
-	uint32_t a;
-
-	fbCombineMaskC (&s, &m);
-
-	a = ~m;
-        if (a != 0xffffffff)
-        {
-            if (a)
-            {
-                uint32_t d = *(dest + i);
-                FbByteMulAddC(d, a, s);
-                s = d;
-            }
-	    *(dest + i) = s;
-        }
-    }
-}
-
-static FASTCALL void
-fbCombineOverReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i) {
-        uint32_t d = *(dest + i);
-        uint32_t a = ~d >> 24;
-
-        if (a)
-        {
-            uint32_t s = *(src + i);
-	    uint32_t m = *(mask + i);
-
-	    fbCombineMaskValueC (&s, &m);
-
-            if (a != 0xff)
-            {
-                FbByteMulAdd(s, a, d);
-            }
-	    *(dest + i) = s;
-        }
-    }
-}
-
-static FASTCALL void
-fbCombineInC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i) {
-        uint32_t d = *(dest + i);
-        uint16_t a = d >> 24;
-        uint32_t s = 0;
-        if (a)
-        {
-	    uint32_t m = *(mask + i);
-
-	    s = *(src + i);
-	    fbCombineMaskValueC (&s, &m);
-            if (a != 0xff)
-            {
-                FbByteMul(s, a);
-            }
-        }
-	*(dest + i) = s;
-    }
-}
-
-static FASTCALL void
-fbCombineInReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i) {
-        uint32_t s = *(src + i);
-        uint32_t m = *(mask + i);
-        uint32_t a;
-
-	fbCombineMaskAlphaC (&s, &m);
-
-	a = m;
-        if (a != 0xffffffff)
-        {
-            uint32_t d = 0;
-            if (a)
-            {
-                d = *(dest + i);
-                FbByteMulC(d, a);
-            }
-	    *(dest + i) = d;
-        }
-    }
-}
-
-static FASTCALL void
-fbCombineOutC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i) {
-        uint32_t d = *(dest + i);
-        uint16_t a = ~d >> 24;
-        uint32_t s = 0;
-        if (a)
-        {
-	    uint32_t m = *(mask + i);
-
-	    s = *(src + i);
-	    fbCombineMaskValueC (&s, &m);
-
-            if (a != 0xff)
-            {
-                FbByteMul(s, a);
-            }
-        }
-	*(dest + i) = s;
-    }
-}
-
-static FASTCALL void
-fbCombineOutReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i) {
-	uint32_t s = *(src + i);
-	uint32_t m = *(mask + i);
-	uint32_t a;
-
-	fbCombineMaskAlphaC (&s, &m);
-
-        a = ~m;
-        if (a != 0xffffffff)
-        {
-            uint32_t d = 0;
-            if (a)
-            {
-                d = *(dest + i);
-                FbByteMulC(d, a);
-            }
-	    *(dest + i) = d;
-        }
-    }
-}
-
-static FASTCALL void
-fbCombineAtopC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i) {
-        uint32_t d = *(dest + i);
-        uint32_t s = *(src + i);
-        uint32_t m = *(mask + i);
-        uint32_t ad;
-        uint16_t as = d >> 24;
-
-	fbCombineMaskC (&s, &m);
-
-        ad = ~m;
-
-        FbByteAddMulC(d, ad, s, as);
-	*(dest + i) = d;
-    }
-}
-
-static FASTCALL void
-fbCombineAtopReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i) {
-
-        uint32_t d = *(dest + i);
-        uint32_t s = *(src + i);
-        uint32_t m = *(mask + i);
-        uint32_t ad;
-        uint16_t as = ~d >> 24;
-
-	fbCombineMaskC (&s, &m);
-
-	ad = m;
-
-        FbByteAddMulC(d, ad, s, as);
-	*(dest + i) = d;
-    }
-}
-
-static FASTCALL void
-fbCombineXorC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i) {
-        uint32_t d = *(dest + i);
-        uint32_t s = *(src + i);
-        uint32_t m = *(mask + i);
-        uint32_t ad;
-        uint16_t as = ~d >> 24;
-
-	fbCombineMaskC (&s, &m);
-
-	ad = ~m;
-
-        FbByteAddMulC(d, ad, s, as);
-	*(dest + i) = d;
-    }
-}
-
-static FASTCALL void
-fbCombineAddC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i) {
-        uint32_t s = *(src + i);
-        uint32_t m = *(mask + i);
-        uint32_t d = *(dest + i);
-
-	fbCombineMaskValueC (&s, &m);
-
-        FbByteAdd(d, s);
-	*(dest + i) = d;
-    }
-}
-
-static FASTCALL void
-fbCombineSaturateC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i) {
-        uint32_t  s, d;
-        uint16_t  sa, sr, sg, sb, da;
-        uint16_t  t, u, v;
-        uint32_t  m,n,o,p;
-
-        d = *(dest + i);
-        s = *(src + i);
-	m = *(mask + i);
-
-	fbCombineMaskC (&s, &m);
-
-        sa = (m >> 24);
-        sr = (m >> 16) & 0xff;
-        sg = (m >>  8) & 0xff;
-        sb = (m      ) & 0xff;
-        da = ~d >> 24;
-
-        if (sb <= da)
-            m = FbAdd(s,d,0,t);
-        else
-            m = FbGen (s, d, 0, (da << 8) / sb, 0xff, t, u, v);
-
-        if (sg <= da)
-            n = FbAdd(s,d,8,t);
-        else
-            n = FbGen (s, d, 8, (da << 8) / sg, 0xff, t, u, v);
-
-        if (sr <= da)
-            o = FbAdd(s,d,16,t);
-        else
-            o = FbGen (s, d, 16, (da << 8) / sr, 0xff, t, u, v);
-
-        if (sa <= da)
-            p = FbAdd(s,d,24,t);
-        else
-            p = FbGen (s, d, 24, (da << 8) / sa, 0xff, t, u, v);
-
-	*(dest + i) = m|n|o|p;
-    }
-}
-
-static FASTCALL void
-fbCombineDisjointGeneralC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width, uint8_t combine)
-{
-    int i;
-
-    for (i = 0; i < width; ++i) {
-        uint32_t  s, d;
-        uint32_t  m,n,o,p;
-        uint32_t  Fa, Fb;
-        uint16_t  t, u, v;
-        uint32_t  sa;
-        uint8_t   da;
-
-        s = *(src + i);
-        m = *(mask + i);
-        d = *(dest + i);
-        da = d >> 24;
-
-	fbCombineMaskC (&s, &m);
-
-	sa = m;
-
-        switch (combine & CombineA) {
-        default:
-            Fa = 0;
-            break;
-        case CombineAOut:
-            m = fbCombineDisjointOutPart ((uint8_t) (sa >> 0), da);
-            n = fbCombineDisjointOutPart ((uint8_t) (sa >> 8), da) << 8;
-            o = fbCombineDisjointOutPart ((uint8_t) (sa >> 16), da) << 16;
-            p = fbCombineDisjointOutPart ((uint8_t) (sa >> 24), da) << 24;
-            Fa = m|n|o|p;
-            break;
-        case CombineAIn:
-            m = fbCombineDisjointInPart ((uint8_t) (sa >> 0), da);
-            n = fbCombineDisjointInPart ((uint8_t) (sa >> 8), da) << 8;
-            o = fbCombineDisjointInPart ((uint8_t) (sa >> 16), da) << 16;
-            p = fbCombineDisjointInPart ((uint8_t) (sa >> 24), da) << 24;
-            Fa = m|n|o|p;
-            break;
-        case CombineA:
-            Fa = 0xffffffff;
-            break;
-        }
-
-        switch (combine & CombineB) {
-        default:
-            Fb = 0;
-            break;
-        case CombineBOut:
-            m = fbCombineDisjointOutPart (da, (uint8_t) (sa >> 0));
-            n = fbCombineDisjointOutPart (da, (uint8_t) (sa >> 8)) << 8;
-            o = fbCombineDisjointOutPart (da, (uint8_t) (sa >> 16)) << 16;
-            p = fbCombineDisjointOutPart (da, (uint8_t) (sa >> 24)) << 24;
-            Fb = m|n|o|p;
-            break;
-        case CombineBIn:
-            m = fbCombineDisjointInPart (da, (uint8_t) (sa >> 0));
-            n = fbCombineDisjointInPart (da, (uint8_t) (sa >> 8)) << 8;
-            o = fbCombineDisjointInPart (da, (uint8_t) (sa >> 16)) << 16;
-            p = fbCombineDisjointInPart (da, (uint8_t) (sa >> 24)) << 24;
-            Fb = m|n|o|p;
-            break;
-        case CombineB:
-            Fb = 0xffffffff;
-            break;
-        }
-        m = FbGen (s,d,0,FbGet8(Fa,0),FbGet8(Fb,0),t, u, v);
-        n = FbGen (s,d,8,FbGet8(Fa,8),FbGet8(Fb,8),t, u, v);
-        o = FbGen (s,d,16,FbGet8(Fa,16),FbGet8(Fb,16),t, u, v);
-        p = FbGen (s,d,24,FbGet8(Fa,24),FbGet8(Fb,24),t, u, v);
-        s = m|n|o|p;
-	*(dest + i) = s;
-    }
-}
-
-static FASTCALL void
-fbCombineDisjointOverC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineDisjointGeneralC (dest, src, mask, width, CombineAOver);
-}
-
-static FASTCALL void
-fbCombineDisjointInC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineDisjointGeneralC (dest, src, mask, width, CombineAIn);
-}
-
-static FASTCALL void
-fbCombineDisjointInReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineDisjointGeneralC (dest, src, mask, width, CombineBIn);
-}
-
-static FASTCALL void
-fbCombineDisjointOutC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineDisjointGeneralC (dest, src, mask, width, CombineAOut);
-}
-
-static FASTCALL void
-fbCombineDisjointOutReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineDisjointGeneralC (dest, src, mask, width, CombineBOut);
-}
-
-static FASTCALL void
-fbCombineDisjointAtopC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineDisjointGeneralC (dest, src, mask, width, CombineAAtop);
-}
-
-static FASTCALL void
-fbCombineDisjointAtopReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineDisjointGeneralC (dest, src, mask, width, CombineBAtop);
-}
-
-static FASTCALL void
-fbCombineDisjointXorC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineDisjointGeneralC (dest, src, mask, width, CombineXor);
-}
-
-static FASTCALL void
-fbCombineConjointGeneralC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width, uint8_t combine)
-{
-    int i;
-
-    for (i = 0; i < width; ++i) {
-        uint32_t  s, d;
-        uint32_t  m,n,o,p;
-        uint32_t  Fa, Fb;
-        uint16_t  t, u, v;
-        uint32_t  sa;
-        uint8_t   da;
-
-        s = *(src + i);
-        m = *(mask + i);
-        d = *(dest + i);
-        da = d >> 24;
-
-	fbCombineMaskC (&s, &m);
-
-        sa = m;
-
-        switch (combine & CombineA) {
-        default:
-            Fa = 0;
-            break;
-        case CombineAOut:
-            m = fbCombineConjointOutPart ((uint8_t) (sa >> 0), da);
-            n = fbCombineConjointOutPart ((uint8_t) (sa >> 8), da) << 8;
-            o = fbCombineConjointOutPart ((uint8_t) (sa >> 16), da) << 16;
-            p = fbCombineConjointOutPart ((uint8_t) (sa >> 24), da) << 24;
-            Fa = m|n|o|p;
-            break;
-        case CombineAIn:
-            m = fbCombineConjointInPart ((uint8_t) (sa >> 0), da);
-            n = fbCombineConjointInPart ((uint8_t) (sa >> 8), da) << 8;
-            o = fbCombineConjointInPart ((uint8_t) (sa >> 16), da) << 16;
-            p = fbCombineConjointInPart ((uint8_t) (sa >> 24), da) << 24;
-            Fa = m|n|o|p;
-            break;
-        case CombineA:
-            Fa = 0xffffffff;
-            break;
-        }
-
-        switch (combine & CombineB) {
-        default:
-            Fb = 0;
-            break;
-        case CombineBOut:
-            m = fbCombineConjointOutPart (da, (uint8_t) (sa >> 0));
-            n = fbCombineConjointOutPart (da, (uint8_t) (sa >> 8)) << 8;
-            o = fbCombineConjointOutPart (da, (uint8_t) (sa >> 16)) << 16;
-            p = fbCombineConjointOutPart (da, (uint8_t) (sa >> 24)) << 24;
-            Fb = m|n|o|p;
-            break;
-        case CombineBIn:
-            m = fbCombineConjointInPart (da, (uint8_t) (sa >> 0));
-            n = fbCombineConjointInPart (da, (uint8_t) (sa >> 8)) << 8;
-            o = fbCombineConjointInPart (da, (uint8_t) (sa >> 16)) << 16;
-            p = fbCombineConjointInPart (da, (uint8_t) (sa >> 24)) << 24;
-            Fb = m|n|o|p;
-            break;
-        case CombineB:
-            Fb = 0xffffffff;
-            break;
-        }
-        m = FbGen (s,d,0,FbGet8(Fa,0),FbGet8(Fb,0),t, u, v);
-        n = FbGen (s,d,8,FbGet8(Fa,8),FbGet8(Fb,8),t, u, v);
-        o = FbGen (s,d,16,FbGet8(Fa,16),FbGet8(Fb,16),t, u, v);
-        p = FbGen (s,d,24,FbGet8(Fa,24),FbGet8(Fb,24),t, u, v);
-        s = m|n|o|p;
-	*(dest + i) = s;
-    }
-}
-
-static FASTCALL void
-fbCombineConjointOverC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineConjointGeneralC (dest, src, mask, width, CombineAOver);
-}
-
-static FASTCALL void
-fbCombineConjointOverReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineConjointGeneralC (dest, src, mask, width, CombineBOver);
-}
-
-static FASTCALL void
-fbCombineConjointInC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineConjointGeneralC (dest, src, mask, width, CombineAIn);
-}
-
-static FASTCALL void
-fbCombineConjointInReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineConjointGeneralC (dest, src, mask, width, CombineBIn);
-}
-
-static FASTCALL void
-fbCombineConjointOutC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineConjointGeneralC (dest, src, mask, width, CombineAOut);
-}
-
-static FASTCALL void
-fbCombineConjointOutReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineConjointGeneralC (dest, src, mask, width, CombineBOut);
-}
-
-static FASTCALL void
-fbCombineConjointAtopC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineConjointGeneralC (dest, src, mask, width, CombineAAtop);
-}
-
-static FASTCALL void
-fbCombineConjointAtopReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineConjointGeneralC (dest, src, mask, width, CombineBAtop);
-}
-
-static FASTCALL void
-fbCombineConjointXorC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineConjointGeneralC (dest, src, mask, width, CombineXor);
-}
-
-static CombineFuncC pixman_fbCombineFuncC[] = {
-    fbCombineClearC,
-    fbCombineSrcC,
-    NULL, /* Dest */
-    fbCombineOverC,
-    fbCombineOverReverseC,
-    fbCombineInC,
-    fbCombineInReverseC,
-    fbCombineOutC,
-    fbCombineOutReverseC,
-    fbCombineAtopC,
-    fbCombineAtopReverseC,
-    fbCombineXorC,
-    fbCombineAddC,
-    fbCombineSaturateC,
-    NULL,
-    NULL,
-    fbCombineClearC,	    /* 0x10 */
-    fbCombineSrcC,
-    NULL, /* Dest */
-    fbCombineDisjointOverC,
-    fbCombineSaturateC, /* DisjointOverReverse */
-    fbCombineDisjointInC,
-    fbCombineDisjointInReverseC,
-    fbCombineDisjointOutC,
-    fbCombineDisjointOutReverseC,
-    fbCombineDisjointAtopC,
-    fbCombineDisjointAtopReverseC,
-    fbCombineDisjointXorC,  /* 0x1b */
-    NULL,
-    NULL,
-    NULL,
-    NULL,
-    fbCombineClearC,
-    fbCombineSrcC,
-    NULL, /* Dest */
-    fbCombineConjointOverC,
-    fbCombineConjointOverReverseC,
-    fbCombineConjointInC,
-    fbCombineConjointInReverseC,
-    fbCombineConjointOutC,
-    fbCombineConjointOutReverseC,
-    fbCombineConjointAtopC,
-    fbCombineConjointAtopReverseC,
-    fbCombineConjointXorC,
-};
-
-
 static void fbFetchSolid(bits_image_t * pict, int x, int y, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
 {
     uint32_t color;
     uint32_t *end;
-    fetchPixelProc fetch = fetchPixelProcForPicture(pict);
+    fetchPixelProc fetch = FETCH_PIXEL_PROC_FOR_PICTURE(pict);
 
     color = fetch(pict, 0, 0);
 
     end = buffer + width;
     while (buffer < end)
 	*(buffer++) = color;
-    fbFinishAccess (pict->pDrawable);
 }
 
 static void fbFetch(bits_image_t * pict, int x, int y, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
 {
-    fetchProc fetch = fetchProcForPicture(pict);
+    fetchProc fetch = FETCH_PROC_FOR_PICTURE(pict);
 
     fetch(pict, x, y, width, buffer);
 }
 
-#ifdef PIXMAN_FB_ACCESSORS
-#define PIXMAN_COMPOSITE_RECT_GENERAL pixman_composite_rect_general_accessors
-#define PIXMAN_COMPOSE_FUNCTIONS pixman_composeFunctions_accessors
-#else
-#define PIXMAN_COMPOSITE_RECT_GENERAL pixman_composite_rect_general_no_accessors
-#define PIXMAN_COMPOSE_FUNCTIONS pixman_composeFunctions
-#endif
-
-#ifdef PIXMAN_FB_ACCESSORS	/* The accessor version can't be parameterized from outside */
-static const
-#endif
-FbComposeFunctions PIXMAN_COMPOSE_FUNCTIONS = {
-    pixman_fbCombineFuncU,
-    pixman_fbCombineFuncC,
-    pixman_fbCombineMaskU
-};
-
-typedef struct
-{
-    uint32_t        left_ag;
-    uint32_t        left_rb;
-    uint32_t        right_ag;
-    uint32_t        right_rb;
-    int32_t       left_x;
-    int32_t       right_x;
-    int32_t       stepper;
-
-    pixman_gradient_stop_t	*stops;
-    int                      num_stops;
-    unsigned int             spread;
-
-    int		  need_reset;
-} GradientWalker;
-
-static void
-_gradient_walker_init (GradientWalker  *walker,
-		       gradient_t      *gradient,
-		       unsigned int     spread)
-{
-    walker->num_stops = gradient->n_stops;
-    walker->stops     = gradient->stops;
-    walker->left_x    = 0;
-    walker->right_x   = 0x10000;
-    walker->stepper   = 0;
-    walker->left_ag   = 0;
-    walker->left_rb   = 0;
-    walker->right_ag  = 0;
-    walker->right_rb  = 0;
-    walker->spread    = spread;
-
-    walker->need_reset = TRUE;
-}
-
-static void
-_gradient_walker_reset (GradientWalker  *walker,
-                        pixman_fixed_32_32_t     pos)
-{
-    int32_t                  x, left_x, right_x;
-    pixman_color_t          *left_c, *right_c;
-    int                      n, count = walker->num_stops;
-    pixman_gradient_stop_t *      stops = walker->stops;
-
-    static const pixman_color_t   transparent_black = { 0, 0, 0, 0 };
-
-    switch (walker->spread)
-    {
-    case PIXMAN_REPEAT_NORMAL:
-	x = (int32_t)pos & 0xFFFF;
-	for (n = 0; n < count; n++)
-	    if (x < stops[n].x)
-		break;
-	if (n == 0) {
-	    left_x =  stops[count-1].x - 0x10000;
-	    left_c = &stops[count-1].color;
-	} else {
-	    left_x =  stops[n-1].x;
-	    left_c = &stops[n-1].color;
-	}
-
-	if (n == count) {
-	    right_x =  stops[0].x + 0x10000;
-	    right_c = &stops[0].color;
-	} else {
-	    right_x =  stops[n].x;
-	    right_c = &stops[n].color;
-	}
-	left_x  += (pos - x);
-	right_x += (pos - x);
-	break;
-
-    case PIXMAN_REPEAT_PAD:
-	for (n = 0; n < count; n++)
-	    if (pos < stops[n].x)
-		break;
-
-	if (n == 0) {
-	    left_x =  INT32_MIN;
-	    left_c = &stops[0].color;
-	} else {
-	    left_x =  stops[n-1].x;
-	    left_c = &stops[n-1].color;
-	}
-
-	if (n == count) {
-	    right_x =  INT32_MAX;
-	    right_c = &stops[n-1].color;
-	} else {
-	    right_x =  stops[n].x;
-	    right_c = &stops[n].color;
-	}
-	break;
-
-    case PIXMAN_REPEAT_REFLECT:
-	x = (int32_t)pos & 0xFFFF;
-	if ((int32_t)pos & 0x10000)
-	    x = 0x10000 - x;
-	for (n = 0; n < count; n++)
-	    if (x < stops[n].x)
-		break;
-
-	if (n == 0) {
-	    left_x =  -stops[0].x;
-	    left_c = &stops[0].color;
-	} else {
-	    left_x =  stops[n-1].x;
-	    left_c = &stops[n-1].color;
-	}
-
-	if (n == count) {
-	    right_x = 0x20000 - stops[n-1].x;
-	    right_c = &stops[n-1].color;
-	} else {
-	    right_x =  stops[n].x;
-	    right_c = &stops[n].color;
-	}
-
-	if ((int32_t)pos & 0x10000) {
-	    pixman_color_t  *tmp_c;
-	    int32_t          tmp_x;
-
-	    tmp_x   = 0x10000 - right_x;
-	    right_x = 0x10000 - left_x;
-	    left_x  = tmp_x;
-
-	    tmp_c   = right_c;
-	    right_c = left_c;
-	    left_c  = tmp_c;
-
-	    x = 0x10000 - x;
-	}
-	left_x  += (pos - x);
-	right_x += (pos - x);
-	break;
-
-    default:  /* RepeatNone */
-	for (n = 0; n < count; n++)
-	    if (pos < stops[n].x)
-		break;
-
-	if (n == 0)
-	{
-	    left_x  =  INT32_MIN;
-	    right_x =  stops[0].x;
-	    left_c  = right_c = (pixman_color_t*) &transparent_black;
-	}
-	else if (n == count)
-	{
-	    left_x  = stops[n-1].x;
-	    right_x = INT32_MAX;
-	    left_c  = right_c = (pixman_color_t*) &transparent_black;
-	}
-	else
-	{
-	    left_x  =  stops[n-1].x;
-	    right_x =  stops[n].x;
-	    left_c  = &stops[n-1].color;
-	    right_c = &stops[n].color;
-	}
-    }
-
-    walker->left_x   = left_x;
-    walker->right_x  = right_x;
-    walker->left_ag  = ((left_c->alpha >> 8) << 16)   | (left_c->green >> 8);
-    walker->left_rb  = ((left_c->red & 0xff00) << 8)  | (left_c->blue >> 8);
-    walker->right_ag = ((right_c->alpha >> 8) << 16)  | (right_c->green >> 8);
-    walker->right_rb = ((right_c->red & 0xff00) << 8) | (right_c->blue >> 8);
-
-    if ( walker->left_x == walker->right_x                ||
-	 ( walker->left_ag == walker->right_ag &&
-	   walker->left_rb == walker->right_rb )   )
-    {
-	walker->stepper = 0;
-    }
-    else
-    {
-	int32_t width = right_x - left_x;
-	walker->stepper = ((1 << 24) + width/2)/width;
-    }
-
-    walker->need_reset = FALSE;
-}
-
-#define  GRADIENT_WALKER_NEED_RESET(w,x)				\
-    ( (w)->need_reset || (x) < (w)->left_x || (x) >= (w)->right_x)
-
-
-/* the following assumes that GRADIENT_WALKER_NEED_RESET(w,x) is FALSE */
-static uint32_t
-_gradient_walker_pixel (GradientWalker  *walker,
-                        pixman_fixed_32_32_t     x)
-{
-    int  dist, idist;
-    uint32_t  t1, t2, a, color;
-
-    if (GRADIENT_WALKER_NEED_RESET (walker, x))
-        _gradient_walker_reset (walker, x);
-
-    dist  = ((int)(x - walker->left_x)*walker->stepper) >> 16;
-    idist = 256 - dist;
-
-    /* combined INTERPOLATE and premultiply */
-    t1 = walker->left_rb*idist + walker->right_rb*dist;
-    t1 = (t1 >> 8) & 0xff00ff;
-
-    t2  = walker->left_ag*idist + walker->right_ag*dist;
-    t2 &= 0xff00ff00;
-
-    color = t2 & 0xff000000;
-    a     = t2 >> 24;
-
-    t1  = t1*a + 0x800080;
-    t1  = (t1 + ((t1 >> 8) & 0xff00ff)) >> 8;
-
-    t2  = (t2 >> 8)*a + 0x800080;
-    t2  = (t2 + ((t2 >> 8) & 0xff00ff));
-
-    return (color | (t1 & 0xff00ff) | (t2 & 0xff00));
-}
-
-static void pixmanFetchSourcePict(source_image_t * pict, int x, int y, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
-{
-#if 0
-    SourcePictPtr   pGradient = pict->pSourcePict;
-#endif
-    GradientWalker  walker;
-    uint32_t       *end = buffer + width;
-    gradient_t	    *gradient;
-
-    if (pict->common.type == SOLID)
-    {
-	register uint32_t color = ((solid_fill_t *)pict)->color;
-
-	while (buffer < end)
-	    *(buffer++) = color;
-
-	return;
-    }
-
-    gradient = (gradient_t *)pict;
-
-    _gradient_walker_init (&walker, gradient, pict->common.repeat);
-
-    if (pict->common.type == LINEAR) {
-	pixman_vector_t v, unit;
-	pixman_fixed_32_32_t l;
-	pixman_fixed_48_16_t dx, dy, a, b, off;
-	linear_gradient_t *linear = (linear_gradient_t *)pict;
-
-        /* reference point is the center of the pixel */
-        v.vector[0] = pixman_int_to_fixed(x) + pixman_fixed_1/2;
-        v.vector[1] = pixman_int_to_fixed(y) + pixman_fixed_1/2;
-        v.vector[2] = pixman_fixed_1;
-        if (pict->common.transform) {
-            if (!pixman_transform_point_3d (pict->common.transform, &v))
-                return;
-            unit.vector[0] = pict->common.transform->matrix[0][0];
-            unit.vector[1] = pict->common.transform->matrix[1][0];
-            unit.vector[2] = pict->common.transform->matrix[2][0];
-        } else {
-            unit.vector[0] = pixman_fixed_1;
-            unit.vector[1] = 0;
-            unit.vector[2] = 0;
-        }
-
-        dx = linear->p2.x - linear->p1.x;
-        dy = linear->p2.y - linear->p1.y;
-        l = dx*dx + dy*dy;
-        if (l != 0) {
-            a = (dx << 32) / l;
-            b = (dy << 32) / l;
-            off = (-a*linear->p1.x - b*linear->p1.y)>>16;
-        }
-        if (l == 0  || (unit.vector[2] == 0 && v.vector[2] == pixman_fixed_1)) {
-            pixman_fixed_48_16_t inc, t;
-            /* affine transformation only */
-            if (l == 0) {
-                t = 0;
-                inc = 0;
-            } else {
-                t = ((a*v.vector[0] + b*v.vector[1]) >> 16) + off;
-                inc = (a * unit.vector[0] + b * unit.vector[1]) >> 16;
-            }
-
-	    if (pict->class == SOURCE_IMAGE_CLASS_VERTICAL)
-	    {
-		register uint32_t color;
-
-		color = _gradient_walker_pixel( &walker, t );
-		while (buffer < end)
-		    *(buffer++) = color;
-	    }
-	    else
-	    {
-                if (!mask) {
-                    while (buffer < end)
-                    {
-			*(buffer) = _gradient_walker_pixel (&walker, t);
-                        buffer += 1;
-                        t      += inc;
-                    }
-                } else {
-                    while (buffer < end) {
-                        if (*mask++ & maskBits)
-                        {
-			    *(buffer) = _gradient_walker_pixel (&walker, t);
-                        }
-                        buffer += 1;
-                        t      += inc;
-                    }
-                }
-	    }
-	}
-	else /* projective transformation */
-	{
-	    pixman_fixed_48_16_t t;
-
-	    if (pict->class == SOURCE_IMAGE_CLASS_VERTICAL)
-	    {
-		register uint32_t color;
-
-		if (v.vector[2] == 0)
-		{
-		    t = 0;
-		}
-		else
-		{
-		    pixman_fixed_48_16_t x, y;
-
-		    x = ((pixman_fixed_48_16_t) v.vector[0] << 16) / v.vector[2];
-		    y = ((pixman_fixed_48_16_t) v.vector[1] << 16) / v.vector[2];
-		    t = ((a * x + b * y) >> 16) + off;
-		}
-
- 		color = _gradient_walker_pixel( &walker, t );
-		while (buffer < end)
-		    *(buffer++) = color;
-	    }
-	    else
-	    {
-		while (buffer < end)
-		{
-		    if (!mask || *mask++ & maskBits)
-		    {
-			if (v.vector[2] == 0) {
-			    t = 0;
-			} else {
-			    pixman_fixed_48_16_t x, y;
-			    x = ((pixman_fixed_48_16_t)v.vector[0] << 16) / v.vector[2];
-			    y = ((pixman_fixed_48_16_t)v.vector[1] << 16) / v.vector[2];
-			    t = ((a*x + b*y) >> 16) + off;
-			}
-			*(buffer) = _gradient_walker_pixel (&walker, t);
-		    }
-		    ++buffer;
-		    v.vector[0] += unit.vector[0];
-		    v.vector[1] += unit.vector[1];
-		    v.vector[2] += unit.vector[2];
-		}
-            }
-        }
-    } else {
-
-/*
- * In the radial gradient problem we are given two circles (c₁,r₁) and
- * (c₂,r₂) that define the gradient itself. Then, for any point p, we
- * must compute the value(s) of t within [0.0, 1.0] representing the
- * circle(s) that would color the point.
- *
- * There are potentially two values of t since the point p can be
- * colored by both sides of the circle, (which happens whenever one
- * circle is not entirely contained within the other).
- *
- * If we solve for a value of t that is outside of [0.0, 1.0] then we
- * use the extend mode (NONE, REPEAT, REFLECT, or PAD) to map to a
- * value within [0.0, 1.0].
- *
- * Here is an illustration of the problem:
- *
- *              p₂
- *           p  •
- *           •   ╲
- *        ·       ╲r₂
- *  p₁ ·           ╲
- *  •              θ╲
- *   ╲             ╌╌•
- *    ╲r₁        ·   c₂
- *    θ╲    ·
- *    ╌╌•
- *      c₁
- *
- * Given (c₁,r₁), (c₂,r₂) and p, we must find an angle θ such that two
- * points p₁ and p₂ on the two circles are collinear with p. Then, the
- * desired value of t is the ratio of the length of p₁p to the length
- * of p₁p₂.
- *
- * So, we have six unknown values: (p₁x, p₁y), (p₂x, p₂y), θ and t.
- * We can also write six equations that constrain the problem:
- *
- * Point p₁ is a distance r₁ from c₁ at an angle of θ:
- *
- *	1. p₁x = c₁x + r₁·cos θ
- *	2. p₁y = c₁y + r₁·sin θ
- *
- * Point p₂ is a distance r₂ from c₂ at an angle of θ:
- *
- *	3. p₂x = c₂x + r2·cos θ
- *	4. p₂y = c₂y + r2·sin θ
- *
- * Point p lies at a fraction t along the line segment p₁p₂:
- *
- *	5. px = t·p₂x + (1-t)·p₁x
- *	6. py = t·p₂y + (1-t)·p₁y
- *
- * To solve, first subtitute 1-4 into 5 and 6:
- *
- * px = t·(c₂x + r₂·cos θ) + (1-t)·(c₁x + r₁·cos θ)
- * py = t·(c₂y + r₂·sin θ) + (1-t)·(c₁y + r₁·sin θ)
- *
- * Then solve each for cos θ and sin θ expressed as a function of t:
- *
- * cos θ = (-(c₂x - c₁x)·t + (px - c₁x)) / ((r₂-r₁)·t + r₁)
- * sin θ = (-(c₂y - c₁y)·t + (py - c₁y)) / ((r₂-r₁)·t + r₁)
- *
- * To simplify this a bit, we define new variables for several of the
- * common terms as shown below:
- *
- *              p₂
- *           p  •
- *           •   ╲
- *        ·  ┆    ╲r₂
- *  p₁ ·     ┆     ╲
- *  •     pdy┆      ╲
- *   ╲       ┆       •c₂
- *    ╲r₁    ┆   ·   ┆
- *     ╲    ·┆       ┆cdy
- *      •╌╌╌╌┴╌╌╌╌╌╌╌┘
- *    c₁  pdx   cdx
- *
- * cdx = (c₂x - c₁x)
- * cdy = (c₂y - c₁y)
- *  dr =  r₂-r₁
- * pdx =  px - c₁x
- * pdy =  py - c₁y
- *
- * Note that cdx, cdy, and dr do not depend on point p at all, so can
- * be pre-computed for the entire gradient. The simplifed equations
- * are now:
- *
- * cos θ = (-cdx·t + pdx) / (dr·t + r₁)
- * sin θ = (-cdy·t + pdy) / (dr·t + r₁)
- *
- * Finally, to get a single function of t and eliminate the last
- * unknown θ, we use the identity sin²θ + cos²θ = 1. First, square
- * each equation, (we knew a quadratic was coming since it must be
- * possible to obtain two solutions in some cases):
- *
- * cos²θ = (cdx²t² - 2·cdx·pdx·t + pdx²) / (dr²·t² + 2·r₁·dr·t + r₁²)
- * sin²θ = (cdy²t² - 2·cdy·pdy·t + pdy²) / (dr²·t² + 2·r₁·dr·t + r₁²)
- *
- * Then add both together, set the result equal to 1, and express as a
- * standard quadratic equation in t of the form At² + Bt + C = 0
- *
- * (cdx² + cdy² - dr²)·t² - 2·(cdx·pdx + cdy·pdy + r₁·dr)·t + (pdx² + pdy² - r₁²) = 0
- *
- * In other words:
- *
- * A = cdx² + cdy² - dr²
- * B = -2·(pdx·cdx + pdy·cdy + r₁·dr)
- * C = pdx² + pdy² - r₁²
- *
- * And again, notice that A does not depend on p, so can be
- * precomputed. From here we just use the quadratic formula to solve
- * for t:
- *
- * t = (-2·B ± ⎷(B² - 4·A·C)) / 2·A
- */
-        /* radial or conical */
-        pixman_bool_t affine = TRUE;
-        double cx = 1.;
-        double cy = 0.;
-        double cz = 0.;
-	double rx = x + 0.5;
-	double ry = y + 0.5;
-        double rz = 1.;
-
-        if (pict->common.transform) {
-            pixman_vector_t v;
-            /* reference point is the center of the pixel */
-            v.vector[0] = pixman_int_to_fixed(x) + pixman_fixed_1/2;
-            v.vector[1] = pixman_int_to_fixed(y) + pixman_fixed_1/2;
-            v.vector[2] = pixman_fixed_1;
-            if (!pixman_transform_point_3d (pict->common.transform, &v))
-                return;
-
-            cx = pict->common.transform->matrix[0][0]/65536.;
-            cy = pict->common.transform->matrix[1][0]/65536.;
-            cz = pict->common.transform->matrix[2][0]/65536.;
-            rx = v.vector[0]/65536.;
-            ry = v.vector[1]/65536.;
-            rz = v.vector[2]/65536.;
-            affine = pict->common.transform->matrix[2][0] == 0 && v.vector[2] == pixman_fixed_1;
-        }
-
-        if (pict->common.type == RADIAL) {
-	    radial_gradient_t *radial = (radial_gradient_t *)pict;
-            if (affine) {
-                while (buffer < end) {
-		    if (!mask || *mask++ & maskBits)
-		    {
-			double pdx, pdy;
-			double B, C;
-			double det;
-			double c1x = radial->c1.x / 65536.0;
-			double c1y = radial->c1.y / 65536.0;
-			double r1  = radial->c1.radius / 65536.0;
-                        pixman_fixed_48_16_t t;
-
-			pdx = rx - c1x;
-			pdy = ry - c1y;
-
-			B = -2 * (  pdx * radial->cdx
-				    + pdy * radial->cdy
-				    + r1 * radial->dr);
-			C = (pdx * pdx + pdy * pdy - r1 * r1);
-
-                        det = (B * B) - (4 * radial->A * C);
-			if (det < 0.0)
-			    det = 0.0;
-
-			if (radial->A < 0)
-			    t = (pixman_fixed_48_16_t) ((- B - sqrt(det)) / (2.0 * radial->A) * 65536);
-			else
-			    t = (pixman_fixed_48_16_t) ((- B + sqrt(det)) / (2.0 * radial->A) * 65536);
-
-			*(buffer) = _gradient_walker_pixel (&walker, t);
-		    }
-		    ++buffer;
-
-                    rx += cx;
-                    ry += cy;
-                }
-            } else {
-		/* projective */
-                while (buffer < end) {
-		    if (!mask || *mask++ & maskBits)
-		    {
-			double pdx, pdy;
-			double B, C;
-			double det;
-			double c1x = radial->c1.x / 65536.0;
-			double c1y = radial->c1.y / 65536.0;
-			double r1  = radial->c1.radius / 65536.0;
-                        pixman_fixed_48_16_t t;
-			double x, y;
-
-			if (rz != 0) {
-			    x = rx/rz;
-			    y = ry/rz;
-			} else {
-			    x = y = 0.;
-			}
-
-			pdx = x - c1x;
-			pdy = y - c1y;
-
-			B = -2 * (  pdx * radial->cdx
-				    + pdy * radial->cdy
-				    + r1 * radial->dr);
-			C = (pdx * pdx + pdy * pdy - r1 * r1);
-
-                        det = (B * B) - (4 * radial->A * C);
-			if (det < 0.0)
-			    det = 0.0;
-
-			if (radial->A < 0)
-			    t = (pixman_fixed_48_16_t) ((- B - sqrt(det)) / (2.0 * radial->A) * 65536);
-			else
-			    t = (pixman_fixed_48_16_t) ((- B + sqrt(det)) / (2.0 * radial->A) * 65536);
-
-			*(buffer) = _gradient_walker_pixel (&walker, t);
-		    }
-		    ++buffer;
-
-                    rx += cx;
-                    ry += cy;
-		    rz += cz;
-                }
-            }
-        } else /* SourcePictTypeConical */ {
-	    conical_gradient_t *conical = (conical_gradient_t *)pict;
-            double a = conical->angle/(180.*65536);
-            if (affine) {
-                rx -= conical->center.x/65536.;
-                ry -= conical->center.y/65536.;
-
-                while (buffer < end) {
-		    double angle;
-
-                    if (!mask || *mask++ & maskBits)
-		    {
-                        pixman_fixed_48_16_t   t;
-
-                        angle = atan2(ry, rx) + a;
-			t     = (pixman_fixed_48_16_t) (angle * (65536. / (2*M_PI)));
-
-			*(buffer) = _gradient_walker_pixel (&walker, t);
-		    }
-
-                    ++buffer;
-                    rx += cx;
-                    ry += cy;
-                }
-            } else {
-                while (buffer < end) {
-                    double x, y;
-                    double angle;
-
-                    if (!mask || *mask++ & maskBits)
-                    {
-			pixman_fixed_48_16_t  t;
-
-			if (rz != 0) {
-			    x = rx/rz;
-			    y = ry/rz;
-			} else {
-			    x = y = 0.;
-			}
-			x -= conical->center.x/65536.;
-			y -= conical->center.y/65536.;
-			angle = atan2(y, x) + a;
-			t     = (pixman_fixed_48_16_t) (angle * (65536. / (2*M_PI)));
-
-			*(buffer) = _gradient_walker_pixel (&walker, t);
-		    }
-
-                    ++buffer;
-                    rx += cx;
-                    ry += cy;
-                    rz += cz;
-                }
-            }
-        }
-    }
-}
-
-/*
- * Fetch from region strategies
- */
-typedef FASTCALL uint32_t (*fetchFromRegionProc)(bits_image_t *pict, int x, int y, uint32_t *buffer, fetchPixelProc fetch, pixman_box16_t *box);
-
-static inline uint32_t
-fbFetchFromNoRegion(bits_image_t *pict, int x, int y, uint32_t *buffer, fetchPixelProc fetch, pixman_box16_t *box)
-{
-    return fetch (pict, x, y);
-}
-
-static uint32_t
-fbFetchFromNRectangles(bits_image_t *pict, int x, int y, uint32_t *buffer, fetchPixelProc fetch, pixman_box16_t *box)
-{
-    pixman_box16_t box2;
-    if (pixman_region_contains_point (pict->common.src_clip, x, y, &box2))
-        return fbFetchFromNoRegion(pict, x, y, buffer, fetch, box);
-    else
-        return 0;
-}
-
-static uint32_t
-fbFetchFromOneRectangle(bits_image_t *pict, int x, int y, uint32_t *buffer, fetchPixelProc fetch, pixman_box16_t *box)
-{
-    pixman_box16_t box2 = *box;
-    return ((x < box2.x1) | (x >= box2.x2) | (y < box2.y1) | (y >= box2.y2)) ?
-        0 : fbFetchFromNoRegion(pict, x, y, buffer, fetch, box);
-}
-
-/*
- * Fetching Algorithms
- */
-static void
-fbFetchTransformed_Nearest_Normal(bits_image_t * pict, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits, pixman_bool_t affine, pixman_vector_t v, pixman_vector_t unit)
-{
-    pixman_box16_t* box = NULL;
-    fetchPixelProc   fetch;
-    fetchFromRegionProc fetchFromRegion;
-    int x, y, i;
-
-    /* initialize the two function pointers */
-    fetch = fetchPixelProcForPicture(pict);
-
-    if(pixman_region_n_rects (pict->common.src_clip) == 1)
-        fetchFromRegion = fbFetchFromNoRegion;
-    else
-        fetchFromRegion = fbFetchFromNRectangles;
-
-    for ( i = 0; i < width; ++i)
-    {
-        if (!mask || mask[i] & maskBits)
-        {
-            if (!v.vector[2])
-            {
-                *(buffer + i) = 0;
-            }
-            else
-            {
-                if (!affine)
-                {
-                    y = MOD(DIV(v.vector[1],v.vector[2]), pict->height);
-                    x = MOD(DIV(v.vector[0],v.vector[2]), pict->width);
-                }
-                else
-                {
-                    y = MOD(v.vector[1]>>16, pict->height);
-                    x = MOD(v.vector[0]>>16, pict->width);
-                }
-                *(buffer + i) = fetchFromRegion(pict, x, y, buffer, fetch, box);
-            }
-        }
-
-        v.vector[0] += unit.vector[0];
-        v.vector[1] += unit.vector[1];
-        v.vector[2] += unit.vector[2];
-    }
-}
-
-static void
-fbFetchTransformed_Nearest_Pad(bits_image_t * pict, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits, pixman_bool_t affine, pixman_vector_t v, pixman_vector_t unit)
-{
-    pixman_box16_t *box = NULL;
-    fetchPixelProc   fetch;
-    fetchFromRegionProc fetchFromRegion;
-    int x, y, i;
-
-    /* initialize the two function pointers */
-    fetch = fetchPixelProcForPicture(pict);
-
-    if(pixman_region_n_rects (pict->common.src_clip) == 1)
-        fetchFromRegion = fbFetchFromNoRegion;
-    else
-        fetchFromRegion = fbFetchFromNRectangles;
-
-    for (i = 0; i < width; ++i)
-    {
-        if (!mask || mask[i] & maskBits)
-        {
-            if (!v.vector[2])
-            {
-                *(buffer + i) = 0;
-            }
-            else
-            {
-                if (!affine)
-                {
-                    y = CLIP(DIV(v.vector[1], v.vector[2]), 0, pict->height-1);
-                    x = CLIP(DIV(v.vector[0], v.vector[2]), 0, pict->width-1);
-                }
-                else
-                {
-                    y = CLIP(v.vector[1]>>16, 0, pict->height-1);
-                    x = CLIP(v.vector[0]>>16, 0, pict->width-1);
-                }
-
-                *(buffer + i) = fetchFromRegion(pict, x, y, buffer, fetch, box);
-            }
-        }
-
-        v.vector[0] += unit.vector[0];
-        v.vector[1] += unit.vector[1];
-        v.vector[2] += unit.vector[2];
-    }
-}
-
-static void
-fbFetchTransformed_Nearest_General(bits_image_t * pict, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits, pixman_bool_t affine, pixman_vector_t v, pixman_vector_t unit)
-{
-    pixman_box16_t *box = NULL;
-    fetchPixelProc   fetch;
-    fetchFromRegionProc fetchFromRegion;
-    int x, y, i;
-
-    /* initialize the two function pointers */
-    fetch = fetchPixelProcForPicture(pict);
-
-    if(pixman_region_n_rects (pict->common.src_clip) == 1)
-    {
-        box = &(pict->common.src_clip->extents);
-        fetchFromRegion = fbFetchFromOneRectangle;
-    }
-    else
-    {
-        fetchFromRegion = fbFetchFromNRectangles;
-    }
-
-    for (i = 0; i < width; ++i) {
-        if (!mask || mask[i] & maskBits)
-        {
-            if (!v.vector[2]) {
-                *(buffer + i) = 0;
-            } else {
-                if (!affine) {
-                    y = DIV(v.vector[1],v.vector[2]);
-                    x = DIV(v.vector[0],v.vector[2]);
-                } else {
-                    y = v.vector[1]>>16;
-                    x = v.vector[0]>>16;
-                }
-                *(buffer + i) = fetchFromRegion(pict, x, y, buffer, fetch, box);
-            }
-        }
-        v.vector[0] += unit.vector[0];
-        v.vector[1] += unit.vector[1];
-        v.vector[2] += unit.vector[2];
-    }
-}
-
-static void
-fbFetchTransformed_Bilinear_Normal(bits_image_t * pict, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits, pixman_bool_t affine, pixman_vector_t v, pixman_vector_t unit)
-{
-    pixman_box16_t *box = NULL;
-    fetchPixelProc   fetch;
-    fetchFromRegionProc fetchFromRegion;
-    int i;
-
-    /* initialize the two function pointers */
-    fetch = fetchPixelProcForPicture(pict);
-
-    if(pixman_region_n_rects (pict->common.src_clip) == 1)
-        fetchFromRegion = fbFetchFromNoRegion;
-    else
-        fetchFromRegion = fbFetchFromNRectangles;
-
-    for (i = 0; i < width; ++i) {
-        if (!mask || mask[i] & maskBits)
-        {
-            if (!v.vector[2]) {
-                *(buffer + i) = 0;
-            } else {
-                int x1, x2, y1, y2, distx, idistx, disty, idisty;
-                uint32_t tl, tr, bl, br, r;
-                uint32_t ft, fb;
-
-                if (!affine) {
-                    pixman_fixed_48_16_t div;
-                    div = ((pixman_fixed_48_16_t)v.vector[0] << 16)/v.vector[2];
-                    x1 = div >> 16;
-                    distx = ((pixman_fixed_t)div >> 8) & 0xff;
-                    div = ((pixman_fixed_48_16_t)v.vector[1] << 16)/v.vector[2];
-                    y1 = div >> 16;
-                    disty = ((pixman_fixed_t)div >> 8) & 0xff;
-                } else {
-                    x1 = v.vector[0] >> 16;
-                    distx = (v.vector[0] >> 8) & 0xff;
-                    y1 = v.vector[1] >> 16;
-                    disty = (v.vector[1] >> 8) & 0xff;
-                }
-                x2 = x1 + 1;
-                y2 = y1 + 1;
-
-                idistx = 256 - distx;
-                idisty = 256 - disty;
-
-                x1 = MOD (x1, pict->width);
-                x2 = MOD (x2, pict->width);
-                y1 = MOD (y1, pict->height);
-                y2 = MOD (y2, pict->height);
-
-                tl = fetchFromRegion(pict, x1, y1, buffer, fetch, box);
-                tr = fetchFromRegion(pict, x2, y1, buffer, fetch, box);
-                bl = fetchFromRegion(pict, x1, y2, buffer, fetch, box);
-                br = fetchFromRegion(pict, x2, y2, buffer, fetch, box);
-
-                ft = FbGet8(tl,0) * idistx + FbGet8(tr,0) * distx;
-                fb = FbGet8(bl,0) * idistx + FbGet8(br,0) * distx;
-                r = (((ft * idisty + fb * disty) >> 16) & 0xff);
-                ft = FbGet8(tl,8) * idistx + FbGet8(tr,8) * distx;
-                fb = FbGet8(bl,8) * idistx + FbGet8(br,8) * distx;
-                r |= (((ft * idisty + fb * disty) >> 8) & 0xff00);
-                ft = FbGet8(tl,16) * idistx + FbGet8(tr,16) * distx;
-                fb = FbGet8(bl,16) * idistx + FbGet8(br,16) * distx;
-                r |= (((ft * idisty + fb * disty)) & 0xff0000);
-                ft = FbGet8(tl,24) * idistx + FbGet8(tr,24) * distx;
-                fb = FbGet8(bl,24) * idistx + FbGet8(br,24) * distx;
-                r |= (((ft * idisty + fb * disty) << 8) & 0xff000000);
-                *(buffer + i) = r;
-            }
-        }
-        v.vector[0] += unit.vector[0];
-        v.vector[1] += unit.vector[1];
-        v.vector[2] += unit.vector[2];
-    }
-}
-
-static void
-fbFetchTransformed_Bilinear_Pad(bits_image_t * pict, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits, pixman_bool_t affine, pixman_vector_t v, pixman_vector_t unit)
-{
-    pixman_box16_t *box = NULL;
-    fetchPixelProc   fetch;
-    fetchFromRegionProc fetchFromRegion;
-    int i;
-
-    /* initialize the two function pointers */
-    fetch = fetchPixelProcForPicture(pict);
-
-    if(pixman_region_n_rects (pict->common.src_clip) == 1)
-        fetchFromRegion = fbFetchFromNoRegion;
-    else
-        fetchFromRegion = fbFetchFromNRectangles;
-
-    for (i = 0; i < width; ++i) {
-        if (!mask || mask[i] & maskBits)
-        {
-            if (!v.vector[2]) {
-                *(buffer + i) = 0;
-            } else {
-                int x1, x2, y1, y2, distx, idistx, disty, idisty;
-                uint32_t tl, tr, bl, br, r;
-                uint32_t ft, fb;
-
-                if (!affine) {
-                    pixman_fixed_48_16_t div;
-                    div = ((pixman_fixed_48_16_t)v.vector[0] << 16)/v.vector[2];
-                    x1 = div >> 16;
-                    distx = ((pixman_fixed_t)div >> 8) & 0xff;
-                    div = ((pixman_fixed_48_16_t)v.vector[1] << 16)/v.vector[2];
-                    y1 = div >> 16;
-                    disty = ((pixman_fixed_t)div >> 8) & 0xff;
-                } else {
-                    x1 = v.vector[0] >> 16;
-                    distx = (v.vector[0] >> 8) & 0xff;
-                    y1 = v.vector[1] >> 16;
-                    disty = (v.vector[1] >> 8) & 0xff;
-                }
-                x2 = x1 + 1;
-                y2 = y1 + 1;
-
-                idistx = 256 - distx;
-                idisty = 256 - disty;
-
-                x1 = CLIP (x1, 0, pict->width-1);
-                x2 = CLIP (x2, 0, pict->width-1);
-                y1 = CLIP (y1, 0, pict->height-1);
-                y2 = CLIP (y2, 0, pict->height-1);
-
-                tl = fetchFromRegion(pict, x1, y1, buffer, fetch, box);
-                tr = fetchFromRegion(pict, x2, y1, buffer, fetch, box);
-                bl = fetchFromRegion(pict, x1, y2, buffer, fetch, box);
-                br = fetchFromRegion(pict, x2, y2, buffer, fetch, box);
-
-                ft = FbGet8(tl,0) * idistx + FbGet8(tr,0) * distx;
-                fb = FbGet8(bl,0) * idistx + FbGet8(br,0) * distx;
-                r = (((ft * idisty + fb * disty) >> 16) & 0xff);
-                ft = FbGet8(tl,8) * idistx + FbGet8(tr,8) * distx;
-                fb = FbGet8(bl,8) * idistx + FbGet8(br,8) * distx;
-                r |= (((ft * idisty + fb * disty) >> 8) & 0xff00);
-                ft = FbGet8(tl,16) * idistx + FbGet8(tr,16) * distx;
-                fb = FbGet8(bl,16) * idistx + FbGet8(br,16) * distx;
-                r |= (((ft * idisty + fb * disty)) & 0xff0000);
-                ft = FbGet8(tl,24) * idistx + FbGet8(tr,24) * distx;
-                fb = FbGet8(bl,24) * idistx + FbGet8(br,24) * distx;
-                r |= (((ft * idisty + fb * disty) << 8) & 0xff000000);
-                *(buffer + i) = r;
-            }
-        }
-        v.vector[0] += unit.vector[0];
-        v.vector[1] += unit.vector[1];
-        v.vector[2] += unit.vector[2];
-    }
-}
-
-static void
-fbFetchTransformed_Bilinear_General(bits_image_t * pict, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits, pixman_bool_t affine, pixman_vector_t v, pixman_vector_t unit)
-{
-    pixman_box16_t *box = NULL;
-    fetchPixelProc   fetch;
-    fetchFromRegionProc fetchFromRegion;
-    int i;
-
-    /* initialize the two function pointers */
-    fetch = fetchPixelProcForPicture(pict);
-
-    if(pixman_region_n_rects (pict->common.src_clip) == 1)
-    {
-        box = &(pict->common.src_clip->extents);
-        fetchFromRegion = fbFetchFromOneRectangle;
-    }
-    else
-    {
-        fetchFromRegion = fbFetchFromNRectangles;
-    }
-
-    for (i = 0; i < width; ++i)
-    {
-        if (!mask || mask[i] & maskBits)
-        {
-            if (!v.vector[2]) {
-                *(buffer + i) = 0;
-            } else {
-                int x1, x2, y1, y2, distx, idistx, disty, idisty;
-                uint32_t tl, tr, bl, br, r;
-                uint32_t ft, fb;
-
-                if (!affine) {
-                    pixman_fixed_48_16_t div;
-                    div = ((pixman_fixed_48_16_t)v.vector[0] << 16)/v.vector[2];
-                    x1 = div >> 16;
-                    distx = ((pixman_fixed_t)div >> 8) & 0xff;
-                    div = ((pixman_fixed_48_16_t)v.vector[1] << 16)/v.vector[2];
-                    y1 = div >> 16;
-                    disty = ((pixman_fixed_t)div >> 8) & 0xff;
-                } else {
-                    x1 = v.vector[0] >> 16;
-                    distx = (v.vector[0] >> 8) & 0xff;
-                    y1 = v.vector[1] >> 16;
-                    disty = (v.vector[1] >> 8) & 0xff;
-                }
-                x2 = x1 + 1;
-                y2 = y1 + 1;
-
-                idistx = 256 - distx;
-                idisty = 256 - disty;
-
-                tl = fetchFromRegion(pict, x1, y1, buffer, fetch, box);
-                tr = fetchFromRegion(pict, x2, y1, buffer, fetch, box);
-                bl = fetchFromRegion(pict, x1, y2, buffer, fetch, box);
-                br = fetchFromRegion(pict, x2, y2, buffer, fetch, box);
-
-                ft = FbGet8(tl,0) * idistx + FbGet8(tr,0) * distx;
-                fb = FbGet8(bl,0) * idistx + FbGet8(br,0) * distx;
-                r = (((ft * idisty + fb * disty) >> 16) & 0xff);
-                ft = FbGet8(tl,8) * idistx + FbGet8(tr,8) * distx;
-                fb = FbGet8(bl,8) * idistx + FbGet8(br,8) * distx;
-                r |= (((ft * idisty + fb * disty) >> 8) & 0xff00);
-                ft = FbGet8(tl,16) * idistx + FbGet8(tr,16) * distx;
-                fb = FbGet8(bl,16) * idistx + FbGet8(br,16) * distx;
-                r |= (((ft * idisty + fb * disty)) & 0xff0000);
-                ft = FbGet8(tl,24) * idistx + FbGet8(tr,24) * distx;
-                fb = FbGet8(bl,24) * idistx + FbGet8(br,24) * distx;
-                r |= (((ft * idisty + fb * disty) << 8) & 0xff000000);
-                *(buffer + i) = r;
-            }
-        }
-
-        v.vector[0] += unit.vector[0];
-        v.vector[1] += unit.vector[1];
-        v.vector[2] += unit.vector[2];
-    }
-}
-
-static void
-fbFetchTransformed_Convolution(bits_image_t * pict, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits, pixman_bool_t affine, pixman_vector_t v, pixman_vector_t unit)
-{
-    pixman_box16_t dummy;
-    fetchPixelProc fetch;
-    int i;
-
-    pixman_fixed_t *params = pict->common.filter_params;
-    int32_t cwidth = pixman_fixed_to_int(params[0]);
-    int32_t cheight = pixman_fixed_to_int(params[1]);
-    int xoff = (params[0] - pixman_fixed_1) >> 1;
-    int yoff = (params[1] - pixman_fixed_1) >> 1;
-    fetch = fetchPixelProcForPicture(pict);
-
-    params += 2;
-    for (i = 0; i < width; ++i) {
-        if (!mask || mask[i] & maskBits)
-        {
-            if (!v.vector[2]) {
-                *(buffer + i) = 0;
-            } else {
-                int x1, x2, y1, y2, x, y;
-                int32_t srtot, sgtot, sbtot, satot;
-                pixman_fixed_t *p = params;
-
-                if (!affine) {
-                    pixman_fixed_48_16_t tmp;
-                    tmp = ((pixman_fixed_48_16_t)v.vector[0] << 16)/v.vector[2] - xoff;
-                    x1 = pixman_fixed_to_int(tmp);
-                    tmp = ((pixman_fixed_48_16_t)v.vector[1] << 16)/v.vector[2] - yoff;
-                    y1 = pixman_fixed_to_int(tmp);
-                } else {
-                    x1 = pixman_fixed_to_int(v.vector[0] - xoff);
-                    y1 = pixman_fixed_to_int(v.vector[1] - yoff);
-                }
-                x2 = x1 + cwidth;
-                y2 = y1 + cheight;
-
-                srtot = sgtot = sbtot = satot = 0;
-
-                for (y = y1; y < y2; y++) {
-                    int ty;
-                    switch (pict->common.repeat) {
-                        case PIXMAN_REPEAT_NORMAL:
-                            ty = MOD (y, pict->height);
-                            break;
-                        case PIXMAN_REPEAT_PAD:
-                            ty = CLIP (y, 0, pict->height-1);
-                            break;
-                        default:
-                            ty = y;
-                    }
-                    for (x = x1; x < x2; x++) {
-                        if (*p) {
-                            int tx;
-                            switch (pict->common.repeat) {
-                                case PIXMAN_REPEAT_NORMAL:
-                                    tx = MOD (x, pict->width);
-                                    break;
-                                case PIXMAN_REPEAT_PAD:
-                                    tx = CLIP (x, 0, pict->width-1);
-                                    break;
-                                default:
-                                    tx = x;
-                            }
-                            if (pixman_region_contains_point (pict->common.src_clip, tx, ty, &dummy)) {
-                                uint32_t c = fetch(pict, tx, ty);
-
-                                srtot += Red(c) * *p;
-                                sgtot += Green(c) * *p;
-                                sbtot += Blue(c) * *p;
-                                satot += Alpha(c) * *p;
-                            }
-                        }
-                        p++;
-                    }
-                }
-
-                satot >>= 16;
-                srtot >>= 16;
-                sgtot >>= 16;
-                sbtot >>= 16;
-
-                if (satot < 0) satot = 0; else if (satot > 0xff) satot = 0xff;
-                if (srtot < 0) srtot = 0; else if (srtot > 0xff) srtot = 0xff;
-                if (sgtot < 0) sgtot = 0; else if (sgtot > 0xff) sgtot = 0xff;
-                if (sbtot < 0) sbtot = 0; else if (sbtot > 0xff) sbtot = 0xff;
-
-                *(buffer + i) = ((satot << 24) |
-                                 (srtot << 16) |
-                                 (sgtot <<  8) |
-                                 (sbtot       ));
-            }
-        }
-        v.vector[0] += unit.vector[0];
-        v.vector[1] += unit.vector[1];
-        v.vector[2] += unit.vector[2];
-    }
-}
-
-static void
-adjust (pixman_vector_t *v, pixman_vector_t *u, pixman_fixed_t adjustment)
-{
-    int delta_v = (adjustment * v->vector[2]) >> 16;
-    int delta_u = (adjustment * u->vector[2]) >> 16;
-    
-    v->vector[0] += delta_v;
-    v->vector[1] += delta_v;
-    
-    u->vector[0] += delta_u;
-    u->vector[1] += delta_u;
-}
-
-static void
-fbFetchTransformed(bits_image_t * pict, int x, int y, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
-{
-    uint32_t     *bits;
-    int32_t    stride;
-    pixman_vector_t v;
-    pixman_vector_t unit;
-    pixman_bool_t affine = TRUE;
-
-    bits = pict->bits;
-    stride = pict->rowstride;
-
-    /* reference point is the center of the pixel */
-    v.vector[0] = pixman_int_to_fixed(x) + pixman_fixed_1 / 2;
-    v.vector[1] = pixman_int_to_fixed(y) + pixman_fixed_1 / 2;
-    v.vector[2] = pixman_fixed_1;
-
-    /* when using convolution filters or PIXMAN_REPEAT_PAD one might get here without a transform */
-    if (pict->common.transform)
-    {
-        if (!pixman_transform_point_3d (pict->common.transform, &v))
-        {
-            fbFinishAccess (pict->pDrawable);
-            return;
-        }
-        unit.vector[0] = pict->common.transform->matrix[0][0];
-        unit.vector[1] = pict->common.transform->matrix[1][0];
-        unit.vector[2] = pict->common.transform->matrix[2][0];
-        affine = v.vector[2] == pixman_fixed_1 && unit.vector[2] == 0;
-    }
-    else
-    {
-        unit.vector[0] = pixman_fixed_1;
-        unit.vector[1] = 0;
-        unit.vector[2] = 0;
-    }
-
-    /* This allows filtering code to pretend that pixels are located at integer coordinates */
-    adjust (&v, &unit, -(pixman_fixed_1 / 2));
-    
-    if (pict->common.filter == PIXMAN_FILTER_NEAREST || pict->common.filter == PIXMAN_FILTER_FAST)
-    {
-	/* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */
-	adjust (&v, &unit, pixman_fixed_1 / 2 - pixman_fixed_e);
-	
-        if (pict->common.repeat == PIXMAN_REPEAT_NORMAL)
-        {
-            fbFetchTransformed_Nearest_Normal(pict, width, buffer, mask, maskBits, affine, v, unit);
-
-        }
-        else if (pict->common.repeat == PIXMAN_REPEAT_PAD)
-        {
-            fbFetchTransformed_Nearest_Pad(pict, width, buffer, mask, maskBits, affine, v, unit);
-        }
-        else
-        {
-            fbFetchTransformed_Nearest_General(pict, width, buffer, mask, maskBits, affine, v, unit);
-        }
-    } else if (pict->common.filter == PIXMAN_FILTER_BILINEAR	||
-	       pict->common.filter == PIXMAN_FILTER_GOOD	||
-	       pict->common.filter == PIXMAN_FILTER_BEST)
-    {
-        if (pict->common.repeat == PIXMAN_REPEAT_NORMAL)
-        {
-            fbFetchTransformed_Bilinear_Normal(pict, width, buffer, mask, maskBits, affine, v, unit);
-        }
-        else if (pict->common.repeat == PIXMAN_REPEAT_PAD)
-        {
-            fbFetchTransformed_Bilinear_Pad(pict, width, buffer, mask, maskBits, affine, v, unit);
-        }
-        else
-        {
-            fbFetchTransformed_Bilinear_General(pict, width, buffer, mask, maskBits, affine, v, unit);
-        }
-    }
-    else if (pict->common.filter == PIXMAN_FILTER_CONVOLUTION)
-    {
-	/* Round to closest integer, ensuring that 0.5 rounds to 0, not 1 */
-	adjust (&v, &unit, pixman_fixed_1 / 2 - pixman_fixed_e);
-	
-        fbFetchTransformed_Convolution(pict, width, buffer, mask, maskBits, affine, v, unit);
-    }
-
-    fbFinishAccess (pict->pDrawable);
-}
-
-
-static void
-fbFetchExternalAlpha(bits_image_t * pict, int x, int y, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
-{
-    int i;
-    uint32_t _alpha_buffer[SCANLINE_BUFFER_LENGTH];
-    uint32_t *alpha_buffer = _alpha_buffer;
-
-    if (!pict->common.alpha_map) {
-        fbFetchTransformed (pict, x, y, width, buffer, mask, maskBits);
-	return;
-    }
-    if (width > SCANLINE_BUFFER_LENGTH)
-        alpha_buffer = (uint32_t *) pixman_malloc_ab (width, sizeof(uint32_t));
-
-    fbFetchTransformed(pict, x, y, width, buffer, mask, maskBits);
-    fbFetchTransformed((bits_image_t *)pict->common.alpha_map, x - pict->common.alpha_origin.x,
-		       y - pict->common.alpha_origin.y, width, alpha_buffer,
-		       mask, maskBits);
-    for (i = 0; i < width; ++i) {
-        if (!mask || mask[i] & maskBits)
-	{
-	    int a = alpha_buffer[i]>>24;
-	    *(buffer + i) = (a << 24)
-		| (div_255(Red(*(buffer + i)) * a) << 16)
-		| (div_255(Green(*(buffer + i)) * a) << 8)
-		| (div_255(Blue(*(buffer + i)) * a));
-	}
-    }
-
-    if (alpha_buffer != _alpha_buffer)
-        free(alpha_buffer);
-}
-
 static void
 fbStore(bits_image_t * pict, int x, int y, int width, uint32_t *buffer)
 {
     uint32_t *bits;
     int32_t stride;
-    storeProc store = storeProcForPicture(pict);
+    storeProc store = STORE_PROC_FOR_PICTURE(pict);
     const pixman_indexed_t * indexed = pict->indexed;
 
     bits = pict->bits;
     stride = pict->rowstride;
     bits += y*stride;
     store((pixman_image_t *)pict, bits, buffer, x, width, indexed);
-    fbFinishAccess (pict->pDrawable);
-}
-
-static void
-fbStoreExternalAlpha(bits_image_t * pict, int x, int y, int width, uint32_t *buffer)
-{
-    uint32_t *bits, *alpha_bits;
-    int32_t stride, astride;
-    int ax, ay;
-    storeProc store;
-    storeProc astore;
-    const pixman_indexed_t * indexed = pict->indexed;
-    const pixman_indexed_t * aindexed;
-
-    if (!pict->common.alpha_map) {
-        fbStore(pict, x, y, width, buffer);
-	return;
-    }
-
-    store = storeProcForPicture(pict);
-    astore = storeProcForPicture(pict->common.alpha_map);
-    aindexed = pict->common.alpha_map->indexed;
-
-    ax = x;
-    ay = y;
-
-    bits = pict->bits;
-    stride = pict->rowstride;
-
-    alpha_bits = pict->common.alpha_map->bits;
-    astride = pict->common.alpha_map->rowstride;
-
-    bits       += y*stride;
-    alpha_bits += (ay - pict->common.alpha_origin.y)*astride;
-
-
-    store((pixman_image_t *)pict, bits, buffer, x, width, indexed);
-    astore((pixman_image_t *)pict->common.alpha_map,
-	   alpha_bits, buffer, ax - pict->common.alpha_origin.x, width, aindexed);
-
-    fbFinishAccess (pict->alpha_map->pDrawable);
-    fbFinishAccess (pict->pDrawable);
 }
 
 typedef void (*scanStoreProc)(pixman_image_t *, int, int, int, uint32_t *);
 typedef void (*scanFetchProc)(pixman_image_t *, int, int, int, uint32_t *,
 			      uint32_t *, uint32_t);
 
 #ifndef PIXMAN_FB_ACCESSORS
 static
@@ -4400,33 +186,33 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbC
 					  data->width, data->height);
     }
     else
     {
 	bits_image_t *bits = (bits_image_t *)data->src;
 
 	if (bits->common.alpha_map)
 	{
-	    fetchSrc = (scanFetchProc)fbFetchExternalAlpha;
+	    fetchSrc = (scanFetchProc)FB_FETCH_EXTERNAL_ALPHA;
 	}
 	else if ((bits->common.repeat == PIXMAN_REPEAT_NORMAL || bits->common.repeat == PIXMAN_REPEAT_PAD) &&
 		 bits->width == 1 &&
 		 bits->height == 1)
 	{
 	    fetchSrc = (scanFetchProc)fbFetchSolid;
 	    srcClass = SOURCE_IMAGE_CLASS_HORIZONTAL;
 	}
 	else if (!bits->common.transform && bits->common.filter != PIXMAN_FILTER_CONVOLUTION
                 && bits->common.repeat != PIXMAN_REPEAT_PAD)
 	{
 	    fetchSrc = (scanFetchProc)fbFetch;
 	}
 	else
 	{
-	    fetchSrc = (scanFetchProc)fbFetchTransformed;
+	    fetchSrc = (scanFetchProc)FB_FETCH_TRANSFORMED;
 	}
     }
 
     if (!data->mask || data->op == PIXMAN_OP_CLEAR)
     {
 	fetchMask = NULL;
     }
     else
@@ -4439,36 +225,36 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbC
 					       data->width, data->height);
 	}
 	else
 	{
 	    bits_image_t *bits = (bits_image_t *)data->mask;
 
 	    if (bits->common.alpha_map)
 	    {
-		fetchMask = (scanFetchProc)fbFetchExternalAlpha;
+		fetchMask = (scanFetchProc)FB_FETCH_EXTERNAL_ALPHA;
 	    }
 	    else if ((bits->common.repeat == PIXMAN_REPEAT_NORMAL || bits->common.repeat == PIXMAN_REPEAT_PAD) &&
 		     bits->width == 1 && bits->height == 1)
 	    {
 		fetchMask = (scanFetchProc)fbFetchSolid;
 		maskClass = SOURCE_IMAGE_CLASS_HORIZONTAL;
 	    }
 	    else if (!bits->common.transform && bits->common.filter != PIXMAN_FILTER_CONVOLUTION
                     && bits->common.repeat != PIXMAN_REPEAT_PAD)
 		fetchMask = (scanFetchProc)fbFetch;
 	    else
-		fetchMask = (scanFetchProc)fbFetchTransformed;
+		fetchMask = (scanFetchProc)FB_FETCH_TRANSFORMED;
 	}
     }
 
     if (data->dest->common.alpha_map)
     {
-	fetchDest = (scanFetchProc)fbFetchExternalAlpha;
-	store = (scanStoreProc)fbStoreExternalAlpha;
+	fetchDest = (scanFetchProc)FB_FETCH_EXTERNAL_ALPHA;
+	store = (scanStoreProc)FB_STORE_EXTERNAL_ALPHA;
 
 	if (data->op == PIXMAN_OP_CLEAR || data->op == PIXMAN_OP_SRC)
 	    fetchDest = NULL;
     }
     else
     {
 	fetchDest = (scanFetchProc)fbFetch;
 	store = (scanStoreProc)fbStore;
@@ -4511,17 +297,17 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbC
     if (fetchSrc		   &&
 	fetchMask		   &&
 	data->mask		   &&
 	data->mask->common.type == BITS &&
 	data->mask->common.component_alpha &&
 	PIXMAN_FORMAT_RGB (data->mask->bits.format))
     {
 	uint32_t *mask_buffer = dest_buffer + data->width;
-	CombineFuncC compose = PIXMAN_COMPOSE_FUNCTIONS.combineC[data->op];
+	CombineFuncC compose = pixman_composeFunctions.combineC[data->op];
 	if (!compose)
 	    return;
 
 	for (i = 0; i < data->height; ++i) {
 	    /* fill first half of scanline with source */
 	    if (fetchSrc)
 	    {
 		if (fetchMask)
@@ -4575,17 +361,17 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbC
 			 data->xDest + xoff,
 			 src_buffer, mask_buffer, data->width);
 	    }
 	}
     }
     else
     {
 	uint32_t *src_mask_buffer = 0, *mask_buffer = 0;
-	CombineFuncU compose = PIXMAN_COMPOSE_FUNCTIONS.combineU[data->op];
+	CombineFuncU compose = pixman_composeFunctions.combineU[data->op];
 	if (!compose)
 	    return;
 
 	if (fetchMask)
 	    mask_buffer = dest_buffer + data->width;
 
 	for (i = 0; i < data->height; ++i) {
 	    /* fill first half of scanline with source */
@@ -4604,44 +390,44 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbC
 
 		if (srcClass == SOURCE_IMAGE_CLASS_HORIZONTAL)
 		{
 		    fetchSrc (data->src, data->xSrc, data->ySrc + i,
 			      data->width, src_buffer, 0, 0);
 
 		    if (mask_buffer)
 		    {
-			fbCombineInU (mask_buffer, src_buffer, data->width);
+			pixman_composeFunctions.combineU[PIXMAN_OP_IN] (mask_buffer, src_buffer, data->width);
 			src_mask_buffer = mask_buffer;
 		    }
 		    else
 			src_mask_buffer = src_buffer;
 
 		    fetchSrc = NULL;
 		}
 		else
 		{
 		    fetchSrc (data->src, data->xSrc, data->ySrc + i,
 			      data->width, src_buffer, mask_buffer,
 			      0xff000000);
 
 		    if (mask_buffer)
-			PIXMAN_COMPOSE_FUNCTIONS.combineMaskU (src_buffer,
-							       mask_buffer,
-							       data->width);
+			pixman_composeFunctions.combineMaskU (src_buffer,
+							      mask_buffer,
+							      data->width);
 
 		    src_mask_buffer = src_buffer;
 		}
 	    }
 	    else if (fetchMask)
 	    {
 		fetchMask (data->mask, data->xMask, data->yMask + i,
 			   data->width, mask_buffer, 0, 0);
 
-		fbCombineInU (mask_buffer, src_buffer, data->width);
+		pixman_composeFunctions.combineU[PIXMAN_OP_IN] (mask_buffer, src_buffer, data->width);
 
 		src_mask_buffer = mask_buffer;
 	    }
 
 	    if (store)
 	    {
 		/* fill dest into second half of scanline */
 		if (fetchDest)
@@ -4659,19 +445,16 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbC
 	    {
 		/* blend */
 		compose (bits + (data->yDest + i+ yoff) * stride +
 			 data->xDest + xoff,
 			 src_mask_buffer, data->width);
 	    }
 	}
     }
-
-    if (!store)
-	fbFinishAccess (data->dest->pDrawable);
 }
 
 #ifndef PIXMAN_FB_ACCESSORS
 
 void
 pixman_composite_rect_general (const FbComposeData *data,
 			       uint32_t *scanline_buffer)
 {
--- a/gfx/cairo/libpixman/src/pixman-mmx.c
+++ b/gfx/cairo/libpixman/src/pixman-mmx.c
@@ -71,19 +71,18 @@
  *
  * Also try to minimize dependencies. i.e. when you need a value, try
  * to calculate it from a value that was calculated as early as
  * possible.
  */
 
 /* --------------- MMX primitivess ------------------------------------ */
 
+#ifdef __GNUC__
 typedef unsigned long long ullong;
-
-#ifdef __GNUC__
 typedef ullong mmxdatafield;
 #endif
 #ifdef _MSC_VER
 typedef unsigned __int64 ullong;
 typedef __m64 mmxdatafield;
 #endif
 
 typedef struct
@@ -908,18 +907,24 @@ mmxCombineAddC (uint32_t *dest, uint32_t
         *dest = store8888(d);
         ++src;
         ++dest;
         ++mask;
     }
     _mm_empty();
 }
 
-void fbComposeSetupMMX(void)
+void
+fbComposeSetupMMX(void)
 {
+    static pixman_bool_t initialized = FALSE;
+
+    if (initialized)
+	return;
+    
     /* check if we have MMX support and initialize accordingly */
     if (pixman_have_mmx())
     {
         pixman_composeFunctions.combineU[PIXMAN_OP_OVER] = mmxCombineOverU;
         pixman_composeFunctions.combineU[PIXMAN_OP_OVER_REVERSE] = mmxCombineOverReverseU;
         pixman_composeFunctions.combineU[PIXMAN_OP_IN] = mmxCombineInU;
         pixman_composeFunctions.combineU[PIXMAN_OP_IN_REVERSE] = mmxCombineInReverseU;
         pixman_composeFunctions.combineU[PIXMAN_OP_OUT] = mmxCombineOutU;
@@ -939,16 +944,18 @@ void fbComposeSetupMMX(void)
         pixman_composeFunctions.combineC[PIXMAN_OP_OUT_REVERSE] = mmxCombineOutReverseC;
         pixman_composeFunctions.combineC[PIXMAN_OP_ATOP] = mmxCombineAtopC;
         pixman_composeFunctions.combineC[PIXMAN_OP_ATOP_REVERSE] = mmxCombineAtopReverseC;
         pixman_composeFunctions.combineC[PIXMAN_OP_XOR] = mmxCombineXorC;
         pixman_composeFunctions.combineC[PIXMAN_OP_ADD] = mmxCombineAddC;
 
         pixman_composeFunctions.combineMaskU = mmxCombineMaskU;
     }
+
+    initialized = TRUE;
 }
 
 
 /* ------------------ MMX code paths called from fbpict.c ----------------------- */
 
 void
 fbCompositeSolid_nx8888mmx (pixman_op_t op,
 			    pixman_image_t * pSrc,
@@ -1611,17 +1618,17 @@ fbCompositeSolidMask_nx8x8888mmx (pixman
     CHECKPOINT();
 
     fbComposeGetSolid(pSrc, src, pDst->bits.format);
 
     srca = src >> 24;
     if (srca == 0)
 	return;
 
-    srcsrc = (unsigned long long)src << 32 | src;
+    srcsrc = (ullong)src << 32 | src;
 
     fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
     fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1);
 
     vsrc = load8888 (src);
     vsrca = expand_alpha (vsrc);
 
     while (height--)
@@ -1654,17 +1661,17 @@ fbCompositeSolidMask_nx8x8888mmx (pixman
 	while (w >= 2)
 	{
 	    ullong m0, m1;
 	    m0 = *mask;
 	    m1 = *(mask + 1);
 
 	    if (srca == 0xff && (m0 & m1) == 0xff)
 	    {
-		*(unsigned long long *)dst = srcsrc;
+		*(ullong *)dst = srcsrc;
 	    }
 	    else if (m0 | m1)
 	    {
 		__m64 vdest;
 		__m64 dest0, dest1;
 
 		vdest = *(__m64 *)dst;
 
@@ -1975,17 +1982,17 @@ fbCompositeSolidMask_nx8x0565mmx (pixman
 				  uint16_t     height)
 {
     uint32_t	src, srca;
     uint16_t	*dstLine, *dst;
     uint8_t	*maskLine, *mask;
     int	dstStride, maskStride;
     uint16_t	w;
     __m64	vsrc, vsrca, tmp;
-    unsigned long long srcsrcsrcsrc, src16;
+    ullong srcsrcsrcsrc, src16;
 
     CHECKPOINT();
 
     fbComposeGetSolid(pSrc, src, pDst->bits.format);
 
     srca = src >> 24;
     if (srca == 0)
 	return;
@@ -2037,17 +2044,17 @@ fbCompositeSolidMask_nx8x0565mmx (pixman
 	    ullong m0, m1, m2, m3;
 	    m0 = *mask;
 	    m1 = *(mask + 1);
 	    m2 = *(mask + 2);
 	    m3 = *(mask + 3);
 
 	    if (srca == 0xff && (m0 & m1 & m2 & m3) == 0xff)
 	    {
-		*(unsigned long long *)dst = srcsrcsrcsrc;
+		*(ullong *)dst = srcsrcsrcsrc;
 	    }
 	    else if (m0 | m1 | m2 | m3)
 	    {
 		__m64 vdest;
 		__m64 vm0, vm1, vm2, vm3;
 
 		vdest = *(__m64 *)dst;
 
@@ -2951,18 +2958,16 @@ fbCompositeOver_x888x8x8888mmx (pixman_o
 				int16_t      yDst,
 				uint16_t     width,
 				uint16_t     height)
 {
     uint32_t	*src, *srcLine;
     uint32_t    *dst, *dstLine;
     uint8_t	*mask, *maskLine;
     int		 srcStride, maskStride, dstStride;
-    __m64 m;
-    uint32_t s, d;
     uint16_t w;
 
     fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
     fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1);
     fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1);
 
     while (height--)
     {
--- a/gfx/cairo/libpixman/src/pixman-mmx.h
+++ b/gfx/cairo/libpixman/src/pixman-mmx.h
@@ -21,16 +21,19 @@
  * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
  * SOFTWARE.
  *
  * Author:  Søren Sandmann (sandmann@redhat.com)
  *          Lars Knoll (lars@trolltech.com)
  * 
  * Based on work by Owen Taylor
  */
+#ifndef _PIXMAN_MMX_H_
+#define _PIXMAN_MMX_H_
+
 #ifdef HAVE_DIX_CONFIG_H
 #include <dix-config.h>
 #endif
 
 #include "pixman-private.h"
 
 #ifdef USE_MMX
 
@@ -308,8 +311,10 @@ fbCompositeOver_x888x8x8888mmx (pixman_o
 				int16_t      xMask,
 				int16_t      yMask,
 				int16_t      xDst,
 				int16_t      yDst,
 				uint16_t     width,
 				uint16_t     height);
 
 #endif /* USE_MMX */
+
+#endif /* _PIXMAN_MMX_H_ */
--- a/gfx/cairo/libpixman/src/pixman-pict.c
+++ b/gfx/cairo/libpixman/src/pixman-pict.c
@@ -28,16 +28,17 @@
 #endif
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 
 #include "pixman-private.h"
 #include "pixman-mmx.h"
+#include "pixman-sse.h"
 
 #define FbFullMask(n)   ((n) == 32 ? (uint32_t)-1 : ((((uint32_t) 1) << n) - 1))
 
 #undef READ
 #undef WRITE
 #define READ(img,x) (*(x))
 #define WRITE(img,ptr,v) ((*(ptr)) = (v))
 
@@ -143,19 +144,16 @@ fbCompositeOver_x888x8x8888 (pixman_op_t
 		    d = fbIn (s, m);
 		    WRITE(pDst, dst, fbOver (d, READ(pDst, dst)));
 		}
 	    }
 	    src++;
 	    dst++;
 	}
     }
-
-    fbFinishAccess (pMask->pDrawable);
-    fbFinishAccess (pDst->pDrawable);
 }
 
 static void
 fbCompositeSolidMaskIn_nx8x8 (pixman_op_t      op,
 			      pixman_image_t    *iSrc,
 			      pixman_image_t    *iMask,
 			      pixman_image_t    *iDst,
 			      int16_t      xSrc,
@@ -334,19 +332,16 @@ fbCompositeSolidMask_nx8x8888 (pixman_op
 	    else if (m)
 	    {
 		d = fbIn (src, m);
 		WRITE(pDst, dst, fbOver (d, READ(pDst, dst)) & dstMask);
 	    }
 	    dst++;
 	}
     }
-
-    fbFinishAccess (pMask->pDrawable);
-    fbFinishAccess (pDst->pDrawable);
 }
 
 void
 fbCompositeSolidMask_nx8888x8888C (pixman_op_t op,
 				   pixman_image_t * pSrc,
 				   pixman_image_t * pMask,
 				   pixman_image_t * pDst,
 				   int16_t      xSrc,
@@ -410,19 +405,16 @@ fbCompositeSolidMask_nx8888x8888C (pixma
 		FbInOverC (src, srca, ma, d, 8, n);
 		FbInOverC (src, srca, ma, d, 16, o);
 		FbInOverC (src, srca, ma, d, 24, p);
 		WRITE(pDst, dst, m|n|o|p);
 	    }
 	    dst++;
 	}
     }
-
-    fbFinishAccess (pMask->pDrawable);
-    fbFinishAccess (pDst->pDrawable);
 }
 
 void
 fbCompositeSolidMask_nx8x0888 (pixman_op_t op,
 			       pixman_image_t * pSrc,
 			       pixman_image_t * pMask,
 			       pixman_image_t * pDst,
 			       int16_t      xSrc,
@@ -475,19 +467,16 @@ fbCompositeSolidMask_nx8x0888 (pixman_op
 	    else if (m)
 	    {
 		d = fbOver24 (fbIn(src,m), Fetch24(pDst, dst));
 		Store24(pDst, dst, d);
 	    }
 	    dst += 3;
 	}
     }
-
-    fbFinishAccess (pMask->pDrawable);
-    fbFinishAccess (pDst->pDrawable);
 }
 
 void
 fbCompositeSolidMask_nx8x0565 (pixman_op_t op,
 				  pixman_image_t * pSrc,
 				  pixman_image_t * pMask,
 				  pixman_image_t * pDst,
 				  int16_t      xSrc,
@@ -541,19 +530,16 @@ fbCompositeSolidMask_nx8x0565 (pixman_op
 	    {
 		d = READ(pDst, dst);
 		d = fbOver24 (fbIn(src,m), cvt0565to0888(d));
 		WRITE(pDst, dst, cvt8888to0565(d));
 	    }
 	    dst++;
 	}
     }
-
-    fbFinishAccess (pMask->pDrawable);
-    fbFinishAccess (pDst->pDrawable);
 }
 
 void
 fbCompositeSolidMask_nx8888x0565C (pixman_op_t op,
 				   pixman_image_t * pSrc,
 				   pixman_image_t * pMask,
 				   pixman_image_t * pDst,
 				   int16_t      xSrc,
@@ -617,19 +603,16 @@ fbCompositeSolidMask_nx8888x0565C (pixma
 		FbInOverC (src, srca, ma, d, 8, n);
 		FbInOverC (src, srca, ma, d, 16, o);
 		d = m|n|o;
 		WRITE(pDst, dst, cvt8888to0565(d));
 	    }
 	    dst++;
 	}
     }
-
-    fbFinishAccess (pMask->pDrawable);
-    fbFinishAccess (pDst->pDrawable);
 }
 
 void
 fbCompositeSrc_8888x8888 (pixman_op_t op,
 			 pixman_image_t * pSrc,
 			 pixman_image_t * pMask,
 			 pixman_image_t * pDst,
 			 int16_t      xSrc,
@@ -666,19 +649,16 @@ fbCompositeSrc_8888x8888 (pixman_op_t op
 	    a = s >> 24;
 	    if (a == 0xff)
 		WRITE(pDst, dst, s & dstMask);
 	    else if (a)
 		WRITE(pDst, dst, fbOver (s, READ(pDst, dst)) & dstMask);
 	    dst++;
 	}
     }
-
-    fbFinishAccess (pSrc->pDrawable);
-    fbFinishAccess (pDst->pDrawable);
 }
 
 void
 fbCompositeSrc_8888x0888 (pixman_op_t op,
 			 pixman_image_t * pSrc,
 			 pixman_image_t * pMask,
 			 pixman_image_t * pDst,
 			 int16_t      xSrc,
@@ -718,19 +698,16 @@ fbCompositeSrc_8888x0888 (pixman_op_t op
 		    d = s;
 		else
 		    d = fbOver24 (s, Fetch24(pDst, dst));
 		Store24(pDst, dst, d);
 	    }
 	    dst += 3;
 	}
     }
-
-    fbFinishAccess (pSrc->pDrawable);
-    fbFinishAccess (pDst->pDrawable);
 }
 
 void
 fbCompositeSrc_8888x0565 (pixman_op_t op,
 			 pixman_image_t * pSrc,
 			 pixman_image_t * pMask,
 			 pixman_image_t * pDst,
 			 int16_t      xSrc,
@@ -773,19 +750,16 @@ fbCompositeSrc_8888x0565 (pixman_op_t op
 		    d = READ(pDst, dst);
 		    d = fbOver24 (s, cvt0565to0888(d));
 		}
 		WRITE(pDst, dst, cvt8888to0565(d));
 	    }
 	    dst++;
 	}
     }
-
-    fbFinishAccess (pDst->pDrawable);
-    fbFinishAccess (pSrc->pDrawable);
 }
 
 void
 fbCompositeSrcAdd_8000x8000 (pixman_op_t	op,
 			     pixman_image_t * pSrc,
 			     pixman_image_t * pMask,
 			     pixman_image_t * pDst,
 			     int16_t      xSrc,
@@ -826,19 +800,16 @@ fbCompositeSrcAdd_8000x8000 (pixman_op_t
 		    t = d + s;
 		    s = t | (0 - (t >> 8));
 		}
 		WRITE(pDst, dst, s);
 	    }
 	    dst++;
 	}
     }
-
-    fbFinishAccess (pDst->pDrawable);
-    fbFinishAccess (pSrc->pDrawable);
 }
 
 void
 fbCompositeSrcAdd_8888x8888 (pixman_op_t	op,
 			     pixman_image_t * pSrc,
 			     pixman_image_t * pMask,
 			     pixman_image_t * pDst,
 			     int16_t      xSrc,
@@ -886,19 +857,16 @@ fbCompositeSrcAdd_8888x8888 (pixman_op_t
 			s = m|n|o|p;
 		    }
 		}
 		WRITE(pDst, dst, s);
 	    }
 	    dst++;
 	}
     }
-
-    fbFinishAccess (pDst->pDrawable);
-    fbFinishAccess (pSrc->pDrawable);
 }
 
 static void
 fbCompositeSrcAdd_8888x8x8 (pixman_op_t op,
 			    pixman_image_t * pSrc,
 			    pixman_image_t * pMask,
 			    pixman_image_t * pDst,
 			    int16_t      xSrc,
@@ -941,19 +909,16 @@ fbCompositeSrcAdd_8888x8x8 (pixman_op_t 
 	    d = READ(pDst, dst);
 
 	    m = FbInU (sa, 0, a, tmp);
 	    r = FbAdd (m, d, 0, tmp);
 
 	    WRITE(pDst, dst++, r);
 	}
     }
-
-    fbFinishAccess(pDst->pDrawable);
-    fbFinishAccess(pMask->pDrawable);
 }
 
 void
 fbCompositeSrcAdd_1000x1000 (pixman_op_t	op,
 			     pixman_image_t * pSrc,
 			     pixman_image_t * pMask,
 			     pixman_image_t * pDst,
 			     int16_t      xSrc,
@@ -991,18 +956,16 @@ fbCompositeSrcAdd_1000x1000 (pixman_op_t
 
 	   GXor,
 	   FB_ALLONES,
 	   srcBpp,
 
 	   FALSE,
 	   FALSE);
 
-    fbFinishAccess(pDst->pDrawable);
-    fbFinishAccess(pSrc->pDrawable);
 #endif
 }
 
 void
 fbCompositeSolidMask_nx1xn (pixman_op_t op,
 			    pixman_image_t * pSrc,
 			    pixman_image_t * pMask,
 			    pixman_image_t * pDst,
@@ -1053,18 +1016,16 @@ fbCompositeSolidMask_nx1xn (pixman_op_t 
 	      width * dstBpp,
 	      height,
 
 	      0x0,
 	      src,
 	      FB_ALLONES,
 	      0x0);
 
-    fbFinishAccess (pDst->pDrawable);
-    fbFinishAccess (pMask->pDrawable);
 #endif
 }
 
 /*
  * Apply a constant alpha value in an over computation
  */
 static void
 fbCompositeSrcSrc_nxn  (pixman_op_t	   op,
@@ -1125,19 +1086,16 @@ fbCompositeSrcSrc_nxn  (pixman_op_t	   o
 	   (height),
 
 	   GXcopy,
 	   FB_ALLONES,
 	   dstBpp,
 
 	   reverse,
 	   upsidedown);
-
-    fbFinishAccess(pSrc->pDrawable);
-    fbFinishAccess(pDst->pDrawable);
 #endif
 }
 
 static void
 pixman_image_composite_rect  (pixman_op_t                   op,
 			      pixman_image_t               *src,
 			      pixman_image_t               *mask,
 			      pixman_image_t               *dest,
@@ -1204,19 +1162,16 @@ fbCompositeSrc_8888xx888 (pixman_op_t op
 
     while (height--)
     {
 	memcpy (dst, src, n_bytes);
 
 	dst += dstStride;
 	src += srcStride;
     }
-
-    fbFinishAccess(pSrc->pDrawable);
-    fbFinishAccess(pDst->pDrawable);
 }
 
 static void
 pixman_walk_composite_region (pixman_op_t op,
 			      pixman_image_t * pSrc,
 			      pixman_image_t * pMask,
 			      pixman_image_t * pDst,
 			      int16_t xSrc,
@@ -1468,26 +1423,35 @@ static const FastPathInfo mmx_fast_paths
     { PIXMAN_OP_ADD, PIXMAN_solid,     PIXMAN_a8,       PIXMAN_a8,       fbCompositeSrcAdd_8888x8x8mmx,    0 },
     { PIXMAN_OP_SRC, PIXMAN_solid,     PIXMAN_a8,       PIXMAN_a8r8g8b8, fbCompositeSolidMaskSrc_nx8x8888mmx, 0 },
     { PIXMAN_OP_SRC, PIXMAN_solid,     PIXMAN_a8,       PIXMAN_x8r8g8b8, fbCompositeSolidMaskSrc_nx8x8888mmx, 0 },
     { PIXMAN_OP_SRC, PIXMAN_solid,     PIXMAN_a8,       PIXMAN_a8b8g8r8, fbCompositeSolidMaskSrc_nx8x8888mmx, 0 },
     { PIXMAN_OP_SRC, PIXMAN_solid,     PIXMAN_a8,       PIXMAN_x8b8g8r8, fbCompositeSolidMaskSrc_nx8x8888mmx, 0 },
 
     { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8,  PIXMAN_null,	PIXMAN_a8r8g8b8, fbCompositeCopyAreammx, 0 },
     { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8,  PIXMAN_null,	PIXMAN_a8b8g8r8, fbCompositeCopyAreammx, 0 },
+    { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8,  PIXMAN_null,	PIXMAN_x8r8g8b8, fbCompositeCopyAreammx, 0 },
+    { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8,  PIXMAN_null,	PIXMAN_x8b8g8r8, fbCompositeCopyAreammx, 0 },
     { PIXMAN_OP_SRC, PIXMAN_x8r8g8b8,  PIXMAN_null,	PIXMAN_x8r8g8b8, fbCompositeCopyAreammx, 0 },
     { PIXMAN_OP_SRC, PIXMAN_x8b8g8r8,  PIXMAN_null,	PIXMAN_x8b8g8r8, fbCompositeCopyAreammx, 0 },
     { PIXMAN_OP_SRC, PIXMAN_r5g6b5,    PIXMAN_null,     PIXMAN_r5g6b5,   fbCompositeCopyAreammx, 0 },
     { PIXMAN_OP_SRC, PIXMAN_b5g6r5,    PIXMAN_null,     PIXMAN_b5g6r5,   fbCompositeCopyAreammx, 0 },
     { PIXMAN_OP_IN,  PIXMAN_a8,        PIXMAN_null,     PIXMAN_a8,       fbCompositeIn_8x8mmx,   0 },
     { PIXMAN_OP_IN,  PIXMAN_solid,     PIXMAN_a8,	PIXMAN_a8,	 fbCompositeIn_nx8x8mmx, 0 },
     { PIXMAN_OP_NONE },
 };
 #endif
 
+#ifdef USE_SSE2
+static const FastPathInfo sse_fast_paths[] =
+{
+    { PIXMAN_OP_NONE },
+};
+#endif
+
 static const FastPathInfo c_fast_paths[] =
 {
     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_r5g6b5,   fbCompositeSolidMask_nx8x0565, 0 },
     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_b5g6r5,   fbCompositeSolidMask_nx8x0565, 0 },
     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_r8g8b8,   fbCompositeSolidMask_nx8x0888, 0 },
     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_b8g8r8,   fbCompositeSolidMask_nx8x0888, 0 },
     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_a8r8g8b8, fbCompositeSolidMask_nx8x8888, 0 },
     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_x8r8g8b8, fbCompositeSolidMask_nx8x8888, 0 },
@@ -1641,53 +1605,56 @@ pixman_image_composite (pixman_op_t     
 			int16_t      ySrc,
 			int16_t      xMask,
 			int16_t      yMask,
 			int16_t      xDst,
 			int16_t      yDst,
 			uint16_t     width,
 			uint16_t     height)
 {
-    pixman_bool_t	    srcRepeat = pSrc->type == BITS && pSrc->common.repeat == PIXMAN_REPEAT_NORMAL;
-    pixman_bool_t	    maskRepeat = FALSE;
-    pixman_bool_t	    srcTransform = pSrc->common.transform != NULL;
-    pixman_bool_t	    maskTransform = FALSE;
-    pixman_bool_t	    srcAlphaMap = pSrc->common.alpha_map != NULL;
-    pixman_bool_t	maskAlphaMap = FALSE;
-    pixman_bool_t	dstAlphaMap = pDst->common.alpha_map != NULL;
-    CompositeFunc   func = NULL;
+    pixman_bool_t srcRepeat = pSrc->type == BITS && pSrc->common.repeat == PIXMAN_REPEAT_NORMAL;
+    pixman_bool_t maskRepeat = FALSE;
+    pixman_bool_t srcTransform = pSrc->common.transform != NULL;
+    pixman_bool_t maskTransform = FALSE;
+    pixman_bool_t srcAlphaMap = pSrc->common.alpha_map != NULL;
+    pixman_bool_t maskAlphaMap = FALSE;
+    pixman_bool_t dstAlphaMap = pDst->common.alpha_map != NULL;
+    CompositeFunc func = NULL;
 
+#ifdef USE_SSE2
+    fbComposeSetupSSE();
+#endif
+    
 #ifdef USE_MMX
-    static pixman_bool_t mmx_setup = FALSE;
-    if (!mmx_setup)
-    {
-        fbComposeSetupMMX();
-        mmx_setup = TRUE;
-    }
+    fbComposeSetupMMX();
 #endif
 
     if (srcRepeat && srcTransform &&
 	pSrc->bits.width == 1 &&
 	pSrc->bits.height == 1)
+    {
 	srcTransform = FALSE;
+    }
 
     if (pMask && pMask->type == BITS)
     {
 	maskRepeat = pMask->common.repeat == PIXMAN_REPEAT_NORMAL;
 
 	maskTransform = pMask->common.transform != 0;
 	if (pMask->common.filter == PIXMAN_FILTER_CONVOLUTION)
 	    maskTransform = TRUE;
 
 	maskAlphaMap = pMask->common.alpha_map != 0;
 
 	if (maskRepeat && maskTransform &&
 	    pMask->bits.width == 1 &&
 	    pMask->bits.height == 1)
+	{
 	    maskTransform = FALSE;
+	}
     }
 
     if ((pSrc->type == BITS || can_get_solid (pSrc)) && (!pMask || pMask->type == BITS)
         && !srcTransform && !maskTransform
         && !maskAlphaMap && !srcAlphaMap && !dstAlphaMap
         && (pSrc->common.filter != PIXMAN_FILTER_CONVOLUTION)
         && (pSrc->common.repeat != PIXMAN_REPEAT_PAD)
         && (!pMask || (pMask->common.filter != PIXMAN_FILTER_CONVOLUTION && pMask->common.repeat != PIXMAN_REPEAT_PAD))
@@ -1701,19 +1668,26 @@ pixman_image_composite (pixman_op_t     
 	pixbuf =
 	    pSrc && pSrc->type == BITS		&&
 	    pMask && pMask->type == BITS	&&
 	    pSrc->bits.bits == pMask->bits.bits &&
 	    xSrc == xMask			&&
 	    ySrc == yMask			&&
 	    !pMask->common.component_alpha	&&
 	    !maskRepeat;
+	info = NULL;
+	
+#ifdef USE_SSE2
+	if (pixman_have_sse ())
+	    info = get_fast_path (sse_fast_paths, op, pSrc, pMask, pDst, pixbuf);
+	if (!info)
+#endif
 
 #ifdef USE_MMX
-	info = NULL;
+
 	if (pixman_have_mmx())
 	    info = get_fast_path (mmx_fast_paths, op, pSrc, pMask, pDst, pixbuf);
 	if (!info)
 #endif
 	    info = get_fast_path (c_fast_paths, op, pSrc, pMask, pDst, pixbuf);
 
 	if (info)
 	{
@@ -1971,10 +1945,29 @@ pixman_have_mmx (void)
     {
         unsigned int features = detectCPUFeatures();
 	mmx_present = (features & (MMX|MMX_Extensions)) == (MMX|MMX_Extensions);
         initialized = TRUE;
     }
 
     return mmx_present;
 }
+
+#ifdef USE_SSE2
+pixman_bool_t
+pixman_have_sse (void)
+{
+    static pixman_bool_t initialized = FALSE;
+    static pixman_bool_t sse_present;
+
+    if (!initialized)
+    {
+        unsigned int features = detectCPUFeatures();
+        sse_present = (features & (MMX|MMX_Extensions|SSE|SSE2)) == (MMX|MMX_Extensions|SSE|SSE2);
+        initialized = TRUE;
+    }
+
+    return sse_present;
+}
+#endif
+
 #endif /* __amd64__ */
 #endif
--- a/gfx/cairo/libpixman/src/pixman-private.h
+++ b/gfx/cairo/libpixman/src/pixman-private.h
@@ -143,16 +143,22 @@ typedef struct point point_t;
 
 /* FIXME - the types and structures below should be give proper names
  */
 
 #define FASTCALL
 typedef FASTCALL void (*CombineMaskU) (uint32_t *src, const uint32_t *mask, int width);
 typedef FASTCALL void (*CombineFuncU) (uint32_t *dest, const uint32_t *src, int width);
 typedef FASTCALL void (*CombineFuncC) (uint32_t *dest, uint32_t *src, uint32_t *mask, int width);
+typedef FASTCALL void (*fetchProc)(bits_image_t *pict, int x, int y, int width,
+                                   uint32_t *buffer);
+typedef FASTCALL uint32_t (*fetchPixelProc)(bits_image_t *pict, int offset, int line);
+typedef FASTCALL void (*storeProc)(pixman_image_t *, uint32_t *bits,
+                                   const uint32_t *values, int x, int width,
+                                   const pixman_indexed_t *);
 
 typedef struct _FbComposeData {
     uint8_t	 op;
     pixman_image_t	*src;
     pixman_image_t	*mask;
     pixman_image_t	*dest;
     int16_t	 xSrc;
     int16_t	 ySrc;
@@ -172,16 +178,42 @@ typedef struct _FbComposeFunctions {
 
 extern FbComposeFunctions pixman_composeFunctions;
 
 void pixman_composite_rect_general_accessors (const FbComposeData *data,
 					      uint32_t *scanline_buffer);
 void pixman_composite_rect_general (const FbComposeData *data,
 				    uint32_t *scanline_buffer);
 
+fetchProc pixman_fetchProcForPicture (bits_image_t *);
+fetchPixelProc pixman_fetchPixelProcForPicture (bits_image_t *);
+storeProc pixman_storeProcForPicture (bits_image_t *);
+fetchProc pixman_fetchProcForPicture_accessors (bits_image_t *);
+fetchPixelProc pixman_fetchPixelProcForPicture_accessors (bits_image_t *);
+storeProc pixman_storeProcForPicture_accessors (bits_image_t *);
+
+void pixmanFetchSourcePict(source_image_t *, int x, int y, int width,
+                           uint32_t *buffer, uint32_t *mask, uint32_t maskBits);
+
+void fbFetchTransformed(bits_image_t *, int x, int y, int width,
+                        uint32_t *buffer, uint32_t *mask, uint32_t maskBits);
+void fbStoreExternalAlpha(bits_image_t *, int x, int y, int width,
+                          uint32_t *buffer);
+void fbFetchExternalAlpha(bits_image_t *, int x, int y, int width,
+                          uint32_t *buffer, uint32_t *mask, uint32_t maskBits);
+
+void fbFetchTransformed_accessors(bits_image_t *, int x, int y, int width,
+                                  uint32_t *buffer, uint32_t *mask,
+                                  uint32_t maskBits);
+void fbStoreExternalAlpha_accessors(bits_image_t *, int x, int y, int width,
+                                    uint32_t *buffer);
+void fbFetchExternalAlpha_accessors(bits_image_t *, int x, int y, int width,
+                                    uint32_t *buffer, uint32_t *mask,
+                                    uint32_t maskBits);
+
 /* end */
 
 typedef enum
 {
     BITS,
     LINEAR,
     CONICAL,
     RADIAL,
@@ -296,16 +328,17 @@ union pixman_image
     bits_image_t		bits;
     gradient_t			gradient;
     linear_gradient_t		linear;
     conical_gradient_t		conical;
     radial_gradient_t		radial;
     solid_fill_t		solid;
 };
 
+
 #define LOG2_BITMAP_PAD 5
 #define FB_STIP_SHIFT	LOG2_BITMAP_PAD
 #define FB_STIP_UNIT	(1 << FB_STIP_SHIFT)
 #define FB_STIP_MASK	(FB_STIP_UNIT - 1)
 #define FB_STIP_ALLONES	((uint32_t) -1)
 
 #if BITMAP_BIT_ORDER == LSBFirst
 #define FbScrLeft(x,n)	((x) >> (n))
@@ -668,30 +701,25 @@ union pixman_image
     do {								\
 	size_t _i;							\
 	uint8_t *_dst = (uint8_t*)(dst);				\
 	for(_i = 0; _i < (size_t) size; _i++) {				\
 	    WRITE((img), _dst +_i, (val));				\
 	}								\
     } while (0)
 
-/* FIXME */
-#define fbPrepareAccess(x)
-#define fbFinishAccess(x)
-
 #else
 
 #define READ(img, ptr)		(*(ptr))
 #define WRITE(img, ptr, val)	(*(ptr) = (val))
 #define MEMCPY_WRAPPED(img, dst, src, size)					\
     memcpy(dst, src, size)
 #define MEMSET_WRAPPED(img, dst, val, size)					\
     memset(dst, val, size)
-#define fbPrepareAccess(x)
-#define fbFinishAccess(x)
+
 #endif
 
 #define fbComposeGetSolid(img, res, fmt)				\
     do									\
     {									\
 	pixman_format_code_t format__;					\
 	if (img->type == SOLID)						\
 	{								\
new file mode 100644
--- /dev/null
+++ b/gfx/cairo/libpixman/src/pixman-source.c
@@ -0,0 +1,681 @@
+/*
+ *
+ * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
+ *             2005 Lars Knoll & Zack Rusin, Trolltech
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Keith Packard not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission.  Keith Packard makes no
+ * representations about the suitability of this software for any purpose.  It
+ * is provided "as is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "pixman-private.h"
+
+typedef struct
+{
+    uint32_t        left_ag;
+    uint32_t        left_rb;
+    uint32_t        right_ag;
+    uint32_t        right_rb;
+    int32_t       left_x;
+    int32_t       right_x;
+    int32_t       stepper;
+
+    pixman_gradient_stop_t	*stops;
+    int                      num_stops;
+    unsigned int             spread;
+
+    int		  need_reset;
+} GradientWalker;
+
+static void
+_gradient_walker_init (GradientWalker  *walker,
+		       gradient_t      *gradient,
+		       unsigned int     spread)
+{
+    walker->num_stops = gradient->n_stops;
+    walker->stops     = gradient->stops;
+    walker->left_x    = 0;
+    walker->right_x   = 0x10000;
+    walker->stepper   = 0;
+    walker->left_ag   = 0;
+    walker->left_rb   = 0;
+    walker->right_ag  = 0;
+    walker->right_rb  = 0;
+    walker->spread    = spread;
+
+    walker->need_reset = TRUE;
+}
+
+static void
+_gradient_walker_reset (GradientWalker  *walker,
+                        pixman_fixed_32_32_t     pos)
+{
+    int32_t                  x, left_x, right_x;
+    pixman_color_t          *left_c, *right_c;
+    int                      n, count = walker->num_stops;
+    pixman_gradient_stop_t *      stops = walker->stops;
+
+    static const pixman_color_t   transparent_black = { 0, 0, 0, 0 };
+
+    switch (walker->spread)
+    {
+    case PIXMAN_REPEAT_NORMAL:
+	x = (int32_t)pos & 0xFFFF;
+	for (n = 0; n < count; n++)
+	    if (x < stops[n].x)
+		break;
+	if (n == 0) {
+	    left_x =  stops[count-1].x - 0x10000;
+	    left_c = &stops[count-1].color;
+	} else {
+	    left_x =  stops[n-1].x;
+	    left_c = &stops[n-1].color;
+	}
+
+	if (n == count) {
+	    right_x =  stops[0].x + 0x10000;
+	    right_c = &stops[0].color;
+	} else {
+	    right_x =  stops[n].x;
+	    right_c = &stops[n].color;
+	}
+	left_x  += (pos - x);
+	right_x += (pos - x);
+	break;
+
+    case PIXMAN_REPEAT_PAD:
+	for (n = 0; n < count; n++)
+	    if (pos < stops[n].x)
+		break;
+
+	if (n == 0) {
+	    left_x =  INT32_MIN;
+	    left_c = &stops[0].color;
+	} else {
+	    left_x =  stops[n-1].x;
+	    left_c = &stops[n-1].color;
+	}
+
+	if (n == count) {
+	    right_x =  INT32_MAX;
+	    right_c = &stops[n-1].color;
+	} else {
+	    right_x =  stops[n].x;
+	    right_c = &stops[n].color;
+	}
+	break;
+
+    case PIXMAN_REPEAT_REFLECT:
+	x = (int32_t)pos & 0xFFFF;
+	if ((int32_t)pos & 0x10000)
+	    x = 0x10000 - x;
+	for (n = 0; n < count; n++)
+	    if (x < stops[n].x)
+		break;
+
+	if (n == 0) {
+	    left_x =  -stops[0].x;
+	    left_c = &stops[0].color;
+	} else {
+	    left_x =  stops[n-1].x;
+	    left_c = &stops[n-1].color;
+	}
+
+	if (n == count) {
+	    right_x = 0x20000 - stops[n-1].x;
+	    right_c = &stops[n-1].color;
+	} else {
+	    right_x =  stops[n].x;
+	    right_c = &stops[n].color;
+	}
+
+	if ((int32_t)pos & 0x10000) {
+	    pixman_color_t  *tmp_c;
+	    int32_t          tmp_x;
+
+	    tmp_x   = 0x10000 - right_x;
+	    right_x = 0x10000 - left_x;
+	    left_x  = tmp_x;
+
+	    tmp_c   = right_c;
+	    right_c = left_c;
+	    left_c  = tmp_c;
+
+	    x = 0x10000 - x;
+	}
+	left_x  += (pos - x);
+	right_x += (pos - x);
+	break;
+
+    default:  /* RepeatNone */
+	for (n = 0; n < count; n++)
+	    if (pos < stops[n].x)
+		break;
+
+	if (n == 0)
+	{
+	    left_x  =  INT32_MIN;
+	    right_x =  stops[0].x;
+	    left_c  = right_c = (pixman_color_t*) &transparent_black;
+	}
+	else if (n == count)
+	{
+	    left_x  = stops[n-1].x;
+	    right_x = INT32_MAX;
+	    left_c  = right_c = (pixman_color_t*) &transparent_black;
+	}
+	else
+	{
+	    left_x  =  stops[n-1].x;
+	    right_x =  stops[n].x;
+	    left_c  = &stops[n-1].color;
+	    right_c = &stops[n].color;
+	}
+    }
+
+    walker->left_x   = left_x;
+    walker->right_x  = right_x;
+    walker->left_ag  = ((left_c->alpha >> 8) << 16)   | (left_c->green >> 8);
+    walker->left_rb  = ((left_c->red & 0xff00) << 8)  | (left_c->blue >> 8);
+    walker->right_ag = ((right_c->alpha >> 8) << 16)  | (right_c->green >> 8);
+    walker->right_rb = ((right_c->red & 0xff00) << 8) | (right_c->blue >> 8);
+
+    if ( walker->left_x == walker->right_x                ||
+	 ( walker->left_ag == walker->right_ag &&
+	   walker->left_rb == walker->right_rb )   )
+    {
+	walker->stepper = 0;
+    }
+    else
+    {
+	int32_t width = right_x - left_x;
+	walker->stepper = ((1 << 24) + width/2)/width;
+    }
+
+    walker->need_reset = FALSE;
+}
+
+#define  GRADIENT_WALKER_NEED_RESET(w,x)				\
+    ( (w)->need_reset || (x) < (w)->left_x || (x) >= (w)->right_x)
+
+
+/* the following assumes that GRADIENT_WALKER_NEED_RESET(w,x) is FALSE */
+static uint32_t
+_gradient_walker_pixel (GradientWalker  *walker,
+                        pixman_fixed_32_32_t     x)
+{
+    int  dist, idist;
+    uint32_t  t1, t2, a, color;
+
+    if (GRADIENT_WALKER_NEED_RESET (walker, x))
+        _gradient_walker_reset (walker, x);
+
+    dist  = ((int)(x - walker->left_x)*walker->stepper) >> 16;
+    idist = 256 - dist;
+
+    /* combined INTERPOLATE and premultiply */
+    t1 = walker->left_rb*idist + walker->right_rb*dist;
+    t1 = (t1 >> 8) & 0xff00ff;
+
+    t2  = walker->left_ag*idist + walker->right_ag*dist;
+    t2 &= 0xff00ff00;
+
+    color = t2 & 0xff000000;
+    a     = t2 >> 24;
+
+    t1  = t1*a + 0x800080;
+    t1  = (t1 + ((t1 >> 8) & 0xff00ff)) >> 8;
+
+    t2  = (t2 >> 8)*a + 0x800080;
+    t2  = (t2 + ((t2 >> 8) & 0xff00ff));
+
+    return (color | (t1 & 0xff00ff) | (t2 & 0xff00));
+}
+
+void pixmanFetchSourcePict(source_image_t * pict, int x, int y, int width,
+                           uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
+{
+#if 0
+    SourcePictPtr   pGradient = pict->pSourcePict;
+#endif
+    GradientWalker  walker;
+    uint32_t       *end = buffer + width;
+    gradient_t	    *gradient;
+
+    if (pict->common.type == SOLID)
+    {
+	register uint32_t color = ((solid_fill_t *)pict)->color;
+
+	while (buffer < end)
+	    *(buffer++) = color;
+
+	return;
+    }
+
+    gradient = (gradient_t *)pict;
+
+    _gradient_walker_init (&walker, gradient, pict->common.repeat);
+
+    if (pict->common.type == LINEAR) {
+	pixman_vector_t v, unit;
+	pixman_fixed_32_32_t l;
+	pixman_fixed_48_16_t dx, dy, a, b, off;
+	linear_gradient_t *linear = (linear_gradient_t *)pict;
+
+        /* reference point is the center of the pixel */
+        v.vector[0] = pixman_int_to_fixed(x) + pixman_fixed_1/2;
+        v.vector[1] = pixman_int_to_fixed(y) + pixman_fixed_1/2;
+        v.vector[2] = pixman_fixed_1;
+        if (pict->common.transform) {
+            if (!pixman_transform_point_3d (pict->common.transform, &v))
+                return;
+            unit.vector[0] = pict->common.transform->matrix[0][0];
+            unit.vector[1] = pict->common.transform->matrix[1][0];
+            unit.vector[2] = pict->common.transform->matrix[2][0];
+        } else {
+            unit.vector[0] = pixman_fixed_1;
+            unit.vector[1] = 0;
+            unit.vector[2] = 0;
+        }
+
+        dx = linear->p2.x - linear->p1.x;
+        dy = linear->p2.y - linear->p1.y;
+        l = dx*dx + dy*dy;
+        if (l != 0) {
+            a = (dx << 32) / l;
+            b = (dy << 32) / l;
+            off = (-a*linear->p1.x - b*linear->p1.y)>>16;
+        }
+        if (l == 0  || (unit.vector[2] == 0 && v.vector[2] == pixman_fixed_1)) {
+            pixman_fixed_48_16_t inc, t;
+            /* affine transformation only */
+            if (l == 0) {
+                t = 0;
+                inc = 0;
+            } else {
+                t = ((a*v.vector[0] + b*v.vector[1]) >> 16) + off;
+                inc = (a * unit.vector[0] + b * unit.vector[1]) >> 16;
+            }
+
+	    if (pict->class == SOURCE_IMAGE_CLASS_VERTICAL)
+	    {
+		register uint32_t color;
+
+		color = _gradient_walker_pixel( &walker, t );
+		while (buffer < end)
+		    *(buffer++) = color;
+	    }
+	    else
+	    {
+                if (!mask) {
+                    while (buffer < end)
+                    {
+			*(buffer) = _gradient_walker_pixel (&walker, t);
+                        buffer += 1;
+                        t      += inc;
+                    }
+                } else {
+                    while (buffer < end) {
+                        if (*mask++ & maskBits)
+                        {
+			    *(buffer) = _gradient_walker_pixel (&walker, t);
+                        }
+                        buffer += 1;
+                        t      += inc;
+                    }
+                }
+	    }
+	}
+	else /* projective transformation */
+	{
+	    pixman_fixed_48_16_t t;
+
+	    if (pict->class == SOURCE_IMAGE_CLASS_VERTICAL)
+	    {
+		register uint32_t color;
+
+		if (v.vector[2] == 0)
+		{
+		    t = 0;
+		}
+		else
+		{
+		    pixman_fixed_48_16_t x, y;
+
+		    x = ((pixman_fixed_48_16_t) v.vector[0] << 16) / v.vector[2];
+		    y = ((pixman_fixed_48_16_t) v.vector[1] << 16) / v.vector[2];
+		    t = ((a * x + b * y) >> 16) + off;
+		}
+
+ 		color = _gradient_walker_pixel( &walker, t );
+		while (buffer < end)
+		    *(buffer++) = color;
+	    }
+	    else
+	    {
+		while (buffer < end)
+		{
+		    if (!mask || *mask++ & maskBits)
+		    {
+			if (v.vector[2] == 0) {
+			    t = 0;
+			} else {
+			    pixman_fixed_48_16_t x, y;
+			    x = ((pixman_fixed_48_16_t)v.vector[0] << 16) / v.vector[2];
+			    y = ((pixman_fixed_48_16_t)v.vector[1] << 16) / v.vector[2];
+			    t = ((a*x + b*y) >> 16) + off;
+			}
+			*(buffer) = _gradient_walker_pixel (&walker, t);
+		    }
+		    ++buffer;
+		    v.vector[0] += unit.vector[0];
+		    v.vector[1] += unit.vector[1];
+		    v.vector[2] += unit.vector[2];
+		}
+            }
+        }
+    } else {
+
+/*
+ * In the radial gradient problem we are given two circles (c₁,r₁) and
+ * (c₂,r₂) that define the gradient itself. Then, for any point p, we
+ * must compute the value(s) of t within [0.0, 1.0] representing the
+ * circle(s) that would color the point.
+ *
+ * There are potentially two values of t since the point p can be
+ * colored by both sides of the circle, (which happens whenever one
+ * circle is not entirely contained within the other).
+ *
+ * If we solve for a value of t that is outside of [0.0, 1.0] then we
+ * use the extend mode (NONE, REPEAT, REFLECT, or PAD) to map to a
+ * value within [0.0, 1.0].
+ *
+ * Here is an illustration of the problem:
+ *
+ *              p₂
+ *           p  •
+ *           •   ╲
+ *        ·       ╲r₂
+ *  p₁ ·           ╲
+ *  •              θ╲
+ *   ╲             ╌╌•
+ *    ╲r₁        ·   c₂
+ *    θ╲    ·
+ *    ╌╌•
+ *      c₁
+ *
+ * Given (c₁,r₁), (c₂,r₂) and p, we must find an angle θ such that two
+ * points p₁ and p₂ on the two circles are collinear with p. Then, the
+ * desired value of t is the ratio of the length of p₁p to the length
+ * of p₁p₂.
+ *
+ * So, we have six unknown values: (p₁x, p₁y), (p₂x, p₂y), θ and t.
+ * We can also write six equations that constrain the problem:
+ *
+ * Point p₁ is a distance r₁ from c₁ at an angle of θ:
+ *
+ *	1. p₁x = c₁x + r₁·cos θ
+ *	2. p₁y = c₁y + r₁·sin θ
+ *
+ * Point p₂ is a distance r₂ from c₂ at an angle of θ:
+ *
+ *	3. p₂x = c₂x + r2·cos θ
+ *	4. p₂y = c₂y + r2·sin θ
+ *
+ * Point p lies at a fraction t along the line segment p₁p₂:
+ *
+ *	5. px = t·p₂x + (1-t)·p₁x
+ *	6. py = t·p₂y + (1-t)·p₁y
+ *
+ * To solve, first subtitute 1-4 into 5 and 6:
+ *
+ * px = t·(c₂x + r₂·cos θ) + (1-t)·(c₁x + r₁·cos θ)
+ * py = t·(c₂y + r₂·sin θ) + (1-t)·(c₁y + r₁·sin θ)
+ *
+ * Then solve each for cos θ and sin θ expressed as a function of t:
+ *
+ * cos θ = (-(c₂x - c₁x)·t + (px - c₁x)) / ((r₂-r₁)·t + r₁)
+ * sin θ = (-(c₂y - c₁y)·t + (py - c₁y)) / ((r₂-r₁)·t + r₁)
+ *
+ * To simplify this a bit, we define new variables for several of the
+ * common terms as shown below:
+ *
+ *              p₂
+ *           p  •
+ *           •   ╲
+ *        ·  ┆    ╲r₂
+ *  p₁ ·     ┆     ╲
+ *  •     pdy┆      ╲
+ *   ╲       ┆       •c₂
+ *    ╲r₁    ┆   ·   ┆
+ *     ╲    ·┆       ┆cdy
+ *      •╌╌╌╌┴╌╌╌╌╌╌╌┘
+ *    c₁  pdx   cdx
+ *
+ * cdx = (c₂x - c₁x)
+ * cdy = (c₂y - c₁y)
+ *  dr =  r₂-r₁
+ * pdx =  px - c₁x
+ * pdy =  py - c₁y
+ *
+ * Note that cdx, cdy, and dr do not depend on point p at all, so can
+ * be pre-computed for the entire gradient. The simplifed equations
+ * are now:
+ *
+ * cos θ = (-cdx·t + pdx) / (dr·t + r₁)
+ * sin θ = (-cdy·t + pdy) / (dr·t + r₁)
+ *
+ * Finally, to get a single function of t and eliminate the last
+ * unknown θ, we use the identity sin²θ + cos²θ = 1. First, square
+ * each equation, (we knew a quadratic was coming since it must be
+ * possible to obtain two solutions in some cases):
+ *
+ * cos²θ = (cdx²t² - 2·cdx·pdx·t + pdx²) / (dr²·t² + 2·r₁·dr·t + r₁²)
+ * sin²θ = (cdy²t² - 2·cdy·pdy·t + pdy²) / (dr²·t² + 2·r₁·dr·t + r₁²)
+ *
+ * Then add both together, set the result equal to 1, and express as a
+ * standard quadratic equation in t of the form At² + Bt + C = 0
+ *
+ * (cdx² + cdy² - dr²)·t² - 2·(cdx·pdx + cdy·pdy + r₁·dr)·t + (pdx² + pdy² - r₁²) = 0
+ *
+ * In other words:
+ *
+ * A = cdx² + cdy² - dr²
+ * B = -2·(pdx·cdx + pdy·cdy + r₁·dr)
+ * C = pdx² + pdy² - r₁²
+ *
+ * And again, notice that A does not depend on p, so can be
+ * precomputed. From here we just use the quadratic formula to solve
+ * for t:
+ *
+ * t = (-2·B ± ⎷(B² - 4·A·C)) / 2·A
+ */
+        /* radial or conical */
+        pixman_bool_t affine = TRUE;
+        double cx = 1.;
+        double cy = 0.;
+        double cz = 0.;
+	double rx = x + 0.5;
+	double ry = y + 0.5;
+        double rz = 1.;
+
+        if (pict->common.transform) {
+            pixman_vector_t v;
+            /* reference point is the center of the pixel */
+            v.vector[0] = pixman_int_to_fixed(x) + pixman_fixed_1/2;
+            v.vector[1] = pixman_int_to_fixed(y) + pixman_fixed_1/2;
+            v.vector[2] = pixman_fixed_1;
+            if (!pixman_transform_point_3d (pict->common.transform, &v))
+                return;
+
+            cx = pict->common.transform->matrix[0][0]/65536.;
+            cy = pict->common.transform->matrix[1][0]/65536.;
+            cz = pict->common.transform->matrix[2][0]/65536.;
+            rx = v.vector[0]/65536.;
+            ry = v.vector[1]/65536.;
+            rz = v.vector[2]/65536.;
+            affine = pict->common.transform->matrix[2][0] == 0 && v.vector[2] == pixman_fixed_1;
+        }
+
+        if (pict->common.type == RADIAL) {
+	    radial_gradient_t *radial = (radial_gradient_t *)pict;
+            if (affine) {
+                while (buffer < end) {
+		    if (!mask || *mask++ & maskBits)
+		    {
+			double pdx, pdy;
+			double B, C;
+			double det;
+			double c1x = radial->c1.x / 65536.0;
+			double c1y = radial->c1.y / 65536.0;
+			double r1  = radial->c1.radius / 65536.0;
+                        pixman_fixed_48_16_t t;
+
+			pdx = rx - c1x;
+			pdy = ry - c1y;
+
+			B = -2 * (  pdx * radial->cdx
+				    + pdy * radial->cdy
+				    + r1 * radial->dr);
+			C = (pdx * pdx + pdy * pdy - r1 * r1);
+
+                        det = (B * B) - (4 * radial->A * C);
+			if (det < 0.0)
+			    det = 0.0;
+
+			if (radial->A < 0)
+			    t = (pixman_fixed_48_16_t) ((- B - sqrt(det)) / (2.0 * radial->A) * 65536);
+			else
+			    t = (pixman_fixed_48_16_t) ((- B + sqrt(det)) / (2.0 * radial->A) * 65536);
+
+			*(buffer) = _gradient_walker_pixel (&walker, t);
+		    }
+		    ++buffer;
+
+                    rx += cx;
+                    ry += cy;
+                }
+            } else {
+		/* projective */
+                while (buffer < end) {
+		    if (!mask || *mask++ & maskBits)
+		    {
+			double pdx, pdy;
+			double B, C;
+			double det;
+			double c1x = radial->c1.x / 65536.0;
+			double c1y = radial->c1.y / 65536.0;
+			double r1  = radial->c1.radius / 65536.0;
+                        pixman_fixed_48_16_t t;
+			double x, y;
+
+			if (rz != 0) {
+			    x = rx/rz;
+			    y = ry/rz;
+			} else {
+			    x = y = 0.;
+			}
+
+			pdx = x - c1x;
+			pdy = y - c1y;
+
+			B = -2 * (  pdx * radial->cdx
+				    + pdy * radial->cdy
+				    + r1 * radial->dr);
+			C = (pdx * pdx + pdy * pdy - r1 * r1);
+
+                        det = (B * B) - (4 * radial->A * C);
+			if (det < 0.0)
+			    det = 0.0;
+
+			if (radial->A < 0)
+			    t = (pixman_fixed_48_16_t) ((- B - sqrt(det)) / (2.0 * radial->A) * 65536);
+			else
+			    t = (pixman_fixed_48_16_t) ((- B + sqrt(det)) / (2.0 * radial->A) * 65536);
+
+			*(buffer) = _gradient_walker_pixel (&walker, t);
+		    }
+		    ++buffer;
+
+                    rx += cx;
+                    ry += cy;
+		    rz += cz;
+                }
+            }
+        } else /* SourcePictTypeConical */ {
+	    conical_gradient_t *conical = (conical_gradient_t *)pict;
+            double a = conical->angle/(180.*65536);
+            if (affine) {
+                rx -= conical->center.x/65536.;
+                ry -= conical->center.y/65536.;
+
+                while (buffer < end) {
+		    double angle;
+
+                    if (!mask || *mask++ & maskBits)
+		    {
+                        pixman_fixed_48_16_t   t;
+
+                        angle = atan2(ry, rx) + a;
+			t     = (pixman_fixed_48_16_t) (angle * (65536. / (2*M_PI)));
+
+			*(buffer) = _gradient_walker_pixel (&walker, t);
+		    }
+
+                    ++buffer;
+                    rx += cx;
+                    ry += cy;
+                }
+            } else {
+                while (buffer < end) {
+                    double x, y;
+                    double angle;
+
+                    if (!mask || *mask++ & maskBits)
+                    {
+			pixman_fixed_48_16_t  t;
+
+			if (rz != 0) {
+			    x = rx/rz;
+			    y = ry/rz;
+			} else {
+			    x = y = 0.;
+			}
+			x -= conical->center.x/65536.;
+			y -= conical->center.y/65536.;
+			angle = atan2(y, x) + a;
+			t     = (pixman_fixed_48_16_t) (angle * (65536. / (2*M_PI)));
+
+			*(buffer) = _gradient_walker_pixel (&walker, t);
+		    }
+
+                    ++buffer;
+                    rx += cx;
+                    ry += cy;
+                    rz += cz;
+                }
+            }
+        }
+    }
+}
new file mode 100644
--- /dev/null
+++ b/gfx/cairo/libpixman/src/pixman-sse.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright © 2008 Rodrigo Kumpera
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Red Hat not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission.  Red Hat makes no representations about the
+ * suitability of this software for any purpose.  It is provided "as is"
+ * without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ *
+ * Author:  Rodrigo Kumpera (kumpera@gmail.com)
+ * 
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "pixman-sse.h"
+
+#ifdef USE_SSE2
+
+void
+fbComposeSetupSSE(void)
+{
+    static pixman_bool_t initialized = FALSE;
+
+    if (initialized)
+	return;
+    
+    /* check if we have SSE2 support and initialize accordingly */
+    if (pixman_have_sse())
+    {
+    }
+
+    initialized = TRUE;
+}
+
+
+#endif /* USE_SSE2 */
new file mode 100644
--- /dev/null
+++ b/gfx/cairo/libpixman/src/pixman-sse.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright © 2008 Rodrigo Kumpera
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Red Hat not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission.  Red Hat makes no representations about the
+ * suitability of this software for any purpose.  It is provided "as is"
+ * without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ *
+ * Author:  Rodrigo Kumpera (kumpera@gmail.com)
+ * 
+ */
+#ifndef _PIXMAN_SSE_H_
+#define _PIXMAN_SSE_H_
+
+#ifdef HAVE_DIX_CONFIG_H
+#include <dix-config.h>
+#endif
+
+#include "pixman-private.h"
+
+#ifdef USE_SSE2
+
+#if !defined(__amd64__) && !defined(__x86_64__)
+pixman_bool_t pixman_have_sse(void);
+#else
+#define pixman_have_sse() TRUE
+#endif
+
+#else
+#define pixman_have_sse() FALSE
+#endif
+
+#ifdef USE_SSE2
+
+void fbComposeSetupSSE(void);
+
+#endif /* USE_SSE2 */
+
+#endif /* _PIXMAN_SSE_H_ */
new file mode 100644
--- /dev/null
+++ b/gfx/cairo/libpixman/src/pixman-transformed-accessors.c
@@ -0,0 +1,3 @@
+#define PIXMAN_FB_ACCESSORS
+
+#include "pixman-transformed.c"
new file mode 100644
--- /dev/null
+++ b/gfx/cairo/libpixman/src/pixman-transformed.c
@@ -0,0 +1,726 @@
+/*
+ *
+ * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
+ *             2005 Lars Knoll & Zack Rusin, Trolltech
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Keith Packard not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission.  Keith Packard makes no
+ * representations about the suitability of this software for any purpose.  It
+ * is provided "as is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <stdlib.h>
+
+#include "pixman-private.h"
+
+#ifdef PIXMAN_FB_ACCESSORS
+#define FETCH_PROC_FOR_PICTURE pixman_fetchProcForPicture_accessors
+#define FETCH_PIXEL_PROC_FOR_PICTURE pixman_fetchPixelProcForPicture_accessors
+#define STORE_PROC_FOR_PICTURE pixman_storeProcForPicture_accessors
+
+#define FB_FETCH_TRANSFORMED fbFetchTransformed_accessors
+#define FB_FETCH_EXTERNAL_ALPHA fbFetchExternalAlpha_accessors
+#define FB_STORE_EXTERNAL_ALPHA fbStoreExternalAlpha_accessors
+
+#else
+
+#define FETCH_PROC_FOR_PICTURE pixman_fetchProcForPicture
+#define FETCH_PIXEL_PROC_FOR_PICTURE pixman_fetchPixelProcForPicture
+#define STORE_PROC_FOR_PICTURE pixman_storeProcForPicture
+
+#define FB_FETCH_TRANSFORMED fbFetchTransformed
+#define FB_FETCH_EXTERNAL_ALPHA fbFetchExternalAlpha
+#define FB_STORE_EXTERNAL_ALPHA fbStoreExternalAlpha
+
+#endif
+
+/*
+ * Fetch from region strategies
+ */
+typedef FASTCALL uint32_t (*fetchFromRegionProc)(bits_image_t *pict, int x, int y, uint32_t *buffer, fetchPixelProc fetch, pixman_box16_t *box);
+
+static inline uint32_t
+fbFetchFromNoRegion(bits_image_t *pict, int x, int y, uint32_t *buffer, fetchPixelProc fetch, pixman_box16_t *box)
+{
+    return fetch (pict, x, y);
+}
+
+static uint32_t
+fbFetchFromNRectangles(bits_image_t *pict, int x, int y, uint32_t *buffer, fetchPixelProc fetch, pixman_box16_t *box)
+{
+    pixman_box16_t box2;
+    if (pixman_region_contains_point (pict->common.src_clip, x, y, &box2))
+        return fbFetchFromNoRegion(pict, x, y, buffer, fetch, box);
+    else
+        return 0;
+}
+
+static uint32_t
+fbFetchFromOneRectangle(bits_image_t *pict, int x, int y, uint32_t *buffer, fetchPixelProc fetch, pixman_box16_t *box)
+{
+    pixman_box16_t box2 = *box;
+    return ((x < box2.x1) | (x >= box2.x2) | (y < box2.y1) | (y >= box2.y2)) ?
+        0 : fbFetchFromNoRegion(pict, x, y, buffer, fetch, box);
+}
+
+/*
+ * Fetching Algorithms
+ */
+static void
+fbFetchTransformed_Nearest_Normal(bits_image_t * pict, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits, pixman_bool_t affine, pixman_vector_t v, pixman_vector_t unit)
+{
+    pixman_box16_t* box = NULL;
+    fetchPixelProc   fetch;
+    fetchFromRegionProc fetchFromRegion;
+    int x, y, i;
+
+    /* initialize the two function pointers */
+    fetch = FETCH_PIXEL_PROC_FOR_PICTURE(pict);
+
+    if(pixman_region_n_rects (pict->common.src_clip) == 1)
+        fetchFromRegion = fbFetchFromNoRegion;
+    else
+        fetchFromRegion = fbFetchFromNRectangles;
+
+    for ( i = 0; i < width; ++i)
+    {
+        if (!mask || mask[i] & maskBits)
+        {
+            if (!v.vector[2])
+            {
+                *(buffer + i) = 0;
+            }
+            else
+            {
+                if (!affine)
+                {
+                    y = MOD(DIV(v.vector[1],v.vector[2]), pict->height);
+                    x = MOD(DIV(v.vector[0],v.vector[2]), pict->width);
+                }
+                else
+                {
+                    y = MOD(v.vector[1]>>16, pict->height);
+                    x = MOD(v.vector[0]>>16, pict->width);
+                }
+                *(buffer + i) = fetchFromRegion(pict, x, y, buffer, fetch, box);
+            }
+        }
+
+        v.vector[0] += unit.vector[0];
+        v.vector[1] += unit.vector[1];
+        v.vector[2] += unit.vector[2];
+    }
+}
+
+static void
+fbFetchTransformed_Nearest_Pad(bits_image_t * pict, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits, pixman_bool_t affine, pixman_vector_t v, pixman_vector_t unit)
+{
+    pixman_box16_t *box = NULL;
+    fetchPixelProc   fetch;
+    fetchFromRegionProc fetchFromRegion;
+    int x, y, i;
+
+    /* initialize the two function pointers */
+    fetch = FETCH_PIXEL_PROC_FOR_PICTURE(pict);
+
+    if(pixman_region_n_rects (pict->common.src_clip) == 1)
+        fetchFromRegion = fbFetchFromNoRegion;
+    else
+        fetchFromRegion = fbFetchFromNRectangles;
+
+    for (i = 0; i < width; ++i)
+    {
+        if (!mask || mask[i] & maskBits)
+        {
+            if (!v.vector[2])
+            {
+                *(buffer + i) = 0;
+            }
+            else
+            {
+                if (!affine)
+                {
+                    y = CLIP(DIV(v.vector[1], v.vector[2]), 0, pict->height-1);
+                    x = CLIP(DIV(v.vector[0], v.vector[2]), 0, pict->width-1);
+                }
+                else
+                {
+                    y = CLIP(v.vector[1]>>16, 0, pict->height-1);
+                    x = CLIP(v.vector[0]>>16, 0, pict->width-1);
+                }
+
+                *(buffer + i) = fetchFromRegion(pict, x, y, buffer, fetch, box);
+            }
+        }
+
+        v.vector[0] += unit.vector[0];
+        v.vector[1] += unit.vector[1];
+        v.vector[2] += unit.vector[2];
+    }
+}
+
+static void
+fbFetchTransformed_Nearest_General(bits_image_t * pict, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits, pixman_bool_t affine, pixman_vector_t v, pixman_vector_t unit)
+{
+    pixman_box16_t *box = NULL;
+    fetchPixelProc   fetch;
+    fetchFromRegionProc fetchFromRegion;
+    int x, y, i;
+
+    /* initialize the two function pointers */
+    fetch = FETCH_PIXEL_PROC_FOR_PICTURE(pict);
+
+    if(pixman_region_n_rects (pict->common.src_clip) == 1)
+    {
+        box = &(pict->common.src_clip->extents);
+        fetchFromRegion = fbFetchFromOneRectangle;
+    }
+    else
+    {
+        fetchFromRegion = fbFetchFromNRectangles;
+    }
+
+    for (i = 0; i < width; ++i) {
+        if (!mask || mask[i] & maskBits)
+        {
+            if (!v.vector[2]) {
+                *(buffer + i) = 0;
+            } else {
+                if (!affine) {
+                    y = DIV(v.vector[1],v.vector[2]);
+                    x = DIV(v.vector[0],v.vector[2]);
+                } else {
+                    y = v.vector[1]>>16;
+                    x = v.vector[0]>>16;
+                }
+                *(buffer + i) = fetchFromRegion(pict, x, y, buffer, fetch, box);
+            }
+        }
+        v.vector[0] += unit.vector[0];
+        v.vector[1] += unit.vector[1];
+        v.vector[2] += unit.vector[2];
+    }
+}
+
+static void
+fbFetchTransformed_Bilinear_Normal(bits_image_t * pict, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits, pixman_bool_t affine, pixman_vector_t v, pixman_vector_t unit)
+{
+    pixman_box16_t *box = NULL;
+    fetchPixelProc   fetch;
+    fetchFromRegionProc fetchFromRegion;
+    int i;
+
+    /* initialize the two function pointers */
+    fetch = FETCH_PIXEL_PROC_FOR_PICTURE(pict);
+
+    if(pixman_region_n_rects (pict->common.src_clip) == 1)
+        fetchFromRegion = fbFetchFromNoRegion;
+    else
+        fetchFromRegion = fbFetchFromNRectangles;
+
+    for (i = 0; i < width; ++i) {
+        if (!mask || mask[i] & maskBits)
+        {
+            if (!v.vector[2]) {
+                *(buffer + i) = 0;
+            } else {
+                int x1, x2, y1, y2, distx, idistx, disty, idisty;
+                uint32_t tl, tr, bl, br, r;
+                uint32_t ft, fb;
+
+                if (!affine) {
+                    pixman_fixed_48_16_t div;
+                    div = ((pixman_fixed_48_16_t)v.vector[0] << 16)/v.vector[2];
+                    x1 = div >> 16;
+                    distx = ((pixman_fixed_t)div >> 8) & 0xff;
+                    div = ((pixman_fixed_48_16_t)v.vector[1] << 16)/v.vector[2];
+                    y1 = div >> 16;
+                    disty = ((pixman_fixed_t)div >> 8) & 0xff;
+                } else {
+                    x1 = v.vector[0] >> 16;
+                    distx = (v.vector[0] >> 8) & 0xff;
+                    y1 = v.vector[1] >> 16;
+                    disty = (v.vector[1] >> 8) & 0xff;
+                }
+                x2 = x1 + 1;
+                y2 = y1 + 1;
+
+                idistx = 256 - distx;
+                idisty = 256 - disty;
+
+                x1 = MOD (x1, pict->width);
+                x2 = MOD (x2, pict->width);
+                y1 = MOD (y1, pict->height);
+                y2 = MOD (y2, pict->height);
+
+                tl = fetchFromRegion(pict, x1, y1, buffer, fetch, box);
+                tr = fetchFromRegion(pict, x2, y1, buffer, fetch, box);
+                bl = fetchFromRegion(pict, x1, y2, buffer, fetch, box);
+                br = fetchFromRegion(pict, x2, y2, buffer, fetch, box);
+
+                ft = FbGet8(tl,0) * idistx + FbGet8(tr,0) * distx;
+                fb = FbGet8(bl,0) * idistx + FbGet8(br,0) * distx;
+                r = (((ft * idisty + fb * disty) >> 16) & 0xff);
+                ft = FbGet8(tl,8) * idistx + FbGet8(tr,8) * distx;
+                fb = FbGet8(bl,8) * idistx + FbGet8(br,8) * distx;
+                r |= (((ft * idisty + fb * disty) >> 8) & 0xff00);
+                ft = FbGet8(tl,16) * idistx + FbGet8(tr,16) * distx;
+                fb = FbGet8(bl,16) * idistx + FbGet8(br,16) * distx;
+                r |= (((ft * idisty + fb * disty)) & 0xff0000);
+                ft = FbGet8(tl,24) * idistx + FbGet8(tr,24) * distx;
+                fb = FbGet8(bl,24) * idistx + FbGet8(br,24) * distx;
+                r |= (((ft * idisty + fb * disty) << 8) & 0xff000000);
+                *(buffer + i) = r;
+            }
+        }
+        v.vector[0] += unit.vector[0];
+        v.vector[1] += unit.vector[1];
+        v.vector[2] += unit.vector[2];
+    }
+}
+
+static void
+fbFetchTransformed_Bilinear_Pad(bits_image_t * pict, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits, pixman_bool_t affine, pixman_vector_t v, pixman_vector_t unit)
+{
+    pixman_box16_t *box = NULL;
+    fetchPixelProc   fetch;
+    fetchFromRegionProc fetchFromRegion;
+    int i;
+
+    /* initialize the two function pointers */
+    fetch = FETCH_PIXEL_PROC_FOR_PICTURE(pict);
+
+    if(pixman_region_n_rects (pict->common.src_clip) == 1)
+        fetchFromRegion = fbFetchFromNoRegion;
+    else
+        fetchFromRegion = fbFetchFromNRectangles;
+
+    for (i = 0; i < width; ++i) {
+        if (!mask || mask[i] & maskBits)
+        {
+            if (!v.vector[2]) {
+                *(buffer + i) = 0;
+            } else {
+                int x1, x2, y1, y2, distx, idistx, disty, idisty;
+                uint32_t tl, tr, bl, br, r;
+                uint32_t ft, fb;
+
+                if (!affine) {
+                    pixman_fixed_48_16_t div;
+                    div = ((pixman_fixed_48_16_t)v.vector[0] << 16)/v.vector[2];
+                    x1 = div >> 16;
+                    distx = ((pixman_fixed_t)div >> 8) & 0xff;
+                    div = ((pixman_fixed_48_16_t)v.vector[1] << 16)/v.vector[2];
+                    y1 = div >> 16;
+                    disty = ((pixman_fixed_t)div >> 8) & 0xff;
+                } else {
+                    x1 = v.vector[0] >> 16;
+                    distx = (v.vector[0] >> 8) & 0xff;
+                    y1 = v.vector[1] >> 16;
+                    disty = (v.vector[1] >> 8) & 0xff;
+                }
+                x2 = x1 + 1;
+                y2 = y1 + 1;
+
+                idistx = 256 - distx;
+                idisty = 256 - disty;
+
+                x1 = CLIP (x1, 0, pict->width-1);
+                x2 = CLIP (x2, 0, pict->width-1);
+                y1 = CLIP (y1, 0, pict->height-1);
+                y2 = CLIP (y2, 0, pict->height-1);
+
+                tl = fetchFromRegion(pict, x1, y1, buffer, fetch, box);
+                tr = fetchFromRegion(pict, x2, y1, buffer, fetch, box);
+                bl = fetchFromRegion(pict, x1, y2, buffer, fetch, box);
+                br = fetchFromRegion(pict, x2, y2, buffer, fetch, box);
+
+                ft = FbGet8(tl,0) * idistx + FbGet8(tr,0) * distx;
+                fb = FbGet8(bl,0) * idistx + FbGet8(br,0) * distx;
+                r = (((ft * idisty + fb * disty) >> 16) & 0xff);
+                ft = FbGet8(tl,8) * idistx + FbGet8(tr,8) * distx;
+                fb = FbGet8(bl,8) * idistx + FbGet8(br,8) * distx;
+                r |= (((ft * idisty + fb * disty) >> 8) & 0xff00);
+                ft = FbGet8(tl,16) * idistx + FbGet8(tr,16) * distx;
+                fb = FbGet8(bl,16) * idistx + FbGet8(br,16) * distx;
+                r |= (((ft * idisty + fb * disty)) & 0xff0000);
+                ft = FbGet8(tl,24) * idistx + FbGet8(tr,24) * distx;
+                fb = FbGet8(bl,24) * idistx + FbGet8(br,24) * distx;
+                r |= (((ft * idisty + fb * disty) << 8) & 0xff000000);
+                *(buffer + i) = r;
+            }
+        }
+        v.vector[0] += unit.vector[0];
+        v.vector[1] += unit.vector[1];
+        v.vector[2] += unit.vector[2];
+    }
+}
+
+static void
+fbFetchTransformed_Bilinear_General(bits_image_t * pict, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits, pixman_bool_t affine, pixman_vector_t v, pixman_vector_t unit)
+{
+    pixman_box16_t *box = NULL;
+    fetchPixelProc   fetch;
+    fetchFromRegionProc fetchFromRegion;
+    int i;
+
+    /* initialize the two function pointers */
+    fetch = FETCH_PIXEL_PROC_FOR_PICTURE(pict);
+
+    if(pixman_region_n_rects (pict->common.src_clip) == 1)
+    {
+        box = &(pict->common.src_clip->extents);
+        fetchFromRegion = fbFetchFromOneRectangle;
+    }
+    else
+    {
+        fetchFromRegion = fbFetchFromNRectangles;
+    }
+
+    for (i = 0; i < width; ++i)
+    {
+        if (!mask || mask[i] & maskBits)
+        {
+            if (!v.vector[2]) {
+                *(buffer + i) = 0;
+            } else {
+                int x1, x2, y1, y2, distx, idistx, disty, idisty;
+                uint32_t tl, tr, bl, br, r;
+                uint32_t ft, fb;
+
+                if (!affine) {
+                    pixman_fixed_48_16_t div;
+                    div = ((pixman_fixed_48_16_t)v.vector[0] << 16)/v.vector[2];
+                    x1 = div >> 16;
+                    distx = ((pixman_fixed_t)div >> 8) & 0xff;
+                    div = ((pixman_fixed_48_16_t)v.vector[1] << 16)/v.vector[2];
+                    y1 = div >> 16;
+                    disty = ((pixman_fixed_t)div >> 8) & 0xff;
+                } else {
+                    x1 = v.vector[0] >> 16;
+                    distx = (v.vector[0] >> 8) & 0xff;
+                    y1 = v.vector[1] >> 16;
+                    disty = (v.vector[1] >> 8) & 0xff;
+                }
+                x2 = x1 + 1;
+                y2 = y1 + 1;
+
+                idistx = 256 - distx;
+                idisty = 256 - disty;
+
+                tl = fetchFromRegion(pict, x1, y1, buffer, fetch, box);
+                tr = fetchFromRegion(pict, x2, y1, buffer, fetch, box);
+                bl = fetchFromRegion(pict, x1, y2, buffer, fetch, box);
+                br = fetchFromRegion(pict, x2, y2, buffer, fetch, box);
+
+                ft = FbGet8(tl,0) * idistx + FbGet8(tr,0) * distx;
+                fb = FbGet8(bl,0) * idistx + FbGet8(br,0) * distx;
+                r = (((ft * idisty + fb * disty) >> 16) & 0xff);
+                ft = FbGet8(tl,8) * idistx + FbGet8(tr,8) * distx;
+                fb = FbGet8(bl,8) * idistx + FbGet8(br,8) * distx;
+                r |= (((ft * idisty + fb * disty) >> 8) & 0xff00);
+                ft = FbGet8(tl,16) * idistx + FbGet8(tr,16) * distx;
+                fb = FbGet8(bl,16) * idistx + FbGet8(br,16) * distx;
+                r |= (((ft * idisty + fb * disty)) & 0xff0000);
+                ft = FbGet8(tl,24) * idistx + FbGet8(tr,24) * distx;
+                fb = FbGet8(bl,24) * idistx + FbGet8(br,24) * distx;
+                r |= (((ft * idisty + fb * disty) << 8) & 0xff000000);
+                *(buffer + i) = r;
+            }
+        }
+
+        v.vector[0] += unit.vector[0];
+        v.vector[1] += unit.vector[1];
+        v.vector[2] += unit.vector[2];
+    }
+}
+
+static void
+fbFetchTransformed_Convolution(bits_image_t * pict, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits, pixman_bool_t affine, pixman_vector_t v, pixman_vector_t unit)
+{
+    pixman_box16_t dummy;
+    fetchPixelProc fetch;
+    int i;
+
+    pixman_fixed_t *params = pict->common.filter_params;
+    int32_t cwidth = pixman_fixed_to_int(params[0]);
+    int32_t cheight = pixman_fixed_to_int(params[1]);
+    int xoff = (params[0] - pixman_fixed_1) >> 1;
+    int yoff = (params[1] - pixman_fixed_1) >> 1;
+    fetch = FETCH_PIXEL_PROC_FOR_PICTURE(pict);
+
+    params += 2;
+    for (i = 0; i < width; ++i) {
+        if (!mask || mask[i] & maskBits)
+        {
+            if (!v.vector[2]) {
+                *(buffer + i) = 0;
+            } else {
+                int x1, x2, y1, y2, x, y;
+                int32_t srtot, sgtot, sbtot, satot;
+                pixman_fixed_t *p = params;
+
+                if (!affine) {
+                    pixman_fixed_48_16_t tmp;
+                    tmp = ((pixman_fixed_48_16_t)v.vector[0] << 16)/v.vector[2] - xoff;
+                    x1 = pixman_fixed_to_int(tmp);
+                    tmp = ((pixman_fixed_48_16_t)v.vector[1] << 16)/v.vector[2] - yoff;
+                    y1 = pixman_fixed_to_int(tmp);
+                } else {
+                    x1 = pixman_fixed_to_int(v.vector[0] - xoff);
+                    y1 = pixman_fixed_to_int(v.vector[1] - yoff);
+                }
+                x2 = x1 + cwidth;
+                y2 = y1 + cheight;
+
+                srtot = sgtot = sbtot = satot = 0;
+
+                for (y = y1; y < y2; y++) {
+                    int ty;
+                    switch (pict->common.repeat) {
+                        case PIXMAN_REPEAT_NORMAL:
+                            ty = MOD (y, pict->height);
+                            break;
+                        case PIXMAN_REPEAT_PAD:
+                            ty = CLIP (y, 0, pict->height-1);
+                            break;
+                        default:
+                            ty = y;
+                    }
+                    for (x = x1; x < x2; x++) {
+                        if (*p) {
+                            int tx;
+                            switch (pict->common.repeat) {
+                                case PIXMAN_REPEAT_NORMAL:
+                                    tx = MOD (x, pict->width);
+                                    break;
+                                case PIXMAN_REPEAT_PAD:
+                                    tx = CLIP (x, 0, pict->width-1);
+                                    break;
+                                default:
+                                    tx = x;
+                            }
+                            if (pixman_region_contains_point (pict->common.src_clip, tx, ty, &dummy)) {
+                                uint32_t c = fetch(pict, tx, ty);
+
+                                srtot += Red(c) * *p;
+                                sgtot += Green(c) * *p;
+                                sbtot += Blue(c) * *p;
+                                satot += Alpha(c) * *p;
+                            }
+                        }
+                        p++;
+                    }
+                }
+
+                satot >>= 16;
+                srtot >>= 16;
+                sgtot >>= 16;
+                sbtot >>= 16;
+
+                if (satot < 0) satot = 0; else if (satot > 0xff) satot = 0xff;
+                if (srtot < 0) srtot = 0; else if (srtot > 0xff) srtot = 0xff;
+                if (sgtot < 0) sgtot = 0; else if (sgtot > 0xff) sgtot = 0xff;
+                if (sbtot < 0) sbtot = 0; else if (sbtot > 0xff) sbtot = 0xff;
+
+                *(buffer + i) = ((satot << 24) |
+                                 (srtot << 16) |
+                                 (sgtot <<  8) |
+                                 (sbtot       ));
+            }
+        }
+        v.vector[0] += unit.vector[0];
+        v.vector[1] += unit.vector[1];
+        v.vector[2] += unit.vector[2];
+    }
+}
+
+static void
+adjust (pixman_vector_t *v, pixman_vector_t *u, pixman_fixed_t adjustment)
+{
+    int delta_v = (adjustment * v->vector[2]) >> 16;
+    int delta_u = (adjustment * u->vector[2]) >> 16;
+    
+    v->vector[0] += delta_v;
+    v->vector[1] += delta_v;
+    
+    u->vector[0] += delta_u;
+    u->vector[1] += delta_u;
+}
+
+void
+FB_FETCH_TRANSFORMED(bits_image_t * pict, int x, int y, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
+{
+    uint32_t     *bits;
+    int32_t    stride;
+    pixman_vector_t v;
+    pixman_vector_t unit;
+    pixman_bool_t affine = TRUE;
+
+    bits = pict->bits;
+    stride = pict->rowstride;
+
+    /* reference point is the center of the pixel */
+    v.vector[0] = pixman_int_to_fixed(x) + pixman_fixed_1 / 2;
+    v.vector[1] = pixman_int_to_fixed(y) + pixman_fixed_1 / 2;
+    v.vector[2] = pixman_fixed_1;
+
+    /* when using convolution filters or PIXMAN_REPEAT_PAD one might get here without a transform */
+    if (pict->common.transform)
+    {
+        if (!pixman_transform_point_3d (pict->common.transform, &v))
+            return;
+        unit.vector[0] = pict->common.transform->matrix[0][0];
+        unit.vector[1] = pict->common.transform->matrix[1][0];
+        unit.vector[2] = pict->common.transform->matrix[2][0];
+        affine = v.vector[2] == pixman_fixed_1 && unit.vector[2] == 0;
+    }
+    else
+    {
+        unit.vector[0] = pixman_fixed_1;
+        unit.vector[1] = 0;
+        unit.vector[2] = 0;
+    }
+
+    /* This allows filtering code to pretend that pixels are located at integer coordinates */
+    adjust (&v, &unit, -(pixman_fixed_1 / 2));
+    
+    if (pict->common.filter == PIXMAN_FILTER_NEAREST || pict->common.filter == PIXMAN_FILTER_FAST)
+    {
+	/* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */
+	adjust (&v, &unit, pixman_fixed_1 / 2 - pixman_fixed_e);
+	
+        if (pict->common.repeat == PIXMAN_REPEAT_NORMAL)
+        {
+            fbFetchTransformed_Nearest_Normal(pict, width, buffer, mask, maskBits, affine, v, unit);
+
+        }
+        else if (pict->common.repeat == PIXMAN_REPEAT_PAD)
+        {
+            fbFetchTransformed_Nearest_Pad(pict, width, buffer, mask, maskBits, affine, v, unit);
+        }
+        else
+        {
+            fbFetchTransformed_Nearest_General(pict, width, buffer, mask, maskBits, affine, v, unit);
+        }
+    } else if (pict->common.filter == PIXMAN_FILTER_BILINEAR	||
+	       pict->common.filter == PIXMAN_FILTER_GOOD	||
+	       pict->common.filter == PIXMAN_FILTER_BEST)
+    {
+        if (pict->common.repeat == PIXMAN_REPEAT_NORMAL)
+        {
+            fbFetchTransformed_Bilinear_Normal(pict, width, buffer, mask, maskBits, affine, v, unit);
+        }
+        else if (pict->common.repeat == PIXMAN_REPEAT_PAD)
+        {
+            fbFetchTransformed_Bilinear_Pad(pict, width, buffer, mask, maskBits, affine, v, unit);
+        }
+        else
+        {
+            fbFetchTransformed_Bilinear_General(pict, width, buffer, mask, maskBits, affine, v, unit);
+        }
+    }
+    else if (pict->common.filter == PIXMAN_FILTER_CONVOLUTION)
+    {
+	/* Round to closest integer, ensuring that 0.5 rounds to 0, not 1 */
+	adjust (&v, &unit, pixman_fixed_1 / 2 - pixman_fixed_e);
+	
+        fbFetchTransformed_Convolution(pict, width, buffer, mask, maskBits, affine, v, unit);
+    }
+}
+
+#define SCANLINE_BUFFER_LENGTH 2048
+
+void
+FB_FETCH_EXTERNAL_ALPHA(bits_image_t * pict, int x, int y, int width,
+                        uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
+{
+    int i;
+    uint32_t _alpha_buffer[SCANLINE_BUFFER_LENGTH];
+    uint32_t *alpha_buffer = _alpha_buffer;
+
+    if (!pict->common.alpha_map) {
+        FB_FETCH_TRANSFORMED (pict, x, y, width, buffer, mask, maskBits);
+	return;
+    }
+    if (width > SCANLINE_BUFFER_LENGTH)
+        alpha_buffer = (uint32_t *) pixman_malloc_ab (width, sizeof(uint32_t));
+
+    FB_FETCH_TRANSFORMED(pict, x, y, width, buffer, mask, maskBits);
+    FB_FETCH_TRANSFORMED((bits_image_t *)pict->common.alpha_map, x - pict->common.alpha_origin.x,
+			 y - pict->common.alpha_origin.y, width, alpha_buffer,
+			 mask, maskBits);
+    for (i = 0; i < width; ++i) {
+        if (!mask || mask[i] & maskBits)
+	{
+	    int a = alpha_buffer[i]>>24;
+	    *(buffer + i) = (a << 24)
+		| (div_255(Red(*(buffer + i)) * a) << 16)
+		| (div_255(Green(*(buffer + i)) * a) << 8)
+		| (div_255(Blue(*(buffer + i)) * a));
+	}
+    }
+
+    if (alpha_buffer != _alpha_buffer)
+        free(alpha_buffer);
+}
+
+void
+FB_STORE_EXTERNAL_ALPHA(bits_image_t * pict, int x, int y, int width,
+                        uint32_t *buffer)
+{
+    uint32_t *bits, *alpha_bits;
+    int32_t stride, astride;
+    int ax, ay;
+    storeProc store;
+    storeProc astore;
+    const pixman_indexed_t * indexed = pict->indexed;
+    const pixman_indexed_t * aindexed;
+
+    if (!pict->common.alpha_map) {
+        // XXX[AGP]: This should never happen!
+        // fbStore(pict, x, y, width, buffer);
+        abort();
+	return;
+    }
+
+    store = STORE_PROC_FOR_PICTURE(pict);
+    astore = STORE_PROC_FOR_PICTURE(pict->common.alpha_map);
+    aindexed = pict->common.alpha_map->indexed;
+
+    ax = x;
+    ay = y;
+
+    bits = pict->bits;
+    stride = pict->rowstride;
+
+    alpha_bits = pict->common.alpha_map->bits;
+    astride = pict->common.alpha_map->rowstride;
+
+    bits       += y*stride;
+    alpha_bits += (ay - pict->common.alpha_origin.y)*astride;
+
+
+    store((pixman_image_t *)pict, bits, buffer, x, width, indexed);
+    astore((pixman_image_t *)pict->common.alpha_map,
+	   alpha_bits, buffer, ax - pict->common.alpha_origin.x, width, aindexed);
+}
+
--- a/gfx/cairo/libpixman/src/pixman-trap.c
+++ b/gfx/cairo/libpixman/src/pixman-trap.c
@@ -44,17 +44,17 @@ pixman_add_traps (pixman_image_t *	image
     pixman_fixed_t	y_off_fixed;
     pixman_edge_t  l, r;
     pixman_fixed_t	t, b;
 
     width = image->bits.width;
     height = image->bits.height;
     bpp = PIXMAN_FORMAT_BPP (image->bits.format);
     
-    x_off_fixed = pixman_int_to_fixed(y_off);
+    x_off_fixed = pixman_int_to_fixed(x_off);
     y_off_fixed = pixman_int_to_fixed(y_off);
 
     while (ntrap--)
     {
 	t = traps->top.y + y_off_fixed;
 	if (t < 0)
 	    t = 0;
 	t = pixman_sample_ceil_y (t, bpp);
@@ -78,18 +78,16 @@ pixman_add_traps (pixman_image_t *	image
 			      traps->top.y + y_off_fixed,
 			      traps->bot.r + x_off_fixed,
 			      traps->bot.y + y_off_fixed);
 	    
 	    pixman_rasterize_edges (image, &l, &r, t, b);
 	}
 	traps++;
     }
-
-    fbFinishAccess (pPicture->pDrawable);
 }
 
 static void
 dump_image (pixman_image_t *image,
 	    const char *title)
 {
     int i, j;
     
--- a/gfx/cairo/libpixman/src/pixman-utils.c
+++ b/gfx/cairo/libpixman/src/pixman-utils.c
@@ -402,8 +402,188 @@ pixman_malloc_abc (unsigned int a,
 {
     if (a >= INT32_MAX / b)
 	return NULL;
     else if (a * b >= INT32_MAX / c)
 	return NULL;
     else
 	return malloc (a * b * c);
 }
+
+
+/**
+ * pixman_version:
+ *
+ * Returns the version of the pixman library encoded in a single
+ * integer as per %PIXMAN_VERSION_ENCODE. The encoding ensures that
+ * later versions compare greater than earlier versions.
+ *
+ * A run-time comparison to check that pixman's version is greater than
+ * or equal to version X.Y.Z could be performed as follows:
+ *
+ * <informalexample><programlisting>
+ * if (pixman_version() >= PIXMAN_VERSION_ENCODE(X,Y,Z)) {...}
+ * </programlisting></informalexample>
+ *
+ * See also pixman_version_string() as well as the compile-time
+ * equivalents %PIXMAN_VERSION and %PIXMAN_VERSION_STRING.
+ *
+ * Return value: the encoded version.
+ **/
+int
+pixman_version (void)
+{
+    return PIXMAN_VERSION;
+}
+
+/**
+ * pixman_version_string:
+ *
+ * Returns the version of the pixman library as a human-readable string
+ * of the form "X.Y.Z".
+ *
+ * See also pixman_version() as well as the compile-time equivalents
+ * %PIXMAN_VERSION_STRING and %PIXMAN_VERSION.
+ *
+ * Return value: a string containing the version.
+ **/
+const char*
+pixman_version_string (void)
+{
+    return PIXMAN_VERSION_STRING;
+}
+
+/**
+ * pixman_format_supported_destination:
+ * @format: A pixman_format_code_t format
+ * 
+ * Return value: whether the provided format code is a supported
+ * format for a pixman surface used as a destination in
+ * rendering.
+ *
+ * Currently, all pixman_format_code_t values are supported
+ * except for the YUV formats.
+ **/
+pixman_bool_t
+pixman_format_supported_destination (pixman_format_code_t format)
+{
+    switch (format) {
+    /* 32 bpp formats */
+    case PIXMAN_a8r8g8b8:
+    case PIXMAN_x8r8g8b8:
+    case PIXMAN_a8b8g8r8:
+    case PIXMAN_x8b8g8r8:
+    case PIXMAN_r8g8b8:
+    case PIXMAN_b8g8r8:
+    case PIXMAN_r5g6b5:
+    case PIXMAN_b5g6r5:
+    /* 16 bpp formats */
+    case PIXMAN_a1r5g5b5:
+    case PIXMAN_x1r5g5b5:
+    case PIXMAN_a1b5g5r5:
+    case PIXMAN_x1b5g5r5:
+    case PIXMAN_a4r4g4b4:
+    case PIXMAN_x4r4g4b4:
+    case PIXMAN_a4b4g4r4:
+    case PIXMAN_x4b4g4r4:
+    /* 8bpp formats */
+    case PIXMAN_a8:
+    case PIXMAN_r3g3b2:
+    case PIXMAN_b2g3r3:
+    case PIXMAN_a2r2g2b2:
+    case PIXMAN_a2b2g2r2:
+    case PIXMAN_c8:
+    case PIXMAN_g8:
+    case PIXMAN_x4a4:
+    /* Collides with PIXMAN_c8
+    case PIXMAN_x4c4:
+    */
+    /* Collides with PIXMAN_g8
+    case PIXMAN_x4g4:
+    */
+    /* 4bpp formats */
+    case PIXMAN_a4:
+    case PIXMAN_r1g2b1:
+    case PIXMAN_b1g2r1:
+    case PIXMAN_a1r1g1b1:
+    case PIXMAN_a1b1g1r1:
+    case PIXMAN_c4:
+    case PIXMAN_g4:
+    /* 1bpp formats */
+    case PIXMAN_a1:
+    case PIXMAN_g1:
+	return TRUE;
+	
+    /* YUV formats */
+    case PIXMAN_yuy2:
+    case PIXMAN_yv12:
+    default:
+	return FALSE;
+    }
+}
+
+/**
+ * pixman_format_supported_source:
+ * @format: A pixman_format_code_t format
+ * 
+ * Return value: whether the provided format code is a supported
+ * format for a pixman surface used as a source in
+ * rendering.
+ *
+ * Currently, all pixman_format_code_t values are supported.
+ **/
+pixman_bool_t
+pixman_format_supported_source (pixman_format_code_t format)
+{
+    switch (format) {
+    /* 32 bpp formats */
+    case PIXMAN_a8r8g8b8:
+    case PIXMAN_x8r8g8b8:
+    case PIXMAN_a8b8g8r8:
+    case PIXMAN_x8b8g8r8:
+    case PIXMAN_r8g8b8:
+    case PIXMAN_b8g8r8:
+    case PIXMAN_r5g6b5:
+    case PIXMAN_b5g6r5:
+    /* 16 bpp formats */
+    case PIXMAN_a1r5g5b5:
+    case PIXMAN_x1r5g5b5:
+    case PIXMAN_a1b5g5r5:
+    case PIXMAN_x1b5g5r5:
+    case PIXMAN_a4r4g4b4:
+    case PIXMAN_x4r4g4b4:
+    case PIXMAN_a4b4g4r4:
+    case PIXMAN_x4b4g4r4:
+    /* 8bpp formats */
+    case PIXMAN_a8:
+    case PIXMAN_r3g3b2:
+    case PIXMAN_b2g3r3:
+    case PIXMAN_a2r2g2b2:
+    case PIXMAN_a2b2g2r2:
+    case PIXMAN_c8:
+    case PIXMAN_g8:
+    case PIXMAN_x4a4:
+    /* Collides with PIXMAN_c8
+    case PIXMAN_x4c4:
+    */
+    /* Collides with PIXMAN_g8
+    case PIXMAN_x4g4:
+    */
+    /* 4bpp formats */
+    case PIXMAN_a4:
+    case PIXMAN_r1g2b1:
+    case PIXMAN_b1g2r1:
+    case PIXMAN_a1r1g1b1:
+    case PIXMAN_a1b1g1r1:
+    case PIXMAN_c4:
+    case PIXMAN_g4:
+    /* 1bpp formats */
+    case PIXMAN_a1:
+    case PIXMAN_g1:
+    /* YUV formats */
+    case PIXMAN_yuy2:
+    case PIXMAN_yv12:
+	return TRUE;
+
+    default:
+	return FALSE;
+    }
+}
new file mode 100644
--- /dev/null
+++ b/gfx/cairo/libpixman/src/pixman-version.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright © 2008 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Author: Carl D. Worth <cworth@cworth.org>
+ */
+
+#ifndef PIXMAN_VERSION_H__
+#define PIXMAN_VERSION_H__
+
+#ifndef PIXMAN_H__
+#  error pixman-version.h should only be included by pixman.h
+#endif
+
+#define PIXMAN_VERSION_MAJOR 0
+#define PIXMAN_VERSION_MINOR 10
+#define PIXMAN_VERSION_MICRO 0
+
+#define PIXMAN_VERSION_STRING "0.10.0"
+
+#define PIXMAN_VERSION_ENCODE(major, minor, micro) (	\
+	  ((major) * 10000)				\
+	+ ((minor) *   100)				\
+	+ ((micro) *     1))
+
+#define PIXMAN_VERSION PIXMAN_VERSION_ENCODE(	\
+	PIXMAN_VERSION_MAJOR,			\
+	PIXMAN_VERSION_MINOR,			\
+	PIXMAN_VERSION_MICRO)
+
+#endif /* PIXMAN_VERSION_H__ */
--- a/gfx/cairo/libpixman/src/pixman.h
+++ b/gfx/cairo/libpixman/src/pixman.h
@@ -64,16 +64,18 @@ SOFTWARE.
  * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
  * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
  * PERFORMANCE OF THIS SOFTWARE.
  */
 
 #ifndef PIXMAN_H__
 #define PIXMAN_H__
 
+#include <pixman-version.h>
+
 /*
  * Standard integers
  */
 #if defined (__SVR4) && defined (__sun)
 #  include <sys/int_types.h>
 #  include <stdint.h>
 #elif defined (__OpenBSD__)
 #  include <inttypes.h>
@@ -267,16 +269,22 @@ struct pixman_region16
 
 typedef enum
 {
     PIXMAN_REGION_OUT,
     PIXMAN_REGION_IN,
     PIXMAN_REGION_PART
 } pixman_region_overlap_t;
 
+PIXMAN_EXPORT
+int			pixman_version (void);
+
+PIXMAN_EXPORT
+const char*		pixman_version_string (void);
+
 /* This function exists only to make it possible to preserve the X ABI - it should
  * go away at first opportunity.
  */
 PIXMAN_EXPORT
 void		        pixman_region_set_static_pointers (pixman_box16_t *empty_box,
 							   pixman_region16_data_t *empty_data,
 							   pixman_region16_data_t *broken_data);
 
@@ -490,16 +498,23 @@ typedef enum {
     
     PIXMAN_g1 =		PIXMAN_FORMAT(1,PIXMAN_TYPE_GRAY,0,0,0,0),
 
 /* YUV formats */
     PIXMAN_yuy2 =	PIXMAN_FORMAT(16,PIXMAN_TYPE_YUY2,0,0,0,0),
     PIXMAN_yv12 =	PIXMAN_FORMAT(12,PIXMAN_TYPE_YV12,0,0,0,0),
 } pixman_format_code_t;
 
+/* Querying supported format values. */
+PIXMAN_EXPORT
+pixman_bool_t	pixman_format_supported_destination (pixman_format_code_t format);
+
+PIXMAN_EXPORT
+pixman_bool_t	pixman_format_supported_source (pixman_format_code_t format);
+
 /* Constructors */
 PIXMAN_EXPORT
 pixman_image_t *pixman_image_create_solid_fill       (pixman_color_t               *color);
 PIXMAN_EXPORT
 pixman_image_t *pixman_image_create_linear_gradient  (pixman_point_fixed_t         *p1,
 						      pixman_point_fixed_t         *p2,
 						      const pixman_gradient_stop_t *stops,
 						      int                           n_stops);
@@ -543,20 +558,16 @@ PIXMAN_EXPORT
 void            pixman_image_set_repeat              (pixman_image_t               *image,
 						      pixman_repeat_t               repeat);
 PIXMAN_EXPORT
 pixman_bool_t   pixman_image_set_filter              (pixman_image_t               *image,
 						      pixman_filter_t               filter,
 						      const pixman_fixed_t         *filter_params,
 						      int                           n_filter_params);
 PIXMAN_EXPORT
-void            pixman_image_set_filter_params       (pixman_image_t               *image,
-						      pixman_fixed_t               *params,
-						      int                           n_params);
-PIXMAN_EXPORT
 void		pixman_image_set_source_clipping     (pixman_image_t		   *image,
 						      pixman_bool_t                 source_clipping);
 PIXMAN_EXPORT
 void            pixman_image_set_alpha_map           (pixman_image_t               *image,
 						      pixman_image_t               *alpha_map,
 						      int16_t                       x,
 						      int16_t                       y);
 PIXMAN_EXPORT