author | Jeff Muizelaar <jmuizelaar@mozilla.com>
date | Fri, 20 Apr 2012 00:07:56 -0400
changeset 92023 | dcbe20a6f53a2c73d6e33876cb33a2a8ca69f407 |
parent 92022 | 7b59d08bd69a3c2396680ce6068dfbffa1a07c3b |
child 92024 | 9c88cc584f049f5ab68ba4e1dd0f298cabec6fa3 |
push id | 22497 |
push user | eakhgari@mozilla.com |
push date | Fri, 20 Apr 2012 17:10:37 +0000 |
treeherder | mozilla-central@17af008937e3
reviewers | joe |
bugs | 746695, 12931 |
milestone | 14.0a1 |
first release with | nightly linux32, nightly linux64, nightly mac, nightly win32, nightly win64
last release without | nightly linux32, nightly linux64, nightly mac, nightly win32, nightly win64
--- a/gfx/cairo/README
+++ b/gfx/cairo/README
@@ -3,17 +3,17 @@ Snapshots of cairo and glitz for mozilla
 
 We only include the relevant parts of each release (generally, src/*.[ch]),
 as we have Makefile.in's that integrate into the Mozilla build system.
 For documentation and similar, please see the official tarballs at
 http://www.cairographics.org/.
 
 VERSIONS:
 
 cairo (12d521df8acc483b2daa844d4f05dc2fe2765ba6)
-pixman (2f4f2fb4859931bf6dc5632d8c919e7296736427)
+pixman (0.24.2)
 
 ==== Patches ====
 
 Some specific things:
 
 max-font-size.patch: Clamp freetype font size to 1000 to avoid overflow issues
 
 win32-logical-font-scale.patch: set CAIRO_WIN32_LOGICAL_FONT_SCALE to 1
@@ -183,13 +183,13 @@ fix-build-with-Werror=return-type.patch:
 
 pixman-android-cpu-detect.patch: Add CPU detection support for Android,
 where we can't reliably access /proc/self/auxv.
 
 pixman-rename-and-endian.patch: include cairo-platform.h for renaming of
 external symbols and endian macros
 
 NOTE: we previously supported ARM assembler on MSVC, this has been removed
 because of the maintenance burden
 
 pixman-export.patch: use cairo_public for PIXMAN_EXPORT to make sure
 pixman symbols are not exported in libxul
 
-bad-memset.patch: Fix third argument in memcmp call in pixman-image.c
+pixman-limits.patch: include limits.h for SIZE_MAX
 
 ==== disable printing patch ====
 
 disable-printing.patch: allows us to use NS_PRINTING to disable printing.
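For context on the pixman-export.patch entry above: a hypothetical sketch of how a PIXMAN_EXPORT-style visibility macro keeps pixman symbols from leaking out of libxul (illustrative only; the real definition lives in cairo-platform.h and may differ, and pixman_example_api is a made-up declaration):

/* Hypothetical visibility macro in the spirit of cairo_public /
 * PIXMAN_EXPORT: mark entry points hidden so they stay internal
 * to the containing library instead of being exported. */
#if defined (__GNUC__) && (__GNUC__ >= 4)
# define PIXMAN_EXPORT __attribute__ ((visibility ("hidden")))
#else
# define PIXMAN_EXPORT
#endif

/* Every public entry point is declared through the macro, so one
 * definition controls whether the symbol escapes the library. */
PIXMAN_EXPORT int
pixman_example_api (void);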
deleted file mode 100644
--- a/gfx/cairo/bad-memset.patch
+++ /dev/null
@@ -1,28 +0,0 @@
-# HG changeset patch
-# Parent a968f1430c74282c673c01a2c0a4417e238f2de7
-# User Thomas Prip Vestergaard <thomas@prip.nu>
-Bug 710992 - Fix third argument in memcmp call in pixman-image.c
-
-
-diff --git a/gfx/cairo/libpixman/src/pixman-image.c b/gfx/cairo/libpixman/src/pixman-image.c
---- a/gfx/cairo/libpixman/src/pixman-image.c
-+++ b/gfx/cairo/libpixman/src/pixman-image.c
-@@ -512,17 +512,17 @@ pixman_image_set_transform (pixman_image
-	free (common->transform);
-	common->transform = NULL;
-	result = TRUE;
-
-	goto out;
-    }
-
-    if (common->transform &&
--	memcmp (common->transform, transform, sizeof (pixman_transform_t) == 0))
-+	memcmp (common->transform, transform, sizeof (pixman_transform_t)) == 0)
-    {
-	return TRUE;
-    }
-
-    if (common->transform == NULL)
-	common->transform = malloc (sizeof (pixman_transform_t));
-
-    if (common->transform == NULL)
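The patch above is deleted because the fix landed upstream in pixman 0.24.2. The bug it fixed is a one-character operator-precedence slip worth spelling out; a minimal standalone illustration (transform_t is a hypothetical stand-in for pixman_transform_t):

#include <stdio.h>
#include <string.h>

/* Stand-in for pixman_transform_t (a 3x3 fixed-point matrix). */
typedef struct { int m[9]; } transform_t;

int
main (void)
{
    transform_t a = {{ 1, 2, 3, 4, 5, 6, 7, 8, 9 }};
    transform_t b = {{ 9, 8, 7, 6, 5, 4, 3, 2, 1 }};

    /* Buggy form: 'sizeof (transform_t) == 0' is evaluated first and
     * yields 0, so memcmp compares zero bytes and reports "equal"
     * for any two transforms. */
    int buggy = memcmp (&a, &b, sizeof (transform_t) == 0) == 0;

    /* Fixed form: pass the full size, then compare the result to 0. */
    int fixed = memcmp (&a, &b, sizeof (transform_t)) == 0;

    printf ("buggy says equal: %d, fixed says equal: %d\n", buggy, fixed);
    /* Prints: buggy says equal: 1, fixed says equal: 0 */
    return 0;
}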
--- a/gfx/cairo/libpixman/src/Makefile.in
+++ b/gfx/cairo/libpixman/src/Makefile.in
@@ -124,16 +124,17 @@ CSRCS = \
 	pixman-edge-accessors.c \
 	pixman-fast-path.c \
 	pixman-general.c \
 	pixman-gradient-walker.c \
 	pixman-image.c \
 	pixman-implementation.c \
 	pixman-linear-gradient.c \
 	pixman-matrix.c \
+	pixman-noop.c \
 	pixman-radial-gradient.c \
 	pixman-region16.c \
 	pixman-region32.c \
 	pixman-solid-fill.c \
 	pixman-trap.c \
 	pixman-utils.c \
 	$(NULL)
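The pixman-access.c rewrite below replaces dozens of hand-written per-format fetch/store scanline functions with accessors generated by a single MAKE_ACCESSORS macro, all funneling through a generic convert_pixel path. That path widens narrow channels via pixman's unorm_to_unorm, which replicates the source bits so full intensity maps to full intensity. A loop-based sketch of that idea (assumed semantics; the real unorm_to_unorm is not shown in this diff):

#include <stdint.h>
#include <stdio.h>

/* Widen an unsigned-normalized value from 'from_bits' to 'to_bits'
 * (both at most 8 here) by bit replication, so the maximum input maps
 * to the maximum output: 5-bit 0x1f becomes 8-bit 0xff, not 0xf8. */
static uint32_t
widen_unorm (uint32_t val, int from_bits, int to_bits)
{
    uint32_t result;

    if (from_bits == 0)
        return 0;

    val &= (1u << from_bits) - 1;

    if (from_bits >= to_bits)
        return val >> (from_bits - to_bits);

    /* Place the source bits at the top of the result... */
    result = val << (to_bits - from_bits);

    /* ...then copy them downward, doubling the filled width each
     * pass, until all to_bits are populated. */
    while (from_bits < to_bits)
    {
        result |= result >> from_bits;
        from_bits *= 2;
    }

    return result & ((1u << to_bits) - 1);
}

int
main (void)
{
    uint16_t p = 0xffff;  /* white in r5g6b5 */

    uint32_t r = widen_unorm ((p >> 11) & 0x1f, 5, 8);
    uint32_t g = widen_unorm ((p >> 5) & 0x3f, 6, 8);
    uint32_t b = widen_unorm (p & 0x1f, 5, 8);

    /* Prints the a8r8g8b8 pixel: 0xffffffff */
    printf ("0x%08x\n", 0xff000000u | (r << 16) | (g << 8) | b);
    return 0;
}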
--- a/gfx/cairo/libpixman/src/pixman-access.c +++ b/gfx/cairo/libpixman/src/pixman-access.c @@ -40,24 +40,129 @@ (((s) >> 8) & 0xff) * 301 + \ (((s) ) & 0xff) * 58) >> 2) #define CONVERT_RGB24_TO_RGB15(s) \ ((((s) >> 3) & 0x001f) | \ (((s) >> 6) & 0x03e0) | \ (((s) >> 9) & 0x7c00)) -#define RGB15_TO_ENTRY(mif,rgb15) \ - ((mif)->ent[rgb15]) +/* Fetch macros */ + +#ifdef WORDS_BIGENDIAN +#define FETCH_1(img,l,o) \ + (((READ ((img), ((uint32_t *)(l)) + ((o) >> 5))) >> (0x1f - ((o) & 0x1f))) & 0x1) +#else +#define FETCH_1(img,l,o) \ + ((((READ ((img), ((uint32_t *)(l)) + ((o) >> 5))) >> ((o) & 0x1f))) & 0x1) +#endif + +#define FETCH_8(img,l,o) (READ (img, (((uint8_t *)(l)) + ((o) >> 3)))) + +#ifdef WORDS_BIGENDIAN +#define FETCH_4(img,l,o) \ + (((4 * (o)) & 4) ? (FETCH_8 (img,l, 4 * (o)) & 0xf) : (FETCH_8 (img,l,(4 * (o))) >> 4)) +#else +#define FETCH_4(img,l,o) \ + (((4 * (o)) & 4) ? (FETCH_8 (img, l, 4 * (o)) >> 4) : (FETCH_8 (img, l, (4 * (o))) & 0xf)) +#endif + +#ifdef WORDS_BIGENDIAN +#define FETCH_24(img,l,o) \ + ((READ (img, (((uint8_t *)(l)) + ((o) * 3) + 0)) << 16) | \ + (READ (img, (((uint8_t *)(l)) + ((o) * 3) + 1)) << 8) | \ + (READ (img, (((uint8_t *)(l)) + ((o) * 3) + 2)) << 0)) +#else +#define FETCH_24(img,l,o) \ + ((READ (img, (((uint8_t *)(l)) + ((o) * 3) + 0)) << 0) | \ + (READ (img, (((uint8_t *)(l)) + ((o) * 3) + 1)) << 8) | \ + (READ (img, (((uint8_t *)(l)) + ((o) * 3) + 2)) << 16)) +#endif + +/* Store macros */ -#define RGB24_TO_ENTRY(mif,rgb24) \ - RGB15_TO_ENTRY (mif,CONVERT_RGB24_TO_RGB15 (rgb24)) +#ifdef WORDS_BIGENDIAN +#define STORE_1(img,l,o,v) \ + do \ + { \ + uint32_t *__d = ((uint32_t *)(l)) + ((o) >> 5); \ + uint32_t __m, __v; \ + \ + __m = 1 << (0x1f - ((o) & 0x1f)); \ + __v = (v)? __m : 0; \ + \ + WRITE((img), __d, (READ((img), __d) & ~__m) | __v); \ + } \ + while (0) +#else +#define STORE_1(img,l,o,v) \ + do \ + { \ + uint32_t *__d = ((uint32_t *)(l)) + ((o) >> 5); \ + uint32_t __m, __v; \ + \ + __m = 1 << ((o) & 0x1f); \ + __v = (v)? __m : 0; \ + \ + WRITE((img), __d, (READ((img), __d) & ~__m) | __v); \ + } \ + while (0) +#endif + +#define STORE_8(img,l,o,v) (WRITE (img, (uint8_t *)(l) + ((o) >> 3), (v))) -#define RGB24_TO_ENTRY_Y(mif,rgb24) \ - ((mif)->ent[CONVERT_RGB24_TO_Y15 (rgb24)]) +#ifdef WORDS_BIGENDIAN +#define STORE_4(img,l,o,v) \ + do \ + { \ + int bo = 4 * (o); \ + int v4 = (v) & 0x0f; \ + \ + STORE_8 (img, l, bo, ( \ + bo & 4 ? \ + (FETCH_8 (img, l, bo) & 0xf0) | (v4) : \ + (FETCH_8 (img, l, bo) & 0x0f) | (v4 << 4))); \ + } while (0) +#else +#define STORE_4(img,l,o,v) \ + do \ + { \ + int bo = 4 * (o); \ + int v4 = (v) & 0x0f; \ + \ + STORE_8 (img, l, bo, ( \ + bo & 4 ? 
\ + (FETCH_8 (img, l, bo) & 0x0f) | (v4 << 4) : \ + (FETCH_8 (img, l, bo) & 0xf0) | (v4))); \ + } while (0) +#endif + +#ifdef WORDS_BIGENDIAN +#define STORE_24(img,l,o,v) \ + do \ + { \ + uint8_t *__tmp = (l) + 3 * (o); \ + \ + WRITE ((img), __tmp++, ((v) & 0x00ff0000) >> 16); \ + WRITE ((img), __tmp++, ((v) & 0x0000ff00) >> 8); \ + WRITE ((img), __tmp++, ((v) & 0x000000ff) >> 0); \ + } \ + while (0) +#else +#define STORE_24(img,l,o,v) \ + do \ + { \ + uint8_t *__tmp = (l) + 3 * (o); \ + \ + WRITE ((img), __tmp++, ((v) & 0x000000ff) >> 0); \ + WRITE ((img), __tmp++, ((v) & 0x0000ff00) >> 8); \ + WRITE ((img), __tmp++, ((v) & 0x00ff0000) >> 16); \ + } \ + while (0) +#endif /* * YV12 setup and access macros */ #define YV12_SETUP(image) \ bits_image_t *__bits_image = (bits_image_t *)image; \ uint32_t *bits = __bits_image->bits; \ @@ -81,204 +186,330 @@ #define YV12_U(line) \ ((uint8_t *) ((bits) + offset1 + \ ((stride) >> 1) * ((line) >> 1))) #define YV12_V(line) \ ((uint8_t *) ((bits) + offset0 + \ ((stride) >> 1) * ((line) >> 1))) -/********************************** Fetch ************************************/ +/* Misc. helpers */ -static void -fetch_scanline_a8r8g8b8 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - - MEMCPY_WRAPPED (image, - buffer, (const uint32_t *)bits + x, - width * sizeof(uint32_t)); -} - -static void -fetch_scanline_x8r8g8b8 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) +static force_inline void +get_shifts (pixman_format_code_t format, + int *a, + int *r, + int *g, + int *b) { - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint32_t *pixel = (const uint32_t *)bits + x; - const uint32_t *end = pixel + width; - - while (pixel < end) - *buffer++ = READ (image, pixel++) | 0xff000000; -} + switch (PIXMAN_FORMAT_TYPE (format)) + { + case PIXMAN_TYPE_A: + *b = 0; + *g = 0; + *r = 0; + *a = 0; + break; + + case PIXMAN_TYPE_ARGB: + *b = 0; + *g = *b + PIXMAN_FORMAT_B (format); + *r = *g + PIXMAN_FORMAT_G (format); + *a = *r + PIXMAN_FORMAT_R (format); + break; -static void -fetch_scanline_a8b8g8r8 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint32_t *pixel = (uint32_t *)bits + x; - const uint32_t *end = pixel + width; - - while (pixel < end) - { - uint32_t p = READ (image, pixel++); - - *buffer++ = (p & 0xff00ff00) | - ((p >> 16) & 0xff) | - ((p & 0xff) << 16); + case PIXMAN_TYPE_ABGR: + *r = 0; + *g = *r + PIXMAN_FORMAT_R (format); + *b = *g + PIXMAN_FORMAT_G (format); + *a = *b + PIXMAN_FORMAT_B (format); + break; + + case PIXMAN_TYPE_BGRA: + /* With BGRA formats we start counting at the high end of the pixel */ + *b = PIXMAN_FORMAT_BPP (format) - PIXMAN_FORMAT_B (format); + *g = *b - PIXMAN_FORMAT_B (format); + *r = *g - PIXMAN_FORMAT_G (format); + *a = *r - PIXMAN_FORMAT_R (format); + break; + + case PIXMAN_TYPE_RGBA: + /* With BGRA formats we start counting at the high end of the pixel */ + *r = PIXMAN_FORMAT_BPP (format) - PIXMAN_FORMAT_R (format); + *g = *r - PIXMAN_FORMAT_R (format); + *b = *g - PIXMAN_FORMAT_G (format); + *a = *b - PIXMAN_FORMAT_B (format); + break; + + default: + assert (0); + break; } } -static void -fetch_scanline_x8b8g8r8 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - 
const uint32_t *mask) +static force_inline uint32_t +convert_channel (uint32_t pixel, uint32_t def_value, + int n_from_bits, int from_shift, + int n_to_bits, int to_shift) +{ + uint32_t v; + + if (n_from_bits && n_to_bits) + v = unorm_to_unorm (pixel >> from_shift, n_from_bits, n_to_bits); + else if (n_to_bits) + v = def_value; + else + v = 0; + + return (v & ((1 << n_to_bits) - 1)) << to_shift; +} + +static force_inline uint32_t +convert_pixel (pixman_format_code_t from, pixman_format_code_t to, uint32_t pixel) { - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint32_t *pixel = (uint32_t *)bits + x; - const uint32_t *end = pixel + width; - - while (pixel < end) + int a_from_shift, r_from_shift, g_from_shift, b_from_shift; + int a_to_shift, r_to_shift, g_to_shift, b_to_shift; + uint32_t a, r, g, b; + + get_shifts (from, &a_from_shift, &r_from_shift, &g_from_shift, &b_from_shift); + get_shifts (to, &a_to_shift, &r_to_shift, &g_to_shift, &b_to_shift); + + a = convert_channel (pixel, ~0, + PIXMAN_FORMAT_A (from), a_from_shift, + PIXMAN_FORMAT_A (to), a_to_shift); + + r = convert_channel (pixel, 0, + PIXMAN_FORMAT_R (from), r_from_shift, + PIXMAN_FORMAT_R (to), r_to_shift); + + g = convert_channel (pixel, 0, + PIXMAN_FORMAT_G (from), g_from_shift, + PIXMAN_FORMAT_G (to), g_to_shift); + + b = convert_channel (pixel, 0, + PIXMAN_FORMAT_B (from), b_from_shift, + PIXMAN_FORMAT_B (to), b_to_shift); + + return a | r | g | b; +} + +static force_inline uint32_t +convert_pixel_to_a8r8g8b8 (pixman_image_t *image, + pixman_format_code_t format, + uint32_t pixel) +{ + if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_GRAY || + PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_COLOR) { - uint32_t p = READ (image, pixel++); - - *buffer++ = 0xff000000 | - (p & 0x0000ff00) | - ((p >> 16) & 0xff) | - ((p & 0xff) << 16); + return image->bits.indexed->rgba[pixel]; + } + else + { + return convert_pixel (format, PIXMAN_a8r8g8b8, pixel); + } +} + +static force_inline uint32_t +convert_pixel_from_a8r8g8b8 (pixman_image_t *image, + pixman_format_code_t format, uint32_t pixel) +{ + if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_GRAY) + { + pixel = CONVERT_RGB24_TO_Y15 (pixel); + + return image->bits.indexed->ent[pixel & 0x7fff]; + } + else if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_COLOR) + { + pixel = convert_pixel (PIXMAN_a8r8g8b8, PIXMAN_x1r5g5b5, pixel); + + return image->bits.indexed->ent[pixel & 0x7fff]; + } + else + { + return convert_pixel (PIXMAN_a8r8g8b8, format, pixel); } } -static void -fetch_scanline_b8g8r8a8 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) +static force_inline uint32_t +fetch_and_convert_pixel (pixman_image_t * image, + const uint8_t * bits, + int offset, + pixman_format_code_t format) { - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint32_t *pixel = (uint32_t *)bits + x; - const uint32_t *end = pixel + width; + uint32_t pixel; - while (pixel < end) + switch (PIXMAN_FORMAT_BPP (format)) { - uint32_t p = READ (image, pixel++); + case 1: + pixel = FETCH_1 (image, bits, offset); + break; + + case 4: + pixel = FETCH_4 (image, bits, offset); + break; + + case 8: + pixel = READ (image, bits + offset); + break; - *buffer++ = (((p & 0xff000000) >> 24) | - ((p & 0x00ff0000) >> 8) | - ((p & 0x0000ff00) << 8) | - ((p & 0x000000ff) << 24)); + case 16: + pixel = READ (image, ((uint16_t *)bits + offset)); + break; + + case 24: + pixel = FETCH_24 (image, bits, offset); + break; + + case 32: + 
pixel = READ (image, ((uint32_t *)bits + offset)); + break; + + default: + pixel = 0xffff00ff; /* As ugly as possible to detect the bug */ + break; } + + return convert_pixel_to_a8r8g8b8 (image, format, pixel); } -static void -fetch_scanline_b8g8r8x8 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) +static force_inline void +convert_and_store_pixel (bits_image_t * image, + uint8_t * dest, + int offset, + pixman_format_code_t format, + uint32_t pixel) { - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint32_t *pixel = (uint32_t *)bits + x; - const uint32_t *end = pixel + width; - - while (pixel < end) + uint32_t converted = convert_pixel_from_a8r8g8b8 ( + (pixman_image_t *)image, format, pixel); + + switch (PIXMAN_FORMAT_BPP (format)) { - uint32_t p = READ (image, pixel++); - - *buffer++ = (0xff000000 | - ((p & 0xff000000) >> 24) | - ((p & 0x00ff0000) >> 8) | - ((p & 0x0000ff00) << 8)); + case 1: + STORE_1 (image, dest, offset, converted & 0x01); + break; + + case 4: + STORE_4 (image, dest, offset, converted & 0xf); + break; + + case 8: + WRITE (image, (dest + offset), converted & 0xff); + break; + + case 16: + WRITE (image, ((uint16_t *)dest + offset), converted & 0xffff); + break; + + case 24: + STORE_24 (image, dest, offset, converted); + break; + + case 32: + WRITE (image, ((uint32_t *)dest + offset), converted); + break; + + default: + *dest = 0x0; + break; } } -static void -fetch_scanline_r8g8b8a8 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint32_t *pixel = (uint32_t *)bits + x; - const uint32_t *end = pixel + width; - - while (pixel < end) - { - uint32_t p = READ (image, pixel++); - - *buffer++ = (((p & 0x000000ff) << 24) | (p >> 8)); - } -} +#define MAKE_ACCESSORS(format) \ + static void \ + fetch_scanline_ ## format (pixman_image_t *image, \ + int x, \ + int y, \ + int width, \ + uint32_t * buffer, \ + const uint32_t *mask) \ + { \ + uint8_t *bits = \ + (uint8_t *)(image->bits.bits + y * image->bits.rowstride); \ + int i; \ + \ + for (i = 0; i < width; ++i) \ + { \ + *buffer++ = \ + fetch_and_convert_pixel (image, bits, x + i, PIXMAN_ ## format); \ + } \ + } \ + \ + static void \ + store_scanline_ ## format (bits_image_t * image, \ + int x, \ + int y, \ + int width, \ + const uint32_t *values) \ + { \ + uint8_t *dest = \ + (uint8_t *)(image->bits + y * image->rowstride); \ + int i; \ + \ + for (i = 0; i < width; ++i) \ + { \ + convert_and_store_pixel ( \ + image, dest, i + x, PIXMAN_ ## format, values[i]); \ + } \ + } \ + \ + static uint32_t \ + fetch_pixel_ ## format (bits_image_t *image, \ + int offset, \ + int line) \ + { \ + uint8_t *bits = \ + (uint8_t *)(image->bits + line * image->rowstride); \ + \ + return fetch_and_convert_pixel ((pixman_image_t *)image, \ + bits, offset, PIXMAN_ ## format); \ + } \ + \ + static const void *const __dummy__ ## format -static void -fetch_scanline_r8g8b8x8 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint32_t *pixel = (uint32_t *)bits + x; - const uint32_t *end = pixel + width; - - while (pixel < end) - { - uint32_t p = READ (image, pixel++); - - *buffer++ = (0xff000000 | (p >> 8)); - } -} +MAKE_ACCESSORS(a8r8g8b8); +MAKE_ACCESSORS(x8r8g8b8); +MAKE_ACCESSORS(a8b8g8r8); 
+MAKE_ACCESSORS(x8b8g8r8); +MAKE_ACCESSORS(x14r6g6b6); +MAKE_ACCESSORS(b8g8r8a8); +MAKE_ACCESSORS(b8g8r8x8); +MAKE_ACCESSORS(r8g8b8x8); +MAKE_ACCESSORS(r8g8b8a8); +MAKE_ACCESSORS(r8g8b8); +MAKE_ACCESSORS(b8g8r8); +MAKE_ACCESSORS(r5g6b5); +MAKE_ACCESSORS(b5g6r5); +MAKE_ACCESSORS(a1r5g5b5); +MAKE_ACCESSORS(x1r5g5b5); +MAKE_ACCESSORS(a1b5g5r5); +MAKE_ACCESSORS(x1b5g5r5); +MAKE_ACCESSORS(a4r4g4b4); +MAKE_ACCESSORS(x4r4g4b4); +MAKE_ACCESSORS(a4b4g4r4); +MAKE_ACCESSORS(x4b4g4r4); +MAKE_ACCESSORS(a8); +MAKE_ACCESSORS(c8); +MAKE_ACCESSORS(g8); +MAKE_ACCESSORS(r3g3b2); +MAKE_ACCESSORS(b2g3r3); +MAKE_ACCESSORS(a2r2g2b2); +MAKE_ACCESSORS(a2b2g2r2); +MAKE_ACCESSORS(x4a4); +MAKE_ACCESSORS(a4); +MAKE_ACCESSORS(g4); +MAKE_ACCESSORS(c4); +MAKE_ACCESSORS(r1g2b1); +MAKE_ACCESSORS(b1g2r1); +MAKE_ACCESSORS(a1r1g1b1); +MAKE_ACCESSORS(a1b1g1r1); +MAKE_ACCESSORS(a1); +MAKE_ACCESSORS(g1); -static void -fetch_scanline_x14r6g6b6 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint32_t *pixel = (const uint32_t *)bits + x; - const uint32_t *end = pixel + width; - - while (pixel < end) - { - uint32_t p = READ (image, pixel++); - uint32_t r, g, b; - - r = ((p & 0x3f000) << 6) | ((p & 0x30000)); - g = ((p & 0x00fc0) << 4) | ((p & 0x00c00) >> 2); - b = ((p & 0x0003f) << 2) | ((p & 0x00030) >> 4); - - *buffer++ = 0xff000000 | r | g | b; - } -} +/********************************** Fetch ************************************/ /* Expects a uint64_t buffer */ static void fetch_scanline_a2r10g10b10 (pixman_image_t *image, int x, int y, int width, uint32_t * b, @@ -399,708 +630,16 @@ fetch_scanline_x2b10g10r10 (pixman_image g = g << 6 | g >> 4; b = b << 6 | b >> 4; *buffer++ = 0xffffULL << 48 | r << 32 | g << 16 | b; } } static void -fetch_scanline_r8g8b8 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint8_t *pixel = (const uint8_t *)bits + 3 * x; - const uint8_t *end = pixel + 3 * width; - - while (pixel < end) - { - uint32_t b = 0xff000000; - -#ifdef WORDS_BIGENDIAN - b |= (READ (image, pixel++) << 16); - b |= (READ (image, pixel++) << 8); - b |= (READ (image, pixel++)); -#else - b |= (READ (image, pixel++)); - b |= (READ (image, pixel++) << 8); - b |= (READ (image, pixel++) << 16); -#endif - - *buffer++ = b; - } -} - -static void -fetch_scanline_b8g8r8 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint8_t *pixel = (const uint8_t *)bits + 3 * x; - const uint8_t *end = pixel + 3 * width; - - while (pixel < end) - { - uint32_t b = 0xff000000; -#ifdef WORDS_BIGENDIAN - b |= (READ (image, pixel++)); - b |= (READ (image, pixel++) << 8); - b |= (READ (image, pixel++) << 16); -#else - b |= (READ (image, pixel++) << 16); - b |= (READ (image, pixel++) << 8); - b |= (READ (image, pixel++)); -#endif - *buffer++ = b; - } -} - -static void -fetch_scanline_r5g6b5 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint16_t *pixel = (const uint16_t *)bits + x; - const uint16_t *end = pixel + width; - - while (pixel < end) - { - uint32_t p = READ (image, pixel++); - uint32_t r = (((p) << 3) & 0xf8) | - (((p) << 5) & 
0xfc00) | - (((p) << 8) & 0xf80000); - - r |= (r >> 5) & 0x70007; - r |= (r >> 6) & 0x300; - - *buffer++ = 0xff000000 | r; - } -} - -static void -fetch_scanline_b5g6r5 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint16_t *pixel = (const uint16_t *)bits + x; - const uint16_t *end = pixel + width; - - while (pixel < end) - { - uint32_t p = READ (image, pixel++); - uint32_t r, g, b; - - b = ((p & 0xf800) | ((p & 0xe000) >> 5)) >> 8; - g = ((p & 0x07e0) | ((p & 0x0600) >> 6)) << 5; - r = ((p & 0x001c) | ((p & 0x001f) << 5)) << 14; - - *buffer++ = 0xff000000 | r | g | b; - } -} - -static void -fetch_scanline_a1r5g5b5 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint16_t *pixel = (const uint16_t *)bits + x; - const uint16_t *end = pixel + width; - - while (pixel < end) - { - uint32_t p = READ (image, pixel++); - uint32_t r, g, b, a; - - a = (uint32_t) ((uint8_t) (0 - ((p & 0x8000) >> 15))) << 24; - r = ((p & 0x7c00) | ((p & 0x7000) >> 5)) << 9; - g = ((p & 0x03e0) | ((p & 0x0380) >> 5)) << 6; - b = ((p & 0x001c) | ((p & 0x001f) << 5)) >> 2; - - *buffer++ = a | r | g | b; - } -} - -static void -fetch_scanline_x1r5g5b5 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint16_t *pixel = (const uint16_t *)bits + x; - const uint16_t *end = pixel + width; - - while (pixel < end) - { - uint32_t p = READ (image, pixel++); - uint32_t r, g, b; - - r = ((p & 0x7c00) | ((p & 0x7000) >> 5)) << 9; - g = ((p & 0x03e0) | ((p & 0x0380) >> 5)) << 6; - b = ((p & 0x001c) | ((p & 0x001f) << 5)) >> 2; - - *buffer++ = 0xff000000 | r | g | b; - } -} - -static void -fetch_scanline_a1b5g5r5 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint16_t *pixel = (const uint16_t *)bits + x; - const uint16_t *end = pixel + width; - uint32_t r, g, b, a; - - while (pixel < end) - { - uint32_t p = READ (image, pixel++); - - a = (uint32_t) ((uint8_t) (0 - ((p & 0x8000) >> 15))) << 24; - b = ((p & 0x7c00) | ((p & 0x7000) >> 5)) >> 7; - g = ((p & 0x03e0) | ((p & 0x0380) >> 5)) << 6; - r = ((p & 0x001c) | ((p & 0x001f) << 5)) << 14; - - *buffer++ = a | r | g | b; - } -} - -static void -fetch_scanline_x1b5g5r5 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint16_t *pixel = (const uint16_t *)bits + x; - const uint16_t *end = pixel + width; - - while (pixel < end) - { - uint32_t p = READ (image, pixel++); - uint32_t r, g, b; - - b = ((p & 0x7c00) | ((p & 0x7000) >> 5)) >> 7; - g = ((p & 0x03e0) | ((p & 0x0380) >> 5)) << 6; - r = ((p & 0x001c) | ((p & 0x001f) << 5)) << 14; - - *buffer++ = 0xff000000 | r | g | b; - } -} - -static void -fetch_scanline_a4r4g4b4 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint16_t *pixel = (const uint16_t *)bits + x; - const uint16_t *end = pixel + width; - - while (pixel < end) - { - uint32_t p = READ (image, 
pixel++); - uint32_t r, g, b, a; - - a = ((p & 0xf000) | ((p & 0xf000) >> 4)) << 16; - r = ((p & 0x0f00) | ((p & 0x0f00) >> 4)) << 12; - g = ((p & 0x00f0) | ((p & 0x00f0) >> 4)) << 8; - b = ((p & 0x000f) | ((p & 0x000f) << 4)); - - *buffer++ = a | r | g | b; - } -} - -static void -fetch_scanline_x4r4g4b4 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint16_t *pixel = (const uint16_t *)bits + x; - const uint16_t *end = pixel + width; - - while (pixel < end) - { - uint32_t p = READ (image, pixel++); - uint32_t r, g, b; - - r = ((p & 0x0f00) | ((p & 0x0f00) >> 4)) << 12; - g = ((p & 0x00f0) | ((p & 0x00f0) >> 4)) << 8; - b = ((p & 0x000f) | ((p & 0x000f) << 4)); - - *buffer++ = 0xff000000 | r | g | b; - } -} - -static void -fetch_scanline_a4b4g4r4 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint16_t *pixel = (const uint16_t *)bits + x; - const uint16_t *end = pixel + width; - - while (pixel < end) - { - uint32_t p = READ (image, pixel++); - uint32_t r, g, b, a; - - a = ((p & 0xf000) | ((p & 0xf000) >> 4)) << 16; - b = ((p & 0x0f00) | ((p & 0x0f00) >> 4)) >> 4; - g = ((p & 0x00f0) | ((p & 0x00f0) >> 4)) << 8; - r = ((p & 0x000f) | ((p & 0x000f) << 4)) << 16; - - *buffer++ = a | r | g | b; - } -} - -static void -fetch_scanline_x4b4g4r4 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint16_t *pixel = (const uint16_t *)bits + x; - const uint16_t *end = pixel + width; - - while (pixel < end) - { - uint32_t p = READ (image, pixel++); - uint32_t r, g, b; - - b = ((p & 0x0f00) | ((p & 0x0f00) >> 4)) >> 4; - g = ((p & 0x00f0) | ((p & 0x00f0) >> 4)) << 8; - r = ((p & 0x000f) | ((p & 0x000f) << 4)) << 16; - - *buffer++ = 0xff000000 | r | g | b; - } -} - -static void -fetch_scanline_a8 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint8_t *pixel = (const uint8_t *)bits + x; - const uint8_t *end = pixel + width; - - while (pixel < end) - *buffer++ = READ (image, pixel++) << 24; -} - -static void -fetch_scanline_r3g3b2 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint8_t *pixel = (const uint8_t *)bits + x; - const uint8_t *end = pixel + width; - - while (pixel < end) - { - uint32_t p = READ (image, pixel++); - uint32_t r, g, b; - - r = ((p & 0xe0) | ((p & 0xe0) >> 3) | ((p & 0xc0) >> 6)) << 16; - g = ((p & 0x1c) | ((p & 0x18) >> 3) | ((p & 0x1c) << 3)) << 8; - b = (((p & 0x03) ) | - ((p & 0x03) << 2) | - ((p & 0x03) << 4) | - ((p & 0x03) << 6)); - - *buffer++ = 0xff000000 | r | g | b; - } -} - -static void -fetch_scanline_b2g3r3 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint8_t *pixel = (const uint8_t *)bits + x; - const uint8_t *end = pixel + width; - - while (pixel < end) - { - uint32_t p = READ (image, pixel++); - uint32_t r, g, b; - - b = p & 0xc0; - b |= b >> 2; - b |= b >> 4; - b &= 0xff; - - g 
= (p & 0x38) << 10; - g |= g >> 3; - g |= g >> 6; - g &= 0xff00; - - r = (p & 0x7) << 21; - r |= r >> 3; - r |= r >> 6; - r &= 0xff0000; - - *buffer++ = 0xff000000 | r | g | b; - } -} - -static void -fetch_scanline_a2r2g2b2 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint8_t *pixel = (const uint8_t *)bits + x; - const uint8_t *end = pixel + width; - - while (pixel < end) - { - uint32_t p = READ (image, pixel++); - uint32_t a, r, g, b; - - a = ((p & 0xc0) * 0x55) << 18; - r = ((p & 0x30) * 0x55) << 12; - g = ((p & 0x0c) * 0x55) << 6; - b = ((p & 0x03) * 0x55); - - *buffer++ = a | r | g | b; - } -} - -static void -fetch_scanline_a2b2g2r2 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint8_t *pixel = (const uint8_t *)bits + x; - const uint8_t *end = pixel + width; - - while (pixel < end) - { - uint32_t p = READ (image, pixel++); - uint32_t a, r, g, b; - - a = ((p & 0xc0) * 0x55) << 18; - b = ((p & 0x30) * 0x55) >> 4; - g = ((p & 0x0c) * 0x55) << 6; - r = ((p & 0x03) * 0x55) << 16; - - *buffer++ = a | r | g | b; - } -} - -static void -fetch_scanline_c8 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const pixman_indexed_t * indexed = image->bits.indexed; - const uint8_t *pixel = (const uint8_t *)bits + x; - const uint8_t *end = pixel + width; - - while (pixel < end) - { - uint32_t p = READ (image, pixel++); - - *buffer++ = indexed->rgba[p]; - } -} - -static void -fetch_scanline_x4a4 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const uint8_t *pixel = (const uint8_t *)bits + x; - const uint8_t *end = pixel + width; - - while (pixel < end) - { - uint8_t p = READ (image, pixel++) & 0xf; - - *buffer++ = (p | (p << 4)) << 24; - } -} - -#define FETCH_8(img,l,o) (READ (img, (((uint8_t *)(l)) + ((o) >> 3)))) -#ifdef WORDS_BIGENDIAN -#define FETCH_4(img,l,o) \ - (((4 * (o)) & 4) ? (FETCH_8 (img,l, 4 * (o)) & 0xf) : (FETCH_8 (img,l,(4 * (o))) >> 4)) -#else -#define FETCH_4(img,l,o) \ - (((4 * (o)) & 4) ? 
(FETCH_8 (img, l, 4 * (o)) >> 4) : (FETCH_8 (img, l, (4 * (o))) & 0xf)) -#endif - -static void -fetch_scanline_a4 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - int i; - - for (i = 0; i < width; ++i) - { - uint32_t p = FETCH_4 (image, bits, i + x); - - p |= p << 4; - - *buffer++ = p << 24; - } -} - -static void -fetch_scanline_r1g2b1 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - int i; - - for (i = 0; i < width; ++i) - { - uint32_t p = FETCH_4 (image, bits, i + x); - uint32_t r, g, b; - - r = ((p & 0x8) * 0xff) << 13; - g = ((p & 0x6) * 0x55) << 7; - b = ((p & 0x1) * 0xff); - - *buffer++ = 0xff000000 | r | g | b; - } -} - -static void -fetch_scanline_b1g2r1 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - int i; - - for (i = 0; i < width; ++i) - { - uint32_t p = FETCH_4 (image, bits, i + x); - uint32_t r, g, b; - - b = ((p & 0x8) * 0xff) >> 3; - g = ((p & 0x6) * 0x55) << 7; - r = ((p & 0x1) * 0xff) << 16; - - *buffer++ = 0xff000000 | r | g | b; - } -} - -static void -fetch_scanline_a1r1g1b1 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - uint32_t a, r, g, b; - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - int i; - - for (i = 0; i < width; ++i) - { - uint32_t p = FETCH_4 (image, bits, i + x); - - a = ((p & 0x8) * 0xff) << 21; - r = ((p & 0x4) * 0xff) << 14; - g = ((p & 0x2) * 0xff) << 7; - b = ((p & 0x1) * 0xff); - - *buffer++ = a | r | g | b; - } -} - -static void -fetch_scanline_a1b1g1r1 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - int i; - - for (i = 0; i < width; ++i) - { - uint32_t p = FETCH_4 (image, bits, i + x); - uint32_t a, r, g, b; - - a = ((p & 0x8) * 0xff) << 21; - b = ((p & 0x4) * 0xff) >> 2; - g = ((p & 0x2) * 0xff) << 7; - r = ((p & 0x1) * 0xff) << 16; - - *buffer++ = a | r | g | b; - } -} - -static void -fetch_scanline_c4 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const pixman_indexed_t * indexed = image->bits.indexed; - int i; - - for (i = 0; i < width; ++i) - { - uint32_t p = FETCH_4 (image, bits, i + x); - - *buffer++ = indexed->rgba[p]; - } -} - -static void -fetch_scanline_a1 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - int i; - - for (i = 0; i < width; ++i) - { - uint32_t p = READ (image, bits + ((i + x) >> 5)); - uint32_t a; - -#ifdef WORDS_BIGENDIAN - a = p >> (0x1f - ((i + x) & 0x1f)); -#else - a = p >> ((i + x) & 0x1f); -#endif - a = a & 1; - a |= a << 1; - a |= a << 2; - a |= a << 4; - - *buffer++ = a << 24; - } -} - -static void -fetch_scanline_g1 (pixman_image_t *image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t *mask) -{ - const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; - const pixman_indexed_t * indexed = image->bits.indexed; - int i; - - for (i = 0; i < width; ++i) 
- { - uint32_t p = READ (image, bits + ((i + x) >> 5)); - uint32_t a; - -#ifdef WORDS_BIGENDIAN - a = p >> (0x1f - ((i + x) & 0x1f)); -#else - a = p >> ((i + x) & 0x1f); -#endif - a = a & 1; - - *buffer++ = indexed->rgba[a]; - } -} - -static void fetch_scanline_yuy2 (pixman_image_t *image, int x, int line, int width, uint32_t * buffer, const uint32_t *mask) { const uint32_t *bits = image->bits.bits + image->bits.rowstride * line; @@ -1252,586 +791,16 @@ fetch_pixel_x2b10g10r10 (bits_image_t *i r = r << 6 | r >> 4; g = g << 6 | g >> 4; b = b << 6 | b >> 4; return 0xffffULL << 48 | r << 32 | g << 16 | b; } static uint32_t -fetch_pixel_a8r8g8b8 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - return READ (image, (uint32_t *)bits + offset); -} - -static uint32_t -fetch_pixel_x8r8g8b8 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - - return READ (image, (uint32_t *)bits + offset) | 0xff000000; -} - -static uint32_t -fetch_pixel_a8b8g8r8 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, (uint32_t *)bits + offset); - - return ((pixel & 0xff000000) | - ((pixel >> 16) & 0xff) | - (pixel & 0x0000ff00) | - ((pixel & 0xff) << 16)); -} - -static uint32_t -fetch_pixel_x8b8g8r8 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, (uint32_t *)bits + offset); - - return ((0xff000000) | - ((pixel >> 16) & 0xff) | - (pixel & 0x0000ff00) | - ((pixel & 0xff) << 16)); -} - -static uint32_t -fetch_pixel_b8g8r8a8 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, (uint32_t *)bits + offset); - - return ((pixel & 0xff000000) >> 24 | - (pixel & 0x00ff0000) >> 8 | - (pixel & 0x0000ff00) << 8 | - (pixel & 0x000000ff) << 24); -} - -static uint32_t -fetch_pixel_b8g8r8x8 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, (uint32_t *)bits + offset); - - return ((0xff000000) | - (pixel & 0xff000000) >> 24 | - (pixel & 0x00ff0000) >> 8 | - (pixel & 0x0000ff00) << 8); -} - -static uint32_t -fetch_pixel_r8g8b8a8 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, (uint32_t *)bits + offset); - - return (((pixel & 0x000000ff) << 24) | (pixel >> 8)); -} - -static uint32_t -fetch_pixel_r8g8b8x8 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, (uint32_t *)bits + offset); - - return (0xff000000 | (pixel >> 8)); -} - -static uint32_t -fetch_pixel_x14r6g6b6 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, (uint32_t *) bits + offset); - uint32_t r, g, b; - - r = ((pixel & 0x3f000) << 6) | ((pixel & 0x30000)); - g = ((pixel & 0x00fc0) << 4) | ((pixel & 0x00c00) >> 2); - b = ((pixel & 0x0003f) << 2) | ((pixel & 0x00030) >> 4); - - return 0xff000000 | r | g | b; -} - -static uint32_t -fetch_pixel_r8g8b8 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint8_t *pixel = ((uint8_t *) bits) + (offset * 3); - -#ifdef WORDS_BIGENDIAN - 
return (0xff000000 | - (READ (image, pixel + 0) << 16) | - (READ (image, pixel + 1) << 8) | - (READ (image, pixel + 2))); -#else - return (0xff000000 | - (READ (image, pixel + 2) << 16) | - (READ (image, pixel + 1) << 8) | - (READ (image, pixel + 0))); -#endif -} - -static uint32_t -fetch_pixel_b8g8r8 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint8_t *pixel = ((uint8_t *) bits) + (offset * 3); -#ifdef WORDS_BIGENDIAN - return (0xff000000 | - (READ (image, pixel + 2) << 16) | - (READ (image, pixel + 1) << 8) | - (READ (image, pixel + 0))); -#else - return (0xff000000 | - (READ (image, pixel + 0) << 16) | - (READ (image, pixel + 1) << 8) | - (READ (image, pixel + 2))); -#endif -} - -static uint32_t -fetch_pixel_r5g6b5 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, (uint16_t *) bits + offset); - uint32_t r, g, b; - - r = ((pixel & 0xf800) | ((pixel & 0xe000) >> 5)) << 8; - g = ((pixel & 0x07e0) | ((pixel & 0x0600) >> 6)) << 5; - b = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) >> 2; - - return (0xff000000 | r | g | b); -} - -static uint32_t -fetch_pixel_b5g6r5 (bits_image_t *image, - int offset, - int line) -{ - uint32_t r, g, b; - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, (uint16_t *) bits + offset); - - b = ((pixel & 0xf800) | ((pixel & 0xe000) >> 5)) >> 8; - g = ((pixel & 0x07e0) | ((pixel & 0x0600) >> 6)) << 5; - r = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) << 14; - - return (0xff000000 | r | g | b); -} - -static uint32_t -fetch_pixel_a1r5g5b5 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, (uint16_t *) bits + offset); - uint32_t a, r, g, b; - - a = (uint32_t) ((uint8_t) (0 - ((pixel & 0x8000) >> 15))) << 24; - r = ((pixel & 0x7c00) | ((pixel & 0x7000) >> 5)) << 9; - g = ((pixel & 0x03e0) | ((pixel & 0x0380) >> 5)) << 6; - b = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) >> 2; - - return (a | r | g | b); -} - -static uint32_t -fetch_pixel_x1r5g5b5 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, (uint16_t *) bits + offset); - uint32_t r, g, b; - - r = ((pixel & 0x7c00) | ((pixel & 0x7000) >> 5)) << 9; - g = ((pixel & 0x03e0) | ((pixel & 0x0380) >> 5)) << 6; - b = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) >> 2; - - return (0xff000000 | r | g | b); -} - -static uint32_t -fetch_pixel_a1b5g5r5 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, (uint16_t *) bits + offset); - uint32_t a, r, g, b; - - a = (uint32_t) ((uint8_t) (0 - ((pixel & 0x8000) >> 15))) << 24; - b = ((pixel & 0x7c00) | ((pixel & 0x7000) >> 5)) >> 7; - g = ((pixel & 0x03e0) | ((pixel & 0x0380) >> 5)) << 6; - r = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) << 14; - - return (a | r | g | b); -} - -static uint32_t -fetch_pixel_x1b5g5r5 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, (uint16_t *) bits + offset); - uint32_t r, g, b; - - b = ((pixel & 0x7c00) | ((pixel & 0x7000) >> 5)) >> 7; - g = ((pixel & 0x03e0) | ((pixel & 0x0380) >> 5)) << 6; - r = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) << 14; - - return (0xff000000 | r | g | b); -} - -static 
uint32_t -fetch_pixel_a4r4g4b4 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, (uint16_t *) bits + offset); - uint32_t a, r, g, b; - - a = ((pixel & 0xf000) | ((pixel & 0xf000) >> 4)) << 16; - r = ((pixel & 0x0f00) | ((pixel & 0x0f00) >> 4)) << 12; - g = ((pixel & 0x00f0) | ((pixel & 0x00f0) >> 4)) << 8; - b = ((pixel & 0x000f) | ((pixel & 0x000f) << 4)); - - return (a | r | g | b); -} - -static uint32_t -fetch_pixel_x4r4g4b4 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, (uint16_t *) bits + offset); - uint32_t r, g, b; - - r = ((pixel & 0x0f00) | ((pixel & 0x0f00) >> 4)) << 12; - g = ((pixel & 0x00f0) | ((pixel & 0x00f0) >> 4)) << 8; - b = ((pixel & 0x000f) | ((pixel & 0x000f) << 4)); - - return (0xff000000 | r | g | b); -} - -static uint32_t -fetch_pixel_a4b4g4r4 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, (uint16_t *) bits + offset); - uint32_t a, r, g, b; - - a = ((pixel & 0xf000) | ((pixel & 0xf000) >> 4)) << 16; - b = ((pixel & 0x0f00) | ((pixel & 0x0f00) >> 4)) >> 4; - g = ((pixel & 0x00f0) | ((pixel & 0x00f0) >> 4)) << 8; - r = ((pixel & 0x000f) | ((pixel & 0x000f) << 4)) << 16; - - return (a | r | g | b); -} - -static uint32_t -fetch_pixel_x4b4g4r4 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, (uint16_t *) bits + offset); - uint32_t r, g, b; - - b = ((pixel & 0x0f00) | ((pixel & 0x0f00) >> 4)) >> 4; - g = ((pixel & 0x00f0) | ((pixel & 0x00f0) >> 4)) << 8; - r = ((pixel & 0x000f) | ((pixel & 0x000f) << 4)) << 16; - - return (0xff000000 | r | g | b); -} - -static uint32_t -fetch_pixel_a8 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, (uint8_t *) bits + offset); - - return pixel << 24; -} - -static uint32_t -fetch_pixel_r3g3b2 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, (uint8_t *) bits + offset); - uint32_t r, g, b; - - r = ((pixel & 0xe0) | - ((pixel & 0xe0) >> 3) | - ((pixel & 0xc0) >> 6)) << 16; - - g = ((pixel & 0x1c) | - ((pixel & 0x18) >> 3) | - ((pixel & 0x1c) << 3)) << 8; - - b = (((pixel & 0x03) ) | - ((pixel & 0x03) << 2) | - ((pixel & 0x03) << 4) | - ((pixel & 0x03) << 6)); - - return (0xff000000 | r | g | b); -} - -static uint32_t -fetch_pixel_b2g3r3 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t p = READ (image, (uint8_t *) bits + offset); - uint32_t r, g, b; - - b = p & 0xc0; - b |= b >> 2; - b |= b >> 4; - b &= 0xff; - - g = (p & 0x38) << 10; - g |= g >> 3; - g |= g >> 6; - g &= 0xff00; - - r = (p & 0x7) << 21; - r |= r >> 3; - r |= r >> 6; - r &= 0xff0000; - - return 0xff000000 | r | g | b; -} - -static uint32_t -fetch_pixel_a2r2g2b2 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, (uint8_t *) bits + offset); - uint32_t a, r, g, b; - - a = ((pixel & 0xc0) * 0x55) << 18; - r = ((pixel & 0x30) * 0x55) << 12; - g = ((pixel & 0x0c) * 0x55) << 6; - b = ((pixel & 0x03) * 0x55); - - return a | r | g | b; -} - -static uint32_t -fetch_pixel_a2b2g2r2 
(bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, (uint8_t *) bits + offset); - uint32_t a, r, g, b; - - a = ((pixel & 0xc0) * 0x55) << 18; - b = ((pixel & 0x30) * 0x55) >> 4; - g = ((pixel & 0x0c) * 0x55) << 6; - r = ((pixel & 0x03) * 0x55) << 16; - - return a | r | g | b; -} - -static uint32_t -fetch_pixel_c8 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, (uint8_t *) bits + offset); - const pixman_indexed_t * indexed = image->indexed; - - return indexed->rgba[pixel]; -} - -static uint32_t -fetch_pixel_x4a4 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, (uint8_t *) bits + offset); - - return ((pixel & 0xf) | ((pixel & 0xf) << 4)) << 24; -} - -static uint32_t -fetch_pixel_a4 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = FETCH_4 (image, bits, offset); - - pixel |= pixel << 4; - return pixel << 24; -} - -static uint32_t -fetch_pixel_r1g2b1 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = FETCH_4 (image, bits, offset); - uint32_t r, g, b; - - r = ((pixel & 0x8) * 0xff) << 13; - g = ((pixel & 0x6) * 0x55) << 7; - b = ((pixel & 0x1) * 0xff); - - return 0xff000000 | r | g | b; -} - -static uint32_t -fetch_pixel_b1g2r1 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = FETCH_4 (image, bits, offset); - uint32_t r, g, b; - - b = ((pixel & 0x8) * 0xff) >> 3; - g = ((pixel & 0x6) * 0x55) << 7; - r = ((pixel & 0x1) * 0xff) << 16; - - return 0xff000000 | r | g | b; -} - -static uint32_t -fetch_pixel_a1r1g1b1 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = FETCH_4 (image, bits, offset); - uint32_t a, r, g, b; - - a = ((pixel & 0x8) * 0xff) << 21; - r = ((pixel & 0x4) * 0xff) << 14; - g = ((pixel & 0x2) * 0xff) << 7; - b = ((pixel & 0x1) * 0xff); - - return a | r | g | b; -} - -static uint32_t -fetch_pixel_a1b1g1r1 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = FETCH_4 (image, bits, offset); - uint32_t a, r, g, b; - - a = ((pixel & 0x8) * 0xff) << 21; - b = ((pixel & 0x4) * 0xff) >> 2; - g = ((pixel & 0x2) * 0xff) << 7; - r = ((pixel & 0x1) * 0xff) << 16; - - return a | r | g | b; -} - -static uint32_t -fetch_pixel_c4 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = FETCH_4 (image, bits, offset); - const pixman_indexed_t * indexed = image->indexed; - - return indexed->rgba[pixel]; -} - -static uint32_t -fetch_pixel_a1 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, bits + (offset >> 5)); - uint32_t a; - -#ifdef WORDS_BIGENDIAN - a = pixel >> (0x1f - (offset & 0x1f)); -#else - a = pixel >> (offset & 0x1f); -#endif - a = a & 1; - a |= a << 1; - a |= a << 2; - a |= a << 4; - - return a << 24; -} - -static uint32_t -fetch_pixel_g1 (bits_image_t *image, - int offset, - int line) -{ - uint32_t *bits = image->bits + line * image->rowstride; - uint32_t pixel = READ (image, bits 
+ (offset >> 5)); - const pixman_indexed_t * indexed = image->indexed; - uint32_t a; - -#ifdef WORDS_BIGENDIAN - a = pixel >> (0x1f - (offset & 0x1f)); -#else - a = pixel >> (offset & 0x1f); -#endif - a = a & 1; - - return indexed->rgba[a]; -} - -static uint32_t fetch_pixel_yuy2 (bits_image_t *image, int offset, int line) { const uint32_t *bits = image->bits + image->rowstride * line; int16_t y, u, v; int32_t r, g, b; @@ -1878,27 +847,16 @@ fetch_pixel_yv12 (bits_image_t *image, return 0xff000000 | (r >= 0 ? r < 0x1000000 ? r & 0xff0000 : 0xff0000 : 0) | (g >= 0 ? g < 0x1000000 ? (g >> 8) & 0x00ff00 : 0x00ff00 : 0) | (b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0); } /*********************************** Store ************************************/ -#define SPLIT_A(v) \ - uint32_t a = ((v) >> 24), \ - r = ((v) >> 16) & 0xff, \ - g = ((v) >> 8) & 0xff, \ - b = (v) & 0xff - -#define SPLIT(v) \ - uint32_t r = ((v) >> 16) & 0xff, \ - g = ((v) >> 8) & 0xff, \ - b = (v) & 0xff - static void store_scanline_a2r10g10b10 (bits_image_t * image, int x, int y, int width, const uint32_t *v) { uint32_t *bits = image->bits + image->rowstride * y; @@ -1975,839 +933,16 @@ store_scanline_x2b10g10r10 (bits_image_t { WRITE (image, pixel++, ((values[i] >> 38) & 0x3ff) | ((values[i] >> 12) & 0xffc00) | ((values[i] << 14) & 0x3ff00000)); } } -static void -store_scanline_a8r8g8b8 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - - MEMCPY_WRAPPED (image, ((uint32_t *)bits) + x, values, - width * sizeof(uint32_t)); -} - -static void -store_scanline_x8r8g8b8 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint32_t *pixel = (uint32_t *)bits + x; - int i; - - for (i = 0; i < width; ++i) - WRITE (image, pixel++, values[i] & 0xffffff); -} - -static void -store_scanline_a8b8g8r8 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint32_t *pixel = (uint32_t *)bits + x; - int i; - - for (i = 0; i < width; ++i) - { - WRITE (image, pixel++, - (values[i] & 0xff00ff00) | - ((values[i] >> 16) & 0xff) | - ((values[i] & 0xff) << 16)); - } -} - -static void -store_scanline_x8b8g8r8 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint32_t *pixel = (uint32_t *)bits + x; - int i; - - for (i = 0; i < width; ++i) - { - WRITE (image, pixel++, - (values[i] & 0x0000ff00) | - ((values[i] >> 16) & 0xff) | - ((values[i] & 0xff) << 16)); - } -} - -static void -store_scanline_b8g8r8a8 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint32_t *pixel = (uint32_t *)bits + x; - int i; - - for (i = 0; i < width; ++i) - { - WRITE (image, pixel++, - ((values[i] >> 24) & 0x000000ff) | - ((values[i] >> 8) & 0x0000ff00) | - ((values[i] << 8) & 0x00ff0000) | - ((values[i] << 24) & 0xff000000)); - } -} - -static void -store_scanline_b8g8r8x8 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint32_t *pixel = (uint32_t *)bits + x; - int i; - - for (i = 0; i < width; ++i) - { - WRITE (image, pixel++, - ((values[i] >> 8) & 0x0000ff00) | - ((values[i] << 8) & 0x00ff0000) | - ((values[i] << 24) & 0xff000000)); - } 
-} - -static void -store_scanline_r8g8b8a8 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint32_t *pixel = (uint32_t *)bits + x; - int i; - - for (i = 0; i < width; ++i) - { - WRITE (image, pixel++, - ((values[i] >> 24) & 0x000000ff) | (values[i] << 8)); - } -} - -static void -store_scanline_r8g8b8x8 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint32_t *pixel = (uint32_t *)bits + x; - int i; - - for (i = 0; i < width; ++i) - WRITE (image, pixel++, (values[i] << 8)); -} - -static void -store_scanline_x14r6g6b6 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint32_t *pixel = ((uint32_t *) bits) + x; - int i; - - for (i = 0; i < width; ++i) - { - uint32_t s = values[i]; - uint32_t r, g, b; - - r = (s & 0xfc0000) >> 6; - g = (s & 0x00fc00) >> 4; - b = (s & 0x0000fc) >> 2; - - WRITE (image, pixel++, r | g | b); - } -} - -static void -store_scanline_r8g8b8 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint8_t *pixel = ((uint8_t *) bits) + 3 * x; - int i; - - for (i = 0; i < width; ++i) - { - uint32_t val = values[i]; - -#ifdef WORDS_BIGENDIAN - WRITE (image, pixel++, (val & 0x00ff0000) >> 16); - WRITE (image, pixel++, (val & 0x0000ff00) >> 8); - WRITE (image, pixel++, (val & 0x000000ff) >> 0); -#else - WRITE (image, pixel++, (val & 0x000000ff) >> 0); - WRITE (image, pixel++, (val & 0x0000ff00) >> 8); - WRITE (image, pixel++, (val & 0x00ff0000) >> 16); -#endif - } -} - -static void -store_scanline_b8g8r8 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint8_t *pixel = ((uint8_t *) bits) + 3 * x; - int i; - - for (i = 0; i < width; ++i) - { - uint32_t val = values[i]; - -#ifdef WORDS_BIGENDIAN - WRITE (image, pixel++, (val & 0x000000ff) >> 0); - WRITE (image, pixel++, (val & 0x0000ff00) >> 8); - WRITE (image, pixel++, (val & 0x00ff0000) >> 16); -#else - WRITE (image, pixel++, (val & 0x00ff0000) >> 16); - WRITE (image, pixel++, (val & 0x0000ff00) >> 8); - WRITE (image, pixel++, (val & 0x000000ff) >> 0); -#endif - } -} - -static void -store_scanline_r5g6b5 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint16_t *pixel = ((uint16_t *) bits) + x; - int i; - - for (i = 0; i < width; ++i) - { - uint32_t s = values[i]; - - WRITE (image, pixel++, - ((s >> 3) & 0x001f) | - ((s >> 5) & 0x07e0) | - ((s >> 8) & 0xf800)); - } -} - -static void -store_scanline_b5g6r5 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint16_t *pixel = ((uint16_t *) bits) + x; - int i; - - for (i = 0; i < width; ++i) - { - SPLIT (values[i]); - - WRITE (image, pixel++, - ((b << 8) & 0xf800) | - ((g << 3) & 0x07e0) | - ((r >> 3) )); - } -} - -static void -store_scanline_a1r5g5b5 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint16_t *pixel = ((uint16_t *) bits) + x; - int i; - - for (i = 0; i < width; ++i) - { - SPLIT_A (values[i]); - - WRITE (image, pixel++, - ((a << 8) & 0x8000) | - ((r << 7) & 
0x7c00) | - ((g << 2) & 0x03e0) | - ((b >> 3) )); - } -} - -static void -store_scanline_x1r5g5b5 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint16_t *pixel = ((uint16_t *) bits) + x; - int i; - - for (i = 0; i < width; ++i) - { - SPLIT (values[i]); - - WRITE (image, pixel++, - ((r << 7) & 0x7c00) | - ((g << 2) & 0x03e0) | - ((b >> 3) )); - } -} - -static void -store_scanline_a1b5g5r5 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint16_t *pixel = ((uint16_t *) bits) + x; - int i; - - for (i = 0; i < width; ++i) - { - SPLIT_A (values[i]); - - WRITE (image, pixel++, - ((a << 8) & 0x8000) | - ((b << 7) & 0x7c00) | - ((g << 2) & 0x03e0) | - ((r >> 3) )); - } -} - -static void -store_scanline_x1b5g5r5 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint16_t *pixel = ((uint16_t *) bits) + x; - int i; - - for (i = 0; i < width; ++i) - { - SPLIT (values[i]); - - WRITE (image, pixel++, ((b << 7) & 0x7c00) | - ((g << 2) & 0x03e0) | - ((r >> 3) )); - } -} - -static void -store_scanline_a4r4g4b4 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint16_t *pixel = ((uint16_t *) bits) + x; - int i; - - for (i = 0; i < width; ++i) - { - SPLIT_A (values[i]); - - WRITE (image, pixel++, - ((a << 8) & 0xf000) | - ((r << 4) & 0x0f00) | - ((g ) & 0x00f0) | - ((b >> 4) )); - } -} - -static void -store_scanline_x4r4g4b4 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint16_t *pixel = ((uint16_t *) bits) + x; - int i; - - for (i = 0; i < width; ++i) - { - SPLIT (values[i]); - - WRITE (image, pixel++, - ((r << 4) & 0x0f00) | - ((g ) & 0x00f0) | - ((b >> 4) )); - } -} - -static void -store_scanline_a4b4g4r4 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint16_t *pixel = ((uint16_t *) bits) + x; - int i; - - for (i = 0; i < width; ++i) - { - SPLIT_A (values[i]); - WRITE (image, pixel++, ((a << 8) & 0xf000) | - ((b << 4) & 0x0f00) | - ((g ) & 0x00f0) | - ((r >> 4) )); - } -} - -static void -store_scanline_x4b4g4r4 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint16_t *pixel = ((uint16_t *) bits) + x; - int i; - - for (i = 0; i < width; ++i) - { - SPLIT (values[i]); - - WRITE (image, pixel++, - ((b << 4) & 0x0f00) | - ((g ) & 0x00f0) | - ((r >> 4) )); - } -} - -static void -store_scanline_a8 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint8_t *pixel = ((uint8_t *) bits) + x; - int i; - - for (i = 0; i < width; ++i) - { - WRITE (image, pixel++, values[i] >> 24); - } -} - -static void -store_scanline_r3g3b2 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint8_t *pixel = ((uint8_t *) bits) + x; - int i; - - for (i = 0; i < width; ++i) - { - SPLIT (values[i]); - - WRITE (image, pixel++, - ((r ) & 0xe0) | - ((g >> 3) & 0x1c) | - ((b >> 6) )); - } -} - -static void -store_scanline_b2g3r3 
(bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint8_t *pixel = ((uint8_t *) bits) + x; - int i; - - for (i = 0; i < width; ++i) - { - SPLIT (values[i]); - - WRITE (image, pixel++, - ((b ) & 0xc0) | - ((g >> 2) & 0x38) | - ((r >> 5) )); - } -} - -static void -store_scanline_a2r2g2b2 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint8_t *pixel = ((uint8_t *) bits) + x; - int i; - - for (i = 0; i < width; ++i) - { - SPLIT_A (values[i]); - - WRITE (image, pixel++, - ((a ) & 0xc0) | - ((r >> 2) & 0x30) | - ((g >> 4) & 0x0c) | - ((b >> 6) )); - } -} - -static void -store_scanline_a2b2g2r2 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint8_t *pixel = ((uint8_t *) bits) + x; - int i; - - for (i = 0; i < width; ++i) - { - SPLIT_A (values[i]); - - WRITE (image, pixel++, - ((a ) & 0xc0) | - ((b >> 2) & 0x30) | - ((g >> 4) & 0x0c) | - ((r >> 6) )); - } -} - -static void -store_scanline_c8 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint8_t *pixel = ((uint8_t *) bits) + x; - const pixman_indexed_t *indexed = image->indexed; - int i; - - for (i = 0; i < width; ++i) - WRITE (image, pixel++, RGB24_TO_ENTRY (indexed,values[i])); -} - -static void -store_scanline_g8 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint8_t *pixel = ((uint8_t *) bits) + x; - const pixman_indexed_t *indexed = image->indexed; - int i; - - for (i = 0; i < width; ++i) - WRITE (image, pixel++, RGB24_TO_ENTRY_Y (indexed,values[i])); -} - -static void -store_scanline_x4a4 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - uint8_t *pixel = ((uint8_t *) bits) + x; - int i; - - for (i = 0; i < width; ++i) - WRITE (image, pixel++, values[i] >> 28); -} - -#define STORE_8(img,l,o,v) (WRITE (img, (uint8_t *)(l) + ((o) >> 3), (v))) -#ifdef WORDS_BIGENDIAN - -#define STORE_4(img,l,o,v) \ - do \ - { \ - int bo = 4 * (o); \ - int v4 = (v) & 0x0f; \ - \ - STORE_8 (img, l, bo, ( \ - bo & 4 ? \ - (FETCH_8 (img, l, bo) & 0xf0) | (v4) : \ - (FETCH_8 (img, l, bo) & 0x0f) | (v4 << 4))); \ - } while (0) -#else - -#define STORE_4(img,l,o,v) \ - do \ - { \ - int bo = 4 * (o); \ - int v4 = (v) & 0x0f; \ - \ - STORE_8 (img, l, bo, ( \ - bo & 4 ? 
\ - (FETCH_8 (img, l, bo) & 0x0f) | (v4 << 4) : \ - (FETCH_8 (img, l, bo) & 0xf0) | (v4))); \ - } while (0) -#endif - -static void -store_scanline_a4 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - int i; - - for (i = 0; i < width; ++i) - STORE_4 (image, bits, i + x, values[i] >> 28); -} - -static void -store_scanline_r1g2b1 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - int i; - - for (i = 0; i < width; ++i) - { - uint32_t pixel; - - SPLIT (values[i]); - pixel = (((r >> 4) & 0x8) | - ((g >> 5) & 0x6) | - ((b >> 7) )); - STORE_4 (image, bits, i + x, pixel); - } -} - -static void -store_scanline_b1g2r1 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - int i; - - for (i = 0; i < width; ++i) - { - uint32_t pixel; - - SPLIT (values[i]); - pixel = (((b >> 4) & 0x8) | - ((g >> 5) & 0x6) | - ((r >> 7) )); - STORE_4 (image, bits, i + x, pixel); - } -} - -static void -store_scanline_a1r1g1b1 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - int i; - - for (i = 0; i < width; ++i) - { - uint32_t pixel; - - SPLIT_A (values[i]); - pixel = (((a >> 4) & 0x8) | - ((r >> 5) & 0x4) | - ((g >> 6) & 0x2) | - ((b >> 7) )); - - STORE_4 (image, bits, i + x, pixel); - } -} - -static void -store_scanline_a1b1g1r1 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - int i; - - for (i = 0; i < width; ++i) - { - uint32_t pixel; - - SPLIT_A (values[i]); - pixel = (((a >> 4) & 0x8) | - ((b >> 5) & 0x4) | - ((g >> 6) & 0x2) | - ((r >> 7) )); - - STORE_4 (image, bits, i + x, pixel); - } -} - -static void -store_scanline_c4 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - const pixman_indexed_t *indexed = image->indexed; - int i; - - for (i = 0; i < width; ++i) - { - uint32_t pixel; - - pixel = RGB24_TO_ENTRY (indexed, values[i]); - STORE_4 (image, bits, i + x, pixel); - } -} - -static void -store_scanline_g4 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - const pixman_indexed_t *indexed = image->indexed; - int i; - - for (i = 0; i < width; ++i) - { - uint32_t pixel; - - pixel = RGB24_TO_ENTRY_Y (indexed, values[i]); - STORE_4 (image, bits, i + x, pixel); - } -} - -static void -store_scanline_a1 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - int i; - - for (i = 0; i < width; ++i) - { - uint32_t *pixel = ((uint32_t *) bits) + ((i + x) >> 5); - uint32_t mask, v; - -#ifdef WORDS_BIGENDIAN - mask = 1 << (0x1f - ((i + x) & 0x1f)); -#else - mask = 1 << ((i + x) & 0x1f); -#endif - v = values[i] & 0x80000000 ? 
mask : 0; - - WRITE (image, pixel, (READ (image, pixel) & ~mask) | v); - } -} - -static void -store_scanline_g1 (bits_image_t * image, - int x, - int y, - int width, - const uint32_t *values) -{ - uint32_t *bits = image->bits + image->rowstride * y; - const pixman_indexed_t *indexed = image->indexed; - int i; - - for (i = 0; i < width; ++i) - { - uint32_t *pixel = ((uint32_t *) bits) + ((i + x) >> 5); - uint32_t mask, v; - -#ifdef WORDS_BIGENDIAN - mask = 1 << (0x1f - ((i + x) & 0x1f)); -#else - mask = 1 << ((i + x) & 0x1f); -#endif - v = RGB24_TO_ENTRY_Y (indexed, values[i]) & 0x1 ? mask : 0; - - WRITE (image, pixel, (READ (image, pixel) & ~mask) | v); - } -} - /* * Contracts a 64bpp image to 32bpp and then stores it using a regular 32-bit * store proc. Despite the type, this function expects a uint64_t buffer. */ static void store_scanline_generic_64 (bits_image_t * image, int x, int y, @@ -2839,34 +974,34 @@ static void fetch_scanline_generic_64 (pixman_image_t *image, int x, int y, int width, uint32_t * buffer, const uint32_t *mask) { pixman_format_code_t format; - + /* Fetch the pixels into the first half of buffer and then expand them in * place. */ image->bits.fetch_scanline_32 (image, x, y, width, buffer, NULL); format = image->bits.format; if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_COLOR || PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_GRAY) { /* Indexed formats are mapped to a8r8g8b8 with full * precision, so when expanding we shouldn't correct * for the width of the channels */ - + format = PIXMAN_a8r8g8b8; } - + pixman_expand ((uint64_t *)buffer, buffer, format, width); } /* Despite the type, this function expects a uint64_t *buffer */ static uint64_t fetch_pixel_generic_64 (bits_image_t *image, int offset, int line) @@ -2878,20 +1013,20 @@ fetch_pixel_generic_64 (bits_image_t *im format = image->format; if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_COLOR || PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_GRAY) { /* Indexed formats are mapped to a8r8g8b8 with full * precision, so when expanding we shouldn't correct * for the width of the channels */ - + format = PIXMAN_a8r8g8b8; } - + pixman_expand ((uint64_t *)&result, &pixel32, format, 1); return result; } /* * XXX: The transformed fetch path only works at 32-bpp so far. When all * paths have wide versions, this can be removed. 
@@ -2900,17 +1035,17 @@ fetch_pixel_generic_64 (bits_image_t *im */ static uint32_t fetch_pixel_generic_lossy_32 (bits_image_t *image, int offset, int line) { uint64_t pixel64 = image->fetch_pixel_64 (image, offset, line); uint32_t result; - + pixman_contract (&result, &pixel64, 1); return result; } typedef struct { pixman_format_code_t format; @@ -2965,43 +1100,39 @@ static const format_info_t accessors[] = FORMAT_INFO (a8), FORMAT_INFO (r3g3b2), FORMAT_INFO (b2g3r3), FORMAT_INFO (a2r2g2b2), FORMAT_INFO (a2b2g2r2), FORMAT_INFO (c8), -#define fetch_scanline_g8 fetch_scanline_c8 -#define fetch_pixel_g8 fetch_pixel_c8 FORMAT_INFO (g8), #define fetch_scanline_x4c4 fetch_scanline_c8 #define fetch_pixel_x4c4 fetch_pixel_c8 #define store_scanline_x4c4 store_scanline_c8 FORMAT_INFO (x4c4), -#define fetch_scanline_x4g4 fetch_scanline_c8 -#define fetch_pixel_x4g4 fetch_pixel_c8 +#define fetch_scanline_x4g4 fetch_scanline_g8 +#define fetch_pixel_x4g4 fetch_pixel_g8 #define store_scanline_x4g4 store_scanline_g8 FORMAT_INFO (x4g4), FORMAT_INFO (x4a4), /* 4bpp formats */ FORMAT_INFO (a4), FORMAT_INFO (r1g2b1), FORMAT_INFO (b1g2r1), FORMAT_INFO (a1r1g1b1), FORMAT_INFO (a1b1g1r1), FORMAT_INFO (c4), -#define fetch_scanline_g4 fetch_scanline_c4 -#define fetch_pixel_g4 fetch_pixel_c4 FORMAT_INFO (g4), /* 1bpp formats */ FORMAT_INFO (a1), FORMAT_INFO (g1), /* Wide formats */
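[note on pixman-access.c] Each of the RGB store procs deleted above follows the same pattern: take a scanline of 8-bit-per-channel a8r8g8b8 values and pack every pixel into the destination format through the WRITE() accessor. A minimal standalone sketch of the r5g6b5 case, mirroring the shifts in store_scanline_r5g6b5 above (pack_8888_to_0565 is an illustrative name, not a pixman function, and the accessor indirection is omitted):

#include <stdint.h>

/* Pack one a8r8g8b8 pixel into r5g6b5: keep the top 5/6/5 bits of
 * red/green/blue and drop the alpha byte entirely. */
static uint16_t
pack_8888_to_0565 (uint32_t s)
{
    return ((s >> 3) & 0x001f) |   /* blue:  bits 7..3   -> 4..0   */
           ((s >> 5) & 0x07e0) |   /* green: bits 15..10 -> 10..5  */
           ((s >> 8) & 0xf800);    /* red:   bits 23..19 -> 15..11 */
}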
--- a/gfx/cairo/libpixman/src/pixman-accessor.h +++ b/gfx/cairo/libpixman/src/pixman-accessor.h @@ -1,40 +1,25 @@ #ifdef PIXMAN_FB_ACCESSORS -#define ACCESS(sym) sym##_accessors - #define READ(img, ptr) \ (((bits_image_t *)(img))->read_func ((ptr), sizeof(*(ptr)))) #define WRITE(img, ptr,val) \ (((bits_image_t *)(img))->write_func ((ptr), (val), sizeof (*(ptr)))) -#define MEMCPY_WRAPPED(img, dst, src, size) \ - do { \ - size_t _i; \ - uint8_t *_dst = (uint8_t*)(dst), *_src = (uint8_t*)(src); \ - for(_i = 0; _i < size; _i++) { \ - WRITE((img), _dst +_i, READ((img), _src + _i)); \ - } \ - } while (0) - #define MEMSET_WRAPPED(img, dst, val, size) \ do { \ size_t _i; \ uint8_t *_dst = (uint8_t*)(dst); \ for(_i = 0; _i < (size_t) size; _i++) { \ WRITE((img), _dst +_i, (val)); \ } \ } while (0) #else -#define ACCESS(sym) sym - #define READ(img, ptr) (*(ptr)) #define WRITE(img, ptr, val) (*(ptr) = (val)) -#define MEMCPY_WRAPPED(img, dst, src, size) \ - memcpy(dst, src, size) #define MEMSET_WRAPPED(img, dst, val, size) \ memset(dst, val, size) #endif
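[note on pixman-accessor.h] In the accessors build (PIXMAN_FB_ACCESSORS), the READ()/WRITE() macros above route every pixel access through per-image callbacks, which is what lets pixman composite to memory that cannot be dereferenced directly (e.g. the X server's wrapped framebuffer). A hedged sketch of matching callbacks — pixman_image_set_accessors() and the two function-pointer types are the public pixman API, but the logging callbacks are made up here, and the memcpy trick assumes a little-endian host:

#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <pixman.h>

/* 'size' is sizeof(*ptr) at the READ/WRITE call site: 1, 2 or 4. */
static uint32_t
logging_read (const void *src, int size)
{
    uint32_t v = 0;
    memcpy (&v, src, size);        /* little-endian assumption */
    fprintf (stderr, "read  %d bytes\n", size);
    return v;
}

static void
logging_write (void *dst, uint32_t value, int size)
{
    fprintf (stderr, "write %d bytes\n", size);
    memcpy (dst, &value, size);    /* little-endian assumption */
}

/* usage: pixman_image_set_accessors (image, logging_read, logging_write); */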
--- a/gfx/cairo/libpixman/src/pixman-arm-common.h +++ b/gfx/cairo/libpixman/src/pixman-arm-common.h @@ -21,17 +21,17 @@ * DEALINGS IN THE SOFTWARE. * * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com) */ #ifndef PIXMAN_ARM_COMMON_H #define PIXMAN_ARM_COMMON_H -#include "pixman-fast-path.h" +#include "pixman-inlines.h" /* Define some macros which can expand into proxy functions between * ARM assembly optimized functions and the rest of pixman fast path API. * * All the low level ARM assembly functions have to use ARM EABI * calling convention and take up to 8 arguments: * width, height, dst, dst_stride, src, src_stride, mask, mask_stride * @@ -58,36 +58,26 @@ pixman_composite_##name##_asm_##cputype int32_t h, \ dst_type *dst, \ int32_t dst_stride, \ src_type *src, \ int32_t src_stride); \ \ static void \ cputype##_composite_##name (pixman_implementation_t *imp, \ - pixman_op_t op, \ - pixman_image_t * src_image, \ - pixman_image_t * mask_image, \ - pixman_image_t * dst_image, \ - int32_t src_x, \ - int32_t src_y, \ - int32_t mask_x, \ - int32_t mask_y, \ - int32_t dest_x, \ - int32_t dest_y, \ - int32_t width, \ - int32_t height) \ + pixman_composite_info_t *info) \ { \ - dst_type *dst_line; \ + PIXMAN_COMPOSITE_ARGS (info); \ + dst_type *dst_line; \ src_type *src_line; \ int32_t dst_stride, src_stride; \ \ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \ src_stride, src_line, src_cnt); \ - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \ + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \ dst_stride, dst_line, dst_cnt); \ \ pixman_composite_##name##_asm_##cputype (width, height, \ dst_line, dst_stride, \ src_line, src_stride); \ } #define PIXMAN_ARM_BIND_FAST_PATH_N_DST(flags, cputype, name, \ @@ -96,40 +86,30 @@ void pixman_composite_##name##_asm_##cputype (int32_t w, \ int32_t h, \ dst_type *dst, \ int32_t dst_stride, \ uint32_t src); \ \ static void \ cputype##_composite_##name (pixman_implementation_t *imp, \ - pixman_op_t op, \ - pixman_image_t * src_image, \ - pixman_image_t * mask_image, \ - pixman_image_t * dst_image, \ - int32_t src_x, \ - int32_t src_y, \ - int32_t mask_x, \ - int32_t mask_y, \ - int32_t dest_x, \ - int32_t dest_y, \ - int32_t width, \ - int32_t height) \ + pixman_composite_info_t *info) \ { \ + PIXMAN_COMPOSITE_ARGS (info); \ dst_type *dst_line; \ int32_t dst_stride; \ uint32_t src; \ \ src = _pixman_image_get_solid ( \ - imp, src_image, dst_image->bits.format); \ + imp, src_image, dest_image->bits.format); \ \ if ((flags & SKIP_ZERO_SRC) && src == 0) \ return; \ \ - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \ + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \ dst_stride, dst_line, dst_cnt); \ \ pixman_composite_##name##_asm_##cputype (width, height, \ dst_line, dst_stride, \ src); \ } #define PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST(flags, cputype, name, \ @@ -142,41 +122,31 @@ pixman_composite_##name##_asm_##cputype int32_t dst_stride, \ uint32_t src, \ int32_t unused, \ mask_type *mask, \ int32_t mask_stride); \ \ static void \ cputype##_composite_##name (pixman_implementation_t *imp, \ - pixman_op_t op, \ - pixman_image_t * src_image, \ - pixman_image_t * mask_image, \ - pixman_image_t * dst_image, \ - int32_t src_x, \ - int32_t src_y, \ - int32_t mask_x, \ - int32_t mask_y, \ - int32_t dest_x, \ - int32_t dest_y, \ - int32_t width, \ - int32_t height) \ + pixman_composite_info_t *info) \ { \ - dst_type *dst_line; \ + PIXMAN_COMPOSITE_ARGS (info); \ + dst_type *dst_line; \ mask_type 
*mask_line; \ int32_t dst_stride, mask_stride; \ uint32_t src; \ \ src = _pixman_image_get_solid ( \ - imp, src_image, dst_image->bits.format); \ + imp, src_image, dest_image->bits.format); \ \ if ((flags & SKIP_ZERO_SRC) && src == 0) \ return; \ \ - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \ + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \ dst_stride, dst_line, dst_cnt); \ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \ mask_stride, mask_line, mask_cnt); \ \ pixman_composite_##name##_asm_##cputype (width, height, \ dst_line, dst_stride, \ src, 0, \ mask_line, mask_stride); \ @@ -191,41 +161,31 @@ pixman_composite_##name##_asm_##cputype dst_type *dst, \ int32_t dst_stride, \ src_type *src, \ int32_t src_stride, \ uint32_t mask); \ \ static void \ cputype##_composite_##name (pixman_implementation_t *imp, \ - pixman_op_t op, \ - pixman_image_t * src_image, \ - pixman_image_t * mask_image, \ - pixman_image_t * dst_image, \ - int32_t src_x, \ - int32_t src_y, \ - int32_t mask_x, \ - int32_t mask_y, \ - int32_t dest_x, \ - int32_t dest_y, \ - int32_t width, \ - int32_t height) \ + pixman_composite_info_t *info) \ { \ - dst_type *dst_line; \ + PIXMAN_COMPOSITE_ARGS (info); \ + dst_type *dst_line; \ src_type *src_line; \ int32_t dst_stride, src_stride; \ uint32_t mask; \ \ mask = _pixman_image_get_solid ( \ - imp, mask_image, dst_image->bits.format); \ + imp, mask_image, dest_image->bits.format); \ \ if ((flags & SKIP_ZERO_MASK) && mask == 0) \ return; \ \ - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \ + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \ dst_stride, dst_line, dst_cnt); \ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \ src_stride, src_line, src_cnt); \ \ pixman_composite_##name##_asm_##cputype (width, height, \ dst_line, dst_stride, \ src_line, src_stride, \ mask); \ @@ -242,35 +202,25 @@ pixman_composite_##name##_asm_##cputype int32_t dst_stride, \ src_type *src, \ int32_t src_stride, \ mask_type *mask, \ int32_t mask_stride); \ \ static void \ cputype##_composite_##name (pixman_implementation_t *imp, \ - pixman_op_t op, \ - pixman_image_t * src_image, \ - pixman_image_t * mask_image, \ - pixman_image_t * dst_image, \ - int32_t src_x, \ - int32_t src_y, \ - int32_t mask_x, \ - int32_t mask_y, \ - int32_t dest_x, \ - int32_t dest_y, \ - int32_t width, \ - int32_t height) \ + pixman_composite_info_t *info) \ { \ - dst_type *dst_line; \ + PIXMAN_COMPOSITE_ARGS (info); \ + dst_type *dst_line; \ src_type *src_line; \ mask_type *mask_line; \ int32_t dst_stride, src_stride, mask_stride; \ \ - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \ + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \ dst_stride, dst_line, dst_cnt); \ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \ src_stride, src_line, src_cnt); \ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \ mask_stride, mask_line, mask_cnt); \ \ pixman_composite_##name##_asm_##cputype (width, height, \ dst_line, dst_stride, \ @@ -393,23 +343,27 @@ scaled_bilinear_scanline_##cputype##_##n if ((flags & SKIP_ZERO_SRC) && zero_src) \ return; \ pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype ( \ dst, src_top, src_bottom, wt, wb, vx, unit_x, w); \ } \ \ FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_cover_##op, \ scaled_bilinear_scanline_##cputype##_##name##_##op, \ - src_type, uint32_t, dst_type, COVER, FALSE, FALSE) \ + src_type, uint32_t, dst_type, COVER, FLAG_NONE) \ 
FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_none_##op, \ scaled_bilinear_scanline_##cputype##_##name##_##op, \ - src_type, uint32_t, dst_type, NONE, FALSE, FALSE) \ + src_type, uint32_t, dst_type, NONE, FLAG_NONE) \ FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_pad_##op, \ scaled_bilinear_scanline_##cputype##_##name##_##op, \ - src_type, uint32_t, dst_type, PAD, FALSE, FALSE) + src_type, uint32_t, dst_type, PAD, FLAG_NONE) \ +FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_normal_##op, \ + scaled_bilinear_scanline_##cputype##_##name##_##op, \ + src_type, uint32_t, dst_type, NORMAL, \ + FLAG_NONE) #define PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_A8_DST(flags, cputype, name, op, \ src_type, dst_type) \ void \ pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype ( \ dst_type * dst, \ const uint8_t * mask, \ @@ -438,17 +392,25 @@ scaled_bilinear_scanline_##cputype##_##n if ((flags & SKIP_ZERO_SRC) && zero_src) \ return; \ pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype ( \ dst, mask, src_top, src_bottom, wt, wb, vx, unit_x, w); \ } \ \ FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_cover_##op, \ scaled_bilinear_scanline_##cputype##_##name##_##op, \ - src_type, uint8_t, dst_type, COVER, TRUE, FALSE) \ + src_type, uint8_t, dst_type, COVER, \ + FLAG_HAVE_NON_SOLID_MASK) \ FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_none_##op, \ scaled_bilinear_scanline_##cputype##_##name##_##op, \ - src_type, uint8_t, dst_type, NONE, TRUE, FALSE) \ + src_type, uint8_t, dst_type, NONE, \ + FLAG_HAVE_NON_SOLID_MASK) \ FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_pad_##op, \ scaled_bilinear_scanline_##cputype##_##name##_##op, \ - src_type, uint8_t, dst_type, PAD, TRUE, FALSE) + src_type, uint8_t, dst_type, PAD, \ + FLAG_HAVE_NON_SOLID_MASK) \ +FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_normal_##op, \ + scaled_bilinear_scanline_##cputype##_##name##_##op, \ + src_type, uint8_t, dst_type, NORMAL, \ + FLAG_HAVE_NON_SOLID_MASK) + #endif
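[note on pixman-arm-common.h] The signature change running through all of these binding macros is the pixman 0.24 composite-args refactor: the twelve loose parameters collapse into a single pixman_composite_info_t pointer, and PIXMAN_COMPOSITE_ARGS() re-declares them as locals at the top of each fast path. Roughly the shape involved (paraphrased from pixman-private.h; check the tree for the authoritative definition):

typedef struct
{
    pixman_op_t      op;
    pixman_image_t  *src_image;
    pixman_image_t  *mask_image;
    pixman_image_t  *dest_image;
    int32_t          src_x, src_y;
    int32_t          mask_x, mask_y;
    int32_t          dest_x, dest_y;
    int32_t          width, height;
} pixman_composite_info_t;

#define PIXMAN_COMPOSITE_ARGS(info)                                   \
    pixman_op_t      op         = info->op;                           \
    pixman_image_t  *src_image  = info->src_image;                    \
    pixman_image_t  *mask_image = info->mask_image;                   \
    pixman_image_t  *dest_image = info->dest_image;                   \
    int32_t          src_x  = info->src_x,  src_y  = info->src_y;     \
    int32_t          mask_x = info->mask_x, mask_y = info->mask_y;    \
    int32_t          dest_x = info->dest_x, dest_y = info->dest_y;    \
    int32_t          width  = info->width,  height = info->height

This is also why every dst_image reference in the hunks becomes dest_image: the locals now take their names from the struct's fields.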
--- a/gfx/cairo/libpixman/src/pixman-arm-neon-asm-bilinear.S +++ b/gfx/cairo/libpixman/src/pixman-arm-neon-asm-bilinear.S @@ -39,20 +39,16 @@ * 8, Deinterleave final result * 9. store destination pixels * * All registers with single number (i.e. src0, tmp0) are 64-bits registers. * Registers with double numbers(src01, dst01) are 128-bits registers. * All temp registers can be used freely outside the code block. * Assume that symbol(register .req) OUT and MASK are defined at caller of these macro blocks. * - * TODOs - * Support 0565 pixel format - * Optimization for two and last pixel cases - * * Remarks * There can be lots of pipeline stalls inside code block and between code blocks. * Further optimizations will be done by new macro templates using head/tail_head/tail scheme. */ /* Prevent the stack from becoming executable for no reason... */ #if defined(__linux__) && defined (__ELF__) .section .note.GNU-stack,"",%progbits @@ -87,31 +83,29 @@ fname: /* * Bilinear scaling support code which tries to provide pixel fetching, color * format conversion, and interpolation as separate macros which can be used * as the basic building blocks for constructing bilinear scanline functions. */ .macro bilinear_load_8888 reg1, reg2, tmp - mov TMP2, X, asr #16 + mov TMP1, X, asr #16 add X, X, UX - add TMP1, TOP, TMP2, asl #2 - add TMP2, BOTTOM, TMP2, asl #2 - vld1.32 {reg1}, [TMP1] - vld1.32 {reg2}, [TMP2] + add TMP1, TOP, TMP1, asl #2 + vld1.32 {reg1}, [TMP1], STRIDE + vld1.32 {reg2}, [TMP1] .endm .macro bilinear_load_0565 reg1, reg2, tmp - mov TMP2, X, asr #16 + mov TMP1, X, asr #16 add X, X, UX - add TMP1, TOP, TMP2, asl #1 - add TMP2, BOTTOM, TMP2, asl #1 - vld1.32 {reg2[0]}, [TMP1] - vld1.32 {reg2[1]}, [TMP2] + add TMP1, TOP, TMP1, asl #1 + vld1.32 {reg2[0]}, [TMP1], STRIDE + vld1.32 {reg2[1]}, [TMP1] convert_four_0565_to_x888_packed reg2, reg1, reg2, tmp .endm .macro bilinear_load_and_vertical_interpolate_two_8888 \ acc1, acc2, reg1, reg2, reg3, reg4, tmp1, tmp2 bilinear_load_8888 reg1, reg2, tmp1 vmull.u8 acc1, reg1, d28 @@ -129,71 +123,65 @@ fname: xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi bilinear_load_and_vertical_interpolate_two_8888 \ yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi .endm .macro bilinear_load_and_vertical_interpolate_two_0565 \ acc1, acc2, reg1, reg2, reg3, reg4, acc2lo, acc2hi + mov TMP1, X, asr #16 + add X, X, UX + add TMP1, TOP, TMP1, asl #1 mov TMP2, X, asr #16 add X, X, UX - mov TMP4, X, asr #16 - add X, X, UX - add TMP1, TOP, TMP2, asl #1 - add TMP2, BOTTOM, TMP2, asl #1 - add TMP3, TOP, TMP4, asl #1 - add TMP4, BOTTOM, TMP4, asl #1 - vld1.32 {acc2lo[0]}, [TMP1] - vld1.32 {acc2hi[0]}, [TMP3] - vld1.32 {acc2lo[1]}, [TMP2] - vld1.32 {acc2hi[1]}, [TMP4] + add TMP2, TOP, TMP2, asl #1 + vld1.32 {acc2lo[0]}, [TMP1], STRIDE + vld1.32 {acc2hi[0]}, [TMP2], STRIDE + vld1.32 {acc2lo[1]}, [TMP1] + vld1.32 {acc2hi[1]}, [TMP2] convert_0565_to_x888 acc2, reg3, reg2, reg1 vzip.u8 reg1, reg3 vzip.u8 reg2, reg4 vzip.u8 reg3, reg4 vzip.u8 reg1, reg2 vmull.u8 acc1, reg1, d28 vmlal.u8 acc1, reg2, d29 vmull.u8 acc2, reg3, d28 vmlal.u8 acc2, reg4, d29 .endm .macro bilinear_load_and_vertical_interpolate_four_0565 \ xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \ yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi + mov TMP1, X, asr #16 + add X, X, UX + add TMP1, TOP, TMP1, asl #1 mov TMP2, X, asr #16 add X, X, UX - mov TMP4, X, asr #16 + add TMP2, TOP, TMP2, asl #1 + vld1.32 {xacc2lo[0]}, [TMP1], STRIDE + vld1.32 {xacc2hi[0]}, [TMP2], STRIDE + 
vld1.32 {xacc2lo[1]}, [TMP1] + vld1.32 {xacc2hi[1]}, [TMP2] + convert_0565_to_x888 xacc2, xreg3, xreg2, xreg1 + mov TMP1, X, asr #16 add X, X, UX - add TMP1, TOP, TMP2, asl #1 - add TMP2, BOTTOM, TMP2, asl #1 - add TMP3, TOP, TMP4, asl #1 - add TMP4, BOTTOM, TMP4, asl #1 - vld1.32 {xacc2lo[0]}, [TMP1] - vld1.32 {xacc2hi[0]}, [TMP3] - vld1.32 {xacc2lo[1]}, [TMP2] - vld1.32 {xacc2hi[1]}, [TMP4] - convert_0565_to_x888 xacc2, xreg3, xreg2, xreg1 + add TMP1, TOP, TMP1, asl #1 mov TMP2, X, asr #16 add X, X, UX - mov TMP4, X, asr #16 - add X, X, UX - add TMP1, TOP, TMP2, asl #1 - add TMP2, BOTTOM, TMP2, asl #1 - add TMP3, TOP, TMP4, asl #1 - add TMP4, BOTTOM, TMP4, asl #1 - vld1.32 {yacc2lo[0]}, [TMP1] + add TMP2, TOP, TMP2, asl #1 + vld1.32 {yacc2lo[0]}, [TMP1], STRIDE vzip.u8 xreg1, xreg3 - vld1.32 {yacc2hi[0]}, [TMP3] + vld1.32 {yacc2hi[0]}, [TMP2], STRIDE vzip.u8 xreg2, xreg4 - vld1.32 {yacc2lo[1]}, [TMP2] + vld1.32 {yacc2lo[1]}, [TMP1] vzip.u8 xreg3, xreg4 - vld1.32 {yacc2hi[1]}, [TMP4] + vld1.32 {yacc2hi[1]}, [TMP2] vzip.u8 xreg1, xreg2 convert_0565_to_x888 yacc2, yreg3, yreg2, yreg1 vmull.u8 xacc1, xreg1, d28 vzip.u8 yreg1, yreg3 vmlal.u8 xacc1, xreg2, d29 vzip.u8 yreg2, yreg4 vmull.u8 xacc2, xreg3, d28 vzip.u8 yreg3, yreg4 @@ -247,16 +235,17 @@ fname: vld1.32 {mask[0]}, [MASK]! .elseif numpix == 2 vld1.16 {mask[0]}, [MASK]! .elseif numpix == 1 vld1.8 {mask[0]}, [MASK]! .else .error bilinear_load_mask_8 numpix is unsupported .endif + pld [MASK, #prefetch_offset] .endm .macro bilinear_load_mask mask_fmt, numpix, mask bilinear_load_mask_&mask_fmt numpix, mask .endm /* @@ -274,16 +263,17 @@ fname: vld1.32 {dst0, dst1}, [OUT] .elseif numpix == 2 vld1.32 {dst0}, [OUT] .elseif numpix == 1 vld1.32 {dst0[0]}, [OUT] .else .error bilinear_load_dst_8888 numpix is unsupported .endif + pld [OUT, #(prefetch_offset * 4)] .endm .macro bilinear_load_dst_8888_over numpix, dst0, dst1, dst01 bilinear_load_dst_8888 numpix, dst0, dst1, dst01 .endm .macro bilinear_load_dst_8888_add numpix, dst0, dst1, dst01 bilinear_load_dst_8888 numpix, dst0, dst1, dst01 @@ -298,17 +288,17 @@ fname: * We will apply mask to interleaved source pixels, that is * (r0, r1, r2, r3, g0, g1, g2, g3) x (m0, m1, m2, m3, m0, m1, m2, m3) * (b0, b1, b2, b3, a0, a1, a2, a3) x (m0, m1, m2, m3, m0, m1, m2, m3) * So, we need to duplicate loaded mask into whole register. * * For two pixel case * (r0, r1, x, x, g0, g1, x, x) x (m0, m1, m0, m1, m0, m1, m0, m1) * (b0, b1, x, x, a0, a1, x, x) x (m0, m1, m0, m1, m0, m1, m0, m1) - * We can do some optimizations for this including one pixel cases. + * We can do some optimizations for this including last pixel cases. 
*/ .macro bilinear_duplicate_mask_x numpix, mask .endm .macro bilinear_duplicate_mask_8 numpix, mask .if numpix == 4 vdup.32 mask, mask[0] .elseif numpix == 2 @@ -492,18 +482,17 @@ fname: .macro bilinear_interpolate_last_pixel src_fmt, mask_fmt, dst_fmt, op bilinear_load_&src_fmt d0, d1, d2 bilinear_load_mask mask_fmt, 1, d4 bilinear_load_dst dst_fmt, op, 1, d18, d19, q9 vmull.u8 q1, d0, d28 vmlal.u8 q1, d1, d29 - vshr.u16 d30, d24, #8 - /* 4 cycles bubble */ + /* 5 cycles bubble */ vshll.u16 q0, d2, #8 vmlsl.u16 q0, d2, d30 vmlal.u16 q0, d3, d30 /* 5 cycles bubble */ bilinear_duplicate_mask mask_fmt, 1, d4 vshrn.u32 d0, q0, #16 /* 3 cycles bubble */ vmovn.u16 d0, q0 @@ -520,28 +509,28 @@ fname: bilinear_store_&dst_fmt 1, q2, q3 .endm .macro bilinear_interpolate_two_pixels src_fmt, mask_fmt, dst_fmt, op bilinear_load_and_vertical_interpolate_two_&src_fmt \ q1, q11, d0, d1, d20, d21, d22, d23 bilinear_load_mask mask_fmt, 2, d4 bilinear_load_dst dst_fmt, op, 2, d18, d19, q9 - vshr.u16 q15, q12, #8 - vadd.u16 q12, q12, q13 vshll.u16 q0, d2, #8 vmlsl.u16 q0, d2, d30 vmlal.u16 q0, d3, d30 vshll.u16 q10, d22, #8 vmlsl.u16 q10, d22, d31 vmlal.u16 q10, d23, d31 - vshrn.u32 d30, q0, #16 - vshrn.u32 d31, q10, #16 + vshrn.u32 d0, q0, #16 + vshrn.u32 d1, q10, #16 bilinear_duplicate_mask mask_fmt, 2, d4 - vmovn.u16 d0, q15 + vshr.u16 q15, q12, #8 + vadd.u16 q12, q12, q13 + vmovn.u16 d0, q0 bilinear_interleave_src_dst \ mask_fmt, op, 2, d0, d1, q0, d18, d19, q9 bilinear_apply_mask_to_src \ mask_fmt, 2, d0, d1, q0, d4, \ q3, q8, q10, q11 bilinear_combine \ op, 2, d0, d1, q0, d18, d19, q9, \ q3, q8, q10, q11, d5 @@ -549,221 +538,830 @@ fname: bilinear_store_&dst_fmt 2, q2, q3 .endm .macro bilinear_interpolate_four_pixels src_fmt, mask_fmt, dst_fmt, op bilinear_load_and_vertical_interpolate_four_&src_fmt \ q1, q11, d0, d1, d20, d21, d22, d23 \ q3, q9, d4, d5, d16, d17, d18, d19 pld [TMP1, PF_OFFS] - vshr.u16 q15, q12, #8 - vadd.u16 q12, q12, q13 + sub TMP1, TMP1, STRIDE vshll.u16 q0, d2, #8 vmlsl.u16 q0, d2, d30 vmlal.u16 q0, d3, d30 vshll.u16 q10, d22, #8 vmlsl.u16 q10, d22, d31 vmlal.u16 q10, d23, d31 vshr.u16 q15, q12, #8 vshll.u16 q2, d6, #8 vmlsl.u16 q2, d6, d30 vmlal.u16 q2, d7, d30 vshll.u16 q8, d18, #8 - bilinear_load_mask mask_fmt, 4, d30 + bilinear_load_mask mask_fmt, 4, d22 bilinear_load_dst dst_fmt, op, 4, d2, d3, q1 - pld [TMP2, PF_OFFS] + pld [TMP1, PF_OFFS] vmlsl.u16 q8, d18, d31 vmlal.u16 q8, d19, d31 vadd.u16 q12, q12, q13 vshrn.u32 d0, q0, #16 vshrn.u32 d1, q10, #16 vshrn.u32 d4, q2, #16 vshrn.u32 d5, q8, #16 - bilinear_duplicate_mask mask_fmt, 4, d30 + bilinear_duplicate_mask mask_fmt, 4, d22 + vshr.u16 q15, q12, #8 vmovn.u16 d0, q0 vmovn.u16 d1, q2 + vadd.u16 q12, q12, q13 bilinear_interleave_src_dst \ mask_fmt, op, 4, d0, d1, q0, d2, d3, q1 bilinear_apply_mask_to_src \ - mask_fmt, 4, d0, d1, q0, d30, \ + mask_fmt, 4, d0, d1, q0, d22, \ q3, q8, q9, q10 bilinear_combine \ op, 4, d0, d1, q0, d2, d3, q1, \ - q3, q8, q9, q10, d22 + q3, q8, q9, q10, d23 bilinear_deinterleave_dst mask_fmt, op, 4, d0, d1, q0 bilinear_store_&dst_fmt 4, q2, q3 .endm -.macro generate_bilinear_scanline_func_src_dst \ - fname, src_fmt, dst_fmt, op, \ - bpp_shift, prefetch_distance +.set BILINEAR_FLAG_USE_MASK, 1 +.set BILINEAR_FLAG_USE_ALL_NEON_REGS, 2 + +/* + * Main template macro for generating NEON optimized bilinear scanline functions. 
+ * + * Bilinear scanline generator macro take folling arguments: + * fname - name of the function to generate + * src_fmt - source color format (8888 or 0565) + * dst_fmt - destination color format (8888 or 0565) + * src/dst_bpp_shift - (1 << bpp_shift) is the size of src/dst pixel in bytes + * process_last_pixel - code block that interpolate one pixel and does not + * update horizontal weight + * process_two_pixels - code block that interpolate two pixels and update + * horizontal weight + * process_four_pixels - code block that interpolate four pixels and update + * horizontal weight + * process_pixblock_head - head part of middle loop + * process_pixblock_tail - tail part of middle loop + * process_pixblock_tail_head - tail_head of middle loop + * pixblock_size - number of pixels processed in a single middle loop + * prefetch_distance - prefetch in the source image by that many pixels ahead + */ + +.macro generate_bilinear_scanline_func \ + fname, \ + src_fmt, dst_fmt, src_bpp_shift, dst_bpp_shift, \ + bilinear_process_last_pixel, \ + bilinear_process_two_pixels, \ + bilinear_process_four_pixels, \ + bilinear_process_pixblock_head, \ + bilinear_process_pixblock_tail, \ + bilinear_process_pixblock_tail_head, \ + pixblock_size, \ + prefetch_distance, \ + flags pixman_asm_function fname - OUT .req r0 - TOP .req r1 - BOTTOM .req r2 - WT .req r3 - WB .req r4 - X .req r5 - UX .req r6 - WIDTH .req ip - TMP1 .req r3 - TMP2 .req r4 - PF_OFFS .req r7 - TMP3 .req r8 - TMP4 .req r9 - - mov ip, sp - push {r4, r5, r6, r7, r8, r9} - mov PF_OFFS, #prefetch_distance - ldmia ip, {WB, X, UX, WIDTH} - mul PF_OFFS, PF_OFFS, UX - - cmp WIDTH, #0 - ble 3f - - vdup.u16 q12, X - vdup.u16 q13, UX - vdup.u8 d28, WT - vdup.u8 d29, WB - vadd.u16 d25, d25, d26 - vadd.u16 q13, q13, q13 +.if pixblock_size == 8 +.elseif pixblock_size == 4 +.else + .error unsupported pixblock size +.endif - subs WIDTH, WIDTH, #4 - blt 1f - mov PF_OFFS, PF_OFFS, asr #(16 - bpp_shift) -0: - bilinear_interpolate_four_pixels src_fmt, x, dst_fmt, op - subs WIDTH, WIDTH, #4 - bge 0b -1: - tst WIDTH, #2 - beq 2f - bilinear_interpolate_two_pixels src_fmt, x, dst_fmt, op -2: - tst WIDTH, #1 - beq 3f - bilinear_interpolate_last_pixel src_fmt, x, dst_fmt, op -3: - pop {r4, r5, r6, r7, r8, r9} - bx lr +.if ((flags) & BILINEAR_FLAG_USE_MASK) == 0 + OUT .req r0 + TOP .req r1 + BOTTOM .req r2 + WT .req r3 + WB .req r4 + X .req r5 + UX .req r6 + WIDTH .req ip + TMP1 .req r3 + TMP2 .req r4 + PF_OFFS .req r7 + TMP3 .req r8 + TMP4 .req r9 + STRIDE .req r2 - .unreq OUT - .unreq TOP - .unreq BOTTOM - .unreq WT - .unreq WB - .unreq X - .unreq UX - .unreq WIDTH - .unreq TMP1 - .unreq TMP2 - .unreq PF_OFFS - .unreq TMP3 - .unreq TMP4 -.endfunc - -.endm - -.macro generate_bilinear_scanline_func_src_a8_dst \ - fname, src_fmt, dst_fmt, op, \ - bpp_shift, prefetch_distance - -pixman_asm_function fname + mov ip, sp + push {r4, r5, r6, r7, r8, r9} + mov PF_OFFS, #prefetch_distance + ldmia ip, {WB, X, UX, WIDTH} +.else OUT .req r0 MASK .req r1 TOP .req r2 BOTTOM .req r3 WT .req r4 WB .req r5 X .req r6 UX .req r7 WIDTH .req ip TMP1 .req r4 TMP2 .req r5 PF_OFFS .req r8 TMP3 .req r9 TMP4 .req r10 + STRIDE .req r3 + + .set prefetch_offset, prefetch_distance mov ip, sp push {r4, r5, r6, r7, r8, r9, r10, ip} mov PF_OFFS, #prefetch_distance ldmia ip, {WT, WB, X, UX, WIDTH} +.endif + mul PF_OFFS, PF_OFFS, UX +.if ((flags) & BILINEAR_FLAG_USE_ALL_NEON_REGS) != 0 + vpush {d8-d15} +.endif + + sub STRIDE, BOTTOM, TOP + .unreq BOTTOM + cmp WIDTH, #0 ble 3f vdup.u16 q12, X 
vdup.u16 q13, UX vdup.u8 d28, WT vdup.u8 d29, WB vadd.u16 d25, d25, d26 + + /* ensure good destination alignment */ + cmp WIDTH, #1 + blt 0f + tst OUT, #(1 << dst_bpp_shift) + beq 0f + vshr.u16 q15, q12, #8 + vadd.u16 q12, q12, q13 + bilinear_process_last_pixel + sub WIDTH, WIDTH, #1 +0: vadd.u16 q13, q13, q13 + vshr.u16 q15, q12, #8 + vadd.u16 q12, q12, q13 - subs WIDTH, WIDTH, #4 + cmp WIDTH, #2 + blt 0f + tst OUT, #(1 << (dst_bpp_shift + 1)) + beq 0f + bilinear_process_two_pixels + sub WIDTH, WIDTH, #2 +0: +.if pixblock_size == 8 + cmp WIDTH, #4 + blt 0f + tst OUT, #(1 << (dst_bpp_shift + 2)) + beq 0f + bilinear_process_four_pixels + sub WIDTH, WIDTH, #4 +0: +.endif + subs WIDTH, WIDTH, #pixblock_size blt 1f - mov PF_OFFS, PF_OFFS, asr #(16 - bpp_shift) + mov PF_OFFS, PF_OFFS, asr #(16 - src_bpp_shift) + bilinear_process_pixblock_head + subs WIDTH, WIDTH, #pixblock_size + blt 5f 0: - bilinear_interpolate_four_pixels src_fmt, 8, dst_fmt, op - subs WIDTH, WIDTH, #4 + bilinear_process_pixblock_tail_head + subs WIDTH, WIDTH, #pixblock_size bge 0b +5: + bilinear_process_pixblock_tail 1: +.if pixblock_size == 8 + tst WIDTH, #4 + beq 2f + bilinear_process_four_pixels +2: +.endif + /* handle the remaining trailing pixels */ tst WIDTH, #2 beq 2f - bilinear_interpolate_two_pixels src_fmt, 8, dst_fmt, op + bilinear_process_two_pixels 2: tst WIDTH, #1 beq 3f - bilinear_interpolate_last_pixel src_fmt, 8, dst_fmt, op + bilinear_process_last_pixel 3: +.if ((flags) & BILINEAR_FLAG_USE_ALL_NEON_REGS) != 0 + vpop {d8-d15} +.endif + +.if ((flags) & BILINEAR_FLAG_USE_MASK) == 0 + pop {r4, r5, r6, r7, r8, r9} +.else pop {r4, r5, r6, r7, r8, r9, r10, ip} +.endif bx lr .unreq OUT .unreq TOP - .unreq BOTTOM .unreq WT .unreq WB .unreq X .unreq UX .unreq WIDTH - .unreq MASK .unreq TMP1 .unreq TMP2 .unreq PF_OFFS .unreq TMP3 .unreq TMP4 + .unreq STRIDE +.if ((flags) & BILINEAR_FLAG_USE_MASK) != 0 + .unreq MASK +.endif + .endfunc .endm -generate_bilinear_scanline_func_src_dst \ - pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_neon, \ - 8888, 8888, over, 2, 28 +/* src_8888_8_8888 */ +.macro bilinear_src_8888_8_8888_process_last_pixel + bilinear_interpolate_last_pixel 8888, 8, 8888, src +.endm + +.macro bilinear_src_8888_8_8888_process_two_pixels + bilinear_interpolate_two_pixels 8888, 8, 8888, src +.endm + +.macro bilinear_src_8888_8_8888_process_four_pixels + bilinear_interpolate_four_pixels 8888, 8, 8888, src +.endm + +.macro bilinear_src_8888_8_8888_process_pixblock_head + bilinear_src_8888_8_8888_process_four_pixels +.endm + +.macro bilinear_src_8888_8_8888_process_pixblock_tail +.endm + +.macro bilinear_src_8888_8_8888_process_pixblock_tail_head + bilinear_src_8888_8_8888_process_pixblock_tail + bilinear_src_8888_8_8888_process_pixblock_head +.endm + +/* src_8888_8_0565 */ +.macro bilinear_src_8888_8_0565_process_last_pixel + bilinear_interpolate_last_pixel 8888, 8, 0565, src +.endm + +.macro bilinear_src_8888_8_0565_process_two_pixels + bilinear_interpolate_two_pixels 8888, 8, 0565, src +.endm + +.macro bilinear_src_8888_8_0565_process_four_pixels + bilinear_interpolate_four_pixels 8888, 8, 0565, src +.endm + +.macro bilinear_src_8888_8_0565_process_pixblock_head + bilinear_src_8888_8_0565_process_four_pixels +.endm + +.macro bilinear_src_8888_8_0565_process_pixblock_tail +.endm + +.macro bilinear_src_8888_8_0565_process_pixblock_tail_head + bilinear_src_8888_8_0565_process_pixblock_tail + bilinear_src_8888_8_0565_process_pixblock_head +.endm + +/* src_0565_8_x888 */ +.macro 
bilinear_src_0565_8_x888_process_last_pixel + bilinear_interpolate_last_pixel 0565, 8, 8888, src +.endm + +.macro bilinear_src_0565_8_x888_process_two_pixels + bilinear_interpolate_two_pixels 0565, 8, 8888, src +.endm + +.macro bilinear_src_0565_8_x888_process_four_pixels + bilinear_interpolate_four_pixels 0565, 8, 8888, src +.endm + +.macro bilinear_src_0565_8_x888_process_pixblock_head + bilinear_src_0565_8_x888_process_four_pixels +.endm -generate_bilinear_scanline_func_src_dst \ - pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_neon, \ - 8888, 8888, add, 2, 28 +.macro bilinear_src_0565_8_x888_process_pixblock_tail +.endm + +.macro bilinear_src_0565_8_x888_process_pixblock_tail_head + bilinear_src_0565_8_x888_process_pixblock_tail + bilinear_src_0565_8_x888_process_pixblock_head +.endm + +/* src_0565_8_0565 */ +.macro bilinear_src_0565_8_0565_process_last_pixel + bilinear_interpolate_last_pixel 0565, 8, 0565, src +.endm + +.macro bilinear_src_0565_8_0565_process_two_pixels + bilinear_interpolate_two_pixels 0565, 8, 0565, src +.endm + +.macro bilinear_src_0565_8_0565_process_four_pixels + bilinear_interpolate_four_pixels 0565, 8, 0565, src +.endm + +.macro bilinear_src_0565_8_0565_process_pixblock_head + bilinear_src_0565_8_0565_process_four_pixels +.endm + +.macro bilinear_src_0565_8_0565_process_pixblock_tail +.endm + +.macro bilinear_src_0565_8_0565_process_pixblock_tail_head + bilinear_src_0565_8_0565_process_pixblock_tail + bilinear_src_0565_8_0565_process_pixblock_head +.endm + +/* over_8888_8888 */ +.macro bilinear_over_8888_8888_process_last_pixel + bilinear_interpolate_last_pixel 8888, x, 8888, over +.endm + +.macro bilinear_over_8888_8888_process_two_pixels + bilinear_interpolate_two_pixels 8888, x, 8888, over +.endm + +.macro bilinear_over_8888_8888_process_four_pixels + bilinear_interpolate_four_pixels 8888, x, 8888, over +.endm + +.macro bilinear_over_8888_8888_process_pixblock_head + mov TMP1, X, asr #16 + add X, X, UX + add TMP1, TOP, TMP1, asl #2 + mov TMP2, X, asr #16 + add X, X, UX + add TMP2, TOP, TMP2, asl #2 + + vld1.32 {d22}, [TMP1], STRIDE + vld1.32 {d23}, [TMP1] + mov TMP3, X, asr #16 + add X, X, UX + add TMP3, TOP, TMP3, asl #2 + vmull.u8 q8, d22, d28 + vmlal.u8 q8, d23, d29 + + vld1.32 {d22}, [TMP2], STRIDE + vld1.32 {d23}, [TMP2] + mov TMP4, X, asr #16 + add X, X, UX + add TMP4, TOP, TMP4, asl #2 + vmull.u8 q9, d22, d28 + vmlal.u8 q9, d23, d29 -generate_bilinear_scanline_func_src_a8_dst \ - pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_neon, \ - 8888, 8888, src, 2, 28 + vld1.32 {d22}, [TMP3], STRIDE + vld1.32 {d23}, [TMP3] + vmull.u8 q10, d22, d28 + vmlal.u8 q10, d23, d29 + + vshll.u16 q0, d16, #8 + vmlsl.u16 q0, d16, d30 + vmlal.u16 q0, d17, d30 + + pld [TMP4, PF_OFFS] + vld1.32 {d16}, [TMP4], STRIDE + vld1.32 {d17}, [TMP4] + pld [TMP4, PF_OFFS] + vmull.u8 q11, d16, d28 + vmlal.u8 q11, d17, d29 + + vshll.u16 q1, d18, #8 + vmlsl.u16 q1, d18, d31 + vmlal.u16 q1, d19, d31 + vshr.u16 q15, q12, #8 + vadd.u16 q12, q12, q13 +.endm + +.macro bilinear_over_8888_8888_process_pixblock_tail + vshll.u16 q2, d20, #8 + vmlsl.u16 q2, d20, d30 + vmlal.u16 q2, d21, d30 + vshll.u16 q3, d22, #8 + vmlsl.u16 q3, d22, d31 + vmlal.u16 q3, d23, d31 + vshrn.u32 d0, q0, #16 + vshrn.u32 d1, q1, #16 + vld1.32 {d2, d3}, [OUT, :128] + pld [OUT, #(prefetch_offset * 4)] + vshrn.u32 d4, q2, #16 + vshr.u16 q15, q12, #8 + vshrn.u32 d5, q3, #16 + vmovn.u16 d6, q0 + vmovn.u16 d7, q2 + vuzp.8 d6, d7 + vuzp.8 d2, d3 + vuzp.8 d6, d7 + vuzp.8 d2, d3 + vdup.32 d4, d7[1] + vmvn.8 d4, d4 + 
vmull.u8 q11, d2, d4 + vmull.u8 q2, d3, d4 + vrshr.u16 q1, q11, #8 + vrshr.u16 q10, q2, #8 + vraddhn.u16 d2, q1, q11 + vraddhn.u16 d3, q10, q2 + vqadd.u8 q3, q1, q3 + vuzp.8 d6, d7 + vuzp.8 d6, d7 + vadd.u16 q12, q12, q13 + vst1.32 {d6, d7}, [OUT, :128]! +.endm -generate_bilinear_scanline_func_src_a8_dst \ - pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm_neon, \ - 8888, 0565, src, 2, 28 +.macro bilinear_over_8888_8888_process_pixblock_tail_head + vshll.u16 q2, d20, #8 + mov TMP1, X, asr #16 + add X, X, UX + add TMP1, TOP, TMP1, asl #2 + vmlsl.u16 q2, d20, d30 + mov TMP2, X, asr #16 + add X, X, UX + add TMP2, TOP, TMP2, asl #2 + vmlal.u16 q2, d21, d30 + vshll.u16 q3, d22, #8 + vld1.32 {d20}, [TMP1], STRIDE + vmlsl.u16 q3, d22, d31 + vmlal.u16 q3, d23, d31 + vld1.32 {d21}, [TMP1] + vmull.u8 q8, d20, d28 + vmlal.u8 q8, d21, d29 + vshrn.u32 d0, q0, #16 + vshrn.u32 d1, q1, #16 + vld1.32 {d2, d3}, [OUT, :128] + pld [OUT, PF_OFFS] + vshrn.u32 d4, q2, #16 + vshr.u16 q15, q12, #8 + vld1.32 {d22}, [TMP2], STRIDE + vshrn.u32 d5, q3, #16 + vmovn.u16 d6, q0 + vld1.32 {d23}, [TMP2] + vmull.u8 q9, d22, d28 + mov TMP3, X, asr #16 + add X, X, UX + add TMP3, TOP, TMP3, asl #2 + mov TMP4, X, asr #16 + add X, X, UX + add TMP4, TOP, TMP4, asl #2 + vmlal.u8 q9, d23, d29 + vmovn.u16 d7, q2 + vld1.32 {d22}, [TMP3], STRIDE + vuzp.8 d6, d7 + vuzp.8 d2, d3 + vuzp.8 d6, d7 + vuzp.8 d2, d3 + vdup.32 d4, d7[1] + vld1.32 {d23}, [TMP3] + vmvn.8 d4, d4 + vmull.u8 q10, d22, d28 + vmlal.u8 q10, d23, d29 + vmull.u8 q11, d2, d4 + vmull.u8 q2, d3, d4 + vshll.u16 q0, d16, #8 + vmlsl.u16 q0, d16, d30 + vrshr.u16 q1, q11, #8 + vmlal.u16 q0, d17, d30 + vrshr.u16 q8, q2, #8 + vraddhn.u16 d2, q1, q11 + vraddhn.u16 d3, q8, q2 + pld [TMP4, PF_OFFS] + vld1.32 {d16}, [TMP4], STRIDE + vqadd.u8 q3, q1, q3 + vld1.32 {d17}, [TMP4] + pld [TMP4, PF_OFFS] + vmull.u8 q11, d16, d28 + vmlal.u8 q11, d17, d29 + vuzp.8 d6, d7 + vshll.u16 q1, d18, #8 + vuzp.8 d6, d7 + vmlsl.u16 q1, d18, d31 + vadd.u16 q12, q12, q13 + vmlal.u16 q1, d19, d31 + vshr.u16 q15, q12, #8 + vadd.u16 q12, q12, q13 + vst1.32 {d6, d7}, [OUT, :128]! 
+.endm + +/* over_8888_8_8888 */ +.macro bilinear_over_8888_8_8888_process_last_pixel + bilinear_interpolate_last_pixel 8888, 8, 8888, over +.endm + +.macro bilinear_over_8888_8_8888_process_two_pixels + bilinear_interpolate_two_pixels 8888, 8, 8888, over +.endm + +.macro bilinear_over_8888_8_8888_process_four_pixels + bilinear_interpolate_four_pixels 8888, 8, 8888, over +.endm -generate_bilinear_scanline_func_src_a8_dst \ - pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm_neon, \ - 0565, 8888, src, 1, 28 +.macro bilinear_over_8888_8_8888_process_pixblock_head + mov TMP1, X, asr #16 + add X, X, UX + add TMP1, TOP, TMP1, asl #2 + vld1.32 {d0}, [TMP1], STRIDE + mov TMP2, X, asr #16 + add X, X, UX + add TMP2, TOP, TMP2, asl #2 + vld1.32 {d1}, [TMP1] + mov TMP3, X, asr #16 + add X, X, UX + add TMP3, TOP, TMP3, asl #2 + vld1.32 {d2}, [TMP2], STRIDE + mov TMP4, X, asr #16 + add X, X, UX + add TMP4, TOP, TMP4, asl #2 + vld1.32 {d3}, [TMP2] + vmull.u8 q2, d0, d28 + vmull.u8 q3, d2, d28 + vmlal.u8 q2, d1, d29 + vmlal.u8 q3, d3, d29 + vshll.u16 q0, d4, #8 + vshll.u16 q1, d6, #8 + vmlsl.u16 q0, d4, d30 + vmlsl.u16 q1, d6, d31 + vmlal.u16 q0, d5, d30 + vmlal.u16 q1, d7, d31 + vshrn.u32 d0, q0, #16 + vshrn.u32 d1, q1, #16 + vld1.32 {d2}, [TMP3], STRIDE + vld1.32 {d3}, [TMP3] + pld [TMP4, PF_OFFS] + vld1.32 {d4}, [TMP4], STRIDE + vld1.32 {d5}, [TMP4] + pld [TMP4, PF_OFFS] + vmull.u8 q3, d2, d28 + vmlal.u8 q3, d3, d29 + vmull.u8 q1, d4, d28 + vmlal.u8 q1, d5, d29 + vshr.u16 q15, q12, #8 + vld1.32 {d22[0]}, [MASK]! + pld [MASK, #prefetch_offset] + vadd.u16 q12, q12, q13 + vmovn.u16 d16, q0 +.endm -generate_bilinear_scanline_func_src_a8_dst \ - pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm_neon, \ - 0565, 0565, src, 1, 28 +.macro bilinear_over_8888_8_8888_process_pixblock_tail + vshll.u16 q9, d6, #8 + vshll.u16 q10, d2, #8 + vmlsl.u16 q9, d6, d30 + vmlsl.u16 q10, d2, d31 + vmlal.u16 q9, d7, d30 + vmlal.u16 q10, d3, d31 + vshr.u16 q15, q12, #8 + vadd.u16 q12, q12, q13 + vdup.32 d22, d22[0] + vshrn.u32 d18, q9, #16 + vshrn.u32 d19, q10, #16 + vmovn.u16 d17, q9 + vld1.32 {d18, d19}, [OUT, :128] + pld [OUT, PF_OFFS] + vuzp.8 d16, d17 + vuzp.8 d18, d19 + vuzp.8 d16, d17 + vuzp.8 d18, d19 + vmull.u8 q10, d16, d22 + vmull.u8 q11, d17, d22 + vrsra.u16 q10, q10, #8 + vrsra.u16 q11, q11, #8 + vrshrn.u16 d16, q10, #8 + vrshrn.u16 d17, q11, #8 + vdup.32 d22, d17[1] + vmvn.8 d22, d22 + vmull.u8 q10, d18, d22 + vmull.u8 q11, d19, d22 + vrshr.u16 q9, q10, #8 + vrshr.u16 q0, q11, #8 + vraddhn.u16 d18, q9, q10 + vraddhn.u16 d19, q0, q11 + vqadd.u8 q9, q8, q9 + vuzp.8 d18, d19 + vuzp.8 d18, d19 + vst1.32 {d18, d19}, [OUT, :128]! 
+.endm -generate_bilinear_scanline_func_src_a8_dst \ - pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_neon, \ - 8888, 8888, over, 2, 28 +.macro bilinear_over_8888_8_8888_process_pixblock_tail_head + vshll.u16 q9, d6, #8 + mov TMP1, X, asr #16 + add X, X, UX + add TMP1, TOP, TMP1, asl #2 + vshll.u16 q10, d2, #8 + vld1.32 {d0}, [TMP1], STRIDE + mov TMP2, X, asr #16 + add X, X, UX + add TMP2, TOP, TMP2, asl #2 + vmlsl.u16 q9, d6, d30 + vmlsl.u16 q10, d2, d31 + vld1.32 {d1}, [TMP1] + mov TMP3, X, asr #16 + add X, X, UX + add TMP3, TOP, TMP3, asl #2 + vmlal.u16 q9, d7, d30 + vmlal.u16 q10, d3, d31 + vld1.32 {d2}, [TMP2], STRIDE + mov TMP4, X, asr #16 + add X, X, UX + add TMP4, TOP, TMP4, asl #2 + vshr.u16 q15, q12, #8 + vadd.u16 q12, q12, q13 + vld1.32 {d3}, [TMP2] + vdup.32 d22, d22[0] + vshrn.u32 d18, q9, #16 + vshrn.u32 d19, q10, #16 + vmull.u8 q2, d0, d28 + vmull.u8 q3, d2, d28 + vmovn.u16 d17, q9 + vld1.32 {d18, d19}, [OUT, :128] + pld [OUT, #(prefetch_offset * 4)] + vmlal.u8 q2, d1, d29 + vmlal.u8 q3, d3, d29 + vuzp.8 d16, d17 + vuzp.8 d18, d19 + vshll.u16 q0, d4, #8 + vshll.u16 q1, d6, #8 + vuzp.8 d16, d17 + vuzp.8 d18, d19 + vmlsl.u16 q0, d4, d30 + vmlsl.u16 q1, d6, d31 + vmull.u8 q10, d16, d22 + vmull.u8 q11, d17, d22 + vmlal.u16 q0, d5, d30 + vmlal.u16 q1, d7, d31 + vrsra.u16 q10, q10, #8 + vrsra.u16 q11, q11, #8 + vshrn.u32 d0, q0, #16 + vshrn.u32 d1, q1, #16 + vrshrn.u16 d16, q10, #8 + vrshrn.u16 d17, q11, #8 + vld1.32 {d2}, [TMP3], STRIDE + vdup.32 d22, d17[1] + vld1.32 {d3}, [TMP3] + vmvn.8 d22, d22 + pld [TMP4, PF_OFFS] + vld1.32 {d4}, [TMP4], STRIDE + vmull.u8 q10, d18, d22 + vmull.u8 q11, d19, d22 + vld1.32 {d5}, [TMP4] + pld [TMP4, PF_OFFS] + vmull.u8 q3, d2, d28 + vrshr.u16 q9, q10, #8 + vrshr.u16 q15, q11, #8 + vmlal.u8 q3, d3, d29 + vmull.u8 q1, d4, d28 + vraddhn.u16 d18, q9, q10 + vraddhn.u16 d19, q15, q11 + vmlal.u8 q1, d5, d29 + vshr.u16 q15, q12, #8 + vqadd.u8 q9, q8, q9 + vld1.32 {d22[0]}, [MASK]! + vuzp.8 d18, d19 + vadd.u16 q12, q12, q13 + vuzp.8 d18, d19 + vmovn.u16 d16, q0 + vst1.32 {d18, d19}, [OUT, :128]! 
+.endm + +/* add_8888_8888 */ +.macro bilinear_add_8888_8888_process_last_pixel + bilinear_interpolate_last_pixel 8888, x, 8888, add +.endm + +.macro bilinear_add_8888_8888_process_two_pixels + bilinear_interpolate_two_pixels 8888, x, 8888, add +.endm + +.macro bilinear_add_8888_8888_process_four_pixels + bilinear_interpolate_four_pixels 8888, x, 8888, add +.endm + +.macro bilinear_add_8888_8888_process_pixblock_head + bilinear_add_8888_8888_process_four_pixels +.endm + +.macro bilinear_add_8888_8888_process_pixblock_tail +.endm + +.macro bilinear_add_8888_8888_process_pixblock_tail_head + bilinear_add_8888_8888_process_pixblock_tail + bilinear_add_8888_8888_process_pixblock_head +.endm -generate_bilinear_scanline_func_src_a8_dst \ +/* add_8888_8_8888 */ +.macro bilinear_add_8888_8_8888_process_last_pixel + bilinear_interpolate_last_pixel 8888, 8, 8888, add +.endm + +.macro bilinear_add_8888_8_8888_process_two_pixels + bilinear_interpolate_two_pixels 8888, 8, 8888, add +.endm + +.macro bilinear_add_8888_8_8888_process_four_pixels + bilinear_interpolate_four_pixels 8888, 8, 8888, add +.endm + +.macro bilinear_add_8888_8_8888_process_pixblock_head + bilinear_add_8888_8_8888_process_four_pixels +.endm + +.macro bilinear_add_8888_8_8888_process_pixblock_tail +.endm + +.macro bilinear_add_8888_8_8888_process_pixblock_tail_head + bilinear_add_8888_8_8888_process_pixblock_tail + bilinear_add_8888_8_8888_process_pixblock_head +.endm + + +/* Bilinear scanline functions */ +generate_bilinear_scanline_func \ + pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_neon, \ + 8888, 8888, 2, 2, \ + bilinear_src_8888_8_8888_process_last_pixel, \ + bilinear_src_8888_8_8888_process_two_pixels, \ + bilinear_src_8888_8_8888_process_four_pixels, \ + bilinear_src_8888_8_8888_process_pixblock_head, \ + bilinear_src_8888_8_8888_process_pixblock_tail, \ + bilinear_src_8888_8_8888_process_pixblock_tail_head, \ + 4, 28, BILINEAR_FLAG_USE_MASK + +generate_bilinear_scanline_func \ + pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm_neon, \ + 8888, 0565, 2, 1, \ + bilinear_src_8888_8_0565_process_last_pixel, \ + bilinear_src_8888_8_0565_process_two_pixels, \ + bilinear_src_8888_8_0565_process_four_pixels, \ + bilinear_src_8888_8_0565_process_pixblock_head, \ + bilinear_src_8888_8_0565_process_pixblock_tail, \ + bilinear_src_8888_8_0565_process_pixblock_tail_head, \ + 4, 28, BILINEAR_FLAG_USE_MASK + +generate_bilinear_scanline_func \ + pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm_neon, \ + 0565, 8888, 1, 2, \ + bilinear_src_0565_8_x888_process_last_pixel, \ + bilinear_src_0565_8_x888_process_two_pixels, \ + bilinear_src_0565_8_x888_process_four_pixels, \ + bilinear_src_0565_8_x888_process_pixblock_head, \ + bilinear_src_0565_8_x888_process_pixblock_tail, \ + bilinear_src_0565_8_x888_process_pixblock_tail_head, \ + 4, 28, BILINEAR_FLAG_USE_MASK + +generate_bilinear_scanline_func \ + pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm_neon, \ + 0565, 0565, 1, 1, \ + bilinear_src_0565_8_0565_process_last_pixel, \ + bilinear_src_0565_8_0565_process_two_pixels, \ + bilinear_src_0565_8_0565_process_four_pixels, \ + bilinear_src_0565_8_0565_process_pixblock_head, \ + bilinear_src_0565_8_0565_process_pixblock_tail, \ + bilinear_src_0565_8_0565_process_pixblock_tail_head, \ + 4, 28, BILINEAR_FLAG_USE_MASK + +generate_bilinear_scanline_func \ + pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_neon, \ + 8888, 8888, 2, 2, \ + bilinear_over_8888_8888_process_last_pixel, \ + bilinear_over_8888_8888_process_two_pixels, \ + 
bilinear_over_8888_8888_process_four_pixels, \ + bilinear_over_8888_8888_process_pixblock_head, \ + bilinear_over_8888_8888_process_pixblock_tail, \ + bilinear_over_8888_8888_process_pixblock_tail_head, \ + 4, 28, 0 + +generate_bilinear_scanline_func \ + pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_neon, \ + 8888, 8888, 2, 2, \ + bilinear_over_8888_8_8888_process_last_pixel, \ + bilinear_over_8888_8_8888_process_two_pixels, \ + bilinear_over_8888_8_8888_process_four_pixels, \ + bilinear_over_8888_8_8888_process_pixblock_head, \ + bilinear_over_8888_8_8888_process_pixblock_tail, \ + bilinear_over_8888_8_8888_process_pixblock_tail_head, \ + 4, 28, BILINEAR_FLAG_USE_MASK + +generate_bilinear_scanline_func \ + pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_neon, \ + 8888, 8888, 2, 2, \ + bilinear_add_8888_8888_process_last_pixel, \ + bilinear_add_8888_8888_process_two_pixels, \ + bilinear_add_8888_8888_process_four_pixels, \ + bilinear_add_8888_8888_process_pixblock_head, \ + bilinear_add_8888_8888_process_pixblock_tail, \ + bilinear_add_8888_8888_process_pixblock_tail_head, \ + 4, 28, 0 + +generate_bilinear_scanline_func \ pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm_neon, \ - 8888, 8888, add, 2, 28 + 8888, 8888, 2, 2, \ + bilinear_add_8888_8_8888_process_last_pixel, \ + bilinear_add_8888_8_8888_process_two_pixels, \ + bilinear_add_8888_8_8888_process_four_pixels, \ + bilinear_add_8888_8_8888_process_pixblock_head, \ + bilinear_add_8888_8_8888_process_pixblock_tail, \ + bilinear_add_8888_8_8888_process_pixblock_tail_head, \ + 4, 28, BILINEAR_FLAG_USE_MASK
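[note on pixman-arm-neon-asm-bilinear.S] All of the generated scanline functions compute the same per-pixel result; only the fetching, masking, and combining around it differ. A scalar C sketch of that result, under the template's conventions as documented above (X/UX in 16.16 fixed point, distx taken from the top 8 fractional bits of X, and vertical weights with wt + wb == 256 — the exact fixed-point details live in pixman-inlines.h, so treat this as a reference, not the implementation):

#include <stdint.h>

/* Bilinear blend of one a8r8g8b8 pixel from two source rows.
 * tl/tr are the top scanline's pixels at x and x+1, bl/br the
 * bottom scanline's; distx is the horizontal weight in [0, 255].
 * Channels are processed 8 bits at a time so everything fits in
 * 32-bit arithmetic. */
static uint32_t
bilinear_interpolate (uint32_t tl, uint32_t tr,
                      uint32_t bl, uint32_t br,
                      int distx, int wt, int wb)
{
    uint32_t r = 0;
    int shift;

    for (shift = 0; shift < 32; shift += 8)
    {
        int t = ((tl >> shift) & 0xff) * (256 - distx)
              + ((tr >> shift) & 0xff) * distx;        /* horizontal */
        int b = ((bl >> shift) & 0xff) * (256 - distx)
              + ((br >> shift) & 0xff) * distx;
        r |= (uint32_t) (((t * wt + b * wb) >> 16) & 0xff) << shift;  /* vertical */
    }
    return r;
}

Each generated function is this loop vectorized, with distx updated per pixel as (X >> 8) & 0xff and X += UX — which appears to be what the recurring vshr.u16 q15, q12, #8 / vadd.u16 q12, q12, q13 pair in the macros above is doing.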
--- a/gfx/cairo/libpixman/src/pixman-arm-neon-asm.S +++ b/gfx/cairo/libpixman/src/pixman-arm-neon-asm.S @@ -701,32 +701,75 @@ generate_composite_function_single_scanl default_init, \ default_cleanup, \ pixman_composite_over_8888_8888_process_pixblock_head, \ pixman_composite_over_8888_8888_process_pixblock_tail, \ pixman_composite_over_8888_8888_process_pixblock_tail_head /******************************************************************************/ -/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_over_n_8888_process_pixblock_head + /* deinterleaved source pixels in {d0, d1, d2, d3} */ + /* inverted alpha in {d24} */ + /* destination pixels in {d4, d5, d6, d7} */ + vmull.u8 q8, d24, d4 + vmull.u8 q9, d24, d5 + vmull.u8 q10, d24, d6 + vmull.u8 q11, d24, d7 +.endm + +.macro pixman_composite_over_n_8888_process_pixblock_tail + vrshr.u16 q14, q8, #8 + vrshr.u16 q15, q9, #8 + vrshr.u16 q2, q10, #8 + vrshr.u16 q3, q11, #8 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + vraddhn.u16 d30, q2, q10 + vraddhn.u16 d31, q3, q11 + vqadd.u8 q14, q0, q14 + vqadd.u8 q15, q1, q15 +.endm + .macro pixman_composite_over_n_8888_process_pixblock_tail_head - pixman_composite_over_8888_8888_process_pixblock_tail + vrshr.u16 q14, q8, #8 + vrshr.u16 q15, q9, #8 + vrshr.u16 q2, q10, #8 + vrshr.u16 q3, q11, #8 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + vraddhn.u16 d30, q2, q10 + vraddhn.u16 d31, q3, q11 vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! - pixman_composite_over_8888_8888_process_pixblock_head - cache_preload 8, 8 + vqadd.u8 q14, q0, q14 + PF add PF_X, PF_X, #8 + PF tst PF_CTL, #0x0F + PF addne PF_X, PF_X, #8 + PF subne PF_CTL, PF_CTL, #1 + vqadd.u8 q15, q1, q15 + PF cmp PF_X, ORIG_W + vmull.u8 q8, d24, d4 + PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] + vmull.u8 q9, d24, d5 + PF subge PF_X, PF_X, ORIG_W + vmull.u8 q10, d24, d6 + PF subges PF_CTL, PF_CTL, #0x10 + vmull.u8 q11, d24, d7 + PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! 
.endm .macro pixman_composite_over_n_8888_init add DUMMY, sp, #ARGS_STACK_OFFSET vld1.32 {d3[0]}, [DUMMY] vdup.8 d0, d3[0] vdup.8 d1, d3[1] vdup.8 d2, d3[2] vdup.8 d3, d3[3] + vmvn.8 d24, d3 /* get inverted alpha */ .endm generate_composite_function \ pixman_composite_over_n_8888_asm_neon, 0, 0, 32, \ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ 8, /* number of pixels, processed in a single block */ \ 5, /* prefetch distance */ \ pixman_composite_over_n_8888_init, \ @@ -1171,66 +1214,239 @@ generate_composite_function \ pixman_composite_src_x888_8888_process_pixblock_tail_head, \ 0, /* dst_w_basereg */ \ 0, /* dst_r_basereg */ \ 0, /* src_basereg */ \ 0 /* mask_basereg */ /******************************************************************************/ +.macro pixman_composite_src_n_8_8888_process_pixblock_head + /* expecting solid source in {d0, d1, d2, d3} */ + /* mask is in d24 (d25, d26, d27 are unused) */ + + /* in */ + vmull.u8 q8, d24, d0 + vmull.u8 q9, d24, d1 + vmull.u8 q10, d24, d2 + vmull.u8 q11, d24, d3 + vrsra.u16 q8, q8, #8 + vrsra.u16 q9, q9, #8 + vrsra.u16 q10, q10, #8 + vrsra.u16 q11, q11, #8 +.endm + +.macro pixman_composite_src_n_8_8888_process_pixblock_tail + vrshrn.u16 d28, q8, #8 + vrshrn.u16 d29, q9, #8 + vrshrn.u16 d30, q10, #8 + vrshrn.u16 d31, q11, #8 +.endm + +.macro pixman_composite_src_n_8_8888_process_pixblock_tail_head + fetch_mask_pixblock + PF add PF_X, PF_X, #8 + vrshrn.u16 d28, q8, #8 + PF tst PF_CTL, #0x0F + vrshrn.u16 d29, q9, #8 + PF addne PF_X, PF_X, #8 + vrshrn.u16 d30, q10, #8 + PF subne PF_CTL, PF_CTL, #1 + vrshrn.u16 d31, q11, #8 + PF cmp PF_X, ORIG_W + vmull.u8 q8, d24, d0 + PF pld, [PF_MASK, PF_X, lsl #mask_bpp_shift] + vmull.u8 q9, d24, d1 + PF subge PF_X, PF_X, ORIG_W + vmull.u8 q10, d24, d2 + PF subges PF_CTL, PF_CTL, #0x10 + vmull.u8 q11, d24, d3 + PF ldrgeb DUMMY, [PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift]! + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! 
+ vrsra.u16 q8, q8, #8 + vrsra.u16 q9, q9, #8 + vrsra.u16 q10, q10, #8 + vrsra.u16 q11, q11, #8 +.endm + +.macro pixman_composite_src_n_8_8888_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vld1.32 {d3[0]}, [DUMMY] + vdup.8 d0, d3[0] + vdup.8 d1, d3[1] + vdup.8 d2, d3[2] + vdup.8 d3, d3[3] +.endm + +.macro pixman_composite_src_n_8_8888_cleanup +.endm + +generate_composite_function \ + pixman_composite_src_n_8_8888_asm_neon, 0, 8, 32, \ + FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_src_n_8_8888_init, \ + pixman_composite_src_n_8_8888_cleanup, \ + pixman_composite_src_n_8_8888_process_pixblock_head, \ + pixman_composite_src_n_8_8888_process_pixblock_tail, \ + pixman_composite_src_n_8_8888_process_pixblock_tail_head, \ + +/******************************************************************************/ + +.macro pixman_composite_src_n_8_8_process_pixblock_head + vmull.u8 q0, d24, d16 + vmull.u8 q1, d25, d16 + vmull.u8 q2, d26, d16 + vmull.u8 q3, d27, d16 + vrsra.u16 q0, q0, #8 + vrsra.u16 q1, q1, #8 + vrsra.u16 q2, q2, #8 + vrsra.u16 q3, q3, #8 +.endm + +.macro pixman_composite_src_n_8_8_process_pixblock_tail + vrshrn.u16 d28, q0, #8 + vrshrn.u16 d29, q1, #8 + vrshrn.u16 d30, q2, #8 + vrshrn.u16 d31, q3, #8 +.endm + +.macro pixman_composite_src_n_8_8_process_pixblock_tail_head + fetch_mask_pixblock + PF add PF_X, PF_X, #8 + vrshrn.u16 d28, q0, #8 + PF tst PF_CTL, #0x0F + vrshrn.u16 d29, q1, #8 + PF addne PF_X, PF_X, #8 + vrshrn.u16 d30, q2, #8 + PF subne PF_CTL, PF_CTL, #1 + vrshrn.u16 d31, q3, #8 + PF cmp PF_X, ORIG_W + vmull.u8 q0, d24, d16 + PF pld, [PF_MASK, PF_X, lsl #mask_bpp_shift] + vmull.u8 q1, d25, d16 + PF subge PF_X, PF_X, ORIG_W + vmull.u8 q2, d26, d16 + PF subges PF_CTL, PF_CTL, #0x10 + vmull.u8 q3, d27, d16 + PF ldrgeb DUMMY, [PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift]! + vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! 
+ vrsra.u16 q0, q0, #8 + vrsra.u16 q1, q1, #8 + vrsra.u16 q2, q2, #8 + vrsra.u16 q3, q3, #8 +.endm + +.macro pixman_composite_src_n_8_8_init + add DUMMY, sp, #ARGS_STACK_OFFSET + vld1.32 {d16[0]}, [DUMMY] + vdup.8 d16, d16[3] +.endm + +.macro pixman_composite_src_n_8_8_cleanup +.endm + +generate_composite_function \ + pixman_composite_src_n_8_8_asm_neon, 0, 8, 8, \ + FLAG_DST_WRITEONLY, \ + 32, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + pixman_composite_src_n_8_8_init, \ + pixman_composite_src_n_8_8_cleanup, \ + pixman_composite_src_n_8_8_process_pixblock_head, \ + pixman_composite_src_n_8_8_process_pixblock_tail, \ + pixman_composite_src_n_8_8_process_pixblock_tail_head + +/******************************************************************************/ + .macro pixman_composite_over_n_8_8888_process_pixblock_head /* expecting deinterleaved source data in {d8, d9, d10, d11} */ /* d8 - blue, d9 - green, d10 - red, d11 - alpha */ /* and destination data in {d4, d5, d6, d7} */ /* mask is in d24 (d25, d26, d27 are unused) */ /* in */ - vmull.u8 q0, d24, d8 - vmull.u8 q1, d24, d9 - vmull.u8 q6, d24, d10 - vmull.u8 q7, d24, d11 - vrshr.u16 q10, q0, #8 - vrshr.u16 q11, q1, #8 - vrshr.u16 q12, q6, #8 - vrshr.u16 q13, q7, #8 - vraddhn.u16 d0, q0, q10 - vraddhn.u16 d1, q1, q11 - vraddhn.u16 d2, q6, q12 - vraddhn.u16 d3, q7, q13 - vmvn.8 d24, d3 /* get inverted alpha */ + vmull.u8 q6, d24, d8 + vmull.u8 q7, d24, d9 + vmull.u8 q8, d24, d10 + vmull.u8 q9, d24, d11 + vrshr.u16 q10, q6, #8 + vrshr.u16 q11, q7, #8 + vrshr.u16 q12, q8, #8 + vrshr.u16 q13, q9, #8 + vraddhn.u16 d0, q6, q10 + vraddhn.u16 d1, q7, q11 + vraddhn.u16 d2, q8, q12 + vraddhn.u16 d3, q9, q13 + vmvn.8 d25, d3 /* get inverted alpha */ /* source: d0 - blue, d1 - green, d2 - red, d3 - alpha */ /* destination: d4 - blue, d5 - green, d6 - red, d7 - alpha */ /* now do alpha blending */ - vmull.u8 q8, d24, d4 - vmull.u8 q9, d24, d5 - vmull.u8 q10, d24, d6 - vmull.u8 q11, d24, d7 + vmull.u8 q8, d25, d4 + vmull.u8 q9, d25, d5 + vmull.u8 q10, d25, d6 + vmull.u8 q11, d25, d7 .endm .macro pixman_composite_over_n_8_8888_process_pixblock_tail vrshr.u16 q14, q8, #8 vrshr.u16 q15, q9, #8 - vrshr.u16 q12, q10, #8 - vrshr.u16 q13, q11, #8 + vrshr.u16 q6, q10, #8 + vrshr.u16 q7, q11, #8 vraddhn.u16 d28, q14, q8 vraddhn.u16 d29, q15, q9 - vraddhn.u16 d30, q12, q10 - vraddhn.u16 d31, q13, q11 + vraddhn.u16 d30, q6, q10 + vraddhn.u16 d31, q7, q11 vqadd.u8 q14, q0, q14 vqadd.u8 q15, q1, q15 .endm -/* TODO: expand macros and do better instructions scheduling */ .macro pixman_composite_over_n_8_8888_process_pixblock_tail_head - pixman_composite_over_n_8_8888_process_pixblock_tail - vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! + vrshr.u16 q14, q8, #8 vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! + vrshr.u16 q15, q9, #8 fetch_mask_pixblock - cache_preload 8, 8 - pixman_composite_over_n_8_8888_process_pixblock_head + vrshr.u16 q6, q10, #8 + PF add PF_X, PF_X, #8 + vrshr.u16 q7, q11, #8 + PF tst PF_CTL, #0x0F + vraddhn.u16 d28, q14, q8 + PF addne PF_X, PF_X, #8 + vraddhn.u16 d29, q15, q9 + PF subne PF_CTL, PF_CTL, #1 + vraddhn.u16 d30, q6, q10 + PF cmp PF_X, ORIG_W + vraddhn.u16 d31, q7, q11 + PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift] + vmull.u8 q6, d24, d8 + PF pld, [PF_MASK, PF_X, lsl #mask_bpp_shift] + vmull.u8 q7, d24, d9 + PF subge PF_X, PF_X, ORIG_W + vmull.u8 q8, d24, d10 + PF subges PF_CTL, PF_CTL, #0x10 + vmull.u8 q9, d24, d11 + PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]! 
+ vqadd.u8 q14, q0, q14 + PF ldrgeb DUMMY, [PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift]! + vqadd.u8 q15, q1, q15 + vrshr.u16 q10, q6, #8 + vrshr.u16 q11, q7, #8 + vrshr.u16 q12, q8, #8 + vrshr.u16 q13, q9, #8 + vraddhn.u16 d0, q6, q10 + vraddhn.u16 d1, q7, q11 + vraddhn.u16 d2, q8, q12 + vraddhn.u16 d3, q9, q13 + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! + vmvn.8 d25, d3 + vmull.u8 q8, d25, d4 + vmull.u8 q9, d25, d5 + vmull.u8 q10, d25, d6 + vmull.u8 q11, d25, d7 .endm .macro pixman_composite_over_n_8_8888_init add DUMMY, sp, #ARGS_STACK_OFFSET vpush {d8-d15} vld1.32 {d11[0]}, [DUMMY] vdup.8 d8, d11[0] vdup.8 d9, d11[1] @@ -1510,21 +1726,21 @@ generate_composite_function \ /* * 'combine_mask_ca' replacement * * input: solid src (n) in {d8, d9, d10, d11} [B, G, R, A] * mask in {d24, d25, d26} [B, G, R] * output: updated src in {d0, d1, d2 } [B, G, R] * updated mask in {d24, d25, d26} [B, G, R] */ - vmull.u8 q1, d25, d9 + vmull.u8 q6, d26, d10 vqadd.u8 q8, q0, q8 vmull.u8 q0, d24, d8 vqadd.u8 d22, d2, d22 - vmull.u8 q6, d26, d10 + vmull.u8 q1, d25, d9 /* * convert the result in d16, d17, d22 to r5g6b5 and store * it into {d28, d29} */ vshll.u8 q14, d22, #8 vshll.u8 q10, d17, #8 vshll.u8 q15, d16, #8 vmull.u8 q9, d11, d25 @@ -1537,29 +1753,29 @@ generate_composite_function \ vrshr.u16 q10, q1, #8 vrshr.u16 q11, q6, #8 vraddhn.u16 d0, q0, q8 vraddhn.u16 d1, q1, q10 vraddhn.u16 d2, q6, q11 vrshr.u16 q11, q12, #8 vrshr.u16 q8, q9, #8 vrshr.u16 q6, q13, #8 + vraddhn.u16 d24, q12, q11 vraddhn.u16 d25, q9, q8 /* * convert 8 r5g6b5 pixel data from {d4, d5} to planar * 8-bit format and put data into d16 - blue, d17 - green, * d18 - red */ vshrn.u16 d17, q2, #3 vshrn.u16 d18, q2, #8 - vraddhn.u16 d24, q12, q11 vraddhn.u16 d26, q13, q6 vsli.u16 q2, q2, #5 + vsri.u8 d17, d17, #6 vsri.u8 d18, d18, #5 - vsri.u8 d17, d17, #6 /* * 'combine_over_ca' replacement * * output: updated dest in d16 - blue, d17 - green, d18 - red */ vmvn.8 q12, q12 vshrn.u16 d16, q2, #2 vmvn.8 d26, d26 @@ -2484,16 +2700,66 @@ generate_composite_function \ pixman_composite_out_reverse_8_0565_process_pixblock_tail_head, \ 28, /* dst_w_basereg */ \ 10, /* dst_r_basereg */ \ 15, /* src_basereg */ \ 0 /* mask_basereg */ /******************************************************************************/ +.macro pixman_composite_out_reverse_8_8888_process_pixblock_head + /* src is in d0 */ + /* destination pixel data is in {d4, d5, d6, d7} */ + vmvn.8 d1, d0 /* get inverted alpha */ + /* now do alpha blending */ + vmull.u8 q8, d1, d4 + vmull.u8 q9, d1, d5 + vmull.u8 q10, d1, d6 + vmull.u8 q11, d1, d7 +.endm + +.macro pixman_composite_out_reverse_8_8888_process_pixblock_tail + vrshr.u16 q14, q8, #8 + vrshr.u16 q15, q9, #8 + vrshr.u16 q12, q10, #8 + vrshr.u16 q13, q11, #8 + vraddhn.u16 d28, q14, q8 + vraddhn.u16 d29, q15, q9 + vraddhn.u16 d30, q12, q10 + vraddhn.u16 d31, q13, q11 + /* 32bpp result is in {d28, d29, d30, d31} */ +.endm + +/* TODO: expand macros and do better instructions scheduling */ +.macro pixman_composite_out_reverse_8_8888_process_pixblock_tail_head + fetch_src_pixblock + pixman_composite_out_reverse_8_8888_process_pixblock_tail + vld4.8 {d4, d5, d6, d7}, [DST_R, :128]! + cache_preload 8, 8 + pixman_composite_out_reverse_8_8888_process_pixblock_head + vst4.8 {d28, d29, d30, d31}, [DST_W, :128]! 
+.endm + +generate_composite_function \ + pixman_composite_out_reverse_8_8888_asm_neon, 8, 0, 32, \ + FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ + 8, /* number of pixels, processed in a single block */ \ + 5, /* prefetch distance */ \ + default_init, \ + default_cleanup, \ + pixman_composite_out_reverse_8_8888_process_pixblock_head, \ + pixman_composite_out_reverse_8_8888_process_pixblock_tail, \ + pixman_composite_out_reverse_8_8888_process_pixblock_tail_head, \ + 28, /* dst_w_basereg */ \ + 4, /* dst_r_basereg */ \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + +/******************************************************************************/ + generate_composite_function_nearest_scanline \ pixman_scaled_nearest_scanline_8888_8888_OVER_asm_neon, 32, 0, 32, \ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \ 8, /* number of pixels, processed in a single block */ \ default_init, \ default_cleanup, \ pixman_composite_over_8888_8888_process_pixblock_head, \ pixman_composite_over_8888_8888_process_pixblock_tail, \
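All of the NEON pixblock macros above come down to the same per-channel arithmetic: an 8x8 -> 16 bit multiply (vmull.u8) followed by a rounded division by 255, which the vrsra.u16 #8 / vrshrn.u16 #8 pair computes as t = p + 0x80; (t + (t >> 8)) >> 8. A scalar C sketch of what one pixel of the new src_n_8_8888 path produces (the helper names are illustrative, not pixman API):

#include <stdint.h>

/* Rounded x/255, matching the vmull/vrsra/vrshrn sequence. */
static inline uint8_t
div255 (uint16_t x)
{
    uint16_t t = x + 0x80;
    return (uint8_t) ((t + (t >> 8)) >> 8);
}

/* One pixel of src_n_8_8888: solid a8r8g8b8 source IN a8 mask. */
static uint32_t
src_n_8_8888_pixel (uint32_t src, uint8_t m)
{
    uint32_t a = div255 (((src >> 24) & 0xff) * m);
    uint32_t r = div255 (((src >> 16) & 0xff) * m);
    uint32_t g = div255 (((src >>  8) & 0xff) * m);
    uint32_t b = div255 ((src & 0xff) * m);

    return (a << 24) | (r << 16) | (g << 8) | b;
}

The over_n_8_8888 head then repeats the same multiply on the destination channels against the inverted alpha of the masked source (vmvn.8 d25, d3), and the tail's vqadd.u8 folds the source back in with saturation - the standard premultiplied OVER.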
--- a/gfx/cairo/libpixman/src/pixman-arm-neon.c +++ b/gfx/cairo/libpixman/src/pixman-arm-neon.c @@ -59,16 +59,18 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8888_8888, uint32_t, 1, uint32_t, 1) PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_0565, uint32_t, 1, uint16_t, 1) PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_8888, uint32_t, 1, uint32_t, 1) PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, out_reverse_8_0565, uint8_t, 1, uint16_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, out_reverse_8_8888, + uint8_t, 1, uint32_t, 1) PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_0565, uint16_t, 1) PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_n_8888, uint32_t, 1) PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, neon, over_reverse_n_8888, uint32_t, 1) PIXMAN_ARM_BIND_FAST_PATH_N_DST (0, neon, in_n_8, @@ -83,16 +85,20 @@ PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SK PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8888_0565_ca, uint32_t, 1, uint16_t, 1) PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8, uint8_t, 1, uint8_t, 1) PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8, uint8_t, 1, uint8_t, 1) PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8888, uint8_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (0, neon, src_n_8_8888, + uint8_t, 1, uint32_t, 1) +PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (0, neon, src_n_8_8, + uint8_t, 1, uint8_t, 1) PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_8888, uint32_t, 1, uint32_t, 1) PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_0565, uint32_t, 1, uint16_t, 1) PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_0565_n_0565, uint16_t, 1, uint16_t, 1) PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, add_8888_n_8888, @@ -224,39 +230,39 @@ static pixman_bool_t pixman_blt_neon (uint32_t *src_bits, uint32_t *dst_bits, int src_stride, int dst_stride, int src_bpp, int dst_bpp, int src_x, int src_y, - int dst_x, - int dst_y, + int dest_x, + int dest_y, int width, int height) { if (src_bpp != dst_bpp) return FALSE; switch (src_bpp) { case 16: pixman_composite_src_0565_0565_asm_neon ( width, height, (uint16_t *)(((char *) dst_bits) + - dst_y * dst_stride * 4 + dst_x * 2), dst_stride * 2, + dest_y * dst_stride * 4 + dest_x * 2), dst_stride * 2, (uint16_t *)(((char *) src_bits) + src_y * src_stride * 4 + src_x * 2), src_stride * 2); return TRUE; case 32: pixman_composite_src_8888_8888_asm_neon ( width, height, (uint32_t *)(((char *) dst_bits) + - dst_y * dst_stride * 4 + dst_x * 4), dst_stride, + dest_y * dst_stride * 4 + dest_x * 4), dst_stride, (uint32_t *)(((char *) src_bits) + src_y * src_stride * 4 + src_x * 4), src_stride); return TRUE; default: return FALSE; } } @@ -282,16 +288,22 @@ static const pixman_fast_path_t arm_neon PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, neon_composite_src_x888_8888), PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, neon_composite_src_0888_0888), PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, x8r8g8b8, neon_composite_src_0888_8888_rev), PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, r5g6b5, neon_composite_src_0888_0565_rev), PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8r8g8b8, neon_composite_src_pixbuf_8888), PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8b8g8r8, neon_composite_src_rpixbuf_8888), PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8r8g8b8, neon_composite_src_rpixbuf_8888), PIXMAN_STD_FAST_PATH (SRC, rpixbuf, 
rpixbuf, a8b8g8r8, neon_composite_src_pixbuf_8888), + PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8r8g8b8, neon_composite_src_n_8_8888), + PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8r8g8b8, neon_composite_src_n_8_8888), + PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8b8g8r8, neon_composite_src_n_8_8888), + PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8b8g8r8, neon_composite_src_n_8_8888), + PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8, neon_composite_src_n_8_8), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8, neon_composite_over_n_8_8), PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, neon_composite_over_n_8_0565), PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, neon_composite_over_n_8_0565), PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, neon_composite_over_n_8_8888), PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, neon_composite_over_n_8_8888), PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, neon_composite_over_n_8_8888), PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, neon_composite_over_n_8_8888), PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, neon_composite_over_n_0565), @@ -340,16 +352,18 @@ static const pixman_fast_path_t arm_neon PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, neon_composite_add_8_8), PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, neon_composite_add_8888_8888), PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, neon_composite_add_8888_8888), PIXMAN_STD_FAST_PATH (IN, solid, null, a8, neon_composite_in_n_8), PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, neon_composite_over_reverse_n_8888), PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, neon_composite_over_reverse_n_8888), PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, r5g6b5, neon_composite_out_reverse_8_0565), PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, b5g6r5, neon_composite_out_reverse_8_0565), + PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, a8r8g8b8, neon_composite_out_reverse_8_8888), + PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, a8b8g8r8, neon_composite_out_reverse_8_8888), PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, neon_8888_8888), PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, neon_8888_8888), PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, neon_8888_8888), PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, neon_8888_8888), PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_0565), PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_0565), @@ -413,30 +427,30 @@ arm_neon_blt (pixman_implementation_t *i uint32_t * src_bits, uint32_t * dst_bits, int src_stride, int dst_stride, int src_bpp, int dst_bpp, int src_x, int src_y, - int dst_x, - int dst_y, + int dest_x, + int dest_y, int width, int height) { if (!pixman_blt_neon ( src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp, - src_x, src_y, dst_x, dst_y, width, height)) + src_x, src_y, dest_x, dest_y, width, height)) { return _pixman_implementation_blt ( imp->delegate, src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp, - src_x, src_y, dst_x, dst_y, width, height); + src_x, src_y, dest_x, dest_y, width, height); } return TRUE; } static pixman_bool_t arm_neon_fill (pixman_implementation_t *imp, uint32_t * bits,
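Each PIXMAN_STD_FAST_PATH row above keys an asm entry point by the (operator, source, mask, destination) tuple. Note that the new src_n_8_8888 / src_n_8_8 bindings pass 0 where the OVER and ADD paths pass SKIP_ZERO_SRC: a SRC operation must still overwrite the destination when the solid color is fully transparent. A sketch of a caller that would now be routed to the (SRC, solid, a8, a8r8g8b8) row (the function and buffer names are the caller's, not pixman's):

#include <stdint.h>
#include <pixman.h>

/* Writes "solid color IN a8 mask" straight into dst; strides are in
 * bytes and must be multiples of 4. */
static void
solid_through_mask (uint32_t *dst, int dst_stride,
                    uint32_t *msk, int msk_stride,
                    int w, int h)
{
    pixman_color_t red = { 0xffff, 0x0000, 0x0000, 0xffff };
    pixman_image_t *src  = pixman_image_create_solid_fill (&red);
    pixman_image_t *mask = pixman_image_create_bits (PIXMAN_a8, w, h,
                                                     msk, msk_stride);
    pixman_image_t *dest = pixman_image_create_bits (PIXMAN_a8r8g8b8, w, h,
                                                     dst, dst_stride);

    pixman_image_composite32 (PIXMAN_OP_SRC, src, mask, dest,
                              0, 0, 0, 0, 0, 0, w, h);

    pixman_image_unref (src);
    pixman_image_unref (mask);
    pixman_image_unref (dest);
}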
--- a/gfx/cairo/libpixman/src/pixman-arm-simd.c +++ b/gfx/cairo/libpixman/src/pixman-arm-simd.c @@ -24,17 +24,17 @@ * */ #ifdef HAVE_CONFIG_H #include <config.h> #endif #include "pixman-private.h" #include "pixman-arm-common.h" -#include "pixman-fast-path.h" +#include "pixman-inlines.h" #if 0 /* This code was moved to 'pixman-arm-simd-asm.S' */ void pixman_composite_add_8_8_asm_armv6 (int32_t width, int32_t height, uint8_t *dst_line, int32_t dst_stride,
--- a/gfx/cairo/libpixman/src/pixman-bits-image.c +++ b/gfx/cairo/libpixman/src/pixman-bits-image.c @@ -29,16 +29,17 @@ #ifdef HAVE_CONFIG_H #include <config.h> #endif #include <stdio.h> #include <stdlib.h> #include <string.h> #include "pixman-private.h" #include "pixman-combine32.h" +#include "pixman-inlines.h" /* * By default, just evaluate the image at 32bpp and expand. Individual image * types can plug in a better scanline getter if they want to. For example * we could produce smoother gradients by evaluating them at higher color * depth, but that's a project for the future. */ static void @@ -85,157 +86,38 @@ fetch_pixel_no_alpha (bits_image_t *imag } return image->fetch_pixel_32 (image, x, y); } typedef uint32_t (* get_pixel_t) (bits_image_t *image, int x, int y, pixman_bool_t check_bounds); -static force_inline void -repeat (pixman_repeat_t repeat, int size, int *coord) -{ - switch (repeat) - { - case PIXMAN_REPEAT_NORMAL: - *coord = MOD (*coord, size); - break; - - case PIXMAN_REPEAT_PAD: - *coord = CLIP (*coord, 0, size - 1); - break; - - case PIXMAN_REPEAT_REFLECT: - *coord = MOD (*coord, size * 2); - - if (*coord >= size) - *coord = size * 2 - *coord - 1; - break; - - case PIXMAN_REPEAT_NONE: - break; - - default: - break; - } -} - static force_inline uint32_t bits_image_fetch_pixel_nearest (bits_image_t *image, pixman_fixed_t x, pixman_fixed_t y, get_pixel_t get_pixel) { int x0 = pixman_fixed_to_int (x - pixman_fixed_e); int y0 = pixman_fixed_to_int (y - pixman_fixed_e); if (image->common.repeat != PIXMAN_REPEAT_NONE) { - repeat (image->common.repeat, image->width, &x0); - repeat (image->common.repeat, image->height, &y0); + repeat (image->common.repeat, &x0, image->width); + repeat (image->common.repeat, &y0, image->height); return get_pixel (image, x0, y0, FALSE); } else { return get_pixel (image, x0, y0, TRUE); } } -#if SIZEOF_LONG > 4 - -static force_inline uint32_t -bilinear_interpolation (uint32_t tl, uint32_t tr, - uint32_t bl, uint32_t br, - int distx, int disty) -{ - uint64_t distxy, distxiy, distixy, distixiy; - uint64_t tl64, tr64, bl64, br64; - uint64_t f, r; - - distxy = distx * disty; - distxiy = distx * (256 - disty); - distixy = (256 - distx) * disty; - distixiy = (256 - distx) * (256 - disty); - - /* Alpha and Blue */ - tl64 = tl & 0xff0000ff; - tr64 = tr & 0xff0000ff; - bl64 = bl & 0xff0000ff; - br64 = br & 0xff0000ff; - - f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy; - r = f & 0x0000ff0000ff0000ull; - - /* Red and Green */ - tl64 = tl; - tl64 = ((tl64 << 16) & 0x000000ff00000000ull) | (tl64 & 0x0000ff00ull); - - tr64 = tr; - tr64 = ((tr64 << 16) & 0x000000ff00000000ull) | (tr64 & 0x0000ff00ull); - - bl64 = bl; - bl64 = ((bl64 << 16) & 0x000000ff00000000ull) | (bl64 & 0x0000ff00ull); - - br64 = br; - br64 = ((br64 << 16) & 0x000000ff00000000ull) | (br64 & 0x0000ff00ull); - - f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy; - r |= ((f >> 16) & 0x000000ff00000000ull) | (f & 0xff000000ull); - - return (uint32_t)(r >> 16); -} - -#else - -static force_inline uint32_t -bilinear_interpolation (uint32_t tl, uint32_t tr, - uint32_t bl, uint32_t br, - int distx, int disty) -{ - int distxy, distxiy, distixy, distixiy; - uint32_t f, r; - - distxy = distx * disty; - distxiy = (distx << 8) - distxy; /* distx * (256 - disty) */ - distixy = (disty << 8) - distxy; /* disty * (256 - distx) */ - distixiy = - 256 * 256 - (disty << 8) - - (distx << 8) + distxy; /* (256 - distx) * (256 - disty) */ - - /* Blue */ - r = (tl & 0x000000ff) 
* distixiy + (tr & 0x000000ff) * distxiy - + (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy; - - /* Green */ - f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy - + (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy; - r |= f & 0xff000000; - - tl >>= 16; - tr >>= 16; - bl >>= 16; - br >>= 16; - r >>= 16; - - /* Red */ - f = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy - + (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy; - r |= f & 0x00ff0000; - - /* Alpha */ - f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy - + (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy; - r |= f & 0xff000000; - - return r; -} - -#endif - static force_inline uint32_t bits_image_fetch_pixel_bilinear (bits_image_t *image, pixman_fixed_t x, pixman_fixed_t y, get_pixel_t get_pixel) { pixman_repeat_t repeat_mode = image->common.repeat; int width = image->width; @@ -252,20 +134,20 @@ bits_image_fetch_pixel_bilinear (bits_im x1 = pixman_fixed_to_int (x1); y1 = pixman_fixed_to_int (y1); x2 = x1 + 1; y2 = y1 + 1; if (repeat_mode != PIXMAN_REPEAT_NONE) { - repeat (repeat_mode, width, &x1); - repeat (repeat_mode, height, &y1); - repeat (repeat_mode, width, &x2); - repeat (repeat_mode, height, &y2); + repeat (repeat_mode, &x1, width); + repeat (repeat_mode, &y1, height); + repeat (repeat_mode, &x2, width); + repeat (repeat_mode, &y2, height); tl = get_pixel (image, x1, y1, FALSE); bl = get_pixel (image, x1, y2, FALSE); tr = get_pixel (image, x2, y1, FALSE); br = get_pixel (image, x2, y2, FALSE); } else { @@ -524,18 +406,18 @@ bits_image_fetch_pixel_convolution (bits pixman_fixed_t f = *params; if (f) { uint32_t pixel; if (repeat_mode != PIXMAN_REPEAT_NONE) { - repeat (repeat_mode, width, &rx); - repeat (repeat_mode, height, &ry); + repeat (repeat_mode, &rx, width); + repeat (repeat_mode, &ry, height); pixel = get_pixel (image, rx, ry, FALSE); } else { pixel = get_pixel (image, rx, ry, TRUE); } @@ -807,20 +689,20 @@ bits_image_fetch_bilinear_affine (pixman x2 = x1 + 1; if (repeat_mode != PIXMAN_REPEAT_NONE) { uint32_t mask; mask = PIXMAN_FORMAT_A (format)? 0 : 0xff000000; - repeat (repeat_mode, width, &x1); - repeat (repeat_mode, height, &y1); - repeat (repeat_mode, width, &x2); - repeat (repeat_mode, height, &y2); + repeat (repeat_mode, &x1, width); + repeat (repeat_mode, &y1, height); + repeat (repeat_mode, &x2, width); + repeat (repeat_mode, &y2, height); row1 = (uint8_t *)bits->bits + bits->rowstride * 4 * y1; row2 = (uint8_t *)bits->bits + bits->rowstride * 4 * y2; tl = convert_pixel (row1, x1) | mask; tr = convert_pixel (row1, x2) | mask; bl = convert_pixel (row2, x1) | mask; br = convert_pixel (row2, x2) | mask; @@ -955,18 +837,18 @@ bits_image_fetch_nearest_affine (pixman_ buffer[i] = 0; } else { uint32_t mask = PIXMAN_FORMAT_A (format)? 
0 : 0xff000000; if (repeat_mode != PIXMAN_REPEAT_NONE) { - repeat (repeat_mode, width, &x0); - repeat (repeat_mode, height, &y0); + repeat (repeat_mode, &x0, width); + repeat (repeat_mode, &y0, height); } row = (uint8_t *)bits->bits + bits->rowstride * 4 * y0; buffer[i] = convert_pixel (row, x0) | mask; } next: @@ -1048,46 +930,44 @@ MAKE_FETCHERS (none_a8, a8, MAKE_FETCHERS (reflect_a8, a8, PIXMAN_REPEAT_REFLECT) MAKE_FETCHERS (normal_a8, a8, PIXMAN_REPEAT_NORMAL) MAKE_FETCHERS (pad_r5g6b5, r5g6b5, PIXMAN_REPEAT_PAD) MAKE_FETCHERS (none_r5g6b5, r5g6b5, PIXMAN_REPEAT_NONE) MAKE_FETCHERS (reflect_r5g6b5, r5g6b5, PIXMAN_REPEAT_REFLECT) MAKE_FETCHERS (normal_r5g6b5, r5g6b5, PIXMAN_REPEAT_NORMAL) static void -bits_image_fetch_solid_32 (pixman_image_t * image, - int x, - int y, - int width, - uint32_t * buffer, - const uint32_t * mask) +replicate_pixel_32 (bits_image_t * bits, + int x, + int y, + int width, + uint32_t * buffer) { uint32_t color; uint32_t *end; - color = image->bits.fetch_pixel_32 (&image->bits, 0, 0); + color = bits->fetch_pixel_32 (bits, x, y); end = buffer + width; while (buffer < end) *(buffer++) = color; } static void -bits_image_fetch_solid_64 (pixman_image_t * image, - int x, - int y, - int width, - uint32_t * b, - const uint32_t * unused) +replicate_pixel_64 (bits_image_t * bits, + int x, + int y, + int width, + uint32_t * b) { uint64_t color; uint64_t *buffer = (uint64_t *)b; uint64_t *end; - color = image->bits.fetch_pixel_64 (&image->bits, 0, 0); + color = bits->fetch_pixel_64 (bits, x, y); end = buffer + width; while (buffer < end) *(buffer++) = color; } static void bits_image_fetch_untransformed_repeat_none (bits_image_t *image, @@ -1144,16 +1024,26 @@ bits_image_fetch_untransformed_repeat_no uint32_t w; while (y < 0) y += image->height; while (y >= image->height) y -= image->height; + if (image->width == 1) + { + if (wide) + replicate_pixel_64 (image, 0, y, width, buffer); + else + replicate_pixel_32 (image, 0, y, width, buffer); + + return; + } + while (width) { while (x < 0) x += image->width; while (x >= image->width) x -= image->width; w = MIN (width, image->width - x); @@ -1214,22 +1104,16 @@ typedef struct pixman_format_code_t format; uint32_t flags; fetch_scanline_t fetch_32; fetch_scanline_t fetch_64; } fetcher_info_t; static const fetcher_info_t fetcher_info[] = { - { PIXMAN_solid, - FAST_PATH_NO_ALPHA_MAP, - bits_image_fetch_solid_32, - bits_image_fetch_solid_64 - }, - { PIXMAN_any, (FAST_PATH_NO_ALPHA_MAP | FAST_PATH_ID_TRANSFORM | FAST_PATH_NO_CONVOLUTION_FILTER | FAST_PATH_NO_PAD_REPEAT | FAST_PATH_NO_REFLECT_REPEAT), bits_image_fetch_untransformed_32, bits_image_fetch_untransformed_64 @@ -1377,45 +1261,76 @@ dest_get_scanline_narrow (pixman_iter_t int x = iter->x; int y = iter->y; int width = iter->width; uint32_t * buffer = iter->buffer; image->bits.fetch_scanline_32 (image, x, y, width, buffer, mask); if (image->common.alpha_map) { - x -= image->common.alpha_origin_x; - y -= image->common.alpha_origin_y; + uint32_t *alpha; + + if ((alpha = malloc (width * sizeof (uint32_t)))) + { + int i; + + x -= image->common.alpha_origin_x; + y -= image->common.alpha_origin_y; - image->common.alpha_map->fetch_scanline_32 ( - (pixman_image_t *)image->common.alpha_map, - x, y, width, buffer, mask); + image->common.alpha_map->fetch_scanline_32 ( + (pixman_image_t *)image->common.alpha_map, + x, y, width, alpha, mask); + + for (i = 0; i < width; ++i) + { + buffer[i] &= ~0xff000000; + buffer[i] |= (alpha[i] & 0xff000000); + } + + free (alpha); + } } return 
iter->buffer; } static uint32_t * dest_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask) { bits_image_t * image = &iter->image->bits; int x = iter->x; int y = iter->y; int width = iter->width; - uint32_t * buffer = iter->buffer; + uint64_t * buffer = (uint64_t *)iter->buffer; image->fetch_scanline_64 ( - (pixman_image_t *)image, x, y, width, buffer, mask); + (pixman_image_t *)image, x, y, width, (uint32_t *)buffer, mask); if (image->common.alpha_map) { - x -= image->common.alpha_origin_x; - y -= image->common.alpha_origin_y; + uint64_t *alpha; + + if ((alpha = malloc (width * sizeof (uint64_t)))) + { + int i; + + x -= image->common.alpha_origin_x; + y -= image->common.alpha_origin_y; - image->common.alpha_map->fetch_scanline_64 ( - (pixman_image_t *)image->common.alpha_map, x, y, width, buffer, mask); + image->common.alpha_map->fetch_scanline_64 ( + (pixman_image_t *)image->common.alpha_map, + x, y, width, (uint32_t *)alpha, mask); + + for (i = 0; i < width; ++i) + { + buffer[i] &= ~0xffff000000000000ULL; + buffer[i] |= (alpha[i] & 0xffff000000000000ULL); + } + + free (alpha); + } } return iter->buffer; } static void dest_write_back_narrow (pixman_iter_t *iter) { @@ -1457,146 +1372,143 @@ dest_write_back_wide (pixman_iter_t *ite image->common.alpha_map->store_scanline_64 ( image->common.alpha_map, x, y, width, buffer); } iter->y++; } -static void -dest_write_back_direct (pixman_iter_t *iter) -{ - iter->buffer += iter->image->bits.rowstride; -} - void _pixman_bits_image_dest_iter_init (pixman_image_t *image, pixman_iter_t *iter) { if (iter->flags & ITER_NARROW) { - if (((image->common.flags & - (FAST_PATH_NO_ALPHA_MAP | FAST_PATH_NO_ACCESSORS)) == - (FAST_PATH_NO_ALPHA_MAP | FAST_PATH_NO_ACCESSORS)) && - (image->bits.format == PIXMAN_a8r8g8b8 || - (image->bits.format == PIXMAN_x8r8g8b8 && - (iter->flags & ITER_LOCALIZED_ALPHA)))) + if ((iter->flags & (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) == + (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) { - iter->buffer = image->bits.bits + iter->y * image->bits.rowstride + iter->x; - iter->get_scanline = _pixman_iter_get_scanline_noop; - iter->write_back = dest_write_back_direct; } else { - if ((iter->flags & (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) == - (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) - { - iter->get_scanline = _pixman_iter_get_scanline_noop; - } - else - { - iter->get_scanline = dest_get_scanline_narrow; - } - - iter->write_back = dest_write_back_narrow; + iter->get_scanline = dest_get_scanline_narrow; } + + iter->write_back = dest_write_back_narrow; } else { iter->get_scanline = dest_get_scanline_wide; iter->write_back = dest_write_back_wide; } } static uint32_t * create_bits (pixman_format_code_t format, int width, int height, - int * rowstride_bytes) + int * rowstride_bytes) { int stride; - int buf_size; + size_t buf_size; int bpp; /* what follows is a long-winded way, avoiding any possibility of integer * overflows, of saying: * stride = ((width * bpp + 0x1f) >> 5) * sizeof (uint32_t); */ bpp = PIXMAN_FORMAT_BPP (format); - if (pixman_multiply_overflows_int (width, bpp)) + if (_pixman_multiply_overflows_int (width, bpp)) return NULL; stride = width * bpp; - if (pixman_addition_overflows_int (stride, 0x1f)) + if (_pixman_addition_overflows_int (stride, 0x1f)) return NULL; stride += 0x1f; stride >>= 5; stride *= sizeof (uint32_t); - if (pixman_multiply_overflows_int (height, stride)) + if (_pixman_multiply_overflows_size (height, stride)) return NULL; buf_size = height * stride; if (rowstride_bytes) *rowstride_bytes = stride; return calloc 
(buf_size, 1); } +pixman_bool_t +_pixman_bits_image_init (pixman_image_t * image, + pixman_format_code_t format, + int width, + int height, + uint32_t * bits, + int rowstride) +{ + uint32_t *free_me = NULL; + + if (!bits && width && height) + { + int rowstride_bytes; + + free_me = bits = create_bits (format, width, height, &rowstride_bytes); + + if (!bits) + return FALSE; + + rowstride = rowstride_bytes / (int) sizeof (uint32_t); + } + + _pixman_image_init (image); + + image->type = BITS; + image->bits.format = format; + image->bits.width = width; + image->bits.height = height; + image->bits.bits = bits; + image->bits.free_me = free_me; + image->bits.read_func = NULL; + image->bits.write_func = NULL; + image->bits.rowstride = rowstride; + image->bits.indexed = NULL; + + image->common.property_changed = bits_image_property_changed; + + _pixman_image_reset_clip_region (image); + + return TRUE; +} + PIXMAN_EXPORT pixman_image_t * pixman_image_create_bits (pixman_format_code_t format, int width, int height, uint32_t * bits, int rowstride_bytes) { pixman_image_t *image; - uint32_t *free_me = NULL; /* must be a whole number of uint32_t's */ return_val_if_fail ( bits == NULL || (rowstride_bytes % sizeof (uint32_t)) == 0, NULL); return_val_if_fail (PIXMAN_FORMAT_BPP (format) >= PIXMAN_FORMAT_DEPTH (format), NULL); - if (!bits && width && height) - { - free_me = bits = create_bits (format, width, height, &rowstride_bytes); - if (!bits) - return NULL; - } - image = _pixman_image_allocate (); if (!image) + return NULL; + + if (!_pixman_bits_image_init (image, format, width, height, bits, + rowstride_bytes / (int) sizeof (uint32_t))) { - if (free_me) - free (free_me); - + free (image); return NULL; } - image->type = BITS; - image->bits.format = format; - image->bits.width = width; - image->bits.height = height; - image->bits.bits = bits; - image->bits.free_me = free_me; - image->bits.read_func = NULL; - image->bits.write_func = NULL; - - /* The rowstride is stored in number of uint32_t */ - image->bits.rowstride = rowstride_bytes / (int) sizeof (uint32_t); - - image->bits.indexed = NULL; - - image->common.property_changed = bits_image_property_changed; - - _pixman_image_reset_clip_region (image); - return image; }
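Most of the churn in this file is mechanical: repeat() moved to pixman-inlines.h and its argument order changed to repeat (mode, &coord, size), so every call site is updated. For reference, the coordinate wrapping it performs, as a standalone sketch with a plain C stand-in for pixman's MOD macro and a worked example:

#include <stdio.h>

typedef enum { REPEAT_NONE, REPEAT_NORMAL, REPEAT_PAD, REPEAT_REFLECT } repeat_t;

/* Euclidean modulus: always in [0, size), even for negative a. */
static int
mod (int a, int size)
{
    int m = a % size;
    return m < 0 ? m + size : m;
}

static void
repeat_coord (repeat_t r, int *coord, int size)
{
    switch (r)
    {
    case REPEAT_NORMAL:
        *coord = mod (*coord, size);
        break;
    case REPEAT_PAD:
        *coord = *coord < 0 ? 0 : (*coord >= size ? size - 1 : *coord);
        break;
    case REPEAT_REFLECT:
        *coord = mod (*coord, size * 2);
        if (*coord >= size)
            *coord = size * 2 - *coord - 1;
        break;
    default:
        break;
    }
}

int
main (void)
{
    /* x = -1 on a 4-pixel row: NORMAL -> 3, PAD -> 0, REFLECT -> 0. */
    int x;
    x = -1; repeat_coord (REPEAT_NORMAL,  &x, 4); printf ("%d\n", x);
    x = -1; repeat_coord (REPEAT_PAD,     &x, 4); printf ("%d\n", x);
    x = -1; repeat_coord (REPEAT_REFLECT, &x, 4); printf ("%d\n", x);
    return 0;
}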
--- a/gfx/cairo/libpixman/src/pixman-combine.c.template +++ b/gfx/cairo/libpixman/src/pixman-combine.c.template @@ -432,17 +432,17 @@ combine_saturate_u (pixman_implementatio * it has been designed to mirror ISO 32000. Note that at the current point * no released draft exists that shows this, as the formulas have not been * updated yet after the release of ISO 32000. * * The default implementation here uses the PDF_SEPARABLE_BLEND_MODE and * PDF_NON_SEPARABLE_BLEND_MODE macros, which take the blend function as an * argument. Note that this implementation operates on premultiplied colors, * while the PDF specification does not. Therefore the code uses the formula - * ar.Cra = (1 – as) . Dca + (1 – ad) . Sca + B(Dca, ad, Sca, as) + * Cra = (1 – as) . Dca + (1 – ad) . Sca + B(Dca, ad, Sca, as) */ /* * Multiply * B(Dca, ad, Sca, as) = Dca.Sca */ static void @@ -517,17 +517,17 @@ combine_multiply_ca (pixman_implementati comp1_t da = ALPHA_c (d); \ comp1_t ida = ~da; \ comp4_t result; \ \ result = d; \ UNcx4_MUL_UNc_ADD_UNcx4_MUL_UNc (result, isa, s, ida); \ \ *(dest + i) = result + \ - (DIV_ONE_UNc (sa * da) << A_SHIFT) + \ + (DIV_ONE_UNc (sa * (comp4_t)da) << A_SHIFT) + \ (blend_ ## name (RED_c (d), da, RED_c (s), sa) << R_SHIFT) + \ (blend_ ## name (GREEN_c (d), da, GREEN_c (s), sa) << G_SHIFT) + \ (blend_ ## name (BLUE_c (d), da, BLUE_c (s), sa)); \ } \ } \ \ static void \ combine_ ## name ## _ca (pixman_implementation_t *imp, \ @@ -547,17 +547,17 @@ combine_multiply_ca (pixman_implementati comp4_t result; \ \ combine_mask_value_ca (&s, &m); \ \ result = d; \ UNcx4_MUL_UNcx4_ADD_UNcx4_MUL_UNc (result, ~m, s, ida); \ \ result += \ - (DIV_ONE_UNc (ALPHA_c (m) * da) << A_SHIFT) + \ + (DIV_ONE_UNc (ALPHA_c (m) * (comp4_t)da) << A_SHIFT) + \ (blend_ ## name (RED_c (d), da, RED_c (s), RED_c (m)) << R_SHIFT) + \ (blend_ ## name (GREEN_c (d), da, GREEN_c (s), GREEN_c (m)) << G_SHIFT) + \ (blend_ ## name (BLUE_c (d), da, BLUE_c (s), BLUE_c (m))); \ \ *(dest + i) = result; \ } \ } @@ -844,17 +844,17 @@ PDF_SEPARABLE_BLEND_MODE (exclusion) * = clip_color ( r * C + r × l - r * LUM (C), r * a) * * = set_lum ( r * C, r * l, r * a) * * Finally, set_sat: * * r * set_sat (C, s) = set_sat (x * C, r * s) * - * The above holds for all non-zero x, because they x'es in the fraction for + * The above holds for all non-zero x, because the x'es in the fraction for * C_mid cancel out. Specifically, it holds for x = r: * * r * set_sat (C, s) = set_sat (r_c, rs) * */ /* So, for the non-separable PDF blend modes, we have (using s, d for * non-premultiplied colors, and S, D for premultiplied: @@ -880,18 +880,17 @@ PDF_SEPARABLE_BLEND_MODE (exclusion) * = set_lum (a_s * a_d * set_sat (D/a_d, SAT (S/a_s)), * a_s * LUM (D), a_s * a_d) * = set_lum (set_sat (a_s * D, a_d * SAT (S), a_s * LUM (D), a_s * a_d)) * * Hue: * * a_s * a_d * B(s, d) * = a_s * a_d * set_lum (set_sat (S/a_s, SAT (D/a_d)), LUM (D/a_d), 1) - * = a_s * a_d * set_lum (set_sat (a_d * S, a_s * SAT (D)), - * a_s * LUM (D), a_s * a_d) + * = set_lum (set_sat (a_d * S, a_s * SAT (D)), a_s * LUM (D), a_s * a_d) * */ #define CH_MIN(c) (c[0] < c[1] ? (c[0] < c[2] ? c[0] : c[2]) : (c[1] < c[2] ? c[1] : c[2])) #define CH_MAX(c) (c[0] > c[1] ? (c[0] > c[2] ? c[0] : c[2]) : (c[1] > c[2] ? 
c[1] : c[2])) #define LUM(c) ((c[0] * 30 + c[1] * 59 + c[2] * 11) / 100) #define SAT(c) (CH_MAX (c) - CH_MIN (c)) @@ -922,17 +921,17 @@ PDF_SEPARABLE_BLEND_MODE (exclusion) sc[0] = RED_c (s); \ dc[1] = GREEN_c (d); \ sc[1] = GREEN_c (s); \ dc[2] = BLUE_c (d); \ sc[2] = BLUE_c (s); \ blend_ ## name (c, dc, da, sc, sa); \ \ *(dest + i) = result + \ - (DIV_ONE_UNc (sa * da) << A_SHIFT) + \ + (DIV_ONE_UNc (sa * (comp4_t)da) << A_SHIFT) + \ (DIV_ONE_UNc (c[0]) << R_SHIFT) + \ (DIV_ONE_UNc (c[1]) << G_SHIFT) + \ (DIV_ONE_UNc (c[2])); \ } \ } static void set_lum (comp4_t dest[3], comp4_t src[3], comp4_t sa, comp4_t lum) @@ -1139,19 +1138,17 @@ blend_hsl_luminosity (comp4_t c[3], PDF_NON_SEPARABLE_BLEND_MODE (hsl_luminosity) #undef SAT #undef LUM #undef CH_MAX #undef CH_MIN #undef PDF_NON_SEPARABLE_BLEND_MODE -/* Overlay - * - * All of the disjoint composing functions +/* All of the disjoint/conjoint composing functions * * The four entries in the first column indicate what source contributions * come from each of the four areas of the picture -- areas covered by neither * A nor B, areas covered only by A, areas covered only by B and finally * areas covered by both A and B. * * Disjoint Conjoint * Fa Fb Fa Fb @@ -1162,16 +1159,19 @@ PDF_NON_SEPARABLE_BLEND_MODE (hsl_lumino * (0,A,B,B) min((1-b)/a,1) 1 max(1-b/a,0) 1 * (0,0,0,A) max(1-(1-b)/a,0) 0 min(1,b/a) 0 * (0,0,0,B) 0 max(1-(1-a)/b,0) 0 min(a/b,1) * (0,A,0,0) min(1,(1-b)/a) 0 max(1-b/a,0) 0 * (0,0,B,0) 0 min(1,(1-a)/b) 0 max(1-a/b,0) * (0,0,B,A) max(1-(1-b)/a,0) min(1,(1-a)/b) min(1,b/a) max(1-a/b,0) * (0,A,0,B) min(1,(1-b)/a) max(1-(1-a)/b,0) max(1-b/a,0) min(1,a/b) * (0,A,B,0) min(1,(1-b)/a) min(1,(1-a)/b) max(1-b/a,0) max(1-a/b,0) + * + * See http://marc.info/?l=xfree-render&m=99792000027857&w=2 for more + * information about these operators. */ #define COMBINE_A_OUT 1 #define COMBINE_A_IN 2 #define COMBINE_B_OUT 4 #define COMBINE_B_IN 8 #define COMBINE_CLEAR 0
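All the separable blend modes in this template instantiate the premultiplied formula quoted above, Cra = (1 - as).Dca + (1 - ad).Sca + B(Dca, ad, Sca, as). Written out in normalized floating point for Multiply, where B(Dca, ad, Sca, as) = Dca.Sca, one channel is (a sketch of the math only, not the fixed-point template code):

/* One channel of premultiplied Multiply; all values in [0, 1]. */
static double
blend_multiply_channel (double Sca, double as, double Dca, double ad)
{
    return (1.0 - as) * Dca + (1.0 - ad) * Sca + Dca * Sca;
}

With an opaque source and destination (as = ad = 1) this collapses to Dca * Sca, the familiar multiply. The (comp4_t) casts added in the hunks above serve the fixed-point version of the same products: in the 16-bit-per-component instantiation, sa * da of two comp1_t values would otherwise overflow the int they are promoted to.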
--- a/gfx/cairo/libpixman/src/pixman-combine.h.template +++ b/gfx/cairo/libpixman/src/pixman-combine.h.template @@ -20,20 +20,20 @@ #define GREEN_c(x) (((x) >> G_SHIFT) & MASK) #define BLUE_c(x) ((x) & MASK) /* * Helper macros. */ #define MUL_UNc(a, b, t) \ - ((t) = (a) * (b) + ONE_HALF, ((((t) >> G_SHIFT ) + (t) ) >> G_SHIFT )) + ((t) = (a) * (comp2_t)(b) + ONE_HALF, ((((t) >> G_SHIFT ) + (t) ) >> G_SHIFT )) #define DIV_UNc(a, b) \ - (((comp2_t) (a) * MASK) / (b)) + (((comp2_t) (a) * MASK + ((b) / 2)) / (b)) #define ADD_UNc(x, y, t) \ ((t) = (x) + (y), \ (comp4_t) (comp1_t) ((t) | (0 - ((t) >> G_SHIFT)))) #define DIV_ONE_UNc(x) \ (((x) + ONE_HALF + (((x) + ONE_HALF) >> G_SHIFT)) >> G_SHIFT)
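Both hunks are arithmetic hardening: MUL_UNc widens one operand to comp2_t before multiplying, and DIV_UNc adds (b)/2 so the un-premultiply division rounds to nearest instead of truncating. Expanded for the 8-bit instantiation (MASK = 0xff, G_SHIFT = 8, ONE_HALF = 0x80), the patched macros compute the following (a sketch; div_unc assumes b != 0 and, like the macro, does not clamp):

#include <stdint.h>

/* MUL_UNc: rounded a*b/255. */
static inline uint8_t
mul_unc (uint8_t a, uint8_t b)
{
    uint16_t t = (uint16_t) a * b + 0x80;
    return (uint8_t) (((t >> 8) + t) >> 8);
}

/* DIV_UNc: rounded a*255/b, the un-premultiply step. */
static inline uint16_t
div_unc (uint8_t a, uint8_t b)
{
    return (uint16_t) (((uint16_t) a * 0xff + b / 2) / b);
}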
--- a/gfx/cairo/libpixman/src/pixman-compiler.h +++ b/gfx/cairo/libpixman/src/pixman-compiler.h @@ -13,16 +13,22 @@ #if defined (__GNUC__) # define FUNC ((const char*) (__PRETTY_FUNCTION__)) #elif defined (__sun) || (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) # define FUNC ((const char*) (__func__)) #else # define FUNC ((const char*) ("???")) #endif +#if defined (__GNUC__) +# define MAYBE_UNUSED __attribute__((unused)) +#else +# define MAYBE_UNUSED +#endif + #ifndef INT16_MIN # define INT16_MIN (-32767-1) #endif #ifndef INT16_MAX # define INT16_MAX (32767) #endif @@ -37,16 +43,25 @@ #ifndef UINT32_MIN # define UINT32_MIN (0) #endif #ifndef UINT32_MAX # define UINT32_MAX (4294967295U) #endif +#ifndef INT64_MIN +# define INT64_MIN (-9223372036854775807-1) +#endif + +#ifndef INT64_MAX +# define INT64_MAX (9223372036854775807) +#endif + + #ifndef M_PI # define M_PI 3.14159265358979323846 #endif #ifdef _MSC_VER /* 'inline' is available only in C++ in MSVC */ # define inline __inline # define force_inline __forceinline @@ -84,20 +99,20 @@ /* TLS */ #if defined(PIXMAN_NO_TLS) # define PIXMAN_DEFINE_THREAD_LOCAL(type, name) \ static type name # define PIXMAN_GET_THREAD_LOCAL(name) \ (&name) -#elif defined(TOOLCHAIN_SUPPORTS__THREAD) +#elif defined(TLS) # define PIXMAN_DEFINE_THREAD_LOCAL(type, name) \ - static __thread type name + static TLS type name # define PIXMAN_GET_THREAD_LOCAL(name) \ (&name) #elif defined(__MINGW32__) # define _NO_W32_PSEUDO_MODIFIERS # include <windows.h>
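Besides MAYBE_UNUSED and the INT64_MIN/INT64_MAX fallbacks, the notable change is the thread-local branch: instead of probing for GCC's __thread via TOOLCHAIN_SUPPORTS__THREAD, the macros now key off a generic TLS define that the build system sets to whatever keyword the toolchain supports. Call sites are unchanged; a sketch of how the pair is used inside the library (cache_t and fast_path_cache are illustrative names):

typedef struct { int n_entries; } cache_t;

/* Expands to "static __thread cache_t fast_path_cache;" when
 * TLS is defined as __thread. */
PIXMAN_DEFINE_THREAD_LOCAL (cache_t, fast_path_cache);

static cache_t *
get_cache (void)
{
    /* Expands to (&fast_path_cache); each thread sees its own copy. */
    return PIXMAN_GET_THREAD_LOCAL (fast_path_cache);
}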
--- a/gfx/cairo/libpixman/src/pixman-cpu.c +++ b/gfx/cairo/libpixman/src/pixman-cpu.c @@ -19,22 +19,27 @@ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #ifdef HAVE_CONFIG_H #include <config.h> #endif #include <string.h> +#include <stdlib.h> #if defined(USE_ARM_SIMD) && defined(_MSC_VER) /* Needed for EXCEPTION_ILLEGAL_INSTRUCTION */ #include <windows.h> #endif +#if defined(__APPLE__) +#include "TargetConditionals.h" +#endif + #include "pixman-private.h" #ifdef USE_VMX /* The CPU detection code needs to be in a file not compiled with * "-maltivec -mabi=altivec", as gcc would try to save vector register * across function calls causing SIGILL on cpus without Altivec/vmx. */ @@ -182,17 +187,17 @@ pixman_have_vmx (void) initialized = TRUE; } return have_vmx; } #endif /* __APPLE__ */ #endif /* USE_VMX */ -#if defined(USE_ARM_SIMD) || defined(USE_ARM_NEON) +#if defined(USE_ARM_SIMD) || defined(USE_ARM_NEON) || defined(USE_ARM_IWMMXT) #if defined(_MSC_VER) #if defined(USE_ARM_SIMD) extern int pixman_msvc_try_arm_simd_op (); pixman_bool_t pixman_have_arm_simd (void) @@ -239,43 +244,70 @@ pixman_have_arm_neon (void) initialized = TRUE; } return have_arm_neon; } #endif /* USE_ARM_NEON */ -#elif defined (__linux__) || defined(ANDROID) /* linux ELF */ +#elif (defined (__APPLE__) && defined(TARGET_OS_IPHONE)) /* iOS (iPhone/iPad/iPod touch) */ + +/* Detection of ARM NEON on iOS is fairly simple because iOS binaries + * contain separate executable images for each processor architecture. + * So all we have to do is detect the armv7 architecture build. The + * operating system automatically runs the armv7 binary for armv7 devices + * and the armv6 binary for armv6 devices. + */ + +pixman_bool_t +pixman_have_arm_simd (void) +{ +#if defined(USE_ARM_SIMD) + return TRUE; +#else + return FALSE; +#endif +} -#include <stdlib.h> -#include <unistd.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/mman.h> -#include <fcntl.h> -#include <string.h> -#include <elf.h> +pixman_bool_t +pixman_have_arm_neon (void) +{ +#if defined(USE_ARM_NEON) && defined(__ARM_NEON__) + /* This is an armv7 cpu build */ + return TRUE; +#else + /* This is an armv6 cpu build */ + return FALSE; +#endif +} + +pixman_bool_t +pixman_have_arm_iwmmxt (void) +{ +#if defined(USE_ARM_IWMMXT) + return FALSE; +#else + return FALSE; +#endif +} + +#elif defined (__linux__) || defined(__ANDROID__) || defined(ANDROID) /* linux ELF or ANDROID */ static pixman_bool_t arm_has_v7 = FALSE; static pixman_bool_t arm_has_v6 = FALSE; static pixman_bool_t arm_has_vfp = FALSE; static pixman_bool_t arm_has_neon = FALSE; static pixman_bool_t arm_has_iwmmxt = FALSE; static pixman_bool_t arm_tests_initialized = FALSE; -#ifdef ANDROID - -/* on Android, we can't reliably access /proc/self/auxv, - * so instead read the text version in /proc/cpuinfo and - * parse that instead. 
- */ +#if defined(__ANDROID__) || defined(ANDROID) /* Android device support */ static void -pixman_arm_detect_cpu_features (void) +pixman_arm_read_auxv_or_cpu_features () { char buf[1024]; char* pos; const char* ver_token = "CPU architecture: "; FILE* f = fopen("/proc/cpuinfo", "r"); if (!f) { arm_tests_initialized = TRUE; return; @@ -293,20 +325,28 @@ pixman_arm_detect_cpu_features (void) } } arm_has_neon = strstr(buf, "neon") != NULL; arm_has_vfp = strstr(buf, "vfp") != NULL; arm_has_iwmmxt = strstr(buf, "iwmmxt") != NULL; arm_tests_initialized = TRUE; } -#else +#elif defined (__linux__) /* linux ELF */ + +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <fcntl.h> +#include <string.h> +#include <elf.h> static void -pixman_arm_detect_cpu_features (void) +pixman_arm_read_auxv_or_cpu_features () { int fd; Elf32_auxv_t aux; fd = open ("/proc/self/auxv", O_RDONLY); if (fd >= 0) { while (read (fd, &aux, sizeof(Elf32_auxv_t)) == sizeof(Elf32_auxv_t)) @@ -336,52 +376,114 @@ pixman_arm_detect_cpu_features (void) } } } close (fd); } arm_tests_initialized = TRUE; } -#endif + +#endif /* Linux elf */ #if defined(USE_ARM_SIMD) pixman_bool_t pixman_have_arm_simd (void) { if (!arm_tests_initialized) - pixman_arm_detect_cpu_features (); + pixman_arm_read_auxv_or_cpu_features (); return arm_has_v6; } #endif /* USE_ARM_SIMD */ #if defined(USE_ARM_NEON) pixman_bool_t pixman_have_arm_neon (void) { if (!arm_tests_initialized) - pixman_arm_detect_cpu_features (); + pixman_arm_read_auxv_or_cpu_features (); return arm_has_neon; } #endif /* USE_ARM_NEON */ -#else /* linux ELF */ +#if defined(USE_ARM_IWMMXT) +pixman_bool_t +pixman_have_arm_iwmmxt (void) +{ + if (!arm_tests_initialized) + pixman_arm_read_auxv_or_cpu_features (); + + return arm_has_iwmmxt; +} + +#endif /* USE_ARM_IWMMXT */ + +#else /* !_MSC_VER && !Linux elf && !Android */ #define pixman_have_arm_simd() FALSE #define pixman_have_arm_neon() FALSE +#define pixman_have_arm_iwmmxt() FALSE #endif -#endif /* USE_ARM_SIMD || USE_ARM_NEON */ +#endif /* USE_ARM_SIMD || USE_ARM_NEON || USE_ARM_IWMMXT */ + +#if defined(USE_MIPS_DSPR2) + +#if defined (__linux__) /* linux ELF */ + +pixman_bool_t +pixman_have_mips_dspr2 (void) +{ + const char *search_string = "MIPS 74K"; + const char *file_name = "/proc/cpuinfo"; + /* Simple detection of MIPS DSP ASE (revision 2) at runtime for Linux. + * It is based on /proc/cpuinfo, which reveals hardware configuration + * to user-space applications. According to MIPS (early 2010), no similar + * facility is universally available on the MIPS architectures, so it's up + * to individual OSes to provide such. + * + * Only currently available MIPS core that supports DSPr2 is 74K. + */ + + char cpuinfo_line[256]; + + FILE *f = NULL; -#if defined(USE_MMX) || defined(USE_SSE2) + if ((f = fopen (file_name, "r")) == NULL) + return FALSE; + + while (fgets (cpuinfo_line, sizeof (cpuinfo_line), f) != NULL) + { + if (strstr (cpuinfo_line, search_string) != NULL) + { + fclose (f); + return TRUE; + } + } + + fclose (f); + + /* Did not find string in the proc file. */ + return FALSE; +} + +#else /* linux ELF */ + +#define pixman_have_mips_dspr2() FALSE + +#endif /* linux ELF */ + +#endif /* USE_MIPS_DSPR2 */ + +#if defined(USE_X86_MMX) || defined(USE_SSE2) /* The CPU detection code needs to be in a file not compiled with * "-mmmx -msse", as gcc would generate CMOV instructions otherwise * that would lead to SIGILL instructions on old CPUs that don't have * it. 
*/ #if !defined(__amd64__) && !defined(__x86_64__) && !defined(_M_AMD64) #ifdef HAVE_GETISAX @@ -562,31 +664,33 @@ detect_cpu_features (void) features |= MMX_EXTENSIONS; } } #endif /* HAVE_GETISAX */ return features; } +#ifdef USE_X86_MMX static pixman_bool_t pixman_have_mmx (void) { static pixman_bool_t initialized = FALSE; static pixman_bool_t mmx_present; if (!initialized) { unsigned int features = detect_cpu_features (); mmx_present = (features & (MMX | MMX_EXTENSIONS)) == (MMX | MMX_EXTENSIONS); initialized = TRUE; } return mmx_present; } +#endif #ifdef USE_SSE2 static pixman_bool_t pixman_have_sse2 (void) { static pixman_bool_t initialized = FALSE; static pixman_bool_t sse2_present; @@ -598,53 +702,98 @@ pixman_have_sse2 (void) } return sse2_present; } #endif #else /* __amd64__ */ -#ifdef USE_MMX +#ifdef USE_X86_MMX #define pixman_have_mmx() TRUE #endif #ifdef USE_SSE2 #define pixman_have_sse2() TRUE #endif #endif /* __amd64__ */ #endif +static pixman_bool_t +disabled (const char *name) +{ + const char *env; + + if ((env = getenv ("PIXMAN_DISABLE"))) + { + do + { + const char *end; + int len; + + if ((end = strchr (env, ' '))) + len = end - env; + else + len = strlen (env); + + if (strlen (name) == len && strncmp (name, env, len) == 0) + { + printf ("pixman: Disabled %s implementation\n", name); + return TRUE; + } + + env += len; + } + while (*env++); + } + + return FALSE; +} + pixman_implementation_t * _pixman_choose_implementation (void) { pixman_implementation_t *imp; imp = _pixman_implementation_create_general(); - imp = _pixman_implementation_create_fast_path (imp); - -#ifdef USE_MMX - if (pixman_have_mmx ()) + + if (!disabled ("fast")) + imp = _pixman_implementation_create_fast_path (imp); + +#ifdef USE_X86_MMX + if (!disabled ("mmx") && pixman_have_mmx ()) imp = _pixman_implementation_create_mmx (imp); #endif #ifdef USE_SSE2 - if (pixman_have_sse2 ()) + if (!disabled ("sse2") && pixman_have_sse2 ()) imp = _pixman_implementation_create_sse2 (imp); #endif #ifdef USE_ARM_SIMD - if (pixman_have_arm_simd ()) + if (!disabled ("arm-simd") && pixman_have_arm_simd ()) imp = _pixman_implementation_create_arm_simd (imp); #endif +#ifdef USE_ARM_IWMMXT + if (!disabled ("arm-iwmmxt") && pixman_have_arm_iwmmxt ()) + imp = _pixman_implementation_create_mmx (imp); +#endif + #ifdef USE_ARM_NEON - if (pixman_have_arm_neon ()) + if (!disabled ("arm-neon") && pixman_have_arm_neon ()) imp = _pixman_implementation_create_arm_neon (imp); #endif - + +#ifdef USE_MIPS_DSPR2 + if (!disabled ("mips-dspr2") && pixman_have_mips_dspr2 ()) + imp = _pixman_implementation_create_mips_dspr2 (imp); +#endif + #ifdef USE_VMX - if (pixman_have_vmx ()) + if (!disabled ("vmx") && pixman_have_vmx ()) imp = _pixman_implementation_create_vmx (imp); #endif + imp = _pixman_implementation_create_noop (imp); + return imp; }
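The other addition here is runtime pruning of the implementation stack: disabled() checks the PIXMAN_DISABLE environment variable for a space-separated list of names ("fast", "mmx", "sse2", "arm-simd", "arm-iwmmxt", "arm-neon", "mips-dspr2", "vmx") and prints "pixman: Disabled ... implementation" for each match. A usage sketch (setenv is POSIX; the pixel values are arbitrary):

#include <stdint.h>
#include <stdlib.h>
#include <pixman.h>

int
main (void)
{
    uint32_t src_pix = 0x80800000;   /* half-transparent red, premultiplied */
    uint32_t dst_pix = 0xff0000ff;   /* opaque blue */
    pixman_image_t *src, *dst;

    /* Must be set before the first composite: the delegate chain is
     * built once, on first use. */
    setenv ("PIXMAN_DISABLE", "arm-neon sse2", 1);

    src = pixman_image_create_bits (PIXMAN_a8r8g8b8, 1, 1, &src_pix, 4);
    dst = pixman_image_create_bits (PIXMAN_a8r8g8b8, 1, 1, &dst_pix, 4);

    /* OVER now falls through to the remaining implementations. */
    pixman_image_composite32 (PIXMAN_OP_OVER, src, NULL, dst,
                              0, 0, 0, 0, 0, 0, 1, 1);

    pixman_image_unref (src);
    pixman_image_unref (dst);
    return 0;
}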
--- a/gfx/cairo/libpixman/src/pixman-fast-path.c +++ b/gfx/cairo/libpixman/src/pixman-fast-path.c @@ -25,17 +25,17 @@ #ifdef HAVE_CONFIG_H #include <config.h> #endif #include <string.h> #include <stdlib.h> #include "pixman-private.h" #include "pixman-combine32.h" -#include "pixman-fast-path.h" +#include "pixman-inlines.h" static force_inline uint32_t fetch_24 (uint8_t *a) { if (((unsigned long)a) & 1) { #ifdef WORDS_BIGENDIAN return (*a << 16) | (*(uint16_t *)(a + 1)); @@ -103,38 +103,28 @@ in (uint32_t x, /* * Naming convention: * * op_src_mask_dest */ static void fast_composite_over_x888_8_8888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint32_t *src, *src_line; uint32_t *dst, *dst_line; uint8_t *mask, *mask_line; int src_stride, mask_stride, dst_stride; uint8_t m; uint32_t s, d; int32_t w; - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); while (height--) { src = src_line; src_line += src_stride; dst = dst_line; @@ -163,29 +153,19 @@ fast_composite_over_x888_8_8888 (pixman_ src++; dst++; } } } static void fast_composite_in_n_8_8 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dest_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint32_t src, srca; uint8_t *dst_line, *dst; uint8_t *mask_line, *mask, m; int dst_stride, mask_stride; int32_t w; uint16_t t; src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); @@ -241,29 +221,19 @@ fast_composite_in_n_8_8 (pixman_implemen dst++; } } } } static void fast_composite_in_8_8 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dest_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint8_t *dst_line, *dst; uint8_t *src_line, *src; int dst_stride, src_stride; int32_t w; uint8_t s; uint16_t t; PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); @@ -288,42 +258,32 @@ fast_composite_in_8_8 (pixman_implementa dst++; } } } static void fast_composite_over_n_8_8888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint32_t src, srca; uint32_t *dst_line, *dst, d; uint8_t *mask_line, *mask, m; int dst_stride, mask_stride; int32_t w; - src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); + src = _pixman_image_get_solid (imp, 
src_image, dest_image->bits.format); srca = src >> 24; if (src == 0) return; - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); while (height--) { dst = dst_line; dst_line += dst_stride; mask = mask_line; mask_line += mask_stride; @@ -346,41 +306,31 @@ fast_composite_over_n_8_8888 (pixman_imp } dst++; } } } static void fast_composite_add_n_8888_8888_ca (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint32_t src, s; uint32_t *dst_line, *dst, d; uint32_t *mask_line, *mask, ma; int dst_stride, mask_stride; int32_t w; - src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); if (src == 0) return; - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); while (height--) { dst = dst_line; dst_line += dst_stride; mask = mask_line; mask_line += mask_stride; @@ -402,42 +352,32 @@ fast_composite_add_n_8888_8888_ca (pixma dst++; } } } static void fast_composite_over_n_8888_8888_ca (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint32_t src, srca, s; uint32_t *dst_line, *dst, d; uint32_t *mask_line, *mask, ma; int dst_stride, mask_stride; int32_t w; - src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); srca = src >> 24; if (src == 0) return; - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); while (height--) { dst = dst_line; dst_line += dst_stride; mask = mask_line; mask_line += mask_stride; @@ -468,43 +408,33 @@ fast_composite_over_n_8888_8888_ca (pixm dst++; } } } static void fast_composite_over_n_8_0888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint32_t src, srca; uint8_t *dst_line, *dst; uint32_t d; uint8_t *mask_line, *mask, m; int dst_stride, mask_stride; int32_t w; - src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); srca = src >> 24; if (src == 0) return; - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, 
dest_y, uint8_t, dst_stride, dst_line, 3); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3); PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); while (height--) { dst = dst_line; dst_line += dst_stride; mask = mask_line; mask_line += mask_stride; @@ -533,43 +463,33 @@ fast_composite_over_n_8_0888 (pixman_imp } dst += 3; } } } static void fast_composite_over_n_8_0565 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint32_t src, srca; uint16_t *dst_line, *dst; uint32_t d; uint8_t *mask_line, *mask, m; int dst_stride, mask_stride; int32_t w; - src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); srca = src >> 24; if (src == 0) return; - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); while (height--) { dst = dst_line; dst_line += dst_stride; mask = mask_line; mask_line += mask_stride; @@ -599,46 +519,36 @@ fast_composite_over_n_8_0565 (pixman_imp } dst++; } } } static void fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint32_t src, srca, s; uint16_t src16; uint16_t *dst_line, *dst; uint32_t d; uint32_t *mask_line, *mask, ma; int dst_stride, mask_stride; int32_t w; - src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); srca = src >> 24; if (src == 0) return; src16 = CONVERT_8888_TO_0565 (src); - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1); while (height--) { dst = dst_line; dst_line += dst_stride; mask = mask_line; mask_line += mask_stride; @@ -676,36 +586,26 @@ fast_composite_over_n_8888_0565_ca (pixm } dst++; } } } static void fast_composite_over_8888_8888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint32_t *dst_line, *dst; uint32_t *src_line, *src, s; int dst_stride, src_stride; uint8_t a; int32_t w; - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); while (height--) { dst = 
dst_line; dst_line += dst_stride; src = src_line; src_line += src_stride; @@ -721,35 +621,25 @@ fast_composite_over_8888_8888 (pixman_im *dst = over (s, *dst); dst++; } } } static void fast_composite_src_x888_8888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint32_t *dst_line, *dst; uint32_t *src_line, *src; int dst_stride, src_stride; int32_t w; - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); while (height--) { dst = dst_line; dst_line += dst_stride; src = src_line; src_line += src_stride; @@ -758,37 +648,27 @@ fast_composite_src_x888_8888 (pixman_imp while (w--) *dst++ = (*src++) | 0xff000000; } } #if 0 static void fast_composite_over_8888_0888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint8_t *dst_line, *dst; uint32_t d; uint32_t *src_line, *src, s; uint8_t a; int dst_stride, src_stride; int32_t w; - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3); PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); while (height--) { dst = dst_line; dst_line += dst_stride; src = src_line; src_line += src_stride; @@ -810,38 +690,28 @@ fast_composite_over_8888_0888 (pixman_im dst += 3; } } } #endif static void fast_composite_over_8888_0565 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint16_t *dst_line, *dst; uint32_t d; uint32_t *src_line, *src, s; uint8_t a; int dst_stride, src_stride; int32_t w; PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); while (height--) { dst = dst_line; dst_line += dst_stride; src = src_line; src_line += src_stride; w = width; @@ -865,36 +735,26 @@ fast_composite_over_8888_0565 (pixman_im } dst++; } } } static void fast_composite_src_x888_0565 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint16_t *dst_line, *dst; uint32_t *src_line, *src, s; int dst_stride, src_stride; int32_t w; PIXMAN_IMAGE_GET_LINE (src_image, 
src_x, src_y, uint32_t, src_stride, src_line, 1); - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); while (height--) { dst = dst_line; dst_line += dst_stride; src = src_line; src_line += src_stride; w = width; @@ -905,38 +765,28 @@ fast_composite_src_x888_0565 (pixman_imp *dst = CONVERT_8888_TO_0565 (s); dst++; } } } static void fast_composite_add_8_8 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint8_t *dst_line, *dst; uint8_t *src_line, *src; int dst_stride, src_stride; int32_t w; uint8_t s, d; uint16_t t; PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1); - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); while (height--) { dst = dst_line; dst_line += dst_stride; src = src_line; src_line += src_stride; w = width; @@ -956,37 +806,27 @@ fast_composite_add_8_8 (pixman_implement } dst++; } } } static void fast_composite_add_8888_8888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint32_t *dst_line, *dst; uint32_t *src_line, *src; int dst_stride, src_stride; int32_t w; uint32_t s, d; PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); while (height--) { dst = dst_line; dst_line += dst_stride; src = src_line; src_line += src_stride; w = width; @@ -1006,39 +846,29 @@ fast_composite_add_8888_8888 (pixman_imp } dst++; } } } static void fast_composite_add_n_8_8 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint8_t *dst_line, *dst; uint8_t *mask_line, *mask; int dst_stride, mask_stride; int32_t w; uint32_t src; uint8_t sa; - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1); - src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); sa = (src >> 24); while (height--) { dst = dst_line; dst_line += dst_stride; mask = mask_line; mask_line += mask_stride; @@ -1071,38 +901,28 @@ fast_composite_add_n_8_8 (pixman_impleme #endif #define TEST_BIT(p, n) \ (*((p) + ((n) >> 5)) & CREATE_BITMASK ((n) & 31)) #define SET_BIT(p, n) \ do { *((p) + 
((n) >> 5)) |= CREATE_BITMASK ((n) & 31); } while (0); static void -fast_composite_add_1000_1000 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) +fast_composite_add_1_1 (pixman_implementation_t *imp, + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint32_t *dst_line, *dst; uint32_t *src_line, *src; int dst_stride, src_stride; int32_t w; PIXMAN_IMAGE_GET_LINE (src_image, 0, src_y, uint32_t, src_stride, src_line, 1); - PIXMAN_IMAGE_GET_LINE (dst_image, 0, dest_y, uint32_t, + PIXMAN_IMAGE_GET_LINE (dest_image, 0, dest_y, uint32_t, dst_stride, dst_line, 1); while (height--) { dst = dst_line; dst_line += dst_stride; src = src_line; src_line += src_stride; @@ -1117,45 +937,35 @@ fast_composite_add_1000_1000 (pixman_imp if (TEST_BIT (src, src_x + w)) SET_BIT (dst, dest_x + w); } } } static void fast_composite_over_n_1_8888 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint32_t src, srca; uint32_t *dst, *dst_line; uint32_t *mask, *mask_line; int mask_stride, dst_stride; uint32_t bitcache, bitmask; int32_t w; if (width <= 0) return; - src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); srca = src >> 24; if (src == 0) return; - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t, mask_stride, mask_line, 1); mask_line += mask_x >> 5; if (srca == 0xff) { while (height--) @@ -1209,47 +1019,37 @@ fast_composite_over_n_1_8888 (pixman_imp dst++; } } } } static void fast_composite_over_n_1_0565 (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint32_t src, srca; uint16_t *dst, *dst_line; uint32_t *mask, *mask_line; int mask_stride, dst_stride; uint32_t bitcache, bitmask; int32_t w; uint32_t d; uint16_t src565; if (width <= 0) return; - src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); srca = src >> 24; if (src == 0) return; - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1); PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t, mask_stride, mask_line, 1); mask_line += mask_x >> 5; if (srca == 0xff) { src565 = CONVERT_8888_TO_0565 (src); @@ -1311,108 +1111,259 @@ fast_composite_over_n_1_0565 (pixman_imp } /* * Simple bitblt */ static void fast_composite_solid_fill (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, 
- int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint32_t src; - src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format); + src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); - if (dst_image->bits.format == PIXMAN_a1) + if (dest_image->bits.format == PIXMAN_a1) { src = src >> 31; } - else if (dst_image->bits.format == PIXMAN_a8) + else if (dest_image->bits.format == PIXMAN_a8) { src = src >> 24; } - else if (dst_image->bits.format == PIXMAN_r5g6b5 || - dst_image->bits.format == PIXMAN_b5g6r5) + else if (dest_image->bits.format == PIXMAN_r5g6b5 || + dest_image->bits.format == PIXMAN_b5g6r5) { src = CONVERT_8888_TO_0565 (src); } - pixman_fill (dst_image->bits.bits, dst_image->bits.rowstride, - PIXMAN_FORMAT_BPP (dst_image->bits.format), + pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride, + PIXMAN_FORMAT_BPP (dest_image->bits.format), dest_x, dest_y, width, height, src); } static void fast_composite_src_memcpy (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { - int bpp = PIXMAN_FORMAT_BPP (dst_image->bits.format) / 8; + PIXMAN_COMPOSITE_ARGS (info); + int bpp = PIXMAN_FORMAT_BPP (dest_image->bits.format) / 8; uint32_t n_bytes = width * bpp; int dst_stride, src_stride; uint8_t *dst; uint8_t *src; src_stride = src_image->bits.rowstride * 4; - dst_stride = dst_image->bits.rowstride * 4; + dst_stride = dest_image->bits.rowstride * 4; src = (uint8_t *)src_image->bits.bits + src_y * src_stride + src_x * bpp; - dst = (uint8_t *)dst_image->bits.bits + dest_y * dst_stride + dest_x * bpp; + dst = (uint8_t *)dest_image->bits.bits + dest_y * dst_stride + dest_x * bpp; while (height--) { memcpy (dst, src, n_bytes); dst += dst_stride; src += src_stride; } } FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, SRC, COVER) FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, SRC, NONE) FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, SRC, PAD) FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, SRC, NORMAL) +FAST_NEAREST (x888_8888_cover, x888, 8888, uint32_t, uint32_t, SRC, COVER) +FAST_NEAREST (x888_8888_pad, x888, 8888, uint32_t, uint32_t, SRC, PAD) +FAST_NEAREST (x888_8888_normal, x888, 8888, uint32_t, uint32_t, SRC, NORMAL) FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, OVER, COVER) FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, OVER, NONE) FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, OVER, PAD) FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, OVER, NORMAL) FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, SRC, COVER) FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, SRC, NONE) FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD) FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL) FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL) FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, COVER) FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE) FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD) FAST_NEAREST (8888_565_normal, 
8888, 0565, uint32_t, uint16_t, OVER, NORMAL) +#define REPEAT_MIN_WIDTH 32 + +static void +fast_composite_tiled_repeat (pixman_implementation_t *imp, + pixman_composite_info_t *info) +{ + PIXMAN_COMPOSITE_ARGS (info); + pixman_composite_func_t func; + pixman_format_code_t mask_format; + uint32_t src_flags, mask_flags; + + src_flags = (info->src_flags & ~FAST_PATH_NORMAL_REPEAT) | + FAST_PATH_SAMPLES_COVER_CLIP_NEAREST; + + if (mask_image) + { + mask_format = mask_image->common.extended_format_code; + mask_flags = info->mask_flags; + } + else + { + mask_format = PIXMAN_null; + mask_flags = FAST_PATH_IS_OPAQUE; + } + + if (_pixman_lookup_composite_function ( + imp->toplevel, info->op, + src_image->common.extended_format_code, src_flags, + mask_format, mask_flags, + dest_image->common.extended_format_code, info->dest_flags, + &imp, &func)) + { + int32_t sx, sy; + int32_t width_remain; + int32_t num_pixels; + int32_t src_width; + int32_t i, j; + pixman_image_t extended_src_image; + uint32_t extended_src[REPEAT_MIN_WIDTH * 2]; + pixman_bool_t need_src_extension; + uint32_t *src_line; + int32_t src_stride; + int32_t src_bpp; + pixman_composite_info_t info2 = *info; + + src_bpp = PIXMAN_FORMAT_BPP (src_image->bits.format); + + if (src_image->bits.width < REPEAT_MIN_WIDTH && + (src_bpp == 32 || src_bpp == 16 || src_bpp == 8)) + { + sx = src_x; + sx = MOD (sx, src_image->bits.width); + sx += width; + src_width = 0; + + while (src_width < REPEAT_MIN_WIDTH && src_width <= sx) + src_width += src_image->bits.width; + + src_stride = (src_width * (src_bpp >> 3) + 3) / (int) sizeof (uint32_t); + + /* Initialize/validate stack-allocated temporary image */ + _pixman_bits_image_init (&extended_src_image, src_image->bits.format, + src_width, 1, &extended_src[0], src_stride); + _pixman_image_validate (&extended_src_image); + + info2.src_image = &extended_src_image; + need_src_extension = TRUE; + } + else + { + src_width = src_image->bits.width; + need_src_extension = FALSE; + } + + sx = src_x; + sy = src_y; + + while (--height >= 0) + { + sx = MOD (sx, src_width); + sy = MOD (sy, src_image->bits.height); + + if (need_src_extension) + { + if (src_bpp == 32) + { + PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint32_t, src_stride, src_line, 1); + + for (i = 0; i < src_width; ) + { + for (j = 0; j < src_image->bits.width; j++, i++) + extended_src[i] = src_line[j]; + } + } + else if (src_bpp == 16) + { + uint16_t *src_line_16; + + PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint16_t, src_stride, + src_line_16, 1); + src_line = (uint32_t*)src_line_16; + + for (i = 0; i < src_width; ) + { + for (j = 0; j < src_image->bits.width; j++, i++) + ((uint16_t*)extended_src)[i] = ((uint16_t*)src_line)[j]; + } + } + else if (src_bpp == 8) + { + uint8_t *src_line_8; + + PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint8_t, src_stride, + src_line_8, 1); + src_line = (uint32_t*)src_line_8; + + for (i = 0; i < src_width; ) + { + for (j = 0; j < src_image->bits.width; j++, i++) + ((uint8_t*)extended_src)[i] = ((uint8_t*)src_line)[j]; + } + } + + info2.src_y = 0; + } + else + { + info2.src_y = sy; + } + + width_remain = width; + + while (width_remain > 0) + { + num_pixels = src_width - sx; + + if (num_pixels > width_remain) + num_pixels = width_remain; + + info2.src_x = sx; + info2.width = num_pixels; + info2.height = 1; + + func (imp, &info2); + + width_remain -= num_pixels; + info2.mask_x += num_pixels; + info2.dest_x += num_pixels; + sx = 0; + } + + sx = src_x; + sy++; + info2.mask_x = info->mask_x; + info2.mask_y++; + info2.dest_x = 
info->dest_x; + info2.dest_y++; + } + + if (need_src_extension) + _pixman_image_fini (&extended_src_image); + } + else + { + _pixman_log_error (FUNC, "Didn't find a suitable function "); + } +} + /* Use more unrolling for src_0565_0565 because it is typically CPU bound */ static force_inline void scaled_nearest_scanline_565_565_SRC (uint16_t * dst, const uint16_t * src, int32_t w, pixman_fixed_t vx, pixman_fixed_t unit_x, pixman_fixed_t max_vx, @@ -1492,40 +1443,30 @@ combine_over (uint32_t s, uint32_t *dst) static force_inline void combine_src (uint32_t s, uint32_t *dst) { *dst = s; } static void fast_composite_scaled_nearest (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src_image, - pixman_image_t * mask_image, - pixman_image_t * dst_image, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint32_t *dst_line; uint32_t *src_line; int dst_stride, src_stride; int src_width, src_height; pixman_repeat_t src_repeat; pixman_fixed_t unit_x, unit_y; pixman_format_code_t src_format; pixman_vector_t v; pixman_fixed_t vy; - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1); /* pass in 0 instead of src_x and src_y because src_x and src_y need to be * transformed from destination space to source space */ PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, uint32_t, src_stride, src_line, 1); /* reference point is the center of the pixel */ v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; @@ -1808,69 +1749,49 @@ blt_rotated_270_##suffix (pix_type src_stride, \ trailing_pixels, \ H); \ } \ } \ \ static void \ fast_composite_rotate_90_##suffix (pixman_implementation_t *imp, \ - pixman_op_t op, \ - pixman_image_t * src_image, \ - pixman_image_t * mask_image, \ - pixman_image_t * dst_image, \ - int32_t src_x, \ - int32_t src_y, \ - int32_t mask_x, \ - int32_t mask_y, \ - int32_t dest_x, \ - int32_t dest_y, \ - int32_t width, \ - int32_t height) \ -{ \ - pix_type *dst_line; \ + pixman_composite_info_t *info) \ +{ \ + PIXMAN_COMPOSITE_ARGS (info); \ + pix_type *dst_line; \ pix_type *src_line; \ int dst_stride, src_stride; \ int src_x_t, src_y_t; \ \ - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, pix_type, \ + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, pix_type, \ dst_stride, dst_line, 1); \ src_x_t = -src_y + pixman_fixed_to_int ( \ src_image->common.transform->matrix[0][2] + \ pixman_fixed_1 / 2 - pixman_fixed_e) - height;\ src_y_t = src_x + pixman_fixed_to_int ( \ src_image->common.transform->matrix[1][2] + \ pixman_fixed_1 / 2 - pixman_fixed_e); \ PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type, \ src_stride, src_line, 1); \ blt_rotated_90_##suffix (dst_line, dst_stride, src_line, src_stride, \ width, height); \ } \ \ static void \ fast_composite_rotate_270_##suffix (pixman_implementation_t *imp, \ - pixman_op_t op, \ - pixman_image_t * src_image, \ - pixman_image_t * mask_image, \ - pixman_image_t * dst_image, \ - int32_t src_x, \ - int32_t src_y, \ - int32_t mask_x, \ - int32_t mask_y, \ - int32_t dest_x, \ - int32_t dest_y, \ - int32_t width, \ - int32_t height) \ + pixman_composite_info_t *info) \ { \ - pix_type *dst_line; \ + PIXMAN_COMPOSITE_ARGS (info); \ + pix_type *dst_line; \ pix_type *src_line; \ int 
dst_stride, src_stride; \ int src_x_t, src_y_t; \ \ - PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, pix_type, \ + PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, pix_type, \ dst_stride, dst_line, 1); \ src_x_t = src_y + pixman_fixed_to_int ( \ src_image->common.transform->matrix[0][2] + \ pixman_fixed_1 / 2 - pixman_fixed_e); \ src_y_t = -src_x + pixman_fixed_to_int ( \ src_image->common.transform->matrix[1][2] + \ pixman_fixed_1 / 2 - pixman_fixed_e) - width; \ PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type, \ @@ -1913,17 +1834,17 @@ static const pixman_fast_path_t c_fast_p PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, fast_composite_over_8888_8888), PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, fast_composite_over_8888_0565), PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, fast_composite_over_8888_8888), PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, fast_composite_over_8888_8888), PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, fast_composite_over_8888_0565), PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, fast_composite_add_8888_8888), PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, fast_composite_add_8888_8888), PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, fast_composite_add_8_8), - PIXMAN_STD_FAST_PATH (ADD, a1, null, a1, fast_composite_add_1000_1000), + PIXMAN_STD_FAST_PATH (ADD, a1, null, a1, fast_composite_add_1_1), PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, fast_composite_add_n_8888_8888_ca), PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, fast_composite_add_n_8_8), PIXMAN_STD_FAST_PATH (SRC, solid, null, a8r8g8b8, fast_composite_solid_fill), PIXMAN_STD_FAST_PATH (SRC, solid, null, x8r8g8b8, fast_composite_solid_fill), PIXMAN_STD_FAST_PATH (SRC, solid, null, a8b8g8r8, fast_composite_solid_fill), PIXMAN_STD_FAST_PATH (SRC, solid, null, x8b8g8r8, fast_composite_solid_fill), PIXMAN_STD_FAST_PATH (SRC, solid, null, a1, fast_composite_solid_fill), PIXMAN_STD_FAST_PATH (SRC, solid, null, a8, fast_composite_solid_fill), @@ -1961,16 +1882,23 @@ static const pixman_fast_path_t c_fast_p SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888_8888), SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, 8888_8888), SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, 8888_565), SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, 8888_565), SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, 565_565), + SIMPLE_NEAREST_FAST_PATH_COVER (SRC, x8r8g8b8, a8r8g8b8, x888_8888), + SIMPLE_NEAREST_FAST_PATH_COVER (SRC, x8b8g8r8, a8b8g8r8, x888_8888), + SIMPLE_NEAREST_FAST_PATH_PAD (SRC, x8r8g8b8, a8r8g8b8, x888_8888), + SIMPLE_NEAREST_FAST_PATH_PAD (SRC, x8b8g8r8, a8b8g8r8, x888_8888), + SIMPLE_NEAREST_FAST_PATH_NORMAL (SRC, x8r8g8b8, a8r8g8b8, x888_8888), + SIMPLE_NEAREST_FAST_PATH_NORMAL (SRC, x8b8g8r8, a8b8g8r8, x888_8888), + SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, 8888_8888), SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, 8888_8888), SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, 8888_8888), SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, 8888_8888), SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565), #define NEAREST_FAST_PATH(op,s,d) \ @@ -1999,17 +1927,17 @@ static const pixman_fast_path_t c_fast_p NEAREST_FAST_PATH (OVER, x8r8g8b8, a8r8g8b8), NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8), NEAREST_FAST_PATH (OVER, x8b8g8r8, a8b8g8r8), NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8), #define SIMPLE_ROTATE_FLAGS(angle) \ (FAST_PATH_ROTATE_ ## angle ## _TRANSFORM | \ FAST_PATH_NEAREST_FILTER | \ - 
FAST_PATH_SAMPLES_COVER_CLIP | \ + FAST_PATH_SAMPLES_COVER_CLIP_NEAREST | \ FAST_PATH_STANDARD_FLAGS) #define SIMPLE_ROTATE_FAST_PATH(op,s,d,suffix) \ { PIXMAN_OP_ ## op, \ PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (90), \ PIXMAN_null, 0, \ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ fast_composite_rotate_90_##suffix, \ @@ -2022,23 +1950,33 @@ static const pixman_fast_path_t c_fast_p } SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888), SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888), SIMPLE_ROTATE_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888), SIMPLE_ROTATE_FAST_PATH (SRC, r5g6b5, r5g6b5, 565), SIMPLE_ROTATE_FAST_PATH (SRC, a8, a8, 8), + /* Simple repeat fast path entry. */ + { PIXMAN_OP_any, + PIXMAN_any, + (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | FAST_PATH_BITS_IMAGE | + FAST_PATH_NORMAL_REPEAT), + PIXMAN_any, 0, + PIXMAN_any, FAST_PATH_STD_DEST_FLAGS, + fast_composite_tiled_repeat + }, + { PIXMAN_OP_NONE }, }; #ifdef WORDS_BIGENDIAN -#define A1_FILL_MASK(n, offs) (((1 << (n)) - 1) << (32 - (offs) - (n))) +#define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (32 - (offs) - (n))) #else -#define A1_FILL_MASK(n, offs) (((1 << (n)) - 1) << (offs)) +#define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (offs)) #endif static force_inline void pixman_fill1_line (uint32_t *dst, int offs, int width, int v) { if (offs) { int leading_pixels = 32 - offs;
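[Editor's note] The pixman-fast-path.c hunks above all apply one mechanical change: each fast path's thirteen-argument signature collapses into a single pixman_composite_info_t pointer, with PIXMAN_COMPOSITE_ARGS (info) re-introducing the old names as locals (and dst_image becoming dest_image along the way). The macro itself lives in pixman-private.h and is not part of this changeset; the sketch below is a plausible reconstruction, inferred from how the converted bodies keep using the old parameter names unchanged:

    /* Hedged sketch of PIXMAN_COMPOSITE_ARGS; the real definition is in
     * pixman-private.h, which this changeset does not touch. */
    #define PIXMAN_COMPOSITE_ARGS(info)                          \
        pixman_op_t       op         = info->op;                 \
        pixman_image_t   *src_image  = info->src_image;          \
        pixman_image_t   *mask_image = info->mask_image;         \
        pixman_image_t   *dest_image = info->dest_image;         \
        int32_t           src_x      = info->src_x;              \
        int32_t           src_y      = info->src_y;              \
        int32_t           mask_x     = info->mask_x;             \
        int32_t           mask_y     = info->mask_y;             \
        int32_t           dest_x     = info->dest_x;             \
        int32_t           dest_y     = info->dest_y;             \
        int32_t           width      = info->width;              \
        int32_t           height     = info->height

The struct form is what makes the new fast_composite_tiled_repeat cheap to write: it copies *info into info2, patches the per-tile coordinates and width, and re-dispatches through the looked-up func without re-marshalling thirteen arguments. The A1_FILL_MASK change in the same file is independent: with 1U the shift is unsigned, so building a fill mask that reaches bit 31 no longer overflows a signed int.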
--- a/gfx/cairo/libpixman/src/pixman-general.c +++ b/gfx/cairo/libpixman/src/pixman-general.c @@ -96,42 +96,32 @@ static const op_info_t op_flags[PIXMAN_N { ITER_LOCALIZED_ALPHA, ITER_LOCALIZED_ALPHA }, /* ADD */ { 0, 0 }, /* SATURATE */ }; #define SCANLINE_BUFFER_LENGTH 8192 static void general_composite_rect (pixman_implementation_t *imp, - pixman_op_t op, - pixman_image_t * src, - pixman_image_t * mask, - pixman_image_t * dest, - int32_t src_x, - int32_t src_y, - int32_t mask_x, - int32_t mask_y, - int32_t dest_x, - int32_t dest_y, - int32_t width, - int32_t height) + pixman_composite_info_t *info) { + PIXMAN_COMPOSITE_ARGS (info); uint64_t stack_scanline_buffer[(SCANLINE_BUFFER_LENGTH * 3 + 7) / 8]; uint8_t *scanline_buffer = (uint8_t *) stack_scanline_buffer; uint8_t *src_buffer, *mask_buffer, *dest_buffer; pixman_iter_t src_iter, mask_iter, dest_iter; pixman_combine_32_func_t compose; pixman_bool_t component_alpha; iter_flags_t narrow, src_flags; int Bpp; int i; - if ((src->common.flags & FAST_PATH_NARROW_FORMAT) && - (!mask || mask->common.flags & FAST_PATH_NARROW_FORMAT) && - (dest->common.flags & FAST_PATH_NARROW_FORMAT)) + if ((src_image->common.flags & FAST_PATH_NARROW_FORMAT) && + (!mask_image || mask_image->common.flags & FAST_PATH_NARROW_FORMAT) && + (dest_image->common.flags & FAST_PATH_NARROW_FORMAT)) { narrow = ITER_NARROW; Bpp = 4; } else { narrow = 0; Bpp = 8; @@ -147,60 +137,47 @@ general_composite_rect (pixman_implemen src_buffer = scanline_buffer; mask_buffer = src_buffer + width * Bpp; dest_buffer = mask_buffer + width * Bpp; /* src iter */ src_flags = narrow | op_flags[op].src; - _pixman_implementation_src_iter_init (imp->toplevel, &src_iter, src, + _pixman_implementation_src_iter_init (imp->toplevel, &src_iter, src_image, src_x, src_y, width, height, src_buffer, src_flags); /* mask iter */ if ((src_flags & (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) == (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) { /* If it doesn't matter what the source is, then it doesn't matter * what the mask is */ - mask = NULL; + mask_image = NULL; } component_alpha = - mask && - mask->common.type == BITS && - mask->common.component_alpha && - PIXMAN_FORMAT_RGB (mask->bits.format); + mask_image && + mask_image->common.type == BITS && + mask_image->common.component_alpha && + PIXMAN_FORMAT_RGB (mask_image->bits.format); _pixman_implementation_src_iter_init ( - imp->toplevel, &mask_iter, mask, mask_x, mask_y, width, height, + imp->toplevel, &mask_iter, mask_image, mask_x, mask_y, width, height, mask_buffer, narrow | (component_alpha? 
0 : ITER_IGNORE_RGB)); /* dest iter */ - _pixman_implementation_dest_iter_init (imp->toplevel, &dest_iter, dest, - dest_x, dest_y, width, height, - dest_buffer, - narrow | op_flags[op].dst); + _pixman_implementation_dest_iter_init ( + imp->toplevel, &dest_iter, dest_image, dest_x, dest_y, width, height, + dest_buffer, narrow | op_flags[op].dst); - if (narrow) - { - if (component_alpha) - compose = _pixman_implementation_combine_32_ca; - else - compose = _pixman_implementation_combine_32; - } - else - { - if (component_alpha) - compose = (pixman_combine_32_func_t)_pixman_implementation_combine_64_ca; - else - compose = (pixman_combine_32_func_t)_pixman_implementation_combine_64; - } + compose = _pixman_implementation_lookup_combiner ( + imp->toplevel, op, component_alpha, narrow); if (!compose) return; for (i = 0; i < height; ++i) { uint32_t *s, *m, *d; @@ -228,18 +205,18 @@ general_blt (pixman_implementation_t *im uint32_t * src_bits, uint32_t * dst_bits, int src_stride, int dst_stride, int src_bpp, int dst_bpp, int src_x, int src_y, - int dst_x, - int dst_y, + int dest_x, + int dest_y, int width, int height) { /* We can't blit unless we have sse2 or mmx */ return FALSE; }
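[Editor's note] In pixman-general.c the hand-rolled narrow/wide, unified/component-alpha combiner selection is replaced by one _pixman_implementation_lookup_combiner call (defined in the pixman-implementation.c hunk further down). The body of the scanline loop whose opening is visible above falls outside the hunk context; as a hedged sketch, not part of the changeset, the iterator pipeline it drives looks roughly like this:

    for (i = 0; i < height; ++i)
    {
        uint32_t *s, *m, *d;

        /* Fetch the mask first so the source iterator can use it as a
         * hint, e.g. to skip fetching where the mask is fully zero. */
        m = mask_iter.get_scanline (&mask_iter, NULL);
        s = src_iter.get_scanline (&src_iter, m);
        d = dest_iter.get_scanline (&dest_iter, NULL);

        /* 'compose' is the combiner returned by the lookup above. */
        compose (imp->toplevel, op, d, s, m, width);

        dest_iter.write_back (&dest_iter);
    }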
--- a/gfx/cairo/libpixman/src/pixman-gradient-walker.c +++ b/gfx/cairo/libpixman/src/pixman-gradient-walker.c @@ -26,133 +26,75 @@ #ifdef HAVE_CONFIG_H #include <config.h> #endif #include "pixman-private.h" void _pixman_gradient_walker_init (pixman_gradient_walker_t *walker, gradient_t * gradient, - unsigned int spread) + pixman_repeat_t repeat) { walker->num_stops = gradient->n_stops; walker->stops = gradient->stops; walker->left_x = 0; walker->right_x = 0x10000; walker->stepper = 0; walker->left_ag = 0; walker->left_rb = 0; walker->right_ag = 0; walker->right_rb = 0; - walker->spread = spread; + walker->repeat = repeat; walker->need_reset = TRUE; } -void -_pixman_gradient_walker_reset (pixman_gradient_walker_t *walker, - pixman_fixed_32_32_t pos) +static void +gradient_walker_reset (pixman_gradient_walker_t *walker, + pixman_fixed_48_16_t pos) { int32_t x, left_x, right_x; - pixman_color_t *left_c, *right_c; + pixman_color_t *left_c, *right_c; int n, count = walker->num_stops; - pixman_gradient_stop_t * stops = walker->stops; + pixman_gradient_stop_t *stops = walker->stops; - static const pixman_color_t transparent_black = { 0, 0, 0, 0 }; - - switch (walker->spread) + if (walker->repeat == PIXMAN_REPEAT_NORMAL) + { + x = (int32_t)pos & 0xffff; + } + else if (walker->repeat == PIXMAN_REPEAT_REFLECT) + { + x = (int32_t)pos & 0xffff; + if ((int32_t)pos & 0x10000) + x = 0x10000 - x; + } + else { - case PIXMAN_REPEAT_NORMAL: - x = (int32_t)pos & 0xFFFF; - for (n = 0; n < count; n++) - if (x < stops[n].x) - break; - if (n == 0) - { - left_x = stops[count - 1].x - 0x10000; - left_c = &stops[count - 1].color; - } - else - { - left_x = stops[n - 1].x; - left_c = &stops[n - 1].color; - } + x = pos; + } + + for (n = 0; n < count; n++) + { + if (x < stops[n].x) + break; + } + + left_x = stops[n - 1].x; + left_c = &stops[n - 1].color; + + right_x = stops[n].x; + right_c = &stops[n].color; - if (n == count) - { - right_x = stops[0].x + 0x10000; - right_c = &stops[0].color; - } - else - { - right_x = stops[n].x; - right_c = &stops[n].color; - } + if (walker->repeat == PIXMAN_REPEAT_NORMAL) + { left_x += (pos - x); right_x += (pos - x); - break; - - case PIXMAN_REPEAT_PAD: - for (n = 0; n < count; n++) - if (pos < stops[n].x) - break; - - if (n == 0) - { - left_x = INT32_MIN; - left_c = &stops[0].color; - } - else - { - left_x = stops[n - 1].x; - left_c = &stops[n - 1].color; - } - - if (n == count) - { - right_x = INT32_MAX; - right_c = &stops[n - 1].color; - } - else - { - right_x = stops[n].x; - right_c = &stops[n].color; - } - break; - - case PIXMAN_REPEAT_REFLECT: - x = (int32_t)pos & 0xFFFF; - if ((int32_t)pos & 0x10000) - x = 0x10000 - x; - for (n = 0; n < count; n++) - if (x < stops[n].x) - break; - - if (n == 0) - { - left_x = -stops[0].x; - left_c = &stops[0].color; - } - else - { - left_x = stops[n - 1].x; - left_c = &stops[n - 1].color; - } - - if (n == count) - { - right_x = 0x20000 - stops[n - 1].x; - right_c = &stops[n - 1].color; - } - else - { - right_x = stops[n].x; - right_c = &stops[n].color; - } - + } + else if (walker->repeat == PIXMAN_REPEAT_REFLECT) + { if ((int32_t)pos & 0x10000) { pixman_color_t *tmp_c; int32_t tmp_x; tmp_x = 0x10000 - right_x; right_x = 0x10000 - left_x; left_x = tmp_x; @@ -160,80 +102,56 @@ void tmp_c = right_c; right_c = left_c; left_c = tmp_c; x = 0x10000 - x; } left_x += (pos - x); right_x += (pos - x); - break; - - default: /* REPEAT_NONE */ - for (n = 0; n < count; n++) - if (pos < stops[n].x) - break; - + } + else if (walker->repeat == 
PIXMAN_REPEAT_NONE) + { if (n == 0) - { - left_x = INT32_MIN; - right_x = stops[0].x; - left_c = right_c = (pixman_color_t*) &transparent_black; - } + right_c = left_c; else if (n == count) - { - left_x = stops[n - 1].x; - right_x = INT32_MAX; - left_c = right_c = (pixman_color_t*) &transparent_black; - } - else - { - left_x = stops[n - 1].x; - right_x = stops[n].x; - left_c = &stops[n - 1].color; - right_c = &stops[n].color; - } + left_c = right_c; } walker->left_x = left_x; walker->right_x = right_x; walker->left_ag = ((left_c->alpha >> 8) << 16) | (left_c->green >> 8); walker->left_rb = ((left_c->red & 0xff00) << 8) | (left_c->blue >> 8); walker->right_ag = ((right_c->alpha >> 8) << 16) | (right_c->green >> 8); walker->right_rb = ((right_c->red & 0xff00) << 8) | (right_c->blue >> 8); if (walker->left_x == walker->right_x || - ( walker->left_ag == walker->right_ag && - walker->left_rb == walker->right_rb ) ) + (walker->left_ag == walker->right_ag && + walker->left_rb == walker->right_rb)) { walker->stepper = 0; } else { int32_t width = right_x - left_x; walker->stepper = ((1 << 24) + width / 2) / width; } walker->need_reset = FALSE; } -#define PIXMAN_GRADIENT_WALKER_NEED_RESET(w, x) \ - ( (w)->need_reset || (x) < (w)->left_x || (x) >= (w)->right_x) - - -/* the following assumes that PIXMAN_GRADIENT_WALKER_NEED_RESET(w,x) is FALSE */ uint32_t _pixman_gradient_walker_pixel (pixman_gradient_walker_t *walker, - pixman_fixed_32_32_t x) + pixman_fixed_48_16_t x) { int dist, idist; uint32_t t1, t2, a, color; - if (PIXMAN_GRADIENT_WALKER_NEED_RESET (walker, x)) - _pixman_gradient_walker_reset (walker, x); + if (walker->need_reset || x < walker->left_x || x >= walker->right_x) + gradient_walker_reset (walker, x); dist = ((int)(x - walker->left_x) * walker->stepper) >> 16; idist = 256 - dist; /* combined INTERPOLATE and premultiply */ t1 = walker->left_rb * idist + walker->right_rb * dist; t1 = (t1 >> 8) & 0xff00ff;
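[Editor's note] The rewritten walker no longer special-cases n == 0 / n == count or carries its own transparent_black, because the sentinel stops added in the pixman-image.c hunk below guarantee that stops[n - 1] and stops[n] are always valid. The interpolation math is unchanged 8.8 fixed point: stepper = ((1 << 24) + width / 2) / width turns a 16.16 span into a 0..256 blend weight, and the channels are processed in alpha-green / red-blue pairs. A self-contained sketch of that lane-splitting trick follows; lerp_8888 is our name, not pixman's, and it omits the premultiply step that the real _pixman_gradient_walker_pixel folds into the same pass:

    #include <stdint.h>

    /* Interpolate two a8r8g8b8 colors with a 0..256 weight, two channels
     * per 32-bit multiply, mirroring the t1/t2 computation above. */
    static uint32_t
    lerp_8888 (uint32_t left, uint32_t right, int dist)
    {
        int      idist = 256 - dist;
        uint32_t lag = (left  >> 8) & 0xff00ff;  /* alpha|green lanes */
        uint32_t rag = (right >> 8) & 0xff00ff;
        uint32_t lrb =  left        & 0xff00ff;  /* red|blue lanes   */
        uint32_t rrb =  right       & 0xff00ff;
        uint32_t ag, rb;

        /* An 8-bit lane times a 9-bit weight stays inside its 16-bit
         * slot, so neighbouring lanes never collide. */
        ag = (lag * idist + rag * dist) & 0xff00ff00;
        rb = ((lrb * idist + rrb * dist) >> 8) & 0x00ff00ff;

        return ag | rb;
    }

Packing two channels per multiply halves the arithmetic compared with interpolating the four channels one at a time.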
--- a/gfx/cairo/libpixman/src/pixman-image.c +++ b/gfx/cairo/libpixman/src/pixman-image.c @@ -26,62 +26,170 @@ #include <stdlib.h> #include <stdio.h> #include <string.h> #include <assert.h> #include "pixman-private.h" +static const pixman_color_t transparent_black = { 0, 0, 0, 0 }; + +static void +gradient_property_changed (pixman_image_t *image) +{ + gradient_t *gradient = &image->gradient; + int n = gradient->n_stops; + pixman_gradient_stop_t *stops = gradient->stops; + pixman_gradient_stop_t *begin = &(gradient->stops[-1]); + pixman_gradient_stop_t *end = &(gradient->stops[n]); + + switch (gradient->common.repeat) + { + default: + case PIXMAN_REPEAT_NONE: + begin->x = INT32_MIN; + begin->color = transparent_black; + end->x = INT32_MAX; + end->color = transparent_black; + break; + + case PIXMAN_REPEAT_NORMAL: + begin->x = stops[n - 1].x - pixman_fixed_1; + begin->color = stops[n - 1].color; + end->x = stops[0].x + pixman_fixed_1; + end->color = stops[0].color; + break; + + case PIXMAN_REPEAT_REFLECT: + begin->x = - stops[0].x; + begin->color = stops[0].color; + end->x = pixman_int_to_fixed (2) - stops[n - 1].x; + end->color = stops[n - 1].color; + break; + + case PIXMAN_REPEAT_PAD: + begin->x = INT32_MIN; + begin->color = stops[0].color; + end->x = INT32_MAX; + end->color = stops[n - 1].color; + break; + } +} + pixman_bool_t _pixman_init_gradient (gradient_t * gradient, const pixman_gradient_stop_t *stops, int n_stops) { return_val_if_fail (n_stops > 0, FALSE); - gradient->stops = pixman_malloc_ab (n_stops, sizeof (pixman_gradient_stop_t)); + /* We allocate two extra stops, one before the beginning of the stop list, + * and one after the end. These stops are initialized to whatever color + * would be used for positions outside the range of the stop list. + * + * This saves a bit of computation in the gradient walker. + * + * The pointer we store in the gradient_t struct still points to the + * first user-supplied struct, so when freeing, we will have to + * subtract one. 
+ */ + gradient->stops = + pixman_malloc_ab (n_stops + 2, sizeof (pixman_gradient_stop_t)); if (!gradient->stops) return FALSE; + gradient->stops += 1; memcpy (gradient->stops, stops, n_stops * sizeof (pixman_gradient_stop_t)); - gradient->n_stops = n_stops; + gradient->common.property_changed = gradient_property_changed; + return TRUE; } +void +_pixman_image_init (pixman_image_t *image) +{ + image_common_t *common = &image->common; + + pixman_region32_init (&common->clip_region); + + common->alpha_count = 0; + common->have_clip_region = FALSE; + common->clip_sources = FALSE; + common->transform = NULL; + common->repeat = PIXMAN_REPEAT_NONE; + common->filter = PIXMAN_FILTER_NEAREST; + common->filter_params = NULL; + common->n_filter_params = 0; + common->alpha_map = NULL; + common->component_alpha = FALSE; + common->ref_count = 1; + common->property_changed = NULL; + common->client_clip = FALSE; + common->destroy_func = NULL; + common->destroy_data = NULL; + common->dirty = TRUE; +} + +pixman_bool_t +_pixman_image_fini (pixman_image_t *image) +{ + image_common_t *common = (image_common_t *)image; + + common->ref_count--; + + if (common->ref_count == 0) + { + if (image->common.destroy_func) + image->common.destroy_func (image, image->common.destroy_data); + + pixman_region32_fini (&common->clip_region); + + free (common->transform); + free (common->filter_params); + + if (common->alpha_map) + pixman_image_unref ((pixman_image_t *)common->alpha_map); + + if (image->type == LINEAR || + image->type == RADIAL || + image->type == CONICAL) + { + if (image->gradient.stops) + { + /* See _pixman_init_gradient() for an explanation of the - 1 */ + free (image->gradient.stops - 1); + } + + /* This will trigger if someone adds a property_changed + * method to the linear/radial/conical gradient overwriting + * the general one. 
+ */ + assert ( + image->common.property_changed == gradient_property_changed); + } + + if (image->type == BITS && image->bits.free_me) + free (image->bits.free_me); + + return TRUE; + } + + return FALSE; +} + pixman_image_t * _pixman_image_allocate (void) { pixman_image_t *image = malloc (sizeof (pixman_image_t)); if (image) - { - image_common_t *common = &image->common; - - pixman_region32_init (&common->clip_region); - - common->alpha_count = 0; - common->have_clip_region = FALSE; - common->clip_sources = FALSE; - common->transform = NULL; - common->repeat = PIXMAN_REPEAT_NONE; - common->filter = PIXMAN_FILTER_NEAREST; - common->filter_params = NULL; - common->n_filter_params = 0; - common->alpha_map = NULL; - common->component_alpha = FALSE; - common->ref_count = 1; - common->property_changed = NULL; - common->client_clip = FALSE; - common->destroy_func = NULL; - common->destroy_data = NULL; - common->dirty = TRUE; - } + _pixman_image_init (image); return image; } static void image_property_changed (pixman_image_t *image) { image->common.dirty = TRUE; @@ -95,49 +203,19 @@ pixman_image_ref (pixman_image_t *image) return image; } /* returns TRUE when the image is freed */ PIXMAN_EXPORT pixman_bool_t pixman_image_unref (pixman_image_t *image) { - image_common_t *common = (image_common_t *)image; - - common->ref_count--; - - if (common->ref_count == 0) + if (_pixman_image_fini (image)) { - if (image->common.destroy_func) - image->common.destroy_func (image, image->common.destroy_data); - - pixman_region32_fini (&common->clip_region); - - if (common->transform) - free (common->transform); - - if (common->filter_params) - free (common->filter_params); - - if (common->alpha_map) - pixman_image_unref ((pixman_image_t *)common->alpha_map); - - if (image->type == LINEAR || - image->type == RADIAL || - image->type == CONICAL) - { - if (image->gradient.stops) - free (image->gradient.stops); - } - - if (image->type == BITS && image->bits.free_me) - free (image->bits.free_me); - free (image); - return TRUE; } return FALSE; } PIXMAN_EXPORT void pixman_image_set_destroy_function (pixman_image_t * image, @@ -216,23 +294,22 @@ compute_image_info (pixman_image_t *imag flags |= FAST_PATH_ROTATE_180_TRANSFORM; } flags |= FAST_PATH_SCALE_TRANSFORM; } else if (image->common.transform->matrix[0][0] == 0 && image->common.transform->matrix[1][1] == 0) { pixman_fixed_t m01 = image->common.transform->matrix[0][1]; - if (m01 == -image->common.transform->matrix[1][0]) - { - if (m01 == -pixman_fixed_1) - flags |= FAST_PATH_ROTATE_90_TRANSFORM; - else if (m01 == pixman_fixed_1) - flags |= FAST_PATH_ROTATE_270_TRANSFORM; - } + pixman_fixed_t m10 = image->common.transform->matrix[1][0]; + + if (m01 == -1 && m10 == 1) + flags |= FAST_PATH_ROTATE_90_TRANSFORM; + else if (m01 == 1 && m10 == -1) + flags |= FAST_PATH_ROTATE_270_TRANSFORM; } } if (image->common.transform->matrix[0][0] > 0) flags |= FAST_PATH_X_UNIT_POSITIVE; if (image->common.transform->matrix[1][0] == 0) flags |= FAST_PATH_Y_UNIT_ZERO; @@ -245,16 +322,57 @@ compute_image_info (pixman_image_t *imag case PIXMAN_FILTER_FAST: flags |= (FAST_PATH_NEAREST_FILTER | FAST_PATH_NO_CONVOLUTION_FILTER); break; case PIXMAN_FILTER_BILINEAR: case PIXMAN_FILTER_GOOD: case PIXMAN_FILTER_BEST: flags |= (FAST_PATH_BILINEAR_FILTER | FAST_PATH_NO_CONVOLUTION_FILTER); + + /* Here we have a chance to optimize BILINEAR filter to NEAREST if + * they are equivalent for the currently used transformation matrix. 
+ */ + if (flags & FAST_PATH_ID_TRANSFORM) + { + flags |= FAST_PATH_NEAREST_FILTER; + } + else if ( + /* affine and integer translation components in matrix ... */ + ((flags & FAST_PATH_AFFINE_TRANSFORM) && + !pixman_fixed_frac (image->common.transform->matrix[0][2] | + image->common.transform->matrix[1][2])) && + ( + /* ... combined with a simple rotation */ + (flags & (FAST_PATH_ROTATE_90_TRANSFORM | + FAST_PATH_ROTATE_180_TRANSFORM | + FAST_PATH_ROTATE_270_TRANSFORM)) || + /* ... or combined with a simple non-rotated translation */ + (image->common.transform->matrix[0][0] == pixman_fixed_1 && + image->common.transform->matrix[1][1] == pixman_fixed_1 && + image->common.transform->matrix[0][1] == 0 && + image->common.transform->matrix[1][0] == 0) + ) + ) + { + /* FIXME: there are some affine-test failures, showing that + * handling of BILINEAR and NEAREST filter is not quite + * equivalent when getting close to 32K for the translation + * components of the matrix. That's likely some bug, but for + * now just skip BILINEAR->NEAREST optimization in this case. + */ + pixman_fixed_t magic_limit = pixman_int_to_fixed (30000); + if (image->common.transform->matrix[0][2] <= magic_limit && + image->common.transform->matrix[1][2] <= magic_limit && + image->common.transform->matrix[0][2] >= -magic_limit && + image->common.transform->matrix[1][2] >= -magic_limit) + { + flags |= FAST_PATH_NEAREST_FILTER; + } + } break; case PIXMAN_FILTER_CONVOLUTION: break; default: flags |= FAST_PATH_NO_CONVOLUTION_FILTER; break; @@ -315,22 +433,17 @@ compute_image_info (pixman_image_t *imag image->bits.height == 1 && image->common.repeat != PIXMAN_REPEAT_NONE) { code = PIXMAN_solid; } else { code = image->bits.format; - - if (!image->common.transform && - image->common.repeat == PIXMAN_REPEAT_NORMAL) - { - flags |= FAST_PATH_SIMPLE_REPEAT | FAST_PATH_SAMPLES_COVER_CLIP; - } + flags |= FAST_PATH_BITS_IMAGE; } if (!PIXMAN_FORMAT_A (image->bits.format) && PIXMAN_FORMAT_TYPE (image->bits.format) != PIXMAN_TYPE_GRAY && PIXMAN_FORMAT_TYPE (image->bits.format) != PIXMAN_TYPE_COLOR) { flags |= FAST_PATH_SAMPLES_OPAQUE;
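[Editor's note] The sentinel-stop scheme in _pixman_init_gradient / gradient_property_changed is what allows the gradient walker above to drop its per-repeat special cases: stops[-1] and stops[n] always hold whatever color applies outside the user-supplied range. A worked example for PIXMAN_REPEAT_NORMAL, with illustrative stop positions of our choosing:

    #include <stdint.h>
    #include <stdio.h>

    typedef int32_t pixman_fixed_t;       /* 16.16, as in pixman.h; this
                                             toy does not include pixman */
    #define pixman_fixed_1 (1 << 16)

    /* With user stops at x = 0.25 and x = 0.75, gradient_property_changed()
     * gives the walker a virtual stop at 0.75 - 1.0 = -0.25 carrying the
     * last color and one at 0.25 + 1.0 = 1.25 carrying the first color,
     * so one period blends seamlessly into the next. */
    int
    main (void)
    {
        pixman_fixed_t stop0 = pixman_fixed_1 / 4;      /* 0.25 */
        pixman_fixed_t stop1 = pixman_fixed_1 * 3 / 4;  /* 0.75 */

        pixman_fixed_t begin_x = stop1 - pixman_fixed_1;  /* sentinel before */
        pixman_fixed_t end_x   = stop0 + pixman_fixed_1;  /* sentinel after  */

        printf ("begin sentinel %.2f, end sentinel %.2f\n",
                begin_x / 65536.0, end_x / 65536.0);      /* -0.25, 1.25 */
        return 0;
    }

Because the stops pointer is bumped by one right after allocation, the begin sentinel lives at stops[-1], which is why _pixman_image_fini frees gradient->stops - 1.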
--- a/gfx/cairo/libpixman/src/pixman-implementation.c +++ b/gfx/cairo/libpixman/src/pixman-implementation.c @@ -22,82 +22,34 @@ */ #ifdef HAVE_CONFIG_H #include <config.h> #endif #include <stdlib.h> #include "pixman-private.h" -static void -delegate_combine_32 (pixman_implementation_t * imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - _pixman_implementation_combine_32 (imp->delegate, - op, dest, src, mask, width); -} - -static void -delegate_combine_64 (pixman_implementation_t * imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - _pixman_implementation_combine_64 (imp->delegate, - op, dest, src, mask, width); -} - -static void -delegate_combine_32_ca (pixman_implementation_t * imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - _pixman_implementation_combine_32_ca (imp->delegate, - op, dest, src, mask, width); -} - -static void -delegate_combine_64_ca (pixman_implementation_t * imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - _pixman_implementation_combine_64_ca (imp->delegate, - op, dest, src, mask, width); -} - static pixman_bool_t delegate_blt (pixman_implementation_t * imp, uint32_t * src_bits, uint32_t * dst_bits, int src_stride, int dst_stride, int src_bpp, int dst_bpp, int src_x, int src_y, - int dst_x, - int dst_y, + int dest_x, + int dest_y, int width, int height) { return _pixman_implementation_blt ( imp->delegate, src_bits, dst_bits, src_stride, dst_stride, - src_bpp, dst_bpp, src_x, src_y, dst_x, dst_y, + src_bpp, dst_bpp, src_x, src_y, dest_x, dest_y, width, height); } static pixman_bool_t delegate_fill (pixman_implementation_t *imp, uint32_t * bits, int stride, int bpp, @@ -145,90 +97,73 @@ pixman_implementation_t * /* Fill out function pointers with ones that just delegate */ imp->blt = delegate_blt; imp->fill = delegate_fill; imp->src_iter_init = delegate_src_iter_init; imp->dest_iter_init = delegate_dest_iter_init; + imp->fast_paths = fast_paths; + for (i = 0; i < PIXMAN_N_OPERATORS; ++i) { - imp->combine_32[i] = delegate_combine_32; - imp->combine_64[i] = delegate_combine_64; - imp->combine_32_ca[i] = delegate_combine_32_ca; - imp->combine_64_ca[i] = delegate_combine_64_ca; + imp->combine_32[i] = NULL; + imp->combine_64[i] = NULL; + imp->combine_32_ca[i] = NULL; + imp->combine_64_ca[i] = NULL; } - imp->fast_paths = fast_paths; - return imp; } -void -_pixman_implementation_combine_32 (pixman_implementation_t * imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - (*imp->combine_32[op]) (imp, op, dest, src, mask, width); -} - -void -_pixman_implementation_combine_64 (pixman_implementation_t * imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) +pixman_combine_32_func_t +_pixman_implementation_lookup_combiner (pixman_implementation_t *imp, + pixman_op_t op, + pixman_bool_t component_alpha, + pixman_bool_t narrow) { - (*imp->combine_64[op]) (imp, op, dest, src, mask, width); -} + pixman_combine_32_func_t f; -void -_pixman_implementation_combine_32_ca (pixman_implementation_t * imp, - pixman_op_t op, - uint32_t * dest, - const uint32_t * src, - const uint32_t * mask, - int width) -{ - (*imp->combine_32_ca[op]) (imp, op, dest, src, mask, width); -} + do + { + pixman_combine_32_func_t (*combiners[]) = + { + (pixman_combine_32_func_t 
*)imp->combine_64, + (pixman_combine_32_func_t *)imp->combine_64_ca, + imp->combine_32, + imp->combine_32_ca, + }; -void -_pixman_implementation_combine_64_ca (pixman_implementation_t * imp, - pixman_op_t op, - uint64_t * dest, - const uint64_t * src, - const uint64_t * mask, - int width) -{ - (*imp->combine_64_ca[op]) (imp, op, dest, src, mask, width); + f = combiners[component_alpha | (narrow << 1)][op]; + + imp = imp->delegate; + } + while (!f); + + return f; } pixman_bool_t _pixman_implementation_blt (pixman_implementation_t * imp, uint32_t * src_bits, uint32_t * dst_bits, int src_stride, int dst_stride, int src_bpp, int dst_bpp, int src_x, int src_y, - int dst_x, - int dst_y, + int dest_x, + int dest_y, int width, int height) { return (*imp->blt) (imp, src_bits, dst_bits, src_stride, dst_stride, - src_bpp, dst_bpp, src_x, src_y, dst_x, dst_y, + src_bpp, dst_bpp, src_x, src_y, dest_x, dest_y, width, height); } pixman_bool_t _pixman_implementation_fill (pixman_implementation_t *imp, uint32_t * bits, int stride, int bpp, @@ -236,22 +171,16 @@ pixman_bool_t int y, int width, int height, uint32_t xor) { return (*imp->fill) (imp, bits, stride, bpp, x, y, width, height, xor); } -static uint32_t * -get_scanline_null (pixman_iter_t *iter, const uint32_t *mask) -{ - return NULL; -} - void _pixman_implementation_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter, pixman_image_t *image, int x, int y, int width, int height, @@ -261,29 +190,17 @@ void iter->image = image; iter->buffer = (uint32_t *)buffer; iter->x = x; iter->y = y; iter->width = width; iter->height = height; iter->flags = flags; - if (!image) - { - iter->get_scanline = get_scanline_null; - } - else if ((flags & (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) == - (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) - { - iter->get_scanline = _pixman_iter_get_scanline_noop; - } - else - { - (*imp->src_iter_init) (imp, iter); - } + (*imp->src_iter_init) (imp, iter); } void _pixman_implementation_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter, pixman_image_t *image, int x, int y,
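[Editor's note] The four delegate_combine_* trampolines disappear because a combiner slot may now simply stay NULL: _pixman_implementation_lookup_combiner walks the delegate chain until it finds a non-NULL entry, and the general implementation at the bottom of the chain fills every slot, so the do/while terminates. The 2-bit table index packs the two arguments; below is a hypothetical call fragment mirroring the general_composite_rect hunk above:

    /* index = component_alpha | (narrow << 1)
     *
     *   0 : unified alpha, wide buffers (16 bpc)   -> imp->combine_64[op]
     *   1 : component alpha, wide buffers          -> imp->combine_64_ca[op]
     *   2 : unified alpha, narrow buffers (8 bpc)  -> imp->combine_32[op]
     *   3 : component alpha, narrow buffers        -> imp->combine_32_ca[op]
     */
    pixman_combine_32_func_t compose =
        _pixman_implementation_lookup_combiner (imp->toplevel, op,
                                                component_alpha, narrow);
    compose (imp->toplevel, op, d, s, m, width);   /* one scanline */

Note this relies on ITER_NARROW having the value 1, so that the narrow flag lands cleanly in bit 1 of the index.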
new file mode 100644 --- /dev/null +++ b/gfx/cairo/libpixman/src/pixman-inlines.h @@ -0,0 +1,1280 @@ +/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */ +/* + * Copyright © 2000 SuSE, Inc. + * Copyright © 2007 Red Hat, Inc. + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of SuSE not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. SuSE makes no representations about the + * suitability of this software for any purpose. It is provided "as is" + * without express or implied warranty. + * + * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE + * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Author: Keith Packard, SuSE, Inc. + */ + +#ifndef PIXMAN_FAST_PATH_H__ +#define PIXMAN_FAST_PATH_H__ + +#include "pixman-private.h" + +#define PIXMAN_REPEAT_COVER -1 + +/* Flags describing input parameters to fast path macro template. + * Turning on some flag values may indicate that + * "some property X is available so template can use this" or + * "some property X should be handled by template". + * + * FLAG_HAVE_SOLID_MASK + * Input mask is solid so template should handle this. + * + * FLAG_HAVE_NON_SOLID_MASK + * Input mask is bits mask so template should handle this. + * + * FLAG_HAVE_SOLID_MASK and FLAG_HAVE_NON_SOLID_MASK are mutually + * exclusive. (It's not allowed to turn both flags on) + */ +#define FLAG_NONE (0) +#define FLAG_HAVE_SOLID_MASK (1 << 1) +#define FLAG_HAVE_NON_SOLID_MASK (1 << 2) + +/* To avoid too short repeated scanline function calls, extend source + * scanlines having width less than below constant value. 
+ */ +#define REPEAT_NORMAL_MIN_WIDTH 64 + +static force_inline pixman_bool_t +repeat (pixman_repeat_t repeat, int *c, int size) +{ + if (repeat == PIXMAN_REPEAT_NONE) + { + if (*c < 0 || *c >= size) + return FALSE; + } + else if (repeat == PIXMAN_REPEAT_NORMAL) + { + while (*c >= size) + *c -= size; + while (*c < 0) + *c += size; + } + else if (repeat == PIXMAN_REPEAT_PAD) + { + *c = CLIP (*c, 0, size - 1); + } + else /* REFLECT */ + { + *c = MOD (*c, size * 2); + if (*c >= size) + *c = size * 2 - *c - 1; + } + return TRUE; +} + +#if SIZEOF_LONG > 4 + +static force_inline uint32_t +bilinear_interpolation (uint32_t tl, uint32_t tr, + uint32_t bl, uint32_t br, + int distx, int disty) +{ + uint64_t distxy, distxiy, distixy, distixiy; + uint64_t tl64, tr64, bl64, br64; + uint64_t f, r; + + distxy = distx * disty; + distxiy = distx * (256 - disty); + distixy = (256 - distx) * disty; + distixiy = (256 - distx) * (256 - disty); + + /* Alpha and Blue */ + tl64 = tl & 0xff0000ff; + tr64 = tr & 0xff0000ff; + bl64 = bl & 0xff0000ff; + br64 = br & 0xff0000ff; + + f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy; + r = f & 0x0000ff0000ff0000ull; + + /* Red and Green */ + tl64 = tl; + tl64 = ((tl64 << 16) & 0x000000ff00000000ull) | (tl64 & 0x0000ff00ull); + + tr64 = tr; + tr64 = ((tr64 << 16) & 0x000000ff00000000ull) | (tr64 & 0x0000ff00ull); + + bl64 = bl; + bl64 = ((bl64 << 16) & 0x000000ff00000000ull) | (bl64 & 0x0000ff00ull); + + br64 = br; + br64 = ((br64 << 16) & 0x000000ff00000000ull) | (br64 & 0x0000ff00ull); + + f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy; + r |= ((f >> 16) & 0x000000ff00000000ull) | (f & 0xff000000ull); + + return (uint32_t)(r >> 16); +} + +#else + +static force_inline uint32_t +bilinear_interpolation (uint32_t tl, uint32_t tr, + uint32_t bl, uint32_t br, + int distx, int disty) +{ + int distxy, distxiy, distixy, distixiy; + uint32_t f, r; + + distxy = distx * disty; + distxiy = (distx << 8) - distxy; /* distx * (256 - disty) */ + distixy = (disty << 8) - distxy; /* disty * (256 - distx) */ + distixiy = + 256 * 256 - (disty << 8) - + (distx << 8) + distxy; /* (256 - distx) * (256 - disty) */ + + /* Blue */ + r = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy + + (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy; + + /* Green */ + f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy + + (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy; + r |= f & 0xff000000; + + tl >>= 16; + tr >>= 16; + bl >>= 16; + br >>= 16; + r >>= 16; + + /* Red */ + f = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy + + (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy; + r |= f & 0x00ff0000; + + /* Alpha */ + f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy + + (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy; + r |= f & 0xff000000; + + return r; +} + +#endif + +/* + * For each scanline fetched from source image with PAD repeat: + * - calculate how many pixels need to be padded on the left side + * - calculate how many pixels need to be padded on the right side + * - update width to only count pixels which are fetched from the image + * All this information is returned via 'width', 'left_pad', 'right_pad' + * arguments. The code is assuming that 'unit_x' is positive. + * + * Note: 64-bit math is used in order to avoid potential overflows, which + * is probably excessive in many cases. This particular function + * may need its own correctness test and performance tuning. 
+ */ +static force_inline void +pad_repeat_get_scanline_bounds (int32_t source_image_width, + pixman_fixed_t vx, + pixman_fixed_t unit_x, + int32_t * width, + int32_t * left_pad, + int32_t * right_pad) +{ + int64_t max_vx = (int64_t) source_image_width << 16; + int64_t tmp; + if (vx < 0) + { + tmp = ((int64_t) unit_x - 1 - vx) / unit_x; + if (tmp > *width) + { + *left_pad = *width; + *width = 0; + } + else + { + *left_pad = (int32_t) tmp; + *width -= (int32_t) tmp; + } + } + else + { + *left_pad = 0; + } + tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad; + if (tmp < 0) + { + *right_pad = *width; + *width = 0; + } + else if (tmp >= *width) + { + *right_pad = 0; + } + else + { + *right_pad = *width - (int32_t) tmp; + *width = (int32_t) tmp; + } +} + +/* A macroified version of specialized nearest scalers for some + * common 8888 and 565 formats. It supports SRC and OVER ops. + * + * There are two repeat versions, one that handles repeat normal, + * and one without repeat handling that only works if the src region + * used is completely covered by the pre-repeated source samples. + * + * The loops are unrolled to process two pixels per iteration for better + * performance on most CPU architectures (superscalar processors + * can issue several operations simultaneously, other processors can hide + * instructions latencies by pipelining operations). Unrolling more + * does not make much sense because the compiler will start running out + * of spare registers soon. + */ + +#define GET_8888_ALPHA(s) ((s) >> 24) + /* This is not actually used since we don't have an OVER with + 565 source, but it is needed to build. */ +#define GET_0565_ALPHA(s) 0xff +#define GET_x888_ALPHA(s) 0xff + +#define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT, \ + src_type_t, dst_type_t, OP, repeat_mode) \ +static force_inline void \ +scanline_func_name (dst_type_t *dst, \ + const src_type_t *src, \ + int32_t w, \ + pixman_fixed_t vx, \ + pixman_fixed_t unit_x, \ + pixman_fixed_t max_vx, \ + pixman_bool_t fully_transparent_src) \ +{ \ + uint32_t d; \ + src_type_t s1, s2; \ + uint8_t a1, a2; \ + int x1, x2; \ + \ + if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER && fully_transparent_src) \ + return; \ + \ + if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER) \ + abort(); \ + \ + while ((w -= 2) >= 0) \ + { \ + x1 = vx >> 16; \ + vx += unit_x; \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ + { \ + /* This works because we know that unit_x is positive */ \ + while (vx >= max_vx) \ + vx -= max_vx; \ + } \ + s1 = src[x1]; \ + \ + x2 = vx >> 16; \ + vx += unit_x; \ + if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ + { \ + /* This works because we know that unit_x is positive */ \ + while (vx >= max_vx) \ + vx -= max_vx; \ + } \ + s2 = src[x2]; \ + \ + if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \ + { \ + a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \ + a2 = GET_ ## SRC_FORMAT ## _ALPHA(s2); \ + \ + if (a1 == 0xff) \ + { \ + *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \ + } \ + else if (s1) \ + { \ + d = CONVERT_ ## DST_FORMAT ## _TO_8888 (*dst); \ + s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1); \ + a1 ^= 0xff; \ + UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \ + *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \ + } \ + dst++; \ + \ + if (a2 == 0xff) \ + { \ + *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \ + } \ + else if (s2) \ + { \ + d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \ + s2 = CONVERT_## SRC_FORMAT ## _TO_8888 (s2); \ + a2 ^= 
+
+/* A macroified version of specialized nearest scalers for some
+ * common 8888 and 565 formats. It supports SRC and OVER ops.
+ *
+ * There are two repeat versions: one that handles normal repeat,
+ * and one without repeat handling that only works if the src region
+ * used is completely covered by the pre-repeated source samples.
+ *
+ * The loops are unrolled to process two pixels per iteration for better
+ * performance on most CPU architectures (superscalar processors
+ * can issue several operations simultaneously, other processors can hide
+ * instruction latencies by pipelining operations). Unrolling more
+ * does not make much sense because the compiler will start running out
+ * of spare registers soon.
+ */
+
+#define GET_8888_ALPHA(s) ((s) >> 24)
+/* This is not actually used since we don't have an OVER with
+   565 source, but it is needed to build. */
+#define GET_0565_ALPHA(s) 0xff
+#define GET_x888_ALPHA(s) 0xff
+
+#define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT, \
+                              src_type_t, dst_type_t, OP, repeat_mode) \
+static force_inline void \
+scanline_func_name (dst_type_t *dst, \
+                    const src_type_t *src, \
+                    int32_t w, \
+                    pixman_fixed_t vx, \
+                    pixman_fixed_t unit_x, \
+                    pixman_fixed_t max_vx, \
+                    pixman_bool_t fully_transparent_src) \
+{ \
+    uint32_t d; \
+    src_type_t s1, s2; \
+    uint8_t a1, a2; \
+    int x1, x2; \
+ \
+    if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER && fully_transparent_src) \
+        return; \
+ \
+    if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER) \
+        abort(); \
+ \
+    while ((w -= 2) >= 0) \
+    { \
+        x1 = vx >> 16; \
+        vx += unit_x; \
+        if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
+        { \
+            /* This works because we know that unit_x is positive */ \
+            while (vx >= max_vx) \
+                vx -= max_vx; \
+        } \
+        s1 = src[x1]; \
+ \
+        x2 = vx >> 16; \
+        vx += unit_x; \
+        if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
+        { \
+            /* This works because we know that unit_x is positive */ \
+            while (vx >= max_vx) \
+                vx -= max_vx; \
+        } \
+        s2 = src[x2]; \
+ \
+        if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \
+        { \
+            a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \
+            a2 = GET_ ## SRC_FORMAT ## _ALPHA(s2); \
+ \
+            if (a1 == 0xff) \
+            { \
+                *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
+            } \
+            else if (s1) \
+            { \
+                d = CONVERT_ ## DST_FORMAT ## _TO_8888 (*dst); \
+                s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1); \
+                a1 ^= 0xff; \
+                UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \
+                *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \
+            } \
+            dst++; \
+ \
+            if (a2 == 0xff) \
+            { \
+                *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \
+            } \
+            else if (s2) \
+            { \
+                d = CONVERT_ ## DST_FORMAT ## _TO_8888 (*dst); \
+                s2 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s2); \
+                a2 ^= 0xff; \
+                UN8x4_MUL_UN8_ADD_UN8x4 (d, a2, s2); \
+                *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \
+            } \
+            dst++; \
+        } \
+        else /* PIXMAN_OP_SRC */ \
+        { \
+            *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
+            *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \
+        } \
+    } \
+ \
+    if (w & 1) \
+    { \
+        x1 = vx >> 16; \
+        s1 = src[x1]; \
+ \
+        if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \
+        { \
+            a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \
+ \
+            if (a1 == 0xff) \
+            { \
+                *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
+            } \
+            else if (s1) \
+            { \
+                d = CONVERT_ ## DST_FORMAT ## _TO_8888 (*dst); \
+                s1 = CONVERT_ ## SRC_FORMAT ## _TO_8888 (s1); \
+                a1 ^= 0xff; \
+                UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \
+                *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \
+            } \
+            dst++; \
+        } \
+        else /* PIXMAN_OP_SRC */ \
+        { \
+            *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
+        } \
+    } \
+}
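
In the OVER arm above, after 'a1 ^= 0xff' the UN8x4_MUL_UN8_ADD_UN8x4 macro
scales each destination channel by the inverted source alpha and adds the
premultiplied source channel. A scalar sketch of that blend for one
a8r8g8b8 pixel (illustrative only; pixman's UN8x4 macros round and saturate
slightly differently):

    #include <stdint.h>

    static uint32_t
    over_one_pixel_sketch (uint32_t src, uint32_t dst)
    {
        uint32_t ia = 255 - (src >> 24); /* inverted source alpha */
        uint32_t result = 0;
        int shift;

        for (shift = 0; shift <= 24; shift += 8)
        {
            uint32_t s = (src >> shift) & 0xff;
            uint32_t d = (dst >> shift) & 0xff;
            uint32_t c = s + (d * ia + 127) / 255; /* src + dst * (1 - alpha) */

            if (c > 255) /* saturate the add */
                c = 255;
            result |= c << shift;
        }
        return result;
    }
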
+
+#define FAST_NEAREST_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \
+                                  dst_type_t, repeat_mode, have_mask, mask_is_solid) \
+static void \
+fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp, \
+                                                  pixman_composite_info_t *info) \
+{ \
+    PIXMAN_COMPOSITE_ARGS (info); \
+    dst_type_t *dst_line; \
+    mask_type_t *mask_line; \
+    src_type_t *src_first_line; \
+    int y; \
+    pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */ \
+    pixman_fixed_t max_vy; \
+    pixman_vector_t v; \
+    pixman_fixed_t vx, vy; \
+    pixman_fixed_t unit_x, unit_y; \
+    int32_t left_pad, right_pad; \
+ \
+    src_type_t *src; \
+    dst_type_t *dst; \
+    mask_type_t solid_mask; \
+    const mask_type_t *mask = &solid_mask; \
+    int src_stride, mask_stride, dst_stride; \
+ \
+    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1); \
+    if (have_mask) \
+    { \
+        if (mask_is_solid) \
+            solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); \
+        else \
+            PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \
+                                   mask_stride, mask_line, 1); \
+    } \
+    /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \
+     * transformed from destination space to source space */ \
+    PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \
+ \
+    /* reference point is the center of the pixel */ \
+    v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \
+    v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \
+    v.vector[2] = pixman_fixed_1; \
+ \
+    if (!pixman_transform_point_3d (src_image->common.transform, &v)) \
+        return; \
+ \
+    unit_x = src_image->common.transform->matrix[0][0]; \
+    unit_y = src_image->common.transform->matrix[1][1]; \
+ \
+    /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ \
+    v.vector[0] -= pixman_fixed_e; \
+    v.vector[1] -= pixman_fixed_e; \
+ \
+    vx = v.vector[0]; \
+    vy = v.vector[1]; \
+ \
+    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
+    { \
+        /* Clamp repeating positions inside the actual samples */ \
+        max_vx = src_image->bits.width << 16; \
+        max_vy = src_image->bits.height << 16; \
+ \
+        repeat (PIXMAN_REPEAT_NORMAL, &vx, max_vx); \
+        repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
+    } \
+ \
+    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \
+        PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
+    { \
+        pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, \
+                                        &width, &left_pad, &right_pad); \
+        vx += left_pad * unit_x; \
+    } \
+ \
+    while (--height >= 0) \
+    { \
+        dst = dst_line; \
+        dst_line += dst_stride; \
+        if (have_mask && !mask_is_solid) \
+        { \
+            mask = mask_line; \
+            mask_line += mask_stride; \
+        } \
+ \
+        y = vy >> 16; \
+        vy += unit_y; \
+        if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
+            repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
+        if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
+        { \
+            repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height); \
+            src = src_first_line + src_stride * y; \
+            if (left_pad > 0) \
+            { \
+                scanline_func (mask, dst, src, left_pad, 0, 0, 0, FALSE); \
+            } \
+            if (width > 0) \
+            { \
+                scanline_func (mask + (mask_is_solid ? 0 : left_pad), \
+                               dst + left_pad, src, width, vx, unit_x, 0, FALSE); \
+            } \
+            if (right_pad > 0) \
+            { \
+                scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \
+                               dst + left_pad + width, src + src_image->bits.width - 1, \
+                               right_pad, 0, 0, 0, FALSE); \
+            } \
+        } \
+        else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
+        { \
+            static const src_type_t zero[1] = { 0 }; \
+            if (y < 0 || y >= src_image->bits.height) \
+            { \
+                scanline_func (mask, dst, zero, left_pad + width + right_pad, 0, 0, 0, TRUE); \
+                continue; \
+            } \
+            src = src_first_line + src_stride * y; \
+            if (left_pad > 0) \
+            { \
+                scanline_func (mask, dst, zero, left_pad, 0, 0, 0, TRUE); \
+            } \
+            if (width > 0) \
+            { \
+                scanline_func (mask + (mask_is_solid ? 0 : left_pad), \
+                               dst + left_pad, src, width, vx, unit_x, 0, FALSE); \
+            } \
+            if (right_pad > 0) \
+            { \
+                scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \
+                               dst + left_pad + width, zero, right_pad, 0, 0, 0, TRUE); \
+            } \
+        } \
+        else \
+        { \
+            src = src_first_line + src_stride * y; \
+            scanline_func (mask, dst, src, width, vx, unit_x, max_vx, FALSE); \
+        } \
+    } \
+}
+
+/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
+#define FAST_NEAREST_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \
+                                     dst_type_t, repeat_mode, have_mask, mask_is_solid) \
+    FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t, \
+                              dst_type_t, repeat_mode, have_mask, mask_is_solid)
+
+#define FAST_NEAREST_MAINLOOP_NOMASK(scale_func_name, scanline_func, src_type_t, dst_type_t, \
+                                     repeat_mode) \
+    static force_inline void \
+    scanline_func##scale_func_name##_wrapper ( \
+        const uint8_t *mask, \
+        dst_type_t *dst, \
+        const src_type_t *src, \
+        int32_t w, \
+        pixman_fixed_t vx, \
+        pixman_fixed_t unit_x, \
+        pixman_fixed_t max_vx, \
+        pixman_bool_t fully_transparent_src) \
+    { \
+        scanline_func (dst, src, w, vx, unit_x, max_vx, fully_transparent_src); \
+    } \
+    FAST_NEAREST_MAINLOOP_INT (scale_func_name, scanline_func##scale_func_name##_wrapper, \
+                               src_type_t, uint8_t, dst_type_t, repeat_mode, FALSE, FALSE)
+
+#define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t, \
+                              repeat_mode) \
+    FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name, scanline_func, src_type_t, \
+                                 dst_type_t, repeat_mode)
+
+#define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT, \
+                     src_type_t, dst_type_t, OP, repeat_mode) \
+    FAST_NEAREST_SCANLINE(scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
+                          SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t, \
+                          OP, repeat_mode) \
+    FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name ## _ ## OP, \
+                                 scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
+                                 src_type_t, dst_type_t, repeat_mode)
+
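
To illustrate how these macros are meant to be instantiated (the concrete
lists live in the .c files, e.g. pixman-fast-path.c), a single invocation of
the shape

    FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, SRC, COVER)

emits both scaled_nearest_scanline_8888_8888_cover_SRC () and the composite
entry point fast_composite_scaled_nearest_8888_8888_cover_SRC ().
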
+#define SCALED_NEAREST_FLAGS \
+    (FAST_PATH_SCALE_TRANSFORM | \
+     FAST_PATH_NO_ALPHA_MAP | \
+     FAST_PATH_NEAREST_FILTER | \
+     FAST_PATH_NO_ACCESSORS | \
+     FAST_PATH_NARROW_FORMAT)
+
+#define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func) \
+    { PIXMAN_OP_ ## op, \
+      PIXMAN_ ## s, \
+      (SCALED_NEAREST_FLAGS | \
+       FAST_PATH_NORMAL_REPEAT | \
+       FAST_PATH_X_UNIT_POSITIVE), \
+      PIXMAN_null, 0, \
+      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+      fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
+    }
+
+#define SIMPLE_NEAREST_FAST_PATH_PAD(op,s,d,func) \
+    { PIXMAN_OP_ ## op, \
+      PIXMAN_ ## s, \
+      (SCALED_NEAREST_FLAGS | \
+       FAST_PATH_PAD_REPEAT | \
+       FAST_PATH_X_UNIT_POSITIVE), \
+      PIXMAN_null, 0, \
+      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+      fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \
+    }
+
+#define SIMPLE_NEAREST_FAST_PATH_NONE(op,s,d,func) \
+    { PIXMAN_OP_ ## op, \
+      PIXMAN_ ## s, \
+      (SCALED_NEAREST_FLAGS | \
+       FAST_PATH_NONE_REPEAT | \
+       FAST_PATH_X_UNIT_POSITIVE), \
+      PIXMAN_null, 0, \
+      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+      fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
+    }
+
+#define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func) \
+    { PIXMAN_OP_ ## op, \
+      PIXMAN_ ## s, \
+      SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST, \
+      PIXMAN_null, 0, \
+      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+      fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
+    }
+
+#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NORMAL(op,s,d,func) \
+    { PIXMAN_OP_ ## op, \
+      PIXMAN_ ## s, \
+      (SCALED_NEAREST_FLAGS | \
+       FAST_PATH_NORMAL_REPEAT | \
+       FAST_PATH_X_UNIT_POSITIVE), \
+      PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
+      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+      fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
+    }
+
+#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD(op,s,d,func) \
+    { PIXMAN_OP_ ## op, \
+      PIXMAN_ ## s, \
+      (SCALED_NEAREST_FLAGS | \
+       FAST_PATH_PAD_REPEAT | \
+       FAST_PATH_X_UNIT_POSITIVE), \
+      PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
+      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+      fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \
+    }
+
+#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE(op,s,d,func) \
+    { PIXMAN_OP_ ## op, \
+      PIXMAN_ ## s, \
+      (SCALED_NEAREST_FLAGS | \
+       FAST_PATH_NONE_REPEAT | \
+       FAST_PATH_X_UNIT_POSITIVE), \
+      PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
+      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+      fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
+    }
+
+#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER(op,s,d,func) \
+    { PIXMAN_OP_ ## op, \
+      PIXMAN_ ## s, \
+      SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST, \
+      PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
+      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+      fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
+    }
+
+#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func) \
+    { PIXMAN_OP_ ## op, \
+      PIXMAN_ ## s, \
+      (SCALED_NEAREST_FLAGS | \
+       FAST_PATH_NORMAL_REPEAT | \
+       FAST_PATH_X_UNIT_POSITIVE), \
+      PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
+      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+      fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
+    }
+
+#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \
+    { PIXMAN_OP_ ## op, \
+      PIXMAN_ ## s, \
+      (SCALED_NEAREST_FLAGS | \
+       FAST_PATH_PAD_REPEAT | \
+       FAST_PATH_X_UNIT_POSITIVE), \
+      PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
+      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+      fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \
+    }
+
+#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \
+    { PIXMAN_OP_ ## op, \
+      PIXMAN_ ## s, \
+      (SCALED_NEAREST_FLAGS | \
+       FAST_PATH_NONE_REPEAT | \
+       FAST_PATH_X_UNIT_POSITIVE), \
+      PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
+      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+      fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
+    }
+
+#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \
+    { PIXMAN_OP_ ## op, \
+      PIXMAN_ ## s, \
+      SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST, \
+      PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
+      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+      fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
+    }
+
+/* Prefer the 'cover' variant, because it is faster */
+#define SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \
+    SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \
+    SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \
+    SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func), \
+    SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func)
+
+#define SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \
+    SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \
+    SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \
+    SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func)
+
+#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH(op,s,d,func) \
+    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \
+    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \
+    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func)
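
Expanding one of these helps when reading the fast path tables. With
hypothetical formats, SIMPLE_NEAREST_FAST_PATH_PAD (SRC, a8r8g8b8, x8r8g8b8,
8888_8888) produces the single table row

    { PIXMAN_OP_SRC,
      PIXMAN_a8r8g8b8,
      SCALED_NEAREST_FLAGS | FAST_PATH_PAD_REPEAT | FAST_PATH_X_UNIT_POSITIVE,
      PIXMAN_null, 0,
      PIXMAN_x8r8g8b8, FAST_PATH_STD_DEST_FLAGS,
      fast_composite_scaled_nearest_8888_8888_pad_SRC,
    }

i.e. an operator, the source format and required flags, the mask format and
flags, the destination format and flags, and the composite function to call
when all of them match a request.
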
+
+/*****************************************************************************/
+
+/*
+ * Identify 5 zones in each scanline for bilinear scaling, depending on
+ * whether the 2 pixels to be interpolated are fetched from the image
+ * itself, from the padding area around it, or from both.
+ */
+static force_inline void
+bilinear_pad_repeat_get_scanline_bounds (int32_t        source_image_width,
+                                         pixman_fixed_t vx,
+                                         pixman_fixed_t unit_x,
+                                         int32_t *      left_pad,
+                                         int32_t *      left_tz,
+                                         int32_t *      width,
+                                         int32_t *      right_tz,
+                                         int32_t *      right_pad)
+{
+    int width1 = *width, left_pad1, right_pad1;
+    int width2 = *width, left_pad2, right_pad2;
+
+    pad_repeat_get_scanline_bounds (source_image_width, vx, unit_x,
+                                    &width1, &left_pad1, &right_pad1);
+    pad_repeat_get_scanline_bounds (source_image_width, vx + pixman_fixed_1,
+                                    unit_x, &width2, &left_pad2, &right_pad2);
+
+    *left_pad = left_pad2;
+    *left_tz = left_pad1 - left_pad2;
+    *right_tz = right_pad2 - right_pad1;
+    *right_pad = right_pad1;
+    *width -= *left_pad + *left_tz + *right_tz + *right_pad;
+}
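
A worked example of the five zones (illustrative, made-up numbers): with
source_image_width = 4, unit_x = pixman_fixed_1 and vx = -2.25 in 16.16
fixed point, destination pixel i interpolates the source pixel pair
(floor(vx + i * unit_x), floor(vx + i * unit_x) + 1), so a 10 pixel
scanline splits as

    dst pixel:    0       1       2      3      4      5      6      7 .. 9
    src pair:  (-3,-2) (-2,-1) (-1,0)  (0,1)  (1,2)  (2,3)  (3,4)  (4,5) ..
    zone:        pad     pad     tz    image  image  image   tz     pad

giving left_pad = 2, left_tz = 1, width = 3, right_tz = 1, right_pad = 3.
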
+
+/*
+ * Main loop template for single pass bilinear scaling. It needs to be
+ * provided with 'scanline_func' which should do the compositing operation.
+ * The needed function has the following prototype:
+ *
+ *     scanline_func (dst_type_t *       dst,
+ *                    const mask_type_t *mask,
+ *                    const src_type_t * src_top,
+ *                    const src_type_t * src_bottom,
+ *                    int32_t            width,
+ *                    int                weight_top,
+ *                    int                weight_bottom,
+ *                    pixman_fixed_t     vx,
+ *                    pixman_fixed_t     unit_x,
+ *                    pixman_fixed_t     max_vx,
+ *                    pixman_bool_t      zero_src)
+ *
+ * Where:
+ *     dst                 - destination scanline buffer for storing results
+ *     mask                - mask buffer (or single value for solid mask)
+ *     src_top, src_bottom - two source scanlines
+ *     width               - number of pixels to process
+ *     weight_top          - weight of the top row for interpolation
+ *     weight_bottom       - weight of the bottom row for interpolation
+ *     vx                  - initial position for fetching the first pair of
+ *                           pixels from the source buffer
+ *     unit_x              - position increment needed to move to the next
+ *                           pair of pixels
+ *     max_vx              - image size as a fixed point value, can be used
+ *                           for implementing NORMAL repeat (when it is
+ *                           supported)
+ *     zero_src            - boolean hint variable, which is set to TRUE when
+ *                           all source pixels are fetched from the zero
+ *                           padding zone for NONE repeat
+ *
+ * Note: normally the sum of 'weight_top' and 'weight_bottom' is equal to
+ *       256, but sometimes it may be less than that for NONE repeat when
+ *       handling fuzzy antialiased top or bottom image edges. Also both top
+ *       and bottom weight variables are guaranteed to have values in the
+ *       0-255 range, so they fit into an unsigned byte and can be used with
+ *       8-bit SIMD multiplication instructions.
+ */
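
A minimal sketch of a function with this prototype (illustrative only, not
pixman code: SRC operator, a8r8g8b8 source and destination, no mask, no
NORMAL repeat, fully covered by source samples, and assuming
weight_top + weight_bottom == 256 so the bottom weight can serve directly
as 'disty' for the bilinear_interpolation() helper defined earlier):

    static void
    sketch_bilinear_scanline_8888_SRC (uint32_t *       dst,
                                       const uint32_t * mask,          /* unused */
                                       const uint32_t * src_top,
                                       const uint32_t * src_bottom,
                                       int32_t          width,
                                       int              weight_top,    /* 256 - weight_bottom */
                                       int              weight_bottom,
                                       pixman_fixed_t   vx,
                                       pixman_fixed_t   unit_x,
                                       pixman_fixed_t   max_vx,        /* unused */
                                       pixman_bool_t    zero_src)      /* unused */
    {
        while (width-- > 0)
        {
            int x     = pixman_fixed_to_int (vx);
            int distx = (vx >> 8) & 0xff;   /* 8-bit horizontal weight */

            *dst++ = bilinear_interpolation (src_top[x],    src_top[x + 1],
                                             src_bottom[x], src_bottom[x + 1],
                                             distx, weight_bottom);
            vx += unit_x;
        }
    }
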
+#define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \
+                                   dst_type_t, repeat_mode, flags) \
+static void \
+fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, \
+                                                   pixman_composite_info_t *info) \
+{ \
+    PIXMAN_COMPOSITE_ARGS (info); \
+    dst_type_t *dst_line; \
+    mask_type_t *mask_line; \
+    src_type_t *src_first_line; \
+    int y1, y2; \
+    pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */ \
+    pixman_vector_t v; \
+    pixman_fixed_t vx, vy; \
+    pixman_fixed_t unit_x, unit_y; \
+    int32_t left_pad, left_tz, right_tz, right_pad; \
+ \
+    dst_type_t *dst; \
+    mask_type_t solid_mask; \
+    const mask_type_t *mask = &solid_mask; \
+    int src_stride, mask_stride, dst_stride; \
+ \
+    int src_width; \
+    pixman_fixed_t src_width_fixed; \
+    int max_x; \
+    pixman_bool_t need_src_extension; \
+ \
+    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1); \
+    if (flags & FLAG_HAVE_SOLID_MASK) \
+    { \
+        solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); \
+        mask_stride = 0; \
+    } \
+    else if (flags & FLAG_HAVE_NON_SOLID_MASK) \
+    { \
+        PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \
+                               mask_stride, mask_line, 1); \
+    } \
+ \
+    /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \
+     * transformed from destination space to source space */ \
+    PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \
+ \
+    /* reference point is the center of the pixel */ \
+    v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \
+    v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \
+    v.vector[2] = pixman_fixed_1; \
+ \
+    if (!pixman_transform_point_3d (src_image->common.transform, &v)) \
+        return; \
+ \
+    unit_x = src_image->common.transform->matrix[0][0]; \
+    unit_y = src_image->common.transform->matrix[1][1]; \
+ \
+    v.vector[0] -= pixman_fixed_1 / 2; \
+    v.vector[1] -= pixman_fixed_1 / 2; \
+ \
+    vy = v.vector[1]; \
+ \
+    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \
+        PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
+    { \
+        bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, \
+                                                 &left_pad, &left_tz, &width, &right_tz, &right_pad); \
+        if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
+        { \
+            /* PAD repeat does not need special handling for 'transition zones' and */ \
+            /* they can be combined with 'padding zones' safely */ \
+            left_pad += left_tz; \
+            right_pad += right_tz; \
+            left_tz = right_tz = 0; \
+        } \
+        v.vector[0] += left_pad * unit_x; \
+    } \
+ \
+    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
+    { \
+        vx = v.vector[0]; \
+        repeat (PIXMAN_REPEAT_NORMAL, &vx, pixman_int_to_fixed(src_image->bits.width)); \
+        max_x = pixman_fixed_to_int (vx + (width - 1) * unit_x) + 1; \
+ \
+        if (src_image->bits.width < REPEAT_NORMAL_MIN_WIDTH) \
+        { \
+            src_width = 0; \
+ \
+            while (src_width < REPEAT_NORMAL_MIN_WIDTH && src_width <= max_x) \
+                src_width += src_image->bits.width; \
+ \
+            need_src_extension = TRUE; \
+        } \
+        else \
+        { \
+            src_width = src_image->bits.width; \
+            need_src_extension = FALSE; \
+        } \
+ \
+        src_width_fixed = pixman_int_to_fixed (src_width); \
+    } \
+ \
+    while (--height >= 0) \
+    { \
+        int weight1, weight2; \
+        dst = dst_line; \
+        dst_line += dst_stride; \
+        vx = v.vector[0]; \
+        if (flags & FLAG_HAVE_NON_SOLID_MASK) \
+        { \
+            mask = mask_line; \
+            mask_line += mask_stride; \
+        } \
+ \
+        y1 = pixman_fixed_to_int (vy); \
+        weight2 = (vy >> 8) & 0xff; \
+        if (weight2) \
+        { \
+            /* normal case, both row weights are in 0-255 range and fit unsigned byte */ \
+            y2 = y1 + 1; \
+            weight1 = 256 - weight2; \
+        } \
+        else \
+        { \
+            /* set both top and bottom row to the same scanline, and weights to 128+128 */ \
+            y2 = y1; \
+            weight1 = weight2 = 128; \
+        } \
+        vy += unit_y; \
+        if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
+        { \
+            src_type_t *src1, *src2; \
+            src_type_t buf1[2]; \
+            src_type_t buf2[2]; \
+            repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height); \
+            repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height); \
+            src1 = src_first_line + src_stride * y1; \
+            src2 = src_first_line + src_stride * y2; \
+ \
+            if (left_pad > 0) \
+            { \
+                buf1[0] = buf1[1] = src1[0]; \
+                buf2[0] = buf2[1] = src2[0]; \
+                scanline_func (dst, mask, \
+                               buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, FALSE); \
+                dst += left_pad; \
+                if (flags & FLAG_HAVE_NON_SOLID_MASK) \
+                    mask += left_pad; \
+            } \
+            if (width > 0) \
+            { \
+                scanline_func (dst, mask, \
+                               src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \
+                dst += width; \
+                if (flags & FLAG_HAVE_NON_SOLID_MASK) \
+                    mask += width; \
+            } \
+            if (right_pad > 0) \
+            { \
+                buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; \
+                buf2[0] = buf2[1] = src2[src_image->bits.width - 1]; \
+                scanline_func (dst, mask, \
+                               buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, FALSE); \
+            } \
+        } \
+        else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
+        { \
+            src_type_t *src1, *src2; \
+            src_type_t buf1[2]; \
+            src_type_t buf2[2]; \
+            /* handle top/bottom zero padding by just setting weights to 0 if needed */ \
+            if (y1 < 0) \
+            { \
+                weight1 = 0; \
+                y1 = 0; \
+            } \
+            if (y1 >= src_image->bits.height) \
+            { \
+                weight1 = 0; \
+                y1 = src_image->bits.height - 1; \
+            } \
+            if (y2 < 0) \
+            { \
+                weight2 = 0; \
+                y2 = 0; \
+            } \
+            if (y2 >= src_image->bits.height) \
+            { \
+                weight2 = 0; \
+                y2 = src_image->bits.height - 1; \
+            } \
+            src1 = src_first_line + src_stride * y1; \
+            src2 = src_first_line + src_stride * y2; \
+ \
+            if (left_pad > 0) \
+            { \
+                buf1[0] = buf1[1] = 0; \
+                buf2[0] = buf2[1] = 0; \
+                scanline_func (dst, mask, \
+                               buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, TRUE); \
+                dst += left_pad; \
+                if (flags & FLAG_HAVE_NON_SOLID_MASK) \
+                    mask += left_pad; \
+            } \
+            if (left_tz > 0) \
+            { \
+                buf1[0] = 0; \
+                buf1[1] = src1[0]; \
+                buf2[0] = 0; \
+                buf2[1] = src2[0]; \
+                scanline_func (dst, mask, \
+                               buf1, buf2, left_tz, weight1, weight2, \
+                               pixman_fixed_frac (vx), unit_x, 0, FALSE); \
+                dst += left_tz; \
+                if (flags & FLAG_HAVE_NON_SOLID_MASK) \
+                    mask += left_tz; \
+                vx += left_tz * unit_x; \
+            } \
+            if (width > 0) \
+            { \
+                scanline_func (dst, mask, \
+                               src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \
+                dst += width; \
+                if (flags & FLAG_HAVE_NON_SOLID_MASK) \
+                    mask += width; \
+                vx += width * unit_x; \
+            } \
+            if (right_tz > 0) \
+            { \
+                buf1[0] = src1[src_image->bits.width - 1]; \
+                buf1[1] = 0; \
+                buf2[0] = src2[src_image->bits.width - 1]; \
+                buf2[1] = 0; \
+                scanline_func (dst, mask, \
+                               buf1, buf2, right_tz, weight1, weight2, \
+                               pixman_fixed_frac (vx), unit_x, 0, FALSE); \
+                dst += right_tz; \
+                if (flags & FLAG_HAVE_NON_SOLID_MASK) \
+                    mask += right_tz; \
+            } \
+            if (right_pad > 0) \
+            { \
+                buf1[0] = buf1[1] = 0; \
+                buf2[0] = buf2[1] = 0; \
+                scanline_func (dst, mask, \
+                               buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, TRUE); \
+            } \
+        } \
+        else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
+        { \
+            int32_t num_pixels; \
+            int32_t width_remain; \
+            src_type_t *src_line_top; \
+            src_type_t *src_line_bottom; \
+            src_type_t buf1[2]; \
+            src_type_t buf2[2]; \
+            src_type_t extended_src_line0[REPEAT_NORMAL_MIN_WIDTH * 2]; \
+            src_type_t extended_src_line1[REPEAT_NORMAL_MIN_WIDTH * 2]; \
+            int i, j; \
+ \
+            repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height); \
+            repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height); \
+            src_line_top = src_first_line + src_stride * y1; \
+            src_line_bottom = src_first_line + src_stride * y2; \
+ \
+            if (need_src_extension) \
+            { \
+                for (i = 0; i < src_width;) \
+                { \
+                    for (j = 0; j < src_image->bits.width; j++, i++) \
+                    { \
+                        extended_src_line0[i] = src_line_top[j]; \
+                        extended_src_line1[i] = src_line_bottom[j]; \
+                    } \
+                } \
+ \
+                src_line_top = &extended_src_line0[0]; \
+                src_line_bottom = &extended_src_line1[0]; \
+            } \
+ \
+            /* Top & Bottom wrap around buffer */ \
+            buf1[0] = src_line_top[src_width - 1]; \
+            buf1[1] = src_line_top[0]; \
+            buf2[0] = src_line_bottom[src_width - 1]; \
+            buf2[1] = src_line_bottom[0]; \
+ \
+            width_remain = width; \
+ \
+            while (width_remain > 0) \
+            { \
+                /* We use src_width_fixed to bring vx back into the original source range */ \
+                repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \
+ \
+                /* Wrap around part */ \
+                if (pixman_fixed_to_int (vx) == src_width - 1) \
+                { \
+                    /* for positive unit_x \
+                     * num_pixels = max(n) + 1, where vx + n*unit_x < src_width_fixed \
+                     * \
+                     * vx is in range [0, src_width_fixed - pixman_fixed_e] \
+                     * so we are safe from overflow. \
+                     */ \
+                    num_pixels = ((src_width_fixed - vx - pixman_fixed_e) / unit_x) + 1; \
+ \
+                    if (num_pixels > width_remain) \
+                        num_pixels = width_remain; \
+ \
+                    scanline_func (dst, mask, buf1, buf2, num_pixels, \
+                                   weight1, weight2, pixman_fixed_frac(vx), \
+                                   unit_x, src_width_fixed, FALSE); \
+ \
+                    width_remain -= num_pixels; \
+                    vx += num_pixels * unit_x; \
+                    dst += num_pixels; \
+ \
+                    if (flags & FLAG_HAVE_NON_SOLID_MASK) \
+                        mask += num_pixels; \
+ \
+                    repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \
+                } \
+ \
+                /* Normal scanline composite */ \
+                if (pixman_fixed_to_int (vx) != src_width - 1 && width_remain > 0) \
+                { \
+                    /* for positive unit_x \
+                     * num_pixels = max(n) + 1, where vx + n*unit_x < (src_width_fixed - 1) \
+                     * \
+                     * vx is in range [0, src_width_fixed - pixman_fixed_e] \
+                     * so we are safe from overflow here. \
+                     */ \
+                    num_pixels = ((src_width_fixed - pixman_fixed_1 - vx - pixman_fixed_e) \
+                                  / unit_x) + 1; \
+ \
+                    if (num_pixels > width_remain) \
+                        num_pixels = width_remain; \
+ \
+                    scanline_func (dst, mask, src_line_top, src_line_bottom, num_pixels, \
+                                   weight1, weight2, vx, unit_x, src_width_fixed, FALSE); \
+ \
+                    width_remain -= num_pixels; \
+                    vx += num_pixels * unit_x; \
+                    dst += num_pixels; \
+ \
+                    if (flags & FLAG_HAVE_NON_SOLID_MASK) \
+                        mask += num_pixels; \
+                } \
+            } \
+        } \
+        else \
+        { \
+            scanline_func (dst, mask, src_first_line + src_stride * y1, \
+                           src_first_line + src_stride * y2, width, \
+                           weight1, weight2, vx, unit_x, max_vx, FALSE); \
+        } \
+    } \
+}
+
+/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
+#define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \
+                                      dst_type_t, repeat_mode, flags) \
+    FAST_BILINEAR_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t, \
+                               dst_type_t, repeat_mode, flags)
+
+#define SCALED_BILINEAR_FLAGS \
+    (FAST_PATH_SCALE_TRANSFORM | \
+     FAST_PATH_NO_ALPHA_MAP | \
+     FAST_PATH_BILINEAR_FILTER | \
+     FAST_PATH_NO_ACCESSORS | \
+     FAST_PATH_NARROW_FORMAT)
+
+#define SIMPLE_BILINEAR_FAST_PATH_PAD(op,s,d,func) \
+    { PIXMAN_OP_ ## op, \
+      PIXMAN_ ## s, \
+      (SCALED_BILINEAR_FLAGS | \
+       FAST_PATH_PAD_REPEAT | \
+       FAST_PATH_X_UNIT_POSITIVE), \
+      PIXMAN_null, 0, \
+      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+      fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \
+    }
+
+#define SIMPLE_BILINEAR_FAST_PATH_NONE(op,s,d,func) \
+    { PIXMAN_OP_ ## op, \
+      PIXMAN_ ## s, \
+      (SCALED_BILINEAR_FLAGS | \
+       FAST_PATH_NONE_REPEAT | \
+       FAST_PATH_X_UNIT_POSITIVE), \
+      PIXMAN_null, 0, \
+      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+      fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \
+    }
+
+#define SIMPLE_BILINEAR_FAST_PATH_COVER(op,s,d,func) \
+    { PIXMAN_OP_ ## op, \
+      PIXMAN_ ## s, \
+      SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR, \
+      PIXMAN_null, 0, \
+      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+      fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \
+    }
+
+#define SIMPLE_BILINEAR_FAST_PATH_NORMAL(op,s,d,func) \
+    { PIXMAN_OP_ ## op, \
+      PIXMAN_ ## s, \
+      (SCALED_BILINEAR_FLAGS | \
+       FAST_PATH_NORMAL_REPEAT | \
+       FAST_PATH_X_UNIT_POSITIVE), \
+      PIXMAN_null, 0, \
+      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \