author | Oleg Romashin <romaxa@gmail.com> |
Wed, 15 Dec 2010 10:17:26 -0800 | |
changeset 59200 | 513cc1daf58f5bc303e89b8964f59184c681f305 |
parent 59199 | fae6da8a664c6d4c73df880c8e1af5aa1dec6993 |
child 59201 | aca204f42affc03c0a300253c5df200062ee7e8b |
push id | unknown |
push user | unknown |
push date | unknown |
reviewers | doublec, blocking-fennec |
bugs | 616469 |
milestone | 2.0b9pre |
first release with | nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
|
last release without | nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
|
gfx/ycbcr/README | file | annotate | diff | comparison | revisions | |
gfx/ycbcr/arm.patch | file | annotate | diff | comparison | revisions | |
gfx/ycbcr/update.sh | file | annotate | diff | comparison | revisions |
--- a/gfx/ycbcr/README +++ b/gfx/ycbcr/README @@ -8,8 +8,10 @@ The code was copied from a Chromium svn convert.patch: Change Chromium code to build using Mozilla build system. Add runtime CPU detection for MMX Move default C implementation to work on all platforms. Change Chromium code to allow a picture region. The YUV conversion will convert within this picture region only. Add YCbCr 4:4:4 support + +arm.patch: Add YCbCr to rgb16_565 conversion support, bug 616469
new file mode 100644 --- /dev/null +++ b/gfx/ycbcr/arm.patch @@ -0,0 +1,351 @@ +diff --git a/gfx/ycbcr/Makefile.in b/gfx/ycbcr/Makefile.in +--- a/gfx/ycbcr/Makefile.in ++++ b/gfx/ycbcr/Makefile.in +@@ -40,16 +40,21 @@ CPPSRCS += yuv_row_posix.cpp \ + else + CPPSRCS += yuv_row_other.cpp \ + $(NULL) + endif # Darwin + endif # SunOS + endif # linux + endif # windows + ++ifeq (arm,$(findstring arm,$(OS_TEST))) ++CPPSRCS += yuv_convert_arm.cpp \ ++ $(NULL) ++endif ++ + EXTRA_DSO_LDOPTS += \ + $(LIBS_DIR) \ + $(EXTRA_DSO_LIBS) \ + $(XPCOM_LIBS) \ + $(NSPR_LIBS) \ + $(NULL) + + include $(topsrcdir)/config/rules.mk +diff --git a/gfx/ycbcr/yuv_convert.cpp b/gfx/ycbcr/yuv_convert.cpp +--- a/gfx/ycbcr/yuv_convert.cpp ++++ b/gfx/ycbcr/yuv_convert.cpp +@@ -19,25 +19,60 @@ + #include "yuv_convert.h" + + // Header for low level row functions. + #include "yuv_row.h" + #define MOZILLA_SSE_INCLUDE_HEADER_FOR_SSE2 + #define MOZILLA_SSE_INCLUDE_HEADER_FOR_MMX + #include "mozilla/SSE.h" + ++#ifdef HAVE_YCBCR_TO_RGB565 ++void __attribute((noinline)) yv12_to_rgb565_neon(uint16 *dst, const uint8 *y, const uint8 *u, const uint8 *v, int n, int oddflag); ++#endif ++ + namespace mozilla { + + namespace gfx { + + // 16.16 fixed point arithmetic + const int kFractionBits = 16; + const int kFractionMax = 1 << kFractionBits; + const int kFractionMask = ((1 << kFractionBits) - 1); + ++ ++// Convert a frame of YV12 YUV to 16 bit RGB565, one picture row per call to ++// yv12_to_rgb565_neon. Destination rows are rgb_pitch bytes apart, so a ++// padded destination stride is honoured instead of assuming pic_width * 2. ++// NOTE(review): pic_x, pic_y and yuv_type are not consulted; chroma rows are ++// always indexed with i / 2. Without HAVE_YCBCR_TO_RGB565 this is a no-op. ++NS_GFX_(void) ConvertYCbCrToRGB565(const uint8* y_buf, ++ const uint8* u_buf, ++ const uint8* v_buf, ++ uint8* rgb_buf, ++ int pic_x, ++ int pic_y, ++ int pic_width, ++ int pic_height, ++ int y_pitch, ++ int uv_pitch, ++ int rgb_pitch, ++ YUVType yuv_type) ++{ ++#ifdef HAVE_YCBCR_TO_RGB565 ++ for (int i = 0; i < pic_height; i++) { ++ yv12_to_rgb565_neon((uint16*)(rgb_buf + rgb_pitch * i), ++ y_buf + y_pitch * i, ++ u_buf + uv_pitch * (i / 2), ++ v_buf + uv_pitch * (i / 2), ++ pic_width, ++ 0); ++ } ++#endif ++} ++ + // Convert a frame of YUV to 32 bit ARGB.
+ NS_GFX_(void) ConvertYCbCrToRGB32(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, + int pic_x, + int pic_y, + int pic_width, +diff --git a/gfx/ycbcr/yuv_convert.h b/gfx/ycbcr/yuv_convert.h +--- a/gfx/ycbcr/yuv_convert.h ++++ b/gfx/ycbcr/yuv_convert.h +@@ -2,16 +2,20 @@ + // Use of this source code is governed by a BSD-style license that can be + // found in the LICENSE file. + + #ifndef MEDIA_BASE_YUV_CONVERT_H_ + #define MEDIA_BASE_YUV_CONVERT_H_ + + #include "chromium_types.h" + #include "gfxCore.h" ++ ++#ifdef __arm__ ++#define HAVE_YCBCR_TO_RGB565 1 ++#endif + + namespace mozilla { + + namespace gfx { + + // Type of YUV surface. + // The value of these enums matter as they are used to shift vertical indices. + enum YUVType { +@@ -36,16 +40,31 @@ enum Rotate { + // Filter affects how scaling looks. + enum ScaleFilter { + FILTER_NONE = 0, // No filter (point sampled). + FILTER_BILINEAR_H = 1, // Bilinear horizontal filter. + FILTER_BILINEAR_V = 2, // Bilinear vertical filter. + FILTER_BILINEAR = 3 // Bilinear filter. + }; + ++// Convert a frame of YUV to 16 bit RGB565. ++// Pass in YV12 formats. HAVE_YCBCR_TO_RGB565 is defined only when __arm__ is defined. ++NS_GFX_(void) ConvertYCbCrToRGB565(const uint8* yplane, ++ const uint8* uplane, ++ const uint8* vplane, ++ uint8* rgbframe, ++ int pic_x, ++ int pic_y, ++ int pic_width, ++ int pic_height, ++ int ystride, ++ int uvstride, ++ int rgbstride, ++ YUVType yuv_type); ++ + // Convert a frame of YUV to 32 bit ARGB. + // Pass in YV16/YV12 depending on source format + NS_GFX_(void) ConvertYCbCrToRGB32(const uint8* yplane, + const uint8* uplane, + const uint8* vplane, + uint8* rgbframe, + int pic_x, + int pic_y, +diff --git a/gfx/ycbcr/yuv_convert_arm.cpp b/gfx/ycbcr/yuv_convert_arm.cpp +new file mode 100644 +--- /dev/null ++++ b/gfx/ycbcr/yuv_convert_arm.cpp +@@ -0,0 +1,201 @@ ++// Copyright (c) 2010 The Chromium Authors. All rights reserved.
++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. ++ ++// contributor Siarhei Siamashka <siarhei.siamashka@gmail.com> ++ ++#include "yuv_convert.h" ++ ++void __attribute((noinline)) yv12_to_rgb565_neon(uint16 *dst, const uint8 *y, const uint8 *u, const uint8 *v, int n, int oddflag) ++{ /* Converts n pixels of one row from the y, u, v inputs to RGB565 at dst. A nonzero oddflag converts a single leading pixel first; the rest is processed in blocks of 16, then 8, 4, 2, 1 selected by the low bits of n. */ ++ static __attribute__((aligned(16))) uint16 acc_r[8] = { ++ 22840, 22840, 22840, 22840, 22840, 22840, 22840, 22840, ++ }; ++ static __attribute__((aligned(16))) uint16 acc_g[8] = { ++ 17312, 17312, 17312, 17312, 17312, 17312, 17312, 17312, ++ }; ++ static __attribute__((aligned(16))) uint16 acc_b[8] = { ++ 28832, 28832, 28832, 28832, 28832, 28832, 28832, 28832, ++ }; ++ /* ++ * Registers: ++ * q0, q1 : d0, d1, d2, d3 - are used for initial loading of YUV data ++ * q2 : d4, d5 - are used for storing converted RGB data ++ * q3 : d6, d7 - are used for temporary storage ++ * ++ * q4, q5 : d8, d9, d10, d11 - clipping constants (#15, #20, #31, #36); q6, q7 - not used ++ * ++ * q8, q9 : d16, d17, d18, d19 - are used for expanded Y data ++ * q10 : d20, d21 - red accumulator ++ * q11 : d22, d23 - green accumulator ++ * q12 : d24, d25 - blue accumulator ++ * q13 : d26, d27 ++ * q13, q14, q15 - various constants (#16, #149, #204, #50, #104, #154) ++ */ ++ asm volatile ( ++".fpu neon\n" ++".macro convert_macroblock size\n" ++/* load up to 16 source pixels */ ++ ".if \\size == 16\n" ++ "pld [%[y], #64]\n" ++ "pld [%[u], #64]\n" ++ "pld [%[v], #64]\n" ++ "vld1.8 {d1}, [%[y]]!\n" ++ "vld1.8 {d3}, [%[y]]!\n" ++ "vld1.8 {d0}, [%[u]]!\n" ++ "vld1.8 {d2}, [%[v]]!\n" ++ ".elseif \\size == 8\n" ++ "vld1.8 {d1}, [%[y]]!\n" ++ "vld1.8 {d0[0]}, [%[u]]!\n" ++ "vld1.8 {d0[1]}, [%[u]]!\n" ++ "vld1.8 {d0[2]}, [%[u]]!\n" ++ "vld1.8 {d0[3]}, [%[u]]!\n" ++ "vld1.8 {d2[0]}, [%[v]]!\n" ++ "vld1.8 {d2[1]}, [%[v]]!\n" ++ "vld1.8 {d2[2]}, [%[v]]!\n" ++ "vld1.8 {d2[3]}, [%[v]]!\n" ++ ".elseif \\size == 4\n" ++ "vld1.8 {d1[0]}, [%[y]]!\n" ++ "vld1.8 {d1[1]}, [%[y]]!\n" ++ "vld1.8 {d1[2]}, [%[y]]!\n" ++ "vld1.8 {d1[3]}, [%[y]]!\n" ++ "vld1.8 {d0[0]}, [%[u]]!\n" ++ "vld1.8 {d0[1]}, 
[%[u]]!\n" ++ "vld1.8 {d2[0]}, [%[v]]!\n" ++ "vld1.8 {d2[1]}, [%[v]]!\n" ++ ".elseif \\size == 2\n" ++ "vld1.8 {d1[0]}, [%[y]]!\n" ++ "vld1.8 {d1[1]}, [%[y]]!\n" ++ "vld1.8 {d0[0]}, [%[u]]!\n" ++ "vld1.8 {d2[0]}, [%[v]]!\n" ++ ".elseif \\size == 1\n" ++ "vld1.8 {d1[0]}, [%[y]]!\n" ++ "vld1.8 {d0[0]}, [%[u]]!\n" ++ "vld1.8 {d2[0]}, [%[v]]!\n" ++ ".else\n" ++ ".error \"unsupported macroblock size\"\n" ++ ".endif\n" ++ ++ /* d1 - Y data (first 8 bytes) */ ++ /* d3 - Y data (next 8 bytes) */ ++ /* d0 - U data, d2 - V data */ ++ ++ /* split even and odd Y color components */ ++ "vuzp.8 d1, d3\n" /* d1 - evenY, d3 - oddY */ ++ /* clip upper and lower boundaries */ ++ "vqadd.u8 q0, q0, q4\n" ++ "vqadd.u8 q1, q1, q4\n" ++ "vqsub.u8 q0, q0, q5\n" ++ "vqsub.u8 q1, q1, q5\n" ++ ++ "vshr.u8 d4, d2, #1\n" /* d4 = V >> 1 */ ++ ++ "vmull.u8 q8, d1, d27\n" /* q8 = evenY * 149 */ ++ "vmull.u8 q9, d3, d27\n" /* q9 = oddY * 149 */ ++ ++ "vld1.16 {d20, d21}, [%[acc_r], :128]\n" /* q10 - initialize accumulator for red */ ++ "vsubw.u8 q10, q10, d4\n" /* red acc -= (V >> 1) */ ++ "vmlsl.u8 q10, d2, d28\n" /* red acc -= V * 204 */ ++ "vld1.16 {d22, d23}, [%[acc_g], :128]\n" /* q11 - initialize accumulator for green */ ++ "vmlsl.u8 q11, d2, d30\n" /* green acc -= V * 104 */ ++ "vmlsl.u8 q11, d0, d29\n" /* green acc -= U * 50 */ ++ "vld1.16 {d24, d25}, [%[acc_b], :128]\n" /* q12 - initialize accumulator for blue */ ++ "vmlsl.u8 q12, d0, d30\n" /* blue acc -= U * 104 */ ++ "vmlsl.u8 q12, d0, d31\n" /* blue acc -= U * 154 */ ++ ++ "vhsub.s16 q3, q8, q10\n" /* calculate even red components */ ++ "vhsub.s16 q10, q9, q10\n" /* calculate odd red components */ ++ "vqshrun.s16 d0, q3, #6\n" /* right shift, narrow and saturate even red components */ ++ "vqshrun.s16 d3, q10, #6\n" /* right shift, narrow and saturate odd red components */ ++ ++ "vhadd.s16 q3, q8, q11\n" /* calculate even green components */ ++ "vhadd.s16 q11, q9, q11\n" /* calculate odd green components */ ++ "vqshrun.s16 d1, q3, 
#6\n" /* right shift, narrow and saturate even green components */ ++ "vqshrun.s16 d4, q11, #6\n" /* right shift, narrow and saturate odd green components */ ++ ++ "vhsub.s16 q3, q8, q12\n" /* calculate even blue components */ ++ "vhsub.s16 q12, q9, q12\n" /* calculate odd blue components */ ++ "vqshrun.s16 d2, q3, #6\n" /* right shift, narrow and saturate even blue components */ ++ "vqshrun.s16 d5, q12, #6\n" /* right shift, narrow and saturate odd blue components */ ++ ++ "vzip.8 d0, d3\n" /* join even and odd red components */ ++ "vzip.8 d1, d4\n" /* join even and odd green components */ ++ "vzip.8 d2, d5\n" /* join even and odd blue components */ ++ ++ "vshll.u8 q3, d0, #8\n\t" ++ "vshll.u8 q8, d1, #8\n\t" ++ "vshll.u8 q9, d2, #8\n\t" ++ "vsri.u16 q3, q8, #5\t\n" ++ "vsri.u16 q3, q9, #11\t\n" ++ /* store pixel data to memory */ ++ ".if \\size == 16\n" ++ " vst1.16 {d6, d7}, [%[dst]]!\n" ++ " vshll.u8 q3, d3, #8\n\t" ++ " vshll.u8 q8, d4, #8\n\t" ++ " vshll.u8 q9, d5, #8\n\t" ++ " vsri.u16 q3, q8, #5\t\n" ++ " vsri.u16 q3, q9, #11\t\n" ++ " vst1.16 {d6, d7}, [%[dst]]!\n" ++ ".elseif \\size == 8\n" ++ " vst1.16 {d6, d7}, [%[dst]]!\n" ++ ".elseif \\size == 4\n" ++ " vst1.16 {d6}, [%[dst]]!\n" ++ ".elseif \\size == 2\n" ++ " vst1.16 {d6[0]}, [%[dst]]!\n" ++ " vst1.16 {d6[1]}, [%[dst]]!\n" ++ ".elseif \\size == 1\n" ++ " vst1.16 {d6[0]}, [%[dst]]!\n" ++ ".endif\n" ++ ".endm\n" ++ ++ "vmov.u8 d8, #15\n" /* add this to U/V to saturate upper boundary */ ++ "vmov.u8 d9, #20\n" /* add this to Y to saturate upper boundary */ ++ "vmov.u8 d10, #31\n" /* sub this from U/V to saturate lower boundary */ ++ "vmov.u8 d11, #36\n" /* sub this from Y to saturate lower boundary */ ++ ++ "vmov.u8 d26, #16\n" ++ "vmov.u8 d27, #149\n" ++ "vmov.u8 d28, #204\n" ++ "vmov.u8 d29, #50\n" ++ "vmov.u8 d30, #104\n" ++ "vmov.u8 d31, #154\n" ++ ++ "cmp %[oddflag], #0\n" ++ "beq 1f\n" ++ "convert_macroblock 1\n" ++ "sub %[n], %[n], #1\n" ++ "1:\n" ++ "subs %[n], %[n], #16\n" ++ "blt 2f\n" ++ 
"1:\n" ++ "convert_macroblock 16\n" ++ "subs %[n], %[n], #16\n" ++ "bge 1b\n" ++ "2:\n" ++ "tst %[n], #8\n" ++ "beq 3f\n" ++ "convert_macroblock 8\n" ++ "3:\n" ++ "tst %[n], #4\n" ++ "beq 4f\n" ++ "convert_macroblock 4\n" ++ "4:\n" ++ "tst %[n], #2\n" ++ "beq 5f\n" ++ "convert_macroblock 2\n" ++ "5:\n" ++ "tst %[n], #1\n" ++ "beq 6f\n" ++ "convert_macroblock 1\n" ++ "6:\n" ++ ".purgem convert_macroblock\n" ++ : [y] "+&r" (y), [u] "+&r" (u), [v] "+&r" (v), [dst] "+&r" (dst), [n] "+&r" (n) ++ : [acc_r] "r" (&acc_r[0]), [acc_g] "r" (&acc_g[0]), [acc_b] "r" (&acc_b[0]), ++ [oddflag] "r" (oddflag) ++ : "cc", "memory", ++ "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", ++ "d8", "d9", "d10", "d11", /* "d12", "d13", "d14", "d15", */ ++ "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", ++ "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31" ++ ); ++}
--- a/gfx/ycbcr/update.sh +++ b/gfx/ycbcr/update.sh @@ -2,8 +2,9 @@ cp $1/media/base/yuv_convert.h . cp $1/media/base/yuv_convert.cc yuv_convert.cpp cp $1/media/base/yuv_row.h . cp $1/media/base/yuv_row_table.cc yuv_row_table.cpp cp $1/media/base/yuv_row_posix.cc yuv_row_posix.cpp cp $1/media/base/yuv_row_win.cc yuv_row_win.cpp cp $1/media/base/yuv_row_posix.cc yuv_row_c.cpp patch -p3 <convert.patch +patch -p3 <arm.patch