author | Carsten "Tomcat" Book <cbook@mozilla.com> |
Fri, 31 Jan 2014 15:02:48 +0100 | |
changeset 166271 | 735a648bca0d5131b2e87b6ff6226df9985debe9 |
parent 166270 | ed8501afc8cc506edf3524177d61f30fbf079258 |
child 166272 | aa795c21094fc14d152d208db8782758cc295ee3 |
child 166343 | 89993200a24468e6f36092a0bcd8682203e81f58 |
child 166393 | cabfcad0fc957e9bc29acf2e4509df91aa3250e5 |
child 166467 | 279aaa11c402d5b2958c23acb6041e25ede321d0 |
push id | 39151 |
push user | cbook@mozilla.com |
push date | Fri, 31 Jan 2014 14:11:34 +0000 |
treeherder | mozilla-inbound@aa795c21094f [default view] [failures only] |
perfherder | [talos] [build metrics] [platform microbench] (compared to previous push) |
bugs | 880419 |
milestone | 29.0a1 |
backs out | a829f51aae56b0dc380271701a028259c3838a97 |
first release with | nightly linux32
735a648bca0d
/
29.0a1
/
20140131095418
/
files
nightly linux64
735a648bca0d
/
29.0a1
/
20140131095418
/
files
nightly mac
735a648bca0d
/
29.0a1
/
20140131095418
/
files
nightly win32
735a648bca0d
/
29.0a1
/
20140131095418
/
files
nightly win64
735a648bca0d
/
29.0a1
/
20140131095418
/
files
|
last release without | nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
|
releases | nightly linux32
29.0a1
/
20140131095418
/
pushlog to previous
nightly linux64
29.0a1
/
20140131095418
/
pushlog to previous
nightly mac
29.0a1
/
20140131095418
/
pushlog to previous
nightly win32
29.0a1
/
20140131095418
/
pushlog to previous
nightly win64
29.0a1
/
20140131095418
/
pushlog to previous
|
deleted file mode 100644 --- a/media/webrtc/trunk/third_party/libyuv/Android.mk +++ /dev/null @@ -1,56 +0,0 @@ -# This is the Android makefile for libyuv for both platform and NDK. -LOCAL_PATH:= $(call my-dir) - -include $(CLEAR_VARS) - -LOCAL_CPP_EXTENSION := .cc - -LOCAL_SRC_FILES := \ - source/compare.cc \ - source/compare_common.cc \ - source/compare_posix.cc \ - source/convert.cc \ - source/convert_argb.cc \ - source/convert_from.cc \ - source/convert_from_argb.cc \ - source/convert_to_argb.cc \ - source/convert_to_i420.cc \ - source/cpu_id.cc \ - source/format_conversion.cc \ - source/planar_functions.cc \ - source/rotate.cc \ - source/rotate_argb.cc \ - source/rotate_mips.cc \ - source/row_any.cc \ - source/row_common.cc \ - source/row_mips.cc \ - source/row_posix.cc \ - source/scale.cc \ - source/scale_argb.cc \ - source/scale_common.cc \ - source/scale_mips.cc \ - source/scale_posix.cc \ - source/video_common.cc - -# TODO(fbarchard): Enable mjpeg encoder. -# source/mjpeg_decoder.cc -# source/convert_jpeg.cc -# source/mjpeg_validate.cc - -ifeq ($(TARGET_ARCH_ABI),armeabi-v7a) - LOCAL_CFLAGS += -DLIBYUV_NEON - LOCAL_SRC_FILES += \ - source/compare_neon.cc.neon \ - source/rotate_neon.cc.neon \ - source/row_neon.cc.neon \ - source/scale_neon.cc.neon -endif - -LOCAL_EXPORT_C_INCLUDES := $(LOCAL_PATH)/include -LOCAL_C_INCLUDES += $(LOCAL_PATH)/include - -LOCAL_MODULE := libyuv_static -LOCAL_MODULE_TAGS := optional - -include $(BUILD_STATIC_LIBRARY) -
--- a/media/webrtc/trunk/third_party/libyuv/DEPS +++ b/media/webrtc/trunk/third_party/libyuv/DEPS @@ -7,33 +7,28 @@ vars = { # folder name. "root_dir": "trunk", "extra_gyp_flag": "-Dextra_gyp_flag=0", # Use this googlecode_url variable only if there is an internal mirror for it. # If you do not know, use the full path while defining your new deps entry. "googlecode_url": "http://%s.googlecode.com/svn", "chromium_trunk" : "http://src.chromium.org/svn/trunk", - # chrome://version/ for revision of canary Chrome. - "chromium_revision": "232627", + "chromium_revision": "152335", } # NOTE: Prefer revision numbers to tags for svn deps. Use http rather than # https; the latter can cause problems for users behind proxies. deps = { "../chromium_deps": File(Var("chromium_trunk") + "/src/DEPS@" + Var("chromium_revision")), "build": Var("chromium_trunk") + "/src/build@" + Var("chromium_revision"), - # Needed by common.gypi. - "google_apis/build": - Var("chromium_trunk") + "/src/google_apis/build@" + Var("chromium_revision"), - "testing": Var("chromium_trunk") + "/src/testing@" + Var("chromium_revision"), "testing/gtest": From("chromium_deps", "src/testing/gtest"), "tools/clang": Var("chromium_trunk") + "/src/tools/clang@" + Var("chromium_revision"), @@ -50,81 +45,50 @@ deps = { # Needed by build/common.gypi. "tools/win/supalink": Var("chromium_trunk") + "/src/tools/win/supalink@" + Var("chromium_revision"), "third_party/libjpeg_turbo": From("chromium_deps", "src/third_party/libjpeg_turbo"), # Yasm assember required for libjpeg_turbo + # TODO(fbarchard): Switch back to chromium version. "third_party/yasm": - Var("chromium_trunk") + "/src/third_party/yasm@" + Var("chromium_revision"), + Var("chromium_trunk") + "/src/third_party/yasm@154708", "third_party/yasm/source/patched-yasm": - Var("chromium_trunk") + "/deps/third_party/yasm/patched-yasm@" + Var("chromium_revision"), + Var("chromium_trunk") + "/deps/third_party/yasm/patched-yasm@154708", } deps_os = { "win": { # Use WebRTC's, stripped down, version of Cygwin (required by GYP). "third_party/cygwin": (Var("googlecode_url") % "webrtc") + "/deps/third_party/cygwin@2672", # Used by libjpeg-turbo. # TODO(fbarchard): Remove binaries and run yasm from build folder. "third_party/yasm/binaries": - Var("chromium_trunk") + "/deps/third_party/yasm/binaries@" + Var("chromium_revision"), + Var("chromium_trunk") + "/deps/third_party/yasm/binaries@154708", "third_party/yasm": None, }, "unix": { "third_party/gold": From("chromium_deps", "src/third_party/gold"), }, - "android": { - "third_party/android_tools": - From("chromium_deps", "src/third_party/android_tools"), - - "third_party/libjpeg": - From("chromium_deps", "src/third_party/libjpeg"), - }, - "ios": { - # NSS, for SSLClientSocketNSS. - "third_party/nss": - From("chromium_deps", "src/third_party/nss"), - - "net/third_party/nss": - Var("chromium_trunk") + "/src/net/third_party/nss@" + Var("chromium_revision"), - - # class-dump utility to generate header files for undocumented SDKs. - "testing/iossim/third_party/class-dump": - From("chromium_deps", "src/testing/iossim/third_party/class-dump"), - - # Helper for running under the simulator. - "testing/iossim": - Var("chromium_trunk") + "/src/testing/iossim@" + Var("chromium_revision"), - }, } hooks = [ { # Pull clang on mac. If nothing changed, or on non-mac platforms, this takes # zero seconds to run. If something changed, it downloads a prebuilt clang. "pattern": ".", "action": ["python", Var("root_dir") + "/tools/clang/scripts/update.py", "--mac-only"], }, { # A change to a .gyp, .gypi, or to GYP itself should run the generator. "pattern": ".", "action": ["python", Var("root_dir") + "/build/gyp_chromium", - "--depth=" + Var("root_dir"), Var("root_dir") + "/all.gyp", + "--depth=" + Var("root_dir"), Var("root_dir") + "/libyuv_test.gyp", Var("extra_gyp_flag")], }, - { - # Update the cygwin mount on Windows. - # This is necessary to get the correct mapping between e.g. /bin and the - # cygwin path on Windows. Without it we can't run bash scripts in actions. - # Ideally this should be solved in "pylib/gyp/msvs_emulation.py". - "pattern": ".", - "action": ["python", Var("root_dir") + "/build/win/setup_cygwin_mount.py", - "--win-only"], - }, ]
deleted file mode 100644 --- a/media/webrtc/trunk/third_party/libyuv/LICENSE_THIRD_PARTY +++ /dev/null @@ -1,8 +0,0 @@ -This source tree contains third party source code which is governed by third -party licenses. This file contains references to files which are under other -licenses than the one provided in the LICENSE file in the root of the source -tree. - -Files governed by third party licenses: -source/x86inc.asm -
deleted file mode 100644 --- a/media/webrtc/trunk/third_party/libyuv/OWNERS +++ /dev/null @@ -1,2 +0,0 @@ -fbarchard@chromium.org -mflodman@chromium.org
--- a/media/webrtc/trunk/third_party/libyuv/README.chromium +++ b/media/webrtc/trunk/third_party/libyuv/README.chromium @@ -1,9 +1,9 @@ Name: libyuv URL: http://code.google.com/p/libyuv/ -Version: 971 +Version: 389 License: BSD License File: LICENSE Description: libyuv is an open source project that includes YUV conversion and scaling functionality.
deleted file mode 100644 --- a/media/webrtc/trunk/third_party/libyuv/all.gyp +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright 2013 The LibYuv Project Authors. All rights reserved. -# -# Use of this source code is governed by a BSD-style license -# that can be found in the LICENSE file in the root of the source -# tree. An additional intellectual property rights grant can be found -# in the file PATENTS. All contributing project authors may -# be found in the AUTHORS file in the root of the source tree. - -# all.gyp and All target are for benefit of android gyp build. -{ - 'targets': [ - { - 'target_name': 'All', - 'type': 'none', - 'dependencies': [ - 'libyuv.gyp:*', - 'libyuv_test.gyp:*', - ], - }, - ], -}
--- a/media/webrtc/trunk/third_party/libyuv/codereview.settings +++ b/media/webrtc/trunk/third_party/libyuv/codereview.settings @@ -1,11 +1,12 @@ # This file is used by gcl to get repository specific information. # The LibYuv code review is via WebRtc's code review CODE_REVIEW_SERVER: webrtc-codereview.appspot.com #CC_LIST: -VIEW_VC: https://code.google.com/p/libyuv/source/detail?r= +#VIEW_VC: #STATUS: TRY_ON_UPLOAD: False -TRYSERVER_ROOT: src -TRYSERVER_SVN_URL: svn://svn.chromium.org/chrome-try/try-libyuv +TRYSERVER_HTTP_HOST: webrtc-cb-linux-master.cbf.corp.google.com +TRYSERVER_HTTP_PORT: 9020 +#TRYSERVER_SVN_URL: #GITCL_PREUPLOAD: #GITCL_PREDCOMMIT:
--- a/media/webrtc/trunk/third_party/libyuv/include/libyuv.h +++ b/media/webrtc/trunk/third_party/libyuv/include/libyuv.h @@ -1,33 +1,29 @@ /* * Copyright 2011 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may + * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef INCLUDE_LIBYUV_H_ // NOLINT #define INCLUDE_LIBYUV_H_ #include "libyuv/basic_types.h" #include "libyuv/compare.h" #include "libyuv/convert.h" #include "libyuv/convert_argb.h" #include "libyuv/convert_from.h" -#include "libyuv/convert_from_argb.h" #include "libyuv/cpu_id.h" #include "libyuv/format_conversion.h" -#include "libyuv/mjpeg_decoder.h" #include "libyuv/planar_functions.h" #include "libyuv/rotate.h" #include "libyuv/rotate_argb.h" -#include "libyuv/row.h" #include "libyuv/scale.h" #include "libyuv/scale_argb.h" -#include "libyuv/scale_row.h" #include "libyuv/version.h" #include "libyuv/video_common.h" #endif // INCLUDE_LIBYUV_H_ NOLINT
--- a/media/webrtc/trunk/third_party/libyuv/include/libyuv/basic_types.h +++ b/media/webrtc/trunk/third_party/libyuv/include/libyuv/basic_types.h @@ -1,54 +1,68 @@ /* * Copyright 2011 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may + * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef INCLUDE_LIBYUV_BASIC_TYPES_H_ // NOLINT #define INCLUDE_LIBYUV_BASIC_TYPES_H_ #include <stddef.h> // for NULL, size_t -#if defined(__ANDROID__) || (defined(_MSC_VER) && (_MSC_VER < 1600)) -#include <sys/types.h> // for uintptr_t on x86 -#else +#if !(defined(_MSC_VER) && (_MSC_VER < 1600)) #include <stdint.h> // for uintptr_t #endif -#ifndef GG_LONGLONG +#include <stdint.h> +typedef uint64_t uint64; +typedef int64_t int64; +#if defined(_MSC_VER) +// nsprpub/pr/include/obsolete/protypes.h defines these weirdly +typedef long int32; +typedef unsigned long uint32; +#else +typedef uint32_t uint32; +typedef int32_t int32; +#endif +typedef uint16_t uint16; +typedef int16_t int16; +typedef uint8_t uint8; +typedef int8_t int8; +#define INT_TYPES_DEFINED 1 + #ifndef INT_TYPES_DEFINED #define INT_TYPES_DEFINED #ifdef COMPILER_MSVC typedef unsigned __int64 uint64; typedef __int64 int64; #ifndef INT64_C #define INT64_C(x) x ## I64 #endif #ifndef UINT64_C #define UINT64_C(x) x ## UI64 #endif #define INT64_F "I64" #else // COMPILER_MSVC -#if defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__) +#ifdef __LP64__ typedef unsigned long uint64; // NOLINT typedef long int64; // NOLINT #ifndef INT64_C #define INT64_C(x) x ## L #endif #ifndef UINT64_C #define UINT64_C(x) x ## UL #endif #define INT64_F "l" -#else // defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__) +#else // __LP64__ typedef unsigned long long uint64; // NOLINT typedef long long int64; // NOLINT #ifndef INT64_C #define INT64_C(x) x ## LL #endif #ifndef UINT64_C #define UINT64_C(x) x ## ULL #endif @@ -57,37 +71,31 @@ typedef long long int64; // NOLINT #endif // COMPILER_MSVC typedef unsigned int uint32; typedef int int32; typedef unsigned short uint16; // NOLINT typedef short int16; // NOLINT typedef unsigned char uint8; typedef signed char int8; #endif // INT_TYPES_DEFINED -#endif // GG_LONGLONG // Detect compiler is for x86 or x64. #if defined(__x86_64__) || defined(_M_X64) || \ defined(__i386__) || defined(_M_IX86) #define CPU_X86 1 #endif // Detect compiler is for ARM. #if defined(__arm__) || defined(_M_ARM) #define CPU_ARM 1 #endif #ifndef ALIGNP -#ifdef __cplusplus #define ALIGNP(p, t) \ (reinterpret_cast<uint8*>(((reinterpret_cast<uintptr_t>(p) + \ ((t) - 1)) & ~((t) - 1)))) -#else -#define ALIGNP(p, t) \ - ((uint8*)((((uintptr_t)(p) + ((t) - 1)) & ~((t) - 1)))) /* NOLINT */ -#endif #endif #if !defined(LIBYUV_API) #if defined(_WIN32) || defined(__CYGWIN__) #if defined(LIBYUV_BUILDING_SHARED_LIBRARY) #define LIBYUV_API __declspec(dllexport) #elif defined(LIBYUV_USING_SHARED_LIBRARY) #define LIBYUV_API __declspec(dllimport) @@ -98,21 +106,9 @@ typedef signed char int8; (defined(LIBYUV_BUILDING_SHARED_LIBRARY) || \ defined(LIBYUV_USING_SHARED_LIBRARY)) #define LIBYUV_API __attribute__ ((visibility ("default"))) #else #define LIBYUV_API #endif // __GNUC__ #endif // LIBYUV_API -#define LIBYUV_BOOL int -#define LIBYUV_FALSE 0 -#define LIBYUV_TRUE 1 - -// Visual C x86 or GCC little endian. -#if defined(__x86_64__) || defined(_M_X64) || \ - defined(__i386__) || defined(_M_IX86) || \ - defined(__arm__) || defined(_M_ARM) || \ - (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -#define LIBYUV_LITTLE_ENDIAN -#endif - #endif // INCLUDE_LIBYUV_BASIC_TYPES_H_ NOLINT
--- a/media/webrtc/trunk/third_party/libyuv/include/libyuv/compare.h +++ b/media/webrtc/trunk/third_party/libyuv/include/libyuv/compare.h @@ -1,29 +1,29 @@ /* * Copyright 2011 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may + * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef INCLUDE_LIBYUV_COMPARE_H_ // NOLINT #define INCLUDE_LIBYUV_COMPARE_H_ #include "libyuv/basic_types.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif -// Compute a hash for specified memory. Seed of 5381 recommended. +// Compute a hash for specified memory. Seed of 5381 recommended. LIBYUV_API uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed); // Sum Square Error - used to compute Mean Square Error or PSNR. LIBYUV_API uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b, int count);
--- a/media/webrtc/trunk/third_party/libyuv/include/libyuv/convert.h +++ b/media/webrtc/trunk/third_party/libyuv/include/libyuv/convert.h @@ -1,15 +1,15 @@ /* * Copyright 2011 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may + * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef INCLUDE_LIBYUV_CONVERT_H_ // NOLINT #define INCLUDE_LIBYUV_CONVERT_H_ #include "libyuv/basic_types.h" // TODO(fbarchard): Remove the following headers includes. @@ -17,29 +17,42 @@ #include "libyuv/planar_functions.h" #include "libyuv/rotate.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif -// Convert I444 to I420. +// Alias. +#define I420ToI420 I420Copy + +// Copy I420 to I420. LIBYUV_API -int I444ToI420(const uint8* src_y, int src_stride_y, +int I420Copy(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height); + +// Convert I422 to I420. +LIBYUV_API +int I422ToI420(const uint8* src_y, int src_stride_y, const uint8* src_u, int src_stride_u, const uint8* src_v, int src_stride_v, uint8* dst_y, int dst_stride_y, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, int width, int height); -// Convert I422 to I420. +// Convert I444 to I420. LIBYUV_API -int I422ToI420(const uint8* src_y, int src_stride_y, +int I444ToI420(const uint8* src_y, int src_stride_y, const uint8* src_u, int src_stride_u, const uint8* src_v, int src_stride_v, uint8* dst_y, int dst_stride_y, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, int width, int height); // Convert I411 to I420. @@ -47,48 +60,45 @@ LIBYUV_API int I411ToI420(const uint8* src_y, int src_stride_y, const uint8* src_u, int src_stride_u, const uint8* src_v, int src_stride_v, uint8* dst_y, int dst_stride_y, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, int width, int height); -// Copy I420 to I420. -#define I420ToI420 I420Copy -LIBYUV_API -int I420Copy(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - // Convert I400 (grey) to I420. LIBYUV_API int I400ToI420(const uint8* src_y, int src_stride_y, uint8* dst_y, int dst_stride_y, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, int width, int height); -// Convert NV12 to I420. +// Convert NV12 to I420. Also used for NV21. LIBYUV_API int NV12ToI420(const uint8* src_y, int src_stride_y, const uint8* src_uv, int src_stride_uv, uint8* dst_y, int dst_stride_y, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, int width, int height); -// Convert NV21 to I420. +// Convert M420 to I420. LIBYUV_API -int NV21ToI420(const uint8* src_y, int src_stride_y, - const uint8* src_vu, int src_stride_vu, +int M420ToI420(const uint8* src_m420, int src_stride_m420, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height); + +// Convert Q420 to I420. +LIBYUV_API +int Q420ToI420(const uint8* src_y, int src_stride_y, + const uint8* src_yuy2, int src_stride_yuy2, uint8* dst_y, int dst_stride_y, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, int width, int height); // Convert YUY2 to I420. LIBYUV_API int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2, @@ -100,28 +110,19 @@ int YUY2ToI420(const uint8* src_yuy2, in // Convert UYVY to I420. LIBYUV_API int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy, uint8* dst_y, int dst_stride_y, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, int width, int height); -// Convert M420 to I420. +// Convert V210 to I420. LIBYUV_API -int M420ToI420(const uint8* src_m420, int src_stride_m420, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert Q420 to I420. -LIBYUV_API -int Q420ToI420(const uint8* src_y, int src_stride_y, - const uint8* src_yuy2, int src_stride_yuy2, +int V210ToI420(const uint8* src_uyvy, int src_stride_uyvy, uint8* dst_y, int dst_stride_y, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, int width, int height); // ARGB little endian (bgra in memory) to I420. LIBYUV_API int ARGBToI420(const uint8* src_frame, int src_stride_frame, @@ -199,21 +200,16 @@ int ARGB4444ToI420(const uint8* src_fram // dst_width/height for clipping determine final size. LIBYUV_API int MJPGToI420(const uint8* sample, size_t sample_size, uint8* dst_y, int dst_stride_y, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, int src_width, int src_height, int dst_width, int dst_height); - -// Query size of MJPG in pixels. -LIBYUV_API -int MJPGSize(const uint8* sample, size_t sample_size, - int* width, int* height); #endif // Note Bayer formats (BGGR) To I420 are in format_conversion.h // Convert camera sample to I420 with cropping, rotation and vertical flip. // "src_size" is needed to parse MJPG. // "dst_stride_y" number of bytes in a row of the dst_y plane. // Normally this would be the same as dst_width, with recommended alignment @@ -224,31 +220,31 @@ int MJPGSize(const uint8* sample, size_t // Normally this would be the same as (dst_width + 1) / 2, with // recommended alignment to 16 bytes for better efficiency. // If rotation of 90 or 270 is used, stride is affected. // "crop_x" and "crop_y" are starting position for cropping. // To center, crop_x = (src_width - dst_width) / 2 // crop_y = (src_height - dst_height) / 2 // "src_width" / "src_height" is size of src_frame in pixels. // "src_height" can be negative indicating a vertically flipped image source. -// "crop_width" / "crop_height" is the size to crop the src to. +// "dst_width" / "dst_height" is size of destination to crop to. // Must be less than or equal to src_width/src_height // Cropping parameters are pre-rotation. // "rotation" can be 0, 90, 180 or 270. -// "format" is a fourcc. ie 'I420', 'YUY2' +// "format" is a fourcc. ie 'I420', 'YUY2' // Returns 0 for successful; -1 for invalid parameter. Non-zero for failure. LIBYUV_API int ConvertToI420(const uint8* src_frame, size_t src_size, uint8* dst_y, int dst_stride_y, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, int crop_x, int crop_y, int src_width, int src_height, - int crop_width, int crop_height, - enum RotationMode rotation, + int dst_width, int dst_height, + RotationMode rotation, uint32 format); #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif #endif // INCLUDE_LIBYUV_CONVERT_H_ NOLINT
--- a/media/webrtc/trunk/third_party/libyuv/include/libyuv/convert_argb.h +++ b/media/webrtc/trunk/third_party/libyuv/include/libyuv/convert_argb.h @@ -1,30 +1,30 @@ /* * Copyright 2012 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may + * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef INCLUDE_LIBYUV_CONVERT_ARGB_H_ // NOLINT #define INCLUDE_LIBYUV_CONVERT_ARGB_H_ #include "libyuv/basic_types.h" // TODO(fbarchard): Remove the following headers includes #include "libyuv/convert_from.h" #include "libyuv/planar_functions.h" #include "libyuv/rotate.h" // TODO(fbarchard): This set of functions should exactly match convert.h -// Add missing Q420. -// TODO(fbarchard): Add tests. Create random content of right size and convert +// Add missing V210 and Q420. +// TODO(fbarchard): Add tests. Create random content of right size and convert // with C vs Opt and or to I420 and compare. // TODO(fbarchard): Some of these functions lack parameter setting. #ifdef __cplusplus namespace libyuv { extern "C" { #endif @@ -70,20 +70,17 @@ int I411ToARGB(const uint8* src_y, int s int width, int height); // Convert I400 (grey) to ARGB. LIBYUV_API int I400ToARGB(const uint8* src_y, int src_stride_y, uint8* dst_argb, int dst_stride_argb, int width, int height); -// Alias. -#define YToARGB I400ToARGB_Reference - -// Convert I400 to ARGB. Reverse of ARGBToI400. +// Convert I400 to ARGB. Reverse of ARGBToI400. LIBYUV_API int I400ToARGB_Reference(const uint8* src_y, int src_stride_y, uint8* dst_argb, int dst_stride_argb, int width, int height); // Convert NV12 to ARGB. LIBYUV_API int NV12ToARGB(const uint8* src_y, int src_stride_y, @@ -118,16 +115,22 @@ int YUY2ToARGB(const uint8* src_yuy2, in int width, int height); // Convert UYVY to ARGB. LIBYUV_API int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy, uint8* dst_argb, int dst_stride_argb, int width, int height); +// TODO(fbarchard): Convert V210 to ARGB. +// LIBYUV_API +// int V210ToARGB(const uint8* src_uyvy, int src_stride_uyvy, +// uint8* dst_argb, int dst_stride_argb, +// int width, int height); + // BGRA little endian (argb in memory) to ARGB. LIBYUV_API int BGRAToARGB(const uint8* src_frame, int src_stride_frame, uint8* dst_argb, int dst_stride_argb, int width, int height); // ABGR little endian (rgba in memory) to ARGB. LIBYUV_API @@ -197,29 +200,29 @@ int MJPGToARGB(const uint8* sample, size // Normally this would be the same as (dst_width + 1) / 2, with // recommended alignment to 16 bytes for better efficiency. // If rotation of 90 or 270 is used, stride is affected. // "crop_x" and "crop_y" are starting position for cropping. // To center, crop_x = (src_width - dst_width) / 2 // crop_y = (src_height - dst_height) / 2 // "src_width" / "src_height" is size of src_frame in pixels. // "src_height" can be negative indicating a vertically flipped image source. -// "crop_width" / "crop_height" is the size to crop the src to. +// "dst_width" / "dst_height" is size of destination to crop to. // Must be less than or equal to src_width/src_height // Cropping parameters are pre-rotation. // "rotation" can be 0, 90, 180 or 270. -// "format" is a fourcc. ie 'I420', 'YUY2' +// "format" is a fourcc. ie 'I420', 'YUY2' // Returns 0 for successful; -1 for invalid parameter. Non-zero for failure. LIBYUV_API int ConvertToARGB(const uint8* src_frame, size_t src_size, uint8* dst_argb, int dst_stride_argb, int crop_x, int crop_y, int src_width, int src_height, - int crop_width, int crop_height, - enum RotationMode rotation, + int dst_width, int dst_height, + RotationMode rotation, uint32 format); #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif #endif // INCLUDE_LIBYUV_CONVERT_ARGB_H_ NOLINT
--- a/media/webrtc/trunk/third_party/libyuv/include/libyuv/convert_from.h +++ b/media/webrtc/trunk/third_party/libyuv/include/libyuv/convert_from.h @@ -1,15 +1,15 @@ /* * Copyright 2011 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may + * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef INCLUDE_LIBYUV_CONVERT_FROM_H_ // NOLINT #define INCLUDE_LIBYUV_CONVERT_FROM_H_ #include "libyuv/basic_types.h" #include "libyuv/rotate.h" @@ -45,56 +45,48 @@ LIBYUV_API int I420ToI411(const uint8* src_y, int src_stride_y, const uint8* src_u, int src_stride_u, const uint8* src_v, int src_stride_v, uint8* dst_y, int dst_stride_y, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, int width, int height); -// Copy to I400. Source can be I420, I422, I444, I400, NV12 or NV21. +// Copy to I400. Source can be I420, I422, I444, I400, NV12 or NV21. LIBYUV_API int I400Copy(const uint8* src_y, int src_stride_y, uint8* dst_y, int dst_stride_y, int width, int height); +// TODO(fbarchard): I420ToNV12 // TODO(fbarchard): I420ToM420 // TODO(fbarchard): I420ToQ420 LIBYUV_API -int I420ToNV12(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_uv, int dst_stride_uv, - int width, int height); - -LIBYUV_API -int I420ToNV21(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_vu, int dst_stride_vu, - int width, int height); - -LIBYUV_API int I420ToYUY2(const uint8* src_y, int src_stride_y, const uint8* src_u, int src_stride_u, const uint8* src_v, int src_stride_v, uint8* dst_frame, int dst_stride_frame, int width, int height); LIBYUV_API int I420ToUYVY(const uint8* src_y, int src_stride_y, const uint8* src_u, int src_stride_u, const uint8* src_v, int src_stride_v, uint8* dst_frame, int dst_stride_frame, int width, int height); LIBYUV_API +int I420ToV210(const uint8* src_y, int src_stride_y, + const uint8* src_u, int src_stride_u, + const uint8* src_v, int src_stride_v, + uint8* dst_frame, int dst_stride_frame, + int width, int height); + +LIBYUV_API int I420ToARGB(const uint8* src_y, int src_stride_y, const uint8* src_u, int src_stride_u, const uint8* src_v, int src_stride_v, uint8* dst_argb, int dst_stride_argb, int width, int height); LIBYUV_API int I420ToBGRA(const uint8* src_y, int src_stride_y,
deleted file mode 100644 --- a/media/webrtc/trunk/third_party/libyuv/include/libyuv/convert_from_argb.h +++ /dev/null @@ -1,168 +0,0 @@ -/* - * Copyright 2012 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_ // NOLINT -#define INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_ - -#include "libyuv/basic_types.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// Copy ARGB to ARGB. -#define ARGBToARGB ARGBCopy -LIBYUV_API -int ARGBCopy(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Convert ARGB To BGRA. (alias) -#define ARGBToBGRA BGRAToARGB -LIBYUV_API -int BGRAToARGB(const uint8* src_frame, int src_stride_frame, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Convert ARGB To ABGR. (alias) -#define ARGBToABGR ABGRToARGB -LIBYUV_API -int ABGRToARGB(const uint8* src_frame, int src_stride_frame, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Convert ARGB To RGBA. -LIBYUV_API -int ARGBToRGBA(const uint8* src_frame, int src_stride_frame, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Convert ARGB To RGB24. -LIBYUV_API -int ARGBToRGB24(const uint8* src_argb, int src_stride_argb, - uint8* dst_rgb24, int dst_stride_rgb24, - int width, int height); - -// Convert ARGB To RAW. -LIBYUV_API -int ARGBToRAW(const uint8* src_argb, int src_stride_argb, - uint8* dst_rgb, int dst_stride_rgb, - int width, int height); - -// Convert ARGB To RGB565. -LIBYUV_API -int ARGBToRGB565(const uint8* src_argb, int src_stride_argb, - uint8* dst_rgb565, int dst_stride_rgb565, - int width, int height); - -// Convert ARGB To ARGB1555. -LIBYUV_API -int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb1555, int dst_stride_argb1555, - int width, int height); - -// Convert ARGB To ARGB4444. -LIBYUV_API -int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb4444, int dst_stride_argb4444, - int width, int height); - -// Convert ARGB To I444. -LIBYUV_API -int ARGBToI444(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert ARGB To I422. -LIBYUV_API -int ARGBToI422(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert ARGB To I420. (also in convert.h) -LIBYUV_API -int ARGBToI420(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert ARGB to J420. (JPeg full range I420). -LIBYUV_API -int ARGBToJ420(const uint8* src_argb, int src_stride_argb, - uint8* dst_yj, int dst_stride_yj, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert ARGB To I411. -LIBYUV_API -int ARGBToI411(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert ARGB to J400. (JPeg full range). -LIBYUV_API -int ARGBToJ400(const uint8* src_argb, int src_stride_argb, - uint8* dst_yj, int dst_stride_yj, - int width, int height); - -// Convert ARGB to I400. -LIBYUV_API -int ARGBToI400(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - int width, int height); - -// Convert ARGB To NV12. -LIBYUV_API -int ARGBToNV12(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_uv, int dst_stride_uv, - int width, int height); - -// Convert ARGB To NV21. -LIBYUV_API -int ARGBToNV21(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_vu, int dst_stride_vu, - int width, int height); - -// Convert ARGB To NV21. -LIBYUV_API -int ARGBToNV21(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_vu, int dst_stride_vu, - int width, int height); - -// Convert ARGB To YUY2. -LIBYUV_API -int ARGBToYUY2(const uint8* src_argb, int src_stride_argb, - uint8* dst_yuy2, int dst_stride_yuy2, - int width, int height); - -// Convert ARGB To UYVY. -LIBYUV_API -int ARGBToUYVY(const uint8* src_argb, int src_stride_argb, - uint8* dst_uyvy, int dst_stride_uyvy, - int width, int height); - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif - -#endif // INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_ NOLINT
--- a/media/webrtc/trunk/third_party/libyuv/include/libyuv/cpu_id.h +++ b/media/webrtc/trunk/third_party/libyuv/include/libyuv/cpu_id.h @@ -1,81 +1,70 @@ /* * Copyright 2011 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may + * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef INCLUDE_LIBYUV_CPU_ID_H_ // NOLINT #define INCLUDE_LIBYUV_CPU_ID_H_ #include "libyuv/basic_types.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif -// TODO(fbarchard): Consider overlapping bits for different architectures. -// Internal flag to indicate cpuid requires initialization. -#define kCpuInit 0x1 +// Internal flag to indicate cpuid is initialized. +static const int kCpuInitialized = 0x1; // These flags are only valid on ARM processors. static const int kCpuHasARM = 0x2; static const int kCpuHasNEON = 0x4; // 0x8 reserved for future ARM flag. // These flags are only valid on x86 processors. static const int kCpuHasX86 = 0x10; static const int kCpuHasSSE2 = 0x20; static const int kCpuHasSSSE3 = 0x40; static const int kCpuHasSSE41 = 0x80; static const int kCpuHasSSE42 = 0x100; static const int kCpuHasAVX = 0x200; static const int kCpuHasAVX2 = 0x400; -static const int kCpuHasERMS = 0x800; -static const int kCpuHasFMA3 = 0x1000; -// 0x2000, 0x4000, 0x8000 reserved for future X86 flags. - -// These flags are only valid on MIPS processors. -static const int kCpuHasMIPS = 0x10000; -static const int kCpuHasMIPS_DSP = 0x20000; -static const int kCpuHasMIPS_DSPR2 = 0x40000; // Internal function used to auto-init. LIBYUV_API int InitCpuFlags(void); // Internal function for parsing /proc/cpuinfo. LIBYUV_API int ArmCpuCaps(const char* cpuinfo_name); // Detect CPU has SSE2 etc. // Test_flag parameter should be one of kCpuHas constants above. // returns non-zero if instruction set is detected static __inline int TestCpuFlag(int test_flag) { LIBYUV_API extern int cpu_info_; - return (cpu_info_ == kCpuInit ? InitCpuFlags() : cpu_info_) & test_flag; + return (cpu_info_ ? cpu_info_ : InitCpuFlags()) & test_flag; } // For testing, allow CPU flags to be disabled. // ie MaskCpuFlags(~kCpuHasSSSE3) to disable SSSE3. // MaskCpuFlags(-1) to enable all cpu specific optimizations. // MaskCpuFlags(0) to disable all cpu specific optimizations. LIBYUV_API void MaskCpuFlags(int enable_flags); -// Low level cpuid for X86. Returns zeros on other CPUs. -// eax is the info type that you want. -// ecx is typically the cpu number, and should normally be zero. +// Low level cpuid for X86. Returns zeros on other CPUs. LIBYUV_API -void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info); +void CpuId(int cpu_info[4], int info_type); #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif #endif // INCLUDE_LIBYUV_CPU_ID_H_ NOLINT
--- a/media/webrtc/trunk/third_party/libyuv/include/libyuv/format_conversion.h +++ b/media/webrtc/trunk/third_party/libyuv/include/libyuv/format_conversion.h @@ -1,15 +1,15 @@ /* * Copyright 2011 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may + * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef INCLUDE_LIBYUV_FORMATCONVERSION_H_ // NOLINT #define INCLUDE_LIBYUV_FORMATCONVERSION_H_ #include "libyuv/basic_types.h"
--- a/media/webrtc/trunk/third_party/libyuv/include/libyuv/mjpeg_decoder.h +++ b/media/webrtc/trunk/third_party/libyuv/include/libyuv/mjpeg_decoder.h @@ -1,42 +1,31 @@ /* * Copyright 2012 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may + * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef INCLUDE_LIBYUV_MJPEG_DECODER_H_ // NOLINT #define INCLUDE_LIBYUV_MJPEG_DECODER_H_ #include "libyuv/basic_types.h" -#ifdef __cplusplus // NOTE: For a simplified public API use convert.h MJPGToI420(). struct jpeg_common_struct; struct jpeg_decompress_struct; struct jpeg_source_mgr; namespace libyuv { -#ifdef __cplusplus -extern "C" { -#endif - -LIBYUV_BOOL ValidateJpeg(const uint8* sample, size_t sample_size); - -#ifdef __cplusplus -} // extern "C" -#endif - static const uint32 kUnknownDataSize = 0xFFFFFFFF; enum JpegSubsamplingType { kJpegYuv420, kJpegYuv422, kJpegYuv411, kJpegYuv444, kJpegYuv400, @@ -47,17 +36,17 @@ struct SetJmpErrorMgr; // MJPEG ("Motion JPEG") is a pseudo-standard video codec where the frames are // simply independent JPEG images with a fixed huffman table (which is omitted). // It is rarely used in video transmission, but is common as a camera capture // format, especially in Logitech devices. This class implements a decoder for // MJPEG frames. // // See http://tools.ietf.org/html/rfc2435 -class LIBYUV_API MJpegDecoder { +class MJpegDecoder { public: typedef void (*CallbackFunction)(void* opaque, const uint8* const* data, const int* strides, int rows); static const int kColorSpaceUnknown; static const int kColorSpaceGrayscale; @@ -65,22 +54,21 @@ class LIBYUV_API MJpegDecoder { static const int kColorSpaceYCbCr; static const int kColorSpaceCMYK; static const int kColorSpaceYCCK; MJpegDecoder(); ~MJpegDecoder(); // Loads a new frame, reads its headers, and determines the uncompressed - // image format. - // Returns LIBYUV_TRUE if image looks valid and format is supported. - // If return value is LIBYUV_TRUE, then the values for all the following - // getters are populated. + // image format. Returns true if image looks valid and format is supported. + // If return value is true, then the values for all the following getters + // are populated. // src_len is the size of the compressed mjpeg frame in bytes. - LIBYUV_BOOL LoadFrame(const uint8* src, size_t src_len); + bool LoadFrame(const uint8* src, size_t src_len); // Returns width of the last loaded frame in pixels. int GetWidth(); // Returns height of the last loaded frame in pixels. int GetHeight(); // Returns format of the last loaded frame. The return value is one of the @@ -114,32 +102,32 @@ class LIBYUV_API MJpegDecoder { // Width of a component in bytes with padding for DCTSIZE. Public for testing. int GetComponentStride(int component); // Size of a component in bytes. int GetComponentSize(int component); // Call this after LoadFrame() if you decide you don't want to decode it // after all. - LIBYUV_BOOL UnloadFrame(); + bool UnloadFrame(); // Decodes the entire image into a one-buffer-per-color-component format. // dst_width must match exactly. dst_height must be <= to image height; if // less, the image is cropped. "planes" must have size equal to at least // GetNumComponents() and they must point to non-overlapping buffers of size // at least GetComponentSize(i). The pointers in planes are incremented // to point to after the end of the written data. // TODO(fbarchard): Add dst_x, dst_y to allow specific rect to be decoded. - LIBYUV_BOOL DecodeToBuffers(uint8** planes, int dst_width, int dst_height); + bool DecodeToBuffers(uint8** planes, int dst_width, int dst_height); // Decodes the entire image and passes the data via repeated calls to a // callback function. Each call will get the data for a whole number of // image scanlines. // TODO(fbarchard): Add dst_x, dst_y to allow specific rect to be decoded. - LIBYUV_BOOL DecodeToCallback(CallbackFunction fn, void* opaque, + bool DecodeToCallback(CallbackFunction fn, void* opaque, int dst_width, int dst_height); // The helper function which recognizes the jpeg sub-sampling type. static JpegSubsamplingType JpegSubsamplingTypeHelper( int* subsample_x, int* subsample_y, int number_of_components); private: struct Buffer { @@ -160,42 +148,41 @@ class LIBYUV_API MJpegDecoder { long num_bytes); // NOLINT static void term_source(jpeg_decompress_struct* cinfo); static void ErrorHandler(jpeg_common_struct* cinfo); void AllocOutputBuffers(int num_outbufs); void DestroyOutputBuffers(); - LIBYUV_BOOL StartDecode(); - LIBYUV_BOOL FinishDecode(); + bool StartDecode(); + bool FinishDecode(); void SetScanlinePointers(uint8** data); - LIBYUV_BOOL DecodeImcuRow(); + bool DecodeImcuRow(); int GetComponentScanlinePadding(int component); // A buffer holding the input data for a frame. Buffer buf_; BufferVector buf_vec_; jpeg_decompress_struct* decompress_struct_; jpeg_source_mgr* source_mgr_; SetJmpErrorMgr* error_mgr_; - // LIBYUV_TRUE iff at least one component has scanline padding. (i.e., + // true iff at least one component has scanline padding. (i.e., // GetComponentScanlinePadding() != 0.) - LIBYUV_BOOL has_scanline_padding_; + bool has_scanline_padding_; // Temporaries used to point to scanline outputs. int num_outbufs_; // Outermost size of all arrays below. uint8*** scanlines_; int* scanlines_sizes_; // Temporary buffer used for decoding when we can't decode directly to the // output buffers. Large enough for just one iMCU row. uint8** databuf_; int* databuf_strides_; }; } // namespace libyuv -#endif // __cplusplus #endif // INCLUDE_LIBYUV_MJPEG_DECODER_H_ NOLINT
--- a/media/webrtc/trunk/third_party/libyuv/include/libyuv/planar_functions.h +++ b/media/webrtc/trunk/third_party/libyuv/include/libyuv/planar_functions.h @@ -1,15 +1,15 @@ /* * Copyright 2011 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may + * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_ // NOLINT #define INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_ #include "libyuv/basic_types.h" @@ -17,107 +17,45 @@ #include "libyuv/convert.h" #include "libyuv/convert_argb.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif -// Copy a plane of data. -LIBYUV_API -void CopyPlane(const uint8* src_y, int src_stride_y, - uint8* dst_y, int dst_stride_y, - int width, int height); - -// Set a plane of data to a 32 bit value. LIBYUV_API void SetPlane(uint8* dst_y, int dst_stride_y, int width, int height, uint32 value); -// Copy I400. Supports inverting. +// Copy a plane of data (I420 to I400). LIBYUV_API -int I400ToI400(const uint8* src_y, int src_stride_y, +void CopyPlane(const uint8* src_y, int src_stride_y, uint8* dst_y, int dst_stride_y, int width, int height); - -// Copy I422 to I422. -#define I422ToI422 I422Copy +// Convert I420 to I400. (calls CopyPlane ignoring u/v). LIBYUV_API -int I422Copy(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Copy I444 to I444. -#define I444ToI444 I444Copy -LIBYUV_API -int I444Copy(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert YUY2 to I422. -LIBYUV_API -int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2, +int I420ToI400(const uint8* src_y, int src_stride_y, uint8* dst_y, int dst_stride_y, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, int width, int height); -// Convert UYVY to I422. -LIBYUV_API -int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert I420 to I400. (calls CopyPlane ignoring u/v). -LIBYUV_API -int I420ToI400(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - int width, int height); - -// Alias -#define I420ToI420Mirror I420Mirror - // I420 mirror. LIBYUV_API int I420Mirror(const uint8* src_y, int src_stride_y, const uint8* src_u, int src_stride_u, const uint8* src_v, int src_stride_v, uint8* dst_y, int dst_stride_y, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, int width, int height); -// Alias -#define I400ToI400Mirror I400Mirror - -// I400 mirror. A single plane is mirrored horizontally. -// Pass negative height to achieve 180 degree rotation. -LIBYUV_API -int I400Mirror(const uint8* src_y, int src_stride_y, - uint8* dst_y, int dst_stride_y, - int width, int height); - -// Alias -#define ARGBToARGBMirror ARGBMirror - // ARGB mirror. LIBYUV_API int ARGBMirror(const uint8* src_argb, int src_stride_argb, uint8* dst_argb, int dst_stride_argb, int width, int height); // Convert NV12 to RGB565. LIBYUV_API @@ -128,16 +66,70 @@ int NV12ToRGB565(const uint8* src_y, int // Convert NV21 to RGB565. LIBYUV_API int NV21ToRGB565(const uint8* src_y, int src_stride_y, const uint8* src_uv, int src_stride_uv, uint8* dst_rgb565, int dst_stride_rgb565, int width, int height); +// Aliases. +#define ARGBToBGRA BGRAToARGB +#define ARGBToABGR ABGRToARGB + +// Convert ARGB To RGBA. +LIBYUV_API +int ARGBToRGBA(const uint8* src_frame, int src_stride_frame, + uint8* dst_argb, int dst_stride_argb, + int width, int height); + +// Convert ARGB To RGB24. +LIBYUV_API +int ARGBToRGB24(const uint8* src_argb, int src_stride_argb, + uint8* dst_rgb24, int dst_stride_rgb24, + int width, int height); + +// Convert ARGB To RAW. +LIBYUV_API +int ARGBToRAW(const uint8* src_argb, int src_stride_argb, + uint8* dst_rgb, int dst_stride_rgb, + int width, int height); + +// Convert ARGB To RGB565. +LIBYUV_API +int ARGBToRGB565(const uint8* src_argb, int src_stride_argb, + uint8* dst_rgb565, int dst_stride_rgb565, + int width, int height); + +// Convert ARGB To ARGB1555. +LIBYUV_API +int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb, + uint8* dst_argb1555, int dst_stride_argb1555, + int width, int height); + +// Convert ARGB To ARGB4444. +LIBYUV_API +int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb, + uint8* dst_argb4444, int dst_stride_argb4444, + int width, int height); + +// Convert ARGB to I400. +LIBYUV_API +int ARGBToI400(const uint8* src_argb, int src_stride_argb, + uint8* dst_y, int dst_stride_y, + int width, int height); + +// ARGB little endian (bgra in memory) to I422. +LIBYUV_API +int ARGBToI422(const uint8* src_frame, int src_stride_frame, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height); + // I422ToARGB is in convert_argb.h // Convert I422 to BGRA. LIBYUV_API int I422ToBGRA(const uint8* src_y, int src_stride_y, const uint8* src_u, int src_stride_u, const uint8* src_v, int src_stride_v, uint8* dst_bgra, int dst_stride_bgra, int width, int height); @@ -183,138 +175,62 @@ int ARGBGray(uint8* dst_argb, int dst_st int x, int y, int width, int height); // Make a rectangle of ARGB Sepia tone. LIBYUV_API int ARGBSepia(uint8* dst_argb, int dst_stride_argb, int x, int y, int width, int height); // Apply a matrix rotation to each ARGB pixel. -// matrix_argb is 4 signed ARGB values. -128 to 127 representing -2 to 2. -// The first 4 coefficients apply to B, G, R, A and produce B of the output. -// The next 4 coefficients apply to B, G, R, A and produce G of the output. -// The next 4 coefficients apply to B, G, R, A and produce R of the output. -// The last 4 coefficients apply to B, G, R, A and produce A of the output. -LIBYUV_API -int ARGBColorMatrix(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - const int8* matrix_argb, - int width, int height); - -// Deprecated. Use ARGBColorMatrix instead. -// Apply a matrix rotation to each ARGB pixel. // matrix_argb is 3 signed ARGB values. -128 to 127 representing -1 to 1. // The first 4 coefficients apply to B, G, R, A and produce B of the output. // The next 4 coefficients apply to B, G, R, A and produce G of the output. // The last 4 coefficients apply to B, G, R, A and produce R of the output. LIBYUV_API -int RGBColorMatrix(uint8* dst_argb, int dst_stride_argb, - const int8* matrix_rgb, - int x, int y, int width, int height); +int ARGBColorMatrix(uint8* dst_argb, int dst_stride_argb, + const int8* matrix_argb, + int x, int y, int width, int height); // Apply a color table each ARGB pixel. // Table contains 256 ARGB values. LIBYUV_API int ARGBColorTable(uint8* dst_argb, int dst_stride_argb, const uint8* table_argb, int x, int y, int width, int height); -// Apply a color table each ARGB pixel but preserve destination alpha. -// Table contains 256 ARGB values. -LIBYUV_API -int RGBColorTable(uint8* dst_argb, int dst_stride_argb, - const uint8* table_argb, - int x, int y, int width, int height); - -// Apply a luma/color table each ARGB pixel but preserve destination alpha. -// Table contains 32768 values indexed by [Y][C] where 7 it 7 bit luma from -// RGB (YJ style) and C is an 8 bit color component (R, G or B). -LIBYUV_API -int ARGBLumaColorTable(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - const uint8* luma_rgb_table, - int width, int height); - -// Apply a 3 term polynomial to ARGB values. -// poly points to a 4x4 matrix. The first row is constants. The 2nd row is -// coefficients for b, g, r and a. The 3rd row is coefficients for b squared, -// g squared, r squared and a squared. The 4rd row is coefficients for b to -// the 3, g to the 3, r to the 3 and a to the 3. The values are summed and -// result clamped to 0 to 255. -// A polynomial approximation can be dirived using software such as 'R'. - -LIBYUV_API -int ARGBPolynomial(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - const float* poly, - int width, int height); - -// Quantize a rectangle of ARGB. Alpha unaffected. +// Quantize a rectangle of ARGB. Alpha unaffected. // scale is a 16 bit fractional fixed point scaler between 0 and 65535. // interval_size should be a value between 1 and 255. // interval_offset should be a value between 0 and 255. LIBYUV_API int ARGBQuantize(uint8* dst_argb, int dst_stride_argb, int scale, int interval_size, int interval_offset, int x, int y, int width, int height); // Copy ARGB to ARGB. LIBYUV_API int ARGBCopy(const uint8* src_argb, int src_stride_argb, uint8* dst_argb, int dst_stride_argb, int width, int height); -// Copy ARGB to ARGB. -LIBYUV_API -int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Copy ARGB to ARGB. -LIBYUV_API -int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - typedef void (*ARGBBlendRow)(const uint8* src_argb0, const uint8* src_argb1, uint8* dst_argb, int width); // Get function to Alpha Blend ARGB pixels and store to destination. LIBYUV_API ARGBBlendRow GetARGBBlend(); // Alpha Blend ARGB images and store to destination. // Alpha of destination is set to 255. LIBYUV_API int ARGBBlend(const uint8* src_argb0, int src_stride_argb0, const uint8* src_argb1, int src_stride_argb1, uint8* dst_argb, int dst_stride_argb, int width, int height); -// Multiply ARGB image by ARGB image. Shifted down by 8. Saturates to 255. -LIBYUV_API -int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0, - const uint8* src_argb1, int src_stride_argb1, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Add ARGB image with ARGB image. Saturates to 255. -LIBYUV_API -int ARGBAdd(const uint8* src_argb0, int src_stride_argb0, - const uint8* src_argb1, int src_stride_argb1, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Subtract ARGB image (argb1) from ARGB image (argb0). Saturates to 0. -LIBYUV_API -int ARGBSubtract(const uint8* src_argb0, int src_stride_argb0, - const uint8* src_argb1, int src_stride_argb1, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - // Convert I422 to YUY2. LIBYUV_API int I422ToYUY2(const uint8* src_y, int src_stride_y, const uint8* src_u, int src_stride_u, const uint8* src_v, int src_stride_v, uint8* dst_frame, int dst_stride_frame, int width, int height); @@ -339,30 +255,26 @@ int ARGBUnattenuate(const uint8* src_arg int width, int height); // Convert MJPG to ARGB. LIBYUV_API int MJPGToARGB(const uint8* sample, size_t sample_size, uint8* argb, int argb_stride, int w, int h, int dw, int dh); -// Internal function - do not call directly. // Computes table of cumulative sum for image where the value is the sum -// of all values above and to the left of the entry. Used by ARGBBlur. +// of all values above and to the left of the entry. Used by ARGBBlur. LIBYUV_API int ARGBComputeCumulativeSum(const uint8* src_argb, int src_stride_argb, int32* dst_cumsum, int dst_stride32_cumsum, int width, int height); // Blur ARGB image. -// dst_cumsum table of width * (height + 1) * 16 bytes aligned to -// 16 byte boundary. -// dst_stride32_cumsum is number of ints in a row (width * 4). -// radius is number of pixels around the center. e.g. 1 = 3x3. 2=5x5. -// Blur is optimized for radius of 5 (11x11) or less. +// Caller should allocate dst_cumsum table of width * height * 16 bytes aligned +// to 16 byte boundary. LIBYUV_API int ARGBBlur(const uint8* src_argb, int src_stride_argb, uint8* dst_argb, int dst_stride_argb, int32* dst_cumsum, int dst_stride32_cumsum, int width, int height, int radius); // Multiply ARGB image by ARGB value. LIBYUV_API @@ -377,58 +289,32 @@ int ARGBShade(const uint8* src_argb, int // Internally uses ARGBScale bilinear filtering. // Caveat: This function will write up to 16 bytes beyond the end of dst_argb. LIBYUV_API int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0, const uint8* src_argb1, int src_stride_argb1, uint8* dst_argb, int dst_stride_argb, int width, int height, int interpolation); -#if defined(__pnacl__) || defined(__CLR_VER) || defined(COVERAGE_ENABLED) || \ +#if defined(__CLR_VER) || defined(COVERAGE_ENABLED) || \ defined(TARGET_IPHONE_SIMULATOR) -#define LIBYUV_DISABLE_X86 +#define YUV_DISABLE_ASM #endif - // Row functions for copying a pixels from a source with a slope to a row -// of destination. Useful for scaling, rotation, mirror, texture mapping. +// of destination. Useful for scaling, rotation, mirror, texture mapping. LIBYUV_API void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride, uint8* dst_argb, const float* uv_dudv, int width); // The following are available on all x86 platforms: -#if !defined(LIBYUV_DISABLE_X86) && \ +#if !defined(YUV_DISABLE_ASM) && \ (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) LIBYUV_API void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, uint8* dst_argb, const float* uv_dudv, int width); #define HAS_ARGBAFFINEROW_SSE2 -#endif // LIBYUV_DISABLE_X86 - -// Shuffle ARGB channel order. e.g. BGRA to ARGB. -// shuffler is 16 bytes and must be aligned. -LIBYUV_API -int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra, - uint8* dst_argb, int dst_stride_argb, - const uint8* shuffler, int width, int height); - -// Sobel ARGB effect with planar output. -LIBYUV_API -int ARGBSobelToPlane(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - int width, int height); - -// Sobel ARGB effect. -LIBYUV_API -int ARGBSobel(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Sobel ARGB effect w/ Sobel X, Sobel, Sobel Y in ARGB. -LIBYUV_API -int ARGBSobelXY(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height); +#endif #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif #endif // INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_ NOLINT
--- a/media/webrtc/trunk/third_party/libyuv/include/libyuv/rotate.h +++ b/media/webrtc/trunk/third_party/libyuv/include/libyuv/rotate.h @@ -1,67 +1,61 @@ /* * Copyright 2011 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may + * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef INCLUDE_LIBYUV_ROTATE_H_ // NOLINT #define INCLUDE_LIBYUV_ROTATE_H_ #include "libyuv/basic_types.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif // Supported rotation. -typedef enum RotationMode { +enum RotationMode { kRotate0 = 0, // No rotation. kRotate90 = 90, // Rotate 90 degrees clockwise. kRotate180 = 180, // Rotate 180 degrees. kRotate270 = 270, // Rotate 270 degrees clockwise. // Deprecated. kRotateNone = 0, kRotateClockwise = 90, kRotateCounterClockwise = 270, -} RotationModeEnum; +}; // Rotate I420 frame. LIBYUV_API int I420Rotate(const uint8* src_y, int src_stride_y, const uint8* src_u, int src_stride_u, const uint8* src_v, int src_stride_v, uint8* dst_y, int dst_stride_y, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, - int src_width, int src_height, enum RotationMode mode); + int src_width, int src_height, RotationMode mode); // Rotate NV12 input and store in I420. LIBYUV_API int NV12ToI420Rotate(const uint8* src_y, int src_stride_y, const uint8* src_uv, int src_stride_uv, uint8* dst_y, int dst_stride_y, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, - int src_width, int src_height, enum RotationMode mode); + int src_width, int src_height, RotationMode mode); -// Rotate a plane by 0, 90, 180, or 270. -LIBYUV_API -int RotatePlane(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int src_width, int src_height, enum RotationMode mode); - -// Rotate planes by 90, 180, 270. Deprecated. +// Rotate planes by 90, 180, 270 LIBYUV_API void RotatePlane90(const uint8* src, int src_stride, uint8* dst, int dst_stride, int width, int height); LIBYUV_API void RotatePlane180(const uint8* src, int src_stride, uint8* dst, int dst_stride, @@ -76,33 +70,32 @@ LIBYUV_API void RotateUV90(const uint8* src, int src_stride, uint8* dst_a, int dst_stride_a, uint8* dst_b, int dst_stride_b, int width, int height); // Rotations for when U and V are interleaved. // These functions take one input pointer and // split the data into two buffers while -// rotating them. Deprecated. +// rotating them. LIBYUV_API void RotateUV180(const uint8* src, int src_stride, uint8* dst_a, int dst_stride_a, uint8* dst_b, int dst_stride_b, int width, int height); LIBYUV_API void RotateUV270(const uint8* src, int src_stride, uint8* dst_a, int dst_stride_a, uint8* dst_b, int dst_stride_b, int width, int height); // The 90 and 270 functions are based on transposes. // Doing a transpose with reversing the read/write // order will result in a rotation by +- 90 degrees. -// Deprecated. LIBYUV_API void TransposePlane(const uint8* src, int src_stride, uint8* dst, int dst_stride, int width, int height); LIBYUV_API void TransposeUV(const uint8* src, int src_stride, uint8* dst_a, int dst_stride_a,
--- a/media/webrtc/trunk/third_party/libyuv/include/libyuv/rotate_argb.h +++ b/media/webrtc/trunk/third_party/libyuv/include/libyuv/rotate_argb.h @@ -1,15 +1,15 @@ /* * Copyright 2012 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may + * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef INCLUDE_LIBYUV_ROTATE_ARGB_H_ // NOLINT #define INCLUDE_LIBYUV_ROTATE_ARGB_H_ #include "libyuv/basic_types.h" #include "libyuv/rotate.h" // For RotationMode. @@ -18,16 +18,16 @@ namespace libyuv { extern "C" { #endif // Rotate ARGB frame LIBYUV_API int ARGBRotate(const uint8* src_argb, int src_stride_argb, uint8* dst_argb, int dst_stride_argb, - int src_width, int src_height, enum RotationMode mode); + int src_width, int src_height, RotationMode mode); #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif #endif // INCLUDE_LIBYUV_ROTATE_ARGB_H_ NOLINT
--- a/media/webrtc/trunk/third_party/libyuv/include/libyuv/row.h +++ b/media/webrtc/trunk/third_party/libyuv/include/libyuv/row.h @@ -1,1374 +1,553 @@ /* * Copyright 2011 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may + * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef INCLUDE_LIBYUV_ROW_H_ // NOLINT #define INCLUDE_LIBYUV_ROW_H_ -#include <stdlib.h> // For malloc. - #include "libyuv/basic_types.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif +// TODO(fbarchard): Remove kMaxStride +#define kMaxStride (2880 * 4) #define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a) - 1))) -#ifdef __cplusplus -#define align_buffer_64(var, size) \ - uint8* var##_mem = reinterpret_cast<uint8*>(malloc((size) + 63)); \ - uint8* var = reinterpret_cast<uint8*> \ - ((reinterpret_cast<intptr_t>(var##_mem) + 63) & ~63) -#else -#define align_buffer_64(var, size) \ - uint8* var##_mem = (uint8*)(malloc((size) + 63)); /* NOLINT */ \ - uint8* var = (uint8*)(((intptr_t)(var##_mem) + 63) & ~63) /* NOLINT */ -#endif - -#define free_aligned_buffer_64(var) \ - free(var##_mem); \ - var = 0 - -#if defined(__pnacl__) || defined(__CLR_VER) || defined(COVERAGE_ENABLED) || \ +#if defined(__CLR_VER) || defined(COVERAGE_ENABLED) || \ defined(TARGET_IPHONE_SIMULATOR) -#define LIBYUV_DISABLE_X86 +#define YUV_DISABLE_ASM #endif // True if compiling for SSSE3 as a requirement. #if defined(__SSSE3__) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 3)) #define LIBYUV_SSSE3_ONLY #endif -// Enable for NaCL pepper 33 for bundle and AVX2 support. -// #define NEW_BINUTILS - // The following are available on all x86 platforms: -#if !defined(LIBYUV_DISABLE_X86) && \ +#if !defined(YUV_DISABLE_ASM) && \ (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) -// Effects: -#define HAS_ARGBADDROW_SSE2 -#define HAS_ARGBAFFINEROW_SSE2 -#define HAS_ARGBATTENUATEROW_SSSE3 -#define HAS_ARGBBLENDROW_SSSE3 -#define HAS_ARGBCOLORMATRIXROW_SSSE3 -#define HAS_ARGBCOLORTABLEROW_X86 -#define HAS_ARGBCOPYALPHAROW_SSE2 -#define HAS_ARGBCOPYYTOALPHAROW_SSE2 -#define HAS_ARGBGRAYROW_SSSE3 -#define HAS_ARGBLUMACOLORTABLEROW_SSSE3 -#define HAS_ARGBMIRRORROW_SSSE3 -#define HAS_ARGBMULTIPLYROW_SSE2 -#define HAS_ARGBPOLYNOMIALROW_SSE2 -#define HAS_ARGBQUANTIZEROW_SSE2 -#define HAS_ARGBSEPIAROW_SSSE3 -#define HAS_ARGBSHADEROW_SSE2 -#define HAS_ARGBSUBTRACTROW_SSE2 -#define HAS_ARGBTOUVROW_SSSE3 -#define HAS_ARGBUNATTENUATEROW_SSE2 -#define HAS_COMPUTECUMULATIVESUMROW_SSE2 -#define HAS_CUMULATIVESUMTOAVERAGEROW_SSE2 -#define HAS_INTERPOLATEROW_SSE2 -#define HAS_INTERPOLATEROW_SSSE3 -#define HAS_RGBCOLORTABLEROW_X86 -#define HAS_SOBELROW_SSE2 -#define HAS_SOBELTOPLANEROW_SSE2 -#define HAS_SOBELXROW_SSE2 -#define HAS_SOBELXYROW_SSE2 -#define HAS_SOBELYROW_SSE2 - -// Conversions: +// Conversions. +#define HAS_ABGRTOARGBROW_SSSE3 #define HAS_ABGRTOUVROW_SSSE3 #define HAS_ABGRTOYROW_SSSE3 #define HAS_ARGB1555TOARGBROW_SSE2 #define HAS_ARGB4444TOARGBROW_SSE2 -#define HAS_ARGBSHUFFLEROW_SSE2 -#define HAS_ARGBSHUFFLEROW_SSSE3 #define HAS_ARGBTOARGB1555ROW_SSE2 #define HAS_ARGBTOARGB4444ROW_SSE2 -#define HAS_ARGBTOBAYERGGROW_SSE2 -#define HAS_ARGBTOBAYERROW_SSSE3 #define HAS_ARGBTORAWROW_SSSE3 #define HAS_ARGBTORGB24ROW_SSSE3 #define HAS_ARGBTORGB565ROW_SSE2 -#define HAS_ARGBTOUV422ROW_SSSE3 -#define HAS_ARGBTOUV444ROW_SSSE3 -#define HAS_ARGBTOUVJROW_SSSE3 -#define HAS_ARGBTOYJROW_SSSE3 +#define HAS_ARGBTORGBAROW_SSSE3 +#define HAS_ARGBTOUVROW_SSSE3 #define HAS_ARGBTOYROW_SSSE3 +#define HAS_BGRATOARGBROW_SSSE3 #define HAS_BGRATOUVROW_SSSE3 #define HAS_BGRATOYROW_SSSE3 -#define HAS_COPYROW_ERMS #define HAS_COPYROW_SSE2 #define HAS_COPYROW_X86 -#define HAS_HALFROW_SSE2 #define HAS_I400TOARGBROW_SSE2 #define HAS_I411TOARGBROW_SSSE3 -#define HAS_I422TOARGB1555ROW_SSSE3 #define HAS_I422TOABGRROW_SSSE3 -#define HAS_I422TOARGB1555ROW_SSSE3 -#define HAS_I422TOARGB4444ROW_SSSE3 #define HAS_I422TOARGBROW_SSSE3 #define HAS_I422TOBGRAROW_SSSE3 -#define HAS_I422TORAWROW_SSSE3 -#define HAS_I422TORGB24ROW_SSSE3 -#define HAS_I422TORGB565ROW_SSSE3 -#define HAS_I422TORGBAROW_SSSE3 -#define HAS_I422TOUYVYROW_SSE2 -#define HAS_I422TOYUY2ROW_SSE2 #define HAS_I444TOARGBROW_SSSE3 -#define HAS_MERGEUVROW_SSE2 -#define HAS_MIRRORROW_SSE2 #define HAS_MIRRORROW_SSSE3 -#define HAS_MIRRORROW_UV_SSSE3 -#define HAS_MIRRORUVROW_SSSE3 +#define HAS_MIRRORROWUV_SSSE3 #define HAS_NV12TOARGBROW_SSSE3 -#define HAS_NV12TORGB565ROW_SSSE3 #define HAS_NV21TOARGBROW_SSSE3 -#define HAS_NV21TORGB565ROW_SSSE3 #define HAS_RAWTOARGBROW_SSSE3 -#define HAS_RAWTOYROW_SSSE3 #define HAS_RGB24TOARGBROW_SSSE3 -#define HAS_RGB24TOYROW_SSSE3 #define HAS_RGB565TOARGBROW_SSE2 -#define HAS_RGBATOUVROW_SSSE3 -#define HAS_RGBATOYROW_SSSE3 -#define HAS_SETROW_X86 -#define HAS_SPLITUVROW_SSE2 -#define HAS_UYVYTOARGBROW_SSSE3 +#define HAS_SPLITUV_SSE2 #define HAS_UYVYTOUV422ROW_SSE2 #define HAS_UYVYTOUVROW_SSE2 #define HAS_UYVYTOYROW_SSE2 #define HAS_YTOARGBROW_SSE2 -#define HAS_YUY2TOARGBROW_SSSE3 #define HAS_YUY2TOUV422ROW_SSE2 #define HAS_YUY2TOUVROW_SSE2 #define HAS_YUY2TOYROW_SSE2 -#endif -// GCC >= 4.7.0 required for AVX2. -#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__)) -#if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7)) -#define GCC_HAS_AVX2 1 -#endif // GNUC >= 4.7 -#endif // __GNUC__ - -// clang >= 3.4.0 required for AVX2. -#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__)) -#if (__clang_major__ > 3) || (__clang_major__ == 3 && (__clang_minor__ >= 4)) -#define CLANG_HAS_AVX2 1 -#endif // clang >= 3.4 -#endif // __clang__ - -// Visual C 2012 required for AVX2. -#if defined(_M_IX86) && defined(_MSC_VER) && _MSC_VER >= 1700 -#define VISUALC_HAS_AVX2 1 -#endif // VisualStudio >= 2012 - -// The following are available on all x86 platforms, but -// require VS2012, clang 3.4 or gcc 4.7. -// The code supports NaCL but requires a new compiler and validator. -#if !defined(LIBYUV_DISABLE_X86) && (defined(VISUALC_HAS_AVX2) || \ - defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2)) -// Effects: -#define HAS_ARGBPOLYNOMIALROW_AVX2 -#define HAS_ARGBSHUFFLEROW_AVX2 -#define HAS_ARGBCOPYALPHAROW_AVX2 -#define HAS_ARGBCOPYYTOALPHAROW_AVX2 +// Effects +#define HAS_ARGBMIRRORROW_SSSE3 +#define HAS_ARGBAFFINEROW_SSE2 +#define HAS_ARGBATTENUATEROW_SSSE3 +#define HAS_ARGBBLENDROW_SSSE3 +#define HAS_ARGBCOLORMATRIXROW_SSSE3 +#define HAS_ARGBGRAYROW_SSSE3 +#define HAS_ARGBINTERPOLATEROW_SSSE3 +#define HAS_ARGBQUANTIZEROW_SSE2 +#define HAS_ARGBSEPIAROW_SSSE3 +#define HAS_ARGBSHADE_SSE2 +#define HAS_ARGBUNATTENUATEROW_SSE2 +#define HAS_COMPUTECUMULATIVESUMROW_SSE2 +#define HAS_CUMULATIVESUMTOAVERAGE_SSE2 #endif -// The following are require VS2012. -// TODO(fbarchard): Port to gcc. -#if !defined(LIBYUV_DISABLE_X86) && defined(VISUALC_HAS_AVX2) -#define HAS_ARGBTOUVROW_AVX2 -#define HAS_ARGBTOYJROW_AVX2 -#define HAS_ARGBTOYROW_AVX2 -#define HAS_HALFROW_AVX2 -#define HAS_I422TOARGBROW_AVX2 -#define HAS_INTERPOLATEROW_AVX2 -#define HAS_MERGEUVROW_AVX2 -#define HAS_MIRRORROW_AVX2 -#define HAS_SPLITUVROW_AVX2 -#define HAS_UYVYTOUV422ROW_AVX2 -#define HAS_UYVYTOUVROW_AVX2 -#define HAS_UYVYTOYROW_AVX2 -#define HAS_YUY2TOUV422ROW_AVX2 -#define HAS_YUY2TOUVROW_AVX2 -#define HAS_YUY2TOYROW_AVX2 - -// Effects: -#define HAS_ARGBADDROW_AVX2 -#define HAS_ARGBATTENUATEROW_AVX2 -#define HAS_ARGBMIRRORROW_AVX2 -#define HAS_ARGBMULTIPLYROW_AVX2 -#define HAS_ARGBSUBTRACTROW_AVX2 -#define HAS_ARGBUNATTENUATEROW_AVX2 -#endif // defined(VISUALC_HAS_AVX2) - -// The following are Yasm x86 only: -// TODO(fbarchard): Port AVX2 to inline. -#if !defined(LIBYUV_DISABLE_X86) && defined(HAVE_YASM) - (defined(_M_IX86) || defined(_M_X64) || \ - defined(__x86_64__) || defined(__i386__)) -#define HAS_MERGEUVROW_AVX2 -#define HAS_MERGEUVROW_MMX -#define HAS_SPLITUVROW_AVX2 -#define HAS_SPLITUVROW_MMX -#define HAS_UYVYTOYROW_AVX2 -#define HAS_UYVYTOYROW_MMX -#define HAS_YUY2TOYROW_AVX2 -#define HAS_YUY2TOYROW_MMX +// The following are Windows only: +#if !defined(YUV_DISABLE_ASM) && defined(_M_IX86) +#define HAS_ARGBCOLORTABLEROW_X86 +#define HAS_I422TORGBAROW_SSSE3 +#define HAS_ABGRTOARGBROW_SSSE3 +#define HAS_RGBATOARGBROW_SSSE3 +#define HAS_RGBATOUVROW_SSSE3 +#define HAS_RGBATOYROW_SSSE3 #endif // The following are disabled when SSSE3 is available: -#if !defined(LIBYUV_DISABLE_X86) && \ +#if !defined(YUV_DISABLE_ASM) && \ (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) && \ !defined(LIBYUV_SSSE3_ONLY) +#define HAS_MIRRORROW_SSE2 +#define HAS_ARGBATTENUATE_SSE2 #define HAS_ARGBBLENDROW_SSE2 -#define HAS_ARGBATTENUATEROW_SSE2 -#define HAS_MIRRORROW_SSE2 #endif -// The following are available on Neon platforms: -#if !defined(LIBYUV_DISABLE_NEON) && \ - (defined(__ARM_NEON__) || defined(LIBYUV_NEON)) -#define HAS_ABGRTOUVROW_NEON -#define HAS_ABGRTOYROW_NEON -#define HAS_ARGB1555TOARGBROW_NEON -#define HAS_ARGB1555TOUVROW_NEON -#define HAS_ARGB1555TOYROW_NEON -#define HAS_ARGB4444TOARGBROW_NEON -#define HAS_ARGB4444TOUVROW_NEON -#define HAS_ARGB4444TOYROW_NEON -#define HAS_ARGBTOARGB1555ROW_NEON -#define HAS_ARGBTOARGB4444ROW_NEON -#define HAS_ARGBTOBAYERROW_NEON -#define HAS_ARGBTOBAYERGGROW_NEON -#define HAS_ARGBTORAWROW_NEON -#define HAS_ARGBTORGB24ROW_NEON -#define HAS_ARGBTORGB565ROW_NEON -#define HAS_ARGBTOUV411ROW_NEON -#define HAS_ARGBTOUV422ROW_NEON -#define HAS_ARGBTOUV444ROW_NEON -#define HAS_ARGBTOUVROW_NEON -#define HAS_ARGBTOUVJROW_NEON -#define HAS_ARGBTOYROW_NEON -#define HAS_ARGBTOYJROW_NEON -#define HAS_BGRATOUVROW_NEON -#define HAS_BGRATOYROW_NEON +// The following are available on Neon platforms +#if !defined(YUV_DISABLE_ASM) && defined(__ARM_NEON__) +#define HAS_MIRRORROW_NEON +#define HAS_MIRRORROWUV_NEON +#define HAS_SPLITUV_NEON #define HAS_COPYROW_NEON -#define HAS_HALFROW_NEON -#define HAS_I400TOARGBROW_NEON -#define HAS_I411TOARGBROW_NEON -#define HAS_I422TOABGRROW_NEON -#define HAS_I422TOARGB1555ROW_NEON -#define HAS_I422TOARGB4444ROW_NEON #define HAS_I422TOARGBROW_NEON #define HAS_I422TOBGRAROW_NEON -#define HAS_I422TORAWROW_NEON -#define HAS_I422TORGB24ROW_NEON -#define HAS_I422TORGB565ROW_NEON +#define HAS_I422TOABGRROW_NEON #define HAS_I422TORGBAROW_NEON -#define HAS_I422TOUYVYROW_NEON -#define HAS_I422TOYUY2ROW_NEON -#define HAS_I444TOARGBROW_NEON -#define HAS_MERGEUVROW_NEON -#define HAS_MIRRORROW_NEON -#define HAS_MIRRORUVROW_NEON -#define HAS_NV12TOARGBROW_NEON -#define HAS_NV12TORGB565ROW_NEON -#define HAS_NV21TOARGBROW_NEON -#define HAS_NV21TORGB565ROW_NEON +// TODO(fbarchard): Hook these up to calling functions. +#define HAS_ARGBTORGBAROW_NEON +#define HAS_ARGBTORGB24ROW_NEON +#define HAS_ARGBTORAWROW_NEON +#define HAS_ABGRTOARGBROW_NEON +#define HAS_BGRATOARGBROW_NEON +#define HAS_RGBATOARGBROW_NEON #define HAS_RAWTOARGBROW_NEON -#define HAS_RAWTOUVROW_NEON -#define HAS_RAWTOYROW_NEON #define HAS_RGB24TOARGBROW_NEON -#define HAS_RGB24TOUVROW_NEON -#define HAS_RGB24TOYROW_NEON -#define HAS_RGB565TOARGBROW_NEON -#define HAS_RGB565TOUVROW_NEON -#define HAS_RGB565TOYROW_NEON -#define HAS_RGBATOUVROW_NEON -#define HAS_RGBATOYROW_NEON -#define HAS_SETROW_NEON -#define HAS_SPLITUVROW_NEON -#define HAS_UYVYTOARGBROW_NEON +#define HAS_YUY2TOUV422ROW_NEON +#define HAS_YUY2TOUVROW_NEON +#define HAS_YUY2TOYROW_NEON #define HAS_UYVYTOUV422ROW_NEON #define HAS_UYVYTOUVROW_NEON #define HAS_UYVYTOYROW_NEON -#define HAS_YTOARGBROW_NEON -#define HAS_YUY2TOARGBROW_NEON -#define HAS_YUY2TOUV422ROW_NEON -#define HAS_YUY2TOUVROW_NEON -#define HAS_YUY2TOYROW_NEON -// Effects: -#define HAS_ARGBADDROW_NEON -#define HAS_ARGBATTENUATEROW_NEON -#define HAS_ARGBBLENDROW_NEON -#define HAS_ARGBCOLORMATRIXROW_NEON -#define HAS_ARGBGRAYROW_NEON -#define HAS_ARGBMIRRORROW_NEON -#define HAS_ARGBMULTIPLYROW_NEON -#define HAS_ARGBQUANTIZEROW_NEON -#define HAS_ARGBSEPIAROW_NEON -#define HAS_ARGBSHADEROW_NEON -#define HAS_ARGBSUBTRACTROW_NEON -#define HAS_SOBELROW_NEON -#define HAS_SOBELTOPLANEROW_NEON -#define HAS_SOBELXYROW_NEON -#define HAS_SOBELXROW_NEON -#define HAS_SOBELYROW_NEON -#define HAS_INTERPOLATEROW_NEON -#endif - -// The following are available on Mips platforms: -#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips__) -#define HAS_COPYROW_MIPS -#if defined(__mips_dsp) && (__mips_dsp_rev >= 2) -#define HAS_I422TOABGRROW_MIPS_DSPR2 -#define HAS_I422TOARGBROW_MIPS_DSPR2 -#define HAS_I422TOBGRAROW_MIPS_DSPR2 -#define HAS_INTERPOLATEROWS_MIPS_DSPR2 -#define HAS_MIRRORROW_MIPS_DSPR2 -#define HAS_MIRRORUVROW_MIPS_DSPR2 -#define HAS_SPLITUVROW_MIPS_DSPR2 -#endif #endif #if defined(_MSC_VER) && !defined(__CLR_VER) #define SIMD_ALIGNED(var) __declspec(align(16)) var -typedef __declspec(align(16)) int16 vec16[8]; -typedef __declspec(align(16)) int32 vec32[4]; typedef __declspec(align(16)) int8 vec8[16]; -typedef __declspec(align(16)) uint16 uvec16[8]; -typedef __declspec(align(16)) uint32 uvec32[4]; typedef __declspec(align(16)) uint8 uvec8[16]; -typedef __declspec(align(32)) int16 lvec16[16]; -typedef __declspec(align(32)) int32 lvec32[8]; -typedef __declspec(align(32)) int8 lvec8[32]; -typedef __declspec(align(32)) uint16 ulvec16[16]; -typedef __declspec(align(32)) uint32 ulvec32[8]; -typedef __declspec(align(32)) uint8 ulvec8[32]; - +typedef __declspec(align(16)) int16 vec16[8]; +typedef __declspec(align(16)) uint16 uvec16[8]; +typedef __declspec(align(16)) int32 vec32[4]; +typedef __declspec(align(16)) uint32 uvec32[4]; #elif defined(__GNUC__) -// Caveat GCC 4.2 to 4.7 have a known issue using vectors with const. #define SIMD_ALIGNED(var) var __attribute__((aligned(16))) +typedef int8 __attribute__((vector_size(16))) vec8; +typedef uint8 __attribute__((vector_size(16))) uvec8; typedef int16 __attribute__((vector_size(16))) vec16; +typedef uint16 __attribute__((vector_size(16))) uvec16; typedef int32 __attribute__((vector_size(16))) vec32; -typedef int8 __attribute__((vector_size(16))) vec8; -typedef uint16 __attribute__((vector_size(16))) uvec16; typedef uint32 __attribute__((vector_size(16))) uvec32; -typedef uint8 __attribute__((vector_size(16))) uvec8; #else #define SIMD_ALIGNED(var) var +typedef int8 vec8[16]; +typedef uint8 uvec8[16]; typedef int16 vec16[8]; +typedef uint16 uvec16[8]; typedef int32 vec32[4]; -typedef int8 vec8[16]; -typedef uint16 uvec16[8]; typedef uint32 uvec32[4]; -typedef uint8 uvec8[16]; #endif #if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__) #define OMITFP #else #define OMITFP __attribute__((optimize("omit-frame-pointer"))) #endif -// NaCL macros for GCC x86 and x64. - -// TODO(nfullagar): When pepper_33 toolchain is distributed, default to -// NEW_BINUTILS and remove all BUNDLEALIGN occurances. -#if defined(__native_client__) -#define LABELALIGN ".p2align 5\n" -#else -#define LABELALIGN ".p2align 2\n" -#endif -#if defined(__native_client__) && defined(__x86_64__) -#if defined(NEW_BINUTILS) -#define BUNDLELOCK ".bundle_lock\n" -#define BUNDLEUNLOCK ".bundle_unlock\n" -#define BUNDLEALIGN "\n" -#else -#define BUNDLELOCK "\n" -#define BUNDLEUNLOCK "\n" -#define BUNDLEALIGN ".p2align 5\n" -#endif -#define MEMACCESS(base) "%%nacl:(%%r15,%q" #base ")" -#define MEMACCESS2(offset, base) "%%nacl:" #offset "(%%r15,%q" #base ")" -#define MEMLEA(offset, base) #offset "(%q" #base ")" -#define MEMLEA3(offset, index, scale) \ - #offset "(,%q" #index "," #scale ")" -#define MEMLEA4(offset, base, index, scale) \ - #offset "(%q" #base ",%q" #index "," #scale ")" -#define MEMMOVESTRING(s, d) "%%nacl:(%q" #s "),%%nacl:(%q" #d "), %%r15" -#define MEMSTORESTRING(reg, d) "%%" #reg ",%%nacl:(%q" #d "), %%r15" -#define MEMOPREG(opcode, offset, base, index, scale, reg) \ - BUNDLELOCK \ - "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \ - #opcode " (%%r15,%%r14),%%" #reg "\n" \ - BUNDLEUNLOCK -#define MEMOPMEM(opcode, reg, offset, base, index, scale) \ - BUNDLELOCK \ - "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \ - #opcode " %%" #reg ",(%%r15,%%r14)\n" \ - BUNDLEUNLOCK -#define MEMOPARG(opcode, offset, base, index, scale, arg) \ - BUNDLELOCK \ - "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \ - #opcode " (%%r15,%%r14),%" #arg "\n" \ - BUNDLEUNLOCK -#else -#define BUNDLEALIGN "\n" -#define MEMACCESS(base) "(%" #base ")" -#define MEMACCESS2(offset, base) #offset "(%" #base ")" -#define MEMLEA(offset, base) #offset "(%" #base ")" -#define MEMLEA3(offset, index, scale) \ - #offset "(,%" #index "," #scale ")" -#define MEMLEA4(offset, base, index, scale) \ - #offset "(%" #base ",%" #index "," #scale ")" -#define MEMMOVESTRING(s, d) -#define MEMSTORESTRING(reg, d) -#define MEMOPREG(opcode, offset, base, index, scale, reg) \ - #opcode " " #offset "(%" #base ",%" #index "," #scale "),%%" #reg "\n" -#define MEMOPMEM(opcode, reg, offset, base, index, scale) \ - #opcode " %%" #reg ","#offset "(%" #base ",%" #index "," #scale ")\n" -#define MEMOPARG(opcode, offset, base, index, scale, arg) \ - #opcode " " #offset "(%" #base ",%" #index "," #scale "),%" #arg "\n" -#endif - -void I444ToARGBRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void I422ToARGBRow_NEON(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, + int width); +void I422ToBGRARow_NEON(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, int width); -void I422ToARGBRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I411ToARGBRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToBGRARow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_bgra, - int width); -void I422ToABGRRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_abgr, - int width); -void I422ToRGBARow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgba, +void I422ToABGRRow_NEON(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, int width); -void I422ToRGB24Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb24, - int width); -void I422ToRAWRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_raw, - int width); -void I422ToRGB565Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb565, - int width); -void I422ToARGB1555Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb1555, - int width); -void I422ToARGB4444Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb4444, - int width); -void NV12ToARGBRow_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, - int width); -void NV21ToARGBRow_NEON(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, - int width); -void NV12ToRGB565Row_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_rgb565, - int width); -void NV21ToRGB565Row_NEON(const uint8* src_y, - const uint8* src_vu, - uint8* dst_rgb565, - int width); -void YUY2ToARGBRow_NEON(const uint8* src_yuy2, - uint8* dst_argb, - int width); -void UYVYToARGBRow_NEON(const uint8* src_uyvy, - uint8* dst_argb, +void I422ToRGBARow_NEON(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, int width); -void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix); -void ARGBToYRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int pix); void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); -void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix); -void ARGBToYJRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int pix); -void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); -void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix); -void ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix); -void RGBAToYRow_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix); -void RGB24ToYRow_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix); -void RAWToYRow_SSSE3(const uint8* src_raw, uint8* dst_y, int pix); +void BGRAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); +void ABGRToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); +void RGBAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); void ARGBToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); -void ARGBToYJRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); -void BGRAToYRow_Unaligned_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix); -void ABGRToYRow_Unaligned_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix); -void RGBAToYRow_Unaligned_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix); -void RGB24ToYRow_Unaligned_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix); -void RAWToYRow_Unaligned_SSSE3(const uint8* src_raw, uint8* dst_y, int pix); -void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix); -void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int pix); -void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int pix); -void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int pix); -void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int pix); -void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int pix); -void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int pix); -void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra, - uint8* dst_u, uint8* dst_v, int pix); -void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr, - uint8* dst_u, uint8* dst_v, int pix); -void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba, - uint8* dst_u, uint8* dst_v, int pix); -void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24, - uint8* dst_u, uint8* dst_v, int pix); -void RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw, - uint8* dst_u, uint8* dst_v, int pix); -void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565, - uint8* dst_u, uint8* dst_v, int pix); -void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555, - uint8* dst_u, uint8* dst_v, int pix); -void ARGB4444ToUVRow_NEON(const uint8* src_argb4444, int src_stride_argb4444, - uint8* dst_u, uint8* dst_v, int pix); -void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int pix); -void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int pix); -void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int pix); -void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int pix); -void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int pix); -void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int pix); -void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int pix); -void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int pix); -void ARGBToYRow_C(const uint8* src_argb, uint8* dst_y, int pix); -void ARGBToYJRow_C(const uint8* src_argb, uint8* dst_y, int pix); -void BGRAToYRow_C(const uint8* src_bgra, uint8* dst_y, int pix); -void ABGRToYRow_C(const uint8* src_abgr, uint8* dst_y, int pix); -void RGBAToYRow_C(const uint8* src_rgba, uint8* dst_y, int pix); -void RGB24ToYRow_C(const uint8* src_rgb24, uint8* dst_y, int pix); -void RAWToYRow_C(const uint8* src_raw, uint8* dst_y, int pix); -void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int pix); -void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int pix); -void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int pix); -void ARGBToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); -void ARGBToYJRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); -void BGRAToYRow_Any_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix); -void ABGRToYRow_Any_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix); -void RGBAToYRow_Any_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix); -void RGB24ToYRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix); -void RAWToYRow_Any_SSSE3(const uint8* src_raw, uint8* dst_y, int pix); -void ARGBToYRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int pix); -void ARGBToYJRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int pix); -void BGRAToYRow_Any_NEON(const uint8* src_bgra, uint8* dst_y, int pix); -void ABGRToYRow_Any_NEON(const uint8* src_abgr, uint8* dst_y, int pix); -void RGBAToYRow_Any_NEON(const uint8* src_rgba, uint8* dst_y, int pix); -void RGB24ToYRow_Any_NEON(const uint8* src_rgb24, uint8* dst_y, int pix); -void RAWToYRow_Any_NEON(const uint8* src_raw, uint8* dst_y, int pix); -void RGB565ToYRow_Any_NEON(const uint8* src_rgb565, uint8* dst_y, int pix); -void ARGB1555ToYRow_Any_NEON(const uint8* src_argb1555, uint8* dst_y, int pix); -void ARGB4444ToYRow_Any_NEON(const uint8* src_argb4444, uint8* dst_y, int pix); +void BGRAToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); +void ABGRToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); +void RGBAToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); -void ARGBToUVRow_AVX2(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUVRow_Any_AVX2(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUVRow_SSSE3(const uint8* src_argb, int src_stride_argb, +void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, uint8* dst_u, uint8* dst_v, int width); -void ARGBToUVJRow_SSSE3(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void BGRAToUVRow_SSSE3(const uint8* src_bgra, int src_stride_bgra, - uint8* dst_u, uint8* dst_v, int width); -void ABGRToUVRow_SSSE3(const uint8* src_abgr, int src_stride_abgr, +void BGRAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, uint8* dst_u, uint8* dst_v, int width); -void RGBAToUVRow_SSSE3(const uint8* src_rgba, int src_stride_rgba, +void ABGRToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, uint8* dst_u, uint8* dst_v, int width); -void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUVJRow_Unaligned_SSSE3(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_bgra, int src_stride_bgra, - uint8* dst_u, uint8* dst_v, int width); -void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_abgr, int src_stride_abgr, - uint8* dst_u, uint8* dst_v, int width); -void RGBAToUVRow_Unaligned_SSSE3(const uint8* src_rgba, int src_stride_rgba, +void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, + uint8* dst_u, uint8* dst_v, int width); +void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb, uint8* dst_u, uint8* dst_v, int width); -void ARGBToUVRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUVJRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void BGRAToUVRow_Any_SSSE3(const uint8* src_bgra, int src_stride_bgra, - uint8* dst_u, uint8* dst_v, int width); -void ABGRToUVRow_Any_SSSE3(const uint8* src_abgr, int src_stride_abgr, - uint8* dst_u, uint8* dst_v, int width); -void RGBAToUVRow_Any_SSSE3(const uint8* src_rgba, int src_stride_rgba, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUV444Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int pix); -void ARGBToUV422Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int pix); -void ARGBToUV411Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int pix); -void ARGBToUVRow_Any_NEON(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int pix); -void ARGBToUVJRow_Any_NEON(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int pix); -void BGRAToUVRow_Any_NEON(const uint8* src_bgra, int src_stride_bgra, - uint8* dst_u, uint8* dst_v, int pix); -void ABGRToUVRow_Any_NEON(const uint8* src_abgr, int src_stride_abgr, - uint8* dst_u, uint8* dst_v, int pix); -void RGBAToUVRow_Any_NEON(const uint8* src_rgba, int src_stride_rgba, - uint8* dst_u, uint8* dst_v, int pix); -void RGB24ToUVRow_Any_NEON(const uint8* src_rgb24, int src_stride_rgb24, - uint8* dst_u, uint8* dst_v, int pix); -void RAWToUVRow_Any_NEON(const uint8* src_raw, int src_stride_raw, - uint8* dst_u, uint8* dst_v, int pix); -void RGB565ToUVRow_Any_NEON(const uint8* src_rgb565, int src_stride_rgb565, - uint8* dst_u, uint8* dst_v, int pix); -void ARGB1555ToUVRow_Any_NEON(const uint8* src_argb1555, - int src_stride_argb1555, - uint8* dst_u, uint8* dst_v, int pix); -void ARGB4444ToUVRow_Any_NEON(const uint8* src_argb4444, - int src_stride_argb4444, - uint8* dst_u, uint8* dst_v, int pix); -void ARGBToUVRow_C(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUVJRow_C(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void BGRAToUVRow_C(const uint8* src_bgra, int src_stride_bgra, - uint8* dst_u, uint8* dst_v, int width); -void ABGRToUVRow_C(const uint8* src_abgr, int src_stride_abgr, - uint8* dst_u, uint8* dst_v, int width); -void RGBAToUVRow_C(const uint8* src_rgba, int src_stride_rgba, - uint8* dst_u, uint8* dst_v, int width); -void RGB24ToUVRow_C(const uint8* src_rgb24, int src_stride_rgb24, - uint8* dst_u, uint8* dst_v, int width); -void RAWToUVRow_C(const uint8* src_raw, int src_stride_raw, - uint8* dst_u, uint8* dst_v, int width); -void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565, - uint8* dst_u, uint8* dst_v, int width); -void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555, - uint8* dst_u, uint8* dst_v, int width); -void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444, - uint8* dst_u, uint8* dst_v, int width); +void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb, + uint8* dst_u, uint8* dst_v, int width); +void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb, + uint8* dst_u, uint8* dst_v, int width); +void RGBAToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb, + uint8* dst_u, uint8* dst_v, int width); -void ARGBToUV444Row_SSSE3(const uint8* src_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUV444Row_Unaligned_SSSE3(const uint8* src_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUV444Row_Any_SSSE3(const uint8* src_argb, - uint8* dst_u, uint8* dst_v, int width); - -void ARGBToUV422Row_SSSE3(const uint8* src_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUV422Row_Unaligned_SSSE3(const uint8* src_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUV422Row_Any_SSSE3(const uint8* src_argb, - uint8* dst_u, uint8* dst_v, int width); - -void ARGBToUV444Row_C(const uint8* src_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUV422Row_C(const uint8* src_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUV411Row_C(const uint8* src_argb, - uint8* dst_u, uint8* dst_v, int width); - -void MirrorRow_AVX2(const uint8* src, uint8* dst, int width); void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width); void MirrorRow_SSE2(const uint8* src, uint8* dst, int width); void MirrorRow_NEON(const uint8* src, uint8* dst, int width); -void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width); void MirrorRow_C(const uint8* src, uint8* dst, int width); -void MirrorUVRow_SSSE3(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int width); -void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int width); -void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int width); -void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int width); +void MirrorRowUV_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v, int width); +void MirrorRowUV_NEON(const uint8* src, uint8* dst_u, uint8* dst_v, int width); +void MirrorRowUV_C(const uint8* src, uint8* dst_u, uint8* dst_v, int width); -void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width); void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width); -void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width); void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width); -void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix); -void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix); -void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix); -void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix); -void SplitUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int pix); -void SplitUVRow_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int pix); -void SplitUVRow_Unaligned_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, - uint8* dst_v, int pix); -void SplitUVRow_Any_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int pix); -void SplitUVRow_Any_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int pix); -void SplitUVRow_Any_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int pix); -void SplitUVRow_Any_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int pix); - -void MergeUVRow_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv, - int width); -void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, - int width); -void MergeUVRow_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, - int width); -void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv, - int width); -void MergeUVRow_Unaligned_SSE2(const uint8* src_u, const uint8* src_v, - uint8* dst_uv, int width); -void MergeUVRow_Any_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, - int width); -void MergeUVRow_Any_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, - int width); -void MergeUVRow_Any_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv, - int width); +void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix); +void SplitUV_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix); +void SplitUV_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix); void CopyRow_SSE2(const uint8* src, uint8* dst, int count); -void CopyRow_ERMS(const uint8* src, uint8* dst, int count); void CopyRow_X86(const uint8* src, uint8* dst, int count); void CopyRow_NEON(const uint8* src, uint8* dst, int count); -void CopyRow_MIPS(const uint8* src, uint8* dst, int count); void CopyRow_C(const uint8* src, uint8* dst, int count); -void ARGBCopyAlphaRow_C(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBCopyAlphaRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBCopyAlphaRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width); - -void ARGBCopyYToAlphaRow_C(const uint8* src_y, uint8* dst_argb, int width); -void ARGBCopyYToAlphaRow_SSE2(const uint8* src_y, uint8* dst_argb, int width); -void ARGBCopyYToAlphaRow_AVX2(const uint8* src_y, uint8* dst_argb, int width); +void ARGBToYRow_C(const uint8* src_argb, uint8* dst_y, int pix); +void BGRAToYRow_C(const uint8* src_argb, uint8* dst_y, int pix); +void ABGRToYRow_C(const uint8* src_argb, uint8* dst_y, int pix); +void RGBAToYRow_C(const uint8* src_argb, uint8* dst_y, int pix); -void SetRow_X86(uint8* dst, uint32 v32, int count); -void ARGBSetRows_X86(uint8* dst, uint32 v32, int width, - int dst_stride, int height); -void SetRow_NEON(uint8* dst, uint32 v32, int count); -void ARGBSetRows_NEON(uint8* dst, uint32 v32, int width, - int dst_stride, int height); -void SetRow_C(uint8* dst, uint32 v32, int count); -void ARGBSetRows_C(uint8* dst, uint32 v32, int width, int dst_stride, - int height); +void ARGBToUVRow_C(const uint8* src_argb0, int src_stride_argb, + uint8* dst_u, uint8* dst_v, int width); +void BGRAToUVRow_C(const uint8* src_argb0, int src_stride_argb, + uint8* dst_u, uint8* dst_v, int width); +void ABGRToUVRow_C(const uint8* src_argb0, int src_stride_argb, + uint8* dst_u, uint8* dst_v, int width); +void RGBAToUVRow_C(const uint8* src_argb0, int src_stride_argb, + uint8* dst_u, uint8* dst_v, int width); -// ARGBShufflers for BGRAToARGB etc. -void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int pix); -void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int pix); -void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int pix); -void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int pix); -void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int pix); -void ARGBShuffleRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int pix); -void ARGBShuffleRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int pix); -void ARGBShuffleRow_Any_SSSE3(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int pix); -void ARGBShuffleRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int pix); -void ARGBShuffleRow_Any_NEON(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int pix); +void BGRAToARGBRow_SSSE3(const uint8* src_bgra, uint8* dst_argb, int pix); +void ABGRToARGBRow_SSSE3(const uint8* src_abgr, uint8* dst_argb, int pix); +void RGBAToARGBRow_SSSE3(const uint8* src_rgba, uint8* dst_argb, int pix); +void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix); +void RAWToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix); +void ARGB1555ToARGBRow_SSE2(const uint8* src_argb, uint8* dst_argb, int pix); +void RGB565ToARGBRow_SSE2(const uint8* src_argb, uint8* dst_argb, int pix); +void ARGB4444ToARGBRow_SSE2(const uint8* src_argb, uint8* dst_argb, int pix); -void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix); -void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix); -void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb, int pix); -void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, uint8* dst_argb, - int pix); -void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb, - int pix); - +void BGRAToARGBRow_NEON(const uint8* src_bgra, uint8* dst_argb, int pix); +void ABGRToARGBRow_NEON(const uint8* src_abgr, uint8* dst_argb, int pix); +void RGBAToARGBRow_NEON(const uint8* src_rgba, uint8* dst_argb, int pix); void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix); -void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int pix); -void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int pix); -void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb, - int pix); -void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb, - int pix); +void RAWToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix); + +void BGRAToARGBRow_C(const uint8* src_bgra, uint8* dst_argb, int pix); +void ABGRToARGBRow_C(const uint8* src_abgr, uint8* dst_argb, int pix); +void RGBAToARGBRow_C(const uint8* src_rgba, uint8* dst_argb, int pix); void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int pix); -void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int pix); +void RAWToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int pix); void RGB565ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int pix); void ARGB1555ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int pix); void ARGB4444ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int pix); -void RGB24ToARGBRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix); -void RAWToARGBRow_Any_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix); -void RGB565ToARGBRow_Any_SSE2(const uint8* src_rgb565, uint8* dst_argb, - int pix); -void ARGB1555ToARGBRow_Any_SSE2(const uint8* src_argb1555, uint8* dst_argb, - int pix); -void ARGB4444ToARGBRow_Any_SSE2(const uint8* src_argb4444, uint8* dst_argb, - int pix); -void RGB24ToARGBRow_Any_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix); -void RAWToARGBRow_Any_NEON(const uint8* src_raw, uint8* dst_argb, int pix); -void RGB565ToARGBRow_Any_NEON(const uint8* src_rgb565, uint8* dst_argb, - int pix); -void ARGB1555ToARGBRow_Any_NEON(const uint8* src_argb1555, uint8* dst_argb, - int pix); -void ARGB4444ToARGBRow_Any_NEON(const uint8* src_argb4444, uint8* dst_argb, - int pix); +void ARGBToRGBARow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix); +void ARGBToRGBARow_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToRGBARow_C(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int pix); void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix); -void I400ToARGBRow_Unaligned_SSE2(const uint8* src_y, uint8* dst_argb, int pix); -void I400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int pix); void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int pix); -void I400ToARGBRow_Any_SSE2(const uint8* src_y, uint8* dst_argb, int pix); -void I400ToARGBRow_Any_NEON(const uint8* src_y, uint8* dst_argb, int pix); -void I444ToARGBRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToARGBRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I411ToARGBRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void NV12ToARGBRow_C(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, +void I444ToARGBRow_C(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* argb_buf, int width); -void NV21ToRGB565Row_C(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, - int width); -void NV12ToRGB565Row_C(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, - int width); -void NV21ToARGBRow_C(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, - int width); -void YUY2ToARGBRow_C(const uint8* src_yuy2, - uint8* dst_argb, + +void I422ToARGBRow_C(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* argb_buf, int width); -void UYVYToARGBRow_C(const uint8* src_uyvy, - uint8* dst_argb, - int width); -void I422ToBGRARow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_bgra, + +void I411ToARGBRow_C(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, int width); -void I422ToABGRRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_abgr, - int width); -void I422ToRGBARow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgba, + +void NV12ToARGBRow_C(const uint8* y_buf, + const uint8* uv_buf, + uint8* argb_buf, int width); -void I422ToRGB24Row_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb24, - int width); -void I422ToRAWRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_raw, - int width); -void I422ToARGB4444Row_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb4444, - int width); -void I422ToARGB1555Row_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb4444, - int width); -void I422ToRGB565Row_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb565, - int width); -void YToARGBRow_C(const uint8* src_y, - uint8* dst_argb, + +void NV21ToARGBRow_C(const uint8* y_buf, + const uint8* vu_buf, + uint8* argb_buf, + int width); + +void I422ToBGRARow_C(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* bgra_buf, + int width); + +void I422ToABGRRow_C(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* abgr_buf, + int width); + +void I422ToRGBARow_C(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgba_buf, + int width); + +void YToARGBRow_C(const uint8* y_buf, + uint8* rgb_buf, int width); -void I422ToARGBRow_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I444ToARGBRow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToARGBRow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I411ToARGBRow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void NV12ToARGBRow_SSSE3(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, - int width); -void NV21ToARGBRow_SSSE3(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, - int width); -void NV12ToRGB565Row_SSSE3(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, - int width); -void NV21ToRGB565Row_SSSE3(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, - int width); -void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2, - uint8* dst_argb, - int width); -void UYVYToARGBRow_SSSE3(const uint8* src_uyvy, - uint8* dst_argb, - int width); -void I422ToBGRARow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_bgra, - int width); -void I422ToABGRRow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_abgr, - int width); -void I422ToRGBARow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgba, + +void I444ToARGBRow_SSSE3(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* argb_buf, int width); -void I422ToARGB4444Row_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToARGB1555Row_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToRGB565Row_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -// RGB24/RAW are unaligned. -void I422ToRGB24Row_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb24, - int width); -void I422ToRAWRow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_raw, - int width); + +void I422ToARGBRow_SSSE3(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* argb_buf, + int width); + +void I411ToARGBRow_SSSE3(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, + int width); + +void NV12ToARGBRow_SSSE3(const uint8* y_buf, + const uint8* uv_buf, + uint8* argb_buf, + int width); -void I444ToARGBRow_Unaligned_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToARGBRow_Unaligned_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I411ToARGBRow_Unaligned_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void NV12ToARGBRow_Unaligned_SSSE3(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, - int width); -void NV21ToARGBRow_Unaligned_SSSE3(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, - int width); -void YUY2ToARGBRow_Unaligned_SSSE3(const uint8* src_yuy2, - uint8* dst_argb, - int width); -void UYVYToARGBRow_Unaligned_SSSE3(const uint8* src_uyvy, - uint8* dst_argb, - int width); -void I422ToBGRARow_Unaligned_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_bgra, - int width); -void I422ToABGRRow_Unaligned_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_abgr, - int width); -void I422ToRGBARow_Unaligned_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgba, - int width); -void I422ToARGBRow_Any_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I444ToARGBRow_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToARGBRow_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I411ToARGBRow_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void NV12ToARGBRow_Any_SSSE3(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, - int width); -void NV21ToARGBRow_Any_SSSE3(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, - int width); -void NV12ToRGB565Row_Any_SSSE3(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, - int width); -void NV21ToRGB565Row_Any_SSSE3(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, - int width); -void YUY2ToARGBRow_Any_SSSE3(const uint8* src_yuy2, - uint8* dst_argb, - int width); -void UYVYToARGBRow_Any_SSSE3(const uint8* src_uyvy, - uint8* dst_argb, - int width); -void I422ToBGRARow_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_bgra, - int width); -void I422ToABGRRow_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_abgr, - int width); -void I422ToRGBARow_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgba, - int width); -void I422ToARGB4444Row_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgba, - int width); -void I422ToARGB1555Row_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgba, - int width); -void I422ToRGB565Row_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgba, - int width); -// RGB24/RAW are unaligned. -void I422ToRGB24Row_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToRAWRow_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void YToARGBRow_SSE2(const uint8* src_y, - uint8* dst_argb, - int width); -void YToARGBRow_NEON(const uint8* src_y, - uint8* dst_argb, - int width); -void YToARGBRow_Any_SSE2(const uint8* src_y, - uint8* dst_argb, +void NV21ToARGBRow_SSSE3(const uint8* y_buf, + const uint8* vu_buf, + uint8* argb_buf, + int width); + +void I422ToBGRARow_SSSE3(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* bgra_buf, int width); -void YToARGBRow_Any_NEON(const uint8* src_y, - uint8* dst_argb, + +void I422ToABGRRow_SSSE3(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* abgr_buf, + int width); + +void I422ToRGBARow_SSSE3(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgba_buf, int width); -// ARGB preattenuated alpha blend. -void ARGBBlendRow_SSSE3(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBBlendRow_SSE2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBBlendRow_NEON(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBBlendRow_C(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); +void I444ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* argb_buf, + int width); + +void I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* argb_buf, + int width); + +void I411ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, + int width); + +void NV12ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, + const uint8* uv_buf, + uint8* argb_buf, + int width); -// ARGB multiply images. Same API as Blend, but these require -// pointer and width alignment for SSE2. -void ARGBMultiplyRow_C(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBMultiplyRow_SSE2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBMultiplyRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBMultiplyRow_AVX2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBMultiplyRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBMultiplyRow_NEON(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBMultiplyRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); +void NV21ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, + const uint8* vu_buf, + uint8* argb_buf, + int width); + +void I422ToBGRARow_Unaligned_SSSE3(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* bgra_buf, + int width); + +void I422ToABGRRow_Unaligned_SSSE3(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* abgr_buf, + int width); + +void I422ToRGBARow_Unaligned_SSSE3(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgba_buf, + int width); + +void I444ToARGBRow_Any_SSSE3(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* argb_buf, + int width); -// ARGB add images. -void ARGBAddRow_C(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBAddRow_SSE2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBAddRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBAddRow_AVX2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBAddRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBAddRow_NEON(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBAddRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); +void I422ToARGBRow_Any_SSSE3(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* argb_buf, + int width); + +void I411ToARGBRow_Any_SSSE3(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, + int width); + +void NV12ToARGBRow_Any_SSSE3(const uint8* y_buf, + const uint8* uv_buf, + uint8* argb_buf, + int width); + +void NV21ToARGBRow_Any_SSSE3(const uint8* y_buf, + const uint8* vu_buf, + uint8* argb_buf, + int width); -// ARGB subtract images. Same API as Blend, but these require -// pointer and width alignment for SSE2. -void ARGBSubtractRow_C(const uint8* src_argb, const uint8* src_argb1, +void I422ToBGRARow_Any_SSSE3(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* bgra_buf, + int width); + +void I422ToABGRRow_Any_SSSE3(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* abgr_buf, + int width); + +void I422ToRGBARow_Any_SSSE3(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgba_buf, + int width); + +void YToARGBRow_SSE2(const uint8* y_buf, + uint8* argb_buf, + int width); + +// ARGB preattenuated alpha blend. +void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1, + uint8* dst_argb, int width); +void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1, uint8* dst_argb, int width); -void ARGBSubtractRow_SSE2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBSubtractRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBSubtractRow_AVX2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBSubtractRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBSubtractRow_NEON(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBSubtractRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); +void ARGBBlendRow_C(const uint8* src_argb0, const uint8* src_argb1, + uint8* dst_argb, int width); void ARGBToRGB24Row_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToRAWRow_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToRGB565Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToARGB1555Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToARGB4444Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToRGB24Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); void ARGBToRAWRow_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToRGB565Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToARGB1555Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToARGB4444Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); -void I444ToARGBRow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToARGBRow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I411ToARGBRow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToBGRARow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToABGRRow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToRGBARow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, +void ARGBToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); +void BGRAToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); +void ABGRToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); +void RGBAToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); +void ARGBToUVRow_Any_SSSE3(const uint8* src_argb0, int src_stride_argb, + uint8* dst_u, uint8* dst_v, int width); +void BGRAToUVRow_Any_SSSE3(const uint8* src_argb0, int src_stride_argb, + uint8* dst_u, uint8* dst_v, int width); +void ABGRToUVRow_Any_SSSE3(const uint8* src_argb0, int src_stride_argb, + uint8* dst_u, uint8* dst_v, int width); +void RGBAToUVRow_Any_SSSE3(const uint8* src_argb0, int src_stride_argb, + uint8* dst_u, uint8* dst_v, int width); + +void I422ToARGBRow_Any_NEON(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, int width); -void I422ToRGB24Row_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToRAWRow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToARGB4444Row_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToARGB1555Row_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToRGB565Row_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void NV12ToARGBRow_Any_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, - int width); -void NV21ToARGBRow_Any_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, + +void I422ToBGRARow_Any_NEON(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, int width); -void NV12ToRGB565Row_Any_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, - int width); -void NV21ToRGB565Row_Any_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, - int width); -void YUY2ToARGBRow_Any_NEON(const uint8* src_yuy2, - uint8* dst_argb, - int width); -void UYVYToARGBRow_Any_NEON(const uint8* src_uyvy, - uint8* dst_argb, + +void I422ToABGRRow_Any_NEON(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, int width); -void I422ToARGBRow_MIPS_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToBGRARow_MIPS_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToABGRRow_MIPS_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToARGBRow_MIPS_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToBGRARow_MIPS_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToABGRRow_MIPS_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int pix); -void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2, - uint8* dst_u, uint8* dst_v, int pix); -void YUY2ToUV422Row_AVX2(const uint8* src_yuy2, - uint8* dst_u, uint8* dst_v, int pix); +void I422ToRGBARow_Any_NEON(const uint8* y_buf, + const uint8* u_buf, + const uint8* v_buf, + uint8* rgb_buf, + int width); + void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix); void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2, uint8* dst_u, uint8* dst_v, int pix); void YUY2ToUV422Row_SSE2(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v, int pix); void YUY2ToYRow_Unaligned_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix); void YUY2ToUVRow_Unaligned_SSE2(const uint8* src_yuy2, int stride_yuy2, @@ -1380,315 +559,117 @@ void YUY2ToUVRow_NEON(const uint8* src_y uint8* dst_u, uint8* dst_v, int pix); void YUY2ToUV422Row_NEON(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v, int pix); void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int pix); void YUY2ToUVRow_C(const uint8* src_yuy2, int stride_yuy2, uint8* dst_u, uint8* dst_v, int pix); void YUY2ToUV422Row_C(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v, int pix); -void YUY2ToYRow_Any_AVX2(const uint8* src_yuy2, uint8* dst_y, int pix); -void YUY2ToUVRow_Any_AVX2(const uint8* src_yuy2, int stride_yuy2, - uint8* dst_u, uint8* dst_v, int pix); -void YUY2ToUV422Row_Any_AVX2(const uint8* src_yuy2, - uint8* dst_u, uint8* dst_v, int pix); void YUY2ToYRow_Any_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix); void YUY2ToUVRow_Any_SSE2(const uint8* src_yuy2, int stride_yuy2, uint8* dst_u, uint8* dst_v, int pix); void YUY2ToUV422Row_Any_SSE2(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v, int pix); void YUY2ToYRow_Any_NEON(const uint8* src_yuy2, uint8* dst_y, int pix); void YUY2ToUVRow_Any_NEON(const uint8* src_yuy2, int stride_yuy2, uint8* dst_u, uint8* dst_v, int pix); void YUY2ToUV422Row_Any_NEON(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v, int pix); -void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int pix); -void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int pix); -void UYVYToUV422Row_AVX2(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int pix); + void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix); void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy, uint8* dst_u, uint8* dst_v, int pix); void UYVYToUV422Row_SSE2(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v, int pix); void UYVYToYRow_Unaligned_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix); void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy, uint8* dst_u, uint8* dst_v, int pix); void UYVYToUV422Row_Unaligned_SSE2(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v, int pix); -void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int pix); -void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int pix); -void UYVYToUV422Row_AVX2(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int pix); void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int pix); void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy, uint8* dst_u, uint8* dst_v, int pix); void UYVYToUV422Row_NEON(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v, int pix); void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int pix); void UYVYToUVRow_C(const uint8* src_uyvy, int stride_uyvy, uint8* dst_u, uint8* dst_v, int pix); void UYVYToUV422Row_C(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v, int pix); -void UYVYToYRow_Any_AVX2(const uint8* src_uyvy, uint8* dst_y, int pix); -void UYVYToUVRow_Any_AVX2(const uint8* src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int pix); -void UYVYToUV422Row_Any_AVX2(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int pix); void UYVYToYRow_Any_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix); void UYVYToUVRow_Any_SSE2(const uint8* src_uyvy, int stride_uyvy, uint8* dst_u, uint8* dst_v, int pix); void UYVYToUV422Row_Any_SSE2(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v, int pix); void UYVYToYRow_Any_NEON(const uint8* src_uyvy, uint8* dst_y, int pix); void UYVYToUVRow_Any_NEON(const uint8* src_uyvy, int stride_uyvy, uint8* dst_u, uint8* dst_v, int pix); void UYVYToUV422Row_Any_NEON(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v, int pix); -void HalfRow_C(const uint8* src_uv, int src_uv_stride, - uint8* dst_uv, int pix); -void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride, - uint8* dst_uv, int pix); -void HalfRow_AVX2(const uint8* src_uv, int src_uv_stride, - uint8* dst_uv, int pix); -void HalfRow_NEON(const uint8* src_uv, int src_uv_stride, - uint8* dst_uv, int pix); - -void ARGBToBayerRow_C(const uint8* src_argb, uint8* dst_bayer, - uint32 selector, int pix); -void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer, - uint32 selector, int pix); -void ARGBToBayerRow_NEON(const uint8* src_argb, uint8* dst_bayer, - uint32 selector, int pix); -void ARGBToBayerRow_Any_SSSE3(const uint8* src_argb, uint8* dst_bayer, - uint32 selector, int pix); -void ARGBToBayerRow_Any_NEON(const uint8* src_argb, uint8* dst_bayer, - uint32 selector, int pix); -void ARGBToBayerGGRow_C(const uint8* src_argb, uint8* dst_bayer, - uint32 /* selector */, int pix); -void ARGBToBayerGGRow_SSE2(const uint8* src_argb, uint8* dst_bayer, - uint32 /* selector */, int pix); -void ARGBToBayerGGRow_NEON(const uint8* src_argb, uint8* dst_bayer, - uint32 /* selector */, int pix); -void ARGBToBayerGGRow_Any_SSE2(const uint8* src_argb, uint8* dst_bayer, - uint32 /* selector */, int pix); -void ARGBToBayerGGRow_Any_NEON(const uint8* src_argb, uint8* dst_bayer, - uint32 /* selector */, int pix); - -void I422ToYUY2Row_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_yuy2, int width); -void I422ToUYVYRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_uyvy, int width); -void I422ToYUY2Row_SSE2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_yuy2, int width); -void I422ToUYVYRow_SSE2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_uyvy, int width); -void I422ToYUY2Row_Any_SSE2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_yuy2, int width); -void I422ToUYVYRow_Any_SSE2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_uyvy, int width); -void I422ToYUY2Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_yuy2, int width); -void I422ToUYVYRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_uyvy, int width); -void I422ToYUY2Row_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_yuy2, int width); -void I422ToUYVYRow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_uyvy, int width); - -// Effects related row functions. void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width); void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width); void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBAttenuateRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb, - int width); -void ARGBAttenuateRow_Any_SSSE3(const uint8* src_argb, uint8* dst_argb, - int width); -void ARGBAttenuateRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb, - int width); -void ARGBAttenuateRow_Any_NEON(const uint8* src_argb, uint8* dst_argb, - int width); // Inverse table for unattenuate, shared by C and SSE2. -extern const uint32 fixed_invtbl8[256]; +extern uint32 fixed_invtbl8[256]; void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width); void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBUnattenuateRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb, - int width); -void ARGBUnattenuateRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb, - int width); void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width); void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width); void ARGBSepiaRow_C(uint8* dst_argb, int width); void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width); -void ARGBSepiaRow_NEON(uint8* dst_argb, int width); -void ARGBColorMatrixRow_C(const uint8* src_argb, uint8* dst_argb, - const int8* matrix_argb, int width); -void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb, - const int8* matrix_argb, int width); -void ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb, - const int8* matrix_argb, int width); +void ARGBColorMatrixRow_C(uint8* dst_argb, const int8* matrix_argb, int width); +void ARGBColorMatrixRow_SSSE3(uint8* dst_argb, const int8* matrix_argb, + int width); void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width); void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width); -void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width); -void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width); - void ARGBQuantizeRow_C(uint8* dst_argb, int scale, int interval_size, int interval_offset, int width); void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size, int interval_offset, int width); -void ARGBQuantizeRow_NEON(uint8* dst_argb, int scale, int interval_size, - int interval_offset, int width); + +// Used for blur. +void CumulativeSumToAverage_SSE2(const int32* topleft, const int32* botleft, + int width, int area, uint8* dst, int count); +void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum, + const int32* previous_cumsum, int width); + +void CumulativeSumToAverage_C(const int32* topleft, const int32* botleft, + int width, int area, uint8* dst, int count); +void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum, + const int32* previous_cumsum, int width); void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width, uint32 value); void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width, uint32 value); -void ARGBShadeRow_NEON(const uint8* src_argb, uint8* dst_argb, int width, - uint32 value); - -// Used for blur. -void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft, - int width, int area, uint8* dst, int count); -void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum, - const int32* previous_cumsum, int width); - -void CumulativeSumToAverageRow_C(const int32* topleft, const int32* botleft, - int width, int area, uint8* dst, int count); -void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum, - const int32* previous_cumsum, int width); LIBYUV_API void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride, uint8* dst_argb, const float* uv_dudv, int width); LIBYUV_API void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, uint8* dst_argb, const float* uv_dudv, int width); -// Used for I420Scale, ARGBScale, and ARGBInterpolate. -void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, - int width, int source_y_fraction); -void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, int width, - int source_y_fraction); -void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, int width, - int source_y_fraction); -void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, int width, - int source_y_fraction); -void InterpolateRow_NEON(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, int width, - int source_y_fraction); -void InterpolateRows_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, int width, - int source_y_fraction); -void InterpolateRow_Unaligned_SSE2(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, int width, - int source_y_fraction); -void InterpolateRow_Unaligned_SSSE3(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, int width, - int source_y_fraction); -void InterpolateRow_Any_NEON(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, int width, - int source_y_fraction); -void InterpolateRow_Any_SSE2(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, int width, - int source_y_fraction); -void InterpolateRow_Any_SSSE3(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, int width, +void ARGBInterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr, + ptrdiff_t src_stride, + int dst_width, int source_y_fraction); +void ARGBInterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr, + ptrdiff_t src_stride, int dst_width, int source_y_fraction); -void InterpolateRow_Any_AVX2(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, int width, - int source_y_fraction); -void InterpolateRows_Any_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, int width, - int source_y_fraction); - -// Sobel images. -void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2, - uint8* dst_sobelx, int width); -void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1, - const uint8* src_y2, uint8* dst_sobelx, int width); -void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1, - const uint8* src_y2, uint8* dst_sobelx, int width); -void SobelYRow_C(const uint8* src_y0, const uint8* src_y1, - uint8* dst_sobely, int width); -void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1, - uint8* dst_sobely, int width); -void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1, - uint8* dst_sobely, int width); -void SobelRow_C(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width); -void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width); -void SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width); -void SobelToPlaneRow_C(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_y, int width); -void SobelToPlaneRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_y, int width); -void SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_y, int width); -void SobelXYRow_C(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width); -void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width); -void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width); - -void ARGBPolynomialRow_C(const uint8* src_argb, - uint8* dst_argb, const float* poly, - int width); -void ARGBPolynomialRow_SSE2(const uint8* src_argb, - uint8* dst_argb, const float* poly, - int width); -void ARGBPolynomialRow_AVX2(const uint8* src_argb, - uint8* dst_argb, const float* poly, - int width); - -void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width, - const uint8* luma, uint32 lumacoeff); -void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb, - int width, - const uint8* luma, uint32 lumacoeff); #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif #endif // INCLUDE_LIBYUV_ROW_H_ NOLINT + +
--- a/media/webrtc/trunk/third_party/libyuv/include/libyuv/scale.h +++ b/media/webrtc/trunk/third_party/libyuv/include/libyuv/scale.h @@ -1,43 +1,42 @@ /* * Copyright 2011 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may + * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef INCLUDE_LIBYUV_SCALE_H_ // NOLINT #define INCLUDE_LIBYUV_SCALE_H_ #include "libyuv/basic_types.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif -// Supported filtering. -typedef enum FilterMode { +// Supported filtering +enum FilterMode { kFilterNone = 0, // Point sample; Fastest. - kFilterLinear = 1, // Filter horizontally only. - kFilterBilinear = 2, // Faster than box, but lower quality scaling down. - kFilterBox = 3 // Highest quality. -} FilterModeEnum; + kFilterBilinear = 1, // Faster than box, but lower quality scaling down. + kFilterBox = 2 // Highest quality. +}; // Scale a YUV plane. LIBYUV_API void ScalePlane(const uint8* src, int src_stride, int src_width, int src_height, uint8* dst, int dst_stride, int dst_width, int dst_height, - enum FilterMode filtering); + FilterMode filtering); // Scales a YUV 4:2:0 image from the src width and height to the // dst width and height. // If filtering is kFilterNone, a simple nearest-neighbor algorithm is // used. This produces basic (blocky) quality at the fastest speed. // If filtering is kFilterBilinear, interpolation is used to produce a better // quality image, at the expense of speed. // If filtering is kFilterBox, averaging is used to produce ever better @@ -48,38 +47,36 @@ LIBYUV_API int I420Scale(const uint8* src_y, int src_stride_y, const uint8* src_u, int src_stride_u, const uint8* src_v, int src_stride_v, int src_width, int src_height, uint8* dst_y, int dst_stride_y, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, int dst_width, int dst_height, - enum FilterMode filtering); + FilterMode filtering); -#ifdef __cplusplus // Legacy API. Deprecated. LIBYUV_API int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v, int src_stride_y, int src_stride_u, int src_stride_v, int src_width, int src_height, uint8* dst_y, uint8* dst_u, uint8* dst_v, int dst_stride_y, int dst_stride_u, int dst_stride_v, int dst_width, int dst_height, - LIBYUV_BOOL interpolate); + bool interpolate); // Legacy API. Deprecated. LIBYUV_API -int ScaleOffset(const uint8* src_i420, int src_width, int src_height, - uint8* dst_i420, int dst_width, int dst_height, int dst_yoffset, - LIBYUV_BOOL interpolate); +int ScaleOffset(const uint8* src, int src_width, int src_height, + uint8* dst, int dst_width, int dst_height, int dst_yoffset, + bool interpolate); // For testing, allow disabling of specialized scalers. LIBYUV_API -void SetUseReferenceImpl(LIBYUV_BOOL use); -#endif // __cplusplus +void SetUseReferenceImpl(bool use); #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif #endif // INCLUDE_LIBYUV_SCALE_H_ NOLINT
--- a/media/webrtc/trunk/third_party/libyuv/include/libyuv/scale_argb.h +++ b/media/webrtc/trunk/third_party/libyuv/include/libyuv/scale_argb.h @@ -1,15 +1,15 @@ /* * Copyright 2012 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may + * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef INCLUDE_LIBYUV_SCALE_ARGB_H_ // NOLINT #define INCLUDE_LIBYUV_SCALE_ARGB_H_ #include "libyuv/basic_types.h" #include "libyuv/scale.h" // For FilterMode @@ -19,39 +19,16 @@ namespace libyuv { extern "C" { #endif LIBYUV_API int ARGBScale(const uint8* src_argb, int src_stride_argb, int src_width, int src_height, uint8* dst_argb, int dst_stride_argb, int dst_width, int dst_height, - enum FilterMode filtering); - -// Clipped scale takes destination rectangle coordinates for clip values. -LIBYUV_API -int ARGBScaleClip(const uint8* src_argb, int src_stride_argb, - int src_width, int src_height, - uint8* dst_argb, int dst_stride_argb, - int dst_width, int dst_height, - int clip_x, int clip_y, int clip_width, int clip_height, - enum FilterMode filtering); - -// TODO(fbarchard): Implement this. -// Scale with YUV conversion to ARGB and clipping. -LIBYUV_API -int YUVToARGBScaleClip(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint32 src_fourcc, - int src_width, int src_height, - uint8* dst_argb, int dst_stride_argb, - uint32 dst_fourcc, - int dst_width, int dst_height, - int clip_x, int clip_y, int clip_width, int clip_height, - enum FilterMode filtering); + FilterMode filtering); #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif #endif // INCLUDE_LIBYUV_SCALE_ARGB_H_ NOLINT
deleted file mode 100644 --- a/media/webrtc/trunk/third_party/libyuv/include/libyuv/scale_row.h +++ /dev/null @@ -1,301 +0,0 @@ -/* - * Copyright 2013 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef INCLUDE_LIBYUV_SCALE_ROW_H_ // NOLINT -#define INCLUDE_LIBYUV_SCALE_ROW_H_ - -#include "libyuv/basic_types.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -#if defined(__pnacl__) || defined(__CLR_VER) || defined(COVERAGE_ENABLED) || \ - defined(TARGET_IPHONE_SIMULATOR) -#define LIBYUV_DISABLE_X86 -#endif - -// The following are available on all x86 platforms: -#if !defined(LIBYUV_DISABLE_X86) && \ - (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) -#define HAS_SCALEROWDOWN2_SSE2 -#define HAS_SCALEROWDOWN4_SSE2 -#define HAS_SCALEROWDOWN34_SSSE3 -#define HAS_SCALEROWDOWN38_SSSE3 -#define HAS_SCALEADDROWS_SSE2 -#define HAS_SCALEFILTERCOLS_SSSE3 -#define HAS_SCALECOLSUP2_SSE2 -#define HAS_SCALEARGBROWDOWN2_SSE2 -#define HAS_SCALEARGBROWDOWNEVEN_SSE2 -#define HAS_SCALEARGBCOLS_SSE2 -#define HAS_SCALEARGBFILTERCOLS_SSSE3 -#define HAS_SCALEARGBCOLSUP2_SSE2 -#define HAS_FIXEDDIV_X86 -#define HAS_FIXEDDIV1_X86 -#endif - -// The following are available on Neon platforms: -#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \ - (defined(__ARM_NEON__) || defined(LIBYUV_NEON)) -#define HAS_SCALEROWDOWN2_NEON -#define HAS_SCALEROWDOWN4_NEON -#define HAS_SCALEROWDOWN34_NEON -#define HAS_SCALEROWDOWN38_NEON -#define HAS_SCALEARGBROWDOWNEVEN_NEON -#define HAS_SCALEARGBROWDOWN2_NEON -#endif - -// The following are available on Mips platforms: -#if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \ - defined(__mips__) && defined(__mips_dsp) && (__mips_dsp_rev >= 2) -#define HAS_SCALEROWDOWN2_MIPS_DSPR2 -#define HAS_SCALEROWDOWN4_MIPS_DSPR2 -#define HAS_SCALEROWDOWN34_MIPS_DSPR2 -#define HAS_SCALEROWDOWN38_MIPS_DSPR2 -#endif - -// Scale ARGB vertically with bilinear interpolation. -void ScalePlaneVertical(int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint8* src_argb, uint8* dst_argb, - int x, int y, int dy, - int bpp, enum FilterMode filtering); - -// Simplify the filtering based on scale factors. -enum FilterMode ScaleFilterReduce(int src_width, int src_height, - int dst_width, int dst_height, - enum FilterMode filtering); - -// Divide num by div and return as 16.16 fixed point result. -int FixedDiv_C(int num, int div); -int FixedDiv_X86(int num, int div); -// Divide num - 1 by div - 1 and return as 16.16 fixed point result. -int FixedDiv1_C(int num, int div); -int FixedDiv1_X86(int num, int div); -#ifdef HAS_FIXEDDIV_X86 -#define FixedDiv FixedDiv_X86 -#define FixedDiv1 FixedDiv1_X86 -#else -#define FixedDiv FixedDiv_C -#define FixedDiv1 FixedDiv1_C -#endif - -// Compute slope values for stepping. -void ScaleSlope(int src_width, int src_height, - int dst_width, int dst_height, - enum FilterMode filtering, - int* x, int* y, int* dx, int* dy); - -void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown4Box_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown34_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown34_0_Box_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* d, int dst_width); -void ScaleRowDown34_1_Box_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* d, int dst_width); -void ScaleCols_C(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x, int dx); -void ScaleColsUp2_C(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int, int); -void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x, int dx); -void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x, int dx); -void ScaleRowDown38_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown38_3_Box_C(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleAddRows_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint16* dst_ptr, int src_width, int src_height); -void ScaleARGBRowDown2_C(const uint8* src_argb, - ptrdiff_t src_stride, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDown2Linear_C(const uint8* src_argb, - ptrdiff_t src_stride, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDown2Box_C(const uint8* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDownEven_C(const uint8* src_argb, ptrdiff_t src_stride, - int src_stepx, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDownEvenBox_C(const uint8* src_argb, - ptrdiff_t src_stride, - int src_stepx, - uint8* dst_argb, int dst_width); -void ScaleARGBCols_C(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx); -void ScaleARGBCols64_C(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx); -void ScaleARGBColsUp2_C(uint8* dst_argb, const uint8* src_argb, - int dst_width, int, int); -void ScaleARGBFilterCols_C(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx); -void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx); - -void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown2_Unaligned_SSE2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown2Linear_Unaligned_SSE2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown2Box_Unaligned_SSE2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, - uint16* dst_ptr, int src_width, - int src_height); -void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x, int dx); -void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x, int dx); -void ScaleARGBRowDown2_SSE2(const uint8* src_argb, - ptrdiff_t src_stride, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb, - ptrdiff_t src_stride, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb, - ptrdiff_t src_stride, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride, - int src_stepx, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb, - ptrdiff_t src_stride, - int src_stepx, - uint8* dst_argb, int dst_width); -void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx); -void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx); -void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx); -// Row functions. -void ScaleARGBRowDownEven_NEON(const uint8* src_argb, int src_stride, - int src_stepx, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, int src_stride, - int src_stepx, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); - -// ScaleRowDown2Box also used by planar functions -// NEON downscalers with interpolation. - -// Note - not static due to reuse in convert for 444 to 420. -void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); - -void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); - -void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); - -// Down scale from 4 to 3 pixels. Use the neon multilane read/write -// to load up the every 4th pixel into a 4 different registers. -// Point samples 32 pixels to 24 pixels. -void ScaleRowDown34_NEON(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); - -// 32 -> 12 -void ScaleRowDown38_NEON(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -// 32x3 -> 12x1 -void ScaleRowDown38_3_Box_NEON(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -// 32x2 -> 12x1 -void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); - -void ScaleRowDown2_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown2Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown4_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown4Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown34_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown34_0_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* d, int dst_width); -void ScaleRowDown34_1_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* d, int dst_width); -void ScaleRowDown38_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown38_2_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown38_3_Box_MIPS_DSPR2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif - -#endif // INCLUDE_LIBYUV_SCALE_ROW_H_ NOLINT
--- a/media/webrtc/trunk/third_party/libyuv/include/libyuv/version.h +++ b/media/webrtc/trunk/third_party/libyuv/include/libyuv/version.h @@ -1,16 +1,16 @@ /* * Copyright 2012 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may + * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT #define INCLUDE_LIBYUV_VERSION_H_ -#define LIBYUV_VERSION 971 +#define LIBYUV_VERSION 389 #endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
--- a/media/webrtc/trunk/third_party/libyuv/include/libyuv/video_common.h +++ b/media/webrtc/trunk/third_party/libyuv/include/libyuv/video_common.h @@ -1,15 +1,15 @@ /* * Copyright 2011 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may + * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ // Common definitions for video, including fourcc and VideoFormat. #ifndef INCLUDE_LIBYUV_VIDEO_COMMON_H_ // NOLINT #define INCLUDE_LIBYUV_VIDEO_COMMON_H_ @@ -22,155 +22,132 @@ extern "C" { ////////////////////////////////////////////////////////////////////////////// // Definition of FourCC codes ////////////////////////////////////////////////////////////////////////////// // Convert four characters to a FourCC code. // Needs to be a macro otherwise the OS X compiler complains when the kFormat* // constants are used in a switch. -#ifdef __cplusplus #define FOURCC(a, b, c, d) ( \ (static_cast<uint32>(a)) | (static_cast<uint32>(b) << 8) | \ (static_cast<uint32>(c) << 16) | (static_cast<uint32>(d) << 24)) -#else -#define FOURCC(a, b, c, d) ( \ - ((uint32)(a)) | ((uint32)(b) << 8) | /* NOLINT */ \ - ((uint32)(c) << 16) | ((uint32)(d) << 24)) /* NOLINT */ -#endif // Some pages discussing FourCC codes: // http://www.fourcc.org/yuv.php // http://v4l2spec.bytesex.org/spec/book1.htm // http://developer.apple.com/quicktime/icefloe/dispatch020.html // http://msdn.microsoft.com/library/windows/desktop/dd206750.aspx#nv12 // http://people.xiph.org/~xiphmont/containers/nut/nut4cc.txt -// FourCC codes grouped according to implementation efficiency. -// Primary formats should convert in 1 efficient step. -// Secondary formats are converted in 2 steps. -// Auxilliary formats call primary converters. enum FourCC { - // 9 Primary YUV formats: 5 planar, 2 biplanar, 2 packed. + // Canonical fourcc codes used in our code. FOURCC_I420 = FOURCC('I', '4', '2', '0'), FOURCC_I422 = FOURCC('I', '4', '2', '2'), FOURCC_I444 = FOURCC('I', '4', '4', '4'), FOURCC_I411 = FOURCC('I', '4', '1', '1'), FOURCC_I400 = FOURCC('I', '4', '0', '0'), - FOURCC_NV21 = FOURCC('N', 'V', '2', '1'), - FOURCC_NV12 = FOURCC('N', 'V', '1', '2'), + FOURCC_YV12 = FOURCC('Y', 'V', '1', '2'), + FOURCC_YV16 = FOURCC('Y', 'V', '1', '6'), + FOURCC_YV24 = FOURCC('Y', 'V', '2', '4'), FOURCC_YUY2 = FOURCC('Y', 'U', 'Y', '2'), FOURCC_UYVY = FOURCC('U', 'Y', 'V', 'Y'), - - // 2 Secondary YUV formats: row biplanar. FOURCC_M420 = FOURCC('M', '4', '2', '0'), FOURCC_Q420 = FOURCC('Q', '4', '2', '0'), - - // 9 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp. + FOURCC_V210 = FOURCC('V', '2', '1', '0'), + FOURCC_24BG = FOURCC('2', '4', 'B', 'G'), FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'), FOURCC_BGRA = FOURCC('B', 'G', 'R', 'A'), FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'), - FOURCC_24BG = FOURCC('2', '4', 'B', 'G'), + FOURCC_RGBA = FOURCC('R', 'G', 'B', 'A'), + FOURCC_RGBP = FOURCC('R', 'G', 'B', 'P'), // bgr565. + FOURCC_RGBO = FOURCC('R', 'G', 'B', 'O'), // abgr1555. + FOURCC_R444 = FOURCC('R', '4', '4', '4'), // argb4444. FOURCC_RAW = FOURCC('r', 'a', 'w', ' '), - FOURCC_RGBA = FOURCC('R', 'G', 'B', 'A'), - FOURCC_RGBP = FOURCC('R', 'G', 'B', 'P'), // rgb565 LE. - FOURCC_RGBO = FOURCC('R', 'G', 'B', 'O'), // argb1555 LE. - FOURCC_R444 = FOURCC('R', '4', '4', '4'), // argb4444 LE. - - // 4 Secondary RGB formats: 4 Bayer Patterns. + FOURCC_NV21 = FOURCC('N', 'V', '2', '1'), + FOURCC_NV12 = FOURCC('N', 'V', '1', '2'), + FOURCC_MJPG = FOURCC('M', 'J', 'P', 'G'), + FOURCC_H264 = FOURCC('H', '2', '6', '4'), + // Next four are Bayer RGB formats. The four characters define the order of + // the colours in each 2x2 pixel grid, going left-to-right and top-to-bottom. FOURCC_RGGB = FOURCC('R', 'G', 'G', 'B'), FOURCC_BGGR = FOURCC('B', 'G', 'G', 'R'), FOURCC_GRBG = FOURCC('G', 'R', 'B', 'G'), FOURCC_GBRG = FOURCC('G', 'B', 'R', 'G'), - // 1 Primary Compressed YUV format. - FOURCC_MJPG = FOURCC('M', 'J', 'P', 'G'), - - // 5 Auxiliary YUV variations: 3 with U and V planes are swapped, 1 Alias. - FOURCC_YV12 = FOURCC('Y', 'V', '1', '2'), - FOURCC_YV16 = FOURCC('Y', 'V', '1', '6'), - FOURCC_YV24 = FOURCC('Y', 'V', '2', '4'), - FOURCC_YU12 = FOURCC('Y', 'U', '1', '2'), // Linux version of I420. - FOURCC_J420 = FOURCC('J', '4', '2', '0'), - FOURCC_J400 = FOURCC('J', '4', '0', '0'), - - // 14 Auxiliary aliases. CanonicalFourCC() maps these to canonical fourcc. + // Aliases for canonical fourcc codes, replaced with their canonical + // equivalents by CanonicalFourCC(). FOURCC_IYUV = FOURCC('I', 'Y', 'U', 'V'), // Alias for I420. + FOURCC_YU12 = FOURCC('Y', 'U', '1', '2'), // Alias for I420. FOURCC_YU16 = FOURCC('Y', 'U', '1', '6'), // Alias for I422. FOURCC_YU24 = FOURCC('Y', 'U', '2', '4'), // Alias for I444. FOURCC_YUYV = FOURCC('Y', 'U', 'Y', 'V'), // Alias for YUY2. FOURCC_YUVS = FOURCC('y', 'u', 'v', 's'), // Alias for YUY2 on Mac. FOURCC_HDYC = FOURCC('H', 'D', 'Y', 'C'), // Alias for UYVY. - FOURCC_2VUY = FOURCC('2', 'v', 'u', 'y'), // Alias for UYVY on Mac. + FOURCC_2VUY = FOURCC('2', 'v', 'u', 'y'), // Alias for UYVY. FOURCC_JPEG = FOURCC('J', 'P', 'E', 'G'), // Alias for MJPG. FOURCC_DMB1 = FOURCC('d', 'm', 'b', '1'), // Alias for MJPG on Mac. FOURCC_BA81 = FOURCC('B', 'A', '8', '1'), // Alias for BGGR. FOURCC_RGB3 = FOURCC('R', 'G', 'B', '3'), // Alias for RAW. FOURCC_BGR3 = FOURCC('B', 'G', 'R', '3'), // Alias for 24BG. - FOURCC_CM32 = FOURCC(0, 0, 0, 32), // Alias for BGRA kCMPixelFormat_32ARGB - FOURCC_CM24 = FOURCC(0, 0, 0, 24), // Alias for RAW kCMPixelFormat_24RGB - FOURCC_L555 = FOURCC('L', '5', '5', '5'), // Alias for RGBO. - FOURCC_L565 = FOURCC('L', '5', '6', '5'), // Alias for RGBP. - FOURCC_5551 = FOURCC('5', '5', '5', '1'), // Alias for RGBO. - - // 1 Auxiliary compressed YUV format set aside for capturer. - FOURCC_H264 = FOURCC('H', '2', '6', '4'), // Match any fourcc. FOURCC_ANY = 0xFFFFFFFF, }; enum FourCCBpp { // Canonical fourcc codes used in our code. FOURCC_BPP_I420 = 12, FOURCC_BPP_I422 = 16, FOURCC_BPP_I444 = 24, FOURCC_BPP_I411 = 12, FOURCC_BPP_I400 = 8, - FOURCC_BPP_NV21 = 12, - FOURCC_BPP_NV12 = 12, + FOURCC_BPP_YV12 = 12, + FOURCC_BPP_YV16 = 16, + FOURCC_BPP_YV24 = 24, FOURCC_BPP_YUY2 = 16, FOURCC_BPP_UYVY = 16, FOURCC_BPP_M420 = 12, FOURCC_BPP_Q420 = 12, + FOURCC_BPP_V210 = 22, // 128 / 6 actually. + FOURCC_BPP_24BG = 24, FOURCC_BPP_ARGB = 32, FOURCC_BPP_BGRA = 32, FOURCC_BPP_ABGR = 32, FOURCC_BPP_RGBA = 32, - FOURCC_BPP_24BG = 24, - FOURCC_BPP_RAW = 24, FOURCC_BPP_RGBP = 16, FOURCC_BPP_RGBO = 16, FOURCC_BPP_R444 = 16, + FOURCC_BPP_RAW = 24, + FOURCC_BPP_NV21 = 12, + FOURCC_BPP_NV12 = 12, + FOURCC_BPP_MJPG = 0, // 0 means unknown. + FOURCC_BPP_H264 = 0, + // Next four are Bayer RGB formats. The four characters define the order of + // the colours in each 2x2 pixel grid, going left-to-right and top-to-bottom. FOURCC_BPP_RGGB = 8, FOURCC_BPP_BGGR = 8, FOURCC_BPP_GRBG = 8, FOURCC_BPP_GBRG = 8, - FOURCC_BPP_YV12 = 12, - FOURCC_BPP_YV16 = 16, - FOURCC_BPP_YV24 = 24, + + // Aliases for canonical fourcc codes, replaced with their canonical + // equivalents by CanonicalFourCC(). + FOURCC_BPP_IYUV = 12, FOURCC_BPP_YU12 = 12, - FOURCC_BPP_J420 = 12, - FOURCC_BPP_J400 = 8, - FOURCC_BPP_MJPG = 0, // 0 means unknown. - FOURCC_BPP_H264 = 0, - FOURCC_BPP_IYUV = 12, FOURCC_BPP_YU16 = 16, FOURCC_BPP_YU24 = 24, FOURCC_BPP_YUYV = 16, FOURCC_BPP_YUVS = 16, FOURCC_BPP_HDYC = 16, FOURCC_BPP_2VUY = 16, FOURCC_BPP_JPEG = 1, FOURCC_BPP_DMB1 = 1, FOURCC_BPP_BA81 = 8, FOURCC_BPP_RGB3 = 24, FOURCC_BPP_BGR3 = 24, - FOURCC_BPP_CM32 = 32, - FOURCC_BPP_CM24 = 24, // Match any fourcc. FOURCC_BPP_ANY = 0, // 0 means unknown. }; // Converts fourcc aliases into canonical ones. LIBYUV_API uint32 CanonicalFourCC(uint32 fourcc);
new file mode 100644 --- /dev/null +++ b/media/webrtc/trunk/third_party/libyuv/libyuv.Makefile @@ -0,0 +1,6 @@ +# This file is generated by gyp; do not edit. + +export builddir_name ?= trunk/third_party/libyuv/out +.PHONY: all +all: + $(MAKE) -C ../.. libyuv
--- a/media/webrtc/trunk/third_party/libyuv/libyuv.gyp +++ b/media/webrtc/trunk/third_party/libyuv/libyuv.gyp @@ -1,133 +1,99 @@ # Copyright 2011 The LibYuv Project Authors. All rights reserved. # # Use of this source code is governed by a BSD-style license # that can be found in the LICENSE file in the root of the source # tree. An additional intellectual property rights grant can be found -# in the file PATENTS. All contributing project authors may +# in the file PATENTS. All contributing project authors may # be found in the AUTHORS file in the root of the source tree. { - 'includes': [ - 'libyuv.gypi', - ], 'variables': { - 'use_system_libjpeg%': 0, - 'build_neon': 0, - 'conditions': [ - ['target_arch == "arm" and arm_version >= 7 and (arm_neon == 1 or arm_neon_optional == 1)', { - 'build_neon': 1, - }], - ], + 'use_system_libjpeg%': 0, + 'yuv_disable_asm%': 0, }, - 'conditions': [ - [ 'build_neon != 0', { - 'targets': [ - # The NEON-specific components. - { - 'target_name': 'libyuv_neon', - 'type': 'static_library', - 'standalone_static_library': 1, - 'defines': [ - 'LIBYUV_NEON', - ], - # TODO(noahric): This should remove whatever mfpu is set, not - # just vfpv3-d16. - 'cflags!': [ - '-mfpu=vfp', - '-mfpu=vfpv3', - '-mfpu=vfpv3-d16', - ], - 'cflags': [ - '-mfpu=neon', - ], - 'include_dirs': [ - 'include', - '.', - ], - 'direct_dependent_settings': { - 'include_dirs': [ - 'include', - '.', - ], - }, - 'sources': [ - # sources. - 'source/compare_neon.cc', - 'source/rotate_neon.cc', - 'source/row_neon.cc', - 'source/scale_neon.cc', - ], - }, - ], - }], - ], 'targets': [ { 'target_name': 'libyuv', - # Change type to 'shared_library' to build .so or .dll files. 'type': 'static_library', - # Allows libyuv.a redistributable library without external dependencies. - 'standalone_static_library': 1, + # 'type': 'shared_library', 'conditions': [ - # TODO(fbarchard): Use gyp define to enable jpeg. - [ 'OS != "ios"', { - 'defines': [ - 'HAVE_JPEG' + ['use_system_libjpeg==0', { + 'dependencies': [ + '<(DEPTH)/third_party/libjpeg_turbo/libjpeg.gyp:libjpeg', ], - 'conditions': [ - # Android uses libjpeg for system jpeg support. - [ 'OS == "android" and use_system_libjpeg == 1', { - 'dependencies': [ - '<(DEPTH)/third_party/libjpeg/libjpeg.gyp:libjpeg', - ], - }, { - 'dependencies': [ - '<(DEPTH)/third_party/libjpeg_turbo/libjpeg.gyp:libjpeg', - ], - }], - [ 'use_system_libjpeg == 1', { - 'link_settings': { - 'libraries': [ - '-ljpeg', - ], - } - }], - ], - }], - [ 'build_neon != 0', { - 'dependencies': [ - 'libyuv_neon', - ], - 'defines': [ - 'LIBYUV_NEON', - ] + }, { + 'link_settings': { + 'libraries': [ + '-ljpeg', + ], + }, }], ], 'defines': [ - # Enable the following 3 macros to turn off assembly for specified CPU. - # 'LIBYUV_DISABLE_X86', - # 'LIBYUV_DISABLE_NEON', - # 'LIBYUV_DISABLE_MIPS', - # Enable the following macro to build libyuv as a shared library (dll). - # 'LIBYUV_USING_SHARED_LIBRARY', + 'HAVE_JPEG', + # 'LIBYUV_BUILDING_SHARED_LIBRARY', ], 'include_dirs': [ 'include', '.', ], 'direct_dependent_settings': { 'include_dirs': [ 'include', '.', ], }, + 'conditions': [ + ['yuv_disable_asm==1', { + 'defines': [ + 'YUV_DISABLE_ASM', + ], + }], + ], 'sources': [ - '<@(libyuv_sources)', + # includes. + 'include/libyuv.h', + 'include/libyuv/basic_types.h', + 'include/libyuv/compare.h', + 'include/libyuv/convert.h', + 'include/libyuv/convert_argb.h', + 'include/libyuv/convert_from.h', + 'include/libyuv/cpu_id.h', + 'include/libyuv/format_conversion.h', + 'include/libyuv/mjpeg_decoder.h', + 'include/libyuv/planar_functions.h', + 'include/libyuv/rotate.h', + 'include/libyuv/rotate_argb.h', + 'include/libyuv/row.h', + 'include/libyuv/scale.h', + 'include/libyuv/scale_argb.h', + 'include/libyuv/version.h', + 'include/libyuv/video_common.h', + + # sources. + 'source/compare.cc', + 'source/convert.cc', + 'source/convert_argb.cc', + 'source/convert_from.cc', + 'source/cpu_id.cc', + 'source/format_conversion.cc', + 'source/mjpeg_decoder.cc', + 'source/planar_functions.cc', + 'source/rotate.cc', + 'source/rotate_argb.cc', + 'source/rotate_neon.cc', + 'source/row_common.cc', + 'source/row_neon.cc', + 'source/row_posix.cc', + 'source/row_win.cc', + 'source/scale.cc', + 'source/scale_argb.cc', + 'source/video_common.cc', ], }, ], # targets. } # Local Variables: # tab-width:2 # indent-tabs-mode:nil
deleted file mode 100644 --- a/media/webrtc/trunk/third_party/libyuv/libyuv.gypi +++ /dev/null @@ -1,67 +0,0 @@ -# Copyright 2014 The LibYuv Project Authors. All rights reserved. -# -# Use of this source code is governed by a BSD-style license -# that can be found in the LICENSE file in the root of the source -# tree. An additional intellectual property rights grant can be found -# in the file PATENTS. All contributing project authors may -# be found in the AUTHORS file in the root of the source tree. - -{ - 'variables': { - 'libyuv_sources': [ - # includes. - 'include/libyuv.h', - 'include/libyuv/basic_types.h', - 'include/libyuv/compare.h', - 'include/libyuv/convert.h', - 'include/libyuv/convert_argb.h', - 'include/libyuv/convert_from.h', - 'include/libyuv/convert_from_argb.h', - 'include/libyuv/cpu_id.h', - 'include/libyuv/format_conversion.h', - 'include/libyuv/mjpeg_decoder.h', - 'include/libyuv/planar_functions.h', - 'include/libyuv/rotate.h', - 'include/libyuv/rotate_argb.h', - 'include/libyuv/row.h', - 'include/libyuv/scale.h', - 'include/libyuv/scale_argb.h', - 'include/libyuv/scale_row.h', - 'include/libyuv/version.h', - 'include/libyuv/video_common.h', - - # sources. - 'source/compare.cc', - 'source/compare_common.cc', - 'source/compare_posix.cc', - 'source/compare_win.cc', - 'source/convert.cc', - 'source/convert_argb.cc', - 'source/convert_from.cc', - 'source/convert_from_argb.cc', - 'source/convert_jpeg.cc', - 'source/convert_to_argb.cc', - 'source/convert_to_i420.cc', - 'source/cpu_id.cc', - 'source/format_conversion.cc', - 'source/mjpeg_decoder.cc', - 'source/mjpeg_validate.cc', - 'source/planar_functions.cc', - 'source/rotate.cc', - 'source/rotate_argb.cc', - 'source/rotate_mips.cc', - 'source/row_any.cc', - 'source/row_common.cc', - 'source/row_mips.cc', - 'source/row_posix.cc', - 'source/row_win.cc', - 'source/scale.cc', - 'source/scale_argb.cc', - 'source/scale_common.cc', - 'source/scale_mips.cc', - 'source/scale_posix.cc', - 'source/scale_win.cc', - 'source/video_common.cc', - ], - } -}
new file mode 100644 --- /dev/null +++ b/media/webrtc/trunk/third_party/libyuv/libyuv.target.mk @@ -0,0 +1,211 @@ +# This file is generated by gyp; do not edit. + +TOOLSET := target +TARGET := libyuv +DEFS_Debug := \ + '-D_FILE_OFFSET_BITS=64' \ + '-DCHROMIUM_BUILD' \ + '-DUSE_LIBJPEG_TURBO=1' \ + '-DENABLE_ONE_CLICK_SIGNIN' \ + '-DGTK_DISABLE_SINGLE_INCLUDES=1' \ + '-DENABLE_REMOTING=1' \ + '-DENABLE_WEBRTC=1' \ + '-DENABLE_CONFIGURATION_POLICY' \ + '-DENABLE_INPUT_SPEECH' \ + '-DENABLE_NOTIFICATIONS' \ + '-DENABLE_GPU=1' \ + '-DUSE_OPENSSL=1' \ + '-DENABLE_EGLIMAGE=1' \ + '-DUSE_SKIA=1' \ + '-DENABLE_TASK_MANAGER=1' \ + '-DENABLE_WEB_INTENTS=1' \ + '-DENABLE_EXTENSIONS=1' \ + '-DENABLE_PLUGIN_INSTALLATION=1' \ + '-DENABLE_PROTECTOR_SERVICE=1' \ + '-DENABLE_SESSION_SERVICE=1' \ + '-DENABLE_THEMES=1' \ + '-DENABLE_BACKGROUND=1' \ + '-DENABLE_AUTOMATION=1' \ + '-DENABLE_PRINTING=1' \ + '-DENABLE_CAPTIVE_PORTAL_DETECTION=1' \ + '-DHAVE_JPEG' \ + '-DDYNAMIC_ANNOTATIONS_ENABLED=1' \ + '-DWTF_USE_DYNAMIC_ANNOTATIONS=1' \ + '-D_DEBUG' + +# Flags passed to all source files. +CFLAGS_Debug := \ + -pthread \ + -fno-exceptions \ + -fno-strict-aliasing \ + -Wno-unused-parameter \ + -Wno-missing-field-initializers \ + -fvisibility=hidden \ + -pipe \ + -fPIC \ + -Wno-format \ + -Wno-unused-result \ + -O0 \ + -g + +# Flags passed to only C files. +CFLAGS_C_Debug := + +# Flags passed to only C++ files. +CFLAGS_CC_Debug := \ + -fno-rtti \ + -fno-threadsafe-statics \ + -fvisibility-inlines-hidden \ + -Wno-deprecated + +INCS_Debug := \ + -Ithird_party/libyuv/include \ + -Ithird_party/libyuv \ + -Ithird_party/libjpeg_turbo + +DEFS_Release := \ + '-D_FILE_OFFSET_BITS=64' \ + '-DCHROMIUM_BUILD' \ + '-DUSE_LIBJPEG_TURBO=1' \ + '-DENABLE_ONE_CLICK_SIGNIN' \ + '-DGTK_DISABLE_SINGLE_INCLUDES=1' \ + '-DENABLE_REMOTING=1' \ + '-DENABLE_WEBRTC=1' \ + '-DENABLE_CONFIGURATION_POLICY' \ + '-DENABLE_INPUT_SPEECH' \ + '-DENABLE_NOTIFICATIONS' \ + '-DENABLE_GPU=1' \ + '-DUSE_OPENSSL=1' \ + '-DENABLE_EGLIMAGE=1' \ + '-DUSE_SKIA=1' \ + '-DENABLE_TASK_MANAGER=1' \ + '-DENABLE_WEB_INTENTS=1' \ + '-DENABLE_EXTENSIONS=1' \ + '-DENABLE_PLUGIN_INSTALLATION=1' \ + '-DENABLE_PROTECTOR_SERVICE=1' \ + '-DENABLE_SESSION_SERVICE=1' \ + '-DENABLE_THEMES=1' \ + '-DENABLE_BACKGROUND=1' \ + '-DENABLE_AUTOMATION=1' \ + '-DENABLE_PRINTING=1' \ + '-DENABLE_CAPTIVE_PORTAL_DETECTION=1' \ + '-DHAVE_JPEG' \ + '-DNDEBUG' \ + '-DNVALGRIND' \ + '-DDYNAMIC_ANNOTATIONS_ENABLED=0' + +# Flags passed to all source files. +CFLAGS_Release := \ + -pthread \ + -fno-exceptions \ + -fno-strict-aliasing \ + -Wno-unused-parameter \ + -Wno-missing-field-initializers \ + -fvisibility=hidden \ + -pipe \ + -fPIC \ + -Wno-format \ + -Wno-unused-result \ + -O2 \ + -fno-ident \ + -fdata-sections \ + -ffunction-sections + +# Flags passed to only C files. +CFLAGS_C_Release := + +# Flags passed to only C++ files. +CFLAGS_CC_Release := \ + -fno-rtti \ + -fno-threadsafe-statics \ + -fvisibility-inlines-hidden \ + -Wno-deprecated + +INCS_Release := \ + -Ithird_party/libyuv/include \ + -Ithird_party/libyuv \ + -Ithird_party/libjpeg_turbo + +OBJS := \ + $(obj).target/$(TARGET)/third_party/libyuv/source/compare.o \ + $(obj).target/$(TARGET)/third_party/libyuv/source/convert.o \ + $(obj).target/$(TARGET)/third_party/libyuv/source/convert_argb.o \ + $(obj).target/$(TARGET)/third_party/libyuv/source/convert_from.o \ + $(obj).target/$(TARGET)/third_party/libyuv/source/cpu_id.o \ + $(obj).target/$(TARGET)/third_party/libyuv/source/format_conversion.o \ + $(obj).target/$(TARGET)/third_party/libyuv/source/mjpeg_decoder.o \ + $(obj).target/$(TARGET)/third_party/libyuv/source/planar_functions.o \ + $(obj).target/$(TARGET)/third_party/libyuv/source/rotate.o \ + $(obj).target/$(TARGET)/third_party/libyuv/source/rotate_argb.o \ + $(obj).target/$(TARGET)/third_party/libyuv/source/rotate_neon.o \ + $(obj).target/$(TARGET)/third_party/libyuv/source/row_common.o \ + $(obj).target/$(TARGET)/third_party/libyuv/source/row_neon.o \ + $(obj).target/$(TARGET)/third_party/libyuv/source/row_posix.o \ + $(obj).target/$(TARGET)/third_party/libyuv/source/row_win.o \ + $(obj).target/$(TARGET)/third_party/libyuv/source/scale.o \ + $(obj).target/$(TARGET)/third_party/libyuv/source/scale_argb.o \ + $(obj).target/$(TARGET)/third_party/libyuv/source/video_common.o + +# Add to the list of files we specially track dependencies for. +all_deps += $(OBJS) + +# CFLAGS et al overrides must be target-local. +# See "Target-specific Variable Values" in the GNU Make manual. +$(OBJS): TOOLSET := $(TOOLSET) +$(OBJS): GYP_CFLAGS := $(DEFS_$(BUILDTYPE)) $(INCS_$(BUILDTYPE)) $(CFLAGS_$(BUILDTYPE)) $(CFLAGS_C_$(BUILDTYPE)) +$(OBJS): GYP_CXXFLAGS := $(DEFS_$(BUILDTYPE)) $(INCS_$(BUILDTYPE)) $(CFLAGS_$(BUILDTYPE)) $(CFLAGS_CC_$(BUILDTYPE)) + +# Suffix rules, putting all outputs into $(obj). + +$(obj).$(TOOLSET)/$(TARGET)/%.o: $(srcdir)/%.cc FORCE_DO_CMD + @$(call do_cmd,cxx,1) + +# Try building from generated source, too. + +$(obj).$(TOOLSET)/$(TARGET)/%.o: $(obj).$(TOOLSET)/%.cc FORCE_DO_CMD + @$(call do_cmd,cxx,1) + +$(obj).$(TOOLSET)/$(TARGET)/%.o: $(obj)/%.cc FORCE_DO_CMD + @$(call do_cmd,cxx,1) + +# End of this set of suffix rules +### Rules for final target. +LDFLAGS_Debug := \ + -pthread \ + -Wl,-z,noexecstack \ + -fPIC \ + -Wl,--threads \ + -Wl,--thread-count=4 \ + -B$(builddir)/../../third_party/gold \ + -Wl,--icf=none + +LDFLAGS_Release := \ + -pthread \ + -Wl,-z,noexecstack \ + -fPIC \ + -Wl,--threads \ + -Wl,--thread-count=4 \ + -B$(builddir)/../../third_party/gold \ + -Wl,--icf=none \ + -Wl,-O1 \ + -Wl,--as-needed \ + -Wl,--gc-sections + +LIBS := \ + + +$(obj).target/third_party/libyuv/libyuv.a: GYP_LDFLAGS := $(LDFLAGS_$(BUILDTYPE)) +$(obj).target/third_party/libyuv/libyuv.a: LIBS := $(LIBS) +$(obj).target/third_party/libyuv/libyuv.a: TOOLSET := $(TOOLSET) +$(obj).target/third_party/libyuv/libyuv.a: $(OBJS) FORCE_DO_CMD + $(call do_cmd,alink_thin) + +all_deps += $(obj).target/third_party/libyuv/libyuv.a +# Add target alias +.PHONY: libyuv +libyuv: $(obj).target/third_party/libyuv/libyuv.a + +# Add target alias to "all" target. +.PHONY: all +all: libyuv +
deleted file mode 100644 --- a/media/webrtc/trunk/third_party/libyuv/libyuv_nacl.gyp +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright 2014 The LibYuv Project Authors. All rights reserved. -# -# Use of this source code is governed by a BSD-style license -# that can be found in the LICENSE file in the root of the source -# tree. An additional intellectual property rights grant can be found -# in the file PATENTS. All contributing project authors may -# be found in the AUTHORS file in the root of the source tree. - -{ - 'includes': [ - 'libyuv.gypi', - '../../native_client/build/untrusted.gypi', - ], - 'targets': [ - { - 'target_name': 'libyuv_nacl', - 'type': 'none', - 'variables': { - 'nlib_target': 'libyuv_nacl.a', - 'build_glibc': 0, - 'build_newlib': 0, - 'build_pnacl_newlib': 1, - }, - 'dependencies': [ - '../../native_client/tools.gyp:prep_toolchain', - ], - 'include_dirs': [ - 'include', - ], - 'direct_dependent_settings': { - 'include_dirs': [ - 'include', - ], - }, - 'sources': [ - '<@(libyuv_sources)', - ], - }, # target libyuv_nacl - ] -}
--- a/media/webrtc/trunk/third_party/libyuv/libyuv_test.gyp +++ b/media/webrtc/trunk/third_party/libyuv/libyuv_test.gyp @@ -1,66 +1,52 @@ # Copyright 2011 The LibYuv Project Authors. All rights reserved. # # Use of this source code is governed by a BSD-style license # that can be found in the LICENSE file in the root of the source # tree. An additional intellectual property rights grant can be found -# in the file PATENTS. All contributing project authors may +# in the file PATENTS. All contributing project authors may # be found in the AUTHORS file in the root of the source tree. { 'targets': [ { 'target_name': 'libyuv_unittest', 'type': 'executable', 'dependencies': [ 'libyuv.gyp:libyuv', # The tests are based on gtest 'testing/gtest.gyp:gtest', 'testing/gtest.gyp:gtest_main', ], 'defines': [ 'LIBYUV_SVNREVISION="<!(svnversion -n)"', - # Enable the following 3 macros to turn off assembly for specified CPU. - # 'LIBYUV_DISABLE_X86', - # 'LIBYUV_DISABLE_NEON', - # 'LIBYUV_DISABLE_MIPS', - # Enable the following macro to build libyuv as a shared library (dll). # 'LIBYUV_USING_SHARED_LIBRARY', ], 'sources': [ # headers 'unit_test/unit_test.h', # sources - 'unit_test/basictypes_test.cc', 'unit_test/compare_test.cc', - 'unit_test/convert_test.cc', 'unit_test/cpu_test.cc', - 'unit_test/math_test.cc', 'unit_test/planar_test.cc', 'unit_test/rotate_argb_test.cc', 'unit_test/rotate_test.cc', 'unit_test/scale_argb_test.cc', 'unit_test/scale_test.cc', 'unit_test/unit_test.cc', - 'unit_test/video_common_test.cc', 'unit_test/version_test.cc', ], 'conditions': [ ['OS=="linux"', { 'cflags': [ '-fexceptions', ], }], - [ 'OS != "ios"', { - 'defines': [ - 'HAVE_JPEG', - ], - }], ], # conditions }, { 'target_name': 'compare', 'type': 'executable', 'dependencies': [ 'libyuv.gyp:libyuv', @@ -72,56 +58,17 @@ 'conditions': [ ['OS=="linux"', { 'cflags': [ '-fexceptions', ], }], ], # conditions }, - { - 'target_name': 'convert', - 'type': 'executable', - 'dependencies': [ - 'libyuv.gyp:libyuv', - ], - 'sources': [ - # sources - 'util/convert.cc', - ], - 'conditions': [ - ['OS=="linux"', { - 'cflags': [ - '-fexceptions', - ], - }], - ], # conditions - }, - # TODO(fbarchard): Enable SSE2 and OpenMP for better performance. - { - 'target_name': 'psnr', - 'type': 'executable', - 'sources': [ - # sources - 'util/psnr_main.cc', - 'util/psnr.cc', - 'util/ssim.cc', - ], - }, - { - 'target_name': 'cpuid', - 'type': 'executable', - 'sources': [ - # sources - 'util/cpuid.c', - ], - 'dependencies': [ - 'libyuv.gyp:libyuv', - ], - }, + ], # targets } # Local Variables: # tab-width:2 # indent-tabs-mode:nil # End: # vim: set expandtab tabstop=2 shiftwidth=2:
deleted file mode 100644 --- a/media/webrtc/trunk/third_party/libyuv/linux.mk +++ /dev/null @@ -1,48 +0,0 @@ -# This is a generic makefile for libyuv for gcc. -# make -f linux.mk CC=clang++ - -CC=g++ -CCFLAGS=-O2 -fomit-frame-pointer -Iinclude/ - -LOCAL_OBJ_FILES := \ - source/compare.o \ - source/compare_common.o \ - source/compare_posix.o \ - source/convert.o \ - source/convert_argb.o \ - source/convert_from.o \ - source/convert_from_argb.o \ - source/convert_to_argb.o \ - source/convert_to_i420.o \ - source/cpu_id.o \ - source/format_conversion.o \ - source/planar_functions.o \ - source/rotate.o \ - source/rotate_argb.o \ - source/rotate_mips.o \ - source/row_any.o \ - source/row_common.o \ - source/row_mips.o \ - source/row_posix.o \ - source/scale.o \ - source/scale_argb.o \ - source/scale_common.o \ - source/scale_mips.o \ - source/scale_posix.o \ - source/video_common.o - -.cc.o: - $(CC) -c $(CCFLAGS) $*.cc -o $*.o - -all: libyuv.a convert linux.mk - -libyuv.a: $(LOCAL_OBJ_FILES) linux.mk - $(AR) $(ARFLAGS) -o $@ $(LOCAL_OBJ_FILES) - -# A test utility that uses libyuv conversion. -convert: util/convert.cc linux.mk - $(CC) $(CCFLAGS) -Iutil/ -o $@ util/convert.cc libyuv.a - -clean: - /bin/rm -f source/*.o *.ii *.s libyuv.a convert -
deleted file mode 100644 --- a/media/webrtc/trunk/third_party/libyuv/public.mk +++ /dev/null @@ -1,13 +0,0 @@ -# This file contains all the common make variables which are useful for -# anyone depending on this library. -# Note that dependencies on NDK are not directly listed since NDK auto adds -# them. - -LIBYUV_INCLUDES := $(LIBYUV_PATH)/include - -LIBYUV_C_FLAGS := - -LIBYUV_CPP_FLAGS := - -LIBYUV_LDLIBS := -LIBYUV_DEP_MODULES :=
--- a/media/webrtc/trunk/third_party/libyuv/source/compare.cc +++ b/media/webrtc/trunk/third_party/libyuv/source/compare.cc @@ -1,15 +1,15 @@ /* * Copyright 2011 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may + * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "libyuv/compare.h" #include <float.h> #include <math.h> #ifdef _OPENMP @@ -22,161 +22,445 @@ #ifdef __cplusplus namespace libyuv { extern "C" { #endif // hash seed of 5381 recommended. // Internal C version of HashDjb2 with int sized count for efficiency. -uint32 HashDjb2_C(const uint8* src, int count, uint32 seed); +static uint32 HashDjb2_C(const uint8* src, int count, uint32 seed) { + uint32 hash = seed; + for (int i = 0; i < count; ++i) { + hash += (hash << 5) + src[i]; + } + return hash; +} // This module is for Visual C x86 -#if !defined(LIBYUV_DISABLE_X86) && \ - (defined(_M_IX86) || \ - (defined(__x86_64__) || (defined(__i386__) && !defined(__pic__)))) +#if !defined(YUV_DISABLE_ASM) && defined(_M_IX86) #define HAS_HASHDJB2_SSE41 -uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed); +static const uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16 +static const uvec32 kHashMul0 = { + 0x0c3525e1, // 33 ^ 15 + 0xa3476dc1, // 33 ^ 14 + 0x3b4039a1, // 33 ^ 13 + 0x4f5f0981, // 33 ^ 12 +}; +static const uvec32 kHashMul1 = { + 0x30f35d61, // 33 ^ 11 + 0x855cb541, // 33 ^ 10 + 0x040a9121, // 33 ^ 9 + 0x747c7101, // 33 ^ 8 +}; +static const uvec32 kHashMul2 = { + 0xec41d4e1, // 33 ^ 7 + 0x4cfa3cc1, // 33 ^ 6 + 0x025528a1, // 33 ^ 5 + 0x00121881, // 33 ^ 4 +}; +static const uvec32 kHashMul3 = { + 0x00008c61, // 33 ^ 3 + 0x00000441, // 33 ^ 2 + 0x00000021, // 33 ^ 1 + 0x00000001, // 33 ^ 0 +}; + +// 27: 66 0F 38 40 C6 pmulld xmm0,xmm6 +// 44: 66 0F 38 40 DD pmulld xmm3,xmm5 +// 59: 66 0F 38 40 E5 pmulld xmm4,xmm5 +// 72: 66 0F 38 40 D5 pmulld xmm2,xmm5 +// 83: 66 0F 38 40 CD pmulld xmm1,xmm5 +#define pmulld(reg) _asm _emit 0x66 _asm _emit 0x0F _asm _emit 0x38 \ + _asm _emit 0x40 _asm _emit reg + +__declspec(naked) __declspec(align(16)) +static uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) { + __asm { + mov eax, [esp + 4] // src + mov ecx, [esp + 8] // count + movd xmm0, [esp + 12] // seed + + pxor xmm7, xmm7 // constant 0 for unpck + movdqa xmm6, kHash16x33 + + align 16 + wloop: + movdqu xmm1, [eax] // src[0-15] + lea eax, [eax + 16] + pmulld(0xc6) // pmulld xmm0,xmm6 hash *= 33 ^ 16 + movdqa xmm5, kHashMul0 + movdqa xmm2, xmm1 + punpcklbw xmm2, xmm7 // src[0-7] + movdqa xmm3, xmm2 + punpcklwd xmm3, xmm7 // src[0-3] + pmulld(0xdd) // pmulld xmm3, xmm5 + movdqa xmm5, kHashMul1 + movdqa xmm4, xmm2 + punpckhwd xmm4, xmm7 // src[4-7] + pmulld(0xe5) // pmulld xmm4, xmm5 + movdqa xmm5, kHashMul2 + punpckhbw xmm1, xmm7 // src[8-15] + movdqa xmm2, xmm1 + punpcklwd xmm2, xmm7 // src[8-11] + pmulld(0xd5) // pmulld xmm2, xmm5 + movdqa xmm5, kHashMul3 + punpckhwd xmm1, xmm7 // src[12-15] + pmulld(0xcd) // pmulld xmm1, xmm5 + paddd xmm3, xmm4 // add 16 results + paddd xmm1, xmm2 + sub ecx, 16 + paddd xmm1, xmm3 + + pshufd xmm2, xmm1, 14 // upper 2 dwords + paddd xmm1, xmm2 + pshufd xmm2, xmm1, 1 + paddd xmm1, xmm2 + paddd xmm0, xmm1 + jg wloop + + movd eax, xmm0 // return hash + ret + } +} -#if _MSC_VER >= 1700 -#define HAS_HASHDJB2_AVX2 -uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed); +#elif !defined(YUV_DISABLE_ASM) && \ + (defined(__x86_64__) || (defined(__i386__) && !defined(__pic__))) +// GCC 4.2 on OSX has link error when passing static or const to inline. +// TODO(fbarchard): Use static const when gcc 4.2 support is dropped. +#ifdef __APPLE__ +#define CONST +#else +#define CONST static const #endif - +#define HAS_HASHDJB2_SSE41 +CONST uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16 +CONST uvec32 kHashMul0 = { + 0x0c3525e1, // 33 ^ 15 + 0xa3476dc1, // 33 ^ 14 + 0x3b4039a1, // 33 ^ 13 + 0x4f5f0981, // 33 ^ 12 +}; +CONST uvec32 kHashMul1 = { + 0x30f35d61, // 33 ^ 11 + 0x855cb541, // 33 ^ 10 + 0x040a9121, // 33 ^ 9 + 0x747c7101, // 33 ^ 8 +}; +CONST uvec32 kHashMul2 = { + 0xec41d4e1, // 33 ^ 7 + 0x4cfa3cc1, // 33 ^ 6 + 0x025528a1, // 33 ^ 5 + 0x00121881, // 33 ^ 4 +}; +CONST uvec32 kHashMul3 = { + 0x00008c61, // 33 ^ 3 + 0x00000441, // 33 ^ 2 + 0x00000021, // 33 ^ 1 + 0x00000001, // 33 ^ 0 +}; +static uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) { + uint32 hash; + asm volatile ( + "movd %2,%%xmm0 \n" + "pxor %%xmm7,%%xmm7 \n" + "movdqa %4,%%xmm6 \n" + ".p2align 4 \n" + "1: \n" + "movdqu (%0),%%xmm1 \n" + "lea 0x10(%0),%0 \n" + "pmulld %%xmm6,%%xmm0 \n" + "movdqa %5,%%xmm5 \n" + "movdqa %%xmm1,%%xmm2 \n" + "punpcklbw %%xmm7,%%xmm2 \n" + "movdqa %%xmm2,%%xmm3 \n" + "punpcklwd %%xmm7,%%xmm3 \n" + "pmulld %%xmm5,%%xmm3 \n" + "movdqa %6,%%xmm5 \n" + "movdqa %%xmm2,%%xmm4 \n" + "punpckhwd %%xmm7,%%xmm4 \n" + "pmulld %%xmm5,%%xmm4 \n" + "movdqa %7,%%xmm5 \n" + "punpckhbw %%xmm7,%%xmm1 \n" + "movdqa %%xmm1,%%xmm2 \n" + "punpcklwd %%xmm7,%%xmm2 \n" + "pmulld %%xmm5,%%xmm2 \n" + "movdqa %8,%%xmm5 \n" + "punpckhwd %%xmm7,%%xmm1 \n" + "pmulld %%xmm5,%%xmm1 \n" + "paddd %%xmm4,%%xmm3 \n" + "paddd %%xmm2,%%xmm1 \n" + "sub $0x10,%1 \n" + "paddd %%xmm3,%%xmm1 \n" + "pshufd $0xe,%%xmm1,%%xmm2 \n" + "paddd %%xmm2,%%xmm1 \n" + "pshufd $0x1,%%xmm1,%%xmm2 \n" + "paddd %%xmm2,%%xmm1 \n" + "paddd %%xmm1,%%xmm0 \n" + "jg 1b \n" + "movd %%xmm0,%3 \n" + : "+r"(src), // %0 + "+r"(count), // %1 + "+rm"(seed), // %2 + "=g"(hash) // %3 + : "m"(kHash16x33), // %4 + "m"(kHashMul0), // %5 + "m"(kHashMul1), // %6 + "m"(kHashMul2), // %7 + "m"(kHashMul3) // %8 + : "memory", "cc" +#if defined(__SSE2__) + , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" +#endif + ); + return hash; +} #endif // HAS_HASHDJB2_SSE41 // hash seed of 5381 recommended. LIBYUV_API uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) { - const int kBlockSize = 1 << 15; // 32768; - int remainder; uint32 (*HashDjb2_SSE)(const uint8* src, int count, uint32 seed) = HashDjb2_C; #if defined(HAS_HASHDJB2_SSE41) if (TestCpuFlag(kCpuHasSSE41)) { HashDjb2_SSE = HashDjb2_SSE41; } #endif -#if defined(HAS_HASHDJB2_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - HashDjb2_SSE = HashDjb2_AVX2; - } -#endif - while (count >= (uint64)(kBlockSize)) { + const int kBlockSize = 1 << 15; // 32768; + while (count >= static_cast<uint64>(kBlockSize)) { seed = HashDjb2_SSE(src, kBlockSize, seed); src += kBlockSize; count -= kBlockSize; } - remainder = (int)(count) & ~15; + int remainder = static_cast<int>(count) & ~15; if (remainder) { seed = HashDjb2_SSE(src, remainder, seed); src += remainder; count -= remainder; } - remainder = (int)(count) & 15; + remainder = static_cast<int>(count) & 15; if (remainder) { seed = HashDjb2_C(src, remainder, seed); } return seed; } -uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count); -#if !defined(LIBYUV_DISABLE_NEON) && \ - (defined(__ARM_NEON__) || defined(LIBYUV_NEON)) +#if !defined(YUV_DISABLE_ASM) && defined(__ARM_NEON__) #define HAS_SUMSQUAREERROR_NEON -uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count); + +static uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, + int count) { + volatile uint32 sse; + asm volatile ( + "vmov.u8 q7, #0 \n" + "vmov.u8 q9, #0 \n" + "vmov.u8 q8, #0 \n" + "vmov.u8 q10, #0 \n" + + "1: \n" + "vld1.u8 {q0}, [%0]! \n" + "vld1.u8 {q1}, [%1]! \n" + "vsubl.u8 q2, d0, d2 \n" + "vsubl.u8 q3, d1, d3 \n" + "vmlal.s16 q7, d4, d4 \n" + "vmlal.s16 q8, d6, d6 \n" + "vmlal.s16 q8, d5, d5 \n" + "vmlal.s16 q10, d7, d7 \n" + "subs %2, %2, #16 \n" + "bgt 1b \n" + + "vadd.u32 q7, q7, q8 \n" + "vadd.u32 q9, q9, q10 \n" + "vadd.u32 q10, q7, q9 \n" + "vpaddl.u32 q1, q10 \n" + "vadd.u64 d0, d2, d3 \n" + "vmov.32 %3, d0[0] \n" + : "+r"(src_a), + "+r"(src_b), + "+r"(count), + "=r"(sse) + : + : "memory", "cc", "q0", "q1", "q2", "q3", "q7", "q8", "q9", "q10"); + return sse; +} + +#elif !defined(YUV_DISABLE_ASM) && defined(_M_IX86) +#define HAS_SUMSQUAREERROR_SSE2 +__declspec(naked) __declspec(align(16)) +static uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, + int count) { + __asm { + mov eax, [esp + 4] // src_a + mov edx, [esp + 8] // src_b + mov ecx, [esp + 12] // count + pxor xmm0, xmm0 + pxor xmm5, xmm5 + sub edx, eax + + align 16 + wloop: + movdqa xmm1, [eax] + movdqa xmm2, [eax + edx] + lea eax, [eax + 16] + sub ecx, 16 + movdqa xmm3, xmm1 // abs trick + psubusb xmm1, xmm2 + psubusb xmm2, xmm3 + por xmm1, xmm2 + movdqa xmm2, xmm1 + punpcklbw xmm1, xmm5 + punpckhbw xmm2, xmm5 + pmaddwd xmm1, xmm1 + pmaddwd xmm2, xmm2 + paddd xmm0, xmm1 + paddd xmm0, xmm2 + jg wloop + + pshufd xmm1, xmm0, 0EEh + paddd xmm0, xmm1 + pshufd xmm1, xmm0, 01h + paddd xmm0, xmm1 + movd eax, xmm0 + ret + } +} + +#elif !defined(YUV_DISABLE_ASM) && (defined(__x86_64__) || defined(__i386__)) +#define HAS_SUMSQUAREERROR_SSE2 +static uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, + int count) { + uint32 sse; + asm volatile ( + "pxor %%xmm0,%%xmm0 \n" + "pxor %%xmm5,%%xmm5 \n" + "sub %0,%1 \n" + ".p2align 4 \n" + "1: \n" + "movdqa (%0),%%xmm1 \n" + "movdqa (%0,%1,1),%%xmm2 \n" + "lea 0x10(%0),%0 \n" + "sub $0x10,%2 \n" + "movdqa %%xmm1,%%xmm3 \n" + "psubusb %%xmm2,%%xmm1 \n" + "psubusb %%xmm3,%%xmm2 \n" + "por %%xmm2,%%xmm1 \n" + "movdqa %%xmm1,%%xmm2 \n" + "punpcklbw %%xmm5,%%xmm1 \n" + "punpckhbw %%xmm5,%%xmm2 \n" + "pmaddwd %%xmm1,%%xmm1 \n" + "pmaddwd %%xmm2,%%xmm2 \n" + "paddd %%xmm1,%%xmm0 \n" + "paddd %%xmm2,%%xmm0 \n" + "jg 1b \n" + + "pshufd $0xee,%%xmm0,%%xmm1 \n" + "paddd %%xmm1,%%xmm0 \n" + "pshufd $0x1,%%xmm0,%%xmm1 \n" + "paddd %%xmm1,%%xmm0 \n" + "movd %%xmm0,%3 \n" + + : "+r"(src_a), // %0 + "+r"(src_b), // %1 + "+r"(count), // %2 + "=g"(sse) // %3 + : + : "memory", "cc" +#if defined(__SSE2__) + , "xmm0", "xmm1", "xmm2", "xmm5" #endif -#if !defined(LIBYUV_DISABLE_X86) && \ - (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) -#define HAS_SUMSQUAREERROR_SSE2 -uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count); -#endif -// Visual C 2012 required for AVX2. -#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && _MSC_VER >= 1700 -#define HAS_SUMSQUAREERROR_AVX2 -uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count); + ); + return sse; +} #endif -// TODO(fbarchard): Refactor into row function. +static uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, + int count) { + uint32 sse = 0u; + for (int i = 0; i < count; ++i) { + int diff = src_a[i] - src_b[i]; + sse += static_cast<uint32>(diff * diff); + } + return sse; +} + LIBYUV_API uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b, int count) { - // SumSquareError returns values 0 to 65535 for each squared difference. - // Up to 65536 of those can be summed and remain within a uint32. - // After each block of 65536 pixels, accumulate into a uint64. - const int kBlockSize = 65536; - int remainder = count & (kBlockSize - 1) & ~31; - uint64 sse = 0; - int i; uint32 (*SumSquareError)(const uint8* src_a, const uint8* src_b, int count) = SumSquareError_C; #if defined(HAS_SUMSQUAREERROR_NEON) if (TestCpuFlag(kCpuHasNEON)) { SumSquareError = SumSquareError_NEON; } -#endif -#if defined(HAS_SUMSQUAREERROR_SSE2) +#elif defined(HAS_SUMSQUAREERROR_SSE2) if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(src_a, 16) && IS_ALIGNED(src_b, 16)) { // Note only used for multiples of 16 so count is not checked. SumSquareError = SumSquareError_SSE2; } #endif -#if defined(HAS_SUMSQUAREERROR_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - // Note only used for multiples of 32 so count is not checked. - SumSquareError = SumSquareError_AVX2; - } -#endif + // 32K values will fit a 32bit int return value from SumSquareError. + // After each block of 32K, accumulate into 64 bit int. + const int kBlockSize = 1 << 15; // 32768; + uint64 sse = 0; #ifdef _OPENMP #pragma omp parallel for reduction(+: sse) #endif - for (i = 0; i < (count - (kBlockSize - 1)); i += kBlockSize) { + for (int i = 0; i < (count - (kBlockSize - 1)); i += kBlockSize) { sse += SumSquareError(src_a + i, src_b + i, kBlockSize); } src_a += count & ~(kBlockSize - 1); src_b += count & ~(kBlockSize - 1); + int remainder = count & (kBlockSize - 1) & ~15; if (remainder) { sse += SumSquareError(src_a, src_b, remainder); src_a += remainder; src_b += remainder; } - remainder = count & 31; + remainder = count & 15; if (remainder) { sse += SumSquareError_C(src_a, src_b, remainder); } return sse; } LIBYUV_API uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a, const uint8* src_b, int stride_b, int width, int height) { + uint32 (*SumSquareError)(const uint8* src_a, const uint8* src_b, int count) = + SumSquareError_C; +#if defined(HAS_SUMSQUAREERROR_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + SumSquareError = SumSquareError_NEON; + } +#elif defined(HAS_SUMSQUAREERROR_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) && + IS_ALIGNED(src_a, 16) && IS_ALIGNED(stride_a, 16) && + IS_ALIGNED(src_b, 16) && IS_ALIGNED(stride_b, 16)) { + SumSquareError = SumSquareError_SSE2; + } +#endif + uint64 sse = 0; - int h; - // Coalesce rows. - if (stride_a == width && - stride_b == width) { - width *= height; - height = 1; - stride_a = stride_b = 0; - } - for (h = 0; h < height; ++h) { - sse += ComputeSumSquareError(src_a, src_b, width); + for (int h = 0; h < height; ++h) { + sse += SumSquareError(src_a, src_b, width); src_a += stride_a; src_b += stride_b; } + return sse; } LIBYUV_API double SumSquareErrorToPsnr(uint64 sse, uint64 count) { double psnr; if (sse > 0) { - double mse = (double)(count) / (double)(sse); + double mse = static_cast<double>(count) / static_cast<double>(sse); psnr = 10.0 * log10(255.0 * 255.0 * mse); } else { psnr = kMaxPsnr; // Limit to prevent divide by 0 } if (psnr > kMaxPsnr) psnr = kMaxPsnr; @@ -224,73 +508,69 @@ static const int64 cc2 = 239708; // (64 static double Ssim8x8_C(const uint8* src_a, int stride_a, const uint8* src_b, int stride_b) { int64 sum_a = 0; int64 sum_b = 0; int64 sum_sq_a = 0; int64 sum_sq_b = 0; int64 sum_axb = 0; - int i; - for (i = 0; i < 8; ++i) { - int j; - for (j = 0; j < 8; ++j) { + for (int i = 0; i < 8; ++i) { + for (int j = 0; j < 8; ++j) { sum_a += src_a[j]; sum_b += src_b[j]; sum_sq_a += src_a[j] * src_a[j]; sum_sq_b += src_b[j] * src_b[j]; sum_axb += src_a[j] * src_b[j]; } src_a += stride_a; src_b += stride_b; } - { - const int64 count = 64; - // scale the constants by number of pixels - const int64 c1 = (cc1 * count * count) >> 12; - const int64 c2 = (cc2 * count * count) >> 12; + const int64 count = 64; + // scale the constants by number of pixels + const int64 c1 = (cc1 * count * count) >> 12; + const int64 c2 = (cc2 * count * count) >> 12; - const int64 sum_a_x_sum_b = sum_a * sum_b; + const int64 sum_a_x_sum_b = sum_a * sum_b; - const int64 ssim_n = (2 * sum_a_x_sum_b + c1) * - (2 * count * sum_axb - 2 * sum_a_x_sum_b + c2); + const int64 ssim_n = (2 * sum_a_x_sum_b + c1) * + (2 * count * sum_axb - 2 * sum_a_x_sum_b + c2); - const int64 sum_a_sq = sum_a*sum_a; - const int64 sum_b_sq = sum_b*sum_b; + const int64 sum_a_sq = sum_a*sum_a; + const int64 sum_b_sq = sum_b*sum_b; - const int64 ssim_d = (sum_a_sq + sum_b_sq + c1) * - (count * sum_sq_a - sum_a_sq + - count * sum_sq_b - sum_b_sq + c2); + const int64 ssim_d = (sum_a_sq + sum_b_sq + c1) * + (count * sum_sq_a - sum_a_sq + + count * sum_sq_b - sum_b_sq + c2); - if (ssim_d == 0.0) { - return DBL_MAX; - } - return ssim_n * 1.0 / ssim_d; - } + if (ssim_d == 0.0) + return DBL_MAX; + return ssim_n * 1.0 / ssim_d; } // We are using a 8x8 moving window with starting location of each 8x8 window // on the 4x4 pixel grid. Such arrangement allows the windows to overlap // block boundaries to penalize blocking artifacts. LIBYUV_API double CalcFrameSsim(const uint8* src_a, int stride_a, const uint8* src_b, int stride_b, int width, int height) { int samples = 0; double ssim_total = 0; + double (*Ssim8x8)(const uint8* src_a, int stride_a, - const uint8* src_b, int stride_b) = Ssim8x8_C; + const uint8* src_b, int stride_b); + + Ssim8x8 = Ssim8x8_C; // sample point start with each 4x4 location - int i; - for (i = 0; i < height - 8; i += 4) { - int j; - for (j = 0; j < width - 8; j += 4) { + for (int i = 0; i < height - 8; i += 4) { + for (int j = 0; j < width - 8; j += 4) { ssim_total += Ssim8x8(src_a + j, stride_a, src_b + j, stride_b); samples++; } src_a += stride_a * 4; src_b += stride_b * 4; }
deleted file mode 100644 --- a/media/webrtc/trunk/third_party/libyuv/source/compare_common.cc +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright 2012 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "libyuv/basic_types.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count) { - uint32 sse = 0u; - int i; - for (i = 0; i < count; ++i) { - int diff = src_a[i] - src_b[i]; - sse += (uint32)(diff * diff); - } - return sse; -} - -// hash seed of 5381 recommended. -// Internal C version of HashDjb2 with int sized count for efficiency. -uint32 HashDjb2_C(const uint8* src, int count, uint32 seed) { - uint32 hash = seed; - int i; - for (i = 0; i < count; ++i) { - hash += (hash << 5) + src[i]; - } - return hash; -} - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif
deleted file mode 100644 --- a/media/webrtc/trunk/third_party/libyuv/source/compare_neon.cc +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright 2012 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "libyuv/basic_types.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) - -uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) { - volatile uint32 sse; - asm volatile ( - "vmov.u8 q8, #0 \n" - "vmov.u8 q10, #0 \n" - "vmov.u8 q9, #0 \n" - "vmov.u8 q11, #0 \n" - - ".p2align 2 \n" - "1: \n" - "vld1.8 {q0}, [%0]! \n" - "vld1.8 {q1}, [%1]! \n" - "subs %2, %2, #16 \n" - "vsubl.u8 q2, d0, d2 \n" - "vsubl.u8 q3, d1, d3 \n" - "vmlal.s16 q8, d4, d4 \n" - "vmlal.s16 q9, d6, d6 \n" - "vmlal.s16 q10, d5, d5 \n" - "vmlal.s16 q11, d7, d7 \n" - "bgt 1b \n" - - "vadd.u32 q8, q8, q9 \n" - "vadd.u32 q10, q10, q11 \n" - "vadd.u32 q11, q8, q10 \n" - "vpaddl.u32 q1, q11 \n" - "vadd.u64 d0, d2, d3 \n" - "vmov.32 %3, d0[0] \n" - : "+r"(src_a), - "+r"(src_b), - "+r"(count), - "=r"(sse) - : - : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"); - return sse; -} - -#endif // __ARM_NEON__ - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif
deleted file mode 100644 --- a/media/webrtc/trunk/third_party/libyuv/source/compare_posix.cc +++ /dev/null @@ -1,158 +0,0 @@ -/* - * Copyright 2012 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "libyuv/basic_types.h" -#include "libyuv/row.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__)) - -uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) { - uint32 sse; - asm volatile ( // NOLINT - "pxor %%xmm0,%%xmm0 \n" - "pxor %%xmm5,%%xmm5 \n" - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm1 \n" - "lea " MEMLEA(0x10, 0) ",%0 \n" - "movdqa " MEMACCESS(1) ",%%xmm2 \n" - "lea " MEMLEA(0x10, 1) ",%1 \n" - "sub $0x10,%2 \n" - "movdqa %%xmm1,%%xmm3 \n" - "psubusb %%xmm2,%%xmm1 \n" - "psubusb %%xmm3,%%xmm2 \n" - "por %%xmm2,%%xmm1 \n" - "movdqa %%xmm1,%%xmm2 \n" - "punpcklbw %%xmm5,%%xmm1 \n" - "punpckhbw %%xmm5,%%xmm2 \n" - "pmaddwd %%xmm1,%%xmm1 \n" - "pmaddwd %%xmm2,%%xmm2 \n" - "paddd %%xmm1,%%xmm0 \n" - "paddd %%xmm2,%%xmm0 \n" - "jg 1b \n" - - "pshufd $0xee,%%xmm0,%%xmm1 \n" - "paddd %%xmm1,%%xmm0 \n" - "pshufd $0x1,%%xmm0,%%xmm1 \n" - "paddd %%xmm1,%%xmm0 \n" - "movd %%xmm0,%3 \n" - - : "+r"(src_a), // %0 - "+r"(src_b), // %1 - "+r"(count), // %2 - "=g"(sse) // %3 - : - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" -#endif - ); // NOLINT - return sse; -} - -#endif // defined(__x86_64__) || defined(__i386__) - -#if !defined(LIBYUV_DISABLE_X86) && \ - (defined(__x86_64__) || (defined(__i386__) && !defined(__pic__))) -#define HAS_HASHDJB2_SSE41 -static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16 -static uvec32 kHashMul0 = { - 0x0c3525e1, // 33 ^ 15 - 0xa3476dc1, // 33 ^ 14 - 0x3b4039a1, // 33 ^ 13 - 0x4f5f0981, // 33 ^ 12 -}; -static uvec32 kHashMul1 = { - 0x30f35d61, // 33 ^ 11 - 0x855cb541, // 33 ^ 10 - 0x040a9121, // 33 ^ 9 - 0x747c7101, // 33 ^ 8 -}; -static uvec32 kHashMul2 = { - 0xec41d4e1, // 33 ^ 7 - 0x4cfa3cc1, // 33 ^ 6 - 0x025528a1, // 33 ^ 5 - 0x00121881, // 33 ^ 4 -}; -static uvec32 kHashMul3 = { - 0x00008c61, // 33 ^ 3 - 0x00000441, // 33 ^ 2 - 0x00000021, // 33 ^ 1 - 0x00000001, // 33 ^ 0 -}; - -uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) { - uint32 hash; - asm volatile ( // NOLINT - "movd %2,%%xmm0 \n" - "pxor %%xmm7,%%xmm7 \n" - "movdqa %4,%%xmm6 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm1 \n" - "lea " MEMLEA(0x10, 0) ",%0 \n" - "pmulld %%xmm6,%%xmm0 \n" - "movdqa %5,%%xmm5 \n" - "movdqa %%xmm1,%%xmm2 \n" - "punpcklbw %%xmm7,%%xmm2 \n" - "movdqa %%xmm2,%%xmm3 \n" - "punpcklwd %%xmm7,%%xmm3 \n" - "pmulld %%xmm5,%%xmm3 \n" - "movdqa %6,%%xmm5 \n" - "movdqa %%xmm2,%%xmm4 \n" - "punpckhwd %%xmm7,%%xmm4 \n" - "pmulld %%xmm5,%%xmm4 \n" - "movdqa %7,%%xmm5 \n" - "punpckhbw %%xmm7,%%xmm1 \n" - "movdqa %%xmm1,%%xmm2 \n" - "punpcklwd %%xmm7,%%xmm2 \n" - "pmulld %%xmm5,%%xmm2 \n" - "movdqa %8,%%xmm5 \n" - "punpckhwd %%xmm7,%%xmm1 \n" - "pmulld %%xmm5,%%xmm1 \n" - "paddd %%xmm4,%%xmm3 \n" - "paddd %%xmm2,%%xmm1 \n" - "sub $0x10,%1 \n" - "paddd %%xmm3,%%xmm1 \n" - "pshufd $0xe,%%xmm1,%%xmm2 \n" - "paddd %%xmm2,%%xmm1 \n" - "pshufd $0x1,%%xmm1,%%xmm2 \n" - "paddd %%xmm2,%%xmm1 \n" - "paddd %%xmm1,%%xmm0 \n" - "jg 1b \n" - "movd %%xmm0,%3 \n" - : "+r"(src), // %0 - "+r"(count), // %1 - "+rm"(seed), // %2 - "=g"(hash) // %3 - : "m"(kHash16x33), // %4 - "m"(kHashMul0), // %5 - "m"(kHashMul1), // %6 - "m"(kHashMul2), // %7 - "m"(kHashMul3) // %8 - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" -#endif - ); // NOLINT - return hash; -} -#endif // defined(__x86_64__) || (defined(__i386__) && !defined(__pic__))) - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif -
deleted file mode 100644 --- a/media/webrtc/trunk/third_party/libyuv/source/compare_win.cc +++ /dev/null @@ -1,232 +0,0 @@ -/* - * Copyright 2012 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "libyuv/basic_types.h" -#include "libyuv/row.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) - -__declspec(naked) __declspec(align(16)) -uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) { - __asm { - mov eax, [esp + 4] // src_a - mov edx, [esp + 8] // src_b - mov ecx, [esp + 12] // count - pxor xmm0, xmm0 - pxor xmm5, xmm5 - - align 4 - wloop: - movdqa xmm1, [eax] - lea eax, [eax + 16] - movdqa xmm2, [edx] - lea edx, [edx + 16] - sub ecx, 16 - movdqa xmm3, xmm1 // abs trick - psubusb xmm1, xmm2 - psubusb xmm2, xmm3 - por xmm1, xmm2 - movdqa xmm2, xmm1 - punpcklbw xmm1, xmm5 - punpckhbw xmm2, xmm5 - pmaddwd xmm1, xmm1 - pmaddwd xmm2, xmm2 - paddd xmm0, xmm1 - paddd xmm0, xmm2 - jg wloop - - pshufd xmm1, xmm0, 0xee - paddd xmm0, xmm1 - pshufd xmm1, xmm0, 0x01 - paddd xmm0, xmm1 - movd eax, xmm0 - ret - } -} - -// Visual C 2012 required for AVX2. -#if _MSC_VER >= 1700 -// C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX. -#pragma warning(disable: 4752) -__declspec(naked) __declspec(align(16)) -uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) { - __asm { - mov eax, [esp + 4] // src_a - mov edx, [esp + 8] // src_b - mov ecx, [esp + 12] // count - vpxor ymm0, ymm0, ymm0 // sum - vpxor ymm5, ymm5, ymm5 // constant 0 for unpck - sub edx, eax - - align 4 - wloop: - vmovdqu ymm1, [eax] - vmovdqu ymm2, [eax + edx] - lea eax, [eax + 32] - sub ecx, 32 - vpsubusb ymm3, ymm1, ymm2 // abs difference trick - vpsubusb ymm2, ymm2, ymm1 - vpor ymm1, ymm2, ymm3 - vpunpcklbw ymm2, ymm1, ymm5 // u16. mutates order. - vpunpckhbw ymm1, ymm1, ymm5 - vpmaddwd ymm2, ymm2, ymm2 // square + hadd to u32. - vpmaddwd ymm1, ymm1, ymm1 - vpaddd ymm0, ymm0, ymm1 - vpaddd ymm0, ymm0, ymm2 - jg wloop - - vpshufd ymm1, ymm0, 0xee // 3, 2 + 1, 0 both lanes. - vpaddd ymm0, ymm0, ymm1 - vpshufd ymm1, ymm0, 0x01 // 1 + 0 both lanes. - vpaddd ymm0, ymm0, ymm1 - vpermq ymm1, ymm0, 0x02 // high + low lane. - vpaddd ymm0, ymm0, ymm1 - vmovd eax, xmm0 - vzeroupper - ret - } -} -#endif // _MSC_VER >= 1700 - -#define HAS_HASHDJB2_SSE41 -static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16 -static uvec32 kHashMul0 = { - 0x0c3525e1, // 33 ^ 15 - 0xa3476dc1, // 33 ^ 14 - 0x3b4039a1, // 33 ^ 13 - 0x4f5f0981, // 33 ^ 12 -}; -static uvec32 kHashMul1 = { - 0x30f35d61, // 33 ^ 11 - 0x855cb541, // 33 ^ 10 - 0x040a9121, // 33 ^ 9 - 0x747c7101, // 33 ^ 8 -}; -static uvec32 kHashMul2 = { - 0xec41d4e1, // 33 ^ 7 - 0x4cfa3cc1, // 33 ^ 6 - 0x025528a1, // 33 ^ 5 - 0x00121881, // 33 ^ 4 -}; -static uvec32 kHashMul3 = { - 0x00008c61, // 33 ^ 3 - 0x00000441, // 33 ^ 2 - 0x00000021, // 33 ^ 1 - 0x00000001, // 33 ^ 0 -}; - -// 27: 66 0F 38 40 C6 pmulld xmm0,xmm6 -// 44: 66 0F 38 40 DD pmulld xmm3,xmm5 -// 59: 66 0F 38 40 E5 pmulld xmm4,xmm5 -// 72: 66 0F 38 40 D5 pmulld xmm2,xmm5 -// 83: 66 0F 38 40 CD pmulld xmm1,xmm5 -#define pmulld(reg) _asm _emit 0x66 _asm _emit 0x0F _asm _emit 0x38 \ - _asm _emit 0x40 _asm _emit reg - -__declspec(naked) __declspec(align(16)) -uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) { - __asm { - mov eax, [esp + 4] // src - mov ecx, [esp + 8] // count - movd xmm0, [esp + 12] // seed - - pxor xmm7, xmm7 // constant 0 for unpck - movdqa xmm6, kHash16x33 - - align 4 - wloop: - movdqu xmm1, [eax] // src[0-15] - lea eax, [eax + 16] - pmulld(0xc6) // pmulld xmm0,xmm6 hash *= 33 ^ 16 - movdqa xmm5, kHashMul0 - movdqa xmm2, xmm1 - punpcklbw xmm2, xmm7 // src[0-7] - movdqa xmm3, xmm2 - punpcklwd xmm3, xmm7 // src[0-3] - pmulld(0xdd) // pmulld xmm3, xmm5 - movdqa xmm5, kHashMul1 - movdqa xmm4, xmm2 - punpckhwd xmm4, xmm7 // src[4-7] - pmulld(0xe5) // pmulld xmm4, xmm5 - movdqa xmm5, kHashMul2 - punpckhbw xmm1, xmm7 // src[8-15] - movdqa xmm2, xmm1 - punpcklwd xmm2, xmm7 // src[8-11] - pmulld(0xd5) // pmulld xmm2, xmm5 - movdqa xmm5, kHashMul3 - punpckhwd xmm1, xmm7 // src[12-15] - pmulld(0xcd) // pmulld xmm1, xmm5 - paddd xmm3, xmm4 // add 16 results - paddd xmm1, xmm2 - sub ecx, 16 - paddd xmm1, xmm3 - - pshufd xmm2, xmm1, 0x0e // upper 2 dwords - paddd xmm1, xmm2 - pshufd xmm2, xmm1, 0x01 - paddd xmm1, xmm2 - paddd xmm0, xmm1 - jg wloop - - movd eax, xmm0 // return hash - ret - } -} - -// Visual C 2012 required for AVX2. -#if _MSC_VER >= 1700 -__declspec(naked) __declspec(align(16)) -uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) { - __asm { - mov eax, [esp + 4] // src - mov ecx, [esp + 8] // count - movd xmm0, [esp + 12] // seed - movdqa xmm6, kHash16x33 - - align 4 - wloop: - vpmovzxbd xmm3, dword ptr [eax] // src[0-3] - pmulld xmm0, xmm6 // hash *= 33 ^ 16 - vpmovzxbd xmm4, dword ptr [eax + 4] // src[4-7] - pmulld xmm3, kHashMul0 - vpmovzxbd xmm2, dword ptr [eax + 8] // src[8-11] - pmulld xmm4, kHashMul1 - vpmovzxbd xmm1, dword ptr [eax + 12] // src[12-15] - pmulld xmm2, kHashMul2 - lea eax, [eax + 16] - pmulld xmm1, kHashMul3 - paddd xmm3, xmm4 // add 16 results - paddd xmm1, xmm2 - sub ecx, 16 - paddd xmm1, xmm3 - pshufd xmm2, xmm1, 0x0e // upper 2 dwords - paddd xmm1, xmm2 - pshufd xmm2, xmm1, 0x01 - paddd xmm1, xmm2 - paddd xmm0, xmm1 - jg wloop - - movd eax, xmm0 // return hash - ret - } -} -#endif // _MSC_VER >= 1700 - -#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif
--- a/media/webrtc/trunk/third_party/libyuv/source/convert.cc +++ b/media/webrtc/trunk/third_party/libyuv/source/convert.cc @@ -1,165 +1,336 @@ /* * Copyright 2011 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may + * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "libyuv/convert.h" #include "libyuv/basic_types.h" #include "libyuv/cpu_id.h" +#include "libyuv/format_conversion.h" +#ifdef HAVE_JPEG +#include "libyuv/mjpeg_decoder.h" +#endif #include "libyuv/planar_functions.h" #include "libyuv/rotate.h" -#include "libyuv/scale.h" // For ScalePlane() +#include "libyuv/video_common.h" #include "libyuv/row.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif -#define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s) -static __inline int Abs(int v) { - return v >= 0 ? v : -v; -} - -// Any I4xx To I420 format with mirroring. -static int I4xxToI420(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int src_y_width, int src_y_height, - int src_uv_width, int src_uv_height) { - if (src_y_width == 0 || src_y_height == 0 || - src_uv_width == 0 || src_uv_height == 0) { - return -1; - } - const int dst_y_width = Abs(src_y_width); - const int dst_y_height = Abs(src_y_height); - const int dst_uv_width = SUBSAMPLE(dst_y_width, 1, 1); - const int dst_uv_height = SUBSAMPLE(dst_y_height, 1, 1); - ScalePlane(src_y, src_stride_y, src_y_width, src_y_height, - dst_y, dst_stride_y, dst_y_width, dst_y_height, - kFilterBilinear); - ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height, - dst_u, dst_stride_u, dst_uv_width, dst_uv_height, - kFilterBilinear); - ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height, - dst_v, dst_stride_v, dst_uv_width, dst_uv_height, - kFilterBilinear); - return 0; -} - // Copy I420 with optional flipping -// TODO(fbarchard): Use Scale plane which supports mirroring, but ensure -// is does row coalescing. LIBYUV_API int I420Copy(const uint8* src_y, int src_stride_y, const uint8* src_u, int src_stride_u, const uint8* src_v, int src_stride_v, uint8* dst_y, int dst_stride_y, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, int width, int height) { if (!src_y || !src_u || !src_v || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; - const int halfheight = (height + 1) >> 1; + int halfheight = (height + 1) >> 1; src_y = src_y + (height - 1) * src_stride_y; src_u = src_u + (halfheight - 1) * src_stride_u; src_v = src_v + (halfheight - 1) * src_stride_v; src_stride_y = -src_stride_y; src_stride_u = -src_stride_u; src_stride_v = -src_stride_v; } + int halfwidth = (width + 1) >> 1; + int halfheight = (height + 1) >> 1; if (dst_y) { CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); } - // Copy UV planes. - const int halfwidth = (width + 1) >> 1; - const int halfheight = (height + 1) >> 1; CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight); CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight); return 0; } -// 422 chroma is 1/2 width, 1x height -// 420 chroma is 1/2 width, 1/2 height +#if !defined(YUV_DISABLE_ASM) && defined(_M_IX86) +#define HAS_HALFROW_SSE2 +__declspec(naked) __declspec(align(16)) +static void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride, + uint8* dst_uv, int pix) { + __asm { + push edi + mov eax, [esp + 4 + 4] // src_uv + mov edx, [esp + 4 + 8] // src_uv_stride + mov edi, [esp + 4 + 12] // dst_v + mov ecx, [esp + 4 + 16] // pix + sub edi, eax + + align 16 + convertloop: + movdqa xmm0, [eax] + pavgb xmm0, [eax + edx] + sub ecx, 16 + movdqa [eax + edi], xmm0 + lea eax, [eax + 16] + jg convertloop + pop edi + ret + } +} + +#elif !defined(YUV_DISABLE_ASM) && (defined(__x86_64__) || defined(__i386__)) +#define HAS_HALFROW_SSE2 +static void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride, + uint8* dst_uv, int pix) { + asm volatile ( + "sub %0,%1 \n" + ".p2align 4 \n" +"1: \n" + "movdqa (%0),%%xmm0 \n" + "pavgb (%0,%3),%%xmm0 \n" + "sub $0x10,%2 \n" + "movdqa %%xmm0,(%0,%1) \n" + "lea 0x10(%0),%0 \n" + "jg 1b \n" + : "+r"(src_uv), // %0 + "+r"(dst_uv), // %1 + "+r"(pix) // %2 + : "r"(static_cast<intptr_t>(src_uv_stride)) // %3 + : "memory", "cc" +#if defined(__SSE2__) + , "xmm0" +#endif +); +} +#endif + +static void HalfRow_C(const uint8* src_uv, int src_uv_stride, + uint8* dst_uv, int pix) { + for (int x = 0; x < pix; ++x) { + dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1; + } +} + LIBYUV_API int I422ToI420(const uint8* src_y, int src_stride_y, const uint8* src_u, int src_stride_u, const uint8* src_v, int src_stride_v, uint8* dst_y, int dst_stride_y, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, int width, int height) { - const int src_uv_width = SUBSAMPLE(width, 1, 1); - return I4xxToI420(src_y, src_stride_y, - src_u, src_stride_u, - src_v, src_stride_v, - dst_y, dst_stride_y, - dst_u, dst_stride_u, - dst_v, dst_stride_v, - width, height, - src_uv_width, height); + if (!src_y || !src_u || !src_v || + !dst_y || !dst_u || !dst_v || + width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (height - 1) * src_stride_u; + src_v = src_v + (height - 1) * src_stride_v; + src_stride_y = -src_stride_y; + src_stride_u = -src_stride_u; + src_stride_v = -src_stride_v; + } + int halfwidth = (width + 1) >> 1; + void (*HalfRow)(const uint8* src_uv, int src_uv_stride, + uint8* dst_uv, int pix) = HalfRow_C; +#if defined(HAS_HALFROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && + IS_ALIGNED(halfwidth, 16) && + IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) && + IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) && + IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) && + IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) { + HalfRow = HalfRow_SSE2; + } +#endif + + // Copy Y plane + if (dst_y) { + CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); + } + + // SubSample U plane. + int y; + for (y = 0; y < height - 1; y += 2) { + HalfRow(src_u, src_stride_u, dst_u, halfwidth); + src_u += src_stride_u * 2; + dst_u += dst_stride_u; + } + if (height & 1) { + HalfRow(src_u, 0, dst_u, halfwidth); + } + + // SubSample V plane. + for (y = 0; y < height - 1; y += 2) { + HalfRow(src_v, src_stride_v, dst_v, halfwidth); + src_v += src_stride_v * 2; + dst_v += dst_stride_v; + } + if (height & 1) { + HalfRow(src_v, 0, dst_v, halfwidth); + } + return 0; } -// 444 chroma is 1x width, 1x height -// 420 chroma is 1/2 width, 1/2 height +// Blends 32x2 pixels to 16x1 +// source in scale.cc +#if !defined(YUV_DISABLE_ASM) && defined(__ARM_NEON__) +#define HAS_SCALEROWDOWN2_NEON +void ScaleRowDown2Int_NEON(const uint8* src_ptr, ptrdiff_t src_stride, + uint8* dst, int dst_width); +#elif !defined(YUV_DISABLE_ASM) && \ + (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) + +void ScaleRowDown2Int_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, + uint8* dst_ptr, int dst_width); +#endif +void ScaleRowDown2Int_C(const uint8* src_ptr, ptrdiff_t src_stride, + uint8* dst_ptr, int dst_width); + LIBYUV_API int I444ToI420(const uint8* src_y, int src_stride_y, const uint8* src_u, int src_stride_u, const uint8* src_v, int src_stride_v, uint8* dst_y, int dst_stride_y, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, int width, int height) { - return I4xxToI420(src_y, src_stride_y, - src_u, src_stride_u, - src_v, src_stride_v, - dst_y, dst_stride_y, - dst_u, dst_stride_u, - dst_v, dst_stride_v, - width, height, - width, height); + if (!src_y || !src_u || !src_v || + !dst_y || !dst_u || !dst_v || + width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_y = src_y + (height - 1) * src_stride_y; + src_u = src_u + (height - 1) * src_stride_u; + src_v = src_v + (height - 1) * src_stride_v; + src_stride_y = -src_stride_y; + src_stride_u = -src_stride_u; + src_stride_v = -src_stride_v; + } + int halfwidth = (width + 1) >> 1; + void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride, + uint8* dst_ptr, int dst_width) = ScaleRowDown2Int_C; +#if defined(HAS_SCALEROWDOWN2_NEON) + if (TestCpuFlag(kCpuHasNEON) && + IS_ALIGNED(halfwidth, 16)) { + ScaleRowDown2 = ScaleRowDown2Int_NEON; + } +#elif defined(HAS_SCALEROWDOWN2_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && + IS_ALIGNED(halfwidth, 16) && + IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) && + IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) && + IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) && + IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) { + ScaleRowDown2 = ScaleRowDown2Int_SSE2; + } +#endif + + // Copy Y plane + if (dst_y) { + CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); + } + + // SubSample U plane. + int y; + for (y = 0; y < height - 1; y += 2) { + ScaleRowDown2(src_u, src_stride_u, dst_u, halfwidth); + src_u += src_stride_u * 2; + dst_u += dst_stride_u; + } + if (height & 1) { + ScaleRowDown2(src_u, 0, dst_u, halfwidth); + } + + // SubSample V plane. + for (y = 0; y < height - 1; y += 2) { + ScaleRowDown2(src_v, src_stride_v, dst_v, halfwidth); + src_v += src_stride_v * 2; + dst_v += dst_stride_v; + } + if (height & 1) { + ScaleRowDown2(src_v, 0, dst_v, halfwidth); + } + return 0; } +// use Bilinear for upsampling chroma +void ScalePlaneBilinear(int src_width, int src_height, + int dst_width, int dst_height, + int src_stride, int dst_stride, + const uint8* src_ptr, uint8* dst_ptr); + // 411 chroma is 1/4 width, 1x height // 420 chroma is 1/2 width, 1/2 height LIBYUV_API int I411ToI420(const uint8* src_y, int src_stride_y, const uint8* src_u, int src_stride_u, const uint8* src_v, int src_stride_v, uint8* dst_y, int dst_stride_y, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, int width, int height) { - const int src_uv_width = SUBSAMPLE(width, 3, 2); - return I4xxToI420(src_y, src_stride_y, - src_u, src_stride_u, - src_v, src_stride_v, - dst_y, dst_stride_y, - dst_u, dst_stride_u, - dst_v, dst_stride_v, - width, height, - src_uv_width, height); + if (!src_y || !src_u || !src_v || + !dst_y || !dst_u || !dst_v || + width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + dst_y = dst_y + (height - 1) * dst_stride_y; + dst_u = dst_u + (height - 1) * dst_stride_u; + dst_v = dst_v + (height - 1) * dst_stride_v; + dst_stride_y = -dst_stride_y; + dst_stride_u = -dst_stride_u; + dst_stride_v = -dst_stride_v; + } + + // Copy Y plane + if (dst_y) { + CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); + } + + int halfwidth = (width + 1) >> 1; + int halfheight = (height + 1) >> 1; + int quarterwidth = (width + 3) >> 2; + + // Resample U plane. + ScalePlaneBilinear(quarterwidth, height, // from 1/4 width, 1x height + halfwidth, halfheight, // to 1/2 width, 1/2 height + src_stride_u, + dst_stride_u, + src_u, dst_u); + + // Resample V plane. + ScalePlaneBilinear(quarterwidth, height, // from 1/4 width, 1x height + halfwidth, halfheight, // to 1/2 width, 1/2 height + src_stride_v, + dst_stride_v, + src_v, dst_v); + return 0; } // I400 is greyscale typically used in MJPG LIBYUV_API int I400ToI420(const uint8* src_y, int src_stride_y, uint8* dst_y, int dst_stride_y, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, @@ -178,67 +349,56 @@ int I400ToI420(const uint8* src_y, int s int halfheight = (height + 1) >> 1; CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); SetPlane(dst_u, dst_stride_u, halfwidth, halfheight, 128); SetPlane(dst_v, dst_stride_v, halfwidth, halfheight, 128); return 0; } static void CopyPlane2(const uint8* src, int src_stride_0, int src_stride_1, - uint8* dst, int dst_stride, + uint8* dst, int dst_stride_frame, int width, int height) { void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C; -#if defined(HAS_COPYROW_X86) - if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) { - CopyRow = CopyRow_X86; - } -#endif -#if defined(HAS_COPYROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) && - IS_ALIGNED(src, 16) && - IS_ALIGNED(src_stride_0, 16) && IS_ALIGNED(src_stride_1, 16) && - IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) { - CopyRow = CopyRow_SSE2; - } -#endif -#if defined(HAS_COPYROW_ERMS) - if (TestCpuFlag(kCpuHasERMS)) { - CopyRow = CopyRow_ERMS; - } -#endif #if defined(HAS_COPYROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) { + if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 64)) { CopyRow = CopyRow_NEON; } +#elif defined(HAS_COPYROW_X86) + if (IS_ALIGNED(width, 4)) { + CopyRow = CopyRow_X86; +#if defined(HAS_COPYROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && + IS_ALIGNED(width, 32) && IS_ALIGNED(src, 16) && + IS_ALIGNED(src_stride_0, 16) && IS_ALIGNED(src_stride_1, 16) && + IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride_frame, 16)) { + CopyRow = CopyRow_SSE2; + } #endif -#if defined(HAS_COPYROW_MIPS) - if (TestCpuFlag(kCpuHasMIPS)) { - CopyRow = CopyRow_MIPS; } #endif // Copy plane for (int y = 0; y < height - 1; y += 2) { CopyRow(src, dst, width); - CopyRow(src + src_stride_0, dst + dst_stride, width); + CopyRow(src + src_stride_0, dst + dst_stride_frame, width); src += src_stride_0 + src_stride_1; - dst += dst_stride * 2; + dst += dst_stride_frame * 2; } if (height & 1) { CopyRow(src, dst, width); } } // Support converting from FOURCC_M420 // Useful for bandwidth constrained transports like USB 1.0 and 2.0 and for // easy conversion to I420. // M420 format description: // M420 is row biplanar 420: 2 rows of Y and 1 row of UV. // Chroma is half width / half height. (420) -// src_stride_m420 is row planar. Normally this will be the width in pixels. +// src_stride_m420 is row planar. Normally this will be the width in pixels. // The UV plane is half width, but 2 values, so src_stride_m420 applies to // this as well as the two Y planes. static int X420ToI420(const uint8* src_y, int src_stride_y0, int src_stride_y1, const uint8* src_uv, int src_stride_uv, uint8* dst_y, int dst_stride_y, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, @@ -254,91 +414,43 @@ static int X420ToI420(const uint8* src_y int halfheight = (height + 1) >> 1; dst_y = dst_y + (height - 1) * dst_stride_y; dst_u = dst_u + (halfheight - 1) * dst_stride_u; dst_v = dst_v + (halfheight - 1) * dst_stride_v; dst_stride_y = -dst_stride_y; dst_stride_u = -dst_stride_u; dst_stride_v = -dst_stride_v; } - // Coalesce rows. + int halfwidth = (width + 1) >> 1; - int halfheight = (height + 1) >> 1; - if (src_stride_y0 == width && - src_stride_y1 == width && - dst_stride_y == width) { - width *= height; - height = 1; - src_stride_y0 = src_stride_y1 = dst_stride_y = 0; - } - // Coalesce rows. - if (src_stride_uv == halfwidth * 2 && - dst_stride_u == halfwidth && - dst_stride_v == halfwidth) { - halfwidth *= halfheight; - halfheight = 1; - src_stride_uv = dst_stride_u = dst_stride_v = 0; + void (*SplitUV)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) = + SplitUV_C; +#if defined(HAS_SPLITUV_NEON) + if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(halfwidth, 16)) { + SplitUV = SplitUV_NEON; } - void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) = - SplitUVRow_C; -#if defined(HAS_SPLITUVROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) { - SplitUVRow = SplitUVRow_Any_SSE2; - if (IS_ALIGNED(halfwidth, 16)) { - SplitUVRow = SplitUVRow_Unaligned_SSE2; - if (IS_ALIGNED(src_uv, 16) && IS_ALIGNED(src_stride_uv, 16) && - IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) && - IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) { - SplitUVRow = SplitUVRow_SSE2; - } - } - } -#endif -#if defined(HAS_SPLITUVROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) { - SplitUVRow = SplitUVRow_Any_AVX2; - if (IS_ALIGNED(halfwidth, 32)) { - SplitUVRow = SplitUVRow_AVX2; - } - } -#endif -#if defined(HAS_SPLITUVROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) { - SplitUVRow = SplitUVRow_Any_NEON; - if (IS_ALIGNED(halfwidth, 16)) { - SplitUVRow = SplitUVRow_NEON; - } - } -#endif -#if defined(HAS_SPLITUVROW_MIPS_DSPR2) - if (TestCpuFlag(kCpuHasMIPS_DSPR2) && halfwidth >= 16) { - SplitUVRow = SplitUVRow_Any_MIPS_DSPR2; - if (IS_ALIGNED(halfwidth, 16)) { - SplitUVRow = SplitUVRow_Unaligned_MIPS_DSPR2; - if (IS_ALIGNED(src_uv, 4) && IS_ALIGNED(src_stride_uv, 4) && - IS_ALIGNED(dst_u, 4) && IS_ALIGNED(dst_stride_u, 4) && - IS_ALIGNED(dst_v, 4) && IS_ALIGNED(dst_stride_v, 4)) { - SplitUVRow = SplitUVRow_MIPS_DSPR2; - } - } +#elif defined(HAS_SPLITUV_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && + IS_ALIGNED(halfwidth, 16) && + IS_ALIGNED(src_uv, 16) && IS_ALIGNED(src_stride_uv, 16) && + IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) && + IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) { + SplitUV = SplitUV_SSE2; } #endif if (dst_y) { - if (src_stride_y0 == src_stride_y1) { - CopyPlane(src_y, src_stride_y0, dst_y, dst_stride_y, width, height); - } else { - CopyPlane2(src_y, src_stride_y0, src_stride_y1, dst_y, dst_stride_y, - width, height); - } + CopyPlane2(src_y, src_stride_y0, src_stride_y1, dst_y, dst_stride_y, + width, height); } + int halfheight = (height + 1) >> 1; for (int y = 0; y < halfheight; ++y) { // Copy a row of UV. - SplitUVRow(src_uv, dst_u, dst_v, halfwidth); + SplitUV(src_uv, dst_u, dst_v, halfwidth); dst_u += dst_stride_u; dst_v += dst_stride_v; src_uv += src_stride_uv; } return 0; } // Convert NV12 to I420. @@ -352,32 +464,16 @@ int NV12ToI420(const uint8* src_y, int s return X420ToI420(src_y, src_stride_y, src_stride_y, src_uv, src_stride_uv, dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, width, height); } -// Convert NV21 to I420. Same as NV12 but u and v pointers swapped. -LIBYUV_API -int NV21ToI420(const uint8* src_y, int src_stride_y, - const uint8* src_vu, int src_stride_vu, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - return X420ToI420(src_y, src_stride_y, src_stride_y, - src_vu, src_stride_vu, - dst_y, dst_stride_y, - dst_v, dst_stride_v, - dst_u, dst_stride_u, - width, height); -} - // Convert M420 to I420. LIBYUV_API int M420ToI420(const uint8* src_m420, int src_stride_m420, uint8* dst_y, int dst_stride_y, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, int width, int height) { return X420ToI420(src_m420, src_stride_m420, src_stride_m420 * 2, @@ -411,82 +507,67 @@ int Q420ToI420(const uint8* src_y, int s dst_v = dst_v + (halfheight - 1) * dst_stride_v; dst_stride_y = -dst_stride_y; dst_stride_u = -dst_stride_u; dst_stride_v = -dst_stride_v; } // CopyRow for rows of just Y in Q420 copied to Y plane of I420. void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C; #if defined(HAS_COPYROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) { + if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 64)) { CopyRow = CopyRow_NEON; } #endif #if defined(HAS_COPYROW_X86) if (IS_ALIGNED(width, 4)) { CopyRow = CopyRow_X86; } #endif #if defined(HAS_COPYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) && IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) && IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { CopyRow = CopyRow_SSE2; } #endif -#if defined(HAS_COPYROW_ERMS) - if (TestCpuFlag(kCpuHasERMS)) { - CopyRow = CopyRow_ERMS; - } -#endif -#if defined(HAS_COPYROW_MIPS) - if (TestCpuFlag(kCpuHasMIPS)) { - CopyRow = CopyRow_MIPS; - } -#endif void (*YUY2ToUV422Row)(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v, int pix) = YUY2ToUV422Row_C; void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int pix) = YUY2ToYRow_C; #if defined(HAS_YUY2TOYROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && width >= 16) { - YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2; - YUY2ToYRow = YUY2ToYRow_Any_SSE2; + if (TestCpuFlag(kCpuHasSSE2)) { + if (width > 16) { + YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2; + YUY2ToYRow = YUY2ToYRow_Any_SSE2; + } if (IS_ALIGNED(width, 16)) { YUY2ToUV422Row = YUY2ToUV422Row_Unaligned_SSE2; YUY2ToYRow = YUY2ToYRow_Unaligned_SSE2; if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16)) { YUY2ToUV422Row = YUY2ToUV422Row_SSE2; if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { YUY2ToYRow = YUY2ToYRow_SSE2; } } } } -#endif -#if defined(HAS_YUY2TOYROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2) && width >= 32) { - YUY2ToUV422Row = YUY2ToUV422Row_Any_AVX2; - YUY2ToYRow = YUY2ToYRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - YUY2ToUV422Row = YUY2ToUV422Row_AVX2; - YUY2ToYRow = YUY2ToYRow_AVX2; +#elif defined(HAS_YUY2TOYROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + if (width > 8) { + YUY2ToYRow = YUY2ToYRow_Any_NEON; + if (width > 16) { + YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON; + } } - } -#endif -#if defined(HAS_YUY2TOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - YUY2ToYRow = YUY2ToYRow_Any_NEON; - if (width >= 16) { - YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON; - } - if (IS_ALIGNED(width, 16)) { + if (IS_ALIGNED(width, 8)) { YUY2ToYRow = YUY2ToYRow_NEON; - YUY2ToUV422Row = YUY2ToUV422Row_NEON; + if (IS_ALIGNED(width, 16)) { + YUY2ToUV422Row = YUY2ToUV422Row_NEON; + } } } #endif for (int y = 0; y < height - 1; y += 2) { CopyRow(src_y, dst_y, width); src_y += src_stride_y; dst_y += dst_stride_y; @@ -500,16 +581,69 @@ int Q420ToI420(const uint8* src_y, int s } if (height & 1) { CopyRow(src_y, dst_y, width); YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width); } return 0; } +// Test if over reading on source is safe. +// TODO(fbarchard): Find more efficient solution to safely do odd sizes. +// Macros to control read policy, from slowest to fastest: +// READSAFE_NEVER - disables read ahead on systems with strict memory reads +// READSAFE_ODDHEIGHT - last row of odd height done with C. +// This policy assumes that the caller handles the last row of an odd height +// image using C. +// READSAFE_PAGE - enable read ahead within same page. +// A page is 4096 bytes. When reading ahead, if the last pixel is near the +// end the page, and a read spans the page into the next page, a memory +// exception can occur if that page has not been allocated, or is a guard +// page. This setting ensures the overread is within the same page. +// READSAFE_ALWAYS - enables read ahead on systems without memory exceptions +// or where buffers are padded by 64 bytes. + +#if defined(HAS_RGB24TOARGBROW_SSSE3) || \ + defined(HAS_RGB24TOARGBROW_SSSE3) || \ + defined(HAS_RAWTOARGBROW_SSSE3) || \ + defined(HAS_RGB565TOARGBROW_SSE2) || \ + defined(HAS_ARGB1555TOARGBROW_SSE2) || \ + defined(HAS_ARGB4444TOARGBROW_SSE2) + +#define READSAFE_ODDHEIGHT + +static bool TestReadSafe(const uint8* src_yuy2, int src_stride_yuy2, + int width, int height, int bpp, int overread) { + if (width > kMaxStride) { + return false; + } +#if defined(READSAFE_ALWAYS) + return true; +#elif defined(READSAFE_NEVER) + return false; +#elif defined(READSAFE_ODDHEIGHT) + if (!(width & 15) || + (src_stride_yuy2 >= 0 && (height & 1) && width * bpp >= overread)) { + return true; + } + return false; +#elif defined(READSAFE_PAGE) + if (src_stride_yuy2 >= 0) { + src_yuy2 += (height - 1) * src_stride_yuy2; + } + uintptr_t last_adr = (uintptr_t)(src_yuy2) + width * bpp - 1; + uintptr_t last_read_adr = last_adr + overread - 1; + if (((last_adr ^ last_read_adr) & ~4095) == 0) { + return true; + } + return false; +#endif +} +#endif + // Convert YUY2 to I420. LIBYUV_API int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2, uint8* dst_y, int dst_stride_y, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, int width, int height) { // Negative height means invert the image. @@ -520,50 +654,45 @@ int YUY2ToI420(const uint8* src_yuy2, in } void (*YUY2ToUVRow)(const uint8* src_yuy2, int src_stride_yuy2, uint8* dst_u, uint8* dst_v, int pix); void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int pix); YUY2ToYRow = YUY2ToYRow_C; YUY2ToUVRow = YUY2ToUVRow_C; #if defined(HAS_YUY2TOYROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && width >= 16) { - YUY2ToUVRow = YUY2ToUVRow_Any_SSE2; - YUY2ToYRow = YUY2ToYRow_Any_SSE2; + if (TestCpuFlag(kCpuHasSSE2)) { + if (width > 16) { + YUY2ToUVRow = YUY2ToUVRow_Any_SSE2; + YUY2ToYRow = YUY2ToYRow_Any_SSE2; + } if (IS_ALIGNED(width, 16)) { YUY2ToUVRow = YUY2ToUVRow_Unaligned_SSE2; YUY2ToYRow = YUY2ToYRow_Unaligned_SSE2; if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16)) { YUY2ToUVRow = YUY2ToUVRow_SSE2; if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { YUY2ToYRow = YUY2ToYRow_SSE2; } } } } -#endif -#if defined(HAS_YUY2TOYROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2) && width >= 32) { - YUY2ToUVRow = YUY2ToUVRow_Any_AVX2; - YUY2ToYRow = YUY2ToYRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - YUY2ToUVRow = YUY2ToUVRow_AVX2; - YUY2ToYRow = YUY2ToYRow_AVX2; +#elif defined(HAS_YUY2TOYROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + if (width > 8) { + YUY2ToYRow = YUY2ToYRow_Any_NEON; + if (width > 16) { + YUY2ToUVRow = YUY2ToUVRow_Any_NEON; + } } - } -#endif -#if defined(HAS_YUY2TOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - YUY2ToYRow = YUY2ToYRow_Any_NEON; - if (width >= 16) { - YUY2ToUVRow = YUY2ToUVRow_Any_NEON; - } - if (IS_ALIGNED(width, 16)) { + if (IS_ALIGNED(width, 8)) { YUY2ToYRow = YUY2ToYRow_NEON; - YUY2ToUVRow = YUY2ToUVRow_NEON; + if (IS_ALIGNED(width, 16)) { + YUY2ToUVRow = YUY2ToUVRow_NEON; + } } } #endif for (int y = 0; y < height - 1; y += 2) { YUY2ToUVRow(src_yuy2, src_stride_yuy2, dst_u, dst_v, width); YUY2ToYRow(src_yuy2, dst_y, width); YUY2ToYRow(src_yuy2 + src_stride_yuy2, dst_y + dst_stride_y, width); @@ -594,50 +723,45 @@ int UYVYToI420(const uint8* src_uyvy, in } void (*UYVYToUVRow)(const uint8* src_uyvy, int src_stride_uyvy, uint8* dst_u, uint8* dst_v, int pix); void (*UYVYToYRow)(const uint8* src_uyvy, uint8* dst_y, int pix); UYVYToYRow = UYVYToYRow_C; UYVYToUVRow = UYVYToUVRow_C; #if defined(HAS_UYVYTOYROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && width >= 16) { - UYVYToUVRow = UYVYToUVRow_Any_SSE2; - UYVYToYRow = UYVYToYRow_Any_SSE2; + if (TestCpuFlag(kCpuHasSSE2)) { + if (width > 16) { + UYVYToUVRow = UYVYToUVRow_Any_SSE2; + UYVYToYRow = UYVYToYRow_Any_SSE2; + } if (IS_ALIGNED(width, 16)) { UYVYToUVRow = UYVYToUVRow_Unaligned_SSE2; UYVYToYRow = UYVYToYRow_Unaligned_SSE2; if (IS_ALIGNED(src_uyvy, 16) && IS_ALIGNED(src_stride_uyvy, 16)) { UYVYToUVRow = UYVYToUVRow_SSE2; if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { UYVYToYRow = UYVYToYRow_SSE2; } } } } -#endif -#if defined(HAS_UYVYTOYROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2) && width >= 32) { - UYVYToUVRow = UYVYToUVRow_Any_AVX2; - UYVYToYRow = UYVYToYRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - UYVYToUVRow = UYVYToUVRow_AVX2; - UYVYToYRow = UYVYToYRow_AVX2; +#elif defined(HAS_UYVYTOYROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + if (width > 8) { + UYVYToYRow = UYVYToYRow_Any_NEON; + if (width > 16) { + UYVYToUVRow = UYVYToUVRow_Any_NEON; + } } - } -#endif -#if defined(HAS_UYVYTOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - UYVYToYRow = UYVYToYRow_Any_NEON; - if (width >= 16) { - UYVYToUVRow = UYVYToUVRow_Any_NEON; - } - if (IS_ALIGNED(width, 16)) { + if (IS_ALIGNED(width, 8)) { UYVYToYRow = UYVYToYRow_NEON; - UYVYToUVRow = UYVYToUVRow_NEON; + if (IS_ALIGNED(width, 16)) { + UYVYToUVRow = UYVYToUVRow_NEON; + } } } #endif for (int y = 0; y < height - 1; y += 2) { UYVYToUVRow(src_uyvy, src_stride_uyvy, dst_u, dst_v, width); UYVYToYRow(src_uyvy, dst_y, width); UYVYToYRow(src_uyvy + src_stride_uyvy, dst_y + dst_stride_y, width); @@ -648,17 +772,170 @@ int UYVYToI420(const uint8* src_uyvy, in } if (height & 1) { UYVYToUVRow(src_uyvy, 0, dst_u, dst_v, width); UYVYToYRow(src_uyvy, dst_y, width); } return 0; } -// Convert ARGB to I420. +// Visual C x86 or GCC little endian. +#if defined(__x86_64__) || defined(_M_X64) || \ + defined(__i386__) || defined(_M_IX86) || \ + defined(__arm__) || defined(_M_ARM) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +#define LIBYUV_LITTLE_ENDIAN +#endif + +#ifdef LIBYUV_LITTLE_ENDIAN +#define READWORD(p) (*reinterpret_cast<const uint32*>(p)) +#else +static inline uint32 READWORD(const uint8* p) { + return static_cast<uint32>(p[0]) | + (static_cast<uint32>(p[1]) << 8) | + (static_cast<uint32>(p[2]) << 16) | + (static_cast<uint32>(p[3]) << 24); +} +#endif + +// Must be multiple of 6 pixels. Will over convert to handle remainder. +// https://developer.apple.com/quicktime/icefloe/dispatch019.html#v210 +static void V210ToUYVYRow_C(const uint8* src_v210, uint8* dst_uyvy, int width) { + for (int x = 0; x < width; x += 6) { + uint32 w = READWORD(src_v210 + 0); + dst_uyvy[0] = (w >> 2) & 0xff; + dst_uyvy[1] = (w >> 12) & 0xff; + dst_uyvy[2] = (w >> 22) & 0xff; + + w = READWORD(src_v210 + 4); + dst_uyvy[3] = (w >> 2) & 0xff; + dst_uyvy[4] = (w >> 12) & 0xff; + dst_uyvy[5] = (w >> 22) & 0xff; + + w = READWORD(src_v210 + 8); + dst_uyvy[6] = (w >> 2) & 0xff; + dst_uyvy[7] = (w >> 12) & 0xff; + dst_uyvy[8] = (w >> 22) & 0xff; + + w = READWORD(src_v210 + 12); + dst_uyvy[9] = (w >> 2) & 0xff; + dst_uyvy[10] = (w >> 12) & 0xff; + dst_uyvy[11] = (w >> 22) & 0xff; + + src_v210 += 16; + dst_uyvy += 12; + } +} + +// Convert V210 to I420. +// V210 is 10 bit version of UYVY. 16 bytes to store 6 pixels. +// With is multiple of 48. +LIBYUV_API +int V210ToI420(const uint8* src_v210, int src_stride_v210, + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height) { + if (width * 2 * 2 > kMaxStride) { // 2 rows of UYVY are required. + return -1; + } else if (!src_v210 || !dst_y || !dst_u || !dst_v || + width <= 0 || height == 0) { + return -1; + } + // Negative height means invert the image. + if (height < 0) { + height = -height; + src_v210 = src_v210 + (height - 1) * src_stride_v210; + src_stride_v210 = -src_stride_v210; + } + SIMD_ALIGNED(uint8 row[kMaxStride * 2]); + void (*V210ToUYVYRow)(const uint8* src_v210, uint8* dst_uyvy, int pix); + V210ToUYVYRow = V210ToUYVYRow_C; + + void (*UYVYToUVRow)(const uint8* src_uyvy, int src_stride_uyvy, + uint8* dst_u, uint8* dst_v, int pix); + void (*UYVYToYRow)(const uint8* src_uyvy, + uint8* dst_y, int pix); + UYVYToYRow = UYVYToYRow_C; + UYVYToUVRow = UYVYToUVRow_C; +#if defined(HAS_UYVYTOYROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16)) { + UYVYToUVRow = UYVYToUVRow_SSE2; + UYVYToYRow = UYVYToYRow_Unaligned_SSE2; + if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { + UYVYToYRow = UYVYToYRow_SSE2; + } + } +#elif defined(HAS_UYVYTOYROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + if (width > 8) { + UYVYToYRow = UYVYToYRow_Any_NEON; + if (width > 16) { + UYVYToUVRow = UYVYToUVRow_Any_NEON; + } + } + if (IS_ALIGNED(width, 8)) { + UYVYToYRow = UYVYToYRow_NEON; + if (IS_ALIGNED(width, 16)) { + UYVYToUVRow = UYVYToUVRow_NEON; + } + } + } +#endif + +#if defined(HAS_UYVYTOYROW_SSE2) + if (TestCpuFlag(kCpuHasSSE2)) { + if (width > 16) { + UYVYToUVRow = UYVYToUVRow_Any_SSE2; + UYVYToYRow = UYVYToYRow_Any_SSE2; + } + if (IS_ALIGNED(width, 16)) { + UYVYToYRow = UYVYToYRow_Unaligned_SSE2; + UYVYToUVRow = UYVYToUVRow_SSE2; + if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { + UYVYToYRow = UYVYToYRow_SSE2; + } + } + } +#elif defined(HAS_UYVYTOYROW_NEON) + if (TestCpuFlag(kCpuHasNEON)) { + if (width > 8) { + UYVYToYRow = UYVYToYRow_Any_NEON; + if (width > 16) { + UYVYToUVRow = UYVYToUVRow_Any_NEON; + } + } + if (IS_ALIGNED(width, 8)) { + UYVYToYRow = UYVYToYRow_NEON; + if (IS_ALIGNED(width, 16)) { + UYVYToUVRow = UYVYToUVRow_NEON; + } + } + } +#endif + + for (int y = 0; y < height - 1; y += 2) { + V210ToUYVYRow(src_v210, row, width); + V210ToUYVYRow(src_v210 + src_stride_v210, row + kMaxStride, width); + UYVYToUVRow(row, kMaxStride, dst_u, dst_v, width); + UYVYToYRow(row, dst_y, width); + UYVYToYRow(row + kMaxStride, dst_y + dst_stride_y, width); + src_v210 += src_stride_v210 * 2; + dst_y += dst_stride_y * 2; + dst_u += dst_stride_u; + dst_v += dst_stride_v; + } + if (height & 1) { + V210ToUYVYRow(src_v210, row, width); + UYVYToUVRow(row, 0, dst_u, dst_v, width); + UYVYToYRow(row, dst_y, width); + } + return 0; +} + LIBYUV_API int ARGBToI420(const uint8* src_argb, int src_stride_argb, uint8* dst_y, int dst_stride_y, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, int width, int height) { if (!src_argb || !dst_y || !dst_u || !dst_v || @@ -666,60 +943,40 @@ int ARGBToI420(const uint8* src_argb, in return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } + void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix); void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = - ARGBToYRow_C; -#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - ARGBToUVRow = ARGBToUVRow_Any_SSSE3; - ARGBToYRow = ARGBToYRow_Any_SSSE3; + uint8* dst_u, uint8* dst_v, int width); + + ARGBToYRow = ARGBToYRow_C; + ARGBToUVRow = ARGBToUVRow_C; +#if defined(HAS_ARGBTOYROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + if (width > 16) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; + ARGBToYRow = ARGBToYRow_Any_SSSE3; + } if (IS_ALIGNED(width, 16)) { ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3; ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) { ARGBToUVRow = ARGBToUVRow_SSSE3; if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { ARGBToYRow = ARGBToYRow_SSSE3; } } } } #endif -#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2) && width >= 32) { - ARGBToUVRow = ARGBToUVRow_Any_AVX2; - ARGBToYRow = ARGBToYRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToUVRow = ARGBToUVRow_AVX2; - ARGBToYRow = ARGBToYRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBTOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - ARGBToYRow = ARGBToYRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ARGBToYRow = ARGBToYRow_NEON; - } - if (width >= 16) { - ARGBToUVRow = ARGBToUVRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_NEON; - } - } - } -#endif for (int y = 0; y < height - 1; y += 2) { ARGBToUVRow(src_argb, src_stride_argb, dst_u, dst_v, width); ARGBToYRow(src_argb, dst_y, width); ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width); src_argb += src_stride_argb * 2; dst_y += dst_stride_y * 2; dst_u += dst_stride_u; @@ -727,17 +984,16 @@ int ARGBToI420(const uint8* src_argb, in } if (height & 1) { ARGBToUVRow(src_argb, 0, dst_u, dst_v, width); ARGBToYRow(src_argb, dst_y, width); } return 0; } -// Convert BGRA to I420. LIBYUV_API int BGRAToI420(const uint8* src_bgra, int src_stride_bgra, uint8* dst_y, int dst_stride_y, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, int width, int height) { if (!src_bgra || !dst_y || !dst_u || !dst_v || @@ -745,48 +1001,39 @@ int BGRAToI420(const uint8* src_bgra, in return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_bgra = src_bgra + (height - 1) * src_stride_bgra; src_stride_bgra = -src_stride_bgra; } + void (*BGRAToYRow)(const uint8* src_bgra, uint8* dst_y, int pix); void (*BGRAToUVRow)(const uint8* src_bgra0, int src_stride_bgra, - uint8* dst_u, uint8* dst_v, int width) = BGRAToUVRow_C; - void (*BGRAToYRow)(const uint8* src_bgra, uint8* dst_y, int pix) = - BGRAToYRow_C; + uint8* dst_u, uint8* dst_v, int width); + + BGRAToYRow = BGRAToYRow_C; + BGRAToUVRow = BGRAToUVRow_C; #if defined(HAS_BGRATOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - BGRAToUVRow = BGRAToUVRow_Any_SSSE3; - BGRAToYRow = BGRAToYRow_Any_SSSE3; + if (TestCpuFlag(kCpuHasSSSE3)) { + if (width > 16) { + BGRAToUVRow = BGRAToUVRow_Any_SSSE3; + BGRAToYRow = BGRAToYRow_Any_SSSE3; + } if (IS_ALIGNED(width, 16)) { BGRAToUVRow = BGRAToUVRow_Unaligned_SSSE3; BGRAToYRow = BGRAToYRow_Unaligned_SSSE3; if (IS_ALIGNED(src_bgra, 16) && IS_ALIGNED(src_stride_bgra, 16)) { BGRAToUVRow = BGRAToUVRow_SSSE3; if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { BGRAToYRow = BGRAToYRow_SSSE3; } } } } -#elif defined(HAS_BGRATOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - BGRAToYRow = BGRAToYRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - BGRAToYRow = BGRAToYRow_NEON; - } - if (width >= 16) { - BGRAToUVRow = BGRAToUVRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - BGRAToUVRow = BGRAToUVRow_NEON; - } - } - } #endif for (int y = 0; y < height - 1; y += 2) { BGRAToUVRow(src_bgra, src_stride_bgra, dst_u, dst_v, width); BGRAToYRow(src_bgra, dst_y, width); BGRAToYRow(src_bgra + src_stride_bgra, dst_y + dst_stride_y, width); src_bgra += src_stride_bgra * 2; dst_y += dst_stride_y * 2; @@ -795,17 +1042,16 @@ int BGRAToI420(const uint8* src_bgra, in } if (height & 1) { BGRAToUVRow(src_bgra, 0, dst_u, dst_v, width); BGRAToYRow(src_bgra, dst_y, width); } return 0; } -// Convert ABGR to I420. LIBYUV_API int ABGRToI420(const uint8* src_abgr, int src_stride_abgr, uint8* dst_y, int dst_stride_y, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, int width, int height) { if (!src_abgr || !dst_y || !dst_u || !dst_v || @@ -813,48 +1059,39 @@ int ABGRToI420(const uint8* src_abgr, in return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_abgr = src_abgr + (height - 1) * src_stride_abgr; src_stride_abgr = -src_stride_abgr; } + void (*ABGRToYRow)(const uint8* src_abgr, uint8* dst_y, int pix); void (*ABGRToUVRow)(const uint8* src_abgr0, int src_stride_abgr, - uint8* dst_u, uint8* dst_v, int width) = ABGRToUVRow_C; - void (*ABGRToYRow)(const uint8* src_abgr, uint8* dst_y, int pix) = - ABGRToYRow_C; + uint8* dst_u, uint8* dst_v, int width); + + ABGRToYRow = ABGRToYRow_C; + ABGRToUVRow = ABGRToUVRow_C; #if defined(HAS_ABGRTOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - ABGRToUVRow = ABGRToUVRow_Any_SSSE3; - ABGRToYRow = ABGRToYRow_Any_SSSE3; + if (TestCpuFlag(kCpuHasSSSE3)) { + if (width > 16) { + ABGRToUVRow = ABGRToUVRow_Any_SSSE3; + ABGRToYRow = ABGRToYRow_Any_SSSE3; + } if (IS_ALIGNED(width, 16)) { ABGRToUVRow = ABGRToUVRow_Unaligned_SSSE3; ABGRToYRow = ABGRToYRow_Unaligned_SSSE3; if (IS_ALIGNED(src_abgr, 16) && IS_ALIGNED(src_stride_abgr, 16)) { ABGRToUVRow = ABGRToUVRow_SSSE3; if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { ABGRToYRow = ABGRToYRow_SSSE3; } } } } -#elif defined(HAS_ABGRTOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - ABGRToYRow = ABGRToYRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ABGRToYRow = ABGRToYRow_NEON; - } - if (width >= 16) { - ABGRToUVRow = ABGRToUVRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ABGRToUVRow = ABGRToUVRow_NEON; - } - } - } #endif for (int y = 0; y < height - 1; y += 2) { ABGRToUVRow(src_abgr, src_stride_abgr, dst_u, dst_v, width); ABGRToYRow(src_abgr, dst_y, width); ABGRToYRow(src_abgr + src_stride_abgr, dst_y + dst_stride_y, width); src_abgr += src_stride_abgr * 2; dst_y += dst_stride_y * 2; @@ -863,17 +1100,16 @@ int ABGRToI420(const uint8* src_abgr, in } if (height & 1) { ABGRToUVRow(src_abgr, 0, dst_u, dst_v, width); ABGRToYRow(src_abgr, dst_y, width); } return 0; } -// Convert RGBA to I420. LIBYUV_API int RGBAToI420(const uint8* src_rgba, int src_stride_rgba, uint8* dst_y, int dst_stride_y, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, int width, int height) { if (!src_rgba || !dst_y || !dst_u || !dst_v || @@ -881,48 +1117,39 @@ int RGBAToI420(const uint8* src_rgba, in return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_rgba = src_rgba + (height - 1) * src_stride_rgba; src_stride_rgba = -src_stride_rgba; } + void (*RGBAToYRow)(const uint8* src_rgba, uint8* dst_y, int pix); void (*RGBAToUVRow)(const uint8* src_rgba0, int src_stride_rgba, - uint8* dst_u, uint8* dst_v, int width) = RGBAToUVRow_C; - void (*RGBAToYRow)(const uint8* src_rgba, uint8* dst_y, int pix) = - RGBAToYRow_C; + uint8* dst_u, uint8* dst_v, int width); + + RGBAToYRow = RGBAToYRow_C; + RGBAToUVRow = RGBAToUVRow_C; #if defined(HAS_RGBATOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - RGBAToUVRow = RGBAToUVRow_Any_SSSE3; - RGBAToYRow = RGBAToYRow_Any_SSSE3; + if (TestCpuFlag(kCpuHasSSSE3)) { + if (width > 16) { + RGBAToUVRow = RGBAToUVRow_Any_SSSE3; + RGBAToYRow = RGBAToYRow_Any_SSSE3; + } if (IS_ALIGNED(width, 16)) { RGBAToUVRow = RGBAToUVRow_Unaligned_SSSE3; RGBAToYRow = RGBAToYRow_Unaligned_SSSE3; if (IS_ALIGNED(src_rgba, 16) && IS_ALIGNED(src_stride_rgba, 16)) { RGBAToUVRow = RGBAToUVRow_SSSE3; if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { RGBAToYRow = RGBAToYRow_SSSE3; } } } } -#elif defined(HAS_RGBATOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - RGBAToYRow = RGBAToYRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - RGBAToYRow = RGBAToYRow_NEON; - } - if (width >= 16) { - RGBAToUVRow = RGBAToUVRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - RGBAToUVRow = RGBAToUVRow_NEON; - } - } - } #endif for (int y = 0; y < height - 1; y += 2) { RGBAToUVRow(src_rgba, src_stride_rgba, dst_u, dst_v, width); RGBAToYRow(src_rgba, dst_y, width); RGBAToYRow(src_rgba + src_stride_rgba, dst_y + dst_stride_y, width); src_rgba += src_stride_rgba * 2; dst_y += dst_stride_y * 2; @@ -931,561 +1158,932 @@ int RGBAToI420(const uint8* src_rgba, in } if (height & 1) { RGBAToUVRow(src_rgba, 0, dst_u, dst_v, width); RGBAToYRow(src_rgba, dst_y, width); } return 0; } -// Convert RGB24 to I420. LIBYUV_API int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24, uint8* dst_y, int dst_stride_y, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, int width, int height) { - if (!src_rgb24 || !dst_y || !dst_u || !dst_v || - width <= 0 || height == 0) { + if (width * 4 > kMaxStride) { // Row buffer is required. return -1; + } else if (!src_rgb24 || + !dst_y || !dst_u || !dst_v || + width <= 0 || height == 0) { + return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24; src_stride_rgb24 = -src_stride_rgb24; } + SIMD_ALIGNED(uint8 row[kMaxStride * 2]); + void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix); -#if defined(HAS_RGB24TOYROW_NEON) - void (*RGB24ToUVRow)(const uint8* src_rgb24, int src_stride_rgb24, - uint8* dst_u, uint8* dst_v, int width) = RGB24ToUVRow_C; - void (*RGB24ToYRow)(const uint8* src_rgb24, uint8* dst_y, int pix) = - RGB24ToYRow_C; - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - RGB24ToYRow = RGB24ToYRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - RGB24ToYRow = RGB24ToYRow_NEON; - } - if (width >= 16) { - RGB24ToUVRow = RGB24ToUVRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - RGB24ToUVRow = RGB24ToUVRow_NEON; - } - } - } -#else // HAS_RGB24TOYROW_NEON - - // Allocate 2 rows of ARGB. - const int kRowSize = (width * 4 + 15) & ~15; - align_buffer_64(row, kRowSize * 2); - - void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) = - RGB24ToARGBRow_C; + RGB24ToARGBRow = RGB24ToARGBRow_C; #if defined(HAS_RGB24TOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - RGB24ToARGBRow = RGB24ToARGBRow_SSSE3; - } + if (TestCpuFlag(kCpuHasSSSE3) && + TestReadSafe(src_rgb24, src_stride_rgb24, width, height, 3, 48)) { + RGB24ToARGBRow = RGB24ToARGBRow_SSSE3; } #endif + + void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix); void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; -#if defined(HAS_ARGBTOUVROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - ARGBToUVRow = ARGBToUVRow_Any_SSSE3; + uint8* dst_u, uint8* dst_v, int width); + + ARGBToYRow = ARGBToYRow_C; + ARGBToUVRow = ARGBToUVRow_C; +#if defined(HAS_ARGBTOYROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + if (width > 16) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; + } + ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ARGBToUVRow = ARGBToUVRow_SSSE3; - } - } -#endif - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = - ARGBToYRow_C; -#if defined(HAS_ARGBTOUVROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - ARGBToYRow = ARGBToYRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { ARGBToYRow = ARGBToYRow_SSSE3; } } } -#endif // HAS_ARGBTOUVROW_SSSE3 -#endif // HAS_RGB24TOYROW_NEON +#endif for (int y = 0; y < height - 1; y += 2) { -#if defined(HAS_RGB24TOYROW_NEON) - RGB24ToUVRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width); - RGB24ToYRow(src_rgb24, dst_y, width); - RGB24ToYRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width); -#else RGB24ToARGBRow(src_rgb24, row, width); - RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kRowSize, width); - ARGBToUVRow(row, kRowSize, dst_u, dst_v, width); + RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kMaxStride, width); + ARGBToUVRow(row, kMaxStride, dst_u, dst_v, width); ARGBToYRow(row, dst_y, width); - ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width); -#endif + ARGBToYRow(row + kMaxStride, dst_y + dst_stride_y, width); src_rgb24 += src_stride_rgb24 * 2; dst_y += dst_stride_y * 2; dst_u += dst_stride_u; dst_v += dst_stride_v; } if (height & 1) { -#if defined(HAS_RGB24TOYROW_NEON) - RGB24ToUVRow(src_rgb24, 0, dst_u, dst_v, width); - RGB24ToYRow(src_rgb24, dst_y, width); -#else - RGB24ToARGBRow(src_rgb24, row, width); + RGB24ToARGBRow_C(src_rgb24, row, width); ARGBToUVRow(row, 0, dst_u, dst_v, width); ARGBToYRow(row, dst_y, width); -#endif } -#if !defined(HAS_RGB24TOYROW_NEON) - free_aligned_buffer_64(row); -#endif return 0; } -// Convert RAW to I420. LIBYUV_API int RAWToI420(const uint8* src_raw, int src_stride_raw, uint8* dst_y, int dst_stride_y, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, int width, int height) { - if (!src_raw || !dst_y || !dst_u || !dst_v || - width <= 0 || height == 0) { + if (width * 4 > kMaxStride) { // Row buffer is required. return -1; + } else if (!src_raw || + !dst_y || !dst_u || !dst_v || + width <= 0 || height == 0) { + return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_raw = src_raw + (height - 1) * src_stride_raw; src_stride_raw = -src_stride_raw; } + SIMD_ALIGNED(uint8 row[kMaxStride * 2]); + void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix); -#if defined(HAS_RAWTOYROW_NEON) - void (*RAWToUVRow)(const uint8* src_raw, int src_stride_raw, - uint8* dst_u, uint8* dst_v, int width) = RAWToUVRow_C; - void (*RAWToYRow)(const uint8* src_raw, uint8* dst_y, int pix) = - RAWToYRow_C; - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - RAWToYRow = RAWToYRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - RAWToYRow = RAWToYRow_NEON; - } - if (width >= 16) { - RAWToUVRow = RAWToUVRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - RAWToUVRow = RAWToUVRow_NEON; - } - } - } -#else // HAS_RAWTOYROW_NEON - - // Allocate 2 rows of ARGB. - const int kRowSize = (width * 4 + 15) & ~15; - align_buffer_64(row, kRowSize * 2); - - void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) = - RAWToARGBRow_C; + RAWToARGBRow = RAWToARGBRow_C; #if defined(HAS_RAWTOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - RAWToARGBRow = RAWToARGBRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - RAWToARGBRow = RAWToARGBRow_SSSE3; - } + if (TestCpuFlag(kCpuHasSSSE3) && + TestReadSafe(src_raw, src_stride_raw, width, height, 3, 48)) { + RAWToARGBRow = RAWToARGBRow_SSSE3; } #endif + + void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix); void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; -#if defined(HAS_ARGBTOUVROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - ARGBToUVRow = ARGBToUVRow_Any_SSSE3; + uint8* dst_u, uint8* dst_v, int width); + + ARGBToYRow = ARGBToYRow_C; + ARGBToUVRow = ARGBToUVRow_C; +#if defined(HAS_ARGBTOYROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + if (width > 16) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; + } + ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ARGBToUVRow = ARGBToUVRow_SSSE3; - } - } -#endif - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = - ARGBToYRow_C; -#if defined(HAS_ARGBTOUVROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - ARGBToYRow = ARGBToYRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { ARGBToYRow = ARGBToYRow_SSSE3; } } } -#endif // HAS_ARGBTOUVROW_SSSE3 -#endif // HAS_RAWTOYROW_NEON +#endif for (int y = 0; y < height - 1; y += 2) { -#if defined(HAS_RAWTOYROW_NEON) - RAWToUVRow(src_raw, src_stride_raw, dst_u, dst_v, width); - RAWToYRow(src_raw, dst_y, width); - RAWToYRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width); -#else RAWToARGBRow(src_raw, row, width); - RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width); - ARGBToUVRow(row, kRowSize, dst_u, dst_v, width); + RAWToARGBRow(src_raw + src_stride_raw, row + kMaxStride, width); + ARGBToUVRow(row, kMaxStride, dst_u, dst_v, width); ARGBToYRow(row, dst_y, width); - ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width); -#endif + ARGBToYRow(row + kMaxStride, dst_y + dst_stride_y, width); src_raw += src_stride_raw * 2; dst_y += dst_stride_y * 2; dst_u += dst_stride_u; dst_v += dst_stride_v; } if (height & 1) { -#if defined(HAS_RAWTOYROW_NEON) - RAWToUVRow(src_raw, 0, dst_u, dst_v, width); - RAWToYRow(src_raw, dst_y, width); -#else - RAWToARGBRow(src_raw, row, width); + RAWToARGBRow_C(src_raw, row, width); ARGBToUVRow(row, 0, dst_u, dst_v, width); ARGBToYRow(row, dst_y, width); -#endif } -#if !defined(HAS_RAWTOYROW_NEON) - free_aligned_buffer_64(row); -#endif return 0; } -// Convert RGB565 to I420. LIBYUV_API int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - if (!src_rgb565 || !dst_y || !dst_u || !dst_v || - width <= 0 || height == 0) { + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height) { + if (width * 4 > kMaxStride) { // Row buffer is required. + return -1; + } else if (!src_rgb565 || + !dst_y || !dst_u || !dst_v || + width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_rgb565 = src_rgb565 + (height - 1) * src_stride_rgb565; src_stride_rgb565 = -src_stride_rgb565; } + SIMD_ALIGNED(uint8 row[kMaxStride * 2]); + void (*RGB565ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix); -#if defined(HAS_RGB565TOYROW_NEON) - void (*RGB565ToUVRow)(const uint8* src_rgb565, int src_stride_rgb565, - uint8* dst_u, uint8* dst_v, int width) = RGB565ToUVRow_C; - void (*RGB565ToYRow)(const uint8* src_rgb565, uint8* dst_y, int pix) = - RGB565ToYRow_C; - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - RGB565ToYRow = RGB565ToYRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - RGB565ToYRow = RGB565ToYRow_NEON; - } - if (width >= 16) { - RGB565ToUVRow = RGB565ToUVRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - RGB565ToUVRow = RGB565ToUVRow_NEON; - } - } - } -#else // HAS_RGB565TOYROW_NEON - - // Allocate 2 rows of ARGB. - const int kRowSize = (width * 4 + 15) & ~15; - align_buffer_64(row, kRowSize * 2); - - void (*RGB565ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) = - RGB565ToARGBRow_C; + RGB565ToARGBRow = RGB565ToARGBRow_C; #if defined(HAS_RGB565TOARGBROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && width >= 8) { - RGB565ToARGBRow = RGB565ToARGBRow_Any_SSE2; - if (IS_ALIGNED(width, 8)) { - RGB565ToARGBRow = RGB565ToARGBRow_SSE2; - } + if (TestCpuFlag(kCpuHasSSE2) && + TestReadSafe(src_rgb565, src_stride_rgb565, width, height, 2, 16)) { + RGB565ToARGBRow = RGB565ToARGBRow_SSE2; } #endif + + void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix); void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; -#if defined(HAS_ARGBTOUVROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - ARGBToUVRow = ARGBToUVRow_Any_SSSE3; + uint8* dst_u, uint8* dst_v, int width); + + ARGBToYRow = ARGBToYRow_C; + ARGBToUVRow = ARGBToUVRow_C; +#if defined(HAS_ARGBTOYROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + if (width > 16) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; + } + ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ARGBToUVRow = ARGBToUVRow_SSSE3; - } - } -#endif - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = - ARGBToYRow_C; -#if defined(HAS_ARGBTOUVROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - ARGBToYRow = ARGBToYRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { ARGBToYRow = ARGBToYRow_SSSE3; } } } -#endif // HAS_ARGBTOUVROW_SSSE3 -#endif // HAS_RGB565TOYROW_NEON +#endif for (int y = 0; y < height - 1; y += 2) { -#if defined(HAS_RGB565TOYROW_NEON) - RGB565ToUVRow(src_rgb565, src_stride_rgb565, dst_u, dst_v, width); - RGB565ToYRow(src_rgb565, dst_y, width); - RGB565ToYRow(src_rgb565 + src_stride_rgb565, dst_y + dst_stride_y, width); -#else RGB565ToARGBRow(src_rgb565, row, width); - RGB565ToARGBRow(src_rgb565 + src_stride_rgb565, row + kRowSize, width); - ARGBToUVRow(row, kRowSize, dst_u, dst_v, width); + RGB565ToARGBRow(src_rgb565 + src_stride_rgb565, row + kMaxStride, width); + ARGBToUVRow(row, kMaxStride, dst_u, dst_v, width); ARGBToYRow(row, dst_y, width); - ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width); -#endif + ARGBToYRow(row + kMaxStride, dst_y + dst_stride_y, width); src_rgb565 += src_stride_rgb565 * 2; dst_y += dst_stride_y * 2; dst_u += dst_stride_u; dst_v += dst_stride_v; } if (height & 1) { -#if defined(HAS_RGB565TOYROW_NEON) - RGB565ToUVRow(src_rgb565, 0, dst_u, dst_v, width); - RGB565ToYRow(src_rgb565, dst_y, width); -#else - RGB565ToARGBRow(src_rgb565, row, width); + RGB565ToARGBRow_C(src_rgb565, row, width); ARGBToUVRow(row, 0, dst_u, dst_v, width); ARGBToYRow(row, dst_y, width); -#endif } -#if !defined(HAS_RGB565TOYROW_NEON) - free_aligned_buffer_64(row); -#endif return 0; } -// Convert ARGB1555 to I420. LIBYUV_API int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - if (!src_argb1555 || !dst_y || !dst_u || !dst_v || - width <= 0 || height == 0) { + uint8* dst_y, int dst_stride_y, + uint8* dst_u, int dst_stride_u, + uint8* dst_v, int dst_stride_v, + int width, int height) { + if (width * 4 > kMaxStride) { // Row buffer is required. return -1; + } else if (!src_argb1555 || + !dst_y || !dst_u || !dst_v || + width <= 0 || height == 0) { + return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_argb1555 = src_argb1555 + (height - 1) * src_stride_argb1555; src_stride_argb1555 = -src_stride_argb1555; } + SIMD_ALIGNED(uint8 row[kMaxStride * 2]); + void (*ARGB1555ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix); -#if defined(HAS_ARGB1555TOYROW_NEON) - void (*ARGB1555ToUVRow)(const uint8* src_argb1555, int src_stride_argb1555, - uint8* dst_u, uint8* dst_v, int width) = ARGB1555ToUVRow_C; - void (*ARGB1555ToYRow)(const uint8* src_argb1555, uint8* dst_y, int pix) = - ARGB1555ToYRow_C; - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - ARGB1555ToYRow = ARGB1555ToYRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ARGB1555ToYRow = ARGB1555ToYRow_NEON; - } - if (width >= 16) { - ARGB1555ToUVRow = ARGB1555ToUVRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGB1555ToUVRow = ARGB1555ToUVRow_NEON; - } - } - } -#else // HAS_ARGB1555TOYROW_NEON - - // Allocate 2 rows of ARGB. - const int kRowSize = (width * 4 + 15) & ~15; - align_buffer_64(row, kRowSize * 2); - - void (*ARGB1555ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) = - ARGB1555ToARGBRow_C; + ARGB1555ToARGBRow = ARGB1555ToARGBRow_C; #if defined(HAS_ARGB1555TOARGBROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && width >= 8) { - ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_SSE2; - if (IS_ALIGNED(width, 8)) { - ARGB1555ToARGBRow = ARGB1555ToARGBRow_SSE2; - } + if (TestCpuFlag(kCpuHasSSE2) && + TestReadSafe(src_argb1555, src_stride_argb1555, width, height, 2, 16)) { + ARGB1555ToARGBRow = ARGB1555ToARGBRow_SSE2; } #endif + + void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix); void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; -#if defined(HAS_ARGBTOUVROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - ARGBToUVRow = ARGBToUVRow_Any_SSSE3; + uint8* dst_u, uint8* dst_v, int width); + + ARGBToYRow = ARGBToYRow_C; + ARGBToUVRow = ARGBToUVRow_C; +#if defined(HAS_ARGBTOYROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + if (width > 16) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; + } + ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ARGBToUVRow = ARGBToUVRow_SSSE3; - } - } -#endif - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = - ARGBToYRow_C; -#if defined(HAS_ARGBTOUVROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - ARGBToYRow = ARGBToYRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { ARGBToYRow = ARGBToYRow_SSSE3; } } } -#endif // HAS_ARGBTOUVROW_SSSE3 -#endif // HAS_ARGB1555TOYROW_NEON +#endif for (int y = 0; y < height - 1; y += 2) { -#if defined(HAS_ARGB1555TOYROW_NEON) - ARGB1555ToUVRow(src_argb1555, src_stride_argb1555, dst_u, dst_v, width); - ARGB1555ToYRow(src_argb1555, dst_y, width); - ARGB1555ToYRow(src_argb1555 + src_stride_argb1555, dst_y + dst_stride_y, - width); -#else ARGB1555ToARGBRow(src_argb1555, row, width); - ARGB1555ToARGBRow(src_argb1555 + src_stride_argb1555, row + kRowSize, - width); - ARGBToUVRow(row, kRowSize, dst_u, dst_v, width); + ARGB1555ToARGBRow(src_argb1555 + src_stride_argb1555, + row + kMaxStride, width); + ARGBToUVRow(row, kMaxStride, dst_u, dst_v, width); ARGBToYRow(row, dst_y, width); - ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width); -#endif + ARGBToYRow(row + kMaxStride, dst_y + dst_stride_y, width); src_argb1555 += src_stride_argb1555 * 2; dst_y += dst_stride_y * 2; dst_u += dst_stride_u; dst_v += dst_stride_v; } if (height & 1) { -#if defined(HAS_ARGB1555TOYROW_NEON) - ARGB1555ToUVRow(src_argb1555, 0, dst_u, dst_v, width); - ARGB1555ToYRow(src_argb1555, dst_y, width); -#else - ARGB1555ToARGBRow(src_argb1555, row, width); + ARGB1555ToARGBRow_C(src_argb1555, row, width); ARGBToUVRow(row, 0, dst_u, dst_v, width); ARGBToYRow(row, dst_y, width); -#endif } -#if !defined(HAS_ARGB1555TOYROW_NEON) - free_aligned_buffer_64(row); -#endif return 0; } -// Convert ARGB4444 to I420. LIBYUV_API int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444, uint8* dst_y, int dst_stride_y, uint8* dst_u, int dst_stride_u, uint8* dst_v, int dst_stride_v, int width, int height) { - if (!src_argb4444 || !dst_y || !dst_u || !dst_v || - width <= 0 || height == 0) { + if (width * 4 > kMaxStride) { // Row buffer is required. return -1; + } else if (!src_argb4444 || + !dst_y || !dst_u || !dst_v || + width <= 0 || height == 0) { + return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_argb4444 = src_argb4444 + (height - 1) * src_stride_argb4444; src_stride_argb4444 = -src_stride_argb4444; } + SIMD_ALIGNED(uint8 row[kMaxStride * 2]); + void (*ARGB4444ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix); -#if defined(HAS_ARGB4444TOYROW_NEON) - void (*ARGB4444ToUVRow)(const uint8* src_argb4444, int src_stride_argb4444, - uint8* dst_u, uint8* dst_v, int width) = ARGB4444ToUVRow_C; - void (*ARGB4444ToYRow)(const uint8* src_argb4444, uint8* dst_y, int pix) = - ARGB4444ToYRow_C; - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - ARGB4444ToYRow = ARGB4444ToYRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ARGB4444ToYRow = ARGB4444ToYRow_NEON; - } - if (width >= 16) { - ARGB4444ToUVRow = ARGB4444ToUVRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGB4444ToUVRow = ARGB4444ToUVRow_NEON; - } - } - } -#else // HAS_ARGB4444TOYROW_NEON - - // Allocate 2 rows of ARGB. - const int kRowSize = (width * 4 + 15) & ~15; - align_buffer_64(row, kRowSize * 2); - - void (*ARGB4444ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) = - ARGB4444ToARGBRow_C; + ARGB4444ToARGBRow = ARGB4444ToARGBRow_C; #if defined(HAS_ARGB4444TOARGBROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && width >= 8) { - ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_SSE2; - if (IS_ALIGNED(width, 8)) { - ARGB4444ToARGBRow = ARGB4444ToARGBRow_SSE2; - } + if (TestCpuFlag(kCpuHasSSE2) && + TestReadSafe(src_argb4444, src_stride_argb4444, width, height, 2, 16)) { + ARGB4444ToARGBRow = ARGB4444ToARGBRow_SSE2; } #endif + + void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix); void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; -#if defined(HAS_ARGBTOUVROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - ARGBToUVRow = ARGBToUVRow_Any_SSSE3; + uint8* dst_u, uint8* dst_v, int width); + + ARGBToYRow = ARGBToYRow_C; + ARGBToUVRow = ARGBToUVRow_C; +#if defined(HAS_ARGBTOYROW_SSSE3) + if (TestCpuFlag(kCpuHasSSSE3)) { + if (width > 16) { + ARGBToUVRow = ARGBToUVRow_Any_SSSE3; + } + ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ARGBToUVRow = ARGBToUVRow_SSSE3; - } - } -#endif - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = - ARGBToYRow_C; -#if defined(HAS_ARGBTOUVROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - ARGBToYRow = ARGBToYRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { ARGBToYRow = ARGBToYRow_SSSE3; } } } -#endif // HAS_ARGBTOUVROW_SSSE3 -#endif // HAS_ARGB4444TOYROW_NEON +#endif for (int y = 0; y < height - 1; y += 2) { -#if defined(HAS_ARGB4444TOYROW_NEON) - ARGB4444ToUVRow(src_argb4444, src_stride_argb4444, dst_u, dst_v, width); - ARGB4444ToYRow(src_argb4444, dst_y, width); - ARGB4444ToYRow(src_argb4444 + src_stride_argb4444, dst_y + dst_stride_y, - width); -#else ARGB4444ToARGBRow(src_argb4444, row, width); - ARGB4444ToARGBRow(src_argb4444 + src_stride_argb4444, row + kRowSize, - width); - ARGBToUVRow(row, kRowSize, dst_u, dst_v, width); + ARGB4444ToARGBRow(src_argb4444 + src_stride_argb4444, + row + kMaxStride, width); + ARGBToUVRow(row, kMaxStride, dst_u, dst_v, width); ARGBToYRow(row, dst_y, width); - ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width); -#endif + ARGBToYRow(row + kMaxStride, dst_y + dst_stride_y, width); src_argb4444 += src_stride_argb4444 * 2; dst_y += dst_stride_y * 2; dst_u += dst_stride_u; dst_v += dst_stride_v; } if (height & 1) { -#if defined(HAS_ARGB4444TOYROW_NEON) - ARGB4444ToUVRow(src_argb4444, 0, dst_u, dst_v, width); - ARGB4444ToYRow(src_argb4444, dst_y, width); -#else - ARGB4444ToARGBRow(src_argb4444, row, width); + ARGB4444ToARGBRow_C(src_argb4444, row, width); ARGBToUVRow(row, 0, dst_u, dst_v, width); ARGBToYRow(row, dst_y, width); + } + return 0; +} + +#ifdef HAVE_JPEG +struct I420Buffers { + uint8* y; + int y_stride; + uint8* u; + int u_stride; + uint8* v; + int v_stride; + int w; + int h; +}; + +static void JpegCopyI420(void* opaque, + const uint8* const* data, + const int* strides, + int rows) { + I420Buffers* dest = static_cast<I420Buffers*>(opaque); + I420Copy(data[0], strides[0], + data[1], strides[1], + data[2], strides[2], + dest->y, dest->y_stride, + dest->u, dest->u_stride, + dest->v, dest->v_stride, + dest->w, rows); + dest->y += rows * dest->y_stride; + dest->u += ((rows + 1) >> 1) * dest->u_stride; + dest->v += ((rows + 1) >> 1) * dest->v_stride; + dest->h -= rows; +} + +static void JpegI422ToI420(void* opaque, + const uint8* const* data, + const int* strides, + int rows) { + I420Buffers* dest = static_cast<I420Buffers*>(opaque); + I422ToI420(data[0], strides[0], + data[1], strides[1], + data[2], strides[2], + dest->y, dest->y_stride, + dest->u, dest->u_stride, + dest->v, dest->v_stride, + dest->w, rows); + dest->y += rows * dest->y_stride; + dest->u += ((rows + 1) >> 1) * dest->u_stride; + dest->v += ((rows + 1) >> 1) * dest->v_stride; + dest->h -= rows; +} + +static void JpegI444ToI420(void* opaque, + const uint8* const* data, + const int* strides, + int rows) { + I420Buffers* dest = static_cast<I420Buffers*>(opaque); + I444ToI420(data[0], strides[0], + data[1], strides[1], + data[2], strides[2], + dest->y, dest->y_stride, + dest->u, dest->u_stride, + dest->v, dest->v_stride, + dest->w, rows); + dest->y += rows * dest->y_stride; + dest->u += ((rows + 1) >> 1) * dest->u_stride; + dest->v += ((rows + 1) >> 1) * dest->v_stride; + dest->h -= rows; +} + +static void JpegI411ToI420(void* opaque, + const uint8* const* data, + const int* strides, + int rows) { + I420Buffers* dest = static_cast<I420Buffers*>(opaque); + I411ToI420(data[0], strides[0], + data[1], strides[1], + data[2], strides[2], + dest->y, dest->y_stride, + dest->u, dest->u_stride, + dest->v, dest->v_stride, + dest->w, rows); + dest->y += rows * dest->y_stride; + dest->u += ((rows + 1) >> 1) * dest->u_stride; + dest->v += ((rows + 1) >> 1) * dest->v_stride; + dest->h -= rows; +} + +static void JpegI400ToI420(void* opaque, + const uint8* const* data, + const int* strides, + int rows) { + I420Buffers* dest = static_cast<I420Buffers*>(opaque); + I400ToI420(data[0], strides[0], + dest->y, dest->y_stride, + dest->u, dest->u_stride, + dest->v, dest->v_stride, + dest->w, rows); + dest->y += rows * dest->y_stride; + dest->u += ((rows + 1) >> 1) * dest->u_stride; + dest->v += ((rows + 1) >> 1) * dest->v_stride; + dest->h -= rows; +} + +// MJPG (Motion JPeg) to I420 +// TODO(fbarchard): review w and h requirement. dw and dh may be enough. +LIBYUV_API +int MJPGToI420(const uint8* sample, + size_t sample_size, + uint8* y, int y_stride, + uint8* u, int u_stride, + uint8* v, int v_stride, + int w, int h, + int dw, int dh) { + if (sample_size == kUnknownDataSize) { + // ERROR: MJPEG frame size unknown + return -1; + } + + // TODO(fbarchard): Port to C + MJpegDecoder mjpeg_decoder; + bool ret = mjpeg_decoder.LoadFrame(sample, sample_size); + if (ret && (mjpeg_decoder.GetWidth() != w || + mjpeg_decoder.GetHeight() != h)) { + // ERROR: MJPEG frame has unexpected dimensions + mjpeg_decoder.UnloadFrame(); + return 1; // runtime failure + } + if (ret) { + I420Buffers bufs = { y, y_stride, u, u_stride, v, v_stride, dw, dh }; + // YUV420 + if (mjpeg_decoder.GetColorSpace() == + MJpegDecoder::kColorSpaceYCbCr && + mjpeg_decoder.GetNumComponents() == 3 && + mjpeg_decoder.GetVertSampFactor(0) == 2 && + mjpeg_decoder.GetHorizSampFactor(0) == 2 && + mjpeg_decoder.GetVertSampFactor(1) == 1 && + mjpeg_decoder.GetHorizSampFactor(1) == 1 && + mjpeg_decoder.GetVertSampFactor(2) == 1 && + mjpeg_decoder.GetHorizSampFactor(2) == 1) { + ret = mjpeg_decoder.DecodeToCallback(&JpegCopyI420, &bufs, dw, dh); + // YUV422 + } else if (mjpeg_decoder.GetColorSpace() == + MJpegDecoder::kColorSpaceYCbCr && + mjpeg_decoder.GetNumComponents() == 3 && + mjpeg_decoder.GetVertSampFactor(0) == 1 && + mjpeg_decoder.GetHorizSampFactor(0) == 2 && + mjpeg_decoder.GetVertSampFactor(1) == 1 && + mjpeg_decoder.GetHorizSampFactor(1) == 1 && + mjpeg_decoder.GetVertSampFactor(2) == 1 && + mjpeg_decoder.GetHorizSampFactor(2) == 1) { + ret = mjpeg_decoder.DecodeToCallback(&JpegI422ToI420, &bufs, dw, dh); + // YUV444 + } else if (mjpeg_decoder.GetColorSpace() == + MJpegDecoder::kColorSpaceYCbCr && + mjpeg_decoder.GetNumComponents() == 3 && + mjpeg_decoder.GetVertSampFactor(0) == 1 && + mjpeg_decoder.GetHorizSampFactor(0) == 1 && + mjpeg_decoder.GetVertSampFactor(1) == 1 && + mjpeg_decoder.GetHorizSampFactor(1) == 1 && + mjpeg_decoder.GetVertSampFactor(2) == 1 && + mjpeg_decoder.GetHorizSampFactor(2) == 1) { + ret = mjpeg_decoder.DecodeToCallback(&JpegI444ToI420, &bufs, dw, dh); + // YUV411 + } else if (mjpeg_decoder.GetColorSpace() == + MJpegDecoder::kColorSpaceYCbCr && + mjpeg_decoder.GetNumComponents() == 3 && + mjpeg_decoder.GetVertSampFactor(0) == 1 && + mjpeg_decoder.GetHorizSampFactor(0) == 4 && + mjpeg_decoder.GetVertSampFactor(1) == 1 && + mjpeg_decoder.GetHorizSampFactor(1) == 1 && + mjpeg_decoder.GetVertSampFactor(2) == 1 && + mjpeg_decoder.GetHorizSampFactor(2) == 1) { + ret = mjpeg_decoder.DecodeToCallback(&JpegI411ToI420, &bufs, dw, dh); + // YUV400 + } else if (mjpeg_decoder.GetColorSpace() == + MJpegDecoder::kColorSpaceGrayscale && + mjpeg_decoder.GetNumComponents() == 1 && + mjpeg_decoder.GetVertSampFactor(0) == 1 && + mjpeg_decoder.GetHorizSampFactor(0) == 1) { + ret = mjpeg_decoder.DecodeToCallback(&JpegI400ToI420, &bufs, dw, dh); + } else { + // TODO(fbarchard): Implement conversion for any other colorspace/sample + // factors that occur in practice. 411 is supported by libjpeg + // ERROR: Unable to convert MJPEG frame because format is not supported + mjpeg_decoder.UnloadFrame(); + return 1; + } + } + return 0; +} #endif + +// Convert camera sample to I420 with cropping, rotation and vertical flip. +// src_width is used for source stride computation +// src_height is used to compute location of planes, and indicate inversion +// sample_size is measured in bytes and is the size of the frame. +// With MJPEG it is the compressed size of the frame. +LIBYUV_API +int ConvertToI420(const uint8* sample, +#ifdef HAVE_JPEG + size_t sample_size, +#else + size_t /* sample_size */, +#endif + uint8* y, int y_stride, + uint8* u, int u_stride, + uint8* v, int v_stride, + int crop_x, int crop_y, + int src_width, int src_height, + int dst_width, int dst_height, + RotationMode rotation, + uint32 format) { + if (!y || !u || !v || !sample || + src_width <= 0 || dst_width <= 0 || + src_height == 0 || dst_height == 0) { + return -1; + } + int aligned_src_width = (src_width + 1) & ~1; + const uint8* src; + const uint8* src_uv; + int abs_src_height = (src_height < 0) ? -src_height : src_height; + int inv_dst_height = (dst_height < 0) ? -dst_height : dst_height; + if (src_height < 0) { + inv_dst_height = -inv_dst_height; + } + int r = 0; + + // One pass rotation is available for some formats. For the rest, convert + // to I420 (with optional vertical flipping) into a temporary I420 buffer, + // and then rotate the I420 to the final destination buffer. + // For in-place conversion, if destination y is same as source sample, + // also enable temporary buffer. + bool need_buf = (rotation && format != FOURCC_NV12 && + format != FOURCC_NV21 && format != FOURCC_I420 && + format != FOURCC_YV12) || y == sample; + uint8* tmp_y = y; + uint8* tmp_u = u; + uint8* tmp_v = v; + int tmp_y_stride = y_stride; + int tmp_u_stride = u_stride; + int tmp_v_stride = v_stride; + uint8* buf = NULL; + int abs_dst_height = (dst_height < 0) ? -dst_height : dst_height; + if (need_buf) { + int y_size = dst_width * abs_dst_height; + int uv_size = ((dst_width + 1) / 2) * ((abs_dst_height + 1) / 2); + buf = new uint8[y_size + uv_size * 2]; + if (!buf) { + return 1; // Out of memory runtime error. + } + y = buf; + u = y + y_size; + v = u + uv_size; + y_stride = dst_width; + u_stride = v_stride = ((dst_width + 1) / 2); } -#if !defined(HAS_ARGB4444TOYROW_NEON) - free_aligned_buffer_64(row); + + switch (format) { + // Single plane formats + case FOURCC_YUY2: + src = sample + (aligned_src_width * crop_y + crop_x) * 2; + r = YUY2ToI420(src, aligned_src_width * 2, + y, y_stride, + u, u_stride, + v, v_stride, + dst_width, inv_dst_height); + break; + case FOURCC_UYVY: + src = sample + (aligned_src_width * crop_y + crop_x) * 2; + r = UYVYToI420(src, aligned_src_width * 2, + y, y_stride, + u, u_stride, + v, v_stride, + dst_width, inv_dst_height); + break; + case FOURCC_V210: + // stride is multiple of 48 pixels (128 bytes). + // pixels come in groups of 6 = 16 bytes + src = sample + (aligned_src_width + 47) / 48