author | Ralph Giles <giles@mozilla.com> |
Thu, 28 Nov 2013 14:20:00 -0800 | |
changeset 158304 | 07e357311cf304e2b55d78f4af0fa03a05a3e637 |
parent 158303 | cb5d23080c7012b83282cf01f8dd3ed8ed187bf6 |
child 158305 | 27c14a1b1d4ca47172337cbd4962834ff3227486 |
push id | 36962 |
push user | rgiles@mozilla.com |
push date | Mon, 02 Dec 2013 17:52:30 +0000 |
treeherder | mozilla-inbound@07e357311cf3 [default view] [failures only] |
perfherder | [talos] [build metrics] [platform microbench] (compared to previous push) |
reviewers | cpearce |
bugs | 944538 |
milestone | 28.0a1 |
first release with | nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
|
last release without | nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
|
--- a/media/libopus/README_MOZILLA +++ b/media/libopus/README_MOZILLA @@ -3,9 +3,9 @@ IETF Opus audio codec reference implemen The source in this directory was copied from an opus repository checkout by running the ./update.sh script. Any changes made to this version of the source should be reflected in that script, e.g. by applying patch files after the copy step. The upstream repository is https://git.xiph.org/opus.git -The git tag/revision used was v1.1-beta-23-gf2446c2. +The git tag/revision used was v1.1-rc2-1-g35a44c6.
--- a/media/libopus/celt/_kiss_fft_guts.h +++ b/media/libopus/celt/_kiss_fft_guts.h @@ -89,21 +89,21 @@ #define C_ADDTO( res , a)\ do {(res).r = ADD32((res).r, (a).r); (res).i = ADD32((res).i,(a).i);\ }while(0) #define C_SUBFROM( res , a)\ do {(res).r = ADD32((res).r,(a).r); (res).i = SUB32((res).i,(a).i); \ }while(0) -#if defined(ARMv4_ASM) +#if defined(OPUS_ARM_INLINE_ASM) #include "arm/kiss_fft_armv4.h" #endif -#if defined(ARMv5E_ASM) +#if defined(OPUS_ARM_INLINE_EDSP) #include "arm/kiss_fft_armv5e.h" #endif #else /* not FIXED_POINT*/ # define S_MUL(a,b) ( (a)*(b) ) #define C_MUL(m,a,b) \ do{ (m).r = (a).r*(b).r - (a).i*(b).i;\
--- a/media/libopus/celt/arch.h +++ b/media/libopus/celt/arch.h @@ -30,16 +30,17 @@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef ARCH_H #define ARCH_H #include "opus_types.h" +#include "opus_defines.h" # if !defined(__GNUC_PREREQ) # if defined(__GNUC__)&&defined(__GNUC_MINOR__) # define __GNUC_PREREQ(_maj,_min) \ ((__GNUC__<<16)+__GNUC_MINOR__>=((_maj)<<16)+(_min)) # else # define __GNUC_PREREQ(_maj,_min) 0 # endif @@ -49,17 +50,17 @@ #define celt_fatal(str) _celt_fatal(str, __FILE__, __LINE__); #ifdef ENABLE_ASSERTIONS #include <stdio.h> #include <stdlib.h> #ifdef __GNUC__ __attribute__((noreturn)) #endif -static inline void _celt_fatal(const char *str, const char *file, int line) +static OPUS_INLINE void _celt_fatal(const char *str, const char *file, int line) { fprintf (stderr, "Fatal (internal) error in %s, line %d: %s\n", file, line, str); abort(); } #define celt_assert(cond) {if (!(cond)) {celt_fatal("assertion failed: " #cond);}} #define celt_assert2(cond, message) {if (!(cond)) {celt_fatal("assertion failed: " #cond "\n" message);}} #else #define celt_assert(cond) @@ -108,19 +109,19 @@ typedef opus_val32 celt_ener; #define SCALEOUT(a) (a) #ifdef FIXED_DEBUG #include "fixed_debug.h" #else #include "fixed_generic.h" -#ifdef ARMv5E_ASM +#ifdef OPUS_ARM_INLINE_EDSP #include "arm/fixed_armv5e.h" -#elif defined (ARMv4_ASM) +#elif defined (OPUS_ARM_INLINE_ASM) #include "arm/fixed_armv4.h" #elif defined (BFIN_ASM) #include "fixed_bfin.h" #elif defined (TI_C5X_ASM) #include "fixed_c5x.h" #elif defined (TI_C6X_ASM) #include "fixed_c6x.h" #endif @@ -180,16 +181,17 @@ typedef float celt_ener; #define MULT16_32_Q15(a,b) ((a)*(b)) #define MULT16_32_Q16(a,b) ((a)*(b)) #define MULT32_32_Q31(a,b) ((a)*(b)) #define MAC16_32_Q15(c,a,b) ((c)+(a)*(b)) #define MULT16_16_Q11_32(a,b) ((a)*(b)) +#define MULT16_16_Q11(a,b) ((a)*(b)) #define MULT16_16_Q13(a,b) ((a)*(b)) #define 
MULT16_16_Q14(a,b) ((a)*(b)) #define MULT16_16_Q15(a,b) ((a)*(b)) #define MULT16_16_P15(a,b) ((a)*(b)) #define MULT16_16_P13(a,b) ((a)*(b)) #define MULT16_16_P14(a,b) ((a)*(b)) #define MULT16_32_P16(a,b) ((a)*(b))
new file mode 100644 --- /dev/null +++ b/media/libopus/celt/arm/arm_celt_map.c @@ -0,0 +1,49 @@ +/* Copyright (c) 2010 Xiph.Org Foundation + * Copyright (c) 2013 Parrot */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "pitch.h" + +#if defined(OPUS_HAVE_RTCD) + +# if defined(FIXED_POINT) +opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, + const opus_val16 *, opus_val32 *, int , int) = { + celt_pitch_xcorr_c, /* ARMv4 */ + MAY_HAVE_EDSP(celt_pitch_xcorr), /* EDSP */ + MAY_HAVE_MEDIA(celt_pitch_xcorr), /* Media */ + MAY_HAVE_NEON(celt_pitch_xcorr) /* NEON */ +}; +# else +# error "Floating-point implementation is not supported by ARM asm yet." 
\ + "Reconfigure with --disable-rtcd or send patches." +# endif + +#endif
--- a/media/libopus/celt/arm/armcpu.c +++ b/media/libopus/celt/arm/armcpu.c @@ -44,41 +44,41 @@ #define OPUS_CPU_ARM_NEON (1<<3) #if defined(_MSC_VER) /*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/ # define WIN32_LEAN_AND_MEAN # define WIN32_EXTRA_LEAN # include <windows.h> -static inline opus_uint32 opus_cpu_capabilities(void){ +static OPUS_INLINE opus_uint32 opus_cpu_capabilities(void){ opus_uint32 flags; flags=0; - /* MSVC has no inline __asm support for ARM, but it does let you __emit + /* MSVC has no OPUS_INLINE __asm support for ARM, but it does let you __emit * instructions via their assembled hex code. * All of these instructions should be essentially nops. */ -# if defined(ARMv5E_ASM) +# if defined(OPUS_ARM_MAY_HAVE_EDSP) __try{ /*PLD [r13]*/ __emit(0xF5DDF000); flags|=OPUS_CPU_ARM_EDSP; } __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){ /*Ignore exception.*/ } -# if defined(ARMv6E_ASM) +# if defined(OPUS_ARM_MAY_HAVE_MEDIA) __try{ /*SHADD8 r3,r3,r3*/ __emit(0xE6333F93); flags|=OPUS_CPU_ARM_MEDIA; } __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){ /*Ignore exception.*/ } -# if defined(ARM_HAVE_NEON) +# if defined(OPUS_ARM_MAY_HAVE_NEON) __try{ /*VORR q0,q0,q0*/ __emit(0xF2200150); flags|=OPUS_CPU_ARM_NEON; } __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){ /*Ignore exception.*/ } @@ -102,38 +102,46 @@ opus_uint32 opus_cpu_capabilities(void) if(cpuinfo != NULL) { /* 512 should be enough for anybody (it's even enough for all the flags that * x86 has accumulated... so far). 
*/ char buf[512]; while(fgets(buf, 512, cpuinfo) != NULL) { +# if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_NEON) /* Search for edsp and neon flag */ if(memcmp(buf, "Features", 8) == 0) { char *p; +# if defined(OPUS_ARM_MAY_HAVE_EDSP) p = strstr(buf, " edsp"); if(p != NULL && (p[5] == ' ' || p[5] == '\n')) flags |= OPUS_CPU_ARM_EDSP; +# endif +# if defined(OPUS_ARM_MAY_HAVE_NEON) p = strstr(buf, " neon"); if(p != NULL && (p[5] == ' ' || p[5] == '\n')) flags |= OPUS_CPU_ARM_NEON; +# endif } +# endif +# if defined(OPUS_ARM_MAY_HAVE_MEDIA) /* Search for media capabilities (>= ARMv6) */ if(memcmp(buf, "CPU architecture:", 17) == 0) { int version; version = atoi(buf+17); if(version >= 6) flags |= OPUS_CPU_ARM_MEDIA; } +# endif } fclose(cpuinfo); } return flags; } #else /* The feature registers which can tell us what the processor supports are
--- a/media/libopus/celt/arm/armcpu.h +++ b/media/libopus/celt/arm/armcpu.h @@ -20,16 +20,52 @@ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* Original code from libtheora modified to suit to Opus */ +#if !defined(ARMCPU_H) +# define ARMCPU_H + +# if defined(OPUS_ARM_MAY_HAVE_EDSP) +# define MAY_HAVE_EDSP(name) name ## _edsp +# else +# define MAY_HAVE_EDSP(name) name ## _c +# endif + +# if defined(OPUS_ARM_MAY_HAVE_MEDIA) +# define MAY_HAVE_MEDIA(name) name ## _media +# else +# define MAY_HAVE_MEDIA(name) MAY_HAVE_EDSP(name) +# endif + +# if defined(OPUS_ARM_MAY_HAVE_NEON) +# define MAY_HAVE_NEON(name) name ## _neon +# else +# define MAY_HAVE_NEON(name) MAY_HAVE_MEDIA(name) +# endif -#ifndef ARMCPU_H -#define ARMCPU_H +# if defined(OPUS_ARM_PRESUME_EDSP) +# define PRESUME_EDSP(name) name ## _edsp +# else +# define PRESUME_EDSP(name) name ## _c +# endif +# if defined(OPUS_ARM_PRESUME_MEDIA) +# define PRESUME_MEDIA(name) name ## _media +# else +# define PRESUME_MEDIA(name) PRESUME_EDSP(name) +# endif + +# if defined(OPUS_ARM_PRESUME_NEON) +# define PRESUME_NEON(name) name ## _neon +# else +# define PRESUME_NEON(name) PRESUME_MEDIA(name) +# endif + +# if defined(OPUS_HAVE_RTCD) int opus_select_arch(void); +# endif #endif
new file mode 100644 --- /dev/null +++ b/media/libopus/celt/arm/armopts.s.in @@ -0,0 +1,37 @@ +/* Copyright (C) 2013 Mozilla Corporation */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +; Set the following to 1 if we have EDSP instructions +; (LDRD/STRD, etc., ARMv5E and later). +OPUS_ARM_MAY_HAVE_EDSP * @OPUS_ARM_MAY_HAVE_EDSP@ + +; Set the following to 1 if we have ARMv6 media instructions. +OPUS_ARM_MAY_HAVE_MEDIA * @OPUS_ARM_MAY_HAVE_MEDIA@ + +; Set the following to 1 if we have NEON (some ARMv7) +OPUS_ARM_MAY_HAVE_NEON * @OPUS_ARM_MAY_HAVE_NEON@ + +END
new file mode 100644 --- /dev/null +++ b/media/libopus/celt/arm/celt_pitch_xcorr_arm.s @@ -0,0 +1,545 @@ +; Copyright (c) 2007-2008 CSIRO +; Copyright (c) 2007-2009 Xiph.Org Foundation +; Copyright (c) 2013 Parrot +; Written by Aurélien Zanelli +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; +; - Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; +; - Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in the +; documentation and/or other materials provided with the distribution. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + AREA |.text|, CODE, READONLY + + GET celt/arm/armopts.s + +IF OPUS_ARM_MAY_HAVE_EDSP + EXPORT celt_pitch_xcorr_edsp +ENDIF + +IF OPUS_ARM_MAY_HAVE_NEON + EXPORT celt_pitch_xcorr_neon +ENDIF + +IF OPUS_ARM_MAY_HAVE_NEON + +; Compute sum[k]=sum(x[j]*y[j+k],j=0...len-1), k=0...3 +xcorr_kernel_neon PROC + ; input: + ; r3 = int len + ; r4 = opus_val16 *x + ; r5 = opus_val16 *y + ; q0 = opus_val32 sum[4] + ; output: + ; q0 = opus_val32 sum[4] + ; preserved: r0-r3, r6-r11, d2, q4-q7, q9-q15 + ; internal usage: + ; r12 = int j + ; d3 = y_3|y_2|y_1|y_0 + ; q2 = y_B|y_A|y_9|y_8|y_7|y_6|y_5|y_4 + ; q3 = x_7|x_6|x_5|x_4|x_3|x_2|x_1|x_0 + ; q8 = scratch + ; + ; Load y[0...3] + ; This requires len>0 to always be valid (which we assert in the C code). + VLD1.16 {d5}, [r5]! + SUBS r12, r3, #8 + BLE xcorr_kernel_neon_process4 +; Process 8 samples at a time. +; This loop loads one y value more than we actually need. Therefore we have to +; stop as soon as there are 8 or fewer samples left (instead of 7), to avoid +; reading past the end of the array. +xcorr_kernel_neon_process8 + ; This loop has 19 total instructions (10 cycles to issue, minimum), with + ; - 2 cycles of ARM instructions, + ; - 10 cycles of load/store/byte permute instructions, and + ; - 9 cycles of data processing instructions. + ; On a Cortex A8, we dual-issue the maximum amount (9 cycles) between the + ; latter two categories, meaning the whole loop should run in 10 cycles per + ; iteration, barring cache misses. + ; + ; Load x[0...7] + VLD1.16 {d6, d7}, [r4]! + ; Unlike VMOV, VAND is a data processing instruction (and doesn't get + ; assembled to VMOV, like VORR would), so it dual-issues with the prior VLD1. + VAND d3, d5, d5 + SUBS r12, r12, #8 + ; Load y[4...11] + VLD1.16 {d4, d5}, [r5]! 
+ VMLAL.S16 q0, d3, d6[0] + VEXT.16 d16, d3, d4, #1 + VMLAL.S16 q0, d4, d7[0] + VEXT.16 d17, d4, d5, #1 + VMLAL.S16 q0, d16, d6[1] + VEXT.16 d16, d3, d4, #2 + VMLAL.S16 q0, d17, d7[1] + VEXT.16 d17, d4, d5, #2 + VMLAL.S16 q0, d16, d6[2] + VEXT.16 d16, d3, d4, #3 + VMLAL.S16 q0, d17, d7[2] + VEXT.16 d17, d4, d5, #3 + VMLAL.S16 q0, d16, d6[3] + VMLAL.S16 q0, d17, d7[3] + BGT xcorr_kernel_neon_process8 +; Process 4 samples here if we have > 4 left (still reading one extra y value). +xcorr_kernel_neon_process4 + ADDS r12, r12, #4 + BLE xcorr_kernel_neon_process2 + ; Load x[0...3] + VLD1.16 d6, [r4]! + ; Use VAND since it's a data processing instruction again. + VAND d4, d5, d5 + SUB r12, r12, #4 + ; Load y[4...7] + VLD1.16 d5, [r5]! + VMLAL.S16 q0, d4, d6[0] + VEXT.16 d16, d4, d5, #1 + VMLAL.S16 q0, d16, d6[1] + VEXT.16 d16, d4, d5, #2 + VMLAL.S16 q0, d16, d6[2] + VEXT.16 d16, d4, d5, #3 + VMLAL.S16 q0, d16, d6[3] +; Process 2 samples here if we have > 2 left (still reading one extra y value). +xcorr_kernel_neon_process2 + ADDS r12, r12, #2 + BLE xcorr_kernel_neon_process1 + ; Load x[0...1] + VLD2.16 {d6[],d7[]}, [r4]! + ; Use VAND since it's a data processing instruction again. + VAND d4, d5, d5 + SUB r12, r12, #2 + ; Load y[4...5] + VLD1.32 {d5[]}, [r5]! + VMLAL.S16 q0, d4, d6 + VEXT.16 d16, d4, d5, #1 + ; Replace bottom copy of {y5,y4} in d5 with {y3,y2} from d4, using VSRI + ; instead of VEXT, since it's a data-processing instruction. + VSRI.64 d5, d4, #32 + VMLAL.S16 q0, d16, d7 +; Process 1 sample using the extra y value we loaded above. +xcorr_kernel_neon_process1 + ; Load next *x + VLD1.16 {d6[]}, [r4]! + ADDS r12, r12, #1 + ; y[0...3] are left in d5 from prior iteration(s) (if any) + VMLAL.S16 q0, d5, d6 + MOVLE pc, lr +; Now process 1 last sample, not reading ahead. + ; Load last *y + VLD1.16 {d4[]}, [r5]! + VSRI.64 d4, d5, #16 + ; Load last *x + VLD1.16 {d6[]}, [r4]! 
+ VMLAL.S16 q0, d4, d6 + MOV pc, lr + ENDP + +; opus_val32 celt_pitch_xcorr_neon(opus_val16 *_x, opus_val16 *_y, +; opus_val32 *xcorr, int len, int max_pitch) +celt_pitch_xcorr_neon PROC + ; input: + ; r0 = opus_val16 *_x + ; r1 = opus_val16 *_y + ; r2 = opus_val32 *xcorr + ; r3 = int len + ; output: + ; r0 = int maxcorr + ; internal usage: + ; r4 = opus_val16 *x (for xcorr_kernel_neon()) + ; r5 = opus_val16 *y (for xcorr_kernel_neon()) + ; r6 = int max_pitch + ; r12 = int j + ; q15 = int maxcorr[4] (q15 is not used by xcorr_kernel_neon()) + STMFD sp!, {r4-r6, lr} + LDR r6, [sp, #16] + VMOV.S32 q15, #1 + ; if (max_pitch < 4) goto celt_pitch_xcorr_neon_process4_done + SUBS r6, r6, #4 + BLT celt_pitch_xcorr_neon_process4_done +celt_pitch_xcorr_neon_process4 + ; xcorr_kernel_neon parameters: + ; r3 = len, r4 = _x, r5 = _y, q0 = {0, 0, 0, 0} + MOV r4, r0 + MOV r5, r1 + VEOR q0, q0, q0 + ; xcorr_kernel_neon only modifies r4, r5, r12, q0...q3, and q8. + ; So we don't save/restore any other registers. + BL xcorr_kernel_neon + SUBS r6, r6, #4 + VST1.32 {q0}, [r2]! + ; _y += 4 + ADD r1, r1, #8 + VMAX.S32 q15, q15, q0 + ; if (max_pitch >= 4) goto celt_pitch_xcorr_neon_process4 + BGE celt_pitch_xcorr_neon_process4 +; We have less than 4 sums left to compute. +celt_pitch_xcorr_neon_process4_done + ADDS r6, r6, #4 + ; Reduce maxcorr to a single value + VMAX.S32 d30, d30, d31 + VPMAX.S32 d30, d30, d30 + ; if (max_pitch <= 0) goto celt_pitch_xcorr_neon_done + BLE celt_pitch_xcorr_neon_done +; Now compute each remaining sum one at a time. +celt_pitch_xcorr_neon_process_remaining + MOV r4, r0 + MOV r5, r1 + VMOV.I32 q0, #0 + SUBS r12, r3, #8 + BLT celt_pitch_xcorr_neon_process_remaining4 +; Sum terms 8 at a time. +celt_pitch_xcorr_neon_process_remaining_loop8 + ; Load x[0...7] + VLD1.16 {q1}, [r4]! + ; Load y[0...7] + VLD1.16 {q2}, [r5]! + SUBS r12, r12, #8 + VMLAL.S16 q0, d4, d2 + VMLAL.S16 q0, d5, d3 + BGE celt_pitch_xcorr_neon_process_remaining_loop8 +; Sum terms 4 at a time. 
+celt_pitch_xcorr_neon_process_remaining4 + ADDS r12, r12, #4 + BLT celt_pitch_xcorr_neon_process_remaining4_done + ; Load x[0...3] + VLD1.16 {d2}, [r4]! + ; Load y[0...3] + VLD1.16 {d3}, [r5]! + SUB r12, r12, #4 + VMLAL.S16 q0, d3, d2 +celt_pitch_xcorr_neon_process_remaining4_done + ; Reduce the sum to a single value. + VADD.S32 d0, d0, d1 + VPADDL.S32 d0, d0 + ADDS r12, r12, #4 + BLE celt_pitch_xcorr_neon_process_remaining_loop_done +; Sum terms 1 at a time. +celt_pitch_xcorr_neon_process_remaining_loop1 + VLD1.16 {d2[]}, [r4]! + VLD1.16 {d3[]}, [r5]! + SUBS r12, r12, #1 + VMLAL.S16 q0, d2, d3 + BGT celt_pitch_xcorr_neon_process_remaining_loop1 +celt_pitch_xcorr_neon_process_remaining_loop_done + VST1.32 {d0[0]}, [r2]! + VMAX.S32 d30, d30, d0 + SUBS r6, r6, #1 + ; _y++ + ADD r1, r1, #2 + ; if (--max_pitch > 0) goto celt_pitch_xcorr_neon_process_remaining + BGT celt_pitch_xcorr_neon_process_remaining +celt_pitch_xcorr_neon_done + VMOV.32 r0, d30[0] + LDMFD sp!, {r4-r6, pc} + ENDP + +ENDIF + +IF OPUS_ARM_MAY_HAVE_EDSP + +; This will get used on ARMv7 devices without NEON, so it has been optimized +; to take advantage of dual-issuing where possible. +xcorr_kernel_edsp PROC + ; input: + ; r3 = int len + ; r4 = opus_val16 *_x (must be 32-bit aligned) + ; r5 = opus_val16 *_y (must be 32-bit aligned) + ; r6...r9 = opus_val32 sum[4] + ; output: + ; r6...r9 = opus_val32 sum[4] + ; preserved: r0-r5 + ; internal usage + ; r2 = int j + ; r12,r14 = opus_val16 x[4] + ; r10,r11 = opus_val16 y[4] + STMFD sp!, {r2,r4,r5,lr} + LDR r10, [r5], #4 ; Load y[0...1] + SUBS r2, r3, #4 ; j = len-4 + LDR r11, [r5], #4 ; Load y[2...3] + BLE xcorr_kernel_edsp_process4_done + LDR r12, [r4], #4 ; Load x[0...1] + ; Stall +xcorr_kernel_edsp_process4 + ; The multiplies must issue from pipeline 0, and can't dual-issue with each + ; other. Every other instruction here dual-issues with a multiply, and is + ; thus "free". There should be no stalls in the body of the loop. 
+ SMLABB r6, r12, r10, r6 ; sum[0] = MAC16_16(sum[0],x_0,y_0) + LDR r14, [r4], #4 ; Load x[2...3] + SMLABT r7, r12, r10, r7 ; sum[1] = MAC16_16(sum[1],x_0,y_1) + SUBS r2, r2, #4 ; j-=4 + SMLABB r8, r12, r11, r8 ; sum[2] = MAC16_16(sum[2],x_0,y_2) + SMLABT r9, r12, r11, r9 ; sum[3] = MAC16_16(sum[3],x_0,y_3) + SMLATT r6, r12, r10, r6 ; sum[0] = MAC16_16(sum[0],x_1,y_1) + LDR r10, [r5], #4 ; Load y[4...5] + SMLATB r7, r12, r11, r7 ; sum[1] = MAC16_16(sum[1],x_1,y_2) + SMLATT r8, r12, r11, r8 ; sum[2] = MAC16_16(sum[2],x_1,y_3) + SMLATB r9, r12, r10, r9 ; sum[3] = MAC16_16(sum[3],x_1,y_4) + LDRGT r12, [r4], #4 ; Load x[0...1] + SMLABB r6, r14, r11, r6 ; sum[0] = MAC16_16(sum[0],x_2,y_2) + SMLABT r7, r14, r11, r7 ; sum[1] = MAC16_16(sum[1],x_2,y_3) + SMLABB r8, r14, r10, r8 ; sum[2] = MAC16_16(sum[2],x_2,y_4) + SMLABT r9, r14, r10, r9 ; sum[3] = MAC16_16(sum[3],x_2,y_5) + SMLATT r6, r14, r11, r6 ; sum[0] = MAC16_16(sum[0],x_3,y_3) + LDR r11, [r5], #4 ; Load y[6...7] + SMLATB r7, r14, r10, r7 ; sum[1] = MAC16_16(sum[1],x_3,y_4) + SMLATT r8, r14, r10, r8 ; sum[2] = MAC16_16(sum[2],x_3,y_5) + SMLATB r9, r14, r11, r9 ; sum[3] = MAC16_16(sum[3],x_3,y_6) + BGT xcorr_kernel_edsp_process4 +xcorr_kernel_edsp_process4_done + ADDS r2, r2, #4 + BLE xcorr_kernel_edsp_done + LDRH r12, [r4], #2 ; r12 = *x++ + SUBS r2, r2, #1 ; j-- + ; Stall + SMLABB r6, r12, r10, r6 ; sum[0] = MAC16_16(sum[0],x,y_0) + LDRGTH r14, [r4], #2 ; r14 = *x++ + SMLABT r7, r12, r10, r7 ; sum[1] = MAC16_16(sum[1],x,y_1) + SMLABB r8, r12, r11, r8 ; sum[2] = MAC16_16(sum[2],x,y_2) + SMLABT r9, r12, r11, r9 ; sum[3] = MAC16_16(sum[3],x,y_3) + BLE xcorr_kernel_edsp_done + SMLABT r6, r14, r10, r6 ; sum[0] = MAC16_16(sum[0],x,y_1) + SUBS r2, r2, #1 ; j-- + SMLABB r7, r14, r11, r7 ; sum[1] = MAC16_16(sum[1],x,y_2) + LDRH r10, [r5], #2 ; r10 = y_4 = *y++ + SMLABT r8, r14, r11, r8 ; sum[2] = MAC16_16(sum[2],x,y_3) + LDRGTH r12, [r4], #2 ; r12 = *x++ + SMLABB r9, r14, r10, r9 ; sum[3] = MAC16_16(sum[3],x,y_4) + BLE 
xcorr_kernel_edsp_done + SMLABB r6, r12, r11, r6 ; sum[0] = MAC16_16(sum[0],tmp,y_2) + CMP r2, #1 ; j-- + SMLABT r7, r12, r11, r7 ; sum[1] = MAC16_16(sum[1],tmp,y_3) + LDRH r2, [r5], #2 ; r2 = y_5 = *y++ + SMLABB r8, r12, r10, r8 ; sum[2] = MAC16_16(sum[2],tmp,y_4) + LDRGTH r14, [r4] ; r14 = *x + SMLABB r9, r12, r2, r9 ; sum[3] = MAC16_16(sum[3],tmp,y_5) + BLE xcorr_kernel_edsp_done + SMLABT r6, r14, r11, r6 ; sum[0] = MAC16_16(sum[0],tmp,y_3) + LDRH r11, [r5] ; r11 = y_6 = *y + SMLABB r7, r14, r10, r7 ; sum[1] = MAC16_16(sum[1],tmp,y_4) + SMLABB r8, r14, r2, r8 ; sum[2] = MAC16_16(sum[2],tmp,y_5) + SMLABB r9, r14, r11, r9 ; sum[3] = MAC16_16(sum[3],tmp,y_6) +xcorr_kernel_edsp_done + LDMFD sp!, {r2,r4,r5,pc} + ENDP + +celt_pitch_xcorr_edsp PROC + ; input: + ; r0 = opus_val16 *_x (must be 32-bit aligned) + ; r1 = opus_val16 *_y (only needs to be 16-bit aligned) + ; r2 = opus_val32 *xcorr + ; r3 = int len + ; output: + ; r0 = maxcorr + ; internal usage + ; r4 = opus_val16 *x + ; r5 = opus_val16 *y + ; r6 = opus_val32 sum0 + ; r7 = opus_val32 sum1 + ; r8 = opus_val32 sum2 + ; r9 = opus_val32 sum3 + ; r1 = int max_pitch + ; r12 = int j + STMFD sp!, {r4-r11, lr} + MOV r5, r1 + LDR r1, [sp, #36] + MOV r4, r0 + TST r5, #3 + ; maxcorr = 1 + MOV r0, #1 + BEQ celt_pitch_xcorr_edsp_process1u_done +; Compute one sum at the start to make y 32-bit aligned. 
+ SUBS r12, r3, #4 + ; r14 = sum = 0 + MOV r14, #0 + LDRH r8, [r5], #2 + BLE celt_pitch_xcorr_edsp_process1u_loop4_done + LDR r6, [r4], #4 + MOV r8, r8, LSL #16 +celt_pitch_xcorr_edsp_process1u_loop4 + LDR r9, [r5], #4 + SMLABT r14, r6, r8, r14 ; sum = MAC16_16(sum, x_0, y_0) + LDR r7, [r4], #4 + SMLATB r14, r6, r9, r14 ; sum = MAC16_16(sum, x_1, y_1) + LDR r8, [r5], #4 + SMLABT r14, r7, r9, r14 ; sum = MAC16_16(sum, x_2, y_2) + SUBS r12, r12, #4 ; j-=4 + SMLATB r14, r7, r8, r14 ; sum = MAC16_16(sum, x_3, y_3) + LDRGT r6, [r4], #4 + BGT celt_pitch_xcorr_edsp_process1u_loop4 + MOV r8, r8, LSR #16 +celt_pitch_xcorr_edsp_process1u_loop4_done + ADDS r12, r12, #4 +celt_pitch_xcorr_edsp_process1u_loop1 + LDRGEH r6, [r4], #2 + ; Stall + SMLABBGE r14, r6, r8, r14 ; sum = MAC16_16(sum, *x, *y) + SUBGES r12, r12, #1 + LDRGTH r8, [r5], #2 + BGT celt_pitch_xcorr_edsp_process1u_loop1 + ; Restore _x + SUB r4, r4, r3, LSL #1 + ; Restore and advance _y + SUB r5, r5, r3, LSL #1 + ; maxcorr = max(maxcorr, sum) + CMP r0, r14 + ADD r5, r5, #2 + MOVLT r0, r14 + SUBS r1, r1, #1 + ; xcorr[i] = sum + STR r14, [r2], #4 + BLE celt_pitch_xcorr_edsp_done +celt_pitch_xcorr_edsp_process1u_done + ; if (max_pitch < 4) goto celt_pitch_xcorr_edsp_process2 + SUBS r1, r1, #4 + BLT celt_pitch_xcorr_edsp_process2 +celt_pitch_xcorr_edsp_process4 + ; xcorr_kernel_edsp parameters: + ; r3 = len, r4 = _x, r5 = _y, r6...r9 = sum[4] = {0, 0, 0, 0} + MOV r6, #0 + MOV r7, #0 + MOV r8, #0 + MOV r9, #0 + BL xcorr_kernel_edsp ; xcorr_kernel_edsp(_x, _y+i, xcorr+i, len) + ; maxcorr = max(maxcorr, sum0, sum1, sum2, sum3) + CMP r0, r6 + ; _y+=4 + ADD r5, r5, #8 + MOVLT r0, r6 + CMP r0, r7 + MOVLT r0, r7 + CMP r0, r8 + MOVLT r0, r8 + CMP r0, r9 + MOVLT r0, r9 + STMIA r2!, {r6-r9} + SUBS r1, r1, #4 + BGE celt_pitch_xcorr_edsp_process4 +celt_pitch_xcorr_edsp_process2 + ADDS r1, r1, #2 + BLT celt_pitch_xcorr_edsp_process1a + SUBS r12, r3, #4 + ; {r10, r11} = {sum0, sum1} = {0, 0} + MOV r10, #0 + MOV r11, #0 + LDR r8, 
[r5], #4 + BLE celt_pitch_xcorr_edsp_process2_loop_done + LDR r6, [r4], #4 + LDR r9, [r5], #4 +celt_pitch_xcorr_edsp_process2_loop4 + SMLABB r10, r6, r8, r10 ; sum0 = MAC16_16(sum0, x_0, y_0) + LDR r7, [r4], #4 + SMLABT r11, r6, r8, r11 ; sum1 = MAC16_16(sum1, x_0, y_1) + SUBS r12, r12, #4 ; j-=4 + SMLATT r10, r6, r8, r10 ; sum0 = MAC16_16(sum0, x_1, y_1) + LDR r8, [r5], #4 + SMLATB r11, r6, r9, r11 ; sum1 = MAC16_16(sum1, x_1, y_2) + LDRGT r6, [r4], #4 + SMLABB r10, r7, r9, r10 ; sum0 = MAC16_16(sum0, x_2, y_2) + SMLABT r11, r7, r9, r11 ; sum1 = MAC16_16(sum1, x_2, y_3) + SMLATT r10, r7, r9, r10 ; sum0 = MAC16_16(sum0, x_3, y_3) + LDRGT r9, [r5], #4 + SMLATB r11, r7, r8, r11 ; sum1 = MAC16_16(sum1, x_3, y_4) + BGT celt_pitch_xcorr_edsp_process2_loop4 +celt_pitch_xcorr_edsp_process2_loop_done + ADDS r12, r12, #2 + BLE celt_pitch_xcorr_edsp_process2_1 + LDR r6, [r4], #4 + ; Stall + SMLABB r10, r6, r8, r10 ; sum0 = MAC16_16(sum0, x_0, y_0) + LDR r9, [r5], #4 + SMLABT r11, r6, r8, r11 ; sum1 = MAC16_16(sum1, x_0, y_1) + SUB r12, r12, #2 + SMLATT r10, r6, r8, r10 ; sum0 = MAC16_16(sum0, x_1, y_1) + MOV r8, r9 + SMLATB r11, r6, r9, r11 ; sum1 = MAC16_16(sum1, x_1, y_2) +celt_pitch_xcorr_edsp_process2_1 + LDRH r6, [r4], #2 + ADDS r12, r12, #1 + ; Stall + SMLABB r10, r6, r8, r10 ; sum0 = MAC16_16(sum0, x_0, y_0) + LDRGTH r7, [r4], #2 + SMLABT r11, r6, r8, r11 ; sum1 = MAC16_16(sum1, x_0, y_1) + BLE celt_pitch_xcorr_edsp_process2_done + LDRH r9, [r5], #2 + SMLABT r10, r7, r8, r10 ; sum0 = MAC16_16(sum0, x_0, y_1) + SMLABB r11, r7, r9, r11 ; sum1 = MAC16_16(sum1, x_0, y_2) +celt_pitch_xcorr_edsp_process2_done + ; Restore _x + SUB r4, r4, r3, LSL #1 + ; Restore and advance _y + SUB r5, r5, r3, LSL #1 + ; maxcorr = max(maxcorr, sum0) + CMP r0, r10 + ADD r5, r5, #2 + MOVLT r0, r10 + SUB r1, r1, #2 + ; maxcorr = max(maxcorr, sum1) + CMP r0, r11 + ; xcorr[i] = sum + STR r10, [r2], #4 + MOVLT r0, r11 + STR r11, [r2], #4 +celt_pitch_xcorr_edsp_process1a + ADDS r1, r1, #1 + BLT 
celt_pitch_xcorr_edsp_done + SUBS r12, r3, #4 + ; r14 = sum = 0 + MOV r14, #0 + BLT celt_pitch_xcorr_edsp_process1a_loop_done + LDR r6, [r4], #4 + LDR r8, [r5], #4 + LDR r7, [r4], #4 + LDR r9, [r5], #4 +celt_pitch_xcorr_edsp_process1a_loop4 + SMLABB r14, r6, r8, r14 ; sum = MAC16_16(sum, x_0, y_0) + SUBS r12, r12, #4 ; j-=4 + SMLATT r14, r6, r8, r14 ; sum = MAC16_16(sum, x_1, y_1) + LDRGE r6, [r4], #4 + SMLABB r14, r7, r9, r14 ; sum = MAC16_16(sum, x_2, y_2) + LDRGE r8, [r5], #4 + SMLATT r14, r7, r9, r14 ; sum = MAC16_16(sum, x_3, y_3) + LDRGE r7, [r4], #4 + LDRGE r9, [r5], #4 + BGE celt_pitch_xcorr_edsp_process1a_loop4 +celt_pitch_xcorr_edsp_process1a_loop_done + ADDS r12, r12, #2 + LDRGE r6, [r4], #4 + LDRGE r8, [r5], #4 + ; Stall + SMLABBGE r14, r6, r8, r14 ; sum = MAC16_16(sum, x_0, y_0) + SUBGE r12, r12, #2 + SMLATTGE r14, r6, r8, r14 ; sum = MAC16_16(sum, x_1, y_1) + ADDS r12, r12, #1 + LDRGEH r6, [r4], #2 + LDRGEH r8, [r5], #2 + ; Stall + SMLABBGE r14, r6, r8, r14 ; sum = MAC16_16(sum, *x, *y) + ; maxcorr = max(maxcorr, sum) + CMP r0, r14 + ; xcorr[i] = sum + STR r14, [r2], #4 + MOVLT r0, r14 +celt_pitch_xcorr_edsp_done + LDMFD sp!, {r4-r11, pc} + ENDP + +ENDIF + +END
--- a/media/libopus/celt/arm/fixed_armv4.h +++ b/media/libopus/celt/arm/fixed_armv4.h @@ -24,34 +24,34 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef FIXED_ARMv4_H #define FIXED_ARMv4_H /** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */ #undef MULT16_32_Q16 -static inline opus_val32 MULT16_32_Q16_armv4(opus_val16 a, opus_val32 b) +static OPUS_INLINE opus_val32 MULT16_32_Q16_armv4(opus_val16 a, opus_val32 b) { unsigned rd_lo; int rd_hi; __asm__( "#MULT16_32_Q16\n\t" "smull %0, %1, %2, %3\n\t" : "=&r"(rd_lo), "=&r"(rd_hi) : "%r"(b),"r"(a<<16) ); return rd_hi; } #define MULT16_32_Q16(a, b) (MULT16_32_Q16_armv4(a, b)) /** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */ #undef MULT16_32_Q15 -static inline opus_val32 MULT16_32_Q15_armv4(opus_val16 a, opus_val32 b) +static OPUS_INLINE opus_val32 MULT16_32_Q15_armv4(opus_val16 a, opus_val32 b) { unsigned rd_lo; int rd_hi; __asm__( "#MULT16_32_Q15\n\t" "smull %0, %1, %2, %3\n\t" : "=&r"(rd_lo), "=&r"(rd_hi) : "%r"(b), "r"(a<<16)
--- a/media/libopus/celt/arm/fixed_armv5e.h +++ b/media/libopus/celt/arm/fixed_armv5e.h @@ -29,33 +29,33 @@ #ifndef FIXED_ARMv5E_H #define FIXED_ARMv5E_H #include "fixed_armv4.h" /** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */ #undef MULT16_32_Q16 -static inline opus_val32 MULT16_32_Q16_armv5e(opus_val16 a, opus_val32 b) +static OPUS_INLINE opus_val32 MULT16_32_Q16_armv5e(opus_val16 a, opus_val32 b) { int res; __asm__( "#MULT16_32_Q16\n\t" "smulwb %0, %1, %2\n\t" : "=r"(res) : "r"(b),"r"(a) ); return res; } #define MULT16_32_Q16(a, b) (MULT16_32_Q16_armv5e(a, b)) /** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */ #undef MULT16_32_Q15 -static inline opus_val32 MULT16_32_Q15_armv5e(opus_val16 a, opus_val32 b) +static OPUS_INLINE opus_val32 MULT16_32_Q15_armv5e(opus_val16 a, opus_val32 b) { int res; __asm__( "#MULT16_32_Q15\n\t" "smulwb %0, %1, %2\n\t" : "=r"(res) : "r"(b), "r"(a) ); @@ -63,49 +63,49 @@ static inline opus_val32 MULT16_32_Q15_a } #define MULT16_32_Q15(a, b) (MULT16_32_Q15_armv5e(a, b)) /** 16x32 multiply, followed by a 15-bit shift right and 32-bit add. b must fit in 31 bits. Result fits in 32 bits. 
*/ #undef MAC16_32_Q15 -static inline opus_val32 MAC16_32_Q15_armv5e(opus_val32 c, opus_val16 a, +static OPUS_INLINE opus_val32 MAC16_32_Q15_armv5e(opus_val32 c, opus_val16 a, opus_val32 b) { int res; __asm__( "#MAC16_32_Q15\n\t" "smlawb %0, %1, %2, %3;\n" : "=r"(res) : "r"(b<<1), "r"(a), "r"(c) ); return res; } #define MAC16_32_Q15(c, a, b) (MAC16_32_Q15_armv5e(c, a, b)) /** 16x16 multiply-add where the result fits in 32 bits */ #undef MAC16_16 -static inline opus_val32 MAC16_16_armv5e(opus_val32 c, opus_val16 a, +static OPUS_INLINE opus_val32 MAC16_16_armv5e(opus_val32 c, opus_val16 a, opus_val16 b) { int res; __asm__( "#MAC16_16\n\t" "smlabb %0, %1, %2, %3;\n" : "=r"(res) : "r"(a), "r"(b), "r"(c) ); return res; } #define MAC16_16(c, a, b) (MAC16_16_armv5e(c, a, b)) /** 16x16 multiplication where the result fits in 32 bits */ #undef MULT16_16 -static inline opus_val32 MULT16_16_armv5e(opus_val16 a, opus_val16 b) +static OPUS_INLINE opus_val32 MULT16_16_armv5e(opus_val16 a, opus_val16 b) { int res; __asm__( "#MULT16_16\n\t" "smulbb %0, %1, %2;\n" : "=r"(res) : "r"(a), "r"(b) );
new file mode 100644 --- /dev/null +++ b/media/libopus/celt/arm/pitch_arm.h @@ -0,0 +1,57 @@ +/* Copyright (c) 2010 Xiph.Org Foundation + * Copyright (c) 2013 Parrot */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#if !defined(PITCH_ARM_H) +# define PITCH_ARM_H + +# include "armcpu.h" + +# if defined(FIXED_POINT) + +# if defined(OPUS_ARM_MAY_HAVE_NEON) +opus_val32 celt_pitch_xcorr_neon(const opus_val16 *_x, const opus_val16 *_y, + opus_val32 *xcorr, int len, int max_pitch); +# endif + +# if defined(OPUS_ARM_MAY_HAVE_MEDIA) +# define celt_pitch_xcorr_media MAY_HAVE_EDSP(celt_pitch_xcorr) +# endif + +# if defined(OPUS_ARM_MAY_HAVE_EDSP) +opus_val32 celt_pitch_xcorr_edsp(const opus_val16 *_x, const opus_val16 *_y, + opus_val32 *xcorr, int len, int max_pitch); +# endif + +# if !defined(OPUS_HAVE_RTCD) +# define OVERRIDE_PITCH_XCORR (1) +# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \ + ((void)(arch),PRESUME_NEON(celt_pitch_xcorr)(_x, _y, xcorr, len, max_pitch)) +# endif + +# endif + +#endif
--- a/media/libopus/celt/bands.c +++ b/media/libopus/celt/bands.c @@ -209,30 +209,46 @@ void denormalise_bands(const CELTMode *m opus_val16 g; opus_val16 lg; #ifdef FIXED_POINT int shift; #endif j=M*eBands[i]; band_end = M*eBands[i+1]; lg = ADD16(bandLogE[i+c*m->nbEBands], SHL16((opus_val16)eMeans[i],6)); -#ifdef FIXED_POINT +#ifndef FIXED_POINT + g = celt_exp2(lg); +#else /* Handle the integer part of the log energy */ shift = 16-(lg>>DB_SHIFT); if (shift>31) { shift=0; g=0; } else { /* Handle the fractional part. */ g = celt_exp2_frac(lg&((1<<DB_SHIFT)-1)); } -#else - g = celt_exp2(lg); + /* Handle extreme gains with negative shift. */ + if (shift<0) + { + /* For shift < -2 we'd be likely to overflow, so we're capping + the gain here. This shouldn't happen unless the bitstream is + already corrupted. */ + if (shift < -2) + { + g = 32767; + shift = -2; + } + do { + *f++ = SHL32(MULT16_16(*x++, g), -shift); + } while (++j<band_end); + } else #endif + /* Be careful of the fixed-point "else" just above when changing this code */ do { *f++ = SHR32(MULT16_16(*x++, g), shift); } while (++j<band_end); } celt_assert(start <= end); for (i=M*eBands[end];i<N;i++) *f++ = 0; } while (++c<C); @@ -487,17 +503,17 @@ int spreading_decision(const CELTMode *m if (hf_sum > 22) *tapset_decision=2; else if (hf_sum > 18) *tapset_decision=1; else *tapset_decision=0; } /*printf("%d %d %d\n", hf_sum, *hf_average, *tapset_decision);*/ - celt_assert(nbBands>0); /*M*(eBands[end]-eBands[end-1]) <= 8 assures this*/ + celt_assert(nbBands>0); /* end has to be non-zero */ sum /= nbBands; /* Recursive averaging */ sum = (sum+*average)>>1; *average = sum; /* Hysteresis */ sum = (3*sum + (((3-last_decision)<<7) + 64) + 2)>>2; if (sum < 80) { @@ -864,17 +880,16 @@ static unsigned quant_partition(struct b int N, int b, int B, celt_norm *lowband, int LM, opus_val16 gain, int fill) { const unsigned char *cache; int q; int curr_bits; int imid=0, iside=0; - int N_B=N; int B0=B; opus_val16 mid=0, side=0; 
unsigned cm=0; #ifdef RESYNTH int resynth = 1; #else int resynth = !ctx->encode; #endif @@ -886,18 +901,16 @@ static unsigned quant_partition(struct b ec_ctx *ec; encode = ctx->encode; m = ctx->m; i = ctx->i; spread = ctx->spread; ec = ctx->ec; - N_B /= B; - /* If we need 1.5 more bit than we can produce, split the band in two. */ cache = m->cache.bits + m->cache.index[(LM+1)*m->nbEBands+i]; if (LM != -1 && b > cache[cache[0]]+12 && N>2) { int mbits, sbits, delta; int itheta; int qalloc; struct split_ctx sctx; @@ -1067,17 +1080,16 @@ static unsigned quant_band(struct band_c int tf_change; encode = ctx->encode; tf_change = ctx->tf_change; longBlocks = B0==1; N_B /= B; - N_B0 = N_B; /* Special case for one sample */ if (N==1) { return quant_band_n1(ctx, X, NULL, b, lowband_out); } if (tf_change>0)
--- a/media/libopus/celt/celt.h +++ b/media/libopus/celt/celt.h @@ -47,30 +47,34 @@ extern "C" { #endif #define CELTEncoder OpusCustomEncoder #define CELTDecoder OpusCustomDecoder #define CELTMode OpusCustomMode typedef struct { int valid; - opus_val16 tonality; - opus_val16 tonality_slope; - opus_val16 noisiness; - opus_val16 activity; - opus_val16 music_prob; + float tonality; + float tonality_slope; + float noisiness; + float activity; + float music_prob; int bandwidth; }AnalysisInfo; #define __celt_check_mode_ptr_ptr(ptr) ((ptr) + ((ptr) - (const CELTMode**)(ptr))) #define __celt_check_analysis_ptr(ptr) ((ptr) + ((ptr) - (const AnalysisInfo*)(ptr))) /* Encoder/decoder Requests */ +/* Expose this option again when variable framesize actually works */ +#define OPUS_FRAMESIZE_VARIABLE 5010 /**< Optimize the frame size dynamically */ + + #define CELT_SET_PREDICTION_REQUEST 10002 /** Controls the use of interframe prediction. 0=Independent frames 1=Short term interframe prediction allowed 2=Long term prediction allowed */ #define CELT_SET_PREDICTION(x) CELT_SET_PREDICTION_REQUEST, __opus_check_int(x) @@ -104,29 +108,27 @@ typedef struct { #define CELT_SET_TONALITY_SLOPE(x) CELT_SET_TONALITY_SLOPE_REQUEST, __opus_check_int(x) #define CELT_SET_ANALYSIS_REQUEST 10022 #define CELT_SET_ANALYSIS(x) CELT_SET_ANALYSIS_REQUEST, __celt_check_analysis_ptr(x) #define OPUS_SET_LFE_REQUEST 10024 #define OPUS_SET_LFE(x) OPUS_SET_LFE_REQUEST, __opus_check_int(x) -#define OPUS_SET_ENERGY_SAVE_REQUEST 10026 -#define OPUS_SET_ENERGY_SAVE(x) OPUS_SET_ENERGY_SAVE_REQUEST, __opus_check_val16_ptr(x) - -#define OPUS_SET_ENERGY_MASK_REQUEST 10028 +#define OPUS_SET_ENERGY_MASK_REQUEST 10026 #define OPUS_SET_ENERGY_MASK(x) OPUS_SET_ENERGY_MASK_REQUEST, __opus_check_val16_ptr(x) /* Encoder stuff */ int celt_encoder_get_size(int channels); int celt_encode_with_ec(OpusCustomEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes, 
ec_enc *enc); -int celt_encoder_init(CELTEncoder *st, opus_int32 sampling_rate, int channels); +int celt_encoder_init(CELTEncoder *st, opus_int32 sampling_rate, int channels, + int arch); /* Decoder stuff */ int celt_decoder_get_size(int channels); @@ -136,17 +138,17 @@ int celt_decode_with_ec(OpusCustomDecode #define celt_encoder_ctl opus_custom_encoder_ctl #define celt_decoder_ctl opus_custom_decoder_ctl #ifdef CUSTOM_MODES #define OPUS_CUSTOM_NOSTATIC #else -#define OPUS_CUSTOM_NOSTATIC static inline +#define OPUS_CUSTOM_NOSTATIC static OPUS_INLINE #endif static const unsigned char trim_icdf[11] = {126, 124, 119, 109, 87, 41, 19, 9, 4, 2, 0}; /* Probs: NONE: 21.875%, LIGHT: 6.25%, NORMAL: 65.625%, AGGRESSIVE: 6.25% */ static const unsigned char spread_icdf[4] = {25, 23, 2, 0}; static const unsigned char tapset_icdf[3]={2,1,0}; @@ -161,43 +163,46 @@ static const unsigned char toOpusTable[2 static const unsigned char fromOpusTable[16] = { 0x80, 0x88, 0x90, 0x98, 0x40, 0x48, 0x50, 0x58, 0x20, 0x28, 0x30, 0x38, 0x00, 0x08, 0x10, 0x18 }; -static inline int toOpus(unsigned char c) +static OPUS_INLINE int toOpus(unsigned char c) { int ret=0; if (c<0xA0) ret = toOpusTable[c>>3]; if (ret == 0) return -1; else return ret|(c&0x7); } -static inline int fromOpus(unsigned char c) +static OPUS_INLINE int fromOpus(unsigned char c) { if (c<0x80) return -1; else return fromOpusTable[(c>>3)-16] | (c&0x7); } #endif /* CUSTOM_MODES */ #define COMBFILTER_MAXPERIOD 1024 #define COMBFILTER_MINPERIOD 15 extern const signed char tf_select_table[4][8]; int resampling_factor(opus_int32 rate); +void celt_preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp, + int N, int CC, int upsample, const opus_val16 *coef, celt_sig *mem, int clip); + void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N, opus_val16 g0, opus_val16 g1, int tapset0, int tapset1, const opus_val16 *window, int overlap); void init_caps(const CELTMode *m,int *cap,int LM,int C); #ifdef 
RESYNTH void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem, celt_sig * OPUS_RESTRICT scratch);
--- a/media/libopus/celt/celt_decoder.c +++ b/media/libopus/celt/celt_decoder.c @@ -170,17 +170,17 @@ OPUS_CUSTOM_NOSTATIC int opus_custom_dec #ifdef CUSTOM_MODES void opus_custom_decoder_destroy(CELTDecoder *st) { opus_free(st); } #endif /* CUSTOM_MODES */ -static inline opus_val16 SIG2WORD16(celt_sig x) +static OPUS_INLINE opus_val16 SIG2WORD16(celt_sig x) { #ifdef FIXED_POINT x = PSHR32(x, SIG_SHIFT); x = MAX32(x, -32768); x = MIN32(x, 32767); return EXTRACT16(x); #else return (opus_val16)x; @@ -442,20 +442,21 @@ static void celt_decode_lost(CELTDecoder int pitch_index; VARDECL(opus_val32, etmp); VARDECL(opus_val16, exc); if (loss_count == 0) { VARDECL( opus_val16, lp_pitch_buf ); ALLOC( lp_pitch_buf, DECODE_BUFFER_SIZE>>1, opus_val16 ); - pitch_downsample(decode_mem, lp_pitch_buf, DECODE_BUFFER_SIZE, C); + pitch_downsample(decode_mem, lp_pitch_buf, + DECODE_BUFFER_SIZE, C, st->arch); pitch_search(lp_pitch_buf+(PLC_PITCH_LAG_MAX>>1), lp_pitch_buf, DECODE_BUFFER_SIZE-PLC_PITCH_LAG_MAX, - PLC_PITCH_LAG_MAX-PLC_PITCH_LAG_MIN, &pitch_index); + PLC_PITCH_LAG_MAX-PLC_PITCH_LAG_MIN, &pitch_index, st->arch); pitch_index = PLC_PITCH_LAG_MAX-pitch_index; st->last_pitch_index = pitch_index; } else { pitch_index = st->last_pitch_index; fade = QCONST16(.8f,15); } ALLOC(etmp, overlap, opus_val32); @@ -476,17 +477,18 @@ static void celt_decode_lost(CELTDecoder exc[i] = ROUND16(buf[DECODE_BUFFER_SIZE-MAX_PERIOD+i], SIG_SHIFT); } if (loss_count == 0) { opus_val32 ac[LPC_ORDER+1]; /* Compute LPC coefficients for the last MAX_PERIOD samples before the first loss so we can work in the excitation-filter domain. */ - _celt_autocorr(exc, ac, window, overlap, LPC_ORDER, MAX_PERIOD); + _celt_autocorr(exc, ac, window, overlap, + LPC_ORDER, MAX_PERIOD, st->arch); /* Add a noise floor of -40 dB. */ #ifdef FIXED_POINT ac[0] += SHR32(ac[0],13); #else ac[0] *= 1.0001f; #endif /* Use lag windowing to stabilize the Levinson-Durbin recursion. 
*/ for (i=1;i<=LPC_ORDER;i++) @@ -660,17 +662,16 @@ int celt_decode_with_ec(CELTDecoder * OP VARDECL(celt_norm, X); VARDECL(int, fine_quant); VARDECL(int, pulses); VARDECL(int, cap); VARDECL(int, offsets); VARDECL(int, fine_priority); VARDECL(int, tf_res); VARDECL(unsigned char, collapse_masks); - celt_sig *out_mem[2]; celt_sig *decode_mem[2]; celt_sig *out_syn[2]; opus_val16 *lpc; opus_val16 *oldBandE, *oldLogE, *oldLogE2, *backgroundLogE; int shortBlocks; int isTransient; int intra_ener; @@ -701,17 +702,16 @@ int celt_decode_with_ec(CELTDecoder * OP mode = st->mode; nbEBands = mode->nbEBands; overlap = mode->overlap; eBands = mode->eBands; frame_size *= st->downsample; c=0; do { decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap); - out_mem[c] = decode_mem[c]+DECODE_BUFFER_SIZE-MAX_PERIOD; } while (++c<CC); lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+overlap)*CC); oldBandE = lpc+CC*LPC_ORDER; oldLogE = oldBandE + 2*nbEBands; oldLogE2 = oldLogE + 2*nbEBands; backgroundLogE = oldLogE2 + 2*nbEBands; #ifdef CUSTOM_MODES @@ -931,17 +931,17 @@ int celt_decode_with_ec(CELTDecoder * OP int bound = M*eBands[effEnd]; if (st->downsample!=1) bound = IMIN(bound, N/st->downsample); for (i=bound;i<N;i++) freq[c*N+i] = 0; } while (++c<C); c=0; do { - out_syn[c] = out_mem[c]+MAX_PERIOD-N; + out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N; } while (++c<CC); if (CC==2&&C==1) { for (i=0;i<N;i++) freq[N+i] = freq[i]; } if (CC==1&&C==2)
--- a/media/libopus/celt/celt_encoder.c +++ b/media/libopus/celt/celt_encoder.c @@ -106,17 +106,16 @@ struct OpusCustomEncoder { /* VBR-related parameters */ opus_int32 vbr_reservoir; opus_int32 vbr_drift; opus_int32 vbr_offset; opus_int32 vbr_count; opus_val32 overlap_max; opus_val16 stereo_saving; int intensity; - opus_val16 *energy_save; opus_val16 *energy_mask; opus_val16 spec_avg; #ifdef RESYNTH /* +MAX_PERIOD/2 to make space for overlap */ celt_sig syn_mem[2][2*MAX_PERIOD+MAX_PERIOD/2]; #endif @@ -157,27 +156,18 @@ CELTEncoder *opus_custom_encoder_create( st = NULL; } if (error) *error = ret; return st; } #endif /* CUSTOM_MODES */ -int celt_encoder_init(CELTEncoder *st, opus_int32 sampling_rate, int channels) -{ - int ret; - ret = opus_custom_encoder_init(st, opus_custom_mode_create(48000, 960, NULL), channels); - if (ret != OPUS_OK) - return ret; - st->upsample = resampling_factor(sampling_rate); - return OPUS_OK; -} - -OPUS_CUSTOM_NOSTATIC int opus_custom_encoder_init(CELTEncoder *st, const CELTMode *mode, int channels) +static int opus_custom_encoder_init_arch(CELTEncoder *st, const CELTMode *mode, + int channels, int arch) { if (channels < 0 || channels > 2) return OPUS_BAD_ARG; if (st==NULL || mode==NULL) return OPUS_ALLOC_FAIL; OPUS_CLEAR((char*)st, opus_custom_encoder_get_size(mode, channels)); @@ -186,33 +176,52 @@ OPUS_CUSTOM_NOSTATIC int opus_custom_enc st->overlap = mode->overlap; st->stream_channels = st->channels = channels; st->upsample = 1; st->start = 0; st->end = st->mode->effEBands; st->signalling = 1; - st->arch = opus_select_arch(); + st->arch = arch; st->constrained_vbr = 1; st->clip = 1; st->bitrate = OPUS_BITRATE_MAX; st->vbr = 0; st->force_intra = 0; st->complexity = 5; st->lsb_depth=24; opus_custom_encoder_ctl(st, OPUS_RESET_STATE); return OPUS_OK; } #ifdef CUSTOM_MODES +int opus_custom_encoder_init(CELTEncoder *st, const CELTMode *mode, int channels) +{ + return opus_custom_encoder_init_arch(st, mode, channels, opus_select_arch()); 
+} +#endif + +int celt_encoder_init(CELTEncoder *st, opus_int32 sampling_rate, int channels, + int arch) +{ + int ret; + ret = opus_custom_encoder_init_arch(st, + opus_custom_mode_create(48000, 960, NULL), channels, arch); + if (ret != OPUS_OK) + return ret; + st->upsample = resampling_factor(sampling_rate); + return OPUS_OK; +} + +#ifdef CUSTOM_MODES void opus_custom_encoder_destroy(CELTEncoder *st) { opus_free(st); } #endif /* CUSTOM_MODES */ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int C, @@ -236,17 +245,16 @@ static int transient_analysis(const opus 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, }; SAVE_STACK; ALLOC(tmp, len, opus_val16); len2=len/2; - tf_max = 0; for (c=0;c<C;c++) { opus_val32 mean; opus_int32 unmask=0; opus_val32 norm; opus_val16 maxE; mem0=0; mem1=0; @@ -365,43 +373,43 @@ static int transient_analysis(const opus is_transient = rand()&0x1; #endif /*printf("%d %f %d\n", is_transient, (float)*tf_estimate, tf_max);*/ return is_transient; } /* Looks for sudden increases of energy to decide whether we need to patch the transient decision */ -int patch_transient_decision(opus_val16 *new, opus_val16 *old, int nbEBands, +int patch_transient_decision(opus_val16 *newE, opus_val16 *oldE, int nbEBands, int end, int C) { int i, c; opus_val32 mean_diff=0; opus_val16 spread_old[26]; /* Apply an aggressive (-6 dB/Bark) spreading function to the old frame to avoid false detection caused by irrelevant bands */ if (C==1) { - spread_old[0] = old[0]; + spread_old[0] = oldE[0]; for (i=1;i<end;i++) - spread_old[i] = MAX16(spread_old[i-1]-QCONST16(1.0f, DB_SHIFT), old[i]); + spread_old[i] = MAX16(spread_old[i-1]-QCONST16(1.0f, DB_SHIFT), oldE[i]); } else { - spread_old[0] = MAX16(old[0],old[nbEBands]); + spread_old[0] = MAX16(oldE[0],oldE[nbEBands]); for (i=1;i<end;i++) spread_old[i] = MAX16(spread_old[i-1]-QCONST16(1.0f, DB_SHIFT), - 
MAX16(old[i],old[i+nbEBands])); + MAX16(oldE[i],oldE[i+nbEBands])); } for (i=end-2;i>=0;i--) spread_old[i] = MAX16(spread_old[i], spread_old[i+1]-QCONST16(1.0f, DB_SHIFT)); /* Compute mean increase */ c=0; do { for (i=2;i<end-1;i++) { opus_val16 x1, x2; - x1 = MAX16(0, new[i]); + x1 = MAX16(0, newE[i]); x2 = MAX16(0, spread_old[i]); mean_diff = ADD32(mean_diff, EXTEND32(MAX16(0, SUB16(x1, x2)))); } } while (++c<C); mean_diff = DIV32(mean_diff, C*(end-3)); /*printf("%f %f %d\n", mean_diff, max_diff, count);*/ return mean_diff > QCONST16(1.f, DB_SHIFT); } @@ -447,17 +455,17 @@ static void compute_mdcts(const CELTMode out[c*B*N+i] *= upsample; for (;i<B*N;i++) out[c*B*N+i] = 0; } while (++c<C); } } -static void preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp, +void celt_preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp, int N, int CC, int upsample, const opus_val16 *coef, celt_sig *mem, int clip) { int i; opus_val16 coef0; celt_sig m; int Nu; coef0 = coef[0]; @@ -484,16 +492,18 @@ static void preemphasis(const opus_val16 #ifndef FIXED_POINT if (clip) { /* Clip input to avoid encoding non-portable files */ for (i=0;i<Nu;i++) inp[i*upsample] = MAX32(-65536.f, MIN32(65536.f,inp[i*upsample])); } +#else + (void)clip; /* Avoids a warning about clip being unused. */ #endif m = *mem; #ifdef CUSTOM_MODES if (coef[1] != 0) { opus_val16 coef1 = coef[1]; opus_val16 coef2 = coef[2]; for (i=0;i<N;i++) @@ -739,17 +749,17 @@ static void tf_encode(int start, int end tf_res[i] = tf_select_table[LM][4*isTransient+2*tf_select+tf_res[i]]; /*for(i=0;i<end;i++)printf("%d ", isTransient ? 
tf_res[i] : LM+tf_res[i]);printf("\n");*/ } static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X, const opus_val16 *bandLogE, int end, int LM, int C, int N0, AnalysisInfo *analysis, opus_val16 *stereo_saving, opus_val16 tf_estimate, - int intensity) + int intensity, opus_val16 surround_trim) { int i; opus_val32 diff=0; int c; int trim_index = 5; opus_val16 trim = QCONST16(5.f, 8); opus_val16 logXC, logXC2; if (C==2) @@ -813,21 +823,23 @@ static int alloc_trim_analysis(const CEL trim_index--; if (diff > QCONST16(8.f, DB_SHIFT)) trim_index--; if (diff < -QCONST16(4.f, DB_SHIFT)) trim_index++; if (diff < -QCONST16(10.f, DB_SHIFT)) trim_index++; trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), SHR16(diff+QCONST16(1.f, DB_SHIFT),DB_SHIFT-8)/6 )); + trim -= SHR16(surround_trim, DB_SHIFT-8); trim -= 2*SHR16(tf_estimate, 14-8); -#ifndef FIXED_POINT +#ifndef DISABLE_FLOAT_API if (analysis->valid) { - trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), 2*(analysis->tonality_slope+.05f))); + trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), + (opus_val16)(QCONST16(2.f, 8)*(analysis->tonality_slope+.05f)))); } #endif #ifdef FIXED_POINT trim_index = PSHR32(trim, 8); #else trim_index = (int)floor(.5f+trim); #endif @@ -872,17 +884,17 @@ static int stereo_analysis(const CELTMod thetas -= 8; return MULT16_32_Q15((m->eBands[13]<<(LM+1))+thetas, sumMS) > MULT16_32_Q15(m->eBands[13]<<(LM+1), sumLR); } static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 *bandLogE2, int nbEBands, int start, int end, int C, int *offsets, int lsb_depth, const opus_int16 *logN, int isTransient, int vbr, int constrained_vbr, const opus_int16 *eBands, int LM, - int effectiveBytes, opus_int32 *tot_boost_, int lfe) + int effectiveBytes, opus_int32 *tot_boost_, int lfe, opus_val16 *surround_dynalloc) { int i, c; opus_int32 tot_boost=0; opus_val16 maxDepth; VARDECL(opus_val16, follower); VARDECL(opus_val16, noise_floor); SAVE_STACK; ALLOC(follower, 
C*nbEBands, opus_val16); @@ -935,16 +947,18 @@ static opus_val16 dynalloc_analysis(cons follower[i] = HALF16(MAX16(0, bandLogE[i]-follower[i]) + MAX16(0, bandLogE[nbEBands+i]-follower[nbEBands+i])); } } else { for (i=start;i<end;i++) { follower[i] = MAX16(0, bandLogE[i]-follower[i]); } } + for (i=start;i<end;i++) + follower[i] = MAX16(follower[i], surround_dynalloc[i]); /* For non-transient CBR/CVBR frames, halve the dynalloc contribution */ if ((!vbr || constrained_vbr)&&!isTransient) { for (i=start;i<end;i++) follower[i] = HALF16(follower[i]); } for (i=start;i<end;i++) { @@ -1016,21 +1030,22 @@ static int run_prefilter(CELTEncoder *st OPUS_COPY(pre[c]+COMBFILTER_MAXPERIOD, in+c*(N+st->overlap)+st->overlap, N); } while (++c<CC); if (enabled) { VARDECL(opus_val16, pitch_buf); ALLOC(pitch_buf, (COMBFILTER_MAXPERIOD+N)>>1, opus_val16); - pitch_downsample(pre, pitch_buf, COMBFILTER_MAXPERIOD+N, CC); + pitch_downsample(pre, pitch_buf, COMBFILTER_MAXPERIOD+N, CC, st->arch); /* Don't search for the fir last 1.5 octave of the range because there's too many false-positives due to short-term correlation */ pitch_search(pitch_buf+(COMBFILTER_MAXPERIOD>>1), pitch_buf, N, - COMBFILTER_MAXPERIOD-3*COMBFILTER_MINPERIOD, &pitch_index); + COMBFILTER_MAXPERIOD-3*COMBFILTER_MINPERIOD, &pitch_index, + st->arch); pitch_index = COMBFILTER_MAXPERIOD-pitch_index; gain1 = remove_doubling(pitch_buf, COMBFILTER_MAXPERIOD, COMBFILTER_MINPERIOD, N, &pitch_index, st->prefilter_period, st->prefilter_gain); if (pitch_index > COMBFILTER_MAXPERIOD-2) pitch_index = COMBFILTER_MAXPERIOD-2; gain1 = MULT16_16_Q15(QCONST16(.7f,15),gain1); /*printf("%d %d %f %f\n", pitch_change, pitch_index, gain1, st->analysis.tonality);*/ @@ -1135,54 +1150,55 @@ static int compute_vbr(const CELTMode *m coded_bands = lastCodedBands ? 
lastCodedBands : nbEBands; coded_bins = eBands[coded_bands]<<LM; if (C==2) coded_bins += eBands[IMIN(intensity, coded_bands)]<<LM; target = base_target; /*printf("%f %f %f %f %d %d ", st->analysis.activity, st->analysis.tonality, tf_estimate, st->stereo_saving, tot_boost, coded_bands);*/ -#ifndef FIXED_POINT +#ifndef DISABLE_FLOAT_API if (analysis->valid && analysis->activity<.4) target -= (opus_int32)((coded_bins<<BITRES)*(.4f-analysis->activity)); #endif /* Stereo savings */ if (C==2) { int coded_stereo_bands; int coded_stereo_dof; opus_val16 max_frac; coded_stereo_bands = IMIN(intensity, coded_bands); coded_stereo_dof = (eBands[coded_stereo_bands]<<LM)-coded_stereo_bands; /* Maximum fraction of the bits we can save if the signal is mono. */ max_frac = DIV32_16(MULT16_16(QCONST16(0.8f, 15), coded_stereo_dof), coded_bins); + stereo_saving = MIN16(stereo_saving, QCONST16(1.f, 8)); /*printf("%d %d %d ", coded_stereo_dof, coded_bins, tot_boost);*/ target -= (opus_int32)MIN32(MULT16_32_Q15(max_frac,target), SHR32(MULT16_16(stereo_saving-QCONST16(0.1f,8),(coded_stereo_dof<<BITRES)),8)); } /* Boost the rate according to dynalloc (minus the dynalloc average for calibration). */ target += tot_boost-(16<<LM); /* Apply transient boost, compensating for average boost. */ tf_calibration = variable_duration==OPUS_FRAMESIZE_VARIABLE ? QCONST16(0.02f,14) : QCONST16(0.04f,14); target += (opus_int32)SHL32(MULT16_32_Q15(tf_estimate-tf_calibration, target),1); -#ifndef FIXED_POINT +#ifndef DISABLE_FLOAT_API /* Apply tonality boost */ if (analysis->valid && !lfe) { opus_int32 tonal_target; float tonal; /* Tonality boost (compensating for the average). 
*/ tonal = MAX16(0.f,analysis->tonality-.15f)-0.09f; tonal_target = target + (opus_int32)((coded_bins<<BITRES)*1.2f*tonal); if (pitch_change) tonal_target += (opus_int32)((coded_bins<<BITRES)*.8f); - /*printf("%f %f ", st->analysis.tonality, tonal);*/ + /*printf("%f %f ", analysis->tonality, tonal);*/ target = tonal_target; } #endif if (has_surround_mask&&!lfe) { opus_int32 surround_target = target + (opus_int32)SHR32(MULT16_16(surround_masking,coded_bins<<BITRES), DB_SHIFT); /*printf("%f %d %d %d %d %d %d ", surround_masking, coded_bins, st->end, st->intensity, surround_target, target, st->bitrate);*/ @@ -1286,32 +1302,41 @@ int celt_encode_with_ec(CELTEncoder * OP int nbEBands; int overlap; const opus_int16 *eBands; int secondMdct; int signalBandwidth; int transient_got_disabled=0; opus_val16 surround_masking=0; opus_val16 temporal_vbr=0; + opus_val16 surround_trim = 0; + opus_int32 equiv_rate = 510000; + VARDECL(opus_val16, surround_dynalloc); ALLOC_STACK; mode = st->mode; nbEBands = mode->nbEBands; overlap = mode->overlap; eBands = mode->eBands; tf_estimate = 0; if (nbCompressedBytes<2 || pcm==NULL) - return OPUS_BAD_ARG; + { + RESTORE_STACK; + return OPUS_BAD_ARG; + } frame_size *= st->upsample; for (LM=0;LM<=mode->maxLM;LM++) if (mode->shortMdctSize<<LM==frame_size) break; if (LM>mode->maxLM) + { + RESTORE_STACK; return OPUS_BAD_ARG; + } M=1<<LM; N = M*mode->shortMdctSize; prefilter_mem = st->in_mem+CC*(st->overlap); oldBandE = (opus_val16*)(st->in_mem+CC*(st->overlap+COMBFILTER_MAXPERIOD)); oldLogE = oldBandE + CC*nbEBands; oldLogE2 = oldLogE + CC*nbEBands; @@ -1332,17 +1357,20 @@ int celt_encode_with_ec(CELTEncoder * OP compressed[0] = tmp<<5; compressed[0] |= LM<<3; compressed[0] |= (C==2)<<2; /* Convert "standard mode" to Opus header */ if (mode->Fs==48000 && mode->shortMdctSize==120) { int c0 = toOpus(compressed[0]); if (c0<0) + { + RESTORE_STACK; return OPUS_BAD_ARG; + } compressed[0] = c0; } compressed++; nbCompressedBytes--; } #else 
celt_assert(st->signalling==0); #endif @@ -1366,16 +1394,18 @@ int celt_encode_with_ec(CELTEncoder * OP tmp = st->bitrate*frame_size; if (tell>1) tmp += tell; if (st->bitrate!=OPUS_BITRATE_MAX) nbCompressedBytes = IMAX(2, IMIN(nbCompressedBytes, (tmp+4*mode->Fs)/(8*mode->Fs)-!!st->signalling)); effectiveBytes = nbCompressedBytes; } + if (st->bitrate != OPUS_BITRATE_MAX) + equiv_rate = st->bitrate - (40*C+20)*((400>>LM) - 50); if (enc==NULL) { ec_enc_init(&_enc, compressed, nbCompressedBytes); enc = &_enc; } if (vbr_rate>0) @@ -1439,27 +1469,27 @@ int celt_encode_with_ec(CELTEncoder * OP ec_enc_shrink(enc, nbCompressedBytes); } /* Pretend we've filled all the remaining bits with zeros (that's what the initialiser did anyway) */ tell = nbCompressedBytes*8; enc->nbits_total+=tell-ec_tell(enc); } c=0; do { - preemphasis(pcm+c, in+c*(N+st->overlap)+st->overlap, N, CC, st->upsample, + celt_preemphasis(pcm+c, in+c*(N+st->overlap)+st->overlap, N, CC, st->upsample, mode->preemph, st->preemph_memE+c, st->clip); } while (++c<CC); /* Find pitch period and gain */ { int enabled; int qg; - enabled = (st->lfe || nbAvailableBytes>12*C) && st->start==0 && !silence && !st->disable_pf + enabled = ((st->lfe&&nbAvailableBytes>3) || nbAvailableBytes>12*C) && st->start==0 && !silence && !st->disable_pf && st->complexity >= 5 && !(st->consec_transient && LM!=3 && st->variable_duration==OPUS_FRAMESIZE_VARIABLE); prefilter_tapset = st->tapset_decision; pf_on = run_prefilter(st, in, prefilter_mem, CC, N, prefilter_tapset, &pitch_index, &gain1, &qg, enabled, nbAvailableBytes); if ((gain1 > QCONST16(.4f,15) || st->prefilter_gain > QCONST16(.4f,15)) && (!st->analysis.valid || st->analysis.tonality > .3) && (pitch_index > 1.26*st->prefilter_period || pitch_index < .79*st->prefilter_period)) pitch_change = 1; if (pf_on==0) @@ -1521,53 +1551,99 @@ int celt_encode_with_ec(CELTEncoder * OP { for (i=2;i<st->end;i++) { bandE[i] = IMIN(bandE[i], MULT16_32_Q15(QCONST16(1e-4f,15),bandE[0])); bandE[i] = 
MAX32(bandE[i], EPSILON); } } amp2Log2(mode, effEnd, st->end, bandE, bandLogE, C); - if (st->energy_save) + + ALLOC(surround_dynalloc, C*nbEBands, opus_val16); + for(i=0;i<st->end;i++) + surround_dynalloc[i] = 0; + /* This computes how much masking takes place between surround channels */ + if (st->start==0&&st->energy_mask&&!st->lfe) { - opus_val16 offset = shortBlocks?HALF16(SHL16(LM, DB_SHIFT)):0; -#ifdef FIXED_POINT - /* Compensate for the 1/8 gain we apply in the fixed-point downshift to avoid overflows. */ - offset -= QCONST16(3.0f, DB_SHIFT); -#endif - for(i=0;i<C*nbEBands;i++) - st->energy_save[i]=bandLogE[i]-offset; - st->energy_save=NULL; - } - /* This computes how much masking takes place between surround channels */ - if (st->energy_mask&&!st->lfe) - { + int mask_end; + int midband; + int count_dynalloc; opus_val32 mask_avg=0; - opus_val16 offset = shortBlocks?HALF16(SHL16(LM, DB_SHIFT)):0; + opus_val32 diff=0; + int count=0; + mask_end = IMAX(2,st->lastCodedBands); for (c=0;c<C;c++) { - opus_val16 followE, followMask; - followE = followMask = -QCONST16(14.f, DB_SHIFT); - for(i=0;i<st->end;i++) + for(i=0;i<mask_end;i++) { - /* We use a simple follower to approximate the masking spreading function. 
*/ - followE = MAX16(followE-QCONST16(1.f, DB_SHIFT), bandLogE[nbEBands*c+i]-offset); - followMask = MAX16(followMask-QCONST16(1.f, DB_SHIFT), st->energy_mask[nbEBands*c+i]); - mask_avg += followE-followMask; + opus_val16 mask; + mask = MAX16(MIN16(st->energy_mask[nbEBands*c+i], + QCONST16(.25f, DB_SHIFT)), -QCONST16(2.0f, DB_SHIFT)); + if (mask > 0) + mask = HALF16(mask); + mask_avg += MULT16_16(mask, eBands[i+1]-eBands[i]); + count += eBands[i+1]-eBands[i]; + diff += MULT16_16(mask, 1+2*i-mask_end); } } - surround_masking = DIV32_16(mask_avg,C*st->end) + QCONST16(.7f, DB_SHIFT); - surround_masking = MIN16(MAX16(surround_masking, -QCONST16(2.f, DB_SHIFT)), QCONST16(.2f, DB_SHIFT)); - surround_masking -= HALF16(HALF16(surround_masking)); + mask_avg = DIV32_16(mask_avg,count); + mask_avg += QCONST16(.2f, DB_SHIFT); + diff = diff*6/(C*(mask_end-1)*(mask_end+1)*mask_end); + /* Again, being conservative */ + diff = HALF32(diff); + diff = MAX32(MIN32(diff, QCONST32(.031f, DB_SHIFT)), -QCONST32(.031f, DB_SHIFT)); + /* Find the band that's in the middle of the coded spectrum */ + for (midband=0;eBands[midband+1] < eBands[mask_end]/2;midband++); + count_dynalloc=0; + for(i=0;i<mask_end;i++) + { + opus_val32 lin; + opus_val16 unmask; + lin = mask_avg + diff*(i-midband); + if (C==2) + unmask = MAX16(st->energy_mask[i], st->energy_mask[nbEBands+i]); + else + unmask = st->energy_mask[i]; + unmask = MIN16(unmask, QCONST16(.0f, DB_SHIFT)); + unmask -= lin; + if (unmask > QCONST16(.25f, DB_SHIFT)) + { + surround_dynalloc[i] = unmask - QCONST16(.25f, DB_SHIFT); + count_dynalloc++; + } + } + if (count_dynalloc>=3) + { + /* If we need dynalloc in many bands, it's probably because our + initial masking rate was too low. */ + mask_avg += QCONST16(.25f, DB_SHIFT); + if (mask_avg>0) + { + /* Something went really wrong in the original calculations, + disabling masking. 
*/ + mask_avg = 0; + diff = 0; + for(i=0;i<mask_end;i++) + surround_dynalloc[i] = 0; + } else { + for(i=0;i<mask_end;i++) + surround_dynalloc[i] = MAX16(0, surround_dynalloc[i]-QCONST16(.25f, DB_SHIFT)); + } + } + mask_avg += QCONST16(.2f, DB_SHIFT); + /* Convert to 1/64th units used for the trim */ + surround_trim = 64*diff; + /*printf("%d %d ", mask_avg, surround_trim);*/ + surround_masking = mask_avg; } /* Temporal VBR (but not for LFE) */ if (!st->lfe) { opus_val16 follow=-QCONST16(10.0f,DB_SHIFT); - float frame_avg=0; + opus_val32 frame_avg=0; opus_val16 offset = shortBlocks?HALF16(SHL16(LM, DB_SHIFT)):0; for(i=st->start;i<st->end;i++) { follow = MAX16(follow-QCONST16(1.f, DB_SHIFT), bandLogE[i]-offset); if (C==2) follow = MAX16(follow, bandLogE[i+nbEBands]-offset); frame_avg += follow; } @@ -1655,40 +1731,43 @@ int celt_encode_with_ec(CELTEncoder * OP if (st->complexity == 0) st->spread_decision = SPREAD_NONE; else st->spread_decision = SPREAD_NORMAL; } else { /* Disable new spreading+tapset estimator until we can show it works better than the old one. So far it seems like spreading_decision() works best. 
*/ - if (0&&st->analysis.valid) +#if 0 + if (st->analysis.valid) { static const opus_val16 spread_thresholds[3] = {-QCONST16(.6f, 15), -QCONST16(.2f, 15), -QCONST16(.07f, 15)}; static const opus_val16 spread_histeresis[3] = {QCONST16(.15f, 15), QCONST16(.07f, 15), QCONST16(.02f, 15)}; static const opus_val16 tapset_thresholds[2] = {QCONST16(.0f, 15), QCONST16(.15f, 15)}; static const opus_val16 tapset_histeresis[2] = {QCONST16(.1f, 15), QCONST16(.05f, 15)}; st->spread_decision = hysteresis_decision(-st->analysis.tonality, spread_thresholds, spread_histeresis, 3, st->spread_decision); st->tapset_decision = hysteresis_decision(st->analysis.tonality_slope, tapset_thresholds, tapset_histeresis, 2, st->tapset_decision); - } else { + } else +#endif + { st->spread_decision = spreading_decision(mode, X, &st->tonal_average, st->spread_decision, &st->hf_average, &st->tapset_decision, pf_on&&!shortBlocks, effEnd, C, M); } /*printf("%d %d\n", st->tapset_decision, st->spread_decision);*/ /*printf("%f %d %f %d\n\n", st->analysis.tonality, st->spread_decision, st->analysis.tonality_slope, st->tapset_decision);*/ } ec_enc_icdf(enc, st->spread_decision, spread_icdf, 5); } ALLOC(offsets, nbEBands, int); maxDepth = dynalloc_analysis(bandLogE, bandLogE2, nbEBands, st->start, st->end, C, offsets, st->lsb_depth, mode->logN, isTransient, st->vbr, st->constrained_vbr, - eBands, LM, effectiveBytes, &tot_boost, st->lfe); + eBands, LM, effectiveBytes, &tot_boost, st->lfe, surround_dynalloc); /* For LFE, everything interesting is in the first band */ if (st->lfe) offsets[0] = IMIN(8, effectiveBytes/3); ALLOC(cap, nbEBands, int); init_caps(mode,cap,LM,C); dynalloc_logp = 6; total_bits<<=BITRES; @@ -1722,46 +1801,39 @@ int celt_encode_with_ec(CELTEncoder * OP /* Making dynalloc more likely */ if (j) dynalloc_logp = IMAX(2, dynalloc_logp-1); offsets[i] = boost; } if (C==2) { - int effectiveRate; - static const opus_val16 intensity_thresholds[21]= /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 
19 20 off*/ - { 16,21,23,25,27,29,31,33,35,38,42,46,50,54,58,63,68,75,84,102,130}; + { 1, 2, 3, 4, 5, 6, 7, 8,16,24,36,44,50,56,62,67,72,79,88,106,134}; static const opus_val16 intensity_histeresis[21]= - { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 4, 5, 6, 8, 12}; + { 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 4, 5, 6, 8, 8}; /* Always use MS for 2.5 ms frames until we can do a better analysis */ if (LM!=0) dual_stereo = stereo_analysis(mode, X, LM, N); - /* Account for coarse energy */ - effectiveRate = (8*effectiveBytes - 80)>>LM; - - /* effectiveRate in kb/s */ - effectiveRate = 2*effectiveRate/5; - - st->intensity = hysteresis_decision((opus_val16)effectiveRate, intensity_thresholds, intensity_histeresis, 21, st->intensity); + st->intensity = hysteresis_decision((opus_val16)equiv_rate/1000, + intensity_thresholds, intensity_histeresis, 21, st->intensity); st->intensity = IMIN(st->end,IMAX(st->start, st->intensity)); } alloc_trim = 5; if (tell+(6<<BITRES) <= total_bits - total_boost) { if (st->lfe) alloc_trim = 5; else alloc_trim = alloc_trim_analysis(mode, X, bandLogE, - st->end, LM, C, N, &st->analysis, &st->stereo_saving, tf_estimate, st->intensity); + st->end, LM, C, N, &st->analysis, &st->stereo_saving, tf_estimate, st->intensity, surround_trim); ec_enc_icdf(enc, alloc_trim, trim_icdf, 7); tell = ec_tell_frac(enc); } /* Variable bitrate */ if (vbr_rate>0) { opus_val16 alpha; @@ -1774,17 +1846,17 @@ int celt_encode_with_ec(CELTEncoder * OP /* Don't attempt to use more than 510 kb/s, even for frames smaller than 20 ms. The CELT allocator will just not be able to use more than that anyway. 
*/ nbCompressedBytes = IMIN(nbCompressedBytes,1275>>(3-LM)); base_target = vbr_rate - ((40*C+20)<<BITRES); if (st->constrained_vbr) base_target += (st->vbr_offset>>lm_diff); - target = compute_vbr(mode, &st->analysis, base_target, LM, st->bitrate, + target = compute_vbr(mode, &st->analysis, base_target, LM, equiv_rate, st->lastCodedBands, C, st->intensity, st->constrained_vbr, st->stereo_saving, tot_boost, tf_estimate, pitch_change, maxDepth, st->variable_duration, st->lfe, st->energy_mask!=NULL, surround_masking, temporal_vbr); /* The current offset is removed from the target and the space used so far is added*/ target=target+tell; @@ -1854,27 +1926,27 @@ int celt_encode_with_ec(CELTEncoder * OP ALLOC(pulses, nbEBands, int); ALLOC(fine_priority, nbEBands, int); /* bits = packet size - where we are - safety*/ bits = (((opus_int32)nbCompressedBytes*8)<<BITRES) - ec_tell_frac(enc) - 1; anti_collapse_rsv = isTransient&&LM>=2&&bits>=((LM+2)<<BITRES) ? (1<<BITRES) : 0; bits -= anti_collapse_rsv; signalBandwidth = st->end-1; -#ifndef FIXED_POINT +#ifndef DISABLE_FLOAT_API if (st->analysis.valid) { int min_bandwidth; - if (st->bitrate < (opus_int32)32000*C) + if (equiv_rate < (opus_int32)32000*C) min_bandwidth = 13; - else if (st->bitrate < (opus_int32)48000*C) + else if (equiv_rate < (opus_int32)48000*C) min_bandwidth = 16; - else if (st->bitrate < (opus_int32)60000*C) + else if (equiv_rate < (opus_int32)60000*C) min_bandwidth = 18; - else if (st->bitrate < (opus_int32)80000*C) + else if (equiv_rate < (opus_int32)80000*C) min_bandwidth = 19; else min_bandwidth = 20; signalBandwidth = IMAX(st->analysis.bandwidth, min_bandwidth); } #endif if (st->lfe) signalBandwidth = 1; @@ -2256,22 +2328,16 @@ int opus_custom_encoder_ctl(CELTEncoder } break; case OPUS_SET_LFE_REQUEST: { opus_int32 value = va_arg(ap, opus_int32); st->lfe = value; } break; - case OPUS_SET_ENERGY_SAVE_REQUEST: - { - opus_val16 *value = va_arg(ap, opus_val16*); - st->energy_save=value; - } - break; case 
OPUS_SET_ENERGY_MASK_REQUEST: { opus_val16 *value = va_arg(ap, opus_val16*); st->energy_mask = value; } break; default: goto bad_request;
--- a/media/libopus/celt/celt_lpc.c +++ b/media/libopus/celt/celt_lpc.c @@ -221,17 +221,18 @@ void celt_iir(const opus_val32 *_x, } int _celt_autocorr( const opus_val16 *x, /* in: [0...n-1] samples x */ opus_val32 *ac, /* out: [0...lag-1] ac values */ const opus_val16 *window, int overlap, int lag, - int n + int n, + int arch ) { opus_val32 d; int i, k; int fastN=n-lag; int shift; const opus_val16 *xptr; VARDECL(opus_val16, xx); @@ -270,17 +271,17 @@ int _celt_autocorr( { for(i=0;i<n;i++) xx[i] = PSHR32(xptr[i], shift); xptr = xx; } else shift = 0; } #endif - celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1); + celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1, arch); for (k=0;k<=lag;k++) { for (i = k+fastN, d = 0; i < n; i++) d = MAC16_16(d, xptr[i], xptr[i-k]); ac[k] += d; } #ifdef FIXED_POINT shift = 2*shift;
--- a/media/libopus/celt/celt_lpc.h +++ b/media/libopus/celt/celt_lpc.h @@ -43,11 +43,12 @@ void celt_fir(const opus_val16 *x, void celt_iir(const opus_val32 *x, const opus_val16 *den, opus_val32 *y, int N, int ord, opus_val16 *mem); -int _celt_autocorr(const opus_val16 *x, opus_val32 *ac, const opus_val16 *window, int overlap, int lag, int n); +int _celt_autocorr(const opus_val16 *x, opus_val32 *ac, + const opus_val16 *window, int overlap, int lag, int n, int arch); #endif /* PLC_H */
--- a/media/libopus/celt/cpu_support.h +++ b/media/libopus/celt/cpu_support.h @@ -23,29 +23,32 @@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef CPU_SUPPORT_H #define CPU_SUPPORT_H -#if defined(OPUS_HAVE_RTCD) && defined(ARMv4_ASM) +#include "opus_types.h" +#include "opus_defines.h" + +#if defined(OPUS_HAVE_RTCD) && defined(OPUS_ARM_ASM) #include "arm/armcpu.h" /* We currently support 4 ARM variants: * arch[0] -> ARMv4 * arch[1] -> ARMv5E * arch[2] -> ARMv6 * arch[3] -> NEON */ #define OPUS_ARCHMASK 3 #else #define OPUS_ARCHMASK 0 -static inline int opus_select_arch(void) +static OPUS_INLINE int opus_select_arch(void) { return 0; } #endif #endif
--- a/media/libopus/celt/cwrs.c +++ b/media/libopus/celt/cwrs.c @@ -405,25 +405,25 @@ static const opus_uint32 CELT_PVQ_U_DATA 3248227095U, /*N=13, K=13...16:*/ 251595969, 579168825, 1267854873, 2653649025U, /*N=14, K=14:*/ 1409933619 }; #if defined(CUSTOM_MODES) -const opus_uint32 *const CELT_PVQ_U_ROW[15]={ +static const opus_uint32 *const CELT_PVQ_U_ROW[15]={ CELT_PVQ_U_DATA+ 0,CELT_PVQ_U_DATA+ 208,CELT_PVQ_U_DATA+ 415, CELT_PVQ_U_DATA+ 621,CELT_PVQ_U_DATA+ 826,CELT_PVQ_U_DATA+1030, CELT_PVQ_U_DATA+1233,CELT_PVQ_U_DATA+1336,CELT_PVQ_U_DATA+1389, CELT_PVQ_U_DATA+1421,CELT_PVQ_U_DATA+1441,CELT_PVQ_U_DATA+1455, CELT_PVQ_U_DATA+1464,CELT_PVQ_U_DATA+1470,CELT_PVQ_U_DATA+1473 }; #else -const opus_uint32 *const CELT_PVQ_U_ROW[15]={ +static const opus_uint32 *const CELT_PVQ_U_ROW[15]={ CELT_PVQ_U_DATA+ 0,CELT_PVQ_U_DATA+ 176,CELT_PVQ_U_DATA+ 351, CELT_PVQ_U_DATA+ 525,CELT_PVQ_U_DATA+ 698,CELT_PVQ_U_DATA+ 870, CELT_PVQ_U_DATA+1041,CELT_PVQ_U_DATA+1131,CELT_PVQ_U_DATA+1178, CELT_PVQ_U_DATA+1207,CELT_PVQ_U_DATA+1226,CELT_PVQ_U_DATA+1240, CELT_PVQ_U_DATA+1248,CELT_PVQ_U_DATA+1254,CELT_PVQ_U_DATA+1257 }; #endif @@ -529,33 +529,33 @@ void decode_pulses(int *_y,int _n,int _k cwrsi(_n,_k,ec_dec_uint(_dec,CELT_PVQ_V(_n,_k)),_y); } #else /* SMALL_FOOTPRINT */ /*Computes the next row/column of any recurrence that obeys the relation u[i][j]=u[i-1][j]+u[i][j-1]+u[i-1][j-1]. _ui0 is the base case for the new row/column.*/ -static inline void unext(opus_uint32 *_ui,unsigned _len,opus_uint32 _ui0){ +static OPUS_INLINE void unext(opus_uint32 *_ui,unsigned _len,opus_uint32 _ui0){ opus_uint32 ui1; unsigned j; /*This do-while will overrun the array if we don't have storage for at least 2 values.*/ j=1; do { ui1=UADD32(UADD32(_ui[j],_ui[j-1]),_ui0); _ui[j-1]=_ui0; _ui0=ui1; } while (++j<_len); _ui[j-1]=_ui0; } /*Computes the previous row/column of any recurrence that obeys the relation u[i-1][j]=u[i][j]-u[i][j-1]-u[i-1][j-1]. 
_ui0 is the base case for the new row/column.*/ -static inline void uprev(opus_uint32 *_ui,unsigned _n,opus_uint32 _ui0){ +static OPUS_INLINE void uprev(opus_uint32 *_ui,unsigned _n,opus_uint32 _ui0){ opus_uint32 ui1; unsigned j; /*This do-while will overrun the array if we don't have storage for at least 2 values.*/ j=1; do { ui1=USUB32(USUB32(_ui[j],_ui[j-1]),_ui0); _ui[j-1]=_ui0; _ui0=ui1; @@ -612,26 +612,26 @@ static void cwrsi(int _n,int _k,opus_uin } while(++j<_n); } /*Returns the index of the given combination of K elements chosen from a set of size 1 with associated sign bits. _y: The vector of pulses, whose sum of absolute values is K. _k: Returns K.*/ -static inline opus_uint32 icwrs1(const int *_y,int *_k){ +static OPUS_INLINE opus_uint32 icwrs1(const int *_y,int *_k){ *_k=abs(_y[0]); return _y[0]<0; } /*Returns the index of the given combination of K elements chosen from a set of size _n with associated sign bits. _y: The vector of pulses, whose sum of absolute values must be _k. _nc: Returns V(_n,_k).*/ -static inline opus_uint32 icwrs(int _n,int _k,opus_uint32 *_nc,const int *_y, +static OPUS_INLINE opus_uint32 icwrs(int _n,int _k,opus_uint32 *_nc,const int *_y, opus_uint32 *_u){ opus_uint32 i; int j; int k; /*We can't unroll the first two iterations of the loop unless _n>=2.*/ celt_assert(_n>=2); _u[0]=0; for(k=1;k<=_k+1;k++)_u[k]=(k<<1)-1;
--- a/media/libopus/celt/ecintrin.h +++ b/media/libopus/celt/ecintrin.h @@ -28,17 +28,17 @@ /*Some common macros for potential platform-specific optimization.*/ #include "opus_types.h" #include <math.h> #include <limits.h> #include "arch.h" #if !defined(_ecintrin_H) # define _ecintrin_H (1) -/*Some specific platforms may have optimized intrinsic or inline assembly +/*Some specific platforms may have optimized intrinsic or OPUS_INLINE assembly versions of these functions which can substantially improve performance. We define macros for them to allow easy incorporation of these non-ANSI features.*/ /*Modern gcc (4.x) can compile the naive versions of min and max with cmov if given an appropriate architecture, but the branchless bit-twiddling versions are just as fast, and do not require any special target architecture. Earlier gcc versions (3.x) compiled both code to the same assembly
--- a/media/libopus/celt/entcode.h +++ b/media/libopus/celt/entcode.h @@ -21,16 +21,17 @@ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "opus_types.h" +#include "opus_defines.h" #if !defined(_entcode_H) # define _entcode_H (1) # include <limits.h> # include <stddef.h> # include "ecintrin.h" /*OPT: ec_window must be at least 32 bits, but if you have fast arithmetic on a @@ -78,35 +79,35 @@ struct ec_ctx{ In the encoder: the number of oustanding carry propagating symbols.*/ opus_uint32 ext; /*A buffered input/output symbol, awaiting carry propagation.*/ int rem; /*Nonzero if an error occurred.*/ int error; }; -static inline opus_uint32 ec_range_bytes(ec_ctx *_this){ +static OPUS_INLINE opus_uint32 ec_range_bytes(ec_ctx *_this){ return _this->offs; } -static inline unsigned char *ec_get_buffer(ec_ctx *_this){ +static OPUS_INLINE unsigned char *ec_get_buffer(ec_ctx *_this){ return _this->buf; } -static inline int ec_get_error(ec_ctx *_this){ +static OPUS_INLINE int ec_get_error(ec_ctx *_this){ return _this->error; } /*Returns the number of bits "used" by the encoded or decoded symbols so far. This same number can be computed in either the encoder or the decoder, and is suitable for making coding decisions. Return: The number of bits. This will always be slightly larger than the exact value (e.g., all rounding error is in the positive direction).*/ -static inline int ec_tell(ec_ctx *_this){ +static OPUS_INLINE int ec_tell(ec_ctx *_this){ return _this->nbits_total-EC_ILOG(_this->rng); } /*Returns the number of bits "used" by the encoded or decoded symbols so far. 
This same number can be computed in either the encoder or the decoder, and is suitable for making coding decisions. Return: The number of bits scaled by 2**BITRES. This will always be slightly larger than the exact value (e.g., all
--- a/media/libopus/celt/fixed_debug.h +++ b/media/libopus/celt/fixed_debug.h @@ -28,19 +28,19 @@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef FIXED_DEBUG_H #define FIXED_DEBUG_H #include <stdio.h> +#include "opus_defines.h" #ifdef CELT_C -#include "opus_defines.h" OPUS_EXPORT opus_int64 celt_mips=0; #else extern opus_int64 celt_mips; #endif #define MULT16_16SU(a,b) ((opus_val32)(opus_val16)(a)*(opus_val32)(opus_uint16)(b)) #define MULT32_32_Q31(a,b) ADD32(ADD32(SHL32(MULT16_16(SHR32((a),16),SHR((b),16)),1), SHR32(MULT16_16SU(SHR32((a),16),((b)&0x0000ffff)),15)), SHR32(MULT16_16SU(SHR32((b),16),((a)&0x0000ffff)),15)) @@ -54,17 +54,17 @@ extern opus_int64 celt_mips; #define VERIFY_SHORT(x) ((x)<=32767&&(x)>=-32768) #define VERIFY_INT(x) ((x)<=2147483647LL&&(x)>=-2147483648LL) #define VERIFY_UINT(x) ((x)<=(2147483647LLU<<1)) #define SHR(a,b) SHR32(a,b) #define PSHR(a,b) PSHR32(a,b) -static inline short NEG16(int x) +static OPUS_INLINE short NEG16(int x) { int res; if (!VERIFY_SHORT(x)) { fprintf (stderr, "NEG16: input is not short: %d\n", (int)x); #ifdef FIXED_DEBUG_ASSERT celt_assert(0); #endif @@ -75,17 +75,17 @@ static inline short NEG16(int x) fprintf (stderr, "NEG16: output is not short: %d\n", (int)res); #ifdef FIXED_DEBUG_ASSERT celt_assert(0); #endif } celt_mips++; return res; } -static inline int NEG32(opus_int64 x) +static OPUS_INLINE int NEG32(opus_int64 x) { opus_int64 res; if (!VERIFY_INT(x)) { fprintf (stderr, "NEG16: input is not int: %d\n", (int)x); #ifdef FIXED_DEBUG_ASSERT celt_assert(0); #endif @@ -98,49 +98,49 @@ static inline int NEG32(opus_int64 x) celt_assert(0); #endif } celt_mips+=2; return res; } #define EXTRACT16(x) EXTRACT16_(x, __FILE__, __LINE__) -static inline short EXTRACT16_(int x, char *file, int line) +static OPUS_INLINE short EXTRACT16_(int x, char *file, int line) { int res; if (!VERIFY_SHORT(x)) { fprintf (stderr, "EXTRACT16: input is 
not short: %d in %s: line %d\n", x, file, line); #ifdef FIXED_DEBUG_ASSERT celt_assert(0); #endif } res = x; celt_mips++; return res; } #define EXTEND32(x) EXTEND32_(x, __FILE__, __LINE__) -static inline int EXTEND32_(int x, char *file, int line) +static OPUS_INLINE int EXTEND32_(int x, char *file, int line) { int res; if (!VERIFY_SHORT(x)) { fprintf (stderr, "EXTEND32: input is not short: %d in %s: line %d\n", x, file, line); #ifdef FIXED_DEBUG_ASSERT celt_assert(0); #endif } res = x; celt_mips++; return res; } #define SHR16(a, shift) SHR16_(a, shift, __FILE__, __LINE__) -static inline short SHR16_(int a, int shift, char *file, int line) +static OPUS_INLINE short SHR16_(int a, int shift, char *file, int line) { int res; if (!VERIFY_SHORT(a) || !VERIFY_SHORT(shift)) { fprintf (stderr, "SHR16: inputs are not short: %d >> %d in %s: line %d\n", a, shift, file, line); #ifdef FIXED_DEBUG_ASSERT celt_assert(0); #endif @@ -152,17 +152,17 @@ static inline short SHR16_(int a, int sh #ifdef FIXED_DEBUG_ASSERT celt_assert(0); #endif } celt_mips++; return res; } #define SHL16(a, shift) SHL16_(a, shift, __FILE__, __LINE__) -static inline short SHL16_(int a, int shift, char *file, int line) +static OPUS_INLINE short SHL16_(int a, int shift, char *file, int line) { int res; if (!VERIFY_SHORT(a) || !VERIFY_SHORT(shift)) { fprintf (stderr, "SHL16: inputs are not short: %d %d in %s: line %d\n", a, shift, file, line); #ifdef FIXED_DEBUG_ASSERT celt_assert(0); #endif @@ -174,17 +174,17 @@ static inline short SHL16_(int a, int sh #ifdef FIXED_DEBUG_ASSERT celt_assert(0); #endif } celt_mips++; return res; } -static inline int SHR32(opus_int64 a, int shift) +static OPUS_INLINE int SHR32(opus_int64 a, int shift) { opus_int64 res; if (!VERIFY_INT(a) || !VERIFY_SHORT(shift)) { fprintf (stderr, "SHR32: inputs are not int: %d %d\n", (int)a, shift); #ifdef FIXED_DEBUG_ASSERT celt_assert(0); #endif @@ -196,17 +196,17 @@ static inline int SHR32(opus_int64 a, in #ifdef FIXED_DEBUG_ASSERT 
celt_assert(0); #endif } celt_mips+=2; return res; } #define SHL32(a, shift) SHL32_(a, shift, __FILE__, __LINE__) -static inline int SHL32_(opus_int64 a, int shift, char *file, int line) +static OPUS_INLINE int SHL32_(opus_int64 a, int shift, char *file, int line) { opus_int64 res; if (!VERIFY_INT(a) || !VERIFY_SHORT(shift)) { fprintf (stderr, "SHL32: inputs are not int: %lld %d in %s: line %d\n", a, shift, file, line); #ifdef FIXED_DEBUG_ASSERT celt_assert(0); #endif @@ -229,17 +229,17 @@ static inline int SHL32_(opus_int64 a, i #define ROUND16(x,a) (celt_mips--,EXTRACT16(PSHR32((x),(a)))) #define HALF16(x) (SHR16(x,1)) #define HALF32(x) (SHR32(x,1)) //#define SHR(a,shift) ((a) >> (shift)) //#define SHL(a,shift) ((a) << (shift)) #define ADD16(a, b) ADD16_(a, b, __FILE__, __LINE__) -static inline short ADD16_(int a, int b, char *file, int line) +static OPUS_INLINE short ADD16_(int a, int b, char *file, int line) { int res; if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) { fprintf (stderr, "ADD16: inputs are not short: %d %d in %s: line %d\n", a, b, file, line); #ifdef FIXED_DEBUG_ASSERT celt_assert(0); #endif @@ -252,17 +252,17 @@ static inline short ADD16_(int a, int b, celt_assert(0); #endif } celt_mips++; return res; } #define SUB16(a, b) SUB16_(a, b, __FILE__, __LINE__) -static inline short SUB16_(int a, int b, char *file, int line) +static OPUS_INLINE short SUB16_(int a, int b, char *file, int line) { int res; if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) { fprintf (stderr, "SUB16: inputs are not short: %d %d in %s: line %d\n", a, b, file, line); #ifdef FIXED_DEBUG_ASSERT celt_assert(0); #endif @@ -275,17 +275,17 @@ static inline short SUB16_(int a, int b, celt_assert(0); #endif } celt_mips++; return res; } #define ADD32(a, b) ADD32_(a, b, __FILE__, __LINE__) -static inline int ADD32_(opus_int64 a, opus_int64 b, char *file, int line) +static OPUS_INLINE int ADD32_(opus_int64 a, opus_int64 b, char *file, int line) { opus_int64 res; if (!VERIFY_INT(a) || !VERIFY_INT(b)) { 
fprintf (stderr, "ADD32: inputs are not int: %d %d in %s: line %d\n", (int)a, (int)b, file, line); #ifdef FIXED_DEBUG_ASSERT celt_assert(0); #endif @@ -298,17 +298,17 @@ static inline int ADD32_(opus_int64 a, o celt_assert(0); #endif } celt_mips+=2; return res; } #define SUB32(a, b) SUB32_(a, b, __FILE__, __LINE__) -static inline int SUB32_(opus_int64 a, opus_int64 b, char *file, int line) +static OPUS_INLINE int SUB32_(opus_int64 a, opus_int64 b, char *file, int line) { opus_int64 res; if (!VERIFY_INT(a) || !VERIFY_INT(b)) { fprintf (stderr, "SUB32: inputs are not int: %d %d in %s: line %d\n", (int)a, (int)b, file, line); #ifdef FIXED_DEBUG_ASSERT celt_assert(0); #endif @@ -322,17 +322,17 @@ static inline int SUB32_(opus_int64 a, o #endif } celt_mips+=2; return res; } #undef UADD32 #define UADD32(a, b) UADD32_(a, b, __FILE__, __LINE__) -static inline unsigned int UADD32_(opus_uint64 a, opus_uint64 b, char *file, int line) +static OPUS_INLINE unsigned int UADD32_(opus_uint64 a, opus_uint64 b, char *file, int line) { opus_uint64 res; if (!VERIFY_UINT(a) || !VERIFY_UINT(b)) { fprintf (stderr, "UADD32: inputs are not uint32: %llu %llu in %s: line %d\n", a, b, file, line); #ifdef FIXED_DEBUG_ASSERT celt_assert(0); #endif @@ -346,17 +346,17 @@ static inline unsigned int UADD32_(opus_ #endif } celt_mips+=2; return res; } #undef USUB32 #define USUB32(a, b) USUB32_(a, b, __FILE__, __LINE__) -static inline unsigned int USUB32_(opus_uint64 a, opus_uint64 b, char *file, int line) +static OPUS_INLINE unsigned int USUB32_(opus_uint64 a, opus_uint64 b, char *file, int line) { opus_uint64 res; if (!VERIFY_UINT(a) || !VERIFY_UINT(b)) { fprintf (stderr, "USUB32: inputs are not uint32: %llu %llu in %s: line %d\n", a, b, file, line); #ifdef FIXED_DEBUG_ASSERT celt_assert(0); #endif @@ -376,17 +376,17 @@ static inline unsigned int USUB32_(opus_ celt_assert(0); #endif } celt_mips+=2; return res; } /* result fits in 16 bits */ -static inline short MULT16_16_16(int a, int b) +static 
OPUS_INLINE short MULT16_16_16(int a, int b) { int res; if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) { fprintf (stderr, "MULT16_16_16: inputs are not short: %d %d\n", a, b); #ifdef FIXED_DEBUG_ASSERT celt_assert(0); #endif @@ -399,17 +399,17 @@ static inline short MULT16_16_16(int a, celt_assert(0); #endif } celt_mips++; return res; } #define MULT16_16(a, b) MULT16_16_(a, b, __FILE__, __LINE__) -static inline int MULT16_16_(int a, int b, char *file, int line) +static OPUS_INLINE int MULT16_16_(int a, int b, char *file, int line) { opus_int64 res; if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) { fprintf (stderr, "MULT16_16: inputs are not short: %d %d in %s: line %d\n", a, b, file, line); #ifdef FIXED_DEBUG_ASSERT celt_assert(0); #endif @@ -424,17 +424,17 @@ static inline int MULT16_16_(int a, int } celt_mips++; return res; } #define MAC16_16(c,a,b) (celt_mips-=2,ADD32((c),MULT16_16((a),(b)))) #define MULT16_32_QX(a, b, Q) MULT16_32_QX_(a, b, Q, __FILE__, __LINE__) -static inline int MULT16_32_QX_(int a, opus_int64 b, int Q, char *file, int line) +static OPUS_INLINE int MULT16_32_QX_(int a, opus_int64 b, int Q, char *file, int line) { opus_int64 res; if (!VERIFY_SHORT(a) || !VERIFY_INT(b)) { fprintf (stderr, "MULT16_32_Q%d: inputs are not short+int: %d %d in %s: line %d\n", Q, (int)a, (int)b, file, line); #ifdef FIXED_DEBUG_ASSERT celt_assert(0); #endif @@ -457,17 +457,17 @@ static inline int MULT16_32_QX_(int a, o if (Q==15) celt_mips+=3; else celt_mips+=4; return res; } #define MULT16_32_PX(a, b, Q) MULT16_32_PX_(a, b, Q, __FILE__, __LINE__) -static inline int MULT16_32_PX_(int a, opus_int64 b, int Q, char *file, int line) +static OPUS_INLINE int MULT16_32_PX_(int a, opus_int64 b, int Q, char *file, int line) { opus_int64 res; if (!VERIFY_SHORT(a) || !VERIFY_INT(b)) { fprintf (stderr, "MULT16_32_P%d: inputs are not short+int: %d %d in %s: line %d\n\n", Q, (int)a, (int)b, file, line); #ifdef FIXED_DEBUG_ASSERT celt_assert(0); #endif @@ -492,27 +492,37 @@ static inline int 
MULT16_32_PX_(int a, o else celt_mips+=5; return res; } #define MULT16_32_Q15(a,b) MULT16_32_QX(a,b,15) #define MAC16_32_Q15(c,a,b) (celt_mips-=2,ADD32((c),MULT16_32_Q15((a),(b)))) -static inline int SATURATE(int a, int b) +static OPUS_INLINE int SATURATE(int a, int b) { if (a>b) a=b; if (a<-b) a = -b; celt_mips+=3; return a; } -static inline int MULT16_16_Q11_32(int a, int b) +static OPUS_INLINE opus_int16 SATURATE16(opus_int32 a) +{ + celt_mips+=3; + if (a>32767) + return 32767; + else if (a<-32768) + return -32768; + else return a; +} + +static OPUS_INLINE int MULT16_16_Q11_32(int a, int b) { opus_int64 res; if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) { fprintf (stderr, "MULT16_16_Q11: inputs are not short: %d %d\n", a, b); #ifdef FIXED_DEBUG_ASSERT celt_assert(0); #endif @@ -524,17 +534,17 @@ static inline int MULT16_16_Q11_32(int a fprintf (stderr, "MULT16_16_Q11: output is not short: %d*%d=%d\n", (int)a, (int)b, (int)res); #ifdef FIXED_DEBUG_ASSERT celt_assert(0); #endif } celt_mips+=3; return res; } -static inline short MULT16_16_Q13(int a, int b) +static OPUS_INLINE short MULT16_16_Q13(int a, int b) { opus_int64 res; if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) { fprintf (stderr, "MULT16_16_Q13: inputs are not short: %d %d\n", a, b); #ifdef FIXED_DEBUG_ASSERT celt_assert(0); #endif @@ -546,17 +556,17 @@ static inline short MULT16_16_Q13(int a, fprintf (stderr, "MULT16_16_Q13: output is not short: %d*%d=%d\n", a, b, (int)res); #ifdef FIXED_DEBUG_ASSERT celt_assert(0); #endif } celt_mips+=3; return res; } -static inline short MULT16_16_Q14(int a, int b) +static OPUS_INLINE short MULT16_16_Q14(int a, int b) { opus_int64 res; if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) { fprintf (stderr, "MULT16_16_Q14: inputs are not short: %d %d\n", a, b); #ifdef FIXED_DEBUG_ASSERT celt_assert(0); #endif @@ -570,17 +580,17 @@ static inline short MULT16_16_Q14(int a, celt_assert(0); #endif } celt_mips+=3; return res; } #define MULT16_16_Q15(a, b) MULT16_16_Q15_(a, b, __FILE__, 
__LINE__) -static inline short MULT16_16_Q15_(int a, int b, char *file, int line) +static OPUS_INLINE short MULT16_16_Q15_(int a, int b, char *file, int line) { opus_int64 res; if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) { fprintf (stderr, "MULT16_16_Q15: inputs are not short: %d %d in %s: line %d\n", a, b, file, line); #ifdef FIXED_DEBUG_ASSERT celt_assert(0); #endif @@ -593,17 +603,17 @@ static inline short MULT16_16_Q15_(int a #ifdef FIXED_DEBUG_ASSERT celt_assert(0); #endif } celt_mips+=1; return res; } -static inline short MULT16_16_P13(int a, int b) +static OPUS_INLINE short MULT16_16_P13(int a, int b) { opus_int64 res; if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) { fprintf (stderr, "MULT16_16_P13: inputs are not short: %d %d\n", a, b); #ifdef FIXED_DEBUG_ASSERT celt_assert(0); #endif @@ -623,17 +633,17 @@ static inline short MULT16_16_P13(int a, fprintf (stderr, "MULT16_16_P13: output is not short: %d*%d=%d\n", a, b, (int)res); #ifdef FIXED_DEBUG_ASSERT celt_assert(0); #endif } celt_mips+=4; return res; } -static inline short MULT16_16_P14(int a, int b) +static OPUS_INLINE short MULT16_16_P14(int a, int b) { opus_int64 res; if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) { fprintf (stderr, "MULT16_16_P14: inputs are not short: %d %d\n", a, b); #ifdef FIXED_DEBUG_ASSERT celt_assert(0); #endif @@ -653,17 +663,17 @@ static inline short MULT16_16_P14(int a, fprintf (stderr, "MULT16_16_P14: output is not short: %d*%d=%d\n", a, b, (int)res); #ifdef FIXED_DEBUG_ASSERT celt_assert(0); #endif } celt_mips+=4; return res; } -static inline short MULT16_16_P15(int a, int b) +static OPUS_INLINE short MULT16_16_P15(int a, int b) { opus_int64 res; if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) { fprintf (stderr, "MULT16_16_P15: inputs are not short: %d %d\n", a, b); #ifdef FIXED_DEBUG_ASSERT celt_assert(0); #endif @@ -686,17 +696,17 @@ static inline short MULT16_16_P15(int a, #endif } celt_mips+=2; return res; } #define DIV32_16(a, b) DIV32_16_(a, b, __FILE__, __LINE__) -static inline int 
DIV32_16_(opus_int64 a, opus_int64 b, char *file, int line) +static OPUS_INLINE int DIV32_16_(opus_int64 a, opus_int64 b, char *file, int line) { opus_int64 res; if (b==0) { fprintf(stderr, "DIV32_16: divide by zero: %d/%d in %s: line %d\n", (int)a, (int)b, file, line); #ifdef FIXED_DEBUG_ASSERT celt_assert(0); #endif @@ -721,17 +731,17 @@ static inline int DIV32_16_(opus_int64 a celt_assert(0); #endif } celt_mips+=35; return res; } #define DIV32(a, b) DIV32_(a, b, __FILE__, __LINE__) -static inline int DIV32_(opus_int64 a, opus_int64 b, char *file, int line) +static OPUS_INLINE int DIV32_(opus_int64 a, opus_int64 b, char *file, int line) { opus_int64 res; if (b==0) { fprintf(stderr, "DIV32: divide by zero: %d/%d in %s: line %d\n", (int)a, (int)b, file, line); #ifdef FIXED_DEBUG_ASSERT celt_assert(0); #endif
--- a/media/libopus/celt/fixed_generic.h +++ b/media/libopus/celt/fixed_generic.h @@ -35,17 +35,17 @@ /** Multiply a 16-bit signed value by a 16-bit unsigned value. The result is a 32-bit signed value */ #define MULT16_16SU(a,b) ((opus_val32)(opus_val16)(a)*(opus_val32)(opus_uint16)(b)) /** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */ #define MULT16_32_Q16(a,b) ADD32(MULT16_16((a),SHR((b),16)), SHR(MULT16_16SU((a),((b)&0x0000ffff)),16)) /** 16x32 multiplication, followed by a 16-bit shift right (round-to-nearest). Results fits in 32 bits */ -#define MULT16_32_P16(a,b) ADD32(MULT16_16((a),SHR((b),16)), PSHR(MULT16_16((a),((b)&0x0000ffff)),16)) +#define MULT16_32_P16(a,b) ADD32(MULT16_16((a),SHR((b),16)), PSHR(MULT16_16SU((a),((b)&0x0000ffff)),16)) /** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */ #define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),((b)&0x0000ffff)),15)) /** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */ #define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15)) /** Compile-time conversion of float constant to 16-bit value */ @@ -111,16 +111,17 @@ /** 16x16 multiply-add where the result fits in 32 bits */ #define MAC16_16(c,a,b) (ADD32((c),MULT16_16((a),(b)))) /** 16x32 multiply, followed by a 15-bit shift right and 32-bit add. b must fit in 31 bits. Result fits in 32 bits. 
*/ #define MAC16_32_Q15(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15))) #define MULT16_16_Q11_32(a,b) (SHR(MULT16_16((a),(b)),11)) +#define MULT16_16_Q11(a,b) (SHR(MULT16_16((a),(b)),11)) #define MULT16_16_Q13(a,b) (SHR(MULT16_16((a),(b)),13)) #define MULT16_16_Q14(a,b) (SHR(MULT16_16((a),(b)),14)) #define MULT16_16_Q15(a,b) (SHR(MULT16_16((a),(b)),15)) #define MULT16_16_P13(a,b) (SHR(ADD32(4096,MULT16_16((a),(b))),13)) #define MULT16_16_P14(a,b) (SHR(ADD32(8192,MULT16_16((a),(b))),14)) #define MULT16_16_P15(a,b) (SHR(ADD32(16384,MULT16_16((a),(b))),15))
--- a/media/libopus/celt/float_cast.h +++ b/media/libopus/celt/float_cast.h @@ -96,17 +96,17 @@ __inline long int float2int(float value) { return _mm_cvtss_si32(_mm_load_ss(&value)); } #elif (defined(_MSC_VER) && _MSC_VER >= 1400) && (defined (WIN32) || defined (_WIN32)) #include <math.h> /* Win32 doesn't seem to have these functions. - ** Therefore implement inline versions of these functions here. + ** Therefore implement OPUS_INLINE versions of these functions here. */ __inline long int float2int (float flt) { int intgr; _asm { fld flt @@ -123,17 +123,17 @@ #warning "Don't have the functions lrint() and lrintf ()." #warning "Replacing these functions with a standard C cast." #endif /* __STDC_VERSION__ >= 199901L */ #include <math.h> #define float2int(flt) ((int)(floor(.5+flt))) #endif #ifndef DISABLE_FLOAT_API -static inline opus_int16 FLOAT2INT16(float x) +static OPUS_INLINE opus_int16 FLOAT2INT16(float x) { x = x*CELT_SIG_SCALE; x = MAX32(x, -32768); x = MIN32(x, 32767); return (opus_int16)float2int(x); } #endif /* DISABLE_FLOAT_API */
--- a/media/libopus/celt/mathops.c +++ b/media/libopus/celt/mathops.c @@ -134,17 +134,17 @@ opus_val32 celt_sqrt(opus_val32 x) return rt; } #define L1 32767 #define L2 -7651 #define L3 8277 #define L4 -626 -static inline opus_val16 _celt_cos_pi_2(opus_val16 x) +static OPUS_INLINE opus_val16 _celt_cos_pi_2(opus_val16 x) { opus_val16 x2; x2 = MULT16_16_P15(x,x); return ADD16(1,MIN16(32766,ADD32(SUB16(L1,x2), MULT16_16_P15(x2, ADD32(L2, MULT16_16_P15(x2, ADD32(L3, MULT16_16_P15(L4, x2 )))))))); }
--- a/media/libopus/celt/mathops.h +++ b/media/libopus/celt/mathops.h @@ -39,33 +39,33 @@ #include "os_support.h" /* Multiplies two 16-bit fractional values. Bit-exactness of this macro is important */ #define FRAC_MUL16(a,b) ((16384+((opus_int32)(opus_int16)(a)*(opus_int16)(b)))>>15) unsigned isqrt32(opus_uint32 _val); #ifndef OVERRIDE_CELT_MAXABS16 -static inline opus_val32 celt_maxabs16(const opus_val16 *x, int len) +static OPUS_INLINE opus_val32 celt_maxabs16(const opus_val16 *x, int len) { int i; opus_val16 maxval = 0; opus_val16 minval = 0; for (i=0;i<len;i++) { maxval = MAX16(maxval, x[i]); minval = MIN16(minval, x[i]); } return MAX32(EXTEND32(maxval),-EXTEND32(minval)); } #endif #ifndef OVERRIDE_CELT_MAXABS32 #ifdef FIXED_POINT -static inline opus_val32 celt_maxabs32(const opus_val32 *x, int len) +static OPUS_INLINE opus_val32 celt_maxabs32(const opus_val32 *x, int len) { int i; opus_val32 maxval = 0; opus_val32 minval = 0; for (i=0;i<len;i++) { maxval = MAX32(maxval, x[i]); minval = MIN32(minval, x[i]); @@ -90,17 +90,17 @@ static inline opus_val32 celt_maxabs32(c #define frac_div32(a,b) ((float)(a)/(b)) #ifdef FLOAT_APPROX /* Note: This assumes radix-2 floating point with the exponent at bits 23..30 and an offset of 127 denorm, +/- inf and NaN are *not* handled */ /** Base-2 log approximation (log2(x)). */ -static inline float celt_log2(float x) +static OPUS_INLINE float celt_log2(float x) { int integer; float frac; union { float f; opus_uint32 i; } in; in.f = x; @@ -108,17 +108,17 @@ static inline float celt_log2(float x) in.i -= integer<<23; frac = in.f - 1.5f; frac = -0.41445418f + frac*(0.95909232f + frac*(-0.33951290f + frac*0.16541097f)); return 1+integer+frac; } /** Base-2 exponential approximation (2^x). 
*/ -static inline float celt_exp2(float x) +static OPUS_INLINE float celt_exp2(float x) { int integer; float frac; union { float f; opus_uint32 i; } res; integer = floor(x); @@ -140,37 +140,38 @@ static inline float celt_exp2(float x) #endif #ifdef FIXED_POINT #include "os_support.h" #ifndef OVERRIDE_CELT_ILOG2 /** Integer log in base2. Undefined for zero and negative numbers */ -static inline opus_int16 celt_ilog2(opus_int32 x) +static OPUS_INLINE opus_int16 celt_ilog2(opus_int32 x) { celt_assert2(x>0, "celt_ilog2() only defined for strictly positive numbers"); return EC_ILOG(x)-1; } #endif /** Integer log in base2. Defined for zero, but not for negative numbers */ -static inline opus_int16 celt_zlog2(opus_val32 x) +static OPUS_INLINE opus_int16 celt_zlog2(opus_val32 x) { return x <= 0 ? 0 : celt_ilog2(x); } opus_val16 celt_rsqrt_norm(opus_val32 x); opus_val32 celt_sqrt(opus_val32 x); opus_val16 celt_cos_norm(opus_val32 x); -static inline opus_val16 celt_log2(opus_val32 x) +/** Base-2 logarithm approximation (log2(x)). (Q14 input, Q10 output) */ +static OPUS_INLINE opus_val16 celt_log2(opus_val32 x) { int i; opus_val16 n, frac; /* -0.41509302963303146, 0.9609890551383969, -0.31836011537636605, 0.15530808010959576, -0.08556153059057618 */ static const opus_val16 C[5] = {-6801+(1<<(13-DB_SHIFT)), 15746, -5217, 2545, -1401}; if (x==0) return -32767; @@ -186,24 +187,24 @@ static inline opus_val16 celt_log2(opus_ K2 = 3-4*log(2) K3 = 3*log(2) - 2 */ #define D0 16383 #define D1 22804 #define D2 14819 #define D3 10204 -static inline opus_val32 celt_exp2_frac(opus_val16 x) +static OPUS_INLINE opus_val32 celt_exp2_frac(opus_val16 x) { opus_val16 frac; frac = SHL16(x, 4); return ADD16(D0, MULT16_16_Q15(frac, ADD16(D1, MULT16_16_Q15(frac, ADD16(D2 , MULT16_16_Q15(D3,frac)))))); } /** Base-2 exponential approximation (2^x). 
(Q10 input, Q16 output) */ -static inline opus_val32 celt_exp2(opus_val16 x) +static OPUS_INLINE opus_val32 celt_exp2(opus_val16 x) { int integer; opus_val16 frac; integer = SHR16(x,10); if (integer>14) return 0x7f000000; else if (integer < -15) return 0; @@ -219,28 +220,28 @@ opus_val32 frac_div32(opus_val32 a, opus #define M1 32767 #define M2 -21 #define M3 -11943 #define M4 4936 /* Atan approximation using a 4th order polynomial. Input is in Q15 format and normalized by pi/4. Output is in Q15 format */ -static inline opus_val16 celt_atan01(opus_val16 x) +static OPUS_INLINE opus_val16 celt_atan01(opus_val16 x) { return MULT16_16_P15(x, ADD32(M1, MULT16_16_P15(x, ADD32(M2, MULT16_16_P15(x, ADD32(M3, MULT16_16_P15(M4, x))))))); } #undef M1 #undef M2 #undef M3 #undef M4 /* atan2() approximation valid for positive input values */ -static inline opus_val16 celt_atan2p(opus_val16 y, opus_val16 x) +static OPUS_INLINE opus_val16 celt_atan2p(opus_val16 y, opus_val16 x) { if (y < x) { opus_val32 arg; arg = celt_div(SHL32(EXTEND32(y),15),x); if (arg >= 32767) arg = 32767; return SHR16(celt_atan01(EXTRACT16(arg)),1);
--- a/media/libopus/celt/os_support.h +++ b/media/libopus/celt/os_support.h @@ -30,40 +30,43 @@ #ifndef OS_SUPPORT_H #define OS_SUPPORT_H #ifdef CUSTOM_SUPPORT # include "custom_support.h" #endif +#include "opus_types.h" +#include "opus_defines.h" + #include <string.h> #include <stdio.h> #include <stdlib.h> /** Opus wrapper for malloc(). To do your own dynamic allocation, all you need to do is replace this function and opus_free */ #ifndef OVERRIDE_OPUS_ALLOC -static inline void *opus_alloc (size_t size) +static OPUS_INLINE void *opus_alloc (size_t size) { return malloc(size); } #endif /** Same as celt_alloc(), except that the area is only needed inside a CELT call (might cause problem with wideband though) */ #ifndef OVERRIDE_OPUS_ALLOC_SCRATCH -static inline void *opus_alloc_scratch (size_t size) +static OPUS_INLINE void *opus_alloc_scratch (size_t size) { /* Scratch space doesn't need to be cleared */ return opus_alloc(size); } #endif /** Opus wrapper for free(). To do your own dynamic allocation, all you need to do is replace this function and opus_alloc */ #ifndef OVERRIDE_OPUS_FREE -static inline void opus_free (void *ptr) +static OPUS_INLINE void opus_free (void *ptr) { free(ptr); } #endif /** Copy n bytes of memory from src to dst. The 0* term provides compile-time type checking */ #ifndef OVERRIDE_OPUS_COPY #define OPUS_COPY(dst, src, n) (memcpy((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) ))
--- a/media/libopus/celt/pitch.c +++ b/media/libopus/celt/pitch.c @@ -140,17 +140,17 @@ static void celt_fir5(const opus_val16 * mem[1]=mem1; mem[2]=mem2; mem[3]=mem3; mem[4]=mem4; } void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp, - int len, int C) + int len, int C, int arch) { int i; opus_val32 ac[5]; opus_val16 tmp=Q15ONE; opus_val16 lpc[4], mem[5]={0,0,0,0,0}; opus_val16 lpc2[5]; opus_val16 c1 = QCONST16(.8f,15); #ifdef FIXED_POINT @@ -175,17 +175,17 @@ void pitch_downsample(celt_sig * OPUS_RE if (C==2) { for (i=1;i<len>>1;i++) x_lp[i] += SHR32(HALF32(HALF32(x[1][(2*i-1)]+x[1][(2*i+1)])+x[1][2*i]), shift); x_lp[0] += SHR32(HALF32(HALF32(x[1][1])+x[1][0]), shift); } _celt_autocorr(x_lp, ac, NULL, 0, - 4, len>>1); + 4, len>>1, arch); /* Noise floor -40 dB */ #ifdef FIXED_POINT ac[0] += SHR32(ac[0],13); #else ac[0] *= 1.0001f; #endif /* Lag windowing */ @@ -245,19 +245,24 @@ celt_pitch_xcorr(opus_val16 *x, opus_val #else /* Unrolled version of the pitch correlation -- runs faster on x86 and ARM */ #ifdef FIXED_POINT opus_val32 #else void #endif -celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch) +celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch) { int i,j; + /*The EDSP version requires that max_pitch is at least 1, and that _x is + 32-bit aligned. 
+ Since it's hard to put asserts in assembly, put them here.*/ + celt_assert(max_pitch>0); + celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0); #ifdef FIXED_POINT opus_val32 maxcorr=1; #endif for (i=0;i<max_pitch-3;i+=4) { opus_val32 sum[4]={0,0,0,0}; xcorr_kernel(_x, _y+i, sum, len); xcorr[i]=sum[0]; @@ -284,17 +289,17 @@ celt_pitch_xcorr(const opus_val16 *_x, c } #ifdef FIXED_POINT return maxcorr; #endif } #endif void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y, - int len, int max_pitch, int *pitch) + int len, int max_pitch, int *pitch, int arch) { int i, j; int lag; int best_pitch[2]={0,0}; VARDECL(opus_val16, x_lp4); VARDECL(opus_val16, y_lp4); VARDECL(opus_val32, xcorr); #ifdef FIXED_POINT @@ -337,17 +342,17 @@ void pitch_search(const opus_val16 * OPU } #endif /* Coarse search with 4x decimation */ #ifdef FIXED_POINT maxcorr = #endif - celt_pitch_xcorr(x_lp4, y_lp4, xcorr, len>>2, max_pitch>>2); + celt_pitch_xcorr(x_lp4, y_lp4, xcorr, len>>2, max_pitch>>2, arch); find_best_pitch(xcorr, y_lp4, len>>2, max_pitch>>2, best_pitch #ifdef FIXED_POINT , 0, maxcorr #endif ); /* Finer search with 2x decimation */
--- a/media/libopus/celt/pitch.h +++ b/media/libopus/celt/pitch.h @@ -30,37 +30,43 @@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef PITCH_H #define PITCH_H #include "modes.h" +#include "cpu_support.h" #if defined(__SSE__) && !defined(FIXED_POINT) #include "x86/pitch_sse.h" #endif +#if defined(OPUS_ARM_ASM) && defined(FIXED_POINT) +# include "arm/pitch_arm.h" +#endif + void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp, - int len, int C); + int len, int C, int arch); void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y, - int len, int max_pitch, int *pitch); + int len, int max_pitch, int *pitch, int arch); opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, int N, int *T0, int prev_period, opus_val16 prev_gain); /* OPT: This is the kernel you really want to optimize. It gets used a lot by the prefilter and by the PLC. 
*/ #ifndef OVERRIDE_XCORR_KERNEL -static inline void xcorr_kernel(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len) +static OPUS_INLINE void xcorr_kernel(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len) { int j; opus_val16 y_0, y_1, y_2, y_3; + celt_assert(len>=3); y_3=0; /* gcc doesn't realize that y_3 can't be used uninitialized */ y_0=*y++; y_1=*y++; y_2=*y++; for (j=0;j<len-3;j+=4) { opus_val16 tmp; tmp = *x++; @@ -114,17 +120,17 @@ static inline void xcorr_kernel(const op sum[1] = MAC16_16(sum[1],tmp,y_3); sum[2] = MAC16_16(sum[2],tmp,y_0); sum[3] = MAC16_16(sum[3],tmp,y_1); } } #endif /* OVERRIDE_XCORR_KERNEL */ #ifndef OVERRIDE_DUAL_INNER_PROD -static inline void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02, +static OPUS_INLINE void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02, int N, opus_val32 *xy1, opus_val32 *xy2) { int i; opus_val32 xy01=0; opus_val32 xy02=0; for (i=0;i<N;i++) { xy01 = MAC16_16(xy01, x[i], y01[i]); @@ -135,11 +141,33 @@ static inline void dual_inner_prod(const } #endif #ifdef FIXED_POINT opus_val32 #else void #endif -celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch); +celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, + opus_val32 *xcorr, int len, int max_pitch); + +#if !defined(OVERRIDE_PITCH_XCORR) +/*Is run-time CPU detection enabled on this platform?*/ +# if defined(OPUS_HAVE_RTCD) +extern +# if defined(FIXED_POINT) +opus_val32 +# else +void +# endif +(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, + const opus_val16 *, opus_val32 *, int, int); + +# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \ + ((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \ + xcorr, len, max_pitch)) +# else +# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \ + ((void)(arch),celt_pitch_xcorr_c(_x, _y, xcorr, len, 
max_pitch)) +# endif +#endif #endif
--- a/media/libopus/celt/quant_bands.c +++ b/media/libopus/celt/quant_bands.c @@ -307,27 +307,31 @@ void quant_coarse_energy(const CELTMode if (!intra) { unsigned char *intra_buf; ec_enc enc_intra_state; opus_int32 tell_intra; opus_uint32 nstart_bytes; opus_uint32 nintra_bytes; + opus_uint32 save_bytes; int badness2; VARDECL(unsigned char, intra_bits); tell_intra = ec_tell_frac(enc); enc_intra_state = *enc; nstart_bytes = ec_range_bytes(&enc_start_state); nintra_bytes = ec_range_bytes(&enc_intra_state); intra_buf = ec_get_buffer(&enc_intra_state) + nstart_bytes; - ALLOC(intra_bits, nintra_bytes-nstart_bytes, unsigned char); + save_bytes = nintra_bytes-nstart_bytes; + if (save_bytes == 0) + save_bytes = ALLOC_NONE; + ALLOC(intra_bits, save_bytes, unsigned char); /* Copy bits from intra bit-stream */ OPUS_COPY(intra_bits, intra_buf, nintra_bytes - nstart_bytes); *enc = enc_start_state; badness2 = quant_coarse_energy_impl(m, start, end, eBands, oldEBands, budget, tell, e_prob_model[LM][intra], error, enc, C, LM, 0, max_decay, lfe);
--- a/media/libopus/celt/rate.c +++ b/media/libopus/celt/rate.c @@ -240,17 +240,17 @@ void compute_pulse_cache(CELTMode *m, in } } } #endif /* CUSTOM_MODES */ #define ALLOC_STEPS 6 -static inline int interp_bits2pulses(const CELTMode *m, int start, int end, int skip_start, +static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end, int skip_start, const int *bits1, const int *bits2, const int *thresh, const int *cap, opus_int32 total, opus_int32 *_balance, int skip_rsv, int *intensity, int intensity_rsv, int *dual_stereo, int dual_stereo_rsv, int *bits, int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev, int signalBandwidth) { opus_int32 psum; int lo, hi; int i, j; int logM;
--- a/media/libopus/celt/rate.h +++ b/media/libopus/celt/rate.h @@ -40,22 +40,22 @@ #define QTHETA_OFFSET 4 #define QTHETA_OFFSET_TWOPHASE 16 #include "cwrs.h" #include "modes.h" void compute_pulse_cache(CELTMode *m, int LM); -static inline int get_pulses(int i) +static OPUS_INLINE int get_pulses(int i) { return i<8 ? i : (8 + (i&7)) << ((i>>3)-1); } -static inline int bits2pulses(const CELTMode *m, int band, int LM, int bits) +static OPUS_INLINE int bits2pulses(const CELTMode *m, int band, int LM, int bits) { int i; int lo, hi; const unsigned char *cache; LM++; cache = m->cache.bits + m->cache.index[LM*m->nbEBands+band]; @@ -72,17 +72,17 @@ static inline int bits2pulses(const CELT lo = mid; } if (bits- (lo == 0 ? -1 : (int)cache[lo]) <= (int)cache[hi]-bits) return lo; else return hi; } -static inline int pulses2bits(const CELTMode *m, int band, int LM, int pulses) +static OPUS_INLINE int pulses2bits(const CELTMode *m, int band, int LM, int pulses) { const unsigned char *cache; LM++; cache = m->cache.bits + m->cache.index[LM*m->nbEBands+band]; return pulses == 0 ? 0 : cache[pulses]+1; }
--- a/media/libopus/celt/stack_alloc.h +++ b/media/libopus/celt/stack_alloc.h @@ -27,16 +27,19 @@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef STACK_ALLOC_H #define STACK_ALLOC_H +#include "opus_types.h" +#include "opus_defines.h" + #if (!defined (VAR_ARRAYS) && !defined (USE_ALLOCA) && !defined (NONTHREADSAFE_PSEUDOSTACK)) #error "Opus requires one of VAR_ARRAYS, USE_ALLOCA, or NONTHREADSAFE_PSEUDOSTACK be defined to select the temporary allocation mode." #endif #ifdef USE_ALLOCA # ifdef WIN32 # include <malloc.h> # else @@ -87,30 +90,33 @@ #if defined(VAR_ARRAYS) #define VARDECL(type, var) #define ALLOC(var, size, type) type var[size] #define SAVE_STACK #define RESTORE_STACK #define ALLOC_STACK +/* C99 does not allow VLAs of size zero */ +#define ALLOC_NONE 1 #elif defined(USE_ALLOCA) #define VARDECL(type, var) type *var # ifdef WIN32 # define ALLOC(var, size, type) var = ((type*)_alloca(sizeof(type)*(size))) # else # define ALLOC(var, size, type) var = ((type*)alloca(sizeof(type)*(size))) # endif #define SAVE_STACK #define RESTORE_STACK #define ALLOC_STACK +#define ALLOC_NONE 0 #else #ifdef CELT_C char *global_stack=0; #else extern char *global_stack; #endif /* CELT_C */ @@ -138,33 +144,34 @@ extern char *global_stack_top; #define ALLOC_STACK char *_saved_stack; (global_stack = (global_stack==0) ? 
opus_alloc_scratch(GLOBAL_STACK_SIZE) : global_stack); _saved_stack = global_stack; #endif /* ENABLE_VALGRIND */ #include "os_support.h" #define VARDECL(type, var) type *var #define ALLOC(var, size, type) var = PUSH(global_stack, size, type) #define SAVE_STACK char *_saved_stack = global_stack; +#define ALLOC_NONE 0 #endif /* VAR_ARRAYS */ #ifdef ENABLE_VALGRIND #include <valgrind/memcheck.h> #define OPUS_CHECK_ARRAY(ptr, len) VALGRIND_CHECK_MEM_IS_DEFINED(ptr, len*sizeof(*ptr)) #define OPUS_CHECK_VALUE(value) VALGRIND_CHECK_VALUE_IS_DEFINED(value) #define OPUS_CHECK_ARRAY_COND(ptr, len) VALGRIND_CHECK_MEM_IS_DEFINED(ptr, len*sizeof(*ptr)) #define OPUS_CHECK_VALUE_COND(value) VALGRIND_CHECK_VALUE_IS_DEFINED(value) #define OPUS_PRINT_INT(value) do {fprintf(stderr, #value " = %d at %s:%d\n", value, __FILE__, __LINE__);}while(0) #define OPUS_FPRINTF fprintf #else -static inline int _opus_false(void) {return 0;} +static OPUS_INLINE int _opus_false(void) {return 0;} #define OPUS_CHECK_ARRAY(ptr, len) _opus_false() #define OPUS_CHECK_VALUE(value) _opus_false() #define OPUS_PRINT_INT(value) do{}while(0) #define OPUS_FPRINTF (void) #endif
--- a/media/libopus/celt/x86/pitch_sse.h +++ b/media/libopus/celt/x86/pitch_sse.h @@ -31,17 +31,17 @@ #ifndef PITCH_SSE_H #define PITCH_SSE_H #include <xmmintrin.h> #include "arch.h" #define OVERRIDE_XCORR_KERNEL -static inline void xcorr_kernel(const opus_val16 *x, const opus_val16 *y, opus_val32 sum[4], int len) +static OPUS_INLINE void xcorr_kernel(const opus_val16 *x, const opus_val16 *y, opus_val32 sum[4], int len) { int j; __m128 xsum1, xsum2; xsum1 = _mm_loadu_ps(sum); xsum2 = _mm_setzero_ps(); for (j = 0; j < len-3; j += 4) { @@ -67,17 +67,17 @@ static inline void xcorr_kernel(const op xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j))); } } } _mm_storeu_ps(sum,_mm_add_ps(xsum1,xsum2)); } #define OVERRIDE_DUAL_INNER_PROD -static inline void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02, +static OPUS_INLINE void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02, int N, opus_val32 *xy1, opus_val32 *xy2) { int i; __m128 xsum1, xsum2; xsum1 = _mm_setzero_ps(); xsum2 = _mm_setzero_ps(); for (i=0;i<N-3;i+=4) { @@ -97,17 +97,17 @@ static inline void dual_inner_prod(const for (;i<N;i++) { *xy1 = MAC16_16(*xy1, x[i], y01[i]); *xy2 = MAC16_16(*xy2, x[i], y02[i]); } } #define OVERRIDE_COMB_FILTER_CONST -static inline void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N, +static OPUS_INLINE void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N, opus_val16 g10, opus_val16 g11, opus_val16 g12) { int i; __m128 x0v; __m128 g10v, g11v, g12v; g10v = _mm_load1_ps(&g10); g11v = _mm_load1_ps(&g11); g12v = _mm_load1_ps(&g12);
--- a/media/libopus/celt_sources.mk +++ b/media/libopus/celt_sources.mk @@ -13,9 +13,16 @@ celt/mdct.c \ celt/modes.c \ celt/pitch.c \ celt/celt_lpc.c \ celt/quant_bands.c \ celt/rate.c \ celt/vq.c CELT_SOURCES_ARM = \ -celt/arm/armcpu.c +celt/arm/armcpu.c \ +celt/arm/arm_celt_map.c + +CELT_SOURCES_ARM_ASM = \ +celt/arm/celt_pitch_xcorr_arm.s + +CELT_AM_SOURCES_ARM_ASM = \ +celt/arm/armopts.s.in
--- a/media/libopus/include/opus.h +++ b/media/libopus/include/opus.h @@ -906,15 +906,73 @@ OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int * sufficient, and possibly much smaller. * @returns The total size of the output packet on success, or an error code * on failure. * @retval #OPUS_BUFFER_TOO_SMALL \a maxlen was insufficient to contain the * complete output packet. */ OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_repacketizer_out(OpusRepacketizer *rp, unsigned char *data, opus_int32 maxlen) OPUS_ARG_NONNULL(1); +/** Pads a given Opus packet to a larger size (possibly changing the TOC sequence). + * @param[in,out] data <tt>unsigned char*</tt>: The buffer containing the + * packet to pad. + * @param len <tt>opus_int32</tt>: The size of the packet. + * This must be at least 1. + * @param new_len <tt>opus_int32</tt>: The desired size of the packet after padding. + * This must be at least as large as len. + * @returns an error code + * @retval #OPUS_OK on success. + * @retval #OPUS_BAD_ARG \a len was less than 1 or new_len was less than len. + * @retval #OPUS_INVALID_PACKET \a data did not contain a valid Opus packet. + */ +OPUS_EXPORT int opus_packet_pad(unsigned char *data, opus_int32 len, opus_int32 new_len); + +/** Remove all padding from a given Opus packet and rewrite the TOC sequence to + * minimize space usage. + * @param[in,out] data <tt>unsigned char*</tt>: The buffer containing the + * packet to strip. + * @param len <tt>opus_int32</tt>: The size of the packet. + * This must be at least 1. + * @returns The new size of the output packet on success, or an error code + * on failure. + * @retval #OPUS_BAD_ARG \a len was less than 1. + * @retval #OPUS_INVALID_PACKET \a data did not contain a valid Opus packet. + */ +OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_packet_unpad(unsigned char *data, opus_int32 len); + +/** Pads a given Opus multi-stream packet to a larger size (possibly changing the TOC sequence). 
+ * @param[in,out] data <tt>unsigned char*</tt>: The buffer containing the + * packet to pad. + * @param len <tt>opus_int32</tt>: The size of the packet. + * This must be at least 1. + * @param new_len <tt>opus_int32</tt>: The desired size of the packet after padding. + * This must be at least as large as len. + * @param nb_streams <tt>int</tt>: The number of streams (not channels) in the packet. + * This must be at least 1. + * @returns an error code + * @retval #OPUS_OK on success. + * @retval #OPUS_BAD_ARG \a len was less than 1. + * @retval #OPUS_INVALID_PACKET \a data did not contain a valid Opus packet. + */ +OPUS_EXPORT int opus_multistream_packet_pad(unsigned char *data, opus_int32 len, opus_int32 new_len, int nb_streams); + +/** Remove all padding from a given Opus multi-stream packet and rewrite the TOC sequence to + * minimize space usage. + * @param[in,out] data <tt>unsigned char*</tt>: The buffer containing the + * packet to strip. + * @param len <tt>opus_int32</tt>: The size of the packet. + * This must be at least 1. + * @param nb_streams <tt>int</tt>: The number of streams (not channels) in the packet. + * This must be at least 1. + * @returns The new size of the output packet on success, or an error code + * on failure. + * @retval #OPUS_BAD_ARG \a len was less than 1. + * @retval #OPUS_INVALID_PACKET \a data did not contain a valid Opus packet. + */ +OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_multistream_packet_unpad(unsigned char *data, opus_int32 len, int nb_streams); + /**@}*/ #ifdef __cplusplus } #endif #endif /* OPUS_H */
--- a/media/libopus/include/opus_custom.h +++ b/media/libopus/include/opus_custom.h @@ -42,17 +42,17 @@ extern "C" { #endif #ifdef CUSTOM_MODES # define OPUS_CUSTOM_EXPORT OPUS_EXPORT # define OPUS_CUSTOM_EXPORT_STATIC OPUS_EXPORT #else # define OPUS_CUSTOM_EXPORT # ifdef OPUS_BUILD -# define OPUS_CUSTOM_EXPORT_STATIC static inline +# define OPUS_CUSTOM_EXPORT_STATIC static OPUS_INLINE # else # define OPUS_CUSTOM_EXPORT_STATIC # endif #endif /** @defgroup opus_custom Opus Custom * @{ * Opus Custom is an optional part of the Opus specification and @@ -135,34 +135,35 @@ OPUS_CUSTOM_EXPORT void opus_custom_mode * @param [in] channels <tt>int</tt>: Number of channels * @returns size */ OPUS_CUSTOM_EXPORT_STATIC OPUS_WARN_UNUSED_RESULT int opus_custom_encoder_get_size( const OpusCustomMode *mode, int channels ) OPUS_ARG_NONNULL(1); +# ifdef CUSTOM_MODES /** Initializes a previously allocated encoder state * The memory pointed to by st must be the size returned by opus_custom_encoder_get_size. * This is intended for applications which use their own allocator instead of malloc. * @see opus_custom_encoder_create(),opus_custom_encoder_get_size() * To reset a previously initialized state use the OPUS_RESET_STATE CTL. * @param [in] st <tt>OpusCustomEncoder*</tt>: Encoder state * @param [in] mode <tt>OpusCustomMode *</tt>: Contains all the information about the characteristics of * the stream (must be the same characteristics as used for the * decoder) * @param [in] channels <tt>int</tt>: Number of channels * @return OPUS_OK Success or @ref opus_errorcodes */ -OPUS_CUSTOM_EXPORT_STATIC int opus_custom_encoder_init( +OPUS_CUSTOM_EXPORT int opus_custom_encoder_init( OpusCustomEncoder *st, const OpusCustomMode *mode, int channels ) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2); - +# endif #endif /** Creates a new encoder state. Each stream needs its own encoder * state (can't be shared across simultaneous streams). 
* @param [in] mode <tt>OpusCustomMode*</tt>: Contains all the information about the characteristics of * the stream (must be the same characteristics as used for the * decoder)
--- a/media/libopus/include/opus_defines.h +++ b/media/libopus/include/opus_defines.h @@ -93,16 +93,28 @@ extern "C" { # define OPUS_RESTRICT __restrict # else # define OPUS_RESTRICT # endif #else # define OPUS_RESTRICT restrict #endif +#if (!defined(__STDC_VERSION__) || (__STDC_VERSION__ < 199901L) ) +# if OPUS_GNUC_PREREQ(2,7) +# define OPUS_INLINE __inline__ +# elif (defined(_MSC_VER)) +# define OPUS_INLINE __inline +# else +# define OPUS_INLINE +# endif +#else +# define OPUS_INLINE inline +#endif + /**Warning attributes for opus functions * NONNULL is not used in OPUS_BUILD to avoid the compiler optimizing out * some paranoid null checks. */ #if defined(__GNUC__) && OPUS_GNUC_PREREQ(3, 4) # define OPUS_WARN_UNUSED_RESULT __attribute__ ((__warn_unused_result__)) #else # define OPUS_WARN_UNUSED_RESULT #endif @@ -146,16 +158,18 @@ extern "C" { #define OPUS_GET_PITCH_REQUEST 4033 #define OPUS_SET_GAIN_REQUEST 4034 #define OPUS_GET_GAIN_REQUEST 4045 /* Should have been 4035 */ #define OPUS_SET_LSB_DEPTH_REQUEST 4036 #define OPUS_GET_LSB_DEPTH_REQUEST 4037 #define OPUS_GET_LAST_PACKET_DURATION_REQUEST 4039 #define OPUS_SET_EXPERT_FRAME_DURATION_REQUEST 4040 #define OPUS_GET_EXPERT_FRAME_DURATION_REQUEST 4041 +#define OPUS_SET_PREDICTION_DISABLED_REQUEST 4042 +#define OPUS_GET_PREDICTION_DISABLED_REQUEST 4043 /* Don't use 4045, it's already taken by OPUS_GET_GAIN_REQUEST */ /* Macros to trigger compilation errors when the wrong types are provided to a CTL */ #define __opus_check_int(x) (((void)((x) == (opus_int32)0)), (opus_int32)(x)) #define __opus_check_int_ptr(ptr) ((ptr) + ((ptr) - (opus_int32*)(ptr))) #define __opus_check_uint_ptr(ptr) ((ptr) + ((ptr) - (opus_uint32*)(ptr))) #define __opus_check_val16_ptr(ptr) ((ptr) + ((ptr) - (opus_val16*)(ptr))) @@ -189,17 +203,16 @@ extern "C" { #define OPUS_FRAMESIZE_ARG 5000 /**< Select frame size from the argument (default) */ #define OPUS_FRAMESIZE_2_5_MS 5001 /**< Use 2.5 ms frames */ #define OPUS_FRAMESIZE_5_MS 5002 
/**< Use 5 ms frames */ #define OPUS_FRAMESIZE_10_MS 5003 /**< Use 10 ms frames */ #define OPUS_FRAMESIZE_20_MS 5004 /**< Use 20 ms frames */ #define OPUS_FRAMESIZE_40_MS 5005 /**< Use 40 ms frames */ #define OPUS_FRAMESIZE_60_MS 5006 /**< Use 60 ms frames */ -#define OPUS_FRAMESIZE_VARIABLE 5010 /**< Optimize the frame size dynamically */ /**@}*/ /** @defgroup opus_encoderctls Encoder related CTLs * * These are convenience macros for use with the \c opus_encode_ctl * interface. They are used to generate the appropriate series of @@ -570,16 +583,24 @@ extern "C" { * <dt>OPUS_FRAMESIZE_20_MS</dt><dd>Use 20 ms frames.</dd> * <dt>OPUS_FRAMESIZE_40_MS</dt><dd>Use 40 ms frames.</dd> * <dt>OPUS_FRAMESIZE_60_MS</dt><dd>Use 60 ms frames.</dd> * <dt>OPUS_FRAMESIZE_VARIABLE</dt><dd>Optimize the frame size dynamically.</dd> * </dl> * @hideinitializer */ #define OPUS_GET_EXPERT_FRAME_DURATION(x) OPUS_GET_EXPERT_FRAME_DURATION_REQUEST, __opus_check_int_ptr(x) +/** If set to 1, disables almost all use of prediction, making frames almost + completely independent. This reduces quality. (default : 0) + * @hideinitializer */ +#define OPUS_SET_PREDICTION_DISABLED(x) OPUS_SET_PREDICTION_DISABLED_REQUEST, __opus_check_int(x) +/** Gets the encoder's configured prediction status. + * @hideinitializer */ +#define OPUS_GET_PREDICTION_DISABLED(x) OPUS_GET_PREDICTION_DISABLED_REQUEST, __opus_check_int_ptr(x) + /**@}*/ /** @defgroup opus_genericctls Generic CTLs * * These macros are used with the \c opus_decoder_ctl and * \c opus_encoder_ctl calls to generate a particular * request. *
--- a/media/libopus/moz.build +++ b/media/libopus/moz.build @@ -11,17 +11,17 @@ EXPORTS.opus += [ 'include/opus_types.h', ] MSVC_ENABLE_PGO = True FINAL_LIBRARY = 'gkmedias' DEFINES['OPUS_BUILD'] = True -DEFINES['OPUS_VERSION'] = '"v1.1-beta-23-gf2446c2-mozilla"' +DEFINES['OPUS_VERSION'] = '"v1.1-rc2-1-g35a44c6-mozilla"' DEFINES['USE_ALLOCA'] = True if CONFIG['OS_ARCH'] in ('Linux', 'Darwin', 'DragonFly', 'FreeBSD', 'NetBSD', 'OpenBSD'): DEFINES['HAVE_LRINTF'] = True if CONFIG['OS_ARCH'] == 'WINNT': DEFINES['inline'] = '__inline'
--- a/media/libopus/silk/A2NLSF.c +++ b/media/libopus/silk/A2NLSF.c @@ -39,50 +39,50 @@ POSSIBILITY OF SUCH DAMAGE. #include "tables.h" /* Number of binary divisions, when not in low complexity mode */ #define BIN_DIV_STEPS_A2NLSF_FIX 3 /* must be no higher than 16 - log2( LSF_COS_TAB_SZ_FIX ) */ #define MAX_ITERATIONS_A2NLSF_FIX 30 /* Helper function for A2NLSF(..) */ /* Transforms polynomials from cos(n*f) to cos(f)^n */ -static inline void silk_A2NLSF_trans_poly( +static OPUS_INLINE void silk_A2NLSF_trans_poly( opus_int32 *p, /* I/O Polynomial */ const opus_int dd /* I Polynomial order (= filter order / 2 ) */ ) { opus_int k, n; for( k = 2; k <= dd; k++ ) { for( n = dd; n > k; n-- ) { p[ n - 2 ] -= p[ n ]; } p[ k - 2 ] -= silk_LSHIFT( p[ k ], 1 ); } } /* Helper function for A2NLSF(..) */ /* Polynomial evaluation */ -static inline opus_int32 silk_A2NLSF_eval_poly( /* return the polynomial evaluation, in Q16 */ +static OPUS_INLINE opus_int32 silk_A2NLSF_eval_poly( /* return the polynomial evaluation, in Q16 */ opus_int32 *p, /* I Polynomial, Q16 */ const opus_int32 x, /* I Evaluation point, Q12 */ const opus_int dd /* I Order */ ) { opus_int n; opus_int32 x_Q16, y32; y32 = p[ dd ]; /* Q16 */ x_Q16 = silk_LSHIFT( x, 4 ); for( n = dd - 1; n >= 0; n-- ) { y32 = silk_SMLAWW( p[ n ], y32, x_Q16 ); /* Q16 */ } return y32; } -static inline void silk_A2NLSF_init( +static OPUS_INLINE void silk_A2NLSF_init( const opus_int32 *a_Q16, opus_int32 *P, opus_int32 *Q, const opus_int dd ) { opus_int k;
--- a/media/libopus/silk/API.h +++ b/media/libopus/silk/API.h @@ -59,16 +59,17 @@ opus_int silk_Get_Encoder_Size( opus_int *encSizeBytes /* O Number of bytes in SILK encoder state */ ); /*************************/ /* Init or reset encoder */ /*************************/ opus_int silk_InitEncoder( /* O Returns error code */ void *encState, /* I/O State */ + int arch, /* I Run-time architecture */ silk_EncControlStruct *encStatus /* O Encoder Status */ ); /**************************/ /* Encode frame with Silk */ /**************************/ /* Note: if prefillFlag is set, the input must contain 10 ms of audio, irrespective of what */ /* encControl->payloadSize_ms is set to */
--- a/media/libopus/silk/CNG.c +++ b/media/libopus/silk/CNG.c @@ -28,17 +28,17 @@ POSSIBILITY OF SUCH DAMAGE. #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "main.h" #include "stack_alloc.h" /* Generates excitation for CNG LPC synthesis */ -static inline void silk_CNG_exc( +static OPUS_INLINE void silk_CNG_exc( opus_int32 residual_Q10[], /* O CNG residual signal Q10 */ opus_int32 exc_buf_Q14[], /* I Random samples buffer Q10 */ opus_int32 Gain_Q16, /* I Gain to apply */ opus_int length, /* I Length */ opus_int32 *rand_seed /* I/O Seed to random index generator */ ) { opus_int32 seed;
--- a/media/libopus/silk/Inlines.h +++ b/media/libopus/silk/Inlines.h @@ -21,59 +21,59 @@ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NO SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ***********************************************************************/ /*! \file silk_Inlines.h - * \brief silk_Inlines.h defines inline signal processing functions. + * \brief silk_Inlines.h defines OPUS_INLINE signal processing functions. */ #ifndef SILK_FIX_INLINES_H #define SILK_FIX_INLINES_H #ifdef __cplusplus extern "C" { #endif /* count leading zeros of opus_int64 */ -static inline opus_int32 silk_CLZ64( opus_int64 in ) +static OPUS_INLINE opus_int32 silk_CLZ64( opus_int64 in ) { opus_int32 in_upper; in_upper = (opus_int32)silk_RSHIFT64(in, 32); if (in_upper == 0) { /* Search in the lower 32 bits */ return 32 + silk_CLZ32( (opus_int32) in ); } else { /* Search in the upper 32 bits */ return silk_CLZ32( in_upper ); } } /* get number of leading zeros and fractional part (the bits right after the leading one */ -static inline void silk_CLZ_FRAC( +static OPUS_INLINE void silk_CLZ_FRAC( opus_int32 in, /* I input */ opus_int32 *lz, /* O number of leading zeros */ opus_int32 *frac_Q7 /* O the 7 bits right after the leading one */ ) { opus_int32 lzeros = silk_CLZ32(in); * lz = lzeros; * frac_Q7 = silk_ROR32(in, 24 - lzeros) & 0x7f; } /* Approximation of square root */ /* Accuracy: < +/- 10% for output values > 15 */ /* < +/- 2.5% for output values > 120 */ -static inline opus_int32 silk_SQRT_APPROX( opus_int32 x ) +static OPUS_INLINE opus_int32 silk_SQRT_APPROX( opus_int32 x ) { opus_int32 y, lz, frac_Q7; if( x <= 0 ) { return 0; } silk_CLZ_FRAC(x, &lz, &frac_Q7); @@ -89,17 +89,17 @@ static inline opus_int32 
silk_SQRT_APPRO /* increment using fractional part of input */ y = silk_SMLAWB(y, y, silk_SMULBB(213, frac_Q7)); return y; } /* Divide two int32 values and return result as int32 in a given Q-domain */ -static inline opus_int32 silk_DIV32_varQ( /* O returns a good approximation of "(a32 << Qres) / b32" */ +static OPUS_INLINE opus_int32 silk_DIV32_varQ( /* O returns a good approximation of "(a32 << Qres) / b32" */ const opus_int32 a32, /* I numerator (Q0) */ const opus_int32 b32, /* I denominator (Q0) */ const opus_int Qres /* I Q-domain of result (>= 0) */ ) { opus_int a_headrm, b_headrm, lshift; opus_int32 b32_inv, a32_nrm, b32_nrm, result; @@ -135,17 +135,17 @@ static inline opus_int32 silk_DIV32_varQ } else { /* Avoid undefined result */ return 0; } } } /* Invert int32 value and return result as int32 in a given Q-domain */ -static inline opus_int32 silk_INVERSE32_varQ( /* O returns a good approximation of "(1 << Qres) / b32" */ +static OPUS_INLINE opus_int32 silk_INVERSE32_varQ( /* O returns a good approximation of "(1 << Qres) / b32" */ const opus_int32 b32, /* I denominator (Q0) */ const opus_int Qres /* I Q-domain of result (> 0) */ ) { opus_int b_headrm, lshift; opus_int32 b32_inv, b32_nrm, err_Q32, result; silk_assert( b32 != 0 );
--- a/media/libopus/silk/LP_variable_cutoff.c +++ b/media/libopus/silk/LP_variable_cutoff.c @@ -33,17 +33,17 @@ POSSIBILITY OF SUCH DAMAGE. Elliptic/Cauer filters designed with 0.1 dB passband ripple, 80 dB minimum stopband attenuation, and [0.95 : 0.15 : 0.35] normalized cut off frequencies. */ #include "main.h" /* Helper function, interpolates the filter taps */ -static inline void silk_LP_interpolate_filter_taps( +static OPUS_INLINE void silk_LP_interpolate_filter_taps( opus_int32 B_Q28[ TRANSITION_NB ], opus_int32 A_Q28[ TRANSITION_NA ], const opus_int ind, const opus_int32 fac_Q16 ) { opus_int nb, na;
--- a/media/libopus/silk/MacroCount.h +++ b/media/libopus/silk/MacroCount.h @@ -29,134 +29,134 @@ POSSIBILITY OF SUCH DAMAGE. #define SIGPROCFIX_API_MACROCOUNT_H #include <stdio.h> #ifdef silk_MACRO_COUNT #define varDefine opus_int64 ops_count = 0; extern opus_int64 ops_count; -static inline opus_int64 silk_SaveCount(){ +static OPUS_INLINE opus_int64 silk_SaveCount(){ return(ops_count); } -static inline opus_int64 silk_SaveResetCount(){ +static OPUS_INLINE opus_int64 silk_SaveResetCount(){ opus_int64 ret; ret = ops_count; ops_count = 0; return(ret); } -static inline silk_PrintCount(){ +static OPUS_INLINE silk_PrintCount(){ printf("ops_count = %d \n ", (opus_int32)ops_count); } #undef silk_MUL -static inline opus_int32 silk_MUL(opus_int32 a32, opus_int32 b32){ +static OPUS_INLINE opus_int32 silk_MUL(opus_int32 a32, opus_int32 b32){ opus_int32 ret; ops_count += 4; ret = a32 * b32; return ret; } #undef silk_MUL_uint -static inline opus_uint32 silk_MUL_uint(opus_uint32 a32, opus_uint32 b32){ +static OPUS_INLINE opus_uint32 silk_MUL_uint(opus_uint32 a32, opus_uint32 b32){ opus_uint32 ret; ops_count += 4; ret = a32 * b32; return ret; } #undef silk_MLA -static inline opus_int32 silk_MLA(opus_int32 a32, opus_int32 b32, opus_int32 c32){ +static OPUS_INLINE opus_int32 silk_MLA(opus_int32 a32, opus_int32 b32, opus_int32 c32){ opus_int32 ret; ops_count += 4; ret = a32 + b32 * c32; return ret; } #undef silk_MLA_uint -static inline opus_int32 silk_MLA_uint(opus_uint32 a32, opus_uint32 b32, opus_uint32 c32){ +static OPUS_INLINE opus_int32 silk_MLA_uint(opus_uint32 a32, opus_uint32 b32, opus_uint32 c32){ opus_uint32 ret; ops_count += 4; ret = a32 + b32 * c32; return ret; } #undef silk_SMULWB -static inline opus_int32 silk_SMULWB(opus_int32 a32, opus_int32 b32){ +static OPUS_INLINE opus_int32 silk_SMULWB(opus_int32 a32, opus_int32 b32){ opus_int32 ret; ops_count += 5; ret = (a32 >> 16) * (opus_int32)((opus_int16)b32) + (((a32 & 0x0000FFFF) * (opus_int32)((opus_int16)b32)) >> 16); 
return ret; } #undef silk_SMLAWB -static inline opus_int32 silk_SMLAWB(opus_int32 a32, opus_int32 b32, opus_int32 c32){ +static OPUS_INLINE opus_int32 silk_SMLAWB(opus_int32 a32, opus_int32 b32, opus_int32 c32){ opus_int32 ret; ops_count += 5; ret = ((a32) + ((((b32) >> 16) * (opus_int32)((opus_int16)(c32))) + ((((b32) & 0x0000FFFF) * (opus_int32)((opus_int16)(c32))) >> 16))); return ret; } #undef silk_SMULWT -static inline opus_int32 silk_SMULWT(opus_int32 a32, opus_int32 b32){ +static OPUS_INLINE opus_int32 silk_SMULWT(opus_int32 a32, opus_int32 b32){ opus_int32 ret; ops_count += 4; ret = (a32 >> 16) * (b32 >> 16) + (((a32 & 0x0000FFFF) * (b32 >> 16)) >> 16); return ret; } #undef silk_SMLAWT -static inline opus_int32 silk_SMLAWT(opus_int32 a32, opus_int32 b32, opus_int32 c32){ +static OPUS_INLINE opus_int32 silk_SMLAWT(opus_int32 a32, opus_int32 b32, opus_int32 c32){ opus_int32 ret; ops_count += 4; ret = a32 + ((b32 >> 16) * (c32 >> 16)) + (((b32 & 0x0000FFFF) * ((c32 >> 16)) >> 16)); return ret; } #undef silk_SMULBB -static inline opus_int32 silk_SMULBB(opus_int32 a32, opus_int32 b32){ +static OPUS_INLINE opus_int32 silk_SMULBB(opus_int32 a32, opus_int32 b32){ opus_int32 ret; ops_count += 1; ret = (opus_int32)((opus_int16)a32) * (opus_int32)((opus_int16)b32); return ret; } #undef silk_SMLABB -static inline opus_int32 silk_SMLABB(opus_int32 a32, opus_int32 b32, opus_int32 c32){ +static OPUS_INLINE opus_int32 silk_SMLABB(opus_int32 a32, opus_int32 b32, opus_int32 c32){ opus_int32 ret; ops_count += 1; ret = a32 + (opus_int32)((opus_int16)b32) * (opus_int32)((opus_int16)c32); return ret; } #undef silk_SMULBT -static inline opus_int32 silk_SMULBT(opus_int32 a32, opus_int32 b32 ){ +static OPUS_INLINE opus_int32 silk_SMULBT(opus_int32 a32, opus_int32 b32 ){ opus_int32 ret; ops_count += 4; ret = ((opus_int32)((opus_int16)a32)) * (b32 >> 16); return ret; } #undef silk_SMLABT -static inline opus_int32 silk_SMLABT(opus_int32 a32, opus_int32 b32, opus_int32 c32){ +static 
OPUS_INLINE opus_int32 silk_SMLABT(opus_int32 a32, opus_int32 b32, opus_int32 c32){ opus_int32 ret; ops_count += 1; ret = a32 + ((opus_int32)((opus_int16)b32)) * (c32 >> 16); return ret; } #undef silk_SMULTT -static inline opus_int32 silk_SMULTT(opus_int32 a32, opus_int32 b32){ +static OPUS_INLINE opus_int32 silk_SMULTT(opus_int32 a32, opus_int32 b32){ opus_int32 ret; ops_count += 1; ret = (a32 >> 16) * (b32 >> 16); return ret; } #undef silk_SMLATT -static inline opus_int32 silk_SMLATT(opus_int32 a32, opus_int32 b32, opus_int32 c32){ +static OPUS_INLINE opus_int32 silk_SMLATT(opus_int32 a32, opus_int32 b32, opus_int32 c32){ opus_int32 ret; ops_count += 1; ret = a32 + (b32 >> 16) * (c32 >> 16); return ret; } /* multiply-accumulate macros that allow overflow in the addition (ie, no asserts in debug mode)*/ @@ -174,40 +174,40 @@ static inline opus_int32 silk_SMLATT(opu #undef silk_SMLAWB_ovflw #define silk_SMLAWB_ovflw silk_SMLAWB #undef silk_SMLAWT_ovflw #define silk_SMLAWT_ovflw silk_SMLAWT #undef silk_SMULL -static inline opus_int64 silk_SMULL(opus_int32 a32, opus_int32 b32){ +static OPUS_INLINE opus_int64 silk_SMULL(opus_int32 a32, opus_int32 b32){ opus_int64 ret; ops_count += 8; ret = ((opus_int64)(a32) * /*(opus_int64)*/(b32)); return ret; } #undef silk_SMLAL -static inline opus_int64 silk_SMLAL(opus_int64 a64, opus_int32 b32, opus_int32 c32){ +static OPUS_INLINE opus_int64 silk_SMLAL(opus_int64 a64, opus_int32 b32, opus_int32 c32){ opus_int64 ret; ops_count += 8; ret = a64 + ((opus_int64)(b32) * /*(opus_int64)*/(c32)); return ret; } #undef silk_SMLALBB -static inline opus_int64 silk_SMLALBB(opus_int64 a64, opus_int16 b16, opus_int16 c16){ +static OPUS_INLINE opus_int64 silk_SMLALBB(opus_int64 a64, opus_int16 b16, opus_int16 c16){ opus_int64 ret; ops_count += 4; ret = a64 + ((opus_int64)(b16) * /*(opus_int64)*/(c16)); return ret; } #undef SigProcFIX_CLZ16 -static inline opus_int32 SigProcFIX_CLZ16(opus_int16 in16) +static OPUS_INLINE opus_int32 
SigProcFIX_CLZ16(opus_int16 in16) { opus_int32 out32 = 0; ops_count += 10; if( in16 == 0 ) { return 16; } /* test nibbles */ if( in16 & 0xFF00 ) { @@ -235,252 +235,252 @@ static inline opus_int32 SigProcFIX_CLZ1 if( in16 & 0xE ) return out32 + 2; else return out32 + 3; } } #undef SigProcFIX_CLZ32 -static inline opus_int32 SigProcFIX_CLZ32(opus_int32 in32) +static OPUS_INLINE opus_int32 SigProcFIX_CLZ32(opus_int32 in32) { /* test highest 16 bits and convert to opus_int16 */ ops_count += 2; if( in32 & 0xFFFF0000 ) { return SigProcFIX_CLZ16((opus_int16)(in32 >> 16)); } else { return SigProcFIX_CLZ16((opus_int16)in32) + 16; } } #undef silk_DIV32 -static inline opus_int32 silk_DIV32(opus_int32 a32, opus_int32 b32){ +static OPUS_INLINE opus_int32 silk_DIV32(opus_int32 a32, opus_int32 b32){ ops_count += 64; return a32 / b32; } #undef silk_DIV32_16 -static inline opus_int32 silk_DIV32_16(opus_int32 a32, opus_int32 b32){ +static OPUS_INLINE opus_int32 silk_DIV32_16(opus_int32 a32, opus_int32 b32){ ops_count += 32; return a32 / b32; } #undef silk_SAT8 -static inline opus_int8 silk_SAT8(opus_int64 a){ +static OPUS_INLINE opus_int8 silk_SAT8(opus_int64 a){ opus_int8 tmp; ops_count += 1; tmp = (opus_int8)((a) > silk_int8_MAX ? silk_int8_MAX : \ ((a) < silk_int8_MIN ? silk_int8_MIN : (a))); return(tmp); } #undef silk_SAT16 -static inline opus_int16 silk_SAT16(opus_int64 a){ +static OPUS_INLINE opus_int16 silk_SAT16(opus_int64 a){ opus_int16 tmp; ops_count += 1; tmp = (opus_int16)((a) > silk_int16_MAX ? silk_int16_MAX : \ ((a) < silk_int16_MIN ? silk_int16_MIN : (a))); return(tmp); } #undef silk_SAT32 -static inline opus_int32 silk_SAT32(opus_int64 a){ +static OPUS_INLINE opus_int32 silk_SAT32(opus_int64 a){ opus_int32 tmp; ops_count += 1; tmp = (opus_int32)((a) > silk_int32_MAX ? silk_int32_MAX : \ ((a) < silk_int32_MIN ? 
silk_int32_MIN : (a))); return(tmp); } #undef silk_POS_SAT32 -static inline opus_int32 silk_POS_SAT32(opus_int64 a){ +static OPUS_INLINE opus_int32 silk_POS_SAT32(opus_int64 a){ opus_int32 tmp; ops_count += 1; tmp = (opus_int32)((a) > silk_int32_MAX ? silk_int32_MAX : (a)); return(tmp); } #undef silk_ADD_POS_SAT8 -static inline opus_int8 silk_ADD_POS_SAT8(opus_int64 a, opus_int64 b){ +static OPUS_INLINE opus_int8 silk_ADD_POS_SAT8(opus_int64 a, opus_int64 b){ opus_int8 tmp; ops_count += 1; tmp = (opus_int8)((((a)+(b)) & 0x80) ? silk_int8_MAX : ((a)+(b))); return(tmp); } #undef silk_ADD_POS_SAT16 -static inline opus_int16 silk_ADD_POS_SAT16(opus_int64 a, opus_int64 b){ +static OPUS_INLINE opus_int16 silk_ADD_POS_SAT16(opus_int64 a, opus_int64 b){ opus_int16 tmp; ops_count += 1; tmp = (opus_int16)((((a)+(b)) & 0x8000) ? silk_int16_MAX : ((a)+(b))); return(tmp); } #undef silk_ADD_POS_SAT32 -static inline opus_int32 silk_ADD_POS_SAT32(opus_int64 a, opus_int64 b){ +static OPUS_INLINE opus_int32 silk_ADD_POS_SAT32(opus_int64 a, opus_int64 b){ opus_int32 tmp; ops_count += 1; tmp = (opus_int32)((((a)+(b)) & 0x80000000) ? silk_int32_MAX : ((a)+(b))); return(tmp); } #undef silk_ADD_POS_SAT64 -static inline opus_int64 silk_ADD_POS_SAT64(opus_int64 a, opus_int64 b){ +static OPUS_INLINE opus_int64 silk_ADD_POS_SAT64(opus_int64 a, opus_int64 b){ opus_int64 tmp; ops_count += 1; tmp = ((((a)+(b)) & 0x8000000000000000LL) ? 
silk_int64_MAX : ((a)+(b))); return(tmp); } #undef silk_LSHIFT8 -static inline opus_int8 silk_LSHIFT8(opus_int8 a, opus_int32 shift){ +static OPUS_INLINE opus_int8 silk_LSHIFT8(opus_int8 a, opus_int32 shift){ opus_int8 ret; ops_count += 1; ret = a << shift; return ret; } #undef silk_LSHIFT16 -static inline opus_int16 silk_LSHIFT16(opus_int16 a, opus_int32 shift){ +static OPUS_INLINE opus_int16 silk_LSHIFT16(opus_int16 a, opus_int32 shift){ opus_int16 ret; ops_count += 1; ret = a << shift; return ret; } #undef silk_LSHIFT32 -static inline opus_int32 silk_LSHIFT32(opus_int32 a, opus_int32 shift){ +static OPUS_INLINE opus_int32 silk_LSHIFT32(opus_int32 a, opus_int32 shift){ opus_int32 ret; ops_count += 1; ret = a << shift; return ret; } #undef silk_LSHIFT64 -static inline opus_int64 silk_LSHIFT64(opus_int64 a, opus_int shift){ +static OPUS_INLINE opus_int64 silk_LSHIFT64(opus_int64 a, opus_int shift){ ops_count += 1; return a << shift; } #undef silk_LSHIFT_ovflw -static inline opus_int32 silk_LSHIFT_ovflw(opus_int32 a, opus_int32 shift){ +static OPUS_INLINE opus_int32 silk_LSHIFT_ovflw(opus_int32 a, opus_int32 shift){ ops_count += 1; return a << shift; } #undef silk_LSHIFT_uint -static inline opus_uint32 silk_LSHIFT_uint(opus_uint32 a, opus_int32 shift){ +static OPUS_INLINE opus_uint32 silk_LSHIFT_uint(opus_uint32 a, opus_int32 shift){ opus_uint32 ret; ops_count += 1; ret = a << shift; return ret; } #undef silk_RSHIFT8 -static inline opus_int8 silk_RSHIFT8(opus_int8 a, opus_int32 shift){ +static OPUS_INLINE opus_int8 silk_RSHIFT8(opus_int8 a, opus_int32 shift){ ops_count += 1; return a >> shift; } #undef silk_RSHIFT16 -static inline opus_int16 silk_RSHIFT16(opus_int16 a, opus_int32 shift){ +static OPUS_INLINE opus_int16 silk_RSHIFT16(opus_int16 a, opus_int32 shift){ ops_count += 1; return a >> shift; } #undef silk_RSHIFT32 -static inline opus_int32 silk_RSHIFT32(opus_int32 a, opus_int32 shift){ +static OPUS_INLINE opus_int32 silk_RSHIFT32(opus_int32 a, opus_int32 
shift){ ops_count += 1; return a >> shift; } #undef silk_RSHIFT64 -static inline opus_int64 silk_RSHIFT64(opus_int64 a, opus_int64 shift){ +static OPUS_INLINE opus_int64 silk_RSHIFT64(opus_int64 a, opus_int64 shift){ ops_count += 1; return a >> shift; } #undef silk_RSHIFT_uint -static inline opus_uint32 silk_RSHIFT_uint(opus_uint32 a, opus_int32 shift){ +static OPUS_INLINE opus_uint32 silk_RSHIFT_uint(opus_uint32 a, opus_int32 shift){ ops_count += 1; return a >> shift; } #undef silk_ADD_LSHIFT -static inline opus_int32 silk_ADD_LSHIFT(opus_int32 a, opus_int32 b, opus_int32 shift){ +static OPUS_INLINE opus_int32 silk_ADD_LSHIFT(opus_int32 a, opus_int32 b, opus_int32 shift){ opus_int32 ret; ops_count += 1; ret = a + (b << shift); return ret; /* shift >= 0*/ } #undef silk_ADD_LSHIFT32 -static inline opus_int32 silk_ADD_LSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){ +static OPUS_INLINE opus_int32 silk_ADD_LSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){ opus_int32 ret; ops_count += 1; ret = a + (b << shift); return ret; /* shift >= 0*/ } #undef silk_ADD_LSHIFT_uint -static inline opus_uint32 silk_ADD_LSHIFT_uint(opus_uint32 a, opus_uint32 b, opus_int32 shift){ +static OPUS_INLINE opus_uint32 silk_ADD_LSHIFT_uint(opus_uint32 a, opus_uint32 b, opus_int32 shift){ opus_uint32 ret; ops_count += 1; ret = a + (b << shift); return ret; /* shift >= 0*/ } #undef silk_ADD_RSHIFT -static inline opus_int32 silk_ADD_RSHIFT(opus_int32 a, opus_int32 b, opus_int32 shift){ +static OPUS_INLINE opus_int32 silk_ADD_RSHIFT(opus_int32 a, opus_int32 b, opus_int32 shift){ opus_int32 ret; ops_count += 1; ret = a + (b >> shift); return ret; /* shift > 0*/ } #undef silk_ADD_RSHIFT32 -static inline opus_int32 silk_ADD_RSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){ +static OPUS_INLINE opus_int32 silk_ADD_RSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){ opus_int32 ret; ops_count += 1; ret = a + (b >> shift); return ret; /* shift > 0*/ } #undef silk_ADD_RSHIFT_uint 
-static inline opus_uint32 silk_ADD_RSHIFT_uint(opus_uint32 a, opus_uint32 b, opus_int32 shift){ +static OPUS_INLINE opus_uint32 silk_ADD_RSHIFT_uint(opus_uint32 a, opus_uint32 b, opus_int32 shift){ opus_uint32 ret; ops_count += 1; ret = a + (b >> shift); return ret; /* shift > 0*/ } #undef silk_SUB_LSHIFT32 -static inline opus_int32 silk_SUB_LSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){ +static OPUS_INLINE opus_int32 silk_SUB_LSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){ opus_int32 ret; ops_count += 1; ret = a - (b << shift); return ret; /* shift >= 0*/ } #undef silk_SUB_RSHIFT32 -static inline opus_int32 silk_SUB_RSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){ +static OPUS_INLINE opus_int32 silk_SUB_RSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){ opus_int32 ret; ops_count += 1; ret = a - (b >> shift); return ret; /* shift > 0*/ } #undef silk_RSHIFT_ROUND -static inline opus_int32 silk_RSHIFT_ROUND(opus_int32 a, opus_int32 shift){ +static OPUS_INLINE opus_int32 silk_RSHIFT_ROUND(opus_int32 a, opus_int32 shift){ opus_int32 ret; ops_count += 3; ret = shift == 1 ? (a >> 1) + (a & 1) : ((a >> (shift - 1)) + 1) >> 1; return ret; } #undef silk_RSHIFT_ROUND64 -static inline opus_int64 silk_RSHIFT_ROUND64(opus_int64 a, opus_int32 shift){ +static OPUS_INLINE opus_int64 silk_RSHIFT_ROUND64(opus_int64 a, opus_int32 shift){ opus_int64 ret; ops_count += 6; ret = shift == 1 ? (a >> 1) + (a & 1) : ((a >> (shift - 1)) + 1) >> 1; return ret; } #undef silk_abs_int64 -static inline opus_int64 silk_abs_int64(opus_int64 a){ +static OPUS_INLINE opus_int64 silk_abs_int64(opus_int64 a){ ops_count += 1; return (((a) > 0) ? 
(a) : -(a)); /* Be careful, silk_abs returns wrong when input equals to silk_intXX_MIN*/ } #undef silk_abs_int32 -static inline opus_int32 silk_abs_int32(opus_int32 a){ +static OPUS_INLINE opus_int32 silk_abs_int32(opus_int32 a){ ops_count += 1; return silk_abs(a); } #undef silk_min static silk_min(a, b){ ops_count += 1; @@ -493,218 +493,218 @@ static silk_max(a, b){ } #undef silk_sign static silk_sign(a){ ops_count += 1; return ((a) > 0 ? 1 : ( (a) < 0 ? -1 : 0 )); } #undef silk_ADD16 -static inline opus_int16 silk_ADD16(opus_int16 a, opus_int16 b){ +static OPUS_INLINE opus_int16 silk_ADD16(opus_int16 a, opus_int16 b){ opus_int16 ret; ops_count += 1; ret = a + b; return ret; } #undef silk_ADD32 -static inline opus_int32 silk_ADD32(opus_int32 a, opus_int32 b){ +static OPUS_INLINE opus_int32 silk_ADD32(opus_int32 a, opus_int32 b){ opus_int32 ret; ops_count += 1; ret = a + b; return ret; } #undef silk_ADD64 -static inline opus_int64 silk_ADD64(opus_int64 a, opus_int64 b){ +static OPUS_INLINE opus_int64 silk_ADD64(opus_int64 a, opus_int64 b){ opus_int64 ret; ops_count += 2; ret = a + b; return ret; } #undef silk_SUB16 -static inline opus_int16 silk_SUB16(opus_int16 a, opus_int16 b){ +static OPUS_INLINE opus_int16 silk_SUB16(opus_int16 a, opus_int16 b){ opus_int16 ret; ops_count += 1; ret = a - b; return ret; } #undef silk_SUB32 -static inline opus_int32 silk_SUB32(opus_int32 a, opus_int32 b){ +static OPUS_INLINE opus_int32 silk_SUB32(opus_int32 a, opus_int32 b){ opus_int32 ret; ops_count += 1; ret = a - b; return ret; } #undef silk_SUB64 -static inline opus_int64 silk_SUB64(opus_int64 a, opus_int64 b){ +static OPUS_INLINE opus_int64 silk_SUB64(opus_int64 a, opus_int64 b){ opus_int64 ret; ops_count += 2; ret = a - b; return ret; } #undef silk_ADD_SAT16 -static inline opus_int16 silk_ADD_SAT16( opus_int16 a16, opus_int16 b16 ) { +static OPUS_INLINE opus_int16 silk_ADD_SAT16( opus_int16 a16, opus_int16 b16 ) { opus_int16 res; /* Nb will be counted in AKP_add32 and 
silk_SAT16*/ res = (opus_int16)silk_SAT16( silk_ADD32( (opus_int32)(a16), (b16) ) ); return res; } #undef silk_ADD_SAT32 -static inline opus_int32 silk_ADD_SAT32(opus_int32 a32, opus_int32 b32){ +static OPUS_INLINE opus_int32 silk_ADD_SAT32(opus_int32 a32, opus_int32 b32){ opus_int32 res; ops_count += 1; res = ((((a32) + (b32)) & 0x80000000) == 0 ? \ ((((a32) & (b32)) & 0x80000000) != 0 ? silk_int32_MIN : (a32)+(b32)) : \ ((((a32) | (b32)) & 0x80000000) == 0 ? silk_int32_MAX : (a32)+(b32)) ); return res; } #undef silk_ADD_SAT64 -static inline opus_int64 silk_ADD_SAT64( opus_int64 a64, opus_int64 b64 ) { +static OPUS_INLINE opus_int64 silk_ADD_SAT64( opus_int64 a64, opus_int64 b64 ) { opus_int64 res; ops_count += 1; res = ((((a64) + (b64)) & 0x8000000000000000LL) == 0 ? \ ((((a64) & (b64)) & 0x8000000000000000LL) != 0 ? silk_int64_MIN : (a64)+(b64)) : \ ((((a64) | (b64)) & 0x8000000000000000LL) == 0 ? silk_int64_MAX : (a64)+(b64)) ); return res; } #undef silk_SUB_SAT16 -static inline opus_int16 silk_SUB_SAT16( opus_int16 a16, opus_int16 b16 ) { +static OPUS_INLINE opus_int16 silk_SUB_SAT16( opus_int16 a16, opus_int16 b16 ) { opus_int16 res; silk_assert(0); /* Nb will be counted in sub-macros*/ res = (opus_int16)silk_SAT16( silk_SUB32( (opus_int32)(a16), (b16) ) ); return res; } #undef silk_SUB_SAT32 -static inline opus_int32 silk_SUB_SAT32( opus_int32 a32, opus_int32 b32 ) { +static OPUS_INLINE opus_int32 silk_SUB_SAT32( opus_int32 a32, opus_int32 b32 ) { opus_int32 res; ops_count += 1; res = ((((a32)-(b32)) & 0x80000000) == 0 ? \ (( (a32) & ((b32)^0x80000000) & 0x80000000) ? silk_int32_MIN : (a32)-(b32)) : \ ((((a32)^0x80000000) & (b32) & 0x80000000) ? silk_int32_MAX : (a32)-(b32)) ); return res; } #undef silk_SUB_SAT64 -static inline opus_int64 silk_SUB_SAT64( opus_int64 a64, opus_int64 b64 ) { +static OPUS_INLINE opus_int64 silk_SUB_SAT64( opus_int64 a64, opus_int64 b64 ) { opus_int64 res; ops_count += 1; res = ((((a64)-(b64)) & 0x8000000000000000LL) == 0 ? 
\ (( (a64) & ((b64)^0x8000000000000000LL) & 0x8000000000000000LL) ? silk_int64_MIN : (a64)-(b64)) : \ ((((a64)^0x8000000000000000LL) & (b64) & 0x8000000000000000LL) ? silk_int64_MAX : (a64)-(b64)) ); return res; } #undef silk_SMULWW -static inline opus_int32 silk_SMULWW(opus_int32 a32, opus_int32 b32){ +static OPUS_INLINE opus_int32 silk_SMULWW(opus_int32 a32, opus_int32 b32){ opus_int32 ret; /* Nb will be counted in sub-macros*/ ret = silk_MLA(silk_SMULWB((a32), (b32)), (a32), silk_RSHIFT_ROUND((b32), 16)); return ret; } #undef silk_SMLAWW -static inline opus_int32 silk_SMLAWW(opus_int32 a32, opus_int32 b32, opus_int32 c32){ +static OPUS_INLINE opus_int32 silk_SMLAWW(opus_int32 a32, opus_int32 b32, opus_int32 c32){ opus_int32 ret; /* Nb will be counted in sub-macros*/ ret = silk_MLA(silk_SMLAWB((a32), (b32), (c32)), (b32), silk_RSHIFT_ROUND((c32), 16)); return ret; } #undef silk_min_int -static inline opus_int silk_min_int(opus_int a, opus_int b) +static OPUS_INLINE opus_int silk_min_int(opus_int a, opus_int b) { ops_count += 1; return (((a) < (b)) ? (a) : (b)); } #undef silk_min_16 -static inline opus_int16 silk_min_16(opus_int16 a, opus_int16 b) +static OPUS_INLINE opus_int16 silk_min_16(opus_int16 a, opus_int16 b) { ops_count += 1; return (((a) < (b)) ? (a) : (b)); } #undef silk_min_32 -static inline opus_int32 silk_min_32(opus_int32 a, opus_int32 b) +static OPUS_INLINE opus_int32 silk_min_32(opus_int32 a, opus_int32 b) { ops_count += 1; return (((a) < (b)) ? (a) : (b)); } #undef silk_min_64 -static inline opus_int64 silk_min_64(opus_int64 a, opus_int64 b) +static OPUS_INLINE opus_int64 silk_min_64(opus_int64 a, opus_int64 b) { ops_count += 1; return (((a) < (b)) ? (a) : (b)); } /* silk_min() versions with typecast in the function call */ #undef silk_max_int -static inline opus_int silk_max_int(opus_int a, opus_int b) +static OPUS_INLINE opus_int silk_max_int(opus_int a, opus_int b) { ops_count += 1; return (((a) > (b)) ? 
(a) : (b)); } #undef silk_max_16 -static inline opus_int16 silk_max_16(opus_int16 a, opus_int16 b) +static OPUS_INLINE opus_int16 silk_max_16(opus_int16 a, opus_int16 b) { ops_count += 1; return (((a) > (b)) ? (a) : (b)); } #undef silk_max_32 -static inline opus_int32 silk_max_32(opus_int32 a, opus_int32 b) +static OPUS_INLINE opus_int32 silk_max_32(opus_int32 a, opus_int32 b) { ops_count += 1; return (((a) > (b)) ? (a) : (b)); } #undef silk_max_64 -static inline opus_int64 silk_max_64(opus_int64 a, opus_int64 b) +static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b) { ops_count += 1; return (((a) > (b)) ? (a) : (b)); } #undef silk_LIMIT_int -static inline opus_int silk_LIMIT_int(opus_int a, opus_int limit1, opus_int limit2) +static OPUS_INLINE opus_int silk_LIMIT_int(opus_int a, opus_int limit1, opus_int limit2) { opus_int ret; ops_count += 6; ret = ((limit1) > (limit2) ? ((a) > (limit1) ? (limit1) : ((a) < (limit2) ? (limit2) : (a))) \ : ((a) > (limit2) ? (limit2) : ((a) < (limit1) ? (limit1) : (a)))); return(ret); } #undef silk_LIMIT_16 -static inline opus_int16 silk_LIMIT_16(opus_int16 a, opus_int16 limit1, opus_int16 limit2) +static OPUS_INLINE opus_int16 silk_LIMIT_16(opus_int16 a, opus_int16 limit1, opus_int16 limit2) { opus_int16 ret; ops_count += 6; ret = ((limit1) > (limit2) ? ((a) > (limit1) ? (limit1) : ((a) < (limit2) ? (limit2) : (a))) \ : ((a) > (limit2) ? (limit2) : ((a) < (limit1) ? (limit1) : (a)))); return(ret); } #undef silk_LIMIT_32 -static inline opus_int silk_LIMIT_32(opus_int32 a, opus_int32 limit1, opus_int32 limit2) +static OPUS_INLINE opus_int silk_LIMIT_32(opus_int32 a, opus_int32 limit1, opus_int32 limit2) { opus_int32 ret; ops_count += 6; ret = ((limit1) > (limit2) ? ((a) > (limit1) ? (limit1) : ((a) < (limit2) ? (limit2) : (a))) \ : ((a) > (limit2) ? (limit2) : ((a) < (limit1) ? (limit1) : (a)))); return(ret); }
--- a/media/libopus/silk/MacroDebug.h +++ b/media/libopus/silk/MacroDebug.h @@ -31,145 +31,145 @@ POSSIBILITY OF SUCH DAMAGE. /* Redefine macro functions with extensive assertion in DEBUG mode. As functions can't be undefined, this file can't work with SigProcFIX_MacroCount.h */ #if ( defined (FIXED_DEBUG) || ( 0 && defined (_DEBUG) ) ) && !defined (silk_MACRO_COUNT) #undef silk_ADD16 #define silk_ADD16(a,b) silk_ADD16_((a), (b), __FILE__, __LINE__) -static inline opus_int16 silk_ADD16_(opus_int16 a, opus_int16 b, char *file, int line){ +static OPUS_INLINE opus_int16 silk_ADD16_(opus_int16 a, opus_int16 b, char *file, int line){ opus_int16 ret; ret = a + b; if ( ret != silk_ADD_SAT16( a, b ) ) { fprintf (stderr, "silk_ADD16(%d, %d) in %s: line %d\n", a, b, file, line); #ifdef FIXED_DEBUG_ASSERT silk_assert( 0 ); #endif } return ret; } #undef silk_ADD32 #define silk_ADD32(a,b) silk_ADD32_((a), (b), __FILE__, __LINE__) -static inline opus_int32 silk_ADD32_(opus_int32 a, opus_int32 b, char *file, int line){ +static OPUS_INLINE opus_int32 silk_ADD32_(opus_int32 a, opus_int32 b, char *file, int line){ opus_int32 ret; ret = a + b; if ( ret != silk_ADD_SAT32( a, b ) ) { fprintf (stderr, "silk_ADD32(%d, %d) in %s: line %d\n", a, b, file, line); #ifdef FIXED_DEBUG_ASSERT silk_assert( 0 ); #endif } return ret; } #undef silk_ADD64 #define silk_ADD64(a,b) silk_ADD64_((a), (b), __FILE__, __LINE__) -static inline opus_int64 silk_ADD64_(opus_int64 a, opus_int64 b, char *file, int line){ +static OPUS_INLINE opus_int64 silk_ADD64_(opus_int64 a, opus_int64 b, char *file, int line){ opus_int64 ret; ret = a + b; if ( ret != silk_ADD_SAT64( a, b ) ) { fprintf (stderr, "silk_ADD64(%lld, %lld) in %s: line %d\n", (long long)a, (long long)b, file, line); #ifdef FIXED_DEBUG_ASSERT silk_assert( 0 ); #endif } return ret; } #undef silk_SUB16 #define silk_SUB16(a,b) silk_SUB16_((a), (b), __FILE__, __LINE__) -static inline opus_int16 silk_SUB16_(opus_int16 a, opus_int16 b, char *file, int line){ 
+static OPUS_INLINE opus_int16 silk_SUB16_(opus_int16 a, opus_int16 b, char *file, int line){ opus_int16 ret; ret = a - b; if ( ret != silk_SUB_SAT16( a, b ) ) { fprintf (stderr, "silk_SUB16(%d, %d) in %s: line %d\n", a, b, file, line); #ifdef FIXED_DEBUG_ASSERT silk_assert( 0 ); #endif } return ret; } #undef silk_SUB32 #define silk_SUB32(a,b) silk_SUB32_((a), (b), __FILE__, __LINE__) -static inline opus_int32 silk_SUB32_(opus_int32 a, opus_int32 b, char *file, int line){ +static OPUS_INLINE opus_int32 silk_SUB32_(opus_int32 a, opus_int32 b, char *file, int line){ opus_int32 ret; ret = a - b; if ( ret != silk_SUB_SAT32( a, b ) ) { fprintf (stderr, "silk_SUB32(%d, %d) in %s: line %d\n", a, b, file, line); #ifdef FIXED_DEBUG_ASSERT silk_assert( 0 ); #endif } return ret; } #undef silk_SUB64 #define silk_SUB64(a,b) silk_SUB64_((a), (b), __FILE__, __LINE__) -static inline opus_int64 silk_SUB64_(opus_int64 a, opus_int64 b, char *file, int line){ +static OPUS_INLINE opus_int64 silk_SUB64_(opus_int64 a, opus_int64 b, char *file, int line){ opus_int64 ret; ret = a - b; if ( ret != silk_SUB_SAT64( a, b ) ) { fprintf (stderr, "silk_SUB64(%lld, %lld) in %s: line %d\n", (long long)a, (long long)b, file, line); #ifdef FIXED_DEBUG_ASSERT silk_assert( 0 ); #endif } return ret; } #undef silk_ADD_SAT16 #define silk_ADD_SAT16(a,b) silk_ADD_SAT16_((a), (b), __FILE__, __LINE__) -static inline opus_int16 silk_ADD_SAT16_( opus_int16 a16, opus_int16 b16, char *file, int line) { +static OPUS_INLINE opus_int16 silk_ADD_SAT16_( opus_int16 a16, opus_int16 b16, char *file, int line) { opus_int16 res; res = (opus_int16)silk_SAT16( silk_ADD32( (opus_int32)(a16), (b16) ) ); if ( res != silk_SAT16( (opus_int32)a16 + (opus_int32)b16 ) ) { fprintf (stderr, "silk_ADD_SAT16(%d, %d) in %s: line %d\n", a16, b16, file, line); #ifdef FIXED_DEBUG_ASSERT silk_assert( 0 ); #endif } return res; } #undef silk_ADD_SAT32 #define silk_ADD_SAT32(a,b) silk_ADD_SAT32_((a), (b), __FILE__, __LINE__) -static inline 
opus_int32 silk_ADD_SAT32_(opus_int32 a32, opus_int32 b32, char *file, int line){ +static OPUS_INLINE opus_int32 silk_ADD_SAT32_(opus_int32 a32, opus_int32 b32, char *file, int line){ opus_int32 res; res = ((((opus_uint32)(a32) + (opus_uint32)(b32)) & 0x80000000) == 0 ? \ ((((a32) & (b32)) & 0x80000000) != 0 ? silk_int32_MIN : (a32)+(b32)) : \ ((((a32) | (b32)) & 0x80000000) == 0 ? silk_int32_MAX : (a32)+(b32)) ); if ( res != silk_SAT32( (opus_int64)a32 + (opus_int64)b32 ) ) { fprintf (stderr, "silk_ADD_SAT32(%d, %d) in %s: line %d\n", a32, b32, file, line); #ifdef FIXED_DEBUG_ASSERT silk_assert( 0 ); #endif } return res; } #undef silk_ADD_SAT64 #define silk_ADD_SAT64(a,b) silk_ADD_SAT64_((a), (b), __FILE__, __LINE__) -static inline opus_int64 silk_ADD_SAT64_( opus_int64 a64, opus_int64 b64, char *file, int line) { +static OPUS_INLINE opus_int64 silk_ADD_SAT64_( opus_int64 a64, opus_int64 b64, char *file, int line) { opus_int64 res; int fail = 0; res = ((((a64) + (b64)) & 0x8000000000000000LL) == 0 ? \ ((((a64) & (b64)) & 0x8000000000000000LL) != 0 ? silk_int64_MIN : (a64)+(b64)) : \ ((((a64) | (b64)) & 0x8000000000000000LL) == 0 ? 
silk_int64_MAX : (a64)+(b64)) ); if( res != a64 + b64 ) { /* Check that we saturated to the correct extreme value */ if ( !(( res == silk_int64_MAX && ( ( a64 >> 1 ) + ( b64 >> 1 ) > ( silk_int64_MAX >> 3 ) ) ) || @@ -188,49 +188,49 @@ static inline opus_int64 silk_ADD_SAT64_ silk_assert( 0 ); #endif } return res; } #undef silk_SUB_SAT16 #define silk_SUB_SAT16(a,b) silk_SUB_SAT16_((a), (b), __FILE__, __LINE__) -static inline opus_int16 silk_SUB_SAT16_( opus_int16 a16, opus_int16 b16, char *file, int line ) { +static OPUS_INLINE opus_int16 silk_SUB_SAT16_( opus_int16 a16, opus_int16 b16, char *file, int line ) { opus_int16 res; res = (opus_int16)silk_SAT16( silk_SUB32( (opus_int32)(a16), (b16) ) ); if ( res != silk_SAT16( (opus_int32)a16 - (opus_int32)b16 ) ) { fprintf (stderr, "silk_SUB_SAT16(%d, %d) in %s: line %d\n", a16, b16, file, line); #ifdef FIXED_DEBUG_ASSERT silk_assert( 0 ); #endif } return res; } #undef silk_SUB_SAT32 #define silk_SUB_SAT32(a,b) silk_SUB_SAT32_((a), (b), __FILE__, __LINE__) -static inline opus_int32 silk_SUB_SAT32_( opus_int32 a32, opus_int32 b32, char *file, int line ) { +static OPUS_INLINE opus_int32 silk_SUB_SAT32_( opus_int32 a32, opus_int32 b32, char *file, int line ) { opus_int32 res; res = ((((opus_uint32)(a32)-(opus_uint32)(b32)) & 0x80000000) == 0 ? \ (( (a32) & ((b32)^0x80000000) & 0x80000000) ? silk_int32_MIN : (a32)-(b32)) : \ ((((a32)^0x80000000) & (b32) & 0x80000000) ? 
silk_int32_MAX : (a32)-(b32)) ); if ( res != silk_SAT32( (opus_int64)a32 - (opus_int64)b32 ) ) { fprintf (stderr, "silk_SUB_SAT32(%d, %d) in %s: line %d\n", a32, b32, file, line); #ifdef FIXED_DEBUG_ASSERT silk_assert( 0 ); #endif } return res; } #undef silk_SUB_SAT64 #define silk_SUB_SAT64(a,b) silk_SUB_SAT64_((a), (b), __FILE__, __LINE__) -static inline opus_int64 silk_SUB_SAT64_( opus_int64 a64, opus_int64 b64, char *file, int line ) { +static OPUS_INLINE opus_int64 silk_SUB_SAT64_( opus_int64 a64, opus_int64 b64, char *file, int line ) { opus_int64 res; int fail = 0; res = ((((a64)-(b64)) & 0x8000000000000000LL) == 0 ? \ (( (a64) & ((b64)^0x8000000000000000LL) & 0x8000000000000000LL) ? silk_int64_MIN : (a64)-(b64)) : \ ((((a64)^0x8000000000000000LL) & (b64) & 0x8000000000000000LL) ? silk_int64_MAX : (a64)-(b64)) ); if( res != a64 - b64 ) { /* Check that we saturated to the correct extreme value */ if( !(( res == silk_int64_MAX && ( ( a64 >> 1 ) + ( b64 >> 1 ) > ( silk_int64_MAX >> 3 ) ) ) || @@ -249,139 +249,139 @@ static inline opus_int64 silk_SUB_SAT64_ silk_assert( 0 ); #endif } return res; } #undef silk_MUL #define silk_MUL(a,b) silk_MUL_((a), (b), __FILE__, __LINE__) -static inline opus_int32 silk_MUL_(opus_int32 a32, opus_int32 b32, char *file, int line){ +static OPUS_INLINE opus_int32 silk_MUL_(opus_int32 a32, opus_int32 b32, char *file, int line){ opus_int32 ret; opus_int64 ret64; ret = a32 * b32; ret64 = (opus_int64)a32 * (opus_int64)b32; if ( (opus_int64)ret != ret64 ) { fprintf (stderr, "silk_MUL(%d, %d) in %s: line %d\n", a32, b32, file, line); #ifdef FIXED_DEBUG_ASSERT silk_assert( 0 ); #endif } return ret; } #undef silk_MUL_uint #define silk_MUL_uint(a,b) silk_MUL_uint_((a), (b), __FILE__, __LINE__) -static inline opus_uint32 silk_MUL_uint_(opus_uint32 a32, opus_uint32 b32, char *file, int line){ +static OPUS_INLINE opus_uint32 silk_MUL_uint_(opus_uint32 a32, opus_uint32 b32, char *file, int line){ opus_uint32 ret; ret = a32 * b32; if ( 
(opus_uint64)ret != (opus_uint64)a32 * (opus_uint64)b32 ) { fprintf (stderr, "silk_MUL_uint(%u, %u) in %s: line %d\n", a32, b32, file, line); #ifdef FIXED_DEBUG_ASSERT silk_assert( 0 ); #endif } return ret; } #undef silk_MLA #define silk_MLA(a,b,c) silk_MLA_((a), (b), (c), __FILE__, __LINE__) -static inline opus_int32 silk_MLA_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){ +static OPUS_INLINE opus_int32 silk_MLA_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){ opus_int32 ret; ret = a32 + b32 * c32; if ( (opus_int64)ret != (opus_int64)a32 + (opus_int64)b32 * (opus_int64)c32 ) { fprintf (stderr, "silk_MLA(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line); #ifdef FIXED_DEBUG_ASSERT silk_assert( 0 ); #endif } return ret; } #undef silk_MLA_uint #define silk_MLA_uint(a,b,c) silk_MLA_uint_((a), (b), (c), __FILE__, __LINE__) -static inline opus_int32 silk_MLA_uint_(opus_uint32 a32, opus_uint32 b32, opus_uint32 c32, char *file, int line){ +static OPUS_INLINE opus_int32 silk_MLA_uint_(opus_uint32 a32, opus_uint32 b32, opus_uint32 c32, char *file, int line){ opus_uint32 ret; ret = a32 + b32 * c32; if ( (opus_int64)ret != (opus_int64)a32 + (opus_int64)b32 * (opus_int64)c32 ) { fprintf (stderr, "silk_MLA_uint(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line); #ifdef FIXED_DEBUG_ASSERT silk_assert( 0 ); #endif } return ret; } #undef silk_SMULWB #define silk_SMULWB(a,b) silk_SMULWB_((a), (b), __FILE__, __LINE__) -static inline opus_int32 silk_SMULWB_(opus_int32 a32, opus_int32 b32, char *file, int line){ +static OPUS_INLINE opus_int32 silk_SMULWB_(opus_int32 a32, opus_int32 b32, char *file, int line){ opus_int32 ret; ret = (a32 >> 16) * (opus_int32)((opus_int16)b32) + (((a32 & 0x0000FFFF) * (opus_int32)((opus_int16)b32)) >> 16); if ( (opus_int64)ret != ((opus_int64)a32 * (opus_int16)b32) >> 16 ) { fprintf (stderr, "silk_SMULWB(%d, %d) in %s: line %d\n", a32, b32, file, line); #ifdef FIXED_DEBUG_ASSERT silk_assert( 0 
); #endif } return ret; } #undef silk_SMLAWB #define silk_SMLAWB(a,b,c) silk_SMLAWB_((a), (b), (c), __FILE__, __LINE__) -static inline opus_int32 silk_SMLAWB_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){ +static OPUS_INLINE opus_int32 silk_SMLAWB_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){ opus_int32 ret; ret = silk_ADD32( a32, silk_SMULWB( b32, c32 ) ); if ( silk_ADD32( a32, silk_SMULWB( b32, c32 ) ) != silk_ADD_SAT32( a32, silk_SMULWB( b32, c32 ) ) ) { fprintf (stderr, "silk_SMLAWB(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line); #ifdef FIXED_DEBUG_ASSERT silk_assert( 0 ); #endif } return ret; } #undef silk_SMULWT #define silk_SMULWT(a,b) silk_SMULWT_((a), (b), __FILE__, __LINE__) -static inline opus_int32 silk_SMULWT_(opus_int32 a32, opus_int32 b32, char *file, int line){ +static OPUS_INLINE opus_int32 silk_SMULWT_(opus_int32 a32, opus_int32 b32, char *file, int line){ opus_int32 ret; ret = (a32 >> 16) * (b32 >> 16) + (((a32 & 0x0000FFFF) * (b32 >> 16)) >> 16); if ( (opus_int64)ret != ((opus_int64)a32 * (b32 >> 16)) >> 16 ) { fprintf (stderr, "silk_SMULWT(%d, %d) in %s: line %d\n", a32, b32, file, line); #ifdef FIXED_DEBUG_ASSERT silk_assert( 0 ); #endif } return ret; } #undef silk_SMLAWT #define silk_SMLAWT(a,b,c) silk_SMLAWT_((a), (b), (c), __FILE__, __LINE__) -static inline opus_int32 silk_SMLAWT_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){ +static OPUS_INLINE opus_int32 silk_SMLAWT_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){ opus_int32 ret; ret = a32 + ((b32 >> 16) * (c32 >> 16)) + (((b32 & 0x0000FFFF) * ((c32 >> 16)) >> 16)); if ( (opus_int64)ret != (opus_int64)a32 + (((opus_int64)b32 * (c32 >> 16)) >> 16) ) { fprintf (stderr, "silk_SMLAWT(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line); #ifdef FIXED_DEBUG_ASSERT silk_assert( 0 ); #endif } return ret; } #undef silk_SMULL #define silk_SMULL(a,b) silk_SMULL_((a), (b), __FILE__, 
__LINE__) -static inline opus_int64 silk_SMULL_(opus_int64 a64, opus_int64 b64, char *file, int line){ +static OPUS_INLINE opus_int64 silk_SMULL_(opus_int64 a64, opus_int64 b64, char *file, int line){ opus_int64 ret64; int fail = 0; ret64 = a64 * b64; if( b64 != 0 ) { fail = a64 != (ret64 / b64); } else if( a64 != 0 ) { fail = b64 != (ret64 / a64); } @@ -393,64 +393,64 @@ static inline opus_int64 silk_SMULL_(opu #endif } return ret64; } /* no checking needed for silk_SMULBB */ #undef silk_SMLABB #define silk_SMLABB(a,b,c) silk_SMLABB_((a), (b), (c), __FILE__, __LINE__) -static inline opus_int32 silk_SMLABB_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){ +static OPUS_INLINE opus_int32 silk_SMLABB_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){ opus_int32 ret; ret = a32 + (opus_int32)((opus_int16)b32) * (opus_int32)((opus_int16)c32); if ( (opus_int64)ret != (opus_int64)a32 + (opus_int64)b32 * (opus_int16)c32 ) { fprintf (stderr, "silk_SMLABB(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line); #ifdef FIXED_DEBUG_ASSERT silk_assert( 0 ); #endif } return ret; } /* no checking needed for silk_SMULBT */ #undef silk_SMLABT #define silk_SMLABT(a,b,c) silk_SMLABT_((a), (b), (c), __FILE__, __LINE__) -static inline opus_int32 silk_SMLABT_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){ +static OPUS_INLINE opus_int32 silk_SMLABT_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){ opus_int32 ret; ret = a32 + ((opus_int32)((opus_int16)b32)) * (c32 >> 16); if ( (opus_int64)ret != (opus_int64)a32 + (opus_int64)b32 * (c32 >> 16) ) { fprintf (stderr, "silk_SMLABT(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line); #ifdef FIXED_DEBUG_ASSERT silk_assert( 0 ); #endif } return ret; } /* no checking needed for silk_SMULTT */ #undef silk_SMLATT #define silk_SMLATT(a,b,c) silk_SMLATT_((a), (b), (c), __FILE__, __LINE__) -static inline opus_int32 silk_SMLATT_(opus_int32 a32, opus_int32 
b32, opus_int32 c32, char *file, int line){ +static OPUS_INLINE opus_int32 silk_SMLATT_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){ opus_int32 ret; ret = a32 + (b32 >> 16) * (c32 >> 16); if ( (opus_int64)ret != (opus_int64)a32 + (b32 >> 16) * (c32 >> 16) ) { fprintf (stderr, "silk_SMLATT(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line); #ifdef FIXED_DEBUG_ASSERT silk_assert( 0 ); #endif } return ret; } #undef silk_SMULWW #define silk_SMULWW(a,b) silk_SMULWW_((a), (b), __FILE__, __LINE__) -static inline opus_int32 silk_SMULWW_(opus_int32 a32, opus_int32 b32, char *file, int line){ +static OPUS_INLINE opus_int32 silk_SMULWW_(opus_int32 a32, opus_int32 b32, char *file, int line){ opus_int32 ret, tmp1, tmp2; opus_int64 ret64; int fail = 0; ret = silk_SMULWB( a32, b32 ); tmp1 = silk_RSHIFT_ROUND( b32, 16 ); tmp2 = silk_MUL( a32, tmp1 ); @@ -471,17 +471,17 @@ static inline opus_int32 silk_SMULWW_(op #endif } return ret; } #undef silk_SMLAWW #define silk_SMLAWW(a,b,c) silk_SMLAWW_((a), (b), (c), __FILE__, __LINE__) -static inline opus_int32 silk_SMLAWW_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){ +static OPUS_INLINE opus_int32 silk_SMLAWW_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){ opus_int32 ret, tmp; tmp = silk_SMULWW( b32, c32 ); ret = silk_ADD32( a32, tmp ); if ( ret != silk_ADD_SAT32( a32, tmp ) ) { fprintf (stderr, "silk_SMLAWW(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line); #ifdef FIXED_DEBUG_ASSERT @@ -500,30 +500,30 @@ static inline opus_int32 silk_SMLAWW_(op /* no checking needed for silk_SMULL no checking needed for silk_SMLAL no checking needed for silk_SMLALBB no checking needed for SigProcFIX_CLZ16 no checking needed for SigProcFIX_CLZ32*/ #undef silk_DIV32 #define silk_DIV32(a,b) silk_DIV32_((a), (b), __FILE__, __LINE__) -static inline opus_int32 silk_DIV32_(opus_int32 a32, opus_int32 b32, char *file, int line){ +static OPUS_INLINE opus_int32 
silk_DIV32_(opus_int32 a32, opus_int32 b32, char *file, int line){ if ( b32 == 0 ) { fprintf (stderr, "silk_DIV32(%d, %d) in %s: line %d\n", a32, b32, file, line); #ifdef FIXED_DEBUG_ASSERT silk_assert( 0 ); #endif } return a32 / b32; } #undef silk_DIV32_16 #define silk_DIV32_16(a,b) silk_DIV32_16_((a), (b), __FILE__, __LINE__) -static inline opus_int32 silk_DIV32_16_(opus_int32 a32, opus_int32 b32, char *file, int line){ +static OPUS_INLINE opus_int32 silk_DIV32_16_(opus_int32 a32, opus_int32 b32, char *file, int line){ int fail = 0; fail |= b32 == 0; fail |= b32 > silk_int16_MAX; fail |= b32 < silk_int16_MIN; if ( fail ) { fprintf (stderr, "silk_DIV32_16(%d, %d) in %s: line %d\n", a32, b32, file, line); #ifdef FIXED_DEBUG_ASSERT @@ -539,17 +539,17 @@ static inline opus_int32 silk_DIV32_16_( no checking needed for silk_POS_SAT32 no checking needed for silk_ADD_POS_SAT8 no checking needed for silk_ADD_POS_SAT16 no checking needed for silk_ADD_POS_SAT32 no checking needed for silk_ADD_POS_SAT64 */ #undef silk_LSHIFT8 #define silk_LSHIFT8(a,b) silk_LSHIFT8_((a), (b), __FILE__, __LINE__) -static inline opus_int8 silk_LSHIFT8_(opus_int8 a, opus_int32 shift, char *file, int line){ +static OPUS_INLINE opus_int8 silk_LSHIFT8_(opus_int8 a, opus_int32 shift, char *file, int line){ opus_int8 ret; int fail = 0; ret = a << shift; fail |= shift < 0; fail |= shift >= 8; fail |= (opus_int64)ret != ((opus_int64)a) << shift; if ( fail ) { @@ -558,17 +558,17 @@ static inline opus_int8 silk_LSHIFT8_(op silk_assert( 0 ); #endif } return ret; } #undef silk_LSHIFT16 #define silk_LSHIFT16(a,b) silk_LSHIFT16_((a), (b), __FILE__, __LINE__) -static inline opus_int16 silk_LSHIFT16_(opus_int16 a, opus_int32 shift, char *file, int line){ +static OPUS_INLINE opus_int16 silk_LSHIFT16_(opus_int16 a, opus_int32 shift, char *file, int line){ opus_int16 ret; int fail = 0; ret = a << shift; fail |= shift < 0; fail |= shift >= 16; fail |= (opus_int64)ret != ((opus_int64)a) << shift; if ( fail ) { @@ 
-577,17 +577,17 @@ static inline opus_int16 silk_LSHIFT16_( silk_assert( 0 ); #endif } return ret; } #undef silk_LSHIFT32 #define silk_LSHIFT32(a,b) silk_LSHIFT32_((a), (b), __FILE__, __LINE__) -static inline opus_int32 silk_LSHIFT32_(opus_int32 a, opus_int32 shift, char *file, int line){ +static OPUS_INLINE opus_int32 silk_LSHIFT32_(opus_int32 a, opus_int32 shift, char *file, int line){ opus_int32 ret; int fail = 0; ret = a << shift; fail |= shift < 0; fail |= shift >= 32; fail |= (opus_int64)ret != ((opus_int64)a) << shift; if ( fail ) { @@ -596,17 +596,17 @@ static inline opus_int32 silk_LSHIFT32_( silk_assert( 0 ); #endif } return ret; } #undef silk_LSHIFT64 #define silk_LSHIFT64(a,b) silk_LSHIFT64_((a), (b), __FILE__, __LINE__) -static inline opus_int64 silk_LSHIFT64_(opus_int64 a, opus_int shift, char *file, int line){ +static OPUS_INLINE opus_int64 silk_LSHIFT64_(opus_int64 a, opus_int shift, char *file, int line){ opus_int64 ret; int fail = 0; ret = a << shift; fail |= shift < 0; fail |= shift >= 64; fail |= (ret>>shift) != ((opus_int64)a); if ( fail ) { @@ -615,325 +615,325 @@ static inline opus_int64 silk_LSHIFT64_( silk_assert( 0 ); #endif } return ret; } #undef silk_LSHIFT_ovflw #define silk_LSHIFT_ovflw(a,b) silk_LSHIFT_ovflw_((a), (b), __FILE__, __LINE__) -static inline opus_int32 silk_LSHIFT_ovflw_(opus_int32 a, opus_int32 shift, char *file, int line){ +static OPUS_INLINE opus_int32 silk_LSHIFT_ovflw_(opus_int32 a, opus_int32 shift, char *file, int line){ if ( (shift < 0) || (shift >= 32) ) /* no check for overflow */ { fprintf (stderr, "silk_LSHIFT_ovflw(%d, %d) in %s: line %d\n", a, shift, file, line); #ifdef FIXED_DEBUG_ASSERT silk_assert( 0 ); #endif } return a << shift; } #undef silk_LSHIFT_uint #define silk_LSHIFT_uint(a,b) silk_LSHIFT_uint_((a), (b), __FILE__, __LINE__) -static inline opus_uint32 silk_LSHIFT_uint_(opus_uint32 a, opus_int32 shift, char *file, int line){ +static OPUS_INLINE opus_uint32 silk_LSHIFT_uint_(opus_uint32 a, opus_int32 
shift, char *file, int line){ opus_uint32 ret; ret = a << shift; if ( (shift < 0) || ((opus_int64)ret != ((opus_int64)a) << shift)) { fprintf (stderr, "silk_LSHIFT_uint(%u, %d) in %s: line %d\n", a, shift, file, line); #ifdef FIXED_DEBUG_ASSERT silk_assert( 0 ); #endif } return ret; } #undef silk_RSHIFT8 #define silk_RSHITF8(a,b) silk_RSHIFT8_((a), (b), __FILE__, __LINE__) -static inline opus_int8 silk_RSHIFT8_(opus_int8 a, opus_int32 shift, char *file, int line){ +static OPUS_INLINE opus_int8 silk_RSHIFT8_(opus_int8 a, opus_int32 shift, char *file, int line){ if ( (shift < 0) || (shift>=8) ) { fprintf (stderr, "silk_RSHITF8(%d, %d) in %s: line %d\n", a, shift, file, line); #ifdef FIXED_DEBUG_ASSERT silk_assert( 0 ); #endif } return a >> shift; } #undef silk_RSHIFT16 #define silk_RSHITF16(a,b) silk_RSHIFT16_((a), (b), __FILE__, __LINE__) -static inline opus_int16 silk_RSHIFT16_(opus_int16 a, opus_int32 shift, char *file, int line){ +static OPUS_INLINE opus_int16 silk_RSHIFT16_(opus_int16 a, opus_int32 shift, char *file, int line){ if ( (shift < 0) || (shift>=16) ) { fprintf (stderr, "silk_RSHITF16(%d, %d) in %s: line %d\n", a, shift, file, line); #ifdef FIXED_DEBUG_ASSERT silk_assert( 0 ); #endif } return a >> shift; } #undef silk_RSHIFT32 #define silk_RSHIFT32(a,b) silk_RSHIFT32_((a), (b), __FILE__, __LINE__) -static inline opus_int32 silk_RSHIFT32_(opus_int32 a, opus_int32 shift, char *file, int line){ +static OPUS_INLINE opus_int32 silk_RSHIFT32_(opus_int32 a, opus_int32 shift, char *file, int line){ if ( (shift < 0) || (shift>=32) ) { fprintf (stderr, "silk_RSHITF32(%d, %d) in %s: line %d\n", a, shift, file, line); #ifdef FIXED_DEBUG_ASSERT silk_assert( 0 ); #endif } return a >> shift; } #undef silk_RSHIFT64 #define silk_RSHIFT64(a,b) silk_RSHIFT64_((a), (b), __FILE__, __LINE__) -static inline opus_int64 silk_RSHIFT64_(opus_int64 a, opus_int64 shift, char *file, int line){ +static OPUS_INLINE opus_int64 silk_RSHIFT64_(opus_int64 a, opus_int64 shift, char *file, 
int line){ if ( (shift < 0) || (shift>=64) ) { fprintf (stderr, "silk_RSHITF64(%lld, %lld) in %s: line %d\n", (long long)a, (long long)shift, file, line); #ifdef FIXED_DEBUG_ASSERT silk_assert( 0 ); #endif } return a >> shift; } #undef silk_RSHIFT_uint #define silk_RSHIFT_uint(a,b) silk_RSHIFT_uint_((a), (b), __FILE__, __LINE__) -static inline opus_uint32 silk_RSHIFT_uint_(opus_uint32 a, opus_int32 shift, char *file, int line){ +static OPUS_INLINE opus_uint32 silk_RSHIFT_uint_(opus_uint32 a, opus_int32 shift, char *file, int line){ if ( (shift < 0) || (shift>32) ) { fprintf (stderr, "silk_RSHIFT_uint(%u, %d) in %s: line %d\n", a, shift, file, line); #ifdef FIXED_DEBUG_ASSERT silk_assert( 0 ); #endif } return a >> shift; } #undef silk_ADD_LSHIFT #define silk_ADD_LSHIFT(a,b,c) silk_ADD_LSHIFT_((a), (b), (c), __FILE__, __LINE__) -static inline int silk_ADD_LSHIFT_(int a, int b, int shift, char *file, int line){ +static OPUS_INLINE int silk_ADD_LSHIFT_(int a, int b, int shift, char *file, int line){ opus_int16 ret; ret = a + (b << shift); if ( (shift < 0) || (shift>15) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) << shift)) ) { fprintf (stderr, "silk_ADD_LSHIFT(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line); #ifdef FIXED_DEBUG_ASSERT silk_assert( 0 ); #endif } return ret; /* shift >= 0 */ } #undef silk_ADD_LSHIFT32 #define silk_ADD_LSHIFT32(a,b,c) silk_ADD_LSHIFT32_((a), (b), (c), __FILE__, __LINE__) -static inline opus_int32 silk_ADD_LSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){ +static OPUS_INLINE opus_int32 silk_ADD_LSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){ opus_int32 ret; ret = a + (b << shift); if ( (shift < 0) || (shift>31) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) << shift)) ) { fprintf (stderr, "silk_ADD_LSHIFT32(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line); #ifdef FIXED_DEBUG_ASSERT silk_assert( 0 ); #endif } return ret; /* shift >= 0 */ } #undef 
silk_ADD_LSHIFT_uint #define silk_ADD_LSHIFT_uint(a,b,c) silk_ADD_LSHIFT_uint_((a), (b), (c), __FILE__, __LINE__) -static inline opus_uint32 silk_ADD_LSHIFT_uint_(opus_uint32 a, opus_uint32 b, opus_int32 shift, char *file, int line){ +static OPUS_INLINE opus_uint32 silk_ADD_LSHIFT_uint_(opus_uint32 a, opus_uint32 b, opus_int32 shift, char *file, int line){ opus_uint32 ret; ret = a + (b << shift); if ( (shift < 0) || (shift>32) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) << shift)) ) { fprintf (stderr, "silk_ADD_LSHIFT_uint(%u, %u, %d) in %s: line %d\n", a, b, shift, file, line); #ifdef FIXED_DEBUG_ASSERT silk_assert( 0 ); #endif } return ret; /* shift >= 0 */ } #undef silk_ADD_RSHIFT #define silk_ADD_RSHIFT(a,b,c) silk_ADD_RSHIFT_((a), (b), (c), __FILE__, __LINE__) -static inline int silk_ADD_RSHIFT_(int a, int b, int shift, char *file, int line){ +static OPUS_INLINE int silk_ADD_RSHIFT_(int a, int b, int shift, char *file, int line){ opus_int16 ret; ret = a + (b >> shift); if ( (shift < 0) || (shift>15) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) >> shift)) ) { fprintf (stderr, "silk_ADD_RSHIFT(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line); #ifdef FIXED_DEBUG_ASSERT silk_assert( 0 ); #endif } return ret; /* shift > 0 */ } #undef silk_ADD_RSHIFT32 #define silk_ADD_RSHIFT32(a,b,c) silk_ADD_RSHIFT32_((a), (b), (c), __FILE__, __LINE__) -static inline opus_int32 silk_ADD_RSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){ +static OPUS_INLINE opus_int32 silk_ADD_RSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){ opus_int32 ret; ret = a + (b >> shift); if ( (shift < 0) || (shift>31) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) >> shift)) ) { fprintf (stderr, "silk_ADD_RSHIFT32(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line); #ifdef FIXED_DEBUG_ASSERT silk_assert( 0 ); #endif } return ret; /* shift > 0 */ } #undef silk_ADD_RSHIFT_uint #define 
silk_ADD_RSHIFT_uint(a,b,c) silk_ADD_RSHIFT_uint_((a), (b), (c), __FILE__, __LINE__) -static inline opus_uint32 silk_ADD_RSHIFT_uint_(opus_uint32 a, opus_uint32 b, opus_int32 shift, char *file, int line){ +static OPUS_INLINE opus_uint32 silk_ADD_RSHIFT_uint_(opus_uint32 a, opus_uint32 b, opus_int32 shift, char *file, int line){ opus_uint32 ret; ret = a + (b >> shift); if ( (shift < 0) || (shift>32) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) >> shift)) ) { fprintf (stderr, "silk_ADD_RSHIFT_uint(%u, %u, %d) in %s: line %d\n", a, b, shift, file, line); #ifdef FIXED_DEBUG_ASSERT silk_assert( 0 ); #endif } return ret; /* shift > 0 */ } #undef silk_SUB_LSHIFT32 #define silk_SUB_LSHIFT32(a,b,c) silk_SUB_LSHIFT32_((a), (b), (c), __FILE__, __LINE__) -static inline opus_int32 silk_SUB_LSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){ +static OPUS_INLINE opus_int32 silk_SUB_LSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){ opus_int32 ret; ret = a - (b << shift); if ( (shift < 0) || (shift>31) || ((opus_int64)ret != (opus_int64)a - (((opus_int64)b) << shift)) ) { fprintf (stderr, "silk_SUB_LSHIFT32(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line); #ifdef FIXED_DEBUG_ASSERT silk_assert( 0 ); #endif } return ret; /* shift >= 0 */ } #undef silk_SUB_RSHIFT32 #define silk_SUB_RSHIFT32(a,b,c) silk_SUB_RSHIFT32_((a), (b), (c), __FILE__, __LINE__) -static inline opus_int32 silk_SUB_RSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){ +static OPUS_INLINE opus_int32 silk_SUB_RSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){ opus_int32 ret; ret = a - (b >> shift); if ( (shift < 0) || (shift>31) || ((opus_int64)ret != (opus_int64)a - (((opus_int64)b) >> shift)) ) { fprintf (stderr, "silk_SUB_RSHIFT32(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line); #ifdef FIXED_DEBUG_ASSERT silk_assert( 0 ); #endif } return ret; /* shift > 0 */ } #undef 
silk_RSHIFT_ROUND #define silk_RSHIFT_ROUND(a,b) silk_RSHIFT_ROUND_((a), (b), __FILE__, __LINE__) -static inline opus_int32 silk_RSHIFT_ROUND_(opus_int32 a, opus_int32 shift, char *file, int line){ +static OPUS_INLINE opus_int32 silk_RSHIFT_ROUND_(opus_int32 a, opus_int32 shift, char *file, int line){ opus_int32 ret; ret = shift == 1 ? (a >> 1) + (a & 1) : ((a >> (shift - 1)) + 1) >> 1; /* the marco definition can't handle a shift of zero */ if ( (shift <= 0) || (shift>31) || ((opus_int64)ret != ((opus_int64)a + ((opus_int64)1 << (shift - 1))) >> shift) ) { fprintf (stderr, "silk_RSHIFT_ROUND(%d, %d) in %s: line %d\n", a, shift, file, line); #ifdef FIXED_DEBUG_ASSERT silk_assert( 0 ); #endif } return ret; } #undef silk_RSHIFT_ROUND64 #define silk_RSHIFT_ROUND64(a,b) silk_RSHIFT_ROUND64_((a), (b), __FILE__, __LINE__) -static inline opus_int64 silk_RSHIFT_ROUND64_(opus_int64 a, opus_int32 shift, char *file, int line){ +static OPUS_INLINE opus_int64 silk_RSHIFT_ROUND64_(opus_int64 a, opus_int32 shift, char *file, int line){ opus_int64 ret; /* the marco definition can't handle a shift of zero */ if ( (shift <= 0) || (shift>=64) ) { fprintf (stderr, "silk_RSHIFT_ROUND64(%lld, %d) in %s: line %d\n", (long long)a, shift, file, line); #ifdef FIXED_DEBUG_ASSERT silk_assert( 0 ); #endif } ret = shift == 1 ? (a >> 1) + (a & 1) : ((a >> (shift - 1)) + 1) >> 1; return ret; } /* silk_abs is used on floats also, so doesn't work... */ /*#undef silk_abs -static inline opus_int32 silk_abs(opus_int32 a){ +static OPUS_INLINE opus_int32 silk_abs(opus_int32 a){ silk_assert(a != 0x80000000); return (((a) > 0) ? 
(a) : -(a)); // Be careful, silk_abs returns wrong when input equals to silk_intXX_MIN }*/ #undef silk_abs_int64 #define silk_abs_int64(a) silk_abs_int64_((a), __FILE__, __LINE__) -static inline opus_int64 silk_abs_int64_(opus_int64 a, char *file, int line){ +static OPUS_INLINE opus_int64 silk_abs_int64_(opus_int64 a, char *file, int line){ if ( a == silk_int64_MIN ) { fprintf (stderr, "silk_abs_int64(%lld) in %s: line %d\n", (long long)a, file, line); #ifdef FIXED_DEBUG_ASSERT silk_assert( 0 ); #endif } return (((a) > 0) ? (a) : -(a)); /* Be careful, silk_abs returns wrong when input equals to silk_intXX_MIN */ } #undef silk_abs_int32 #define silk_abs_int32(a) silk_abs_int32_((a), __FILE__, __LINE__) -static inline opus_int32 silk_abs_int32_(opus_int32 a, char *file, int line){ +static OPUS_INLINE opus_int32 silk_abs_int32_(opus_int32 a, char *file, int line){ if ( a == silk_int32_MIN ) { fprintf (stderr, "silk_abs_int32(%d) in %s: line %d\n", a, file, line); #ifdef FIXED_DEBUG_ASSERT silk_assert( 0 ); #endif } return silk_abs(a); } #undef silk_CHECK_FIT8 #define silk_CHECK_FIT8(a) silk_CHECK_FIT8_((a), __FILE__, __LINE__) -static inline opus_int8 silk_CHECK_FIT8_( opus_int64 a, char *file, int line ){ +static OPUS_INLINE opus_int8 silk_CHECK_FIT8_( opus_int64 a, char *file, int line ){ opus_int8 ret; ret = (opus_int8)a; if ( (opus_int64)ret != a ) { fprintf (stderr, "silk_CHECK_FIT8(%lld) in %s: line %d\n", (long long)a, file, line); #ifdef FIXED_DEBUG_ASSERT silk_assert( 0 ); #endif } return( ret ); } #undef silk_CHECK_FIT16 #define silk_CHECK_FIT16(a) silk_CHECK_FIT16_((a), __FILE__, __LINE__) -static inline opus_int16 silk_CHECK_FIT16_( opus_int64 a, char *file, int line ){ +static OPUS_INLINE opus_int16 silk_CHECK_FIT16_( opus_int64 a, char *file, int line ){ opus_int16 ret; ret = (opus_int16)a; if ( (opus_int64)ret != a ) { fprintf (stderr, "silk_CHECK_FIT16(%lld) in %s: line %d\n", (long long)a, file, line); #ifdef FIXED_DEBUG_ASSERT silk_assert( 0 ); 
#endif } return( ret ); } #undef silk_CHECK_FIT32 #define silk_CHECK_FIT32(a) silk_CHECK_FIT32_((a), __FILE__, __LINE__) -static inline opus_int32 silk_CHECK_FIT32_( opus_int64 a, char *file, int line ){ +static OPUS_INLINE opus_int32 silk_CHECK_FIT32_( opus_int64 a, char *file, int line ){ opus_int32 ret; ret = (opus_int32)a; if ( (opus_int64)ret != a ) { fprintf (stderr, "silk_CHECK_FIT32(%lld) in %s: line %d\n", (long long)a, file, line); #ifdef FIXED_DEBUG_ASSERT silk_assert( 0 ); #endif
--- a/media/libopus/silk/NLSF2A.c +++ b/media/libopus/silk/NLSF2A.c @@ -36,17 +36,17 @@ POSSIBILITY OF SUCH DAMAGE. /* functions are accurate inverses of each other */ #include "SigProc_FIX.h" #include "tables.h" #define QA 16 /* helper function for NLSF2A(..) */ -static inline void silk_NLSF2A_find_poly( +static OPUS_INLINE void silk_NLSF2A_find_poly( opus_int32 *out, /* O intermediate polynomial, QA [dd+1] */ const opus_int32 *cLSF, /* I vector of interleaved 2*cos(LSFs), QA [d] */ opus_int dd /* I polynomial order (= 1/2 * filter order) */ ) { opus_int k, n; opus_int32 ftmp;
--- a/media/libopus/silk/NLSF_decode.c +++ b/media/libopus/silk/NLSF_decode.c @@ -27,17 +27,17 @@ POSSIBILITY OF SUCH DAMAGE. #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "main.h" /* Predictive dequantizer for NLSF residuals */ -static inline void silk_NLSF_residual_dequant( /* O Returns RD value in Q30 */ +static OPUS_INLINE void silk_NLSF_residual_dequant( /* O Returns RD value in Q30 */ opus_int16 x_Q10[], /* O Output [ order ] */ const opus_int8 indices[], /* I Quantization indices [ order ] */ const opus_uint8 pred_coef_Q8[], /* I Backward predictor coefs [ order ] */ const opus_int quant_step_size_Q16, /* I Quantization step size */ const opus_int16 order /* I Number of input values */ ) { opus_int i, out_Q10, pred_Q10;
--- a/media/libopus/silk/NLSF_del_dec_quant.c +++ b/media/libopus/silk/NLSF_del_dec_quant.c @@ -116,17 +116,17 @@ opus_int32 silk_NLSF_del_dec_quant( } RD_tmp_Q25 = RD_Q25[ j ]; diff_Q10 = silk_SUB16( in_Q10, out0_Q10 ); RD_Q25[ j ] = silk_SMLABB( silk_MLA( RD_tmp_Q25, silk_SMULBB( diff_Q10, diff_Q10 ), w_Q5[ i ] ), mu_Q20, rate0_Q5 ); diff_Q10 = silk_SUB16( in_Q10, out1_Q10 ); RD_Q25[ j + nStates ] = silk_SMLABB( silk_MLA( RD_tmp_Q25, silk_SMULBB( diff_Q10, diff_Q10 ), w_Q5[ i ] ), mu_Q20, rate1_Q5 ); } - if( nStates < NLSF_QUANT_DEL_DEC_STATES ) { + if( nStates <= ( NLSF_QUANT_DEL_DEC_STATES >> 1 ) ) { /* double number of states and copy */ for( j = 0; j < nStates; j++ ) { ind[ j + nStates ][ i ] = ind[ j ][ i ] + 1; } nStates = silk_LSHIFT( nStates, 1 ); for( j = nStates; j < NLSF_QUANT_DEL_DEC_STATES; j++ ) { ind[ j ][ i ] = ind[ j - nStates ][ i ]; }
--- a/media/libopus/silk/NSQ.c +++ b/media/libopus/silk/NSQ.c @@ -27,31 +27,31 @@ POSSIBILITY OF SUCH DAMAGE. #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "main.h" #include "stack_alloc.h" -static inline void silk_nsq_scale_states( +static OPUS_INLINE void silk_nsq_scale_states( const silk_encoder_state *psEncC, /* I Encoder State */ silk_nsq_state *NSQ, /* I/O NSQ state */ const opus_int32 x_Q3[], /* I input in Q3 */ opus_int32 x_sc_Q10[], /* O input scaled with 1/Gain */ const opus_int16 sLTP[], /* I re-whitened LTP state in Q0 */ opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ opus_int subfr, /* I subframe number */ const opus_int LTP_scale_Q14, /* I */ const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I */ const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag */ const opus_int signal_type /* I Signal type */ ); -static inline void silk_noise_shape_quantizer( +static OPUS_INLINE void silk_noise_shape_quantizer( silk_nsq_state *NSQ, /* I/O NSQ state */ opus_int signalType, /* I Signal type */ const opus_int32 x_sc_Q10[], /* I */ opus_int8 pulses[], /* O */ opus_int16 xq[], /* O */ opus_int32 sLTP_Q15[], /* I/O LTP state */ const opus_int16 a_Q12[], /* I Short term prediction coefs */ const opus_int16 b_Q14[], /* I Long term prediction coefs */ @@ -167,17 +167,17 @@ void silk_NSQ( silk_memmove( NSQ->xq, &NSQ->xq[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) ); silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) ); RESTORE_STACK; } /***********************************/ /* silk_noise_shape_quantizer */ /***********************************/ -static inline void silk_noise_shape_quantizer( +static OPUS_INLINE void silk_noise_shape_quantizer( silk_nsq_state *NSQ, /* I/O NSQ state */ opus_int signalType, /* I Signal type */ const opus_int32 x_sc_Q10[], /* I */ opus_int8 pulses[], /* O */ opus_int16 xq[], /* O */ opus_int32 sLTP_Q15[], /* I/O LTP state 
*/ const opus_int16 a_Q12[], /* I Short term prediction coefs */ const opus_int16 b_Q14[], /* I Long term prediction coefs */ @@ -365,17 +365,17 @@ static inline void silk_noise_shape_quan /* Make dither dependent on quantized signal */ NSQ->rand_seed = silk_ADD32_ovflw( NSQ->rand_seed, pulses[ i ] ); } /* Update LPC synth buffer */ silk_memcpy( NSQ->sLPC_Q14, &NSQ->sLPC_Q14[ length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) ); } -static inline void silk_nsq_scale_states( +static OPUS_INLINE void silk_nsq_scale_states( const silk_encoder_state *psEncC, /* I Encoder State */ silk_nsq_state *NSQ, /* I/O NSQ state */ const opus_int32 x_Q3[], /* I input in Q3 */ opus_int32 x_sc_Q10[], /* O input scaled with 1/Gain */ const opus_int16 sLTP[], /* I re-whitened LTP state in Q0 */ opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ opus_int subfr, /* I subframe number */ const opus_int LTP_scale_Q14, /* I */
--- a/media/libopus/silk/NSQ_del_dec.c +++ b/media/libopus/silk/NSQ_del_dec.c @@ -52,17 +52,17 @@ typedef struct { opus_int32 xq_Q14; opus_int32 LF_AR_Q14; opus_int32 sLTP_shp_Q14; opus_int32 LPC_exc_Q14; } NSQ_sample_struct; typedef NSQ_sample_struct NSQ_sample_pair[ 2 ]; -static inline void silk_nsq_del_dec_scale_states( +static OPUS_INLINE void silk_nsq_del_dec_scale_states( const silk_encoder_state *psEncC, /* I Encoder State */ silk_nsq_state *NSQ, /* I/O NSQ state */ NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ const opus_int32 x_Q3[], /* I Input in Q3 */ opus_int32 x_sc_Q10[], /* O Input scaled with 1/Gain in Q10 */ const opus_int16 sLTP[], /* I Re-whitened LTP state in Q0 */ opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ opus_int subfr, /* I Subframe number */ @@ -72,17 +72,17 @@ static inline void silk_nsq_del_dec_scal const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag */ const opus_int signal_type, /* I Signal type */ const opus_int decisionDelay /* I Decision delay */ ); /******************************************/ /* Noise shape quantizer for one subframe */ /******************************************/ -static inline void silk_noise_shape_quantizer_del_dec( +static OPUS_INLINE void silk_noise_shape_quantizer_del_dec( silk_nsq_state *NSQ, /* I/O NSQ state */ NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ opus_int signalType, /* I Signal type */ const opus_int32 x_Q10[], /* I */ opus_int8 pulses[], /* O */ opus_int16 xq[], /* O */ opus_int32 sLTP_Q15[], /* I/O LTP filter state */ opus_int32 delayedGain_Q10[], /* I/O Gain delay buffer */ @@ -298,17 +298,17 @@ void silk_NSQ_del_dec( silk_memmove( NSQ->xq, &NSQ->xq[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) ); silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) ); RESTORE_STACK; } /******************************************/ /* Noise shape quantizer for 
one subframe */ /******************************************/ -static inline void silk_noise_shape_quantizer_del_dec( +static OPUS_INLINE void silk_noise_shape_quantizer_del_dec( silk_nsq_state *NSQ, /* I/O NSQ state */ NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ opus_int signalType, /* I Signal type */ const opus_int32 x_Q10[], /* I */ opus_int8 pulses[], /* O */ opus_int16 xq[], /* O */ opus_int32 sLTP_Q15[], /* I/O LTP filter state */ opus_int32 delayedGain_Q10[], /* I/O Gain delay buffer */ @@ -625,17 +625,17 @@ static inline void silk_noise_shape_quan /* Update LPC states */ for( k = 0; k < nStatesDelayedDecision; k++ ) { psDD = &psDelDec[ k ]; silk_memcpy( psDD->sLPC_Q14, &psDD->sLPC_Q14[ length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) ); } RESTORE_STACK; } -static inline void silk_nsq_del_dec_scale_states( +static OPUS_INLINE void silk_nsq_del_dec_scale_states( const silk_encoder_state *psEncC, /* I Encoder State */ silk_nsq_state *NSQ, /* I/O NSQ state */ NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ const opus_int32 x_Q3[], /* I Input in Q3 */ opus_int32 x_sc_Q10[], /* O Input scaled with 1/Gain in Q10 */ const opus_int16 sLTP[], /* I Re-whitened LTP state in Q0 */ opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ opus_int subfr, /* I Subframe number */
--- a/media/libopus/silk/PLC.c +++ b/media/libopus/silk/PLC.c @@ -33,22 +33,22 @@ POSSIBILITY OF SUCH DAMAGE. #include "stack_alloc.h" #include "PLC.h" #define NB_ATT 2 static const opus_int16 HARM_ATT_Q15[NB_ATT] = { 32440, 31130 }; /* 0.99, 0.95 */ static const opus_int16 PLC_RAND_ATTENUATE_V_Q15[NB_ATT] = { 31130, 26214 }; /* 0.95, 0.8 */ static const opus_int16 PLC_RAND_ATTENUATE_UV_Q15[NB_ATT] = { 32440, 29491 }; /* 0.99, 0.9 */ -static inline void silk_PLC_update( +static OPUS_INLINE void silk_PLC_update( silk_decoder_state *psDec, /* I/O Decoder state */ silk_decoder_control *psDecCtrl /* I/O Decoder control */ ); -static inline void silk_PLC_conceal( +static OPUS_INLINE void silk_PLC_conceal( silk_decoder_state *psDec, /* I/O Decoder state */ silk_decoder_control *psDecCtrl, /* I/O Decoder control */ opus_int16 frame[] /* O LPC residual signal */ ); void silk_PLC_Reset( silk_decoder_state *psDec /* I/O Decoder state */ @@ -87,17 +87,17 @@ void silk_PLC( /****************************/ silk_PLC_update( psDec, psDecCtrl ); } } /**************************************************/ /* Update state of PLC */ /**************************************************/ -static inline void silk_PLC_update( +static OPUS_INLINE void silk_PLC_update( silk_decoder_state *psDec, /* I/O Decoder state */ silk_decoder_control *psDecCtrl /* I/O Decoder control */ ) { opus_int32 LTP_Gain_Q14, temp_LTP_Gain_Q14; opus_int i, j; silk_PLC_struct *psPLC; @@ -160,17 +160,17 @@ static inline void silk_PLC_update( /* Save last two gains */ silk_memcpy( psPLC->prevGain_Q16, &psDecCtrl->Gains_Q16[ psDec->nb_subfr - 2 ], 2 * sizeof( opus_int32 ) ); psPLC->subfr_length = psDec->subfr_length; psPLC->nb_subfr = psDec->nb_subfr; } -static inline void silk_PLC_conceal( +static OPUS_INLINE void silk_PLC_conceal( silk_decoder_state *psDec, /* I/O Decoder state */ silk_decoder_control *psDecCtrl, /* I/O Decoder control */ opus_int16 frame[] /* O LPC residual signal */ ) { opus_int i, j, k; opus_int 
lag, idx, sLTP_buf_idx, shift1, shift2; opus_int32 rand_seed, harm_Gain_Q15, rand_Gain_Q15, inv_gain_Q30;
--- a/media/libopus/silk/SigProc_FIX.h +++ b/media/libopus/silk/SigProc_FIX.h @@ -222,17 +222,18 @@ void silk_apply_sine_window( ); /* Compute autocorrelation */ void silk_autocorr( opus_int32 *results, /* O Result (length correlationCount) */ opus_int *scale, /* O Scaling of the correlation vector */ const opus_int16 *inputData, /* I Input data to correlate */ const opus_int inputDataSize, /* I Length of input */ - const opus_int correlationCount /* I Number of correlation taps to compute */ + const opus_int correlationCount, /* I Number of correlation taps to compute */ + int arch /* I Run-time architecture */ ); void silk_decode_pitch( opus_int16 lagIndex, /* I */ opus_int8 contourIndex, /* O */ opus_int pitch_lags[], /* O 4 pitch values */ const opus_int Fs_kHz, /* I sampling frequency (kHz) */ const opus_int nb_subfr /* I number of sub frames */ @@ -244,17 +245,18 @@ opus_int silk_pitch_analysis_core( opus_int16 *lagIndex, /* O Lag Index */ opus_int8 *contourIndex, /* O Pitch contour Index */ opus_int *LTPCorr_Q15, /* I/O Normalized correlation; input: value from previous frame */ opus_int prevLag, /* I Last lag of previous frame; set to zero is unvoiced */ const opus_int32 search_thres1_Q16, /* I First stage threshold for lag candidates 0 - 1 */ const opus_int search_thres2_Q13, /* I Final threshold for lag candidates 0 - 1 */ const opus_int Fs_kHz, /* I Sample frequency (kHz) */ const opus_int complexity, /* I Complexity setting, 0-2, where 2 is highest */ - const opus_int nb_subfr /* I number of 5 ms subframes */ + const opus_int nb_subfr, /* I number of 5 ms subframes */ + int arch /* I Run-time architecture */ ); /* Compute Normalized Line Spectral Frequencies (NLSFs) from whitening filter coefficients */ /* If not all roots are found, the a_Q16 coefficients are bandwidth expanded until convergence. 
*/ void silk_A2NLSF( opus_int16 *NLSF, /* O Normalized Line Spectral Frequencies in Q15 (0..2^15-1) [d] */ opus_int32 *a_Q16, /* I/O Monic whitening filter coefficients in Q16 [d] */ const opus_int d /* I Filter order (must be even) */ @@ -304,17 +306,18 @@ void silk_NLSF_VQ_weights_laroia( void silk_burg_modified( opus_int32 *res_nrg, /* O Residual energy */ opus_int *res_nrg_Q, /* O Residual energy Q value */ opus_int32 A_Q16[], /* O Prediction coefficients (length order) */ const opus_int16 x[], /* I Input signal, length: nb_subfr * ( D + subfr_length ) */ const opus_int32 minInvGain_Q30, /* I Inverse of max prediction gain */ const opus_int subfr_length, /* I Input signal subframe length (incl. D preceding samples) */ const opus_int nb_subfr, /* I Number of subframes stacked in x */ - const opus_int D /* I Order */ + const opus_int D, /* I Order */ + int arch /* I Run-time architecture */ ); /* Copy and multiply a vector by a constant */ void silk_scale_copy_vector16( opus_int16 *data_out, const opus_int16 *data_in, opus_int32 gain_Q16, /* I Gain in Q16 */ const opus_int dataSize /* I Length */ @@ -353,18 +356,18 @@ opus_int64 silk_inner_prod16_aligned_64( /********************************************************************/ /* MACROS */ /********************************************************************/ /* Rotate a32 right by 'rot' bits. Negative rot values result in rotating left. Output is 32bit int. Note: contemporary compilers recognize the C expression below and - compile it into a 'ror' instruction if available. No need for inline ASM! */ -static inline opus_int32 silk_ROR32( opus_int32 a32, opus_int rot ) + compile it into a 'ror' instruction if available. No need for OPUS_INLINE ASM! 
*/ +static OPUS_INLINE opus_int32 silk_ROR32( opus_int32 a32, opus_int rot ) { opus_uint32 x = (opus_uint32) a32; opus_uint32 r = (opus_uint32) rot; opus_uint32 m = (opus_uint32) -rot; if( rot == 0 ) { return a32; } else if( rot < 0 ) { return (opus_int32) ((x << m) | (x >> (32 - m))); @@ -503,47 +506,47 @@ static inline opus_int32 silk_ROR32( opu #define silk_min(a, b) (((a) < (b)) ? (a) : (b)) #define silk_max(a, b) (((a) > (b)) ? (a) : (b)) /* Macro to convert floating-point constants to fixed-point */ #define SILK_FIX_CONST( C, Q ) ((opus_int32)((C) * ((opus_int64)1 << (Q)) + 0.5)) /* silk_min() versions with typecast in the function call */ -static inline opus_int silk_min_int(opus_int a, opus_int b) +static OPUS_INLINE opus_int silk_min_int(opus_int a, opus_int b) { return (((a) < (b)) ? (a) : (b)); } -static inline opus_int16 silk_min_16(opus_int16 a, opus_int16 b) +static OPUS_INLINE opus_int16 silk_min_16(opus_int16 a, opus_int16 b) { return (((a) < (b)) ? (a) : (b)); } -static inline opus_int32 silk_min_32(opus_int32 a, opus_int32 b) +static OPUS_INLINE opus_int32 silk_min_32(opus_int32 a, opus_int32 b) { return (((a) < (b)) ? (a) : (b)); } -static inline opus_int64 silk_min_64(opus_int64 a, opus_int64 b) +static OPUS_INLINE opus_int64 silk_min_64(opus_int64 a, opus_int64 b) { return (((a) < (b)) ? (a) : (b)); } /* silk_min() versions with typecast in the function call */ -static inline opus_int silk_max_int(opus_int a, opus_int b) +static OPUS_INLINE opus_int silk_max_int(opus_int a, opus_int b) { return (((a) > (b)) ? (a) : (b)); } -static inline opus_int16 silk_max_16(opus_int16 a, opus_int16 b) +static OPUS_INLINE opus_int16 silk_max_16(opus_int16 a, opus_int16 b) { return (((a) > (b)) ? (a) : (b)); } -static inline opus_int32 silk_max_32(opus_int32 a, opus_int32 b) +static OPUS_INLINE opus_int32 silk_max_32(opus_int32 a, opus_int32 b) { return (((a) > (b)) ? 
(a) : (b)); } -static inline opus_int64 silk_max_64(opus_int64 a, opus_int64 b) +static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b) { return (((a) > (b)) ? (a) : (b)); } #define silk_LIMIT( a, limit1, limit2) ((limit1) > (limit2) ? ((a) > (limit1) ? (limit1) : ((a) < (limit2) ? (limit2) : (a))) \ : ((a) > (limit2) ? (limit2) : ((a) < (limit1) ? (limit1) : (a)))) #define silk_LIMIT_int silk_LIMIT @@ -571,21 +574,21 @@ static inline opus_int64 silk_max_64(opu /*#define silk_SMMUL(a32, b32) (opus_int32)silk_RSHIFT(silk_SMLAL(silk_SMULWB((a32), (b32)), (a32), silk_RSHIFT_ROUND((b32), 16)), 16)*/ /* the following seems faster on x86 */ #define silk_SMMUL(a32, b32) (opus_int32)silk_RSHIFT64(silk_SMULL((a32), (b32)), 32) #include "Inlines.h" #include "MacroCount.h" #include "MacroDebug.h" -#ifdef ARMv4_ASM +#ifdef OPUS_ARM_INLINE_ASM #include "arm/SigProc_FIX_armv4.h" #endif -#ifdef ARMv5E_ASM +#ifdef OPUS_ARM_INLINE_EDSP #include "arm/SigProc_FIX_armv5e.h" #endif #ifdef __cplusplus } #endif #endif /* SILK_SIGPROC_FIX_H */
--- a/media/libopus/silk/VAD.c +++ b/media/libopus/silk/VAD.c @@ -28,17 +28,17 @@ POSSIBILITY OF SUCH DAMAGE. #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "main.h" #include "stack_alloc.h" /* Silk VAD noise level estimation */ -static inline void silk_VAD_GetNoiseLevels( +static OPUS_INLINE void silk_VAD_GetNoiseLevels( const opus_int32 pX[ VAD_N_BANDS ], /* I subband energies */ silk_VAD_state *psSilk_VAD /* I/O Pointer to Silk VAD state */ ); /**********************************/ /* Initialization of the Silk VAD */ /**********************************/ opus_int silk_VAD_Init( /* O Return value, 0 if success */ @@ -291,17 +291,17 @@ opus_int silk_VAD_GetSA_Q8( RESTORE_STACK; return( ret ); } /**************************/ /* Noise level estimation */ /**************************/ -static inline void silk_VAD_GetNoiseLevels( +static OPUS_INLINE void silk_VAD_GetNoiseLevels( const opus_int32 pX[ VAD_N_BANDS ], /* I subband energies */ silk_VAD_state *psSilk_VAD /* I/O Pointer to Silk VAD state */ ) { opus_int k; opus_int32 nl, nrg, inv_nrg; opus_int coef, min_coef;
--- a/media/libopus/silk/VQ_WMat_EC.c +++ b/media/libopus/silk/VQ_WMat_EC.c @@ -30,42 +30,50 @@ POSSIBILITY OF SUCH DAMAGE. #endif #include "main.h" /* Entropy constrained matrix-weighted VQ, hard-coded to 5-element vectors, for a single input data vector */ void silk_VQ_WMat_EC( opus_int8 *ind, /* O index of best codebook vector */ opus_int32 *rate_dist_Q14, /* O best weighted quant error + mu * rate */ + opus_int *gain_Q7, /* O sum of absolute LTP coefficients */ const opus_int16 *in_Q14, /* I input vector to be quantized */ const opus_int32 *W_Q18, /* I weighting matrix */ const opus_int8 *cb_Q7, /* I codebook */ + const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */ const opus_uint8 *cl_Q5, /* I code length for each codebook vector */ const opus_int mu_Q9, /* I tradeoff betw. weighted error and rate */ + const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */ opus_int L /* I number of vectors in codebook */ ) { - opus_int k; + opus_int k, gain_tmp_Q7; const opus_int8 *cb_row_Q7; opus_int16 diff_Q14[ 5 ]; opus_int32 sum1_Q14, sum2_Q16; /* Loop over codebook */ *rate_dist_Q14 = silk_int32_MAX; cb_row_Q7 = cb_Q7; for( k = 0; k < L; k++ ) { + gain_tmp_Q7 = cb_gain_Q7[k]; + diff_Q14[ 0 ] = in_Q14[ 0 ] - silk_LSHIFT( cb_row_Q7[ 0 ], 7 ); diff_Q14[ 1 ] = in_Q14[ 1 ] - silk_LSHIFT( cb_row_Q7[ 1 ], 7 ); diff_Q14[ 2 ] = in_Q14[ 2 ] - silk_LSHIFT( cb_row_Q7[ 2 ], 7 ); diff_Q14[ 3 ] = in_Q14[ 3 ] - silk_LSHIFT( cb_row_Q7[ 3 ], 7 ); diff_Q14[ 4 ] = in_Q14[ 4 ] - silk_LSHIFT( cb_row_Q7[ 4 ], 7 ); /* Weighted rate */ sum1_Q14 = silk_SMULBB( mu_Q9, cl_Q5[ k ] ); + /* Penalty for too large gain */ + sum1_Q14 = silk_ADD_LSHIFT32( sum1_Q14, silk_max( silk_SUB32( gain_tmp_Q7, max_gain_Q7 ), 0 ), 10 ); + silk_assert( sum1_Q14 >= 0 ); /* first row of W_Q18 */ sum2_Q16 = silk_SMULWB( W_Q18[ 1 ], diff_Q14[ 1 ] ); sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 2 ], diff_Q14[ 2 ] ); sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 3 ], diff_Q14[ 3 ] ); sum2_Q16 = 
silk_SMLAWB( sum2_Q16, W_Q18[ 4 ], diff_Q14[ 4 ] ); sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 ); @@ -98,14 +106,15 @@ void silk_VQ_WMat_EC( sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 4 ] ); silk_assert( sum1_Q14 >= 0 ); /* find best */ if( sum1_Q14 < *rate_dist_Q14 ) { *rate_dist_Q14 = sum1_Q14; *ind = (opus_int8)k; + *gain_Q7 = gain_tmp_Q7; } /* Go to next cbk vector */ cb_row_Q7 += LTP_ORDER; } }
--- a/media/libopus/silk/arm/SigProc_FIX_armv4.h +++ b/media/libopus/silk/arm/SigProc_FIX_armv4.h @@ -25,17 +25,17 @@ CONTRACT, STRICT LIABILITY, OR TORT (INC ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ***********************************************************************/ #ifndef SILK_SIGPROC_FIX_ARMv4_H #define SILK_SIGPROC_FIX_ARMv4_H #undef silk_MLA -static inline opus_int32 silk_MLA_armv4(opus_int32 a, opus_int32 b, +static OPUS_INLINE opus_int32 silk_MLA_armv4(opus_int32 a, opus_int32 b, opus_int32 c) { opus_int32 res; __asm__( "#silk_MLA\n\t" "mla %0, %1, %2, %3\n\t" : "=&r"(res) : "r"(b), "r"(c), "r"(a)
--- a/media/libopus/silk/arm/SigProc_FIX_armv5e.h +++ b/media/libopus/silk/arm/SigProc_FIX_armv5e.h @@ -25,31 +25,31 @@ CONTRACT, STRICT LIABILITY, OR TORT (INC ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ***********************************************************************/ #ifndef SILK_SIGPROC_FIX_ARMv5E_H #define SILK_SIGPROC_FIX_ARMv5E_H #undef silk_SMULTT -static inline opus_int32 silk_SMULTT_armv5e(opus_int32 a, opus_int32 b) +static OPUS_INLINE opus_int32 silk_SMULTT_armv5e(opus_int32 a, opus_int32 b) { opus_int32 res; __asm__( "#silk_SMULTT\n\t" "smultt %0, %1, %2\n\t" : "=r"(res) : "%r"(a), "r"(b) ); return res; } #define silk_SMULTT(a, b) (silk_SMULTT_armv5e(a, b)) #undef silk_SMLATT -static inline opus_int32 silk_SMLATT_armv5e(opus_int32 a, opus_int32 b, +static OPUS_INLINE opus_int32 silk_SMLATT_armv5e(opus_int32 a, opus_int32 b, opus_int32 c) { opus_int32 res; __asm__( "#silk_SMLATT\n\t" "smlatt %0, %1, %2, %3\n\t" : "=r"(res) : "%r"(b), "r"(c), "r"(a)
--- a/media/libopus/silk/arm/macros_armv4.h +++ b/media/libopus/silk/arm/macros_armv4.h @@ -25,17 +25,17 @@ ARISING IN ANY WAY OUT OF THE USE OF THI POSSIBILITY OF SUCH DAMAGE. ***********************************************************************/ #ifndef SILK_MACROS_ARMv4_H #define SILK_MACROS_ARMv4_H /* (a32 * (opus_int32)((opus_int16)(b32))) >> 16 output have to be 32bit int */ #undef silk_SMULWB -static inline opus_int32 silk_SMULWB_armv4(opus_int32 a, opus_int16 b) +static OPUS_INLINE opus_int32 silk_SMULWB_armv4(opus_int32 a, opus_int16 b) { unsigned rd_lo; int rd_hi; __asm__( "#silk_SMULWB\n\t" "smull %0, %1, %2, %3\n\t" : "=&r"(rd_lo), "=&r"(rd_hi) : "%r"(a), "r"(b<<16) @@ -45,17 +45,17 @@ static inline opus_int32 silk_SMULWB_arm #define silk_SMULWB(a, b) (silk_SMULWB_armv4(a, b)) /* a32 + (b32 * (opus_int32)((opus_int16)(c32))) >> 16 output have to be 32bit int */ #undef silk_SMLAWB #define silk_SMLAWB(a, b, c) ((a) + silk_SMULWB(b, c)) /* (a32 * (b32 >> 16)) >> 16 */ #undef silk_SMULWT -static inline opus_int32 silk_SMULWT_armv4(opus_int32 a, opus_int32 b) +static OPUS_INLINE opus_int32 silk_SMULWT_armv4(opus_int32 a, opus_int32 b) { unsigned rd_lo; int rd_hi; __asm__( "#silk_SMULWT\n\t" "smull %0, %1, %2, %3\n\t" : "=&r"(rd_lo), "=&r"(rd_hi) : "%r"(a), "r"(b&~0xFFFF) @@ -65,32 +65,32 @@ static inline opus_int32 silk_SMULWT_arm #define silk_SMULWT(a, b) (silk_SMULWT_armv4(a, b)) /* a32 + (b32 * (c32 >> 16)) >> 16 */ #undef silk_SMLAWT #define silk_SMLAWT(a, b, c) ((a) + silk_SMULWT(b, c)) /* (a32 * b32) >> 16 */ #undef silk_SMULWW -static inline opus_int32 silk_SMULWW_armv4(opus_int32 a, opus_int32 b) +static OPUS_INLINE opus_int32 silk_SMULWW_armv4(opus_int32 a, opus_int32 b) { unsigned rd_lo; int rd_hi; __asm__( "#silk_SMULWW\n\t" "smull %0, %1, %2, %3\n\t" : "=&r"(rd_lo), "=&r"(rd_hi) : "%r"(a), "r"(b) ); return (rd_hi<<16)+(rd_lo>>16); } #define silk_SMULWW(a, b) (silk_SMULWW_armv4(a, b)) #undef silk_SMLAWW -static inline opus_int32 
silk_SMLAWW_armv4(opus_int32 a, opus_int32 b, +static OPUS_INLINE opus_int32 silk_SMLAWW_armv4(opus_int32 a, opus_int32 b, opus_int32 c) { unsigned rd_lo; int rd_hi; __asm__( "#silk_SMLAWW\n\t" "smull %0, %1, %2, %3\n\t" : "=&r"(rd_lo), "=&r"(rd_hi)
--- a/media/libopus/silk/arm/macros_armv5e.h +++ b/media/libopus/silk/arm/macros_armv5e.h @@ -26,183 +26,183 @@ ARISING IN ANY WAY OUT OF THE USE OF THI POSSIBILITY OF SUCH DAMAGE. ***********************************************************************/ #ifndef SILK_MACROS_ARMv5E_H #define SILK_MACROS_ARMv5E_H /* (a32 * (opus_int32)((opus_int16)(b32))) >> 16 output have to be 32bit int */ #undef silk_SMULWB -static inline opus_int32 silk_SMULWB_armv5e(opus_int32 a, opus_int16 b) +static OPUS_INLINE opus_int32 silk_SMULWB_armv5e(opus_int32 a, opus_int16 b) { int res; __asm__( "#silk_SMULWB\n\t" "smulwb %0, %1, %2\n\t" : "=r"(res) : "r"(a), "r"(b) ); return res; } #define silk_SMULWB(a, b) (silk_SMULWB_armv5e(a, b)) /* a32 + (b32 * (opus_int32)((opus_int16)(c32))) >> 16 output have to be 32bit int */ #undef silk_SMLAWB -static inline opus_int32 silk_SMLAWB_armv5e(opus_int32 a, opus_int32 b, +static OPUS_INLINE opus_int32 silk_SMLAWB_armv5e(opus_int32 a, opus_int32 b, opus_int16 c) { int res; __asm__( "#silk_SMLAWB\n\t" "smlawb %0, %1, %2, %3\n\t" : "=r"(res) : "r"(b), "r"(c), "r"(a) ); return res; } #define silk_SMLAWB(a, b, c) (silk_SMLAWB_armv5e(a, b, c)) /* (a32 * (b32 >> 16)) >> 16 */ #undef silk_SMULWT -static inline opus_int32 silk_SMULWT_armv5e(opus_int32 a, opus_int32 b) +static OPUS_INLINE opus_int32 silk_SMULWT_armv5e(opus_int32 a, opus_int32 b) { int res; __asm__( "#silk_SMULWT\n\t" "smulwt %0, %1, %2\n\t" : "=r"(res) : "r"(a), "r"(b) ); return res; } #define silk_SMULWT(a, b) (silk_SMULWT_armv5e(a, b)) /* a32 + (b32 * (c32 >> 16)) >> 16 */ #undef silk_SMLAWT -static inline opus_int32 silk_SMLAWT_armv5e(opus_int32 a, opus_int32 b, +static OPUS_INLINE opus_int32 silk_SMLAWT_armv5e(opus_int32 a, opus_int32 b, opus_int32 c) { int res; __asm__( "#silk_SMLAWT\n\t" "smlawt %0, %1, %2, %3\n\t" : "=r"(res) : "r"(b), "r"(c), "r"(a) ); return res; } #define silk_SMLAWT(a, b, c) (silk_SMLAWT_armv5e(a, b, c)) /* (opus_int32)((opus_int16)(a3))) * 
(opus_int32)((opus_int16)(b32)) output have to be 32bit int */ #undef silk_SMULBB -static inline opus_int32 silk_SMULBB_armv5e(opus_int32 a, opus_int32 b) +static OPUS_INLINE opus_int32 silk_SMULBB_armv5e(opus_int32 a, opus_int32 b) { int res; __asm__( "#silk_SMULBB\n\t" "smulbb %0, %1, %2\n\t" : "=r"(res) : "%r"(a), "r"(b) ); return res; } #define silk_SMULBB(a, b) (silk_SMULBB_armv5e(a, b)) /* a32 + (opus_int32)((opus_int16)(b32)) * (opus_int32)((opus_int16)(c32)) output have to be 32bit int */ #undef silk_SMLABB -static inline opus_int32 silk_SMLABB_armv5e(opus_int32 a, opus_int32 b, +static OPUS_INLINE opus_int32 silk_SMLABB_armv5e(opus_int32 a, opus_int32 b, opus_int32 c) { int res; __asm__( "#silk_SMLABB\n\t" "smlabb %0, %1, %2, %3\n\t" : "=r"(res) : "%r"(b), "r"(c), "r"(a) ); return res; } #define silk_SMLABB(a, b, c) (silk_SMLABB_armv5e(a, b, c)) /* (opus_int32)((opus_int16)(a32)) * (b32 >> 16) */ #undef silk_SMULBT -static inline opus_int32 silk_SMULBT_armv5e(opus_int32 a, opus_int32 b) +static OPUS_INLINE opus_int32 silk_SMULBT_armv5e(opus_int32 a, opus_int32 b) { int res; __asm__( "#silk_SMULBT\n\t" "smulbt %0, %1, %2\n\t" : "=r"(res) : "r"(a), "r"(b) ); return res; } #define silk_SMULBT(a, b) (silk_SMULBT_armv5e(a, b)) /* a32 + (opus_int32)((opus_int16)(b32)) * (c32 >> 16) */ #undef silk_SMLABT -static inline opus_int32 silk_SMLABT_armv5e(opus_int32 a, opus_int32 b, +static OPUS_INLINE opus_int32 silk_SMLABT_armv5e(opus_int32 a, opus_int32 b, opus_int32 c) { int res; __asm__( "#silk_SMLABT\n\t" "smlabt %0, %1, %2, %3\n\t" : "=r"(res) : "r"(b), "r"(c), "r"(a) ); return res; } #define silk_SMLABT(a, b, c) (silk_SMLABT_armv5e(a, b, c)) /* add/subtract with output saturated */ #undef silk_ADD_SAT32 -static inline opus_int32 silk_ADD_SAT32_armv5e(opus_int32 a, opus_int32 b) +static OPUS_INLINE opus_int32 silk_ADD_SAT32_armv5e(opus_int32 a, opus_int32 b) { int res; __asm__( "#silk_ADD_SAT32\n\t" "qadd %0, %1, %2\n\t" : "=r"(res) : "%r"(a), "r"(b) ); return 
res; } #define silk_ADD_SAT32(a, b) (silk_ADD_SAT32_armv5e(a, b)) #undef silk_SUB_SAT32 -static inline opus_int32 silk_SUB_SAT32_armv5e(opus_int32 a, opus_int32 b) +static OPUS_INLINE opus_int32 silk_SUB_SAT32_armv5e(opus_int32 a, opus_int32 b) { int res; __asm__( "#silk_SUB_SAT32\n\t" "qsub %0, %1, %2\n\t" : "=r"(res) : "r"(a), "r"(b) ); return res; } #define silk_SUB_SAT32(a, b) (silk_SUB_SAT32_armv5e(a, b)) #undef silk_CLZ16 -static inline opus_int32 silk_CLZ16_armv5(opus_int16 in16) +static OPUS_INLINE opus_int32 silk_CLZ16_armv5(opus_int16 in16) { int res; __asm__( "#silk_CLZ16\n\t" "clz %0, %1;\n" : "=r"(res) : "r"(in16<<16|0x8000) ); return res; } #define silk_CLZ16(in16) (silk_CLZ16_armv5(in16)) #undef silk_CLZ32 -static inline opus_int32 silk_CLZ32_armv5(opus_int32 in32) +static OPUS_INLINE opus_int32 silk_CLZ32_armv5(opus_int32 in32) { int res; __asm__( "#silk_CLZ32\n\t" "clz %0, %1\n\t" : "=r"(res) : "r"(in32) );
--- a/media/libopus/silk/control.h +++ b/media/libopus/silk/control.h @@ -87,16 +87,19 @@ typedef struct { opus_int maxBits; /* I: Causes a smooth downmix to mono */ opus_int toMono; /* I: Opus encoder is allowing us to switch bandwidth */ opus_int opusCanSwitch; + /* I: Make frames as independent as possible (but still use LPC) */ + opus_int reducedDependency; + /* O: Internal sampling rate used, in Hertz; 8000/12000/16000 */ opus_int32 internalSampleRate; /* O: Flag that bandwidth switching is allowed (because low voice activity) */ opus_int allowBandwidthSwitch; /* O: Flag that SILK runs in WB mode without variable LP filter (use for switching between WB/SWB/FB) */ opus_int inWBmodeWithoutVariableLP;
--- a/media/libopus/silk/control_codec.c +++ b/media/libopus/silk/control_codec.c @@ -50,17 +50,17 @@ static opus_int silk_setup_fs( opus_int PacketSize_ms /* I */ ); static opus_int silk_setup_complexity( silk_encoder_state *psEncC, /* I/O */ opus_int Complexity /* I */ ); -static inline opus_int silk_setup_LBRR( +static OPUS_INLINE opus_int silk_setup_LBRR( silk_encoder_state *psEncC, /* I/O */ const opus_int32 TargetRate_bps /* I */ ); /* Control encoder */ opus_int silk_control_encoder( silk_encoder_state_Fxx *psEnc, /* I/O Pointer to Silk encoder state */ @@ -387,17 +387,17 @@ static opus_int silk_setup_complexity( silk_assert( psEncC->warping_Q16 <= 32767 ); silk_assert( psEncC->la_shape <= LA_SHAPE_MAX ); silk_assert( psEncC->shapeWinLength <= SHAPE_LPC_WIN_MAX ); silk_assert( psEncC->NLSF_MSVQ_Survivors <= NLSF_VQ_MAX_SURVIVORS ); return ret; } -static inline opus_int silk_setup_LBRR( +static OPUS_INLINE opus_int silk_setup_LBRR( silk_encoder_state *psEncC, /* I/O */ const opus_int32 TargetRate_bps /* I */ ) { opus_int ret = SILK_NO_ERROR; opus_int32 LBRR_rate_thres_bps; psEncC->LBRR_enabled = 0;
--- a/media/libopus/silk/dec_API.c +++ b/media/libopus/silk/dec_API.c @@ -300,17 +300,17 @@ opus_int silk_Decode( silk_memcpy( psDec->sStereo.sMid, &samplesOut1_tmp[ 0 ][ nSamplesOutDec ], 2 * sizeof( opus_int16 ) ); } /* Number of output samples */ *nSamplesOut = silk_DIV32( nSamplesOutDec * decControl->API_sampleRate, silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ) ); /* Set up pointers to temp buffers */ ALLOC( samplesOut2_tmp, - decControl->nChannelsAPI == 2 ? *nSamplesOut : 0, opus_int16 ); + decControl->nChannelsAPI == 2 ? *nSamplesOut : ALLOC_NONE, opus_int16 ); if( decControl->nChannelsAPI == 2 ) { resample_out_ptr = samplesOut2_tmp; } else { resample_out_ptr = samplesOut; } for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) {
--- a/media/libopus/silk/enc_API.c +++ b/media/libopus/silk/enc_API.c @@ -64,28 +64,29 @@ opus_int silk_Get_Encoder_Size( return ret; } /*************************/ /* Init or Reset encoder */ /*************************/ opus_int silk_InitEncoder( /* O Returns error code */ void *encState, /* I/O State */ + int arch, /* I Run-time architecture */ silk_EncControlStruct *encStatus /* O Encoder Status */ ) { silk_encoder *psEnc; opus_int n, ret = SILK_NO_ERROR; psEnc = (silk_encoder *)encState; /* Reset encoder */ silk_memset( psEnc, 0, sizeof( silk_encoder ) ); for( n = 0; n < ENCODER_NUM_CHANNELS; n++ ) { - if( ret += silk_init_encoder( &psEnc->state_Fxx[ n ] ) ) { + if( ret += silk_init_encoder( &psEnc->state_Fxx[ n ], arch ) ) { silk_assert( 0 ); } } psEnc->nChannelsAPI = 1; psEnc->nChannelsInternal = 1; /* Read control structure */ @@ -151,30 +152,35 @@ opus_int silk_Encode( opus_int nSamplesFromInput = 0, nSamplesFromInputMax; opus_int speech_act_thr_for_switch_Q8; opus_int32 TargetRate_bps, MStargetRates_bps[ 2 ], channelRate_bps, LBRR_symbol, sum; silk_encoder *psEnc = ( silk_encoder * )encState; VARDECL( opus_int16, buf ); opus_int transition, curr_block, tot_blocks; SAVE_STACK; + if (encControl->reducedDependency) + { + psEnc->state_Fxx[0].sCmn.first_frame_after_reset = 1; + psEnc->state_Fxx[1].sCmn.first_frame_after_reset = 1; + } psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded = psEnc->state_Fxx[ 1 ].sCmn.nFramesEncoded = 0; /* Check values in encoder control structure */ if( ( ret = check_control_input( encControl ) != 0 ) ) { silk_assert( 0 ); RESTORE_STACK; return ret; } encControl->switchReady = 0; if( encControl->nChannelsInternal > psEnc->nChannelsInternal ) { /* Mono -> Stereo transition: init state of second channel and stereo state */ - ret += silk_init_encoder( &psEnc->state_Fxx[ 1 ] ); + ret += silk_init_encoder( &psEnc->state_Fxx[ 1 ], psEnc->state_Fxx[ 0 ].sCmn.arch ); silk_memset( psEnc->sStereo.pred_prev_Q13, 0, sizeof( psEnc->sStereo.pred_prev_Q13 
) ); silk_memset( psEnc->sStereo.sSide, 0, sizeof( psEnc->sStereo.sSide ) ); psEnc->sStereo.mid_side_amp_Q0[ 0 ] = 0; psEnc->sStereo.mid_side_amp_Q0[ 1 ] = 1; psEnc->sStereo.mid_side_amp_Q0[ 2 ] = 0; psEnc->sStereo.mid_side_amp_Q0[ 3 ] = 1; psEnc->sStereo.width_prev_Q14 = 0; psEnc->sStereo.smth_width_Q14 = SILK_FIX_CONST( 1, 14 ); @@ -196,19 +202,18 @@ opus_int silk_Encode( /* Only accept input length of 10 ms */ if( nBlocksOf10ms != 1 ) { silk_assert( 0 ); RESTORE_STACK; return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES; } /* Reset Encoder */ for( n = 0; n < encControl->nChannelsInternal; n++ ) { - if( (ret = silk_init_encoder( &psEnc->state_Fxx[ n ] ) ) != 0 ) { - silk_assert( 0 ); - } + ret = silk_init_encoder( &psEnc->state_Fxx[ n ], psEnc->state_Fxx[ n ].sCmn.arch ); + silk_assert( !ret ); } tmp_payloadSize_ms = encControl->payloadSize_ms; encControl->payloadSize_ms = 10; tmp_complexity = encControl->complexity; encControl->complexity = 0; for( n = 0; n < encControl->nChannelsInternal; n++ ) { psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0; psEnc->state_Fxx[ n ].sCmn.prefillFlag = 1;
--- a/media/libopus/silk/encode_pulses.c +++ b/media/libopus/silk/encode_pulses.c @@ -31,17 +31,17 @@ POSSIBILITY OF SUCH DAMAGE. #include "main.h" #include "stack_alloc.h" /*********************************************/ /* Encode quantization indices of excitation */ /*********************************************/ -static inline opus_int combine_and_check( /* return ok */ +static OPUS_INLINE opus_int combine_and_check( /* return ok */ opus_int *pulses_comb, /* O */ const opus_int *pulses_in, /* I */ opus_int max_pulses, /* I max value for sum of pulses */ opus_int len /* I number of output values */ ) { opus_int k, sum;
--- a/media/libopus/silk/fixed/autocorr_FIX.c +++ b/media/libopus/silk/fixed/autocorr_FIX.c @@ -33,15 +33,16 @@ POSSIBILITY OF SUCH DAMAGE. #include "celt_lpc.h" /* Compute autocorrelation */ void silk_autocorr( opus_int32 *results, /* O Result (length correlationCount) */ opus_int *scale, /* O Scaling of the correlation vector */ const opus_int16 *inputData, /* I Input data to correlate */ const opus_int inputDataSize, /* I Length of input */ - const opus_int correlationCount /* I Number of correlation taps to compute */ + const opus_int correlationCount, /* I Number of correlation taps to compute */ + int arch /* I Run-time architecture */ ) { opus_int corrCount; corrCount = silk_min_int( inputDataSize, correlationCount ); - *scale = _celt_autocorr(inputData, results, NULL, 0, corrCount-1, inputDataSize); + *scale = _celt_autocorr(inputData, results, NULL, 0, corrCount-1, inputDataSize, arch); }
--- a/media/libopus/silk/fixed/burg_modified_FIX.c +++ b/media/libopus/silk/fixed/burg_modified_FIX.c @@ -45,17 +45,18 @@ POSSIBILITY OF SUCH DAMAGE. void silk_burg_modified( opus_int32 *res_nrg, /* O Residual energy */ opus_int *res_nrg_Q, /* O Residual energy Q value */ opus_int32 A_Q16[], /* O Prediction coefficients (length order) */ const opus_int16 x[], /* I Input signal, length: nb_subfr * ( D + subfr_length ) */ const opus_int32 minInvGain_Q30, /* I Inverse of max prediction gain */ const opus_int subfr_length, /* I Input signal subframe length (incl. D preceding samples) */ const opus_int nb_subfr, /* I Number of subframes stacked in x */ - const opus_int D /* I Order */ + const opus_int D, /* I Order */ + int arch /* I Run-time architecture */ ) { opus_int k, n, s, lz, rshifts, rshifts_extra, reached_max_gain; opus_int32 C0, num, nrg, rc_Q31, invGain_Q30, Atmp_QA, Atmp1, tmp1, tmp2, x1, x2; const opus_int16 *x_ptr; opus_int32 C_first_row[ SILK_MAX_ORDER_LPC ]; opus_int32 C_last_row[ SILK_MAX_ORDER_LPC ]; opus_int32 Af_QA[ SILK_MAX_ORDER_LPC ]; @@ -93,17 +94,17 @@ void silk_burg_modified( silk_inner_prod16_aligned_64( x_ptr, x_ptr + n, subfr_length - n ), rshifts ); } } } else { for( s = 0; s < nb_subfr; s++ ) { int i; opus_int32 d; x_ptr = x + s * subfr_length; - celt_pitch_xcorr(x_ptr, x_ptr + 1, xcorr, subfr_length - D, D ); + celt_pitch_xcorr(x_ptr, x_ptr + 1, xcorr, subfr_length - D, D, arch ); for( n = 1; n < D + 1; n++ ) { for ( i = n + subfr_length - D, d = 0; i < subfr_length; i++ ) d = MAC16_16( d, x_ptr[ i ], x_ptr[ i - n ] ); xcorr[ n - 1 ] += d; } for( n = 1; n < D + 1; n++ ) { C_first_row[ n - 1 ] += silk_LSHIFT32( xcorr[ n - 1 ], -rshifts ); }
--- a/media/libopus/silk/fixed/encode_frame_FIX.c +++ b/media/libopus/silk/fixed/encode_frame_FIX.c @@ -29,17 +29,17 @@ POSSIBILITY OF SUCH DAMAGE. #include "config.h" #endif #include "main_FIX.h" #include "stack_alloc.h" #include "tuning_parameters.h" /* Low Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode with lower bitrate */ -static inline void silk_LBRR_encode_FIX( +static OPUS_INLINE void silk_LBRR_encode_FIX( silk_encoder_state_FIX *psEnc, /* I/O Pointer to Silk FIX encoder state */ silk_encoder_control_FIX *psEncCtrl, /* I/O Pointer to Silk FIX encoder control struct */ const opus_int32 xfw_Q3[], /* I Input signal */ opus_int condCoding /* I The type of conditional coding used so far for this frame */ ); void silk_encode_do_VAD_FIX( silk_encoder_state_FIX *psEnc /* I/O Pointer to Silk FIX encoder state */ @@ -127,22 +127,22 @@ opus_int silk_encode_frame_FIX( psEnc->sCmn.la_pitch + psEnc->sCmn.frame_length + psEnc->sCmn.ltp_mem_length, opus_int16 ); /* start of pitch LPC residual frame */ res_pitch_frame = res_pitch + psEnc->sCmn.ltp_mem_length; /*****************************************/ /* Find pitch lags, initial LPC analysis */ /*****************************************/ - silk_find_pitch_lags_FIX( psEnc, &sEncCtrl, res_pitch, x_frame ); + silk_find_pitch_lags_FIX( psEnc, &sEncCtrl, res_pitch, x_frame, psEnc->sCmn.arch ); /************************/ /* Noise shape analysis */ /************************/ - silk_noise_shape_analysis_FIX( psEnc, &sEncCtrl, res_pitch_frame, x_frame ); + silk_noise_shape_analysis_FIX( psEnc, &sEncCtrl, res_pitch_frame, x_frame, psEnc->sCmn.arch ); /***************************************************/ /* Find linear prediction coefficients (LPC + LTP) */ /***************************************************/ silk_find_pred_coefs_FIX( psEnc, &sEncCtrl, res_pitch, x_frame, condCoding ); /****************************************/ /* Process gains */ @@ -297,41 +297,41 @@ opus_int silk_encode_frame_FIX( gainsID = 
silk_gains_ID( psEnc->sCmn.indices.GainsIndices, psEnc->sCmn.nb_subfr ); } } /* Update input buffer */ silk_memmove( psEnc->x_buf, &psEnc->x_buf[ psEnc->sCmn.frame_length ], ( psEnc->sCmn.ltp_mem_length + LA_SHAPE_MS * psEnc->sCmn.fs_kHz ) * sizeof( opus_int16 ) ); - /* Parameters needed for next frame */ - psEnc->sCmn.prevLag = sEncCtrl.pitchL[ psEnc->sCmn.nb_subfr - 1 ]; - psEnc->sCmn.prevSignalType = psEnc->sCmn.indices.signalType; - /* Exit without entropy coding */ if( psEnc->sCmn.prefillFlag ) { /* No payload */ *pnBytesOut = 0; RESTORE_STACK; return ret; } + /* Parameters needed for next frame */ + psEnc->sCmn.prevLag = sEncCtrl.pitchL[ psEnc->sCmn.nb_subfr - 1 ]; + psEnc->sCmn.prevSignalType = psEnc->sCmn.indices.signalType; + /****************************************/ /* Finalize payload */ /****************************************/ psEnc->sCmn.first_frame_after_reset = 0; /* Payload size */ *pnBytesOut = silk_RSHIFT( ec_tell( psRangeEnc ) + 7, 3 ); RESTORE_STACK; return ret; } /* Low-Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode excitation at lower bitrate */ -static inline void silk_LBRR_encode_FIX( +static OPUS_INLINE void silk_LBRR_encode_FIX( silk_encoder_state_FIX *psEnc, /* I/O Pointer to Silk FIX encoder state */ silk_encoder_control_FIX *psEncCtrl, /* I/O Pointer to Silk FIX encoder control struct */ const opus_int32 xfw_Q3[], /* I Input signal */ opus_int condCoding /* I The type of conditional coding used so far for this frame */ ) { opus_int32 TempGains_Q16[ MAX_NB_SUBFR ]; SideInfoIndices *psIndices_LBRR = &psEnc->sCmn.indices_LBRR[ psEnc->sCmn.nFramesEncoded ];
--- a/media/libopus/silk/fixed/find_LPC_FIX.c +++ b/media/libopus/silk/fixed/find_LPC_FIX.c @@ -55,23 +55,23 @@ void silk_find_LPC_FIX( SAVE_STACK; subfr_length = psEncC->subfr_length + psEncC->predictLPCOrder; /* Default: no interpolation */ psEncC->indices.NLSFInterpCoef_Q2 = 4; /* Burg AR analysis for the full frame */ - silk_burg_modified( &res_nrg, &res_nrg_Q, a_Q16, x, minInvGain_Q30, subfr_length, psEncC->nb_subfr, psEncC->predictLPCOrder ); + silk_burg_modified( &res_nrg, &res_nrg_Q, a_Q16, x, minInvGain_Q30, subfr_length, psEncC->nb_subfr, psEncC->predictLPCOrder, psEncC->arch ); if( psEncC->useInterpolatedNLSFs && !psEncC->first_frame_after_reset && psEncC->nb_subfr == MAX_NB_SUBFR ) { VARDECL( opus_int16, LPC_res ); /* Optimal solution for last 10 ms */ - silk_burg_modified( &res_tmp_nrg, &res_tmp_nrg_Q, a_tmp_Q16, x + 2 * subfr_length, minInvGain_Q30, subfr_length, 2, psEncC->predictLPCOrder ); + silk_burg_modified( &res_tmp_nrg, &res_tmp_nrg_Q, a_tmp_Q16, x + 2 * subfr_length, minInvGain_Q30, subfr_length, 2, psEncC->predictLPCOrder, psEncC->arch ); /* subtract residual energy here, as that's easier than adding it to the */ /* residual energy of the first 10 ms in each iteration of the search below */ shift = res_tmp_nrg_Q - res_nrg_Q; if( shift >= 0 ) { if( shift < 32 ) { res_nrg = res_nrg - silk_RSHIFT( res_tmp_nrg, shift ); }
--- a/media/libopus/silk/fixed/find_pitch_lags_FIX.c +++ b/media/libopus/silk/fixed/find_pitch_lags_FIX.c @@ -33,17 +33,18 @@ POSSIBILITY OF SUCH DAMAGE. #include "stack_alloc.h" #include "tuning_parameters.h" /* Find pitch lags */ void silk_find_pitch_lags_FIX( silk_encoder_state_FIX *psEnc, /* I/O encoder state */ silk_encoder_control_FIX *psEncCtrl, /* I/O encoder control */ opus_int16 res[], /* O residual */ - const opus_int16 x[] /* I Speech signal */ + const opus_int16 x[], /* I Speech signal */ + int arch /* I Run-time architecture */ ) { opus_int buf_len, i, scale; opus_int32 thrhld_Q13, res_nrg; const opus_int16 *x_buf, *x_buf_ptr; VARDECL( opus_int16, Wsig ); opus_int16 *Wsig_ptr; opus_int32 auto_corr[ MAX_FIND_PITCH_LPC_ORDER + 1 ]; @@ -81,17 +82,17 @@ void silk_find_pitch_lags_FIX( silk_memcpy( Wsig_ptr, x_buf_ptr, ( psEnc->sCmn.pitch_LPC_win_length - silk_LSHIFT( psEnc->sCmn.la_pitch, 1 ) ) * sizeof( opus_int16 ) ); /* Last LA_LTP samples */ Wsig_ptr += psEnc->sCmn.pitch_LPC_win_length - silk_LSHIFT( psEnc->sCmn.la_pitch, 1 ); x_buf_ptr += psEnc->sCmn.pitch_LPC_win_length - silk_LSHIFT( psEnc->sCmn.la_pitch, 1 ); silk_apply_sine_window( Wsig_ptr, x_buf_ptr, 2, psEnc->sCmn.la_pitch ); /* Calculate autocorrelation sequence */ - silk_autocorr( auto_corr, &scale, Wsig, psEnc->sCmn.pitch_LPC_win_length, psEnc->sCmn.pitchEstimationLPCOrder + 1 ); + silk_autocorr( auto_corr, &scale, Wsig, psEnc->sCmn.pitch_LPC_win_length, psEnc->sCmn.pitchEstimationLPCOrder + 1, arch ); /* Add white noise, as fraction of energy */ auto_corr[ 0 ] = silk_SMLAWB( auto_corr[ 0 ], auto_corr[ 0 ], SILK_FIX_CONST( FIND_PITCH_WHITE_NOISE_FRACTION, 16 ) ) + 1; /* Calculate the reflection coefficients using schur */ res_nrg = silk_schur( rc_Q15, auto_corr, psEnc->sCmn.pitchEstimationLPCOrder ); /* Prediction gain */ @@ -122,17 +123,18 @@ void silk_find_pitch_lags_FIX( thrhld_Q13 = silk_SMLAWB( thrhld_Q13, SILK_FIX_CONST( -0.1, 14 ), psEnc->sCmn.input_tilt_Q15 ); thrhld_Q13 = 
silk_SAT16( thrhld_Q13 ); /*****************************************/ /* Call pitch estimator */ /*****************************************/ if( silk_pitch_analysis_core( res, psEncCtrl->pitchL, &psEnc->sCmn.indices.lagIndex, &psEnc->sCmn.indices.contourIndex, &psEnc->LTPCorr_Q15, psEnc->sCmn.prevLag, psEnc->sCmn.pitchEstimationThreshold_Q16, - (opus_int)thrhld_Q13, psEnc->sCmn.fs_kHz, psEnc->sCmn.pitchEstimationComplexity, psEnc->sCmn.nb_subfr ) == 0 ) + (opus_int)thrhld_Q13, psEnc->sCmn.fs_kHz, psEnc->sCmn.pitchEstimationComplexity, psEnc->sCmn.nb_subfr, + psEnc->sCmn.arch) == 0 ) { psEnc->sCmn.indices.signalType = TYPE_VOICED; } else { psEnc->sCmn.indices.signalType = TYPE_UNVOICED; } } else { silk_memset( psEncCtrl->pitchL, 0, sizeof( psEncCtrl->pitchL ) ); psEnc->sCmn.indices.lagIndex = 0;
--- a/media/libopus/silk/fixed/find_pred_coefs_FIX.c +++ b/media/libopus/silk/fixed/find_pred_coefs_FIX.c @@ -88,17 +88,17 @@ void silk_find_pred_coefs_FIX( /* LTP analysis */ silk_find_LTP_FIX( psEncCtrl->LTPCoef_Q14, WLTP, &psEncCtrl->LTPredCodGain_Q7, res_pitch, psEncCtrl->pitchL, Wght_Q15, psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.ltp_mem_length, LTP_corrs_rshift ); /* Quantize LTP gain parameters */ silk_quant_LTP_gains( psEncCtrl->LTPCoef_Q14, psEnc->sCmn.indices.LTPIndex, &psEnc->sCmn.indices.PERIndex, - WLTP, psEnc->sCmn.mu_LTP_Q9, psEnc->sCmn.LTPQuantLowComplexity, psEnc->sCmn.nb_subfr); + &psEnc->sCmn.sum_log_gain_Q7, WLTP, psEnc->sCmn.mu_LTP_Q9, psEnc->sCmn.LTPQuantLowComplexity, psEnc->sCmn.nb_subfr); /* Control LTP scaling */ silk_LTP_scale_ctrl_FIX( psEnc, psEncCtrl, condCoding ); /* Create LTP residual */ silk_LTP_analysis_filter_FIX( LPC_in_pre, x - psEnc->sCmn.predictLPCOrder, psEncCtrl->LTPCoef_Q14, psEncCtrl->pitchL, invGains_Q16, psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.predictLPCOrder ); @@ -113,16 +113,17 @@ void silk_find_pred_coefs_FIX( silk_scale_copy_vector16( x_pre_ptr, x_ptr, invGains_Q16[ i ], psEnc->sCmn.subfr_length + psEnc->sCmn.predictLPCOrder ); x_pre_ptr += psEnc->sCmn.subfr_length + psEnc->sCmn.predictLPCOrder; x_ptr += psEnc->sCmn.subfr_length; } silk_memset( psEncCtrl->LTPCoef_Q14, 0, psEnc->sCmn.nb_subfr * LTP_ORDER * sizeof( opus_int16 ) ); psEncCtrl->LTPredCodGain_Q7 = 0; + psEnc->sCmn.sum_log_gain_Q7 = 0; } /* Limit on total predictive coding gain */ if( psEnc->sCmn.first_frame_after_reset ) { minInvGain_Q30 = SILK_FIX_CONST( 1.0f / MAX_PREDICTION_POWER_GAIN_AFTER_RESET, 30 ); } else { minInvGain_Q30 = silk_log2lin( silk_SMLAWB( 16 << 7, (opus_int32)psEncCtrl->LTPredCodGain_Q7, SILK_FIX_CONST( 1.0 / 3, 16 ) ) ); /* Q16 */ minInvGain_Q30 = silk_DIV32_varQ( minInvGain_Q30,
--- a/media/libopus/silk/fixed/main_FIX.h +++ b/media/libopus/silk/fixed/main_FIX.h @@ -68,17 +68,18 @@ opus_int silk_encode_frame_FIX( ec_enc *psRangeEnc, /* I/O compressor data structure */ opus_int condCoding, /* I The type of conditional coding to use */ opus_int maxBits, /* I If > 0: maximum number of output bits */ opus_int useCBR /* I Flag to force constant-bitrate operation */ ); /* Initializes the Silk encoder state */ opus_int silk_init_encoder( - silk_encoder_state_Fxx *psEnc /* I/O Pointer to Silk FIX encoder state */ + silk_encoder_state_Fxx *psEnc, /* I/O Pointer to Silk FIX encoder state */ + int arch /* I Run-time architecture */ ); /* Control the Silk encoder */ opus_int silk_control_encoder( silk_encoder_state_Fxx *psEnc, /* I/O Pointer to Silk encoder state */ silk_EncControlStruct *encControl, /* I Control structure */ const opus_int32 TargetRate_bps, /* I Target max bitrate (bps) */ const opus_int allow_bw_switch, /* I Flag to allow switching audio bandwidth */ @@ -99,17 +100,18 @@ void silk_prefilter_FIX( /**************************/ /* Noise shaping analysis */ /**************************/ /* Compute noise shaping coefficients and initial gain values */ void silk_noise_shape_analysis_FIX( silk_encoder_state_FIX *psEnc, /* I/O Encoder state FIX */ silk_encoder_control_FIX *psEncCtrl, /* I/O Encoder control FIX */ const opus_int16 *pitch_res, /* I LPC residual from pitch analysis */ - const opus_int16 *x /* I Input signal [ frame_length + la_shape ] */ + const opus_int16 *x, /* I Input signal [ frame_length + la_shape ] */ + int arch /* I Run-time architecture */ ); /* Autocorrelations for a warped frequency axis */ void silk_warped_autocorrelation_FIX( opus_int32 *corr, /* O Result [order + 1] */ opus_int *scale, /* O Scaling of the correlation vector */ const opus_int16 *input, /* I Input data to correlate */ const opus_int warping_Q16, /* I Warping coefficient */ @@ -127,17 +129,18 @@ void silk_LTP_scale_ctrl_FIX( 
/**********************************************/ /* Prediction Analysis */ /**********************************************/ /* Find pitch lags */ void silk_find_pitch_lags_FIX( silk_encoder_state_FIX *psEnc, /* I/O encoder state */ silk_encoder_control_FIX *psEncCtrl, /* I/O encoder control */ opus_int16 res[], /* O residual */ - const opus_int16 x[] /* I Speech signal */ + const opus_int16 x[], /* I Speech signal */ + int arch /* I Run-time architecture */ ); /* Find LPC and LTP coefficients */ void silk_find_pred_coefs_FIX( silk_encoder_state_FIX *psEnc, /* I/O encoder state */ silk_encoder_control_FIX *psEncCtrl, /* I/O encoder control */ const opus_int16 res_pitch[], /* I Residual from pitch analysis */ const opus_int16 x[], /* I Speech signal */
--- a/media/libopus/silk/fixed/noise_shape_analysis_FIX.c +++ b/media/libopus/silk/fixed/noise_shape_analysis_FIX.c @@ -32,17 +32,17 @@ POSSIBILITY OF SUCH DAMAGE. #include "main_FIX.h" #include "stack_alloc.h" #include "tuning_parameters.h" /* Compute gain to make warped filter coefficients have a zero mean log frequency response on a */ /* non-warped frequency scale. (So that it can be implemented with a minimum-phase monic filter.) */ /* Note: A monic filter is one with the first coefficient equal to 1.0. In Silk we omit the first */ /* coefficient in an array of coefficients, for monic filters. */ -static inline opus_int32 warped_gain( /* gain in Q16*/ +static OPUS_INLINE opus_int32 warped_gain( /* gain in Q16*/ const opus_int32 *coefs_Q24, opus_int lambda_Q16, opus_int order ) { opus_int i; opus_int32 gain_Q24; lambda_Q16 = -lambda_Q16; @@ -51,17 +51,17 @@ static inline opus_int32 warped_gain( /* gain_Q24 = silk_SMLAWB( coefs_Q24[ i ], gain_Q24, lambda_Q16 ); } gain_Q24 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), gain_Q24, -lambda_Q16 ); return silk_INVERSE32_varQ( gain_Q24, 40 ); } /* Convert warped filter coefficients to monic pseudo-warped coefficients and limit maximum */ /* amplitude of monic warped coefficients by using bandwidth expansion on the true coefficients */ -static inline void limit_warped_coefs( +static OPUS_INLINE void limit_warped_coefs( opus_int32 *coefs_syn_Q24, opus_int32 *coefs_ana_Q24, opus_int lambda_Q16, opus_int32 limit_Q24, opus_int order ) { opus_int i, iter, ind = 0; opus_int32 tmp, maxabs_Q24, chirp_Q16, gain_syn_Q16, gain_ana_Q16; @@ -140,17 +140,18 @@ static inline void limit_warped_coefs( /**************************************************************/ /* Compute noise shaping coefficients and initial gain values */ /**************************************************************/ void silk_noise_shape_analysis_FIX( silk_encoder_state_FIX *psEnc, /* I/O Encoder state FIX */ silk_encoder_control_FIX *psEncCtrl, /* I/O Encoder 
control FIX */ const opus_int16 *pitch_res, /* I LPC residual from pitch analysis */ - const opus_int16 *x /* I Input signal [ frame_length + la_shape ] */ + const opus_int16 *x, /* I Input signal [ frame_length + la_shape ] */ + int arch /* I Run-time architecture */ ) { silk_shape_state_FIX *psShapeSt = &psEnc->sShape; opus_int k, i, nSamples, Qnrg, b_Q14, warping_Q16, scale = 0; opus_int32 SNR_adj_dB_Q7, HarmBoost_Q16, HarmShapeGain_Q16, Tilt_Q16, tmp32; opus_int32 nrg, pre_nrg_Q30, log_energy_Q7, log_energy_prev_Q7, energy_variation_Q7; opus_int32 delta_Q16, BWExp1_Q16, BWExp2_Q16, gain_mult_Q16, gain_add_Q16, strength_Q16, b_Q8; opus_int32 auto_corr[ MAX_SHAPE_LPC_ORDER + 1 ]; @@ -276,17 +277,17 @@ void silk_noise_shape_analysis_FIX( /* Update pointer: next LPC analysis block */ x_ptr += psEnc->sCmn.subfr_length; if( psEnc->sCmn.warping_Q16 > 0 ) { /* Calculate warped auto correlation */ silk_warped_autocorrelation_FIX( auto_corr, &scale, x_windowed, warping_Q16, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder ); } else { /* Calculate regular auto correlation */ - silk_autocorr( auto_corr, &scale, x_windowed, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder + 1 ); + silk_autocorr( auto_corr, &scale, x_windowed, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder + 1, arch ); } /* Add white noise, as a fraction of energy */ auto_corr[0] = silk_ADD32( auto_corr[0], silk_max_32( silk_SMULWB( silk_RSHIFT( auto_corr[ 0 ], 4 ), SILK_FIX_CONST( SHAPE_WHITE_NOISE_FRACTION, 20 ) ), 1 ) ); /* Calculate the reflection coefficients using schur */ nrg = silk_schur64( refl_coef_Q16, auto_corr, psEnc->sCmn.shapingLPCOrder );
--- a/media/libopus/silk/fixed/pitch_analysis_core_FIX.c +++ b/media/libopus/silk/fixed/pitch_analysis_core_FIX.c @@ -57,17 +57,18 @@ typedef opus_int32 silk_pe_stage3_vals[ /* Internally used functions */ /************************************************************/ static void silk_P_Ana_calc_corr_st3( silk_pe_stage3_vals cross_corr_st3[], /* O 3 DIM correlation array */ const opus_int16 frame[], /* I vector to correlate */ opus_int start_lag, /* I lag offset to search around */ opus_int sf_length, /* I length of a 5 ms subframe */ opus_int nb_subfr, /* I number of subframes */ - opus_int complexity /* I Complexity setting */ + opus_int complexity, /* I Complexity setting */ + int arch /* I Run-time architecture */ ); static void silk_P_Ana_calc_energy_st3( silk_pe_stage3_vals energies_st3[], /* O 3 DIM energy array */ const opus_int16 frame[], /* I vector to calc energy in */ opus_int start_lag, /* I lag offset to search around */ opus_int sf_length, /* I length of one 5 ms subframe */ opus_int nb_subfr, /* I number of subframes */ @@ -83,17 +84,18 @@ opus_int silk_pitch_analysis_core( opus_int16 *lagIndex, /* O Lag Index */ opus_int8 *contourIndex, /* O Pitch contour Index */ opus_int *LTPCorr_Q15, /* I/O Normalized correlation; input: value from previous frame */ opus_int prevLag, /* I Last lag of previous frame; set to zero is unvoiced */ const opus_int32 search_thres1_Q16, /* I First stage threshold for lag candidates 0 - 1 */ const opus_int search_thres2_Q13, /* I Final threshold for lag candidates 0 - 1 */ const opus_int Fs_kHz, /* I Sample frequency (kHz) */ const opus_int complexity, /* I Complexity setting, 0-2, where 2 is highest */ - const opus_int nb_subfr /* I number of 5 ms subframes */ + const opus_int nb_subfr, /* I number of 5 ms subframes */ + int arch /* I Run-time architecture */ ) { VARDECL( opus_int16, frame_8kHz ); VARDECL( opus_int16, frame_4kHz ); opus_int32 filt_state[ 6 ]; const opus_int16 *input_frame_ptr; opus_int i, k, d, j; 
VARDECL( opus_int16, C ); @@ -184,17 +186,17 @@ opus_int silk_pitch_analysis_core( silk_assert( target_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz ); basis_ptr = target_ptr - MIN_LAG_4KHZ; /* Check that we are within range of the array */ silk_assert( basis_ptr >= frame_4kHz ); silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz ); - celt_pitch_xcorr( target_ptr, target_ptr - MAX_LAG_4KHZ, xcorr32, SF_LENGTH_8KHZ, MAX_LAG_4KHZ - MIN_LAG_4KHZ + 1 ); + celt_pitch_xcorr( target_ptr, target_ptr - MAX_LAG_4KHZ, xcorr32, SF_LENGTH_8KHZ, MAX_LAG_4KHZ - MIN_LAG_4KHZ + 1, arch ); /* Calculate first vector products before loop */ cross_corr = xcorr32[ MAX_LAG_4KHZ - MIN_LAG_4KHZ ]; normalizer = silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ ); normalizer = silk_ADD32( normalizer, silk_inner_prod_aligned( basis_ptr, basis_ptr, SF_LENGTH_8KHZ ) ); normalizer = silk_ADD32( normalizer, silk_SMULBB( SF_LENGTH_8KHZ, 4000 ) ); matrix_ptr( C, k, 0, CSTRIDE_4KHZ ) = @@ -460,17 +462,17 @@ opus_int silk_pitch_analysis_core( if( Fs_kHz > 8 ) { VARDECL( opus_int16, scratch_mem ); /***************************************************************************/ /* Scale input signal down to avoid correlations measures from overflowing */ /***************************************************************************/ /* find scaling as max scaling for each subframe */ silk_sum_sqr_shift( &energy, &shift, frame, frame_length ); - ALLOC( scratch_mem, shift > 0 ? frame_length : 0, opus_int16 ); + ALLOC( scratch_mem, shift > 0 ? 
frame_length : ALLOC_NONE, opus_int16 ); if( shift > 0 ) { /* Move signal to scratch mem because the input signal should be unchanged */ shift = silk_RSHIFT( shift, 1 ); for( i = 0; i < frame_length; i++ ) { scratch_mem[ i ] = silk_RSHIFT( frame[ i ], shift ); } input_frame_ptr = scratch_mem; } else { @@ -511,17 +513,17 @@ opus_int silk_pitch_analysis_core( nb_cbk_search = PE_NB_CBKS_STAGE3_10MS; cbk_size = PE_NB_CBKS_STAGE3_10MS; Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ]; } /* Calculate the correlations and energies needed in stage 3 */ ALLOC( energies_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals ); ALLOC( cross_corr_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals ); - silk_P_Ana_calc_corr_st3( cross_corr_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity ); + silk_P_Ana_calc_corr_st3( cross_corr_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity, arch ); silk_P_Ana_calc_energy_st3( energies_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity ); lag_counter = 0; silk_assert( lag == silk_SAT16( lag ) ); contour_bias_Q15 = silk_DIV32_16( SILK_FIX_CONST( PE_FLATCONTOUR_BIAS, 15 ), lag ); target_ptr = &input_frame_ptr[ PE_LTP_MEM_LENGTH_MS * Fs_kHz ]; energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, nb_subfr * sf_length ), 1 ); @@ -592,17 +594,18 @@ opus_int silk_pitch_analysis_core( * case 4*12*5 = 240 correlations, but more likely around 120. 
***********************************************************************/ static void silk_P_Ana_calc_corr_st3( silk_pe_stage3_vals cross_corr_st3[], /* O 3 DIM correlation array */ const opus_int16 frame[], /* I vector to correlate */ opus_int start_lag, /* I lag offset to search around */ opus_int sf_length, /* I length of a 5 ms subframe */ opus_int nb_subfr, /* I number of subframes */ - opus_int complexity /* I Complexity setting */ + opus_int complexity, /* I Complexity setting */ + int arch /* I Run-time architecture */ ) { const opus_int16 *target_ptr; opus_int i, j, k, lag_counter, lag_low, lag_high; opus_int nb_cbk_search, delta, idx, cbk_size; VARDECL( opus_int32, scratch_mem ); VARDECL( opus_int32, xcorr32 ); const opus_int8 *Lag_range_ptr, *Lag_CB_ptr; @@ -629,17 +632,17 @@ static void silk_P_Ana_calc_corr_st3( target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ]; /* Pointer to middle of frame */ for( k = 0; k < nb_subfr; k++ ) { lag_counter = 0; /* Calculate the correlations for each subframe */ lag_low = matrix_ptr( Lag_range_ptr, k, 0, 2 ); lag_high = matrix_ptr( Lag_range_ptr, k, 1, 2 ); silk_assert(lag_high-lag_low+1 <= SCRATCH_SIZE); - celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr32, sf_length, lag_high - lag_low + 1 ); + celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr32, sf_length, lag_high - lag_low + 1, arch ); for( j = lag_low; j <= lag_high; j++ ) { silk_assert( lag_counter < SCRATCH_SIZE ); scratch_mem[ lag_counter ] = xcorr32[ lag_high - j ]; lag_counter++; } delta = matrix_ptr( Lag_range_ptr, k, 0, 2 ); for( i = 0; i < nb_cbk_search; i++ ) {
--- a/media/libopus/silk/fixed/prefilter_FIX.c +++ b/media/libopus/silk/fixed/prefilter_FIX.c @@ -29,17 +29,17 @@ POSSIBILITY OF SUCH DAMAGE. #include "config.h" #endif #include "main_FIX.h" #include "stack_alloc.h" #include "tuning_parameters.h" /* Prefilter for finding Quantizer input signal */ -static inline void silk_prefilt_FIX( +static OPUS_INLINE void silk_prefilt_FIX( silk_prefilter_state_FIX *P, /* I/O state */ opus_int32 st_res_Q12[], /* I short term residual signal */ opus_int32 xw_Q3[], /* O prefiltered signal */ opus_int32 HarmShapeFIRPacked_Q12, /* I Harmonic shaping coeficients */ opus_int Tilt_Q14, /* I Tilt shaping coeficient */ opus_int32 LF_shp_Q14, /* I Low-frequancy shaping coeficients */ opus_int lag, /* I Lag for harmonic shaping */ opus_int length /* I Length of signals */ @@ -151,17 +151,17 @@ void silk_prefilter_FIX( pxw_Q3 += psEnc->sCmn.subfr_length; } P->lagPrev = psEncCtrl->pitchL[ psEnc->sCmn.nb_subfr - 1 ]; RESTORE_STACK; } /* Prefilter for finding Quantizer input signal */ -static inline void silk_prefilt_FIX( +static OPUS_INLINE void silk_prefilt_FIX( silk_prefilter_state_FIX *P, /* I/O state */ opus_int32 st_res_Q12[], /* I short term residual signal */ opus_int32 xw_Q3[], /* O prefiltered signal */ opus_int32 HarmShapeFIRPacked_Q12, /* I Harmonic shaping coeficients */ opus_int Tilt_Q14, /* I Tilt shaping coeficient */ opus_int32 LF_shp_Q14, /* I Low-frequancy shaping coeficients */ opus_int lag, /* I Lag for harmonic shaping */ opus_int length /* I Length of signals */
--- a/media/libopus/silk/fixed/solve_LS_FIX.c +++ b/media/libopus/silk/fixed/solve_LS_FIX.c @@ -38,40 +38,40 @@ POSSIBILITY OF SUCH DAMAGE. /*****************************/ typedef struct { opus_int32 Q36_part; opus_int32 Q48_part; } inv_D_t; /* Factorize square matrix A into LDL form */ -static inline void silk_LDL_factorize_FIX( +static OPUS_INLINE void silk_LDL_factorize_FIX( opus_int32 *A, /* I/O Pointer to Symetric Square Matrix */ opus_int M, /* I Size of Matrix */ opus_int32 *L_Q16, /* I/O Pointer to Square Upper triangular Matrix */ inv_D_t *inv_D /* I/O Pointer to vector holding inverted diagonal elements of D */ ); /* Solve Lx = b, when L is lower triangular and has ones on the diagonal */ -static inline void silk_LS_SolveFirst_FIX( +static OPUS_INLINE void silk_LS_SolveFirst_FIX( const opus_int32 *L_Q16, /* I Pointer to Lower Triangular Matrix */ opus_int M, /* I Dim of Matrix equation */ const opus_int32 *b, /* I b Vector */ opus_int32 *x_Q16 /* O x Vector */ ); /* Solve L^t*x = b, where L is lower triangular with ones on the diagonal */ -static inline void silk_LS_SolveLast_FIX( +static OPUS_INLINE void silk_LS_SolveLast_FIX( const opus_int32 *L_Q16, /* I Pointer to Lower Triangular Matrix */ const opus_int M, /* I Dim of Matrix equation */ const opus_int32 *b, /* I b Vector */ opus_int32 *x_Q16 /* O x Vector */ ); -static inline void silk_LS_divide_Q16_FIX( +static OPUS_INLINE void silk_LS_divide_Q16_FIX( opus_int32 T[], /* I/O Numenator vector */ inv_D_t *inv_D, /* I 1 / D vector */ opus_int M /* I dimension */ ); /* Solves Ax = b, assuming A is symmetric */ void silk_solve_LDL_FIX( opus_int32 *A, /* I Pointer to symetric square matrix A */ @@ -108,17 +108,17 @@ void silk_solve_LDL_FIX( /**************************************************** x = inv(L') * inv(D) * Y *****************************************************/ silk_LS_SolveLast_FIX( L_Q16, M, Y, x_Q16 ); RESTORE_STACK; } -static inline void silk_LDL_factorize_FIX( +static OPUS_INLINE void 
silk_LDL_factorize_FIX( opus_int32 *A, /* I/O Pointer to Symetric Square Matrix */ opus_int M, /* I Size of Matrix */ opus_int32 *L_Q16, /* I/O Pointer to Square Upper triangular Matrix */ inv_D_t *inv_D /* I/O Pointer to vector holding inverted diagonal elements of D */ ) { opus_int i, j, k, status, loop_count; const opus_int32 *ptr1, *ptr2; @@ -180,17 +180,17 @@ static inline void silk_LDL_factorize_FI ptr2 += M; } } } silk_assert( status == 0 ); } -static inline void silk_LS_divide_Q16_FIX( +static OPUS_INLINE void silk_LS_divide_Q16_FIX( opus_int32 T[], /* I/O Numenator vector */ inv_D_t *inv_D, /* I 1 / D vector */ opus_int M /* I dimension */ ) { opus_int i; opus_int32 tmp_32; opus_int32 one_div_diag_Q36, one_div_diag_Q48; @@ -200,17 +200,17 @@ static inline void silk_LS_divide_Q16_FI one_div_diag_Q48 = inv_D[ i ].Q48_part; tmp_32 = T[ i ]; T[ i ] = silk_ADD32( silk_SMMUL( tmp_32, one_div_diag_Q48 ), silk_RSHIFT( silk_SMULWW( tmp_32, one_div_diag_Q36 ), 4 ) ); } } /* Solve Lx = b, when L is lower triangular and has ones on the diagonal */ -static inline void silk_LS_SolveFirst_FIX( +static OPUS_INLINE void silk_LS_SolveFirst_FIX( const opus_int32 *L_Q16, /* I Pointer to Lower Triangular Matrix */ opus_int M, /* I Dim of Matrix equation */ const opus_int32 *b, /* I b Vector */ opus_int32 *x_Q16 /* O x Vector */ ) { opus_int i, j; const opus_int32 *ptr32; @@ -222,17 +222,17 @@ static inline void silk_LS_SolveFirst_FI for( j = 0; j < i; j++ ) { tmp_32 = silk_SMLAWW( tmp_32, ptr32[ j ], x_Q16[ j ] ); } x_Q16[ i ] = silk_SUB32( b[ i ], tmp_32 ); } } /* Solve L^t*x = b, where L is lower triangular with ones on the diagonal */ -static inline void silk_LS_SolveLast_FIX( +static OPUS_INLINE void silk_LS_SolveLast_FIX( const opus_int32 *L_Q16, /* I Pointer to Lower Triangular Matrix */ const opus_int M, /* I Dim of Matrix equation */ const opus_int32 *b, /* I b Vector */ opus_int32 *x_Q16 /* O x Vector */ ) { opus_int i, j; const opus_int32 *ptr32;
--- a/media/libopus/silk/float/LPC_analysis_filter_FLP.c +++ b/media/libopus/silk/float/LPC_analysis_filter_FLP.c @@ -35,17 +35,17 @@ POSSIBILITY OF SUCH DAMAGE. /************************************************/ /* LPC analysis filter */ /* NB! State is kept internally and the */ /* filter always starts with zero state */ /* first Order output samples are set to zero */ /************************************************/ /* 16th order LPC analysis filter, does not write first 16 samples */ -static inline void silk_LPC_analysis_filter16_FLP( +static OPUS_INLINE void silk_LPC_analysis_filter16_FLP( silk_float r_LPC[], /* O LPC residual signal */ const silk_float PredCoef[], /* I LPC coefficients */ const silk_float s[], /* I Input signal */ const opus_int length /* I Length of input signal */ ) { opus_int ix; silk_float LPC_pred; @@ -73,17 +73,17 @@ static inline void silk_LPC_analysis_fil s_ptr[ -15 ] * PredCoef[ 15 ]; /* prediction error */ r_LPC[ix] = s_ptr[ 1 ] - LPC_pred; } } /* 12th order LPC analysis filter, does not write first 12 samples */ -static inline void silk_LPC_analysis_filter12_FLP( +static OPUS_INLINE void silk_LPC_analysis_filter12_FLP( silk_float r_LPC[], /* O LPC residual signal */ const silk_float PredCoef[], /* I LPC coefficients */ const silk_float s[], /* I Input signal */ const opus_int length /* I Length of input signal */ ) { opus_int ix; silk_float LPC_pred; @@ -107,17 +107,17 @@ static inline void silk_LPC_analysis_fil s_ptr[ -11 ] * PredCoef[ 11 ]; /* prediction error */ r_LPC[ix] = s_ptr[ 1 ] - LPC_pred; } } /* 10th order LPC analysis filter, does not write first 10 samples */ -static inline void silk_LPC_analysis_filter10_FLP( +static OPUS_INLINE void silk_LPC_analysis_filter10_FLP( silk_float r_LPC[], /* O LPC residual signal */ const silk_float PredCoef[], /* I LPC coefficients */ const silk_float s[], /* I Input signal */ const opus_int length /* I Length of input signal */ ) { opus_int ix; silk_float LPC_pred; @@ -139,17 +139,17 
@@ static inline void silk_LPC_analysis_fil s_ptr[ -9 ] * PredCoef[ 9 ]; /* prediction error */ r_LPC[ix] = s_ptr[ 1 ] - LPC_pred; } } /* 8th order LPC analysis filter, does not write first 8 samples */ -static inline void silk_LPC_analysis_filter8_FLP( +static OPUS_INLINE void silk_LPC_analysis_filter8_FLP( silk_float r_LPC[], /* O LPC residual signal */ const silk_float PredCoef[], /* I LPC coefficients */ const silk_float s[], /* I Input signal */ const opus_int length /* I Length of input signal */ ) { opus_int ix; silk_float LPC_pred; @@ -169,17 +169,17 @@ static inline void silk_LPC_analysis_fil s_ptr[ -7 ] * PredCoef[ 7 ]; /* prediction error */ r_LPC[ix] = s_ptr[ 1 ] - LPC_pred; } } /* 6th order LPC analysis filter, does not write first 6 samples */ -static inline void silk_LPC_analysis_filter6_FLP( +static OPUS_INLINE void silk_LPC_analysis_filter6_FLP( silk_float r_LPC[], /* O LPC residual signal */ const silk_float PredCoef[], /* I LPC coefficients */ const silk_float s[], /* I Input signal */ const opus_int length /* I Length of input signal */ ) { opus_int ix; silk_float LPC_pred;
--- a/media/libopus/silk/float/SigProc_FLP.h +++ b/media/libopus/silk/float/SigProc_FLP.h @@ -89,17 +89,18 @@ opus_int silk_pitch_analysis_core_FLP( opus_int16 *lagIndex, /* O Lag Index */ opus_int8 *contourIndex, /* O Pitch contour Index */ silk_float *LTPCorr, /* I/O Normalized correlation; input: value from previous frame */ opus_int prevLag, /* I Last lag of previous frame; set to zero is unvoiced */ const silk_float search_thres1, /* I First stage threshold for lag candidates 0 - 1 */ const silk_float search_thres2, /* I Final threshold for lag candidates 0 - 1 */ const opus_int Fs_kHz, /* I sample frequency (kHz) */ const opus_int complexity, /* I Complexity setting, 0-2, where 2 is highest */ - const opus_int nb_subfr /* I Number of 5 ms subframes */ + const opus_int nb_subfr, /* I Number of 5 ms subframes */ + int arch /* I Run-time architecture */ ); void silk_insertion_sort_decreasing_FLP( silk_float *a, /* I/O Unsorted / Sorted vector */ opus_int *idx, /* O Index vector for the sorted elements */ const opus_int L, /* I Vector length */ const opus_int K /* I Number of correctly sorted positions */ ); @@ -148,55 +149,55 @@ double silk_energy_FLP( #define PI (3.1415926536f) #define silk_min_float( a, b ) (((a) < (b)) ? (a) : (b)) #define silk_max_float( a, b ) (((a) > (b)) ? 
(a) : (b)) #define silk_abs_float( a ) ((silk_float)fabs(a)) /* sigmoid function */ -static inline silk_float silk_sigmoid( silk_float x ) +static OPUS_INLINE silk_float silk_sigmoid( silk_float x ) { return (silk_float)(1.0 / (1.0 + exp(-x))); } /* floating-point to integer conversion (rounding) */ -static inline opus_int32 silk_float2int( silk_float x ) +static OPUS_INLINE opus_int32 silk_float2int( silk_float x ) { return (opus_int32)float2int( x ); } /* floating-point to integer conversion (rounding) */ -static inline void silk_float2short_array( +static OPUS_INLINE void silk_float2short_array( opus_int16 *out, const silk_float *in, opus_int32 length ) { opus_int32 k; for( k = length - 1; k >= 0; k-- ) { out[k] = silk_SAT16( (opus_int32)float2int( in[k] ) ); } } /* integer to floating-point conversion */ -static inline void silk_short2float_array( +static OPUS_INLINE void silk_short2float_array( silk_float *out, const opus_int16 *in, opus_int32 length ) { opus_int32 k; for( k = length - 1; k >= 0; k-- ) { out[k] = (silk_float)in[k]; } } /* using log2() helps the fixed-point conversion */ -static inline silk_float silk_log2( double x ) +static OPUS_INLINE silk_float silk_log2( double x ) { return ( silk_float )( 3.32192809488736 * log10( x ) ); } #ifdef __cplusplus } #endif
--- a/media/libopus/silk/float/encode_frame_FLP.c +++ b/media/libopus/silk/float/encode_frame_FLP.c @@ -28,17 +28,17 @@ POSSIBILITY OF SUCH DAMAGE. #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "main_FLP.h" #include "tuning_parameters.h" /* Low Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode with lower bitrate */ -static inline void silk_LBRR_encode_FLP( +static OPUS_INLINE void silk_LBRR_encode_FLP( silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */ const silk_float xfw[], /* I Input signal */ opus_int condCoding /* I The type of conditional coding used so far for this frame */ ); void silk_encode_do_VAD_FLP( silk_encoder_state_FLP *psEnc /* I/O Encoder state FLP */ @@ -124,17 +124,17 @@ opus_int silk_encode_frame_FLP( for( i = 0; i < 8; i++ ) { x_frame[ LA_SHAPE_MS * psEnc->sCmn.fs_kHz + i * ( psEnc->sCmn.frame_length >> 3 ) ] += ( 1 - ( i & 2 ) ) * 1e-6f; } if( !psEnc->sCmn.prefillFlag ) { /*****************************************/ /* Find pitch lags, initial LPC analysis */ /*****************************************/ - silk_find_pitch_lags_FLP( psEnc, &sEncCtrl, res_pitch, x_frame ); + silk_find_pitch_lags_FLP( psEnc, &sEncCtrl, res_pitch, x_frame, psEnc->sCmn.arch ); /************************/ /* Noise shape analysis */ /************************/ silk_noise_shape_analysis_FLP( psEnc, &sEncCtrl, res_pitch_frame, x_frame ); /***************************************************/ /* Find linear prediction coefficients (LPC + LTP) */ @@ -289,39 +289,39 @@ opus_int silk_encode_frame_FLP( } } } /* Update input buffer */ silk_memmove( psEnc->x_buf, &psEnc->x_buf[ psEnc->sCmn.frame_length ], ( psEnc->sCmn.ltp_mem_length + LA_SHAPE_MS * psEnc->sCmn.fs_kHz ) * sizeof( silk_float ) ); - /* Parameters needed for next frame */ - psEnc->sCmn.prevLag = sEncCtrl.pitchL[ psEnc->sCmn.nb_subfr - 1 ]; - psEnc->sCmn.prevSignalType = psEnc->sCmn.indices.signalType; - /* Exit 
without entropy coding */ if( psEnc->sCmn.prefillFlag ) { /* No payload */ *pnBytesOut = 0; return ret; } + /* Parameters needed for next frame */ + psEnc->sCmn.prevLag = sEncCtrl.pitchL[ psEnc->sCmn.nb_subfr - 1 ]; + psEnc->sCmn.prevSignalType = psEnc->sCmn.indices.signalType; + /****************************************/ /* Finalize payload */ /****************************************/ psEnc->sCmn.first_frame_after_reset = 0; /* Payload size */ *pnBytesOut = silk_RSHIFT( ec_tell( psRangeEnc ) + 7, 3 ); return ret; } /* Low-Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode excitation at lower bitrate */ -static inline void silk_LBRR_encode_FLP( +static OPUS_INLINE void silk_LBRR_encode_FLP( silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */ const silk_float xfw[], /* I Input signal */ opus_int condCoding /* I The type of conditional coding used so far for this frame */ ) { opus_int k; opus_int32 Gains_Q16[ MAX_NB_SUBFR ];
--- a/media/libopus/silk/float/find_pitch_lags_FLP.c +++ b/media/libopus/silk/float/find_pitch_lags_FLP.c @@ -32,17 +32,18 @@ POSSIBILITY OF SUCH DAMAGE. #include <stdlib.h> #include "main_FLP.h" #include "tuning_parameters.h" void silk_find_pitch_lags_FLP( silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */ silk_float res[], /* O Residual */ - const silk_float x[] /* I Speech signal */ + const silk_float x[], /* I Speech signal */ + int arch /* I Run-time architecture */ ) { opus_int buf_len; silk_float thrhld, res_nrg; const silk_float *x_buf_ptr, *x_buf; silk_float auto_corr[ MAX_FIND_PITCH_LPC_ORDER + 1 ]; silk_float A[ MAX_FIND_PITCH_LPC_ORDER ]; silk_float refl_coef[ MAX_FIND_PITCH_LPC_ORDER ]; @@ -111,17 +112,17 @@ void silk_find_pitch_lags_FLP( thrhld -= 0.15f * (psEnc->sCmn.prevSignalType >> 1); thrhld -= 0.1f * psEnc->sCmn.input_tilt_Q15 * ( 1.0f / 32768.0f ); /*****************************************/ /* Call Pitch estimator */ /*****************************************/ if( silk_pitch_analysis_core_FLP( res, psEncCtrl->pitchL, &psEnc->sCmn.indices.lagIndex, &psEnc->sCmn.indices.contourIndex, &psEnc->LTPCorr, psEnc->sCmn.prevLag, psEnc->sCmn.pitchEstimationThreshold_Q16 / 65536.0f, - thrhld, psEnc->sCmn.fs_kHz, psEnc->sCmn.pitchEstimationComplexity, psEnc->sCmn.nb_subfr ) == 0 ) + thrhld, psEnc->sCmn.fs_kHz, psEnc->sCmn.pitchEstimationComplexity, psEnc->sCmn.nb_subfr, arch ) == 0 ) { psEnc->sCmn.indices.signalType = TYPE_VOICED; } else { psEnc->sCmn.indices.signalType = TYPE_UNVOICED; } } else { silk_memset( psEncCtrl->pitchL, 0, sizeof( psEncCtrl->pitchL ) ); psEnc->sCmn.indices.lagIndex = 0;
--- a/media/libopus/silk/float/find_pred_coefs_FLP.c +++ b/media/libopus/silk/float/find_pred_coefs_FLP.c @@ -62,17 +62,17 @@ void silk_find_pred_coefs_FLP( silk_assert( psEnc->sCmn.ltp_mem_length - psEnc->sCmn.predictLPCOrder >= psEncCtrl->pitchL[ 0 ] + LTP_ORDER / 2 ); /* LTP analysis */ silk_find_LTP_FLP( psEncCtrl->LTPCoef, WLTP, &psEncCtrl->LTPredCodGain, res_pitch, psEncCtrl->pitchL, Wght, psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.ltp_mem_length ); /* Quantize LTP gain parameters */ silk_quant_LTP_gains_FLP( psEncCtrl->LTPCoef, psEnc->sCmn.indices.LTPIndex, &psEnc->sCmn.indices.PERIndex, - WLTP, psEnc->sCmn.mu_LTP_Q9, psEnc->sCmn.LTPQuantLowComplexity, psEnc->sCmn.nb_subfr ); + &psEnc->sCmn.sum_log_gain_Q7, WLTP, psEnc->sCmn.mu_LTP_Q9, psEnc->sCmn.LTPQuantLowComplexity, psEnc->sCmn.nb_subfr ); /* Control LTP scaling */ silk_LTP_scale_ctrl_FLP( psEnc, psEncCtrl, condCoding ); /* Create LTP residual */ silk_LTP_analysis_filter_FLP( LPC_in_pre, x - psEnc->sCmn.predictLPCOrder, psEncCtrl->LTPCoef, psEncCtrl->pitchL, invGains, psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.predictLPCOrder ); } else { @@ -85,16 +85,17 @@ void silk_find_pred_coefs_FLP( for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { silk_scale_copy_vector_FLP( x_pre_ptr, x_ptr, invGains[ i ], psEnc->sCmn.subfr_length + psEnc->sCmn.predictLPCOrder ); x_pre_ptr += psEnc->sCmn.subfr_length + psEnc->sCmn.predictLPCOrder; x_ptr += psEnc->sCmn.subfr_length; } silk_memset( psEncCtrl->LTPCoef, 0, psEnc->sCmn.nb_subfr * LTP_ORDER * sizeof( silk_float ) ); psEncCtrl->LTPredCodGain = 0.0f; + psEnc->sCmn.sum_log_gain_Q7 = 0; } /* Limit on total predictive coding gain */ if( psEnc->sCmn.first_frame_after_reset ) { minInvGain = 1.0f / MAX_PREDICTION_POWER_GAIN_AFTER_RESET; } else { minInvGain = (silk_float)pow( 2, psEncCtrl->LTPredCodGain / 3 ) / MAX_PREDICTION_POWER_GAIN; minInvGain /= 0.25f + 0.75f * psEncCtrl->coding_quality;
--- a/media/libopus/silk/float/main_FLP.h +++ b/media/libopus/silk/float/main_FLP.h @@ -66,17 +66,18 @@ opus_int silk_encode_frame_FLP( ec_enc *psRangeEnc, /* I/O compressor data structure */ opus_int condCoding, /* I The type of conditional coding to use */ opus_int maxBits, /* I If > 0: maximum number of output bits */ opus_int useCBR /* I Flag to force constant-bitrate operation */ ); /* Initializes the Silk encoder state */ opus_int silk_init_encoder( - silk_encoder_state_FLP *psEnc /* I/O Encoder state FLP */ + silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ + int arch /* I Run-tim architecture */ ); /* Control the Silk encoder */ opus_int silk_control_encoder( silk_encoder_state_FLP *psEnc, /* I/O Pointer to Silk encoder state FLP */ silk_EncControlStruct *encControl, /* I Control structure */ const opus_int32 TargetRate_bps, /* I Target max bitrate (bps) */ const opus_int allow_bw_switch, /* I Flag to allow switching audio bandwidth */ @@ -124,17 +125,18 @@ void silk_LTP_scale_ctrl_FLP( /**********************************************/ /* Prediction Analysis */ /**********************************************/ /* Find pitch lags */ void silk_find_pitch_lags_FLP( silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */ silk_float res[], /* O Residual */ - const silk_float x[] /* I Speech signal */ + const silk_float x[], /* I Speech signal */ + int arch /* I Run-time architecture */ ); /* Find LPC and LTP coefficients */ void silk_find_pred_coefs_FLP( silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */ const silk_float res_pitch[], /* I Residual from pitch analysis */ const silk_float x[], /* I Speech signal */ @@ -194,16 +196,17 @@ void silk_LPC_analysis_filter_FLP( const opus_int Order /* I LPC order */ ); /* LTP tap quantizer */ void silk_quant_LTP_gains_FLP( silk_float B[ MAX_NB_SUBFR * LTP_ORDER ], /* I/O 
(Un-)quantized LTP gains */ opus_int8 cbk_index[ MAX_NB_SUBFR ], /* O Codebook index */ opus_int8 *periodicity_index, /* O Periodicity index */ + opus_int32 *sum_log_gain_Q7, /* I/O Cumulative max prediction gain */ const silk_float W[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* I Error weights */ const opus_int mu_Q10, /* I Mu value (R/D tradeoff) */ const opus_int lowComplexity, /* I Flag for low complexity */ const opus_int nb_subfr /* I number of subframes */ ); /* Residual energy: nrg = wxx - 2 * wXx * c + c' * wXX * c */ silk_float silk_residual_energy_covar_FLP( /* O Weighted residual energy */
--- a/media/libopus/silk/float/noise_shape_analysis_FLP.c +++ b/media/libopus/silk/float/noise_shape_analysis_FLP.c @@ -31,17 +31,17 @@ POSSIBILITY OF SUCH DAMAGE. #include "main_FLP.h" #include "tuning_parameters.h" /* Compute gain to make warped filter coefficients have a zero mean log frequency response on a */ /* non-warped frequency scale. (So that it can be implemented with a minimum-phase monic filter.) */ /* Note: A monic filter is one with the first coefficient equal to 1.0. In Silk we omit the first */ /* coefficient in an array of coefficients, for monic filters. */ -static inline silk_float warped_gain( +static OPUS_INLINE silk_float warped_gain( const silk_float *coefs, silk_float lambda, opus_int order ) { opus_int i; silk_float gain; lambda = -lambda; @@ -49,17 +49,17 @@ static inline silk_float warped_gain( for( i = order - 2; i >= 0; i-- ) { gain = lambda * gain + coefs[ i ]; } return (silk_float)( 1.0f / ( 1.0f - lambda * gain ) ); } /* Convert warped filter coefficients to monic pseudo-warped coefficients and limit maximum */ /* amplitude of monic warped coefficients by using bandwidth expansion on the true coefficients */ -static inline void warped_true2monic_coefs( +static OPUS_INLINE void warped_true2monic_coefs( silk_float *coefs_syn, silk_float *coefs_ana, silk_float lambda, silk_float limit, opus_int order ) { opus_int i, iter, ind = 0; silk_float tmp, maxabs, chirp, gain_syn, gain_ana;
--- a/media/libopus/silk/float/pitch_analysis_core_FLP.c +++ b/media/libopus/silk/float/pitch_analysis_core_FLP.c @@ -43,17 +43,18 @@ POSSIBILITY OF SUCH DAMAGE. /* Internally used functions */ /************************************************************/ static void silk_P_Ana_calc_corr_st3( silk_float cross_corr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM correlation array */ const silk_float frame[], /* I vector to correlate */ opus_int start_lag, /* I start lag */ opus_int sf_length, /* I sub frame length */ opus_int nb_subfr, /* I number of subframes */ - opus_int complexity /* I Complexity setting */ + opus_int complexity, /* I Complexity setting */ + int arch /* I Run-time architecture */ ); static void silk_P_Ana_calc_energy_st3( silk_float energies_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM correlation array */ const silk_float frame[], /* I vector to correlate */ opus_int start_lag, /* I start lag */ opus_int sf_length, /* I sub frame length */ opus_int nb_subfr, /* I number of subframes */ @@ -69,17 +70,18 @@ opus_int silk_pitch_analysis_core_FLP( opus_int16 *lagIndex, /* O Lag Index */ opus_int8 *contourIndex, /* O Pitch contour Index */ silk_float *LTPCorr, /* I/O Normalized correlation; input: value from previous frame */ opus_int prevLag, /* I Last lag of previous frame; set to zero is unvoiced */ const silk_float search_thres1, /* I First stage threshold for lag candidates 0 - 1 */ const silk_float search_thres2, /* I Final threshold for lag candidates 0 - 1 */ const opus_int Fs_kHz, /* I sample frequency (kHz) */ const opus_int complexity, /* I Complexity setting, 0-2, where 2 is highest */ - const opus_int nb_subfr /* I Number of 5 ms subframes */ + const opus_int nb_subfr, /* I Number of 5 ms subframes */ + int arch /* I Run-time architecture */ ) { opus_int i, k, d, j; silk_float frame_8kHz[ PE_MAX_FRAME_LENGTH_MS * 8 ]; silk_float frame_4kHz[ PE_MAX_FRAME_LENGTH_MS * 4 
]; opus_int16 frame_8_FIX[ PE_MAX_FRAME_LENGTH_MS * 8 ]; opus_int16 frame_4_FIX[ PE_MAX_FRAME_LENGTH_MS * 4 ]; opus_int32 filt_state[ 6 ]; @@ -171,17 +173,17 @@ opus_int silk_pitch_analysis_core_FLP( silk_assert( target_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz ); basis_ptr = target_ptr - min_lag_4kHz; /* Check that we are within range of the array */ silk_assert( basis_ptr >= frame_4kHz ); silk_assert( basis_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz ); - celt_pitch_xcorr( target_ptr, target_ptr-max_lag_4kHz, xcorr, sf_length_8kHz, max_lag_4kHz - min_lag_4kHz + 1 ); + celt_pitch_xcorr( target_ptr, target_ptr-max_lag_4kHz, xcorr, sf_length_8kHz, max_lag_4kHz - min_lag_4kHz + 1, arch ); /* Calculate first vector products before loop */ cross_corr = xcorr[ max_lag_4kHz - min_lag_4kHz ]; normalizer = silk_energy_FLP( target_ptr, sf_length_8kHz ) + silk_energy_FLP( basis_ptr, sf_length_8kHz ) + sf_length_8kHz * 4000.0f; C[ 0 ][ min_lag_4kHz ] += (silk_float)( 2 * cross_corr / normalizer ); @@ -404,17 +406,17 @@ opus_int silk_pitch_analysis_core_FLP( start_lag = silk_max_int( lag - 2, min_lag ); end_lag = silk_min_int( lag + 2, max_lag ); lag_new = lag; /* to avoid undefined lag */ CBimax = 0; /* to avoid undefined lag */ CCmax = -1000.0f; /* Calculate the correlations and energies needed in stage 3 */ - silk_P_Ana_calc_corr_st3( cross_corr_st3, frame, start_lag, sf_length, nb_subfr, complexity ); + silk_P_Ana_calc_corr_st3( cross_corr_st3, frame, start_lag, sf_length, nb_subfr, complexity, arch ); silk_P_Ana_calc_energy_st3( energies_st3, frame, start_lag, sf_length, nb_subfr, complexity ); lag_counter = 0; silk_assert( lag == silk_SAT16( lag ) ); contour_bias = PE_FLATCONTOUR_BIAS / lag; /* Set up cbk parameters according to complexity setting and frame length */ if( nb_subfr == PE_MAX_NB_SUBFR ) { @@ -488,20 +490,21 @@ opus_int silk_pitch_analysis_core_FLP( * case 4*12*5 = 240 correlations, but more likely around 120. 
***********************************************************************/ static void silk_P_Ana_calc_corr_st3( silk_float cross_corr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM correlation array */ const silk_float frame[], /* I vector to correlate */ opus_int start_lag, /* I start lag */ opus_int sf_length, /* I sub frame length */ opus_int nb_subfr, /* I number of subframes */ - opus_int complexity /* I Complexity setting */ + opus_int complexity, /* I Complexity setting */ + int arch /* I Run-time architecture */ ) { - const silk_float *target_ptr, *basis_ptr; + const silk_float *target_ptr; opus_int i, j, k, lag_counter, lag_low, lag_high; opus_int nb_cbk_search, delta, idx, cbk_size; silk_float scratch_mem[ SCRATCH_SIZE ]; opus_val32 xcorr[ SCRATCH_SIZE ]; const opus_int8 *Lag_range_ptr, *Lag_CB_ptr; silk_assert( complexity >= SILK_PE_MIN_COMPLEX ); silk_assert( complexity <= SILK_PE_MAX_COMPLEX ); @@ -522,19 +525,18 @@ static void silk_P_Ana_calc_corr_st3( target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ]; /* Pointer to middle of frame */ for( k = 0; k < nb_subfr; k++ ) { lag_counter = 0; /* Calculate the correlations for each subframe */ lag_low = matrix_ptr( Lag_range_ptr, k, 0, 2 ); lag_high = matrix_ptr( Lag_range_ptr, k, 1, 2 ); silk_assert(lag_high-lag_low+1 <= SCRATCH_SIZE); - celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr, sf_length, lag_high - lag_low + 1 ); + celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr, sf_length, lag_high - lag_low + 1, arch ); for( j = lag_low; j <= lag_high; j++ ) { - basis_ptr = target_ptr - ( start_lag + j ); silk_assert( lag_counter < SCRATCH_SIZE ); scratch_mem[ lag_counter ] = xcorr[ lag_high - j ]; lag_counter++; } delta = matrix_ptr( Lag_range_ptr, k, 0, 2 ); for( i = 0; i < nb_cbk_search; i++ ) { /* Fill out the 3 dim array that stores the correlations for */
--- a/media/libopus/silk/float/prefilter_FLP.c +++ b/media/libopus/silk/float/prefilter_FLP.c @@ -30,17 +30,17 @@ POSSIBILITY OF SUCH DAMAGE. #endif #include "main_FLP.h" #include "tuning_parameters.h" /* * Prefilter for finding Quantizer input signal */ -static inline void silk_prefilt_FLP( +static OPUS_INLINE void silk_prefilt_FLP( silk_prefilter_state_FLP *P, /* I/O state */ silk_float st_res[], /* I */ silk_float xw[], /* O */ silk_float *HarmShapeFIR, /* I */ silk_float Tilt, /* I */ silk_float LF_MA_shp, /* I */ silk_float LF_AR_shp, /* I */ opus_int lag, /* I */ @@ -148,17 +148,17 @@ void silk_prefilter_FLP( pxw += psEnc->sCmn.subfr_length; } P->lagPrev = psEncCtrl->pitchL[ psEnc->sCmn.nb_subfr - 1 ]; } /* * Prefilter for finding Quantizer input signal */ -static inline void silk_prefilt_FLP( +static OPUS_INLINE void silk_prefilt_FLP( silk_prefilter_state_FLP *P, /* I/O state */ silk_float st_res[], /* I */ silk_float xw[], /* O */ silk_float *HarmShapeFIR, /* I */ silk_float Tilt, /* I */ silk_float LF_MA_shp, /* I */ silk_float LF_AR_shp, /* I */ opus_int lag, /* I */
--- a/media/libopus/silk/float/solve_LS_FLP.c +++ b/media/libopus/silk/float/solve_LS_FLP.c @@ -32,39 +32,39 @@ POSSIBILITY OF SUCH DAMAGE. #include "main_FLP.h" #include "tuning_parameters.h" /********************************************************************** * LDL Factorisation. Finds the upper triangular matrix L and the diagonal * Matrix D (only the diagonal elements returned in a vector)such that * the symmetric matric A is given by A = L*D*L'. **********************************************************************/ -static inline void silk_LDL_FLP( +static OPUS_INLINE void silk_LDL_FLP( silk_float *A, /* I/O Pointer to Symetric Square Matrix */ opus_int M, /* I Size of Matrix */ silk_float *L, /* I/O Pointer to Square Upper triangular Matrix */ silk_float *Dinv /* I/O Pointer to vector holding the inverse diagonal elements of D */ ); /********************************************************************** * Function to solve linear equation Ax = b, when A is a MxM lower * triangular matrix, with ones on the diagonal. **********************************************************************/ -static inline void silk_SolveWithLowerTriangularWdiagOnes_FLP( +static OPUS_INLINE void silk_SolveWithLowerTriangularWdiagOnes_FLP( const silk_float *L, /* I Pointer to Lower Triangular Matrix */ opus_int M, /* I Dim of Matrix equation */ const silk_float *b, /* I b Vector */ silk_float *x /* O x Vector */ ); /********************************************************************** * Function to solve linear equation (A^T)x = b, when A is a MxM lower * triangular, with ones on the diagonal. 
(ie then A^T is upper triangular) **********************************************************************/ -static inline void silk_SolveWithUpperTriangularFromLowerWdiagOnes_FLP( +static OPUS_INLINE void silk_SolveWithUpperTriangularFromLowerWdiagOnes_FLP( const silk_float *L, /* I Pointer to Lower Triangular Matrix */ opus_int M, /* I Dim of Matrix equation */ const silk_float *b, /* I b Vector */ silk_float *x /* O x Vector */ ); /********************************************************************** * Function to solve linear equation Ax = b, when A is a MxM @@ -104,17 +104,17 @@ void silk_solve_LDL_FLP( T[ i ] = T[ i ] * Dinv[ i ]; } /**************************************************** x = inv(L') * inv(D) * T *****************************************************/ silk_SolveWithUpperTriangularFromLowerWdiagOnes_FLP( &L[ 0 ][ 0 ], M, T, x ); } -static inline void silk_SolveWithUpperTriangularFromLowerWdiagOnes_FLP( +static OPUS_INLINE void silk_SolveWithUpperTriangularFromLowerWdiagOnes_FLP( const silk_float *L, /* I Pointer to Lower Triangular Matrix */ opus_int M, /* I Dim of Matrix equation */ const silk_float *b, /* I b Vector */ silk_float *x /* O x Vector */ ) { opus_int i, j; silk_float temp; @@ -126,17 +126,17 @@ static inline void silk_SolveWithUpperTr for( j = M - 1; j > i ; j-- ) { temp += ptr1[ j * M ] * x[ j ]; } temp = b[ i ] - temp; x[ i ] = temp; } } -static inline void silk_SolveWithLowerTriangularWdiagOnes_FLP( +static OPUS_INLINE void silk_SolveWithLowerTriangularWdiagOnes_FLP( const silk_float *L, /* I Pointer to Lower Triangular Matrix */ opus_int M, /* I Dim of Matrix equation */ const silk_float *b, /* I b Vector */ silk_float *x /* O x Vector */ ) { opus_int i, j; silk_float temp; @@ -148,17 +148,17 @@ static inline void silk_SolveWithLowerTr for( j = 0; j < i; j++ ) { temp += ptr1[ j ] * x[ j ]; } temp = b[ i ] - temp; x[ i ] = temp; } } -static inline void silk_LDL_FLP( +static OPUS_INLINE void silk_LDL_FLP( silk_float *A, /* I/O 
Pointer to Symetric Square Matrix */ opus_int M, /* I Size of Matrix */ silk_float *L, /* I/O Pointer to Square Upper triangular Matrix */ silk_float *Dinv /* I/O Pointer to vector holding the inverse diagonal elements of D */ ) { opus_int i, j, k, loop_count, err = 1; silk_float *ptr1, *ptr2;
--- a/media/libopus/silk/float/wrappers_FLP.c +++ b/media/libopus/silk/float/wrappers_FLP.c @@ -170,16 +170,17 @@ void silk_NSQ_wrapper_FLP( /***********************************************/ /* Floating-point Silk LTP quantiation wrapper */ /***********************************************/ void silk_quant_LTP_gains_FLP( silk_float B[ MAX_NB_SUBFR * LTP_ORDER ], /* I/O (Un-)quantized LTP gains */ opus_int8 cbk_index[ MAX_NB_SUBFR ], /* O Codebook index */ opus_int8 *periodicity_index, /* O Periodicity index */ + opus_int32 *sum_log_gain_Q7, /* I/O Cumulative max prediction gain */ const silk_float W[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* I Error weights */ const opus_int mu_Q10, /* I Mu value (R/D tradeoff) */ const opus_int lowComplexity, /* I Flag for low complexity */ const opus_int nb_subfr /* I number of subframes */ ) { opus_int i; opus_int16 B_Q14[ MAX_NB_SUBFR * LTP_ORDER ]; @@ -187,14 +188,14 @@ void silk_quant_LTP_gains_FLP( for( i = 0; i < nb_subfr * LTP_ORDER; i++ ) { B_Q14[ i ] = (opus_int16)silk_float2int( B[ i ] * 16384.0f ); } for( i = 0; i < nb_subfr * LTP_ORDER * LTP_ORDER; i++ ) { W_Q18[ i ] = (opus_int32)silk_float2int( W[ i ] * 262144.0f ); } - silk_quant_LTP_gains( B_Q14, cbk_index, periodicity_index, W_Q18, mu_Q10, lowComplexity, nb_subfr ); + silk_quant_LTP_gains( B_Q14, cbk_index, periodicity_index, sum_log_gain_Q7, W_Q18, mu_Q10, lowComplexity, nb_subfr ); for( i = 0; i < nb_subfr * LTP_ORDER; i++ ) { B[ i ] = (silk_float)B_Q14[ i ] * ( 1.0f / 16384.0f ); } }
--- a/media/libopus/silk/init_encoder.c +++ b/media/libopus/silk/init_encoder.c @@ -29,29 +29,33 @@ POSSIBILITY OF SUCH DAMAGE. #include "config.h" #endif #ifdef FIXED_POINT #include "main_FIX.h" #else #include "main_FLP.h" #endif #include "tuning_parameters.h" +#include "cpu_support.h" /*********************************/ /* Initialize Silk Encoder state */ /*********************************/ opus_int silk_init_encoder( - silk_encoder_state_Fxx *psEnc /* I/O Pointer to Silk FIX encoder state */ + silk_encoder_state_Fxx *psEnc, /* I/O Pointer to Silk FIX encoder state */ + int arch /* I Run-time architecture */ ) { opus_int ret = 0; /* Clear the entire encoder state */ silk_memset( psEnc, 0, sizeof( silk_encoder_state_Fxx ) ); + psEnc->sCmn.arch = arch; + psEnc->sCmn.variable_HP_smth1_Q15 = silk_LSHIFT( silk_lin2log( SILK_FIX_CONST( VARIABLE_HP_MIN_CUTOFF_HZ, 16 ) ) - ( 16 << 7 ), 8 ); psEnc->sCmn.variable_HP_smth2_Q15 = psEnc->sCmn.variable_HP_smth1_Q15; /* Used to deactivate LSF interpolation, pitch prediction */ psEnc->sCmn.first_frame_after_reset = 1; /* Initialize Silk VAD */ ret += silk_VAD_Init( &psEnc->sCmn.sVAD );
--- a/media/libopus/silk/log2lin.c +++ b/media/libopus/silk/log2lin.c @@ -36,17 +36,19 @@ POSSIBILITY OF SUCH DAMAGE. opus_int32 silk_log2lin( const opus_int32 inLog_Q7 /* I input on log scale */ ) { opus_int32 out, frac_Q7; if( inLog_Q7 < 0 ) { return 0; - } + } else if ( inLog_Q7 >= 3967 ) { + return silk_int32_MAX; + } out = silk_LSHIFT( 1, silk_RSHIFT( inLog_Q7, 7 ) ); frac_Q7 = inLog_Q7 & 0x7F; if( inLog_Q7 < 2048 ) { /* Piece-wise parabolic approximation */ out = silk_ADD_RSHIFT32( out, silk_MUL( out, silk_SMLAWB( frac_Q7, silk_SMULBB( frac_Q7, 128 - frac_Q7 ), -174 ) ), 7 ); } else { /* Piece-wise parabolic approximation */
--- a/media/libopus/silk/macros.h +++ b/media/libopus/silk/macros.h @@ -27,17 +27,20 @@ POSSIBILITY OF SUCH DAMAGE. #ifndef SILK_MACROS_H #define SILK_MACROS_H #ifdef HAVE_CONFIG_H #include "config.h" #endif -/* This is an inline header file for general platform. */ +#include "opus_types.h" +#include "opus_defines.h" + +/* This is an OPUS_INLINE header file for general platform. */ /* (a32 * (opus_int32)((opus_int16)(b32))) >> 16 output have to be 32bit int */ #define silk_SMULWB(a32, b32) ((((a32) >> 16) * (opus_int32)((opus_int16)(b32))) + ((((a32) & 0x0000FFFF) * (opus_int32)((opus_int16)(b32))) >> 16)) /* a32 + (b32 * (opus_int32)((opus_int16)(c32))) >> 16 output have to be 32bit int */ #define silk_SMLAWB(a32, b32, c32) ((a32) + ((((b32) >> 16) * (opus_int32)((opus_int16)(c32))) + ((((b32) & 0x0000FFFF) * (opus_int32)((opus_int16)(c32))) >> 16))) /* (a32 * (b32 >> 16)) >> 16 */ @@ -73,40 +76,40 @@ POSSIBILITY OF SUCH DAMAGE. ((((a) | (b)) & 0x80000000) == 0 ? silk_int32_MAX : (a)+(b)) ) #define silk_SUB_SAT32(a, b) ((((opus_uint32)(a)-(opus_uint32)(b)) & 0x80000000) == 0 ? \ (( (a) & ((b)^0x80000000) & 0x80000000) ? silk_int32_MIN : (a)-(b)) : \ ((((a)^0x80000000) & (b) & 0x80000000) ? silk_int32_MAX : (a)-(b)) ) #include "ecintrin.h" -static inline opus_int32 silk_CLZ16(opus_int16 in16) +static OPUS_INLINE opus_int32 silk_CLZ16(opus_int16 in16) { return 32 - EC_ILOG(in16<<16|0x8000); } -static inline opus_int32 silk_CLZ32(opus_int32 in32) +static OPUS_INLINE opus_int32 silk_CLZ32(opus_int32 in32) { return in32 ? 
32 - EC_ILOG(in32) : 32; } /* Row based */ #define matrix_ptr(Matrix_base_adr, row, column, N) \ (*((Matrix_base_adr) + ((row)*(N)+(column)))) #define matrix_adr(Matrix_base_adr, row, column, N) \ ((Matrix_base_adr) + ((row)*(N)+(column))) /* Column based */ #ifndef matrix_c_ptr # define matrix_c_ptr(Matrix_base_adr, row, column, M) \ (*((Matrix_base_adr) + ((row)+(M)*(column)))) #endif -#ifdef ARMv4_ASM +#ifdef OPUS_ARM_INLINE_ASM #include "arm/macros_armv4.h" #endif -#ifdef ARMv5E_ASM +#ifdef OPUS_ARM_INLINE_EDSP #include "arm/macros_armv5e.h" #endif #endif /* SILK_MACROS_H */
--- a/media/libopus/silk/main.h +++ b/media/libopus/silk/main.h @@ -199,31 +199,35 @@ void silk_interpolate( const opus_int d /* I number of parameters */ ); /* LTP tap quantizer */ void silk_quant_LTP_gains( opus_int16 B_Q14[ MAX_NB_SUBFR * LTP_ORDER ], /* I/O (un)quantized LTP gains */ opus_int8 cbk_index[ MAX_NB_SUBFR ], /* O Codebook Index */ opus_int8 *periodicity_index, /* O Periodicity Index */ + opus_int32 *sum_gain_dB_Q7, /* I/O Cumulative max prediction gain */ const opus_int32 W_Q18[ MAX_NB_SUBFR*LTP_ORDER*LTP_ORDER ], /* I Error Weights in Q18 */ opus_int mu_Q9, /* I Mu value (R/D tradeoff) */ opus_int lowComplexity, /* I Flag for low complexity */ const opus_int nb_subfr /* I number of subframes */ ); /* Entropy constrained matrix-weighted VQ, for a single input data vector */ void silk_VQ_WMat_EC( opus_int8 *ind, /* O index of best codebook vector */ opus_int32 *rate_dist_Q14, /* O best weighted quant error + mu * rate */ + opus_int *gain_Q7, /* O sum of absolute LTP coefficients */ const opus_int16 *in_Q14, /* I input vector to be quantized */ const opus_int32 *W_Q18, /* I weighting matrix */ const opus_int8 *cb_Q7, /* I codebook */ + const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */ const opus_uint8 *cl_Q5, /* I code length for each codebook vector */ const opus_int mu_Q9, /* I tradeoff betw. weighted error and rate */ + const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */ opus_int L /* I number of vectors in codebook */ ); /************************************/ /* Noise shaping quantization (NSQ) */ /************************************/ void silk_NSQ( const silk_encoder_state *psEncC, /* I/O Encoder State */
--- a/media/libopus/silk/quant_LTP_gains.c
+++ b/media/libopus/silk/quant_LTP_gains.c
@@ -25,83 +25,104 @@ ARISING IN ANY WAY OUT OF THE USE OF THI
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
 
 #include "main.h"
+#include "tuning_parameters.h"
 
 void silk_quant_LTP_gains(
     opus_int16 B_Q14[ MAX_NB_SUBFR * LTP_ORDER ], /* I/O (un)quantized LTP gains */
     opus_int8 cbk_index[ MAX_NB_SUBFR ], /* O Codebook Index */
     opus_int8 *periodicity_index, /* O Periodicity Index */
+    opus_int32 *sum_log_gain_Q7, /* I/O Cumulative max prediction gain */
     const opus_int32 W_Q18[ MAX_NB_SUBFR*LTP_ORDER*LTP_ORDER ], /* I Error Weights in Q18 */
     opus_int mu_Q9, /* I Mu value (R/D tradeoff) */
     opus_int lowComplexity, /* I Flag for low complexity */
     const opus_int nb_subfr /* I number of subframes */
 )
 {
     opus_int j, k, cbk_size;
     opus_int8 temp_idx[ MAX_NB_SUBFR ];
     const opus_uint8 *cl_ptr_Q5;
     const opus_int8 *cbk_ptr_Q7;
+    const opus_uint8 *cbk_gain_ptr_Q7;
     const opus_int16 *b_Q14_ptr;
     const opus_int32 *W_Q18_ptr;
     opus_int32 rate_dist_Q14_subfr, rate_dist_Q14, min_rate_dist_Q14;
+    opus_int32 sum_log_gain_tmp_Q7, best_sum_log_gain_Q7, max_gain_Q7, gain_Q7;
 
     /***************************************************/
     /* iterate over different codebooks with different */
     /* rates/distortions, and choose best */
     /***************************************************/
     min_rate_dist_Q14 = silk_int32_MAX;
+    best_sum_log_gain_Q7 = 0;
     for( k = 0; k < 3; k++ ) {
+        /* Safety margin for pitch gain control, to take into account factors
+           such as state rescaling/rewhitening. */
+        opus_int32 gain_safety = SILK_FIX_CONST( 0.4, 7 );
+
         cl_ptr_Q5 = silk_LTP_gain_BITS_Q5_ptrs[ k ];
         cbk_ptr_Q7 = silk_LTP_vq_ptrs_Q7[ k ];
+        cbk_gain_ptr_Q7 = silk_LTP_vq_gain_ptrs_Q7[ k ];
         cbk_size = silk_LTP_vq_sizes[ k ];
 
         /* Set up pointer to first subframe */
         W_Q18_ptr = W_Q18;
         b_Q14_ptr = B_Q14;
 
         rate_dist_Q14 = 0;
+        sum_log_gain_tmp_Q7 = *sum_log_gain_Q7;
         for( j = 0; j < nb_subfr; j++ ) {
+            max_gain_Q7 = silk_log2lin( ( SILK_FIX_CONST( MAX_SUM_LOG_GAIN_DB / 6.0, 7 ) - sum_log_gain_tmp_Q7 )
+                                        + SILK_FIX_CONST( 7, 7 ) ) - gain_safety;
+
             silk_VQ_WMat_EC(
                 &temp_idx[ j ], /* O index of best codebook vector */
                 &rate_dist_Q14_subfr, /* O best weighted quantization error + mu * rate */
+                &gain_Q7, /* O sum of absolute LTP coefficients */
                 b_Q14_ptr, /* I input vector to be quantized */
                 W_Q18_ptr, /* I weighting matrix */
                 cbk_ptr_Q7, /* I codebook */
+                cbk_gain_ptr_Q7, /* I codebook effective gains */
                 cl_ptr_Q5, /* I code length for each codebook vector */
                 mu_Q9, /* I tradeoff between weighted error and rate */
+                max_gain_Q7, /* I maximum sum of absolute LTP coefficients */
                 cbk_size /* I number of vectors in codebook */
             );
 
             rate_dist_Q14 = silk_ADD_POS_SAT32( rate_dist_Q14, rate_dist_Q14_subfr );
+            sum_log_gain_tmp_Q7 = silk_max(0, sum_log_gain_tmp_Q7
+                                + silk_lin2log( gain_safety + gain_Q7 ) - SILK_FIX_CONST( 7, 7 ));
 
             b_Q14_ptr += LTP_ORDER;
             W_Q18_ptr += LTP_ORDER * LTP_ORDER;
         }
 
         /* Avoid never finding a codebook */
         rate_dist_Q14 = silk_min( silk_int32_MAX - 1, rate_dist_Q14 );
 
         if( rate_dist_Q14 < min_rate_dist_Q14 ) {
             min_rate_dist_Q14 = rate_dist_Q14;
             *periodicity_index = (opus_int8)k;
             silk_memcpy( cbk_index, temp_idx, nb_subfr * sizeof( opus_int8 ) );
+            best_sum_log_gain_Q7 = sum_log_gain_tmp_Q7;
         }
 
         /* Break early in low-complexity mode if rate distortion is below threshold */
         if( lowComplexity && ( rate_dist_Q14 < silk_LTP_gain_middle_avg_RD_Q14 ) ) {
             break;
         }
     }
 
     cbk_ptr_Q7 = silk_LTP_vq_ptrs_Q7[ *periodicity_index ];
     for( j = 0; j < nb_subfr; j++ ) {
         for( k = 0; k < LTP_ORDER; k++ ) {
             B_Q14[ j * LTP_ORDER + k ] = silk_LSHIFT( cbk_ptr_Q7[ cbk_index[ j ] * LTP_ORDER + k ], 7 );
         }
     }
+    *sum_log_gain_Q7 = best_sum_log_gain_Q7;
 }
 
--- a/media/libopus/silk/resampler_private_IIR_FIR.c
+++ b/media/libopus/silk/resampler_private_IIR_FIR.c
@@ -28,17 +28,17 @@ POSSIBILITY OF SUCH DAMAGE.
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
 
 #include "SigProc_FIX.h"
 #include "resampler_private.h"
 #include "stack_alloc.h"
 
-static inline opus_int16 *silk_resampler_private_IIR_FIR_INTERPOL(
+static OPUS_INLINE opus_int16 *silk_resampler_private_IIR_FIR_INTERPOL(
     opus_int16 *out,
     opus_int16 *buf,
     opus_int32 max_index_Q16,
     opus_int32 index_increment_Q16
 )
 {
     opus_int32 index_Q16, res_Q15;
     opus_int16 *buf_ptr;
--- a/media/libopus/silk/resampler_private_down_FIR.c
+++ b/media/libopus/silk/resampler_private_down_FIR.c
@@ -28,17 +28,17 @@ POSSIBILITY OF SUCH DAMAGE.
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
 
 #include "SigProc_FIX.h"
 #include "resampler_private.h"
 #include "stack_alloc.h"
 
-static inline opus_int16 *silk_resampler_private_down_FIR_INTERPOL(
+static OPUS_INLINE opus_int16 *silk_resampler_private_down_FIR_INTERPOL(
     opus_int16 *out,
     opus_int32 *buf,
     const opus_int16 *FIR_Coefs,
     opus_int FIR_Order,
     opus_int FIR_Fracs,
     opus_int32 max_index_Q16,
     opus_int32 index_increment_Q16
 )
--- a/media/libopus/silk/shell_coder.c
+++ b/media/libopus/silk/shell_coder.c
@@ -28,41 +28,41 @@ POSSIBILITY OF SUCH DAMAGE.
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
 
 #include "main.h"
 
 /* shell coder; pulse-subframe length is hardcoded */
 
-static inline void combine_pulses(
+static OPUS_INLINE void combine_pulses(
     opus_int *out, /* O combined pulses vector [len] */
     const opus_int *in, /* I input vector [2 * len] */
     const opus_int len /* I number of OUTPUT samples */
 )
 {
     opus_int k;
     for( k = 0; k < len; k++ ) {
         out[ k ] = in[ 2 * k ] + in[ 2 * k + 1 ];
     }
 }
 
-static inline void encode_split(
+static OPUS_INLINE void encode_split(
     ec_enc *psRangeEnc, /* I/O compressor data structure */
     const opus_int p_child1, /* I pulse amplitude of first child subframe */
     const opus_int p, /* I pulse amplitude of current subframe */
     const opus_uint8 *shell_table /* I table of shell cdfs */
 )
 {
     if( p > 0 ) {
         ec_enc_icdf( psRangeEnc, p_child1, &shell_table[ silk_shell_code_table_offsets[ p ] ], 8 );
     }
 }
 
-static inline void decode_split(
+static OPUS_INLINE void decode_split(
     opus_int *p_child1, /* O pulse amplitude of first child subframe */
     opus_int *p_child2, /* O pulse amplitude of second child subframe */
     ec_dec *psRangeDec, /* I/O Compressor data structure */
     const opus_int p, /* I pulse amplitude of current subframe */
     const opus_uint8 *shell_table /* I table of shell cdfs */
 )
 {
     if( p > 0 ) {
--- a/media/libopus/silk/structs.h
+++ b/media/libopus/silk/structs.h
@@ -166,16 +166,17 @@ typedef struct {
     opus_int useInterpolatedNLSFs; /* Flag for using NLSF interpolation */
     opus_int shapingLPCOrder; /* Filter order for noise shaping filters */
     opus_int predictLPCOrder; /* Filter order for prediction filters */
     opus_int pitchEstimationComplexity; /* Complexity level for pitch estimator */
     opus_int pitchEstimationLPCOrder; /* Whitening filter order for pitch estimator */
     opus_int32 pitchEstimationThreshold_Q16; /* Threshold for pitch estimator */
     opus_int LTPQuantLowComplexity; /* Flag for low complexity LTP quantization */
     opus_int mu_LTP_Q9; /* Rate-distortion tradeoff in LTP quantization */
+    opus_int32 sum_log_gain_Q7; /* Cumulative max prediction gain */
     opus_int NLSF_MSVQ_Survivors; /* Number of survivors in NLSF MSVQ */
     opus_int first_frame_after_reset; /* Flag for deactivating NLSF interpolation, pitch prediction */
     opus_int controlled_since_last_payload; /* Flag for ensuring codec_control only runs once per packet */
     opus_int warping_Q16; /* Warping parameter for warped noise shaping */
     opus_int useCBR; /* Flag to enable constant bitrate */
     opus_int prefillFlag; /* Flag to indicate that only buffers are prefilled, no coding */
     const opus_uint8 *pitch_lag_low_bits_iCDF; /* Pointer to iCDF table for low bits of pitch lag index */
     const opus_uint8 *pitch_contour_iCDF; /* Pointer to iCDF table for pitch contour index */
@@ -186,16 +187,18 @@ typedef struct {
 
     opus_int8 VAD_flags[ MAX_FRAMES_PER_PACKET ];
     opus_int8 LBRR_flag;
     opus_int LBRR_flags[ MAX_FRAMES_PER_PACKET ];
 
     SideInfoIndices indices;
     opus_int8 pulses[ MAX_FRAME_LENGTH ];
 
+    int arch;
+
     /* Input/output buffering */
     opus_int16 inputBuf[ MAX_FRAME_LENGTH + 2 ]; /* Buffer containing input signal */
     opus_int inputBufIx;
     opus_int nFramesPerPacket;
     opus_int nFramesEncoded; /* Number of frames analyzed in current packet */
 
     opus_int nChannelsAPI;
     opus_int nChannelsInternal;
--- a/media/libopus/silk/tables.h
+++ b/media/libopus/silk/tables.h
@@ -73,16 +73,18 @@ extern const opus_uint8 silk_uniform8_i
 
 extern const opus_uint8 silk_NLSF_EXT_iCDF[ 7 ]; /* 7 */
 
 extern const opus_uint8 silk_LTP_per_index_iCDF[ 3 ]; /* 3 */
 extern const opus_uint8 * const silk_LTP_gain_iCDF_ptrs[ NB_LTP_CBKS ]; /* 3 */
 extern const opus_uint8 * const silk_LTP_gain_BITS_Q5_ptrs[ NB_LTP_CBKS ]; /* 3 */
 extern const opus_int16 silk_LTP_gain_middle_avg_RD_Q14;
 extern const opus_int8 * const silk_LTP_vq_ptrs_Q7[ NB_LTP_CBKS ]; /* 168 */
+extern const opus_uint8 * const silk_LTP_vq_gain_ptrs_Q7[NB_LTP_CBKS];
+
 extern const opus_int8 silk_LTP_vq_sizes[ NB_LTP_CBKS ]; /* 3 */
 
 extern const opus_uint8 silk_LTPscale_iCDF[ 3 ]; /* 4 */
 extern const opus_int16 silk_LTPScales_table_Q14[ 3 ]; /* 6 */
 
 extern const opus_uint8 silk_type_offset_VAD_iCDF[ 4 ]; /* 4 */
 extern const opus_uint8 silk_type_offset_no_VAD_iCDF[ 2 ]; /* 2 */
 
--- a/media/libopus/silk/tables_LTP.c
+++ b/media/libopus/silk/tables_LTP.c
@@ -262,11 +262,35 @@ static const opus_int8 silk_LTP_gain_vq_
 };
 
 const opus_int8 * const silk_LTP_vq_ptrs_Q7[NB_LTP_CBKS] = {
     (opus_int8 *)&silk_LTP_gain_vq_0[0][0],
     (opus_int8 *)&silk_LTP_gain_vq_1[0][0],
     (opus_int8 *)&silk_LTP_gain_vq_2[0][0]
 };
 
+/* Maximum frequency-dependent response of the pitch taps above,
+   computed as max(abs(freqz(taps))) */
+static const opus_uint8 silk_LTP_gain_vq_0_gain[8] = {
+    46, 2, 90, 87, 93, 91, 82, 98
+};
+
+static const opus_uint8 silk_LTP_gain_vq_1_gain[16] = {
+    109, 120, 118, 12, 113, 115, 117, 119,
+    99, 59, 87, 111, 63, 111, 112, 80
+};
+
+static const opus_uint8 silk_LTP_gain_vq_2_gain[32] = {
+    126, 124, 125, 124, 129, 121, 126, 23,
+    132, 127, 127, 127, 126, 127, 122, 133,
+    130, 134, 101, 118, 119, 145, 126, 86,
+    124, 120, 123, 119, 170, 173, 107, 109
+};
+
+const opus_uint8 * const silk_LTP_vq_gain_ptrs_Q7[NB_LTP_CBKS] = {
+    &silk_LTP_gain_vq_0_gain[0],
+    &silk_LTP_gain_vq_1_gain[0],
+    &silk_LTP_gain_vq_2_gain[0]
+};
+
 const opus_int8 silk_LTP_vq_sizes[NB_LTP_CBKS] = {
     8, 16, 32
 };
--- a/media/libopus/silk/tuning_parameters.h
+++ b/media/libopus/silk/tuning_parameters.h
@@ -45,29 +45,32 @@ extern "C"
 
 /* Bandwidth expansion for whitening filter in pitch analysis */
 #define FIND_PITCH_BANDWIDTH_EXPANSION 0.99f
 
 /*********************/
 /* Linear prediction */
 /*********************/
 
-/* LPC analysis defines: regularization and bandwidth expansion */
+/* LPC analysis regularization */
 #define FIND_LPC_COND_FAC 1e-5f
 
 /* LTP analysis defines */
 #define FIND_LTP_COND_FAC 1e-5f
 #define LTP_DAMPING 0.05f
 #define LTP_SMOOTHING 0.1f
 
 /* LTP quantization settings */
 #define MU_LTP_QUANT_NB 0.03f
 #define MU_LTP_QUANT_MB 0.025f
 #define MU_LTP_QUANT_WB 0.02f
 
+/* Max cumulative LTP gain */
+#define MAX_SUM_LOG_GAIN_DB 250.0f
+
 /***********************/
 /* High pass filtering */
 /***********************/
 
 /* Smoothing parameters for low end of pitch frequency range estimation */
 #define VARIABLE_HP_SMTH_COEF1 0.1f
 #define VARIABLE_HP_SMTH_COEF2 0.015f
 #define VARIABLE_HP_MAX_DELTA_FREQ 0.4f
--- a/media/libopus/silk/typedef.h
+++ b/media/libopus/silk/typedef.h
@@ -24,16 +24,17 @@ CONTRACT, STRICT LIABILITY, OR TORT (INC
 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
 #ifndef SILK_TYPEDEF_H
 #define SILK_TYPEDEF_H
 
 #include "opus_types.h"
+#include "opus_defines.h"
 
 #ifndef FIXED_POINT
 # include <float.h>
 # define silk_float float
 # define silk_float_MAX FLT_MAX
 #endif
 
 #define silk_int64_MAX ((opus_int64)0x7FFFFFFFFFFFFFFFLL) /* 2^63 - 1 */
@@ -58,17 +59,17 @@ POSSIBILITY OF SUCH DAMAGE.
 #else
 # ifdef ENABLE_ASSERTIONS
 # include <stdio.h>
 # include <stdlib.h>
 #define silk_fatal(str) _silk_fatal(str, __FILE__, __LINE__);
 #ifdef __GNUC__
 __attribute__((noreturn))
 #endif
-static inline void _silk_fatal(const char *str, const char *file, int line)
+static OPUS_INLINE void _silk_fatal(const char *str, const char *file, int line)
 {
     fprintf (stderr, "Fatal (internal) error in %s, line %d: %s\n", file, line, str);
     abort();
 }
 # define silk_assert(COND) {if (!(COND)) {silk_fatal("assertion failed: " #COND);}}
 # else
 # define silk_assert(COND)
 # endif
--- a/media/libopus/src/analysis.c
+++ b/media/libopus/src/analysis.c
@@ -110,17 +110,17 @@ static const int extra_bands[NB_TOT_BAND
 };*/
 
 #define NB_TONAL_SKIP_BANDS 9
 
 #define cA 0.43157974f
 #define cB 0.67848403f
 #define cC 0.08595542f
 #define cE ((float)M_PI/2)
-static inline float fast_atan2f(float y, float x) {
+static OPUS_INLINE float fast_atan2f(float y, float x) {
    float x2, y2;
    /* Should avoid underflow on the values we'll get */
    if (ABS16(x)+ABS16(y)<1e-9f)
    {
       x*=1e12f;
       y*=1e12f;
    }
    x2 = x*x;
@@ -179,22 +179,22 @@ void tonality_get_info(TonalityAnalysisS
    psum=0;
    /* Summing the probability of transition patterns that involve music at
       time (DETECT_SIZE-curr_lookahead-1) */
    for (i=0;i<DETECT_SIZE-curr_lookahead;i++)
       psum += tonal->pmusic[i];
    for (;i<DETECT_SIZE;i++)
       psum += tonal->pspeech[i];
    psum = psum*tonal->music_confidence + (1-psum)*tonal->speech_confidence;
-   /*printf("%f %f\n", psum, info_out->music_prob);*/
+   /*printf("%f %f %f\n", psum, info_out->music_prob, info_out->tonality);*/
 
    info_out->music_prob = psum;
 }
 
-void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, const CELTMode *celt_mode, const void *x, int len, int offset, int C, int lsb_depth, downmix_func downmix)
+void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix)
 {
    int i, b;
    const kiss_fft_state *kfft;
    VARDECL(kiss_fft_cpx, in);
    VARDECL(kiss_fft_cpx, out);
    int N = 480, N2=240;
    float * OPUS_RESTRICT A = tonal->angle;
    float * OPUS_RESTRICT dA = tonal->d_angle;
@@ -229,17 +229,17 @@ void tonality_analysis(TonalityAnalysisS
    alphaE = 1.f/IMIN(50, 1+tonal->count);
    alphaE2 = 1.f/IMIN(1000, 1+tonal->count);
 
    if (tonal->count<4)
       tonal->music_prob = .5;
    kfft = celt_mode->mdct.kfft[0];
    if (tonal->count==0)
       tonal->mem_fill = 240;
-   downmix(x, &tonal->inmem[tonal->mem_fill], IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill), offset, C);
+   downmix(x, &tonal->inmem[tonal->mem_fill], IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill), offset, c1, c2, C);
    if (tonal->mem_fill+len < ANALYSIS_BUF_SIZE)
    {
       tonal->mem_fill += len;
       /* Don't have enough to update the analysis */
       RESTORE_STACK;
       return;
    }
    info = &tonal->info[tonal->write_pos++];
@@ -248,37 +248,37 @@ void tonality_analysis(TonalityAnalysisS
 
    ALLOC(in, 480, kiss_fft_cpx);
    ALLOC(out, 480, kiss_fft_cpx);
    ALLOC(tonality, 240, float);
    ALLOC(noisiness, 240, float);
    for (i=0;i<N2;i++)
    {
       float w = analysis_window[i];
-      in[i].r = MULT16_16(w, tonal->inmem[i]);
-      in[i].i = MULT16_16(w, tonal->inmem[N2+i]);
-      in[N-i-1].r = MULT16_16(w, tonal->inmem[N-i-1]);
-      in[N-i-1].i = MULT16_16(w, tonal->inmem[N+N2-i-1]);
+      in[i].r = (kiss_fft_scalar)(w*tonal->inmem[i]);
+      in[i].i = (kiss_fft_scalar)(w*tonal->inmem[N2+i]);
+      in[N-i-1].r = (kiss_fft_scalar)(w*tonal->inmem[N-i-1]);
+      in[N-i-1].i = (kiss_fft_scalar)(w*tonal->inmem[N+N2-i-1]);
    }
    OPUS_MOVE(tonal->inmem, tonal->inmem+ANALYSIS_BUF_SIZE-240, 240);
    remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill);
-   downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, C);
+   downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, c1, c2, C);
    tonal->mem_fill = 240 + remaining;
    opus_fft(kfft, in, out);
 
    for (i=1;i<N2;i++)
    {
       float X1r, X2r, X1i, X2i;
       float angle, d_angle, d2_angle;
       float angle2, d_angle2, d2_angle2;
       float mod1, mod2, avg_mod;
-      X1r = out[i].r+out[N-i].r;
-      X1i = out[i].i-out[N-i].i;
-      X2r = out[i].i+out[N-i].i;
-      X2i = out[N-i].r-out[i].r;
+      X1r = (float)out[i].r+out[N-i].r;
+      X1i = (float)out[i].i-out[N-i].i;
+      X2r = (float)out[i].i+out[N-i].i;
+      X2i = (float)out[N-i].r-out[i].r;
 
       angle = (float)(.5f/M_PI)*fast_atan2f(X1i, X1r);
       d_angle = angle - A[i];
       d2_angle = d_angle - dA[i];
 
       angle2 = (float)(.5f/M_PI)*fast_atan2f(X2i, X2r);
       d_angle2 = angle2 - angle;
       d2_angle2 = d_angle2 - d_angle;
@@ -312,57 +312,60 @@ void tonality_analysis(TonalityAnalysisS
       for (b=0;b<NB_TBANDS;b++)
       {
          tonal->lowE[b] = 1e10;
          tonal->highE[b] = -1e10;
       }
    }
    relativeE = 0;
    frame_loudness = 0;
-   bandwidth_mask = 0;
    for (b=0;b<NB_TBANDS;b++)
    {
       float E=0, tE=0, nE=0;
       float L1, L2;
       float stationarity;
       for (i=tbands[b];i<tbands[b+1];i++)
       {
-         float binE = out[i].r*out[i].r + out[N-i].r*out[N-i].r
-                    + out[i].i*out[i].i + out[N-i].i*out[N-i].i;
+         float binE = out[i].r*(float)out[i].r + out[N-i].r*(float)out[N-i].r
+                    + out[i].i*(float)out[i].i + out[N-i].i*(float)out[N-i].i;
+#ifdef FIXED_POINT
+         /* FIXME: It's probably best to change the BFCC filter initial state instead */
+         binE *= 5.55e-17f;
+#endif
          E += binE;
          tE += binE*tonality[i];
          nE += binE*2.f*(.5f-noisiness[i]);
       }
       tonal->E[tonal->E_count][b] = E;
       frame_noisiness += nE/(1e-15f+E);
 
-      frame_loudness += celt_sqrt(E+1e-10f);
+      frame_loudness += (float)sqrt(E+1e-10f);
       logE[b] = (float)log(E+1e-10f);
       tonal->lowE[b] = MIN32(logE[b], tonal->lowE[b]+.01f);
       tonal->highE[b] = MAX32(logE[b], tonal->highE[b]-.1f);
       if (tonal->highE[b] < tonal->lowE[b]+1.f)
       {
          tonal->highE[b]+=.5f;
          tonal->lowE[b]-=.5f;
       }
-      relativeE += (logE[b]-tonal->lowE[b])/(EPSILON+tonal->highE[b]-tonal->lowE[b]);
+      relativeE += (logE[b]-tonal->lowE[b])/(1e-15f+tonal->highE[b]-tonal->lowE[b]);
 
       L1=L2=0;
       for (i=0;i<NB_FRAMES;i++)
       {
-         L1 += celt_sqrt(tonal->E[i][b]);
+         L1 += (float)sqrt(tonal->E[i][b]);
          L2 += tonal->E[i][b];
       }
 
-      stationarity = MIN16(0.99f,L1/celt_sqrt(EPSILON+NB_FRAMES*L2));
+      stationarity = MIN16(0.99f,L1/(float)sqrt(1e-15+NB_FRAMES*L2));
       stationarity *= stationarity;
       stationarity *= stationarity;
       frame_stationarity += stationarity;
       /*band_tonality[b] = tE/(1e-15+E)*/;
-      band_tonality[b] = MAX16(tE/(EPSILON+E), stationarity*tonal->prev_band_tonality[b]);
+      band_tonality[b] = MAX16(tE/(1e-15f+E), stationarity*tonal->prev_band_tonality[b]);
 #if 0
       if (b>=NB_TONAL_SKIP_BANDS)
       {
          frame_tonality += tweight[b]*band_tonality[b];
          tw_sum += tweight[b];
       }
 #else
       frame_tonality += band_tonality[b];
@@ -374,28 +377,31 @@ void tonality_analysis(TonalityAnalysisS
       /*printf("%f %f ", band_tonality[b], stationarity);*/
       tonal->prev_band_tonality[b] = band_tonality[b];
    }
 
    bandwidth_mask = 0;
    bandwidth = 0;
    maxE = 0;
    noise_floor = 5.7e-4f/(1<<(IMAX(0,lsb_depth-8)));
+#ifdef FIXED_POINT
+   noise_floor *= 1<<(15+SIG_SHIFT);
+#endif
    noise_floor *= noise_floor;
    for (b=0;b<NB_TOT_BANDS;b++)
    {
       float E=0;
       int band_start, band_end;
       /* Keep a margin of 300 Hz for aliasing */
       band_start = extra_bands[b];
       band_end = extra_bands[b+1];
       for (i=band_start;i<band_end;i++)
       {
-         float binE = out[i].r*out[i].r + out[N-i].r*out[N-i].r
-                    + out[i].i*out[i].i + out[N-i].i*out[N-i].i;
+         float binE = out[i].r*(float)out[i].r + out[N-i].r*(float)out[N-i].r
+                    + out[i].i*(float)out[i].i + out[N-i].i*(float)out[N-i].i;
          E += binE;
       }
       maxE = MAX32(maxE, E);
       tonal->meanE[b] = MAX32((1-alphaE2)*tonal->meanE[b], E);
       E = MAX32(E, tonal->meanE[b]);
       /* Use a simple follower with 13 dB/Bark slope for spreading function */
       bandwidth_mask = MAX32(.05f*bandwidth_mask, E);
       /* Consider the band "active" only if all these conditions are met:
@@ -464,24 +470,24 @@ void tonality_analysis(TonalityAnalysisS
    for (i=0;i<8;i++)
    {
       tonal->mem[i+24] = tonal->mem[i+16];
       tonal->mem[i+16] = tonal->mem[i+8];
       tonal->mem[i+8] = tonal->mem[i];
       tonal->mem[i] = BFCC[i];
    }
    for (i=0;i<9;i++)
-      features[11+i] = celt_sqrt(tonal->std[i]);
+      features[11+i] = (float)sqrt(tonal->std[i]);
    features[20] = info->tonality;
    features[21] = info->activity;
    features[22] = frame_stationarity;
    features[23] = info->tonality_slope;
    features[24] = tonal->lowECount;
 
-#ifndef FIXED_POINT
+#ifndef DISABLE_FLOAT_API
    mlp_process(&net, features, frame_probs);
    frame_probs[0] = .5f*(frame_probs[0]+1);
    /* Curve fitting between the MLP probability and the actual probability */
    frame_probs[0] = .01f + 1.21f*frame_probs[0]*frame_probs[0] - .23f*(float)pow(frame_probs[0], 10);
    /* Probability of active audio (as opposed to silence) */
    frame_probs[1] = .5f*frame_probs[1]+.5f;
    /* Consider that silence has a 50-50 probability. */
    frame_probs[0] = frame_probs[1]*frame_probs[0] + (1-frame_probs[1])*.5f;
@@ -585,17 +591,16 @@ void tonality_analysis(TonalityAnalysisS
             tonal->speech_confidence += adapt*MIN16(.2f,frame_probs[0]-tonal->speech_confidence);
          }
       } else {
          if (tonal->music_confidence_count==0)
             tonal->music_confidence = .9f;
          if (tonal->speech_confidence_count==0)
             tonal->speech_confidence = .1f;
       }
-      psum = MAX16(tonal->speech_confidence, MIN16(tonal->music_confidence, psum));
    }
    if (tonal->last_music != (tonal->music_prob>.5f))
       tonal->last_transition=0;
    tonal->last_music = tonal->music_prob>.5f;
 #else
    info->music_prob = 0;
 #endif
    /*for (i=0;i<25;i++)
@@ -606,49 +611,35 @@ void tonality_analysis(TonalityAnalysisS
    /*printf("%d %d\n", info->bandwidth, info->opus_bandwidth);*/
    info->noisiness = frame_noisiness;
    info->valid = 1;
    if (info_out!=NULL)
       OPUS_COPY(info_out, info, 1);
    RESTORE_STACK;
 }
 
-int run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *pcm,
-                 const void *analysis_pcm, int frame_size, int variable_duration, int C, opus_int32 Fs, int bitrate_bps,
-                 int delay_compensation, int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info)
+void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm,
+                 int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs,
+                 int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info)
 {
    int offset;
    int pcm_len;
 
-   /* Avoid overflow/wrap-around of the analysis buffer */
-   frame_size = IMIN((DETECT_SIZE-5)*Fs/100, frame_size);
-
-   pcm_len = frame_size - analysis->analysis_offset;
-   offset = 0;
-   do {
-      tonality_analysis(analysis, NULL, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, C, lsb_depth, downmix);
-      offset += 480;
-      pcm_len -= 480;
-   } while (pcm_len>0);
-   analysis->analysis_offset = frame_size;
-
-   if (variable_duration == OPUS_FRAMESIZE_VARIABLE && frame_size >= Fs/200)
+   if (analysis_pcm != NULL)
    {
-      int LM = 3;
-      LM = optimize_framesize((const opus_val16*)pcm, frame_size, C, Fs, bitrate_bps,
-            analysis->prev_tonality, analysis->subframe_mem, delay_compensation, downmix);
-      while ((Fs/400<<LM)>frame_size)
-         LM--;
-      frame_size = (Fs/400<<LM);
-   } else {
-      frame_size = frame_size_select(frame_size, variable_duration, Fs);
+      /* Avoid overflow/wrap-around of the analysis buffer */
+      analysis_frame_size = IMIN((DETECT_SIZE-5)*Fs/100, analysis_frame_size);
+
+      pcm_len = analysis_frame_size - analysis->analysis_offset;
+      offset = analysis->analysis_offset;
+      do {
+         tonality_analysis(analysis, NULL, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, c1, c2, C, lsb_depth, downmix);
+         offset += 480;
+         pcm_len -= 480;
+      } while (pcm_len>0);
+      analysis->analysis_offset = analysis_frame_size;
+
+      analysis->analysis_offset -= frame_size;
    }
-   if (frame_size<0)
-      return -1;
-   analysis->analysis_offset -= frame_size;
 
-   /* Only perform analysis up to 20-ms frames. Longer ones will be split if
-      they're in CELT-only mode. */
    analysis_info->valid = 0;
    tonality_get_info(analysis, analysis_info, frame_size);
-
-   return frame_size;
 }
--- a/media/libopus/src/analysis.h
+++ b/media/libopus/src/analysis.h
@@ -37,17 +37,17 @@
 #define ANALYSIS_BUF_SIZE 720 /* 15 ms at 48 kHz */
 
 #define DETECT_SIZE 200
 
 typedef struct {
    float angle[240];
    float d_angle[240];
    float d2_angle[240];
-   float inmem[ANALYSIS_BUF_SIZE];
+   opus_val32 inmem[ANALYSIS_BUF_SIZE];
    int mem_fill; /* number of usable samples in the buffer */
    float prev_band_tonality[NB_TBANDS];
    float prev_tonality;
    float E[NB_FRAMES][NB_TBANDS];
    float lowE[NB_TBANDS];
    float highE[NB_TBANDS];
    float meanE[NB_TOT_BANDS];
    float mem[32];
@@ -55,17 +55,17 @@ typedef struct {
    float std[9];
    float music_prob;
    float Etracker;
    float lowECount;
    int E_count;
    int last_music;
    int last_transition;
    int count;
-   opus_val32 subframe_mem[3];
+   float subframe_mem[3];
    int analysis_offset;
    /** Probability of having speech for time i to DETECT_SIZE-1 (and music before).
        pspeech[0] is the probability that all frames in the window are speech. */
    float pspeech[DETECT_SIZE];
    /** Probability of having music for time i to DETECT_SIZE-1 (and speech before).
        pmusic[0] is the probability that all frames in the window are music. */
    float pmusic[DETECT_SIZE];
    float speech_confidence;
@@ -74,17 +74,17 @@ typedef struct {
    int music_confidence_count;
    int write_pos;
    int read_pos;
    int read_subframe;
    AnalysisInfo info[DETECT_SIZE];
 } TonalityAnalysisState;
 
 void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info,
-     const CELTMode *celt_mode, const void *x, int len, int offset, int C, int lsb_depth, downmix_func downmix);
+     const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix);
 
 void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len);
 
-int run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *pcm,
-                 const void *analysis_pcm, int frame_size, int variable_duration, int C, opus_int32 Fs, int bitrate_bps,
-                 int delay_compensation, int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info);
+void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm,
+                 int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs,
+                 int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info);
 
 #endif
--- a/media/libopus/src/mlp.c
+++ b/media/libopus/src/mlp.c
@@ -24,72 +24,77 @@
    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
 
+#include "opus_types.h"
+#include "opus_defines.h"
+
 #include <math.h>
 #include "mlp.h"
 #include "arch.h"
 #include "tansig_table.h"
 #define MAX_NEURONS 100
 
-#ifdef FIXED_POINT
-static inline opus_val16 tansig_approx(opus_val32 _x) /* Q19 */
+#if 0
+static OPUS_INLINE opus_val16 tansig_approx(opus_val32 _x) /* Q19 */
 {
     int i;
     opus_val16 xx; /* Q11 */
     /*double x, y;*/
     opus_val16 dy, yy; /* Q14 */
     /*x = 1.9073e-06*_x;*/
-    if (_x>=QCONST32(10,19))
+    if (_x>=QCONST32(8,19))
         return QCONST32(1.,14);
-    if (_x<=-QCONST32(10,19))
+    if (_x<=-QCONST32(8,19))
         return -QCONST32(1.,14);
     xx = EXTRACT16(SHR32(_x, 8));
     /*i = lrint(25*x);*/
     i = SHR32(ADD32(1024,MULT16_16(25, xx)),11);
     /*x -= .04*i;*/
     xx -= EXTRACT16(SHR32(MULT16_16(20972,i),8));
     /*x = xx*(1./2048);*/
     /*y = tansig_table[250+i];*/
     yy = tansig_table[250+i];
     /*y = yy*(1./16384);*/
     dy = 16384-MULT16_16_Q14(yy,yy);
     yy = yy + MULT16_16_Q14(MULT16_16_Q11(xx,dy),(16384 - MULT16_16_Q11(yy,xx)));
     return yy;
 }
 #else
 /*extern const float tansig_table[501];*/
-static inline opus_val16 tansig_approx(opus_val16 x)
+static OPUS_INLINE float tansig_approx(float x)
 {
     int i;
-    opus_val16 y, dy;
-    opus_val16 sign=1;
-    if (x>=8)
+    float y, dy;
+    float sign=1;
+    /* Tests are reversed to catch NaNs */
+    if (!(x<8))
         return 1;
-    if (x<=-8)
+    if (!(x>-8))
         return -1;
     if (x<0)
     {
        x=-x;
        sign=-1;
     }
     i = (int)floor(.5f+25*x);
     x -= .04f*i;
     y = tansig_table[i];
     dy = 1-y*y;
     y = y + x*dy*(1 - y*x);
     return sign*y;
 }
 #endif
 
+#if 0
 void mlp_process(const MLP *m, const opus_val16 *in, opus_val16 *out)
 {
     int j;
     opus_val16 hidden[MAX_NEURONS];
     const opus_val16 *W = m->weights;
     /* Copy to tmp_in */
     for (j=0;j<m->topo[1];j++)
     {
@@ -103,9 +108,33 @@ void mlp_process(const MLP *m, const opu
     {
         int k;
         opus_val32 sum = SHL32(EXTEND32(*W++),14);
         for (k=0;k<m->topo[1];k++)
             sum = MAC16_16(sum, hidden[k], *W++);
         out[j] = tansig_approx(EXTRACT16(PSHR32(sum,17)));
     }
 }
-
+#else
+void mlp_process(const MLP *m, const float *in, float *out)
+{
+    int j;
+    float hidden[MAX_NEURONS];
+    const float *W = m->weights;
+    /* Copy to tmp_in */
+    for (j=0;j<m->topo[1];j++)
+    {
+        int k;
+        float sum = *W++;
+        for (k=0;k<m->topo[0];k++)
+            sum = sum + in[k]**W++;
+        hidden[j] = tansig_approx(sum);
+    }
+    for (j=0;j<m->topo[2];j++)
+    {
+        int k;
+        float sum = *W++;
+        for (k=0;k<m->topo[1];k++)
+            sum = sum + hidden[k]**W++;
+        out[j] = tansig_approx(sum);
+    }
+}
+#endif
--- a/media/libopus/src/mlp.h
+++ b/media/libopus/src/mlp.h
@@ -28,14 +28,14 @@
 #ifndef _MLP_H_
 #define _MLP_H_
 
 #include "arch.h"
 
 typedef struct {
     int layers;
     const int *topo;
-    const opus_val16 *weights;
+    const float *weights;
 } MLP;
 
-void mlp_process(const MLP *m, const opus_val16 *in, opus_val16 *out);
+void mlp_process(const MLP *m, const float *in, float *out);
 
 #endif /* _MLP_H_ */
--- a/media/libopus/src/opus.c +++ b/media/libopus/src/opus.c @@ -34,16 +34,18 @@ #ifndef DISABLE_FLOAT_API OPUS_EXPORT void opus_pcm_soft_clip(float *_x, int N, int C, float *declip_mem) { int c; int i; float *x; + if (C<1 || N<1 || !_x || !declip_mem) return; + /* First thing: saturate everything to +/- 2 which is the highest level our non-linearity can handle. At the point where the signal reaches +/-2, the derivative will be zero anyway, so this doesn't introduce any discontinuity in the derivative. */ for (i=0;i<N*C;i++) _x[i] = MAX16(-2.f, MIN16(2.f, _x[i])); for (c=0;c<C;c++) { @@ -139,8 +141,189 @@ int encode_size(int size, unsigned char return 1; } else { data[0] = 252+(size&0x3); data[1] = (size-(int)data[0])>>2; return 2; } } +static int parse_size(const unsigned char *data, opus_int32 len, opus_int16 *size) +{ + if (len<1) + { + *size = -1; + return -1; + } else if (data[0]<252) + { + *size = data[0]; + return 1; + } else if (len<2) + { + *size = -1; + return -1; + } else { + *size = 4*data[1] + data[0]; + return 2; + } +} + +int opus_packet_parse_impl(const unsigned char *data, opus_int32 len, + int self_delimited, unsigned char *out_toc, + const unsigned char *frames[48], opus_int16 size[48], + int *payload_offset, opus_int32 *packet_offset) +{ + int i, bytes; + int count; + int cbr; + unsigned char ch, toc; + int framesize; + opus_int32 last_size; + opus_int32 pad = 0; + const unsigned char *data0 = data; + + if (size==NULL) + return OPUS_BAD_ARG; + + framesize = opus_packet_get_samples_per_frame(data, 48000); + + cbr = 0; + toc = *data++; + len--; + last_size = len; + switch (toc&0x3) + { + /* One frame */ + case 0: + count=1; + break; + /* Two CBR frames */ + case 1: + count=2; + cbr = 1; + if (!self_delimited) + { + if (len&0x1) + return OPUS_INVALID_PACKET; + last_size = len/2; + /* If last_size doesn't fit in size[0], we'll catch it later */ + size[0] = (opus_int16)last_size; + } + break; + /* Two VBR frames */ + case 2: + count = 2; + bytes = 
parse_size(data, len, size); + len -= bytes; + if (size[0]<0 || size[0] > len) + return OPUS_INVALID_PACKET; + data += bytes; + last_size = len-size[0]; + break; + /* Multiple CBR/VBR frames (from 0 to 120 ms) */ + default: /*case 3:*/ + if (len<1) + return OPUS_INVALID_PACKET; + /* Number of frames encoded in bits 0 to 5 */ + ch = *data++; + count = ch&0x3F; + if (count <= 0 || framesize*count > 5760) + return OPUS_INVALID_PACKET; + len--; + /* Padding flag is bit 6 */ + if (ch&0x40) + { + int p; + do { + int tmp; + if (len<=0) + return OPUS_INVALID_PACKET; + p = *data++; + len--; + tmp = p==255 ? 254: p; + len -= tmp; + pad += tmp; + } while (p==255); + } + if (len<0) + return OPUS_INVALID_PACKET; + /* VBR flag is bit 7 */ + cbr = !(ch&0x80); + if (!cbr) + { + /* VBR case */ + last_size = len; + for (i=0;i<count-1;i++) + { + bytes = parse_size(data, len, size+i); + len -= bytes; + if (size[i]<0 || size[i] > len) + return OPUS_INVALID_PACKET; + data += bytes; + last_size -= bytes+size[i];