author | Ralph Giles <giles@mozilla.com> |
date | Mon, 30 Apr 2012 16:20:22 -0700 |
changeset 92771 | 8bb81a28639c3546a6dc1167b9e2e26eda8f0d73 |
parent 92770 | c0822f99d85085f9a5ed33abe80fbde7d654d976 |
child 92772 | 32e001c1351b8f84e72945241adf453008e50c8a |
push id | 8864 |
push user | tterriberry@mozilla.com |
push date | Mon, 30 Apr 2012 23:24:50 +0000 |
treeherder | mozilla-inbound@32e001c1351b |
reviewers | derf |
bugs | 674225 |
milestone | 15.0a1 |
first release with | nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
|
last release without | nightly linux32
nightly linux64
nightly mac
nightly win32
nightly win64
|
new file mode 100644
--- /dev/null
+++ b/media/libopus/COPYING
@@ -0,0 +1,27 @@
+Copyright 2001-2011 Xiph.Org, Skype Limited, Octasic,
+ Jean-Marc Valin, Timothy B. Terriberry,
+ CSIRO, Gregory Maxwell, Mark Borgerding,
+ Erik de Castro Lopo
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+- Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
new file mode 100644
--- /dev/null
+++ b/media/libopus/README_MOZILLA
@@ -0,0 +1,11 @@
+IETF Opus audio codec reference implementation.
+
+The source in this directory was copied from an opus
+repository checkout by running the ./update.sh script.
+Any changes made to this version of the source should
+be reflected in that script, e.g. by applying patch
+files after the copy step.
+
+The upstream repository is https://git.xiph.org/opus.git
+
+The git tag/revision used was v0.9.9.
new file mode 100644 --- /dev/null +++ b/media/libopus/celt/_kiss_fft_guts.h @@ -0,0 +1,176 @@ +/*Copyright (c) 2003-2004, Mark Borgerding + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE.*/ + +#ifndef KISS_FFT_GUTS_H +#define KISS_FFT_GUTS_H + +#define MIN(a,b) ((a)<(b) ? (a):(b)) +#define MAX(a,b) ((a)>(b) ? (a):(b)) + +/* kiss_fft.h + defines kiss_fft_scalar as either short or a float type + and defines + typedef struct { kiss_fft_scalar r; kiss_fft_scalar i; }kiss_fft_cpx; */ +#include "kiss_fft.h" + +/* + Explanation of macros dealing with complex math: + + C_MUL(m,a,b) : m = a*b + C_FIXDIV( c , div ) : if a fixed point impl., c /= div. 
noop otherwise + C_SUB( res, a,b) : res = a - b + C_SUBFROM( res , a) : res -= a + C_ADDTO( res , a) : res += a + * */ +#ifdef FIXED_POINT +#include "arch.h" + + +# define SAMPPROD long long +#define SAMP_MAX 2147483647 +#define TWID_MAX 32767 +#define TRIG_UPSCALE 1 + +#define SAMP_MIN -SAMP_MAX + + +# define S_MUL(a,b) MULT16_32_Q15(b, a) + +# define C_MUL(m,a,b) \ + do{ (m).r = SUB32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \ + (m).i = ADD32(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)); }while(0) + +# define C_MULC(m,a,b) \ + do{ (m).r = ADD32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \ + (m).i = SUB32(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0) + +# define C_MUL4(m,a,b) \ + do{ (m).r = SHR(SUB32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)),2); \ + (m).i = SHR(ADD32(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)),2); }while(0) + +# define C_MULBYSCALAR( c, s ) \ + do{ (c).r = S_MUL( (c).r , s ) ;\ + (c).i = S_MUL( (c).i , s ) ; }while(0) + +# define DIVSCALAR(x,k) \ + (x) = S_MUL( x, (TWID_MAX-((k)>>1))/(k)+1 ) + +# define C_FIXDIV(c,div) \ + do { DIVSCALAR( (c).r , div); \ + DIVSCALAR( (c).i , div); }while (0) + +#define C_ADD( res, a,b)\ + do {(res).r=ADD32((a).r,(b).r); (res).i=ADD32((a).i,(b).i); \ + }while(0) +#define C_SUB( res, a,b)\ + do {(res).r=SUB32((a).r,(b).r); (res).i=SUB32((a).i,(b).i); \ + }while(0) +#define C_ADDTO( res , a)\ + do {(res).r = ADD32((res).r, (a).r); (res).i = ADD32((res).i,(a).i);\ + }while(0) + +#define C_SUBFROM( res , a)\ + do {(res).r = ADD32((res).r,(a).r); (res).i = SUB32((res).i,(a).i); \ + }while(0) + +#else /* not FIXED_POINT*/ + +# define S_MUL(a,b) ( (a)*(b) ) +#define C_MUL(m,a,b) \ + do{ (m).r = (a).r*(b).r - (a).i*(b).i;\ + (m).i = (a).r*(b).i + (a).i*(b).r; }while(0) +#define C_MULC(m,a,b) \ + do{ (m).r = (a).r*(b).r + (a).i*(b).i;\ + (m).i = (a).i*(b).r - (a).r*(b).i; }while(0) + +#define C_MUL4(m,a,b) C_MUL(m,a,b) + +# define C_FIXDIV(c,div) /* NOOP */ +# define C_MULBYSCALAR( c, s ) \ + do{ (c).r *= (s);\ + (c).i *= (s); }while(0) +#endif + +#ifndef CHECK_OVERFLOW_OP +# define CHECK_OVERFLOW_OP(a,op,b) /* noop */ +#endif + +#ifndef C_ADD +#define C_ADD( res, a,b)\ + do { \ + CHECK_OVERFLOW_OP((a).r,+,(b).r)\ + CHECK_OVERFLOW_OP((a).i,+,(b).i)\ + (res).r=(a).r+(b).r; (res).i=(a).i+(b).i; \ + }while(0) +#define C_SUB( res, a,b)\ + do { \ + CHECK_OVERFLOW_OP((a).r,-,(b).r)\ + CHECK_OVERFLOW_OP((a).i,-,(b).i)\ + (res).r=(a).r-(b).r; (res).i=(a).i-(b).i; \ + }while(0) +#define C_ADDTO( res , a)\ + do { \ + CHECK_OVERFLOW_OP((res).r,+,(a).r)\ + CHECK_OVERFLOW_OP((res).i,+,(a).i)\ + (res).r += (a).r; (res).i += (a).i;\ + }while(0) + +#define C_SUBFROM( res , a)\ + do {\ + CHECK_OVERFLOW_OP((res).r,-,(a).r)\ + CHECK_OVERFLOW_OP((res).i,-,(a).i)\ + (res).r -= (a).r; (res).i -= (a).i; \ + }while(0) +#endif /* C_ADD defined */ + +#ifdef FIXED_POINT +/*# define KISS_FFT_COS(phase) TRIG_UPSCALE*floor(MIN(32767,MAX(-32767,.5+32768 * cos (phase)))) +# define KISS_FFT_SIN(phase) TRIG_UPSCALE*floor(MIN(32767,MAX(-32767,.5+32768 * sin (phase))))*/ +# define KISS_FFT_COS(phase) floor(.5+TWID_MAX*cos (phase)) +# define KISS_FFT_SIN(phase) floor(.5+TWID_MAX*sin (phase)) +# define HALF_OF(x) ((x)>>1) +#elif defined(USE_SIMD) +# define KISS_FFT_COS(phase) _mm_set1_ps( cos(phase) ) +# define KISS_FFT_SIN(phase) _mm_set1_ps( sin(phase) ) +# define HALF_OF(x) ((x)*_mm_set1_ps(.5f)) +#else +# define KISS_FFT_COS(phase) (kiss_fft_scalar) cos(phase) +# define KISS_FFT_SIN(phase) (kiss_fft_scalar) sin(phase) +# define HALF_OF(x) ((x)*.5f) +#endif + +#define 
kf_cexp(x,phase) \ + do{ \ + (x)->r = KISS_FFT_COS(phase);\ + (x)->i = KISS_FFT_SIN(phase);\ + }while(0) + +#define kf_cexp2(x,phase) \ + do{ \ + (x)->r = TRIG_UPSCALE*celt_cos_norm((phase));\ + (x)->i = TRIG_UPSCALE*celt_cos_norm((phase)-32768);\ +}while(0) + +#endif /* KISS_FFT_GUTS_H */
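For reference, the float (non-FIXED_POINT) branch of the macros above reduces to ordinary complex arithmetic. A minimal standalone sketch of what C_MUL and kf_cexp compute in that branch, using illustrative names (cpx, cmul, twiddle) rather than the real kiss_fft_cpx type from the tree:

#include <math.h>
#include <stdio.h>

/* Illustrative stand-in for kiss_fft_cpx in a float build (the real type
   comes from kiss_fft.h). */
typedef struct { float r; float i; } cpx;

/* Same arithmetic as the float-path C_MUL macro: m = a*b as complex numbers. */
static cpx cmul(cpx a, cpx b)
{
    cpx m;
    m.r = a.r*b.r - a.i*b.i;
    m.i = a.r*b.i + a.i*b.r;
    return m;
}

/* Same arithmetic as kf_cexp: x = exp(j*phase), i.e. a unit twiddle factor. */
static cpx twiddle(double phase)
{
    cpx x;
    x.r = (float)cos(phase);
    x.i = (float)sin(phase);
    return x;
}

int main(void)
{
    const double pi_over_4 = atan(1.0); /* pi/4 */
    cpx a = {1.0f, 2.0f};
    cpx m = cmul(a, twiddle(pi_over_4)); /* rotate a by 45 degrees */
    printf("%f %+fi\n", m.r, m.i);
    return 0;
}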
new file mode 100644 --- /dev/null +++ b/media/libopus/celt/arch.h @@ -0,0 +1,208 @@ +/* Copyright (c) 2003-2008 Jean-Marc Valin + Copyright (c) 2007-2008 CSIRO + Copyright (c) 2007-2009 Xiph.Org Foundation + Written by Jean-Marc Valin */ +/** + @file arch.h + @brief Various architecture definitions for CELT +*/ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef ARCH_H +#define ARCH_H + +#include "opus_types.h" + +# if !defined(__GNUC_PREREQ) +# if defined(__GNUC__)&&defined(__GNUC_MINOR__) +# define __GNUC_PREREQ(_maj,_min) \ + ((__GNUC__<<16)+__GNUC_MINOR__>=((_maj)<<16)+(_min)) +# else +# define __GNUC_PREREQ(_maj,_min) 0 +# endif +# endif + +#define CELT_SIG_SCALE 32768.f + +#define celt_fatal(str) _celt_fatal(str, __FILE__, __LINE__); +#ifdef ENABLE_ASSERTIONS +#include <stdio.h> +#include <stdlib.h> +#ifdef __GNUC__ +__attribute__((noreturn)) +#endif +static inline void _celt_fatal(const char *str, const char *file, int line) +{ + fprintf (stderr, "Fatal (internal) error in %s, line %d: %s\n", file, line, str); + abort(); +} +#define celt_assert(cond) {if (!(cond)) {celt_fatal("assertion failed: " #cond);}} +#define celt_assert2(cond, message) {if (!(cond)) {celt_fatal("assertion failed: " #cond "\n" message);}} +#else +#define celt_assert(cond) +#define celt_assert2(cond, message) +#endif + +#define IMUL32(a,b) ((a)*(b)) + +#define ABS(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute integer value. */ +#define ABS16(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute 16-bit value. */ +#define MIN16(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum 16-bit value. */ +#define MAX16(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 16-bit value. */ +#define ABS32(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute 32-bit value. */ +#define MIN32(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum 32-bit value. */ +#define MAX32(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 32-bit value. */ +#define IMIN(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum int value. */ +#define IMAX(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum int value. 
*/ +#define UADD32(a,b) ((a)+(b)) +#define USUB32(a,b) ((a)-(b)) + +#define PRINT_MIPS(file) + +#ifdef FIXED_POINT + +typedef opus_int16 opus_val16; +typedef opus_int32 opus_val32; + +typedef opus_val32 celt_sig; +typedef opus_val16 celt_norm; +typedef opus_val32 celt_ener; + +#define Q15ONE 32767 + +#define SIG_SHIFT 12 + +#define NORM_SCALING 16384 + +#define DB_SHIFT 10 + +#define EPSILON 1 +#define VERY_LARGE16 ((opus_val16)32767) +#define Q15_ONE ((opus_val16)32767) + +#define SCALEIN(a) (a) +#define SCALEOUT(a) (a) + +#ifdef FIXED_DEBUG +#include "fixed_debug.h" +#else + +#include "fixed_generic.h" + +#ifdef ARM5E_ASM +#include "fixed_arm5e.h" +#elif defined (ARM4_ASM) +#include "fixed_arm4.h" +#elif defined (BFIN_ASM) +#include "fixed_bfin.h" +#elif defined (TI_C5X_ASM) +#include "fixed_c5x.h" +#elif defined (TI_C6X_ASM) +#include "fixed_c6x.h" +#endif + +#endif + +#else /* FIXED_POINT */ + +typedef float opus_val16; +typedef float opus_val32; + +typedef float celt_sig; +typedef float celt_norm; +typedef float celt_ener; + +#define Q15ONE 1.0f + +#define NORM_SCALING 1.f + +#define EPSILON 1e-15f +#define VERY_LARGE16 1e15f +#define Q15_ONE ((opus_val16)1.f) + +#define QCONST16(x,bits) (x) +#define QCONST32(x,bits) (x) + +#define NEG16(x) (-(x)) +#define NEG32(x) (-(x)) +#define EXTRACT16(x) (x) +#define EXTEND32(x) (x) +#define SHR16(a,shift) (a) +#define SHL16(a,shift) (a) +#define SHR32(a,shift) (a) +#define SHL32(a,shift) (a) +#define PSHR32(a,shift) (a) +#define VSHR32(a,shift) (a) + +#define PSHR(a,shift) (a) +#define SHR(a,shift) (a) +#define SHL(a,shift) (a) +#define SATURATE(x,a) (x) + +#define ROUND16(a,shift) (a) +#define HALF16(x) (.5f*(x)) +#define HALF32(x) (.5f*(x)) + +#define ADD16(a,b) ((a)+(b)) +#define SUB16(a,b) ((a)-(b)) +#define ADD32(a,b) ((a)+(b)) +#define SUB32(a,b) ((a)-(b)) +#define MULT16_16_16(a,b) ((a)*(b)) +#define MULT16_16(a,b) ((opus_val32)(a)*(opus_val32)(b)) +#define MAC16_16(c,a,b) ((c)+(opus_val32)(a)*(opus_val32)(b)) + +#define MULT16_32_Q15(a,b) ((a)*(b)) +#define MULT16_32_Q16(a,b) ((a)*(b)) + +#define MULT32_32_Q31(a,b) ((a)*(b)) + +#define MAC16_32_Q15(c,a,b) ((c)+(a)*(b)) + +#define MULT16_16_Q11_32(a,b) ((a)*(b)) +#define MULT16_16_Q13(a,b) ((a)*(b)) +#define MULT16_16_Q14(a,b) ((a)*(b)) +#define MULT16_16_Q15(a,b) ((a)*(b)) +#define MULT16_16_P15(a,b) ((a)*(b)) +#define MULT16_16_P13(a,b) ((a)*(b)) +#define MULT16_16_P14(a,b) ((a)*(b)) + +#define DIV32_16(a,b) (((opus_val32)(a))/(opus_val16)(b)) +#define DIV32(a,b) (((opus_val32)(a))/(opus_val32)(b)) + +#define SCALEIN(a) ((a)*CELT_SIG_SCALE) +#define SCALEOUT(a) ((a)*(1/CELT_SIG_SCALE)) + +#endif /* !FIXED_POINT */ + +#ifndef GLOBAL_STACK_SIZE +#ifdef FIXED_POINT +#define GLOBAL_STACK_SIZE 100000 +#else +#define GLOBAL_STACK_SIZE 100000 +#endif +#endif + +#endif /* ARCH_H */
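To make the two build modes above concrete: in a FIXED_POINT build opus_val16 is a Q15 integer and a macro like MULT16_16_Q15 is a widen-multiply-shift, while the float build defines the same macro as a plain product. A small standalone sketch under those assumptions (the helper names qconst16_q15 and mult16_16_q15 are invented for the example, not taken from the tree):

#include <stdint.h>
#include <stdio.h>

/* Q15 fixed point: the real number x is stored as round(x * 32768),
   which is what QCONST16(x,15) does in a FIXED_POINT build. */
static int16_t qconst16_q15(double x)
{
    return (int16_t)(x * 32768.0 + 0.5);
}

/* The shape of MULT16_16_Q15 in a FIXED_POINT build: widen to 32 bits,
   multiply (giving a Q30 product), shift back down to Q15. */
static int16_t mult16_16_q15(int16_t a, int16_t b)
{
    return (int16_t)(((int32_t)a * (int32_t)b) >> 15);
}

int main(void)
{
    int16_t a = qconst16_q15(0.70710678); /* ~1/sqrt(2) in Q15 */
    int16_t b = qconst16_q15(0.5);
    /* Convert back to a real number for display and compare with the
       float build, where the same macro is simply (a)*(b). */
    printf("fixed: %f\n", mult16_16_q15(a, b) / 32768.0);
    printf("float: %f\n", 0.70710678 * 0.5);
    return 0;
}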
new file mode 100644 --- /dev/null +++ b/media/libopus/celt/bands.c @@ -0,0 +1,1297 @@ +/* Copyright (c) 2007-2008 CSIRO + Copyright (c) 2007-2009 Xiph.Org Foundation + Copyright (c) 2008-2009 Gregory Maxwell + Written by Jean-Marc Valin and Gregory Maxwell */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <math.h> +#include "bands.h" +#include "modes.h" +#include "vq.h" +#include "cwrs.h" +#include "stack_alloc.h" +#include "os_support.h" +#include "mathops.h" +#include "rate.h" + +opus_uint32 celt_lcg_rand(opus_uint32 seed) +{ + return 1664525 * seed + 1013904223; +} + +/* This is a cos() approximation designed to be bit-exact on any platform. 
Bit exactness + with this approximation is important because it has an impact on the bit allocation */ +static opus_int16 bitexact_cos(opus_int16 x) +{ + opus_int32 tmp; + opus_int16 x2; + tmp = (4096+((opus_int32)(x)*(x)))>>13; + celt_assert(tmp<=32767); + x2 = tmp; + x2 = (32767-x2) + FRAC_MUL16(x2, (-7651 + FRAC_MUL16(x2, (8277 + FRAC_MUL16(-626, x2))))); + celt_assert(x2<=32766); + return 1+x2; +} + +static int bitexact_log2tan(int isin,int icos) +{ + int lc; + int ls; + lc=EC_ILOG(icos); + ls=EC_ILOG(isin); + icos<<=15-lc; + isin<<=15-ls; + return (ls-lc)*(1<<11) + +FRAC_MUL16(isin, FRAC_MUL16(isin, -2597) + 7932) + -FRAC_MUL16(icos, FRAC_MUL16(icos, -2597) + 7932); +} + +#ifdef FIXED_POINT +/* Compute the amplitude (sqrt energy) in each of the bands */ +void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M) +{ + int i, c, N; + const opus_int16 *eBands = m->eBands; + N = M*m->shortMdctSize; + c=0; do { + for (i=0;i<end;i++) + { + int j; + opus_val32 maxval=0; + opus_val32 sum = 0; + + j=M*eBands[i]; do { + maxval = MAX32(maxval, X[j+c*N]); + maxval = MAX32(maxval, -X[j+c*N]); + } while (++j<M*eBands[i+1]); + + if (maxval > 0) + { + int shift = celt_ilog2(maxval)-10; + j=M*eBands[i]; do { + sum = MAC16_16(sum, EXTRACT16(VSHR32(X[j+c*N],shift)), + EXTRACT16(VSHR32(X[j+c*N],shift))); + } while (++j<M*eBands[i+1]); + /* We're adding one here to make damn sure we never end up with a pitch vector that's + larger than unity norm */ + bandE[i+c*m->nbEBands] = EPSILON+VSHR32(EXTEND32(celt_sqrt(sum)),-shift); + } else { + bandE[i+c*m->nbEBands] = EPSILON; + } + /*printf ("%f ", bandE[i+c*m->nbEBands]);*/ + } + } while (++c<C); + /*printf ("\n");*/ +} + +/* Normalise each band such that the energy is one. */ +void normalise_bands(const CELTMode *m, const celt_sig * restrict freq, celt_norm * restrict X, const celt_ener *bandE, int end, int C, int M) +{ + int i, c, N; + const opus_int16 *eBands = m->eBands; + N = M*m->shortMdctSize; + c=0; do { + i=0; do { + opus_val16 g; + int j,shift; + opus_val16 E; + shift = celt_zlog2(bandE[i+c*m->nbEBands])-13; + E = VSHR32(bandE[i+c*m->nbEBands], shift); + g = EXTRACT16(celt_rcp(SHL32(E,3))); + j=M*eBands[i]; do { + X[j+c*N] = MULT16_16_Q15(VSHR32(freq[j+c*N],shift-1),g); + } while (++j<M*eBands[i+1]); + } while (++i<end); + } while (++c<C); +} + +#else /* FIXED_POINT */ +/* Compute the amplitude (sqrt energy) in each of the bands */ +void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M) +{ + int i, c, N; + const opus_int16 *eBands = m->eBands; + N = M*m->shortMdctSize; + c=0; do { + for (i=0;i<end;i++) + { + int j; + opus_val32 sum = 1e-27f; + for (j=M*eBands[i];j<M*eBands[i+1];j++) + sum += X[j+c*N]*X[j+c*N]; + bandE[i+c*m->nbEBands] = celt_sqrt(sum); + /*printf ("%f ", bandE[i+c*m->nbEBands]);*/ + } + } while (++c<C); + /*printf ("\n");*/ +} + +/* Normalise each band such that the energy is one. 
*/ +void normalise_bands(const CELTMode *m, const celt_sig * restrict freq, celt_norm * restrict X, const celt_ener *bandE, int end, int C, int M) +{ + int i, c, N; + const opus_int16 *eBands = m->eBands; + N = M*m->shortMdctSize; + c=0; do { + for (i=0;i<end;i++) + { + int j; + opus_val16 g = 1.f/(1e-27f+bandE[i+c*m->nbEBands]); + for (j=M*eBands[i];j<M*eBands[i+1];j++) + X[j+c*N] = freq[j+c*N]*g; + } + } while (++c<C); +} + +#endif /* FIXED_POINT */ + +/* De-normalise the energy to produce the synthesis from the unit-energy bands */ +void denormalise_bands(const CELTMode *m, const celt_norm * restrict X, celt_sig * restrict freq, const celt_ener *bandE, int end, int C, int M) +{ + int i, c, N; + const opus_int16 *eBands = m->eBands; + N = M*m->shortMdctSize; + celt_assert2(C<=2, "denormalise_bands() not implemented for >2 channels"); + c=0; do { + celt_sig * restrict f; + const celt_norm * restrict x; + f = freq+c*N; + x = X+c*N; + for (i=0;i<end;i++) + { + int j, band_end; + opus_val32 g = SHR32(bandE[i+c*m->nbEBands],1); + j=M*eBands[i]; + band_end = M*eBands[i+1]; + do { + *f++ = SHL32(MULT16_32_Q15(*x, g),2); + x++; + } while (++j<band_end); + } + for (i=M*eBands[end];i<N;i++) + *f++ = 0; + } while (++c<C); +} + +/* This prevents energy collapse for transients with multiple short MDCTs */ +void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_masks, int LM, int C, int CC, int size, + int start, int end, opus_val16 *logE, opus_val16 *prev1logE, + opus_val16 *prev2logE, int *pulses, opus_uint32 seed) +{ + int c, i, j, k; + for (i=start;i<end;i++) + { + int N0; + opus_val16 thresh, sqrt_1; + int depth; +#ifdef FIXED_POINT + int shift; +#endif + + N0 = m->eBands[i+1]-m->eBands[i]; + /* depth in 1/8 bits */ + depth = (1+pulses[i])/((m->eBands[i+1]-m->eBands[i])<<LM); + +#ifdef FIXED_POINT + thresh = MULT16_32_Q15(QCONST16(0.5f, 15), MIN32(32767,SHR32(celt_exp2(-SHL16(depth, 10-BITRES)),1) )); + { + opus_val32 t; + t = N0<<LM; + shift = celt_ilog2(t)>>1; + t = SHL32(t, (7-shift)<<1); + sqrt_1 = celt_rsqrt_norm(t); + } +#else + thresh = .5f*celt_exp2(-.125f*depth); + sqrt_1 = celt_rsqrt(N0<<LM); +#endif + + c=0; do + { + celt_norm *X; + opus_val16 prev1; + opus_val16 prev2; + opus_val32 Ediff; + opus_val16 r; + int renormalize=0; + prev1 = prev1logE[c*m->nbEBands+i]; + prev2 = prev2logE[c*m->nbEBands+i]; + if (C==1) + { + prev1 = MAX16(prev1,prev1logE[m->nbEBands+i]); + prev2 = MAX16(prev2,prev2logE[m->nbEBands+i]); + } + Ediff = EXTEND32(logE[c*m->nbEBands+i])-EXTEND32(MIN16(prev1,prev2)); + Ediff = MAX16(0, Ediff); + +#ifdef FIXED_POINT + if (Ediff < 16384) + r = 2*MIN16(16383,SHR32(celt_exp2(-EXTRACT16(Ediff)),1)); + else + r = 0; + if (LM==3) + r = MULT16_16_Q14(23170, MIN32(23169, r)); + r = SHR16(MIN16(thresh, r),1); + r = SHR32(MULT16_16_Q15(sqrt_1, r),shift); +#else + /* r needs to be multiplied by 2 or 2*sqrt(2) depending on LM because + short blocks don't have the same energy as long */ + r = 2.f*celt_exp2(-Ediff); + if (LM==3) + r *= 1.41421356f; + r = MIN16(thresh, r); + r = r*sqrt_1; +#endif + X = X_+c*size+(m->eBands[i]<<LM); + for (k=0;k<1<<LM;k++) + { + /* Detect collapse */ + if (!(collapse_masks[i*C+c]&1<<k)) + { + /* Fill with noise */ + for (j=0;j<N0;j++) + { + seed = celt_lcg_rand(seed); + X[(j<<LM)+k] = (seed&0x8000 ? 
r : -r); + } + renormalize = 1; + } + } + /* We just added some energy, so we need to renormalise */ + if (renormalize) + renormalise_vector(X, N0<<LM, Q15ONE); + } while (++c<C); + } +} + +static void intensity_stereo(const CELTMode *m, celt_norm *X, celt_norm *Y, const celt_ener *bandE, int bandID, int N) +{ + int i = bandID; + int j; + opus_val16 a1, a2; + opus_val16 left, right; + opus_val16 norm; +#ifdef FIXED_POINT + int shift = celt_zlog2(MAX32(bandE[i], bandE[i+m->nbEBands]))-13; +#endif + left = VSHR32(bandE[i],shift); + right = VSHR32(bandE[i+m->nbEBands],shift); + norm = EPSILON + celt_sqrt(EPSILON+MULT16_16(left,left)+MULT16_16(right,right)); + a1 = DIV32_16(SHL32(EXTEND32(left),14),norm); + a2 = DIV32_16(SHL32(EXTEND32(right),14),norm); + for (j=0;j<N;j++) + { + celt_norm r, l; + l = X[j]; + r = Y[j]; + X[j] = MULT16_16_Q14(a1,l) + MULT16_16_Q14(a2,r); + /* Side is not encoded, no need to calculate */ + } +} + +static void stereo_split(celt_norm *X, celt_norm *Y, int N) +{ + int j; + for (j=0;j<N;j++) + { + celt_norm r, l; + l = MULT16_16_Q15(QCONST16(.70710678f,15), X[j]); + r = MULT16_16_Q15(QCONST16(.70710678f,15), Y[j]); + X[j] = l+r; + Y[j] = r-l; + } +} + +static void stereo_merge(celt_norm *X, celt_norm *Y, opus_val16 mid, int N) +{ + int j; + opus_val32 xp=0, side=0; + opus_val32 El, Er; + opus_val16 mid2; +#ifdef FIXED_POINT + int kl, kr; +#endif + opus_val32 t, lgain, rgain; + + /* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */ + for (j=0;j<N;j++) + { + xp = MAC16_16(xp, X[j], Y[j]); + side = MAC16_16(side, Y[j], Y[j]); + } + /* Compensating for the mid normalization */ + xp = MULT16_32_Q15(mid, xp); + /* mid and side are in Q15, not Q14 like X and Y */ + mid2 = SHR32(mid, 1); + El = MULT16_16(mid2, mid2) + side - 2*xp; + Er = MULT16_16(mid2, mid2) + side + 2*xp; + if (Er < QCONST32(6e-4f, 28) || El < QCONST32(6e-4f, 28)) + { + for (j=0;j<N;j++) + Y[j] = X[j]; + return; + } + +#ifdef FIXED_POINT + kl = celt_ilog2(El)>>1; + kr = celt_ilog2(Er)>>1; +#endif + t = VSHR32(El, (kl-7)<<1); + lgain = celt_rsqrt_norm(t); + t = VSHR32(Er, (kr-7)<<1); + rgain = celt_rsqrt_norm(t); + +#ifdef FIXED_POINT + if (kl < 7) + kl = 7; + if (kr < 7) + kr = 7; +#endif + + for (j=0;j<N;j++) + { + celt_norm r, l; + /* Apply mid scaling (side is already scaled) */ + l = MULT16_16_Q15(mid, X[j]); + r = Y[j]; + X[j] = EXTRACT16(PSHR32(MULT16_16(lgain, SUB16(l,r)), kl+1)); + Y[j] = EXTRACT16(PSHR32(MULT16_16(rgain, ADD16(l,r)), kr+1)); + } +} + +/* Decide whether we should spread the pulses in the current frame */ +int spreading_decision(const CELTMode *m, celt_norm *X, int *average, + int last_decision, int *hf_average, int *tapset_decision, int update_hf, + int end, int C, int M) +{ + int i, c, N0; + int sum = 0, nbBands=0; + const opus_int16 * restrict eBands = m->eBands; + int decision; + int hf_sum=0; + + celt_assert(end>0); + + N0 = M*m->shortMdctSize; + + if (M*(eBands[end]-eBands[end-1]) <= 8) + return SPREAD_NONE; + c=0; do { + for (i=0;i<end;i++) + { + int j, N, tmp=0; + int tcount[3] = {0,0,0}; + celt_norm * restrict x = X+M*eBands[i]+c*N0; + N = M*(eBands[i+1]-eBands[i]); + if (N<=8) + continue; + /* Compute rough CDF of |x[j]| */ + for (j=0;j<N;j++) + { + opus_val32 x2N; /* Q13 */ + + x2N = MULT16_16(MULT16_16_Q15(x[j], x[j]), N); + if (x2N < QCONST16(0.25f,13)) + tcount[0]++; + if (x2N < QCONST16(0.0625f,13)) + tcount[1]++; + if (x2N < QCONST16(0.015625f,13)) + tcount[2]++; + } + + /* Only include four last bands (8 kHz and up) */ + if (i>m->nbEBands-4) + hf_sum 
+= 32*(tcount[1]+tcount[0])/N; + tmp = (2*tcount[2] >= N) + (2*tcount[1] >= N) + (2*tcount[0] >= N); + sum += tmp*256; + nbBands++; + } + } while (++c<C); + + if (update_hf) + { + if (hf_sum) + hf_sum /= C*(4-m->nbEBands+end); + *hf_average = (*hf_average+hf_sum)>>1; + hf_sum = *hf_average; + if (*tapset_decision==2) + hf_sum += 4; + else if (*tapset_decision==0) + hf_sum -= 4; + if (hf_sum > 22) + *tapset_decision=2; + else if (hf_sum > 18) + *tapset_decision=1; + else + *tapset_decision=0; + } + /*printf("%d %d %d\n", hf_sum, *hf_average, *tapset_decision);*/ + celt_assert(nbBands>0); /*M*(eBands[end]-eBands[end-1]) <= 8 assures this*/ + sum /= nbBands; + /* Recursive averaging */ + sum = (sum+*average)>>1; + *average = sum; + /* Hysteresis */ + sum = (3*sum + (((3-last_decision)<<7) + 64) + 2)>>2; + if (sum < 80) + { + decision = SPREAD_AGGRESSIVE; + } else if (sum < 256) + { + decision = SPREAD_NORMAL; + } else if (sum < 384) + { + decision = SPREAD_LIGHT; + } else { + decision = SPREAD_NONE; + } +#ifdef FUZZING + decision = rand()&0x3; + *tapset_decision=rand()%3; +#endif + return decision; +} + +#ifdef MEASURE_NORM_MSE + +float MSE[30] = {0}; +int nbMSEBands = 0; +int MSECount[30] = {0}; + +void dump_norm_mse(void) +{ + int i; + for (i=0;i<nbMSEBands;i++) + { + printf ("%g ", MSE[i]/MSECount[i]); + } + printf ("\n"); +} + +void measure_norm_mse(const CELTMode *m, float *X, float *X0, float *bandE, float *bandE0, int M, int N, int C) +{ + static int init = 0; + int i; + if (!init) + { + atexit(dump_norm_mse); + init = 1; + } + for (i=0;i<m->nbEBands;i++) + { + int j; + int c; + float g; + if (bandE0[i]<10 || (C==2 && bandE0[i+m->nbEBands]<1)) + continue; + c=0; do { + g = bandE[i+c*m->nbEBands]/(1e-15+bandE0[i+c*m->nbEBands]); + for (j=M*m->eBands[i];j<M*m->eBands[i+1];j++) + MSE[i] += (g*X[j+c*N]-X0[j+c*N])*(g*X[j+c*N]-X0[j+c*N]); + } while (++c<C); + MSECount[i]+=C; + } + nbMSEBands = m->nbEBands; +} + +#endif + +/* Indexing table for converting from natural Hadamard to ordery Hadamard + This is essentially a bit-reversed Gray, on top of which we've added + an inversion of the order because we want the DC at the end rather than + the beginning. 
The lines are for N=2, 4, 8, 16 */ +static const int ordery_table[] = { + 1, 0, + 3, 0, 2, 1, + 7, 0, 4, 3, 6, 1, 5, 2, + 15, 0, 8, 7, 12, 3, 11, 4, 14, 1, 9, 6, 13, 2, 10, 5, +}; + +static void deinterleave_hadamard(celt_norm *X, int N0, int stride, int hadamard) +{ + int i,j; + VARDECL(celt_norm, tmp); + int N; + SAVE_STACK; + N = N0*stride; + ALLOC(tmp, N, celt_norm); + celt_assert(stride>0); + if (hadamard) + { + const int *ordery = ordery_table+stride-2; + for (i=0;i<stride;i++) + { + for (j=0;j<N0;j++) + tmp[ordery[i]*N0+j] = X[j*stride+i]; + } + } else { + for (i=0;i<stride;i++) + for (j=0;j<N0;j++) + tmp[i*N0+j] = X[j*stride+i]; + } + for (j=0;j<N;j++) + X[j] = tmp[j]; + RESTORE_STACK; +} + +static void interleave_hadamard(celt_norm *X, int N0, int stride, int hadamard) +{ + int i,j; + VARDECL(celt_norm, tmp); + int N; + SAVE_STACK; + N = N0*stride; + ALLOC(tmp, N, celt_norm); + if (hadamard) + { + const int *ordery = ordery_table+stride-2; + for (i=0;i<stride;i++) + for (j=0;j<N0;j++) + tmp[j*stride+i] = X[ordery[i]*N0+j]; + } else { + for (i=0;i<stride;i++) + for (j=0;j<N0;j++) + tmp[j*stride+i] = X[i*N0+j]; + } + for (j=0;j<N;j++) + X[j] = tmp[j]; + RESTORE_STACK; +} + +void haar1(celt_norm *X, int N0, int stride) +{ + int i, j; + N0 >>= 1; + for (i=0;i<stride;i++) + for (j=0;j<N0;j++) + { + celt_norm tmp1, tmp2; + tmp1 = MULT16_16_Q15(QCONST16(.70710678f,15), X[stride*2*j+i]); + tmp2 = MULT16_16_Q15(QCONST16(.70710678f,15), X[stride*(2*j+1)+i]); + X[stride*2*j+i] = tmp1 + tmp2; + X[stride*(2*j+1)+i] = tmp1 - tmp2; + } +} + +static int compute_qn(int N, int b, int offset, int pulse_cap, int stereo) +{ + static const opus_int16 exp2_table8[8] = + {16384, 17866, 19483, 21247, 23170, 25267, 27554, 30048}; + int qn, qb; + int N2 = 2*N-1; + if (stereo && N==2) + N2--; + /* The upper limit ensures that in a stereo split with itheta==16384, we'll + always have enough bits left over to code at least one pulse in the + side; otherwise it would collapse, since it doesn't get folded. */ + qb = IMIN(b-pulse_cap-(4<<BITRES), (b+N2*offset)/N2); + + qb = IMIN(8<<BITRES, qb); + + if (qb<(1<<BITRES>>1)) { + qn = 1; + } else { + qn = exp2_table8[qb&0x7]>>(14-(qb>>BITRES)); + qn = (qn+1)>>1<<1; + } + celt_assert(qn <= 256); + return qn; +} + +/* This function is responsible for encoding and decoding a band for both + the mono and stereo case. Even in the mono case, it can split the band + in two and transmit the energy difference with the two half-bands. It + can be called recursively so bands can end up being split in 8 parts. 
*/ +static unsigned quant_band(int encode, const CELTMode *m, int i, celt_norm *X, celt_norm *Y, + int N, int b, int spread, int B, int intensity, int tf_change, celt_norm *lowband, ec_ctx *ec, + opus_int32 *remaining_bits, int LM, celt_norm *lowband_out, const celt_ener *bandE, int level, + opus_uint32 *seed, opus_val16 gain, celt_norm *lowband_scratch, int fill) +{ + const unsigned char *cache; + int q; + int curr_bits; + int stereo, split; + int imid=0, iside=0; + int N0=N; + int N_B=N; + int N_B0; + int B0=B; + int time_divide=0; + int recombine=0; + int inv = 0; + opus_val16 mid=0, side=0; + int longBlocks; + unsigned cm=0; +#ifdef RESYNTH + int resynth = 1; +#else + int resynth = !encode; +#endif + + longBlocks = B0==1; + + N_B /= B; + N_B0 = N_B; + + split = stereo = Y != NULL; + + /* Special case for one sample */ + if (N==1) + { + int c; + celt_norm *x = X; + c=0; do { + int sign=0; + if (*remaining_bits>=1<<BITRES) + { + if (encode) + { + sign = x[0]<0; + ec_enc_bits(ec, sign, 1); + } else { + sign = ec_dec_bits(ec, 1); + } + *remaining_bits -= 1<<BITRES; + b-=1<<BITRES; + } + if (resynth) + x[0] = sign ? -NORM_SCALING : NORM_SCALING; + x = Y; + } while (++c<1+stereo); + if (lowband_out) + lowband_out[0] = SHR16(X[0],4); + return 1; + } + + if (!stereo && level == 0) + { + int k; + if (tf_change>0) + recombine = tf_change; + /* Band recombining to increase frequency resolution */ + + if (lowband && (recombine || ((N_B&1) == 0 && tf_change<0) || B0>1)) + { + int j; + for (j=0;j<N;j++) + lowband_scratch[j] = lowband[j]; + lowband = lowband_scratch; + } + + for (k=0;k<recombine;k++) + { + static const unsigned char bit_interleave_table[16]={ + 0,1,1,1,2,3,3,3,2,3,3,3,2,3,3,3 + }; + if (encode) + haar1(X, N>>k, 1<<k); + if (lowband) + haar1(lowband, N>>k, 1<<k); + fill = bit_interleave_table[fill&0xF]|bit_interleave_table[fill>>4]<<2; + } + B>>=recombine; + N_B<<=recombine; + + /* Increasing the time resolution */ + while ((N_B&1) == 0 && tf_change<0) + { + if (encode) + haar1(X, N_B, B); + if (lowband) + haar1(lowband, N_B, B); + fill |= fill<<B; + B <<= 1; + N_B >>= 1; + time_divide++; + tf_change++; + } + B0=B; + N_B0 = N_B; + + /* Reorganize the samples in time order instead of frequency order */ + if (B0>1) + { + if (encode) + deinterleave_hadamard(X, N_B>>recombine, B0<<recombine, longBlocks); + if (lowband) + deinterleave_hadamard(lowband, N_B>>recombine, B0<<recombine, longBlocks); + } + } + + /* If we need 1.5 more bit than we can produce, split the band in two. */ + cache = m->cache.bits + m->cache.index[(LM+1)*m->nbEBands+i]; + if (!stereo && LM != -1 && b > cache[cache[0]]+12 && N>2) + { + N >>= 1; + Y = X+N; + split = 1; + LM -= 1; + if (B==1) + fill = (fill&1)|(fill<<1); + B = (B+1)>>1; + } + + if (split) + { + int qn; + int itheta=0; + int mbits, sbits, delta; + int qalloc; + int pulse_cap; + int offset; + int orig_fill; + opus_int32 tell; + + /* Decide on the resolution to give to the split parameter theta */ + pulse_cap = m->logN[i]+LM*(1<<BITRES); + offset = (pulse_cap>>1) - (stereo&&N==2 ? QTHETA_OFFSET_TWOPHASE : QTHETA_OFFSET); + qn = compute_qn(N, b, offset, pulse_cap, stereo); + if (stereo && i>=intensity) + qn = 1; + if (encode) + { + /* theta is the atan() of the ratio between the (normalized) + side and mid. With just that parameter, we can re-scale both + mid and side because we know that 1) they have unit norm and + 2) they are orthogonal. 
*/ + itheta = stereo_itheta(X, Y, stereo, N); + } + tell = ec_tell_frac(ec); + if (qn!=1) + { + if (encode) + itheta = (itheta*qn+8192)>>14; + + /* Entropy coding of the angle. We use a uniform pdf for the + time split, a step for stereo, and a triangular one for the rest. */ + if (stereo && N>2) + { + int p0 = 3; + int x = itheta; + int x0 = qn/2; + int ft = p0*(x0+1) + x0; + /* Use a probability of p0 up to itheta=8192 and then use 1 after */ + if (encode) + { + ec_encode(ec,x<=x0?p0*x:(x-1-x0)+(x0+1)*p0,x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0,ft); + } else { + int fs; + fs=ec_decode(ec,ft); + if (fs<(x0+1)*p0) + x=fs/p0; + else + x=x0+1+(fs-(x0+1)*p0); + ec_dec_update(ec,x<=x0?p0*x:(x-1-x0)+(x0+1)*p0,x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0,ft); + itheta = x; + } + } else if (B0>1 || stereo) { + /* Uniform pdf */ + if (encode) + ec_enc_uint(ec, itheta, qn+1); + else + itheta = ec_dec_uint(ec, qn+1); + } else { + int fs=1, ft; + ft = ((qn>>1)+1)*((qn>>1)+1); + if (encode) + { + int fl; + + fs = itheta <= (qn>>1) ? itheta + 1 : qn + 1 - itheta; + fl = itheta <= (qn>>1) ? itheta*(itheta + 1)>>1 : + ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1); + + ec_encode(ec, fl, fl+fs, ft); + } else { + /* Triangular pdf */ + int fl=0; + int fm; + fm = ec_decode(ec, ft); + + if (fm < ((qn>>1)*((qn>>1) + 1)>>1)) + { + itheta = (isqrt32(8*(opus_uint32)fm + 1) - 1)>>1; + fs = itheta + 1; + fl = itheta*(itheta + 1)>>1; + } + else + { + itheta = (2*(qn + 1) + - isqrt32(8*(opus_uint32)(ft - fm - 1) + 1))>>1; + fs = qn + 1 - itheta; + fl = ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1); + } + + ec_dec_update(ec, fl, fl+fs, ft); + } + } + itheta = (opus_int32)itheta*16384/qn; + if (encode && stereo) + { + if (itheta==0) + intensity_stereo(m, X, Y, bandE, i, N); + else + stereo_split(X, Y, N); + } + /* NOTE: Renormalising X and Y *may* help fixed-point a bit at very high rate. + Let's do that at higher complexity */ + } else if (stereo) { + if (encode) + { + inv = itheta > 8192; + if (inv) + { + int j; + for (j=0;j<N;j++) + Y[j] = -Y[j]; + } + intensity_stereo(m, X, Y, bandE, i, N); + } + if (b>2<<BITRES && *remaining_bits > 2<<BITRES) + { + if (encode) + ec_enc_bit_logp(ec, inv, 2); + else + inv = ec_dec_bit_logp(ec, 2); + } else + inv = 0; + itheta = 0; + } + qalloc = ec_tell_frac(ec) - tell; + b -= qalloc; + + orig_fill = fill; + if (itheta == 0) + { + imid = 32767; + iside = 0; + fill &= (1<<B)-1; + delta = -16384; + } else if (itheta == 16384) + { + imid = 0; + iside = 32767; + fill &= ((1<<B)-1)<<B; + delta = 16384; + } else { + imid = bitexact_cos(itheta); + iside = bitexact_cos(16384-itheta); + /* This is the mid vs side allocation that minimizes squared error + in that band. */ + delta = FRAC_MUL16((N-1)<<7,bitexact_log2tan(iside,imid)); + } + +#ifdef FIXED_POINT + mid = imid; + side = iside; +#else + mid = (1.f/32768)*imid; + side = (1.f/32768)*iside; +#endif + + /* This is a special case for N=2 that only works for stereo and takes + advantage of the fact that mid and side are orthogonal to encode + the side with just one bit. */ + if (N==2 && stereo) + { + int c; + int sign=0; + celt_norm *x2, *y2; + mbits = b; + sbits = 0; + /* Only need one bit for the side */ + if (itheta != 0 && itheta != 16384) + sbits = 1<<BITRES; + mbits -= sbits; + c = itheta > 8192; + *remaining_bits -= qalloc+sbits; + + x2 = c ? Y : X; + y2 = c ? 
X : Y; + if (sbits) + { + if (encode) + { + /* Here we only need to encode a sign for the side */ + sign = x2[0]*y2[1] - x2[1]*y2[0] < 0; + ec_enc_bits(ec, sign, 1); + } else { + sign = ec_dec_bits(ec, 1); + } + } + sign = 1-2*sign; + /* We use orig_fill here because we want to fold the side, but if + itheta==16384, we'll have cleared the low bits of fill. */ + cm = quant_band(encode, m, i, x2, NULL, N, mbits, spread, B, intensity, tf_change, lowband, ec, remaining_bits, LM, lowband_out, NULL, level, seed, gain, lowband_scratch, orig_fill); + /* We don't split N=2 bands, so cm is either 1 or 0 (for a fold-collapse), + and there's no need to worry about mixing with the other channel. */ + y2[0] = -sign*x2[1]; + y2[1] = sign*x2[0]; + if (resynth) + { + celt_norm tmp; + X[0] = MULT16_16_Q15(mid, X[0]); + X[1] = MULT16_16_Q15(mid, X[1]); + Y[0] = MULT16_16_Q15(side, Y[0]); + Y[1] = MULT16_16_Q15(side, Y[1]); + tmp = X[0]; + X[0] = SUB16(tmp,Y[0]); + Y[0] = ADD16(tmp,Y[0]); + tmp = X[1]; + X[1] = SUB16(tmp,Y[1]); + Y[1] = ADD16(tmp,Y[1]); + } + } else { + /* "Normal" split code */ + celt_norm *next_lowband2=NULL; + celt_norm *next_lowband_out1=NULL; + int next_level=0; + opus_int32 rebalance; + + /* Give more bits to low-energy MDCTs than they would otherwise deserve */ + if (B0>1 && !stereo && (itheta&0x3fff)) + { + if (itheta > 8192) + /* Rough approximation for pre-echo masking */ + delta -= delta>>(4-LM); + else + /* Corresponds to a forward-masking slope of 1.5 dB per 10 ms */ + delta = IMIN(0, delta + (N<<BITRES>>(5-LM))); + } + mbits = IMAX(0, IMIN(b, (b-delta)/2)); + sbits = b-mbits; + *remaining_bits -= qalloc; + + if (lowband && !stereo) + next_lowband2 = lowband+N; /* >32-bit split case */ + + /* Only stereo needs to pass on lowband_out. Otherwise, it's + handled at the end */ + if (stereo) + next_lowband_out1 = lowband_out; + else + next_level = level+1; + + rebalance = *remaining_bits; + if (mbits >= sbits) + { + /* In stereo mode, we do not apply a scaling to the mid because we need the normalized + mid for folding later */ + cm = quant_band(encode, m, i, X, NULL, N, mbits, spread, B, intensity, tf_change, + lowband, ec, remaining_bits, LM, next_lowband_out1, + NULL, next_level, seed, stereo ? Q15ONE : MULT16_16_P15(gain,mid), lowband_scratch, fill); + rebalance = mbits - (rebalance-*remaining_bits); + if (rebalance > 3<<BITRES && itheta!=0) + sbits += rebalance - (3<<BITRES); + + /* For a stereo split, the high bits of fill are always zero, so no + folding will be done to the side. */ + cm |= quant_band(encode, m, i, Y, NULL, N, sbits, spread, B, intensity, tf_change, + next_lowband2, ec, remaining_bits, LM, NULL, + NULL, next_level, seed, MULT16_16_P15(gain,side), NULL, fill>>B)<<((B0>>1)&(stereo-1)); + } else { + /* For a stereo split, the high bits of fill are always zero, so no + folding will be done to the side. */ + cm = quant_band(encode, m, i, Y, NULL, N, sbits, spread, B, intensity, tf_change, + next_lowband2, ec, remaining_bits, LM, NULL, + NULL, next_level, seed, MULT16_16_P15(gain,side), NULL, fill>>B)<<((B0>>1)&(stereo-1)); + rebalance = sbits - (rebalance-*remaining_bits); + if (rebalance > 3<<BITRES && itheta!=16384) + mbits += rebalance - (3<<BITRES); + /* In stereo mode, we do not apply a scaling to the mid because we need the normalized + mid for folding later */ + cm |= quant_band(encode, m, i, X, NULL, N, mbits, spread, B, intensity, tf_change, + lowband, ec, remaining_bits, LM, next_lowband_out1, + NULL, next_level, seed, stereo ? 
Q15ONE : MULT16_16_P15(gain,mid), lowband_scratch, fill); + } + } + + } else { + /* This is the basic no-split case */ + q = bits2pulses(m, i, LM, b); + curr_bits = pulses2bits(m, i, LM, q); + *remaining_bits -= curr_bits; + + /* Ensures we can never bust the budget */ + while (*remaining_bits < 0 && q > 0) + { + *remaining_bits += curr_bits; + q--; + curr_bits = pulses2bits(m, i, LM, q); + *remaining_bits -= curr_bits; + } + + if (q!=0) + { + int K = get_pulses(q); + + /* Finally do the actual quantization */ + if (encode) + { + cm = alg_quant(X, N, K, spread, B, ec +#ifdef RESYNTH + , gain +#endif + ); + } else { + cm = alg_unquant(X, N, K, spread, B, ec, gain); + } + } else { + /* If there's no pulse, fill the band anyway */ + int j; + if (resynth) + { + unsigned cm_mask; + /*B can be as large as 16, so this shift might overflow an int on a + 16-bit platform; use a long to get defined behavior.*/ + cm_mask = (unsigned)(1UL<<B)-1; + fill &= cm_mask; + if (!fill) + { + for (j=0;j<N;j++) + X[j] = 0; + } else { + if (lowband == NULL) + { + /* Noise */ + for (j=0;j<N;j++) + { + *seed = celt_lcg_rand(*seed); + X[j] = (celt_norm)((opus_int32)*seed>>20); + } + cm = cm_mask; + } else { + /* Folded spectrum */ + for (j=0;j<N;j++) + { + opus_val16 tmp; + *seed = celt_lcg_rand(*seed); + /* About 48 dB below the "normal" folding level */ + tmp = QCONST16(1.0f/256, 10); + tmp = (*seed)&0x8000 ? tmp : -tmp; + X[j] = lowband[j]+tmp; + } + cm = fill; + } + renormalise_vector(X, N, gain); + } + } + } + } + + /* This code is used by the decoder and by the resynthesis-enabled encoder */ + if (resynth) + { + if (stereo) + { + if (N!=2) + stereo_merge(X, Y, mid, N); + if (inv) + { + int j; + for (j=0;j<N;j++) + Y[j] = -Y[j]; + } + } else if (level == 0) + { + int k; + + /* Undo the sample reorganization going from time order to frequency order */ + if (B0>1) + interleave_hadamard(X, N_B>>recombine, B0<<recombine, longBlocks); + + /* Undo time-freq changes that we did earlier */ + N_B = N_B0; + B = B0; + for (k=0;k<time_divide;k++) + { + B >>= 1; + N_B <<= 1; + cm |= cm>>B; + haar1(X, N_B, B); + } + + for (k=0;k<recombine;k++) + { + static const unsigned char bit_deinterleave_table[16]={ + 0x00,0x03,0x0C,0x0F,0x30,0x33,0x3C,0x3F, + 0xC0,0xC3,0xCC,0xCF,0xF0,0xF3,0xFC,0xFF + }; + cm = bit_deinterleave_table[cm]; + haar1(X, N0>>k, 1<<k); + } + B<<=recombine; + + /* Scale output for later folding */ + if (lowband_out) + { + int j; + opus_val16 n; + n = celt_sqrt(SHL32(EXTEND32(N0),22)); + for (j=0;j<N0;j++) + lowband_out[j] = MULT16_16_Q15(n,X[j]); + } + cm &= (1<<B)-1; + } + } + return cm; +} + +void quant_all_bands(int encode, const CELTMode *m, int start, int end, + celt_norm *X_, celt_norm *Y_, unsigned char *collapse_masks, const celt_ener *bandE, int *pulses, + int shortBlocks, int spread, int dual_stereo, int intensity, int *tf_res, + opus_int32 total_bits, opus_int32 balance, ec_ctx *ec, int LM, int codedBands, opus_uint32 *seed) +{ + int i; + opus_int32 remaining_bits; + const opus_int16 * restrict eBands = m->eBands; + celt_norm * restrict norm, * restrict norm2; + VARDECL(celt_norm, _norm); + VARDECL(celt_norm, lowband_scratch); + int B; + int M; + int lowband_offset; + int update_lowband = 1; + int C = Y_ != NULL ? 2 : 1; +#ifdef RESYNTH + int resynth = 1; +#else + int resynth = !encode; +#endif + SAVE_STACK; + + M = 1<<LM; + B = shortBlocks ? 
M : 1; + ALLOC(_norm, C*M*eBands[m->nbEBands], celt_norm); + ALLOC(lowband_scratch, M*(eBands[m->nbEBands]-eBands[m->nbEBands-1]), celt_norm); + norm = _norm; + norm2 = norm + M*eBands[m->nbEBands]; + + lowband_offset = 0; + for (i=start;i<end;i++) + { + opus_int32 tell; + int b; + int N; + opus_int32 curr_balance; + int effective_lowband=-1; + celt_norm * restrict X, * restrict Y; + int tf_change=0; + unsigned x_cm; + unsigned y_cm; + + X = X_+M*eBands[i]; + if (Y_!=NULL) + Y = Y_+M*eBands[i]; + else + Y = NULL; + N = M*eBands[i+1]-M*eBands[i]; + tell = ec_tell_frac(ec); + + /* Compute how many bits we want to allocate to this band */ + if (i != start) + balance -= tell; + remaining_bits = total_bits-tell-1; + if (i <= codedBands-1) + { + curr_balance = balance / IMIN(3, codedBands-i); + b = IMAX(0, IMIN(16383, IMIN(remaining_bits+1,pulses[i]+curr_balance))); + } else { + b = 0; + } + + if (resynth && M*eBands[i]-N >= M*eBands[start] && (update_lowband || lowband_offset==0)) + lowband_offset = i; + + tf_change = tf_res[i]; + if (i>=m->effEBands) + { + X=norm; + if (Y_!=NULL) + Y = norm; + } + + /* Get a conservative estimate of the collapse_mask's for the bands we're + going to be folding from. */ + if (lowband_offset != 0 && (spread!=SPREAD_AGGRESSIVE || B>1 || tf_change<0)) + { + int fold_start; + int fold_end; + int fold_i; + /* This ensures we never repeat spectral content within one band */ + effective_lowband = IMAX(M*eBands[start], M*eBands[lowband_offset]-N); + fold_start = lowband_offset; + while(M*eBands[--fold_start] > effective_lowband); + fold_end = lowband_offset-1; + while(M*eBands[++fold_end] < effective_lowband+N); + x_cm = y_cm = 0; + fold_i = fold_start; do { + x_cm |= collapse_masks[fold_i*C+0]; + y_cm |= collapse_masks[fold_i*C+C-1]; + } while (++fold_i<fold_end); + } + /* Otherwise, we'll be using the LCG to fold, so all blocks will (almost + always) be non-zero.*/ + else + x_cm = y_cm = (1<<B)-1; + + if (dual_stereo && i==intensity) + { + int j; + + /* Switch off dual stereo to do intensity */ + dual_stereo = 0; + for (j=M*eBands[start];j<M*eBands[i];j++) + norm[j] = HALF32(norm[j]+norm2[j]); + } + if (dual_stereo) + { + x_cm = quant_band(encode, m, i, X, NULL, N, b/2, spread, B, intensity, tf_change, + effective_lowband != -1 ? norm+effective_lowband : NULL, ec, &remaining_bits, LM, + norm+M*eBands[i], bandE, 0, seed, Q15ONE, lowband_scratch, x_cm); + y_cm = quant_band(encode, m, i, Y, NULL, N, b/2, spread, B, intensity, tf_change, + effective_lowband != -1 ? norm2+effective_lowband : NULL, ec, &remaining_bits, LM, + norm2+M*eBands[i], bandE, 0, seed, Q15ONE, lowband_scratch, y_cm); + } else { + x_cm = quant_band(encode, m, i, X, Y, N, b, spread, B, intensity, tf_change, + effective_lowband != -1 ? norm+effective_lowband : NULL, ec, &remaining_bits, LM, + norm+M*eBands[i], bandE, 0, seed, Q15ONE, lowband_scratch, x_cm|y_cm); + y_cm = x_cm; + } + collapse_masks[i*C+0] = (unsigned char)x_cm; + collapse_masks[i*C+C-1] = (unsigned char)y_cm; + balance += pulses[i] + tell; + + /* Update the folding position only as long as we have 1 bit/sample depth */ + update_lowband = b>(N<<BITRES); + } + RESTORE_STACK; +} +
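As a rough illustration of the float path of compute_band_energies() and normalise_bands() above, here is a standalone toy version for a single channel; the spectrum values and band edges are invented for the example and do not correspond to any real CELT mode:

#include <math.h>
#include <stdio.h>

#define NB_BANDS 3

int main(void)
{
    /* Toy single-channel spectrum and invented band edges (real modes use
       m->eBands scaled by M). */
    float X[8] = {1.0f, 2.0f, 0.5f, 0.5f, 3.0f, 0.0f, 1.0f, 1.0f};
    int eBands[NB_BANDS+1] = {0, 2, 4, 8};
    float bandE[NB_BANDS];
    float norm[8];
    int i, j;

    /* compute_band_energies, float path: square root of the energy in each
       band, with a tiny offset so an empty band cannot divide by zero later. */
    for (i = 0; i < NB_BANDS; i++)
    {
        float sum = 1e-27f;
        for (j = eBands[i]; j < eBands[i+1]; j++)
            sum += X[j]*X[j];
        bandE[i] = sqrtf(sum);
    }

    /* normalise_bands, float path: scale each band to unit energy. */
    for (i = 0; i < NB_BANDS; i++)
    {
        float g = 1.f/(1e-27f + bandE[i]);
        for (j = eBands[i]; j < eBands[i+1]; j++)
            norm[j] = X[j]*g;
    }

    for (i = 0; i < NB_BANDS; i++)
        printf("band %d: sqrt(energy) = %f\n", i, bandE[i]);
    for (j = 0; j < 8; j++)
        printf("norm[%d] = %f\n", j, norm[j]);
    return 0;
}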
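The haar1() butterfly used above for time-frequency resolution switching is an energy-preserving Haar transform on interleaved pairs. A simplified float sketch (stride fixed to 1 and plain float arithmetic standing in for the Q15 macros; not code from the tree):

#include <stdio.h>

/* Simplified float version of the haar1() butterfly: each pair (a, b)
   becomes ((a+b)/sqrt(2), (a-b)/sqrt(2)), which leaves the total energy
   unchanged. stride is fixed to 1 here; the real code uses it to walk
   interleaved short-MDCT coefficients. */
static void haar1_float(float *X, int N0)
{
    int j;
    N0 >>= 1;
    for (j = 0; j < N0; j++)
    {
        float tmp1 = 0.70710678f * X[2*j];
        float tmp2 = 0.70710678f * X[2*j+1];
        X[2*j]   = tmp1 + tmp2;
        X[2*j+1] = tmp1 - tmp2;
    }
}

int main(void)
{
    float X[4] = {1.0f, 1.0f, 2.0f, 0.0f};
    int j;
    haar1_float(X, 4);
    /* Expect roughly {1.414, 0, 1.414, 1.414}: an equal pair collapses into
       a single coefficient, trading frequency resolution for time resolution. */
    for (j = 0; j < 4; j++)
        printf("%f\n", X[j]);
    return 0;
}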
new file mode 100644 --- /dev/null +++ b/media/libopus/celt/bands.h @@ -0,0 +1,95 @@ +/* Copyright (c) 2007-2008 CSIRO + Copyright (c) 2007-2009 Xiph.Org Foundation + Copyright (c) 2008-2009 Gregory Maxwell + Written by Jean-Marc Valin and Gregory Maxwell */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef BANDS_H +#define BANDS_H + +#include "arch.h" +#include "modes.h" +#include "entenc.h" +#include "entdec.h" +#include "rate.h" + +/** Compute the amplitude (sqrt energy) in each of the bands + * @param m Mode data + * @param X Spectrum + * @param bands Square root of the energy for each band (returned) + */ +void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M); + +/*void compute_noise_energies(const CELTMode *m, const celt_sig *X, const opus_val16 *tonality, celt_ener *bandE);*/ + +/** Normalise each band of X such that the energy in each band is + equal to 1 + * @param m Mode data + * @param X Spectrum (returned normalised) + * @param bands Square root of the energy for each band + */ +void normalise_bands(const CELTMode *m, const celt_sig * restrict freq, celt_norm * restrict X, const celt_ener *bandE, int end, int C, int M); + +/** Denormalise each band of X to restore full amplitude + * @param m Mode data + * @param X Spectrum (returned de-normalised) + * @param bands Square root of the energy for each band + */ +void denormalise_bands(const CELTMode *m, const celt_norm * restrict X, celt_sig * restrict freq, const celt_ener *bandE, int end, int C, int M); + +#define SPREAD_NONE (0) +#define SPREAD_LIGHT (1) +#define SPREAD_NORMAL (2) +#define SPREAD_AGGRESSIVE (3) + +int spreading_decision(const CELTMode *m, celt_norm *X, int *average, + int last_decision, int *hf_average, int *tapset_decision, int update_hf, + int end, int C, int M); + +#ifdef MEASURE_NORM_MSE +void measure_norm_mse(const CELTMode *m, float *X, float *X0, float *bandE, float *bandE0, int M, int N, int C); +#endif + +void haar1(celt_norm *X, int N0, int stride); + +/** Quantisation/encoding of the residual spectrum + * @param m Mode data + * @param X Residual (normalised) + * @param total_bits Total number of bits that can be used for the frame (including the ones already spent) + * @param enc Entropy encoder + */ +void quant_all_bands(int encode, 
const CELTMode *m, int start, int end, + celt_norm * X, celt_norm * Y, unsigned char *collapse_masks, const celt_ener *bandE, int *pulses, + int time_domain, int fold, int dual_stereo, int intensity, int *tf_res, + opus_int32 total_bits, opus_int32 balance, ec_ctx *ec, int M, int codedBands, opus_uint32 *seed); + +void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_masks, int LM, int C, int CC, int size, + int start, int end, opus_val16 *logE, opus_val16 *prev1logE, + opus_val16 *prev2logE, int *pulses, opus_uint32 seed); + +opus_uint32 celt_lcg_rand(opus_uint32 seed); + +#endif /* BANDS_H */
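celt_lcg_rand(), declared above and defined in bands.c, is the 32-bit linear congruential generator the band code uses for noise filling; it repeatedly feeds the result back in and looks at high bits such as seed & 0x8000 for a random sign. A minimal sketch of the same recurrence (the loop and printing are only a demo harness, not library code):

#include <stdint.h>
#include <stdio.h>

/* Same recurrence as celt_lcg_rand() in bands.c. */
static uint32_t lcg_rand(uint32_t seed)
{
    return 1664525u * seed + 1013904223u;
}

int main(void)
{
    uint32_t seed = 42;
    int i;
    for (i = 0; i < 4; i++)
    {
        seed = lcg_rand(seed);
        printf("0x%08x  sign bit (&0x8000): %d\n",
               (unsigned)seed, (seed & 0x8000) ? 1 : 0);
    }
    return 0;
}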
new file mode 100644 --- /dev/null +++ b/media/libopus/celt/celt.c @@ -0,0 +1,2847 @@ +/* Copyright (c) 2007-2008 CSIRO + Copyright (c) 2007-2010 Xiph.Org Foundation + Copyright (c) 2008 Gregory Maxwell + Written by Jean-Marc Valin and Gregory Maxwell */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#define CELT_C + +#include "os_support.h" +#include "mdct.h" +#include <math.h> +#include "celt.h" +#include "pitch.h" +#include "bands.h" +#include "modes.h" +#include "entcode.h" +#include "quant_bands.h" +#include "rate.h" +#include "stack_alloc.h" +#include "mathops.h" +#include "float_cast.h" +#include <stdarg.h> +#include "celt_lpc.h" +#include "vq.h" + +#ifndef OPUS_VERSION +#define OPUS_VERSION "unknown" +#endif + +#ifdef CUSTOM_MODES +#define OPUS_CUSTOM_NOSTATIC +#else +#define OPUS_CUSTOM_NOSTATIC static inline +#endif + +static const unsigned char trim_icdf[11] = {126, 124, 119, 109, 87, 41, 19, 9, 4, 2, 0}; +/* Probs: NONE: 21.875%, LIGHT: 6.25%, NORMAL: 65.625%, AGGRESSIVE: 6.25% */ +static const unsigned char spread_icdf[4] = {25, 23, 2, 0}; + +static const unsigned char tapset_icdf[3]={2,1,0}; + +#ifdef CUSTOM_MODES +static const unsigned char toOpusTable[20] = { + 0xE0, 0xE8, 0xF0, 0xF8, + 0xC0, 0xC8, 0xD0, 0xD8, + 0xA0, 0xA8, 0xB0, 0xB8, + 0x00, 0x00, 0x00, 0x00, + 0x80, 0x88, 0x90, 0x98, +}; + +static const unsigned char fromOpusTable[16] = { + 0x80, 0x88, 0x90, 0x98, + 0x40, 0x48, 0x50, 0x58, + 0x20, 0x28, 0x30, 0x38, + 0x00, 0x08, 0x10, 0x18 +}; + +static inline int toOpus(unsigned char c) +{ + int ret=0; + if (c<0xA0) + ret = toOpusTable[c>>3]; + if (ret == 0) + return -1; + else + return ret|(c&0x7); +} + +static inline int fromOpus(unsigned char c) +{ + if (c<0x80) + return -1; + else + return fromOpusTable[(c>>3)-16] | (c&0x7); +} +#endif /* CUSTOM_MODES */ + +#define COMBFILTER_MAXPERIOD 1024 +#define COMBFILTER_MINPERIOD 15 + +static int resampling_factor(opus_int32 rate) +{ + int ret; + switch (rate) + { + case 48000: + ret = 1; + break; + case 24000: + ret = 2; + break; + case 16000: + ret = 3; + break; + case 12000: + ret = 4; + break; + case 8000: + ret = 6; + break; + default: +#ifndef CUSTOM_MODES + celt_assert(0); +#endif + ret = 0; + break; + } + return ret; +} + +/** Encoder state + @brief 
Encoder state + */ +struct OpusCustomEncoder { + const OpusCustomMode *mode; /**< Mode used by the encoder */ + int overlap; + int channels; + int stream_channels; + + int force_intra; + int clip; + int disable_pf; + int complexity; + int upsample; + int start, end; + + opus_int32 bitrate; + int vbr; + int signalling; + int constrained_vbr; /* If zero, VBR can do whatever it likes with the rate */ + int loss_rate; + + /* Everything beyond this point gets cleared on a reset */ +#define ENCODER_RESET_START rng + + opus_uint32 rng; + int spread_decision; + opus_val32 delayedIntra; + int tonal_average; + int lastCodedBands; + int hf_average; + int tapset_decision; + + int prefilter_period; + opus_val16 prefilter_gain; + int prefilter_tapset; +#ifdef RESYNTH + int prefilter_period_old; + opus_val16 prefilter_gain_old; + int prefilter_tapset_old; +#endif + int consec_transient; + + opus_val32 preemph_memE[2]; + opus_val32 preemph_memD[2]; + + /* VBR-related parameters */ + opus_int32 vbr_reservoir; + opus_int32 vbr_drift; + opus_int32 vbr_offset; + opus_int32 vbr_count; + +#ifdef RESYNTH + celt_sig syn_mem[2][2*MAX_PERIOD]; +#endif + + celt_sig in_mem[1]; /* Size = channels*mode->overlap */ + /* celt_sig prefilter_mem[], Size = channels*COMBFILTER_PERIOD */ + /* celt_sig overlap_mem[], Size = channels*mode->overlap */ + /* opus_val16 oldEBands[], Size = 2*channels*mode->nbEBands */ +}; + +int celt_encoder_get_size(int channels) +{ + CELTMode *mode = opus_custom_mode_create(48000, 960, NULL); + return opus_custom_encoder_get_size(mode, channels); +} + +OPUS_CUSTOM_NOSTATIC int opus_custom_encoder_get_size(const CELTMode *mode, int channels) +{ + int size = sizeof(struct CELTEncoder) + + (2*channels*mode->overlap-1)*sizeof(celt_sig) + + channels*COMBFILTER_MAXPERIOD*sizeof(celt_sig) + + 3*channels*mode->nbEBands*sizeof(opus_val16); + return size; +} + +#ifdef CUSTOM_MODES +CELTEncoder *opus_custom_encoder_create(const CELTMode *mode, int channels, int *error) +{ + int ret; + CELTEncoder *st = (CELTEncoder *)opus_alloc(opus_custom_encoder_get_size(mode, channels)); + /* init will handle the NULL case */ + ret = opus_custom_encoder_init(st, mode, channels); + if (ret != OPUS_OK) + { + opus_custom_encoder_destroy(st); + st = NULL; + } + if (error) + *error = ret; + return st; +} +#endif /* CUSTOM_MODES */ + +int celt_encoder_init(CELTEncoder *st, opus_int32 sampling_rate, int channels) +{ + int ret; + ret = opus_custom_encoder_init(st, opus_custom_mode_create(48000, 960, NULL), channels); + if (ret != OPUS_OK) + return ret; + st->upsample = resampling_factor(sampling_rate); + return OPUS_OK; +} + +OPUS_CUSTOM_NOSTATIC int opus_custom_encoder_init(CELTEncoder *st, const CELTMode *mode, int channels) +{ + if (channels < 0 || channels > 2) + return OPUS_BAD_ARG; + + if (st==NULL || mode==NULL) + return OPUS_ALLOC_FAIL; + + OPUS_CLEAR((char*)st, opus_custom_encoder_get_size(mode, channels)); + + st->mode = mode; + st->overlap = mode->overlap; + st->stream_channels = st->channels = channels; + + st->upsample = 1; + st->start = 0; + st->end = st->mode->effEBands; + st->signalling = 1; + + st->constrained_vbr = 1; + st->clip = 1; + + st->bitrate = OPUS_BITRATE_MAX; + st->vbr = 0; + st->force_intra = 0; + st->complexity = 5; + + opus_custom_encoder_ctl(st, OPUS_RESET_STATE); + + return OPUS_OK; +} + +#ifdef CUSTOM_MODES +void opus_custom_encoder_destroy(CELTEncoder *st) +{ + opus_free(st); +} +#endif /* CUSTOM_MODES */ + +static inline opus_val16 SIG2WORD16(celt_sig x) +{ +#ifdef FIXED_POINT + x = PSHR32(x, 
SIG_SHIFT); + x = MAX32(x, -32768); + x = MIN32(x, 32767); + return EXTRACT16(x); +#else + return (opus_val16)x; +#endif +} + +static int transient_analysis(const opus_val32 * restrict in, int len, int C, + int overlap) +{ + int i; + VARDECL(opus_val16, tmp); + opus_val32 mem0=0,mem1=0; + int is_transient = 0; + int block; + int N; + VARDECL(opus_val16, bins); + SAVE_STACK; + ALLOC(tmp, len, opus_val16); + + block = overlap/2; + N=len/block; + ALLOC(bins, N, opus_val16); + if (C==1) + { + for (i=0;i<len;i++) + tmp[i] = SHR32(in[i],SIG_SHIFT); + } else { + for (i=0;i<len;i++) + tmp[i] = SHR32(ADD32(in[i],in[i+len]), SIG_SHIFT+1); + } + + /* High-pass filter: (1 - 2*z^-1 + z^-2) / (1 - z^-1 + .5*z^-2) */ + for (i=0;i<len;i++) + { + opus_val32 x,y; + x = tmp[i]; + y = ADD32(mem0, x); +#ifdef FIXED_POINT + mem0 = mem1 + y - SHL32(x,1); + mem1 = x - SHR32(y,1); +#else + mem0 = mem1 + y - 2*x; + mem1 = x - .5f*y; +#endif + tmp[i] = EXTRACT16(SHR(y,2)); + } + /* First few samples are bad because we don't propagate the memory */ + for (i=0;i<12;i++) + tmp[i] = 0; + + for (i=0;i<N;i++) + { + int j; + opus_val16 max_abs=0; + for (j=0;j<block;j++) + max_abs = MAX16(max_abs, ABS16(tmp[i*block+j])); + bins[i] = max_abs; + } + for (i=0;i<N;i++) + { + int j; + int conseq=0; + opus_val16 t1, t2, t3; + + t1 = MULT16_16_Q15(QCONST16(.15f, 15), bins[i]); + t2 = MULT16_16_Q15(QCONST16(.4f, 15), bins[i]); + t3 = MULT16_16_Q15(QCONST16(.15f, 15), bins[i]); + for (j=0;j<i;j++) + { + if (bins[j] < t1) + conseq++; + if (bins[j] < t2) + conseq++; + else + conseq = 0; + } + if (conseq>=3) + is_transient=1; + conseq = 0; + for (j=i+1;j<N;j++) + { + if (bins[j] < t3) + conseq++; + else + conseq = 0; + } + if (conseq>=7) + is_transient=1; + } + RESTORE_STACK; +#ifdef FUZZING + is_transient = rand()&0x1; +#endif + return is_transient; +} + +/** Apply window and compute the MDCT for all sub-frames and + all channels in a frame */ +static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * restrict in, celt_sig * restrict out, int C, int LM) +{ + if (C==1 && !shortBlocks) + { + const int overlap = OVERLAP(mode); + clt_mdct_forward(&mode->mdct, in, out, mode->window, overlap, mode->maxLM-LM, 1); + } else { + const int overlap = OVERLAP(mode); + int N = mode->shortMdctSize<<LM; + int B = 1; + int b, c; + if (shortBlocks) + { + N = mode->shortMdctSize; + B = shortBlocks; + } + c=0; do { + for (b=0;b<B;b++) + { + /* Interleaving the sub-frames while doing the MDCTs */ + clt_mdct_forward(&mode->mdct, in+c*(B*N+overlap)+b*N, &out[b+c*N*B], mode->window, overlap, shortBlocks ? mode->maxLM : mode->maxLM-LM, B); + } + } while (++c<C); + } +} + +/** Compute the IMDCT and apply window for all sub-frames and + all channels in a frame */ +static void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X, + celt_sig * restrict out_mem[], + celt_sig * restrict overlap_mem[], int C, int LM) +{ + int c; + const int N = mode->shortMdctSize<<LM; + const int overlap = OVERLAP(mode); + VARDECL(opus_val32, x); + SAVE_STACK; + + ALLOC(x, N+overlap, opus_val32); + c=0; do { + int j; + int b; + int N2 = N; + int B = 1; + + if (shortBlocks) + { + N2 = mode->shortMdctSize; + B = shortBlocks; + } + /* Prevents problems from the imdct doing the overlap-add */ + OPUS_CLEAR(x, overlap); + + for (b=0;b<B;b++) + { + /* IMDCT on the interleaved the sub-frames */ + clt_mdct_backward(&mode->mdct, &X[b+c*N2*B], x+N2*b, mode->window, overlap, shortBlocks ? 
mode->maxLM : mode->maxLM-LM, B); + } + + for (j=0;j<overlap;j++) + out_mem[c][j] = x[j] + overlap_mem[c][j]; + for (;j<N;j++) + out_mem[c][j] = x[j]; + for (j=0;j<overlap;j++) + overlap_mem[c][j] = x[N+j]; + } while (++c<C); + RESTORE_STACK; +} + +static void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem) +{ + int c; + int count=0; + c=0; do { + int j; + celt_sig * restrict x; + opus_val16 * restrict y; + celt_sig m = mem[c]; + x =in[c]; + y = pcm+c; + for (j=0;j<N;j++) + { + celt_sig tmp = *x + m; + m = MULT16_32_Q15(coef[0], tmp) + - MULT16_32_Q15(coef[1], *x); + tmp = SHL32(MULT16_32_Q15(coef[3], tmp), 2); + x++; + /* Technically the store could be moved outside of the if because + the stores we don't want will just be overwritten */ + if (count==0) + *y = SCALEOUT(SIG2WORD16(tmp)); + if (++count==downsample) + { + y+=C; + count=0; + } + } + mem[c] = m; + } while (++c<C); +} + +static void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N, + opus_val16 g0, opus_val16 g1, int tapset0, int tapset1, + const opus_val16 *window, int overlap) +{ + int i; + /* printf ("%d %d %f %f\n", T0, T1, g0, g1); */ + opus_val16 g00, g01, g02, g10, g11, g12; + static const opus_val16 gains[3][3] = { + {QCONST16(0.3066406250f, 15), QCONST16(0.2170410156f, 15), QCONST16(0.1296386719f, 15)}, + {QCONST16(0.4638671875f, 15), QCONST16(0.2680664062f, 15), QCONST16(0.f, 15)}, + {QCONST16(0.7998046875f, 15), QCONST16(0.1000976562f, 15), QCONST16(0.f, 15)}}; + g00 = MULT16_16_Q15(g0, gains[tapset0][0]); + g01 = MULT16_16_Q15(g0, gains[tapset0][1]); + g02 = MULT16_16_Q15(g0, gains[tapset0][2]); + g10 = MULT16_16_Q15(g1, gains[tapset1][0]); + g11 = MULT16_16_Q15(g1, gains[tapset1][1]); + g12 = MULT16_16_Q15(g1, gains[tapset1][2]); + for (i=0;i<overlap;i++) + { + opus_val16 f; + f = MULT16_16_Q15(window[i],window[i]); + y[i] = x[i] + + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g00),x[i-T0]) + + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g01),x[i-T0-1]) + + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g01),x[i-T0+1]) + + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g02),x[i-T0-2]) + + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g02),x[i-T0+2]) + + MULT16_32_Q15(MULT16_16_Q15(f,g10),x[i-T1]) + + MULT16_32_Q15(MULT16_16_Q15(f,g11),x[i-T1-1]) + + MULT16_32_Q15(MULT16_16_Q15(f,g11),x[i-T1+1]) + + MULT16_32_Q15(MULT16_16_Q15(f,g12),x[i-T1-2]) + + MULT16_32_Q15(MULT16_16_Q15(f,g12),x[i-T1+2]); + + } + for (i=overlap;i<N;i++) + y[i] = x[i] + + MULT16_32_Q15(g10,x[i-T1]) + + MULT16_32_Q15(g11,x[i-T1-1]) + + MULT16_32_Q15(g11,x[i-T1+1]) + + MULT16_32_Q15(g12,x[i-T1-2]) + + MULT16_32_Q15(g12,x[i-T1+2]); +} + +static const signed char tf_select_table[4][8] = { + {0, -1, 0, -1, 0,-1, 0,-1}, + {0, -1, 0, -2, 1, 0, 1,-1}, + {0, -2, 0, -3, 2, 0, 1,-1}, + {0, -2, 0, -3, 3, 0, 1,-1}, +}; + +static opus_val32 l1_metric(const celt_norm *tmp, int N, int LM, int width) +{ + int i, j; + static const opus_val16 sqrtM_1[4] = {Q15ONE, QCONST16(.70710678f,15), QCONST16(0.5f,15), QCONST16(0.35355339f,15)}; + opus_val32 L1; + opus_val16 bias; + L1=0; + for (i=0;i<1<<LM;i++) + { + opus_val32 L2 = 0; + for (j=0;j<N>>LM;j++) + L2 = MAC16_16(L2, tmp[(j<<LM)+i], tmp[(j<<LM)+i]); + L1 += celt_sqrt(L2); + } + L1 = MULT16_32_Q15(sqrtM_1[LM], L1); + if (width==1) + bias = QCONST16(.12f,15)*LM; + else if (width==2) + bias = QCONST16(.05f,15)*LM; + else + bias = QCONST16(.02f,15)*LM; + L1 = MAC16_32_Q15(L1, bias, L1); + return L1; +} + +static int tf_analysis(const CELTMode *m, int len, int C, int 
isTransient, + int *tf_res, int nbCompressedBytes, celt_norm *X, int N0, int LM, + int *tf_sum) +{ + int i; + VARDECL(int, metric); + int cost0; + int cost1; + VARDECL(int, path0); + VARDECL(int, path1); + VARDECL(celt_norm, tmp); + int lambda; + int tf_select=0; + SAVE_STACK; + + if (nbCompressedBytes<15*C) + { + *tf_sum = 0; + for (i=0;i<len;i++) + tf_res[i] = isTransient; + return 0; + } + if (nbCompressedBytes<40) + lambda = 12; + else if (nbCompressedBytes<60) + lambda = 6; + else if (nbCompressedBytes<100) + lambda = 4; + else + lambda = 3; + + ALLOC(metric, len, int); + ALLOC(tmp, (m->eBands[len]-m->eBands[len-1])<<LM, celt_norm); + ALLOC(path0, len, int); + ALLOC(path1, len, int); + + *tf_sum = 0; + for (i=0;i<len;i++) + { + int j, k, N; + opus_val32 L1, best_L1; + int best_level=0; + N = (m->eBands[i+1]-m->eBands[i])<<LM; + for (j=0;j<N;j++) + tmp[j] = X[j+(m->eBands[i]<<LM)]; + /* Just add the right channel if we're in stereo */ + if (C==2) + for (j=0;j<N;j++) + tmp[j] = ADD16(tmp[j],X[N0+j+(m->eBands[i]<<LM)]); + L1 = l1_metric(tmp, N, isTransient ? LM : 0, N>>LM); + best_L1 = L1; + /*printf ("%f ", L1);*/ + for (k=0;k<LM;k++) + { + int B; + + if (isTransient) + B = (LM-k-1); + else + B = k+1; + + if (isTransient) + haar1(tmp, N>>(LM-k), 1<<(LM-k)); + else + haar1(tmp, N>>k, 1<<k); + + L1 = l1_metric(tmp, N, B, N>>LM); + + if (L1 < best_L1) + { + best_L1 = L1; + best_level = k+1; + } + } + /*printf ("%d ", isTransient ? LM-best_level : best_level);*/ + if (isTransient) + metric[i] = best_level; + else + metric[i] = -best_level; + *tf_sum += metric[i]; + } + /*printf("\n");*/ + /* NOTE: Future optimized implementations could detect extreme transients and set + tf_select = 1 but so far we have not found a reliable way of making this useful */ + tf_select = 0; + + cost0 = 0; + cost1 = isTransient ? 0 : lambda; + /* Viterbi forward pass */ + for (i=1;i<len;i++) + { + int curr0, curr1; + int from0, from1; + + from0 = cost0; + from1 = cost1 + lambda; + if (from0 < from1) + { + curr0 = from0; + path0[i]= 0; + } else { + curr0 = from1; + path0[i]= 1; + } + + from0 = cost0 + lambda; + from1 = cost1; + if (from0 < from1) + { + curr1 = from0; + path1[i]= 0; + } else { + curr1 = from1; + path1[i]= 1; + } + cost0 = curr0 + abs(metric[i]-tf_select_table[LM][4*isTransient+2*tf_select+0]); + cost1 = curr1 + abs(metric[i]-tf_select_table[LM][4*isTransient+2*tf_select+1]); + } + tf_res[len-1] = cost0 < cost1 ? 0 : 1; + /* Viterbi backward pass to check the decisions */ + for (i=len-2;i>=0;i--) + { + if (tf_res[i+1] == 1) + tf_res[i] = path1[i+1]; + else + tf_res[i] = path0[i+1]; + } + RESTORE_STACK; +#ifdef FUZZING + tf_select = rand()&0x1; + tf_res[0] = rand()&0x1; + for (i=1;i<len;i++) + tf_res[i] = tf_res[i-1] ^ ((rand()&0xF) == 0); +#endif + return tf_select; +} + +static void tf_encode(int start, int end, int isTransient, int *tf_res, int LM, int tf_select, ec_enc *enc) +{ + int curr, i; + int tf_select_rsv; + int tf_changed; + int logp; + opus_uint32 budget; + opus_uint32 tell; + budget = enc->storage*8; + tell = ec_tell(enc); + logp = isTransient ? 2 : 4; + /* Reserve space to code the tf_select decision. */ + tf_select_rsv = LM>0 && tell+logp+1 <= budget; + budget -= tf_select_rsv; + curr = tf_changed = 0; + for (i=start;i<end;i++) + { + if (tell+logp<=budget) + { + ec_enc_bit_logp(enc, tf_res[i] ^ curr, logp); + tell = ec_tell(enc); + curr = tf_res[i]; + tf_changed |= curr; + } + else + tf_res[i] = curr; + logp = isTransient ? 
4 : 5; + } + /* Only code tf_select if it would actually make a difference. */ + if (tf_select_rsv && + tf_select_table[LM][4*isTransient+0+tf_changed]!= + tf_select_table[LM][4*isTransient+2+tf_changed]) + ec_enc_bit_logp(enc, tf_select, 1); + else + tf_select = 0; + for (i=start;i<end;i++) + tf_res[i] = tf_select_table[LM][4*isTransient+2*tf_select+tf_res[i]]; + /*printf("%d %d ", isTransient, tf_select); for(i=0;i<end;i++)printf("%d ", tf_res[i]);printf("\n");*/ +} + +static void tf_decode(int start, int end, int isTransient, int *tf_res, int LM, ec_dec *dec) +{ + int i, curr, tf_select; + int tf_select_rsv; + int tf_changed; + int logp; + opus_uint32 budget; + opus_uint32 tell; + + budget = dec->storage*8; + tell = ec_tell(dec); + logp = isTransient ? 2 : 4; + tf_select_rsv = LM>0 && tell+logp+1<=budget; + budget -= tf_select_rsv; + tf_changed = curr = 0; + for (i=start;i<end;i++) + { + if (tell+logp<=budget) + { + curr ^= ec_dec_bit_logp(dec, logp); + tell = ec_tell(dec); + tf_changed |= curr; + } + tf_res[i] = curr; + logp = isTransient ? 4 : 5; + } + tf_select = 0; + if (tf_select_rsv && + tf_select_table[LM][4*isTransient+0+tf_changed] != + tf_select_table[LM][4*isTransient+2+tf_changed]) + { + tf_select = ec_dec_bit_logp(dec, 1); + } + for (i=start;i<end;i++) + { + tf_res[i] = tf_select_table[LM][4*isTransient+2*tf_select+tf_res[i]]; + } +} + +static void init_caps(const CELTMode *m,int *cap,int LM,int C) +{ + int i; + for (i=0;i<m->nbEBands;i++) + { + int N; + N=(m->eBands[i+1]-m->eBands[i])<<LM; + cap[i] = (m->cache.caps[m->nbEBands*(2*LM+C-1)+i]+64)*C*N>>2; + } +} + +static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X, + const opus_val16 *bandLogE, int end, int LM, int C, int N0) +{ + int i; + opus_val32 diff=0; + int c; + int trim_index = 5; + if (C==2) + { + opus_val16 sum = 0; /* Q10 */ + /* Compute inter-channel correlation for low frequencies */ + for (i=0;i<8;i++) + { + int j; + opus_val32 partial = 0; + for (j=m->eBands[i]<<LM;j<m->eBands[i+1]<<LM;j++) + partial = MAC16_16(partial, X[j], X[N0+j]); + sum = ADD16(sum, EXTRACT16(SHR32(partial, 18))); + } + sum = MULT16_16_Q15(QCONST16(1.f/8, 15), sum); + /*printf ("%f\n", sum);*/ + if (sum > QCONST16(.995f,10)) + trim_index-=4; + else if (sum > QCONST16(.92f,10)) + trim_index-=3; + else if (sum > QCONST16(.85f,10)) + trim_index-=2; + else if (sum > QCONST16(.8f,10)) + trim_index-=1; + } + + /* Estimate spectral tilt */ + c=0; do { + for (i=0;i<end-1;i++) + { + diff += bandLogE[i+c*m->nbEBands]*(opus_int32)(2+2*i-m->nbEBands); + } + } while (++c<C); + /* We divide by two here to avoid making the tilt larger for stereo as a + result of a bug in the loop above */ + diff /= 2*C*(end-1); + /*printf("%f\n", diff);*/ + if (diff > QCONST16(2.f, DB_SHIFT)) + trim_index--; + if (diff > QCONST16(8.f, DB_SHIFT)) + trim_index--; + if (diff < -QCONST16(4.f, DB_SHIFT)) + trim_index++; + if (diff < -QCONST16(10.f, DB_SHIFT)) + trim_index++; + + if (trim_index<0) + trim_index = 0; + if (trim_index>10) + trim_index = 10; +#ifdef FUZZING + trim_index = rand()%11; +#endif + return trim_index; +} + +static int stereo_analysis(const CELTMode *m, const celt_norm *X, + int LM, int N0) +{ + int i; + int thetas; + opus_val32 sumLR = EPSILON, sumMS = EPSILON; + + /* Use the L1 norm to model the entropy of the L/R signal vs the M/S signal */ + for (i=0;i<13;i++) + { + int j; + for (j=m->eBands[i]<<LM;j<m->eBands[i+1]<<LM;j++) + { + opus_val16 L, R, M, S; + L = X[j]; + R = X[N0+j]; + M = L+R; + S = L-R; + sumLR += EXTEND32(ABS16(L)) + 
EXTEND32(ABS16(R)); + sumMS += EXTEND32(ABS16(M)) + EXTEND32(ABS16(S)); + } + } + sumMS = MULT16_32_Q15(QCONST16(0.707107f, 15), sumMS); + thetas = 13; + /* We don't need thetas for lower bands with LM<=1 */ + if (LM<=1) + thetas -= 8; + return MULT16_32_Q15((m->eBands[13]<<(LM+1))+thetas, sumMS) + > MULT16_32_Q15(m->eBands[13]<<(LM+1), sumLR); +} + +int celt_encode_with_ec(CELTEncoder * restrict st, const opus_val16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes, ec_enc *enc) +{ + int i, c, N; + opus_int32 bits; + ec_enc _enc; + VARDECL(celt_sig, in); + VARDECL(celt_sig, freq); + VARDECL(celt_norm, X); + VARDECL(celt_ener, bandE); + VARDECL(opus_val16, bandLogE); + VARDECL(int, fine_quant); + VARDECL(opus_val16, error); + VARDECL(int, pulses); + VARDECL(int, cap); + VARDECL(int, offsets); + VARDECL(int, fine_priority); + VARDECL(int, tf_res); + VARDECL(unsigned char, collapse_masks); + celt_sig *prefilter_mem; + opus_val16 *oldBandE, *oldLogE, *oldLogE2; + int shortBlocks=0; + int isTransient=0; + const int CC = st->channels; + const int C = st->stream_channels; + int LM, M; + int tf_select; + int nbFilledBytes, nbAvailableBytes; + int effEnd; + int codedBands; + int tf_sum; + int alloc_trim; + int pitch_index=COMBFILTER_MINPERIOD; + opus_val16 gain1 = 0; + int intensity=0; + int dual_stereo=0; + int effectiveBytes; + opus_val16 pf_threshold; + int dynalloc_logp; + opus_int32 vbr_rate; + opus_int32 total_bits; + opus_int32 total_boost; + opus_int32 balance; + opus_int32 tell; + int prefilter_tapset=0; + int pf_on; + int anti_collapse_rsv; + int anti_collapse_on=0; + int silence=0; + ALLOC_STACK; + + if (nbCompressedBytes<2 || pcm==NULL) + return OPUS_BAD_ARG; + + frame_size *= st->upsample; + for (LM=0;LM<=st->mode->maxLM;LM++) + if (st->mode->shortMdctSize<<LM==frame_size) + break; + if (LM>st->mode->maxLM) + return OPUS_BAD_ARG; + M=1<<LM; + N = M*st->mode->shortMdctSize; + + prefilter_mem = st->in_mem+CC*(st->overlap); + oldBandE = (opus_val16*)(st->in_mem+CC*(2*st->overlap+COMBFILTER_MAXPERIOD)); + oldLogE = oldBandE + CC*st->mode->nbEBands; + oldLogE2 = oldLogE + CC*st->mode->nbEBands; + + if (enc==NULL) + { + tell=1; + nbFilledBytes=0; + } else { + tell=ec_tell(enc); + nbFilledBytes=(tell+4)>>3; + } + +#ifdef CUSTOM_MODES + if (st->signalling && enc==NULL) + { + int tmp = (st->mode->effEBands-st->end)>>1; + st->end = IMAX(1, st->mode->effEBands-tmp); + compressed[0] = tmp<<5; + compressed[0] |= LM<<3; + compressed[0] |= (C==2)<<2; + /* Convert "standard mode" to Opus header */ + if (st->mode->Fs==48000 && st->mode->shortMdctSize==120) + { + int c0 = toOpus(compressed[0]); + if (c0<0) + return OPUS_BAD_ARG; + compressed[0] = c0; + } + compressed++; + nbCompressedBytes--; + } +#else + celt_assert(st->signalling==0); +#endif + + /* Can't produce more than 1275 output bytes */ + nbCompressedBytes = IMIN(nbCompressedBytes,1275); + nbAvailableBytes = nbCompressedBytes - nbFilledBytes; + + if (st->vbr && st->bitrate!=OPUS_BITRATE_MAX) + { + opus_int32 den=st->mode->Fs>>BITRES; + vbr_rate=(st->bitrate*frame_size+(den>>1))/den; +#ifdef CUSTOM_MODES + if (st->signalling) + vbr_rate -= 8<<BITRES; +#endif + effectiveBytes = vbr_rate>>(3+BITRES); + } else { + opus_int32 tmp; + vbr_rate = 0; + tmp = st->bitrate*frame_size; + if (tell>1) + tmp += tell; + if (st->bitrate!=OPUS_BITRATE_MAX) + nbCompressedBytes = IMAX(2, IMIN(nbCompressedBytes, + (tmp+4*st->mode->Fs)/(8*st->mode->Fs)-!!st->signalling)); + effectiveBytes = nbCompressedBytes; + } + + if (enc==NULL) + { + 
ec_enc_init(&_enc, compressed, nbCompressedBytes); + enc = &_enc; + } + + if (vbr_rate>0) + { + /* Computes the max bit-rate allowed in VBR mode to avoid violating the + target rate and buffering. + We must do this up front so that bust-prevention logic triggers + correctly if we don't have enough bits. */ + if (st->constrained_vbr) + { + opus_int32 vbr_bound; + opus_int32 max_allowed; + /* We could use any multiple of vbr_rate as bound (depending on the + delay). + This is clamped to ensure we use at least two bytes if the encoder + was entirely empty, but to allow 0 in hybrid mode. */ + vbr_bound = vbr_rate; + max_allowed = IMIN(IMAX(tell==1?2:0, + (vbr_rate+vbr_bound-st->vbr_reservoir)>>(BITRES+3)), + nbAvailableBytes); + if(max_allowed < nbAvailableBytes) + { + nbCompressedBytes = nbFilledBytes+max_allowed; + nbAvailableBytes = max_allowed; + ec_enc_shrink(enc, nbCompressedBytes); + } + } + } + total_bits = nbCompressedBytes*8; + + effEnd = st->end; + if (effEnd > st->mode->effEBands) + effEnd = st->mode->effEBands; + + ALLOC(in, CC*(N+st->overlap), celt_sig); + + /* Find pitch period and gain */ + { + VARDECL(celt_sig, _pre); + celt_sig *pre[2]; + SAVE_STACK; + ALLOC(_pre, CC*(N+COMBFILTER_MAXPERIOD), celt_sig); + + pre[0] = _pre; + pre[1] = _pre + (N+COMBFILTER_MAXPERIOD); + + silence = 1; + c=0; do { + int count = 0; + const opus_val16 * restrict pcmp = pcm+c; + celt_sig * restrict inp = in+c*(N+st->overlap)+st->overlap; + + for (i=0;i<N;i++) + { + celt_sig x, tmp; + + x = SCALEIN(*pcmp); +#ifndef FIXED_POINT + if (!(x==x)) + x = 0; + if (st->clip) + x = MAX32(-65536.f, MIN32(65536.f,x)); +#endif + if (++count==st->upsample) + { + count=0; + pcmp+=CC; + } else { + x = 0; + } + /* Apply pre-emphasis */ + tmp = MULT16_16(st->mode->preemph[2], x); + *inp = tmp + st->preemph_memE[c]; + st->preemph_memE[c] = MULT16_32_Q15(st->mode->preemph[1], *inp) + - MULT16_32_Q15(st->mode->preemph[0], tmp); + silence = silence && *inp == 0; + inp++; + } + OPUS_COPY(pre[c], prefilter_mem+c*COMBFILTER_MAXPERIOD, COMBFILTER_MAXPERIOD); + OPUS_COPY(pre[c]+COMBFILTER_MAXPERIOD, in+c*(N+st->overlap)+st->overlap, N); + } while (++c<CC); + +#ifdef FUZZING + if ((rand()&0x3F)==0) + silence = 1; +#endif + if (tell==1) + ec_enc_bit_logp(enc, silence, 15); + else + silence=0; + if (silence) + { + /*In VBR mode there is no need to send more than the minimum. 
*/ + if (vbr_rate>0) + { + effectiveBytes=nbCompressedBytes=IMIN(nbCompressedBytes, nbFilledBytes+2); + total_bits=nbCompressedBytes*8; + nbAvailableBytes=2; + ec_enc_shrink(enc, nbCompressedBytes); + } + /* Pretend we've filled all the remaining bits with zeros + (that's what the initialiser did anyway) */ + tell = nbCompressedBytes*8; + enc->nbits_total+=tell-ec_tell(enc); + } + if (nbAvailableBytes>12*C && st->start==0 && !silence && !st->disable_pf && st->complexity >= 5) + { + VARDECL(opus_val16, pitch_buf); + ALLOC(pitch_buf, (COMBFILTER_MAXPERIOD+N)>>1, opus_val16); + + pitch_downsample(pre, pitch_buf, COMBFILTER_MAXPERIOD+N, CC); + pitch_search(pitch_buf+(COMBFILTER_MAXPERIOD>>1), pitch_buf, N, + COMBFILTER_MAXPERIOD-COMBFILTER_MINPERIOD, &pitch_index); + pitch_index = COMBFILTER_MAXPERIOD-pitch_index; + + gain1 = remove_doubling(pitch_buf, COMBFILTER_MAXPERIOD, COMBFILTER_MINPERIOD, + N, &pitch_index, st->prefilter_period, st->prefilter_gain); + if (pitch_index > COMBFILTER_MAXPERIOD-2) + pitch_index = COMBFILTER_MAXPERIOD-2; + gain1 = MULT16_16_Q15(QCONST16(.7f,15),gain1); + if (st->loss_rate>2) + gain1 = HALF32(gain1); + if (st->loss_rate>4) + gain1 = HALF32(gain1); + if (st->loss_rate>8) + gain1 = 0; + prefilter_tapset = st->tapset_decision; + } else { + gain1 = 0; + } + + /* Gain threshold for enabling the prefilter/postfilter */ + pf_threshold = QCONST16(.2f,15); + + /* Adjusting the threshold based on rate and continuity */ + if (abs(pitch_index-st->prefilter_period)*10>pitch_index) + pf_threshold += QCONST16(.2f,15); + if (nbAvailableBytes<25) + pf_threshold += QCONST16(.1f,15); + if (nbAvailableBytes<35) + pf_threshold += QCONST16(.1f,15); + if (st->prefilter_gain > QCONST16(.4f,15)) + pf_threshold -= QCONST16(.1f,15); + if (st->prefilter_gain > QCONST16(.55f,15)) + pf_threshold -= QCONST16(.1f,15); + + /* Hard threshold at 0.2 */ + pf_threshold = MAX16(pf_threshold, QCONST16(.2f,15)); + if (gain1<pf_threshold) + { + if(st->start==0 && tell+16<=total_bits) + ec_enc_bit_logp(enc, 0, 1); + gain1 = 0; + pf_on = 0; + } else { + /*This block is not gated by a total bits check only because + of the nbAvailableBytes check above.*/ + int qg; + int octave; + + if (ABS16(gain1-st->prefilter_gain)<QCONST16(.1f,15)) + gain1=st->prefilter_gain; + +#ifdef FIXED_POINT + qg = ((gain1+1536)>>10)/3-1; +#else + qg = (int)floor(.5f+gain1*32/3)-1; +#endif + qg = IMAX(0, IMIN(7, qg)); + ec_enc_bit_logp(enc, 1, 1); + pitch_index += 1; + octave = EC_ILOG(pitch_index)-5; + ec_enc_uint(enc, octave, 6); + ec_enc_bits(enc, pitch_index-(16<<octave), 4+octave); + pitch_index -= 1; + ec_enc_bits(enc, qg, 3); + if (ec_tell(enc)+2<=total_bits) + ec_enc_icdf(enc, prefilter_tapset, tapset_icdf, 2); + else + prefilter_tapset = 0; + gain1 = QCONST16(0.09375f,15)*(qg+1); + pf_on = 1; + } + /*printf("%d %f\n", pitch_index, gain1);*/ + + c=0; do { + int offset = st->mode->shortMdctSize-st->mode->overlap; + st->prefilter_period=IMAX(st->prefilter_period, COMBFILTER_MINPERIOD); + OPUS_COPY(in+c*(N+st->overlap), st->in_mem+c*(st->overlap), st->overlap); + if (offset) + comb_filter(in+c*(N+st->overlap)+st->overlap, pre[c]+COMBFILTER_MAXPERIOD, + st->prefilter_period, st->prefilter_period, offset, -st->prefilter_gain, -st->prefilter_gain, + st->prefilter_tapset, st->prefilter_tapset, NULL, 0); + + comb_filter(in+c*(N+st->overlap)+st->overlap+offset, pre[c]+COMBFILTER_MAXPERIOD+offset, + st->prefilter_period, pitch_index, N-offset, -st->prefilter_gain, -gain1, + st->prefilter_tapset, prefilter_tapset, 
st->mode->window, st->mode->overlap); + OPUS_COPY(st->in_mem+c*(st->overlap), in+c*(N+st->overlap)+N, st->overlap); + + if (N>COMBFILTER_MAXPERIOD) + { + OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD, pre[c]+N, COMBFILTER_MAXPERIOD); + } else { + OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD, prefilter_mem+c*COMBFILTER_MAXPERIOD+N, COMBFILTER_MAXPERIOD-N); + OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD+COMBFILTER_MAXPERIOD-N, pre[c]+COMBFILTER_MAXPERIOD, N); + } + } while (++c<CC); + + RESTORE_STACK; + } + + isTransient = 0; + shortBlocks = 0; + if (LM>0 && ec_tell(enc)+3<=total_bits) + { + if (st->complexity > 1) + { + isTransient = transient_analysis(in, N+st->overlap, CC, + st->overlap); + if (isTransient) + shortBlocks = M; + } + ec_enc_bit_logp(enc, isTransient, 3); + } + + ALLOC(freq, CC*N, celt_sig); /**< Interleaved signal MDCTs */ + ALLOC(bandE,st->mode->nbEBands*CC, celt_ener); + ALLOC(bandLogE,st->mode->nbEBands*CC, opus_val16); + /* Compute MDCTs */ + compute_mdcts(st->mode, shortBlocks, in, freq, CC, LM); + + if (CC==2&&C==1) + { + for (i=0;i<N;i++) + freq[i] = ADD32(HALF32(freq[i]), HALF32(freq[N+i])); + } + if (st->upsample != 1) + { + c=0; do + { + int bound = N/st->upsample; + for (i=0;i<bound;i++) + freq[c*N+i] *= st->upsample; + for (;i<N;i++) + freq[c*N+i] = 0; + } while (++c<C); + } + ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */ + + compute_band_energies(st->mode, freq, bandE, effEnd, C, M); + + amp2Log2(st->mode, effEnd, st->end, bandE, bandLogE, C); + + /* Band normalisation */ + normalise_bands(st->mode, freq, X, bandE, effEnd, C, M); + + ALLOC(tf_res, st->mode->nbEBands, int); + tf_select = tf_analysis(st->mode, effEnd, C, isTransient, tf_res, effectiveBytes, X, N, LM, &tf_sum); + for (i=effEnd;i<st->end;i++) + tf_res[i] = tf_res[effEnd-1]; + + ALLOC(error, C*st->mode->nbEBands, opus_val16); + quant_coarse_energy(st->mode, st->start, st->end, effEnd, bandLogE, + oldBandE, total_bits, error, enc, + C, LM, nbAvailableBytes, st->force_intra, + &st->delayedIntra, st->complexity >= 4, st->loss_rate); + + tf_encode(st->start, st->end, isTransient, tf_res, LM, tf_select, enc); + + st->spread_decision = SPREAD_NORMAL; + if (ec_tell(enc)+4<=total_bits) + { + if (shortBlocks || st->complexity < 3 || nbAvailableBytes < 10*C) + { + if (st->complexity == 0) + st->spread_decision = SPREAD_NONE; + } else { + st->spread_decision = spreading_decision(st->mode, X, + &st->tonal_average, st->spread_decision, &st->hf_average, + &st->tapset_decision, pf_on&&!shortBlocks, effEnd, C, M); + } + ec_enc_icdf(enc, st->spread_decision, spread_icdf, 5); + } + + ALLOC(cap, st->mode->nbEBands, int); + ALLOC(offsets, st->mode->nbEBands, int); + + init_caps(st->mode,cap,LM,C); + for (i=0;i<st->mode->nbEBands;i++) + offsets[i] = 0; + /* Dynamic allocation code */ + /* Make sure that dynamic allocation can't make us bust the budget */ + if (effectiveBytes > 50 && LM>=1) + { + int t1, t2; + if (LM <= 1) + { + t1 = 3; + t2 = 5; + } else { + t1 = 2; + t2 = 4; + } + for (i=st->start+1;i<st->end-1;i++) + { + opus_val32 d2; + d2 = 2*bandLogE[i]-bandLogE[i-1]-bandLogE[i+1]; + if (C==2) + d2 = HALF32(d2 + 2*bandLogE[i+st->mode->nbEBands]- + bandLogE[i-1+st->mode->nbEBands]-bandLogE[i+1+st->mode->nbEBands]); +#ifdef FUZZING + if((rand()&0xF)==0) + { + offsets[i] += 1; + if((rand()&0x3)==0) + offsets[i] += 1+(rand()&0x3); + } +#else + if (d2 > SHL16(t1,DB_SHIFT)) + offsets[i] += 1; + if (d2 > SHL16(t2,DB_SHIFT)) + offsets[i] += 1; +#endif + } + } + dynalloc_logp = 6; + 
total_bits<<=BITRES; + total_boost = 0; + tell = ec_tell_frac(enc); + for (i=st->start;i<st->end;i++) + { + int width, quanta; + int dynalloc_loop_logp; + int boost; + int j; + width = C*(st->mode->eBands[i+1]-st->mode->eBands[i])<<LM; + /* quanta is 6 bits, but no more than 1 bit/sample + and no less than 1/8 bit/sample */ + quanta = IMIN(width<<BITRES, IMAX(6<<BITRES, width)); + dynalloc_loop_logp = dynalloc_logp; + boost = 0; + for (j = 0; tell+(dynalloc_loop_logp<<BITRES) < total_bits-total_boost + && boost < cap[i]; j++) + { + int flag; + flag = j<offsets[i]; + ec_enc_bit_logp(enc, flag, dynalloc_loop_logp); + tell = ec_tell_frac(enc); + if (!flag) + break; + boost += quanta; + total_boost += quanta; + dynalloc_loop_logp = 1; + } + /* Making dynalloc more likely */ + if (j) + dynalloc_logp = IMAX(2, dynalloc_logp-1); + offsets[i] = boost; + } + alloc_trim = 5; + if (tell+(6<<BITRES) <= total_bits - total_boost) + { + alloc_trim = alloc_trim_analysis(st->mode, X, bandLogE, + st->end, LM, C, N); + ec_enc_icdf(enc, alloc_trim, trim_icdf, 7); + tell = ec_tell_frac(enc); + } + + /* Variable bitrate */ + if (vbr_rate>0) + { + opus_val16 alpha; + opus_int32 delta; + /* The target rate in 8th bits per frame */ + opus_int32 target; + opus_int32 min_allowed; + int lm_diff = st->mode->maxLM - LM; + + target = vbr_rate + (st->vbr_offset>>lm_diff) - ((40*C+20)<<BITRES); + + /* Shortblocks get a large boost in bitrate, but since they + are uncommon long blocks are not greatly affected */ + if (shortBlocks || tf_sum < -2*(st->end-st->start)) + target = 7*target/4; + else if (tf_sum < -(st->end-st->start)) + target = 3*target/2; + else if (M > 1) + target-=(target+14)/28; + + /* The current offset is removed from the target and the space used + so far is added*/ + target=target+tell; + + /* In VBR mode the frame size must not be reduced so much that it would + result in the encoder running out of bits. + The margin of 2 bytes ensures that none of the bust-prevention logic + in the decoder will have triggered so far. */ + min_allowed = ((tell+total_boost+(1<<(BITRES+3))-1)>>(BITRES+3)) + 2 - nbFilledBytes; + + nbAvailableBytes = (target+(1<<(BITRES+2)))>>(BITRES+3); + nbAvailableBytes = IMAX(min_allowed,nbAvailableBytes); + nbAvailableBytes = IMIN(nbCompressedBytes,nbAvailableBytes+nbFilledBytes) - nbFilledBytes; + + /* By how much did we "miss" the target on that frame */ + delta = target - vbr_rate; + + target=nbAvailableBytes<<(BITRES+3); + + /*If the frame is silent we don't adjust our drift, otherwise + the encoder will shoot to very high rates after hitting a + span of silence, but we do allow the bitres to refill. + This means that we'll undershoot our target in CVBR/VBR modes + on files with lots of silence. 
*/ + if(silence) + { + nbAvailableBytes = 2; + target = 2*8<<BITRES; + delta = 0; + } + + if (st->vbr_count < 970) + { + st->vbr_count++; + alpha = celt_rcp(SHL32(EXTEND32(st->vbr_count+20),16)); + } else + alpha = QCONST16(.001f,15); + /* How many bits have we used in excess of what we're allowed */ + if (st->constrained_vbr) + st->vbr_reservoir += target - vbr_rate; + /*printf ("%d\n", st->vbr_reservoir);*/ + + /* Compute the offset we need to apply in order to reach the target */ + st->vbr_drift += (opus_int32)MULT16_32_Q15(alpha,(delta*(1<<lm_diff))-st->vbr_offset-st->vbr_drift); + st->vbr_offset = -st->vbr_drift; + /*printf ("%d\n", st->vbr_drift);*/ + + if (st->constrained_vbr && st->vbr_reservoir < 0) + { + /* We're under the min value -- increase rate */ + int adjust = (-st->vbr_reservoir)/(8<<BITRES); + /* Unless we're just coding silence */ + nbAvailableBytes += silence?0:adjust; + st->vbr_reservoir = 0; + /*printf ("+%d\n", adjust);*/ + } + nbCompressedBytes = IMIN(nbCompressedBytes,nbAvailableBytes+nbFilledBytes); + /* This moves the raw bits to take into account the new compressed size */ + ec_enc_shrink(enc, nbCompressedBytes); + } + if (C==2) + { + int effectiveRate; + + /* Always use MS for 2.5 ms frames until we can do a better analysis */ + if (LM!=0) + dual_stereo = stereo_analysis(st->mode, X, LM, N); + + /* Account for coarse energy */ + effectiveRate = (8*effectiveBytes - 80)>>LM; + + /* effectiveRate in kb/s */ + effectiveRate = 2*effectiveRate/5; + if (effectiveRate<35) + intensity = 8; + else if (effectiveRate<50) + intensity = 12; + else if (effectiveRate<68) + intensity = 16; + else if (effectiveRate<84) + intensity = 18; + else if (effectiveRate<102) + intensity = 19; + else if (effectiveRate<130) + intensity = 20; + else + intensity = 100; + intensity = IMIN(st->end,IMAX(st->start, intensity)); + } + + /* Bit allocation */ + ALLOC(fine_quant, st->mode->nbEBands, int); + ALLOC(pulses, st->mode->nbEBands, int); + ALLOC(fine_priority, st->mode->nbEBands, int); + + /* bits = packet size - where we are - safety*/ + bits = (((opus_int32)nbCompressedBytes*8)<<BITRES) - ec_tell_frac(enc) - 1; + anti_collapse_rsv = isTransient&&LM>=2&&bits>=((LM+2)<<BITRES) ? (1<<BITRES) : 0; + bits -= anti_collapse_rsv; + codedBands = compute_allocation(st->mode, st->start, st->end, offsets, cap, + alloc_trim, &intensity, &dual_stereo, bits, &balance, pulses, + fine_quant, fine_priority, C, LM, enc, 1, st->lastCodedBands); + st->lastCodedBands = codedBands; + + quant_fine_energy(st->mode, st->start, st->end, oldBandE, error, fine_quant, enc, C); + +#ifdef MEASURE_NORM_MSE + float X0[3000]; + float bandE0[60]; + c=0; do + for (i=0;i<N;i++) + X0[i+c*N] = X[i+c*N]; + while (++c<C); + for (i=0;i<C*st->mode->nbEBands;i++) + bandE0[i] = bandE[i]; +#endif + + /* Residual quantisation */ + ALLOC(collapse_masks, C*st->mode->nbEBands, unsigned char); + quant_all_bands(1, st->mode, st->start, st->end, X, C==2 ? 
X+N : NULL, collapse_masks, + bandE, pulses, shortBlocks, st->spread_decision, dual_stereo, intensity, tf_res, + nbCompressedBytes*(8<<BITRES)-anti_collapse_rsv, balance, enc, LM, codedBands, &st->rng); + + if (anti_collapse_rsv > 0) + { + anti_collapse_on = st->consec_transient<2; +#ifdef FUZZING + anti_collapse_on = rand()&0x1; +#endif + ec_enc_bits(enc, anti_collapse_on, 1); + } + quant_energy_finalise(st->mode, st->start, st->end, oldBandE, error, fine_quant, fine_priority, nbCompressedBytes*8-ec_tell(enc), enc, C); + + if (silence) + { + for (i=0;i<C*st->mode->nbEBands;i++) + oldBandE[i] = -QCONST16(28.f,DB_SHIFT); + } + +#ifdef RESYNTH + /* Re-synthesis of the coded audio if required */ + { + celt_sig *out_mem[2]; + celt_sig *overlap_mem[2]; + + log2Amp(st->mode, st->start, st->end, bandE, oldBandE, C); + if (silence) + { + for (i=0;i<C*st->mode->nbEBands;i++) + bandE[i] = 0; + } + +#ifdef MEASURE_NORM_MSE + measure_norm_mse(st->mode, X, X0, bandE, bandE0, M, N, C); +#endif + if (anti_collapse_on) + { + anti_collapse(st->mode, X, collapse_masks, LM, C, CC, N, + st->start, st->end, oldBandE, oldLogE, oldLogE2, pulses, st->rng); + } + + /* Synthesis */ + denormalise_bands(st->mode, X, freq, bandE, effEnd, C, M); + + OPUS_MOVE(st->syn_mem[0], st->syn_mem[0]+N, MAX_PERIOD); + if (CC==2) + OPUS_MOVE(st->syn_mem[1], st->syn_mem[1]+N, MAX_PERIOD); + + c=0; do + for (i=0;i<M*st->mode->eBands[st->start];i++) + freq[c*N+i] = 0; + while (++c<C); + c=0; do + for (i=M*st->mode->eBands[st->end];i<N;i++) + freq[c*N+i] = 0; + while (++c<C); + + if (CC==2&&C==1) + { + for (i=0;i<N;i++) + freq[N+i] = freq[i]; + } + + out_mem[0] = st->syn_mem[0]+MAX_PERIOD; + if (CC==2) + out_mem[1] = st->syn_mem[1]+MAX_PERIOD; + + overlap_mem[0] = prefilter_mem+CC*COMBFILTER_MAXPERIOD; + if (CC==2) + overlap_mem[1] = overlap_mem[0] + st->overlap; + + compute_inv_mdcts(st->mode, shortBlocks, freq, out_mem, overlap_mem, CC, LM); + + c=0; do { + st->prefilter_period=IMAX(st->prefilter_period, COMBFILTER_MINPERIOD); + st->prefilter_period_old=IMAX(st->prefilter_period_old, COMBFILTER_MINPERIOD); + comb_filter(out_mem[c], out_mem[c], st->prefilter_period_old, st->prefilter_period, st->mode->shortMdctSize, + st->prefilter_gain_old, st->prefilter_gain, st->prefilter_tapset_old, st->prefilter_tapset, + st->mode->window, st->overlap); + if (LM!=0) + comb_filter(out_mem[c]+st->mode->shortMdctSize, out_mem[c]+st->mode->shortMdctSize, st->prefilter_period, pitch_index, N-st->mode->shortMdctSize, + st->prefilter_gain, gain1, st->prefilter_tapset, prefilter_tapset, + st->mode->window, st->mode->overlap); + } while (++c<CC); + + deemphasis(out_mem, (opus_val16*)pcm, N, CC, st->upsample, st->mode->preemph, st->preemph_memD); + st->prefilter_period_old = st->prefilter_period; + st->prefilter_gain_old = st->prefilter_gain; + st->prefilter_tapset_old = st->prefilter_tapset; + } +#endif + + st->prefilter_period = pitch_index; + st->prefilter_gain = gain1; + st->prefilter_tapset = prefilter_tapset; +#ifdef RESYNTH + if (LM!=0) + { + st->prefilter_period_old = st->prefilter_period; + st->prefilter_gain_old = st->prefilter_gain; + st->prefilter_tapset_old = st->prefilter_tapset; + } +#endif + + if (CC==2&&C==1) { + for (i=0;i<st->mode->nbEBands;i++) + oldBandE[st->mode->nbEBands+i]=oldBandE[i]; + } + + if (!isTransient) + { + for (i=0;i<CC*st->mode->nbEBands;i++) + oldLogE2[i] = oldLogE[i]; + for (i=0;i<CC*st->mode->nbEBands;i++) + oldLogE[i] = oldBandE[i]; + } else { + for (i=0;i<CC*st->mode->nbEBands;i++) + oldLogE[i] = MIN16(oldLogE[i], 
oldBandE[i]); + } + /* In case start or end were to change */ + c=0; do + { + for (i=0;i<st->start;i++) + { + oldBandE[c*st->mode->nbEBands+i]=0; + oldLogE[c*st->mode->nbEBands+i]=oldLogE2[c*st->mode->nbEBands+i]=-QCONST16(28.f,DB_SHIFT); + } + for (i=st->end;i<st->mode->nbEBands;i++) + { + oldBandE[c*st->mode->nbEBands+i]=0; + oldLogE[c*st->mode->nbEBands+i]=oldLogE2[c*st->mode->nbEBands+i]=-QCONST16(28.f,DB_SHIFT); + } + } while (++c<CC); + + if (isTransient) + st->consec_transient++; + else + st->consec_transient=0; + st->rng = enc->rng; + + /* If there's any room left (can only happen for very high rates), + it's already filled with zeros */ + ec_enc_done(enc); + +#ifdef CUSTOM_MODES + if (st->signalling) + nbCompressedBytes++; +#endif + + RESTORE_STACK; + if (ec_get_error(enc)) + return OPUS_INTERNAL_ERROR; + else + return nbCompressedBytes; +} + + +#ifdef CUSTOM_MODES + +#ifdef FIXED_POINT +int opus_custom_encode(CELTEncoder * restrict st, const opus_int16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes) +{ + return celt_encode_with_ec(st, pcm, frame_size, compressed, nbCompressedBytes, NULL); +} + +#ifndef DISABLE_FLOAT_API +int opus_custom_encode_float(CELTEncoder * restrict st, const float * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes) +{ + int j, ret, C, N; + VARDECL(opus_int16, in); + ALLOC_STACK; + + if (pcm==NULL) + return OPUS_BAD_ARG; + + C = st->channels; + N = frame_size; + ALLOC(in, C*N, opus_int16); + + for (j=0;j<C*N;j++) + in[j] = FLOAT2INT16(pcm[j]); + + ret=celt_encode_with_ec(st,in,frame_size,compressed,nbCompressedBytes, NULL); +#ifdef RESYNTH + for (j=0;j<C*N;j++) + ((float*)pcm)[j]=in[j]*(1.f/32768.f); +#endif + RESTORE_STACK; + return ret; +} +#endif /* DISABLE_FLOAT_API */ +#else + +int opus_custom_encode(CELTEncoder * restrict st, const opus_int16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes) +{ + int j, ret, C, N; + VARDECL(celt_sig, in); + ALLOC_STACK; + + if (pcm==NULL) + return OPUS_BAD_ARG; + + C=st->channels; + N=frame_size; + ALLOC(in, C*N, celt_sig); + for (j=0;j<C*N;j++) { + in[j] = SCALEOUT(pcm[j]); + } + + ret = celt_encode_with_ec(st,in,frame_size,compressed,nbCompressedBytes, NULL); +#ifdef RESYNTH + for (j=0;j<C*N;j++) + ((opus_int16*)pcm)[j] = FLOAT2INT16(in[j]); +#endif + RESTORE_STACK; + return ret; +} + +int opus_custom_encode_float(CELTEncoder * restrict st, const float * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes) +{ + return celt_encode_with_ec(st, pcm, frame_size, compressed, nbCompressedBytes, NULL); +} + +#endif + +#endif /* CUSTOM_MODES */ + +int opus_custom_encoder_ctl(CELTEncoder * restrict st, int request, ...) 
+{ + va_list ap; + + va_start(ap, request); + switch (request) + { + case OPUS_SET_COMPLEXITY_REQUEST: + { + int value = va_arg(ap, opus_int32); + if (value<0 || value>10) + goto bad_arg; + st->complexity = value; + } + break; + case CELT_SET_START_BAND_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + if (value<0 || value>=st->mode->nbEBands) + goto bad_arg; + st->start = value; + } + break; + case CELT_SET_END_BAND_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + if (value<1 || value>st->mode->nbEBands) + goto bad_arg; + st->end = value; + } + break; + case CELT_SET_PREDICTION_REQUEST: + { + int value = va_arg(ap, opus_int32); + if (value<0 || value>2) + goto bad_arg; + st->disable_pf = value<=1; + st->force_intra = value==0; + } + break; + case OPUS_SET_PACKET_LOSS_PERC_REQUEST: + { + int value = va_arg(ap, opus_int32); + if (value<0 || value>100) + goto bad_arg; + st->loss_rate = value; + } + break; + case OPUS_SET_VBR_CONSTRAINT_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + st->constrained_vbr = value; + } + break; + case OPUS_SET_VBR_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + st->vbr = value; + } + break; + case OPUS_SET_BITRATE_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + if (value<=500 && value!=OPUS_BITRATE_MAX) + goto bad_arg; + value = IMIN(value, 260000*st->channels); + st->bitrate = value; + } + break; + case CELT_SET_CHANNELS_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + if (value<1 || value>2) + goto bad_arg; + st->stream_channels = value; + } + break; + case OPUS_RESET_STATE: + { + int i; + opus_val16 *oldBandE, *oldLogE, *oldLogE2; + oldBandE = (opus_val16*)(st->in_mem+st->channels*(2*st->overlap+COMBFILTER_MAXPERIOD)); + oldLogE = oldBandE + st->channels*st->mode->nbEBands; + oldLogE2 = oldLogE + st->channels*st->mode->nbEBands; + OPUS_CLEAR((char*)&st->ENCODER_RESET_START, + opus_custom_encoder_get_size(st->mode, st->channels)- + ((char*)&st->ENCODER_RESET_START - (char*)st)); + for (i=0;i<st->channels*st->mode->nbEBands;i++) + oldLogE[i]=oldLogE2[i]=-QCONST16(28.f,DB_SHIFT); + st->vbr_offset = 0; + st->delayedIntra = 1; + st->spread_decision = SPREAD_NORMAL; + st->tonal_average = 256; + st->hf_average = 0; + st->tapset_decision = 0; + } + break; +#ifdef CUSTOM_MODES + case CELT_SET_INPUT_CLIPPING_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + st->clip = value; + } + break; +#endif + case CELT_SET_SIGNALLING_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + st->signalling = value; + } + break; + case CELT_GET_MODE_REQUEST: + { + const CELTMode ** value = va_arg(ap, const CELTMode**); + if (value==0) + goto bad_arg; + *value=st->mode; + } + break; + case OPUS_GET_FINAL_RANGE_REQUEST: + { + opus_uint32 * value = va_arg(ap, opus_uint32 *); + if (value==0) + goto bad_arg; + *value=st->rng; + } + break; + default: + goto bad_request; + } + va_end(ap); + return OPUS_OK; +bad_arg: + va_end(ap); + return OPUS_BAD_ARG; +bad_request: + va_end(ap); + return OPUS_UNIMPLEMENTED; +} + +/**********************************************************************/ +/* */ +/* DECODER */ +/* */ +/**********************************************************************/ +#define DECODE_BUFFER_SIZE 2048 + +/** Decoder state + @brief Decoder state + */ +struct OpusCustomDecoder { + const OpusCustomMode *mode; + int overlap; + int channels; + int stream_channels; + + int downsample; + int start, end; + int signalling; + + /* Everything beyond this point gets cleared on a reset */ +#define 
DECODER_RESET_START rng + + opus_uint32 rng; + int error; + int last_pitch_index; + int loss_count; + int postfilter_period; + int postfilter_period_old; + opus_val16 postfilter_gain; + opus_val16 postfilter_gain_old; + int postfilter_tapset; + int postfilter_tapset_old; + + celt_sig preemph_memD[2]; + + celt_sig _decode_mem[1]; /* Size = channels*(DECODE_BUFFER_SIZE+mode->overlap) */ + /* opus_val16 lpc[], Size = channels*LPC_ORDER */ + /* opus_val16 oldEBands[], Size = 2*mode->nbEBands */ + /* opus_val16 oldLogE[], Size = 2*mode->nbEBands */ + /* opus_val16 oldLogE2[], Size = 2*mode->nbEBands */ + /* opus_val16 backgroundLogE[], Size = 2*mode->nbEBands */ +}; + +int celt_decoder_get_size(int channels) +{ + const CELTMode *mode = opus_custom_mode_create(48000, 960, NULL); + return opus_custom_decoder_get_size(mode, channels); +} + +OPUS_CUSTOM_NOSTATIC int opus_custom_decoder_get_size(const CELTMode *mode, int channels) +{ + int size = sizeof(struct CELTDecoder) + + (channels*(DECODE_BUFFER_SIZE+mode->overlap)-1)*sizeof(celt_sig) + + channels*LPC_ORDER*sizeof(opus_val16) + + 4*2*mode->nbEBands*sizeof(opus_val16); + return size; +} + +#ifdef CUSTOM_MODES +CELTDecoder *opus_custom_decoder_create(const CELTMode *mode, int channels, int *error) +{ + int ret; + CELTDecoder *st = (CELTDecoder *)opus_alloc(opus_custom_decoder_get_size(mode, channels)); + ret = opus_custom_decoder_init(st, mode, channels); + if (ret != OPUS_OK) + { + opus_custom_decoder_destroy(st); + st = NULL; + } + if (error) + *error = ret; + return st; +} +#endif /* CUSTOM_MODES */ + +int celt_decoder_init(CELTDecoder *st, opus_int32 sampling_rate, int channels) +{ + int ret; + ret = opus_custom_decoder_init(st, opus_custom_mode_create(48000, 960, NULL), channels); + if (ret != OPUS_OK) + return ret; + st->downsample = resampling_factor(sampling_rate); + if (st->downsample==0) + return OPUS_BAD_ARG; + else + return OPUS_OK; +} + +OPUS_CUSTOM_NOSTATIC int opus_custom_decoder_init(CELTDecoder *st, const CELTMode *mode, int channels) +{ + if (channels < 0 || channels > 2) + return OPUS_BAD_ARG; + + if (st==NULL) + return OPUS_ALLOC_FAIL; + + OPUS_CLEAR((char*)st, opus_custom_decoder_get_size(mode, channels)); + + st->mode = mode; + st->overlap = mode->overlap; + st->stream_channels = st->channels = channels; + + st->downsample = 1; + st->start = 0; + st->end = st->mode->effEBands; + st->signalling = 1; + + st->loss_count = 0; + + opus_custom_decoder_ctl(st, OPUS_RESET_STATE); + + return OPUS_OK; +} + +#ifdef CUSTOM_MODES +void opus_custom_decoder_destroy(CELTDecoder *st) +{ + opus_free(st); +} +#endif /* CUSTOM_MODES */ + +static void celt_decode_lost(CELTDecoder * restrict st, opus_val16 * restrict pcm, int N, int LM) +{ + int c; + int pitch_index; + int overlap = st->mode->overlap; + opus_val16 fade = Q15ONE; + int i, len; + const int C = st->channels; + int offset; + celt_sig *out_mem[2]; + celt_sig *decode_mem[2]; + celt_sig *overlap_mem[2]; + opus_val16 *lpc; + opus_val32 *out_syn[2]; + opus_val16 *oldBandE, *oldLogE, *oldLogE2, *backgroundLogE; + SAVE_STACK; + + c=0; do { + decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+st->overlap); + out_mem[c] = decode_mem[c]+DECODE_BUFFER_SIZE-MAX_PERIOD; + overlap_mem[c] = decode_mem[c]+DECODE_BUFFER_SIZE; + } while (++c<C); + lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+st->overlap)*C); + oldBandE = lpc+C*LPC_ORDER; + oldLogE = oldBandE + 2*st->mode->nbEBands; + oldLogE2 = oldLogE + 2*st->mode->nbEBands; + backgroundLogE = oldLogE2 + 2*st->mode->nbEBands; + + 
out_syn[0] = out_mem[0]+MAX_PERIOD-N; + if (C==2) + out_syn[1] = out_mem[1]+MAX_PERIOD-N; + + len = N+st->mode->overlap; + + if (st->loss_count >= 5 || st->start!=0) + { + /* Noise-based PLC/CNG */ + VARDECL(celt_sig, freq); + VARDECL(celt_norm, X); + VARDECL(celt_ener, bandE); + opus_uint32 seed; + int effEnd; + + effEnd = st->end; + if (effEnd > st->mode->effEBands) + effEnd = st->mode->effEBands; + + ALLOC(freq, C*N, celt_sig); /**< Interleaved signal MDCTs */ + ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */ + ALLOC(bandE, st->mode->nbEBands*C, celt_ener); + + if (st->loss_count >= 5) + log2Amp(st->mode, st->start, st->end, bandE, backgroundLogE, C); + else { + /* Energy decay */ + opus_val16 decay = st->loss_count==0 ? QCONST16(1.5f, DB_SHIFT) : QCONST16(.5f, DB_SHIFT); + c=0; do + { + for (i=st->start;i<st->end;i++) + oldBandE[c*st->mode->nbEBands+i] -= decay; + } while (++c<C); + log2Amp(st->mode, st->start, st->end, bandE, oldBandE, C); + } + seed = st->rng; + for (c=0;c<C;c++) + { + for (i=0;i<(st->mode->eBands[st->start]<<LM);i++) + X[c*N+i] = 0; + for (i=st->start;i<st->mode->effEBands;i++) + { + int j; + int boffs; + int blen; + boffs = N*c+(st->mode->eBands[i]<<LM); + blen = (st->mode->eBands[i+1]-st->mode->eBands[i])<<LM; + for (j=0;j<blen;j++) + { + seed = celt_lcg_rand(seed); + X[boffs+j] = (celt_norm)((opus_int32)seed>>20); + } + renormalise_vector(X+boffs, blen, Q15ONE); + } + for (i=(st->mode->eBands[st->end]<<LM);i<N;i++) + X[c*N+i] = 0; + } + st->rng = seed; + + denormalise_bands(st->mode, X, freq, bandE, st->mode->effEBands, C, 1<<LM); + + c=0; do + for (i=0;i<st->mode->eBands[st->start]<<LM;i++) + freq[c*N+i] = 0; + while (++c<C); + c=0; do { + int bound = st->mode->eBands[effEnd]<<LM; + if (st->downsample!=1) + bound = IMIN(bound, N/st->downsample); + for (i=bound;i<N;i++) + freq[c*N+i] = 0; + } while (++c<C); + compute_inv_mdcts(st->mode, 0, freq, out_syn, overlap_mem, C, LM); + } else { + /* Pitch-based PLC */ + if (st->loss_count == 0) + { + opus_val16 pitch_buf[DECODE_BUFFER_SIZE>>1]; + /* Corresponds to a min pitch of 67 Hz. 
It's possible to save CPU in this + search by using only part of the decode buffer */ + int poffset = 720; + pitch_downsample(decode_mem, pitch_buf, DECODE_BUFFER_SIZE, C); + /* Max pitch is 100 samples (480 Hz) */ + pitch_search(pitch_buf+((poffset)>>1), pitch_buf, DECODE_BUFFER_SIZE-poffset, + poffset-100, &pitch_index); + pitch_index = poffset-pitch_index; + st->last_pitch_index = pitch_index; + } else { + pitch_index = st->last_pitch_index; + fade = QCONST16(.8f,15); + } + + c=0; do { + VARDECL(opus_val32, e); + opus_val16 exc[MAX_PERIOD]; + opus_val32 ac[LPC_ORDER+1]; + opus_val16 decay = 1; + opus_val32 S1=0; + opus_val16 mem[LPC_ORDER]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; + + ALLOC(e, MAX_PERIOD+2*st->mode->overlap, opus_val32); + + offset = MAX_PERIOD-pitch_index; + for (i=0;i<MAX_PERIOD;i++) + exc[i] = ROUND16(out_mem[c][i], SIG_SHIFT); + + if (st->loss_count == 0) + { + _celt_autocorr(exc, ac, st->mode->window, st->mode->overlap, + LPC_ORDER, MAX_PERIOD); + + /* Noise floor -40 dB */ +#ifdef FIXED_POINT + ac[0] += SHR32(ac[0],13); +#else + ac[0] *= 1.0001f; +#endif + /* Lag windowing */ + for (i=1;i<=LPC_ORDER;i++) + { + /*ac[i] *= exp(-.5*(2*M_PI*.002*i)*(2*M_PI*.002*i));*/ +#ifdef FIXED_POINT + ac[i] -= MULT16_32_Q15(2*i*i, ac[i]); +#else + ac[i] -= ac[i]*(.008f*i)*(.008f*i); +#endif + } + + _celt_lpc(lpc+c*LPC_ORDER, ac, LPC_ORDER); + } + for (i=0;i<LPC_ORDER;i++) + mem[i] = ROUND16(out_mem[c][MAX_PERIOD-1-i], SIG_SHIFT); + celt_fir(exc, lpc+c*LPC_ORDER, exc, MAX_PERIOD, LPC_ORDER, mem); + /*for (i=0;i<MAX_PERIOD;i++)printf("%d ", exc[i]); printf("\n");*/ + /* Check if the waveform is decaying (and if so how fast) */ + { + opus_val32 E1=1, E2=1; + int period; + if (pitch_index <= MAX_PERIOD/2) + period = pitch_index; + else + period = MAX_PERIOD/2; + for (i=0;i<period;i++) + { + E1 += SHR32(MULT16_16(exc[MAX_PERIOD-period+i],exc[MAX_PERIOD-period+i]),8); + E2 += SHR32(MULT16_16(exc[MAX_PERIOD-2*period+i],exc[MAX_PERIOD-2*period+i]),8); + } + if (E1 > E2) + E1 = E2; + decay = celt_sqrt(frac_div32(SHR(E1,1),E2)); + } + + /* Copy excitation, taking decay into account */ + for (i=0;i<len+st->mode->overlap;i++) + { + opus_val16 tmp; + if (offset+i >= MAX_PERIOD) + { + offset -= pitch_index; + decay = MULT16_16_Q15(decay, decay); + } + e[i] = SHL32(EXTEND32(MULT16_16_Q15(decay, exc[offset+i])), SIG_SHIFT); + tmp = ROUND16(out_mem[c][offset+i],SIG_SHIFT); + S1 += SHR32(MULT16_16(tmp,tmp),8); + } + for (i=0;i<LPC_ORDER;i++) + mem[i] = ROUND16(out_mem[c][MAX_PERIOD-1-i], SIG_SHIFT); + for (i=0;i<len+st->mode->overlap;i++) + e[i] = MULT16_32_Q15(fade, e[i]); + celt_iir(e, lpc+c*LPC_ORDER, e, len+st->mode->overlap, LPC_ORDER, mem); + + { + opus_val32 S2=0; + for (i=0;i<len+overlap;i++) + { + opus_val16 tmp = ROUND16(e[i],SIG_SHIFT); + S2 += SHR32(MULT16_16(tmp,tmp),8); + } + /* This checks for an "explosion" in the synthesis */ +#ifdef FIXED_POINT + if (!(S1 > SHR32(S2,2))) +#else + /* Float test is written this way to catch NaNs at the same time */ + if (!(S1 > 0.2f*S2)) +#endif + { + for (i=0;i<len+overlap;i++) + e[i] = 0; + } else if (S1 < S2) + { + opus_val16 ratio = celt_sqrt(frac_div32(SHR32(S1,1)+1,S2+1)); + for (i=0;i<len+overlap;i++) + e[i] = MULT16_32_Q15(ratio, e[i]); + } + } + + /* Apply post-filter to the MDCT overlap of the previous frame */ + comb_filter(out_mem[c]+MAX_PERIOD, out_mem[c]+MAX_PERIOD, st->postfilter_period, st->postfilter_period, st->overlap, + st->postfilter_gain, st->postfilter_gain, st->postfilter_tapset, st->postfilter_tapset, + NULL, 0); + 
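+ /* Rough sketch of the concealment output handling below (w = st->mode->window,
+    e = the LPC-extrapolated signal computed above): the first loop shifts the
+    synthesis history forward by one frame of N samples, and the TDAC loop then
+    re-creates the time-domain aliasing an IMDCT would have produced, i.e. for
+    each i in [0, overlap/2):
+       tmp                          = w[i]*e[N+overlap-1-i] + w[overlap-1-i]*e[N+i]
+       out[MAX_PERIOD+i]            = w[overlap-1-i]*tmp
+       out[MAX_PERIOD+overlap-1-i]  = w[i]*tmp
+    so the concealed samples window and fold the same way decoded audio does,
+    and the next frame's overlap-add stays seamless. */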
+ for (i=0;i<MAX_PERIOD+st->mode->overlap-N;i++) + out_mem[c][i] = out_mem[c][N+i]; + + /* Apply TDAC to the concealed audio so that it blends with the + previous and next frames */ + for (i=0;i<overlap/2;i++) + { + opus_val32 tmp; + tmp = MULT16_32_Q15(st->mode->window[i], e[N+overlap-1-i]) + + MULT16_32_Q15(st->mode->window[overlap-i-1], e[N+i ]); + out_mem[c][MAX_PERIOD+i] = MULT16_32_Q15(st->mode->window[overlap-i-1], tmp); + out_mem[c][MAX_PERIOD+overlap-i-1] = MULT16_32_Q15(st->mode->window[i], tmp); + } + for (i=0;i<N;i++) + out_mem[c][MAX_PERIOD-N+i] = e[i]; + + /* Apply pre-filter to the MDCT overlap for the next frame (post-filter will be applied then) */ + comb_filter(e, out_mem[c]+MAX_PERIOD, st->postfilter_period, st->postfilter_period, st->overlap, + -st->postfilter_gain, -st->postfilter_gain, st->postfilter_tapset, st->postfilter_tapset, + NULL, 0); + for (i=0;i<overlap;i++) + out_mem[c][MAX_PERIOD+i] = e[i]; + } while (++c<C); + } + + deemphasis(out_syn, pcm, N, C, st->downsample, st->mode->preemph, st->preemph_memD); + + st->loss_count++; + + RESTORE_STACK; +} + +int celt_decode_with_ec(CELTDecoder * restrict st, const unsigned char *data, int len, opus_val16 * restrict pcm, int frame_size, ec_dec *dec) +{ + int c, i, N; + int spread_decision; + opus_int32 bits; + ec_dec _dec; + VARDECL(celt_sig, freq); + VARDECL(celt_norm, X); + VARDECL(celt_ener, bandE); + VARDECL(int, fine_quant); + VARDECL(int, pulses); + VARDECL(int, cap); + VARDECL(int, offsets); + VARDECL(int, fine_priority); + VARDECL(int, tf_res); + VARDECL(unsigned char, collapse_masks); + celt_sig *out_mem[2]; + celt_sig *decode_mem[2]; + celt_sig *overlap_mem[2]; + celt_sig *out_syn[2]; + opus_val16 *lpc; + opus_val16 *oldBandE, *oldLogE, *oldLogE2, *backgroundLogE; + + int shortBlocks; + int isTransient; + int intra_ener; + const int CC = st->channels; + int LM, M; + int effEnd; + int codedBands; + int alloc_trim; + int postfilter_pitch; + opus_val16 postfilter_gain; + int intensity=0; + int dual_stereo=0; + opus_int32 total_bits; + opus_int32 balance; + opus_int32 tell; + int dynalloc_logp; + int postfilter_tapset; + int anti_collapse_rsv; + int anti_collapse_on=0; + int silence; + int C = st->stream_channels; + ALLOC_STACK; + + frame_size *= st->downsample; + + c=0; do { + decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+st->overlap); + out_mem[c] = decode_mem[c]+DECODE_BUFFER_SIZE-MAX_PERIOD; + overlap_mem[c] = decode_mem[c]+DECODE_BUFFER_SIZE; + } while (++c<CC); + lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+st->overlap)*CC); + oldBandE = lpc+CC*LPC_ORDER; + oldLogE = oldBandE + 2*st->mode->nbEBands; + oldLogE2 = oldLogE + 2*st->mode->nbEBands; + backgroundLogE = oldLogE2 + 2*st->mode->nbEBands; + +#ifdef CUSTOM_MODES + if (st->signalling && data!=NULL) + { + int data0=data[0]; + /* Convert "standard mode" to Opus header */ + if (st->mode->Fs==48000 && st->mode->shortMdctSize==120) + { + data0 = fromOpus(data0); + if (data0<0) + return OPUS_INVALID_PACKET; + } + st->end = IMAX(1, st->mode->effEBands-2*(data0>>5)); + LM = (data0>>3)&0x3; + C = 1 + ((data0>>2)&0x1); + data++; + len--; + if (LM>st->mode->maxLM) + return OPUS_INVALID_PACKET; + if (frame_size < st->mode->shortMdctSize<<LM) + return OPUS_BUFFER_TOO_SMALL; + else + frame_size = st->mode->shortMdctSize<<LM; + } else { +#else + { +#endif + for (LM=0;LM<=st->mode->maxLM;LM++) + if (st->mode->shortMdctSize<<LM==frame_size) + break; + if (LM>st->mode->maxLM) + return OPUS_BAD_ARG; + } + M=1<<LM; + + if (len<0 || len>1275 || 
pcm==NULL) + return OPUS_BAD_ARG; + + N = M*st->mode->shortMdctSize; + + effEnd = st->end; + if (effEnd > st->mode->effEBands) + effEnd = st->mode->effEBands; + + ALLOC(freq, IMAX(CC,C)*N, celt_sig); /**< Interleaved signal MDCTs */ + ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */ + ALLOC(bandE, st->mode->nbEBands*C, celt_ener); + c=0; do + for (i=0;i<M*st->mode->eBands[st->start];i++) + X[c*N+i] = 0; + while (++c<C); + c=0; do + for (i=M*st->mode->eBands[effEnd];i<N;i++) + X[c*N+i] = 0; + while (++c<C); + + if (data == NULL || len<=1) + { + celt_decode_lost(st, pcm, N, LM); + RESTORE_STACK; + return frame_size/st->downsample; + } + + if (dec == NULL) + { + ec_dec_init(&_dec,(unsigned char*)data,len); + dec = &_dec; + } + + if (C==1) + { + for (i=0;i<st->mode->nbEBands;i++) + oldBandE[i]=MAX16(oldBandE[i],oldBandE[st->mode->nbEBands+i]); + } + + total_bits = len*8; + tell = ec_tell(dec); + + if (tell >= total_bits) + silence = 1; + else if (tell==1) + silence = ec_dec_bit_logp(dec, 15); + else + silence = 0; + if (silence) + { + /* Pretend we've read all the remaining bits */ + tell = len*8; + dec->nbits_total+=tell-ec_tell(dec); + } + + postfilter_gain = 0; + postfilter_pitch = 0; + postfilter_tapset = 0; + if (st->start==0 && tell+16 <= total_bits) + { + if(ec_dec_bit_logp(dec, 1)) + { + int qg, octave; + octave = ec_dec_uint(dec, 6); + postfilter_pitch = (16<<octave)+ec_dec_bits(dec, 4+octave)-1; + qg = ec_dec_bits(dec, 3); + if (ec_tell(dec)+2<=total_bits) + postfilter_tapset = ec_dec_icdf(dec, tapset_icdf, 2); + postfilter_gain = QCONST16(.09375f,15)*(qg+1); + } + tell = ec_tell(dec); + } + + if (LM > 0 && tell+3 <= total_bits) + { + isTransient = ec_dec_bit_logp(dec, 3); + tell = ec_tell(dec); + } + else + isTransient = 0; + + if (isTransient) + shortBlocks = M; + else + shortBlocks = 0; + + /* Decode the global flags (first symbols in the stream) */ + intra_ener = tell+3<=total_bits ? ec_dec_bit_logp(dec, 3) : 0; + /* Get band energies */ + unquant_coarse_energy(st->mode, st->start, st->end, oldBandE, + intra_ener, dec, C, LM); + + ALLOC(tf_res, st->mode->nbEBands, int); + tf_decode(st->start, st->end, isTransient, tf_res, LM, dec); + + tell = ec_tell(dec); + spread_decision = SPREAD_NORMAL; + if (tell+4 <= total_bits) + spread_decision = ec_dec_icdf(dec, spread_icdf, 5); + + ALLOC(pulses, st->mode->nbEBands, int); + ALLOC(cap, st->mode->nbEBands, int); + ALLOC(offsets, st->mode->nbEBands, int); + ALLOC(fine_priority, st->mode->nbEBands, int); + + init_caps(st->mode,cap,LM,C); + + dynalloc_logp = 6; + total_bits<<=BITRES; + tell = ec_tell_frac(dec); + for (i=st->start;i<st->end;i++) + { + int width, quanta; + int dynalloc_loop_logp; + int boost; + width = C*(st->mode->eBands[i+1]-st->mode->eBands[i])<<LM; + /* quanta is 6 bits, but no more than 1 bit/sample + and no less than 1/8 bit/sample */ + quanta = IMIN(width<<BITRES, IMAX(6<<BITRES, width)); + dynalloc_loop_logp = dynalloc_logp; + boost = 0; + while (tell+(dynalloc_loop_logp<<BITRES) < total_bits && boost < cap[i]) + { + int flag; + flag = ec_dec_bit_logp(dec, dynalloc_loop_logp); + tell = ec_tell_frac(dec); + if (!flag) + break; + boost += quanta; + total_bits -= quanta; + dynalloc_loop_logp = 1; + } + offsets[i] = boost; + /* Making dynalloc more likely */ + if (boost>0) + dynalloc_logp = IMAX(2, dynalloc_logp-1); + } + + ALLOC(fine_quant, st->mode->nbEBands, int); + alloc_trim = tell+(6<<BITRES) <= total_bits ? 
+ ec_dec_icdf(dec, trim_icdf, 7) : 5; + + bits = (((opus_int32)len*8)<<BITRES) - ec_tell_frac(dec) - 1; + anti_collapse_rsv = isTransient&&LM>=2&&bits>=((LM+2)<<BITRES) ? (1<<BITRES) : 0; + bits -= anti_collapse_rsv; + codedBands = compute_allocation(st->mode, st->start, st->end, offsets, cap, + alloc_trim, &intensity, &dual_stereo, bits, &balance, pulses, + fine_quant, fine_priority, C, LM, dec, 0, 0); + + unquant_fine_energy(st->mode, st->start, st->end, oldBandE, fine_quant, dec, C); + + /* Decode fixed codebook */ + ALLOC(collapse_masks, C*st->mode->nbEBands, unsigned char); + quant_all_bands(0, st->mode, st->start, st->end, X, C==2 ? X+N : NULL, collapse_masks, + NULL, pulses, shortBlocks, spread_decision, dual_stereo, intensity, tf_res, + len*(8<<BITRES)-anti_collapse_rsv, balance, dec, LM, codedBands, &st->rng); + + if (anti_collapse_rsv > 0) + { + anti_collapse_on = ec_dec_bits(dec, 1); + } + + unquant_energy_finalise(st->mode, st->start, st->end, oldBandE, + fine_quant, fine_priority, len*8-ec_tell(dec), dec, C); + + if (anti_collapse_on) + anti_collapse(st->mode, X, collapse_masks, LM, C, CC, N, + st->start, st->end, oldBandE, oldLogE, oldLogE2, pulses, st->rng); + + log2Amp(st->mode, st->start, st->end, bandE, oldBandE, C); + + if (silence) + { + for (i=0;i<C*st->mode->nbEBands;i++) + { + bandE[i] = 0; + oldBandE[i] = -QCONST16(28.f,DB_SHIFT); + } + } + /* Synthesis */ + denormalise_bands(st->mode, X, freq, bandE, effEnd, C, M); + + OPUS_MOVE(decode_mem[0], decode_mem[0]+N, DECODE_BUFFER_SIZE-N); + if (CC==2) + OPUS_MOVE(decode_mem[1], decode_mem[1]+N, DECODE_BUFFER_SIZE-N); + + c=0; do + for (i=0;i<M*st->mode->eBands[st->start];i++) + freq[c*N+i] = 0; + while (++c<C); + c=0; do { + int bound = M*st->mode->eBands[effEnd]; + if (st->downsample!=1) + bound = IMIN(bound, N/st->downsample); + for (i=bound;i<N;i++) + freq[c*N+i] = 0; + } while (++c<C); + + out_syn[0] = out_mem[0]+MAX_PERIOD-N; + if (CC==2) + out_syn[1] = out_mem[1]+MAX_PERIOD-N; + + if (CC==2&&C==1) + { + for (i=0;i<N;i++) + freq[N+i] = freq[i]; + } + if (CC==1&&C==2) + { + for (i=0;i<N;i++) + freq[i] = HALF32(ADD32(freq[i],freq[N+i])); + } + + /* Compute inverse MDCTs */ + compute_inv_mdcts(st->mode, shortBlocks, freq, out_syn, overlap_mem, CC, LM); + + c=0; do { + st->postfilter_period=IMAX(st->postfilter_period, COMBFILTER_MINPERIOD); + st->postfilter_period_old=IMAX(st->postfilter_period_old, COMBFILTER_MINPERIOD); + comb_filter(out_syn[c], out_syn[c], st->postfilter_period_old, st->postfilter_period, st->mode->shortMdctSize, + st->postfilter_gain_old, st->postfilter_gain, st->postfilter_tapset_old, st->postfilter_tapset, + st->mode->window, st->overlap); + if (LM!=0) + comb_filter(out_syn[c]+st->mode->shortMdctSize, out_syn[c]+st->mode->shortMdctSize, st->postfilter_period, postfilter_pitch, N-st->mode->shortMdctSize, + st->postfilter_gain, postfilter_gain, st->postfilter_tapset, postfilter_tapset, + st->mode->window, st->mode->overlap); + + } while (++c<CC); + st->postfilter_period_old = st->postfilter_period; + st->postfilter_gain_old = st->postfilter_gain; + st->postfilter_tapset_old = st->postfilter_tapset; + st->postfilter_period = postfilter_pitch; + st->postfilter_gain = postfilter_gain; + st->postfilter_tapset = postfilter_tapset; + if (LM!=0) + { + st->postfilter_period_old = st->postfilter_period; + st->postfilter_gain_old = st->postfilter_gain; + st->postfilter_tapset_old = st->postfilter_tapset; + } + + if (C==1) { + for (i=0;i<st->mode->nbEBands;i++) + oldBandE[st->mode->nbEBands+i]=oldBandE[i]; + } 
+ + /* In case start or end were to change */ + if (!isTransient) + { + for (i=0;i<2*st->mode->nbEBands;i++) + oldLogE2[i] = oldLogE[i]; + for (i=0;i<2*st->mode->nbEBands;i++) + oldLogE[i] = oldBandE[i]; + for (i=0;i<2*st->mode->nbEBands;i++) + backgroundLogE[i] = MIN16(backgroundLogE[i] + M*QCONST16(0.001f,DB_SHIFT), oldBandE[i]); + } else { + for (i=0;i<2*st->mode->nbEBands;i++) + oldLogE[i] = MIN16(oldLogE[i], oldBandE[i]); + } + c=0; do + { + for (i=0;i<st->start;i++) + { + oldBandE[c*st->mode->nbEBands+i]=0; + oldLogE[c*st->mode->nbEBands+i]=oldLogE2[c*st->mode->nbEBands+i]=-QCONST16(28.f,DB_SHIFT); + } + for (i=st->end;i<st->mode->nbEBands;i++) + { + oldBandE[c*st->mode->nbEBands+i]=0; + oldLogE[c*st->mode->nbEBands+i]=oldLogE2[c*st->mode->nbEBands+i]=-QCONST16(28.f,DB_SHIFT); + } + } while (++c<2); + st->rng = dec->rng; + + deemphasis(out_syn, pcm, N, CC, st->downsample, st->mode->preemph, st->preemph_memD); + st->loss_count = 0; + RESTORE_STACK; + if (ec_tell(dec) > 8*len) + return OPUS_INTERNAL_ERROR; + if(ec_get_error(dec)) + st->error = 1; + return frame_size/st->downsample; +} + + +#ifdef CUSTOM_MODES + +#ifdef FIXED_POINT +int opus_custom_decode(CELTDecoder * restrict st, const unsigned char *data, int len, opus_int16 * restrict pcm, int frame_size) +{ + return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL); +} + +#ifndef DISABLE_FLOAT_API +int opus_custom_decode_float(CELTDecoder * restrict st, const unsigned char *data, int len, float * restrict pcm, int frame_size) +{ + int j, ret, C, N; + VARDECL(opus_int16, out); + ALLOC_STACK; + + if (pcm==NULL) + return OPUS_BAD_ARG; + + C = st->channels; + N = frame_size; + + ALLOC(out, C*N, opus_int16); + ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL); + if (ret>0) + for (j=0;j<C*ret;j++) + pcm[j]=out[j]*(1.f/32768.f); + + RESTORE_STACK; + return ret; +} +#endif /* DISABLE_FLOAT_API */ + +#else + +int opus_custom_decode_float(CELTDecoder * restrict st, const unsigned char *data, int len, float * restrict pcm, int frame_size) +{ + return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL); +} + +int opus_custom_decode(CELTDecoder * restrict st, const unsigned char *data, int len, opus_int16 * restrict pcm, int frame_size) +{ + int j, ret, C, N; + VARDECL(celt_sig, out); + ALLOC_STACK; + + if (pcm==NULL) + return OPUS_BAD_ARG; + + C = st->channels; + N = frame_size; + ALLOC(out, C*N, celt_sig); + + ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL); + + if (ret>0) + for (j=0;j<C*ret;j++) + pcm[j] = FLOAT2INT16 (out[j]); + + RESTORE_STACK; + return ret; +} + +#endif +#endif /* CUSTOM_MODES */ + +int opus_custom_decoder_ctl(CELTDecoder * restrict st, int request, ...) 
+{ + va_list ap; + + va_start(ap, request); + switch (request) + { + case CELT_SET_START_BAND_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + if (value<0 || value>=st->mode->nbEBands) + goto bad_arg; + st->start = value; + } + break; + case CELT_SET_END_BAND_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + if (value<1 || value>st->mode->nbEBands) + goto bad_arg; + st->end = value; + } + break; + case CELT_SET_CHANNELS_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + if (value<1 || value>2) + goto bad_arg; + st->stream_channels = value; + } + break; + case CELT_GET_AND_CLEAR_ERROR_REQUEST: + { + int *value = va_arg(ap, opus_int32*); + if (value==NULL) + goto bad_arg; + *value=st->error; + st->error = 0; + } + break; + case OPUS_GET_LOOKAHEAD_REQUEST: + { + int *value = va_arg(ap, opus_int32*); + if (value==NULL) + goto bad_arg; + *value = st->overlap/st->downsample; + } + break; + case OPUS_RESET_STATE: + { + int i; + opus_val16 *lpc, *oldBandE, *oldLogE, *oldLogE2; + lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+st->overlap)*st->channels); + oldBandE = lpc+st->channels*LPC_ORDER; + oldLogE = oldBandE + 2*st->mode->nbEBands; + oldLogE2 = oldLogE + 2*st->mode->nbEBands; + OPUS_CLEAR((char*)&st->DECODER_RESET_START, + opus_custom_decoder_get_size(st->mode, st->channels)- + ((char*)&st->DECODER_RESET_START - (char*)st)); + for (i=0;i<2*st->mode->nbEBands;i++) + oldLogE[i]=oldLogE2[i]=-QCONST16(28.f,DB_SHIFT); + } + break; + case OPUS_GET_PITCH_REQUEST: + { + int *value = va_arg(ap, opus_int32*); + if (value==NULL) + goto bad_arg; + *value = st->postfilter_period; + } + break; +#ifdef OPUS_BUILD + case CELT_GET_MODE_REQUEST: + { + const CELTMode ** value = va_arg(ap, const CELTMode**); + if (value==0) + goto bad_arg; + *value=st->mode; + } + break; + case CELT_SET_SIGNALLING_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + st->signalling = value; + } + break; + case OPUS_GET_FINAL_RANGE_REQUEST: + { + opus_uint32 * value = va_arg(ap, opus_uint32 *); + if (value==0) + goto bad_arg; + *value=st->rng; + } + break; +#endif + default: + goto bad_request; + } + va_end(ap); + return OPUS_OK; +bad_arg: + va_end(ap); + return OPUS_BAD_ARG; +bad_request: + va_end(ap); + return OPUS_UNIMPLEMENTED; +} + + + +const char *opus_strerror(int error) +{ + static const char *error_strings[8] = { + "success", + "invalid argument", + "buffer too small", + "internal error", + "corrupted stream", + "request not implemented", + "invalid state", + "memory allocation failed" + }; + if (error > 0 || error < -7) + return "unknown error"; + else + return error_strings[-error]; +} + +const char *opus_get_version_string(void) +{ + return "libopus " OPUS_VERSION +#ifdef FUZZING + "-fuzzing" +#endif + ; +}
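A quick way to sanity-check the error-string table above is to feed opus_strerror() every code it knows about plus one it does not: the function indexes error_strings[-error] for error in [-7, 0] and falls back to "unknown error" otherwise. A minimal standalone sketch (illustrative only, not part of the imported sources; it assumes opus_defines.h from this library is on the include path and the program links against the library):

#include <stdio.h>
#include "opus_defines.h"   /* OPUS_OK ... OPUS_ALLOC_FAIL and opus_strerror() */

int main(void)
{
   int err;
   /* 0 down to -8: the last value is outside the table and should map to
      "unknown error". */
   for (err = OPUS_OK; err >= OPUS_ALLOC_FAIL - 1; err--)
      printf("%3d -> %s\n", err, opus_strerror(err));
   return 0;
}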
new file mode 100644 --- /dev/null +++ b/media/libopus/celt/celt.h @@ -0,0 +1,117 @@ +/* Copyright (c) 2007-2008 CSIRO + Copyright (c) 2007-2009 Xiph.Org Foundation + Copyright (c) 2008 Gregory Maxwell + Written by Jean-Marc Valin and Gregory Maxwell */ +/** + @file celt.h + @brief Contains all the functions for encoding and decoding audio + */ + +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef CELT_H +#define CELT_H + +#include "opus_types.h" +#include "opus_defines.h" +#include "opus_custom.h" +#include "entenc.h" +#include "entdec.h" +#include "arch.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define CELTEncoder OpusCustomEncoder +#define CELTDecoder OpusCustomDecoder +#define CELTMode OpusCustomMode + +#define _celt_check_mode_ptr_ptr(ptr) ((ptr) + ((ptr) - (const CELTMode**)(ptr))) + +/* Encoder/decoder Requests */ + +#define CELT_SET_PREDICTION_REQUEST 10002 +/** Controls the use of interframe prediction. 
+ 0=Independent frames + 1=Short term interframe prediction allowed + 2=Long term prediction allowed + */ +#define CELT_SET_PREDICTION(x) CELT_SET_PREDICTION_REQUEST, __opus_check_int(x) + +#define CELT_SET_INPUT_CLIPPING_REQUEST 10004 +#define CELT_SET_INPUT_CLIPPING(x) CELT_SET_INPUT_CLIPPING_REQUEST, __opus_check_int(x) + +#define CELT_GET_AND_CLEAR_ERROR_REQUEST 10007 +#define CELT_GET_AND_CLEAR_ERROR(x) CELT_GET_AND_CLEAR_ERROR_REQUEST, __opus_check_int_ptr(x) + +#define CELT_SET_CHANNELS_REQUEST 10008 +#define CELT_SET_CHANNELS(x) CELT_SET_CHANNELS_REQUEST, __opus_check_int(x) + + +/* Internal */ +#define CELT_SET_START_BAND_REQUEST 10010 +#define CELT_SET_START_BAND(x) CELT_SET_START_BAND_REQUEST, __opus_check_int(x) + +#define CELT_SET_END_BAND_REQUEST 10012 +#define CELT_SET_END_BAND(x) CELT_SET_END_BAND_REQUEST, __opus_check_int(x) + +#define CELT_GET_MODE_REQUEST 10015 +/** Get the CELTMode used by an encoder or decoder */ +#define CELT_GET_MODE(x) CELT_GET_MODE_REQUEST, _celt_check_mode_ptr_ptr(x) + +#define CELT_SET_SIGNALLING_REQUEST 10016 +#define CELT_SET_SIGNALLING(x) CELT_SET_SIGNALLING_REQUEST, __opus_check_int(x) + + + +/* Encoder stuff */ + +int celt_encoder_get_size(int channels); + +int celt_encode_with_ec(OpusCustomEncoder * restrict st, const opus_val16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes, ec_enc *enc); + +int celt_encoder_init(CELTEncoder *st, opus_int32 sampling_rate, int channels); + + + +/* Decoder stuff */ + +int celt_decoder_get_size(int channels); + + +int celt_decoder_init(CELTDecoder *st, opus_int32 sampling_rate, int channels); + +int celt_decode_with_ec(OpusCustomDecoder * restrict st, const unsigned char *data, int len, opus_val16 * restrict pcm, int frame_size, ec_dec *dec); + +#define celt_encoder_ctl opus_custom_encoder_ctl +#define celt_decoder_ctl opus_custom_decoder_ctl + +#ifdef __cplusplus +} +#endif + +#endif /* CELT_H */
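Each request macro above expands to its numeric request code followed by a type-checked argument, which is exactly what the va_arg() dispatch in opus_custom_decoder_ctl() expects. A hedged usage sketch (st is assumed to be a decoder created elsewhere, e.g. via opus_custom_decoder_create() in a CUSTOM_MODES build):

#include "celt.h"

/* Restrict the coded band range of an existing decoder and then fetch any
   pending error flag.  Returns the cleared error value, or -1 if a request
   was rejected. */
int restrict_bands(CELTDecoder *st)
{
   opus_int32 err = 0;
   /* Band 17 is where the Opus hybrid modes hand over to CELT (~8 kHz). */
   if (celt_decoder_ctl(st, CELT_SET_START_BAND(17)) != OPUS_OK)
      return -1;
   if (celt_decoder_ctl(st, CELT_SET_END_BAND(21)) != OPUS_OK)
      return -1;
   celt_decoder_ctl(st, CELT_GET_AND_CLEAR_ERROR(&err));
   return (int)err;
}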
new file mode 100644 --- /dev/null +++ b/media/libopus/celt/celt_lpc.c @@ -0,0 +1,188 @@ +/* Copyright (c) 2009-2010 Xiph.Org Foundation + Written by Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "celt_lpc.h" +#include "stack_alloc.h" +#include "mathops.h" + +void _celt_lpc( + opus_val16 *_lpc, /* out: [0...p-1] LPC coefficients */ +const opus_val32 *ac, /* in: [0...p] autocorrelation values */ +int p +) +{ + int i, j; + opus_val32 r; + opus_val32 error = ac[0]; +#ifdef FIXED_POINT + opus_val32 lpc[LPC_ORDER]; +#else + float *lpc = _lpc; +#endif + + for (i = 0; i < p; i++) + lpc[i] = 0; + if (ac[0] != 0) + { + for (i = 0; i < p; i++) { + /* Sum up this iteration's reflection coefficient */ + opus_val32 rr = 0; + for (j = 0; j < i; j++) + rr += MULT32_32_Q31(lpc[j],ac[i - j]); + rr += SHR32(ac[i + 1],3); + r = -frac_div32(SHL32(rr,3), error); + /* Update LPC coefficients and total error */ + lpc[i] = SHR32(r,3); + for (j = 0; j < (i+1)>>1; j++) + { + opus_val32 tmp1, tmp2; + tmp1 = lpc[j]; + tmp2 = lpc[i-1-j]; + lpc[j] = tmp1 + MULT32_32_Q31(r,tmp2); + lpc[i-1-j] = tmp2 + MULT32_32_Q31(r,tmp1); + } + + error = error - MULT32_32_Q31(MULT32_32_Q31(r,r),error); + /* Bail out once we get 30 dB gain */ +#ifdef FIXED_POINT + if (error<SHR32(ac[0],10)) + break; +#else + if (error<.001f*ac[0]) + break; +#endif + } + } +#ifdef FIXED_POINT + for (i=0;i<p;i++) + _lpc[i] = ROUND16(lpc[i],16); +#endif +} + +void celt_fir(const opus_val16 *x, + const opus_val16 *num, + opus_val16 *y, + int N, + int ord, + opus_val16 *mem) +{ + int i,j; + + for (i=0;i<N;i++) + { + opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT); + for (j=0;j<ord;j++) + { + sum += MULT16_16(num[j],mem[j]); + } + for (j=ord-1;j>=1;j--) + { + mem[j]=mem[j-1]; + } + mem[0] = x[i]; + y[i] = ROUND16(sum, SIG_SHIFT); + } +} + +void celt_iir(const opus_val32 *x, + const opus_val16 *den, + opus_val32 *y, + int N, + int ord, + opus_val16 *mem) +{ + int i,j; + for (i=0;i<N;i++) + { + opus_val32 sum = x[i]; + for (j=0;j<ord;j++) + { + sum -= MULT16_16(den[j],mem[j]); + } + for (j=ord-1;j>=1;j--) + { + mem[j]=mem[j-1]; + } + mem[0] = ROUND16(sum,SIG_SHIFT); + y[i] = sum; + } +} + +void _celt_autocorr( + const opus_val16 *x, /* in: [0...n-1] samples x */ + 
opus_val32 *ac, /* out: [0...lag-1] ac values */ + const opus_val16 *window, + int overlap, + int lag, + int n + ) +{ + opus_val32 d; + int i; + VARDECL(opus_val16, xx); + SAVE_STACK; + ALLOC(xx, n, opus_val16); + celt_assert(n>0); + celt_assert(overlap>=0); + for (i=0;i<n;i++) + xx[i] = x[i]; + for (i=0;i<overlap;i++) + { + xx[i] = MULT16_16_Q15(x[i],window[i]); + xx[n-i-1] = MULT16_16_Q15(x[n-i-1],window[i]); + } +#ifdef FIXED_POINT + { + opus_val32 ac0=0; + int shift; + for(i=0;i<n;i++) + ac0 += SHR32(MULT16_16(xx[i],xx[i]),9); + ac0 += 1+n; + + shift = celt_ilog2(ac0)-30+10; + shift = (shift+1)/2; + for(i=0;i<n;i++) + xx[i] = VSHR32(xx[i], shift); + } +#endif + while (lag>=0) + { + for (i = lag, d = 0; i < n; i++) + d += xx[i] * xx[i-lag]; + ac[lag] = d; + /*printf ("%f ", ac[lag]);*/ + lag--; + } + /*printf ("\n");*/ + ac[0] += 10; + + RESTORE_STACK; +}
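The loop in _celt_lpc() is the classic Levinson-Durbin recursion, slightly obscured by the fixed-point macros. A plain-float transcription (a standalone reference sketch, not part of the imported sources) makes the structure easier to follow; feeding it the autocorrelation of an AR(1) process with pole 0.9 should recover roughly {-0.9, 0, 0}:

#include <stdio.h>

static void levinson(float *lpc, const float *ac, int p)
{
   int i, j;
   float error = ac[0];
   for (i = 0; i < p; i++)
      lpc[i] = 0;
   if (ac[0] == 0)
      return;
   for (i = 0; i < p; i++) {
      float rr = 0, r;
      /* Reflection coefficient for this order. */
      for (j = 0; j < i; j++)
         rr += lpc[j]*ac[i - j];
      rr += ac[i + 1];
      r = -rr/error;
      lpc[i] = r;
      /* Update the lower-order coefficients in place. */
      for (j = 0; j < (i + 1) >> 1; j++) {
         float tmp1 = lpc[j], tmp2 = lpc[i - 1 - j];
         lpc[j]         = tmp1 + r*tmp2;
         lpc[i - 1 - j] = tmp2 + r*tmp1;
      }
      /* Prediction error shrinks each step; stop near 30 dB of gain. */
      error = error - r*r*error;
      if (error < .001f*ac[0])
         break;
   }
}

int main(void)
{
   /* Normalised autocorrelation of an AR(1) process with pole 0.9. */
   float ac[4] = { 1.f, .9f, .81f, .729f };
   float lpc[3];
   levinson(lpc, ac, 3);
   printf("%f %f %f\n", lpc[0], lpc[1], lpc[2]);   /* ~ -0.9 0.0 0.0 */
   return 0;
}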
new file mode 100644 --- /dev/null +++ b/media/libopus/celt/celt_lpc.h @@ -0,0 +1,53 @@ +/* Copyright (c) 2009-2010 Xiph.Org Foundation + Written by Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef PLC_H +#define PLC_H + +#include "arch.h" + +#define LPC_ORDER 24 + +void _celt_lpc(opus_val16 *_lpc, const opus_val32 *ac, int p); + +void celt_fir(const opus_val16 *x, + const opus_val16 *num, + opus_val16 *y, + int N, + int ord, + opus_val16 *mem); + +void celt_iir(const opus_val32 *x, + const opus_val16 *den, + opus_val32 *y, + int N, + int ord, + opus_val16 *mem); + +void _celt_autocorr(const opus_val16 *x, opus_val32 *ac, const opus_val16 *window, int overlap, int lag, int n); + +#endif /* PLC_H */
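celt_fir() applies A(z) = 1 + num[0]z^-1 + ... + num[ord-1]z^-ord to its input, and celt_iir() applies 1/A(z) with the same coefficient layout, so chaining the two with zeroed filter memories reproduces the input. A small round-trip sketch (illustrative only; a float build is assumed, where opus_val16 and opus_val32 are both float and the rounding/shift macros are no-ops):

#include <stdio.h>
#include <string.h>
#include "celt_lpc.h"

int main(void)
{
   opus_val16 a[2]  = { -0.9f, 0.2f };           /* stable A(z): poles at 0.5 and 0.4 */
   opus_val16 x[8]  = { 1, 0, 0, 0, 0.5f, 0, 0, 0 };
   opus_val16 res[8];
   opus_val32 res32[8], y[8];
   opus_val16 mem[2];
   int i;

   memset(mem, 0, sizeof(mem));
   celt_fir(x, a, res, 8, 2, mem);               /* residual = A(z) * x */

   memset(mem, 0, sizeof(mem));
   for (i = 0; i < 8; i++)
      res32[i] = res[i];                         /* celt_iir() takes 32-bit input */
   celt_iir(res32, a, y, 8, 2, mem);             /* y = x back again */

   for (i = 0; i < 8; i++)
      printf("%f %f\n", x[i], y[i]);             /* the two columns match */
   return 0;
}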
new file mode 100644 --- /dev/null +++ b/media/libopus/celt/cwrs.c @@ -0,0 +1,644 @@ +/* Copyright (c) 2007-2008 CSIRO + Copyright (c) 2007-2009 Xiph.Org Foundation + Copyright (c) 2007-2009 Timothy B. Terriberry + Written by Timothy B. Terriberry and Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "os_support.h" +#include "cwrs.h" +#include "mathops.h" +#include "arch.h" + +#ifdef CUSTOM_MODES + +/*Guaranteed to return a conservatively large estimate of the binary logarithm + with frac bits of fractional precision. 
+ Tested for all possible 32-bit inputs with frac=4, where the maximum + overestimation is 0.06254243 bits.*/ +int log2_frac(opus_uint32 val, int frac) +{ + int l; + l=EC_ILOG(val); + if(val&(val-1)){ + /*This is (val>>l-16), but guaranteed to round up, even if adding a bias + before the shift would cause overflow (e.g., for 0xFFFFxxxx).*/ + if(l>16)val=(val>>(l-16))+(((val&((1<<(l-16))-1))+(1<<(l-16))-1)>>(l-16)); + else val<<=16-l; + l=(l-1)<<frac; + /*Note that we always need one iteration, since the rounding up above means + that we might need to adjust the integer part of the logarithm.*/ + do{ + int b; + b=(int)(val>>16); + l+=b<<frac; + val=(val+b)>>b; + val=(val*val+0x7FFF)>>15; + } + while(frac-->0); + /*If val is not exactly 0x8000, then we have to round up the remainder.*/ + return l+(val>0x8000); + } + /*Exact powers of two require no rounding.*/ + else return (l-1)<<frac; +} +#endif + +#ifndef SMALL_FOOTPRINT + +#define MASK32 (0xFFFFFFFF) + +/*INV_TABLE[i] holds the multiplicative inverse of (2*i+1) mod 2**32.*/ +static const opus_uint32 INV_TABLE[53]={ + 0x00000001,0xAAAAAAAB,0xCCCCCCCD,0xB6DB6DB7, + 0x38E38E39,0xBA2E8BA3,0xC4EC4EC5,0xEEEEEEEF, + 0xF0F0F0F1,0x286BCA1B,0x3CF3CF3D,0xE9BD37A7, + 0xC28F5C29,0x684BDA13,0x4F72C235,0xBDEF7BDF, + 0x3E0F83E1,0x8AF8AF8B,0x914C1BAD,0x96F96F97, + 0xC18F9C19,0x2FA0BE83,0xA4FA4FA5,0x677D46CF, + 0x1A1F58D1,0xFAFAFAFB,0x8C13521D,0x586FB587, + 0xB823EE09,0xA08AD8F3,0xC10C9715,0xBEFBEFBF, + 0xC0FC0FC1,0x07A44C6B,0xA33F128D,0xE327A977, + 0xC7E3F1F9,0x962FC963,0x3F2B3885,0x613716AF, + 0x781948B1,0x2B2E43DB,0xFCFCFCFD,0x6FD0EB67, + 0xFA3F47E9,0xD2FD2FD3,0x3F4FD3F5,0xD4E25B9F, + 0x5F02A3A1,0xBF5A814B,0x7C32B16D,0xD3431B57, + 0xD8FD8FD9, +}; + +/*Computes (_a*_b-_c)/(2*_d+1) when the quotient is known to be exact. + _a, _b, _c, and _d may be arbitrary so long as the arbitrary precision result + fits in 32 bits, but currently the table for multiplicative inverses is only + valid for _d<=52.*/ +static inline opus_uint32 imusdiv32odd(opus_uint32 _a,opus_uint32 _b, + opus_uint32 _c,int _d){ + celt_assert(_d<=52); + return (_a*_b-_c)*INV_TABLE[_d]&MASK32; +} + +/*Computes (_a*_b-_c)/_d when the quotient is known to be exact. + _d does not actually have to be even, but imusdiv32odd will be faster when + it's odd, so you should use that instead. + _a and _d are assumed to be small (e.g., _a*_d fits in 32 bits; currently the + table for multiplicative inverses is only valid for _d<=54). + _b and _c may be arbitrary so long as the arbitrary precision reuslt fits in + 32 bits.*/ +static inline opus_uint32 imusdiv32even(opus_uint32 _a,opus_uint32 _b, + opus_uint32 _c,int _d){ + opus_uint32 inv; + int mask; + int shift; + int one; + celt_assert(_d>0); + celt_assert(_d<=54); + shift=EC_ILOG(_d^(_d-1)); + inv=INV_TABLE[(_d-1)>>shift]; + shift--; + one=1<<shift; + mask=one-1; + return (_a*(_b>>shift)-(_c>>shift)+ + ((_a*(_b&mask)+one-(_c&mask))>>shift)-1)*inv&MASK32; +} + +#endif /* SMALL_FOOTPRINT */ + +/*Although derived separately, the pulse vector coding scheme is equivalent to + a Pyramid Vector Quantizer \cite{Fis86}. + Some additional notes about an early version appear at + http://people.xiph.org/~tterribe/notes/cwrs.html, but the codebook ordering + and the definitions of some terms have evolved since that was written. + + The conversion from a pulse vector to an integer index (encoding) and back + (decoding) is governed by two related functions, V(N,K) and U(N,K). 
+ + V(N,K) = the number of combinations, with replacement, of N items, taken K + at a time, when a sign bit is added to each item taken at least once (i.e., + the number of N-dimensional unit pulse vectors with K pulses). + One way to compute this is via + V(N,K) = K>0 ? sum(k=1...K,2**k*choose(N,k)*choose(K-1,k-1)) : 1, + where choose() is the binomial function. + A table of values for N<10 and K<10 looks like: + V[10][10] = { + {1, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {1, 2, 2, 2, 2, 2, 2, 2, 2, 2}, + {1, 4, 8, 12, 16, 20, 24, 28, 32, 36}, + {1, 6, 18, 38, 66, 102, 146, 198, 258, 326}, + {1, 8, 32, 88, 192, 360, 608, 952, 1408, 1992}, + {1, 10, 50, 170, 450, 1002, 1970, 3530, 5890, 9290}, + {1, 12, 72, 292, 912, 2364, 5336, 10836, 20256, 35436}, + {1, 14, 98, 462, 1666, 4942, 12642, 28814, 59906, 115598}, + {1, 16, 128, 688, 2816, 9424, 27008, 68464, 157184, 332688}, + {1, 18, 162, 978, 4482, 16722, 53154, 148626, 374274, 864146} + }; + + U(N,K) = the number of such combinations wherein N-1 objects are taken at + most K-1 at a time. + This is given by + U(N,K) = sum(k=0...K-1,V(N-1,k)) + = K>0 ? (V(N-1,K-1) + V(N,K-1))/2 : 0. + The latter expression also makes clear that U(N,K) is half the number of such + combinations wherein the first object is taken at least once. + Although it may not be clear from either of these definitions, U(N,K) is the + natural function to work with when enumerating the pulse vector codebooks, + not V(N,K). + U(N,K) is not well-defined for N=0, but with the extension + U(0,K) = K>0 ? 0 : 1, + the function becomes symmetric: U(N,K) = U(K,N), with a similar table: + U[10][10] = { + {1, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + {0, 1, 3, 5, 7, 9, 11, 13, 15, 17}, + {0, 1, 5, 13, 25, 41, 61, 85, 113, 145}, + {0, 1, 7, 25, 63, 129, 231, 377, 575, 833}, + {0, 1, 9, 41, 129, 321, 681, 1289, 2241, 3649}, + {0, 1, 11, 61, 231, 681, 1683, 3653, 7183, 13073}, + {0, 1, 13, 85, 377, 1289, 3653, 8989, 19825, 40081}, + {0, 1, 15, 113, 575, 2241, 7183, 19825, 48639, 108545}, + {0, 1, 17, 145, 833, 3649, 13073, 40081, 108545, 265729} + }; + + With this extension, V(N,K) may be written in terms of U(N,K): + V(N,K) = U(N,K) + U(N,K+1) + for all N>=0, K>=0. + Thus U(N,K+1) represents the number of combinations where the first element + is positive or zero, and U(N,K) represents the number of combinations where + it is negative. + With a large enough table of U(N,K) values, we could write O(N) encoding + and O(min(N*log(K),N+K)) decoding routines, but such a table would be + prohibitively large for small embedded devices (K may be as large as 32767 + for small N, and N may be as large as 200). + + Both functions obey the same recurrence relation: + V(N,K) = V(N-1,K) + V(N,K-1) + V(N-1,K-1), + U(N,K) = U(N-1,K) + U(N,K-1) + U(N-1,K-1), + for all N>0, K>0, with different initial conditions at N=0 or K=0. + This allows us to construct a row of one of the tables above given the + previous row or the next row. + Thus we can derive O(NK) encoding and decoding routines with O(K) memory + using only addition and subtraction. + + When encoding, we build up from the U(2,K) row and work our way forwards. + When decoding, we need to start at the U(N,K) row and work our way backwards, + which requires a means of computing U(N,K). 
+ U(N,K) may be computed from two previous values with the same N: + U(N,K) = ((2*N-1)*U(N,K-1) - U(N,K-2))/(K-1) + U(N,K-2) + for all N>1, and since U(N,K) is symmetric, a similar relation holds for two + previous values with the same K: + U(N,K>1) = ((2*K-1)*U(N-1,K) - U(N-2,K))/(N-1) + U(N-2,K) + for all K>1. + This allows us to construct an arbitrary row of the U(N,K) table by starting + with the first two values, which are constants. + This saves roughly 2/3 the work in our O(NK) decoding routine, but costs O(K) + multiplications. + Similar relations can be derived for V(N,K), but are not used here. + + For N>0 and K>0, U(N,K) and V(N,K) take on the form of an (N-1)-degree + polynomial for fixed N. + The first few are + U(1,K) = 1, + U(2,K) = 2*K-1, + U(3,K) = (2*K-2)*K+1, + U(4,K) = (((4*K-6)*K+8)*K-3)/3, + U(5,K) = ((((2*K-4)*K+10)*K-8)*K+3)/3, + and + V(1,K) = 2, + V(2,K) = 4*K, + V(3,K) = 4*K*K+2, + V(4,K) = 8*(K*K+2)*K/3, + V(5,K) = ((4*K*K+20)*K*K+6)/3, + for all K>0. + This allows us to derive O(N) encoding and O(N*log(K)) decoding routines for + small N (and indeed decoding is also O(N) for N<3). + + @ARTICLE{Fis86, + author="Thomas R. Fischer", + title="A Pyramid Vector Quantizer", + journal="IEEE Transactions on Information Theory", + volume="IT-32", + number=4, + pages="568--583", + month=Jul, + year=1986 + }*/ + +#ifndef SMALL_FOOTPRINT +/*Compute U(2,_k). + Note that this may be called with _k=32768 (maxK[2]+1).*/ +static inline unsigned ucwrs2(unsigned _k){ + celt_assert(_k>0); + return _k+(_k-1); +} + +/*Compute V(2,_k).*/ +static inline opus_uint32 ncwrs2(int _k){ + celt_assert(_k>0); + return 4*(opus_uint32)_k; +} + +/*Compute U(3,_k). + Note that this may be called with _k=32768 (maxK[3]+1).*/ +static inline opus_uint32 ucwrs3(unsigned _k){ + celt_assert(_k>0); + return (2*(opus_uint32)_k-2)*_k+1; +} + +/*Compute V(3,_k).*/ +static inline opus_uint32 ncwrs3(int _k){ + celt_assert(_k>0); + return 2*(2*(unsigned)_k*(opus_uint32)_k+1); +} + +/*Compute U(4,_k).*/ +static inline opus_uint32 ucwrs4(int _k){ + celt_assert(_k>0); + return imusdiv32odd(2*_k,(2*_k-3)*(opus_uint32)_k+4,3,1); +} + +/*Compute V(4,_k).*/ +static inline opus_uint32 ncwrs4(int _k){ + celt_assert(_k>0); + return ((_k*(opus_uint32)_k+2)*_k)/3<<3; +} + +#endif /* SMALL_FOOTPRINT */ + +/*Computes the next row/column of any recurrence that obeys the relation + u[i][j]=u[i-1][j]+u[i][j-1]+u[i-1][j-1]. + _ui0 is the base case for the new row/column.*/ +static inline void unext(opus_uint32 *_ui,unsigned _len,opus_uint32 _ui0){ + opus_uint32 ui1; + unsigned j; + /*This do-while will overrun the array if we don't have storage for at least + 2 values.*/ + j=1; do { + ui1=UADD32(UADD32(_ui[j],_ui[j-1]),_ui0); + _ui[j-1]=_ui0; + _ui0=ui1; + } while (++j<_len); + _ui[j-1]=_ui0; +} + +/*Computes the previous row/column of any recurrence that obeys the relation + u[i-1][j]=u[i][j]-u[i][j-1]-u[i-1][j-1]. + _ui0 is the base case for the new row/column.*/ +static inline void uprev(opus_uint32 *_ui,unsigned _n,opus_uint32 _ui0){ + opus_uint32 ui1; + unsigned j; + /*This do-while will overrun the array if we don't have storage for at least + 2 values.*/ + j=1; do { + ui1=USUB32(USUB32(_ui[j],_ui[j-1]),_ui0); + _ui[j-1]=_ui0; + _ui0=ui1; + } while (++j<_n); + _ui[j-1]=_ui0; +} + +/*Compute V(_n,_k), as well as U(_n,0..._k+1). 
+ _u: On exit, _u[i] contains U(_n,i) for i in [0..._k+1].*/ +static opus_uint32 ncwrs_urow(unsigned _n,unsigned _k,opus_uint32 *_u){ + opus_uint32 um2; + unsigned len; + unsigned k; + len=_k+2; + /*We require storage at least 3 values (e.g., _k>0).*/ + celt_assert(len>=3); + _u[0]=0; + _u[1]=um2=1; +#ifndef SMALL_FOOTPRINT + /*_k>52 doesn't work in the false branch due to the limits of INV_TABLE, + but _k isn't tested here because k<=52 for n=7*/ + if(_n<=6) +#endif + { + /*If _n==0, _u[0] should be 1 and the rest should be 0.*/ + /*If _n==1, _u[i] should be 1 for i>1.*/ + celt_assert(_n>=2); + /*If _k==0, the following do-while loop will overflow the buffer.*/ + celt_assert(_k>0); + k=2; + do _u[k]=(k<<1)-1; + while(++k<len); + for(k=2;k<_n;k++)unext(_u+1,_k+1,1); + } +#ifndef SMALL_FOOTPRINT + else{ + opus_uint32 um1; + opus_uint32 n2m1; + _u[2]=n2m1=um1=(_n<<1)-1; + for(k=3;k<len;k++){ + /*U(N,K) = ((2*N-1)*U(N,K-1)-U(N,K-2))/(K-1) + U(N,K-2)*/ + _u[k]=um2=imusdiv32even(n2m1,um1,um2,k-1)+um2; + if(++k>=len)break; + _u[k]=um1=imusdiv32odd(n2m1,um2,um1,(k-1)>>1)+um1; + } + } +#endif /* SMALL_FOOTPRINT */ + return _u[_k]+_u[_k+1]; +} + +#ifndef SMALL_FOOTPRINT + +/*Returns the _i'th combination of _k elements (at most 32767) chosen from a + set of size 1 with associated sign bits. + _y: Returns the vector of pulses.*/ +static inline void cwrsi1(int _k,opus_uint32 _i,int *_y){ + int s; + s=-(int)_i; + _y[0]=(_k+s)^s; +} + +/*Returns the _i'th combination of _k elements (at most 32767) chosen from a + set of size 2 with associated sign bits. + _y: Returns the vector of pulses.*/ +static inline void cwrsi2(int _k,opus_uint32 _i,int *_y){ + opus_uint32 p; + int s; + int yj; + p=ucwrs2(_k+1U); + s=-(_i>=p); + _i-=p&s; + yj=_k; + _k=(_i+1)>>1; + p=_k?ucwrs2(_k):0; + _i-=p; + yj-=_k; + _y[0]=(yj+s)^s; + cwrsi1(_k,_i,_y+1); +} + +/*Returns the _i'th combination of _k elements (at most 32767) chosen from a + set of size 3 with associated sign bits. + _y: Returns the vector of pulses.*/ +static void cwrsi3(int _k,opus_uint32 _i,int *_y){ + opus_uint32 p; + int s; + int yj; + p=ucwrs3(_k+1U); + s=-(_i>=p); + _i-=p&s; + yj=_k; + /*Finds the maximum _k such that ucwrs3(_k)<=_i (tested for all + _i<2147418113=U(3,32768)).*/ + _k=_i>0?(isqrt32(2*_i-1)+1)>>1:0; + p=_k?ucwrs3(_k):0; + _i-=p; + yj-=_k; + _y[0]=(yj+s)^s; + cwrsi2(_k,_i,_y+1); +} + +/*Returns the _i'th combination of _k elements (at most 1172) chosen from a set + of size 4 with associated sign bits. + _y: Returns the vector of pulses.*/ +static void cwrsi4(int _k,opus_uint32 _i,int *_y){ + opus_uint32 p; + int s; + int yj; + int kl; + int kr; + p=ucwrs4(_k+1); + s=-(_i>=p); + _i-=p&s; + yj=_k; + /*We could solve a cubic for k here, but the form of the direct solution does + not lend itself well to exact integer arithmetic. + Instead we do a binary search on U(4,K).*/ + kl=0; + kr=_k; + for(;;){ + _k=(kl+kr)>>1; + p=_k?ucwrs4(_k):0; + if(p<_i){ + if(_k>=kr)break; + kl=_k+1; + } + else if(p>_i)kr=_k-1; + else break; + } + _i-=p; + yj-=_k; + _y[0]=(yj+s)^s; + cwrsi3(_k,_i,_y+1); +} + +#endif /* SMALL_FOOTPRINT */ + +/*Returns the _i'th combination of _k elements chosen from a set of size _n + with associated sign bits. + _y: Returns the vector of pulses. + _u: Must contain entries [0..._k+1] of row _n of U() on input. 
+ Its contents will be destructively modified.*/ +static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){ + int j; + celt_assert(_n>0); + j=0; + do{ + opus_uint32 p; + int s; + int yj; + p=_u[_k+1]; + s=-(_i>=p); + _i-=p&s; + yj=_k; + p=_u[_k]; + while(p>_i)p=_u[--_k]; + _i-=p; + yj-=_k; + _y[j]=(yj+s)^s; + uprev(_u,_k+2,0); + } + while(++j<_n); +} + +/*Returns the index of the given combination of K elements chosen from a set + of size 1 with associated sign bits. + _y: The vector of pulses, whose sum of absolute values is K. + _k: Returns K.*/ +static inline opus_uint32 icwrs1(const int *_y,int *_k){ + *_k=abs(_y[0]); + return _y[0]<0; +} + +#ifndef SMALL_FOOTPRINT + +/*Returns the index of the given combination of K elements chosen from a set + of size 2 with associated sign bits. + _y: The vector of pulses, whose sum of absolute values is K. + _k: Returns K.*/ +static inline opus_uint32 icwrs2(const int *_y,int *_k){ + opus_uint32 i; + int k; + i=icwrs1(_y+1,&k); + i+=k?ucwrs2(k):0; + k+=abs(_y[0]); + if(_y[0]<0)i+=ucwrs2(k+1U); + *_k=k; + return i; +} + +/*Returns the index of the given combination of K elements chosen from a set + of size 3 with associated sign bits. + _y: The vector of pulses, whose sum of absolute values is K. + _k: Returns K.*/ +static inline opus_uint32 icwrs3(const int *_y,int *_k){ + opus_uint32 i; + int k; + i=icwrs2(_y+1,&k); + i+=k?ucwrs3(k):0; + k+=abs(_y[0]); + if(_y[0]<0)i+=ucwrs3(k+1U); + *_k=k; + return i; +} + +/*Returns the index of the given combination of K elements chosen from a set + of size 4 with associated sign bits. + _y: The vector of pulses, whose sum of absolute values is K. + _k: Returns K.*/ +static inline opus_uint32 icwrs4(const int *_y,int *_k){ + opus_uint32 i; + int k; + i=icwrs3(_y+1,&k); + i+=k?ucwrs4(k):0; + k+=abs(_y[0]); + if(_y[0]<0)i+=ucwrs4(k+1); + *_k=k; + return i; +} + +#endif /* SMALL_FOOTPRINT */ + +/*Returns the index of the given combination of K elements chosen from a set + of size _n with associated sign bits. + _y: The vector of pulses, whose sum of absolute values must be _k. 
+ _nc: Returns V(_n,_k).*/ +static inline opus_uint32 icwrs(int _n,int _k,opus_uint32 *_nc,const int *_y, + opus_uint32 *_u){ + opus_uint32 i; + int j; + int k; + /*We can't unroll the first two iterations of the loop unless _n>=2.*/ + celt_assert(_n>=2); + _u[0]=0; + for(k=1;k<=_k+1;k++)_u[k]=(k<<1)-1; + i=icwrs1(_y+_n-1,&k); + j=_n-2; + i+=_u[k]; + k+=abs(_y[j]); + if(_y[j]<0)i+=_u[k+1]; + while(j-->0){ + unext(_u,_k+2,0); + i+=_u[k]; + k+=abs(_y[j]); + if(_y[j]<0)i+=_u[k+1]; + } + *_nc=_u[k]+_u[k+1]; + return i; +} + +#ifdef CUSTOM_MODES +void get_required_bits(opus_int16 *_bits,int _n,int _maxk,int _frac){ + int k; + /*_maxk==0 => there's nothing to do.*/ + celt_assert(_maxk>0); + _bits[0]=0; + if (_n==1) + { + for (k=1;k<=_maxk;k++) + _bits[k] = 1<<_frac; + } + else { + VARDECL(opus_uint32,u); + SAVE_STACK; + ALLOC(u,_maxk+2U,opus_uint32); + ncwrs_urow(_n,_maxk,u); + for(k=1;k<=_maxk;k++) + _bits[k]=log2_frac(u[k]+u[k+1],_frac); + RESTORE_STACK; + } +} +#endif /* CUSTOM_MODES */ + +void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){ + opus_uint32 i; + celt_assert(_k>0); +#ifndef SMALL_FOOTPRINT + switch(_n){ + case 2:{ + i=icwrs2(_y,&_k); + ec_enc_uint(_enc,i,ncwrs2(_k)); + }break; + case 3:{ + i=icwrs3(_y,&_k); + ec_enc_uint(_enc,i,ncwrs3(_k)); + }break; + case 4:{ + i=icwrs4(_y,&_k); + ec_enc_uint(_enc,i,ncwrs4(_k)); + }break; + default: + { +#endif + VARDECL(opus_uint32,u); + opus_uint32 nc; + SAVE_STACK; + ALLOC(u,_k+2U,opus_uint32); + i=icwrs(_n,_k,&nc,_y,u); + ec_enc_uint(_enc,i,nc); + RESTORE_STACK; +#ifndef SMALL_FOOTPRINT + } + break; + } +#endif +} + +void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec) +{ + celt_assert(_k>0); +#ifndef SMALL_FOOTPRINT + switch(_n){ + case 2:cwrsi2(_k,ec_dec_uint(_dec,ncwrs2(_k)),_y);break; + case 3:cwrsi3(_k,ec_dec_uint(_dec,ncwrs3(_k)),_y);break; + case 4:cwrsi4(_k,ec_dec_uint(_dec,ncwrs4(_k)),_y);break; + default: + { +#endif + VARDECL(opus_uint32,u); + SAVE_STACK; + ALLOC(u,_k+2U,opus_uint32); + cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u); + RESTORE_STACK; +#ifndef SMALL_FOOTPRINT + } + break; + } +#endif +}
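The recurrence U(N,K) = U(N-1,K) + U(N,K-1) + U(N-1,K-1) and the identity V(N,K) = U(N,K) + U(N,K+1) are easy to cross-check against the tables in the comment above. A standalone sketch (not part of the imported sources) that builds U() by dynamic programming and verifies a few entries:

#include <assert.h>
#include <stdio.h>

#define NMAX 10
#define KMAX 10

int main(void)
{
   unsigned long U[NMAX][KMAX + 1];
   int n, k;

   /* Base cases: U(0,0)=1, U(0,K>0)=0 and U(N>0,0)=0. */
   for (k = 0; k <= KMAX; k++)
      U[0][k] = (k == 0);
   for (n = 1; n < NMAX; n++) {
      U[n][0] = 0;
      for (k = 1; k <= KMAX; k++)
         U[n][k] = U[n-1][k] + U[n][k-1] + U[n-1][k-1];
   }

   /* V(N,K) = U(N,K) + U(N,K+1); compare with the V[][] table above. */
   assert(U[4][3] + U[4][4] == 88);     /* V(4,3) */
   assert(U[5][4] + U[5][5] == 450);    /* V(5,4) */
   printf("V(9,9) = %lu\n", U[9][9] + U[9][10]);   /* 864146 per the table */
   return 0;
}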
new file mode 100644 --- /dev/null +++ b/media/libopus/celt/cwrs.h @@ -0,0 +1,48 @@ +/* Copyright (c) 2007-2008 CSIRO + Copyright (c) 2007-2009 Xiph.Org Foundation + Copyright (c) 2007-2009 Timothy B. Terriberry + Written by Timothy B. Terriberry and Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef CWRS_H +#define CWRS_H + +#include "arch.h" +#include "stack_alloc.h" +#include "entenc.h" +#include "entdec.h" + +#ifdef CUSTOM_MODES +int log2_frac(opus_uint32 val, int frac); +#endif + +void get_required_bits(opus_int16 *bits, int N, int K, int frac); + +void encode_pulses(const int *_y, int N, int K, ec_enc *enc); + +void decode_pulses(int *_y, int N, int K, ec_dec *dec); + +#endif /* CWRS_H */
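A round trip through this API looks as follows; the entropy coder halves (entenc.c and entdec.c elsewhere in this directory) are assumed to be built alongside, and the pulse vector must carry exactly K pulses, i.e. its absolute values must sum to K. Illustrative sketch only:

#include <assert.h>
#include <string.h>
#include "cwrs.h"     /* also pulls in entenc.h and entdec.h */

int main(void)
{
   unsigned char buf[16];
   int y[4]   = { 2, 0, -1, 0 };   /* N=4 dimensions, K=3 pulses */
   int out[4] = { 0, 0, 0, 0 };
   ec_enc enc;
   ec_dec dec;

   ec_enc_init(&enc, buf, sizeof(buf));
   encode_pulses(y, 4, 3, &enc);    /* writes an index in [0, V(4,3)=88) */
   ec_enc_done(&enc);

   ec_dec_init(&dec, buf, sizeof(buf));
   decode_pulses(out, 4, 3, &dec);
   assert(memcmp(y, out, sizeof(y)) == 0);
   return 0;
}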
new file mode 100644 --- /dev/null +++ b/media/libopus/celt/ecintrin.h @@ -0,0 +1,102 @@ +/* Copyright (c) 2003-2008 Timothy B. Terriberry + Copyright (c) 2008 Xiph.Org Foundation */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/*Some common macros for potential platform-specific optimization.*/ +#include "opus_types.h" +#include <math.h> +#include <limits.h> +#include "arch.h" +#if !defined(_ecintrin_H) +# define _ecintrin_H (1) + +/*Some specific platforms may have optimized intrinsic or inline assembly + versions of these functions which can substantially improve performance. + We define macros for them to allow easy incorporation of these non-ANSI + features.*/ + +/*Note that we do not provide a macro for abs(), because it is provided as a + library function, which we assume is translated into an intrinsic to avoid + the function call overhead and then implemented in the smartest way for the + target platform. + With modern gcc (4.x), this is true: it uses cmov instructions if the + architecture supports it and branchless bit-twiddling if it does not (the + speed difference between the two approaches is not measurable). + Interestingly, the bit-twiddling method was patented in 2000 (US 6,073,150) + by Sun Microsystems, despite prior art dating back to at least 1996: + http://web.archive.org/web/19961201174141/www.x86.org/ftp/articles/pentopt/PENTOPT.TXT + On gcc 3.x, however, our assumption is not true, as abs() is translated to a + conditional jump, which is horrible on deeply piplined architectures (e.g., + all consumer architectures for the past decade or more) when the sign cannot + be reliably predicted.*/ + +/*Modern gcc (4.x) can compile the naive versions of min and max with cmov if + given an appropriate architecture, but the branchless bit-twiddling versions + are just as fast, and do not require any special target architecture. + Earlier gcc versions (3.x) compiled both code to the same assembly + instructions, because of the way they represented ((_b)>(_a)) internally.*/ +# define EC_MINI(_a,_b) ((_a)+(((_b)-(_a))&-((_b)<(_a)))) + +/*Count leading zeros. + This macro should only be used for implementing ec_ilog(), if it is defined. 
+ All other code should use EC_ILOG() instead.*/ +#if defined(_MSC_VER) +# include <intrin.h> +/*In _DEBUG mode this is not an intrinsic by default.*/ +# pragma intrinsic(_BitScanReverse) + +static __inline int ec_bsr(unsigned long _x){ + unsigned long ret; + _BitScanReverse(&ret,_x); + return (int)ret; +} +# define EC_CLZ0 (1) +# define EC_CLZ(_x) (-ec_bsr(_x)) +#elif defined(ENABLE_TI_DSPLIB) +# include "dsplib.h" +# define EC_CLZ0 (31) +# define EC_CLZ(_x) (_lnorm(_x)) +#elif __GNUC_PREREQ(3,4) +# if INT_MAX>=2147483647 +# define EC_CLZ0 ((int)sizeof(unsigned)*CHAR_BIT) +# define EC_CLZ(_x) (__builtin_clz(_x)) +# elif LONG_MAX>=2147483647L +# define EC_CLZ0 ((int)sizeof(unsigned long)*CHAR_BIT) +# define EC_CLZ(_x) (__builtin_clzl(_x)) +# endif +#endif + +#if defined(EC_CLZ) +/*Note that __builtin_clz is not defined when _x==0, according to the gcc + documentation (and that of the BSR instruction that implements it on x86). + The majority of the time we can never pass it zero. + When we need to, it can be special cased.*/ +# define EC_ILOG(_x) (EC_CLZ0-EC_CLZ(_x)) +#else +int ec_ilog(opus_uint32 _v); +# define EC_ILOG(_x) (ec_ilog(_x)) +#endif +#endif
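However it ends up being implemented, the contract here is that EC_ILOG(x), for x>0, evaluates to the 1-based index of the highest set bit, i.e. floor(log2(x)) + 1, and that EC_MINI() is a branchless minimum for unsigned operands. A standalone sketch of both contracts (no library headers required):

#include <assert.h>
#include <stdint.h>

/* Naive reference for what EC_ILOG() computes. */
static int ilog_ref(uint32_t v)
{
   int ret = 0;
   while (v) { ret++; v >>= 1; }
   return ret;
}

/* The EC_MINI() expression, spelled out for unsigned operands. */
static uint32_t mini_ref(uint32_t a, uint32_t b)
{
   return a + ((b - a) & -(uint32_t)(b < a));
}

int main(void)
{
   assert(ilog_ref(1) == 1);             /* floor(log2(1))+1 */
   assert(ilog_ref(2) == 2);
   assert(ilog_ref(0x80000000u) == 32);  /* top bit set */
   assert(mini_ref(7, 12) == 7);
   assert(mini_ref(12, 7) == 7);         /* relies on unsigned wraparound */
   return 0;
}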
new file mode 100644 --- /dev/null +++ b/media/libopus/celt/entcode.c @@ -0,0 +1,88 @@ +/* Copyright (c) 2001-2011 Timothy B. Terriberry +*/ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "entcode.h" +#include "arch.h" + +#if !defined(EC_CLZ) +int ec_ilog(opus_uint32 _v){ + /*On a Pentium M, this branchless version tested as the fastest on + 1,000,000,000 random 32-bit integers, edging out a similar version with + branches, and a 256-entry LUT version.*/ + int ret; + int m; + ret=!!_v; + m=!!(_v&0xFFFF0000)<<4; + _v>>=m; + ret|=m; + m=!!(_v&0xFF00)<<3; + _v>>=m; + ret|=m; + m=!!(_v&0xF0)<<2; + _v>>=m; + ret|=m; + m=!!(_v&0xC)<<1; + _v>>=m; + ret|=m; + ret+=!!(_v&0x2); + return ret; +} +#endif + +opus_uint32 ec_tell_frac(ec_ctx *_this){ + opus_uint32 nbits; + opus_uint32 r; + int l; + int i; + /*To handle the non-integral number of bits still left in the encoder/decoder + state, we compute the worst-case number of bits of val that must be + encoded to ensure that the value is inside the range for any possible + subsequent bits. + The computation here is independent of val itself (the decoder does not + even track that value), even though the real number of bits used after + ec_enc_done() may be 1 smaller if rng is a power of two and the + corresponding trailing bits of val are all zeros. + If we did try to track that special case, then coding a value with a + probability of 1/(1<<n) might sometimes appear to use more than n bits. + This may help explain the surprising result that a newly initialized + encoder or decoder claims to have used 1 bit.*/ + nbits=_this->nbits_total<<BITRES; + l=EC_ILOG(_this->rng); + r=_this->rng>>(l-16); + for(i=BITRES;i-->0;){ + int b; + r=r*r>>15; + b=(int)(r>>16); + l=l<<1|b; + r>>=b; + } + return nbits-l; +}
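The loop in ec_tell_frac() extracts BITRES = 3 fractional bits of log2(rng) by repeatedly squaring a 16-bit mantissa and shifting each carry bit into the running logarithm. A standalone sketch of the same trick (sample values are chosen >= 2^16, as rng always is inside the coder), compared against log2() from <math.h>; l/8 tracks log2(rng)+1 from below to within 1/8 bit, which is why the usage reported by ec_tell_frac() always errs slightly high:

#include <math.h>
#include <stdio.h>
#include <stdint.h>

static int ilog32(uint32_t v) { int l = 0; while (v) { l++; v >>= 1; } return l; }

int main(void)
{
   uint32_t samples[3] = { 0x80000000u, 0xC0000000u, 0x92345678u };
   int i, j;
   for (j = 0; j < 3; j++) {
      uint32_t rng = samples[j], r;
      int l = ilog32(rng);
      r = rng >> (l - 16);              /* 16-bit mantissa with its top bit set */
      for (i = 3; i-- > 0;) {           /* BITRES = 3 iterations */
         int b;
         r = r*r >> 15;                 /* squaring doubles the fractional log */
         b = (int)(r >> 16);            /* carry out = next fractional bit */
         l = l << 1 | b;
         r >>= b;
      }
      printf("rng=0x%08x  l/8=%.3f  log2(rng)+1=%.3f\n",
             rng, l/8.0, log2((double)rng) + 1);
   }
   return 0;
}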
new file mode 100644 --- /dev/null +++ b/media/libopus/celt/entcode.h @@ -0,0 +1,116 @@ +/* Copyright (c) 2001-2011 Timothy B. Terriberry + Copyright (c) 2008-2009 Xiph.Org Foundation */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "opus_types.h" + +#if !defined(_entcode_H) +# define _entcode_H (1) +# include <limits.h> +# include <stddef.h> +# include "ecintrin.h" + +/*OPT: ec_window must be at least 32 bits, but if you have fast arithmetic on a + larger type, you can speed up the decoder by using it here.*/ +typedef opus_uint32 ec_window; +typedef struct ec_ctx ec_ctx; +typedef struct ec_ctx ec_enc; +typedef struct ec_ctx ec_dec; + +# define EC_WINDOW_SIZE ((int)sizeof(ec_window)*CHAR_BIT) + +/*The number of bits to use for the range-coded part of unsigned integers.*/ +# define EC_UINT_BITS (8) + +/*The resolution of fractional-precision bit usage measurements, i.e., + 3 => 1/8th bits.*/ +# define BITRES 3 + +/*The entropy encoder/decoder context. + We use the same structure for both, so that common functions like ec_tell() + can be used on either one.*/ +struct ec_ctx{ + /*Buffered input/output.*/ + unsigned char *buf; + /*The size of the buffer.*/ + opus_uint32 storage; + /*The offset at which the last byte containing raw bits was read/written.*/ + opus_uint32 end_offs; + /*Bits that will be read from/written at the end.*/ + ec_window end_window; + /*Number of valid bits in end_window.*/ + int nend_bits; + /*The total number of whole bits read/written. + This does not include partial bits currently in the range coder.*/ + int nbits_total; + /*The offset at which the next range coder byte will be read/written.*/ + opus_uint32 offs; + /*The number of values in the current range.*/ + opus_uint32 rng; + /*In the decoder: the difference between the top of the current range and + the input value, minus one. + In the encoder: the low end of the current range.*/ + opus_uint32 val; + /*In the decoder: the saved normalization factor from ec_decode(). 
+ In the encoder: the number of oustanding carry propagating symbols.*/ + opus_uint32 ext; + /*A buffered input/output symbol, awaiting carry propagation.*/ + int rem; + /*Nonzero if an error occurred.*/ + int error; +}; + +static inline opus_uint32 ec_range_bytes(ec_ctx *_this){ + return _this->offs; +} + +static inline unsigned char *ec_get_buffer(ec_ctx *_this){ + return _this->buf; +} + +static inline int ec_get_error(ec_ctx *_this){ + return _this->error; +} + +/*Returns the number of bits "used" by the encoded or decoded symbols so far. + This same number can be computed in either the encoder or the decoder, and is + suitable for making coding decisions. + Return: The number of bits. + This will always be slightly larger than the exact value (e.g., all + rounding error is in the positive direction).*/ +static inline int ec_tell(ec_ctx *_this){ + return _this->nbits_total-EC_ILOG(_this->rng); +} + +/*Returns the number of bits "used" by the encoded or decoded symbols so far. + This same number can be computed in either the encoder or the decoder, and is + suitable for making coding decisions. + Return: The number of bits scaled by 2**BITRES. + This will always be slightly larger than the exact value (e.g., all + rounding error is in the positive direction).*/ +opus_uint32 ec_tell_frac(ec_ctx *_this); + +#endif
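ec_tell() is what the encoder and decoder use to decide whether an optional field still fits in the packet; because the estimate only errs high, a field is never read past the end of the bit budget. A hedged sketch of the pattern used throughout celt_decode_with_ec() above (dec and len are assumed to come from an ec_dec_init() call on a real packet):

#include "entdec.h"   /* pulls in entcode.h for ec_tell() */

/* Read a 1-bit flag with P(one)=1/2, but only if the packet still has room
   for it; otherwise fall back to the default value 0. */
int read_optional_flag(ec_dec *dec, int len)
{
   opus_int32 total_bits = len*8;
   if (ec_tell(dec) + 1 <= total_bits)
      return ec_dec_bit_logp(dec, 1);
   return 0;
}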
new file mode 100644 --- /dev/null +++ b/media/libopus/celt/entdec.c @@ -0,0 +1,249 @@ +/* Copyright (c) 2001-2011 Timothy B. Terriberry + Copyright (c) 2008-2009 Xiph.Org Foundation */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stddef.h> +#include "os_support.h" +#include "arch.h" +#include "entdec.h" +#include "mfrngcod.h" + +/*A range decoder. + This is an entropy decoder based upon \cite{Mar79}, which is itself a + rediscovery of the FIFO arithmetic code introduced by \cite{Pas76}. + It is very similar to arithmetic encoding, except that encoding is done with + digits in any base, instead of with bits, and so it is faster when using + larger bases (i.e.: a byte). + The author claims an average waste of $\frac{1}{2}\log_b(2b)$ bits, where $b$ + is the base, longer than the theoretical optimum, but to my knowledge there + is no published justification for this claim. + This only seems true when using near-infinite precision arithmetic so that + the process is carried out with no rounding errors. + + IBM (the author's employer) never sought to patent the idea, and to my + knowledge the algorithm is unencumbered by any patents, though its + performance is very competitive with proprietary arithmetic coding. + The two are based on very similar ideas, however. + An excellent description of implementation details is available at + http://www.arturocampos.com/ac_range.html + A recent work \cite{MNW98} which proposes several changes to arithmetic + encoding for efficiency actually re-discovers many of the principles + behind range encoding, and presents a good theoretical analysis of them. + + End of stream is handled by writing out the smallest number of bits that + ensures that the stream will be correctly decoded regardless of the value of + any subsequent bits. + ec_tell() can be used to determine how many bits were needed to decode + all the symbols thus far; other data can be packed in the remaining bits of + the input buffer. + @PHDTHESIS{Pas76, + author="Richard Clark Pasco", + title="Source coding algorithms for fast data compression", + school="Dept. 
of Electrical Engineering, Stanford University", + address="Stanford, CA", + month=May, + year=1976 + } + @INPROCEEDINGS{Mar79, + author="Martin, G.N.N.", + title="Range encoding: an algorithm for removing redundancy from a digitised + message", + booktitle="Video & Data Recording Conference", + year=1979, + address="Southampton", + month=Jul + } + @ARTICLE{MNW98, + author="Alistair Moffat and Radford Neal and Ian H. Witten", + title="Arithmetic Coding Revisited", + journal="{ACM} Transactions on Information Systems", + year=1998, + volume=16, + number=3, + pages="256--294", + month=Jul, + URL="http://www.stanford.edu/class/ee398/handouts/papers/Moffat98ArithmCoding.pdf" + }*/ + +static int ec_read_byte(ec_dec *_this){ + return _this->offs<_this->storage?_this->buf[_this->offs++]:0; +} + +static int ec_read_byte_from_end(ec_dec *_this){ + return _this->end_offs<_this->storage? + _this->buf[_this->storage-++(_this->end_offs)]:0; +} + +/*Normalizes the contents of val and rng so that rng lies entirely in the + high-order symbol.*/ +static void ec_dec_normalize(ec_dec *_this){ + /*If the range is too small, rescale it and input some bits.*/ + while(_this->rng<=EC_CODE_BOT){ + int sym; + _this->nbits_total+=EC_SYM_BITS; + _this->rng<<=EC_SYM_BITS; + /*Use up the remaining bits from our last symbol.*/ + sym=_this->rem; + /*Read the next value from the input.*/ + _this->rem=ec_read_byte(_this); + /*Take the rest of the bits we need from this new symbol.*/ + sym=(sym<<EC_SYM_BITS|_this->rem)>>(EC_SYM_BITS-EC_CODE_EXTRA); + /*And subtract them from val, capped to be less than EC_CODE_TOP.*/ + _this->val=((_this->val<<EC_SYM_BITS)+(EC_SYM_MAX&~sym))&(EC_CODE_TOP-1); + } +} + +void ec_dec_init(ec_dec *_this,unsigned char *_buf,opus_uint32 _storage){ + _this->buf=_buf; + _this->storage=_storage; + _this->end_offs=0; + _this->end_window=0; + _this->nend_bits=0; + /*This is the offset from which ec_tell() will subtract partial bits. 
+ The final value after the ec_dec_normalize() call will be the same as in + the encoder, but we have to compensate for the bits that are added there.*/ + _this->nbits_total=EC_CODE_BITS+1 + -((EC_CODE_BITS-EC_CODE_EXTRA)/EC_SYM_BITS)*EC_SYM_BITS; + _this->offs=0; + _this->rng=1U<<EC_CODE_EXTRA; + _this->rem=ec_read_byte(_this); + _this->val=_this->rng-1-(_this->rem>>(EC_SYM_BITS-EC_CODE_EXTRA)); + _this->error=0; + /*Normalize the interval.*/ + ec_dec_normalize(_this); +} + +unsigned ec_decode(ec_dec *_this,unsigned _ft){ + unsigned s; + _this->ext=_this->rng/_ft; + s=(unsigned)(_this->val/_this->ext); + return _ft-EC_MINI(s+1,_ft); +} + +unsigned ec_decode_bin(ec_dec *_this,unsigned _bits){ + unsigned s; + _this->ext=_this->rng>>_bits; + s=(unsigned)(_this->val/_this->ext); + return (1U<<_bits)-EC_MINI(s+1U,1U<<_bits); +} + +void ec_dec_update(ec_dec *_this,unsigned _fl,unsigned _fh,unsigned _ft){ + opus_uint32 s; + s=IMUL32(_this->ext,_ft-_fh); + _this->val-=s; + _this->rng=_fl>0?IMUL32(_this->ext,_fh-_fl):_this->rng-s; + ec_dec_normalize(_this); +} + +/*The probability of having a "one" is 1/(1<<_logp).*/ +int ec_dec_bit_logp(ec_dec *_this,unsigned _logp){ + opus_uint32 r; + opus_uint32 d; + opus_uint32 s; + int ret; + r=_this->rng; + d=_this->val; + s=r>>_logp; + ret=d<s; + if(!ret)_this->val=d-s; + _this->rng=ret?s:r-s; + ec_dec_normalize(_this); + return ret; +} + +int ec_dec_icdf(ec_dec *_this,const unsigned char *_icdf,unsigned _ftb){ + opus_uint32 r; + opus_uint32 d; + opus_uint32 s; + opus_uint32 t; + int ret; + s=_this->rng; + d=_this->val; + r=s>>_ftb; + ret=-1; + do{ + t=s; + s=IMUL32(r,_icdf[++ret]); + } + while(d<s); + _this->val=d-s; + _this->rng=t-s; + ec_dec_normalize(_this); + return ret; +} + +opus_uint32 ec_dec_uint(ec_dec *_this,opus_uint32 _ft){ + unsigned ft; + unsigned s; + int ftb; + /*In order to optimize EC_ILOG(), it is undefined for the value 0.*/ + celt_assert(_ft>1); + _ft--; + ftb=EC_ILOG(_ft); + if(ftb>EC_UINT_BITS){ + opus_uint32 t; + ftb-=EC_UINT_BITS; + ft=(unsigned)(_ft>>ftb)+1; + s=ec_decode(_this,ft); + ec_dec_update(_this,s,s+1,ft); + t=(opus_uint32)s<<ftb|ec_dec_bits(_this,ftb); + if(t<=_ft)return t; + _this->error=1; + return _ft; + } + else{ + _ft++; + s=ec_decode(_this,(unsigned)_ft); + ec_dec_update(_this,s,s+1,(unsigned)_ft); + return s; + } +} + +opus_uint32 ec_dec_bits(ec_dec *_this,unsigned _bits){ + ec_window window; + int available; + opus_uint32 ret; + window=_this->end_window; + available=_this->nend_bits; + if((unsigned)available<_bits){ + do{ + window|=(ec_window)ec_read_byte_from_end(_this)<<available; + available+=EC_SYM_BITS; + } + while(available<=EC_WINDOW_SIZE-EC_SYM_BITS); + } + ret=(opus_uint32)window&(((opus_uint32)1<<_bits)-1U); + window>>=_bits; + available-=_bits; + _this->end_window=window; + _this->nend_bits=available; + _this->nbits_total+=_bits; + return ret; +}
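The decoder above is driven by repeating an ec_decode()/ec_dec_update() pair: ec_decode() returns a cumulative frequency in [0,_ft), the caller maps that value onto a symbol using its own probability model, and ec_dec_update() then consumes the chosen symbol's range. A minimal sketch of that loop, using a hypothetical three-symbol model with frequencies {5,2,1} (total 8); the names cumfreq, decode_one_symbol and decode_packet are illustrative and not from the upstream sources:

#include "entdec.h"

/* Hypothetical model: cumulative frequencies 0,5,7,8 for symbols 0,1,2. */
static const unsigned cumfreq[4] = {0, 5, 7, 8};

static int decode_one_symbol(ec_dec *dec)
{
   unsigned fs;
   int s;
   fs = ec_decode(dec, 8);                  /* cumulative frequency of the coded symbol */
   for (s = 0; cumfreq[s + 1] <= fs; s++);  /* find s with cumfreq[s] <= fs < cumfreq[s+1] */
   ec_dec_update(dec, cumfreq[s], cumfreq[s + 1], 8);
   return s;
}

static void decode_packet(unsigned char *data, opus_uint32 len)
{
   ec_dec dec;
   int sym;
   opus_uint32 raw;
   ec_dec_init(&dec, data, len);
   sym = decode_one_symbol(&dec);  /* range-coded symbols are read from the front */
   raw = ec_dec_bits(&dec, 4);     /* raw bits are read back from the end of the buffer */
   (void)sym;
   (void)raw;
}

The matching encoder calls would be ec_encode(enc, cumfreq[s], cumfreq[s + 1], 8) and ec_enc_bits(enc, raw, 4).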
new file mode 100644 --- /dev/null +++ b/media/libopus/celt/entdec.h @@ -0,0 +1,100 @@ +/* Copyright (c) 2001-2011 Timothy B. Terriberry + Copyright (c) 2008-2009 Xiph.Org Foundation */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#if !defined(_entdec_H) +# define _entdec_H (1) +# include <limits.h> +# include "entcode.h" + +/*Initializes the decoder. + _buf: The input buffer to use. + Return: 0 on success, or a negative value on error.*/ +void ec_dec_init(ec_dec *_this,unsigned char *_buf,opus_uint32 _storage); + +/*Calculates the cumulative frequency for the next symbol. + This can then be fed into the probability model to determine what that + symbol is, and the additional frequency information required to advance to + the next symbol. + This function cannot be called more than once without a corresponding call to + ec_dec_update(), or decoding will not proceed correctly. + _ft: The total frequency of the symbols in the alphabet the next symbol was + encoded with. + Return: A cumulative frequency representing the encoded symbol. + If the cumulative frequency of all the symbols before the one that + was encoded was fl, and the cumulative frequency of all the symbols + up to and including the one encoded is fh, then the returned value + will fall in the range [fl,fh).*/ +unsigned ec_decode(ec_dec *_this,unsigned _ft); + +/*Equivalent to ec_decode() with _ft==1<<_bits.*/ +unsigned ec_decode_bin(ec_dec *_this,unsigned _bits); + +/*Advance the decoder past the next symbol using the frequency information the + symbol was encoded with. + Exactly one call to ec_decode() must have been made so that all necessary + intermediate calculations are performed. + _fl: The cumulative frequency of all symbols that come before the symbol + decoded. + _fh: The cumulative frequency of all symbols up to and including the symbol + decoded. + Together with _fl, this defines the range [_fl,_fh) in which the value + returned above must fall. + _ft: The total frequency of the symbols in the alphabet the symbol decoded + was encoded in. 
+ This must be the same as passed to the preceding call to ec_decode().*/ +void ec_dec_update(ec_dec *_this,unsigned _fl,unsigned _fh,unsigned _ft); + +/* Decode a bit that has a 1/(1<<_logp) probability of being a one */ +int ec_dec_bit_logp(ec_dec *_this,unsigned _logp); + +/*Decodes a symbol given an "inverse" CDF table. + No call to ec_dec_update() is necessary after this call. + _icdf: The "inverse" CDF, such that symbol s falls in the range + [s>0?ft-_icdf[s-1]:0,ft-_icdf[s]), where ft=1<<_ftb. + The values must be monotonically non-increasing, and the last value + must be 0. + _ftb: The number of bits of precision in the cumulative distribution. + Return: The decoded symbol s.*/ +int ec_dec_icdf(ec_dec *_this,const unsigned char *_icdf,unsigned _ftb); + +/*Extracts a raw unsigned integer with a non-power-of-2 range from the stream. + The bits must have been encoded with ec_enc_uint(). + No call to ec_dec_update() is necessary after this call. + _ft: The number of integers that can be decoded (one more than the max). + This must be at least one, and no more than 2**32-1. + Return: The decoded bits.*/ +opus_uint32 ec_dec_uint(ec_dec *_this,opus_uint32 _ft); + +/*Extracts a sequence of raw bits from the stream. + The bits must have been encoded with ec_enc_bits(). + No call to ec_dec_update() is necessary after this call. + _ftb: The number of bits to extract. + This must be between 0 and 25, inclusive. + Return: The decoded bits.*/ +opus_uint32 ec_dec_bits(ec_dec *_this,unsigned _ftb); + +#endif
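A worked example of the "inverse" CDF format that ec_dec_icdf() expects: for a hypothetical four-symbol alphabet with probabilities 128/256, 64/256, 32/256 and 32/256 (_ftb == 8, so ft == 256), the cumulative totals are 128, 192, 224 and 256, and the table stores ft minus each total. The names my_icdf and decode_with_icdf are illustrative, not from the upstream sources:

#include "entdec.h"

/* icdf[s] = 256 - (cumulative frequency up to and including s); the entries are
   monotonically non-increasing and the last one is 0, as required above. */
static const unsigned char my_icdf[4] = {128, 64, 32, 0};

static int decode_with_icdf(ec_dec *dec)
{
   return ec_dec_icdf(dec, my_icdf, 8);  /* returns 0..3; no ec_dec_update() needed */
}

The matching encoder call is ec_enc_icdf(enc, s, my_icdf, 8).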
new file mode 100644 --- /dev/null +++ b/media/libopus/celt/entenc.c @@ -0,0 +1,294 @@ +/* Copyright (c) 2001-2011 Timothy B. Terriberry + Copyright (c) 2008-2009 Xiph.Org Foundation */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#if defined(HAVE_CONFIG_H) +# include "config.h" +#endif +#include "os_support.h" +#include "arch.h" +#include "entenc.h" +#include "mfrngcod.h" + +/*A range encoder. + See entdec.c and the references for implementation details \cite{Mar79,MNW98}. + + @INPROCEEDINGS{Mar79, + author="Martin, G.N.N.", + title="Range encoding: an algorithm for removing redundancy from a digitised + message", + booktitle="Video \& Data Recording Conference", + year=1979, + address="Southampton", + month=Jul + } + @ARTICLE{MNW98, + author="Alistair Moffat and Radford Neal and Ian H. Witten", + title="Arithmetic Coding Revisited", + journal="{ACM} Transactions on Information Systems", + year=1998, + volume=16, + number=3, + pages="256--294", + month=Jul, + URL="http://www.stanford.edu/class/ee398/handouts/papers/Moffat98ArithmCoding.pdf" + }*/ + +static int ec_write_byte(ec_enc *_this,unsigned _value){ + if(_this->offs+_this->end_offs>=_this->storage)return -1; + _this->buf[_this->offs++]=(unsigned char)_value; + return 0; +} + +static int ec_write_byte_at_end(ec_enc *_this,unsigned _value){ + if(_this->offs+_this->end_offs>=_this->storage)return -1; + _this->buf[_this->storage-++(_this->end_offs)]=(unsigned char)_value; + return 0; +} + +/*Outputs a symbol, with a carry bit. + If there is a potential to propagate a carry over several symbols, they are + buffered until it can be determined whether or not an actual carry will + occur. + If the counter for the buffered symbols overflows, then the stream becomes + undecodable. + This gives a theoretical limit of a few billion symbols in a single packet on + 32-bit systems. + The alternative is to truncate the range in order to force a carry, but + requires similar carry tracking in the decoder, needlessly slowing it down.*/ +static void ec_enc_carry_out(ec_enc *_this,int _c){ + if(_c!=EC_SYM_MAX){ + /*No further carry propagation possible, flush buffer.*/ + int carry; + carry=_c>>EC_SYM_BITS; + /*Don't output a byte on the first write. 
+ This compare should be taken care of by branch-prediction thereafter.*/ + if(_this->rem>=0)_this->error|=ec_write_byte(_this,_this->rem+carry); + if(_this->ext>0){ + unsigned sym; + sym=(EC_SYM_MAX+carry)&EC_SYM_MAX; + do _this->error|=ec_write_byte(_this,sym); + while(--(_this->ext)>0); + } + _this->rem=_c&EC_SYM_MAX; + } + else _this->ext++; +} + +static void ec_enc_normalize(ec_enc *_this){ + /*If the range is too small, output some bits and rescale it.*/ + while(_this->rng<=EC_CODE_BOT){ + ec_enc_carry_out(_this,(int)(_this->val>>EC_CODE_SHIFT)); + /*Move the next-to-high-order symbol into the high-order position.*/ + _this->val=(_this->val<<EC_SYM_BITS)&(EC_CODE_TOP-1); + _this->rng<<=EC_SYM_BITS; + _this->nbits_total+=EC_SYM_BITS; + } +} + +void ec_enc_init(ec_enc *_this,unsigned char *_buf,opus_uint32 _size){ + _this->buf=_buf; + _this->end_offs=0; + _this->end_window=0; + _this->nend_bits=0; + /*This is the offset from which ec_tell() will subtract partial bits.*/ + _this->nbits_total=EC_CODE_BITS+1; + _this->offs=0; + _this->rng=EC_CODE_TOP; + _this->rem=-1; + _this->val=0; + _this->ext=0; + _this->storage=_size; + _this->error=0; +} + +void ec_encode(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _ft){ + opus_uint32 r; + r=_this->rng/_ft; + if(_fl>0){ + _this->val+=_this->rng-IMUL32(r,(_ft-_fl)); + _this->rng=IMUL32(r,(_fh-_fl)); + } + else _this->rng-=IMUL32(r,(_ft-_fh)); + ec_enc_normalize(_this); +} + +void ec_encode_bin(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _bits){ + opus_uint32 r; + r=_this->rng>>_bits; + if(_fl>0){ + _this->val+=_this->rng-IMUL32(r,((1U<<_bits)-_fl)); + _this->rng=IMUL32(r,(_fh-_fl)); + } + else _this->rng-=IMUL32(r,((1U<<_bits)-_fh)); + ec_enc_normalize(_this); +} + +/*The probability of having a "one" is 1/(1<<_logp).*/ +void ec_enc_bit_logp(ec_enc *_this,int _val,unsigned _logp){ + opus_uint32 r; + opus_uint32 s; + opus_uint32 l; + r=_this->rng; + l=_this->val; + s=r>>_logp; + r-=s; + if(_val)_this->val=l+r; + _this->rng=_val?s:r; + ec_enc_normalize(_this); +} + +void ec_enc_icdf(ec_enc *_this,int _s,const unsigned char *_icdf,unsigned _ftb){ + opus_uint32 r; + r=_this->rng>>_ftb; + if(_s>0){ + _this->val+=_this->rng-IMUL32(r,_icdf[_s-1]); + _this->rng=IMUL32(r,_icdf[_s-1]-_icdf[_s]); + } + else _this->rng-=IMUL32(r,_icdf[_s]); + ec_enc_normalize(_this); +} + +void ec_enc_uint(ec_enc *_this,opus_uint32 _fl,opus_uint32 _ft){ + unsigned ft; + unsigned fl; + int ftb; + /*In order to optimize EC_ILOG(), it is undefined for the value 0.*/ + celt_assert(_ft>1); + _ft--; + ftb=EC_ILOG(_ft); + if(ftb>EC_UINT_BITS){ + ftb-=EC_UINT_BITS; + ft=(_ft>>ftb)+1; + fl=(unsigned)(_fl>>ftb); + ec_encode(_this,fl,fl+1,ft); + ec_enc_bits(_this,_fl&(((opus_uint32)1<<ftb)-1U),ftb); + } + else ec_encode(_this,_fl,_fl+1,_ft+1); +} + +void ec_enc_bits(ec_enc *_this,opus_uint32 _fl,unsigned _bits){ + ec_window window; + int used; + window=_this->end_window; + used=_this->nend_bits; + celt_assert(_bits>0); + if(used+_bits>EC_WINDOW_SIZE){ + do{ + _this->error|=ec_write_byte_at_end(_this,(unsigned)window&EC_SYM_MAX); + window>>=EC_SYM_BITS; + used-=EC_SYM_BITS; + } + while(used>=EC_SYM_BITS); + } + window|=(ec_window)_fl<<used; + used+=_bits; + _this->end_window=window; + _this->nend_bits=used; + _this->nbits_total+=_bits; +} + +void ec_enc_patch_initial_bits(ec_enc *_this,unsigned _val,unsigned _nbits){ + int shift; + unsigned mask; + celt_assert(_nbits<=EC_SYM_BITS); + shift=EC_SYM_BITS-_nbits; + mask=((1<<_nbits)-1)<<shift; + if(_this->offs>0){ + /*The first byte 
has been finalized.*/ + _this->buf[0]=(unsigned char)((_this->buf[0]&~mask)|_val<<shift); + } + else if(_this->rem>=0){ + /*The first byte is still awaiting carry propagation.*/ + _this->rem=(_this->rem&~mask)|_val<<shift; + } + else if(_this->rng<=(EC_CODE_TOP>>_nbits)){ + /*The renormalization loop has never been run.*/ + _this->val=(_this->val&~((opus_uint32)mask<<EC_CODE_SHIFT))| + (opus_uint32)_val<<(EC_CODE_SHIFT+shift); + } + /*The encoder hasn't even encoded _nbits of data yet.*/ + else _this->error=-1; +} + +void ec_enc_shrink(ec_enc *_this,opus_uint32 _size){ + celt_assert(_this->offs+_this->end_offs<=_size); + OPUS_MOVE(_this->buf+_size-_this->end_offs, + _this->buf+_this->storage-_this->end_offs,_this->end_offs); + _this->storage=_size; +} + +void ec_enc_done(ec_enc *_this){ + ec_window window; + int used; + opus_uint32 msk; + opus_uint32 end; + int l; + /*We output the minimum number of bits that ensures that the symbols encoded + thus far will be decoded correctly regardless of the bits that follow.*/ + l=EC_CODE_BITS-EC_ILOG(_this->rng); + msk=(EC_CODE_TOP-1)>>l; + end=(_this->val+msk)&~msk; + if((end|msk)>=_this->val+_this->rng){ + l++; + msk>>=1; + end=(_this->val+msk)&~msk; + } + while(l>0){ + ec_enc_carry_out(_this,(int)(end>>EC_CODE_SHIFT)); + end=(end<<EC_SYM_BITS)&(EC_CODE_TOP-1); + l-=EC_SYM_BITS; + } + /*If we have a buffered byte flush it into the output buffer.*/ + if(_this->rem>=0||_this->ext>0)ec_enc_carry_out(_this,0); + /*If we have buffered extra bits, flush them as well.*/ + window=_this->end_window; + used=_this->nend_bits; + while(used>=EC_SYM_BITS){ + _this->error|=ec_write_byte_at_end(_this,(unsigned)window&EC_SYM_MAX); + window>>=EC_SYM_BITS; + used-=EC_SYM_BITS; + } + /*Clear any excess space and add any remaining extra bits to the last byte.*/ + if(!_this->error){ + OPUS_CLEAR(_this->buf+_this->offs, + _this->storage-_this->offs-_this->end_offs); + if(used>0){ + /*If there's no range coder data at all, give up.*/ + if(_this->end_offs>=_this->storage)_this->error=-1; + else{ + l=-l; + /*If we've busted, don't add too many extra bits to the last byte; it + would corrupt the range coder data, and that's more important.*/ + if(_this->offs+_this->end_offs>=_this->storage&&l<used){ + window&=(1<<l)-1; + _this->error=-1; + } + _this->buf[_this->storage-_this->end_offs-1]|=(unsigned char)window; + } + } + } +}
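A minimal round-trip sketch of the encoder above (the buffer size and symbol values are arbitrary, and the name range_coder_roundtrip is illustrative): range-coded symbols grow from the front of the buffer, raw bits written with ec_enc_bits() are packed from the end, and ec_enc_done() flushes both before the error flag is checked.

#include "entenc.h"
#include "entdec.h"

static int range_coder_roundtrip(void)
{
   unsigned char buf[64];
   ec_enc enc;
   ec_dec dec;
   ec_enc_init(&enc, buf, sizeof(buf));
   ec_encode(&enc, 0, 5, 8);   /* a symbol occupying the range [0,5) out of 8 */
   ec_enc_uint(&enc, 7, 10);   /* an integer in [0,10) */
   ec_enc_bits(&enc, 0x5, 3);  /* three raw bits, stored at the end of buf */
   ec_enc_done(&enc);
   if (enc.error) return -1;
   ec_dec_init(&dec, buf, sizeof(buf));
   /* ...mirror the calls above with ec_decode()/ec_dec_update(),
      ec_dec_uint() and ec_dec_bits()... */
   return 0;
}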
new file mode 100644 --- /dev/null +++ b/media/libopus/celt/entenc.h @@ -0,0 +1,110 @@ +/* Copyright (c) 2001-2011 Timothy B. Terriberry + Copyright (c) 2008-2009 Xiph.Org Foundation */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#if !defined(_entenc_H) +# define _entenc_H (1) +# include <stddef.h> +# include "entcode.h" + +/*Initializes the encoder. + _buf: The buffer to store output bytes in. + _size: The size of the buffer, in chars.*/ +void ec_enc_init(ec_enc *_this,unsigned char *_buf,opus_uint32 _size); +/*Encodes a symbol given its frequency information. + The frequency information must be discernable by the decoder, assuming it + has read only the previous symbols from the stream. + It is allowable to change the frequency information, or even the entire + source alphabet, so long as the decoder can tell from the context of the + previously encoded information that it is supposed to do so as well. + _fl: The cumulative frequency of all symbols that come before the one to be + encoded. + _fh: The cumulative frequency of all symbols up to and including the one to + be encoded. + Together with _fl, this defines the range [_fl,_fh) in which the + decoded value will fall. + _ft: The sum of the frequencies of all the symbols*/ +void ec_encode(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _ft); + +/*Equivalent to ec_encode() with _ft==1<<_bits.*/ +void ec_encode_bin(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _bits); + +/* Encode a bit that has a 1/(1<<_logp) probability of being a one */ +void ec_enc_bit_logp(ec_enc *_this,int _val,unsigned _logp); + +/*Encodes a symbol given an "inverse" CDF table. + _s: The index of the symbol to encode. + _icdf: The "inverse" CDF, such that symbol _s falls in the range + [_s>0?ft-_icdf[_s-1]:0,ft-_icdf[_s]), where ft=1<<_ftb. + The values must be monotonically non-increasing, and the last value + must be 0. + _ftb: The number of bits of precision in the cumulative distribution.*/ +void ec_enc_icdf(ec_enc *_this,int _s,const unsigned char *_icdf,unsigned _ftb); + +/*Encodes a raw unsigned integer in the stream. + _fl: The integer to encode. + _ft: The number of integers that can be encoded (one more than the max). 
+ This must be at least one, and no more than 2**32-1.*/ +void ec_enc_uint(ec_enc *_this,opus_uint32 _fl,opus_uint32 _ft); + +/*Encodes a sequence of raw bits in the stream. + _fl: The bits to encode. + _ftb: The number of bits to encode. + This must be between 1 and 25, inclusive.*/ +void ec_enc_bits(ec_enc *_this,opus_uint32 _fl,unsigned _ftb); + +/*Overwrites a few bits at the very start of an existing stream, after they + have already been encoded. + This makes it possible to have a few flags up front, where it is easy for + decoders to access them without parsing the whole stream, even if their + values are not determined until late in the encoding process, without having + to buffer all the intermediate symbols in the encoder. + In order for this to work, at least _nbits bits must have already been + encoded using probabilities that are an exact power of two. + The encoder can verify the number of encoded bits is sufficient, but cannot + check this latter condition. + _val: The bits to encode (in the least _nbits significant bits). + They will be decoded in order from most-significant to least. + _nbits: The number of bits to overwrite. + This must be no more than 8.*/ +void ec_enc_patch_initial_bits(ec_enc *_this,unsigned _val,unsigned _nbits); + +/*Compacts the data to fit in the target size. + This moves up the raw bits at the end of the current buffer so they are at + the end of the new buffer size. + The caller must ensure that the amount of data that's already been written + will fit in the new size. + _size: The number of bytes in the new buffer. + This must be large enough to contain the bits already written, and + must be no larger than the existing size.*/ +void ec_enc_shrink(ec_enc *_this,opus_uint32 _size); + +/*Indicates that there are no more symbols to encode. + All reamining output bytes are flushed to the output buffer. + ec_enc_init() must be called before the encoder can be used again.*/ +void ec_enc_done(ec_enc *_this); + +#endif
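The ec_enc_patch_initial_bits() contract above can be illustrated with a short sketch (the flag layout and the name encode_with_late_flag are hypothetical, not from the upstream sources): reserve a flag bit up front using an exact power-of-two probability, encode the rest of the stream, then overwrite the flag once its value is known.

#include "entenc.h"

static void encode_with_late_flag(ec_enc *enc, int payload_bit, int flag)
{
   /* Placeholder for the flag; probability 1/2 is an exact power of two,
      as ec_enc_patch_initial_bits() requires. */
   ec_enc_bit_logp(enc, 0, 1);
   /* ...encode the payload (a single bit stands in for it here)... */
   ec_enc_bit_logp(enc, payload_bit, 2);
   /* The flag's real value is only known now; rewrite the first bit in place. */
   ec_enc_patch_initial_bits(enc, flag ? 1 : 0, 1);
}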
new file mode 100644 --- /dev/null +++ b/media/libopus/celt/fixed_debug.h @@ -0,0 +1,511 @@ +/* Copyright (C) 2003-2008 Jean-Marc Valin + Copyright (C) 2007-2009 Xiph.Org Foundation */ +/** + @file fixed_debug.h + @brief Fixed-point operations with debugging +*/ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef FIXED_DEBUG_H +#define FIXED_DEBUG_H + +#include <stdio.h> + +#ifdef CELT_C +long long celt_mips=0; +#else +extern long long celt_mips; +#endif + +#define MULT16_16SU(a,b) ((opus_val32)(opus_val16)(a)*(opus_val32)(opus_uint16)(b)) +#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL32(MULT16_16(SHR32((a),16),SHR((b),16)),1), SHR32(MULT16_16SU(SHR32((a),16),((b)&0x0000ffff)),15)), SHR32(MULT16_16SU(SHR32((b),16),((a)&0x0000ffff)),15)) + +/** 16x32 multiplication, followed by a 16-bit shift right. 
Results fits in 32 bits */ +#define MULT16_32_Q16(a,b) ADD32(MULT16_16((a),SHR32((b),16)), SHR32(MULT16_16SU((a),((b)&0x0000ffff)),16)) + +#define QCONST16(x,bits) ((opus_val16)(.5+(x)*(((opus_val32)1)<<(bits)))) +#define QCONST32(x,bits) ((opus_val32)(.5+(x)*(((opus_val32)1)<<(bits)))) + +#define VERIFY_SHORT(x) ((x)<=32767&&(x)>=-32768) +#define VERIFY_INT(x) ((x)<=2147483647LL&&(x)>=-2147483648LL) +#define VERIFY_UINT(x) ((x)<=(2147483647LLU<<1)) + +#define SHR(a,b) SHR32(a,b) +#define PSHR(a,b) PSHR32(a,b) + +static inline short NEG16(int x) +{ + int res; + if (!VERIFY_SHORT(x)) + { + fprintf (stderr, "NEG16: input is not short: %d\n", (int)x); + } + res = -x; + if (!VERIFY_SHORT(res)) + fprintf (stderr, "NEG16: output is not short: %d\n", (int)res); + celt_mips++; + return res; +} +static inline int NEG32(long long x) +{ + long long res; + if (!VERIFY_INT(x)) + { + fprintf (stderr, "NEG16: input is not int: %d\n", (int)x); + } + res = -x; + if (!VERIFY_INT(res)) + fprintf (stderr, "NEG16: output is not int: %d\n", (int)res); + celt_mips+=2; + return res; +} + +#define EXTRACT16(x) EXTRACT16_(x, __FILE__, __LINE__) +static inline short EXTRACT16_(int x, char *file, int line) +{ + int res; + if (!VERIFY_SHORT(x)) + { + fprintf (stderr, "EXTRACT16: input is not short: %d in %s: line %d\n", x, file, line); + } + res = x; + celt_mips++; + return res; +} + +#define EXTEND32(x) EXTEND32_(x, __FILE__, __LINE__) +static inline int EXTEND32_(int x, char *file, int line) +{ + int res; + if (!VERIFY_SHORT(x)) + { + fprintf (stderr, "EXTEND32: input is not short: %d in %s: line %d\n", x, file, line); + } + res = x; + celt_mips++; + return res; +} + +#define SHR16(a, shift) SHR16_(a, shift, __FILE__, __LINE__) +static inline short SHR16_(int a, int shift, char *file, int line) +{ + int res; + if (!VERIFY_SHORT(a) || !VERIFY_SHORT(shift)) + { + fprintf (stderr, "SHR16: inputs are not short: %d >> %d in %s: line %d\n", a, shift, file, line); + } + res = a>>shift; + if (!VERIFY_SHORT(res)) + fprintf (stderr, "SHR16: output is not short: %d in %s: line %d\n", res, file, line); + celt_mips++; + return res; +} +#define SHL16(a, shift) SHL16_(a, shift, __FILE__, __LINE__) +static inline short SHL16_(int a, int shift, char *file, int line) +{ + int res; + if (!VERIFY_SHORT(a) || !VERIFY_SHORT(shift)) + { + fprintf (stderr, "SHL16: inputs are not short: %d %d in %s: line %d\n", a, shift, file, line); + } + res = a<<shift; + if (!VERIFY_SHORT(res)) + fprintf (stderr, "SHL16: output is not short: %d in %s: line %d\n", res, file, line); + celt_mips++; + return res; +} + +static inline int SHR32(long long a, int shift) +{ + long long res; + if (!VERIFY_INT(a) || !VERIFY_SHORT(shift)) + { + fprintf (stderr, "SHR32: inputs are not int: %d %d\n", (int)a, shift); + } + res = a>>shift; + if (!VERIFY_INT(res)) + { + fprintf (stderr, "SHR32: output is not int: %d\n", (int)res); + } + celt_mips+=2; + return res; +} +static inline int SHL32(long long a, int shift) +{ + long long res; + if (!VERIFY_INT(a) || !VERIFY_SHORT(shift)) + { + fprintf (stderr, "SHL32: inputs are not int: %d %d\n", (int)a, shift); + } + res = a<<shift; + if (!VERIFY_INT(res)) + { + fprintf (stderr, "SHL32: output is not int: %d\n", (int)res); + } + celt_mips+=2; + return res; +} + +#define PSHR32(a,shift) (celt_mips--,SHR32(ADD32((a),(((opus_val32)(1)<<((shift))>>1))),shift)) +#define VSHR32(a, shift) (((shift)>0) ? 
SHR32(a, shift) : SHL32(a, -(shift))) + +#define ROUND16(x,a) (celt_mips--,EXTRACT16(PSHR32((x),(a)))) +#define HALF16(x) (SHR16(x,1)) +#define HALF32(x) (SHR32(x,1)) + +//#define SHR(a,shift) ((a) >> (shift)) +//#define SHL(a,shift) ((a) << (shift)) + +#define ADD16(a, b) ADD16_(a, b, __FILE__, __LINE__) +static inline short ADD16_(int a, int b, char *file, int line) +{ + int res; + if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) + { + fprintf (stderr, "ADD16: inputs are not short: %d %d in %s: line %d\n", a, b, file, line); + } + res = a+b; + if (!VERIFY_SHORT(res)) + { + fprintf (stderr, "ADD16: output is not short: %d+%d=%d in %s: line %d\n", a,b,res, file, line); + } + celt_mips++; + return res; +} + +#define SUB16(a, b) SUB16_(a, b, __FILE__, __LINE__) +static inline short SUB16_(int a, int b, char *file, int line) +{ + int res; + if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) + { + fprintf (stderr, "SUB16: inputs are not short: %d %d in %s: line %d\n", a, b, file, line); + } + res = a-b; + if (!VERIFY_SHORT(res)) + fprintf (stderr, "SUB16: output is not short: %d in %s: line %d\n", res, file, line); + celt_mips++; + return res; +} + +#define ADD32(a, b) ADD32_(a, b, __FILE__, __LINE__) +static inline int ADD32_(long long a, long long b, char *file, int line) +{ + long long res; + if (!VERIFY_INT(a) || !VERIFY_INT(b)) + { + fprintf (stderr, "ADD32: inputs are not int: %d %d in %s: line %d\n", (int)a, (int)b, file, line); + } + res = a+b; + if (!VERIFY_INT(res)) + { + fprintf (stderr, "ADD32: output is not int: %d in %s: line %d\n", (int)res, file, line); + } + celt_mips+=2; + return res; +} + +#define SUB32(a, b) SUB32_(a, b, __FILE__, __LINE__) +static inline int SUB32_(long long a, long long b, char *file, int line) +{ + long long res; + if (!VERIFY_INT(a) || !VERIFY_INT(b)) + { + fprintf (stderr, "SUB32: inputs are not int: %d %d in %s: line %d\n", (int)a, (int)b, file, line); + } + res = a-b; + if (!VERIFY_INT(res)) + fprintf (stderr, "SUB32: output is not int: %d in %s: line %d\n", (int)res, file, line); + celt_mips+=2; + return res; +} + +#undef UADD32 +#define UADD32(a, b) UADD32_(a, b, __FILE__, __LINE__) +static inline unsigned int UADD32_(unsigned long long a, unsigned long long b, char *file, int line) +{ + long long res; + if (!VERIFY_UINT(a) || !VERIFY_UINT(b)) + { + fprintf (stderr, "UADD32: inputs are not int: %u %u in %s: line %d\n", (unsigned)a, (unsigned)b, file, line); + } + res = a+b; + if (!VERIFY_UINT(res)) + { + fprintf (stderr, "UADD32: output is not int: %u in %s: line %d\n", (unsigned)res, file, line); + } + celt_mips+=2; + return res; +} + +#undef USUB32 +#define USUB32(a, b) USUB32_(a, b, __FILE__, __LINE__) +static inline unsigned int USUB32_(unsigned long long a, unsigned long long b, char *file, int line) +{ + long long res; + if (!VERIFY_UINT(a) || !VERIFY_UINT(b)) + { + /*fprintf (stderr, "USUB32: inputs are not int: %llu %llu in %s: line %d\n", (unsigned)a, (unsigned)b, file, line);*/ + } + res = a-b; + if (!VERIFY_UINT(res)) + { + /*fprintf (stderr, "USUB32: output is not int: %llu - %llu = %llu in %s: line %d\n", a, b, res, file, line);*/ + } + celt_mips+=2; + return res; +} + +/* result fits in 16 bits */ +static inline short MULT16_16_16(int a, int b) +{ + int res; + if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) + { + fprintf (stderr, "MULT16_16_16: inputs are not short: %d %d\n", a, b); + } + res = a*b; + if (!VERIFY_SHORT(res)) + fprintf (stderr, "MULT16_16_16: output is not short: %d\n", res); + celt_mips++; + return res; +} + +#define MULT16_16(a, b) 
MULT16_16_(a, b, __FILE__, __LINE__) +static inline int MULT16_16_(int a, int b, char *file, int line) +{ + long long res; + if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) + { + fprintf (stderr, "MULT16_16: inputs are not short: %d %d in %s: line %d\n", a, b, file, line); + } + res = ((long long)a)*b; + if (!VERIFY_INT(res)) + fprintf (stderr, "MULT16_16: output is not int: %d in %s: line %d\n", (int)res, file, line); + celt_mips++; + return res; +} + +#define MAC16_16(c,a,b) (celt_mips-=2,ADD32((c),MULT16_16((a),(b)))) + +#define MULT16_32_QX(a, b, Q) MULT16_32_QX_(a, b, Q, __FILE__, __LINE__) +static inline int MULT16_32_QX_(int a, long long b, int Q, char *file, int line) +{ + long long res; + if (!VERIFY_SHORT(a) || !VERIFY_INT(b)) + { + fprintf (stderr, "MULT16_32_Q%d: inputs are not short+int: %d %d in %s: line %d\n", Q, (int)a, (int)b, file, line); + } + if (ABS32(b)>=((opus_val32)(1)<<(15+Q))) + fprintf (stderr, "MULT16_32_Q%d: second operand too large: %d %d in %s: line %d\n", Q, (int)a, (int)b, file, line); + res = (((long long)a)*(long long)b) >> Q; + if (!VERIFY_INT(res)) + fprintf (stderr, "MULT16_32_Q%d: output is not int: %d*%d=%d in %s: line %d\n", Q, (int)a, (int)b,(int)res, file, line); + if (Q==15) + celt_mips+=3; + else + celt_mips+=4; + return res; +} + +#define MULT16_32_Q15(a,b) MULT16_32_QX(a,b,15) +#define MAC16_32_Q15(c,a,b) (celt_mips-=2,ADD32((c),MULT16_32_Q15((a),(b)))) + +static inline int SATURATE(int a, int b) +{ + if (a>b) + a=b; + if (a<-b) + a = -b; + celt_mips+=3; + return a; +} + +static inline int MULT16_16_Q11_32(int a, int b) +{ + long long res; + if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) + { + fprintf (stderr, "MULT16_16_Q11: inputs are not short: %d %d\n", a, b); + } + res = ((long long)a)*b; + res >>= 11; + if (!VERIFY_INT(res)) + fprintf (stderr, "MULT16_16_Q11: output is not short: %d*%d=%d\n", (int)a, (int)b, (int)res); + celt_mips+=3; + return res; +} +static inline short MULT16_16_Q13(int a, int b) +{ + long long res; + if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) + { + fprintf (stderr, "MULT16_16_Q13: inputs are not short: %d %d\n", a, b); + } + res = ((long long)a)*b; + res >>= 13; + if (!VERIFY_SHORT(res)) + fprintf (stderr, "MULT16_16_Q13: output is not short: %d*%d=%d\n", a, b, (int)res); + celt_mips+=3; + return res; +} +static inline short MULT16_16_Q14(int a, int b) +{ + long long res; + if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) + { + fprintf (stderr, "MULT16_16_Q14: inputs are not short: %d %d\n", a, b); + } + res = ((long long)a)*b; + res >>= 14; + if (!VERIFY_SHORT(res)) + fprintf (stderr, "MULT16_16_Q14: output is not short: %d\n", (int)res); + celt_mips+=3; + return res; +} + +#define MULT16_16_Q15(a, b) MULT16_16_Q15_(a, b, __FILE__, __LINE__) +static inline short MULT16_16_Q15_(int a, int b, char *file, int line) +{ + long long res; + if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) + { + fprintf (stderr, "MULT16_16_Q15: inputs are not short: %d %d in %s: line %d\n", a, b, file, line); + } + res = ((long long)a)*b; + res >>= 15; + if (!VERIFY_SHORT(res)) + { + fprintf (stderr, "MULT16_16_Q15: output is not short: %d in %s: line %d\n", (int)res, file, line); + } + celt_mips+=1; + return res; +} + +static inline short MULT16_16_P13(int a, int b) +{ + long long res; + if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) + { + fprintf (stderr, "MULT16_16_P13: inputs are not short: %d %d\n", a, b); + } + res = ((long long)a)*b; + res += 4096; + if (!VERIFY_INT(res)) + fprintf (stderr, "MULT16_16_P13: overflow: %d*%d=%d\n", a, b, (int)res); + res >>= 13; + if 
(!VERIFY_SHORT(res)) + fprintf (stderr, "MULT16_16_P13: output is not short: %d*%d=%d\n", a, b, (int)res); + celt_mips+=4; + return res; +} +static inline short MULT16_16_P14(int a, int b) +{ + long long res; + if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) + { + fprintf (stderr, "MULT16_16_P14: inputs are not short: %d %d\n", a, b); + } + res = ((long long)a)*b; + res += 8192; + if (!VERIFY_INT(res)) + fprintf (stderr, "MULT16_16_P14: overflow: %d*%d=%d\n", a, b, (int)res); + res >>= 14; + if (!VERIFY_SHORT(res)) + fprintf (stderr, "MULT16_16_P14: output is not short: %d*%d=%d\n", a, b, (int)res); + celt_mips+=4; + return res; +} +static inline short MULT16_16_P15(int a, int b) +{ + long long res; + if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) + { + fprintf (stderr, "MULT16_16_P15: inputs are not short: %d %d\n", a, b); + } + res = ((long long)a)*b; + res += 16384; + if (!VERIFY_INT(res)) + fprintf (stderr, "MULT16_16_P15: overflow: %d*%d=%d\n", a, b, (int)res); + res >>= 15; + if (!VERIFY_SHORT(res)) + fprintf (stderr, "MULT16_16_P15: output is not short: %d*%d=%d\n", a, b, (int)res); + celt_mips+=2; + return res; +} + +#define DIV32_16(a, b) DIV32_16_(a, b, __FILE__, __LINE__) + +static inline int DIV32_16_(long long a, long long b, char *file, int line) +{ + long long res; + if (b==0) + { + fprintf(stderr, "DIV32_16: divide by zero: %d/%d in %s: line %d\n", (int)a, (int)b, file, line); + return 0; + } + if (!VERIFY_INT(a) || !VERIFY_SHORT(b)) + { + fprintf (stderr, "DIV32_16: inputs are not int/short: %d %d in %s: line %d\n", (int)a, (int)b, file, line); + } + res = a/b; + if (!VERIFY_SHORT(res)) + { + fprintf (stderr, "DIV32_16: output is not short: %d / %d = %d in %s: line %d\n", (int)a,(int)b,(int)res, file, line); + if (res>32767) + res = 32767; + if (res<-32768) + res = -32768; + } + celt_mips+=35; + return res; +} + +#define DIV32(a, b) DIV32_(a, b, __FILE__, __LINE__) +static inline int DIV32_(long long a, long long b, char *file, int line) +{ + long long res; + if (b==0) + { + fprintf(stderr, "DIV32: divide by zero: %d/%d in %s: line %d\n", (int)a, (int)b, file, line); + return 0; + } + + if (!VERIFY_INT(a) || !VERIFY_INT(b)) + { + fprintf (stderr, "DIV32: inputs are not int/short: %d %d in %s: line %d\n", (int)a, (int)b, file, line); + } + res = a/b; + if (!VERIFY_INT(res)) + fprintf (stderr, "DIV32: output is not int: %d in %s: line %d\n", (int)res, file, line); + celt_mips+=70; + return res; +} + +#undef PRINT_MIPS +#define PRINT_MIPS(file) do {fprintf (file, "total complexity = %llu MIPS\n", celt_mips);} while (0); + +#endif
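When a build selects this header instead of fixed_generic.h (the FIXED_DEBUG configuration routed through arch.h is an assumption here), every macro above validates its operands, prints a diagnostic to stderr when a value leaves its 16- or 32-bit range, and adds an approximate cost to the global celt_mips counter. A small sketch with illustrative names:

#include <stdio.h>
#include "arch.h"  /* assumed to pull in fixed_debug.h in a FIXED_DEBUG build */

static void report_fixed_point_cost(void)
{
   opus_val16 a = QCONST16(0.5f, 15);   /* 0.5 in Q15 == 16384 */
   opus_val16 b = QCONST16(0.25f, 15);  /* 0.25 in Q15 == 8192 */
   opus_val16 c = MULT16_16_Q15(a, b);  /* 0.125 in Q15; bumps celt_mips */
   (void)c;
   PRINT_MIPS(stderr);                  /* "total complexity = ... MIPS" */
}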
new file mode 100644 --- /dev/null +++ b/media/libopus/celt/fixed_generic.h @@ -0,0 +1,126 @@ +/* Copyright (C) 2007-2009 Xiph.Org Foundation + Copyright (C) 2003-2008 Jean-Marc Valin + Copyright (C) 2007-2008 CSIRO */ +/** + @file fixed_generic.h + @brief Generic fixed-point operations +*/ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef FIXED_GENERIC_H +#define FIXED_GENERIC_H + +/** Multiply a 16-bit signed value by a 16-bit unsigned value. The result is a 32-bit signed value */ +#define MULT16_16SU(a,b) ((opus_val32)(opus_val16)(a)*(opus_val32)(opus_uint16)(b)) + +/** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */ +#define MULT16_32_Q16(a,b) ADD32(MULT16_16((a),SHR((b),16)), SHR(MULT16_16SU((a),((b)&0x0000ffff)),16)) + +/** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */ +#define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),((b)&0x0000ffff)),15)) + +/** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */ +#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15)) + +/** Compile-time conversion of float constant to 16-bit value */ +#define QCONST16(x,bits) ((opus_val16)(.5+(x)*(((opus_val32)1)<<(bits)))) + +/** Compile-time conversion of float constant to 32-bit value */ +#define QCONST32(x,bits) ((opus_val32)(.5+(x)*(((opus_val32)1)<<(bits)))) + +/** Negate a 16-bit value */ +#define NEG16(x) (-(x)) +/** Negate a 32-bit value */ +#define NEG32(x) (-(x)) + +/** Change a 32-bit value into a 16-bit value. 
The value is assumed to fit in 16-bit, otherwise the result is undefined */ +#define EXTRACT16(x) ((opus_val16)(x)) +/** Change a 16-bit value into a 32-bit value */ +#define EXTEND32(x) ((opus_val32)(x)) + +/** Arithmetic shift-right of a 16-bit value */ +#define SHR16(a,shift) ((a) >> (shift)) +/** Arithmetic shift-left of a 16-bit value */ +#define SHL16(a,shift) ((opus_int16)((opus_uint16)(a)<<(shift))) +/** Arithmetic shift-right of a 32-bit value */ +#define SHR32(a,shift) ((a) >> (shift)) +/** Arithmetic shift-left of a 32-bit value */ +#define SHL32(a,shift) ((opus_int32)((opus_uint32)(a)<<(shift))) + +/** 32-bit arithmetic shift right with rounding-to-nearest instead of rounding down */ +#define PSHR32(a,shift) (SHR32((a)+((EXTEND32(1)<<((shift))>>1)),shift)) +/** 32-bit arithmetic shift right where the argument can be negative */ +#define VSHR32(a, shift) (((shift)>0) ? SHR32(a, shift) : SHL32(a, -(shift))) + +/** "RAW" macros, should not be used outside of this header file */ +#define SHR(a,shift) ((a) >> (shift)) +#define SHL(a,shift) SHL32(a,shift) +#define PSHR(a,shift) (SHR((a)+((EXTEND32(1)<<((shift))>>1)),shift)) +#define SATURATE(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x))) + +/** Shift by a and round-to-neareast 32-bit value. Result is a 16-bit value */ +#define ROUND16(x,a) (EXTRACT16(PSHR32((x),(a)))) +/** Divide by two */ +#define HALF16(x) (SHR16(x,1)) +#define HALF32(x) (SHR32(x,1)) + +/** Add two 16-bit values */ +#define ADD16(a,b) ((opus_val16)((opus_val16)(a)+(opus_val16)(b))) +/** Subtract two 16-bit values */ +#define SUB16(a,b) ((opus_val16)(a)-(opus_val16)(b)) +/** Add two 32-bit values */ +#define ADD32(a,b) ((opus_val32)(a)+(opus_val32)(b)) +/** Subtract two 32-bit values */ +#define SUB32(a,b) ((opus_val32)(a)-(opus_val32)(b)) + +/** 16x16 multiplication where the result fits in 16 bits */ +#define MULT16_16_16(a,b) ((((opus_val16)(a))*((opus_val16)(b)))) + +/* (opus_val32)(opus_val16) gives TI compiler a hint that it's 16x16->32 multiply */ +/** 16x16 multiplication where the result fits in 32 bits */ +#define MULT16_16(a,b) (((opus_val32)(opus_val16)(a))*((opus_val32)(opus_val16)(b))) + +/** 16x16 multiply-add where the result fits in 32 bits */ +#define MAC16_16(c,a,b) (ADD32((c),MULT16_16((a),(b)))) +/** 16x32 multiply-add, followed by a 15-bit shift right. Results fits in 32 bits */ +#define MAC16_32_Q15(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15))) + +#define MULT16_16_Q11_32(a,b) (SHR(MULT16_16((a),(b)),11)) +#define MULT16_16_Q13(a,b) (SHR(MULT16_16((a),(b)),13)) +#define MULT16_16_Q14(a,b) (SHR(MULT16_16((a),(b)),14)) +#define MULT16_16_Q15(a,b) (SHR(MULT16_16((a),(b)),15)) + +#define MULT16_16_P13(a,b) (SHR(ADD32(4096,MULT16_16((a),(b))),13)) +#define MULT16_16_P14(a,b) (SHR(ADD32(8192,MULT16_16((a),(b))),14)) +#define MULT16_16_P15(a,b) (SHR(ADD32(16384,MULT16_16((a),(b))),15)) + +/** Divide a 32-bit value by a 16-bit value. Result fits in 16 bits */ +#define DIV32_16(a,b) ((opus_val16)(((opus_val32)(a))/((opus_val16)(b)))) + +/** Divide a 32-bit value by a 32-bit value. Result fits in 32 bits */ +#define DIV32(a,b) (((opus_val32)(a))/((opus_val32)(b))) + +#endif
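These macros implement plain Qn fixed-point arithmetic: a Q15 value x stands for x/32768, multiplying a Qa value by a Qb value yields a Q(a+b) product, and the shifts in the MULT16_16_Q* and ROUND16 macros bring the result back to the desired format. A brief sketch under the assumption that arch.h supplies opus_val16/opus_val32 and selects this header in a FIXED_POINT build; apply_gain_q12 and the Q12 signal format are illustrative:

#include "arch.h"

static opus_val16 apply_gain_q12(opus_val16 sample_q12)
{
   const opus_val16 gain = QCONST16(0.75f, 15);    /* 0.75 in Q15 == 24576 */
   opus_val32 prod = MULT16_16(gain, sample_q12);  /* Q15 * Q12 -> Q27, 32 bits */
   return ROUND16(prod, 15);                       /* back to Q12, rounded to nearest */
}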
new file mode 100644 --- /dev/null +++ b/media/libopus/celt/float_cast.h @@ -0,0 +1,138 @@ +/* Copyright (C) 2001 Erik de Castro Lopo <erikd AT mega-nerd DOT com> */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* Version 1.1 */ + +#ifndef FLOAT_CAST_H +#define FLOAT_CAST_H + + +#include "arch.h" + +/*============================================================================ +** On Intel Pentium processors (especially PIII and probably P4), converting +** from float to int is very slow. To meet the C specs, the code produced by +** most C compilers targeting Pentium needs to change the FPU rounding mode +** before the float to int conversion is performed. +** +** Changing the FPU rounding mode causes the FPU pipeline to be flushed. It +** is this flushing of the pipeline which is so slow. +** +** Fortunately the ISO C99 specifications define the functions lrint, lrintf, +** llrint and llrintf which fix this problem as a side effect. +** +** On Unix-like systems, the configure process should have detected the +** presence of these functions. If they weren't found we have to replace them +** here with a standard C cast. +*/ + +/* +** The C99 prototypes for lrint and lrintf are as follows: +** +** long int lrintf (float x) ; +** long int lrint (double x) ; +*/ + +/* The presence of the required functions are detected during the configure +** process and the values HAVE_LRINT and HAVE_LRINTF are set accordingly in +** the config.h file. +*/ + +#if (HAVE_LRINTF) + +/* These defines enable functionality introduced with the 1999 ISO C +** standard. They must be defined before the inclusion of math.h to +** engage them. If optimisation is enabled, these functions will be +** inlined. With optimisation switched off, you have to link in the +** maths library using -lm. 
+*/ + +#define _ISOC9X_SOURCE 1 +#define _ISOC99_SOURCE 1 + +#define __USE_ISOC9X 1 +#define __USE_ISOC99 1 + +#include <math.h> +#define float2int(x) lrintf(x) + +#elif (defined(HAVE_LRINT)) + +#define _ISOC9X_SOURCE 1 +#define _ISOC99_SOURCE 1 + +#define __USE_ISOC9X 1 +#define __USE_ISOC99 1 + +#include <math.h> +#define float2int(x) lrint(x) + +#elif (defined (WIN64) || defined (_WIN64)) + #include <xmmintrin.h> + + __inline long int float2int(float value) + { + return _mm_cvtss_si32(_mm_load_ss(&value)); + } +#elif (defined (WIN32) || defined (_WIN32)) + #include <math.h> + + /* Win32 doesn't seem to have these functions. + ** Therefore implement inline versions of these functions here. + */ + + __inline long int + float2int (float flt) + { int intgr; + + _asm + { fld flt + fistp intgr + } ; + + return intgr ; + } + +#else + +#if (defined(__GNUC__) && defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L) + /* supported by gcc in C99 mode, but not by all other compilers */ + #warning "Don't have the functions lrint() and lrintf ()." + #warning "Replacing these functions with a standard C cast." +#endif /* __STDC_VERSION__ >= 199901L */ + #include <math.h> + #define float2int(flt) ((int)(floor(.5+flt))) +#endif + +static inline opus_int16 FLOAT2INT16(float x) +{ + x = x*CELT_SIG_SCALE; + x = MAX32(x, -32768); + x = MIN32(x, 32767); + return (opus_int16)float2int(x); +} + +#endif /* FLOAT_CAST_H */
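FLOAT2INT16() above combines the fastest available float2int() with scaling and clamping, so converting a float signal to 16-bit PCM is one call per sample. A short sketch, assuming input samples in [-1.0, 1.0] so that CELT_SIG_SCALE (defined in arch.h) maps them to full scale; float_to_pcm16 is an illustrative name:

#include "float_cast.h"

static void float_to_pcm16(const float *in, opus_int16 *out, int n)
{
   int i;
   for (i = 0; i < n; i++)
      out[i] = FLOAT2INT16(in[i]);  /* scale, clamp to [-32768,32767], round */
}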
new file mode 100644 --- /dev/null +++ b/media/libopus/celt/kiss_fft.c @@ -0,0 +1,722 @@ +/*Copyright (c) 2003-2004, Mark Borgerding + Lots of modifications by Jean-Marc Valin + Copyright (c) 2005-2007, Xiph.Org Foundation + Copyright (c) 2008, Xiph.Org Foundation, CSIRO + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE.*/ + +/* This code is originally from Mark Borgerding's KISS-FFT but has been + heavily modified to better suit Opus */ + +#ifndef SKIP_CONFIG_H +# ifdef HAVE_CONFIG_H +# include "config.h" +# endif +#endif + +#include "_kiss_fft_guts.h" +#include "arch.h" +#include "os_support.h" +#include "mathops.h" +#include "stack_alloc.h" +#include "os_support.h" + +/* The guts header contains all the multiplication and addition macros that are defined for + complex numbers. It also delares the kf_ internal functions. 
+*/ + +static void kf_bfly2( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_state *st, + int m, + int N, + int mm + ) +{ + kiss_fft_cpx * Fout2; + const kiss_twiddle_cpx * tw1; + int i,j; + kiss_fft_cpx * Fout_beg = Fout; + for (i=0;i<N;i++) + { + Fout = Fout_beg + i*mm; + Fout2 = Fout + m; + tw1 = st->twiddles; + for(j=0;j<m;j++) + { + kiss_fft_cpx t; + Fout->r = SHR(Fout->r, 1);Fout->i = SHR(Fout->i, 1); + Fout2->r = SHR(Fout2->r, 1);Fout2->i = SHR(Fout2->i, 1); + C_MUL (t, *Fout2 , *tw1); + tw1 += fstride; + C_SUB( *Fout2 , *Fout , t ); + C_ADDTO( *Fout , t ); + ++Fout2; + ++Fout; + } + } +} + +static void ki_bfly2( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_state *st, + int m, + int N, + int mm + ) +{ + kiss_fft_cpx * Fout2; + const kiss_twiddle_cpx * tw1; + kiss_fft_cpx t; + int i,j; + kiss_fft_cpx * Fout_beg = Fout; + for (i=0;i<N;i++) + { + Fout = Fout_beg + i*mm; + Fout2 = Fout + m; + tw1 = st->twiddles; + for(j=0;j<m;j++) + { + C_MULC (t, *Fout2 , *tw1); + tw1 += fstride; + C_SUB( *Fout2 , *Fout , t ); + C_ADDTO( *Fout , t ); + ++Fout2; + ++Fout; + } + } +} + +static void kf_bfly4( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_state *st, + int m, + int N, + int mm + ) +{ + const kiss_twiddle_cpx *tw1,*tw2,*tw3; + kiss_fft_cpx scratch[6]; + const size_t m2=2*m; + const size_t m3=3*m; + int i, j; + + kiss_fft_cpx * Fout_beg = Fout; + for (i=0;i<N;i++) + { + Fout = Fout_beg + i*mm; + tw3 = tw2 = tw1 = st->twiddles; + for (j=0;j<m;j++) + { + C_MUL4(scratch[0],Fout[m] , *tw1 ); + C_MUL4(scratch[1],Fout[m2] , *tw2 ); + C_MUL4(scratch[2],Fout[m3] , *tw3 ); + + Fout->r = PSHR(Fout->r, 2); + Fout->i = PSHR(Fout->i, 2); + C_SUB( scratch[5] , *Fout, scratch[1] ); + C_ADDTO(*Fout, scratch[1]); + C_ADD( scratch[3] , scratch[0] , scratch[2] ); + C_SUB( scratch[4] , scratch[0] , scratch[2] ); + Fout[m2].r = PSHR(Fout[m2].r, 2); + Fout[m2].i = PSHR(Fout[m2].i, 2); + C_SUB( Fout[m2], *Fout, scratch[3] ); + tw1 += fstride; + tw2 += fstride*2; + tw3 += fstride*3; + C_ADDTO( *Fout , scratch[3] ); + + Fout[m].r = scratch[5].r + scratch[4].i; + Fout[m].i = scratch[5].i - scratch[4].r; + Fout[m3].r = scratch[5].r - scratch[4].i; + Fout[m3].i = scratch[5].i + scratch[4].r; + ++Fout; + } + } +} + +static void ki_bfly4( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_state *st, + int m, + int N, + int mm + ) +{ + const kiss_twiddle_cpx *tw1,*tw2,*tw3; + kiss_fft_cpx scratch[6]; + const size_t m2=2*m; + const size_t m3=3*m; + int i, j; + + kiss_fft_cpx * Fout_beg = Fout; + for (i=0;i<N;i++) + { + Fout = Fout_beg + i*mm; + tw3 = tw2 = tw1 = st->twiddles; + for (j=0;j<m;j++) + { + C_MULC(scratch[0],Fout[m] , *tw1 ); + C_MULC(scratch[1],Fout[m2] , *tw2 ); + C_MULC(scratch[2],Fout[m3] , *tw3 ); + + C_SUB( scratch[5] , *Fout, scratch[1] ); + C_ADDTO(*Fout, scratch[1]); + C_ADD( scratch[3] , scratch[0] , scratch[2] ); + C_SUB( scratch[4] , scratch[0] , scratch[2] ); + C_SUB( Fout[m2], *Fout, scratch[3] ); + tw1 += fstride; + tw2 += fstride*2; + tw3 += fstride*3; + C_ADDTO( *Fout , scratch[3] ); + + Fout[m].r = scratch[5].r - scratch[4].i; + Fout[m].i = scratch[5].i + scratch[4].r; + Fout[m3].r = scratch[5].r + scratch[4].i; + Fout[m3].i = scratch[5].i - scratch[4].r; + ++Fout; + } + } +} + +#ifndef RADIX_TWO_ONLY + +static void kf_bfly3( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_state *st, + int m, + int N, + int mm + ) +{ + int i; + size_t k; + const size_t m2 = 2*m; + const kiss_twiddle_cpx *tw1,*tw2; + kiss_fft_cpx 
scratch[5]; + kiss_twiddle_cpx epi3; + + kiss_fft_cpx * Fout_beg = Fout; + epi3 = st->twiddles[fstride*m]; + for (i=0;i<N;i++) + { + Fout = Fout_beg + i*mm; + tw1=tw2=st->twiddles; + k=m; + do { + C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3); + + C_MUL(scratch[1],Fout[m] , *tw1); + C_MUL(scratch[2],Fout[m2] , *tw2); + + C_ADD(scratch[3],scratch[1],scratch[2]); + C_SUB(scratch[0],scratch[1],scratch[2]); + tw1 += fstride; + tw2 += fstride*2; + + Fout[m].r = Fout->r - HALF_OF(scratch[3].r); + Fout[m].i = Fout->i - HALF_OF(scratch[3].i); + + C_MULBYSCALAR( scratch[0] , epi3.i ); + + C_ADDTO(*Fout,scratch[3]); + + Fout[m2].r = Fout[m].r + scratch[0].i; + Fout[m2].i = Fout[m].i - scratch[0].r; + + Fout[m].r -= scratch[0].i; + Fout[m].i += scratch[0].r; + + ++Fout; + } while(--k); + } +} + +static void ki_bfly3( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_state *st, + int m, + int N, + int mm + ) +{ + int i, k; + const size_t m2 = 2*m; + const kiss_twiddle_cpx *tw1,*tw2; + kiss_fft_cpx scratch[5]; + kiss_twiddle_cpx epi3; + + kiss_fft_cpx * Fout_beg = Fout; + epi3 = st->twiddles[fstride*m]; + for (i=0;i<N;i++) + { + Fout = Fout_beg + i*mm; + tw1=tw2=st->twiddles; + k=m; + do{ + + C_MULC(scratch[1],Fout[m] , *tw1); + C_MULC(scratch[2],Fout[m2] , *tw2); + + C_ADD(scratch[3],scratch[1],scratch[2]); + C_SUB(scratch[0],scratch[1],scratch[2]); + tw1 += fstride; + tw2 += fstride*2; + + Fout[m].r = Fout->r - HALF_OF(scratch[3].r); + Fout[m].i = Fout->i - HALF_OF(scratch[3].i); + + C_MULBYSCALAR( scratch[0] , -epi3.i ); + + C_ADDTO(*Fout,scratch[3]); + + Fout[m2].r = Fout[m].r + scratch[0].i; + Fout[m2].i = Fout[m].i - scratch[0].r; + + Fout[m].r -= scratch[0].i; + Fout[m].i += scratch[0].r; + + ++Fout; + }while(--k); + } +} + +static void kf_bfly5( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_state *st, + int m, + int N, + int mm + ) +{ + kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; + int i, u; + kiss_fft_cpx scratch[13]; + const kiss_twiddle_cpx * twiddles = st->twiddles; + const kiss_twiddle_cpx *tw; + kiss_twiddle_cpx ya,yb; + kiss_fft_cpx * Fout_beg = Fout; + + ya = twiddles[fstride*m]; + yb = twiddles[fstride*2*m]; + tw=st->twiddles; + + for (i=0;i<N;i++) + { + Fout = Fout_beg + i*mm; + Fout0=Fout; + Fout1=Fout0+m; + Fout2=Fout0+2*m; + Fout3=Fout0+3*m; + Fout4=Fout0+4*m; + + for ( u=0; u<m; ++u ) { + C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5); + scratch[0] = *Fout0; + + C_MUL(scratch[1] ,*Fout1, tw[u*fstride]); + C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]); + C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]); + C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]); + + C_ADD( scratch[7],scratch[1],scratch[4]); + C_SUB( scratch[10],scratch[1],scratch[4]); + C_ADD( scratch[8],scratch[2],scratch[3]); + C_SUB( scratch[9],scratch[2],scratch[3]); + + Fout0->r += scratch[7].r + scratch[8].r; + Fout0->i += scratch[7].i + scratch[8].i; + + scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r); + scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r); + + scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i); + scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i); + + C_SUB(*Fout1,scratch[5],scratch[6]); + C_ADD(*Fout4,scratch[5],scratch[6]); + + scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r); + scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r); + 
scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i); + scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i); + + C_ADD(*Fout2,scratch[11],scratch[12]); + C_SUB(*Fout3,scratch[11],scratch[12]); + + ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; + } + } +} + +static void ki_bfly5( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_state *st, + int m, + int N, + int mm + ) +{ + kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; + int i, u; + kiss_fft_cpx scratch[13]; + const kiss_twiddle_cpx * twiddles = st->twiddles; + const kiss_twiddle_cpx *tw; + kiss_twiddle_cpx ya,yb; + kiss_fft_cpx * Fout_beg = Fout; + + ya = twiddles[fstride*m]; + yb = twiddles[fstride*2*m]; + tw=st->twiddles; + + for (i=0;i<N;i++) + { + Fout = Fout_beg + i*mm; + Fout0=Fout; + Fout1=Fout0+m; + Fout2=Fout0+2*m; + Fout3=Fout0+3*m; + Fout4=Fout0+4*m; + + for ( u=0; u<m; ++u ) { + scratch[0] = *Fout0; + + C_MULC(scratch[1] ,*Fout1, tw[u*fstride]); + C_MULC(scratch[2] ,*Fout2, tw[2*u*fstride]); + C_MULC(scratch[3] ,*Fout3, tw[3*u*fstride]); + C_MULC(scratch[4] ,*Fout4, tw[4*u*fstride]); + + C_ADD( scratch[7],scratch[1],scratch[4]); + C_SUB( scratch[10],scratch[1],scratch[4]); + C_ADD( scratch[8],scratch[2],scratch[3]); + C_SUB( scratch[9],scratch[2],scratch[3]); + + Fout0->r += scratch[7].r + scratch[8].r; + Fout0->i += scratch[7].i + scratch[8].i; + + scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r); + scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r); + + scratch[6].r = -S_MUL(scratch[10].i,ya.i) - S_MUL(scratch[9].i,yb.i); + scratch[6].i = S_MUL(scratch[10].r,ya.i) + S_MUL(scratch[9].r,yb.i); + + C_SUB(*Fout1,scratch[5],scratch[6]); + C_ADD(*Fout4,scratch[5],scratch[6]); + + scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r); + scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r); + scratch[12].r = S_MUL(scratch[10].i,yb.i) - S_MUL(scratch[9].i,ya.i); + scratch[12].i = -S_MUL(scratch[10].r,yb.i) + S_MUL(scratch[9].r,ya.i); + + C_ADD(*Fout2,scratch[11],scratch[12]); + C_SUB(*Fout3,scratch[11],scratch[12]); + + ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; + } + } +} + +#endif + + +#ifdef CUSTOM_MODES + +static +void compute_bitrev_table( + int Fout, + opus_int16 *f, + const size_t fstride, + int in_stride, + opus_int16 * factors, + const kiss_fft_state *st + ) +{ + const int p=*factors++; /* the radix */ + const int m=*factors++; /* stage's fft length/p */ + + /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/ + if (m==1) + { + int j; + for (j=0;j<p;j++) + { + *f = Fout+j; + f += fstride*in_stride; + } + } else { + int j; + for (j=0;j<p;j++) + { + compute_bitrev_table( Fout , f, fstride*p, in_stride, factors,st); + f += fstride*in_stride; + Fout += m; + } + } +} + +/* facbuf is populated by p1,m1,p2,m2, ... + where + p[i] * m[i] = m[i-1] + m0 = n */ +static +int kf_factor(int n,opus_int16 * facbuf) +{ + int p=4; + + /*factor out powers of 4, powers of 2, then any remaining primes */ + do { + while (n % p) {